Code example #1
def create_model_main(users, model_id, is_normalise=True):
    """
    creating markov chain model for users text
    """
    mc = markov_chain(model_id, booster)
    for m_user in users:
        timeline_text = tools.flush(m_user.timeline, lambda x:x['text'])
        for timeline_text_el in timeline_text:
            message = get_words(timeline_text_el, is_normalise=is_normalise)
            mc.add_message(message)
    mc.save()
    return mc
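A minimal usage sketch; the screen names and the 'main' model id are hypothetical, and engine.scrap, markov_chain and booster are assumed to be the same project-level objects used in the later examples:

loaded = [engine.scrap(name, neighbourhood=0) for name in ('alice', 'bob')]
loaded = [u for u in loaded if u]   # skip users that could not be loaded
shared_mc = create_model_main(loaded, model_id='main')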
Code example #2
def create_model(user, is_normalise=True, mc=None):
    """
    creating model for one user
    """
    if not mc:
        mc = markov_chain(user.name_, booster)

    timeline_text = tools.flush(user.timeline, lambda x:x['text'])

    for tt_el in timeline_text:
        mc.add_message(get_words(tt_el, is_normalise=is_normalise))

    mc.save()
    return mc
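Because the mc argument is optional, the same chain can be threaded through successive calls so that several users accumulate into one model, which is how big_differences below builds model_main. A minimal sketch (loaded_users is a hypothetical list of user objects):

shared = None
for u in loaded_users:
    shared = create_model(u, mc=shared)   # first call creates the chain, later calls extend it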
Code example #3
def clust(models):
    out = []
    for mc in models:
        t1 = time.time()

        # pair each candidate model with its score and keep the best one
        score, nearest = max(
            ((diff_markov_chains(mc.model_id_, el.model_id_, booster), el)
             for el in models if el != mc),
            key=lambda pair: pair[0])
        nearest.print_me()
        print score
        new_mc_id = booster.sum_models(mc.model_id_, nearest.model_id_)
        new_mc = markov_chain(new_mc_id, booster)
        out.append(new_mc)

        t2 = time.time()
        print 'time: ', t2 - t1
    return clust(out)
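As written, out contains one merged model per input model, so the recursive call clust(out) always receives a list of the same length and never terminates. A hedged sketch of an alternative that merges only the single closest pair per round and stops at a target count; the stopping rule and the target_count parameter are assumptions, not part of the original code:

import itertools

def clust_pairwise(models, target_count=1):
    # stop once the model list is small enough (assumed stopping rule)
    if len(models) <= target_count:
        return models
    # pick the closest pair, following the original convention that a larger
    # diff_markov_chains value means "nearer"
    a, b = max(itertools.combinations(models, 2),
               key=lambda pair: diff_markov_chains(pair[0].model_id_,
                                                   pair[1].model_id_, booster))
    merged = markov_chain(booster.sum_models(a.model_id_, b.model_id_), booster)
    rest = [m for m in models if m is not a and m is not b]
    return clust_pairwise(rest + [merged], target_count)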
Code example #4
def big_differences():
    log.info('extract messages')

    users = main_db.get_not_loaded_users()

    model_main = markov_chain('main', booster)
    result = []

    log.info('--------- users to load: %s -------------------------------' % len(users))
    loaded_users = []
    for user in users:
        log.info('load user %s' % user)
        loaded_user = engine.scrap(user, neighbourhood=0)
        if not loaded_user:
            continue
        main_db.set_user_loaded(user)
        model_main = create_model(loaded_user, mc=model_main)
        create_model(loaded_user)
        loaded_users.append(loaded_user)

    log.info('---------start process differences of models--------------')
    for user in loaded_users:
        model_current = markov_chain.create(user.name_, booster)
        diff_element = diff_markov_chains(model_main, model_current)
        result.append({'name': user.name_, 'x': diff_element['content'], 'y': user.timeline_count})
        log.info('create difference... %s' % diff_element['content'])

    diff_main = diff_markov_chains(model_main, model_main)
    nodes, edges = model_main.get_unique_nodes_edges()
    model_diffs = [
            {'x': diff_main['content'], 'y': float(edges) / nodes},
    ]
    vis.visualise(result,
                  header='diff and tweets count',
                  x_title='difference between this and main',
                  y_title='count tweets',
                  spec_symbols=model_diffs)

    model_main.visualise(100)
Code example #5
def process_names(file_name, class_name):
    """
    get from file ser names, scrapping saving and forming markov chains for any user timeline
    """
    with open(file_name) as names_file:
        names = names_file.readlines()
    result = []
    for name in names:
        name = tools.imply_dog(name, with_dog=True).strip()
        log.info("start processing name %s" % name)

        user = api_engine.scrap(name)
        db_.set_class(name, class_name)
        mc = markov_chain(name, booster)

        for t_el in user.timeline:
            log.debug('>>>>%s' % t_el)
            if t_el:
                mc.add_message(model_splitter(t_el['text']))

        mc.save()
        result.append(mc)
    return result
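A hedged usage example, assuming hypothetical input files politicians.txt and musicians.txt with one screen name per line:

chains = []
for file_name, class_name in (('politicians.txt', 'politician'),
                              ('musicians.txt', 'musician')):
    chains.extend(process_names(file_name, class_name))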