예제 #1
0
def create_one_big_model(models):
    """Merge every model in ``models`` into one combined markov chain.

    The model ids are folded left to right with ``booster.sum_models``:
    the first id is the starting value and each following id is summed
    into the running result. A single-element input needs no summing, so
    the chain is created directly from that model's id. Every merge step
    (including the first one) is logged.

    Args:
        models: non-empty sequence of objects exposing ``model_id_``.

    Returns:
        The value of ``markov_chain.create(merged_id, booster)`` for the
        id produced by the last merge.

    Raises:
        ValueError: if ``models`` is empty.
    """
    if not models:
        # Fail with a clear message instead of an IndexError on models[0].
        raise ValueError('create_one_big_model() needs at least one model')

    log.info('create big model')
    merged_id = models[0].model_id_
    for model in models[1:]:
        log.info('difference between: %s  < -- > %s' % (merged_id, model.model_id_))
        merged_id = booster.sum_models(merged_id, model.model_id_)
        # %-formatting instead of ``+`` so a non-string id cannot raise here.
        log.info('is win! : %s' % (merged_id,))
    return markov_chain.create(merged_id, booster)
def big_differences():
    """Plot how far each newly loaded user's chain is from the 'main' chain.

    Steps, in order:

    1. Fetch the not-yet-loaded users from ``main_db`` and scrape each one
       with ``engine.scrap``; users whose scrape result is falsy are skipped.
    2. For every scraped user: mark it as loaded, fold it into the 'main'
       chain through ``create_model(..., mc=model_main)`` and call
       ``create_model`` once more without ``mc``.
    3. Diff each user's chain (created from ``user.name_``) against the main
       chain and collect one point per user: ``x`` is the diff content and
       ``y`` is the user's ``timeline_count``.
    4. Pass the points to ``vis.visualise`` together with one special point
       (main chain diffed against itself, edges/nodes ratio), then call
       ``model_main.visualise(100)``.

    Each user contributes exactly one point and each chart is rendered once.
    """
    log.info('extract messages')

    users = main_db.get_not_loaded_users()
    model_main = markov_chain('main', booster)

    log.info('---------users to find is %s-------------------------------' % len(users))
    loaded_users = []
    for user in users:
        log.info('load user %s' % user)
        loaded_user = engine.scrap(user, neighbourhood=0)
        if not loaded_user:
            continue
        main_db.set_user_loaded(user)
        model_main = create_model(loaded_user, mc=model_main)
        # NOTE(review): presumably this call builds the per-user model that
        # markov_chain.create(user.name_, booster) reads back below - confirm.
        create_model(loaded_user)
        loaded_users.append(loaded_user)

    log.info('---------start process differences of models--------------')
    result = []
    for user in loaded_users:
        model_current = markov_chain.create(user.name_, booster)
        diff_element = diff_markov_chains(model_main, model_current)
        result.append({'name': user.name_, 'x': diff_element['content'], 'y': user.timeline_count})
        log.info('create difference... %s' % diff_element['content'])

    # Reference point: the main chain compared with itself.
    diff_main = diff_markov_chains(model_main, model_main)
    nodes, edges = model_main.get_unique_nodes_edges()
    # A chain without nodes would otherwise raise ZeroDivisionError.
    edges_per_node = float(edges) / nodes if nodes else 0.0
    model_diffs = [
        {'x': diff_main['content'], 'y': edges_per_node},
    ]
    vis.visualise(result,
                  header='diff and tweets count',
                  x_title='difference between this and main',
                  y_title='count tweets',
                  spec_symbols=model_diffs)

    model_main.visualise(100)

if __name__ == '__main__':
    # Script entry point. The little_differences() run is currently disabled;
    # only the 'no_spam' chain is created and visualised.
    model_spam = markov_chain.create('no_spam', booster)
    # 100 is handed straight to visualise(); its meaning is defined there.
    model_spam.visualise(100)

##visualise





예제 #4
0
def get_models(model_ids):
    """Create one markov chain per model id, keeping the input order.

    Args:
        model_ids: iterable of model identifiers.

    Returns:
        A new list holding ``markov_chain.create(model_id, booster)`` for
        each id, in iteration order; empty when ``model_ids`` is empty.
    """
    return [markov_chain.create(current_id, booster) for current_id in model_ids]