def create_one_big_model(models):
    """Merge every model in *models* into one chain and return it.

    The models are folded left to right: the running id starts as the first
    model's ``model_id_`` and is replaced by ``booster.sum_models(running,
    next_id)`` for each following model. The final id is handed to
    ``markov_chain.create`` together with ``booster``.

    A single-element sequence is accepted: no summing takes place and the
    chain is created straight from that model's id.

    :param models: non-empty sequence of objects exposing ``model_id_``.
    :return: whatever ``markov_chain.create(merged_id, booster)`` returns.
    :raises IndexError: if *models* is empty.
    """
    log.info('create big model')
    # Start from the first id so that one model needs no special casing.
    merged_id = models[0].model_id_
    for model in models[1:]:
        log.info('difference between: %s < -- > %s' % (merged_id, model.model_id_))
        merged_id = booster.sum_models(merged_id, model.model_id_)
    # %-formatting instead of '+' so a non-string id cannot raise TypeError.
    log.info('is win! : %s' % merged_id)
    return markov_chain.create(merged_id, booster)
def big_differences():
    """Scrape pending users, grow the main chain and plot per-user differences.

    Steps, in order:

    1. Ask ``main_db`` for the users that are not loaded yet.
    2. For each user that ``engine.scrap`` returns something for, mark the
       user as loaded, feed it into the shared ``'main'`` chain and call
       ``create_model`` once more without ``mc``.
    3. For each loaded user, diff the main chain against the chain created
       from the user's name and collect one point
       ``{'name', 'x': diff content, 'y': timeline_count}``.
    4. Plot the points with ``vis.visualise``, adding one special marker
       built from the main chain diffed against itself and its
       edges-per-node ratio, then visualise the main chain.

    :return: None; the function works through its side effects only.
    """
    log.info('extract messages')
    users = main_db.get_not_loaded_users()
    model_main = markov_chain('main', booster)
    log.info('---------users to find is %s-------------------------------' % len(users))

    loaded_users = []
    for user in users:
        log.info('load user %s' % user)
        loaded_user = engine.scrap(user, neighbourhood=0)
        if not loaded_user:
            # Nothing usable came back: the user is skipped and is NOT
            # marked as loaded.
            continue
        main_db.set_user_loaded(user)
        model_main = create_model(loaded_user, mc=model_main)
        # NOTE(review): presumably this second call builds the per-user chain
        # that markov_chain.create(user.name_, ...) picks up below -- confirm.
        create_model(loaded_user)
        loaded_users.append(loaded_user)

    log.info('---------start process differences of models--------------')
    result = []
    for user in loaded_users:
        model_current = markov_chain.create(user.name_, booster)
        diff_element = diff_markov_chains(model_main, model_current)
        result.append({'name': user.name_,
                       'x': diff_element['content'],
                       'y': user.timeline_count})
        log.info('create difference... %s' % diff_element['content'])

    # Reference marker: the main chain compared with itself.
    diff_main = diff_markov_chains(model_main, model_main)
    nodes, edges = model_main.get_unique_nodes_edges()
    # A chain without nodes would otherwise raise ZeroDivisionError here;
    # report a ratio of 0.0 for it instead.
    edges_per_node = float(edges) / nodes if nodes else 0.0
    model_diffs = [
        {'x': diff_main['content'], 'y': edges_per_node},
    ]
    vis.visualise(result,
                  header='diff and tweets count',
                  x_title='difference between this and main',
                  y_title='count tweets',
                  spec_symbols=model_diffs)
    model_main.visualise(100)
# NOTE(review): the statements down to model_main.visualise(100) read
# `loaded_users`, `model_main` and `result`, none of which is assigned in this
# fragment. They repeat the closing statements of big_differences() and
# presumably belong inside a function body -- confirm against the original
# file layout before relying on them.
for user in loaded_users:
    # Chain created from the user's name, diffed against the main chain.
    model_current = markov_chain.create(user.name_, booster)
    diff_element = diff_markov_chains(model_main, model_current)
    # One plot point per user: x = diff content, y = the user's timeline_count.
    result.append({'name': user.name_, 'x': diff_element['content'], 'y': user.timeline_count})
    log.info('create difference... %s' % diff_element['content'])
# Main chain diffed against itself; used as the x of the special marker below.
diff_main = diff_markov_chains(model_main, model_main)
nodes, edges = model_main.get_unique_nodes_edges()
model_diffs = [
    # y is the edges-per-node ratio of the main chain.
    {'x': diff_main['content'], 'y': float(edges) / nodes},
]
vis.visualise(result,
              header='diff and tweets count',
              x_title='difference between this and main',
              y_title='count tweets',
              spec_symbols=model_diffs)
model_main.visualise(100)

# Script entry point: create the 'no_spam' chain and visualise it.
if __name__ == '__main__':
    # little_differences()
    model_spam = markov_chain.create('no_spam', booster)
    model_spam.visualise(100)
    ##visualise
def get_models(model_ids):
    """Return a list with one chain per id in *model_ids*, in the same order.

    Each entry is the result of ``markov_chain.create(model_id, booster)``.
    """
    return [markov_chain.create(chain_id, booster) for chain_id in model_ids]