def visualise_digits_count(self):
    """Plot one point per document of the ``clust_values`` collection.

    Each document read from ``self.db['clust_values']`` contributes a point
    whose ``x`` is the document's ``_id`` and whose ``y`` is its
    ``value.count``.  The points are handed to ``visualise`` with the axis
    titles 'digit' and 'count in usernames'.
    """
    documents = self.db['clust_values'].find()
    points = [
        {'x': doc['_id'], 'y': doc['value']['count']}
        for doc in documents
    ]
    visualise(points, x_title='digit', y_title='count in usernames')
    def visualise_entropy(self):
        """Plot entropy against the length of ``value`` for each cluster.

        Only documents of ``self.clust`` that have an ``entropy`` field are
        used.  For each of them ``x`` is ``len(doc['value'])`` and ``y`` is
        ``doc['entropy']``; the points are passed to ``visualise``.
        """
        # TODO: visualise without normal values
        with_entropy = self.clust.find({'entropy': {'$exists': True}})
        points = [
            {'x': len(doc['value']), 'y': doc['entropy']}
            for doc in with_entropy
        ]
        visualise(points, x_title='count of variable digits', y_title='entropy')
 def visualise_clust(self, max, min_count=100):
     """Plot how many cluster documents exist for each ``value`` size.

     For every ``size`` in ``range(max)`` the documents of ``self.clust``
     matching ``{'value': {'$size': size}}`` are counted.  Sizes with fewer
     than ``min_count`` matches are left out; the rest are printed and
     plotted as ``x = size``, ``y = number of matching documents``.

     :param max: exclusive upper bound of the sizes to query (the name
         shadows the builtin but is kept for caller compatibility).
     :param min_count: minimum number of matching documents a size needs in
         order to be plotted; the default of 100 keeps the previous
         hard-coded cutoff.
     """
     points = []
     for size in range(max):
         # NOTE(review): if self.clust is a PyMongo collection, Cursor.count()
         # is deprecated/removed in newer driver versions in favour of
         # Collection.count_documents() -- confirm the driver version in use.
         matches = self.clust.find({'value': {'$size': size}}).count()
         if matches < min_count:
             continue
         points.append({'x': size, 'y': matches})
         # Formatted as a single string so the output is the same
         # "<size> <matches>" line under both Python 2 and Python 3.
         print('%s %s' % (size, matches))
     visualise(points, header='count of names and', x_title='count of variable digits',
               y_title='count of cluster names')
def big_differences():
    """Scrape pending users and plot how far each user's model is from the main one.

    Steps, in order:

    1. Fetch the users returned by ``main_db.get_not_loaded_users()`` and
       scrape each one with ``engine.scrap(user, neighbourhood=0)``; users
       whose scrape result is falsy are skipped.
    2. Every scraped user is marked as loaded, folded into the shared
       'main' model via ``create_model(..., mc=main_model)`` and also passed
       to ``create_model`` on its own (return value unused here --
       presumably this stores a per-user model; verify against
       ``create_model``).
    3. For each scraped user a model is obtained with
       ``markov_chain.create(user.name_, booster)`` and diffed against the
       main model; the diff's ``content`` becomes ``x`` and the user's
       ``timeline_count`` becomes ``y``.
    4. One extra point (main model diffed with itself, edges-per-node ratio)
       is passed as ``spec_symbols``, the chart is drawn through
       ``vis.visualise`` and finally ``main_model.visualise(100)`` is called.
    """
    log.info('extract messages')

    pending = main_db.get_not_loaded_users()

    main_model = markov_chain('main', booster)

    log.info('---------users to find is %s-------------------------------' % len(pending))
    scraped = []
    for user in pending:
        log.info('load user %s' % user)
        profile = engine.scrap(user, neighbourhood=0)
        if profile:
            main_db.set_user_loaded(user)
            main_model = create_model(profile, mc=main_model)
            create_model(profile)
            scraped.append(profile)

    log.info('---------start process differences of models--------------')
    points = []
    for profile in scraped:
        user_model = markov_chain.create(profile.name_, booster)
        user_diff = diff_markov_chains(main_model, user_model)
        points.append({
            'name': profile.name_,
            'x': user_diff['content'],
            'y': profile.timeline_count,
        })
        log.info('create difference... %s' % user_diff['content'])

    # Reference marker: the main model compared with itself, placed at its
    # edges-per-node ratio on the y axis.
    self_diff = diff_markov_chains(main_model, main_model)
    node_count, edge_count = main_model.get_unique_nodes_edges()
    reference_points = [
        {'x': self_diff['content'], 'y': float(edge_count) / node_count},
    ]
    vis.visualise(
        points,
        header='diff and tweets count',
        x_title='difference between this and main',
        y_title='count tweets',
        spec_symbols=reference_points,
    )

    main_model.visualise(100)