def visualise_digits_count(self):
    """Plot the stored count for every document in ``clust_values``.

    Each document yields one point: its ``_id`` becomes ``x`` (axis title
    'digit') and its ``value.count`` becomes ``y`` (axis title
    'count in usernames').  Points are passed to ``visualise`` in the
    order the cursor returns them.
    """
    cursor = self.db['clust_values'].find()
    points = [
        {'x': doc['_id'], 'y': doc['value']['count']}
        for doc in cursor
    ]
    visualise(points, x_title='digit', y_title='count in usernames')
def visualise_entropy(self):
    """Plot entropy against the length of ``value`` for clustered documents.

    Only documents of ``self.clust`` that have an ``entropy`` field are
    considered.  For each one, ``x`` is ``len(doc['value'])`` (axis title
    'count of variable digits') and ``y`` is the stored ``entropy``.
    """
    # TODO: visualise without normal values
    with_entropy = self.clust.find({'entropy': {'$exists': True}})
    points = [
        {'x': len(doc['value']), 'y': doc['entropy']}
        for doc in with_entropy
    ]
    visualise(points,
              x_title='count of variable digits',
              y_title='entropy')
def visualise_clust(self, max, min_count=100):
    """Plot how many cluster documents have a ``value`` array of each size.

    For every size ``i`` in ``range(max)`` the number of documents in
    ``self.clust`` whose ``value`` array has exactly ``i`` elements is
    counted.  Sizes with fewer than ``min_count`` matches are left out of
    the plot; every plotted pair is also printed as ``"<size> <count>"``.

    :param max: exclusive upper bound of the sizes to inspect.  The name
        shadows the builtin but is kept for caller compatibility.
    :param min_count: smallest document count a size needs to be plotted
        (defaults to the previously hard-coded 100).
    """
    points = []
    for size in range(max):
        matches = self.clust.find({'value': {'$size': size}}).count()
        if matches >= min_count:
            points.append({'x': size, 'y': matches})
            # Single-argument form prints the same text under both the
            # Python 2 print statement and the Python 3 print function.
            print('%s %s' % (size, matches))
    visualise(points,
              header='count of names and',
              x_title='count of variable digits',
              y_title='count of cluster names')
def big_differences():
    """Compare each newly scraped user's Markov chain with a combined one.

    Steps, in order:

    1. Fetch the users reported by ``main_db.get_not_loaded_users()`` and
       scrape each with ``engine.scrap(user, neighbourhood=0)``; users for
       which scraping returns a falsy value are skipped.
    2. For every scraped user, mark it loaded, feed it into the shared
       'main' model via ``create_model(..., mc=model_main)`` and also call
       ``create_model`` for the user alone.
       NOTE(review): presumably the second call persists a per-user model
       that ``markov_chain.create`` picks up later -- confirm.
    3. Diff the main model against each user's model and collect points
       (``x`` = diff ``content``, ``y`` = the user's ``timeline_count``).
    4. Plot those points, with one extra marker built from the main model
       diffed against itself and its edges-per-node ratio, then visualise
       the main model itself.
    """
    log.info('extract messages')
    pending_users = main_db.get_not_loaded_users()
    model_main = markov_chain('main', booster)
    log.info('---------users to find is %s-------------------------------' % len(pending_users))

    scraped_users = []
    for user in pending_users:
        log.info('load user %s' % user)
        loaded_user = engine.scrap(user, neighbourhood=0)
        if loaded_user:
            main_db.set_user_loaded(user)
            # Grow the shared model first, then build the user's own model.
            model_main = create_model(loaded_user, mc=model_main)
            create_model(loaded_user)
            scraped_users.append(loaded_user)

    log.info('---------start process differences of models--------------')
    user_points = []
    for scraped in scraped_users:
        user_model = markov_chain.create(scraped.name_, booster)
        difference = diff_markov_chains(model_main, user_model)
        user_points.append({
            'name': scraped.name_,
            'x': difference['content'],
            'y': scraped.timeline_count,
        })
        log.info('create difference... %s' % difference['content'])

    # The main model diffed against itself supplies the x position of the
    # special marker; its y is the model's edges-per-node ratio.
    self_difference = diff_markov_chains(model_main, model_main)
    nodes, edges = model_main.get_unique_nodes_edges()
    special_points = [
        {'x': self_difference['content'], 'y': float(edges) / nodes},
    ]

    vis.visualise(user_points,
                  header='diff and tweets count',
                  x_title='difference between this and main',
                  y_title='count tweets',
                  spec_symbols=special_points)
    model_main.visualise(100)