def results(doc_id):
    """Render the SameDiff results page for a stored comparison job.

    Args:
        doc_id: Identifier of the 'samediff' document to load through ``mongo``.

    Returns:
        The rendered 'samediff/results.html' template, or 'no_results.html'
        when the job cannot be loaded.
    """
    remaining_days = None
    try:
        job = mongo.find_document('samediff', doc_id)
        # NOTE(review): an empty sample_id presumably marks a user upload
        # (as opposed to a bundled sample) - confirm against the uploader.
        if job['sample_id'] == u'':
            remaining_days = mongo.get_remaining_days('samediff', doc_id)
    except Exception:
        # Any lookup failure (missing doc, missing key, DB error) is shown as
        # "no results". KeyboardInterrupt/SystemExit are deliberately not
        # swallowed, which the previous bare ``except:`` did.
        logger.warning("Unable to find doc '%s'", doc_id)
        return render_template('no_results.html', tool_name='samediff')

    same_words = job['sameWords']
    doc2_only_words = job['diffWordsDoc2']
    # Each entry is indexed as entry[1] for the word; fall back to '' when the
    # list is too short to have that rank.
    whatnext = {
        'most_common_word': same_words[0][1] if len(same_words) > 0 else '',
        'second_most_common_word': same_words[1][1] if len(same_words) > 1 else '',
        'doc2_most_common_word': doc2_only_words[0][1] if len(doc2_only_words) > 0 else '',
    }

    # Length difference relative to the longer document. When both documents
    # are empty the ratio is undefined; report 0.0 instead of dividing by zero.
    total_doc1 = job['totalWordsDoc1']
    total_doc2 = job['totalWordsDoc2']
    longer_total = max(total_doc1, total_doc2)
    if longer_total:
        pct_length_diff = float(abs(total_doc1 - total_doc2)) / float(longer_total)
    else:
        pct_length_diff = 0.0

    cosine_similarity = {
        'score': job['cosineSimilarity'],
        'description': interpretCosineSimilarity(job['cosineSimilarity']),
    }
    return render_template(
        'samediff/results.html',
        results=job,
        pct_length_diff=pct_length_diff,
        cosine_similarity=cosine_similarity,
        whatnext=whatnext,
        tool_name='samediff',
        doc_id=doc_id,
        remaining_days=remaining_days,
    )
def render_results(doc_id):
    """Render the ConnectTheDots results page for a stored document.

    Builds the "what next" hints by comparing the stored table order with the
    same rows sorted by descending 'degree', then renders
    'connectthedots/results.html'.

    Args:
        doc_id: Identifier of the 'connectthedots' document to load.

    Returns:
        The rendered results template.
    """
    doc = mongo.find_document('connectthedots', doc_id)
    results = doc.get('results')
    if doc.get('source') != 'sample':
        remaining_days = mongo.get_remaining_days('connectthedots', doc_id)
    else:
        remaining_days = None

    # NOTE(review): the stored table is used as-is for the centrality ranking,
    # so it is presumably already sorted by centrality - confirm.
    table_by_centrality = results['table']
    table_by_degree = sorted(table_by_centrality,
                             key=operator.itemgetter('degree'),
                             reverse=True)

    # Find the first rank at which the two orderings disagree.
    first_mismatch = None
    degree_index = 0
    centrality_index = 0
    ranked_pairs = zip(table_by_degree, table_by_centrality)
    for rank, (degree_row, centrality_row) in enumerate(ranked_pairs):
        if degree_row['id'] != centrality_row['id']:
            first_mismatch = degree_row['id']
            degree_index = rank
            break

    if first_mismatch is not None:
        # All earlier ranks matched, so the mismatched node can only appear
        # later in the centrality ordering; resume the scan from there.
        resume_at = degree_index + 1
        for rank, row in enumerate(table_by_centrality[resume_at:], resume_at):
            if row['id'] == first_mismatch:
                centrality_index = rank
                break

    what_next = {
        'mismatch_id': first_mismatch,
        'mismatch_degree': ordinal(degree_index + 1),
        'mismatch_centrality': ordinal(centrality_index + 1),
        # An empty table has no lowest-degree node; avoid IndexError on [-1].
        'lowest_degree': table_by_degree[-1]['id'] if table_by_degree else None,
    }
    biography = results.get('biography')

    return render_template(
        'connectthedots/results.html',
        results=results,
        whatnext=what_next,
        tool_name='connectthedots',
        source=doc['source'],
        has_multiple_sheets=results['has_multiple_sheets'],
        remaining_days=remaining_days,
        biography=biography,
    )
def render_results(doc_id):
    """Render the ConnectTheDots results page for a stored document.

    Compares the stored table order against the rows sorted by descending
    'degree' to build the "what next" hints, then renders
    'connectthedots/results.html'.

    Args:
        doc_id: Identifier of the 'connectthedots' document to load.

    Returns:
        The rendered results template.
    """
    doc = mongo.find_document('connectthedots', doc_id)
    results = doc.get('results')

    remaining_days = None
    if doc.get('source') != 'sample':
        remaining_days = mongo.get_remaining_days('connectthedots', doc_id)

    # NOTE(review): the stored table order is taken as the centrality ranking;
    # presumably it is pre-sorted by centrality - confirm.
    table_by_centrality = results['table']
    table_by_degree = sorted(table_by_centrality,
                             key=operator.itemgetter('degree'),
                             reverse=True)

    # Locate the first position where the degree ordering and the stored
    # ordering name different nodes.
    first_mismatch = None
    degree_index = 0
    centrality_index = 0
    for position, row in enumerate(table_by_degree):
        if row['id'] != table_by_centrality[position]['id']:
            first_mismatch = row['id']
            degree_index = position
            break

    if first_mismatch is not None:
        # Everything before degree_index matched, so only later rows can hold
        # the mismatched node.
        offset = degree_index + 1
        for position, row in enumerate(table_by_centrality[offset:]):
            if row['id'] == first_mismatch:
                centrality_index = position + offset
                break

    whatnext = {
        'mismatch_id': first_mismatch,
        'mismatch_degree': ordinal(degree_index + 1),
        'mismatch_centrality': ordinal(centrality_index + 1),
        # Guard the empty-table case, where [-1] would raise IndexError.
        'lowest_degree': table_by_degree[-1]['id'] if table_by_degree else None,
    }

    return render_template(
        'connectthedots/results.html',
        results=results,
        whatnext=whatnext,
        tool_name='connectthedots',
        source=doc['source'],
        has_multiple_sheets=results['has_multiple_sheets'],
        remaining_days=remaining_days,
    )
def render_results(doc_id, sheet_idx):
    """Render the WTFcsv results page for one sheet of a stored document.

    Picks up to three distinct random columns for the "what next" hints and
    attaches an 'overview' chart summary to every column of the sheet
    (the column dicts are modified in place).

    Args:
        doc_id: Identifier of the 'wtfcsv' document to load.
        sheet_idx: Index of the sheet to show; converted with ``int()``.

    Returns:
        The rendered 'wtfcsv/results.html' template.
    """
    doc = mongo.find_document('wtfcsv', doc_id)
    results = doc.get('results')
    if doc['sample_id'] == u'':
        remaining_days = mongo.get_remaining_days('wtfcsv', doc_id)
    else:
        remaining_days = None

    if 'bad_formatting' in results:
        return render_template('wtfcsv/results.html', results=results,
                               tool_name='wtfcsv', index=0)

    sheet_index = int(sheet_idx)
    columns = results[sheet_index]['columns']

    if len(columns) < 1:
        whatnext = 'no_data'
    else:
        # First pick: prefer a column that has 'most_freq_values' when any
        # column does, otherwise any column.
        with_freq_values = [c for c in columns if 'most_freq_values' in c]
        random_column = random.choice(with_freq_values or columns)

        # Second and third picks must differ from the earlier ones. Choosing
        # from an explicit candidate list (instead of retrying random draws)
        # cannot loop forever when columns compare equal; when no distinct
        # candidate exists the first pick is reused.
        other_columns = [c for c in columns if c != random_column]
        random_column2 = random.choice(other_columns) if other_columns else random_column

        random_column3 = random_column
        if len(columns) > 2:
            remaining_columns = [c for c in other_columns if c != random_column2]
            if remaining_columns:
                random_column3 = random.choice(remaining_columns)

        top_values = random_column.get('most_freq_values')
        whatnext = {
            # 0 is the established fallback when the column has no top value.
            'random_column_top_value': top_values[0]['value'] if top_values else 0,
            'random_column_name': random_column['name'],
            'random_column_name2': random_column2['name'],
            'random_column_name3': random_column3['name'],
        }

    # Build a list of summary result data for the chart.
    for col in columns:
        is_string = 'text' in col['display_type_name']

        # Pick the right results to summarize.
        data_to_use = []
        if 'deciles' in col:
            data_to_use = col['deciles']
        elif 'most_freq_values' in col:
            data_to_use = col['most_freq_values']
        elif 'word_counts' in col:
            data_to_use = [
                {'value': word[0], 'count': word[1]}
                for word in col['word_counts']['unique_words'][:20]
            ]

        # Stitch together the overview.
        overview_data = {'categories': [], 'values': []}
        for d in data_to_use:
            # Non-text values have '_' replaced by '.' in their label.
            key = str(d['value']) if is_string else str(d['value']).replace('_', '.')
            overview_data['categories'].append(key)
            overview_data['values'].append(d['count'])
        if 'others' in col:
            overview_data['categories'].append(gettext('Other'))
            overview_data['values'].append(int(col['others']))
        col['overview'] = overview_data

    return render_template(
        'wtfcsv/results.html',
        results=results,
        whatnext=whatnext,
        tool_name='wtfcsv',
        index=sheet_index,
        source=doc['source'],
        remaining_days=remaining_days,
    )
def results(doc_id):
    """Render the WordCounter results page for a stored document.

    Args:
        doc_id: Identifier of the 'wordcounter' document to load.

    Returns:
        The rendered 'wordcounter/results.html' template, or
        'no_results.html' when the document cannot be loaded.
    """
    results = {}
    remaining_days = None
    try:
        doc = mongo.find_document('wordcounter', doc_id)
        if doc['sample_id'] == u'':
            remaining_days = mongo.get_remaining_days('wordcounter', doc_id)
    except Exception:
        # Any lookup failure is shown as "no results"; unlike a bare
        # ``except:`` this lets KeyboardInterrupt/SystemExit propagate.
        logger.warning("Unable to find doc '%s'", doc_id)
        return render_template('no_results.html', tool_name='wordcounter')

    counts = doc.get('counts')
    # only render the top 40 results on the page (the csv contains all results)
    results['unique_words'] = counts['unique_words'][:40]
    results['bigrams'] = counts['bigrams'][:40]
    results['trigrams'] = counts['trigrams'][:40]
    unique_words = results['unique_words']

    # Pick a word from the tail of the top ~20. The upper bound is clamped to
    # the last valid index: the previous bound of min(20, len) could index one
    # past the end of the list whenever there were 20 or fewer words.
    random_unpopular_word = ['', '']
    if len(unique_words) > 1:
        max_index = min(20, len(unique_words) - 1)
        min_index = max(0, max_index - 5)
        random_unpopular_word = unique_words[random.randrange(min_index, max_index + 1)]
    elif unique_words:
        random_unpopular_word = unique_words[0]

    # Find the most popular word that is also present in bigrams and trigrams.
    # The fallback to the most popular word fires only when the last word
    # examined appeared in neither list.
    top_word = ''
    word_in_bigrams_count = 0
    word_in_trigrams_count = 0
    if unique_words and results['bigrams'] and results['trigrams']:
        for word in unique_words:
            top_word = word[0]
            word_in_bigrams_count = sum(1 for b in results['bigrams'] if top_word in b[0])
            word_in_trigrams_count = sum(1 for t in results['trigrams'] if top_word in t[0])
            if word_in_bigrams_count > 0 and word_in_trigrams_count > 0:
                break
        if word_in_bigrams_count == 0 and word_in_trigrams_count == 0:
            top_word = unique_words[0][0]

    whatnext = {
        'top_word': top_word,
        'word_in_bigrams_count': word_in_bigrams_count,
        'word_in_trigrams_count': word_in_trigrams_count,
        'random_unpopular_word': random_unpopular_word[0],
        'random_unpopular_word_count': random_unpopular_word[1],
    }
    return render_template(
        'wordcounter/results.html',
        results=results,
        whatnext=whatnext,
        tool_name='wordcounter',
        title=doc['title'],
        doc_id=doc_id,
        source=doc['source'],
        remaining_days=remaining_days,
        total_words=counts['total_word_count'],
    )
def results_for_doc(doc_id):
    """Render the WordCounter results page, including any stored biography.

    Args:
        doc_id: Identifier of the 'wordcounter' document to load.

    Returns:
        The rendered 'wordcounter/results.html' template, or
        'no_results.html' when the document cannot be loaded.
    """
    remaining_days = None
    try:
        doc = mongo.find_document('wordcounter', doc_id)
        if doc['sample_id'] == '':
            remaining_days = mongo.get_remaining_days('wordcounter', doc_id)
    except Exception:
        # Treat any lookup failure as "no results" without also swallowing
        # KeyboardInterrupt/SystemExit the way a bare ``except:`` would.
        logger.warning("Unable to find doc '%s'", doc_id)
        return render_template('no_results.html', tool_name='wordcounter')

    counts = doc.get('counts')
    # only render the top 40 results on the page (the csv contains all results)
    words = counts['unique_words'][:40]
    bigrams = counts['bigrams'][:40]
    trigrams = counts['trigrams'][:40]
    results = {'unique_words': words, 'bigrams': bigrams, 'trigrams': trigrams}

    # Choose a word near the bottom of the top ~20. ``last_candidate`` is
    # capped at the final valid index; the earlier bound of min(20, len)
    # allowed an out-of-range index for lists of 2 to 20 words.
    random_unpopular_word = ['', '']
    if len(words) > 1:
        last_candidate = min(20, len(words) - 1)
        first_candidate = max(0, last_candidate - 5)
        random_unpopular_word = words[random.randrange(first_candidate, last_candidate + 1)]
    elif words:
        random_unpopular_word = words[0]

    # Find the most popular word that is also present in bigrams and trigrams.
    # If the last word examined is in neither, fall back to the most popular
    # word.
    top_word = ''
    word_in_bigrams_count = 0
    word_in_trigrams_count = 0
    if words and bigrams and trigrams:
        for entry in words:
            top_word = entry[0]
            word_in_bigrams_count = sum(1 for b in bigrams if top_word in b[0])
            word_in_trigrams_count = sum(1 for t in trigrams if top_word in t[0])
            if word_in_bigrams_count > 0 and word_in_trigrams_count > 0:
                break
        if word_in_bigrams_count == 0 and word_in_trigrams_count == 0:
            top_word = words[0][0]

    whatnext = {
        'top_word': top_word,
        'word_in_bigrams_count': word_in_bigrams_count,
        'word_in_trigrams_count': word_in_trigrams_count,
        'random_unpopular_word': random_unpopular_word[0],
        'random_unpopular_word_count': random_unpopular_word[1],
    }
    biography = doc['biography'] if 'biography' in doc else None

    return render_template(
        'wordcounter/results.html',
        results=results,
        whatnext=whatnext,
        tool_name='wordcounter',
        title=doc['title'],
        doc_id=doc_id,
        source=doc['source'],
        remaining_days=remaining_days,
        total_words=counts['total_word_count'],
        biography=biography,
    )
def render_results(doc_id, sheet_idx):
    """Render the WTFcsv results page for one sheet of a stored document.

    Chooses up to three distinct random columns for the "what next" hints and
    stores an 'overview' chart summary on every column dict of the sheet.

    Args:
        doc_id: Identifier of the 'wtfcsv' document to load.
        sheet_idx: Index of the sheet to show; converted with ``int()``.

    Returns:
        The rendered 'wtfcsv/results.html' template.
    """
    doc = mongo.find_document('wtfcsv', doc_id)
    results = doc.get('results')

    remaining_days = None
    if doc['sample_id'] == u'':
        remaining_days = mongo.get_remaining_days('wtfcsv', doc_id)

    if 'bad_formatting' in results:
        return render_template('wtfcsv/results.html', results=results,
                               tool_name='wtfcsv', index=0)

    columns = results[int(sheet_idx)]['columns']

    if len(columns) < 1:
        whatnext = 'no_data'
    else:
        # Prefer a first column that carries 'most_freq_values' if one exists.
        preferred = [c for c in columns if 'most_freq_values' in c]
        random_column = random.choice(preferred if preferred else columns)

        # Draw the second and third columns from explicit lists of columns
        # that differ from the earlier picks. The previous retry-until-
        # different loops could spin forever when columns compared equal;
        # here the first pick is reused when nothing distinct is left.
        random_column2 = random_column
        random_column3 = random_column
        if len(columns) > 1:
            candidates = [c for c in columns if c != random_column]
            if candidates:
                random_column2 = random.choice(candidates)
        if len(columns) > 2:
            candidates = [c for c in columns
                          if c != random_column and c != random_column2]
            if candidates:
                random_column3 = random.choice(candidates)

        whatnext = {}
        if 'most_freq_values' in random_column and len(random_column['most_freq_values']) > 0:
            whatnext['random_column_top_value'] = random_column['most_freq_values'][0]['value']
        else:
            whatnext['random_column_top_value'] = 0
        whatnext['random_column_name'] = random_column['name']
        whatnext['random_column_name2'] = random_column2['name']
        whatnext['random_column_name3'] = random_column3['name']

    # build a list of summary result data for the chart
    for col in columns:
        is_string = 'text' in col['display_type_name']
        data_to_use = []
        # pick the right results to summarize
        if 'deciles' in col:
            data_to_use = col['deciles']
        elif 'most_freq_values' in col:
            data_to_use = col['most_freq_values']
        elif 'word_counts' in col:
            # The leftover per-word debug ``print`` statement was removed: it
            # is a syntax error on Python 3 and ``str()`` of a non-ASCII
            # unicode word raises UnicodeEncodeError on Python 2.
            data_to_use = [
                {'value': word[0], 'count': word[1]}
                for word in col['word_counts']['unique_words'][:20]
            ]
        # stitch together the overview
        overview_data = {'categories': [], 'values': []}
        for d in data_to_use:
            key = str(d['value']) if is_string else str(d['value']).replace('_', '.')
            overview_data['categories'].append(key)
            overview_data['values'].append(d['count'])
        if 'others' in col:
            overview_data['categories'].append(gettext('Other'))
            overview_data['values'].append(int(col['others']))
        col['overview'] = overview_data

    return render_template(
        'wtfcsv/results.html',
        results=results,
        whatnext=whatnext,
        tool_name='wtfcsv',
        index=int(sheet_idx),
        source=doc['source'],
        remaining_days=remaining_days,
    )