Esempio n. 1
0
def results(doc_id):
    """
    Render the samediff results page for a stored comparison job.

    Looks the job up in the 'samediff' collection. If the lookup (or reading
    the job's 'sample_id') fails for any reason, a warning is logged and the
    generic 'no_results.html' page is rendered instead.

    :param doc_id: identifier of the samediff document to display
    :return: the rendered template
    """
    remaining_days = None

    try:
        job = mongo.find_document('samediff', doc_id)
        # only jobs with an empty sample_id get a remaining-days value
        # (presumably these are user uploads rather than bundled samples)
        if job['sample_id'] == u'':
            remaining_days = mongo.get_remaining_days('samediff', doc_id)
    except Exception:
        # deliberately broad: any failure here is treated as "no such doc",
        # but unlike a bare except this lets SystemExit/KeyboardInterrupt pass
        logger.warning("Unable to find doc '%s'", doc_id)
        return render_template('no_results.html', tool_name='samediff')

    same_words = job['sameWords']
    doc2_only_words = job['diffWordsDoc2']
    # element [1] of each entry is used as the word (assumes entries are
    # (count, word)-like pairs -- TODO confirm against the job producer)
    whatnext = {
        'most_common_word': same_words[0][1] if len(same_words) > 0 else '',
        'second_most_common_word': same_words[1][1] if len(same_words) > 1 else '',
        'doc2_most_common_word': doc2_only_words[0][1] if len(doc2_only_words) > 0 else '',
    }

    # relative length difference, measured against the longer document
    longer = max(job['totalWordsDoc1'], job['totalWordsDoc2'])
    shorter = min(job['totalWordsDoc1'], job['totalWordsDoc2'])
    if longer > 0:
        pct_length_diff = float(longer - shorter) / float(longer)
    else:
        # both documents are empty: there is no difference to report, and
        # dividing by the (zero) longer length would raise ZeroDivisionError
        pct_length_diff = 0.0

    cosine_similarity = {
        'score': job['cosineSimilarity'],
        'description': interpretCosineSimilarity(job['cosineSimilarity']),
    }

    return render_template('samediff/results.html', results=job,
        pct_length_diff=pct_length_diff,
        cosine_similarity=cosine_similarity,
        whatnext=whatnext, tool_name='samediff', doc_id=doc_id,
        remaining_days=remaining_days)
Esempio n. 2
0
def results(doc_id):
    """
    Render the samediff results page for a stored comparison job.

    Looks the job up in the 'samediff' collection. If the lookup (or reading
    the job's 'sample_id') fails for any reason, a warning is logged and the
    generic 'no_results.html' page is rendered instead.

    :param doc_id: identifier of the samediff document to display
    :return: the rendered template
    """
    remaining_days = None

    try:
        job = mongo.find_document('samediff', doc_id)
        # only jobs with an empty sample_id get a remaining-days value
        # (presumably these are user uploads rather than bundled samples)
        if job['sample_id'] == u'':
            remaining_days = mongo.get_remaining_days('samediff', doc_id)
    except Exception:
        # deliberately broad: any failure here is treated as "no such doc",
        # but unlike a bare except this lets SystemExit/KeyboardInterrupt pass
        logger.warning("Unable to find doc '%s'", doc_id)
        return render_template('no_results.html', tool_name='samediff')

    same_words = job['sameWords']
    doc2_only_words = job['diffWordsDoc2']
    # element [1] of each entry is used as the word (assumes entries are
    # (count, word)-like pairs -- TODO confirm against the job producer)
    whatnext = {
        'most_common_word': same_words[0][1] if len(same_words) > 0 else '',
        'second_most_common_word': same_words[1][1] if len(same_words) > 1 else '',
        'doc2_most_common_word': doc2_only_words[0][1] if len(doc2_only_words) > 0 else '',
    }

    # relative length difference, measured against the longer document
    longer = max(job['totalWordsDoc1'], job['totalWordsDoc2'])
    shorter = min(job['totalWordsDoc1'], job['totalWordsDoc2'])
    if longer > 0:
        pct_length_diff = float(longer - shorter) / float(longer)
    else:
        # both documents are empty: there is no difference to report, and
        # dividing by the (zero) longer length would raise ZeroDivisionError
        pct_length_diff = 0.0

    cosine_similarity = {
        'score': job['cosineSimilarity'],
        'description': interpretCosineSimilarity(job['cosineSimilarity']),
    }

    return render_template('samediff/results.html', results=job,
        pct_length_diff=pct_length_diff,
        cosine_similarity=cosine_similarity,
        whatnext=whatnext, tool_name='samediff', doc_id=doc_id,
        remaining_days=remaining_days)
Esempio n. 3
0
def render_results(doc_id):
    """
    Render the connectthedots results page for the given document.

    Besides the stored results, the template receives a "what next" hint
    describing the first row whose position differs between the table as
    stored and the same table sorted by descending degree.
    """
    doc = mongo.find_document('connectthedots', doc_id)
    results = doc.get('results')

    # documents whose source is 'sample' get no remaining-days value
    remaining_days = None
    if doc.get('source') != 'sample':
        remaining_days = mongo.get_remaining_days('connectthedots', doc_id)

    # the stored table order is treated as the centrality ranking
    # (assumed from the variable naming -- TODO confirm with the producer)
    table_by_centrality = results['table']
    table_by_degree = sorted(table_by_centrality,
                             key=operator.itemgetter('degree'),
                             reverse=True)

    # walk both rankings in lockstep until the ids first disagree
    first_mismatch = None
    degree_index = 0
    for position, (degree_row, centrality_row) in enumerate(
            zip(table_by_degree, table_by_centrality)):
        if degree_row['id'] != centrality_row['id']:
            first_mismatch = degree_row['id']
            degree_index = position
            break

    # locate the same id in the stored ranking; every earlier position
    # matched, so the search can resume right after the mismatch
    centrality_index = 0
    if first_mismatch is not None:
        resume_at = degree_index + 1
        for position, row in enumerate(table_by_centrality[resume_at:],
                                       resume_at):
            if row['id'] == first_mismatch:
                centrality_index = position
                break

    what_next = {}
    what_next['mismatch_id'] = first_mismatch
    what_next['mismatch_degree'] = ordinal(degree_index + 1)
    what_next['mismatch_centrality'] = ordinal(centrality_index + 1)
    what_next['lowest_degree'] = table_by_degree[-1]['id']

    if 'biography' in results:
        biography = results['biography']
    else:
        biography = None

    return render_template('connectthedots/results.html',
                           results=results,
                           whatnext=what_next,
                           tool_name='connectthedots',
                           source=doc['source'],
                           has_multiple_sheets=results['has_multiple_sheets'],
                           remaining_days=remaining_days,
                           biography=biography)
Esempio n. 4
0
def render_results(doc_id):
    """
    Render the connectthedots results page for the given document.

    The template also gets a "what next" hint built from the first row whose
    position differs between the stored table and the degree-sorted table.
    """
    doc = mongo.find_document('connectthedots', doc_id)
    results = doc.get('results')

    # documents whose source is 'sample' get no remaining-days value
    is_sample = doc.get('source') == 'sample'
    remaining_days = None if is_sample else mongo.get_remaining_days('connectthedots', doc_id)

    # the stored order is treated as the centrality ranking
    # (assumed from the variable naming -- TODO confirm with the producer)
    table_by_centrality = results['table']
    table_by_degree = sorted(table_by_centrality, key=operator.itemgetter('degree'), reverse=True)

    # position of the first row where the two rankings disagree, if any
    mismatch_position = next(
        (i for i, row in enumerate(table_by_degree) if row['id'] != table_by_centrality[i]['id']),
        None)

    first_mismatch = None
    degree_index = 0
    if mismatch_position is not None:
        first_mismatch = table_by_degree[mismatch_position]['id']
        degree_index = mismatch_position

    # find where that id sits in the stored ranking, starting just past the
    # mismatch; stays 0 when nothing is found
    centrality_index = 0
    if first_mismatch is not None:
        centrality_index = next(
            (i for i in range(degree_index + 1, len(table_by_centrality))
             if table_by_centrality[i]['id'] == first_mismatch),
            0)

    whatnext = {
        'mismatch_id': first_mismatch,
        'mismatch_degree': ordinal(degree_index + 1),
        'mismatch_centrality': ordinal(centrality_index + 1),
        'lowest_degree': table_by_degree[-1]['id'],
    }

    return render_template('connectthedots/results.html',
                           results=results,
                           whatnext=whatnext,
                           tool_name='connectthedots',
                           source=doc['source'],
                           has_multiple_sheets=results['has_multiple_sheets'],
                           remaining_days=remaining_days)
Esempio n. 5
0
def render_results(doc_id, sheet_idx):
    """
    Render the wtfcsv results page for one sheet of a stored document.

    Picks up to three columns at random for the "what next" suggestions and
    attaches an 'overview' entry (categories/values lists for the chart) to
    every column of the selected sheet before rendering.

    :param doc_id: identifier of the wtfcsv document to display
    :param sheet_idx: index of the sheet to show; converted with int()
    :return: the rendered template
    """
    doc = mongo.find_document('wtfcsv', doc_id)
    results = doc.get('results')

    # only documents with an empty sample_id get a remaining-days value
    if doc['sample_id'] == u'':
        remaining_days = mongo.get_remaining_days('wtfcsv', doc_id)
    else:
        remaining_days = None

    if 'bad_formatting' in results:
        return render_template('wtfcsv/results.html',
                               results=results,
                               tool_name='wtfcsv',
                               index=0)

    sheet_index = int(sheet_idx)
    columns = results[sheet_index]['columns']

    if len(columns) < 1:
        whatnext = 'no_data'
    else:
        # Choose from pre-filtered pools rather than re-drawing until the
        # pick is acceptable: the re-draw loops never terminate when every
        # remaining column compares equal to one already picked.

        # first column: prefer one that has most-frequent-value data
        with_freq_values = [c for c in columns if 'most_freq_values' in c]
        random_column = random.choice(with_freq_values or columns)

        # second column: different from the first when possible
        others = [c for c in columns if c != random_column]
        random_column2 = random.choice(others) if others else random_column

        # third column: different from both when possible, otherwise it
        # falls back to the first column
        remaining = [c for c in others if c != random_column2]
        random_column3 = random.choice(remaining) if remaining else random_column

        if 'most_freq_values' in random_column and len(
                random_column['most_freq_values']) > 0:
            top_value = random_column['most_freq_values'][0]['value']
        else:
            top_value = 0

        whatnext = {
            'random_column_top_value': top_value,
            'random_column_name': random_column['name'],
            'random_column_name2': random_column2['name'],
            'random_column_name3': random_column3['name'],
        }

    # build a list of summary result data for the chart
    for col in columns:
        is_string = 'text' in col['display_type_name']
        data_to_use = []
        # pick the right results to summarize
        if 'deciles' in col:
            data_to_use = col['deciles']
        elif 'most_freq_values' in col:
            data_to_use = col['most_freq_values']
        elif 'word_counts' in col:
            data_to_use = [{
                'value': word[0],
                'count': word[1]
            } for word in col['word_counts']['unique_words'][:20]]
        # stitch together the overview
        overview_data = {'categories': [], 'values': []}
        for d in data_to_use:
            # non-text labels have '_' turned back into '.' (presumably the
            # stored keys could not contain dots -- TODO confirm)
            key = str(d['value']) if is_string else str(d['value']).replace(
                '_', '.')
            overview_data['categories'].append(key)
            overview_data['values'].append(d['count'])
        if 'others' in col:
            overview_data['categories'].append(gettext('Other'))
            overview_data['values'].append(int(col['others']))
        col['overview'] = overview_data
    return render_template('wtfcsv/results.html',
                           results=results,
                           whatnext=whatnext,
                           tool_name='wtfcsv',
                           index=sheet_index,
                           source=doc['source'],
                           remaining_days=remaining_days)
Esempio n. 6
0
def results(doc_id):
    """
    Render the wordcounter results page for a stored document.

    If the document lookup (or reading its 'sample_id') fails, a warning is
    logged and the generic 'no_results.html' page is rendered instead.

    :param doc_id: identifier of the wordcounter document to display
    :return: the rendered template
    """
    results = {}
    remaining_days = None

    try:
        doc = mongo.find_document('wordcounter', doc_id)
        # only documents with an empty sample_id get a remaining-days value
        if doc['sample_id'] == u'':
            remaining_days = mongo.get_remaining_days('wordcounter', doc_id)
    except Exception:
        # deliberately broad: any failure here is treated as "no such doc",
        # but unlike a bare except this lets SystemExit/KeyboardInterrupt pass
        logger.warning("Unable to find doc '%s'", doc_id)
        return render_template('no_results.html', tool_name='wordcounter')

    counts = doc.get('counts')

    # only render the top 40 results on the page (the csv contains all results)
    results['unique_words'] = counts['unique_words'][:40]
    results['bigrams'] = counts['bigrams'][:40]
    results['trigrams'] = counts['trigrams'][:40]

    unique_words = results['unique_words']
    bigrams = results['bigrams']
    trigrams = results['trigrams']

    random_unpopular_word = ['', '']
    top_word = ''
    word_in_bigrams_count = 0
    word_in_trigrams_count = 0

    def count_containing(word, ngrams):
        # number of n-gram entries whose first element contains the word
        return sum(1 for gram in ngrams if word in gram[0])

    if len(unique_words) > 0:
        if len(unique_words) > 1:
            max_index = min(20, len(unique_words))
            min_index = max(0, max_index - 5)
            # randrange excludes its upper bound; clamp it to the list length
            # so that lists of 20 or fewer words cannot yield an index one
            # past the end (which raised IndexError)
            upper_bound = min(max_index + 1, len(unique_words))
            random_unpopular_word = unique_words[random.randrange(min_index, upper_bound)]
        else:
            random_unpopular_word = unique_words[0]

        # Find the most popular word that is also present in bigrams and
        # trigrams. If none can be found, just get the most popular word.
        found = False
        if bigrams and trigrams:
            for word in unique_words:
                in_bigrams = count_containing(word[0], bigrams)
                in_trigrams = count_containing(word[0], trigrams)
                if in_bigrams > 0 and in_trigrams > 0:
                    top_word = word[0]
                    word_in_bigrams_count = in_bigrams
                    word_in_trigrams_count = in_trigrams
                    found = True
                    break

        if not found:
            # fall back to the most popular word and report its own counts,
            # instead of leaking the last scanned word and its partial counts
            top_word = unique_words[0][0]
            word_in_bigrams_count = count_containing(top_word, bigrams)
            word_in_trigrams_count = count_containing(top_word, trigrams)

    whatnext = {}
    whatnext['top_word'] = top_word
    whatnext['word_in_bigrams_count'] = word_in_bigrams_count
    whatnext['word_in_trigrams_count'] = word_in_trigrams_count
    whatnext['random_unpopular_word'] = random_unpopular_word[0]
    whatnext['random_unpopular_word_count'] = random_unpopular_word[1]

    return render_template('wordcounter/results.html',
        results=results,
        whatnext=whatnext,
        tool_name='wordcounter',
        title=doc['title'],
        doc_id=doc_id,
        source=doc['source'],
        remaining_days=remaining_days,
        total_words=counts['total_word_count'])
Esempio n. 7
0
def results_for_doc(doc_id):
    """
    Render the wordcounter results page for a stored document.

    If the document lookup (or reading its 'sample_id') fails, a warning is
    logged and the generic 'no_results.html' page is rendered instead.

    :param doc_id: identifier of the wordcounter document to display
    :return: the rendered template
    """
    results = {}
    remaining_days = None

    try:
        doc = mongo.find_document('wordcounter', doc_id)
        # only documents with an empty sample_id get a remaining-days value
        if doc['sample_id'] == '':
            remaining_days = mongo.get_remaining_days('wordcounter', doc_id)
    except Exception:
        # deliberately broad: any failure here is treated as "no such doc",
        # but unlike a bare except this lets SystemExit/KeyboardInterrupt pass
        logger.warning("Unable to find doc '%s'", doc_id)
        return render_template('no_results.html', tool_name='wordcounter')

    counts = doc.get('counts')

    # only render the top 40 results on the page (the csv contains all results)
    results['unique_words'] = counts['unique_words'][:40]
    results['bigrams'] = counts['bigrams'][:40]
    results['trigrams'] = counts['trigrams'][:40]

    unique_words = results['unique_words']
    bigrams = results['bigrams']
    trigrams = results['trigrams']

    random_unpopular_word = ['', '']
    top_word = ''
    word_in_bigrams_count = 0
    word_in_trigrams_count = 0

    def count_containing(word, ngrams):
        # number of n-gram entries whose first element contains the word
        return sum(1 for gram in ngrams if word in gram[0])

    if len(unique_words) > 0:
        if len(unique_words) > 1:
            max_index = min(20, len(unique_words))
            min_index = max(0, max_index - 5)
            # randrange excludes its upper bound; clamp it to the list length
            # so that lists of 20 or fewer words cannot yield an index one
            # past the end (which raised IndexError)
            upper_bound = min(max_index + 1, len(unique_words))
            random_unpopular_word = unique_words[random.randrange(
                min_index, upper_bound)]
        else:
            random_unpopular_word = unique_words[0]

        # Find the most popular word that is also present in bigrams and
        # trigrams. If none can be found, just get the most popular word.
        found = False
        if bigrams and trigrams:
            for word in unique_words:
                in_bigrams = count_containing(word[0], bigrams)
                in_trigrams = count_containing(word[0], trigrams)
                if in_bigrams > 0 and in_trigrams > 0:
                    top_word = word[0]
                    word_in_bigrams_count = in_bigrams
                    word_in_trigrams_count = in_trigrams
                    found = True
                    break

        if not found:
            # fall back to the most popular word and report its own counts,
            # instead of leaking the last scanned word and its partial counts
            top_word = unique_words[0][0]
            word_in_bigrams_count = count_containing(top_word, bigrams)
            word_in_trigrams_count = count_containing(top_word, trigrams)

    whatnext = {}
    whatnext['top_word'] = top_word
    whatnext['word_in_bigrams_count'] = word_in_bigrams_count
    whatnext['word_in_trigrams_count'] = word_in_trigrams_count
    whatnext['random_unpopular_word'] = random_unpopular_word[0]
    whatnext['random_unpopular_word_count'] = random_unpopular_word[1]
    biography = doc['biography'] if 'biography' in doc else None

    return render_template('wordcounter/results.html',
                           results=results,
                           whatnext=whatnext,
                           tool_name='wordcounter',
                           title=doc['title'],
                           doc_id=doc_id,
                           source=doc['source'],
                           remaining_days=remaining_days,
                           total_words=counts['total_word_count'],
                           biography=biography)
Esempio n. 8
0
def render_results(doc_id, sheet_idx):
    """
    Render the wtfcsv results page for one sheet of a stored document.

    Picks up to three columns at random for the "what next" suggestions and
    attaches an 'overview' entry (categories/values lists for the chart) to
    every column of the selected sheet before rendering.

    :param doc_id: identifier of the wtfcsv document to display
    :param sheet_idx: index of the sheet to show; converted with int()
    :return: the rendered template
    """
    doc = mongo.find_document('wtfcsv', doc_id)
    results = doc.get('results')

    # only documents with an empty sample_id get a remaining-days value
    if doc['sample_id'] == u'':
        remaining_days = mongo.get_remaining_days('wtfcsv', doc_id)
    else:
        remaining_days = None

    if 'bad_formatting' in results:
        return render_template('wtfcsv/results.html', results=results, tool_name='wtfcsv', index=0)

    sheet_index = int(sheet_idx)
    columns = results[sheet_index]['columns']

    if len(columns) < 1:
        whatnext = 'no_data'
    else:
        # Choose from pre-filtered pools rather than re-drawing until the
        # pick is acceptable: the re-draw loops never terminate when every
        # remaining column compares equal to one already picked.

        # first column: prefer one that has most-frequent-value data
        with_freq_values = [c for c in columns if 'most_freq_values' in c]
        random_column = random.choice(with_freq_values or columns)

        # second column: different from the first when possible
        others = [c for c in columns if c != random_column]
        random_column2 = random.choice(others) if others else random_column

        # third column: different from both when possible, otherwise it
        # falls back to the first column
        remaining = [c for c in others if c != random_column2]
        random_column3 = random.choice(remaining) if remaining else random_column

        whatnext = {}
        if 'most_freq_values' in random_column and len(random_column['most_freq_values']) > 0:
            whatnext['random_column_top_value'] = random_column['most_freq_values'][0]['value']
        else:
            whatnext['random_column_top_value'] = 0
        whatnext['random_column_name'] = random_column['name']
        whatnext['random_column_name2'] = random_column2['name']
        whatnext['random_column_name3'] = random_column3['name']

    # build a list of summary result data for the chart
    for col in columns:
        is_string = 'text' in col['display_type_name']
        data_to_use = []
        # pick the right results to summarize
        if 'deciles' in col:
            data_to_use = col['deciles']
        elif 'most_freq_values' in col:
            data_to_use = col['most_freq_values']
        elif 'word_counts' in col:
            # the leftover debug print of each word was removed here: it wrote
            # to stdout on every request, could raise UnicodeEncodeError on
            # non-ASCII words under Python 2, and is a syntax error on Python 3
            data_to_use = [{'value': word[0], 'count': word[1]}
                           for word in col['word_counts']['unique_words'][:20]]
        # stitch together the overview
        overview_data = {'categories': [], 'values': []}
        for d in data_to_use:
            # non-text labels have '_' turned back into '.' (presumably the
            # stored keys could not contain dots -- TODO confirm)
            key = str(d['value']) if is_string else str(d['value']).replace('_', '.')
            overview_data['categories'].append(key)
            overview_data['values'].append(d['count'])
        if 'others' in col:
            overview_data['categories'].append(gettext('Other'))
            overview_data['values'].append(int(col['others']))
        col['overview'] = overview_data
    return render_template('wtfcsv/results.html',
        results=results,
        whatnext=whatnext,
        tool_name='wtfcsv',
        index=sheet_index,
        source=doc['source'],
        remaining_days=remaining_days)