Beispiel #1
0
def create_csv_file(counts, analysis_type):
    """
    Write one of the word-count tables in `counts` out as a CSV file.

    `analysis_type` picks the table: 'words', 'bigrams' or 'trigrams'.
    On success the value returned by `filehandler.write_to_csv` is handed
    back unchanged. An unrecognised type is logged as an error, any
    exception raised along the way is logged as well, and in both cases the
    rendered 'no_results.html' template is returned instead.
    """
    def phrase_rows(key):
        # Each entry is indexed as (tokens, frequency); the tokens are
        # joined with single spaces to form the phrase column.
        table = []
        for entry in counts[key]:
            count = entry[1]
            table.append([" ".join(entry[0]), count])
        return table

    try:
        if analysis_type == 'words':
            header = ['word', 'frequency']
            return filehandler.write_to_csv(header, counts['unique_words'],
                                            '-word-counts.csv')
        if analysis_type == 'bigrams':
            rows = phrase_rows('bigrams')
            return filehandler.write_to_csv(['bigram phrase', 'frequency'],
                                            rows, '-bigram-counts.csv')
        if analysis_type == 'trigrams':
            rows = phrase_rows('trigrams')
            return filehandler.write_to_csv(['trigram phrase', 'frequency'],
                                            rows, '-trigram-counts.csv')
        # None of the known types matched.
        logger.error("Requested unknown csv type: %s", analysis_type)
    except Exception as err:
        logger.exception(err)
    # Reached for an unknown type or after a logged exception.
    return render_template('no_results.html', tool_name='wordcounter')
Beispiel #2
0
def process_paste(text, has_header_row=True):
    """
    Return results for a pasted table

    Every line of `text` must hold two columns, written either as two
    double-quoted fields separated by a tab or as two bare fields separated
    by a single tab. The rows are written to a CSV file with
    `filehandler.write_to_csv`, summarised with `ctd.get_summary`, and the
    file is deleted again before returning.

    Returns the summary from `ctd.get_summary` with 'has_multiple_sheets'
    and 'filename' filled in, or None when a line does not fit the
    two-column layout or when a header row is expected but nothing was
    pasted.
    """
    # A single lazy group is enough to capture a quoted field. Repeating it
    # (`(.*?)+`) backtracks exponentially on an unterminated quote and does
    # not reliably capture the field text.
    field_pattern = re.compile(r'"(.*?)"|\t')
    csv_rows = []

    for line in text.splitlines():
        matches = list(field_pattern.finditer(line))
        if len(matches) == 3:
            left, separator, right = matches
            # Only accept the layout: quoted field, tab, quoted field.
            if (separator.group(0) != '\t' or left.group(0) == '\t'
                    or right.group(0) == '\t'):
                return None
            csv_rows.append((left.group(1), right.group(1)))
        elif len(matches) == 1:
            cells = line.split('\t')
            if len(cells) < 2:
                # The single match was a quoted field, not a tab separator,
                # so there is no second column to read.
                return None
            csv_rows.append((cells[0], cells[1]))
        else:
            return None

    if has_header_row:
        if not csv_rows:
            # Nothing pasted, so there is no header row to take.
            return None
        headers = csv_rows.pop(0)
    else:
        headers = ['source', 'target']

    file_path = filehandler.write_to_csv(headers, csv_rows)
    file_size = os.stat(file_path).st_size
    logger.debug('[CTD] File size: %d bytes', file_size)

    results = ctd.get_summary(file_path)
    results['has_multiple_sheets'] = False
    results['filename'] = 'Your Pasted Data'

    filehandler.delete_files([file_path])
    return results
Beispiel #3
0
def download(doc_id):
    """
    Build the CSV download for a stored 'samediff' comparison.

    Looks up the document `doc_id` in the 'samediff' collection and writes
    one row per word: the word, its uses in the first file, its uses in the
    second file, and its total uses. Returns the result of
    `filehandler.generate_csv` for the written file. Any exception is
    logged and the request is aborted with status 400.
    """
    try:
        logger.debug("Download %s", doc_id)
        doc = mongo.find_document('samediff', doc_id)
        headers = [
            _('word'),
            _('uses in') + ' ' + doc['filenames'][0],
            _('uses in') + ' ' + doc['filenames'][1],
            _('total uses')
        ]
        rows = []
        # Entries are unpacked as (frequency, word) pairs.
        for f, w in doc['sameWords']:
            # Per-document counts come from each document's own frequency
            # list; a word missing there raises and ends in the 400 below.
            doc1_count = next(f2 for f2, w2 in doc['mostFrequentDoc1']
                              if w == w2)
            doc2_count = next(f2 for f2, w2 in doc['mostFrequentDoc2']
                              if w == w2)
            rows.append([w, doc1_count, doc2_count, f])
        for f, w in doc['diffWordsDoc1']:
            rows.append([w, f, 0, f])
        # Words used only in the second document go in the second count
        # column. This loop previously re-read 'diffWordsDoc1'.
        # NOTE(review): assumes the key is 'diffWordsDoc2', mirroring
        # 'mostFrequentDoc2' -- confirm against the stored schema.
        for f, w in doc['diffWordsDoc2']:
            rows.append([w, 0, f, f])
        # TODO: clean up file name
        file_path = filehandler.write_to_csv(
            headers, rows,
            filehandler.generate_filename('csv', '', doc['filenames'][0],
                                          doc['filenames'][1]), False)
        logger.debug('  created csv to download at %s', file_path)
        return filehandler.generate_csv(file_path)
    except Exception as e:
        # Use the same logger as the rest of the handler.
        logger.exception(e)
        abort(400)
Beispiel #4
0
def process_paste(text, has_header_row=True):
    """
    Return results for a pasted table

    Every line of `text` must hold two columns, written either as two
    double-quoted fields separated by a tab or as two bare fields separated
    by a single tab. The rows are written to a CSV file with
    `filehandler.write_to_csv`, summarised with `ctd.get_summary`, and the
    file is deleted again before returning.

    Returns the summary from `ctd.get_summary` with 'has_multiple_sheets'
    and 'filename' filled in, or None when a line does not fit the
    two-column layout or when a header row is expected but nothing was
    pasted.
    """
    # A single lazy group is enough to capture a quoted field. Repeating it
    # (`(.*?)+`) backtracks exponentially on an unterminated quote and does
    # not reliably capture the field text.
    field_pattern = re.compile(r'"(.*?)"|\t')
    csv_rows = []

    for line in text.splitlines():
        matches = list(field_pattern.finditer(line))
        if len(matches) == 3:
            left, separator, right = matches
            # Only accept the layout: quoted field, tab, quoted field.
            if (separator.group(0) != '\t' or left.group(0) == '\t'
                    or right.group(0) == '\t'):
                return None
            csv_rows.append((left.group(1), right.group(1)))
        elif len(matches) == 1:
            cells = line.split('\t')
            if len(cells) < 2:
                # The single match was a quoted field, not a tab separator,
                # so there is no second column to read.
                return None
            csv_rows.append((cells[0], cells[1]))
        else:
            return None

    if has_header_row:
        if not csv_rows:
            # Nothing pasted, so there is no header row to take.
            return None
        headers = csv_rows.pop(0)
    else:
        headers = ['source', 'target']

    file_path = filehandler.write_to_csv(headers, csv_rows)
    file_size = os.stat(file_path).st_size
    logger.debug('[CTD] File size: %d bytes', file_size)

    results = ctd.get_summary(file_path)
    results['has_multiple_sheets'] = False
    results['filename'] = 'Your Pasted Data'

    filehandler.delete_files([file_path])
    return results
Beispiel #5
0
def create_csv_file(counts, analysis_type):
    """
    Write the table selected by `analysis_type` from `counts` to a CSV file.

    Supported types are 'words', 'bigrams' and 'trigrams'. Returns whatever
    `filehandler.write_to_csv` returns, or None (after logging an error)
    when the type is not recognised.
    """
    def as_row(ngram):
        # Entries are indexed as (tokens, frequency); tokens become one
        # space-separated phrase.
        frequency = ngram[1]
        return [" ".join(ngram[0]), frequency]

    if analysis_type == 'words':
        return filehandler.write_to_csv(['word', 'frequency'], counts['unique_words'], '-word-counts.csv')

    if analysis_type == 'bigrams':
        bigram_rows = [as_row(ngram) for ngram in counts['bigrams']]
        return filehandler.write_to_csv(['bigram phrase', 'frequency'], bigram_rows, '-bigram-counts.csv')

    if analysis_type == 'trigrams':
        trigram_rows = [as_row(ngram) for ngram in counts['trigrams']]
        return filehandler.write_to_csv(['trigram phrase', 'frequency'], trigram_rows, '-trigram-counts.csv')

    logger.error("Requested unknown csv type: %s", analysis_type)
    return None  # nothing was written for an invalid analysis_type
Beispiel #6
0
def create_csv_file(counts, analysis_type):
    """
    Export word, bigram or trigram counts as a CSV file.

    `analysis_type` must be 'words', 'bigrams' or 'trigrams'; the matching
    entry of `counts` is written through `filehandler.write_to_csv` and that
    call's return value is passed back. Any other type is logged as an
    error and None is returned.
    """
    def phrase_and_count(item):
        # item[0] holds the tokens of the phrase, item[1] its frequency.
        occurrences = item[1]
        return [" ".join(item[0]), occurrences]

    # Guard clause: reject anything that is not one of the three tables.
    if analysis_type not in ('words', 'bigrams', 'trigrams'):
        logger.error("Requested unknown csv type: %s", analysis_type)
        return None  # if was an invalid analysis_type

    if analysis_type == 'words':
        return filehandler.write_to_csv(['word', 'frequency'],
                                        counts['unique_words'],
                                        '-word-counts.csv')

    if analysis_type == 'bigrams':
        header = ['bigram phrase', 'frequency']
        suffix = '-bigram-counts.csv'
        rows = list(map(phrase_and_count, counts['bigrams']))
    else:
        header = ['trigram phrase', 'frequency']
        suffix = '-trigram-counts.csv'
        rows = list(map(phrase_and_count, counts['trigrams']))
    return filehandler.write_to_csv(header, rows, suffix)
Beispiel #7
0
def download(doc_id):
    """
    Build the CSV download for a stored 'samediff' comparison.

    Looks up the document `doc_id` in the 'samediff' collection and writes
    one row per word: the word, its uses in the first file, its uses in the
    second file, and its total uses. Returns the result of
    `filehandler.generate_csv` for the written file. Any exception is
    logged and the request is aborted with status 400.
    """
    try:
        logger.debug("Download %s", doc_id)
        doc = mongo.find_document('samediff', doc_id)
        headers = [
            _('word'),
            _('uses in') + ' ' + doc['filenames'][0],
            _('uses in') + ' ' + doc['filenames'][1],
            _('total uses'),
        ]
        rows = []
        # Entries are unpacked as (frequency, word) pairs.
        for f, w in doc['sameWords']:
            # Per-document counts come from each document's own frequency
            # list; a word missing there raises and ends in the 400 below.
            doc1_count = next(f2 for f2, w2 in doc['mostFrequentDoc1'] if w == w2)
            doc2_count = next(f2 for f2, w2 in doc['mostFrequentDoc2'] if w == w2)
            rows.append([w, doc1_count, doc2_count, f])
        for f, w in doc['diffWordsDoc1']:
            rows.append([w, f, 0, f])
        # Words used only in the second document go in the second count
        # column. This loop previously re-read 'diffWordsDoc1'.
        # NOTE(review): assumes the key is 'diffWordsDoc2', mirroring
        # 'mostFrequentDoc2' -- confirm against the stored schema.
        for f, w in doc['diffWordsDoc2']:
            rows.append([w, 0, f, f])
        # TODO: clean up file name
        file_path = filehandler.write_to_csv(
            headers, rows,
            filehandler.generate_filename('csv', '', doc['filenames'][0], doc['filenames'][1]),
            False)
        logger.debug('  created csv to download at %s', file_path)
        return filehandler.generate_csv(file_path)
    except Exception as e:
        # Use the same logger as the rest of the handler.
        logger.exception(e)
        abort(400)