def create_csv_file(counts, analysis_type):
    """
    Write the requested slice of ``counts`` to a CSV via ``filehandler.write_to_csv``.

    ``analysis_type`` selects what is exported:

    * ``'words'``    - ``counts['unique_words']`` is passed through unchanged.
    * ``'bigrams'``  - each entry of ``counts['bigrams']`` becomes ``[phrase, frequency]``.
    * ``'trigrams'`` - same as bigrams, reading ``counts['trigrams']``.

    For the n-gram types every entry is indexed as ``entry[0]`` (an iterable
    of strings, joined with single spaces) and ``entry[1]`` (the frequency).

    Returns whatever ``filehandler.write_to_csv`` returns on success.  When
    the type is not recognised (logged as an error) or any exception is raised
    (logged with its traceback), the rendered ``no_results.html`` template is
    returned instead.
    """
    ngram_spec = None  # (key in counts, header label, file name suffix)
    try:
        if analysis_type == 'words':
            return filehandler.write_to_csv(['word', 'frequency'], counts['unique_words'], '-word-counts.csv')
        if analysis_type == 'bigrams':
            ngram_spec = ('bigrams', 'bigram phrase', '-bigram-counts.csv')
        elif analysis_type == 'trigrams':
            ngram_spec = ('trigrams', 'trigram phrase', '-trigram-counts.csv')

        if ngram_spec is not None:
            counts_key, phrase_header, file_suffix = ngram_spec
            phrase_rows = []
            for entry in counts[counts_key]:
                frequency = entry[1]
                phrase_rows.append([" ".join(entry[0]), frequency])
            return filehandler.write_to_csv([phrase_header, 'frequency'], phrase_rows, file_suffix)

        logger.error("Requested unknown csv type: %s", analysis_type)
    except Exception as e:
        logger.exception(e)
    # reached for an unknown analysis_type or after a logged exception
    return render_template('no_results.html', tool_name='wordcounter')
def process_paste(text, has_header_row=True):
    """
    Return results for a pasted table

    ``text`` is split into lines; every line must hold exactly two cells
    separated by a tab, either both wrapped in double quotes or both unquoted.
    The parsed rows are written to a temporary CSV with
    ``filehandler.write_to_csv``, summarised with ``ctd.get_summary`` and the
    temporary file is deleted again, even when summarising fails.

    :param text: the pasted, tab-separated table
    :param has_header_row: when true the first parsed row supplies the column
        headers, otherwise ``['source', 'target']`` is used
    :return: the summary from ``ctd.get_summary`` with ``has_multiple_sheets``
        and ``filename`` filled in, or ``None`` when a line cannot be parsed
        as two cells (or a header row was requested but nothing was pasted)
    """
    # Matches a double-quoted cell (capturing its contents) or a tab separator.
    # The capture group must not be repeated: a repeated group only keeps its
    # last repetition, which loses the cell text.
    cell_or_tab = re.compile(r'"(.*?)"|\t')
    csv_rows = []
    for r in text.splitlines():
        groups = cell_or_tab.findall(r)
        if len(groups) == 3:
            # quoted cell, tab, quoted cell
            csv_rows.append((groups[0], groups[2]))
        elif len(groups) == 1:
            # a single match is normally the lone tab between two unquoted cells
            cells = r.split('\t')
            if len(cells) != 2:
                # e.g. one quoted cell with no tab at all
                return None
            csv_rows.append((cells[0], cells[1]))
        else:
            return None
    if has_header_row:
        if not csv_rows:
            # nothing pasted, so there is no header row to take
            return None
        headers = csv_rows.pop(0)
    else:
        headers = ['source', 'target']
    file_path = filehandler.write_to_csv(headers, csv_rows)
    try:
        file_size = os.stat(file_path).st_size
        logger.debug('[CTD] File size: %d bytes', file_size)
        results = ctd.get_summary(file_path)
    finally:
        # always remove the temporary file, even if summarising raised
        filehandler.delete_files([file_path])
    results['has_multiple_sheets'] = False
    results['filename'] = 'Your Pasted Data'
    return results
def download(doc_id):
    """
    Build the CSV download for a stored 'samediff' comparison.

    The document is loaded with ``mongo.find_document('samediff', doc_id)``.
    One row is written per word with the columns: word, uses in the first
    file, uses in the second file, total uses.

    * words in ``doc['sameWords']`` get their per-document counts looked up in
      ``doc['mostFrequentDoc1']`` / ``doc['mostFrequentDoc2']``;
    * words in ``doc['diffWordsDoc1']`` appear only in the first file;
    * words in ``doc['diffWordsDoc2']`` appear only in the second file.

    All of these lists are iterated as ``(frequency, word)`` pairs.

    :param doc_id: id of the samediff document to export
    :return: the result of ``filehandler.generate_csv`` for the written file
    On any exception the error is logged and ``abort(400)`` is called.
    """
    try:
        logger.debug("Download %s", doc_id)
        doc = mongo.find_document('samediff', doc_id)
        filenames = doc['filenames']
        headers = [
            _('word'),
            _('uses in') + ' ' + filenames[0],
            _('uses in') + ' ' + filenames[1],
            _('total uses'),
        ]
        rows = []
        for f, w in doc['sameWords']:
            # a shared word is expected in both most-frequent lists; if it is
            # missing, next() raises and the request ends in the 400 below
            doc1_count = next(f2 for f2, w2 in doc['mostFrequentDoc1'] if w == w2)
            doc2_count = next(f2 for f2, w2 in doc['mostFrequentDoc2'] if w == w2)
            rows.append([w, doc1_count, doc2_count, f])
        for f, w in doc['diffWordsDoc1']:
            rows.append([w, f, 0, f])
        # words unique to the second document (count goes in the second column)
        for f, w in doc['diffWordsDoc2']:
            rows.append([w, 0, f, f])
        # TODO: clean up file name
        file_path = filehandler.write_to_csv(
            headers,
            rows,
            filehandler.generate_filename('csv', '', filenames[0], filenames[1]),
            False)
        logger.debug('  created csv to download at %s', file_path)
        return filehandler.generate_csv(file_path)
    except Exception as e:
        # use the same logger as the rest of this function
        logger.exception(e)
        abort(400)
def process_paste(text, has_header_row=True):
    """
    Return results for a pasted table

    ``text`` is split into lines; every line must hold exactly two cells
    separated by a tab, either both wrapped in double quotes or both unquoted.
    The parsed rows are written to a temporary CSV with
    ``filehandler.write_to_csv``, summarised with ``ctd.get_summary`` and the
    temporary file is deleted again, even when summarising fails.

    :param text: the pasted, tab-separated table
    :param has_header_row: when true the first parsed row supplies the column
        headers, otherwise ``['source', 'target']`` is used
    :return: the summary from ``ctd.get_summary`` with ``has_multiple_sheets``
        and ``filename`` filled in, or ``None`` when a line cannot be parsed
        as two cells (or a header row was requested but nothing was pasted)
    """
    # Matches a double-quoted cell (capturing its contents) or a tab separator.
    # The capture group must not be repeated: a repeated group only keeps its
    # last repetition, which loses the cell text.
    cell_or_tab = re.compile(r'"(.*?)"|\t')
    csv_rows = []
    for r in text.splitlines():
        groups = cell_or_tab.findall(r)
        if len(groups) == 3:
            # quoted cell, tab, quoted cell
            csv_rows.append((groups[0], groups[2]))
        elif len(groups) == 1:
            # a single match is normally the lone tab between two unquoted cells
            cells = r.split('\t')
            if len(cells) != 2:
                # e.g. one quoted cell with no tab at all
                return None
            csv_rows.append((cells[0], cells[1]))
        else:
            return None
    if has_header_row:
        if not csv_rows:
            # nothing pasted, so there is no header row to take
            return None
        headers = csv_rows.pop(0)
    else:
        headers = ['source', 'target']
    file_path = filehandler.write_to_csv(headers, csv_rows)
    try:
        file_size = os.stat(file_path).st_size
        logger.debug('[CTD] File size: %d bytes', file_size)
        results = ctd.get_summary(file_path)
    finally:
        # always remove the temporary file, even if summarising raised
        filehandler.delete_files([file_path])
    results['has_multiple_sheets'] = False
    results['filename'] = 'Your Pasted Data'
    return results
def create_csv_file(counts, analysis_type):
    """
    Write the requested slice of ``counts`` to a CSV via ``filehandler.write_to_csv``.

    ``analysis_type`` selects what is exported:

    * ``'words'``    - ``counts['unique_words']`` is passed through unchanged.
    * ``'bigrams'``  - each entry of ``counts['bigrams']`` becomes ``[phrase, frequency]``.
    * ``'trigrams'`` - same as bigrams, reading ``counts['trigrams']``.

    For the n-gram types every entry is indexed as ``entry[0]`` (an iterable
    of strings, joined with single spaces) and ``entry[1]`` (the frequency).

    Returns whatever ``filehandler.write_to_csv`` returns, or ``None`` (after
    logging an error) when ``analysis_type`` is not one of the values above.
    """
    if analysis_type == 'words':
        return filehandler.write_to_csv(['word', 'frequency'], counts['unique_words'], '-word-counts.csv')

    # bigrams and trigrams share one row layout; only the key, header and suffix differ
    if analysis_type == 'bigrams':
        counts_key, phrase_header, file_suffix = 'bigrams', 'bigram phrase', '-bigram-counts.csv'
    elif analysis_type == 'trigrams':
        counts_key, phrase_header, file_suffix = 'trigrams', 'trigram phrase', '-trigram-counts.csv'
    else:
        logger.error("Requested unknown csv type: %s", analysis_type)
        return None  # if was an invalid analysis_type

    phrase_rows = []
    for entry in counts[counts_key]:
        frequency = entry[1]
        phrase_rows.append([" ".join(entry[0]), frequency])
    return filehandler.write_to_csv([phrase_header, 'frequency'], phrase_rows, file_suffix)
def create_csv_file(counts, analysis_type):
    """
    Write the requested slice of ``counts`` to a CSV via ``filehandler.write_to_csv``.

    ``analysis_type`` selects what is exported:

    * ``'words'``    - ``counts['unique_words']`` is passed through unchanged.
    * ``'bigrams'``  - each entry of ``counts['bigrams']`` becomes ``[phrase, frequency]``.
    * ``'trigrams'`` - same as bigrams, reading ``counts['trigrams']``.

    For the n-gram types every entry is indexed as ``entry[0]`` (an iterable
    of strings, joined with single spaces) and ``entry[1]`` (the frequency).

    Returns whatever ``filehandler.write_to_csv`` returns, or ``None`` (after
    logging an error) when ``analysis_type`` is not one of the values above.
    """
    if analysis_type == 'words':
        return filehandler.write_to_csv(['word', 'frequency'], counts['unique_words'], '-word-counts.csv')

    # bigrams and trigrams share one row layout; only the key, header and suffix differ
    if analysis_type == 'bigrams':
        counts_key, phrase_header, file_suffix = 'bigrams', 'bigram phrase', '-bigram-counts.csv'
    elif analysis_type == 'trigrams':
        counts_key, phrase_header, file_suffix = 'trigrams', 'trigram phrase', '-trigram-counts.csv'
    else:
        logger.error("Requested unknown csv type: %s", analysis_type)
        return None  # if was an invalid analysis_type

    phrase_rows = []
    for entry in counts[counts_key]:
        frequency = entry[1]
        phrase_rows.append([" ".join(entry[0]), frequency])
    return filehandler.write_to_csv([phrase_header, 'frequency'], phrase_rows, file_suffix)
def download(doc_id):
    """
    Build the CSV download for a stored 'samediff' comparison.

    The document is loaded with ``mongo.find_document('samediff', doc_id)``.
    One row is written per word with the columns: word, uses in the first
    file, uses in the second file, total uses.

    * words in ``doc['sameWords']`` get their per-document counts looked up in
      ``doc['mostFrequentDoc1']`` / ``doc['mostFrequentDoc2']``;
    * words in ``doc['diffWordsDoc1']`` appear only in the first file;
    * words in ``doc['diffWordsDoc2']`` appear only in the second file.

    All of these lists are iterated as ``(frequency, word)`` pairs.

    :param doc_id: id of the samediff document to export
    :return: the result of ``filehandler.generate_csv`` for the written file
    On any exception the error is logged and ``abort(400)`` is called.
    """
    try:
        logger.debug("Download %s", doc_id)
        doc = mongo.find_document('samediff', doc_id)
        filenames = doc['filenames']
        headers = [
            _('word'),
            _('uses in') + ' ' + filenames[0],
            _('uses in') + ' ' + filenames[1],
            _('total uses'),
        ]
        rows = []
        for f, w in doc['sameWords']:
            # a shared word is expected in both most-frequent lists; if it is
            # missing, next() raises and the request ends in the 400 below
            doc1_count = next(f2 for f2, w2 in doc['mostFrequentDoc1'] if w == w2)
            doc2_count = next(f2 for f2, w2 in doc['mostFrequentDoc2'] if w == w2)
            rows.append([w, doc1_count, doc2_count, f])
        for f, w in doc['diffWordsDoc1']:
            rows.append([w, f, 0, f])
        # words unique to the second document (count goes in the second column)
        for f, w in doc['diffWordsDoc2']:
            rows.append([w, 0, f, f])
        # TODO: clean up file name
        file_path = filehandler.write_to_csv(
            headers,
            rows,
            filehandler.generate_filename('csv', '', filenames[0], filenames[1]),
            False)
        logger.debug('  created csv to download at %s', file_path)
        return filehandler.generate_csv(file_path)
    except Exception as e:
        # use the same logger as the rest of this function
        logger.exception(e)
        abort(400)