def index():
    """Serve the WTFcsv landing page and dispatch its form submissions.

    If ``oauth.doc_url()`` yields a value, it is processed with
    ``process_link`` and the user is redirected to the results straight
    away. Otherwise, on a POST, the ``btn`` form field selects one of the
    ``upload``, ``link`` or ``sample`` flows. When a flow produces a
    non-empty result list the user is redirected to the results; in every
    other case the ``wtfcsv.html`` form page is rendered.
    """
    pending_doc = oauth.doc_url()
    if pending_doc is not None:
        return redirect_to_results(process_link(pending_doc), 'link')

    forms = OrderedDict()
    forms['sample'] = WTFCSVSample(g.current_lang)
    forms['upload'] = WTFCSVUpload()
    # forms['link'] = WTFCSVLink()
    # NOTE(review): with the link form disabled above, the 'link' branch
    # below raises KeyError on forms['link'] if that button value is posted.

    if request.method == 'POST':
        action = request.form['btn']
        sample_id = ''
        results = None

        if action == 'upload':
            uploaded = forms['upload'].data['upload']
            logger.debug("New from upload: %s", uploaded.filename)
            results = process_upload(uploaded)
        elif action == 'link':
            link = forms['link'].data['link']
            logger.debug("New from link: %s", link)
            opened = oauth.open_doc_from_url(link, request.url)
            if opened['authenticate'] is not None:
                # Send the user to the returned 'authenticate' target first.
                return redirect(opened['authenticate'])
            if opened['doc'] is not None:
                results = process_link(opened['doc'])
        elif action == 'sample':
            sample_id = forms['sample'].data['sample']
            sample = filehandler.get_sample(sample_id)
            title = sample['title']
            cached_doc_id = mongo.results_for_sample('wtfcsv', sample_id)
            if cached_doc_id is not None:
                # Reuse the stored results instead of summarising again.
                logger.debug("Existing from sample: %s", sample_id)
                return redirect(request.url + 'results/' + cached_doc_id)
            logger.debug("New from sample: %s", title)
            path = sample['path']
            logger.debug(" loading from %s", path)
            summary = wtfcsvstat.get_summary(path, language=g.current_lang)
            summary['filename'] = title + '.csv'
            summary['biography'] = sample['biography']
            results = [summary]

        # An unrecognised button value, or a flow that produced nothing,
        # falls through to re-render the form.
        if results:
            return redirect_to_results(results, action, sample_id)

    return render_template(
        'wtfcsv.html',
        forms=list(forms.items()),
        tool_name='wtfcsv',
        max_file_size_in_mb=g.max_file_size_mb,
    )
def process_link(sheet):
    """Summarise every file extracted from a linked workbook.

    Args:
        sheet: Workbook object accepted by ``filehandler.open_workbook``;
            its ``sheet1.title`` is used as the reported filename
            (presumably a spreadsheet client object -- confirm with caller).

    Returns:
        A list with one summary per extracted file. Summaries that contain
        a ``'bad_formatting'`` key are returned as-is, without
        ``'sheet_name'`` / ``'filename'`` being added.
    """
    file_paths = filehandler.open_workbook(sheet)
    results = []
    try:
        for f in file_paths:
            summary = wtfcsvstat.get_summary(f)
            if 'bad_formatting' not in summary:
                summary['sheet_name'] = _get_sheet_name(f)
                summary['filename'] = sheet.sheet1.title
            results.append(summary)
    finally:
        # Remove the extracted files even when summarising raises, so a
        # failing sheet does not leave them behind.
        filehandler.delete_files(file_paths)
    return results
def process_link(sheet):
    """Build a summary for each file produced from a linked workbook.

    Args:
        sheet: Workbook object passed to ``filehandler.open_workbook``; the
            title of its ``sheet1`` becomes each summary's ``'filename'``
            (presumably a spreadsheet client object -- confirm with caller).

    Returns:
        A list of summaries, one per file. A summary carrying a
        ``'bad_formatting'`` key is appended unchanged, without the
        ``'sheet_name'`` and ``'filename'`` entries.
    """
    file_paths = filehandler.open_workbook(sheet)
    results = []
    try:
        for f in file_paths:
            summary = wtfcsvstat.get_summary(f)
            if 'bad_formatting' not in summary:
                summary['sheet_name'] = _get_sheet_name(f)
                summary['filename'] = sheet.sheet1.title
            results.append(summary)
    finally:
        # Always clean up the extracted files, including when a summary
        # step raises part-way through the loop.
        filehandler.delete_files(file_paths)
    return results
def index():
    """Serve the WTFcsv landing page and dispatch its form submissions.

    If ``oauth.doc_url()`` yields a value, it is processed with
    ``process_link`` and the user is redirected to the results straight
    away. Otherwise, on a POST, the ``btn`` form field selects one of the
    ``upload``, ``link`` or ``sample`` flows, and any non-empty button value
    ends in a redirect to the results. All other requests render the
    ``wtfcsv.html`` form page.
    """
    doc_url = oauth.doc_url()
    if doc_url is not None:
        return redirect_to_results(process_link(doc_url), 'link')

    results = None
    forms = OrderedDict()
    forms['sample'] = WTFCSVSample(g.current_lang)
    forms['upload'] = WTFCSVUpload()
    # forms['link'] = WTFCSVLink()
    # NOTE(review): with the link form disabled above, the 'link' branch
    # below raises KeyError on forms['link'] if that button value is posted.

    if request.method == 'POST':
        btn_value = request.form['btn']
        sample_id = ''

        if btn_value == 'upload':
            upload_file = forms['upload'].data['upload']
            logger.debug("New from upload: %s", upload_file.filename)
            results = process_upload(upload_file)
        elif btn_value == 'link':
            doc_url = forms['link'].data['link']
            logger.debug("New from link: %s", doc_url)
            doc = oauth.open_doc_from_url(doc_url, request.url)
            if doc['authenticate'] is not None:
                # Send the user to the returned 'authenticate' target first.
                return redirect(doc['authenticate'])
            elif doc['doc'] is not None:
                results = process_link(doc['doc'])
        elif btn_value == 'sample':
            sample_source = forms['sample'].data['sample']
            samplename = filehandler.get_sample_title(sample_source)
            sample_id = sample_source
            existing_doc_id = mongo.results_for_sample('wtfcsv', sample_id)
            if existing_doc_id is not None:
                # Reuse the stored results instead of summarising again.
                logger.debug("Existing from sample: %s", sample_source)
                return redirect(request.url + 'results/' + existing_doc_id)
            logger.debug("New from sample: %s", samplename)
            sample_path = filehandler.get_sample_path(sample_source)
            logger.debug(" loading from %s", sample_path)
            results = [wtfcsvstat.get_summary(sample_path)]
            results[0]['filename'] = samplename + '.csv'

        # Truthiness test replaces the former identity comparison against a
        # string literal (`is not u''`), whose result is
        # implementation-dependent.
        # NOTE(review): results can still be None here (unrecognised button
        # value, or a link with no document) -- confirm that
        # redirect_to_results copes with that.
        if btn_value:
            return redirect_to_results(results, btn_value, sample_id)

    return render_template(
        'wtfcsv.html',
        forms=forms.items(),
        tool_name='wtfcsv',
        max_file_size_in_mb=g.max_file_size_mb,
    )
def process_upload(csv_file):
    """Summarise every CSV produced from an uploaded file.

    Args:
        csv_file: Uploaded file object accepted by ``filehandler.open_doc``;
            its ``filename`` attribute is recorded on each summary.

    Returns:
        A list with one summary per converted file. Summaries that contain
        a ``'bad_formatting'`` key are returned as-is, without
        ``'sheet_name'`` / ``'filename'`` being added.
    """
    file_path = filehandler.open_doc(csv_file)
    file_size = os.stat(file_path).st_size  # because browser might not have sent content_length
    logger.debug("Upload: %d bytes", file_size)
    file_paths = filehandler.convert_to_csv(file_path)
    results = []
    try:
        for f in file_paths:
            summary = wtfcsvstat.get_summary(f)
            if 'bad_formatting' not in summary:
                summary['sheet_name'] = _get_sheet_name(f)
                summary['filename'] = csv_file.filename
            results.append(summary)
    finally:
        # Remove the converted files even when summarising raises, so a
        # failing upload does not leave them behind.
        filehandler.delete_files(file_paths)
    return results
def process_upload(csv_file):
    """Build a summary for each CSV derived from an uploaded file.

    Args:
        csv_file: Uploaded file object passed to ``filehandler.open_doc``;
            its ``filename`` becomes each summary's ``'filename'``.

    Returns:
        A list of summaries, one per converted file. A summary carrying a
        ``'bad_formatting'`` key is appended unchanged, without the
        ``'sheet_name'`` and ``'filename'`` entries.
    """
    file_path = filehandler.open_doc(csv_file)
    # Measured on disk because the browser might not have sent content_length.
    file_size = os.stat(file_path).st_size
    logger.debug("Upload: %d bytes", file_size)
    file_paths = filehandler.convert_to_csv(file_path)
    results = []
    try:
        for f in file_paths:
            summary = wtfcsvstat.get_summary(f)
            if 'bad_formatting' not in summary:
                summary['sheet_name'] = _get_sheet_name(f)
                summary['filename'] = csv_file.filename
            results.append(summary)
    finally:
        # Always clean up the converted files, including when a summary
        # step raises part-way through the loop.
        filehandler.delete_files(file_paths)
    return results
def test_get_summary_from_csv(self):
    """The Somerville tree CSV fixture summarises to 43 columns and 13882 rows."""
    fixture = os.path.join(self._fixtures_dir, 'somerville-tree-details.csv')
    summary = wtfcsvstat.get_summary(fixture)
    column_count = len(summary['columns'])
    self.assertEqual(column_count, 43)
    self.assertEqual(summary['row_count'], 13882)
def test_trailing_comma(self):
    """The trailing-comma CSV fixture summarises to 2 columns and 3 rows."""
    fixture = os.path.join(self._fixtures_dir, 'trailing-comma.csv')
    summary = wtfcsvstat.get_summary(fixture)
    column_count = len(summary['columns'])
    self.assertEqual(column_count, 2)
    self.assertEqual(summary['row_count'], 3)
def test_get_summary_from_xls(self):
    """The first CSV converted from the NEISS workbook has 19 columns and 26303 rows."""
    workbook = os.path.join(
        self._fixtures_dir, 'HowAmericaInjuresItself_FromNEISS.xlsx')
    converted = filehandler.convert_to_csv(workbook)
    # Only the first converted file is summarised.
    first_csv = converted[0]
    summary = wtfcsvstat.get_summary(first_csv)
    self.assertEqual(len(summary['columns']), 19)
    self.assertEqual(summary['row_count'], 26303)