def _write_read_file_into_url(base_url, filename, compression=None):
    """Write ``_table`` to a URL, read it back and compare the two.

    The target is ``base_url`` combined with ``filename`` (plus a
    ``"." + compression`` suffix when ``compression`` is given).  Names
    containing ".avro" go through ``toavro``/``fromavro``; everything
    else goes through ``tocsv``/``fromcsv`` with ASCII encoding.

    Returns without testing when:

    * the name contains ".avro" and ``_has_avro`` is false;
    * ``compression`` is requested for a local ("./") base URL;
    * fsspec cannot infer a codec from the suffixed filename (a SKIPPED
      note is printed to stderr in that case).
    """
    if ".avro" in filename and not _has_avro:
        return

    is_local = base_url.startswith("./")

    if compression is not None:
        if is_local:
            return
        filename = ".".join((filename, compression))
        if fsspec.utils.infer_compression(filename) is None:
            print("\n - %s SKIPPED " % filename, file=sys.stderr, end="")
            return

    print("\n - %s " % filename, file=sys.stderr, end="")

    # Local URLs are plain prefixes; remote ones are joined as paths.
    source_url = (
        base_url + filename if is_local else os.path.join(base_url, filename)
    )

    _show__rows_from("Expected:", _table)

    # Re-test the name here: it may have gained a compression suffix.
    if ".avro" in filename:
        toavro(_table, source_url)
        actual = fromavro(source_url)
    else:
        tocsv(_table, source_url, encoding="ascii", lineterminator="\n")
        actual = fromcsv(source_url, encoding="ascii")

    _show__rows_from("Actual:", actual)
    for _ in range(2):  # second pass verifies can iterate twice
        ieq(_table, actual)
def _write_read_file_into_url(base_url, filename, compression=None, pkg=None):
    """Round-trip ``_table`` through the format implied by ``filename``.

    Returns silently when ``_is_installed(pkg, filename)`` is false or
    when ``_build_source_url_from`` yields no URL.  Otherwise the table
    is written and read back with the writer/reader pair matching the
    first of ".avro", ".xlsx", ".xls", ".json", ".csv" found in
    ``filename``.  If none matches (or the reader returns ``None``) a
    SKIPPED note is printed to stderr.
    """
    if not _is_installed(pkg, filename):
        return

    source_url = _build_source_url_from(base_url, filename, compression)
    if source_url is None:
        return

    # NOTE: ".xlsx" must be tested before ".xls", which is a substring of it.
    if ".avro" in filename:
        toavro(_table, source_url)
        actual = fromavro(source_url)
    elif ".xlsx" in filename:
        toxlsx(_table, source_url, 'test1', mode='overwrite')
        toxlsx(_table2, source_url, 'test2', mode='add')
        actual = fromxlsx(source_url, 'test1')
    elif ".xls" in filename:
        toxls(_table, source_url, 'test')
        actual = fromxls(source_url, 'test')
    elif ".json" in filename:
        tojson(_table, source_url)
        actual = fromjson(source_url)
    elif ".csv" in filename:
        tocsv(_table, source_url, encoding="ascii", lineterminator="\n")
        actual = fromcsv(source_url, encoding="ascii")
    else:
        actual = None

    if actual is None:
        print("\n - %s SKIPPED " % filename, file=sys.stderr, end="")
        return

    _show__rows_from("Expected:", _table)
    _show__rows_from("Actual:", actual)
    ieq(_table, actual)
    ieq(_table, actual)  # verify can iterate twice
def test_fromcsv_gz():
    """fromcsv reads tab-delimited rows from a ``.gz`` file.

    The same table is written once per line terminator and read back;
    values come back as strings.
    """
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))

    # N.B., '\r' not supported because universal newline mode is
    # not supported by gzip module
    for lt in ('\n', '\r\n'):
        # Reserve a temp path, then give it a '.gz' suffix.
        tmp = NamedTemporaryFile(delete=False)
        tmp.close()
        fn = tmp.name + '.gz'
        os.rename(tmp.name, fn)

        with gzip.open(fn, 'wb') as fz:
            csv.writer(fz, delimiter='\t', lineterminator=lt).writerows(table)

        actual = fromcsv(fn, delimiter='\t')
        ieq(expect, actual)
        ieq(expect, actual)  # verify can iterate twice
def counts(request, uuid):
    """Render value counts for the columns named in ``?columns=a,b,...``.

    Looks up the ``CSVDownload`` with the given ``uuid`` and opens
    ``<uuid>.csv`` under ``settings.CSV_DIR``.

    Responses:

    * 404 when no ``CSVDownload`` matches ``uuid``;
    * redirect to ``people_list`` when no (non-blank) column is given;
    * 400 when a requested column is not in the CSV header;
    * otherwise the rendered ``counts.html`` template.
    """
    try:
        csvdownload = CSVDownload.objects.get(uuid=uuid)
    except CSVDownload.DoesNotExist:
        return HttpResponseNotFound("Not found.")

    fname = '{0}.csv'.format(csvdownload.uuid)
    full_fname = os.path.join(settings.CSV_DIR, fname)
    people = fromcsv(full_fname)

    columns_str = request.GET.get('columns', '')
    columns = sorted(c for c in columns_str.split(',') if c.strip())

    # Nothing to count: bounce back to the listing before touching the file.
    if not columns:
        return redirect(to=reverse('people_list', kwargs={'uuid': uuid}))

    # Read the header once and reuse it for validation and the template,
    # instead of calling header(people) once per requested column.
    fields = header(people)
    if any(column not in fields for column in columns):
        return HttpResponseBadRequest('Bad request.')

    # Named `frequencies` so the local does not shadow this view function.
    frequencies = valuecounts(people, *columns)
    frequencies = cutout(frequencies, 'frequency')

    return render(
        request,
        'counts.html',
        {
            'csvdownload': csvdownload,
            'columns': fields,
            'headers': header(frequencies),
            'counts': data(frequencies),
            'queryparams': {
                'columns': columns
            }
        })
def test_fromcsv_gz():
    """fromcsv decodes gzip-compressed ASCII CSV for each line terminator."""
    data = [b'foo,bar',
            b'a,1',
            b'b,2',
            b'c,2']
    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))

    # '\r' not supported in PY2 because universal newline mode is
    # not supported by gzip module
    lts = (b'\n', b'\r\n') if PY2 else (b'\r', b'\n', b'\r\n')

    for lt in lts:
        # Reserve a temp path, then give it a '.gz' suffix.
        tmp = NamedTemporaryFile(delete=False)
        tmp.close()
        fn = tmp.name + '.gz'
        os.rename(tmp.name, fn)

        with gzip.open(fn, 'wb') as fz:
            fz.write(lt.join(data))

        actual = fromcsv(fn, encoding='ascii')
        ieq(expect, actual)
        ieq(expect, actual)  # verify can iterate twice
def test_popensource():
    """fromcsv can read space-delimited rows from a PopenSource."""
    # The bash `echo -e` emits two lines: "foo bar" and "a 1".
    source = PopenSource(r'echo -e foo bar\\na 1',
                         shell=True,
                         executable='/bin/bash')
    expect = (('foo', 'bar'),
              ('a', '1'))
    actual = fromcsv(source, delimiter=' ')
    ieq(expect, actual)
def test_fromcsv_quoted():
    """fromcsv forwards ``quoting`` so unquoted fields become numbers."""
    import csv

    data = [b'"foo","bar"',
            b'"a",1',
            b'"b",2',
            b'"c",2']
    with NamedTemporaryFile(mode='wb', delete=False) as f:
        f.write(b'\n'.join(data))

    expect = (('foo', 'bar'),
              ('a', 1),
              ('b', 2),
              ('c', 2))
    actual = fromcsv(f.name, quoting=csv.QUOTE_NONNUMERIC)
    debug(actual)
    for _ in range(2):  # second pass verifies can iterate twice
        ieq(expect, actual)
def test_fromcsv():
    """fromcsv reads a plain ASCII CSV file; all values come back as text."""
    lines = [b'foo,bar',
             b'a,1',
             b'b,2',
             b'c,2']
    with NamedTemporaryFile(mode='wb', delete=False) as f:
        f.write(b'\n'.join(lines))

    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))
    actual = fromcsv(f.name, encoding='ascii')
    debug(actual)
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_fromcsv():
    """fromcsv reads back a tab-delimited file written with csv.writer."""
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))

    with NamedTemporaryFile(delete=False) as f:
        csv.writer(f, delimiter='\t').writerows(table)

    actual = fromcsv(f.name, delimiter='\t')
    for _ in range(2):  # second pass verifies can iterate twice
        ieq(expect, actual)
def test_fromcsv_lineterminators():
    """fromcsv handles '\\r', '\\n' and '\\r\\n' in UTF-8 encoded input."""
    data = (
        u'name,id',
        u'Արամ Խաչատրյան,1',
        u'Johann Strauß,2',
        u'Вагиф Сәмәдоғлу,3',
        u'章子怡,4',
    )
    expect = (
        (u'name', u'id'),
        (u'Արամ Խաչատրյան', u'1'),
        (u'Johann Strauß', u'2'),
        (u'Вагиф Сәмәдоғлу', u'3'),
        (u'章子怡', u'4'),
    )

    for lt in (u'\r', u'\n', u'\r\n'):
        fn = NamedTemporaryFile().name
        # newline='' keeps the terminator exactly as written.
        with io.open(fn, encoding='utf-8', mode='wt', newline='') as uf:
            uf.write(lt.join(data))
        actual = fromcsv(fn, encoding='utf-8')
        ieq(expect, actual)
def test_fromcsv_lineterminators():
    """fromcsv handles '\\r', '\\n' and '\\r\\n' terminated ASCII input."""
    data = [b'foo,bar',
            b'a,1',
            b'b,2',
            b'c,2']
    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))

    for lt in (b'\r', b'\n', b'\r\n'):
        debug(repr(lt))
        with NamedTemporaryFile(mode='wb', delete=False) as f:
            f.write(lt.join(data))

        # Dump the raw bytes to the debug log before parsing them.
        with open(f.name, 'rb') as g:
            raw = g.read()
            debug(repr(raw))

        actual = fromcsv(f.name, encoding='ascii')
        debug(actual)
        ieq(expect, actual)
def _write_read_file_into_url(base_url, filename, compression=None, pkg=None):
    """Round-trip ``_table`` through a URL in the format named by ``filename``.

    Steps, in order:

    1. If ``pkg`` is given and ``_is_installed(pkg)`` is false, print a
       SKIPPED note and return.
    2. If ``compression`` is given: return silently for local ("./")
       base URLs; otherwise append ``"." + compression`` to the name and
       print SKIPPED/return when fsspec cannot infer a codec from it.
    3. Build the URL, write and read back with the pair matching the
       first of ".avro", ".xlsx", ".xls", ".json", ".csv" in the name.
    4. Compare with ``ieq`` twice, or print SKIPPED if nothing matched.
    """
    if pkg is not None and not _is_installed(pkg):
        print("\n - %s SKIPPED " % filename, file=sys.stderr, end="")
        return

    is_local = base_url.startswith("./")

    if compression is not None:
        if is_local:
            return
        filename = ".".join((filename, compression))
        if fsspec.utils.infer_compression(filename) is None:
            print("\n - %s SKIPPED " % filename, file=sys.stderr, end="")
            return

    print("\n - %s " % filename, file=sys.stderr, end="")

    # Local URLs are plain prefixes; remote ones are joined as paths.
    source_url = (
        base_url + filename if is_local else os.path.join(base_url, filename)
    )

    # NOTE: ".xlsx" must be tested before ".xls", which is a substring of it.
    if ".avro" in filename:
        toavro(_table, source_url)
        actual = fromavro(source_url)
    elif ".xlsx" in filename:
        toxlsx(_table, source_url, 'test1', mode='overwrite')
        toxlsx(_table2, source_url, 'test2', mode='add')
        actual = fromxlsx(source_url, 'test1')
    elif ".xls" in filename:
        toxls(_table, source_url, 'test')
        actual = fromxls(source_url, 'test')
    elif ".json" in filename:
        tojson(_table, source_url)
        actual = fromjson(source_url)
    elif ".csv" in filename:
        tocsv(_table, source_url, encoding="ascii", lineterminator="\n")
        actual = fromcsv(source_url, encoding="ascii")
    else:
        actual = None

    if actual is None:
        print("\n - %s SKIPPED " % filename, file=sys.stderr, end="")
        return

    _show__rows_from("Expected:", _table)
    _show__rows_from("Actual:", actual)
    ieq(_table, actual)
    ieq(_table, actual)  # verify can iterate twice
def test_fromcsv():
    """fromcsv decodes a UTF-8 CSV file containing non-ASCII names."""
    lines = (
        u"name,id",
        u"Արամ Խաչատրյան,1",
        u"Johann Strauß,2",
        u"Вагиф Сәмәдоғлу,3",
        u"章子怡,4",
    )
    # Every line, including the last, ends with "\n".
    data = u"\n".join(lines) + u"\n"

    fn = NamedTemporaryFile().name
    with io.open(fn, encoding='utf-8', mode='wt') as uf:
        uf.write(data)

    actual = fromcsv(fn, encoding='utf-8')
    expect = (
        (u'name', u'id'),
        (u'Արամ Խաչատրյան', u'1'),
        (u'Johann Strauß', u'2'),
        (u'Вагиф Сәмәдоғлу', u'3'),
        (u'章子怡', u'4'),
    )
    for _ in range(2):  # second pass verifies can iterate twice
        ieq(expect, actual)
def test_fromcsv_header():
    """fromcsv prepends an explicit ``header`` to a header-less file."""
    fields = ['foo', 'bar']
    rows = [b'a,1',
            b'b,2',
            b'c,2']
    with NamedTemporaryFile(mode='wb', delete=False) as f:
        f.write(b'\n'.join(rows))

    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))
    actual = fromcsv(f.name, encoding='ascii', header=fields)
    debug(actual)
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_fromcsv_gz():
    """fromcsv reads a tab-delimited table from a gzip-compressed file."""
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))

    # Reserve a temp path, then give it a '.gz' suffix.
    tmp = NamedTemporaryFile(delete=False)
    tmp.close()
    fn = tmp.name + '.gz'
    os.rename(tmp.name, fn)

    with gzip.open(fn, 'wb') as fz:
        csv.writer(fz, delimiter='\t').writerows(table)

    actual = fromcsv(fn, delimiter='\t')
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_fromcsv_lineterminators():
    """fromcsv copes with '\\r', '\\n' and '\\r\\n' written by csv.writer."""
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))

    for lt in ('\r', '\n', '\r\n'):
        with NamedTemporaryFile(delete=False) as f:
            csv.writer(f, lineterminator=lt).writerows(table)
        actual = fromcsv(f.name)
        ieq(expect, actual)
def test_stringsource():
    """StringSource works as a target for tocsv/appendcsv and a source for fromcsv."""
    table1 = (
        ("foo", "bar"),
        ("a", "1"),
        ("b", "2"),
        ("c", "2"),
    )
    header_text = "foo,bar\r\n"
    rows_text = "a,1\r\nb,2\r\nc,2\r\n"

    # test writing to a string buffer
    ss = StringSource()
    tocsv(table1, ss)
    written = ss.getvalue()
    eq_(header_text + rows_text, written)

    # test reading from a string buffer
    table2 = fromcsv(StringSource(written))
    for _ in range(2):
        ieq(table1, table2)

    # test appending: only the data rows are written a second time
    appendcsv(table1, ss)
    eq_(header_text + rows_text + rows_text, ss.getvalue())
def test_stringsource():
    """Write, read back and append CSV text through a StringSource buffer."""
    table1 = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))

    # test writing to a string buffer
    buf = StringSource()
    tocsv(table1, buf)
    after_write = buf.getvalue()
    eq_("foo,bar\r\na,1\r\nb,2\r\nc,2\r\n", after_write)

    # test reading from a string buffer
    table2 = fromcsv(StringSource(after_write))
    ieq(table1, table2)
    ieq(table1, table2)

    # test appending
    appendcsv(table1, buf)
    after_append = buf.getvalue()
    eq_("foo,bar\r\na,1\r\nb,2\r\nc,2\r\na,1\r\nb,2\r\nc,2\r\n", after_append)
def people_list(request, uuid):
    """Render the first ``count`` rows of a CSV download, sorted.

    Query parameters:

    * ``sortby``   - column to sort on (default ``'name'``); must be in
      the CSV header.
    * ``ordering`` - ``'asc'`` (default) or ``'desc'``.
    * ``count``    - number of rows to show (default 10); must parse as
      an integer >= 1.

    Responses: 404 when no ``CSVDownload`` matches ``uuid``, 400 for any
    invalid query parameter, otherwise the rendered ``people_list.html``.
    """
    try:
        csvdownload = CSVDownload.objects.get(uuid=uuid)
    except CSVDownload.DoesNotExist:
        return HttpResponseNotFound("Not found.")

    fname = '{0}.csv'.format(csvdownload.uuid)
    full_fname = os.path.join(settings.CSV_DIR, fname)
    people = fromcsv(full_fname)

    sortby = request.GET.get('sortby', 'name')
    ordering = request.GET.get('ordering', 'asc')
    count_str = request.GET.get('count', '10')

    if sortby not in header(people):
        return HttpResponseBadRequest('Bad request.')
    if ordering not in ('asc', 'desc'):
        return HttpResponseBadRequest('Bad request.')
    try:
        count = int(count_str)
    except ValueError:
        return HttpResponseBadRequest('Bad request.')
    if count < 1:
        return HttpResponseBadRequest('Bad request.')

    people = sort(people, sortby, reverse=ordering == 'desc')

    # Fetch one row beyond the page size so "has more" reflects whether a
    # further row really exists.  Measuring the table after truncating it
    # to `count` rows cannot tell "exactly count rows" from "more rows".
    page = head(people, count + 1)
    rows = list(data(page))
    has_more = len(rows) > count

    return render(
        request,
        'people_list.html',
        {
            'csvdownload': csvdownload,
            'headers': header(page),
            'people': rows[:count],  # drop the look-ahead row
            'has_more': has_more,
            'queryparams': {
                'sortby': sortby,
                'ordering': ordering,
                'count': str(count + 10)
            }
        })
def test_fromcsv():
    """fromcsv reads UTF-8 text with non-ASCII names and can be re-iterated."""
    data = u"".join([
        u"name,id\n",
        u"Արամ Խաչատրյան,1\n",
        u"Johann Strauß,2\n",
        u"Вагиф Сәмәдоғлу,3\n",
        u"章子怡,4\n",
    ])
    expect = ((u'name', u'id'),
              (u'Արամ Խաչատրյան', u'1'),
              (u'Johann Strauß', u'2'),
              (u'Вагиф Сәмәдоғлу', u'3'),
              (u'章子怡', u'4'))

    fn = NamedTemporaryFile().name
    with io.open(fn, encoding='utf-8', mode='wt') as uf:
        uf.write(data)

    actual = fromcsv(fn, encoding='utf-8')
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_popensource():
    """Rows echoed by a bash subprocess are parsed by fromcsv."""
    expect = (
        ("foo", "bar"),
        ("a", "1"),
    )
    # `echo -e` expands the escaped newline, producing two lines.
    popen_kwargs = dict(shell=True, executable="/bin/bash")
    actual = fromcsv(
        PopenSource(r"echo -e foo bar\\na 1", **popen_kwargs),
        delimiter=" ",
    )
    ieq(expect, actual)