def test_append(self):
    """Append one record to a private copy of the fixture and read it back.

    ``self.buf`` is written to a fresh temporary file so the shared fixture
    is never modified.  After extending with a fourth record, the file is
    read in chunks of two records and compared with the expected
    dictionaries.  Extending with a bare float must raise ``ValueError``.
    """
    # Get a private file so as to not mess the original one.  mkstemp
    # creates the file atomically, unlike the race-prone mktemp.
    fd, csv_file = tempfile.mkstemp(".csv")
    os.close(fd)
    try:
        with open(csv_file, "w") as f:
            f.write(self.buf)
        dd = CSV(csv_file, schema=self.schema, mode='r+')
        dd.extend([["k4", "v4", 4, True]])
        vals = [nd.as_py(v) for v in dd.chunks(blen=2)]
        self.assertEqual(vals, [
            [{u'f0': u'k1', u'f1': u'v1', u'f2': 1, u'f3': False},
             {u'f0': u'k2', u'f1': u'v2', u'f2': 2, u'f3': True}],
            [{u'f0': u'k3', u'f1': u'v3', u'f2': 3, u'f3': False},
             {u'f0': u'k4', u'f1': u'v4', u'f2': 4, u'f3': True}],
        ])
        self.assertRaises(ValueError, lambda: dd.extend([3.3]))
    finally:
        # Remove the temporary file even when an assertion above fails.
        os.remove(csv_file)
def csv(schema):
    """Yield a ``CSV`` backed by ``test.csv`` and remove the file afterwards.

    The descriptor is opened in write mode with ``schema`` and extended with
    the ``data`` object defined outside this function before it is yielded.
    Any ``OSError`` raised while deleting the file is deliberately ignored.
    """
    resource = CSV('test.csv', mode='w', schema=schema)
    resource.extend(data)

    yield resource

    # Best-effort cleanup once the consumer resumes the generator.
    try:
        os.remove(resource.path)
    except OSError:
        pass
def test_table_resource():
    """``Data`` built from a CSV filename wraps a ``CSV`` with the same rows."""
    with tmpfile('csv') as filename:
        source = CSV(filename, 'w', schema='{x: int, y: int}')
        source.extend([[1, 2], [10, 20]])

        table = Data(filename)
        assert isinstance(table.data, CSV)

        # Computing the table must yield exactly what the descriptor holds.
        computed = list(compute(table))
        stored = list(source)
        assert computed == stored
def csv():
    """Yield a ``CSV`` backed by ``test.csv`` and remove the file afterwards.

    Both ``schema`` and ``data`` come from outside this function.  The
    descriptor is opened in write mode and extended with ``data`` before it
    is yielded.  Any ``OSError`` raised while deleting the file is
    deliberately ignored.
    """
    resource = CSV('test.csv', mode='w', schema=schema)
    resource.extend(data)

    yield resource

    # Best-effort cleanup once the consumer resumes the generator.
    try:
        os.remove(resource.path)
    except OSError:
        pass
class Test_Dialect(unittest.TestCase):
    """Exercise ``CSV`` on a space-delimited file opened in ``r+`` mode."""

    # Fixture text: a header line followed by three records.
    # NOTE(review): laid out one record per line, which is what the
    # two-field schema and the readlines()-based assertions below imply;
    # confirm against how sanitize() treats surrounding whitespace.
    buf = sanitize(u"""Name Amount
Alice 100
Bob 200
Alice 50
""")

    schema = "{ f0: string, f1: int }"

    def setUp(self):
        """Write the fixture to a fresh temporary file and open it as CSV."""
        # mkstemp creates the file atomically, unlike the race-prone mktemp.
        fd, self.csv_file = tempfile.mkstemp(".csv")
        os.close(fd)
        with open(self.csv_file, "w") as f:
            f.write(self.buf)
        self.dd = CSV(self.csv_file, dialect='excel', schema=self.schema,
                      delimiter=' ', mode='r+')

    def tearDown(self):
        """Delete the temporary file created in ``setUp``."""
        os.remove(self.csv_file)

    def test_has_header(self):
        """``has_header`` reports a header for the fixture text."""
        assert has_header(self.buf)

    def test_overwrite_delimiter(self):
        """The ``delimiter`` keyword from ``setUp`` is what the dialect reports."""
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(self.dd.dialect['delimiter'], ' ')

    def test_content(self):
        """Both names from the fixture appear when the descriptor is listed."""
        s = str(list(self.dd))
        assert 'Alice' in s and 'Bob' in s

    def test_append(self):
        """Extending with a tuple writes it as the last line of the file."""
        self.dd.extend([('Alice', 100)])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_append_dict(self):
        """Extending with a dict keyed by field name writes the same line."""
        self.dd.extend([{'f0': 'Alice', 'f1': 100}])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_extend_structured(self):
        """A record appended to a comma-delimited file is read back with the rest.

        Either list-shaped or dict-shaped rows are accepted.
        """
        with filetext('1,1.0\n2,2.0\n') as fn:
            csv = CSV(fn, 'r+', schema='{x: int32, y: float32}',
                      delimiter=',')
            csv.extend([(3, 3)])
            assert (list(csv) == [[1, 1.0], [2, 2.0], [3, 3.0]]
                    or list(csv) == [{'x': 1, 'y': 1.0},
                                     {'x': 2, 'y': 2.0},
                                     {'x': 3, 'y': 3.0}])
def test_a_mode():
    """Rows appended through a ``CSV`` opened with ``'a'`` show up in ``name``."""
    text = ("id, name, balance\n"
            "1, Alice, 100\n"
            "2, Bob, 200\n"
            "3, Charlie, 300\n"
            "4, Denis, 400\n"
            "5, Edith, 500")
    with filetext(text) as fn:
        appender = CSV(fn, 'a')
        extra_rows = [(6, 'Frank', 600), (7, 'Georgina', 700)]
        appender.extend(extra_rows)

        # The last appended name must be present in the 'name' column.
        names = set(appender[:, 'name'])
        assert 'Georgina' in names
def test_extend_structured_many_newlines(self):
    """Three blank lines in the file are read back as three placeholder rows.

    The placeholder rows sit between the two original records and the
    appended one, each holding the int32 cast of NaN and a float NaN.
    """
    # Whatever value NaN turns into when cast to int32.
    nan_array = np.array([np.nan])
    inan = nan_array.astype('int32').item()
    blank_row = (inan, np.nan)

    with filetext('1,1.0\n2,2.0\n\n\n\n') as fn:
        descriptor = CSV(fn, 'r+', schema='{x: int32, y: float32}',
                         delimiter=',')
        descriptor.extend([(3, 3)])

        result = tuplify(tuple(descriptor))
        expected = ((1, 1.0), (2, 2.0),
                    blank_row, blank_row, blank_row,
                    (3, 3.0))
        matches = np.isclose(result, expected, equal_nan=True)
        assert matches.all()
def date_data():
    """Yield a read-mode ``CSV`` over a temporary file holding three records.

    The records are written through a separate write-mode ``CSV`` that
    shares the same schema.  The last record has ``None`` for its date.
    """
    records = [
        ('Alice', 100.0, datetime(2014, 9, 11, 0, 0, 0, 0)),
        ('Alice', -200.0, datetime(2014, 9, 10, 0, 0, 0, 0)),
        ('Bob', 300.0, None),
    ]
    schema = dshape('{name: string, amount: float32, date: ?datetime}')

    with tmpfile('.csv') as path:
        writer = CSV(path, schema=schema, mode='w')
        writer.extend(records)
        # Hand out a fresh descriptor opened for reading only.
        yield CSV(path, schema=schema, mode='r')
def test_a_mode(self):
    """Rows appended through a ``CSV`` opened with ``'a'`` show up in ``name``."""
    text = ("id, name, balance\n"
            "1, Alice, 100\n"
            "2, Bob, 200\n"
            "3, Charlie, 300\n"
            "4, Denis, 400\n"
            "5, Edith, 500")
    with filetext(text) as fn:
        appender = CSV(fn, 'a')
        extra_rows = [(6, 'Frank', 600), (7, 'Georgina', 700)]
        appender.extend(extra_rows)

        # Look the column up through the ``py`` accessor.
        names = set(appender.py[:, 'name'])
        assert 'Georgina' in names
def test_extend(tmpcsv, schema):
    """Extending a write-mode, space-delimited ``CSV`` writes one line per record.

    Also checks that the descriptor's dshape prints the same as a
    variable-length dimension over ``schema``.
    """
    descriptor = CSV(tmpcsv, 'w', schema=schema, delimiter=' ')
    descriptor.extend(data)

    with open(tmpcsv) as handle:
        written = handle.readlines()

    wanted_lines = ('Alice 100', 'Bob 200', 'Alice 50')
    for position, wanted in enumerate(wanted_lines):
        assert written[position].strip() == wanted

    dimension = datashape.Var()
    measure = datashape.dshape(schema)
    wanted_dshape = datashape.DataShape(dimension, measure)
    # Compare the printed forms rather than the objects themselves.
    assert str(descriptor.dshape) == str(wanted_dshape)
def test_json_csv_structured(self):
    """Records streamed from line-delimited JSON can be copied into a CSV.

    Two ``{x, y}`` records are dumped one JSON document per line, read
    through ``JSON_Streaming`` and fed to ``CSV.extend``.  The CSV must
    then iterate as the corresponding tuples.
    """
    data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
    text = '\n'.join(map(json.dumps, data))
    schema = '{x: int, y: int}'

    with filetext(text) as json_fn:
        with filetext('') as csv_fn:
            js = JSON_Streaming(json_fn, schema=schema)
            csv = CSV(csv_fn, mode='r+', schema=schema)

            csv.extend(js)

            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual(tuple(map(tuple, csv)), ((1, 1), (2, 2)))
def test_extend(self):
    """Extending a write-mode, space-delimited ``CSV`` writes one line per record.

    Also checks that the descriptor's dshape matches a variable-length
    dimension over ``self.schema`` once spaces are removed from both
    printed forms.
    """
    descriptor = CSV(self.filename, 'w', schema=self.schema, delimiter=' ')
    descriptor.extend(self.data)

    with open(self.filename) as handle:
        written = handle.readlines()

    for position, wanted in enumerate(('Alice 100', 'Bob 200', 'Alice 50')):
        self.assertEqual(written[position].strip(), wanted)

    wanted_dshape = datashape.DataShape(datashape.Var(), self.schema)
    # TODO: datashape comparison is broken
    actual_text = str(descriptor.dshape).replace(' ', '')
    wanted_text = str(wanted_dshape).replace(' ', '')
    self.assertEqual(actual_text, wanted_text)
class Test_Dialect(unittest.TestCase):
    """Exercise ``CSV`` on a space-delimited file opened in ``r+`` mode."""

    # Fixture text: a header line followed by three records.
    # NOTE(review): laid out one record per line, which is what the
    # two-field schema and the readlines()-based assertions below imply;
    # confirm against how sanitize() treats surrounding whitespace.
    buf = sanitize(u"""Name Amount
Alice 100
Bob 200
Alice 50
""")

    schema = "{ f0: string, f1: int }"

    def setUp(self):
        """Write the fixture to a fresh temporary file and open it as CSV."""
        # mkstemp creates the file atomically, unlike the race-prone mktemp.
        fd, self.csv_file = tempfile.mkstemp(".csv")
        os.close(fd)
        with open(self.csv_file, "w") as f:
            f.write(self.buf)
        self.dd = CSV(self.csv_file, dialect='excel', schema=self.schema,
                      delimiter=' ', mode='r+')

    def tearDown(self):
        """Delete the temporary file created in ``setUp``."""
        os.remove(self.csv_file)

    def test_has_header(self):
        """``has_header`` reports a header for the fixture text."""
        assert has_header(self.buf)

    def test_overwrite_delimiter(self):
        """The ``delimiter`` keyword from ``setUp`` is what the dialect reports."""
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(self.dd.dialect['delimiter'], ' ')

    def test_content(self):
        """Both names from the fixture appear when the descriptor is listed."""
        s = str(list(self.dd))
        assert 'Alice' in s and 'Bob' in s

    def test_append(self):
        """Extending with a tuple writes it as the last line of the file."""
        self.dd.extend([('Alice', 100)])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_append_dict(self):
        """Extending with a dict keyed by field name writes the same line."""
        self.dd.extend([{'f0': 'Alice', 'f1': 100}])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_extend_structured(self):
        """A record appended to a comma-delimited file is read back with the rest.

        Either list-shaped or dict-shaped rows are accepted.
        """
        with filetext('1,1.0\n2,2.0\n') as fn:
            csv = CSV(fn, 'r+', schema='{x: int32, y: float32}',
                      delimiter=',')
            csv.extend([(3, 3)])
            assert (list(csv) == [[1, 1.0], [2, 2.0], [3, 3.0]]
                    or list(csv) == [{'x': 1, 'y': 1.0},
                                     {'x': 2, 'y': 2.0},
                                     {'x': 3, 'y': 3.0}])
def test_extend(self):
    """Extending a write-mode, space-delimited ``CSV`` writes one line per record.

    Also checks that the descriptor's dshape matches a variable-length
    dimension over ``self.schema`` once spaces are removed from both
    printed forms.
    """
    descriptor = CSV(self.filename, 'w', schema=self.schema, delimiter=' ')
    descriptor.extend(self.data)

    with open(self.filename) as handle:
        written = handle.readlines()

    for position, wanted in enumerate(('Alice 100', 'Bob 200', 'Alice 50')):
        self.assertEqual(written[position].strip(), wanted)

    wanted_dshape = datashape.DataShape(datashape.Var(), self.schema)
    # TODO: datashape comparison is broken
    actual_text = str(descriptor.dshape).replace(' ', '')
    wanted_text = str(wanted_dshape).replace(' ', '')
    self.assertEqual(actual_text, wanted_text)
def test_append(self):
    """Append one record to a private copy of the fixture and read it back.

    ``self.buf`` is written to a fresh temporary file so the shared fixture
    is never modified.  After extending with a fourth record, the file is
    read in chunks of two records and compared with the expected
    dictionaries.  Extending with a bare float must raise ``ValueError``.
    """
    # Get a private file so as to not mess the original one.  mkstemp
    # creates the file atomically, unlike the race-prone mktemp.
    fd, csv_file = tempfile.mkstemp(".csv")
    os.close(fd)
    try:
        with open(csv_file, "w") as f:
            f.write(self.buf)
        dd = CSV(csv_file, schema=self.schema, mode='r+')
        dd.extend([["k4", "v4", 4, True]])
        vals = [nd.as_py(v) for v in dd.chunks(blen=2)]
        self.assertEqual(vals, [
            [{u'f0': u'k1', u'f1': u'v1', u'f2': 1, u'f3': False},
             {u'f0': u'k2', u'f1': u'v2', u'f2': 2, u'f3': True}],
            [{u'f0': u'k3', u'f1': u'v3', u'f2': 3, u'f3': False},
             {u'f0': u'k4', u'f1': u'v4', u'f2': 4, u'f3': True}],
        ])
        self.assertRaises(ValueError, lambda: dd.extend([3.3]))
    finally:
        # Remove the temporary file even when an assertion above fails.
        os.remove(csv_file)
def test_re_dialect(self):
    """Copying between CSVs rewrites records in the destination's dialect.

    The source uses ``,`` and ``\\n``; the destination uses ``;`` as the
    delimiter and ``--`` as the line terminator.  After the copy the
    destination file must contain the records in that second form.
    """
    dialect1 = {'delimiter': ',', 'lineterminator': '\n'}
    dialect2 = {'delimiter': ';', 'lineterminator': '--'}

    text = '1,1\n2,2\n'
    schema = '2 * int32'

    with filetext(text) as source_fn:
        with filetext('') as dest_fn:
            src = CSV(source_fn, schema=schema, **dialect1)
            dst = CSV(dest_fn, mode='w', schema=schema, **dialect2)

            # Perform copy
            dst.extend(src)

            with open(dest_fn) as f:
                # assertEqual: the assertEquals alias was removed in
                # Python 3.12.
                self.assertEqual(f.read(), '1;1--2;2--')
def test_append(self):
    """Extending with only the first record writes it as the file's first line."""
    descriptor = CSV(self.filename, 'w', schema=self.schema, delimiter=' ')
    first_record = self.data[0]
    descriptor.extend([first_record])

    with open(self.filename) as handle:
        first_line = handle.readlines()[0]
        self.assertEqual(first_line.strip(), 'Alice 100')
def test_extend_structured(self):
    """A record appended in ``r+`` mode is read back after the existing two."""
    with filetext('1,1.0\n2,2.0\n') as fn:
        descriptor = CSV(fn, 'r+', schema='{x: int32, y: float32}',
                         delimiter=',')
        descriptor.extend([(3, 3)])

        rows = tuplify(tuple(descriptor))
        assert rows == ((1, 1.0), (2, 2.0), (3, 3.0))
class Test_Dialect(unittest.TestCase):
    """Exercise ``CSV`` on a space-delimited file opened in ``r+`` mode."""

    # Fixture text: a header line followed by three records.
    # NOTE(review): laid out one record per line, which is what the
    # two-field schemas asserted below imply; confirm against how
    # sanitize() treats surrounding whitespace.
    buf = sanitize(u"""Name Amount
Alice 100
Bob 200
Alice 50
""")

    schema = "{ f0: string, f1: int }"

    def setUp(self):
        """Write the fixture to a fresh temporary file and open it as CSV."""
        # mkstemp creates the file atomically, unlike the race-prone mktemp.
        fd, self.csv_file = tempfile.mkstemp(".csv")
        os.close(fd)
        with open(self.csv_file, "w") as f:
            f.write(self.buf)
        self.dd = CSV(self.csv_file, dialect='excel', schema=self.schema,
                      delimiter=' ', mode='r+')

    def tearDown(self):
        """Delete the temporary file created in ``setUp``."""
        os.remove(self.csv_file)

    def test_schema_detection(self):
        """Without a schema the field names and types are detected.

        Passing ``columns`` replaces the detected names and keeps the types.
        """
        dd = CSV(self.csv_file)
        assert dd.schema == dshape('{Name: string, Amount: ?int64}')

        dd = CSV(self.csv_file, columns=['foo', 'bar'])
        assert dd.schema == dshape('{foo: string, bar: ?int64}')

    @min_python_version
    def test_has_header(self):
        """``has_header`` reports a header for the fixture text."""
        assert has_header(self.buf)

    def test_overwrite_delimiter(self):
        """The ``delimiter`` keyword from ``setUp`` is what the dialect reports."""
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(self.dd.dialect['delimiter'], ' ')

    def test_content(self):
        """Both names from the fixture appear when the descriptor is listed."""
        s = str(list(self.dd))
        assert 'Alice' in s and 'Bob' in s

    def test_append(self):
        """Extending with a tuple writes it as the last line of the file."""
        self.dd.extend([('Alice', 100)])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_append_dict(self):
        """Extending with a dict keyed by field name writes the same line."""
        self.dd.extend([{'f0': 'Alice', 'f1': 100}])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_extend_structured(self):
        """A record appended in ``r+`` mode is read back after the existing two."""
        with filetext('1,1.0\n2,2.0\n') as fn:
            csv = CSV(fn, 'r+', schema='{x: int32, y: float32}',
                      delimiter=',')
            csv.extend([(3, 3)])
            assert tuplify(tuple(csv)) == ((1, 1.0), (2, 2.0), (3, 3.0))

    def test_discover_dialect(self):
        """``discover_dialect`` returns the expected settings for CRLF text."""
        s = '1,1\r\n2,2'
        self.assertEqual(discover_dialect(s),
                         {'escapechar': None,
                          'skipinitialspace': False,
                          'quoting': 0,
                          'delimiter': ',',
                          'lineterminator': '\r\n',
                          'quotechar': '"',
                          'doublequote': False})
def test_extend_structured_many_newlines():
    """Reading back a file with three blank lines plus one append gives six rows.

    Only the discovered datashape of the result is checked, not the values.
    """
    with filetext('1,1.0\n2,2.0\n\n\n\n') as fn:
        descriptor = CSV(fn, 'r+', schema='{x: int32, y: float32}',
                         delimiter=',')
        descriptor.extend([(3, 3)])

        result = tuplify(tuple(descriptor))
        found_shape = discover(result)
        assert found_shape == dshape('6 * (int64, float64)')
def test_append(tmpcsv, schema):
    """Extending with only the first record writes it as the file's first line."""
    descriptor = CSV(tmpcsv, 'w', schema=schema, delimiter=' ')
    first_record = data[0]
    descriptor.extend([first_record])

    with open(tmpcsv) as handle:
        first_line = handle.readlines()[0].strip()

    assert first_line == 'Alice 100'
def test_extend_structured_no_newline():
    """Appending to a file with no trailing newline still yields a separate row."""
    with filetext('1,1.0\n2,2.0') as fn:
        descriptor = CSV(fn, 'r+', schema='{x: int32, y: float32}',
                         delimiter=',')
        descriptor.extend([(3, 3)])

        rows = tuplify(tuple(descriptor))
        assert rows == ((1, 1.0), (2, 2.0), (3, 3.0))
def test_write_csv_with_header_emits_header():
    """A write-mode ``CSV`` created with ``header=True`` writes field name ``a``."""
    with tmpfile('.csv') as fn:
        writer = CSV(fn, header=True, schema='{a: int, b: int}', mode='w')
        writer.extend([(1, 2), (10, 20)])

        with open(fn) as handle:
            contents = handle.read()
            # The records are all digits, so an 'a' can only come from the header.
            assert 'a' in contents