def test_chunks(self):
    """chunks(blen=2) yields dynd arrays covering the data two rows at a time."""
    with filetext(self.text) as fn:
        dd = JSON_Streaming(fn, schema=self.schema)
        chunks = list(dd.chunks(blen=2))
        assert isinstance(chunks[0], nd.array)
        # assertEquals is a deprecated alias; assertEqual is the canonical name.
        self.assertEqual(len(chunks), 3)
        self.assertEqual(nd.as_py(chunks[0]), self.data[:2])
def test_chunks(self):
    """chunks(blen=2) yields dynd arrays; contents round-trip to the dicts."""
    with filetext(self.text) as fn:
        dd = JSON_Streaming(fn, schema=self.schema)
        chunks = list(dd.chunks(blen=2))
        assert isinstance(chunks[0], nd.array)
        # assertEquals is a deprecated alias; assertEqual is the canonical name.
        self.assertEqual(len(chunks), 3)
        self.assertEqual(nd.as_py(chunks[0]), self.dicts[:2])
def test_append(self):
    """extend() writes schema-conforming rows as JSON lines and rejects bad data."""
    with filetext('') as fn:
        dd = JSON_Streaming(fn, mode='w', schema=self.schema)
        dd.extend([self.tuples[0]])
        with open(fn) as f:
            # assertEquals is deprecated; assertEqual is the canonical name.
            self.assertEqual(json.loads(f.read().strip()), self.dicts[0])
        # Rows that do not match the schema must raise ValueError.
        self.assertRaises(ValueError, lambda: dd.extend([5.5]))
        self.assertRaises(ValueError,
                          lambda: dd.extend([{'name': 5, 'amount': 1.3}]))
def json_data():
    """Yield a Stack of JSON_Streaming descriptors over three temp files.

    Generator-style fixture: the temporary files exist for the duration
    of the ``yield``.
    """
    data = {'a.csv': [{'x': 1, 'y': 2}, {'x': 3, 'y': 4}],
            'b.csv': [{'x': 5, 'y': 6}, {'x': 7, 'y': 8}],
            'c.csv': [{'x': 9, 'y': 10}, {'x': 11, 'y': 12}]}
    # Dict comprehension instead of dict() over a generator (flake8 C402).
    text = {fn: '\n'.join(map(json.dumps, dicts))
            for fn, dicts in data.items()}
    with filetexts(text) as filenames:
        descriptors = [JSON_Streaming(fn, schema='{x: int32, y: int32}')
                       for fn in sorted(filenames)]
        yield Stack(descriptors)
def test_init(self):
    """The descriptor iterates the file's rows and reports a sane dshape."""
    with filetext(self.text) as fn:
        dd = JSON_Streaming(fn, schema=self.schema)
        # assertEquals is deprecated; assertEqual is the canonical name.
        self.assertEqual(list(dd), self.data)
        # Either a var-length or a concrete 5-row dshape is acceptable.
        assert dd.dshape in set((
            datashape.dshape('var * {name: string, amount: int32}'),
            datashape.dshape('5 * {name: string, amount: int32}')))
def setUp(self):
    """Create a line-delimited JSON temp file and a descriptor over it."""
    # tempfile.mktemp is deprecated and race-prone; NamedTemporaryFile with
    # delete=False creates the file securely and keeps it for the test.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.json',
                                     delete=False) as f:
        self.fn = f.name
        for d in self.dicts:
            f.write(json.dumps(d))
            f.write('\n')
    self.dd = JSON_Streaming(self.fn, schema=self.schema)
def test_csv_json_chunked(self):
    """into() moves structured rows from a CSV descriptor into a JSON one."""
    with filetext('1,1\n2,2\n') as csv_fn:
        with filetext('') as json_fn:
            schema = '{a: int32, b: int32}'
            # Locals renamed so the stdlib csv/json modules are not shadowed.
            csv_dd = CSV(csv_fn, schema=schema)
            json_dd = JSON_Streaming(json_fn, mode='r+', schema=schema)
            into(json_dd, csv_dd)
            # assertEquals is deprecated; assertEqual is the canonical name.
            self.assertEqual(tuplify(tuple(json_dd)), ((1, 1), (2, 2)))
def test_csv_json(self):
    """extend() copies unstructured rows from a CSV descriptor into JSON."""
    with filetext('1,1\n2,2\n') as csv_fn:
        with filetext('') as json_fn:
            schema = '2 * int'
            # Locals renamed so the stdlib csv/json modules are not shadowed.
            csv_dd = CSV(csv_fn, schema=schema)
            json_dd = JSON_Streaming(json_fn, mode='r+', schema=schema)
            json_dd.extend(csv_dd)
            # assertEquals is deprecated; assertEqual is the canonical name.
            self.assertEqual(list(json_dd), [[1, 1], [2, 2]])
def test_json_csv_structured(self):
    """Structured JSON rows copied into a CSV descriptor keep their values."""
    data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
    text = '\n'.join(map(json.dumps, data))
    schema = '{x: int, y: int}'
    with filetext(text) as json_fn:
        with filetext('') as csv_fn:
            js = JSON_Streaming(json_fn, schema=schema)
            # Renamed from ``csv`` so the stdlib csv module is not shadowed.
            csv_dd = CSV(csv_fn, mode='r+', schema=schema)
            csv_dd.extend(js)
            # assertEquals is deprecated; also dropped stray parens around
            # the iterable argument.
            self.assertEqual(tuple(map(tuple, csv_dd)), ((1, 1), (2, 2)))
def test_json_csv_chunked(self):
    """copy() transfers JSON rows into a CSV descriptor chunk-wise."""
    data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
    text = '\n'.join(map(json.dumps, data))
    schema = '{x: int, y: int}'
    with filetext(text) as json_fn:
        with filetext('') as csv_fn:
            js = JSON_Streaming(json_fn, schema=schema)
            # Renamed from ``csv`` so the stdlib csv module is not shadowed.
            csv_dd = CSV(csv_fn, mode='r+', schema=schema)
            copy(js, csv_dd)
            # assertEquals is deprecated; assertEqual is the canonical name.
            self.assertEqual(list(csv_dd), data)
def test_json_csv_chunked(self):
    """into() transfers JSON rows into a CSV descriptor as tuples."""
    data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
    tuples = ((1, 1), (2, 2))
    text = '\n'.join(map(json.dumps, data))
    schema = '{x: int, y: int}'
    with filetext(text) as json_fn:
        with filetext('') as csv_fn:
            js = JSON_Streaming(json_fn, schema=schema)
            # Renamed from ``csv`` so the stdlib csv module is not shadowed.
            csv_dd = CSV(csv_fn, mode='r+', schema=schema)
            into(csv_dd, js)
            # assertEquals is deprecated; assertEqual is the canonical name.
            self.assertEqual(tuple(csv_dd), tuples)
def test_append(self):
    """extend() writes a valid row as a JSON line and rejects bad data."""
    with filetext('') as fn:
        dd = JSON_Streaming(fn, mode='w', schema=self.schema)
        dd.extend([self.data[0]])
        with open(fn) as f:
            # assertEquals is deprecated; assertEqual is the canonical name.
            self.assertEqual(json.loads(f.read().strip()), self.data[0])
        # Rows that do not match the schema must raise ValueError.
        self.assertRaises(ValueError, lambda: dd.extend([5.5]))
        self.assertRaises(ValueError,
                          lambda: dd.extend([{'name': 5, 'amount': 1.3}]))
def test_csv_sql_json(self):
    """CSV -> SQLite -> JSON round-trip via into() preserves the rows."""
    data = [('Alice', 100), ('Bob', 200)]
    text = '\n'.join(','.join(map(str, row)) for row in data)
    schema = '{name: string, amount: int}'
    with filetext(text) as csv_fn:
        with filetext('') as json_fn:
            with tmpfile('db') as sqldb:
                # Locals renamed so the stdlib csv/json modules are not
                # shadowed.
                csv_dd = CSV(csv_fn, mode='r', schema=schema)
                sql = SQL('sqlite:///' + sqldb, 'testtable', schema=schema)
                json_dd = JSON_Streaming(json_fn, mode='r+', schema=schema)
                into(sql, csv_dd)
                self.assertEqual(into(list, sql), data)
                into(json_dd, sql)
                with open(json_fn) as f:
                    assert 'Alice' in f.read()
def test_csv_sql_json(self):
    """CSV -> in-memory SQLite -> JSON round-trip via copy() preserves rows."""
    data = [('Alice', 100), ('Bob', 200)]
    text = '\n'.join(','.join(map(str, row)) for row in data)
    schema = '{name: string, amount: int}'
    engine = create_engine('sqlite:///:memory:')
    with filetext(text) as csv_fn:
        with filetext('') as json_fn:
            # Locals renamed so the stdlib csv/json modules are not shadowed.
            csv_dd = CSV(csv_fn, mode='r', schema=schema)
            sql = SQL(engine, 'testtable', schema=schema)
            json_dd = JSON_Streaming(json_fn, mode='r+', schema=schema)
            copy(csv_dd, sql)
            self.assertEqual(list(sql), data)
            copy(sql, json_dd)
            with open(json_fn) as f:
                assert 'Alice' in f.read()
def test_as_py(self):
    """as_py() materializes the file contents as the expected tuples."""
    with filetext(self.text) as fn:
        descriptor = JSON_Streaming(fn, mode='r', schema=self.schema)
        self.assertEqual(descriptor.as_py(), self.tuples)
def test_as_dynd(self):
    """as_dynd() produces a dynd array that round-trips to the dicts."""
    with filetext(self.text) as fn:
        descriptor = JSON_Streaming(fn, mode='r', schema=self.schema)
        result = nd.as_py(descriptor.as_dynd())
        assert result == self.dicts
def test_extend_tuples(self):
    """extend() accepts tuple rows and they read back unchanged."""
    with filetext('') as fn:
        dd = JSON_Streaming(fn, mode='r+', schema=self.schema)
        dd.extend(self.tuples)
        # assertEquals is deprecated; assertEqual is the canonical name.
        self.assertEqual(tuplify(tuple(dd)), self.tuples)
def test_as_py(self):
    """as_py() materializes the file contents as the expected Python data."""
    with filetext(self.text) as fn:
        descriptor = JSON_Streaming(fn, mode='r', schema=self.schema)
        assert descriptor.as_py() == self.data
def test_getitem(self):
    """Single-index and slice access agree with the reference data."""
    with filetext(self.text) as fn:
        descriptor = JSON_Streaming(fn, mode='r', schema=self.schema)
        assert descriptor[0] == self.data[0]
        assert descriptor[2:4] == self.data[2:4]
def test_extend(self):
    """extend() appends rows that read back unchanged."""
    with filetext('') as fn:
        dd = JSON_Streaming(fn, mode='r+', schema=self.schema)
        dd.extend(self.data)
        # assertEquals is deprecated; assertEqual is the canonical name.
        self.assertEqual(list(dd), self.data)
def test_discovery(self):
    """Schema discovery finds both field names and a string-typed name."""
    dd = JSON_Streaming(self.fn)
    field_names = set(dd.schema[0].names)
    assert field_names == {'name', 'amount'}
    assert 'string' in str(dd.schema[0]['name'])
def test_getitem(self):
    """Single-index and slice access tuplify to the reference tuples."""
    with filetext(self.text) as fn:
        descriptor = JSON_Streaming(fn, mode='r', schema=self.schema)
        self.assertEqual(tuplify(descriptor[0]), self.tuples[0])
        self.assertEqual(tuplify(descriptor[2:4]), self.tuples[2:4])