Example #1
0
def date_data():
    """Yield a read-mode CSV descriptor over name/amount/date sample rows."""
    schema = dshape('{name: string, amount: float32, date: ?datetime}')
    rows = [('Alice', 100.0, datetime(2014, 9, 11, 0, 0, 0, 0)),
            ('Alice', -200.0, datetime(2014, 9, 10, 0, 0, 0, 0)),
            ('Bob', 300.0, None)]
    with tmpfile('.csv') as fname:
        writer = CSV(fname, schema=schema, mode='w')
        writer.extend(rows)
        yield CSV(fname, schema=schema, mode='r')
def test_gzopen_csv():
    """CSV reads gzip-compressed files when given open=gzip.open."""
    with tmpfile('.csv.gz') as filename:
        # gzip file objects opened with 'w' are binary-mode, so the payload
        # must be bytes — a str here raises TypeError on Python 3.
        with gzip.open(filename, 'w') as f:
            f.write(b'1,1\n2,2')

        # Without open=gzip.open the compressed bytes are not a valid CSV file
        assert raises(Exception, lambda: list(CSV(filename, schema='2 * int')))

        dd = CSV(filename, schema='2 * int', open=gzip.open)

        assert list(dd) == [[1, 1], [2, 2]]
Example #3
0
def test_schema_detection_modifiers():
    """Column renames, explicit types, and typehints shape the schema."""
    text = "name amount date\nAlice 100 20120101\nBob 200 20120102"
    with filetext(text) as fn:
        # Default discovery: string plus nullable int64 columns.
        assert CSV(fn).schema == dshape(
            '{name: string, amount: ?int64, date: ?int64}')

        # Renaming columns keeps the discovered types.
        renamed = CSV(fn, columns=['NAME', 'AMOUNT', 'DATE'])
        assert renamed.schema == dshape(
            '{NAME: string, AMOUNT: ?int64, DATE: ?int64}')

        # Explicit types override discovery entirely.
        typed = CSV(fn, types=['string', 'int32', 'date'])
        assert str(typed.schema) == str(
            dshape('{name: string, amount: int32, date: date}'))

        # Typehints override only the hinted columns.
        hinted = CSV(fn, typehints={'date': 'date'}).schema
        expected = dshape('{name: string, amount: ?int64, date: date}')
        assert str(hinted) == str(expected)
Example #4
0
def test_csv_into_mongodb_colon_del(empty_collec, file_name_colon):
    """A colon-delimited CSV round-trips through MongoDB unchanged."""
    source = CSV(file_name_colon)
    populated = into(empty_collec, source)
    assert into(list, source) == into(list, populated)
Example #5
0
def test_csv_into_mongodb(empty_collec, file_name):
    """Loading a CSV into MongoDB preserves the first column's values."""
    source = CSV(file_name)
    loaded = into(empty_collec, source)
    docs = list(loaded.find({}, {'_0': 1, '_id': 0}))
    assert list(source[:, '_0']) == [doc['_0'] for doc in docs]
Example #6
0
def test_csv_into_mongodb_columns(empty_collec, file_name):
    """A CSV with an explicit schema survives a MongoDB round trip."""
    source = CSV(file_name, schema='{x: int, y: int}')
    expected = into(list, source)
    assert expected == into(list, into(empty_collec, source))
Example #7
0
def test_into_cds_mixed():
    """Data expressions convert column-wise into a Bokeh ColumnDataSource."""
    pytest.importorskip('bokeh')
    from bokeh.objects import ColumnDataSource
    n = 25
    frame = pd.DataFrame({
        'first': np.random.choice(list('abc'), size=n),
        'second': np.random.choice(['cachaça', 'tres leches', 'pizza'],
                                   size=n),
        'third': list(range(n))
    })
    with tmpfile('.csv') as fn:
        frame.to_csv(fn, header=None, index=False, encoding='utf8')
        csv = CSV(fn, columns=['first', 'second', 'third'], encoding='utf8')
        t = Data(csv)

        def expected_data(names):
            # Column name -> list of that column's values, straight from csv.
            return dict((name, into(list, csv[:, name])) for name in names)

        # Whole table.
        cds = into(ColumnDataSource, t)
        assert isinstance(cds, ColumnDataSource)
        assert cds.data == expected_data(['first', 'second', 'third'])

        # Two-column projection.
        cds = into(ColumnDataSource, t[['first', 'second']])
        assert isinstance(cds, ColumnDataSource)
        assert cds.data == expected_data(['first', 'second'])

        # Single column.
        cds = into(ColumnDataSource, t['first'])
        assert isinstance(cds, ColumnDataSource)
        assert cds.data == {'first': into(list, csv[:, 'first'])}
Example #8
0
def test_stack(stack_data):
    """Stack joins equal-shaped CSV descriptors along a new leading axis."""
    sources = [CSV(fn, schema='2 * int32') for fn in sorted(stack_data)]
    stacked = Stack(sources)
    assert stacked.dshape == 3 * sources[0].dshape

    expected = (((1, 1), (2, 2)), ((3, 3), (4, 4)), ((5, 5), (6, 6)))

    assert tuplify(tuple(stacked.as_py())) == expected

    # The dynd round-trip agrees with the python representation.
    assert (nd.as_py(stacked.as_dynd()) ==
            nd.as_py(nd.array(expected, dtype='int32')))

    # Iteration is repeatable, not single-shot.
    assert tuplify(tuple(stacked)) == expected
    assert tuplify(tuple(stacked)) == expected

    assert all(isinstance(chunk, nd.array) for chunk in stacked.chunks())

    # Fancy indexing across and within the stacked axis.
    assert tuple(stacked[[0, 2], 0, 0]) == (1, 5)
    assert tuplify(tuple(stacked[0])) == ((1, 1), (2, 2))
    assert tuplify(tuple(stacked[0, :, [1]])) == ((1, ), (2, ))
    assert tuplify(tuple(stacked[0])) == expected[0]

    # Open-ended slices are lazy iterators.
    assert isinstance(stacked[:, 0], Iterator)
    assert isinstance(stacked[:], Iterator)
Example #9
0
def file_data():
    """Yield a Concat over three small CSV files sharing one schema."""
    contents = {'a.csv': '1,1\n2,2',
                'b.csv': '3,3\n4,4\n5,5',
                'c.csv': '6,6\n7,7'}
    with filetexts(contents) as filenames:
        yield Concat([CSV(fn, schema='{a: int32, b: int32}')
                      for fn in sorted(filenames)])
Example #10
0
def test_into_filename_filename():
    """into() copies one CSV file to another given only filenames."""
    with filetext('1,2\n3,4', extension='csv') as source_fn:
        with tmpfile('csv') as target_fn:
            into(target_fn, source_fn)
            assert into(list, CSV(target_fn)) == [(1, 2), (3, 4)]
 def test_append(self):
     """extend() in 'r+' mode appends rows visible to later chunked reads."""
     # Use a private file so the shared fixture file is left untouched.
     # mkstemp (unlike the deprecated, race-prone mktemp) actually creates
     # the file; close the low-level descriptor before reopening it.
     fd, csv_file = tempfile.mkstemp(".csv")
     os.close(fd)
     try:
         with open(csv_file, "w") as f:
             f.write(self.buf)
         dd = CSV(csv_file, schema=self.schema, mode='r+')
         dd.extend([["k4", "v4", 4, True]])
         vals = [nd.as_py(v) for v in dd.chunks(blen=2)]
         self.assertEqual(vals, [
             [{u'f0': u'k1', u'f1': u'v1', u'f2': 1, u'f3': False},
              {u'f0': u'k2', u'f1': u'v2', u'f2': 2, u'f3': True}],
             [{u'f0': u'k3', u'f1': u'v3', u'f2': 3, u'f3': False},
              {u'f0': u'k4', u'f1': u'v4', u'f2': 4, u'f3': True}],
         ])
         # Rows that do not match the schema are rejected.
         self.assertRaises(ValueError, lambda: dd.extend([3.3]))
     finally:
         # Remove the file even when an assertion above fails (the original
         # leaked the temp file on failure).
         os.remove(csv_file)
Example #12
0
def test_csv_mongodb_load(db, file_name, empty_collec):
    """Data imported via mongoimport matches what CSV itself reads."""
    csv = CSV(file_name)

    coll = empty_collec
    # NOTE(review): the command string is built from trusted test fixtures
    # only; with shell=True on posix this would otherwise be injectable.
    copy_info = {
        'dbname': db.name,
        'coll': coll.name,
        'abspath': csv._abspath,
        'column_names': ','.join(csv.columns)
    }

    copy_cmd = """mongoimport -d {dbname} -c {coll} --type csv --file {abspath} --fields {column_names}"""
    copy_cmd = copy_cmd.format(**copy_info)

    ps = subprocess.Popen(copy_cmd,
                          shell=os.name != 'nt',
                          stdout=subprocess.PIPE)
    # communicate() both drains stdout and waits for the process to exit, so
    # the import is guaranteed complete before the collection is queried
    # (the original read stdout but never waited, and left `output` unused).
    ps.communicate()

    mongo_data = list(coll.find({}, {'_0': 1, '_id': 0}))

    assert list(csv[:, '_0']) == [i['_0'] for i in mongo_data]
Example #13
0
def test_string_dataset(tmpcsv):
    """Columns with mixed content are read back as strings."""
    with open(tmpcsv, mode='w') as f:
        f.write('a,b,2.0\nc,1999,3.0\nd,3.0,4.0')
    table = Table(CSV(tmpcsv, columns=list('xyz')))
    rows = into(list, table)
    assert rows == [('a', 'b', 2.0), ('c', '1999', 3.0), ('d', '3.0', 4.0)]
 def test_getitem_stop(self):
     """Slicing with only a stop bound returns the leading records."""
     dd = CSV(self.csv_file, schema=self.schema)
     expected = [{u'f0': u'k1', u'f1': u'v1', u'f2': 1, u'f3': False}]
     self.assertEqual(dd[:1], expected)
Example #15
0
def test_repr_hdma():
    """compute() works on the HMDA sample, whole and projected."""
    csv = CSV(example('hmda-small.csv'))
    t = TableSymbol('hmda', csv.schema)

    assert compute(t.head(), csv)

    projection = t[['action_taken_name', 'agency_abbr',
                    'applicant_ethnicity_name']]
    assert compute(projection.head(), csv)
 def test_getitem_start_step(self):
     """Slicing with a start and a step skips records correctly."""
     dd = CSV(self.csv_file, schema=self.schema)
     expected = [{u'f0': u'k2', u'f1': u'v2', u'f2': 2, u'f3': True}]
     self.assertEqual(dd[1::2], expected)
Example #17
0
def test_delayed_bad_datashape():
    """A schema discovered from a row prefix may fail on later rows."""
    lines = ['a,b'] + ['1,2'] * 20 + ['1,3.14']
    with filetext('\n'.join(lines)) as fn:
        csv = CSV(fn, nrows_discovery=2)
        # Only the leading rows were inspected, so ints were inferred...
        assert csv.schema == dshape('{a: int64, b: int64}')
        # ...and the trailing float row blows up at iteration time.
        with pytest.raises(ValueError):
            list(csv)
Example #18
0
def csv(schema):
    """Yield a writable CSV populated with the module-level sample rows."""
    descriptor = CSV('test.csv', schema=schema, mode='w')
    descriptor.extend(data)
    yield descriptor
    # Best-effort cleanup: the file may already have been removed.
    try:
        os.remove(descriptor.path)
    except OSError:
        pass
Example #19
0
def test_into_DataFrame_concat():
    """Concat of two CSVs stacks rows and gets a fresh integer index."""
    csv = CSV(os.path.join(os.path.dirname(__file__), 'accounts.csv'))
    frame = into(pd.DataFrame, Concat([csv, csv]))
    single = csv.pandas_read_csv()

    assert frame.index.tolist() == list(range(len(frame)))
    assert frame.values.tolist() == single.values.tolist() * 2
    assert frame.columns.tolist() == single.columns.tolist()
Example #20
0
def test_into_filename():
    """into() writes a DataFrame to a CSV file given only the filename."""
    with tmpfile('csv') as filename:
        frame = DataFrame([['Alice', 100], ['Bob', 200]],
                          columns=['name', 'amount'])
        into(filename, frame)
        assert into(list, CSV(filename)) == into(list, frame)
Example #21
0
def test_table_resource():
    """Data() resolves a bare filename into a CSV-backed resource."""
    with tmpfile('csv') as fn:
        source = CSV(fn, 'w', schema='{x: int, y: int}')
        source.extend([[1, 2], [10, 20]])

        table = Data(fn)
        assert isinstance(table.data, CSV)
        assert list(compute(table)) == list(source)
    def test_re_dialect(self):
        """Copying between CSVs re-serializes using the target's dialect."""
        dialect1 = {'delimiter': ',', 'lineterminator': '\n'}
        dialect2 = {'delimiter': ';', 'lineterminator': '--'}

        text = '1,1\n2,2\n'

        schema = '2 * int32'

        with filetext(text) as source_fn:
            with filetext('') as dest_fn:
                src = CSV(source_fn, schema=schema, **dialect1)
                dst = CSV(dest_fn, mode='w', schema=schema, **dialect2)

                # Perform copy
                dst.extend(src)

                with open(dest_fn) as f:
                    # assertEqual: the assertEquals alias is deprecated and
                    # removed in Python 3.12.
                    self.assertEqual(f.read(), '1;1--2;2--')
 def setUp(self):
     """Create a temporary space-delimited CSV file and open it via CSV."""
     # mkstemp (not the deprecated, race-prone mktemp) creates the file;
     # close the raw descriptor before re-opening it for writing.
     fd, self.csv_file = tempfile.mkstemp(".csv")
     os.close(fd)
     with open(self.csv_file, "w") as f:
         f.write(self.buf)
     self.dd = CSV(self.csv_file,
                   dialect='excel',
                   schema=self.schema,
                   delimiter=' ',
                   mode='r+')
Example #24
0
def test_DataFrame_CSV():
    """A CSV with an explicit schema converts to a correctly-typed frame."""
    with filetext('1,2\n3,4\n') as fn:
        frame = into(DataFrame, CSV(fn, schema='{a: int64, b: float64}'))

        expected = DataFrame([[1, 2.0], [3, 4.0]], columns=['a', 'b'])

        assert str(frame) == str(expected)
        assert list(frame.dtypes) == [np.int64, np.float64]
Example #25
0
def test_a_mode():
    """Opening in 'a' mode appends new rows to an existing file."""
    text = ("id, name, balance\n1, Alice, 100\n2, Bob, 200\n"
            "3, Charlie, 300\n4, Denis, 400\n5, Edith, 500")
    with filetext(text) as fn:
        appendable = CSV(fn, 'a')
        appendable.extend([(6, 'Frank', 600), (7, 'Georgina', 700)])
        assert 'Georgina' in set(appendable[:, 'name'])
Example #26
0
def test_datetime_csv_reader_same_as_into_types():
    """into(DataFrame, csv) yields the expected dtypes, datetimes included."""
    csv = CSV(os.path.join(os.path.dirname(__file__), 'accounts.csv'))
    # (The original also computed csv.pandas_read_csv().dtypes into an
    # unused `rhs` local — dead work, removed.)
    df = into(pd.DataFrame, csv)
    dtypes = df.dtypes
    expected = pd.Series(
        [np.dtype(x) for x in ['i8', 'i8', 'O', 'datetime64[ns]']],
        index=csv.columns)
    assert dtypes.index.tolist() == expected.index.tolist()
    assert dtypes.tolist() == expected.tolist()
Example #27
0
    def test_csv_json_chunked(self):
        """into() streams CSV records into a chunked JSON target."""
        with filetext('1,1\n2,2\n') as csv_fn:
            with filetext('') as json_fn:
                schema = '{a: int32, b: int32}'
                csv = CSV(csv_fn, schema=schema)
                json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                into(json, csv)

                # assertEqual: the assertEquals alias is deprecated and
                # removed in Python 3.12.
                self.assertEqual(tuplify(tuple(json)), ((1, 1), (2, 2)))
    def test_csv_json(self):
        """extend() copies CSV rows into a streaming JSON descriptor."""
        with filetext('1,1\n2,2\n') as csv_fn:
            with filetext('') as json_fn:
                schema = '2 * int'
                csv = CSV(csv_fn, schema=schema)
                json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                json.extend(csv)

                # assertEqual: the assertEquals alias is deprecated and
                # removed in Python 3.12.
                self.assertEqual(list(json), [[1, 1], [2, 2]])
Example #29
0
def test_tuple_types():
    """
    CSVs with uniform types still create record types with names
    """
    with filetext('1,1\n2,2\n') as fn:
        csv = CSV(fn, 'r+', delimiter=',')
        assert csv[0] == (1, 1)
        record = csv.schema[0]
        assert isinstance(record, Record)
        field_types = record.types
        # Two fields, but they share a single underlying type.
        assert len(field_types) == 2
        assert len(set(field_types)) == 1
    def test_csv_hdf5(self):
        """copy() moves CSV contents into an HDF5 dataset intact."""
        # h5py import presumably gates the test on the library's presence
        # (it is otherwise unused here) — TODO confirm intent.
        import h5py
        from dynd import nd
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('1,1\n2,2\n') as csv_fn:
                csv = CSV(csv_fn, schema='2 * int')
                hdf5 = HDF5(hdf5_fn, '/data', mode='a', schema='2 * int')

                copy(csv, hdf5)

                # assertEqual: the assertEquals alias is deprecated and
                # removed in Python 3.12.
                self.assertEqual(nd.as_py(hdf5.as_dynd()), [[1, 1], [2, 2]])