Example #1
def test_into_filename_filename():
    with filetext('1,2\n3,4', extension='csv') as source_fn:
        with tmpfile('csv') as target_fn:
            into(target_fn, source_fn)

            csv = CSV(target_fn)
            assert into(list, csv) == [(1, 2), (3, 4)]
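Note: the examples on this page rely on two small test helpers, filetext and tmpfile, that are not shown here (blaze ships its own versions in its test utilities). The following is only a rough sketch, under that assumption, of what they do: yield a temporary file path, optionally pre-filled with text, and remove the file afterwards.

import os
import tempfile
from contextlib import contextmanager

@contextmanager
def tmpfile(extension=''):
    # Yield a temporary filename with the given extension, removing it afterwards.
    if extension and not extension.startswith('.'):
        extension = '.' + extension
    handle, filename = tempfile.mkstemp(suffix=extension)
    os.close(handle)
    os.remove(filename)  # callers expect a free path they can write to
    try:
        yield filename
    finally:
        if os.path.exists(filename):
            os.remove(filename)

@contextmanager
def filetext(text, extension='', open=open, mode='w'):
    # Write `text` to a temporary file and yield its path (the `open` argument
    # lets callers pass e.g. gzip.open, as in the .csv.gz example further down).
    with tmpfile(extension) as filename:
        with open(filename, mode=mode) as f:
            f.write(text)
        yield filename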
Example #2
def test_complex_into():
    # data from: http://dummydata.me/generate

    this_dir = os.path.dirname(__file__)
    file_name = os.path.join(this_dir, 'dummydata.csv')

    tbl = 'testtable_into_complex'

    csv = CSV(
        file_name,
        schema=
        '{Name: string, RegistrationDate: date, ZipCode: int32, Consts: float64}'
    )
    sql = SQL(url, tbl, schema=csv.schema)

    into(sql, csv, if_exists="replace")

    df = pd.read_csv(file_name, parse_dates=['RegistrationDate'])

    assert_allclose([sql[0]], [csv[0]])

    for col in sql.columns:
        # need to convert to python datetime
        if col == "RegistrationDate":
            py_dates = list(
                df['RegistrationDate'].map(lambda x: x.date()).values)
            assert list(sql[:, col]) == list(csv[:, col]) == py_dates
        elif col == 'Consts':
            l, r = list(sql[:, col]), list(csv[:, col])
            assert np.allclose(l, df[col].values)
            assert np.allclose(l, r)
        else:
            assert list(sql[:, col]) == list(csv[:, col]) == list(
                df[col].values)
Example #3
    def test_into(self):
        with filetext('1,1\n2,2', extension='.csv') as a:
            with tmpfile(extension='.csv') as b:
                A = resource(a, schema='{x: int, y: int}')
                B = resource(b, schema='{x: int, y: int}', mode='a')
                B = into(B, A)
                assert into(list, B) == [(1, 1), (2, 2)]
Example #4
    def test_into(self):
        with filetext('1,1\n2,2', extension='.csv') as a:
            with tmpfile(extension='.csv') as b:
                A = resource(a, schema='{x: int, y: int}')
                B = resource(b, schema='{x: int, y: int}', mode='a')
                B = into(B, A)
                assert into(list, B) == [(1, 1), (2, 2)]
Example #5
def test_into_filename_filename():
    with filetext('1,2\n3,4', extension='csv') as source_fn:
        with tmpfile('csv') as target_fn:
            into(target_fn, source_fn)

            csv = CSV(target_fn)
            assert into(list, csv) == [(1, 2), (3, 4)]
Example #6
    def test_dynd(self):
        self.assertEqual(nd.as_py(into(nd.array(), (1, 2, 3))),
                         nd.as_py(nd.array([1, 2, 3])))
        self.assertEqual(into([], nd.array([1, 2])),
                         [1, 2])
        self.assertEqual(into([], nd.array([[1, 2], [3, 4]])),
                         [[1, 2], [3, 4]])
Example #7
def test_complex_into():
    # data from: http://dummydata.me/generate

    this_dir = os.path.dirname(__file__)
    file_name = os.path.join(this_dir, 'dummydata.csv')

    tbl = 'testtable_into_complex'

    csv = CSV(file_name, schema='{Name: string, RegistrationDate: date, ZipCode: int32, Consts: float64}')
    sql = SQL(url, tbl, schema=csv.schema)

    into(sql, csv, if_exists="replace")

    df = pd.read_csv(file_name, parse_dates=['RegistrationDate'])

    assert sql[0] == csv[0]

    #implement count method
    print(len(list(sql[:])))

    # assert sql[] == csv[-1]
    for col in sql.columns:
        #need to convert to python datetime
        if col == "RegistrationDate":
            py_dates = list(df['RegistrationDate'].astype(object).values)
            py_dates = [dt.date(d.year, d.month, d.day) for d in py_dates]
            assert list(sql[:,col]) == list(csv[:,col]) == py_dates
        #handle floating point precision -- perhaps it's better to call out to assert_array_almost_equal
        elif col == 'Consts':
            assert list(sql[:,col]) == list(csv[:,col]) == [round(val, 6) for val in df[col].values]
        else:
            assert list(sql[:,col]) == list(csv[:,col]) == list(df[col].values)
Example #8
def test_pandas_seq():
    assert str(into(DataFrame, [1, 2])) == \
            str(DataFrame([1, 2]))
    assert str(into(DataFrame, (1, 2))) == \
            str(DataFrame([1, 2]))
    assert str(into(DataFrame(columns=['a', 'b']), [(1, 2), (3, 4)])) == \
            str(DataFrame([[1, 2], [3, 4]], columns=['a', 'b']))
Example #9
def test_complex_into():
    # data from: http://dummydata.me/generate

    this_dir = os.path.dirname(__file__)
    file_name = os.path.join(this_dir, 'dummydata.csv')

    tbl = 'testtable_into_complex'

    csv = CSV(file_name, schema='{Name: string, RegistrationDate: date, ZipCode: int32, Consts: float64}')
    sql = SQL(url, tbl, schema=csv.schema)

    into(sql, csv, if_exists="replace")

    df = pd.read_csv(file_name, parse_dates=['RegistrationDate'])

    assert_allclose([sql[0]], [csv[0]])

    for col in sql.columns:
        # need to convert to python datetime
        if col == "RegistrationDate":
            py_dates = list(df['RegistrationDate'].map(lambda x: x.date()).values)
            assert list(sql[:, col]) == list(csv[:, col]) == py_dates
        elif col == 'Consts':
            l, r = list(sql[:, col]), list(csv[:, col])
            assert np.allclose(l, df[col].values)
            assert np.allclose(l, r)
        else:
            assert list(sql[:, col]) == list(csv[:,col]) == list(df[col].values)
Example #10
def test_into_filename():
    with tmpfile('csv') as filename:
        df = DataFrame([['Alice', 100], ['Bob', 200]],
                       columns=['name', 'amount'])
        into(filename, df)

        csv = CSV(filename)
        assert into(list, csv) == into(list, df)
Example #11
def test_into_filename():
    with tmpfile('csv') as filename:
        df = DataFrame([['Alice', 100], ['Bob', 200]],
                       columns=['name', 'amount'])
        into(filename, df)

        csv = CSV(filename)
        assert into(list, csv) == into(list, df)
Example #12
def test_failing_argument():

    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = resource(url, tbl, dshape=csv.dshape)

    into(sql, csv, if_exists="replace", skipinitialspace="alpha") # failing call
Example #13
def test_failing_argument():

    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = SQL(url, tbl, schema=csv.schema)

    into(sql, csv, if_exists="replace", skipinitialspace="alpha")  # failing call
Example #14
def test_numpy_list():
    data = [('Alice', 100), ('Bob', 200)]

    dtype = into(np.ndarray, data).dtype
    assert np.issubdtype(dtype[0], str)
    assert np.issubdtype(dtype[1], int)

    assert into([], into(np.ndarray, data)) == data
Example #15
def test_series_single_column(data):
    data = [('Alice', -200.0, 1), ('Bob', -300.0, 2)]
    t = Table(data, '{name: string, amount: float64, id: int64}')

    df = into(pd.Series, t['name'])
    out_df = into(df, into(DataFrame, t['amount']))
    assert isinstance(df, pd.Series)
    expected = pd.DataFrame(data, columns=t.schema.measure.names).name
    assert str(df) == str(expected)
    assert df.name == out_df.name
Example #16
    def test_csv_json_chunked(self):
        with filetext('1,1\n2,2\n') as csv_fn:
            with filetext('') as json_fn:
                schema = '{a: int32, b: int32}'
                csv = CSV(csv_fn, schema=schema)
                json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                into(json, csv)

                self.assertEquals(tuplify(tuple(json)), ((1, 1), (2, 2)))
Example #17
def test_no_header_no_columns():

    tbl = 'testtable_into_2'

    csv = CSV(file_name)
    sql = resource(url, tbl, dshape=csv.dshape)

    into(sql, csv, if_exists="replace")

    assert into(list, sql) == [(1, 2), (10, 20), (100, 200)]
Example #18
def test_simple_into():

    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = resource(url, tbl, dshape=csv.dshape)

    into(sql, csv, if_exists="replace")

    assert into(list, sql) == [(1, 2), (10, 20), (100, 200)]
Example #19
def test_series_single_column():
    data = [('Alice', -200.0, 1), ('Bob', -300.0, 2)]
    t = Data(data, '2 * {name: string, amount: float64, id: int64}')

    df = into(pd.Series, t['name'])
    out_df = into(df, into(DataFrame, t['amount']))
    assert isinstance(df, pd.Series)
    expected = pd.DataFrame(data, columns=t.schema.measure.names).name
    assert str(df) == str(expected)
    assert df.name == out_df.name
Example #20
def test_into_tables_path(good_csv, out_hdf5, out_hdf5_alt):
    import tables as tb
    tble = into(tb.Table, good_csv, filename=out_hdf5, datapath='/foo')
    tble2 = into(tb.Table, good_csv, filename=out_hdf5_alt, datapath='/foo',
        output_path=out_hdf5_alt)
    n = len(tble)
    x = len(tble2)
    tble._v_file.close()
    assert n == x
    assert n == 3
Example #21
    def test_csv_json_chunked(self):
        with filetext('1,1\n2,2\n') as csv_fn:
            with filetext('') as json_fn:
                schema = '{a: int32, b: int32}'
                csv = CSV(csv_fn, schema=schema)
                json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                into(json, csv)

                self.assertEquals(tuplify(tuple(json)), ((1, 1), (2, 2)))
Example #22
    def test_containers(self):
        self.assertEqual(into([], (1, 2, 3)),
                         [1, 2, 3])
        self.assertEqual(into((), (1, 2, 3)),
                         (1, 2, 3))
        self.assertEqual(into({}, [(1, 2), (3, 4)]),
                         {1: 2, 3: 4})
        self.assertEqual(into((), {1: 2, 3: 4}),
                         ((1, 2), (3, 4)))
        self.assertEqual(into((), {'cat': 2, 'dog': 4}),
                         (('cat', 2), ('dog', 4)))
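Example #22 above converts between plain Python containers. As a rough orientation only (this is a sketch, not blaze's actual implementation), such pairwise conversions can be written as a handful of rules registered with multipledispatch, plus one rule that instantiates the target when a bare type like list is passed:

from multipledispatch import dispatch

@dispatch(list, object)
def into(target, source):
    # Fill a list target from any iterable source.
    return list(source)

@dispatch(tuple, object)
def into(target, source):
    # Iterating a dict yields keys only, so keep (key, value) pairs instead.
    if isinstance(source, dict):
        return tuple(source.items())
    return tuple(source)

@dispatch(dict, object)
def into(target, source):
    return dict(source)

@dispatch(type, object)
def into(target, source):
    # A bare class such as `list` dispatches again on an empty instance.
    return into(target(), source)

assert into([], (1, 2, 3)) == [1, 2, 3]
assert into((), {1: 2, 3: 4}) == ((1, 2), (3, 4))
assert into({}, [(1, 2), (3, 4)]) == {1: 2, 3: 4}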
Example #23
def test_no_header_no_columns():

    tbl = 'testtable_into_2'

    csv = CSV(file_name)
    sql = SQL(url, tbl, schema='{x: int, y: int}')

    into(sql, csv, if_exists="replace")

    assert list(sql[:, 'x']) == [1, 10, 100]
    assert list(sql[:, 'y']) == [2, 20, 200]
Example #24
def test_simple_into():

    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = SQL(url, tbl, schema=csv.schema)

    into(sql, csv, if_exists="replace")

    assert list(sql[:, 'a']) == [1, 10, 100]
    assert list(sql[:, 'b']) == [2, 20, 200]
Example #25
def test_tryexcept_into():

    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = resource(url, tbl, dshape=csv.dshape)

    # uses multi-byte character and fails over to using sql.extend()
    into(sql, csv, if_exists="replace", QUOTE="alpha", FORMAT="csv")

    assert into(list, sql) == [(1, 2), (10, 20), (100, 200)]
Example #26
def test_simple_float_into():

    tbl = 'testtable_into_float'

    csv = CSV(file_name_floats, columns=['a', 'b'])
    sql = SQL(url, tbl, schema=csv.schema)

    into(sql, csv, if_exists="replace")

    assert list(sql[:, 'a']) == [1.02, 102.02, 1002.02]
    assert list(sql[:, 'b']) == [2.02, 202.02, 2002.02]
Example #27
def test_simple_into():

    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = SQL(url, tbl, schema=csv.schema)

    into(sql, csv, if_exists="replace")

    assert list(sql[:, 'a']) == [1, 10, 100]
    assert list(sql[:, 'b']) == [2, 20, 200]
Example #28
def test_pandas_numpy(data):
    dtype = [('name', 'O'), ('amount', int)]

    x = np.array(data, dtype=dtype)

    result = into(DataFrame(), x)
    expected = DataFrame(data, columns=['name', 'amount'])
    assert str(result) == str(expected)

    result = into(DataFrame(columns=['name', 'amount']), x)
    expected = DataFrame(data, columns=['name', 'amount'])
    assert str(result) == str(expected)
Example #29
def test_pandas_numpy():
    dtype = [('name', 'O'), ('amount', int)]

    x = np.array(data, dtype=dtype)

    result = into(DataFrame(), x)
    expected = DataFrame(data, columns=['name', 'amount'])
    assert str(result) == str(expected)

    result = into(DataFrame(columns=['name', 'amount']), x)
    expected = DataFrame(data, columns=['name', 'amount'])
    assert str(result) == str(expected)
Example #30
    def test_csv_hdf5(self):
        from dynd import nd
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('1,1\n2,2\n') as csv_fn:
                csv = CSV(csv_fn, schema='{a: int32, b: int32}')
                hdf5 = HDF5(hdf5_fn, '/data', schema='{a: int32, b: int32}')

                into(hdf5, csv)

                self.assertEquals(nd.as_py(hdf5.as_dynd()),
                                  [{'a': 1, 'b': 1},
                                   {'a': 2, 'b': 2}])
Example #31
    def test_csv_hdf5(self):
        import h5py
        from dynd import nd
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('1,1\n2,2\n') as csv_fn:
                csv = CSV(csv_fn, schema='2 * int')
                hdf5 = HDF5(hdf5_fn, '/data', schema='2 * int')

                into(hdf5, csv)

                self.assertEquals(nd.as_py(hdf5.as_dynd()),
                                  [[1, 1], [2, 2]])
Example #32
def test_tryexcept_into():

    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = SQL(url, tbl, schema=csv.schema)

    # uses multi-byte character and fails over to using sql.extend()
    into(sql, csv, if_exists="replace", QUOTE="alpha", FORMAT="csv")

    assert list(sql[:, 'a']) == [1, 10, 100]
    assert list(sql[:, 'b']) == [2, 20, 200]
Example #33
def test_into_tables_path_bad_csv(bad_csv_df, out_hdf5):
    import tables as tb
    tble = into(tb.Table, bad_csv_df, filename=out_hdf5, datapath='/foo',
                error_bad_lines=False)
    df_from_tbl = into(DataFrame, tble)
    tble._v_file.close()

    # Check that it's the same as straight from the CSV
    df_from_csv = into(DataFrame, bad_csv_df, error_bad_lines=False)
    assert len(df_from_csv) == len(df_from_tbl)
    assert list(df_from_csv.columns) == list(df_from_tbl.columns)
    assert (df_from_csv == df_from_tbl).all().all()
Example #34
def test_into_tables_path(good_csv, out_hdf5, out_hdf5_alt):
    import tables as tb
    tble = into(tb.Table, good_csv, filename=out_hdf5, datapath='/foo')
    tble2 = into(tb.Table,
                 good_csv,
                 filename=out_hdf5_alt,
                 datapath='/foo',
                 output_path=out_hdf5_alt)
    n = len(tble)
    x = len(tble2)
    tble._v_file.close()
    assert n == x
    assert n == 3
Example #35
    def test_json_csv_chunked(self):
        data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
        tuples = ((1, 1), (2, 2))
        text = '\n'.join(map(json.dumps, data))
        schema = '{x: int, y: int}'

        with filetext(text) as json_fn:
            with filetext('') as csv_fn:
                js = JSON_Streaming(json_fn, schema=schema)
                csv = CSV(csv_fn, mode='r+', schema=schema)

                into(csv, js)

                self.assertEquals(tuple(csv), tuples)
Example #36
    def test_json_csv_chunked(self):
        data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
        tuples = ((1, 1), (2, 2))
        text = '\n'.join(map(json.dumps, data))
        schema = '{x: int, y: int}'

        with filetext(text) as json_fn:
            with filetext('') as csv_fn:
                js = JSON_Streaming(json_fn, schema=schema)
                csv = CSV(csv_fn, mode='r+', schema=schema)

                into(csv, js)

                self.assertEquals(tuple(csv), tuples)
Example #37
def test_into_PyTables(a, h5tmp):
    dshape = 'var * {amount: int64, id: int64, name: string[7, "A"], timestamp: datetime}'
    lhs = into(tables.Table, a, dshape=dshape, filename=h5tmp, datapath='/data')
    result = into(np.ndarray, lhs)
    expected = numpy_ensure_bytes(x)

    assert into(list, result) == into(list, expected)
    assert result.dtype.names == expected.dtype.names

    # Ideally we would be doing this.  Sadly there is a float/int discrepancy
    # np.testing.assert_array_equal(into(np.ndarray, lhs),
    #                               numpy_ensure_bytes(x))

    lhs._v_file.close()
Example #38
    def test_hdf5_csv(self):
        import h5py
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('') as csv_fn:
                with h5py.File(hdf5_fn, 'w') as f:
                    d = f.create_dataset('data', (3, 3), dtype='i8')
                    d[:] = 1

                csv = CSV(csv_fn, mode='r+', schema='3 * int')
                hdf5 = HDF5(hdf5_fn, '/data')

                into(csv, hdf5)

                self.assertEquals(tuple(map(tuple, csv)),
                                  ((1, 1, 1), (1, 1, 1), (1, 1, 1)))
Example #39
def test_pandas_dynd():
    arr = nd.array(data, dtype=schema)

    result = into(DataFrame, arr)
    expected = DataFrame(data, columns=['name', 'amount'])
    assert str(result) == str(expected)

    nda = nd.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    csv = CSV(example('accounts.csv'))
    df_csv = into(DataFrame, csv)
    df_nd = into(df_csv, nda)
    df_no_names = into(DataFrame, nda)

    assert list(df_nd.columns) == list(df_csv.columns)
    assert list(df_no_names.columns) == [0, 1, 2]
Example #40
def test_pandas_dynd(data, schema):
    arr = nd.array(data, dtype=schema)

    result = into(DataFrame, arr)
    expected = DataFrame(data, columns=['name', 'amount'])
    assert str(result) == str(expected)

    nda = nd.array([[1,2,3], [4,5,6], [7,8,9]])
    csv = CSV(example('accounts.csv'))
    df_csv = into(DataFrame, csv)
    df_nd = into(df_csv, nda)
    df_no_names = into(DataFrame, nda)

    assert list(df_nd.columns) == list(df_csv.columns)
    assert list(df_no_names.columns) == [0,1,2]
Example #41
def test_pandas_numpy():
    data = [('Alice', 100), ('Bob', 200)]
    dtype = [('name', 'O'), ('amount', int)]

    x = np.array(data, dtype=dtype)

    result = into(DataFrame(), x)
    expected = DataFrame(data, columns=['name', 'amount'])
    assert str(result) == str(expected)

    result = into(DataFrame(columns=['name', 'amount']), x)
    expected = DataFrame(data, columns=['name', 'amount'])
    print(result)
    print(expected)
    assert str(result) == str(expected)
Example #42
def test_into_tables_path_bad_csv(bad_csv_df, out_hdf5):
    import tables as tb
    tble = into(tb.Table,
                bad_csv_df,
                filename=out_hdf5,
                datapath='/foo',
                error_bad_lines=False)
    df_from_tbl = into(DataFrame, tble)
    tble._v_file.close()

    # Check that it's the same as straight from the CSV
    df_from_csv = into(DataFrame, bad_csv_df, error_bad_lines=False)
    assert len(df_from_csv) == len(df_from_tbl)
    assert list(df_from_csv.columns) == list(df_from_tbl.columns)
    assert (df_from_csv == df_from_tbl).all().all()
Example #43
    def test_csv_hdf5(self):
        from dynd import nd
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('1,1\n2,2\n') as csv_fn:
                csv = CSV(csv_fn, schema='{a: int32, b: int32}')
                hdf5 = HDF5(hdf5_fn, '/data', schema='{a: int32, b: int32}')

                into(hdf5, csv)

                self.assertEquals(nd.as_py(hdf5.as_dynd()), [{
                    'a': 1,
                    'b': 1
                }, {
                    'a': 2,
                    'b': 2
                }])
Example #44
def test_into_ColumnDataSource_pytables():
    pytest.importorskip('bokeh')
    from bokeh.objects import ColumnDataSource

    pyt = PyTables(example('accounts.h5'), '/accounts')
    cds = into(ColumnDataSource, pyt)
    assert all(col in cds.column_names for col in ('balance', 'id', 'name'))
Example #45
def test_pandas_pandas():
    df = DataFrame(data, columns=['name', 'balance'])
    new_df = into(DataFrame, df)
    # Data must be the same
    assert np.all(new_df == df)
    # new_df should be a copy of df
    assert id(new_df) != id(df)
Example #46
def test_into_DataFrame_concat():
    csv = CSV(os.path.join(os.path.dirname(__file__), 'accounts.csv'))
    df = into(pd.DataFrame, Concat([csv, csv]))
    csv_df = csv.pandas_read_csv()
    assert df.index.tolist() == list(range(len(df)))
    assert df.values.tolist() == (csv_df.values.tolist() +
                                  csv_df.values.tolist())
    assert df.columns.tolist() == csv_df.columns.tolist()
Example #47
def test_data_frame_single_column_projection():
    data = [('Alice', -200.0, 1), ('Bob', -300.0, 2)]
    t = Data(data, '2 * {name: string, amount: float64, id: int64}')

    df = into(pd.DataFrame, t[['name']])
    assert isinstance(df, pd.DataFrame)
    expected = pd.DataFrame(data, columns=t.schema.measure.names)[['name']]
    assert str(df) == str(expected)
Example #48
def test_into_DataFrame_Excel_xlsx_format():
    pytest.importorskip('xlrd')
    dirname = os.path.dirname(__file__)
    fn = os.path.join(dirname, 'accounts_1.xlsx')
    exp = DataFrame([[1, "Alice", 100], [2, "Bob", 200]],
                    columns=["id", "name", "amount"])
    df = into(DataFrame, fn)
    assert (df == exp).all().all()
Example #49
def test_Column_data_source():
    pytest.importorskip('bokeh')
    from bokeh.objects import ColumnDataSource

    cds = into(ColumnDataSource(), data_table)

    assert isinstance(cds, ColumnDataSource)
    assert set(cds.column_names) == set(data_table.fields)
Example #50
    def test_resource_gz(self):
        with filetext(b'1,1\n2,2\n', extension='.csv.gz',
                      open=gzip.open, mode='wb') as fn:
            dd = resource(fn, schema='{x: int, y: int}')
            assert isinstance(dd, CSV)
            assert dd.open == gzip.open
            assert into(list, dd) == [(1, 1), (2, 2)]
Example #51
def test_into_PyTables(a, h5tmp):
    dshape = 'var * {amount: int64, id: int64, name: string[7, "A"], timestamp: datetime}'
    lhs = into(tables.Table,
               a,
               dshape=dshape,
               filename=h5tmp,
               datapath='/data')
    result = into(np.ndarray, lhs)
    expected = numpy_ensure_bytes(x)

    assert into(list, result) == into(list, expected)
    assert result.dtype.names == expected.dtype.names

    # Ideally we would be doing this.  Sadly there is a float/int discrepancy
    # np.testing.assert_array_equal(into(np.ndarray, lhs),
    #                               numpy_ensure_bytes(x))

    lhs._v_file.close()
Example #52
def test_base():
    """ Test all pairs of base in-memory data structures """
    sources = [v for k, v in data if k not in [list]]
    targets = [
        v for k, v in data if k not in [Data, Collection, CSV, nd.array, SQL]
    ]
    for a in sources:
        for b in targets:
            assert normalize(into(type(b), a)) == normalize(b)
Example #53
def test_simple_into(engine, csv):

    tbl = 'testtable_into_2'

    sql = SQL(engine, tbl, schema=csv.schema)

    into(sql, csv, if_exists="replace")
    conn = sql.engine.raw_connection()
    cursor = conn.cursor()
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' and name='{0}';".
        format(tbl))

    sqlite_tbl_names = cursor.fetchall()
    assert sqlite_tbl_names[0][0] == tbl

    assert list(sql[:, 'a']) == [1, 10, 100]
    assert list(sql[:, 'b']) == [2, 20, 200]
Example #54
def test_DataFrame_CSV():
    with filetext('1,2\n3,4\n') as fn:
        csv = CSV(fn, schema='{a: int64, b: float64}')
        df = into(DataFrame, csv)

        expected = DataFrame([[1, 2.0], [3, 4.0]], columns=['a', 'b'])

        assert str(df) == str(expected)
        assert list(df.dtypes) == [np.int64, np.float64]
Example #55
def test_datetime_csv_reader_same_as_into_types():
    csv = CSV(os.path.join(os.path.dirname(__file__), 'accounts.csv'))
    rhs = csv.pandas_read_csv().dtypes
    df = into(pd.DataFrame, csv)
    dtypes = df.dtypes
    expected = pd.Series(
        [np.dtype(x) for x in ['i8', 'i8', 'O', 'datetime64[ns]']],
        index=csv.columns)
    assert dtypes.index.tolist() == expected.index.tolist()
    assert dtypes.tolist() == expected.tolist()
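Example #55 checks that into(pd.DataFrame, csv) infers the same dtypes as pandas' own reader. The standalone snippet below (the column layout is illustrative, not the real accounts.csv fixture) shows where the datetime64[ns] entry in the expected Series comes from: pandas' parse_dates handling.

import io

import numpy as np
import pandas as pd

# Illustrative data only; the actual accounts.csv fixture may differ.
text = ("id,amount,name,timestamp\n"
        "1,100,Alice,2000-12-25 00:00:01\n"
        "2,200,Bob,2001-12-25 00:00:01\n")
df = pd.read_csv(io.StringIO(text), parse_dates=['timestamp'])
assert df.dtypes.tolist() == [np.dtype('i8'), np.dtype('i8'),
                              np.dtype('O'), np.dtype('datetime64[ns]')]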
Example #56
def test_into_DataFrame_Excel_xls_format():
    pytest.importorskip('xlrd')
    dirname = os.path.dirname(__file__)
    fn = os.path.join(dirname, 'accounts.xls')
    exp = DataFrame([[100, 1, "Alice", "2000-12-25T00:00:01"],
                     [200, 2, "Bob", "2001-12-25T00:00:01"],
                     [300, 3, "Charlie", "2002-12-25T00:00:01"]],
                    columns=["amount", "id", "name", "timestamp"])
    df = into(DataFrame, fn)
    assert (df == exp).all().all()