Example #1
0
def test_pandas_discover_on_gzipped_files():
    with filetext('name,when\nAlice,2014-01-01\nBob,2014-02-02',
                  open=gzip.open,
                  extension='.csv.gz') as fn:
        csv = CSV(fn, has_header=True)
        ds = datashape.dshape('var * {name: string, when: datetime}')
        assert discover(csv) == ds
Example #2
0
def test_csv():
    with tmpfile('.csv') as fn:
        csv = CSV(fn,
                  dshape='var * {name: string, amount: int}',
                  delimiter=',')

        assert csv.dialect['delimiter'] == ','
Example #3
0
def test_pandas_read_supports_read_csv_kwargs():
    with filetext('Alice,1\nBob,2') as fn:
        ds = datashape.dshape('var * {name: string, amount: int}')
        csv = CSV(fn)
        df = csv_to_DataFrame(csv, dshape=ds, usecols=['name'])
        assert isinstance(df, pd.DataFrame)
        assert convert(list, df) == [('Alice', ), ('Bob', )]
Example #4
0
def test_temp_ssh_files():
    with filetext('name,balance\nAlice,100\nBob,200', extension='csv') as fn:
        csv = CSV(fn)
        scsv = into(Temp(SSH(CSV)), csv, hostname='localhost')
        assert discover(csv) == discover(scsv)

        assert isinstance(scsv, _Temp)
Example #5
0
def test_simple_into(tbl):
    csv = CSV(file_name)
    sql = resource(url, tbl, dshape=ds)

    into(sql, csv, dshape=ds)

    assert into(list, sql) == data
Example #6
0
def test_no_header_no_columns(tbl):
    csv = CSV(file_name)
    sql = resource(url, tbl, dshape=ds)

    into(sql, csv, dshape=ds)

    assert into(list, sql) == data
Example #7
0
def test_pandas_loads_in_datetimes_naively():
    with filetext('name,when\nAlice,2014-01-01\nBob,2014-02-02') as fn:
        csv = CSV(fn, has_header=True)
        ds = datashape.dshape('var * {name: ?string, when: ?datetime}')
        assert discover(csv) == ds

        df = convert(pd.DataFrame, csv)
        assert df.dtypes['when'] == 'M8[ns]'
Example #8
0
def test_pandas_read_supports_missing_integers():
    with filetext('Alice,1\nBob,') as fn:
        ds = datashape.dshape('var * {name: string, val: ?int32}')
        csv = CSV(fn)
        df = csv_to_DataFrame(csv, dshape=ds)
        assert isinstance(df, pd.DataFrame)
        assert list(df.columns) == ['name', 'val']
        assert df.dtypes['val'] == 'f4'
Example #9
0
def test_pandas_read():
    with filetext('Alice,1\nBob,2') as fn:
        ds = datashape.dshape('var * {name: string, amount: int}')
        csv = CSV(fn)
        df = csv_to_DataFrame(csv, dshape=ds)
        assert isinstance(df, pd.DataFrame)
        assert convert(list, df) == [('Alice', 1), ('Bob', 2)]
        assert list(df.columns) == ['name', 'amount']
Example #10
0
def test_unused_datetime_columns():
    ds = datashape.dshape('var * {val: string, when: datetime}')
    with filetext("val,when\na,2000-01-01\nb,2000-02-02") as fn:
        csv = CSV(fn, has_header=True)
        assert convert(
            list,
            csv_to_DataFrame(csv, usecols=['val'], squeeze=True,
                             dshape=ds)) == ['a', 'b']
Example #11
0
def test_pandas_read_supports_datetimes():
    with filetext('Alice,2014-01-02\nBob,2014-01-03') as fn:
        ds = datashape.dshape('var * {name: string, when: date}')
        csv = CSV(fn)
        df = csv_to_DataFrame(csv, dshape=ds)
        assert isinstance(df, pd.DataFrame)
        assert list(df.columns) == ['name', 'when']
        assert df.dtypes['when'] == 'M8[ns]'
Example #12
0
def test_pandas_writes_header_by_default():
    with tmpfile('.csv') as fn:
        ds = datashape.dshape('var * {name: string, amount: int}')
        data = [('Alice', 1), ('Bob', 2)]
        csv = CSV(fn)
        append(csv, data, dshape=ds)

        with open(fn) as f:
            assert 'name' in f.read()
Example #13
0
def test_no_header_no_columns():
    tbl = 'testtable_into_2'

    csv = CSV(file_name)
    sql = resource(url + '::' + tbl, dshape=csv.dshape)

    into(sql, csv, if_exists="replace")

    assert into(list, sql) == [(1, 2), (10, 20), (100, 200)]
Example #14
0
def test_failing_argument():

    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = resource(url + '::' + tbl, dshape=csv.dshape)

    into(sql, csv, if_exists="replace",
         skipinitialspace="alpha")  # failing call
Example #15
0
def test_table_resource():
    with tmpfile('csv') as filename:
        ds = dshape('var * {a: int, b: int}')
        csv = CSV(filename)
        append(csv, [[1, 2], [10, 20]], dshape=ds)

        t = Data(filename)
        assert isinstance(t.data, CSV)
        assert into(list, compute(t)) == into(list, csv)
Example #16
0
def test_append(tbl):
    csv = CSV(file_name)
    sql = resource(url, tbl, dshape=ds)

    into(sql, csv)
    assert into(list, sql) == data

    into(sql, csv)
    assert into(list, sql) == data + data
Example #17
0
def test_header_disagrees_with_dshape():
    ds = datashape.dshape('var * {name: string, bal: int64}')
    with filetext('name,val\nAlice,100\nBob,200', extension='csv') as fn:
        csv = CSV(fn, header=True)
        assert convert(list, csv) == [('Alice', 100), ('Bob', 200)]

        assert list(convert(pd.DataFrame, csv).columns) == ['name', 'val']
        assert list(convert(pd.DataFrame, csv,
                            dshape=ds).columns) == ['name', 'bal']
Example #18
0
def test_simple_float_into():
    tbl = 'testtable_into_float'

    csv = CSV(file_name_floats, columns=['a', 'b'])
    sql = resource(url + '::' + tbl, dshape=csv.dshape)

    into(sql, csv, if_exists="replace")

    assert into(list, sql) == \
            [(1.02, 2.02), (102.02, 202.02), (1002.02, 2002.02)]
Example #19
0
def test_simple_into():

    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = resource(url + '::' + tbl, dshape=csv.dshape)

    into(sql, csv, if_exists="replace")

    assert into(list, sql) == [(1, 2), (10, 20), (100, 200)]
Example #20
0
def test_pandas_read_supports_gzip():
    with filetext('Alice,1\nBob,2',
                  open=gzip.open,
                  mode='wt',
                  extension='.csv.gz') as fn:
        ds = datashape.dshape('var * {name: string, amount: int}')
        csv = CSV(fn)
        df = csv_to_DataFrame(csv, dshape=ds)
        assert isinstance(df, pd.DataFrame)
        assert convert(list, df) == [('Alice', 1), ('Bob', 2)]
        assert list(df.columns) == ['name', 'amount']
Example #21
0
def test_tryexcept_into():

    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = resource(url + '::' + tbl, dshape=csv.dshape)

    into(sql, csv, if_exists="replace", QUOTE="alpha",
         FORMAT="csv")  # uses multi-byte character and
    # fails over to using sql.extend()

    assert into(list, sql) == [(1, 2), (10, 20), (100, 200)]
Example #22
0
def test_pandas_write_gzip():
    with tmpfile('.csv.gz') as fn:
        ds = datashape.dshape('var * {name: string, amount: int}')
        data = [('Alice', 1), ('Bob', 2)]
        csv = CSV(fn, has_header=True)
        append(csv, data, dshape=ds)

        f = gzip.open(fn)
        s = f.read()
        assert 'name' in s
        assert 'Alice,1' in s
        f.close()
Example #23
0
def test_convert_through_temporary_local_storage():
    with filetext('name,quantity\nAlice,100\nBob,200', extension='csv') as fn:
        csv = CSV(fn)
        df = into(pd.DataFrame, csv)
        scsv = into(Temp(SSH(CSV)), csv, hostname='localhost')

        assert into(list, csv) == into(list, scsv)

        scsv2 = into(Temp(SSH(CSV)), df, hostname='localhost')
        assert into(list, scsv2) == into(list, df)

        sjson = into(Temp(SSH(JSONLines)), df, hostname='localhost')
        assert (into(np.ndarray, sjson) == into(np.ndarray, df)).all()
Example #24
0
def test_csv_append():
    with tmpfile('.csv') as fn:
        csv = CSV(fn, has_header=False)

        data = [('Alice', 100), ('Bob', 200)]
        append(csv, data)

        assert list(convert(Iterator, csv)) == data

        with open(fn) as f:
            s = f.read()
            assert 'Alice' in s
            assert '100' in s
Example #25
0
def test_pandas_write():
    with tmpfile('.csv') as fn:
        ds = datashape.dshape('var * {name: string, amount: int}')
        data = [('Alice', 1), ('Bob', 2)]
        csv = CSV(fn, has_header=True)
        append(csv, data, dshape=ds)

        with open(fn) as f:
            assert 'name' in f.read()

        # Doesn't write header twice
        append(csv, data, dshape=ds)
        with open(fn) as f:
            s = f.read()
            assert s.count('name') == 1
Example #26
0
def test_complex_into(tbl):
    # data from: http://dummydata.me/generate
    this_dir = os.path.dirname(__file__)
    file_name = os.path.join(this_dir, 'dummydata.csv')

    ds = dshape("""
    var * {
        Name: string, RegistrationDate: date, ZipCode: int32, Consts: float64
    }""")

    csv = CSV(file_name, has_header=True)
    sql = resource(url, tbl, dshape=ds)

    into(sql, csv)

    assert_allclose(into(list, sql), into(list, csv))
Example #27
0
def test_complex_into():
    # data from: http://dummydata.me/generate

    this_dir = os.path.dirname(__file__)
    file_name = os.path.join(this_dir, 'dummydata.csv')

    tbl = 'testtable_into_complex'

    csv = CSV(
        file_name,
        schema=
        '{Name: string, RegistrationDate: date, ZipCode: int64, Consts: float64}'
    )

    sql = resource(url + '::' + tbl, dshape=csv.dshape)
    into(sql, csv, if_exists="replace")

    df = pd.read_csv(file_name, parse_dates=['RegistrationDate'])

    assert into(list, sql) == into(list, csv)
Example #28
0
def test_csv_postgres_load():
    tbl = 'testtable'

    engine = sqlalchemy.create_engine(url)

    if engine.has_table(tbl):
        metadata = sqlalchemy.MetaData()
        metadata.reflect(engine)
        t = metadata.tables[tbl]
        t.drop(engine)

    csv = CSV(file_name)

    sql = resource(url + '::' + tbl, dshape=csv.dshape)
    engine = sql.bind
    conn = engine.raw_connection()

    cursor = conn.cursor()
    full_path = os.path.abspath(file_name)
    load = '''LOAD DATA INFILE '{0}' INTO TABLE {1} FIELDS TERMINATED BY ','
        lines terminated by '\n'
        '''.format(full_path, tbl)
    cursor.execute(load)
    conn.commit()
Example #29
0
def test_Data_attribute_repr():
    t = Data(CSV(example('accounts-datetimes.csv')))
    result = t.when.day
    expected = pd.DataFrame({'when_day': [1,2,3,4,5]})
    assert repr(result) == repr(expected)
Example #30
0
def test_unicode_column_names():
    with filetext('foo\xc4\x87,a\n1,2\n3,4', extension='csv') as fn:
        csv = CSV(fn, has_header=True)
        df = into(pd.DataFrame, csv)