Exemple #1
0
def csv():
    """Yield a ``CSV`` over a temporary file that holds three two-column rows.

    The rows are first written through a ``CSV`` opened in ``'w'`` mode; the
    same file is then reopened with the same schema and yielded. The
    ``tmpfile`` context remains open while the generator is suspended.
    """
    schema = '{a: int32, b: int32}'
    rows = [(1, 2), (10, 20), (100, 200)]

    with tmpfile('csv') as path:
        handle = CSV(path, 'w', schema=schema)
        handle.extend(rows)
        # Rebind to a fresh object over the file that was just populated.
        handle = CSV(path, schema=schema)
        yield handle
Exemple #2
0
def test_complex_into():
    """Load ``dummydata.csv`` into a SQL table and compare it column by column.

    Every column read back from the SQL table is checked against the CSV
    source and against a pandas ``read_csv`` of the same file.
    """
    # data from: http://dummydata.me/generate
    csv_path = os.path.join(os.path.dirname(__file__), 'dummydata.csv')
    table_name = 'testtable_into_complex'
    schema = ('{Name: string, RegistrationDate: date, '
              'ZipCode: int32, Consts: float64}')

    source = CSV(csv_path, schema=schema)
    target = SQL(url, table_name, schema=source.schema)

    into(target, source, if_exists="replace")

    frame = pd.read_csv(csv_path, parse_dates=['RegistrationDate'])

    assert_allclose([target[0]], [source[0]])

    for name in target.columns:
        from_sql = list(target[:, name])
        from_csv = list(source[:, name])

        if name == "RegistrationDate":
            # pandas yields timestamps here; convert them to python dates
            expected_dates = list(
                frame['RegistrationDate'].map(
                    lambda stamp: stamp.date()).values)
            assert from_sql == from_csv == expected_dates
        elif name == 'Consts':
            # Float column: compare with a tolerance instead of exactly.
            assert np.allclose(from_sql, frame[name].values)
            assert np.allclose(from_sql, from_csv)
        else:
            assert from_sql == from_csv == list(frame[name].values)
Exemple #3
0
def test_csv_postgres_load():
    """Bulk-load ``file_name`` into ``testtable`` through a raw DBAPI cursor.

    Any existing ``testtable`` is dropped first. A ``SQL`` object is then
    constructed with the CSV's schema (presumably creating the table -- TODO
    confirm), and the file is loaded with a hand-written statement.

    NOTE(review): despite the function name, the statement issued here is
    MySQL's ``LOAD DATA INFILE`` -- confirm which backend ``url`` targets.
    """
    tbl = 'testtable'

    engine = sqlalchemy.create_engine(url)

    # Start from a clean slate: drop a table left behind by an earlier run.
    if engine.has_table(tbl):
        metadata = sqlalchemy.MetaData()
        metadata.reflect(engine)
        metadata.tables[tbl].drop(engine)

    csv = CSV(file_name)
    sql = SQL(url, tbl, schema=csv.schema)

    full_path = os.path.abspath(file_name)
    load = '''LOAD DATA INFILE '{0}' INTO TABLE {1} FIELDS TERMINATED BY ','
        lines terminated by '\n'
        '''.format(full_path, tbl)

    # Close the cursor and the connection even when the load raises, so a
    # failing test does not leave a checked-out connection behind.
    conn = sql.engine.raw_connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(load)
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
Exemple #4
0
def test_failing_argument():
    """Call ``into`` with ``skipinitialspace="alpha"``.

    The original author marks this invocation as a failing call; nothing is
    asserted afterwards.
    """
    table_name = 'testtable_into_2'
    source = CSV(file_name, columns=['a', 'b'])
    target = SQL(url, table_name, schema=source.schema)

    options = dict(if_exists="replace", skipinitialspace="alpha")
    into(target, source, **options)  # failing call
Exemple #5
0
def test_no_header_no_columns():
    """Load a CSV opened without explicit columns into an ``{x, y}`` table.

    The SQL schema is given literally rather than taken from the CSV, and
    both columns are checked after the load.
    """
    table_name = 'testtable_into_2'
    source = CSV(file_name)
    target = SQL(url, table_name, schema='{x: int, y: int}')

    into(target, source, if_exists="replace")

    expected = (('x', [1, 10, 100]), ('y', [2, 20, 200]))
    for column, values in expected:
        assert list(target[:, column]) == values
Exemple #6
0
def test_simple_float_into():
    """Load the float CSV into a SQL table and check both columns exactly."""
    table_name = 'testtable_into_float'
    source = CSV(file_name_floats, columns=['a', 'b'])
    target = SQL(url, table_name, schema=source.schema)

    into(target, source, if_exists="replace")

    expected = (
        ('a', [1.02, 102.02, 1002.02]),
        ('b', [2.02, 202.02, 2002.02]),
    )
    for column, values in expected:
        assert list(target[:, column]) == values
Exemple #7
0
def test_simple_into():
    """Load the two-column integer CSV into a SQL table and check both columns."""
    table_name = 'testtable_into_2'
    source = CSV(file_name, columns=['a', 'b'])
    target = SQL(url, table_name, schema=source.schema)

    into(target, source, if_exists="replace")

    expected = (('a', [1, 10, 100]), ('b', [2, 20, 200]))
    for column, values in expected:
        assert list(target[:, column]) == values
Exemple #8
0
def test_tryexcept_into():
    """Call ``into`` with ``QUOTE="alpha"`` and still expect the data to land.

    Per the original author's note, the multi-character quote makes the
    primary load path fail and ``into`` falls back to ``sql.extend()``.
    """
    table_name = 'testtable_into_2'
    source = CSV(file_name, columns=['a', 'b'])
    target = SQL(url, table_name, schema=source.schema)

    options = dict(if_exists="replace", QUOTE="alpha", FORMAT="csv")
    into(target, source, **options)

    expected = (('a', [1, 10, 100]), ('b', [2, 20, 200]))
    for column, values in expected:
        assert list(target[:, column]) == values
Exemple #9
0
def csv():
    """Yield a ``CSV`` reading back three rows written to a temporary file.

    A ``CSV`` opened in ``'w'`` mode receives the rows; the file is then
    reopened with the same schema and that object is yielded while the
    ``tmpfile`` context is still active.
    """
    schema = '{a: int32, b: int32}'
    rows = [(1, 2), (10, 20), (100, 200)]

    with tmpfile('csv') as path:
        resource = CSV(path, 'w', schema=schema)
        resource.extend(rows)
        # Swap the writer for a reader over the same file.
        resource = CSV(path, schema=schema)
        yield resource
Exemple #10
0
def test_complex_into():
    """Load ``dummydata.csv`` into a SQL table and compare it column by column.

    The first row and every column read back from SQL are compared with the
    CSV source; columns are additionally compared with a pandas ``read_csv``
    of the same file.
    """
    # data from: http://dummydata.me/generate
    csv_path = os.path.join(os.path.dirname(__file__), 'dummydata.csv')
    table_name = 'testtable_into_complex'

    source = CSV(
        csv_path,
        schema='{Name: string, RegistrationDate: date, ZipCode: int64, Consts: float64}',
    )
    target = SQL(url, table_name, schema=source.schema)
    into(target, source, if_exists="replace")

    frame = pd.read_csv(csv_path, parse_dates=['RegistrationDate'])

    assert target[0] == source[0]

    # TODO: implement a count method instead of materialising every row
    row_count = len(list(target[:]))
    print(row_count)

    # NOTE: a last-row comparison (`sql[] == csv[-1]`) was left disabled by
    # the original author.
    for name in target.columns:
        if name == "RegistrationDate":
            # need to convert to python datetime
            stamps = frame['RegistrationDate'].astype(object).values
            expected_dates = [dt.date(s.year, s.month, s.day) for s in stamps]
            assert (list(target[:, name]) == list(source[:, name])
                    == expected_dates)
        elif name == 'Consts':
            # Floating point column: compare to 5 decimals rather than exactly.
            # WARNING!!! Floats are truncated with MySQL and the assertion fails
            sql_values = np.array(list(target[:, name]))
            csv_values = list(source[:, name])
            frame_values = frame[name].values
            np.testing.assert_almost_equal(sql_values, csv_values, decimal=5)
            np.testing.assert_almost_equal(sql_values, frame_values, decimal=5)
        else:
            assert (list(target[:, name]) == list(source[:, name])
                    == list(frame[name].values))
Exemple #11
0
def test_csv_postgres_load():
    """Bulk-load ``file_name`` into ``testtable`` with a ``copy`` statement.

    Any existing ``testtable`` is dropped first. A ``SQL`` object is then
    constructed with the CSV's schema (presumably creating the table -- TODO
    confirm), and the file is loaded through a raw DBAPI cursor.
    """
    tbl = 'testtable'

    engine = sqlalchemy.create_engine(url)

    # Start from a clean slate: drop a table left behind by an earlier run.
    if engine.has_table(tbl):
        metadata = sqlalchemy.MetaData()
        metadata.reflect(engine)
        metadata.tables[tbl].drop(engine)

    csv = CSV(file_name)
    sql = SQL(url, tbl, schema=csv.schema)

    full_path = os.path.abspath(file_name)
    load = '''copy {0} from '{1}'(FORMAT CSV, DELIMITER ',', NULL '');'''.format(
        tbl, full_path)

    # Close the cursor and the connection even when the load raises, so a
    # failing test does not leave a checked-out connection behind.
    conn = sql.engine.raw_connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(load)
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
Exemple #12
0
def iris_server(request):
    """Return the test client of a ``Server`` built over the iris example CSV.

    ``request`` is accepted but not used in the body.
    """
    server = Server(CSV(example('iris.csv')))
    return server.app.test_client()
Exemple #13
0
def iris_server():
    """Yield a test client for a ``Server`` over the iris example CSV.

    The server is created with ``all_formats`` and ``allow_add=True``, its
    app is switched to testing mode, and the client is yielded from inside
    its own context manager.
    """
    server = Server(CSV(example('iris.csv')), all_formats, allow_add=True)
    server.app.testing = True
    with server.app.test_client() as client:
        yield client
Exemple #14
0
@pytest.mark.parametrize('serial', all_formats)
def dont_test_compute_with_namespace(test, serial):
    """POST a ``Field`` expression for ``accounts.name`` to ``/compute``.

    Checks the response status, the decoded data and the reported column
    names for each serialization format in ``all_formats``.

    NOTE(review): the ``dont_`` prefix presumably keeps pytest from
    collecting this function -- confirm that this is intentional.
    """
    payload = serial.dumps(
        {'expr': {'op': 'Field', 'args': ['accounts', 'name']}})
    response = test.post('/compute',
                         data=payload,
                         headers=mimetype(serial))

    assert 'OK' in response.status

    decoded = serial.loads(response.data)
    assert serial.data_loads(decoded['data']) == ['Alice', 'Bob']
    assert decoded['names'] == ['name']


# Module-level CSV built from ``example('iris.csv')``; the test below passes
# it to ``discover`` when constructing the symbol ``t``.
iris = CSV(example('iris.csv'))


@pytest.mark.parametrize('serial', all_formats)
def test_compute_with_variable_in_namespace(iris_server, serial):
    """POST an expression whose free symbol ``pl`` is bound via ``namespace``.

    The expression selects ``species`` where ``petal_length > pl``; the
    request supplies ``pl = 5`` in its ``namespace`` entry. Only the response
    status is asserted; the body is decoded but not inspected.
    """
    table = symbol('t', discover(iris))
    threshold = symbol('pl', 'float32')
    selection = table[table.petal_length > threshold].species

    payload = serial.dumps({
        'expr': to_tree(selection, {threshold: 'pl'}),
        'namespace': {'pl': 5},
    })
    response = iris_server.post(
        '/compute', data=payload, headers=mimetype(serial))

    assert 'OK' in response.status
    # Decoding is kept so that an undecodable body still fails the test.
    serial.loads(response.data)
Exemple #15
0
from datetime import datetime

from pandas import DataFrame
from blaze.utils import example
from blaze import CSV

# Module-level CSV built from ``example('iris.csv')`` (``example`` comes from
# ``blaze.utils``).
data = CSV(example('iris.csv'))
Exemple #16
0
def iris_server():
    """Return the test client of a ``Server`` wrapping the iris example CSV."""
    return Server(CSV(example('iris.csv'))).app.test_client()
Exemple #17
0
def test_different_schema_raises():
    """``Table`` over a header-less float CSV with ``columns=['a', 'b']`` raises.

    Ten rows of two random floats are written without index or header, and
    wrapping the resulting ``CSV`` in ``Table`` is expected to raise
    ``TypeError``.
    """
    with tmpfile('.csv') as path:
        pd.DataFrame(np.random.randn(10, 2)).to_csv(
            path, index=False, header=False)
        with pytest.raises(TypeError):
            Table(CSV(path), columns=['a', 'b'])
Exemple #18
0
def iris_server():
    """Return the test client of a ``Server`` exposing the iris CSV as ``'iris'``."""
    datasets = {'iris': CSV(example('iris.csv'))}
    return Server(datasets=datasets).app.test_client()