def csv(): data = [(1, 2), (10, 20), (100, 200)] with tmpfile('csv') as filename: csv = CSV(filename, 'w', schema='{a: int32, b: int32}') csv.extend(data) csv = CSV(filename, schema='{a: int32, b: int32}') yield csv
def test_complex_into(): # data from: http://dummydata.me/generate this_dir = os.path.dirname(__file__) file_name = os.path.join(this_dir, 'dummydata.csv') tbl = 'testtable_into_complex' csv = CSV( file_name, schema= '{Name: string, RegistrationDate: date, ZipCode: int32, Consts: float64}' ) sql = SQL(url, tbl, schema=csv.schema) into(sql, csv, if_exists="replace") df = pd.read_csv(file_name, parse_dates=['RegistrationDate']) assert_allclose([sql[0]], [csv[0]]) for col in sql.columns: # need to convert to python datetime if col == "RegistrationDate": py_dates = list( df['RegistrationDate'].map(lambda x: x.date()).values) assert list(sql[:, col]) == list(csv[:, col]) == py_dates elif col == 'Consts': l, r = list(sql[:, col]), list(csv[:, col]) assert np.allclose(l, df[col].values) assert np.allclose(l, r) else: assert list(sql[:, col]) == list(csv[:, col]) == list( df[col].values)
def test_csv_postgres_load(): tbl = 'testtable' engine = sqlalchemy.create_engine(url) if engine.has_table(tbl): metadata = sqlalchemy.MetaData() metadata.reflect(engine) t = metadata.tables[tbl] t.drop(engine) csv = CSV(file_name) sql = SQL(url,tbl, schema=csv.schema) engine = sql.engine conn = engine.raw_connection() cursor = conn.cursor() full_path = os.path.abspath(file_name) load = '''LOAD DATA INFILE '{0}' INTO TABLE {1} FIELDS TERMINATED BY ',' lines terminated by '\n' '''.format(full_path, tbl) cursor.execute(load) conn.commit()
def test_failing_argument(): tbl = 'testtable_into_2' csv = CSV(file_name, columns=['a', 'b']) sql = SQL(url,tbl, schema= csv.schema) into(sql,csv, if_exists="replace", skipinitialspace="alpha") # failing call
def test_no_header_no_columns(): tbl = 'testtable_into_2' csv = CSV(file_name) sql = SQL(url,tbl, schema= '{x: int, y: int}') into(sql,csv, if_exists="replace") assert list(sql[:, 'x']) == [1, 10, 100] assert list(sql[:, 'y']) == [2, 20, 200]
def test_simple_float_into(): tbl = 'testtable_into_float' csv = CSV(file_name_floats, columns=['a', 'b']) sql = SQL(url,tbl, schema= csv.schema) into(sql,csv, if_exists="replace") assert list(sql[:, 'a']) == [1.02, 102.02, 1002.02] assert list(sql[:, 'b']) == [2.02, 202.02, 2002.02]
def test_simple_into(): tbl = 'testtable_into_2' csv = CSV(file_name, columns=['a', 'b']) sql = SQL(url, tbl, schema=csv.schema) into(sql, csv, if_exists="replace") assert list(sql[:, 'a']) == [1, 10, 100] assert list(sql[:, 'b']) == [2, 20, 200]
def test_tryexcept_into(): tbl = 'testtable_into_2' csv = CSV(file_name, columns=['a', 'b']) sql = SQL(url,tbl, schema= csv.schema) into(sql,csv, if_exists="replace", QUOTE="alpha", FORMAT="csv") # uses multi-byte character and # fails over to using sql.extend() assert list(sql[:, 'a']) == [1, 10, 100] assert list(sql[:, 'b']) == [2, 20, 200]
def test_complex_into(): # data from: http://dummydata.me/generate this_dir = os.path.dirname(__file__) file_name = os.path.join(this_dir, 'dummydata.csv') tbl = 'testtable_into_complex' csv = CSV(file_name, schema='{Name: string, RegistrationDate: date, ZipCode: int64, Consts: float64}') sql = SQL(url,tbl, schema=csv.schema) into(sql,csv, if_exists="replace") df = pd.read_csv(file_name, parse_dates=['RegistrationDate']) assert sql[0] == csv[0] #implement count method print(len(list(sql[:]))) # assert sql[] == csv[-1] for col in sql.columns: #need to convert to python datetime if col == "RegistrationDate": py_dates = list(df['RegistrationDate'].astype(object).values) py_dates = [dt.date(d.year, d.month, d.day) for d in py_dates] assert list(sql[:,col]) == list(csv[:,col]) == py_dates #handle floating point precision -- perhaps it's better to call out to assert_array_almost_equal elif col == 'Consts': ## WARNING!!! Floats are truncated with MySQL and the assertion fails sql_array = np.array(list(sql[:,col])) csv_array = list(csv[:,col]) df_array = df[col].values np.testing.assert_almost_equal(sql_array,csv_array, decimal=5) np.testing.assert_almost_equal(sql_array,df_array, decimal=5) else: assert list(sql[:,col]) == list(csv[:,col]) == list(df[col].values)
def test_csv_postgres_load(): tbl = 'testtable' engine = sqlalchemy.create_engine(url) if engine.has_table(tbl): metadata = sqlalchemy.MetaData() metadata.reflect(engine) t = metadata.tables[tbl] t.drop(engine) csv = CSV(file_name) sql = SQL(url, tbl, schema=csv.schema) engine = sql.engine conn = engine.raw_connection() cursor = conn.cursor() full_path = os.path.abspath(file_name) load = '''copy {0} from '{1}'(FORMAT CSV, DELIMITER ',', NULL '');'''.format( tbl, full_path) cursor.execute(load) conn.commit()
def iris_server(request): iris = CSV(example('iris.csv')) return Server(iris).app.test_client()
def iris_server(): iris = CSV(example('iris.csv')) s = Server(iris, all_formats, allow_add=True) s.app.testing = True with s.app.test_client() as c: yield c
@pytest.mark.parametrize('serial', all_formats) def dont_test_compute_with_namespace(test, serial): query = {'expr': {'op': 'Field', 'args': ['accounts', 'name']}} expected = ['Alice', 'Bob'] response = test.post('/compute', data=serial.dumps(query), headers=mimetype(serial)) assert 'OK' in response.status tdata = serial.loads(response.data) assert serial.data_loads(tdata['data']) == expected assert tdata['names'] == ['name'] iris = CSV(example('iris.csv')) @pytest.mark.parametrize('serial', all_formats) def test_compute_with_variable_in_namespace(iris_server, serial): test = iris_server t = symbol('t', discover(iris)) pl = symbol('pl', 'float32') expr = t[t.petal_length > pl].species tree = to_tree(expr, {pl: 'pl'}) blob = serial.dumps({'expr': tree, 'namespace': {'pl': 5}}) resp = test.post('/compute', data=blob, headers=mimetype(serial)) assert 'OK' in resp.status tdata = serial.loads(resp.data)
from datetime import datetime from pandas import DataFrame from blaze.utils import example from blaze import CSV data = CSV(example('iris.csv'))
def iris_server(): iris = CSV(example('iris.csv')) server = Server(iris) return server.app.test_client()
def test_different_schema_raises(): with tmpfile('.csv') as filename: df = pd.DataFrame(np.random.randn(10, 2)) df.to_csv(filename, index=False, header=False) with pytest.raises(TypeError): Table(CSV(filename), columns=list('ab'))
def iris_server(): iris = CSV(example('iris.csv')) server = Server(datasets={'iris': iris}) return server.app.test_client()