def test_register(sql):
    """sqlite URIs (file-backed and in-memory) should resolve via
    ``resource`` to ``SQL`` instances."""
    with tmpfile('.db') as fn:
        db_uri = 'sqlite:///' + fn
        sql = SQL(db_uri, 'foo', schema='{x: int, y: int}')
        assert isinstance(resource(db_uri, 'foo'), SQL)
        assert isinstance(resource(db_uri + '::foo'), SQL)

    sql = SQL('sqlite:///:memory:', 'foo', schema='{x: int, y: int}')
    assert isinstance(
        resource('sqlite:///:memory:', 'foo', schema='{x: int, y: int}'),
        SQL)
    assert isinstance(
        resource('sqlite:///:memory:::foo', schema='{x: int, y: int}'),
        SQL)
def test_complex_into():
    """Round-trip a mixed-type CSV (data from http://dummydata.me/generate)
    through a SQL table and compare against the CSV source and pandas."""
    this_dir = os.path.dirname(__file__)
    file_name = os.path.join(this_dir, 'dummydata.csv')
    tbl = 'testtable_into_complex'

    csv = CSV(
        file_name,
        schema='{Name: string, RegistrationDate: date, ZipCode: int32, Consts: float64}')
    sql = SQL(url, tbl, schema=csv.schema)
    into(sql, csv, if_exists="replace")

    df = pd.read_csv(file_name, parse_dates=['RegistrationDate'])

    assert_allclose([sql[0]], [csv[0]])
    for column in sql.columns:
        if column == "RegistrationDate":
            # SQL hands back datetime.date objects; coerce the pandas
            # timestamps to dates before comparing.
            expected_dates = list(
                df['RegistrationDate'].map(lambda x: x.date()).values)
            assert (list(sql[:, column]) == list(csv[:, column])
                    == expected_dates)
        elif column == 'Consts':
            # Floats: compare approximately to tolerate round-trip loss.
            from_sql = list(sql[:, column])
            from_csv = list(csv[:, column])
            assert np.allclose(from_sql, df[column].values)
            assert np.allclose(from_sql, from_csv)
        else:
            assert (list(sql[:, column]) == list(csv[:, column])
                    == list(df[column].values))
def test_csv_postgres_load():
    """Bulk-load a CSV with a server-side LOAD DATA statement.

    NOTE(review): despite the test name, the statement below is MySQL
    ``LOAD DATA INFILE`` syntax — presumably this module targets MySQL;
    confirm against the ``url`` fixture.
    """
    tbl = 'testtable'

    engine = sqlalchemy.create_engine(url)
    # Drop any leftover table from a previous run.
    if engine.has_table(tbl):
        metadata = sqlalchemy.MetaData()
        metadata.reflect(engine)
        metadata.tables[tbl].drop(engine)

    csv = CSV(file_name)
    sql = SQL(url, tbl, schema=csv.schema)

    conn = sql.engine.raw_connection()
    cursor = conn.cursor()
    full_path = os.path.abspath(file_name)
    load = '''LOAD DATA INFILE '{0}' INTO TABLE {1} FIELDS TERMINATED BY ',' lines terminated by '\n' '''.format(full_path, tbl)
    cursor.execute(load)
    conn.commit()
def test_failing_argument():
    """Pass a bogus keyword through into() — this is the deliberately
    failing call the test exists to exercise."""
    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = SQL(url, tbl, schema=csv.schema)

    into(sql, csv, if_exists="replace", skipinitialspace="alpha")  # failing call
def test_simple_into():
    """Copying a two-column CSV into SQL preserves both columns."""
    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = SQL(url, tbl, schema=csv.schema)
    into(sql, csv, if_exists="replace")

    assert list(sql[:, 'a']) == [1, 10, 100]
    assert list(sql[:, 'b']) == [2, 20, 200]
def test_no_header_no_columns():
    """A header-less CSV with no column names still loads when the SQL
    schema supplies the structure."""
    tbl = 'testtable_into_2'

    csv = CSV(file_name)
    sql = SQL(url, tbl, schema='{x: int, y: int}')
    into(sql, csv, if_exists="replace")

    assert list(sql[:, 'x']) == [1, 10, 100]
    assert list(sql[:, 'y']) == [2, 20, 200]
def test_simple_float_into():
    """Float columns round-trip from CSV into SQL unchanged."""
    tbl = 'testtable_into_float'

    csv = CSV(file_name_floats, columns=['a', 'b'])
    sql = SQL(url, tbl, schema=csv.schema)
    into(sql, csv, if_exists="replace")

    assert list(sql[:, 'a']) == [1.02, 102.02, 1002.02]
    assert list(sql[:, 'b']) == [2.02, 202.02, 2002.02]
def test_tryexcept_into():
    """A multi-byte QUOTE makes the fast bulk path fail so into() falls
    back to sql.extend(); the data must still arrive intact."""
    tbl = 'testtable_into_2'

    csv = CSV(file_name, columns=['a', 'b'])
    sql = SQL(url, tbl, schema=csv.schema)

    # Uses a multi-byte character and fails over to using sql.extend().
    into(sql, csv, if_exists="replace", QUOTE="alpha", FORMAT="csv")

    assert list(sql[:, 'a']) == [1, 10, 100]
    assert list(sql[:, 'b']) == [2, 20, 200]
def test_simple_into(engine, csv):
    """into() should create the target table in SQLite and populate it."""
    tbl = 'testtable_into_2'

    sql = SQL(engine, tbl, schema=csv.schema)
    into(sql, csv, if_exists="replace")

    # Confirm the table exists via sqlite's own catalog.
    conn = sql.engine.raw_connection()
    cursor = conn.cursor()
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' and name='{0}';".
        format(tbl))
    assert cursor.fetchall()[0][0] == tbl

    assert list(sql[:, 'a']) == [1, 10, 100]
    assert list(sql[:, 'b']) == [2, 20, 200]
def test_complex_into():
    """Round-trip a mixed-type CSV (data from http://dummydata.me/generate)
    through a SQL table and compare against the CSV source and pandas.

    Fix: removed a leftover debug ``print(len(list(sql[:])))`` and a
    commented-out, syntactically invalid assertion (``sql[]``).
    """
    this_dir = os.path.dirname(__file__)
    file_name = os.path.join(this_dir, 'dummydata.csv')
    tbl = 'testtable_into_complex'

    csv = CSV(file_name,
              schema='{Name: string, RegistrationDate: date, ZipCode: int64, Consts: float64}')
    sql = SQL(url, tbl, schema=csv.schema)
    into(sql, csv, if_exists="replace")

    df = pd.read_csv(file_name, parse_dates=['RegistrationDate'])

    assert sql[0] == csv[0]

    for col in sql.columns:
        if col == "RegistrationDate":
            # SQL hands back datetime.date objects; convert the pandas
            # timestamps to plain dates before comparing.
            py_dates = [dt.date(d.year, d.month, d.day)
                        for d in df['RegistrationDate'].astype(object).values]
            assert list(sql[:, col]) == list(csv[:, col]) == py_dates
        elif col == 'Consts':
            # WARNING: floats are truncated with MySQL, so compare
            # approximately rather than exactly.
            sql_array = np.array(list(sql[:, col]))
            csv_array = list(csv[:, col])
            df_array = df[col].values
            np.testing.assert_almost_equal(sql_array, csv_array, decimal=5)
            np.testing.assert_almost_equal(sql_array, df_array, decimal=5)
        else:
            assert (list(sql[:, col]) == list(csv[:, col])
                    == list(df[col].values))
def test_csv_postgres_load():
    """Bulk-load a CSV into Postgres with the server-side COPY command."""
    tbl = 'testtable'

    engine = sqlalchemy.create_engine(url)
    # Drop any leftover table from a previous run.
    if engine.has_table(tbl):
        metadata = sqlalchemy.MetaData()
        metadata.reflect(engine)
        metadata.tables[tbl].drop(engine)

    csv = CSV(file_name)
    sql = SQL(url, tbl, schema=csv.schema)

    conn = sql.engine.raw_connection()
    cursor = conn.cursor()
    full_path = os.path.abspath(file_name)
    load = '''copy {0} from '{1}'(FORMAT CSV, DELIMITER ',', NULL '');'''.format(
        tbl, full_path)
    cursor.execute(load)
    conn.commit()
def test_outer_join():
    """Exercise inner, left, and right joins compiled to SQL over SQLite.

    Full outer join is intentionally disabled below — SQLAlchemy doesn't
    support it. Fix: removed leftover debug ``print`` statements from the
    right-join section.
    """
    L = TableSymbol('L', '{id: int, name: string, amount: real}')
    R = TableSymbol('R', '{city: string, id: int}')

    from blaze.sql import SQL
    engine = sa.create_engine('sqlite:///:memory:')

    _left = [(1, 'Alice', 100), (2, 'Bob', 200), (4, 'Dennis', 400)]
    left = SQL(engine, 'left', schema=L.schema)
    left.extend(_left)

    _right = [('NYC', 1), ('Boston', 1), ('LA', 3), ('Moscow', 4)]
    right = SQL(engine, 'right', schema=R.schema)
    right.extend(_right)

    conn = engine.connect()

    query = compute(join(L, R, how='inner'),
                    {L: left.table, R: right.table})
    result = list(map(tuple, conn.execute(query).fetchall()))
    assert set(result) == set(
        [(1, 'Alice', 100, 'NYC'),
         (1, 'Alice', 100, 'Boston'),
         (4, 'Dennis', 400, 'Moscow')])

    query = compute(join(L, R, how='left'),
                    {L: left.table, R: right.table})
    result = list(map(tuple, conn.execute(query).fetchall()))
    assert set(result) == set(
        [(1, 'Alice', 100, 'NYC'),
         (1, 'Alice', 100, 'Boston'),
         (2, 'Bob', 200, None),
         (4, 'Dennis', 400, 'Moscow')])

    query = compute(join(L, R, how='right'),
                    {L: left.table, R: right.table})
    result = list(map(tuple, conn.execute(query).fetchall()))
    assert set(result) == set(
        [(1, 'Alice', 100, 'NYC'),
         (1, 'Alice', 100, 'Boston'),
         (3, None, None, 'LA'),
         (4, 'Dennis', 400, 'Moscow')])

    # SQLAlchemy doesn't support full outer join
    """
    query = compute(join(L, R, how='outer'),
                    {L: left.table, R: right.table})
    result = list(map(tuple, conn.execute(query).fetchall()))
    assert set(result) == set(
        [(1, 'Alice', 100, 'NYC'),
         (1, 'Alice', 100, 'Boston'),
         (2, 'Bob', 200, None),
         (3, None, None, 'LA'),
         (4, 'Dennis', 400, 'Moscow')])
    """

    conn.close()
def sql():
    """Fixture: an in-memory SQLite table 'foo' pre-populated with three
    (x, y) rows."""
    rows = [(1, 2), (10, 20), (100, 200)]
    table = SQL('sqlite:///:memory:', 'foo', schema='{x: int, y: int}')
    table.extend(rows)
    return table