def test_can_transfer_sqlite(tmp_directory):
    """Check that SQLTransfer copies a table between two SQLite databases.

    A 100-row ``numbers`` table is written to the input database, a DAG with
    a single SQLTransfer task writes the query result to ``numbers2`` in the
    output database, and both tables are read back and compared.

    >>> import tempfile
    >>> tmp_directory = tempfile.mkdtemp()
    """
    tmp = Path(tmp_directory)

    # create connections to 2 dbs
    uri_in = 'sqlite:///{}'.format(tmp / "database_in.db")
    uri_out = 'sqlite:///{}'.format(tmp / "database_out.db")
    client_in = SQLAlchemyClient(uri_in)
    client_out = SQLAlchemyClient(uri_out)

    # Everything that uses the clients runs inside try/finally so both are
    # closed even when the build or one of the reads raises.
    try:
        # make some data and save it in the db
        df = pd.DataFrame({'a': np.arange(0, 100),
                           'b': np.arange(100, 200)})
        df.to_sql('numbers', client_in.engine, index=False)

        # create the task and run it
        # NOTE(review): chunksize=10 presumably makes the 100 rows move in
        # several batches - confirm against SQLTransfer
        dag = DAG()
        SQLTransfer('SELECT * FROM numbers',
                    SQLiteRelation((None, 'numbers2', 'table'),
                                   client=client_out),
                    dag,
                    name='transfer',
                    client=client_in,
                    chunksize=10)
        dag.build()

        # load dumped data and data from the db
        original = pd.read_sql_query('SELECT * FROM numbers',
                                     client_in.engine)
        transfer = pd.read_sql_query('SELECT * FROM numbers2',
                                     client_out.engine)
    finally:
        # close the output client even if closing the input client fails
        try:
            client_in.close()
        finally:
            client_out.close()

    # make sure they are the same
    assert original.equals(transfer)
def test_source_loader_and_task(sqlite_client_and_tmp_dir):
    """Build a DAG whose SQLTransfer source is fetched from a SourceLoader.

    The fixture supplies a client and a temporary directory; a query file is
    written into that directory, looked up through the loader and used as the
    source of a single transfer task. The test passes if the build completes.
    """
    client, tmp_dir = sqlite_client_and_tmp_dir

    # write the query file that the loader is asked for below
    query_file = Path(tmp_dir, 'data_query.sql')
    query_file.write_text('SELECT * FROM data')

    loader = SourceLoader(str(tmp_dir))

    # register the same client for the task class and the product class
    dag = DAG()
    for kind in (SQLTransfer, SQLiteRelation):
        dag.clients[kind] = client

    query = loader['data_query.sql']
    target = SQLiteRelation((None, 'data2', 'table'))
    SQLTransfer(query, product=target, dag=dag, name='transfer')

    dag.build()
SELECT * FROM {{upstream["transfer"]}} WHERE x > 1 """)) ############################################################################### # DAG declaration dag = DAG(executor=Serial(build_in_subprocess=False)) dag.clients[SQLTransfer] = client dag.clients[SQLiteRelation] = client dag.clients[SQLScript] = client source_loader = SourceLoader(tmp_dir) transfer = SQLTransfer(source_loader['data_select.sql'], product=SQLiteRelation((None, 'data2', 'table')), dag=dag, name='transfer') subset = SQLScript(source_loader['subset_create.sql'], product=SQLiteRelation((None, 'subset', 'table')), dag=dag, name='subset') transfer >> subset dag.render() ############################################################################### # Our macro is correctly rendered: print(dag['subset'].source)