def test_can_transfer_sqlite(tmp_directory):
    """
    Check that SQLTransfer copies a table from one SQLite db to another.

    >>> import tempfile
    >>> tmp_directory = tempfile.mkdtemp()
    """
    tmp = Path(tmp_directory)

    # create connections to 2 dbs
    client_in = SQLAlchemyClient(
        'sqlite:///{}'.format(tmp / "database_in.db"))
    client_out = SQLAlchemyClient(
        'sqlite:///{}'.format(tmp / "database_out.db"))

    # close both clients even when the build or the reads fail, so a failing
    # test does not leave open handles on the database files
    try:
        # make some data and save it in the db
        df = pd.DataFrame({
            'a': np.arange(0, 100),
            'b': np.arange(100, 200),
        })
        df.to_sql('numbers', client_in.engine, index=False)

        # create the task and run it
        dag = DAG()

        SQLTransfer('SELECT * FROM numbers',
                    SQLiteRelation((None, 'numbers2', 'table'),
                                   client=client_out),
                    dag,
                    name='transfer',
                    client=client_in,
                    chunksize=10)

        dag.build()

        # load the source table and the transferred copy
        original = pd.read_sql_query('SELECT * FROM numbers',
                                     client_in.engine)
        transfer = pd.read_sql_query('SELECT * FROM numbers2',
                                     client_out.engine)
    finally:
        client_in.close()
        client_out.close()

    # make sure they are the same
    assert original.equals(transfer)
def test_custom_io_handler(tmp_directory):
    """
    Check that SQLUpload loads its source file through a custom io_handler.

    A tab-separated file is written to disk, uploaded to the "my-table"
    table using ``my_reading_fn`` as the reader, and the table contents are
    compared against the original data frame.
    """
    dag = DAG()
    client = SQLAlchemyClient('sqlite:///database.db')

    # close the client even when the build or the read fails
    try:
        dag.clients[SQLUpload] = client
        dag.clients[SQLiteRelation] = client

        df = pd.DataFrame({'a': [1, 2, 3], 'b': [1, 2, 3]})
        df.to_csv('some-file.tsv', sep='\t', index=False)

        def my_reading_fn(path):
            # the file is tab-separated, so it needs an explicit separator
            return pd.read_csv(path, sep='\t')

        SQLUpload('some-file.tsv',
                  SQLiteRelation(('my-table', 'table')),
                  dag=dag,
                  name='task',
                  io_handler=my_reading_fn,
                  to_sql_kwargs=dict(index=False))

        dag.build()

        # pandas expects a SQLAlchemy connectable, so hand it the engine
        # (as the other tests do) rather than the client wrapper
        other = pd.read_sql('SELECT * FROM "my-table"', con=client.engine)
    finally:
        client.close()

    assert other.equals(df)
def pg_client_and_schema():
    """
    Set up a throwaway schema holding a small "data" table, yield
    ``(client, schema)`` and drop the schema (with everything in it) when
    the generator is resumed
    """
    db = _load_db_credentials()

    # every session gets its own randomly named schema so that concurrent
    # test sessions do not step on each other
    # NOTE: the name is lower-cased because pandas.DataFrame.to_sql does not
    # like upper case characters
    letters = [random.choice(string.ascii_letters) for _ in range(12)]
    schema = ''.join(letters).lower()

    # make the new schema the default one through the connection options
    # info: https://www.postgresonline.com/article_pfriendly/279.html
    connect_args = dict(options=f'-c search_path={schema}')
    engine_kwargs = dict(connect_args=connect_args)
    client = SQLAlchemyClient(db['uri'], create_engine_kwargs=engine_kwargs)

    # the schema must exist before anything is written to it
    create_statement = 'CREATE SCHEMA {};'.format(schema)
    client.execute(create_statement)

    # sample data
    sample = pd.DataFrame({'x': range(10)})
    sample.to_sql('data', client.engine)

    yield client, schema

    # teardown: remove the schema and release the client
    drop_statement = 'DROP SCHEMA {} CASCADE;'.format(schema)
    client.execute(drop_statement)
    client.close()
def test_render_remote_with_non_file_products(tmp_directory):
    """
    Build a DAG whose only product is a SQLite table, then render an
    identical DAG with ``remote=True`` and check every task is Skipped
    """
    client = SQLAlchemyClient('sqlite:///my.db')

    # source table read by the SQL script
    source = pd.DataFrame({'x': range(3)})
    source.to_sql('data', client.engine)

    def make(client):
        new_dag = DAG()

        for key in (SQLScript, SQLiteRelation):
            new_dag.clients[key] = client

        product = SQLiteRelation(['data2', 'table'])
        SQLScript('CREATE TABLE {{product}} AS SELECT * FROM data',
                  product,
                  dag=new_dag,
                  name='task')

        return new_dag

    # first DAG: actually build, so the product exists
    built = make(client)
    built.build()

    # second DAG: same definition, only rendered against remote metadata
    dag = make(client)
    dag.render(remote=True)

    client.close()

    # should match the local status
    statuses = set(task.exec_status for task in dag.values())
    assert statuses == {TaskStatus.Skipped}
def sqlite_client_and_tmp_dir():
    """
    Creates a sqlite db with sample data and yields initialized client
    along with a temporary directory location

    Yields
    ------
    tuple
        ``(client, tmp_dir)``: a client for the ``my_db.db`` database stored
        inside ``tmp_dir`` (a ``pathlib.Path``). The database contains a
        "data" table with a single column "x" holding 0..9 (plus the index
        column pandas writes by default).

    Notes
    -----
    On teardown the client is closed and the temporary directory is deleted
    """
    # local import: only the teardown needs it
    import shutil

    tmp_dir = Path(tempfile.mkdtemp())

    try:
        client = SQLAlchemyClient('sqlite:///' + str(tmp_dir / 'my_db.db'))

        try:
            df = pd.DataFrame({'x': range(10)})
            df.to_sql('data', client.engine)

            yield client, tmp_dir
        finally:
            # close first so the database file is released before deletion
            client.close()
    finally:
        # mkdtemp leaves deletion to the caller; ignore errors so a failed
        # removal does not mask the outcome of the test itself
        shutil.rmtree(tmp_dir, ignore_errors=True)
def pg_client():
    """
    Yield a client for the database described by ``_load_db_credentials()``,
    pointed at a freshly created, randomly named schema; the schema is
    dropped and the client closed when the generator is resumed
    """
    credentials = _load_db_credentials()
    client = SQLAlchemyClient(credentials['uri'])

    # a random schema per session keeps two sessions running at the same
    # time from conflicting with each other
    letters = [random.choice(string.ascii_letters) for _ in range(8)]
    schema = ''.join(letters)

    setup_statements = (
        'CREATE SCHEMA {};'.format(schema),
        'SET search_path TO {};'.format(schema),
    )

    for statement in setup_statements:
        client.execute(statement)

    yield client

    # teardown: remove the schema and everything created inside it
    teardown_statement = 'drop schema {} cascade;'.format(schema)
    client.execute(teardown_statement)
    client.close()
def pg_client_and_schema():
    """
    Yield ``(client, schema)`` where ``schema`` is a freshly created,
    randomly named, lower-case schema set as the search path; the schema is
    dropped and the client closed when the generator is resumed
    """
    credentials = _load_db_credentials()
    client = SQLAlchemyClient(credentials['uri'])

    # a random schema per session keeps two sessions running at the same
    # time from conflicting with each other
    # NOTE: lower-cased on purpose, pandas.DataFrame.to_sql does not like
    # upper case characters
    letters = [random.choice(string.ascii_letters) for _ in range(12)]
    schema = ''.join(letters).lower()

    setup_statements = (
        'CREATE SCHEMA {};'.format(schema),
        'SET search_path TO {};'.format(schema),
    )

    for statement in setup_statements:
        client.execute(statement)

    yield client, schema

    # teardown: remove the schema and everything created inside it
    teardown_statement = 'drop schema {} cascade;'.format(schema)
    client.execute(teardown_statement)
    client.close()
def test_ignores_non_file_products(tmp_directory, monkeypatch):
    """
    Render a DAG whose only product is a SQLite table while a File client
    is configured, and check remote metadata is never fetched
    """
    # replace the remote metadata fetch with a mock to record any call
    fetch_spy = Mock()
    monkeypatch.setattr(file._RemoteFile, '_fetch_remote_metadata',
                        fetch_spy)

    client = SQLAlchemyClient('sqlite:///my.db')

    dag = DAG()

    for key in (SQLScript, SQLiteRelation):
        dag.clients[key] = client

    storage = LocalStorageClient('remote', path_to_project_root='.')
    dag.clients[File] = storage

    product = SQLiteRelation(['data2', 'table'])
    SQLScript('CREATE TABLE {{product}} AS SELECT * FROM data',
              product,
              dag=dag,
              name='task')

    dag.render()

    client.close()

    fetch_spy.assert_not_called()