def test_passing_upstream_and_product_in_postgres(pg_client, db_credentials):
    """The {{product}} placeholder renders and the table is created."""
    dag = DAG()
    client = SQLAlchemyClient(db_credentials['uri'])
    dag.clients[SQLScript] = client
    dag.clients[PostgresRelation] = client

    # start from a clean state: drop any leftover table from previous runs
    conn = pg_client.connection
    cur = conn.cursor()
    cur.execute('drop table if exists series;')
    conn.commit()
    # close the cursor explicitly before closing the connection (the
    # original leaked the cursor and relied on conn.close() to reap it)
    cur.close()
    conn.close()

    ta_t = ('begin; drop table if exists {{product}}; '
            'create table {{product}} as select * from '
            'generate_series(0, 15) as n; commit;')
    ta_rel = PostgresRelation((None, 'series', 'table'))
    # the task registers itself in the dag; no need to keep the reference
    # (the original bound it to an unused local)
    SQLScript(ta_t, ta_rel, dag, 'ta')

    dag.build()

    assert ta_rel.exists()
def test_can_upload_file_from_upstream_dependency(tmp_directory,
                                                  pg_client_and_schema):
    """SQLUpload can take its source path from an upstream task's product."""
    pg_client, schema = pg_client_and_schema

    dag = DAG()
    dag.clients[SQLUpload] = pg_client
    dag.clients[PostgresRelation] = pg_client

    name = 'test_can_upload_file_from_upstream_dependency'

    task_make = PythonCallable(make_data,
                               product=File('data.parquet'),
                               dag=dag,
                               name='make')
    task_upload = SQLUpload('{{upstream["make"]}}',
                            product=PostgresRelation((schema, name, 'table')),
                            dag=dag,
                            name='upload',
                            to_sql_kwargs={'if_exists': 'replace'})

    task_make >> task_upload

    dag.build()
def test_can_upload_file_from_upstream_dependency(tmp_directory,
                                                  pg_client_and_schema):
    """PostgresCopyFrom reads its input from the upstream product and the
    uploaded table contains exactly the data the upstream task generated.
    """
    pg_client, schema = pg_client_and_schema
    dag = DAG()
    dag.clients[PostgresRelation] = pg_client
    dag.clients[PostgresCopyFrom] = pg_client

    make = PythonCallable(make_data,
                          product=File('data.parquet'),
                          dag=dag,
                          name='make')
    name = 'test_can_upload_file_from_upstream_dependency'
    pg = PostgresCopyFrom('{{upstream["make"]}}',
                          product=PostgresRelation((schema, name, 'table')),
                          dag=dag,
                          name='upload')

    make >> pg

    dag.build()

    product = str(dag['upload'])
    # pd.read_sql expects a SQLAlchemy connectable (or DBAPI connection),
    # not the ploomber client wrapper — pass the underlying engine, as the
    # other read-back tests in this module do
    assert pd.read_sql(f'SELECT * FROM {product}',
                       pg_client.engine).to_dict(orient='list') == {
                           'a': [1, 2, 3]
                       }
def test_append_rows(tmp_directory, pg_client_and_schema):
    """Uploading with if_exists='append' adds rows to an existing table."""
    pg_client, schema = pg_client_and_schema

    data = pd.DataFrame({'a': [1, 2, 3]})
    data.to_csv('data.csv', index=False)

    dag = DAG()
    dag.clients[SQLUpload] = pg_client
    dag.clients[PostgresRelation] = pg_client

    # create table with the initial three rows
    data.to_sql('test_append',
                pg_client.engine,
                schema=schema,
                if_exists='replace',
                index=False)

    SQLUpload('data.csv',
              product=PostgresRelation((schema, 'test_append', 'table')),
              dag=dag,
              name='upload',
              to_sql_kwargs={
                  'if_exists': 'append',
                  'index': False
              })

    dag.build()

    uploaded = pd.read_sql('SELECT * FROM {}.test_append'.format(schema),
                           pg_client.engine)

    # three pre-existing rows plus three appended rows
    assert uploaded.shape[0] == 6
def test_placeholder_is_copied_upon_initialization():
    """Two tasks built from the same Placeholder must not share it."""
    dag = DAG()
    dag.clients[SQLScript] = Mock()
    dag.clients[PostgresRelation] = Mock()

    source = Placeholder('CREATE TABLE {{product}} AS SELECT * FROM TABLE')

    first = SQLScript(source,
                      PostgresRelation(('schema', 'a_table', 'table')),
                      dag,
                      name='t1')
    second = SQLScript(source,
                       PostgresRelation(
                           ('schema', 'another_table', 'table')),
                       dag,
                       name='t2')

    # each task must hold its own copy of the placeholder object
    assert first.source._placeholder is not second.source._placeholder
def test_passing_upstream_and_product_in_postgres(pg_client_and_schema):
    """The {{product}} placeholder renders and the table gets created."""
    client, _ = pg_client_and_schema

    dag = DAG()
    dag.clients[SQLScript] = client
    dag.clients[PostgresRelation] = client

    # start from a clean state
    client.execute('drop table if exists series;')

    code = ('begin; drop table if exists {{product}}; '
            'create table {{product}} as select * from '
            'generate_series(0, 15) as n; commit;')
    relation = PostgresRelation((None, 'series', 'table'))
    SQLScript(code, relation, dag, 'ta')

    dag.build()

    assert relation.exists()
def client_and_prod(request, sqlite_client_and_tmp_dir, pg_client_and_schema):
    """Yield (client, product, schema) for either a sqlite or postgres backend.

    Based on: https://github.com/pytest-dev/pytest/issues/349#issue-88534390
    """
    if request.param == 'sqlite':
        client, _ = sqlite_client_and_tmp_dir
        schema = None
        product = SQLiteRelation((None, 'numbers', 'table'), client)
    else:
        client, schema = pg_client_and_schema
        product = PostgresRelation((schema, 'numbers', 'table'), client)

    yield client, product, schema

    # teardown: drop the table the test created
    product.delete()
def test_postgresscript_with_relation():
    """params and the product placeholder render into the final SQL."""
    dag = DAG()
    task = SQLScript('CREATE TABLE {{product}} AS SELECT * FROM {{name}}',
                     PostgresRelation(('user', 'table', 'table'),
                                      client=Dummy()),
                     dag,
                     name='name',
                     params=dict(name='some_table'),
                     client=Dummy())

    task.render()

    assert str(task.product) == 'user.table'
    rendered = str(task.source)
    assert rendered == 'CREATE TABLE user.table AS SELECT * FROM some_table'
def test_can_upload_a_file_using_a_path(tmp_directory, pg_client_and_schema):
    """SQLUpload accepts a pathlib.Path as its source argument."""
    pg_client, schema = pg_client_and_schema

    pd.DataFrame({'a': [1, 2, 3]}).to_parquet('data.parquet')

    dag = DAG()
    dag.clients[PostgresRelation] = pg_client
    dag.clients[SQLUpload] = pg_client

    SQLUpload(Path('data.parquet'),
              product=PostgresRelation(
                  (schema, 'test_can_upload_a_file', 'table')),
              dag=dag,
              name='upload')

    dag.build()
def test_can_upload_a_file(serializer, task_arg, tmp_directory,
                           pg_client_and_schema):
    """SQLUpload handles each parametrized (serializer, path) combination."""
    pg_client, schema = pg_client_and_schema

    frame = pd.DataFrame({'a': [1, 2, 3]})
    # write the fixture file with the parametrized serializer method name
    # (e.g. to_csv / to_parquet) and the parametrized str/Path argument
    getattr(frame, serializer)(task_arg)

    dag = DAG()
    dag.clients[SQLUpload] = pg_client
    dag.clients[PostgresRelation] = pg_client

    SQLUpload(task_arg,
              product=PostgresRelation(
                  (schema, 'test_can_upload_a_file', 'table')),
              dag=dag,
              name='upload',
              to_sql_kwargs={'if_exists': 'replace'})

    dag.build()
def test_can_upload_file_from_upstream_dependency(tmp_directory,
                                                  pg_client_and_schema):
    """PostgresCopyFrom can take its input from an upstream task's product."""
    pg_client, schema = pg_client_and_schema

    dag = DAG()
    dag.clients[PostgresRelation] = pg_client
    dag.clients[PostgresCopyFrom] = pg_client

    upstream = PythonCallable(make_data,
                              product=File('data.parquet'),
                              dag=dag,
                              name='make')

    table = 'test_can_upload_file_from_upstream_dependency'
    copy_task = PostgresCopyFrom('{{upstream["make"]}}',
                                 product=PostgresRelation(
                                     (schema, table, 'table')),
                                 dag=dag,
                                 name='upload')

    upstream >> copy_task

    dag.build()
def test_can_upload_a_file_using_a_path(tmp_directory, pg_client_and_schema):
    """PostgresCopyFrom accepts a pathlib.Path and the uploaded table
    contains exactly the data from the source file.
    """
    pg_client, schema = pg_client_and_schema

    df = pd.DataFrame({'a': [1, 2, 3]})
    df.to_parquet('data.parquet')

    dag = DAG()
    dag.clients[PostgresRelation] = pg_client
    dag.clients[PostgresCopyFrom] = pg_client

    PostgresCopyFrom(Path('data.parquet'),
                     product=PostgresRelation(
                         (schema, 'test_can_upload_a_file', 'table')),
                     dag=dag,
                     name='upload')

    dag.build()

    product = str(dag['upload'])
    # pd.read_sql expects a SQLAlchemy connectable (or DBAPI connection),
    # not the ploomber client wrapper — pass the underlying engine, as the
    # other read-back tests in this module do
    assert pd.read_sql(f'SELECT * FROM {product}',
                       pg_client.engine).to_dict(orient='list') == {
                           'a': [1, 2, 3]
                       }
File('data.csv')], [ SQLScript, 'CREATE TABLE {{product}} AS SELECT * FROM my_table', SQLRelation(['schema', 'name', 'table']) ], [ SQLTransfer, 'SELECT * FROM my_table', SQLiteRelation(['schema', 'name', 'table']) ], [ SQLUpload, 'SELECT * FROM my_table', SQLiteRelation(['schema', 'name', 'table']) ], [ PostgresCopyFrom, 'SELECT * FROM my_table', PostgresRelation(['schema', 'name', 'table']) ]]) def test_exception_if_missing_task_client(task_class, task_arg, product): task = task_class(task_arg, product, dag=DAG(), name='task') with pytest.raises(MissingClientError): task.client @pytest.mark.parametrize('product_class, arg', [ [SQLiteRelation, ['name', 'schema', 'table']], [PostgresRelation, ['name', 'schema', 'table']], [GenericSQLRelation, ['name', 'schema', 'table']], [GenericProduct, 'something'], [File, 'something'],
def test_postgres_relation_is_picklable():
    """Pickling a PostgresRelation round-trips without raising."""
    relation = PostgresRelation(('schema', 'name', 'table'))
    serialized = pickle.dumps(relation)
    pickle.loads(serialized)
def test_exists_with_empty_schema(pg_client_and_schema):
    """A two-element identifier (no schema) still works with exists()."""
    client, _ = pg_client_and_schema
    relation = PostgresRelation(('data', 'table'), client)
    assert relation.exists()
name='get_data') sample = PythonCallable(sample, (File(env.path.input / 'sample' / 'red.csv'), File(env.path.input / 'sample' / 'white.csv')), name='sample', dag=dag) get_data >> sample # Tasks can have parameters, on this case we are passing a uri parameter to the csvsql command line # utility, furthermore, since we declared a product and an upstream dependency, those are # available as parameters as well. This task does not create a File but a PostgresRelation, # this type of product is a a table named red in the public schema red_task = ShellScript(('csvsql --db {{uri}} --tables {{product.name}} --insert {{upstream["sample"][0]}} ' '--overwrite'), PostgresRelation(('public', 'red', 'table')), dag, params=dict(uri=uri), name='red') sample >> red_task white_task = ShellScript(('csvsql --db {{uri}} --tables {{product.name}} --insert {{upstream["sample"][1]}} ' '--overwrite'), PostgresRelation(('public', 'white', 'table')), dag, params=dict(uri=uri), name='white') sample >> white_task # let's introduce a new type of task, a SQLScript, this task will execute
], [SQLDump, dict(source='SELECT * FROM some_tablle', kwargs={})], ]) def test_init_source(class_, kwargs): assert class_._init_source(**kwargs) @pytest.mark.parametrize('Task, prod, source', [ (ShellScript, GenericProduct('file.txt'), 'touch {{product}}'), (SQLScript, GenericSQLRelation( ('name', 'table')), 'CREATE TABLE {{product}}'), (SQLDump, GenericProduct('file.txt'), 'SELECT * FROM {{upstream["key"]}}'), (SQLTransfer, GenericSQLRelation( ('name', 'table')), 'SELECT * FROM {{upstream["key"]}}'), (SQLUpload, GenericSQLRelation(('name', 'table')), 'some_file.txt'), (PostgresCopyFrom, PostgresRelation(('name', 'table')), 'file.parquet') ]) def test_task_init_source_with_placeholder_obj(Task, prod, source): """ Testing we can initialize a task with a Placeholder as the source argument """ dag = DAG() dag.clients[Task] = Mock() dag.clients[type(prod)] = Mock() Task(Placeholder(source), prod, dag, name='task') def test_task_build_clears_cached_status(tmp_directory): dag = DAG() t = PythonCallable(touch, File('my_file'), dag)