예제 #1
0
def test_passing_upstream_and_product_in_postgres(pg_client, db_credentials):
    dag = DAG()

    client = SQLAlchemyClient(db_credentials['uri'])

    dag.clients[SQLScript] = client
    dag.clients[PostgresRelation] = client

    conn = pg_client.connection
    cur = conn.cursor()
    cur.execute('drop table if exists series;')
    conn.commit()
    conn.close()

    ta_t = """begin;
              drop table if exists {{product}};
              create table {{product}} as
              select * from generate_series(0, 15) as n;
              commit;"""
    ta_rel = PostgresRelation((None, 'series', 'table'))
    ta = SQLScript(ta_t, ta_rel, dag, 'ta')

    dag.build()

    assert ta_rel.exists()
예제 #2
0
def test_can_upload_file_from_upstream_dependency(tmp_directory,
                                                  pg_client_and_schema):

    pg_client, schema = pg_client_and_schema

    dag = DAG()

    dag.clients[SQLUpload] = pg_client
    dag.clients[PostgresRelation] = pg_client

    make = PythonCallable(make_data,
                          product=File('data.parquet'),
                          dag=dag,
                          name='make')

    name = 'test_can_upload_file_from_upstream_dependency'
    pg = SQLUpload('{{upstream["make"]}}',
                   product=PostgresRelation((schema, name, 'table')),
                   dag=dag,
                   name='upload',
                   to_sql_kwargs={'if_exists': 'replace'})

    make >> pg

    dag.build()
예제 #3
0
def test_can_upload_file_from_upstream_dependency(tmp_directory,
                                                  pg_client_and_schema):

    pg_client, schema = pg_client_and_schema

    dag = DAG()

    dag.clients[PostgresRelation] = pg_client
    dag.clients[PostgresCopyFrom] = pg_client

    make = PythonCallable(make_data,
                          product=File('data.parquet'),
                          dag=dag,
                          name='make')

    name = 'test_can_upload_file_from_upstream_dependency'
    pg = PostgresCopyFrom('{{upstream["make"]}}',
                          product=PostgresRelation((schema, name, 'table')),
                          dag=dag,
                          name='upload')

    make >> pg

    dag.build()

    product = str(dag['upload'])
    assert pd.read_sql(f'SELECT * FROM {product}',
                       pg_client).to_dict(orient='list') == {
                           'a': [1, 2, 3]
                       }
예제 #4
0
def test_append_rows(tmp_directory, pg_client_and_schema):
    pg_client, schema = pg_client_and_schema

    df = pd.DataFrame({'a': [1, 2, 3]})
    df.to_csv('data.csv', index=False)

    dag = DAG()

    dag.clients[SQLUpload] = pg_client
    dag.clients[PostgresRelation] = pg_client

    # create table
    df.to_sql('test_append',
              pg_client.engine,
              schema=schema,
              if_exists='replace',
              index=False)

    SQLUpload('data.csv',
              product=PostgresRelation((schema, 'test_append', 'table')),
              dag=dag,
              name='upload',
              to_sql_kwargs={
                  'if_exists': 'append',
                  'index': False
              })

    dag.build()

    df = pd.read_sql('SELECT * FROM {}.test_append'.format(schema),
                     pg_client.engine)

    assert df.shape[0] == 6
예제 #5
0
def test_placeholder_is_copied_upon_initialization():
    dag = DAG()
    dag.clients[SQLScript] = Mock()
    dag.clients[PostgresRelation] = Mock()

    p = Placeholder('CREATE TABLE {{product}} AS SELECT * FROM TABLE')

    t1 = SQLScript(p,
                   PostgresRelation(('schema', 'a_table', 'table')),
                   dag,
                   name='t1')
    t2 = SQLScript(p,
                   PostgresRelation(('schema', 'another_table', 'table')),
                   dag,
                   name='t2')

    assert t1.source._placeholder is not t2.source._placeholder
예제 #6
0
def test_passing_upstream_and_product_in_postgres(pg_client_and_schema):
    client, _ = pg_client_and_schema
    dag = DAG()
    dag.clients[SQLScript] = client
    dag.clients[PostgresRelation] = client

    client.execute('drop table if exists series;')

    ta_t = """begin;
              drop table if exists {{product}};
              create table {{product}} as
              select * from generate_series(0, 15) as n;
              commit;"""
    ta_rel = PostgresRelation((None, 'series', 'table'))
    SQLScript(ta_t, ta_rel, dag, 'ta')

    dag.build()

    assert ta_rel.exists()
예제 #7
0
def client_and_prod(request, sqlite_client_and_tmp_dir, pg_client_and_schema):
    # Based on: https://github.com/pytest-dev/pytest/issues/349#issue-88534390
    if request.param == 'sqlite':
        client, _ = sqlite_client_and_tmp_dir
        product = SQLiteRelation((None, 'numbers', 'table'), client)
        schema = None
    else:
        client, schema = pg_client_and_schema
        product = PostgresRelation((schema, 'numbers', 'table'), client)

    yield client, product, schema

    product.delete()
예제 #8
0
def test_postgresscript_with_relation():
    dag = DAG()
    t = SQLScript('CREATE TABLE {{product}} AS SELECT * FROM {{name}}',
                  PostgresRelation(('user', 'table', 'table'), client=Dummy()),
                  dag,
                  name='name',
                  params=dict(name='some_table'),
                  client=Dummy())

    t.render()

    assert str(t.product) == 'user.table'
    assert (str(
        t.source) == 'CREATE TABLE user.table AS SELECT * FROM some_table')
예제 #9
0
def test_can_upload_a_file_using_a_path(tmp_directory, pg_client_and_schema):
    pg_client, schema = pg_client_and_schema

    df = pd.DataFrame({'a': [1, 2, 3]})
    df.to_parquet('data.parquet')

    dag = DAG()

    dag.clients[PostgresRelation] = pg_client
    dag.clients[SQLUpload] = pg_client

    SQLUpload(Path('data.parquet'),
              product=PostgresRelation(
                  (schema, 'test_can_upload_a_file', 'table')),
              dag=dag,
              name='upload')

    dag.build()
예제 #10
0
def test_can_upload_a_file(serializer, task_arg, tmp_directory,
                           pg_client_and_schema):
    pg_client, schema = pg_client_and_schema

    df = pd.DataFrame({'a': [1, 2, 3]})
    getattr(df, serializer)(task_arg)

    dag = DAG()

    dag.clients[SQLUpload] = pg_client
    dag.clients[PostgresRelation] = pg_client

    SQLUpload(task_arg,
              product=PostgresRelation(
                  (schema, 'test_can_upload_a_file', 'table')),
              dag=dag,
              name='upload',
              to_sql_kwargs={'if_exists': 'replace'})

    dag.build()
예제 #11
0
def test_can_upload_file_from_upstream_dependency(tmp_directory,
                                                  pg_client_and_schema):

    pg_client, schema = pg_client_and_schema

    dag = DAG()

    dag.clients[PostgresRelation] = pg_client
    dag.clients[PostgresCopyFrom] = pg_client

    make = PythonCallable(make_data,
                          product=File('data.parquet'),
                          dag=dag,
                          name='make')

    name = 'test_can_upload_file_from_upstream_dependency'
    pg = PostgresCopyFrom('{{upstream["make"]}}',
                          product=PostgresRelation((schema, name, 'table')),
                          dag=dag,
                          name='upload')

    make >> pg

    dag.build()
예제 #12
0
def test_can_upload_a_file_using_a_path(tmp_directory, pg_client_and_schema):
    pg_client, schema = pg_client_and_schema

    df = pd.DataFrame({'a': [1, 2, 3]})
    df.to_parquet('data.parquet')

    dag = DAG()

    dag.clients[PostgresRelation] = pg_client
    dag.clients[PostgresCopyFrom] = pg_client

    PostgresCopyFrom(Path('data.parquet'),
                     product=PostgresRelation(
                         (schema, 'test_can_upload_a_file', 'table')),
                     dag=dag,
                     name='upload')

    dag.build()

    product = str(dag['upload'])
    assert pd.read_sql(f'SELECT * FROM {product}',
                       pg_client).to_dict(orient='list') == {
                           'a': [1, 2, 3]
                       }
예제 #13
0
      File('data.csv')],
     [
         SQLScript, 'CREATE TABLE {{product}} AS SELECT * FROM my_table',
         SQLRelation(['schema', 'name', 'table'])
     ],
     [
         SQLTransfer, 'SELECT * FROM my_table',
         SQLiteRelation(['schema', 'name', 'table'])
     ],
     [
         SQLUpload, 'SELECT * FROM my_table',
         SQLiteRelation(['schema', 'name', 'table'])
     ],
     [
         PostgresCopyFrom, 'SELECT * FROM my_table',
         PostgresRelation(['schema', 'name', 'table'])
     ]])
def test_exception_if_missing_task_client(task_class, task_arg, product):

    task = task_class(task_arg, product, dag=DAG(), name='task')

    with pytest.raises(MissingClientError):
        task.client


@pytest.mark.parametrize('product_class, arg', [
    [SQLiteRelation, ['name', 'schema', 'table']],
    [PostgresRelation, ['name', 'schema', 'table']],
    [GenericSQLRelation, ['name', 'schema', 'table']],
    [GenericProduct, 'something'],
    [File, 'something'],
예제 #14
0
def test_postgres_relation_is_picklable():
    rel = PostgresRelation(('schema', 'name', 'table'))
    pickle.loads(pickle.dumps(rel))
예제 #15
0
def test_exists_with_empty_schema(pg_client_and_schema):
    client, schema = pg_client_and_schema
    product = PostgresRelation(('data', 'table'), client)
    assert product.exists()
예제 #16
0
                       name='get_data')

sample = PythonCallable(sample,
                        (File(env.path.input / 'sample' / 'red.csv'),
                         File(env.path.input / 'sample' / 'white.csv')),
                        name='sample',
                        dag=dag)
get_data >> sample

# Tasks can have parameters, on this case we are passing a uri parameter to the csvsql command line
# utility, furthermore, since we declared a product and an upstream dependency, those are
# available as parameters as well. This task does not create a File but a PostgresRelation,
# this type of product is a a table named red in the public schema
red_task = ShellScript(('csvsql --db {{uri}} --tables {{product.name}} --insert {{upstream["sample"][0]}} '
                        '--overwrite'),
                       PostgresRelation(('public', 'red', 'table')),
                       dag,
                       params=dict(uri=uri),
                       name='red')
sample >> red_task

white_task = ShellScript(('csvsql --db {{uri}} --tables {{product.name}} --insert {{upstream["sample"][1]}} '
                          '--overwrite'),
                         PostgresRelation(('public', 'white', 'table')),
                         dag,
                         params=dict(uri=uri),
                         name='white')
sample >> white_task


# let's introduce a new type of task, a SQLScript, this task will execute
예제 #17
0
    ],
    [SQLDump, dict(source='SELECT * FROM some_tablle', kwargs={})],
])
def test_init_source(class_, kwargs):
    assert class_._init_source(**kwargs)


@pytest.mark.parametrize('Task, prod, source', [
    (ShellScript, GenericProduct('file.txt'), 'touch {{product}}'),
    (SQLScript, GenericSQLRelation(
        ('name', 'table')), 'CREATE TABLE {{product}}'),
    (SQLDump, GenericProduct('file.txt'), 'SELECT * FROM {{upstream["key"]}}'),
    (SQLTransfer, GenericSQLRelation(
        ('name', 'table')), 'SELECT * FROM {{upstream["key"]}}'),
    (SQLUpload, GenericSQLRelation(('name', 'table')), 'some_file.txt'),
    (PostgresCopyFrom, PostgresRelation(('name', 'table')), 'file.parquet')
])
def test_task_init_source_with_placeholder_obj(Task, prod, source):
    """
    Testing we can initialize a task with a Placeholder as the source argument
    """
    dag = DAG()
    dag.clients[Task] = Mock()
    dag.clients[type(prod)] = Mock()

    Task(Placeholder(source), prod, dag, name='task')


def test_task_build_clears_cached_status(tmp_directory):
    dag = DAG()
    t = PythonCallable(touch, File('my_file'), dag)