Beispiel #1
0
def test_can_transfer_sqlite(tmp_directory):
    """
    Check that SQLTransfer copies a table from one SQLite database to
    another without altering its contents

    >>> import tempfile
    >>> tmp_directory = tempfile.mkdtemp()
    """
    root = Path(tmp_directory)

    # create connections to 2 dbs (source and destination)
    uri_in = f'sqlite:///{root / "database_in.db"}'
    uri_out = f'sqlite:///{root / "database_out.db"}'
    client_in = SQLAlchemyClient(uri_in)
    client_out = SQLAlchemyClient(uri_out)

    # seed the source db with a 100-row, two-column table
    seed = pd.DataFrame({'a': np.arange(0, 100), 'b': np.arange(100, 200)})
    seed.to_sql('numbers', client_in.engine, index=False)

    # declare the transfer task on the DAG and execute it
    dag = DAG()
    product = SQLiteRelation((None, 'numbers2', 'table'), client=client_out)
    SQLTransfer('SELECT * FROM numbers',
                product,
                dag,
                name='transfer',
                client=client_in,
                chunksize=10)
    dag.build()

    # read the table back from both databases
    source_rows = pd.read_sql_query('SELECT * FROM numbers',
                                    client_in.engine)
    copied_rows = pd.read_sql_query('SELECT * FROM numbers2',
                                    client_out.engine)

    for db_client in (client_in, client_out):
        db_client.close()

    # the destination table must match the source exactly
    assert source_rows.equals(copied_rows)
Beispiel #2
0
def test_custom_io_handler(tmp_directory):
    """
    Check that SQLUpload loads the source file through the callable passed
    as ``io_handler`` (here, a tab-separated reader)
    """
    dag = DAG()
    client = SQLAlchemyClient('sqlite:///database.db')

    # the same client serves both the task and its product
    for key in (SQLUpload, SQLiteRelation):
        dag.clients[key] = client

    # write the expected data as a tab-separated file
    expected = pd.DataFrame({'a': [1, 2, 3], 'b': [1, 2, 3]})
    expected.to_csv('some-file.tsv', sep='\t', index=False)

    def read_tsv(path):
        # custom reader handed to the task as io_handler
        return pd.read_csv(path, sep='\t')

    product = SQLiteRelation(('my-table', 'table'))
    SQLUpload('some-file.tsv',
              product,
              dag=dag,
              name='task',
              io_handler=read_tsv,
              to_sql_kwargs=dict(index=False))

    dag.build()

    # table name contains a dash, hence the double quotes in the query
    uploaded = pd.read_sql('SELECT * FROM "my-table"', con=client)

    client.close()

    assert uploaded.equals(expected)
Beispiel #3
0
def pg_client_and_schema():
    """
    Creates a temporary schema for the testing session, drops everything
    at the end

    Yields a ``(client, schema)`` tuple: the initialized client, whose
    connections are opened with ``search_path`` set to the new schema, and
    the schema name. A sample table named ``data`` is written through the
    client's engine before yielding.

    Cleanup is guarded with try/finally so the schema is dropped and the
    client closed even when seeding the sample table fails.
    """
    db = _load_db_credentials()

    # set a new schema for this session, otherwise if two test sessions
    # are run at the same time, tests might conflict with each other
    # NOTE: avoid upper case characters, pandas.DataFrame.to_sql does not like
    # them
    schema = (''.join(random.choice(string.ascii_letters)
                      for _ in range(12))).lower()

    # initialize client, set default schema
    # info: https://www.postgresonline.com/article_pfriendly/279.html
    client = SQLAlchemyClient(db['uri'],
                              create_engine_kwargs=dict(connect_args=dict(
                                  options=f'-c search_path={schema}')))

    try:
        # create schema
        client.execute('CREATE SCHEMA {};'.format(schema))

        try:
            df = pd.DataFrame({'x': range(10)})
            df.to_sql('data', client.engine)

            yield client, schema
        finally:
            # clean up schema; only attempted once CREATE SCHEMA succeeded.
            # Without the finally, an error raised before the yield would
            # skip this statement and leave the schema behind.
            client.execute('DROP SCHEMA {} CASCADE;'.format(schema))
    finally:
        client.close()
Beispiel #4
0
def test_render_remote_with_non_file_products(tmp_directory):
    """
    Check that rendering with ``remote=True`` marks a task with a non-file
    (SQL relation) product as skipped once it has already been built
    """
    client = SQLAlchemyClient('sqlite:///my.db')

    # source table read by the SQL script
    source = pd.DataFrame({'x': range(3)})
    source.to_sql('data', client.engine)

    def build_dag(db_client):
        # fresh DAG with a single SQL task, all using the given client
        new_dag = DAG()
        for key in (SQLScript, SQLiteRelation):
            new_dag.clients[key] = db_client

        SQLScript('CREATE TABLE {{product}} AS SELECT * FROM data',
                  SQLiteRelation(['data2', 'table']),
                  dag=new_dag,
                  name='task')

        return new_dag

    # first pass: execute the pipeline so the product exists
    build_dag(client).build()

    # second pass: identical DAG, only rendered
    dag = build_dag(client)
    dag.render(remote=True)
    client.close()

    # should match the local status
    statuses = {task.exec_status for task in dag.values()}
    assert statuses == {TaskStatus.Skipped}
Beispiel #5
0
def sqlite_client_and_tmp_dir():
    """
    Creates a sqlite db with sample data and yields initialized client
    along with a temporary directory location

    Yields a ``(client, tmp_dir)`` tuple, where ``tmp_dir`` is a
    ``pathlib.Path`` to the directory holding ``my_db.db``. When the
    generator finishes, the client is closed and the temporary directory
    is deleted.
    """
    # function-scope import: the module-level import block is not visible
    # from here
    import shutil

    tmp_dir = Path(tempfile.mkdtemp())

    try:
        client = SQLAlchemyClient('sqlite:///' + str(tmp_dir / 'my_db.db'))

        try:
            df = pd.DataFrame({'x': range(10)})
            df.to_sql('data', client.engine)
            yield client, tmp_dir
        finally:
            # close even if seeding the sample table failed
            client.close()
    finally:
        # mkdtemp never removes the directory on its own; best-effort
        # removal so a locked db file cannot turn cleanup into a failure
        shutil.rmtree(tmp_dir, ignore_errors=True)
Beispiel #6
0
def pg_client():
    """
    Creates a randomly named schema, points the client's search path at it
    and yields the client; the schema is dropped and the client closed
    afterwards
    """
    credentials = _load_db_credentials()
    client = SQLAlchemyClient(credentials['uri'])

    # use a fresh schema per session so that two test sessions running at
    # the same time do not conflict with each other
    schema = ''.join(random.choice(string.ascii_letters) for _ in range(8))

    setup_statements = (
        f'CREATE SCHEMA {schema};',
        f'SET search_path TO {schema};',
    )
    for statement in setup_statements:
        client.execute(statement)

    yield client

    # clean up schema
    client.execute(f'drop schema {schema} cascade;')
    client.close()
Beispiel #7
0
def pg_client_and_schema():
    """
    Creates a randomly named lower-case schema, points the client's search
    path at it and yields ``(client, schema)``; the schema is dropped and
    the client closed afterwards
    """
    credentials = _load_db_credentials()
    client = SQLAlchemyClient(credentials['uri'])

    # use a fresh schema per session so that two test sessions running at
    # the same time do not conflict with each other
    # NOTE: lower-cased because pandas.DataFrame.to_sql does not like upper
    # case characters
    letters = [random.choice(string.ascii_letters) for _ in range(12)]
    schema = ''.join(letters).lower()

    client.execute(f'CREATE SCHEMA {schema};')
    client.execute(f'SET search_path TO {schema};')

    yield client, schema

    # clean up schema
    client.execute(f'drop schema {schema} cascade;')
    client.close()
def test_ignores_non_file_products(tmp_directory, monkeypatch):
    """
    Check that rendering a DAG whose only product is a SQL relation never
    calls ``_RemoteFile._fetch_remote_metadata``
    """
    # replace the remote metadata fetcher with a mock to record any call
    fetch_spy = Mock()
    monkeypatch.setattr(file._RemoteFile, '_fetch_remote_metadata',
                        fetch_spy)

    client = SQLAlchemyClient('sqlite:///my.db')

    dag = DAG()
    for key in (SQLScript, SQLiteRelation):
        dag.clients[key] = client
    # a File client is configured even though no task produces a File
    dag.clients[File] = LocalStorageClient('remote', path_to_project_root='.')

    product = SQLiteRelation(['data2', 'table'])
    SQLScript('CREATE TABLE {{product}} AS SELECT * FROM data',
              product,
              dag=dag,
              name='task')

    dag.render()
    client.close()

    fetch_spy.assert_not_called()