예제 #1
0
    def __init__(self):
        """Build a fixed set of relations with random scoping and join keys.

        Six relations are created, each named after the attribute that holds
        it and scoped with the values returned by ``rand_relation_helper``.
        Three random key strings are generated and attached to the relations
        as single-element attribute lists, ``view_relation`` is switched to
        the VIEW materialization, and every relation gets an empty
        ``compiled_query``.
        """
        self.downstream_relation = Relation(
            name='downstream_relation', **self.rand_relation_helper())
        self.upstream_relation = Relation(
            name='upstream_relation', **self.rand_relation_helper())
        self.iso_relation = Relation(
            name='iso_relation', **self.rand_relation_helper())
        self.birelation_left = Relation(
            name='birelation_left', **self.rand_relation_helper())
        self.birelation_right = Relation(
            name='birelation_right', **self.rand_relation_helper())
        self.view_relation = Relation(
            name='view_relation', **self.rand_relation_helper())

        # Keys must be plain strings. A trailing comma after the call would
        # turn each value into a 1-tuple, which would then be handed to
        # Attribute below in place of the string.
        self.bidirectional_key_left = rand_string(10)
        self.bidirectional_key_right = rand_string(8)
        self.directional_key = rand_string(15)

        # update specifics
        self.view_relation.materialization = mz.VIEW

        # Both ends of the directional pair carry the same INTEGER key.
        for n in ('downstream_relation', 'upstream_relation',):
            self.__dict__[n].attributes = [
                Attribute(self.directional_key, dt.INTEGER)]

        # Each side of the bidirectional pair carries its own VARCHAR key.
        self.birelation_right.attributes = [
            Attribute(self.bidirectional_key_right, dt.VARCHAR)]
        self.birelation_left.attributes = [
            Attribute(self.bidirectional_key_left, dt.VARCHAR)]

        for r in ('downstream_relation', 'upstream_relation', 'iso_relation', 'birelation_left', 'birelation_right', 'view_relation',):
            self.__dict__[r].compiled_query = ''
예제 #2
0
 def __init__(self):
     """Create one randomly named Attribute per data type.

     Sets ``string_attribute`` (VARCHAR), ``integer_attribute`` (INTEGER)
     and ``double_attribute`` (DOUBLE), each with a 10-character random name
     requested from ``rand_string``.
     """
     typed_fields = (
         ('string_attribute', dt.VARCHAR),
         ('integer_attribute', dt.INTEGER),
         ('double_attribute', dt.DOUBLE),
     )
     for field_name, data_type in typed_fields:
         setattr(self, field_name,
                 Attribute(name=rand_string(10), data_type=data_type))
예제 #3
0
def tests_incremental_flag(graph, stub_configs):
    """Check that create() hands the incremental value to the target adapter.

    The target adapter is replaced with a mock constrained to the
    ``BaseTargetAdapter`` spec. After ``create`` runs, the mock must have
    been asked once to initialize the replica with ``'default'`` and the
    incremental value, the catalog must have been built, and the result
    must mention ``'image up-to-date'``.
    """
    graph.return_value.graph = mock.Mock()

    factory = ReplicaFactory()
    factory.load_config(stub_configs())

    replica_name = rand_string(10)
    factory.incremental = rand_string(10)

    mocked_adapter = mock.Mock(spec=BaseTargetAdapter)
    factory.config.target_profile.adapter = mocked_adapter

    outcome = factory.create(replica_name, False)

    mocked_adapter.initialize_replica.assert_called_once_with(
        'default', factory.incremental)
    mocked_adapter.build_catalog.assert_called()
    assert 'image up-to-date' in outcome
def test_directional_statement():
    """Check the predicate SQL produced from a relation's core query.

    A relation is given a Bernoulli-sample core query, and the adapter's
    ``predicate_constraint_statement`` (called with ``True`` plus the local
    and remote key names) is expected to yield an ``IN (SELECT ...)``
    predicate wrapping that query. Both sides are passed through
    ``query_equalize`` before comparison.
    """
    sf = SnowflakeAdapter()
    DATABASE, SCHEMA, TABLE, LOCAL_KEY, REMOTE_KEY = (
        rand_string(10) for _ in range(5))

    # NOTE(review): TABLE is a random string here and is used for both the
    # relation name and its materialization -- confirm this is intended.
    relation = Relation(
        database=DATABASE,
        schema=SCHEMA,
        name=TABLE,
        materialization=TABLE,
        attributes=[],
    )
    relation.core_query = f"""
SELECT
    *
FROM 
    {DATABASE}.{SCHEMA}.{TABLE}
    SAMPLE BERNOULLI (10)
"""

    actual = sf.predicate_constraint_statement(
        relation, True, LOCAL_KEY, REMOTE_KEY)
    expected = f"""
{LOCAL_KEY} IN 
    ( SELECT  
        {REMOTE_KEY}
      AS {LOCAL_KEY}
    FROM (
SELECT
    *
FROM 
    {DATABASE}.{SCHEMA}.{TABLE}
    SAMPLE BERNOULLI (10)
))
"""
    assert query_equalize(actual) == query_equalize(expected)
예제 #5
0
def test_init_cli_sad_path(tmpdir):
    """Make sure ``init`` exits with code 1 when replica.yml already exists."""
    target_dir = tmpdir.mkdir(rand_string(10)).strpath

    # Pre-create the file that ``init`` would otherwise write.
    existing_replica_file = Path(os.path.join(target_dir, 'replica.yml'))
    existing_replica_file.touch()

    outcome = CliRunner().invoke(main.cli, ('init', target_dir))
    assert outcome.exit_code == 1
예제 #6
0
def test_init_cli_happy_path(tmpdir):
    """Check that ``init`` on an empty directory writes both sample files.

    The command must exit with code 0, create ``replica.yml`` and
    ``credentials.yml`` in the target directory, and report the absolute
    directory path in its output.
    """
    target_dir = tmpdir.mkdir(rand_string(10)).strpath

    outcome = CliRunner().invoke(main.cli, ('init', target_dir))

    assert outcome.exit_code == 0
    for sample_name in ('replica.yml', 'credentials.yml'):
        assert os.path.isfile(os.path.join(target_dir, sample_name))
    expected_message = f"sample files created in directory {os.path.abspath(target_dir)}"
    assert expected_message in outcome.output
예제 #7
0
def test_errors_on_bad_profile(stub_configs):
    """Check that parsing a config with random profile names raises ValueError.

    The source and storage profile names in the stub configuration are
    replaced with random strings before the configuration is serialized to
    YAML and handed to ``ConfigurationParser.from_file_or_path``.
    """
    configs = stub_configs()
    source_profile, storage_profile = (rand_string(10) for _ in range(2))
    configs['source']['profile'] = source_profile
    configs['storage']['profile'] = storage_profile

    # Serialize outside the ``raises`` block so that only the parser call can
    # satisfy the expectation; a ValueError raised during setup must fail
    # the test rather than pass it.
    mock_config_file = StringIO(yaml.dump(configs))

    with pytest.raises(ValueError):
        ConfigurationParser().from_file_or_path(mock_config_file)
예제 #8
0
def test_logger_debug_log_level(temp_log):
    """Check that all four levels are captured once the level is DEBUG.

    One random message is emitted at each of WARNING, ERROR, INFO and DEBUG
    (in that order) and the capture must contain exactly those records under
    the ``'snowshu'`` logger name.
    """
    log_engine = Logger()
    log_engine.initialize_logger(log_file_location=temp_log.strpath)
    log_engine.set_log_level(logging.DEBUG)
    logger = log_engine.logger

    with LogCapture() as captured:
        error_msg, info_msg, debug_msg, warning_msg = (
            rand_string(10) for _ in range(4))
        # (level name, emitting method, message) in emission order.
        emissions = (
            ('WARNING', logger.warning, warning_msg),
            ('ERROR', logger.error, error_msg),
            ('INFO', logger.info, info_msg),
            ('DEBUG', logger.debug, debug_msg),
        )
        for _, emit, text in emissions:
            emit(text)
        captured.check(
            *(('snowshu', level_name, text)
              for level_name, _, text in emissions))
예제 #9
0
def test_logger_always_logs_debug_to_file(temp_log):
    """Check that debug messages reach the log file at every configured level.

    For each entry of ``LOG_LEVELS`` the logger level is changed, a random
    debug message is emitted, and the last line of the log file must
    contain both ``'DEBUG'`` and that message.
    """
    # NOTE(review): `levels` is never read -- the loop below iterates
    # LOG_LEVELS, which is defined outside this function. Confirm which of
    # the two was intended.
    levels = ('WARNING', 'DEBUG', 'INFO', 'CRITICAL',)
    log_engine = Logger()
    log_engine.initialize_logger(log_file_location=temp_log.strpath)

    for configured_level in LOG_LEVELS:
        log_engine.set_log_level(configured_level)
        message = rand_string(10)
        log_engine.logger.debug(message)

        with open(temp_log) as log_file:
            last_line = log_file.readlines()[-1]
        assert 'DEBUG' in last_line
        assert message in last_line
def test_conn_string_basic():
    """Check the connection URL built from user, password, account, database."""
    USER, PASSWORD, ACCOUNT, DATABASE = (rand_string(15) for _ in range(4))

    sf = SnowflakeAdapter()
    sf.credentials = Credentials(
        user=USER,
        password=PASSWORD,
        account=ACCOUNT,
        database=DATABASE,
    )

    connection = sf.get_connection()

    expected_url = f'snowflake://{USER}:{PASSWORD}@{ACCOUNT}/{DATABASE}/'
    assert str(connection.url) == expected_url
def test_sample_statement():
    """Check the sample SQL generated for a relation with a Bernoulli method.

    The adapter is given a relation and a ``BernoulliSampleMethod(10,
    units="probability")``. The statement, after ``query_equalize``, must
    match a ``SELECT *`` over the relation with ``SAMPLE BERNOULLI (10)``.
    """
    sf = SnowflakeAdapter()
    DATABASE, SCHEMA, TABLE = (rand_string(10) for _ in range(3))

    # NOTE(review): TABLE is a random string here and is used for both the
    # relation name and its materialization -- confirm this is intended.
    relation = Relation(
        database=DATABASE,
        schema=SCHEMA,
        name=TABLE,
        materialization=TABLE,
        attributes=[],
    )
    sample_method = BernoulliSampleMethod(10, units="probability")

    actual = sf.sample_statement_from_relation(relation, sample_method)
    expected = f"""
SELECT
    *
FROM 
    {DATABASE}.{SCHEMA}.{TABLE}
    SAMPLE BERNOULLI (10)
"""
    assert query_equalize(actual) == query_equalize(expected)
예제 #12
0
def test_sample_args_valid(run, replica):
    """Check that ``--debug create --replica-file`` is wired through the CLI.

    Inside an isolated filesystem, the CLI is invoked with an existing
    replica file. The ``replica`` mock must have been called once with that
    file's absolute path and the logger's effective level must equal
    ``DEBUG``.
    """
    cli_runner = CliRunner()
    with cli_runner.isolated_filesystem():
        logger = Logger().logger

        replica_path = Path('./test-file.yml')
        replica_path.touch()
        EXPECTED_REPLICA_FILE = replica_path.absolute()
        # The next two values are not used by any assertion below.
        EXPECTED_TAG = rand_string(10)
        EXPECTED_DEBUG = True

        cli_args = (
            '--debug',
            'create',
            '--replica-file',
            EXPECTED_REPLICA_FILE,
        )
        result = cli_runner.invoke(main.cli, cli_args)

        replica.assert_called_once_with(EXPECTED_REPLICA_FILE)
        assert logger.getEffectiveLevel() == DEBUG
예제 #13
0
def test_loads_good_creds(stub_creds, stub_configs):
    """Check that a source adapter profile is built from a credentials file.

    The first source entry of the stub credentials gets a random name and
    password, the credentials are written as JSON to a temporary file, and
    the configuration points at that file. The built profile must carry the
    random name and its adapter credentials the random password.
    """
    creds = stub_creds()
    configs = stub_configs()

    # STORAGES_ACCOUNT is generated but not used by this test.
    SOURCES_NAME, SOURCES_PASSWORD, STORAGES_ACCOUNT = (
        rand_string(10) for _ in range(3))

    with tempfile.NamedTemporaryFile(mode='w') as creds_file:
        first_source = creds['sources'][0]
        first_source['name'] = SOURCES_NAME
        first_source['password'] = SOURCES_PASSWORD
        configs['source']['profile'] = SOURCES_NAME

        json.dump(creds, creds_file)
        creds_file.seek(0)
        configs['credpath'] = creds_file.name

        parser = ConfigurationParser()
        adapter_profile = parser._build_adapter_profile('source', configs)

    assert adapter_profile.name == SOURCES_NAME
    assert adapter_profile.adapter.credentials.password == SOURCES_PASSWORD
def test_analyze_wrap_statement():
    """Check the analyze wrapper SQL built around an arbitrary query.

    The expected statement defines three CTEs named via
    ``relation.scoped_cte``: a population count over the relation, the
    supplied query itself, and a count over that query. It then selects
    both counts joined on ``1=1`` with ``LIMIT 1``. Both sides are passed
    through ``query_equalize`` before comparison.
    """
    sf = SnowflakeAdapter()
    database_name, schema_name, relation_name = (
        rand_string(10) for _ in range(3))
    relation = Relation(
        database=database_name,
        schema=schema_name,
        name=relation_name,
        materialization=TABLE,
        attributes=[],
    )
    sql = "SELECT * FROM some_crazy_query"

    actual = sf.analyze_wrap_statement(sql, relation)
    expected = f"""
WITH
    {relation.scoped_cte('SNOWSHU_COUNT_POPULATION')} AS (
SELECT
    COUNT(*) AS population_size
FROM
    {relation.quoted_dot_notation}
)
,{relation.scoped_cte('SNOWSHU_CORE_SAMPLE')} AS (
{sql}
)
,{relation.scoped_cte('SNOWSHU_CORE_SAMPLE_COUNT')} AS (
SELECT
    COUNT(*) AS sample_size
FROM
    {relation.scoped_cte('SNOWSHU_CORE_SAMPLE')}
)
SELECT
    s.sample_size AS sample_size
    ,p.population_size AS population_size
FROM
    {relation.scoped_cte('SNOWSHU_CORE_SAMPLE_COUNT')} s
INNER JOIN
    {relation.scoped_cte('SNOWSHU_COUNT_POPULATION')} p
ON
    1=1
LIMIT 1
"""
    assert query_equalize(actual) == query_equalize(expected)
예제 #15
0
def test_launch_docker_cmd(docker, mock_docker_image):
    """Check the ``docker run`` command returned for an existing replica.

    The ``docker`` mock returns a single image obtained from
    ``mock_docker_image`` for the replica name. The launch command must map
    port 9999, name the container after the replica and use the
    ``snowshu_replica_<name>`` image.
    """
    replica_name = rand_string(10)
    docker.return_value = [mock_docker_image.get_image(replica_name)]

    expected_cmd = f'docker run -d -p 9999:9999 --rm --name {replica_name} snowshu_replica_{replica_name}'
    assert ReplicaManager.launch_docker_command(replica_name) == expected_cmd
예제 #16
0
def test_launch_docker_cmd_bad(docker, mock_docker_image):
    """Check the message returned when no image matches the replica name.

    The only image offered by the ``docker`` mock is obtained for a random
    name, so the lookup for ``'does_not_exist'`` is expected to return the
    "No replica found" message.
    """
    unrelated_image = mock_docker_image.get_image(rand_string(10))
    docker.return_value = [unrelated_image]

    message = ReplicaManager.launch_docker_command('does_not_exist')

    assert message == 'No replica found for does_not_exist.'
예제 #17
0
def tests_replica_rename(_, build_graph, get_graphs, stub_configs):
    """Check that the name passed to create() reaches ``build_graph``.

    The first positional argument of the recorded ``build_graph`` call must
    expose the requested name through its ``name`` attribute.
    """
    factory = ReplicaFactory()
    factory.load_config(stub_configs())

    requested_name = rand_string(10)
    factory.create(requested_name, False)

    positional_args = build_graph.call_args[0]
    assert positional_args[0].name == requested_name
예제 #18
0
def rand_creds(args) -> Credentials:
    """Return Credentials whose listed fields hold random 10-char strings.

    Args:
        args: names of the Credentials keyword arguments to populate.

    Returns:
        A Credentials instance built with one random value per given name.
    """
    random_fields = {field_name: rand_string(10) for field_name in args}
    return Credentials(**random_fields)
예제 #19
0
    def __init__(self):
        """Build the full set of fixture relations, keys and attributes.

        Fifteen relations are created: the six basic ones, two downstream
        wildcard relations, two upstream wildcard relations that reuse the
        schema and database of their downstream counterparts, two parent
        relations and three child relations. Seven random key strings are
        generated and attached to the relations as attributes,
        ``view_relation`` is switched to the VIEW materialization, and every
        relation gets an empty ``compiled_query``.
        """
        # Attribute names of every relation created here, in creation order.
        relation_fields = []

        def _register(field_name, relation):
            setattr(self, field_name, relation)
            relation_fields.append(field_name)

        # Relations whose name equals the attribute that holds them.
        for field_name in ('downstream_relation', 'upstream_relation',
                           'iso_relation', 'birelation_left',
                           'birelation_right', 'view_relation',
                           'downstream_wildcard_relation_1',
                           'downstream_wildcard_relation_2'):
            _register(field_name,
                      Relation(name=field_name,
                               **self.rand_relation_helper()))

        # Upstream wildcard relations share schema and database with the
        # downstream wildcard relation of the same number.
        for suffix in ('1', '2'):
            downstream = getattr(self,
                                 f'downstream_wildcard_relation_{suffix}')
            field_name = f'upstream_wildcard_relation_{suffix}'
            _register(field_name,
                      Relation(name=field_name,
                               schema=downstream.schema,
                               database=downstream.database,
                               materialization=mz.TABLE,
                               attributes=[]))

        for field_name in ('parent_relation_childid_type',
                           'parent_relation_parentid'):
            _register(field_name,
                      Relation(name=field_name,
                               **self.rand_relation_helper()))

        # Child relations are stored under child_relation_type_N but are
        # named child_type_N_records.
        for suffix in ('1', '2', '3'):
            _register(f'child_relation_type_{suffix}',
                      Relation(name=f'child_type_{suffix}_records',
                               **self.rand_relation_helper()))

        # Random key names, each with its own length.
        for key_field, key_length in (('bidirectional_key_left', 10),
                                      ('bidirectional_key_right', 8),
                                      ('directional_key', 15),
                                      ('parentid_key', 15),
                                      ('childid_key', 15),
                                      ('childtype_key', 15),
                                      ('child2override_key', 20)):
            setattr(self, key_field, rand_string(key_length))

        # update specifics
        self.view_relation.materialization = mz.VIEW

        directional_fields = ('downstream_relation', 'upstream_relation',
                              'downstream_wildcard_relation_1',
                              'downstream_wildcard_relation_2',
                              'upstream_wildcard_relation_1',
                              'upstream_wildcard_relation_2')
        for field_name in directional_fields:
            getattr(self, field_name).attributes = [
                Attribute(self.directional_key, dt.INTEGER),
            ]

        for suffix in ('1', '2', '3'):
            child = getattr(self, f'child_relation_type_{suffix}')
            child.attributes = [
                Attribute(self.parentid_key, dt.VARCHAR),
                Attribute(self.childid_key, dt.VARCHAR),
            ]

        self.parent_relation_childid_type.attributes = [
            Attribute(self.childid_key, dt.VARCHAR),
            Attribute(self.childtype_key, dt.VARCHAR),
        ]
        self.parent_relation_parentid.attributes = [
            Attribute(self.parentid_key, dt.VARCHAR),
        ]

        self.birelation_right.attributes = [
            Attribute(self.bidirectional_key_right, dt.VARCHAR),
        ]
        self.birelation_left.attributes = [
            Attribute(self.bidirectional_key_left, dt.VARCHAR),
        ]

        for field_name in relation_fields:
            getattr(self, field_name).compiled_query = ''
예제 #20
0
 def rand_relation_helper(self) -> dict:
     """Return keyword arguments for a Relation with random scoping.

     The dict holds a random 10-character ``database``, a random
     15-character ``schema``, the TABLE materialization and an empty
     ``attributes`` list.
     """
     return {
         'database': rand_string(10),
         'schema': rand_string(15),
         'materialization': mz.TABLE,
         'attributes': [],
     }
예제 #21
0
import time

import docker
import pytest
from sqlalchemy import create_engine

from snowshu.adapters.target_adapters import PostgresAdapter
from snowshu.core.docker import SnowShuDocker
from snowshu.logger import Logger
from tests.common import rand_string
from tests.integration.snowflake.test_end_to_end import DOCKER_SPIN_UP_TIMEOUT

# Runs at import time: sets the log level on a Logger instance to 0.
# NOTE(review): 0 is presumably the stdlib logging NOTSET value -- confirm
# how Logger.set_log_level interprets it.
Logger().set_log_level(0)

# Two independent random 10-character strings shared by this module.
TEST_NAME, TEST_TABLE = [rand_string(10) for _ in range(2)]


def test_creates_replica(docker_flush):
    # build image
    # load it up with some data
    # convert it to a replica
    # spin it all down
    # start the replica
    # query it and confirm that the data is in there

    shdocker = SnowShuDocker()
    target_adapter = PostgresAdapter()
    target_container = shdocker.startup(target_adapter.DOCKER_IMAGE,
                                        target_adapter.DOCKER_START_COMMAND,
                                        9999, target_adapter,
                                        'SnowflakeAdapter', [