def __init__(self):
    """Build a small randomized relation fixture set plus shared join keys.

    Creates six relations with random database/schema values, then wires them:
      * downstream/upstream share ``directional_key`` (INTEGER)
      * birelation_left/right each carry their own bidirectional VARCHAR key
      * view_relation is materialized as a VIEW
      * every relation starts with an empty ``compiled_query``
    """
    self.downstream_relation = Relation(name='downstream_relation', **self.rand_relation_helper())
    self.upstream_relation = Relation(name='upstream_relation', **self.rand_relation_helper())
    self.iso_relation = Relation(name='iso_relation', **self.rand_relation_helper())
    self.birelation_left = Relation(name='birelation_left', **self.rand_relation_helper())
    self.birelation_right = Relation(name='birelation_right', **self.rand_relation_helper())
    self.view_relation = Relation(name='view_relation', **self.rand_relation_helper())

    # BUG FIX: these two assignments previously ended with trailing commas,
    # turning the keys into 1-tuples (e.g. ('abc123',)) instead of strings;
    # the tuples then leaked into the Attribute names built below.
    self.bidirectional_key_left = rand_string(10)
    self.bidirectional_key_right = rand_string(8)
    self.directional_key = rand_string(15)

    # update specifics
    self.view_relation.materialization = mz.VIEW

    for n in ('downstream_relation', 'upstream_relation',):
        self.__dict__[n].attributes = [Attribute(self.directional_key, dt.INTEGER)]

    self.birelation_right.attributes = [Attribute(self.bidirectional_key_right, dt.VARCHAR)]
    self.birelation_left.attributes = [Attribute(self.bidirectional_key_left, dt.VARCHAR)]

    for r in ('downstream_relation', 'upstream_relation', 'iso_relation',
              'birelation_left', 'birelation_right', 'view_relation',):
        self.__dict__[r].compiled_query = ''
def __init__(self):
    """Stub container exposing one randomly-named Attribute per common data type."""
    # Only the data type differs between the three attributes.
    self.string_attribute = Attribute(name=rand_string(10), data_type=dt.VARCHAR)
    self.integer_attribute = Attribute(name=rand_string(10), data_type=dt.INTEGER)
    self.double_attribute = Attribute(name=rand_string(10), data_type=dt.DOUBLE)
def tests_incremental_flag(graph, stub_configs):
    """The replica's incremental flag must be forwarded to the target adapter."""
    graph.return_value.graph = mock.Mock()
    replica = ReplicaFactory()
    replica.load_config(stub_configs())
    replica.incremental = rand_string(10)
    # Swap in a mock adapter so we can inspect the initialize call.
    adapter = replica.config.target_profile.adapter = mock.Mock(spec=BaseTargetAdapter)

    outcome = replica.create(rand_string(10), False)

    adapter.initialize_replica.assert_called_once_with('default', replica.incremental)
    adapter.build_catalog.assert_called()
    assert 'image up-to-date' in outcome
def test_directional_statement():
    """predicate_constraint_statement should wrap the core query in an IN-subselect."""
    sf = SnowflakeAdapter()
    DATABASE, SCHEMA, TABLE, LOCAL_KEY, REMOTE_KEY = (rand_string(10) for _ in range(5))
    relation = Relation(database=DATABASE, schema=SCHEMA, name=TABLE,
                        materialization=TABLE, attributes=[])
    relation.core_query = f"""
SELECT
    *
FROM
    {DATABASE}.{SCHEMA}.{TABLE}
SAMPLE BERNOULLI (10)
"""
    statement = sf.predicate_constraint_statement(relation, True, LOCAL_KEY, REMOTE_KEY)
    # The directional constraint selects the remote key out of the sampled
    # core query, aliased to the local key.
    assert query_equalize(statement) == query_equalize(f"""
{LOCAL_KEY} IN
    ( SELECT
        {REMOTE_KEY}
      AS {LOCAL_KEY}
      FROM (
SELECT
    *
FROM
    {DATABASE}.{SCHEMA}.{TABLE}
SAMPLE BERNOULLI (10)
))
""")
def test_init_cli_sad_path(tmpdir):
    """make sure init does not overwrite"""
    runner = CliRunner()
    pathdir = tmpdir.mkdir(rand_string(10)).strpath
    # Pre-create replica.yml so the init command has a collision to refuse.
    (Path(pathdir) / 'replica.yml').touch()
    result = runner.invoke(main.cli, ('init', pathdir))
    assert result.exit_code == 1
def test_init_cli_happy_path(tmpdir):
    """init should scaffold replica.yml and credentials.yml in a fresh directory."""
    runner = CliRunner()
    pathdir = tmpdir.mkdir(rand_string(10)).strpath
    result = runner.invoke(main.cli, ('init', pathdir))
    assert result.exit_code == 0
    for scaffolded in ('replica.yml', 'credentials.yml'):
        assert os.path.isfile(os.path.join(pathdir, scaffolded))
    assert f"sample files created in directory {os.path.abspath(pathdir)}" in result.output
def test_errors_on_bad_profile(stub_configs):
    """Unknown source/storage profile names should make the config parser raise."""
    stub_configs = stub_configs()
    # NOTE(review): the original also generated a TARGET_PROFILE that was
    # never applied to the config — removed as dead code. The target profile
    # is deliberately left untouched.
    SOURCE_PROFILE, STORAGE_PROFILE = [rand_string(10) for _ in range(2)]
    stub_configs['source']['profile'] = SOURCE_PROFILE
    stub_configs['storage']['profile'] = STORAGE_PROFILE
    with pytest.raises(ValueError):
        mock_config_file = StringIO(yaml.dump(stub_configs))
        ConfigurationParser().from_file_or_path(mock_config_file)
def test_logger_debug_log_level(temp_log):
    """At DEBUG level, every severity (including debug) must reach the log."""
    log_engine = Logger()
    log_engine.initialize_logger(log_file_location=temp_log.strpath)
    log_engine.set_log_level(logging.DEBUG)
    logger = log_engine.logger
    with LogCapture() as capture:
        # One random marker string per severity so records are distinguishable.
        messages = {level: rand_string(10)
                    for level in ('WARNING', 'ERROR', 'INFO', 'DEBUG')}
        logger.warning(messages['WARNING'])
        logger.error(messages['ERROR'])
        logger.info(messages['INFO'])
        logger.debug(messages['DEBUG'])
        capture.check(
            ('snowshu', 'WARNING', messages['WARNING']),
            ('snowshu', 'ERROR', messages['ERROR']),
            ('snowshu', 'INFO', messages['INFO']),
            ('snowshu', 'DEBUG', messages['DEBUG']),
        )
def test_logger_always_logs_debug_to_file(temp_log):
    """Regardless of the configured log level, debug lines must land in the file."""
    # NOTE(review): removed the unused local tuple `levels` — the loop below
    # iterates the module-level LOG_LEVELS, not that local.
    log_engine = Logger()
    log_engine.initialize_logger(log_file_location=temp_log.strpath)
    for level in LOG_LEVELS:
        log_engine.set_log_level(level)
        message = rand_string(10)
        log_engine.logger.debug(message)
        # The newest line in the file must be the DEBUG record just emitted.
        with open(temp_log.strpath) as tmp:
            last_line = tmp.readlines()[-1]
        assert 'DEBUG' in last_line
        assert message in last_line
def test_conn_string_basic():
    """Credentials should render into a snowflake sqlalchemy connection URL."""
    sf = SnowflakeAdapter()
    USER, PASSWORD, ACCOUNT, DATABASE = (rand_string(15) for _ in range(4))
    sf.credentials = Credentials(user=USER, password=PASSWORD,
                                 account=ACCOUNT, database=DATABASE)
    conn_string = sf.get_connection()
    expected_url = f'snowflake://{USER}:{PASSWORD}@{ACCOUNT}/{DATABASE}/'
    assert str(conn_string.url) == expected_url
def test_sample_statement():
    """sample_statement_from_relation should emit a BERNOULLI sample query."""
    sf = SnowflakeAdapter()
    DATABASE, SCHEMA, TABLE = (rand_string(10) for _ in range(3))
    relation = Relation(database=DATABASE, schema=SCHEMA, name=TABLE,
                        materialization=TABLE, attributes=[])
    sample = sf.sample_statement_from_relation(
        relation, BernoulliSampleMethod(10, units="probability"))
    assert query_equalize(sample) == query_equalize(f"""
SELECT
    *
FROM
    {DATABASE}.{SCHEMA}.{TABLE}
SAMPLE BERNOULLI (10)
""")
def test_sample_args_valid(run, replica):
    """`snowshu --debug create --replica-file X` passes X through and sets DEBUG."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        logger = Logger().logger
        tempfile = Path('./test-file.yml')
        tempfile.touch()
        EXPECTED_REPLICA_FILE = tempfile.absolute()
        # NOTE(review): removed unused locals EXPECTED_TAG and EXPECTED_DEBUG
        # (generated but never asserted) and the unused invoke() result.
        runner.invoke(main.cli, (
            '--debug',
            'create',
            '--replica-file', EXPECTED_REPLICA_FILE,
        ))
        replica.assert_called_once_with(EXPECTED_REPLICA_FILE)
        assert logger.getEffectiveLevel() == DEBUG
def test_loads_good_creds(stub_creds, stub_configs):
    """A creds entry matching the source profile yields an adapter with that password."""
    stub_creds = stub_creds()
    stub_configs = stub_configs()
    # NOTE(review): removed the unused STORAGES_ACCOUNT local — it was
    # generated but never written into creds or configs.
    SOURCES_NAME, SOURCES_PASSWORD = [rand_string(10) for _ in range(2)]
    with tempfile.NamedTemporaryFile(mode='w') as mock_file:
        stub_creds['sources'][0]['name'] = SOURCES_NAME
        stub_creds['sources'][0]['password'] = SOURCES_PASSWORD
        stub_configs['source']['profile'] = SOURCES_NAME
        json.dump(stub_creds, mock_file)
        mock_file.seek(0)  # flush so the parser can re-read the temp file
        stub_configs['credpath'] = mock_file.name
        adapter_profile = ConfigurationParser()._build_adapter_profile('source', stub_configs)
        assert adapter_profile.name == SOURCES_NAME
        assert adapter_profile.adapter.credentials.password == SOURCES_PASSWORD
def test_analyze_wrap_statement():
    """analyze_wrap_statement should sandwich the sample SQL between count CTEs."""
    sf = SnowflakeAdapter()
    DATABASE, SCHEMA, NAME = [rand_string(10) for _ in range(3)]
    relation = Relation(database=DATABASE, schema=SCHEMA, name=NAME,
                        materialization=TABLE, attributes=[])
    # FIX: dropped a needless f-prefix on a placeholder-free string literal.
    sql = "SELECT * FROM some_crazy_query"
    statement = sf.analyze_wrap_statement(sql, relation)
    assert query_equalize(statement) == query_equalize(f"""
WITH {relation.scoped_cte('SNOWSHU_COUNT_POPULATION')} AS (
SELECT
    COUNT(*) AS population_size
FROM
    {relation.quoted_dot_notation}
)
,{relation.scoped_cte('SNOWSHU_CORE_SAMPLE')} AS (
{sql}
)
,{relation.scoped_cte('SNOWSHU_CORE_SAMPLE_COUNT')} AS (
SELECT
    COUNT(*) AS sample_size
FROM
    {relation.scoped_cte('SNOWSHU_CORE_SAMPLE')}
)
SELECT
    s.sample_size AS sample_size
    ,p.population_size AS population_size
FROM
    {relation.scoped_cte('SNOWSHU_CORE_SAMPLE_COUNT')} s
INNER JOIN
    {relation.scoped_cte('SNOWSHU_COUNT_POPULATION')} p
ON
    1=1
LIMIT 1
""")
def test_launch_docker_cmd(docker, mock_docker_image):
    """A known replica should produce the full 'docker run' command string."""
    replica_name = rand_string(10)
    docker.return_value = [mock_docker_image.get_image(replica_name)]
    launch_cmd = ReplicaManager.launch_docker_command(replica_name)
    assert launch_cmd == (
        f'docker run -d -p 9999:9999 --rm --name {replica_name} '
        f'snowshu_replica_{replica_name}'
    )
def test_launch_docker_cmd_bad(docker, mock_docker_image):
    """A replica name with no matching image should yield a not-found message."""
    replica_name = 'does_not_exist'
    # The only available image has a different (random) name, so no match.
    docker.return_value = [mock_docker_image.get_image(rand_string(10))]
    result = ReplicaManager.launch_docker_command(replica_name)
    # FIX: the expected string carried an f-prefix with no placeholder;
    # interpolate the replica name so the assertion tracks the variable.
    assert result == f'No replica found for {replica_name}.'
def tests_replica_rename(_, build_graph, get_graphs, stub_configs):
    """The name passed to create() must be applied to the built graph."""
    replica = ReplicaFactory()
    replica.load_config(stub_configs())
    expected_name = rand_string(10)
    replica.create(expected_name, False)
    # First positional arg of the first build_graph call is the configuration.
    assert build_graph.call_args[0][0].name == expected_name
def rand_creds(args) -> Credentials:
    """Build a Credentials object with a random 10-char value for each field in *args*."""
    return Credentials(**{field: rand_string(10) for field in args})
def __init__(self):
    """Assemble the full randomized relation-graph fixture used by graph tests.

    Builds basic, wildcard, and parent/child relations with random
    database/schema values, generates the shared join keys, and wires
    attributes so the graph builder can discover the intended edges.
    """
    # Independently-randomized relations (attribute name == relation name).
    for rel_name in ('downstream_relation', 'upstream_relation', 'iso_relation',
                     'birelation_left', 'birelation_right', 'view_relation',
                     'downstream_wildcard_relation_1', 'downstream_wildcard_relation_2',
                     'parent_relation_childid_type', 'parent_relation_parentid'):
        setattr(self, rel_name, Relation(name=rel_name, **self.rand_relation_helper()))

    # Child relations: the attribute name differs from the relation's own name.
    for idx in (1, 2, 3):
        setattr(self, f'child_relation_type_{idx}',
                Relation(name=f'child_type_{idx}_records', **self.rand_relation_helper()))

    # Wildcard upstream relations deliberately share database/schema with
    # their downstream counterparts so wildcard matching can find them.
    for idx in (1, 2):
        downstream = getattr(self, f'downstream_wildcard_relation_{idx}')
        setattr(self, f'upstream_wildcard_relation_{idx}',
                Relation(name=f'upstream_wildcard_relation_{idx}',
                         schema=downstream.schema,
                         database=downstream.database,
                         materialization=mz.TABLE,
                         attributes=[]))

    # Shared join keys.
    self.bidirectional_key_left = rand_string(10)
    self.bidirectional_key_right = rand_string(8)
    self.directional_key = rand_string(15)
    self.parentid_key = rand_string(15)
    self.childid_key = rand_string(15)
    self.childtype_key = rand_string(15)
    self.child2override_key = rand_string(20)

    # update specifics
    self.view_relation.materialization = mz.VIEW

    for n in ('downstream_relation', 'upstream_relation',
              'downstream_wildcard_relation_1', 'downstream_wildcard_relation_2',
              'upstream_wildcard_relation_1', 'upstream_wildcard_relation_2'):
        getattr(self, n).attributes = [Attribute(self.directional_key, dt.INTEGER)]

    for n in ('child_relation_type_1', 'child_relation_type_2', 'child_relation_type_3'):
        getattr(self, n).attributes = [
            Attribute(self.parentid_key, dt.VARCHAR),
            Attribute(self.childid_key, dt.VARCHAR),
        ]

    self.parent_relation_childid_type.attributes = [
        Attribute(self.childid_key, dt.VARCHAR),
        Attribute(self.childtype_key, dt.VARCHAR),
    ]
    self.parent_relation_parentid.attributes = [Attribute(self.parentid_key, dt.VARCHAR)]
    self.birelation_right.attributes = [Attribute(self.bidirectional_key_right, dt.VARCHAR)]
    self.birelation_left.attributes = [Attribute(self.bidirectional_key_left, dt.VARCHAR)]

    # Every relation starts with an empty compiled query.
    for r in ('downstream_relation', 'upstream_relation', 'iso_relation',
              'birelation_left', 'birelation_right', 'view_relation',
              'downstream_wildcard_relation_1', 'downstream_wildcard_relation_2',
              'upstream_wildcard_relation_1', 'upstream_wildcard_relation_2',
              'child_relation_type_1', 'child_relation_type_2', 'child_relation_type_3',
              'parent_relation_childid_type', 'parent_relation_parentid'):
        getattr(self, r).compiled_query = ''
def rand_relation_helper(self) -> dict:
    """Common randomized constructor kwargs for a TABLE-materialized Relation."""
    return {
        'database': rand_string(10),
        'schema': rand_string(15),
        'materialization': mz.TABLE,
        'attributes': [],
    }
import time import docker import pytest from sqlalchemy import create_engine from snowshu.adapters.target_adapters import PostgresAdapter from snowshu.core.docker import SnowShuDocker from snowshu.logger import Logger from tests.common import rand_string from tests.integration.snowflake.test_end_to_end import DOCKER_SPIN_UP_TIMEOUT Logger().set_log_level(0) TEST_NAME, TEST_TABLE = [rand_string(10) for _ in range(2)] def test_creates_replica(docker_flush): # build image # load it up with some data # convert it to a replica # spin it all down # start the replica # query it and confirm that the data is in there shdocker = SnowShuDocker() target_adapter = PostgresAdapter() target_container = shdocker.startup(target_adapter.DOCKER_IMAGE, target_adapter.DOCKER_START_COMMAND, 9999, target_adapter, 'SnowflakeAdapter', [