def test_extraction_with_multiple_query_result(self, mock_method):
    # type: (Any, Any) -> None
    """
    Test extraction from a list of query results
    """
    extractor = SQLAlchemyExtractor()
    extractor.results = ['test_result', 'test_result2', 'test_result3']
    extractor.init(Scoped.get_scoped_conf(conf=self.conf,
                                          scope=extractor.get_scope()))

    result = [extractor.extract() for _ in range(3)]

    self.assertEqual(len(result), 3)
    self.assertEqual(result, ['test_result', 'test_result2', 'test_result3'])
def init(self, conf: ConfigTree) -> None:
    conf = Scoped.get_scoped_conf(conf, self.get_scope()) \
        .with_fallback(conf) \
        .with_fallback(NeptuneStalenessRemovalTask.DEFAULT_CONFIG)
    self.target_nodes = list(set(conf.get_list(NeptuneStalenessRemovalTask.TARGET_NODES)))
    self.target_relations = list(set(conf.get_list(NeptuneStalenessRemovalTask.TARGET_RELATIONS)))
    self.dry_run = conf.get_bool(NeptuneStalenessRemovalTask.DRY_RUN)
    self.staleness_pct = conf.get_int(NeptuneStalenessRemovalTask.STALENESS_MAX_PCT)
    self.staleness_pct_dict = conf.get(NeptuneStalenessRemovalTask.STALENESS_PCT_MAX_DICT)
    self.graph_label_id = conf.get(NeptuneStalenessRemovalTask.GRAPH_LABEL_ID_PROPERTY_NAME)
    self.staleness_cut_off_in_seconds = conf.get_int(NeptuneStalenessRemovalTask.STALENESS_CUT_OFF_IN_SECONDS)
    self.cutoff_datetime = datetime.utcnow() - timedelta(seconds=self.staleness_cut_off_in_seconds)

    self.gremlin_client = NeptuneSessionClient()
    gremlin_client_conf = Scoped.get_scoped_conf(conf, self.gremlin_client.get_scope())
    self.gremlin_client.init(gremlin_client_conf)
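# Usage sketch, not from the source: a minimal config for the init above. The
# unscoped keys work because init falls back to the raw conf; values are
# illustrative, and anything not set here (e.g. DRY_RUN, the label-id property)
# is assumed to be covered by NeptuneStalenessRemovalTask.DEFAULT_CONFIG. The
# nested NeptuneSessionClient config is omitted for brevity.
example_staleness_conf = ConfigFactory.from_dict({
    NeptuneStalenessRemovalTask.TARGET_NODES: ['Table', 'Column'],
    NeptuneStalenessRemovalTask.TARGET_RELATIONS: ['COLUMN'],
    NeptuneStalenessRemovalTask.STALENESS_CUT_OFF_IN_SECONDS: 86400,  # one day
})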
def test_extraction_with_model_class(self):
    # type: (Any) -> None
    """
    Test Extraction using model class
    """
    config_dict = {
        'extractor.neo4j.{}'.format(Neo4jExtractor.GRAPH_URL_CONFIG_KEY): 'TEST_GRAPH_URL',
        'extractor.neo4j.{}'.format(Neo4jExtractor.CYPHER_QUERY_CONFIG_KEY): 'TEST_QUERY',
        'extractor.neo4j.{}'.format(Neo4jExtractor.NEO4J_AUTH_USER): 'TEST_USER',
        'extractor.neo4j.{}'.format(Neo4jExtractor.NEO4J_AUTH_PW): 'TEST_PW',
        'extractor.neo4j.{}'.format(Neo4jExtractor.MODEL_CLASS_CONFIG_KEY):
            'databuilder.models.table_elasticsearch_document.TableESDocument'
    }
    self.conf = ConfigFactory.from_dict(config_dict)

    with patch.object(Neo4jExtractor, '_get_driver'):
        extractor = Neo4jExtractor()
        extractor.init(Scoped.get_scoped_conf(conf=self.conf,
                                              scope=extractor.get_scope()))

        result_dict = dict(database='test_database',
                           cluster='test_cluster',
                           schema='test_schema',
                           name='test_table_name',
                           display_name='test_schema.test_table_name',
                           key='test_table_key',
                           description='test_table_description',
                           last_updated_timestamp=123456789,
                           column_names=['test_col1', 'test_col2', 'test_col3'],
                           column_descriptions=['test_description1', 'test_description2', ''],
                           total_usage=100,
                           unique_usage=5,
                           tags=['hive'],
                           badges=['badge1'],
                           schema_description='schema_description')

        extractor.results = [result_dict]
        result_obj = extractor.extract()

        self.assertIsInstance(result_obj, TableESDocument)
        self.assertDictEqual(vars(result_obj), result_dict)
def init(self, conf: ConfigTree) -> None:
    self._conf = conf

    restapi_query = self._build_restapi_query()
    self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
        restapi_query=restapi_query,
        conf=self._conf
    )

    dict_to_model_transformer = DictToModel()
    dict_to_model_transformer.init(
        conf=Scoped.get_scoped_conf(self._conf, dict_to_model_transformer.get_scope()).with_fallback(
            ConfigFactory.from_dict(
                {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_chart.DashboardChart'})))
    self._transformer = dict_to_model_transformer
def test_keypath_and_pagesize_can_be_set(self, mock_build: Any) -> None:
    config_dict = {
        f'extractor.bigquery_table_metadata.{BigQueryMetadataExtractor.PROJECT_ID_KEY}':
            'your-project-here',
        f'extractor.bigquery_table_metadata.{BigQueryMetadataExtractor.PAGE_SIZE_KEY}':
            200,
        f'extractor.bigquery_table_metadata.{BigQueryMetadataExtractor.KEY_PATH_KEY}':
            '/tmp/doesnotexist',
    }
    conf = ConfigFactory.from_dict(config_dict)

    mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)
    extractor = BigQueryMetadataExtractor()

    with self.assertRaises(FileNotFoundError):
        extractor.init(Scoped.get_scoped_conf(conf=conf,
                                              scope=extractor.get_scope()))
def test_extraction_with_empty_query_result(self):
    # type: (Any) -> None
    """
    Test extraction with empty results from query
    """
    with patch.object(Neo4jExtractor, '_get_driver'):
        extractor = Neo4jExtractor()
        extractor.init(Scoped.get_scoped_conf(conf=self.conf,
                                              scope=extractor.get_scope()))

        extractor.results = ['']
        result = extractor.extract()
        self.assertIsNone(result)
def test_consume_success(self) -> None:
    kafka_extractor = KafkaSourceExtractor()
    kafka_extractor.init(Scoped.get_scoped_conf(conf=self.conf,
                                                scope=kafka_extractor.get_scope()))

    with patch.object(kafka_extractor, 'consumer') as mock_consumer:
        mock_poll = MagicMock()
        mock_poll.error.return_value = False
        # only return once
        mock_poll.value.side_effect = ['msg']
        mock_consumer.poll.return_value = mock_poll

        records = kafka_extractor.consume()
        self.assertEqual(len(records), 1)
def test_table_without_columns(self, mock_build):
    mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, NO_COLS)
    extractor = BigQueryMetadataExtractor()
    extractor.init(Scoped.get_scoped_conf(conf=self.conf,
                                          scope=extractor.get_scope()))
    result = extractor.extract()

    self.assertEqual(result.database, 'bigquery')
    self.assertEqual(result.cluster, 'your-project-here')
    self.assertEqual(result.schema, 'fdgdfgh')
    self.assertEqual(result.name, 'nested_recs')
    self.assertEqual(result.description, "")
    self.assertEqual(result.columns, [])
    self.assertEqual(result.is_view, False)
def _init_extractor(self, programmatic_description_enabled: bool = True) -> None:
    repository_path = pathlib.Path(__file__).parent.parent.resolve() / "resources/extractor/feast/fs"
    conf = {
        f"extractor.feast.{FeastExtractor.FEAST_REPOSITORY_PATH}": repository_path,
        f"extractor.feast.{FeastExtractor.DESCRIBE_FEATURE_VIEWS}": programmatic_description_enabled,
    }
    self.extractor = FeastExtractor()
    self.extractor.init(
        Scoped.get_scoped_conf(conf=ConfigFactory.from_dict(conf),
                               scope=self.extractor.get_scope()))
def init(self, conf: ConfigTree) -> None:
    self.conf = conf
    self.consumer_config = conf.get_config(KafkaSourceExtractor.CONSUMER_CONFIG).\
        as_plain_ordered_dict()
    self.topic_names: list = conf.get_list(KafkaSourceExtractor.TOPIC_NAME_LIST)
    if not self.topic_names:
        raise Exception('Kafka topic needs to be provided by the user.')

    self.consumer_total_timeout = conf.get_int(KafkaSourceExtractor.CONSUMER_TOTAL_TIMEOUT_SEC,
                                               default=10)
    self.consumer_poll_timeout = conf.get_int(KafkaSourceExtractor.CONSUMER_POLL_TIMEOUT_SEC,
                                              default=1)
    self.transformer_thrown_exception = conf.get_bool(KafkaSourceExtractor.TRANSFORMER_THROWN_EXCEPTION,
                                                      default=False)

    # Transform the protobuf message with a transformer
    val_transformer = conf.get(KafkaSourceExtractor.RAW_VALUE_TRANSFORMER)
    if val_transformer is None:
        raise Exception('A message transformer should be provided.')
    try:
        module_name, class_name = val_transformer.rsplit(".", 1)
        mod = importlib.import_module(module_name)
        self.transformer = getattr(mod, class_name)()
    except Exception:
        raise RuntimeError('The Kafka message value deserializer class could not be instantiated!')
    if not isinstance(self.transformer, Transformer):
        raise Exception('The transformer needs to be a subclass of the base Transformer.')
    self.transformer.init(Scoped.get_scoped_conf(conf, self.transformer.get_scope()))

    # Consumer init
    try:
        # Disable enable.auto.commit
        self.consumer_config['enable.auto.commit'] = False
        self.consumer = Consumer(self.consumer_config)
        # TODO: support consuming only a subset of partitions.
        self.consumer.subscribe(self.topic_names)
    except Exception:
        raise RuntimeError('Consumer could not start correctly!')
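# Usage sketch, not from the source: the shape of config the init above reads.
# The broker address, group id, topic, and transformer path are illustrative,
# and ExampleMessageTransformer is a hypothetical Transformer subclass. The
# Kafka property keys are quoted so pyhocon keeps the dotted keys intact
# instead of treating them as nested paths.
example_kafka_conf = ConfigFactory.from_dict({
    KafkaSourceExtractor.CONSUMER_CONFIG: {
        '"bootstrap.servers"': 'localhost:9092',
        '"group.id"': 'databuilder-example',
    },
    KafkaSourceExtractor.TOPIC_NAME_LIST: ['example-topic'],
    KafkaSourceExtractor.RAW_VALUE_TRANSFORMER: 'example.transformers.ExampleMessageTransformer',
})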
def test_extraction_with_model_class(self) -> None:
    """
    Test Extraction using model class
    """
    extractor = CsvExtractor()
    extractor.init(Scoped.get_scoped_conf(conf=self.conf,
                                          scope=extractor.get_scope()))

    result = extractor.extract()
    self.assertEqual(result.name, 'test_table1')
    self.assertEqual(result.description._text, '1st test table')
    self.assertEqual(result.database, 'hive')
    self.assertEqual(result.cluster, 'gold')
    self.assertEqual(result.schema, 'test_schema')
def init(self, conf: ConfigTree) -> None:
    self._conf = conf

    restapi_query = self._build_restapi_query()
    self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
        restapi_query=restapi_query,
        conf=self._conf
    )

    # Construct the URL from several IDs via TemplateVariableSubstitutionTransformer
    transformers: List[Transformer] = []
    variable_substitution_transformer = TemplateVariableSubstitutionTransformer()
    variable_substitution_transformer.init(
        conf=Scoped.get_scoped_conf(self._conf, variable_substitution_transformer.get_scope()).with_fallback(
            ConfigFactory.from_dict({FIELD_NAME: 'url',
                                     TEMPLATE: 'https://app.mode.com/{organization}'
                                               '/reports/{dashboard_id}/queries/{query_id}'})))
    transformers.append(variable_substitution_transformer)

    # Escape backslashes, as they break the Cypher statement.
    replace_transformer = RegexStrReplaceTransformer()
    replace_transformer.init(
        conf=Scoped.get_scoped_conf(self._conf, replace_transformer.get_scope()).with_fallback(
            ConfigFactory.from_dict(
                {REGEX_REPLACE_TUPLE_LIST: [('\\', '\\\\')], ATTRIBUTE_NAME: 'query_text'})))
    transformers.append(replace_transformer)

    dict_to_model_transformer = DictToModel()
    dict_to_model_transformer.init(
        conf=Scoped.get_scoped_conf(self._conf, dict_to_model_transformer.get_scope()).with_fallback(
            ConfigFactory.from_dict(
                {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_query.DashboardQuery'})))
    transformers.append(dict_to_model_transformer)

    self._transformer = ChainedTransformer(transformers=transformers)
def test_publish_with_data_and_no_old_index(self) -> None:
    """
    Test Publish functionality with data but no index in place
    """
    mock_data = json.dumps({'KEY_DOESNOT_MATTER': 'NO_VALUE',
                            'KEY_DOESNOT_MATTER2': 'NO_VALUE2'})
    self.mock_es_client.indices.get_alias.return_value = {}

    with patch('builtins.open', mock_open(read_data=mock_data)) as mock_file:
        publisher = ElasticsearchPublisher()
        publisher.init(conf=Scoped.get_scoped_conf(conf=self.conf,
                                                   scope=publisher.get_scope()))

        # assert mock was called with test_file_path and test_file_mode
        mock_file.assert_called_once_with(self.test_file_path, self.test_file_mode)

        publisher.publish()

        # ensure the indices create endpoint was called
        default_mapping = ElasticsearchPublisher.DEFAULT_ELASTICSEARCH_INDEX_MAPPING
        self.mock_es_client.indices.create.assert_called_once_with(index=self.test_es_new_index,
                                                                   body=default_mapping)

        # bulk endpoint called once
        self.mock_es_client.bulk.assert_called_once_with(
            [{'index': {'_type': self.test_doc_type, '_index': self.test_es_new_index}},
             {'KEY_DOESNOT_MATTER': 'NO_VALUE', 'KEY_DOESNOT_MATTER2': 'NO_VALUE2'}])

        # update alias endpoint called once
        self.mock_es_client.indices.update_aliases.assert_called_once_with(
            {'actions': [{"add": {"index": self.test_es_new_index,
                                  "alias": self.test_es_alias}}]})
def test_publish_with_no_data(self) -> None:
    """
    Test Publish functionality with no data
    """
    with patch('builtins.open', mock_open(read_data='')) as mock_file:
        publisher = ElasticsearchPublisher()
        publisher.init(conf=Scoped.get_scoped_conf(conf=self.conf,
                                                   scope=publisher.get_scope()))

        # assert mock was called with test_file_path and test_file_mode
        mock_file.assert_called_with(self.test_file_path, self.test_file_mode)

        publisher.publish()
        # no calls should be made through the elasticsearch client when there is no data
        self.assertTrue(self.mock_es_client.call_count == 0)
def test_email_filter_not_counted(self, mock_build: Any) -> None:
    config_dict = {
        'extractor.bigquery_table_usage.{}'.format(BigQueryTableUsageExtractor.PROJECT_ID_KEY):
            'your-project-here',
        'extractor.bigquery_table_usage.{}'.format(BigQueryTableUsageExtractor.EMAIL_PATTERN):
            'emailFilter',
    }
    conf = ConfigFactory.from_dict(config_dict)

    mock_build.return_value = MockLoggingClient(CORRECT_DATA)
    extractor = BigQueryTableUsageExtractor()
    extractor.init(Scoped.get_scoped_conf(conf=conf,
                                          scope=extractor.get_scope()))
    result = extractor.extract()
    self.assertIsNone(result)
def from_surrounding_config(conf: ConfigTree, sql_stmt: str) -> SQLAlchemyExtractor:
    """
    A factory to create SQLAlchemyExtractors that are wrapped by another, specialized extractor.
    This function pulls the SQLAlchemy config out of the wrapping extractor's config and returns
    a newly configured SQLAlchemyExtractor.
    :param conf: A config tree from which the SQLAlchemy config still needs to be taken.
    :param sql_stmt: The SQL statement to use for extraction. Expected to be set by the
        wrapping extractor implementation, and not by the config.
    """
    ae = SQLAlchemyExtractor()
    c = Scoped.get_scoped_conf(conf, ae.get_scope()) \
        .with_fallback(ConfigFactory.from_dict({SQLAlchemyExtractor.EXTRACT_SQL: sql_stmt}))
    ae.init(c)
    return ae
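# Usage sketch, not from the source: how a wrapping extractor might delegate to
# the factory above. `outer_conf` stands in for the wrapper's own config, and
# the SQL statement is illustrative; it comes from the wrapper, not the config.
sql_stmt = 'SELECT name, description FROM example_metadata'
inner_extractor = from_surrounding_config(outer_conf, sql_stmt)
record = inner_extractor.extract()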
def init(self, conf):
    # type: (ConfigTree) -> None
    conf = conf.with_fallback(DruidMetadataExtractor.DEFAULT_CONFIG)
    self._cluster = '{}'.format(conf.get_string(DruidMetadataExtractor.CLUSTER_KEY))

    self.sql_stmt = DruidMetadataExtractor.SQL_STATEMENT.format(
        where_clause_suffix=conf.get_string(DruidMetadataExtractor.WHERE_CLAUSE_SUFFIX_KEY,
                                            default=''))

    self._alchemy_extractor = SQLAlchemyExtractor()
    sql_alch_conf = Scoped.get_scoped_conf(conf, self._alchemy_extractor.get_scope())\
        .with_fallback(ConfigFactory.from_dict({SQLAlchemyExtractor.EXTRACT_SQL: self.sql_stmt}))
    self._alchemy_extractor.init(sql_alch_conf)

    self._extract_iter = None  # type: Union[None, Iterator]
def test_table_part_of_table_date_range(self, mock_build):
    mock_build.return_value = MockBigQueryClient(ONE_DATASET, TABLE_DATE_RANGE, TABLE_DATA)
    extractor = BigQueryMetadataExtractor()
    extractor.init(Scoped.get_scoped_conf(conf=self.conf,
                                          scope=extractor.get_scope()))

    count = 0
    result = extractor.extract()
    table_name = result.name
    while result:
        count += 1
        result = extractor.extract()

    self.assertEqual(count, 1)
    self.assertEqual(table_name, 'date_range_')
def test_accepts_dataset_filter_by_label(self, mock_build):
    config_dict = {
        'extractor.bigquery_table_metadata.{}'.format(BigQueryMetadataExtractor.PROJECT_ID_KEY):
            'your-project-here',
        'extractor.bigquery_table_metadata.{}'.format(BigQueryMetadataExtractor.FILTER_KEY):
            'label.key:value'
    }
    conf = ConfigFactory.from_dict(config_dict)

    mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)
    extractor = BigQueryMetadataExtractor()
    extractor.init(Scoped.get_scoped_conf(conf=conf,
                                          scope=extractor.get_scope()))
    result = extractor.extract()
    self.assertIsInstance(result, TableMetadata)
def init(self, conf: ConfigTree) -> None:
    self._conf = conf
    self.query = """query {
        workbooks {
            id
            name
            createdAt
            description
            projectName
            projectVizportalUrlId
            vizportalUrlId
        }
    }"""

    self._extractor = self._build_extractor()

    transformers: List[Transformer] = []
    timestamp_str_to_epoch_transformer = TimestampStringToEpoch()
    timestamp_str_to_epoch_transformer.init(
        conf=Scoped.get_scoped_conf(self._conf, timestamp_str_to_epoch_transformer.get_scope()).with_fallback(
            ConfigFactory.from_dict({FIELD_NAME: 'created_timestamp'})))
    transformers.append(timestamp_str_to_epoch_transformer)

    dict_to_model_transformer = DictToModel()
    dict_to_model_transformer.init(
        conf=Scoped.get_scoped_conf(self._conf, dict_to_model_transformer.get_scope()).with_fallback(
            ConfigFactory.from_dict(
                {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_metadata.DashboardMetadata'})))
    transformers.append(dict_to_model_transformer)

    self._transformer = ChainedTransformer(transformers=transformers)
def init(self, conf):
    # type: (ConfigTree) -> None
    self._conf = conf

    restapi_query = self._build_restapi_query()
    self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
        restapi_query=restapi_query,
        conf=self._conf)

    # Construct the URL from several IDs via TemplateVariableSubstitutionTransformer
    transformers = []
    variable_substitution_transformer = TemplateVariableSubstitutionTransformer()
    variable_substitution_transformer.init(
        conf=Scoped.get_scoped_conf(self._conf, variable_substitution_transformer.get_scope()).with_fallback(
            ConfigFactory.from_dict({FIELD_NAME: 'url',
                                     TEMPLATE: 'https://app.mode.com/{organization}'
                                               '/reports/{dashboard_id}/queries/{query_id}'})))
    transformers.append(variable_substitution_transformer)

    dict_to_model_transformer = DictToModel()
    dict_to_model_transformer.init(
        conf=Scoped.get_scoped_conf(self._conf, dict_to_model_transformer.get_scope()).with_fallback(
            ConfigFactory.from_dict(
                {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_query.DashboardQuery'})))
    transformers.append(dict_to_model_transformer)

    self._transformer = ChainedTransformer(transformers=transformers)
def _init_extractor(self, programmatic_description_enabled: bool = True) -> None:
    conf = {
        f'extractor.feast.{FeastExtractor.FEAST_ENDPOINT_CONFIG_KEY}': 'feast-core.example.com:6565',
        f'extractor.feast.{FeastExtractor.FEAST_SERVICE_CONFIG_KEY}': 'unittest-feast-instance',
        f'extractor.feast.{FeastExtractor.DESCRIBE_FEATURE_TABLES}': programmatic_description_enabled,
    }
    self.extractor = FeastExtractor()
    self.extractor.init(
        Scoped.get_scoped_conf(conf=ConfigFactory.from_dict(conf),
                               scope=self.extractor.get_scope()))
    self.extractor._client = MagicMock(return_value=None)
def test_failed_jobs_should_not_be_counted(self, mock_build: Any) -> None:
    config_dict = {
        f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.PROJECT_ID_KEY}':
            'bigquery-public-data',
    }
    conf = ConfigFactory.from_dict(config_dict)

    client = MockLoggingClient(FAILURE)
    mock_build.return_value = client
    extractor = BigQueryTableUsageExtractor()
    extractor.init(Scoped.get_scoped_conf(conf=conf,
                                          scope=extractor.get_scope()))
    result = extractor.extract()
    self.assertIsNone(result)
def __init__(self, conf, task, publisher=NoopPublisher()):
    # type: (ConfigTree, Task, Publisher) -> None
    self.task = task
    self.conf = conf
    self.publisher = publisher
    self.scoped_conf = Scoped.get_scoped_conf(self.conf, self.get_scope())
    if self.scoped_conf.get_bool(DefaultJob.IS_STATSD_ENABLED, False):
        prefix = 'amundsen.databuilder.job.{}'.format(
            self.scoped_conf.get_string(DefaultJob.JOB_IDENTIFIER))
        LOGGER.info('Setting statsd for job metrics with prefix: {}'.format(prefix))
        self.statsd = StatsClient(prefix=prefix)
    else:
        self.statsd = None
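# Usage sketch, not from the source: enabling the statsd branch above. The
# 'job' scope prefix is an assumption, and `some_task` is a placeholder for a
# fully configured Task instance.
job_conf = ConfigFactory.from_dict({
    'job.{}'.format(DefaultJob.IS_STATSD_ENABLED): True,
    'job.{}'.format(DefaultJob.JOB_IDENTIFIER): 'example_metadata_job',
})
job = DefaultJob(conf=job_conf, task=some_task)
job.launch()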
def init(self, conf: ConfigTree) -> None:
    # initialize extractor with configuration
    self.extractor.init(Scoped.get_scoped_conf(conf, self.extractor.get_scope()))
    # initialize transformer with configuration
    self.transformer.init(Scoped.get_scoped_conf(conf, self.transformer.get_scope()))

    # task configuration
    conf = Scoped.get_scoped_conf(conf, self.get_scope())
    self.date = conf.get_string(SearchMetadatatoElasticasearchTask.DATE, self.today)
    self.entity = conf.get_string(SearchMetadatatoElasticasearchTask.ENTITY_TYPE).lower()
    self.elasticsearch_client = conf.get(
        SearchMetadatatoElasticasearchTask.ELASTICSEARCH_CLIENT_CONFIG_KEY
    )
    self.elasticsearch_alias = conf.get(
        SearchMetadatatoElasticasearchTask.ELASTICSEARCH_ALIAS_CONFIG_KEY
    )
    self.elasticsearch_new_index = conf.get(
        SearchMetadatatoElasticasearchTask.ELASTICSEARCH_NEW_INDEX,
        self.create_new_index_name())

    self.document_mapping = conf.get(SearchMetadatatoElasticasearchTask.MAPPING_CLASS,
                                     RESOURCE_TO_MAPPING[self.entity])

    if not issubclass(self.document_mapping, SearchableResource):
        msg = "Provided document_mapping should be a subclass" \
              f" of SearchableResource, not {type(self.document_mapping)}"
        LOGGER.error(msg)
        raise TypeError(msg)

    self.elasticsearch_batch_size = conf.get(
        SearchMetadatatoElasticasearchTask.ELASTICSEARCH_PUBLISHER_BATCH_SIZE, 10000)
    self.elasticsearch_timeout_sec = conf.get(
        SearchMetadatatoElasticasearchTask.ELASTICSEARCH_TIMEOUT_SEC, 120)
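# Usage sketch, not from the source: a minimal config for the init above. The
# 'task.search_metadata_to_elasticsearch' scope string is an assumption, the
# Elasticsearch client defaults to localhost, and the alias name is
# illustrative; remaining settings fall back to the defaults read above.
es_client = Elasticsearch()  # defaults to localhost:9200
search_task_conf = ConfigFactory.from_dict({
    f'task.search_metadata_to_elasticsearch.{SearchMetadatatoElasticasearchTask.ENTITY_TYPE}': 'table',
    f'task.search_metadata_to_elasticsearch.{SearchMetadatatoElasticasearchTask.ELASTICSEARCH_CLIENT_CONFIG_KEY}': es_client,
    f'task.search_metadata_to_elasticsearch.{SearchMetadatatoElasticasearchTask.ELASTICSEARCH_ALIAS_CONFIG_KEY}': 'table_search_index',
})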
def test_failed_jobs_should_not_be_counted(self, mock_build):
    config_dict = {
        'extractor.bigquery_table_usage.{}'.format(BigQueryTableUsageExtractor.PROJECT_ID_KEY):
            'your-project-here',
    }
    conf = ConfigFactory.from_dict(config_dict)

    client = MockLoggingClient(FAILURE)
    mock_build.return_value = client
    extractor = BigQueryTableUsageExtractor()
    extractor.init(Scoped.get_scoped_conf(conf=conf,
                                          scope=extractor.get_scope()))
    result = extractor.extract()
    self.assertIsNone(result)
def _create_schema_by_table_mapping(self):
    # type: () -> dict
    # TODO: Make extractor generic
    table_metadata_extractor = HiveTableMetadataExtractor()
    table_metadata_extractor.init(
        Scoped.get_scoped_conf(self._conf, table_metadata_extractor.get_scope()))

    table_to_schema = {}
    table_metadata = table_metadata_extractor.extract()
    while table_metadata:
        # TODO: deal with collision
        table_to_schema[table_metadata.name.lower()] = table_metadata.schema_name.lower()
        table_metadata = table_metadata_extractor.extract()
    return table_to_schema
def test_transform_with_dict_object(self):
    # type: () -> None
    """
    Test Transform functionality with Dict object
    """
    transformer = ElasticsearchDocumentTransformer()
    transformer.init(conf=Scoped.get_scoped_conf(conf=self.conf,
                                                 scope=transformer.get_scope()))

    data = dict(test_key="DOES_NOT_MATTER", test_key2="DOES_NOT_MATTER2")
    with self.assertRaises(Exception) as context:
        transformer.transform(data)  # type: ignore

    self.assertTrue("ElasticsearchDocumentTransformer expects record of type 'Neo4jDataResult'!"
                    in str(context.exception))
def _build_extractor(self) -> TableauGraphQLApiLastModifiedExtractor:
    """
    Builds a TableauGraphQLApiLastModifiedExtractor. All data required can be retrieved
    with a single GraphQL call.
    :return: A TableauGraphQLApiLastModifiedExtractor that provides dashboard update metadata.
    """
    extractor = TableauGraphQLApiLastModifiedExtractor()
    tableau_extractor_conf = \
        Scoped.get_scoped_conf(self._conf, extractor.get_scope())\
        .with_fallback(self._conf)\
        .with_fallback(ConfigFactory.from_dict({TableauGraphQLApiExtractor.QUERY: self.query,
                                                STATIC_RECORD_DICT: {'product': 'tableau'}}))
    extractor.init(conf=tableau_extractor_conf)
    return extractor
def test_keypath_can_be_set(self, mock_build):
    config_dict = {
        'extractor.bigquery_watermarks.{}'.format(BigQueryWatermarkExtractor.PROJECT_ID_KEY):
            'your-project-here',
        'extractor.bigquery_watermarks.{}'.format(BigQueryWatermarkExtractor.KEY_PATH_KEY):
            '/tmp/doesnotexist',
    }
    conf = ConfigFactory.from_dict(config_dict)

    mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, None)
    extractor = BigQueryWatermarkExtractor()
    with self.assertRaises(FileNotFoundError):
        extractor.init(Scoped.get_scoped_conf(conf=conf,
                                              scope=extractor.get_scope()))