def test_run_given_entry_group_id_and_entry_id_should_enrich_a_single_entry(
        self, get_manually_created_fileset_entries, get_entry,
        parse_gcs_file_patterns, create_filtered_data_for_single_bucket,
        create_filtered_data_for_multiple_buckets,
        create_stats_from_dataframe, create_tag_from_stats):  # noqa: E125
    """Run with an explicit entry group id and entry id on a plain bucket.

    The parsed pattern names a single bucket without a wildcard, and the
    test expects the single-bucket filtering helper to be used while the
    multiple-bucket helper and the entry listing helper stay untouched.

    NOTE(review): the mock arguments are presumably injected by patch
    decorators that sit outside this view -- confirm their order there.
    """
    # Arrange: one fake fileset entry resolving to one non-wildcard pattern.
    get_entry.return_value = self.__make_fake_fileset_entry()
    parse_gcs_file_patterns.return_value = [
        {
            'bucket_name': 'my_bucket',
            'file_regex': '.*'
        },
    ]
    # Empty placeholders are enough; only the call counts are asserted.
    create_filtered_data_for_single_bucket.return_value = (pd.DataFrame(),
                                                           {})
    create_stats_from_dataframe.return_value = {}

    # Act
    enricher = DatacatalogFilesetEnricher('test_project')
    enricher.run('entry_group_id', 'entry_id')

    # Assert: the entry is fetched directly instead of being listed.
    get_manually_created_fileset_entries.assert_not_called()
    get_entry.assert_called_once()
    parse_gcs_file_patterns.assert_called_once()

    # Assert: only the single-bucket path is exercised.
    create_filtered_data_for_single_bucket.assert_called_once()
    create_filtered_data_for_multiple_buckets.assert_not_called()

    # Assert: stats and tag are each produced exactly once.
    create_stats_from_dataframe.assert_called_once()
    create_tag_from_stats.assert_called_once()
def test_clean_up_all_should_call_the_right_clean_up_methods(
        self, delete_entries_and_entry_groups, delete_tag_template):
    """Check that ``clean_up_all`` triggers each clean up helper once.

    NOTE(review): both mock arguments are presumably injected by patch
    decorators that sit outside this view -- confirm their order there.
    """
    # Act: no further setup is needed, only call counts are checked.
    DatacatalogFilesetEnricher('test_project').clean_up_all()

    # Assert
    delete_entries_and_entry_groups.assert_called_once()
    delete_tag_template.assert_called_once()
def test_run_given_bucket_with_wildcard_and_multiple_gcs_patterns_should_call_retrieve_multiple_buckets(  # noqa: E501
        self, get_manually_created_fileset_entries, get_entry,
        parse_gcs_file_patterns, create_filtered_data_for_single_bucket,
        create_filtered_data_for_multiple_buckets,
        create_stats_from_dataframe, create_tag_from_stats):  # noqa:E125
    """Run on an entry whose two parsed patterns use a wildcard bucket.

    The test expects the multiple-bucket filtering helper to be called
    twice and the single-bucket helper not to be called at all.

    NOTE(review): the mock arguments are presumably injected by patch
    decorators that sit outside this view -- confirm their order there.
    """
    # Arrange: fake entry carrying an additional wildcard-bucket pattern.
    fileset_entry = self.__make_fake_fileset_entry()
    fileset_entry.gcs_fileset_spec.file_patterns.append(
        'gs://my_bucket*/*csv')
    get_entry.return_value = fileset_entry

    # Two parsed patterns, both pointing at a wildcard bucket name.
    parse_gcs_file_patterns.return_value = [
        {
            'bucket_name': 'my_bucket*',
            'file_regex': '.*'
        },
        {
            'bucket_name': 'my_bucket*',
            'file_regex': '.*csv'
        },
    ]
    # Empty placeholders are enough; only the call counts are asserted.
    create_filtered_data_for_multiple_buckets.return_value = (
        pd.DataFrame(), {})
    create_stats_from_dataframe.return_value = {}

    # Act
    enricher = DatacatalogFilesetEnricher('test_project')
    enricher.run('entry_group_id', 'entry_id')

    # Assert: the entry is fetched directly instead of being listed.
    get_manually_created_fileset_entries.assert_not_called()
    get_entry.assert_called_once()
    parse_gcs_file_patterns.assert_called_once()

    # Assert: only the multiple-bucket path is exercised, twice in total.
    create_filtered_data_for_single_bucket.assert_not_called()
    self.assertEqual(2,
                     create_filtered_data_for_multiple_buckets.call_count)

    # Assert: stats and tag are each produced exactly once.
    create_stats_from_dataframe.assert_called_once()
    create_tag_from_stats.assert_called_once()