コード例 #1
0
 def _get_config(self):
     path = self._get_user_config_path()
     connector_config_path = self._get_connector_config_path()
     if path:
         user_config = config.Config(path, connector_config_path)
         return user_config
     return None
コード例 #2
0
    def test_scrape_metadata_with_credentials_overriding_base_metadata_query_should_return_objects(  # noqa: E501
            self, normalize):
        # Checks that scrape() executes the override query instead of the
        # `query` argument it receives, and returns a single-entry result.
        # NOTE(review): `normalize` is presumably a mock supplied by a patch
        # decorator outside this view, and the override query is presumably
        # resolved through the loaded config - confirm both.
        module_path = self.__MODULE_PATH
        expected_metadata = utils.Utils.convert_json_to_object(
            module_path, 'metadata.json')

        loaded_config = config.Config(
            utils.Utils.get_resolved_file_name(
                module_path, 'base_metadata_query_ingest_cfg.yaml'),
            utils.Utils.get_test_config_path(module_path))

        normalize.return_value = expected_metadata
        scraper = test_utils.FakeScraper()

        fallback_query = 'SELECT * from default_db'
        override_query = 'SELECT  * from db'
        connection_args = {'host': 'localhost', 'port': 1234}

        scraped = scraper.scrape({},
                                 connection_args=connection_args,
                                 query=fallback_query,
                                 config=loaded_config)

        # First positional argument of the most recent cursor.execute() call.
        executed_query = scraper.cur.execute.call_args[0][0]
        self.assertEqual(override_query, executed_query)

        self.assertEqual(1, len(scraped))
コード例 #3
0
    def test_scrape_metadata_with_user_config_should_return_objects(
            self, to_metadata_dict,
            get_exact_table_names_from_dataframe):  # noqa
        # Calls get_metadata() with a user config loaded from
        # ../test_data/ingest_cfg.yaml and expects a single-entry result.
        # NOTE(review): the two extra parameters are presumably mocks injected
        # by patch decorators that are not visible here - confirm.
        to_metadata_dict.return_value = utils.Utils.convert_json_to_object(
            self.__MODULE_PATH, 'metadata.json')
        get_exact_table_names_from_dataframe.return_value = [
            "schema0.table0", "schema1.table1"
        ]

        scraper = test_utils.FakeScraper()

        tests_dir = os.path.dirname(os.path.abspath(__file__))
        user_config = config.Config(
            os.path.join(tests_dir, '../test_data/ingest_cfg.yaml'))

        metadata_definition = utils.Utils.get_metadata_def_obj(
            self.__MODULE_PATH)
        connection_args = {'host': 'localhost', 'port': 1234}

        schemas_metadata = scraper.get_metadata(
            metadata_definition,
            connection_args=connection_args,
            user_config=user_config)

        self.assertEqual(1, len(schemas_metadata))
コード例 #4
0
    def test_config_should_retrieve_sql_objects(self, yaml_load):
        # The YAML payload lists one enabled item ('functions') and one
        # disabled item ('stored_procedures'); only 'functions' is expected in
        # sql_objects_config, with its query and metadata-definition entries
        # populated.
        # NOTE(review): `yaml_load` is presumably a patched YAML loader mock
        # supplied by a decorator outside this view - confirm.
        enabled_item = {
            constants.SQL_OBJECT_ITEM_NAME: 'functions',
            constants.SQL_OBJECT_ITEM_ENABLED_FLAG: True,
        }
        disabled_item = {
            constants.SQL_OBJECT_ITEM_NAME: 'stored_procedures',
            constants.SQL_OBJECT_ITEM_ENABLED_FLAG: False,
        }
        yaml_load.return_value = {
            constants.SQL_OBJECTS_KEY: [enabled_item, disabled_item]
        }

        module_path = self.__MODULE_PATH
        loaded_config = config.Config(
            utils.Utils.get_resolved_file_name(
                module_path, 'sql_objects_ingest_cfg.yaml'),
            utils.Utils.get_test_config_path(module_path))

        self.assertEqual(1, len(loaded_config.sql_objects_config))

        functions_config = loaded_config.sql_objects_config['functions']
        self.assertEqual('functions',
                         functions_config[constants.SQL_OBJECT_ITEM_NAME])
        self.assertIsNotNone(
            functions_config[constants.SQL_OBJECT_ITEM_QUERY_KEY])
        self.assertIsNotNone(
            functions_config[constants.SQL_OBJECT_ITEM_METADATA_DEF_KEY])
コード例 #5
0
    def test_scrape_metadata_with_enrich_metadata_user_config_and_no_enricher_should_raise_error(  # noqa:E501
            self, to_metadata_dict, _):  # noqa
        # get_metadata() on a plain FakeScraper, given the enrich-metadata
        # user config, is expected to raise NotImplementedError.
        # NOTE(review): `to_metadata_dict` and `_` are presumably mocks
        # injected by patch decorators that are not visible here - confirm.
        to_metadata_dict.return_value = utils.Utils.convert_json_to_object(
            self.__MODULE_PATH, 'metadata.json')

        tests_dir = os.path.dirname(os.path.abspath(__file__))
        user_config = config.Config(
            os.path.join(tests_dir,
                         '../test_data/enrich_metadata_ingest_cfg.yaml'))

        metadata_definition = utils.Utils.get_metadata_def_obj(
            self.__MODULE_PATH)

        scraper = test_utils.FakeScraper()
        connection_args = {'host': 'localhost', 'port': 1234}

        with self.assertRaises(NotImplementedError):
            scraper.get_metadata(metadata_definition,
                                 connection_args=connection_args,
                                 user_config=user_config)
コード例 #6
0
    def test_scrape_metadata_with_enrich_metadata_config_and_no_enricher_should_succeed(  # noqa:E501
            self, normalize, _):  # noqa
        # Smoke test: scrape() on a plain FakeScraper with the enrich-metadata
        # config must complete without raising; no result is asserted.
        # NOTE(review): `normalize` and `_` are presumably mocks injected by
        # patch decorators that are not visible here - confirm.
        module_path = self.__MODULE_PATH
        normalize.return_value = utils.Utils.convert_json_to_object(
            module_path, 'metadata.json')

        tests_dir = os.path.dirname(os.path.abspath(__file__))
        user_config_path = os.path.join(
            tests_dir, '../test_data/enrich_metadata_ingest_cfg.yaml')
        connector_config_path = utils.Utils.get_test_config_path(module_path)
        loaded_config = config.Config(user_config_path, connector_config_path)

        metadata_definition = utils.Utils.get_metadata_def_obj(module_path)

        scraper = test_utils.FakeScraper()
        connection_args = {'host': 'localhost', 'port': 1234}

        scraper.scrape(metadata_definition,
                       connection_args=connection_args,
                       config=loaded_config)
コード例 #7
0
    def test_config_should_not_deliver_options_not_chosen_by_user(
            self, yaml_load):
        # With the refresh option on and the row-count option off, the config
        # is expected to report no chosen metadata options.
        # NOTE(review): `yaml_load` is presumably a patched YAML loader mock
        # supplied by a decorator outside this view - confirm.
        parsed_yaml = {
            config_constants.REFRESH_OPTION: True,
            config_constants.ROW_COUNT_OPTION: False,
        }
        yaml_load.return_value = parsed_yaml

        user_config = config.Config(
            utils.Utils.get_resolved_file_name(self.__MODULE_PATH,
                                               'ingest_cfg.yaml'))

        chosen_options = user_config.get_chosen_metadata_options()
        self.assertEqual([], chosen_options)
コード例 #8
0
    def test_scrape_metadata_with_multiple_sql_objects_config_should_return_objects(  # noqa: E501
            self, sql_objects_normalize, base_normalize,
            get_exact_table_names_from_dataframe):  # noqa
        # scrape() with the SQL-objects config is expected to return both a
        # 'schemas' and a 'sql_objects' section, the latter holding the
        # 'functions' and 'stored_procedures' payloads.
        # NOTE(review): the three extra parameters are presumably mocks
        # injected by patch decorators that are not visible here - confirm.
        module_path = self.__MODULE_PATH

        base_metadata = utils.Utils.convert_json_to_object(
            module_path, 'metadata.json')
        base_normalize.return_value = base_metadata

        functions_metadata = utils.Utils.convert_json_to_object(
            module_path, 'normalized_sql_objects.json')
        stored_procedure_metadata = utils.Utils.convert_json_to_object(
            module_path, 'normalized_sql_objects_stored_procedure.json')

        # One payload per consecutive call: functions first, then stored
        # procedures.
        sql_objects_normalize.side_effect = [
            functions_metadata, stored_procedure_metadata
        ]

        get_exact_table_names_from_dataframe.return_value = [
            "schema0.table0", "schema1.table1"
        ]

        scraper = test_utils.FakeScraper()

        loaded_config = config.Config(
            utils.Utils.get_resolved_file_name(
                module_path, 'sql_objects_ingest_cfg.yaml'),
            utils.Utils.get_test_config_path(module_path))

        metadata_definition = utils.Utils.get_metadata_def_obj(module_path)
        connection_args = {'host': 'localhost', 'port': 1234}

        scraped_metadata = scraper.scrape(metadata_definition,
                                          connection_args=connection_args,
                                          config=loaded_config)

        self.assertEqual(2, len(scraped_metadata))
        for top_level_key in ('schemas', 'sql_objects'):
            self.assertIn(top_level_key, scraped_metadata)

        # NOTE(review): this equality presumably holds because scrape() adds
        # 'sql_objects' to the same dict the base normalizer returned -
        # confirm against the scraper implementation.
        self.assertDictEqual(base_metadata, scraped_metadata)

        sql_objects = scraped_metadata['sql_objects']
        self.assertDictEqual(functions_metadata, sql_objects['functions'])
        self.assertDictEqual(stored_procedure_metadata,
                             sql_objects['stored_procedures'])
コード例 #9
0
    def test_config_should_retrieve_base_metadata_query(self, yaml_load):
        # When the YAML payload names 'my_override_query.sql' as the base
        # metadata query file, the config is expected to expose the query text
        # 'SELECT  * from db' through base_metadata_query.
        # NOTE(review): `yaml_load` is presumably a patched YAML loader mock
        # supplied by a decorator outside this view - confirm.
        parsed_yaml = {
            constants.BASE_METADATA_QUERY_FILENAME: 'my_override_query.sql'
        }
        yaml_load.return_value = parsed_yaml

        module_path = self.__MODULE_PATH
        loaded_config = config.Config(
            utils.Utils.get_resolved_file_name(
                module_path, 'base_metadata_query_ingest_cfg.yaml'),
            utils.Utils.get_test_config_path(module_path))

        expected_query = 'SELECT  * from db'
        self.assertEqual(expected_query, loaded_config.base_metadata_query)
コード例 #10
0
    def test_config_should_deliver_options_chosen_by_user(self, yaml_load):
        # Both options are switched on in the YAML payload, yet only the
        # row-count option is expected among the chosen metadata options.
        # NOTE(review): `yaml_load` is presumably a patched YAML loader mock
        # supplied by a decorator outside this view - confirm.
        parsed_yaml = {
            constants.REFRESH_OPTION: True,
            constants.ROW_COUNT_OPTION: True,
        }
        yaml_load.return_value = parsed_yaml

        module_path = self.__MODULE_PATH
        loaded_config = config.Config(
            utils.Utils.get_resolved_file_name(module_path,
                                               'ingest_cfg.yaml'),
            utils.Utils.get_test_config_path(module_path))

        expected_options = [constants.ROW_COUNT_OPTION]
        self.assertEqual(expected_options,
                         loaded_config.get_chosen_metadata_options())
コード例 #11
0
    def test_optional_metadata_should_not_be_pulled_with_empty_config(
            self, get_optional_queries, to_metadata_dict):
        # With a config loaded from empty_ingest_cfg.yaml, get_metadata() must
        # return a single-entry result and never call get_optional_queries.
        # NOTE(review): both extra parameters are presumably mocks injected by
        # patch decorators that are not visible here - confirm.
        module_path = self.__MODULE_PATH
        empty_config = config.Config(
            utils.Utils.get_resolved_file_name(module_path,
                                               'empty_ingest_cfg.yaml'))

        scraper = test_utils.FakeScraper()
        to_metadata_dict.return_value = utils.Utils.convert_json_to_object(
            module_path, 'metadata.json')

        connection_args = {'host': 'localhost', 'port': 1234}
        schemas_metadata = scraper.get_metadata(
            {}, connection_args=connection_args, user_config=empty_config)

        self.assertEqual(1, len(schemas_metadata))
        self.assertEqual(0, get_optional_queries.call_count)
コード例 #12
0
    def test_scrape_metadata_with_csv_and_sql_objects_should_return_base_metadata(  # noqa: E501
            self, sql_objects_normalize, base_normalize,
            get_exact_table_names_from_dataframe):  # noqa
        # When scrape() is fed a CSV dump instead of connection arguments, the
        # result is expected to contain only the 'schemas' section, even
        # though the SQL-objects config is loaded.
        # NOTE(review): the three extra parameters are presumably mocks
        # injected by patch decorators that are not visible here - confirm.
        module_path = self.__MODULE_PATH

        base_metadata = utils.Utils.convert_json_to_object(
            module_path, 'metadata.json')
        base_normalize.return_value = base_metadata

        sql_objects_normalize.return_value = \
            utils.Utils.convert_json_to_object(
                module_path, 'normalized_sql_objects.json')

        get_exact_table_names_from_dataframe.return_value = [
            "schema0.table0", "schema1.table1"
        ]

        scraper = test_utils.FakeScraper()

        loaded_config = config.Config(
            utils.Utils.get_resolved_file_name(
                module_path, 'sql_objects_ingest_cfg.yaml'),
            utils.Utils.get_test_config_path(module_path))

        metadata_definition = utils.Utils.get_metadata_def_obj(module_path)

        csv_dump_path = utils.Utils.get_resolved_file_name(
            module_path, 'rdbms_full_dump.csv')
        scraped_metadata = scraper.scrape(metadata_definition,
                                          csv_path=csv_dump_path,
                                          config=loaded_config)

        self.assertEqual(1, len(scraped_metadata))
        self.assertIn('schemas', scraped_metadata)
        self.assertNotIn('sql_objects', scraped_metadata)
        self.assertDictEqual(base_metadata, scraped_metadata)
コード例 #13
0
    def test_metadata_should_not_be_updated_with_empty_config(
            self, get_refresh_metadata_queries, normalize):
        # With a config loaded from empty_ingest_cfg.yaml, scrape() must
        # return a single-entry result and never call
        # get_refresh_metadata_queries.
        # NOTE(review): both extra parameters are presumably mocks injected by
        # patch decorators that are not visible here - confirm.
        module_path = self.__MODULE_PATH
        user_config_path = utils.Utils.get_resolved_file_name(
            module_path, 'empty_ingest_cfg.yaml')
        connector_config_path = utils.Utils.get_test_config_path(module_path)
        empty_config = config.Config(user_config_path, connector_config_path)

        scraper = test_utils.FakeScraper()
        normalize.return_value = utils.Utils.convert_json_to_object(
            module_path, 'metadata.json')

        connection_args = {'host': 'localhost', 'port': 1234}
        schemas_metadata = scraper.scrape({},
                                          connection_args=connection_args,
                                          config=empty_config)

        self.assertEqual(1, len(schemas_metadata))
        self.assertEqual(0, get_refresh_metadata_queries.call_count)
コード例 #14
0
    def test_scrape_metadata_with_enrich_metadata_config_should_return_objects(  # noqa:E501
            self, normalize, get_exact_table_names_from_dataframe):  # noqa
        # scrape() on a scraper that has a metadata enricher must hand the
        # normalizer a dataframe whose first schema and table names start with
        # 'mycompany', and return a single-entry result.
        # NOTE(review): both extra parameters are presumably mocks injected by
        # patch decorators that are not visible here - confirm.
        module_path = self.__MODULE_PATH
        normalize.return_value = utils.Utils.convert_json_to_object(
            module_path, 'metadata.json')

        get_exact_table_names_from_dataframe.return_value = [
            "schema0.table0", "schema1.table1"
        ]

        scraper = test_utils.FakeScraperWithMetadataEnricher()

        tests_dir = os.path.dirname(os.path.abspath(__file__))
        user_config_path = os.path.join(
            tests_dir, '../test_data/enrich_metadata_ingest_cfg.yaml')
        connector_config_path = utils.Utils.get_test_config_path(module_path)
        loaded_config = config.Config(user_config_path, connector_config_path)

        metadata_definition = utils.Utils.get_metadata_def_obj(module_path)
        connection_args = {'host': 'localhost', 'port': 1234}

        schemas_metadata = scraper.scrape(metadata_definition,
                                          connection_args=connection_args,
                                          config=loaded_config)

        self.assertEqual(1, len(schemas_metadata))

        # Positional arguments of the first recorded normalize() call.
        first_call_args = normalize.call_args_list[0][0]
        metadata_dataframe, _definition_arg = first_call_args
        for column in ('schema_name', 'table_name'):
            first_value = metadata_dataframe[column][0]
            self.assertTrue(first_value.startswith('mycompany'))
コード例 #15
0
    def test_config_no_files_should_not_retrieve_sql_objects(self, yaml_load):
        # The YAML payload enables 'functions_xpto' and disables
        # 'stored_procedures'; the resulting sql_objects_config is expected to
        # be empty.
        # NOTE(review): presumably no query/metadata files exist for
        # 'functions_xpto', and `yaml_load` is presumably a patched YAML
        # loader mock - confirm both.
        enabled_item = {
            constants.SQL_OBJECT_ITEM_NAME: 'functions_xpto',
            constants.SQL_OBJECT_ITEM_ENABLED_FLAG: True,
        }
        disabled_item = {
            constants.SQL_OBJECT_ITEM_NAME: 'stored_procedures',
            constants.SQL_OBJECT_ITEM_ENABLED_FLAG: False,
        }
        yaml_load.return_value = {
            constants.SQL_OBJECTS_KEY: [enabled_item, disabled_item]
        }

        module_path = self.__MODULE_PATH
        loaded_config = config.Config(
            utils.Utils.get_resolved_file_name(
                module_path, 'sql_objects_ingest_cfg.yaml'),
            utils.Utils.get_test_config_path(module_path))

        self.assertEqual(0, len(loaded_config.sql_objects_config))
    def test_synchronize_metadata_with_sql_config_should_not_raise_error(  # noqa: E501
            self, process_entries_length_metric,
            process_metadata_payload_bytes_metric, process_elapsed_time_metric,
            delete_obsolete_metadata, ingest_metadata, make_base_entries,
            make_sql_objects_entries, scrape):
        # Runs the synchronizer with the SQL-objects config and monitoring
        # enabled, then checks how often each collaborator was called and that
        # 'database_name' in the metadata definition is 'test_db'.
        # NOTE(review): all extra parameters are presumably mocks injected by
        # patch decorators that are not visible here - confirm.
        make_base_entries.return_value = [({}, [])]
        make_sql_objects_entries.return_value = []

        module_path = self.__MODULE_PATH
        loaded_config = config.Config(
            utils.Utils.get_resolved_file_name(
                module_path, 'sql_objects_ingest_cfg.yaml'),
            utils.Utils.get_test_config_path(module_path))

        test_case = DatacatalogSynchronizerTestCase
        synchronizer = datacatalog_synchronizer.DataCatalogSynchronizer(
            test_case.__PROJECT_ID,
            test_case.__LOCATION_ID,
            test_case.__ENTRY_GROUP_ID,
            test_case.__HOST,
            utils.Utils.get_metadata_def_obj(module_path),
            test_utils.FakeScraper, {'database': 'test_db'},
            enable_monitoring=True,
            config=loaded_config)

        synchronizer.run()

        self.assertEqual(scrape.call_count, 1)

        # Reads the synchronizer's name-mangled private attribute directly.
        stored_definition = \
            synchronizer._DataCatalogSynchronizer__metadata_definition
        self.assertEqual(stored_definition['database_name'], 'test_db')

        expected_call_counts = (
            (make_base_entries, 1),
            (ingest_metadata, 2),
            (delete_obsolete_metadata, 1),
            (process_entries_length_metric, 1),
            (process_metadata_payload_bytes_metric, 1),
            (process_elapsed_time_metric, 1),
        )
        for patched, expected_calls in expected_call_counts:
            self.assertEqual(patched.call_count, expected_calls)
コード例 #17
0
    def test_scrape_metadata_with_csv_and_user_config_should_return_objects(
            self, to_metadata_dict,
            get_exact_table_names_from_dataframe):  # noqa
        # get_metadata() fed from a CSV dump, on a scraper that has a metadata
        # enricher, must pass to_metadata_dict a dataframe whose first schema
        # and table names start with 'mycompany', and return a single-entry
        # result.
        # NOTE(review): both extra parameters are presumably mocks injected by
        # patch decorators that are not visible here - confirm.
        module_path = self.__MODULE_PATH
        to_metadata_dict.return_value = utils.Utils.convert_json_to_object(
            module_path, 'metadata.json')

        get_exact_table_names_from_dataframe.return_value = [
            "schema0.table0", "schema1.table1"
        ]

        scraper = test_utils.FakeScraperWithMetadataEnricher()

        tests_dir = os.path.dirname(os.path.abspath(__file__))
        user_config = config.Config(
            os.path.join(tests_dir,
                         '../test_data/enrich_metadata_ingest_cfg.yaml'))

        metadata_definition = utils.Utils.get_metadata_def_obj(module_path)

        csv_dump_path = utils.Utils.get_resolved_file_name(
            module_path, 'rdbms_full_dump.csv')
        schemas_metadata = scraper.get_metadata(metadata_definition,
                                                csv_path=csv_dump_path,
                                                user_config=user_config)

        self.assertEqual(1, len(schemas_metadata))

        # Positional arguments of the first recorded to_metadata_dict() call.
        first_call_args = to_metadata_dict.call_args_list[0][0]
        metadata_dataframe, _definition_arg = first_call_args
        for column in ('schema_name', 'table_name'):
            first_value = metadata_dataframe[column][0]
            self.assertTrue(first_value.startswith('mycompany'))