Example #1
0
    def test_synchronize_metadata_should_not_raise_error(
            self, process_entries_length_metric,
            process_metadata_payload_bytes_metric, process_elapsed_time_metric,
            delete_obsolete_metadata, ingest_metadata,
            make_entries_from_database_metadata,
            get_database_metadata):  # noqa
        """Run a synchronization built from connection arguments.

        Verifies that the run completes and that every patched
        collaborator and metric hook is called exactly once.

        NOTE(review): the mock parameters are presumably injected by
        ``mock.patch`` decorators declared outside this view -- confirm
        the decorator order before reordering any parameter.
        """
        # A single placeholder element is returned by the entry factory.
        make_entries_from_database_metadata.return_value = [({}, [])]

        fixture = DatacatalogSynchronizerTestCase
        constructor_args = (
            fixture.__PROJECT_ID,
            fixture.__LOCATION_ID,
            fixture.__HIVE_METASTORE_DB_HOST,
            fixture.__HIVE_METASTORE_DB_USER,
            fixture.__HIVE_METASTORE_DB_PASS,
            fixture.__HIVE_METASTORE_DB_NAME,
            fixture.__HIVE_METASTORE_DB_TYPE,
        )
        synchronizer = datacatalog_synchronizer.DataCatalogSynchronizer(
            *constructor_args)
        synchronizer.run()

        # Every pipeline collaborator must have been called exactly once.
        for collaborator in (get_database_metadata,
                             make_entries_from_database_metadata,
                             ingest_metadata,
                             delete_obsolete_metadata):
            self.assertEqual(1, collaborator.call_count)

        # Each metric hook must have been called exactly once as well.
        for metric in (process_entries_length_metric,
                       process_metadata_payload_bytes_metric,
                       process_elapsed_time_metric):
            self.assertEqual(metric.call_count, 1)
Example #2
0
def run():
    """Handle a metadata synchronization request.

    On POST, the JSON body must carry a base64-encoded, UTF-8 JSON
    document under ``message['data']``. That document is decoded and
    passed to ``DataCatalogSynchronizer`` as the metadata sync event,
    together with settings read from ``HIVE2DC_*`` environment variables.
    On GET, a short usage hint is returned instead.

    Returns:
        A 200 JSON response on POST, a usage string on GET, and ``None``
        for any other HTTP method.

    Raises:
        KeyError: If the body lacks ``message`` / ``data`` or one of the
            required environment variables is not set.
    """
    if request.method == 'GET':
        return 'use POST method with a message event BODY'
    if request.method != 'POST':
        # No other method is handled by this function.
        return None

    request_data = request.get_json()
    app.logger.info(request_data)

    # Unwrap the event: body -> message -> base64 data -> JSON document.
    encoded_event = request_data['message']['data']
    decoded_event = base64.b64decode(encoded_event).decode('utf-8')
    sync_event = json.loads(decoded_event)

    project_id = os.environ['HIVE2DC_DATACATALOG_PROJECT_ID']
    location_id = os.environ['HIVE2DC_DATACATALOG_LOCATION_ID']
    metastore_host = os.environ['HIVE2DC_HIVE_METASTORE_DB_HOST']

    synchronizer = datacatalog_synchronizer.DataCatalogSynchronizer(
        project_id=project_id,
        location_id=location_id,
        hive_metastore_db_host=metastore_host,
        metadata_sync_event=sync_event)
    synchronizer.run()

    response = {'message': 'Synchronized', 'code': 'SUCCESS'}
    return make_response(jsonify(response), 200)
Example #3
0
    def test_synchronize_metadata_with_drop_table_sync_event_should_succeed(  # noqa
            self, process_entries_length_metric,
            process_metadata_payload_bytes_metric, process_elapsed_time_metric,
            delete_metadata, delete_obsolete_metadata, ingest_metadata,
            make_entries_from_database_metadata,
            get_database_metadata):  # noqa
        """Run a synchronization driven by a drop-table sync event.

        Verifies that the event leads to one ``delete_metadata`` call,
        that nothing is ingested and no obsolete-metadata cleanup runs,
        and that every metric hook is called once.

        NOTE(review): the mock parameters are presumably injected by
        ``mock.patch`` decorators declared outside this view -- confirm
        the decorator order before reordering any parameter.
        """
        # A single placeholder element is returned by the entry factory.
        make_entries_from_database_metadata.return_value = [({}, [])]

        drop_table_event = retrieve_json_file(
            '/hooks/message_drop_table.json')
        fixture = DatacatalogSynchronizerTestCase
        synchronizer = datacatalog_synchronizer.DataCatalogSynchronizer(
            project_id=fixture.__PROJECT_ID,
            location_id=fixture.__LOCATION_ID,
            metadata_sync_event=drop_table_event)
        synchronizer.run()

        # (expected call count, mock) for each pipeline collaborator.
        expected_calls = (
            (1, get_database_metadata),
            (1, make_entries_from_database_metadata),
            (0, ingest_metadata),
            (1, delete_metadata),
            (0, delete_obsolete_metadata),
        )
        for expected, collaborator in expected_calls:
            self.assertEqual(expected, collaborator.call_count)

        # Each metric hook must have been called exactly once.
        for metric in (process_entries_length_metric,
                       process_metadata_payload_bytes_metric,
                       process_elapsed_time_metric):
            self.assertEqual(metric.call_count, 1)
Example #4
0
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import json
import os
import sys

from google.datacatalog_connectors.hive.sync import datacatalog_synchronizer

# Send DEBUG-and-above records to stdout, but cap the SQLAlchemy engine
# logger at INFO.
logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
sqlalchemy_engine_logger = logging.getLogger('sqlalchemy.engine')
sqlalchemy_engine_logger.setLevel(logging.INFO)


def retrieve_json_file(name):
    """Load a JSON document from the ``resources`` directory beside this file.

    Args:
        name: File name (or relative path) under ``resources/``.

    Returns:
        The parsed JSON content.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file does not contain valid JSON.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    # The name is formatted into the path rather than passed to
    # os.path.join as its own component: join would discard everything
    # before it if the name started with a path separator.
    resolved_name = os.path.join(module_dir, 'resources/{}'.format(name))

    # Decode explicitly as UTF-8 so parsing does not depend on the
    # platform's locale encoding.
    with open(resolved_name, encoding='utf-8') as json_file:
        return json.load(json_file)


# Run one synchronization from the bundled create-table event, with
# monitoring enabled.
create_table_event = retrieve_json_file('message_create_table.json')
synchronizer = datacatalog_synchronizer.DataCatalogSynchronizer(
    project_id='uat-env-1',
    location_id='us-central1',
    metadata_sync_event=create_table_event,
    enable_monitoring=True)
synchronizer.run()
Example #5
0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import sys

from google.datacatalog_connectors.hive.sync import datacatalog_synchronizer

# Send DEBUG-and-above records to stdout, but cap the SQLAlchemy engine
# logger at INFO.
logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
sqlalchemy_engine_logger = logging.getLogger('sqlalchemy.engine')
sqlalchemy_engine_logger.setLevel(logging.INFO)

# Run one synchronization against a local metastore database of type
# 'postgresql'.
# NOTE(review): the user/pass values look like masked placeholders --
# supply real credentials before running.
hive_metastore_connection = {
    'hive_metastore_db_host': 'localhost',
    'hive_metastore_db_user': '******',
    'hive_metastore_db_pass': '******',
    'hive_metastore_db_name': 'metastore',
    'hive_metastore_db_type': 'postgresql',
}
datacatalog_synchronizer.DataCatalogSynchronizer(
    project_id='uat-env-1',
    location_id='us-central1',
    **hive_metastore_connection).run()