Example #1
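The snippets below are shown without their import headers. A minimal set of shared imports they appear to rely on is sketched here; the project-specific adapter classes and the service_helpers module come from the project's own packages, so those import paths are assumptions and are left as comments.

import json
import os
from datetime import datetime
from typing import Union

from google.cloud import bigquery
from google.oauth2 import service_account
from prefect import Task

# Project-specific modules; the paths below are placeholders, not the
# original package layout.
# from adapters import (BigQueryAdapter, MySqlAdapter, PubSubAdapter,
#                       StackDriverAdapter, StorageAdapter)
# from helpers import service_helpers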
class PrefectAddServiceStorage(Task):
    def __init__(self, **kwargs):
        self.service_account = service_account.Credentials.from_service_account_file(
            os.environ['MYSQL_BIG_QUERY_GOOGLE_AUTH'])
        self.sd_logger = StackDriverAdapter(self.service_account)
        self.sd_logger.get_client()
        self.sd_logger.create_logger(f"service-add-storage")
        super().__init__(**kwargs)

    def create_service_bucket(self, service: str):
        client = StorageAdapter(self.service_account)
        client.get_client()
        result = client.make_bucket(f"{service}-sync")
        if result:
            self.sd_logger.info({'message': f'Created {service} Bucket'}, {
                'class': 'PrefectAddServiceStorage',
                'method': 'create_service_bucket'
            })
        else:
            self.sd_logger.warning(
                client.errors, {
                    'class': 'PrefectAddServiceStorage',
                    'method': 'create_service_bucket'
                })
        return result
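
A minimal usage sketch (not part of the original source), assuming MYSQL_BIG_QUERY_GOOGLE_AUTH points at a valid service-account key file and using a hypothetical service name:

add_storage = PrefectAddServiceStorage(name="add-service-storage")
# Creates a "billing-sync" bucket and logs the outcome to Stackdriver.
bucket_created = add_storage.create_service_bucket("billing")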
Example #2
class PrefectInstallStorage(Task):
    def __init__(self, **kwargs):
        self.service_account = service_account.Credentials.from_service_account_file(
            os.environ['MYSQL_BIG_QUERY_GOOGLE_AUTH'])
        self.sd_logger = StackDriverAdapter(self.service_account)
        self.sd_logger.get_client()
        self.sd_logger.create_logger(f"storage-install-mysql-sync")
        super().__init__(**kwargs)

    def create_config_bucket(self):
        client = StorageAdapter(self.service_account)
        client.get_client()
        result = client.make_bucket('mysql_sync_keys')
        if result:
            self.sd_logger.info({'message': 'Created mysql_sync_keys Bucket'},
                                {
                                    'class': 'PrefectInstallStorage',
                                    'method': 'create_config_bucket'
                                })
        else:
            self.sd_logger.warning(client.errors, {
                'class': 'PrefectInstallStorage',
                'method': 'create_config_bucket'
            })
        return result
Example #3
class PrefectInstallPubSub(Task):
    def __init__(self, **kwargs):
        self.service_account = service_account.Credentials.from_service_account_file(
            os.environ['MYSQL_BIG_QUERY_GOOGLE_AUTH']
        )
        self.sd_logger = StackDriverAdapter(self.service_account)
        self.sd_logger.get_client()
        self.sd_logger.create_logger(f"install-pub-sub")
        super().__init__(**kwargs)

    def create_client(self):
        pub_sub_client = PubSubAdapter(self.service_account)
        pub_sub_client.get_publisher()
        return pub_sub_client

    def create_topic(self, pub_sub_client: PubSubAdapter, topic: str) -> bool:
        pub_sub_client.set_topic(topic)
        result = pub_sub_client.create_topic()
        if result:
            self.sd_logger.info(
                {'message': f"Created {topic} Topic"},
                {'class': 'PrefectInstallPubSub', 'method': 'create_topic'}
            )
        else:
            self.sd_logger.warning(
                pub_sub_client.errors,
                {'class': 'PrefectInstallPubSub', 'method': 'create_topic'}
            )
        return result

    def create_subscription(self, pub_sub_client: PubSubAdapter, topic: str, subscription: str) -> bool:
        pub_sub_client.set_topic(topic)
        pub_sub_client.set_subscription(subscription)
        pub_sub_client.get_subscriber()
        result = pub_sub_client.create_subscription()
        if result:
            self.sd_logger.info(
                {'message': f"Created {topic} Subscription"},
                {'class': 'PrefectInstallPubSub', 'method': 'create_subscription'}
            )
        else:
            self.sd_logger.warning(
                pub_sub_client.errors,
                {'class': 'PrefectInstallPubSub', 'method': 'create_subscription'}
            )
        return result
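
A sketch of the intended call order for this task: build the Pub/Sub client once, create the topic, and only then create its subscription. The topic and subscription names below are illustrative, not taken from the original source.

install_pub_sub = PrefectInstallPubSub(name="install-pub-sub")
pub_sub_client = install_pub_sub.create_client()
# A subscription can only be attached to a topic that already exists.
if install_pub_sub.create_topic(pub_sub_client, "mysql-sync-schema"):
    install_pub_sub.create_subscription(
        pub_sub_client, "mysql-sync-schema", "mysql-sync-schema-sub")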
Example #4
class PrefectAddServiceBigQuery(Task):
    def __init__(self, service: str, **kwargs):
        self.service_account = service_account.Credentials.from_service_account_file(
            os.environ['MYSQL_BIG_QUERY_GOOGLE_AUTH']
        )
        self.definitions = service_helpers.get_definitions(service)
        self.sd_logger = StackDriverAdapter(self.service_account)
        self.sd_logger.get_client()
        self.sd_logger.create_logger(f"service-add-big-query")
        super().__init__(**kwargs)

    def create_client(self) -> BigQueryAdapter:
        big_query_client = BigQueryAdapter(self.service_account)
        big_query_client.get_client()
        return big_query_client

    def create_data_set(self, big_query_client: BigQueryAdapter) -> bool:
        # return True if dataset already exists
        if big_query_client.check_dataset(self.definitions['data_set']):
            return True
        big_query_client.set_data_set_ref(self.definitions['data_set'])
        result = big_query_client.create_data_set()
        if result:
            self.sd_logger.info(
                {'message': f"Created {self.definitions['data_set']} Data Set"},
                {'class': 'PrefectAddServiceBigQuery', 'method': 'create_data_set'})
        else:
            self.sd_logger.warning(
                big_query_client.errors,
                {'class': 'PrefectAddServiceBigQuery', 'method': 'create_data_set'}
            )
        return result

    def create_tracking_table(self, big_query_client: BigQueryAdapter):
        big_query_client.set_data_set_ref(self.definitions['data_set'])
        big_query_client.set_table_ref('sync_tracking_table')
        # return true if table already exists
        if big_query_client.check_table():
            return True
        schema = [
            bigquery.SchemaField(
                'table',
                'STRING',
                description="Tracked Table Name"
            ),
            bigquery.SchemaField(
                'watched',
                'STRING',
                description="Column to watch to minimize the number of records loaded per sync"
            ),
            bigquery.SchemaField(
                'primary_id',
                'STRING',
                description="Primary Id Column(s)"
            ),
            bigquery.SchemaField(
                'synchronize',
                'BOOLEAN',
                description="Flag to Synchronize the table"
            )
        ]
        result = big_query_client.create_table(schema)
        if result:
            self.sd_logger.info(
                {'message': f"Created tracking table"},
                {'class': 'PrefectAddServiceBigQuery', 'method': 'create_tracking_table'}
            )
        else:
            self.sd_logger.warning(
                big_query_client.errors,
                {'class': 'PrefectAddServiceBigQuery', 'method': 'create_tracking_table'}
            )
        return result
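
How the two methods compose, sketched under the assumption that service_helpers.get_definitions() resolves the service's dataset name ("billing" is a placeholder):

add_big_query = PrefectAddServiceBigQuery("billing", name="add-service-big-query")
big_query_client = add_big_query.create_client()
# The tracking table lives inside the service's dataset, so the dataset
# must be created (or already be present) before the table is created.
if add_big_query.create_data_set(big_query_client):
    add_big_query.create_tracking_table(big_query_client)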
Example #5
class PrefectInstallBigQuery(Task):
    def __init__(self, **kwargs):
        self.service_account = service_account.Credentials.from_service_account_file(
            os.environ['MYSQL_BIG_QUERY_GOOGLE_AUTH'])
        self.sd_logger = StackDriverAdapter(self.service_account)
        self.sd_logger.get_client()
        self.sd_logger.create_logger(f"install-big-query")
        super().__init__(**kwargs)

    def create_client(self) -> BigQueryAdapter:
        big_query_client = BigQueryAdapter(self.service_account)
        big_query_client.get_client()
        return big_query_client

    def create_config_data_set(self,
                               big_query_client: BigQueryAdapter) -> bool:
        # return True if dataset already exists
        if big_query_client.check_dataset('mysql_sync'):
            return True
        big_query_client.set_data_set_ref('mysql_sync')
        result = big_query_client.create_data_set()
        if result:
            self.sd_logger.info({'message': f"Created mysql_sync Data Set"}, {
                'class': 'PrefectInstallBigQuery',
                'method': 'create_data_set'
            })
        else:
            self.sd_logger.warning(big_query_client.errors, {
                'class': 'PrefectInstallBigQuery',
                'method': 'create_data_set'
            })
        return result

    def create_config_table(self, big_query_client: BigQueryAdapter):
        big_query_client.set_data_set_ref('mysql_sync')
        big_query_client.set_table_ref('data_sources')
        # return true if table already exists
        if big_query_client.check_table():
            return True

        schema = [
            bigquery.SchemaField('service',
                                 'STRING',
                                 description="Service Name"),
            bigquery.SchemaField('data_set',
                                 'STRING',
                                 description="Big Query Data Set"),
            bigquery.SchemaField('host',
                                 'STRING',
                                 description="MySQL host connection"),
            bigquery.SchemaField('user',
                                 'STRING',
                                 description="MySQL connection user"),
            bigquery.SchemaField('password',
                                 'STRING',
                                 description="MySQL connection password"),
            bigquery.SchemaField('database',
                                 'STRING',
                                 description="MySQL Database"),
            bigquery.SchemaField('last_run',
                                 'TIMESTAMP',
                                 description="Last produce date"),
        ]
        result = big_query_client.create_table(schema)
        if result:
            self.sd_logger.info({'message': f"Created Config table"}, {
                'class': 'PrefectInstallBigQuery',
                'method': 'create_config_table'
            })
        else:
            self.sd_logger.warning(big_query_client.errors, {
                'class': 'PrefectInstallBigQuery',
                'method': 'create_config_table'
            })
        return result
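
Taken together with the storage and Pub/Sub install tasks above, this task bootstraps the shared configuration resources. A hedged sketch of running the install steps in sequence:

install_storage = PrefectInstallStorage(name="install-storage")
install_storage.create_config_bucket()

install_big_query = PrefectInstallBigQuery(name="install-big-query")
bq_client = install_big_query.create_client()
if install_big_query.create_config_data_set(bq_client):
    install_big_query.create_config_table(bq_client)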
Example #6
class Schema(Task):
    def __init__(self, service: str, **kwargs):
        self.service_account = service_account.Credentials.from_service_account_file(
            os.environ['MYSQL_BIG_QUERY_GOOGLE_AUTH'])
        self.definitions = service_helpers.get_definitions(service)

        self.sd_logger = StackDriverAdapter(self.service_account)
        self.sd_logger.get_client()
        self.sd_logger.create_logger(f"{self.definitions['service']}-etl")

        self.pub_sub_client = PubSubAdapter(self.service_account)
        self.pub_sub_client.get_subscriber()
        self.pub_sub_client.set_subscription(
            f"{self.definitions['service']}-etl-schema")

        self.big_query_client = BigQueryAdapter(self.service_account)
        self.big_query_client.get_client()
        self.big_query_client.set_data_set_ref(self.definitions['data_set'])
        super().__init__(**kwargs)

    def get_mysql_schema(self, table: str) -> Union[list, bool]:
        mysql_client = MySqlAdapter(self.definitions['service'])
        columns = mysql_client.mysql_table_definition(table)
        if not columns:
            self.sd_logger.error(mysql_client.errors, {
                'class': 'Schema',
                'method': 'get_mysql_schema',
                'table': table
            })
            return False
        return columns

    @staticmethod
    def organized_mysql_schema(schema: list) -> list:
        organized_column_data = []
        for column in schema:
            organized_column_data.append(
                service_helpers.label_mysql_table_definitions(column))
        return organized_column_data

    @staticmethod
    def convert_mysql_to_big_query_schema(schema: list) -> list:
        return service_helpers.generate_bq_schema_from_mysql(schema)

    def store_mysql_schema(self, schema: list, table: str) -> bool:
        encoded_schema = json.dumps(schema)
        storage_client = StorageAdapter(self.service_account)
        storage_client.get_client()

        date_time_obj = datetime.utcnow()

        date_time_str = date_time_obj.strftime("%m-%d-%Y_%H:%M:%S")
        result = storage_client.write_string(
            bucket=self.definitions['service'] + '-etl',
            destination=f'schema/{table}/{date_time_str}_UTC',
            string=encoded_schema)
        if not result:
            self.sd_logger.error(storage_client.errors, {
                'class': 'Schema',
                'method': 'store_mysql_schema',
                'table': table
            })
            return False

        return result

    def check_table_exists(self, table):
        self.big_query_client.set_table_ref(table)
        return self.big_query_client.check_table()

    def get_current_schema(self, table: str):
        self.big_query_client.set_table_ref(table)
        return self.big_query_client.get_schema()

    @staticmethod
    def compare_schema(new_schema, current_schema) -> bool:
        # first check is the total number of items; use != rather than
        # `is not`, which compares object identity and is unreliable for ints
        if len(new_schema) != len(current_schema):
            return False
        # compare column names and types position by position; any mismatch
        # means the schemas differ
        for new_field, current_field in zip(new_schema, current_schema):
            if new_field.name != current_field.name:
                return False
            if new_field.field_type != current_field.field_type:
                return False
        return True

    def create_table(self, table: str, schema: list) -> bool:
        self.big_query_client.set_table_ref(table)
        result = self.big_query_client.create_table(schema=schema,
                                                    overwrite=True)
        if result:
            self.sd_logger.info({'message': f"Table {table} Created"}, {
                'class': 'Schema',
                'method': 'create_table',
                'table': table
            })
        else:
            self.sd_logger.error(self.big_query_client.errors, {
                'class': 'Schema',
                'method': 'create_table',
                'table': table
            })
        return result

    @staticmethod
    def acknowledge_message(message):
        message.ack()

    def copy_bq_table(self, table: str):
        self.big_query_client.set_table_ref(table)
        copy_table_ref = self.big_query_client.table_ref

        date_time_obj = datetime.utcnow()
        destination_str = f'{table}_{date_time_obj.strftime("%m_%d_%Y")}'
        self.big_query_client.set_table_ref(destination_str)
        destination_table_ref = self.big_query_client.table_ref

        result = self.big_query_client.copy_table(
            copy_table=copy_table_ref, destination_table=destination_table_ref)
        if result:
            self.sd_logger.warning(
                {'message': f"Table {table} copied to {destination_str}"}, {
                    'class': 'Schema',
                    'method': 'copy_bq_table',
                    'table': table
                })
        else:
            self.sd_logger.error(self.big_query_client.errors, {
                'class': 'Schema',
                'method': 'copy_bq_table',
                'table': table
            })
        return destination_str

    def backup_table_to_storage(self, table):
        self.big_query_client.set_table_ref(table)
        copy_table_ref = self.big_query_client.table_ref

        date_time_obj = datetime.utcnow()
        destination = f'gs://{self.definitions["service"]}-etl/data/{table}/{date_time_obj.strftime("%m-%d-%Y_%H:%M:%S")}_UTC_*.avro'
        result = self.big_query_client.export_table_to_storage(
            table=copy_table_ref, destination=destination)
        if result:
            self.sd_logger.info(
                {'message': f"Table {table} exported to {destination}"}, {
                    'class': 'Schema',
                    'method': 'backup_table_to_storage'
                })
        else:
            self.sd_logger.error(self.big_query_client.errors, {
                'class': 'Schema',
                'method': 'backup_table_to_storage'
            })
        return result

    def delete_table(self, table):
        self.big_query_client.set_table_ref(table)
        result = self.big_query_client.delete_table()
        if result:
            self.sd_logger.info({'message': f"Table {table} deleted"}, {
                'class': 'Schema',
                'method': 'delete_table'
            })
        else:
            self.sd_logger.error(self.big_query_client.errors, {
                'class': 'Schema',
                'method': 'delete_table'
            })
        return result
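
A sketch of the schema-migration flow these methods imply: pull the MySQL table definition, convert it to a BigQuery schema, archive a copy, and only rebuild the BigQuery table when the schemas diverge. The service and table names are placeholders, and the backup-before-delete ordering is an assumption rather than something stated in the original source.

schema_task = Schema("billing", name="billing-schema")
table = "orders"

mysql_schema = schema_task.get_mysql_schema(table)
if mysql_schema:
    organized = schema_task.organized_mysql_schema(mysql_schema)
    bq_schema = schema_task.convert_mysql_to_big_query_schema(organized)
    schema_task.store_mysql_schema(organized, table)

    if not schema_task.check_table_exists(table):
        schema_task.create_table(table, bq_schema)
    elif not schema_task.compare_schema(bq_schema,
                                        schema_task.get_current_schema(table)):
        # Preserve the existing data before the table definition is replaced.
        schema_task.copy_bq_table(table)
        schema_task.backup_table_to_storage(table)
        schema_task.delete_table(table)
        schema_task.create_table(table, bq_schema)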