class PrefectAddServiceStorage(Task):
    """Prefect task that provisions the Cloud Storage bucket for one service."""

    def __init__(self, **kwargs):
        """Load Google credentials and set up a StackDriver logger.

        Reads the service-account key file path from the
        MYSQL_BIG_QUERY_GOOGLE_AUTH environment variable (raises KeyError
        if it is unset).
        """
        self.service_account = service_account.Credentials.from_service_account_file(
            os.environ['MYSQL_BIG_QUERY_GOOGLE_AUTH'])
        self.sd_logger = StackDriverAdapter(self.service_account)
        self.sd_logger.get_client()
        # Plain literal; the original used an f-string with no placeholders.
        self.sd_logger.create_logger("service-add-storage")
        super().__init__(**kwargs)

    def create_service_bucket(self, service: str) -> bool:
        """Create the "<service>-sync" bucket, log the outcome, return it.

        Returns whatever StorageAdapter.make_bucket reports (truthy on
        success); on failure the adapter's accumulated errors are logged
        as a warning.
        """
        client = StorageAdapter(self.service_account)
        client.get_client()
        result = client.make_bucket(f"{service}-sync")
        if result:
            self.sd_logger.info(
                {'message': f'Created {service} Bucket'},
                {'class': 'PrefectAddServiceStorage',
                 'method': 'create_service_bucket'})
        else:
            self.sd_logger.warning(
                client.errors,
                {'class': 'PrefectAddServiceStorage',
                 'method': 'create_service_bucket'})
        return result
class PrefectInstallStorage(Task):
    """Prefect task that provisions the shared mysql_sync_keys bucket."""

    def __init__(self, **kwargs):
        """Load Google credentials and set up a StackDriver logger.

        Reads the service-account key file path from the
        MYSQL_BIG_QUERY_GOOGLE_AUTH environment variable (raises KeyError
        if it is unset).
        """
        self.service_account = service_account.Credentials.from_service_account_file(
            os.environ['MYSQL_BIG_QUERY_GOOGLE_AUTH'])
        self.sd_logger = StackDriverAdapter(self.service_account)
        self.sd_logger.get_client()
        # Plain literals; the originals were f-strings with no placeholders.
        self.sd_logger.create_logger("storage-install-mysql-sync")
        super().__init__(**kwargs)

    def create_config_bucket(self) -> bool:
        """Create the 'mysql_sync_keys' bucket, log the outcome, return it."""
        client = StorageAdapter(self.service_account)
        client.get_client()
        result = client.make_bucket('mysql_sync_keys')
        if result:
            self.sd_logger.info(
                {'message': 'Created mysql_sync_keys Bucket'},
                {'class': 'PrefectInstallStorage',
                 'method': 'create_config_bucket'})
        else:
            self.sd_logger.warning(
                client.errors,
                {'class': 'PrefectInstallStorage',
                 'method': 'create_config_bucket'})
        return result
class PrefectInstallPubSub(Task):
    """Prefect task that creates the Pub/Sub topics and subscriptions."""

    def __init__(self, **kwargs):
        """Load Google credentials and set up a StackDriver logger.

        Reads the service-account key file path from the
        MYSQL_BIG_QUERY_GOOGLE_AUTH environment variable (raises KeyError
        if it is unset).
        """
        self.service_account = service_account.Credentials.from_service_account_file(
            os.environ['MYSQL_BIG_QUERY_GOOGLE_AUTH']
        )
        self.sd_logger = StackDriverAdapter(self.service_account)
        self.sd_logger.get_client()
        # Plain literal; the original used an f-string with no placeholders.
        self.sd_logger.create_logger("install-pub-sub")
        super().__init__(**kwargs)

    def create_client(self) -> PubSubAdapter:
        """Return a PubSubAdapter with its publisher client initialized."""
        pub_sub_client = PubSubAdapter(self.service_account)
        pub_sub_client.get_publisher()
        return pub_sub_client

    def create_topic(self, pub_sub_client: PubSubAdapter, topic: str) -> bool:
        """Create *topic* via the given adapter; log and return the result."""
        pub_sub_client.set_topic(topic)
        result = pub_sub_client.create_topic()
        if result:
            self.sd_logger.info(
                {'message': f"Created {topic} Topic"},
                {'class': 'PrefectInstallPubSub', 'method': 'create_topic'}
            )
        else:
            self.sd_logger.warning(
                pub_sub_client.errors,
                {'class': 'PrefectInstallPubSub', 'method': 'create_topic'}
            )
        return result

    def create_subscription(self, pub_sub_client: PubSubAdapter, topic: str,
                            subscription: str) -> bool:
        """Create *subscription* on *topic*; log and return the result."""
        pub_sub_client.set_topic(topic)
        pub_sub_client.set_subscription(subscription)
        pub_sub_client.get_subscriber()
        result = pub_sub_client.create_subscription()
        if result:
            # NOTE(review): success message reports the topic, not the
            # subscription name -- confirm this is intentional.
            self.sd_logger.info(
                {'message': f"Created {topic} Subscription"},
                {'class': 'PrefectInstallPubSub',
                 'method': 'create_subscription'}
            )
        else:
            self.sd_logger.warning(
                pub_sub_client.errors,
                {'class': 'PrefectInstallPubSub',
                 'method': 'create_subscription'}
            )
        return result
class PrefectAddServiceBigQuery(Task):
    """Prefect task that creates a service's BigQuery dataset and tracking table."""

    def __init__(self, service: str, **kwargs):
        """Load credentials, the service definitions, and a StackDriver logger.

        Reads the service-account key file path from the
        MYSQL_BIG_QUERY_GOOGLE_AUTH environment variable (raises KeyError
        if it is unset).
        """
        self.service_account = service_account.Credentials.from_service_account_file(
            os.environ['MYSQL_BIG_QUERY_GOOGLE_AUTH']
        )
        self.definitions = service_helpers.get_definitions(service)
        self.sd_logger = StackDriverAdapter(self.service_account)
        self.sd_logger.get_client()
        # Plain literal; the original used an f-string with no placeholders.
        self.sd_logger.create_logger("service-add-big-query")
        super().__init__(**kwargs)

    def create_client(self) -> BigQueryAdapter:
        """Return a BigQueryAdapter with its client initialized."""
        big_query_client = BigQueryAdapter(self.service_account)
        big_query_client.get_client()
        return big_query_client

    def create_data_set(self, big_query_client: BigQueryAdapter) -> bool:
        """Create this service's dataset; log and return the result.

        Short-circuits to True when the dataset already exists.
        """
        if big_query_client.check_dataset(self.definitions['data_set']):
            return True
        big_query_client.set_data_set_ref(self.definitions['data_set'])
        result = big_query_client.create_data_set()
        if result:
            self.sd_logger.info(
                {'message': f"Created {self.definitions['data_set']} Data Set"},
                {'class': 'PrefectAddServiceBigQuery',
                 'method': 'create_data_set'})
        else:
            self.sd_logger.warning(
                big_query_client.errors,
                {'class': 'PrefectAddServiceBigQuery',
                 'method': 'create_data_set'}
            )
        return result

    def create_tracking_table(self, big_query_client: BigQueryAdapter) -> bool:
        """Create the sync_tracking_table in the service dataset.

        Short-circuits to True when the table already exists; otherwise
        creates it with the fixed tracking schema, logs, and returns the
        adapter's result.
        """
        big_query_client.set_data_set_ref(self.definitions['data_set'])
        big_query_client.set_table_ref('sync_tracking_table')
        if big_query_client.check_table():
            return True
        schema = [
            bigquery.SchemaField(
                'table', 'STRING',
                description="Tracked Table Name"
            ),
            bigquery.SchemaField(
                'watched', 'STRING',
                description="Column to watch to minimize the number of records loaded per sync"
            ),
            bigquery.SchemaField(
                'primary_id', 'STRING',
                description="Primary Id Column(s)"
            ),
            bigquery.SchemaField(
                'synchronize', 'BOOLEAN',
                description="Flag to Synchronize the table"
            )
        ]
        result = big_query_client.create_table(schema)
        if result:
            self.sd_logger.info(
                {'message': "Created tracking table"},
                {'class': 'PrefectAddServiceBigQuery',
                 'method': 'create_tracking_table'}
            )
        else:
            self.sd_logger.warning(
                big_query_client.errors,
                {'class': 'PrefectAddServiceBigQuery',
                 'method': 'create_tracking_table'}
            )
        return result
class PrefectInstallBigQuery(Task):
    """Prefect task that creates the shared mysql_sync dataset and config table."""

    def __init__(self, **kwargs):
        """Load Google credentials and set up a StackDriver logger.

        Reads the service-account key file path from the
        MYSQL_BIG_QUERY_GOOGLE_AUTH environment variable (raises KeyError
        if it is unset).
        """
        self.service_account = service_account.Credentials.from_service_account_file(
            os.environ['MYSQL_BIG_QUERY_GOOGLE_AUTH'])
        self.sd_logger = StackDriverAdapter(self.service_account)
        self.sd_logger.get_client()
        # Plain literal; the original used an f-string with no placeholders.
        self.sd_logger.create_logger("install-big-query")
        super().__init__(**kwargs)

    def create_client(self) -> BigQueryAdapter:
        """Return a BigQueryAdapter with its client initialized."""
        big_query_client = BigQueryAdapter(self.service_account)
        big_query_client.get_client()
        return big_query_client

    def create_config_data_set(self, big_query_client: BigQueryAdapter) -> bool:
        """Create the 'mysql_sync' dataset; log and return the result.

        Short-circuits to True when the dataset already exists.
        """
        if big_query_client.check_dataset('mysql_sync'):
            return True
        big_query_client.set_data_set_ref('mysql_sync')
        result = big_query_client.create_data_set()
        # Fix: log metadata previously reported 'create_data_set', which is
        # not this method's name.
        if result:
            self.sd_logger.info(
                {'message': "Created mysql_sync Data Set"},
                {'class': 'PrefectInstallBigQuery',
                 'method': 'create_config_data_set'})
        else:
            self.sd_logger.warning(
                big_query_client.errors,
                {'class': 'PrefectInstallBigQuery',
                 'method': 'create_config_data_set'})
        return result

    def create_config_table(self, big_query_client: BigQueryAdapter) -> bool:
        """Create the 'data_sources' config table in mysql_sync.

        Short-circuits to True when the table already exists; otherwise
        creates it with the connection-config schema, logs, and returns
        the adapter's result.
        """
        big_query_client.set_data_set_ref('mysql_sync')
        big_query_client.set_table_ref('data_sources')
        if big_query_client.check_table():
            return True
        schema = [
            bigquery.SchemaField('service', 'STRING',
                                 description="Service Name"),
            bigquery.SchemaField('data_set', 'STRING',
                                 description="Big Query Data Set"),
            bigquery.SchemaField('host', 'STRING',
                                 description="MySQL host connection"),
            bigquery.SchemaField('user', 'STRING',
                                 description="MySQL connection user"),
            bigquery.SchemaField('password', 'STRING',
                                 description="MySQL connection password"),
            bigquery.SchemaField('database', 'STRING',
                                 description="MySQL Database"),
            bigquery.SchemaField('last_run', 'TIMESTAMP',
                                 description="Last produce date"),
        ]
        result = big_query_client.create_table(schema)
        if result:
            self.sd_logger.info(
                {'message': "Created Config table"},
                {'class': 'PrefectInstallBigQuery',
                 'method': 'create_config_table'})
        else:
            self.sd_logger.warning(
                big_query_client.errors,
                {'class': 'PrefectInstallBigQuery',
                 'method': 'create_config_table'})
        return result
class Schema(Task):
    """Prefect task that syncs a service's MySQL table schemas into BigQuery.

    Pulls table definitions from MySQL, archives them to Cloud Storage,
    compares them against the live BigQuery schema, and (re)creates,
    copies, exports, or deletes BigQuery tables as needed.
    """

    def __init__(self, service: str, **kwargs):
        """Wire up credentials, logging, Pub/Sub and BigQuery clients.

        Reads the service-account key file path from the
        MYSQL_BIG_QUERY_GOOGLE_AUTH environment variable (raises KeyError
        if it is unset).
        """
        self.service_account = service_account.Credentials.from_service_account_file(
            os.environ['MYSQL_BIG_QUERY_GOOGLE_AUTH'])
        self.definitions = service_helpers.get_definitions(service)
        self.sd_logger = StackDriverAdapter(self.service_account)
        self.sd_logger.get_client()
        self.sd_logger.create_logger(f"{self.definitions['service']}-etl")
        self.pub_sub_client = PubSubAdapter(self.service_account)
        self.pub_sub_client.get_subscriber()
        self.pub_sub_client.set_subscription(
            f"{self.definitions['service']}-etl-schema")
        self.big_query_client = BigQueryAdapter(self.service_account)
        self.big_query_client.get_client()
        self.big_query_client.set_data_set_ref(self.definitions['data_set'])
        super().__init__(**kwargs)

    def get_mysql_schema(self, table: str) -> Union[list, bool]:
        """Return *table*'s MySQL column definitions, or False on failure."""
        mysql_client = MySqlAdapter(self.definitions['service'])
        columns = mysql_client.mysql_table_definition(table)
        if not columns:
            self.sd_logger.error(mysql_client.errors, {
                'class': 'Schema',
                'method': 'get_mysql_schema',
                'table': table
            })
            return False
        return columns

    @staticmethod
    def organized_mysql_schema(schema: list) -> list:
        """Label each raw MySQL column row via the service helpers."""
        return [
            service_helpers.label_mysql_table_definitions(column)
            for column in schema
        ]

    @staticmethod
    def convert_mysql_to_big_query_schema(schema: list) -> list:
        """Translate labeled MySQL definitions into a BigQuery schema."""
        return service_helpers.generate_bq_schema_from_mysql(schema)

    def store_mysql_schema(self, schema: list, table: str) -> bool:
        """Archive *schema* as JSON under schema/<table>/<UTC timestamp>.

        Writes to the "<service>-etl" bucket; returns False (and logs)
        when the write fails.
        """
        encoded_schema = json.dumps(schema)
        storage_client = StorageAdapter(self.service_account)
        storage_client.get_client()
        date_time_obj = datetime.utcnow()
        result = storage_client.write_string(
            bucket=self.definitions['service'] + '-etl',
            destination=(
                f'schema/{table}/'
                f'{date_time_obj.strftime("%m-%d-%Y_%H:%M:%S")}_UTC'),
            string=encoded_schema)
        if not result:
            self.sd_logger.error(storage_client.errors, {
                'class': 'Schema',
                'method': 'store_mysql_schema',
                'table': table
            })
            return False
        return result

    def check_table_exists(self, table):
        """Return whether *table* exists in the service dataset."""
        self.big_query_client.set_table_ref(table)
        return self.big_query_client.check_table()

    def get_current_schema(self, table: str):
        """Return the live BigQuery schema for *table*."""
        self.big_query_client.set_table_ref(table)
        return self.big_query_client.get_schema()

    @staticmethod
    def compare_schema(new_schema, current_schema) -> bool:
        """Return True when the two schemas are considered equivalent.

        Schemas of different lengths never match.  Positionally, a pair
        of fields mismatches only when the names are equal but the field
        types differ.
        """
        # Fix: the original compared lengths with `is not`, which tests
        # object identity and only works by accident of CPython's
        # small-int caching.
        if len(new_schema) != len(current_schema):
            return False
        for current_field, new_field in zip(current_schema, new_schema):
            # NOTE(review): fields whose *names* differ are treated as a
            # match here, mirroring the original logic -- confirm this is
            # intentional.
            if (current_field.name == new_field.name
                    and current_field.field_type != new_field.field_type):
                return False
        return True

    def create_table(self, table: str, schema: list) -> bool:
        """(Re)create *table* with *schema*, overwriting any existing table."""
        self.big_query_client.set_table_ref(table)
        result = self.big_query_client.create_table(schema=schema,
                                                    overwrite=True)
        if result:
            self.sd_logger.info({'message': f"Table {table} Created"}, {
                'class': 'Schema',
                'method': 'create_table',
                'table': table
            })
        else:
            self.sd_logger.error(self.big_query_client.errors, {
                'class': 'Schema',
                'method': 'create_table',
                'table': table
            })
        return result

    @staticmethod
    def acknowledge_message(message):
        """Ack a Pub/Sub message so it is not redelivered."""
        message.ack()

    def copy_bq_table(self, table: str):
        """Copy *table* to a date-suffixed backup table; return its name.

        Returns the destination table name regardless of whether the copy
        succeeded (failures are logged as errors).
        """
        self.big_query_client.set_table_ref(table)
        copy_table_ref = self.big_query_client.table_ref
        date_time_obj = datetime.utcnow()
        destination_str = f'{table}_{date_time_obj.strftime("%m_%d_%Y")}'
        self.big_query_client.set_table_ref(destination_str)
        destination_table_ref = self.big_query_client.table_ref
        result = self.big_query_client.copy_table(
            copy_table=copy_table_ref,
            destination_table=destination_table_ref)
        if result:
            # NOTE(review): success is logged at warning level -- presumably
            # to make backup events stand out; confirm.
            self.sd_logger.warning(
                {'message': f"Table {table} copied to {destination_str}"}, {
                    'class': 'Schema',
                    'method': 'copy_bq_table',
                    'table': table
                })
        else:
            self.sd_logger.error(self.big_query_client.errors, {
                'class': 'Schema',
                'method': 'copy_bq_table',
                'table': table
            })
        return destination_str

    def backup_table_to_storage(self, table):
        """Export *table* to timestamped Avro shards in the service bucket."""
        self.big_query_client.set_table_ref(table)
        copy_table_ref = self.big_query_client.table_ref
        date_time_obj = datetime.utcnow()
        destination = f'gs://{self.definitions["service"]}-etl/data/{table}/{date_time_obj.strftime("%m-%d-%Y_%H:%M:%S")}_UTC_*.avro'
        result = self.big_query_client.export_table_to_storage(
            table=copy_table_ref, destination=destination)
        if result:
            self.sd_logger.info(
                {'message': f"Table {table} exported to {destination}"}, {
                    'class': 'Schema',
                    'method': 'backup_table_to_storage'
                })
        else:
            self.sd_logger.error(self.big_query_client.errors, {
                'class': 'Schema',
                'method': 'backup_table_to_storage'
            })
        return result

    def delete_table(self, table):
        """Delete *table* from the service dataset; log and return result."""
        self.big_query_client.set_table_ref(table)
        result = self.big_query_client.delete_table()
        if result:
            self.sd_logger.info({'message': f"Table {table} deleted"}, {
                'class': 'Schema',
                'method': 'delete_table'
            })
        else:
            self.sd_logger.error(self.big_query_client.errors, {
                'class': 'Schema',
                'method': 'delete_table'
            })
        return result