def __init__(self, config_file, region_name, global_config_values=None):
    """Build and run the unload-copy task graph for the configured resources.

    Loads the JSON configuration, derives source/destination resources, wires
    optional pre-test tasks plus the unload/copy/cleanup chain into the task
    manager, and runs the whole graph.

    :param config_file: path or S3 location of the JSON configuration file.
    :param region_name: AWS region used for the S3 staging area.
    :param global_config_values: optional mapping of global configuration
        overrides; when ``None`` the defaults from
        ``GlobalConfigParametersReader`` are used.
    :raises NotImplementedError: for unsupported source/destination pairings.
    """
    # Fix for the default-argument pitfall: the previous default called
    # GlobalConfigParametersReader() once at definition time and shared the
    # resulting dict across every call.
    if global_config_values is None:
        global_config_values = GlobalConfigParametersReader().get_default_config_key_values()
    # Publish the effective settings through the module-level registry.
    for key, value in global_config_values.items():
        config_parameters[key] = value
    self.region = region_name
    self.s3_helper = S3Helper(self.region)
    # load the configuration
    self.config_helper = ConfigHelper(config_file, self.s3_helper)
    source = ResourceFactory.get_source_resource_from_config_helper(
        self.config_helper, self.region)
    destination = ResourceFactory.get_target_resource_from_config_helper(
        self.config_helper, self.region)
    self.task_manager = TaskManager()
    # Barrier (no-op) tasks: cluster checks finish before resource checks,
    # which finish before any data movement starts.
    self.barrier_after_all_cluster_pre_tests = NoOperationTask()
    self.task_manager.add_task(self.barrier_after_all_cluster_pre_tests)
    self.barrier_after_all_resource_pre_tests = NoOperationTask()
    self.task_manager.add_task(self.barrier_after_all_resource_pre_tests)
    # Dispatch on the source resource type; every branch requires a
    # database-like destination.
    if isinstance(source, TableResource):
        if isinstance(destination, DBResource):
            if not isinstance(destination, TableResource):
                # Derive a concrete target table from the DB resource and
                # the source table's identifiers.
                destination = ResourceFactory.get_table_resource_from_merging_2_resources(
                    destination, source)
            if global_config_values['tableName'] and global_config_values['tableName'] != 'None':
                destination.set_table(global_config_values['tableName'])
            self.add_table_migration(source, destination, global_config_values)
        else:
            logging.fatal('Destination should be a database resource')
            raise NotImplementedError
    elif isinstance(source, SchemaResource):
        if not isinstance(destination, DBResource):
            logging.fatal('Destination should be a database resource')
            raise NotImplementedError
        self.add_schema_migration(source, destination, global_config_values)
    elif isinstance(source, DBResource):
        if not isinstance(destination, DBResource):
            logging.fatal('Destination should be a database resource')
            raise NotImplementedError
        self.add_database_migration(source, destination, global_config_values)
    else:
        # TODO: add additional scenario's
        # For example if both resources are of type schema then create target schema and migrate all tables
        logging.fatal(
            'Source is not a Table, this type of unload-copy is currently not supported.'
        )
        raise NotImplementedError
    self.task_manager.run()
def __init__(self, config_file, region_name, global_config_values=None):
    """Run a single-table unload-copy based on the given configuration.

    :param config_file: path or S3 location of the JSON configuration file.
    :param region_name: AWS region used for the S3 staging area.
    :param global_config_values: optional mapping of global configuration
        overrides; when ``None`` the defaults from
        ``GlobalConfigParametersReader`` are used.
    """
    # Fix for the default-argument pitfall: the previous default called
    # GlobalConfigParametersReader() once at definition time and shared the
    # resulting dict across every call.
    if global_config_values is None:
        global_config_values = GlobalConfigParametersReader().get_default_config_key_values()
    # Publish the effective settings through the module-level registry.
    for key, value in global_config_values.items():
        config_parameters[key] = value
    self.region = region_name
    self.s3_helper = S3Helper(self.region)
    # load the configuration
    self.config_helper = ConfigHelper(config_file, self.s3_helper)
    source = ResourceFactory.get_source_resource_from_config_helper(
        self.config_helper, self.region)
    destination = ResourceFactory.get_target_resource_from_config_helper(
        self.config_helper, self.region)
    self.task_manager = TaskManager()
    # Barrier (no-op) tasks order pre-tests ahead of any data movement.
    self.barrier_after_all_cluster_pre_tests = NoOperationTask()
    self.task_manager.add_task(self.barrier_after_all_cluster_pre_tests)
    self.barrier_after_all_resource_pre_tests = NoOperationTask()
    self.task_manager.add_task(self.barrier_after_all_resource_pre_tests)
    # TODO: Check whether both resources are of type table if that is not the case then perform other scenario's
    # For example if both resources are of type schema then create target schema and migrate all tables
    self.add_table_migration(source, destination, global_config_values)
    self.task_manager.run()
def __init__(self, config_file, region_name, global_config_values=None):
    """Run an unload-copy for one or many tables, per the configuration.

    When both ``unloadSource`` and ``copyTarget`` declare a ``tableNames``
    list, each source/destination pair is migrated in turn; otherwise a
    single table is migrated.

    :param config_file: path or S3 location of the JSON configuration file.
    :param region_name: AWS region used for the S3 staging area.
    :param global_config_values: optional mapping of global configuration
        overrides; when ``None`` the defaults from
        ``GlobalConfigParametersReader`` are used.
    :raises NotImplementedError: when the two ``tableNames`` lists are
        missing on one side or differ in length.
    """
    # Fix for the default-argument pitfall: the previous default called
    # GlobalConfigParametersReader() once at definition time and shared the
    # resulting dict across every call.
    if global_config_values is None:
        global_config_values = GlobalConfigParametersReader().get_default_config_key_values()
    # Publish the effective settings through the module-level registry.
    for key, value in global_config_values.items():
        config_parameters[key] = value
    self.region = region_name
    self.s3_helper = S3Helper(self.region)
    # load the configuration
    self.config_helper = ConfigHelper(config_file, self.s3_helper)
    self.task_manager = TaskManager()
    # Barrier (no-op) tasks order pre-tests ahead of any data movement.
    self.barrier_after_all_cluster_pre_tests = NoOperationTask()
    self.task_manager.add_task(self.barrier_after_all_cluster_pre_tests)
    self.barrier_after_all_resource_pre_tests = NoOperationTask()
    self.task_manager.add_task(self.barrier_after_all_resource_pre_tests)
    src_config = self.config_helper.config['unloadSource']
    dest_config = self.config_helper.config['copyTarget']
    # .get() instead of [] so a configuration without 'tableNames' falls
    # through to the single-table path instead of raising KeyError.
    src_tables = src_config.get('tableNames')
    if src_tables:
        dest_tables = dest_config.get('tableNames')
        logging.info("Migrating multiple tables")
        if not dest_tables or len(src_tables) != len(dest_tables):
            logging.fatal(
                "When migrating multiple tables 'tableNames' property must be configured in unloadSource and copyTarget, and be the same length"
            )
            raise NotImplementedError
        for src_table, dest_table in zip(src_tables, dest_tables):
            # The resource factories read 'tableName' from the config, so
            # patch it per pair before building the resources.
            src_config['tableName'] = src_table
            dest_config['tableName'] = dest_table
            source = ResourceFactory.get_source_resource_from_config_helper(
                self.config_helper, self.region)
            destination = ResourceFactory.get_target_resource_from_config_helper(
                self.config_helper, self.region)
            self.add_src_dest_tasks(source, destination, global_config_values)
    else:
        # Migrating a single table
        source = ResourceFactory.get_source_resource_from_config_helper(
            self.config_helper, self.region)
        destination = ResourceFactory.get_target_resource_from_config_helper(
            self.config_helper, self.region)
        self.add_src_dest_tasks(source, destination, global_config_values)
    self.task_manager.run()
def __init__(self, config_file, region_name, global_config_values=None):
    """Run a single-table unload-copy based on the given configuration.

    :param config_file: path or S3 location of the JSON configuration file.
    :param region_name: AWS region used for the S3 staging area.
    :param global_config_values: optional mapping of global configuration
        overrides; when ``None`` the defaults from
        ``GlobalConfigParametersReader`` are used.
    """
    # Fix for the default-argument pitfall: the previous default called
    # GlobalConfigParametersReader() once at definition time and shared the
    # resulting dict across every call.
    if global_config_values is None:
        global_config_values = GlobalConfigParametersReader().get_default_config_key_values()
    # Publish the effective settings through the module-level registry.
    for key, value in global_config_values.items():
        config_parameters[key] = value
    self.region = region_name
    self.s3_helper = S3Helper(self.region)
    # load the configuration
    self.config_helper = ConfigHelper(config_file, self.s3_helper)
    source = ResourceFactory.get_source_resource_from_config_helper(self.config_helper, self.region)
    destination = ResourceFactory.get_target_resource_from_config_helper(self.config_helper, self.region)
    self.task_manager = TaskManager()
    # Barrier (no-op) tasks order pre-tests ahead of any data movement.
    self.barrier_after_all_cluster_pre_tests = NoOperationTask()
    self.task_manager.add_task(self.barrier_after_all_cluster_pre_tests)
    self.barrier_after_all_resource_pre_tests = NoOperationTask()
    self.task_manager.add_task(self.barrier_after_all_resource_pre_tests)
    # TODO: Check whether both resources are of type table if that is not the case then perform other scenario's
    # For example if both resources are of type schema then create target schema and migrate all tables
    self.add_table_migration(source, destination, global_config_values)
    self.task_manager.run()
def __init__(self, config_file, region_name, global_config_values=None):
    """Run an unload-copy for one or many tables, per the configuration.

    When both ``unloadSource`` and ``copyTarget`` declare a ``tableNames``
    list, each source/destination pair is migrated in turn; otherwise a
    single table is migrated.

    :param config_file: path or S3 location of the JSON configuration file.
    :param region_name: AWS region used for the S3 staging area.
    :param global_config_values: optional mapping of global configuration
        overrides; when ``None`` the defaults from
        ``GlobalConfigParametersReader`` are used.
    :raises NotImplementedError: when the two ``tableNames`` lists are
        missing on one side or differ in length.
    """
    # Fix for the default-argument pitfall: the previous default called
    # GlobalConfigParametersReader() once at definition time and shared the
    # resulting dict across every call.
    if global_config_values is None:
        global_config_values = GlobalConfigParametersReader().get_default_config_key_values()
    # Publish the effective settings through the module-level registry.
    for key, value in global_config_values.items():
        config_parameters[key] = value
    self.region = region_name
    self.s3_helper = S3Helper(self.region)
    # load the configuration
    self.config_helper = ConfigHelper(config_file, self.s3_helper)
    self.task_manager = TaskManager()
    # Barrier (no-op) tasks order pre-tests ahead of any data movement.
    self.barrier_after_all_cluster_pre_tests = NoOperationTask()
    self.task_manager.add_task(self.barrier_after_all_cluster_pre_tests)
    self.barrier_after_all_resource_pre_tests = NoOperationTask()
    self.task_manager.add_task(self.barrier_after_all_resource_pre_tests)
    src_config = self.config_helper.config['unloadSource']
    dest_config = self.config_helper.config['copyTarget']
    # .get() instead of [] so a configuration without 'tableNames' falls
    # through to the single-table path instead of raising KeyError.
    src_tables = src_config.get('tableNames')
    if src_tables:
        dest_tables = dest_config.get('tableNames')
        logging.info("Migrating multiple tables")
        if not dest_tables or len(src_tables) != len(dest_tables):
            logging.fatal("When migrating multiple tables 'tableNames' property must be configured in unloadSource and copyTarget, and be the same length")
            raise NotImplementedError
        for src_table, dest_table in zip(src_tables, dest_tables):
            # The resource factories read 'tableName' from the config, so
            # patch it per pair before building the resources.
            src_config['tableName'] = src_table
            dest_config['tableName'] = dest_table
            source = ResourceFactory.get_source_resource_from_config_helper(self.config_helper, self.region)
            destination = ResourceFactory.get_target_resource_from_config_helper(self.config_helper, self.region)
            self.add_src_dest_tasks(source, destination, global_config_values)
    else:
        # Migrating a single table
        source = ResourceFactory.get_source_resource_from_config_helper(self.config_helper, self.region)
        destination = ResourceFactory.get_target_resource_from_config_helper(self.config_helper, self.region)
        self.add_src_dest_tasks(source, destination, global_config_values)
    self.task_manager.run()
class UnloadCopyTool:
    """Migrate one or more Redshift tables via an S3 unload/copy staging area."""

    def __init__(self, config_file, region_name, global_config_values=None):
        """Build and run the unload-copy task graph.

        :param config_file: path or S3 location of the JSON configuration file.
        :param region_name: AWS region used for the S3 staging area.
        :param global_config_values: optional mapping of global configuration
            overrides; when ``None`` the defaults from
            ``GlobalConfigParametersReader`` are used.
        :raises NotImplementedError: when multi-table configuration is
            inconsistent or the resource pairing is unsupported.
        """
        # Fix for the default-argument pitfall: the previous default called
        # GlobalConfigParametersReader() once at definition time and shared
        # the resulting dict across every call.
        if global_config_values is None:
            global_config_values = GlobalConfigParametersReader().get_default_config_key_values()
        # Publish the effective settings through the module-level registry.
        for key, value in global_config_values.items():
            config_parameters[key] = value
        self.region = region_name
        self.s3_helper = S3Helper(self.region)
        # load the configuration
        self.config_helper = ConfigHelper(config_file, self.s3_helper)
        self.task_manager = TaskManager()
        # Barrier (no-op) tasks order pre-tests ahead of any data movement.
        self.barrier_after_all_cluster_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_cluster_pre_tests)
        self.barrier_after_all_resource_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_resource_pre_tests)
        src_config = self.config_helper.config['unloadSource']
        dest_config = self.config_helper.config['copyTarget']
        # .get() instead of [] so a configuration without 'tableNames' falls
        # through to the single-table path instead of raising KeyError.
        src_tables = src_config.get('tableNames')
        if src_tables:
            dest_tables = dest_config.get('tableNames')
            logging.info("Migrating multiple tables")
            if not dest_tables or len(src_tables) != len(dest_tables):
                logging.fatal("When migrating multiple tables 'tableNames' property must be configured in unloadSource and copyTarget, and be the same length")
                raise NotImplementedError
            for src_table, dest_table in zip(src_tables, dest_tables):
                # The resource factories read 'tableName' from the config,
                # so patch it per pair before building the resources.
                src_config['tableName'] = src_table
                dest_config['tableName'] = dest_table
                source = ResourceFactory.get_source_resource_from_config_helper(self.config_helper, self.region)
                destination = ResourceFactory.get_target_resource_from_config_helper(self.config_helper, self.region)
                self.add_src_dest_tasks(source, destination, global_config_values)
        else:
            # Migrating a single table
            source = ResourceFactory.get_source_resource_from_config_helper(self.config_helper, self.region)
            destination = ResourceFactory.get_target_resource_from_config_helper(self.config_helper, self.region)
            self.add_src_dest_tasks(source, destination, global_config_values)
        self.task_manager.run()

    def add_src_dest_tasks(self, source, destination, global_config_values):
        """Route one source/destination pair to the table-migration builder.

        :raises NotImplementedError: when the pairing is not
            table-source -> database-like destination.
        """
        # TODO: Check whether both resources are of type table if that is not the case then perform other scenario's
        if isinstance(source, TableResource):
            if isinstance(destination, DBResource):
                if not isinstance(destination, TableResource):
                    # Derive a concrete target table from the DB resource
                    # and the source table's identifiers.
                    destination = ResourceFactory.get_table_resource_from_merging_2_resources(destination, source)
                if global_config_values['tableName'] and global_config_values['tableName'] != 'None':
                    destination.set_table(global_config_values['tableName'])
                self.add_table_migration(source, destination, global_config_values)
            else:
                logging.fatal('Destination should be a database resource')
                raise NotImplementedError
            # (removed a dead `pass` statement that followed this if/else)
        else:
            # TODO: add additional scenario's
            # For example if both resources are of type schema then create target schema and migrate all tables
            logging.fatal('Source is not a Table, this type of unload-copy is currently not supported.')
            raise NotImplementedError

    def add_table_migration(self, source, destination, global_config_values):
        """Queue pre-tests plus the unload/copy/cleanup chain for one table."""
        if global_config_values['connectionPreTest']:
            # Only add bare cluster connectivity checks where no table-level
            # pre-test is requested (a table pre-test adds its own below).
            if not global_config_values['destinationTablePreTest']:
                destination_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(resource=destination)
                self.task_manager.add_task(destination_cluster_pre_test,
                                           dependency_of=self.barrier_after_all_cluster_pre_tests)
            if not global_config_values['sourceTablePreTest']:
                source_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(resource=source)
                self.task_manager.add_task(source_cluster_pre_test,
                                           dependency_of=self.barrier_after_all_cluster_pre_tests)
        if global_config_values['destinationTablePreTest']:
            if global_config_values['destinationTableAutoCreate']:
                # The table may legitimately be absent when auto-create is
                # enabled, so only verify the destination cluster.
                destination_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(resource=destination)
                self.task_manager.add_task(destination_cluster_pre_test,
                                           dependency_of=self.barrier_after_all_cluster_pre_tests)
            else:
                destination_table_pre_test = FailIfResourceDoesNotExistsTask(destination)
                self.task_manager.add_task(destination_table_pre_test,
                                           dependency_of=self.barrier_after_all_resource_pre_tests,
                                           dependencies=self.barrier_after_all_cluster_pre_tests)
        if global_config_values['sourceTablePreTest']:
            source_table_pre_test = FailIfResourceDoesNotExistsTask(source)
            self.task_manager.add_task(source_table_pre_test,
                                       dependency_of=self.barrier_after_all_resource_pre_tests,
                                       dependencies=self.barrier_after_all_cluster_pre_tests)
        if global_config_values['destinationTableAutoCreate']:
            create_target = CreateIfTargetDoesNotExistTask(
                source_resource=source,
                target_resource=destination
            )
            self.task_manager.add_task(create_target,
                                       dependency_of=self.barrier_after_all_resource_pre_tests,
                                       dependencies=self.barrier_after_all_cluster_pre_tests)
        # Unload -> copy -> cleanup, chained strictly in that order.
        s3_details = S3Details(self.config_helper, source, encryption_key_id=encryptionKeyID)
        unload_data = UnloadDataToS3Task(source, s3_details)
        self.task_manager.add_task(unload_data, dependencies=self.barrier_after_all_resource_pre_tests)
        copy_data = CopyDataFromS3Task(destination, s3_details)
        self.task_manager.add_task(copy_data, dependencies=unload_data)
        s3_cleanup = CleanupS3StagingAreaTask(s3_details)
        self.task_manager.add_task(s3_cleanup, dependencies=copy_data)
class UnloadCopyTool:
    """Migrate a single Redshift table via an S3 unload/copy staging area."""

    def __init__(self, config_file, region_name, global_config_values=None):
        """Build and run the unload-copy task graph for one table.

        :param config_file: path or S3 location of the JSON configuration file.
        :param region_name: AWS region used for the S3 staging area.
        :param global_config_values: optional mapping of global configuration
            overrides; when ``None`` the defaults from
            ``GlobalConfigParametersReader`` are used.
        """
        # Fix for the default-argument pitfall: the previous default called
        # GlobalConfigParametersReader() once at definition time and shared
        # the resulting dict across every call.
        if global_config_values is None:
            global_config_values = GlobalConfigParametersReader().get_default_config_key_values()
        # Publish the effective settings through the module-level registry.
        for key, value in global_config_values.items():
            config_parameters[key] = value
        self.region = region_name
        self.s3_helper = S3Helper(self.region)
        # load the configuration
        self.config_helper = ConfigHelper(config_file, self.s3_helper)
        source = ResourceFactory.get_source_resource_from_config_helper(self.config_helper, self.region)
        destination = ResourceFactory.get_target_resource_from_config_helper(self.config_helper, self.region)
        self.task_manager = TaskManager()
        # Barrier (no-op) tasks order pre-tests ahead of any data movement.
        self.barrier_after_all_cluster_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_cluster_pre_tests)
        self.barrier_after_all_resource_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_resource_pre_tests)
        # TODO: Check whether both resources are of type table if that is not the case then perform other scenario's
        # For example if both resources are of type schema then create target schema and migrate all tables
        self.add_table_migration(source, destination, global_config_values)
        self.task_manager.run()

    def add_table_migration(self, source, destination, global_config_values):
        """Queue pre-tests plus the unload/copy/cleanup chain for one table."""
        if global_config_values['connectionPreTest']:
            # Only add bare cluster connectivity checks where no table-level
            # pre-test is requested (a table pre-test adds its own below).
            if not global_config_values['destinationTablePreTest']:
                destination_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(resource=destination)
                self.task_manager.add_task(destination_cluster_pre_test,
                                           dependency_of=self.barrier_after_all_cluster_pre_tests)
            if not global_config_values['sourceTablePreTest']:
                source_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(resource=source)
                self.task_manager.add_task(source_cluster_pre_test,
                                           dependency_of=self.barrier_after_all_cluster_pre_tests)
        if global_config_values['destinationTablePreTest']:
            if global_config_values['destinationTableAutoCreate']:
                # The table may legitimately be absent when auto-create is
                # enabled, so only verify the destination cluster.
                destination_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(resource=destination)
                self.task_manager.add_task(destination_cluster_pre_test,
                                           dependency_of=self.barrier_after_all_cluster_pre_tests)
            else:
                destination_table_pre_test = FailIfResourceDoesNotExistsTask(destination)
                self.task_manager.add_task(destination_table_pre_test,
                                           dependency_of=self.barrier_after_all_resource_pre_tests,
                                           dependencies=self.barrier_after_all_cluster_pre_tests)
        if global_config_values['sourceTablePreTest']:
            source_table_pre_test = FailIfResourceDoesNotExistsTask(source)
            self.task_manager.add_task(source_table_pre_test,
                                       dependency_of=self.barrier_after_all_resource_pre_tests,
                                       dependencies=self.barrier_after_all_cluster_pre_tests)
        if global_config_values['destinationTableAutoCreate']:
            create_target = CreateIfTargetDoesNotExistTask(
                source_resource=source,
                target_resource=destination
            )
            self.task_manager.add_task(create_target,
                                       dependency_of=self.barrier_after_all_resource_pre_tests,
                                       dependencies=self.barrier_after_all_cluster_pre_tests)
        # Unload -> copy -> cleanup, chained strictly in that order.
        s3_details = S3Details(self.config_helper, source, encryption_key_id=encryptionKeyID)
        unload_data = UnloadDataToS3Task(source, s3_details)
        self.task_manager.add_task(unload_data, dependencies=self.barrier_after_all_resource_pre_tests)
        copy_data = CopyDataFromS3Task(destination, s3_details)
        self.task_manager.add_task(copy_data, dependencies=unload_data)
        s3_cleanup = CleanupS3StagingAreaTask(s3_details)
        self.task_manager.add_task(s3_cleanup, dependencies=copy_data)
class UnloadCopyTool:
    """Migrate Redshift data (table, schema, or whole database granularity)
    via an S3 unload/copy staging area."""

    def __init__(self, config_file, region_name, global_config_values=None):
        """Build and run the unload-copy task graph for the configured resources.

        :param config_file: path or S3 location of the JSON configuration file.
        :param region_name: AWS region used for the S3 staging area.
        :param global_config_values: optional mapping of global configuration
            overrides; when ``None`` the defaults from
            ``GlobalConfigParametersReader`` are used.
        :raises NotImplementedError: for unsupported source/destination pairings.
        """
        # Fix for the default-argument pitfall: the previous default called
        # GlobalConfigParametersReader() once at definition time and shared
        # the resulting dict across every call.
        if global_config_values is None:
            global_config_values = GlobalConfigParametersReader().get_default_config_key_values()
        # Publish the effective settings through the module-level registry.
        for key, value in global_config_values.items():
            config_parameters[key] = value
        self.region = region_name
        self.s3_helper = S3Helper(self.region)
        # load the configuration
        self.config_helper = ConfigHelper(config_file, self.s3_helper)
        source = ResourceFactory.get_source_resource_from_config_helper(
            self.config_helper, self.region)
        destination = ResourceFactory.get_target_resource_from_config_helper(
            self.config_helper, self.region)
        self.task_manager = TaskManager()
        # Barrier (no-op) tasks order pre-tests ahead of any data movement.
        self.barrier_after_all_cluster_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_cluster_pre_tests)
        self.barrier_after_all_resource_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_resource_pre_tests)
        # Dispatch on the source resource type; every branch requires a
        # database-like destination.
        if isinstance(source, TableResource):
            if isinstance(destination, DBResource):
                if not isinstance(destination, TableResource):
                    # Derive a concrete target table from the DB resource
                    # and the source table's identifiers.
                    destination = ResourceFactory.get_table_resource_from_merging_2_resources(
                        destination, source)
                if global_config_values['tableName'] and global_config_values['tableName'] != 'None':
                    destination.set_table(global_config_values['tableName'])
                self.add_table_migration(source, destination, global_config_values)
            else:
                logging.fatal('Destination should be a database resource')
                raise NotImplementedError
        elif isinstance(source, SchemaResource):
            if not isinstance(destination, DBResource):
                logging.fatal('Destination should be a database resource')
                raise NotImplementedError
            self.add_schema_migration(source, destination, global_config_values)
        elif isinstance(source, DBResource):
            if not isinstance(destination, DBResource):
                logging.fatal('Destination should be a database resource')
                raise NotImplementedError
            self.add_database_migration(source, destination, global_config_values)
        else:
            # TODO: add additional scenario's
            # For example if both resources are of type schema then create target schema and migrate all tables
            logging.fatal(
                'Source is not a Table, this type of unload-copy is currently not supported.'
            )
            raise NotImplementedError
        self.task_manager.run()

    def add_database_migration(self, source, destination, global_config_values):
        """Queue a schema migration for every schema in the source database."""
        for schema in source.list_schemas():
            source_schema = SchemaResource(source.get_cluster(), schema)
            self.add_schema_migration(source_schema, destination, global_config_values)

    def add_schema_migration(self, source, destination, global_config_values):
        """Queue a table migration for every table inside the source schema."""
        for table in source.list_tables():
            source_table = TableResource(source.get_cluster(), source.get_schema(), table)
            target_table = ResourceFactory.get_table_resource_from_merging_2_resources(
                destination, source_table)
            # dict.get() collapses the original membership + truthiness
            # double lookup into one call with identical semantics.
            if self.config_helper.config['copyTarget'].get('explicit_ids'):
                target_table.set_explicit_ids(True)
            self.add_table_migration(source_table, target_table, global_config_values)

    def add_table_migration(self, source, destination, global_config_values):
        """Queue pre-tests plus the unload/copy/cleanup chain for one table."""
        if global_config_values['connectionPreTest']:
            # Only add bare cluster connectivity checks where no table-level
            # pre-test is requested (a table pre-test adds its own below).
            if not global_config_values['destinationTablePreTest']:
                destination_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(
                    resource=destination)
                self.task_manager.add_task(
                    destination_cluster_pre_test,
                    dependency_of=self.barrier_after_all_cluster_pre_tests)
            if not global_config_values['sourceTablePreTest']:
                source_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(
                    resource=source)
                self.task_manager.add_task(
                    source_cluster_pre_test,
                    dependency_of=self.barrier_after_all_cluster_pre_tests)
        if global_config_values['destinationTablePreTest']:
            if global_config_values['destinationTableAutoCreate']:
                # The table may legitimately be absent when auto-create is
                # enabled, so only verify the destination cluster.
                destination_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(
                    resource=destination)
                self.task_manager.add_task(
                    destination_cluster_pre_test,
                    dependency_of=self.barrier_after_all_cluster_pre_tests)
            else:
                destination_table_pre_test = FailIfResourceDoesNotExistsTask(
                    destination)
                self.task_manager.add_task(
                    destination_table_pre_test,
                    dependency_of=self.barrier_after_all_resource_pre_tests,
                    dependencies=self.barrier_after_all_cluster_pre_tests)
        if global_config_values['sourceTablePreTest']:
            source_table_pre_test = FailIfResourceDoesNotExistsTask(source)
            self.task_manager.add_task(
                source_table_pre_test,
                dependency_of=self.barrier_after_all_resource_pre_tests,
                dependencies=self.barrier_after_all_cluster_pre_tests)
        if global_config_values['destinationTableAutoCreate']:
            create_target = CreateIfTargetDoesNotExistTask(
                source_resource=source, target_resource=destination)
            self.task_manager.add_task(
                create_target,
                dependency_of=self.barrier_after_all_resource_pre_tests,
                dependencies=self.barrier_after_all_cluster_pre_tests)
        # Unload -> copy -> cleanup, chained strictly in that order.
        s3_details = S3Details(self.config_helper, source, encryption_key_id=encryptionKeyID)
        unload_data = UnloadDataToS3Task(source, s3_details)
        self.task_manager.add_task(
            unload_data, dependencies=self.barrier_after_all_resource_pre_tests)
        copy_data = CopyDataFromS3Task(destination, s3_details)
        self.task_manager.add_task(copy_data, dependencies=unload_data)
        s3_cleanup = CleanupS3StagingAreaTask(s3_details)
        self.task_manager.add_task(s3_cleanup, dependencies=copy_data)
class UnloadCopyTool:
    """Migrate a single Redshift table via an S3 unload/copy staging area,
    optionally overriding the table name from the global configuration."""

    def __init__(self, config_file, region_name, global_config_values=None):
        """Build and run the unload-copy task graph for one table.

        :param config_file: path or S3 location of the JSON configuration file.
        :param region_name: AWS region used for the S3 staging area.
        :param global_config_values: optional mapping of global configuration
            overrides; when ``None`` the defaults from
            ``GlobalConfigParametersReader`` are used.
        """
        # Fix for the default-argument pitfall: the previous default called
        # GlobalConfigParametersReader() once at definition time and shared
        # the resulting dict across every call.
        if global_config_values is None:
            global_config_values = GlobalConfigParametersReader().get_default_config_key_values()
        # Publish the effective settings through the module-level registry.
        for key, value in global_config_values.items():
            config_parameters[key] = value
        self.region = region_name
        self.s3_helper = S3Helper(self.region)
        # load the configuration
        self.config_helper = ConfigHelper(config_file, self.s3_helper)
        source = ResourceFactory.get_source_resource_from_config_helper(
            self.config_helper, self.region)
        destination = ResourceFactory.get_target_resource_from_config_helper(
            self.config_helper, self.region)
        if global_config_values['tableName']:
            # A global 'tableName' overrides both sides; cluster and schema
            # still come from the configured resources.
            source = TableResource(source.get_cluster(), source.get_schema(),
                                   global_config_values['tableName'])
            destination = TableResource(destination.get_cluster(), destination.get_schema(),
                                        global_config_values['tableName'])
        self.task_manager = TaskManager()
        # Barrier (no-op) tasks order pre-tests ahead of any data movement.
        self.barrier_after_all_cluster_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_cluster_pre_tests)
        self.barrier_after_all_resource_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_resource_pre_tests)
        # TODO: Check whether both resources are of type table if that is not the case then perform other scenario's
        # For example if both resources are of type schema then create target schema and migrate all tables
        self.add_table_migration(source, destination, global_config_values)
        self.task_manager.run()

    def add_table_migration(self, source, destination, global_config_values):
        """Queue pre-tests plus the unload/copy/cleanup chain for one table."""
        if global_config_values['connectionPreTest']:
            # Only add bare cluster connectivity checks where no table-level
            # pre-test is requested (a table pre-test adds its own below).
            if not global_config_values['destinationTablePreTest']:
                destination_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(
                    resource=destination)
                self.task_manager.add_task(
                    destination_cluster_pre_test,
                    dependency_of=self.barrier_after_all_cluster_pre_tests)
            if not global_config_values['sourceTablePreTest']:
                source_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(
                    resource=source)
                self.task_manager.add_task(
                    source_cluster_pre_test,
                    dependency_of=self.barrier_after_all_cluster_pre_tests)
        if global_config_values['destinationTablePreTest']:
            if global_config_values['destinationTableAutoCreate']:
                # The table may legitimately be absent when auto-create is
                # enabled, so only verify the destination cluster.
                destination_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(
                    resource=destination)
                self.task_manager.add_task(
                    destination_cluster_pre_test,
                    dependency_of=self.barrier_after_all_cluster_pre_tests)
            else:
                destination_table_pre_test = FailIfResourceDoesNotExistsTask(
                    destination)
                self.task_manager.add_task(
                    destination_table_pre_test,
                    dependency_of=self.barrier_after_all_resource_pre_tests,
                    dependencies=self.barrier_after_all_cluster_pre_tests)
        if global_config_values['sourceTablePreTest']:
            source_table_pre_test = FailIfResourceDoesNotExistsTask(source)
            self.task_manager.add_task(
                source_table_pre_test,
                dependency_of=self.barrier_after_all_resource_pre_tests,
                dependencies=self.barrier_after_all_cluster_pre_tests)
        if global_config_values['destinationTableAutoCreate']:
            create_target = CreateIfTargetDoesNotExistTask(
                source_resource=source, target_resource=destination)
            self.task_manager.add_task(
                create_target,
                dependency_of=self.barrier_after_all_resource_pre_tests,
                dependencies=self.barrier_after_all_cluster_pre_tests)
        # Unload -> copy -> cleanup, chained strictly in that order.
        s3_details = S3Details(self.config_helper, source, encryption_key_id=encryptionKeyID)
        unload_data = UnloadDataToS3Task(source, s3_details)
        self.task_manager.add_task(
            unload_data, dependencies=self.barrier_after_all_resource_pre_tests)
        copy_data = CopyDataFromS3Task(destination, s3_details)
        self.task_manager.add_task(copy_data, dependencies=unload_data)
        s3_cleanup = CleanupS3StagingAreaTask(s3_details)
        self.task_manager.add_task(s3_cleanup, dependencies=copy_data)