コード例 #1
0
    def __init__(self,
                 config_file,
                 region_name,
                 global_config_values=GlobalConfigParametersReader().
                 get_default_config_key_values()):
        """Set up the helpers, queue the migration matching the source, and run it.

        Args:
            config_file: Configuration location handed to ``ConfigHelper``.
            region_name: Region stored on ``self.region`` and passed to
                ``S3Helper`` and ``ResourceFactory``.
            global_config_values: Option name -> value mapping. Every entry is
                copied into ``config_parameters`` (defined outside this
                method). The default is evaluated once, when the method is
                defined.

        Raises:
            NotImplementedError: If the source is not a table, schema or
                database resource, or the destination is not a ``DBResource``.
        """
        for option_name, option_value in global_config_values.items():
            config_parameters[option_name] = option_value
        self.region = region_name
        self.s3_helper = S3Helper(self.region)

        # Parse the configuration; the S3 helper is handed to ConfigHelper.
        self.config_helper = ConfigHelper(config_file, self.s3_helper)

        source = ResourceFactory.get_source_resource_from_config_helper(
            self.config_helper, self.region)
        destination = ResourceFactory.get_target_resource_from_config_helper(
            self.config_helper, self.region)

        # Two no-op tasks are registered up front and kept on the instance.
        self.task_manager = TaskManager()
        cluster_barrier = NoOperationTask()
        self.barrier_after_all_cluster_pre_tests = cluster_barrier
        self.task_manager.add_task(cluster_barrier)
        resource_barrier = NoOperationTask()
        self.barrier_after_all_resource_pre_tests = resource_barrier
        self.task_manager.add_task(resource_barrier)

        # Reject unsupported source types first.
        # TODO: add additional scenarios, e.g. when both resources are schemas
        # create the target schema and migrate all tables.
        if not isinstance(source,
                          (TableResource, SchemaResource, DBResource)):
            logging.fatal(
                'Source is not a Table, this type of unload-copy is currently not supported.'
            )
            raise NotImplementedError

        if isinstance(source, TableResource):
            if not isinstance(destination, DBResource):
                logging.fatal('Destination should be a database resource')
                raise NotImplementedError
            if not isinstance(destination, TableResource):
                # Destination is not a table resource: build one by merging
                # it with the source table resource.
                destination = ResourceFactory.get_table_resource_from_merging_2_resources(
                    destination, source)
            table_override = global_config_values['tableName']
            if table_override and table_override != 'None':
                destination.set_table(table_override)
            self.add_table_migration(source, destination,
                                     global_config_values)
        elif isinstance(source, SchemaResource):
            if not isinstance(destination, DBResource):
                logging.fatal('Destination should be a database resource')
                raise NotImplementedError
            self.add_schema_migration(source, destination,
                                      global_config_values)
        else:
            # Only a DBResource source can reach this branch.
            if not isinstance(destination, DBResource):
                logging.fatal('Destination should be a database resource')
                raise NotImplementedError
            self.add_database_migration(source, destination,
                                        global_config_values)

        self.task_manager.run()
コード例 #2
0
    def __init__(self,
                 config_file,
                 region_name,
                 global_config_values=GlobalConfigParametersReader().
                 get_default_config_key_values()):
        """Set up the helpers, queue a single table migration, and run it.

        Args:
            config_file: Configuration location handed to ``ConfigHelper``.
            region_name: Region stored on ``self.region`` and passed to
                ``S3Helper`` and ``ResourceFactory``.
            global_config_values: Option name -> value mapping. Every entry is
                copied into ``config_parameters`` (defined outside this
                method) and the mapping is forwarded to
                ``add_table_migration``. The default is evaluated once, when
                the method is defined.
        """
        for option_name, option_value in global_config_values.items():
            config_parameters[option_name] = option_value
        self.region = region_name
        self.s3_helper = S3Helper(self.region)

        # Parse the configuration; the S3 helper is handed to ConfigHelper.
        self.config_helper = ConfigHelper(config_file, self.s3_helper)

        source = ResourceFactory.get_source_resource_from_config_helper(
            self.config_helper, self.region)
        destination = ResourceFactory.get_target_resource_from_config_helper(
            self.config_helper, self.region)

        # Two no-op tasks are registered up front and kept on the instance.
        self.task_manager = TaskManager()
        cluster_barrier = NoOperationTask()
        self.barrier_after_all_cluster_pre_tests = cluster_barrier
        self.task_manager.add_task(cluster_barrier)
        resource_barrier = NoOperationTask()
        self.barrier_after_all_resource_pre_tests = resource_barrier
        self.task_manager.add_task(resource_barrier)

        # TODO: check whether both resources are tables and handle the other
        # scenarios, e.g. when both are schemas create the target schema and
        # migrate all tables.
        self.add_table_migration(source, destination, global_config_values)

        self.task_manager.run()
コード例 #3
0
    def __init__(self,
                 config_file,
                 region_name,
                 global_config_values=GlobalConfigParametersReader().
                 get_default_config_key_values()):
        """Build the migration tasks for one or several tables and run them.

        Copies every entry of ``global_config_values`` into
        ``config_parameters`` (defined outside this method), creates the S3
        and config helpers, registers two ``NoOperationTask`` instances with
        the task manager, then calls ``add_src_dest_tasks`` once per
        source/target pair when ``unloadSource`` has a non-empty
        ``tableNames``, or once for the single configured table otherwise.
        Finally the task manager is run.

        Args:
            config_file: Configuration location handed to ``ConfigHelper``.
            region_name: Region stored on ``self.region`` and passed to
                ``S3Helper`` and ``ResourceFactory``.
            global_config_values: Option name -> value mapping. The default
                is evaluated once, when the method is defined.

        Raises:
            NotImplementedError: If ``tableNames`` is set on ``unloadSource``
                but is missing, empty or of a different length on
                ``copyTarget``.
        """
        for key, value in global_config_values.items():
            config_parameters[key] = value
        self.region = region_name
        self.s3_helper = S3Helper(self.region)

        # load the configuration
        self.config_helper = ConfigHelper(config_file, self.s3_helper)

        self.task_manager = TaskManager()
        self.barrier_after_all_cluster_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_cluster_pre_tests)
        self.barrier_after_all_resource_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_resource_pre_tests)

        src_config = self.config_helper.config['unloadSource']
        dest_config = self.config_helper.config['copyTarget']
        # 'tableNames' is optional: use .get() so a single-table config that
        # omits the key reaches the else branch instead of raising KeyError.
        src_tables = src_config.get('tableNames')
        if src_tables:
            # .get() here lets a missing target list hit the explicit
            # validation below rather than an unrelated KeyError.
            dest_tables = dest_config.get('tableNames')
            logging.info("Migrating multiple tables")
            if not dest_tables or len(src_tables) != len(dest_tables):
                message = (
                    "When migrating multiple tables 'tableNames' property must be configured in unloadSource and copyTarget, and be the same length"
                )
                logging.fatal(message)
                raise NotImplementedError(message)
            for src_table, dest_table in zip(src_tables, dest_tables):
                # The config is updated before each factory call; presumably
                # the factory reads 'tableName' from it (verify in
                # ResourceFactory).
                src_config['tableName'] = src_table
                dest_config['tableName'] = dest_table
                source = ResourceFactory.get_source_resource_from_config_helper(
                    self.config_helper, self.region)
                destination = ResourceFactory.get_target_resource_from_config_helper(
                    self.config_helper, self.region)
                self.add_src_dest_tasks(source, destination,
                                        global_config_values)
        else:
            # Migrating a single table
            source = ResourceFactory.get_source_resource_from_config_helper(
                self.config_helper, self.region)
            destination = ResourceFactory.get_target_resource_from_config_helper(
                self.config_helper, self.region)
            self.add_src_dest_tasks(source, destination, global_config_values)

        self.task_manager.run()
コード例 #4
0
    def __init__(self,
                 config_file,
                 region_name,
                 global_config_values=GlobalConfigParametersReader().get_default_config_key_values()):
        """Set up the helpers, queue a single table migration, and run it.

        Args:
            config_file: Configuration location handed to ``ConfigHelper``.
            region_name: Region stored on ``self.region`` and passed to
                ``S3Helper`` and ``ResourceFactory``.
            global_config_values: Option name -> value mapping. Every entry is
                copied into ``config_parameters`` (defined outside this
                method) and the mapping is forwarded to
                ``add_table_migration``. The default is evaluated once, when
                the method is defined.
        """
        for option_name, option_value in global_config_values.items():
            config_parameters[option_name] = option_value
        self.region = region_name
        self.s3_helper = S3Helper(self.region)

        # Parse the configuration; the S3 helper is handed to ConfigHelper.
        self.config_helper = ConfigHelper(config_file, self.s3_helper)

        source = ResourceFactory.get_source_resource_from_config_helper(self.config_helper, self.region)
        destination = ResourceFactory.get_target_resource_from_config_helper(self.config_helper, self.region)

        # Two no-op tasks are registered up front and kept on the instance.
        self.task_manager = TaskManager()
        cluster_barrier = NoOperationTask()
        self.barrier_after_all_cluster_pre_tests = cluster_barrier
        self.task_manager.add_task(cluster_barrier)
        resource_barrier = NoOperationTask()
        self.barrier_after_all_resource_pre_tests = resource_barrier
        self.task_manager.add_task(resource_barrier)

        # TODO: check whether both resources are tables and handle the other
        # scenarios, e.g. when both are schemas create the target schema and
        # migrate all tables.
        self.add_table_migration(source, destination, global_config_values)

        self.task_manager.run()
コード例 #5
0
    def __init__(self,
                 config_file,
                 region_name,
                 global_config_values=GlobalConfigParametersReader().get_default_config_key_values()):
        """Build the migration tasks for one or several tables and run them.

        Copies every entry of ``global_config_values`` into
        ``config_parameters`` (defined outside this method), creates the S3
        and config helpers, registers two ``NoOperationTask`` instances with
        the task manager, then calls ``add_src_dest_tasks`` once per
        source/target pair when ``unloadSource`` has a non-empty
        ``tableNames``, or once for the single configured table otherwise.
        Finally the task manager is run.

        Args:
            config_file: Configuration location handed to ``ConfigHelper``.
            region_name: Region stored on ``self.region`` and passed to
                ``S3Helper`` and ``ResourceFactory``.
            global_config_values: Option name -> value mapping. The default
                is evaluated once, when the method is defined.

        Raises:
            NotImplementedError: If ``tableNames`` is set on ``unloadSource``
                but is missing, empty or of a different length on
                ``copyTarget``.
        """
        for key, value in global_config_values.items():
            config_parameters[key] = value
        self.region = region_name
        self.s3_helper = S3Helper(self.region)

        # load the configuration
        self.config_helper = ConfigHelper(config_file, self.s3_helper)

        self.task_manager = TaskManager()
        self.barrier_after_all_cluster_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_cluster_pre_tests)
        self.barrier_after_all_resource_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_resource_pre_tests)

        src_config = self.config_helper.config['unloadSource']
        dest_config = self.config_helper.config['copyTarget']
        # 'tableNames' is optional: use .get() so a single-table config that
        # omits the key reaches the else branch instead of raising KeyError.
        src_tables = src_config.get('tableNames')
        if src_tables:
            # .get() here lets a missing target list hit the explicit
            # validation below rather than an unrelated KeyError.
            dest_tables = dest_config.get('tableNames')
            logging.info("Migrating multiple tables")
            if not dest_tables or len(src_tables) != len(dest_tables):
                message = "When migrating multiple tables 'tableNames' property must be configured in unloadSource and copyTarget, and be the same length"
                logging.fatal(message)
                raise NotImplementedError(message)
            for src_table, dest_table in zip(src_tables, dest_tables):
                # The config is updated before each factory call; presumably
                # the factory reads 'tableName' from it (verify in
                # ResourceFactory).
                src_config['tableName'] = src_table
                dest_config['tableName'] = dest_table
                source = ResourceFactory.get_source_resource_from_config_helper(self.config_helper, self.region)
                destination = ResourceFactory.get_target_resource_from_config_helper(self.config_helper, self.region)
                self.add_src_dest_tasks(source, destination, global_config_values)
        else:
            # Migrating a single table
            source = ResourceFactory.get_source_resource_from_config_helper(self.config_helper, self.region)
            destination = ResourceFactory.get_target_resource_from_config_helper(self.config_helper, self.region)
            self.add_src_dest_tasks(source, destination, global_config_values)

        self.task_manager.run()
コード例 #6
0
class UnloadCopyTool:
    """Queues and runs unload-to-S3 / copy-from-S3 tasks for configured tables.

    Constructing an instance does all the work: the task graph is built and
    ``task_manager.run()`` is called from ``__init__``.
    """

    # noinspection PyDefaultArgument
    def __init__(self,
                 config_file,
                 region_name,
                 global_config_values=GlobalConfigParametersReader().get_default_config_key_values()):
        """Build the migration tasks for one or several tables and run them.

        Calls ``add_src_dest_tasks`` once per source/target pair when
        ``unloadSource`` has a non-empty ``tableNames``, or once for the
        single configured table otherwise.

        Args:
            config_file: Configuration location handed to ``ConfigHelper``.
            region_name: Region stored on ``self.region`` and passed to
                ``S3Helper`` and ``ResourceFactory``.
            global_config_values: Option name -> value mapping. Every entry
                is copied into ``config_parameters`` (defined outside this
                class). The default is evaluated once, at definition time.

        Raises:
            NotImplementedError: If ``tableNames`` is set on ``unloadSource``
                but is missing, empty or of a different length on
                ``copyTarget``, or if ``add_src_dest_tasks`` rejects the
                resources.
        """
        for key, value in global_config_values.items():
            config_parameters[key] = value
        self.region = region_name
        self.s3_helper = S3Helper(self.region)

        # load the configuration
        self.config_helper = ConfigHelper(config_file, self.s3_helper)

        self.task_manager = TaskManager()
        self.barrier_after_all_cluster_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_cluster_pre_tests)
        self.barrier_after_all_resource_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_resource_pre_tests)

        src_config = self.config_helper.config['unloadSource']
        dest_config = self.config_helper.config['copyTarget']
        # 'tableNames' is optional: use .get() so a single-table config that
        # omits the key reaches the else branch instead of raising KeyError.
        src_tables = src_config.get('tableNames')
        if src_tables:
            # .get() here lets a missing target list hit the explicit
            # validation below rather than an unrelated KeyError.
            dest_tables = dest_config.get('tableNames')
            logging.info("Migrating multiple tables")
            if not dest_tables or len(src_tables) != len(dest_tables):
                message = "When migrating multiple tables 'tableNames' property must be configured in unloadSource and copyTarget, and be the same length"
                logging.fatal(message)
                raise NotImplementedError(message)
            for src_table, dest_table in zip(src_tables, dest_tables):
                # The config is updated before each factory call; presumably
                # the factory reads 'tableName' from it (verify in
                # ResourceFactory).
                src_config['tableName'] = src_table
                dest_config['tableName'] = dest_table
                source = ResourceFactory.get_source_resource_from_config_helper(self.config_helper, self.region)
                destination = ResourceFactory.get_target_resource_from_config_helper(self.config_helper, self.region)
                self.add_src_dest_tasks(source, destination, global_config_values)
        else:
            # Migrating a single table
            source = ResourceFactory.get_source_resource_from_config_helper(self.config_helper, self.region)
            destination = ResourceFactory.get_target_resource_from_config_helper(self.config_helper, self.region)
            self.add_src_dest_tasks(source, destination, global_config_values)

        self.task_manager.run()

    def add_src_dest_tasks(self, source, destination, global_config_values):
        """Validate one source/destination pair and queue its table migration.

        Args:
            source: Source resource; must be a ``TableResource``.
            destination: Target resource; must be a ``DBResource``. When it
                is not already a ``TableResource`` it is replaced by the
                result of merging it with ``source``.
            global_config_values: Option mapping. A truthy ``'tableName'``
                other than the string ``'None'`` overrides the destination
                table via ``destination.set_table``.

        Raises:
            NotImplementedError: If ``source`` is not a ``TableResource`` or
                ``destination`` is not a ``DBResource``.
        """
        if not isinstance(source, TableResource):
            # TODO: add additional scenarios, e.g. if both resources are of
            # type schema then create target schema and migrate all tables.
            message = 'Source is not a Table, this type of unload-copy is currently not supported.'
            logging.fatal(message)
            raise NotImplementedError(message)

        if not isinstance(destination, DBResource):
            message = 'Destination should be a database resource'
            logging.fatal(message)
            raise NotImplementedError(message)

        if not isinstance(destination, TableResource):
            destination = ResourceFactory.get_table_resource_from_merging_2_resources(destination, source)
        table_name = global_config_values['tableName']
        if table_name and table_name != 'None':
            destination.set_table(table_name)
        self.add_table_migration(source, destination, global_config_values)

    def add_table_migration(self, source, destination, global_config_values):
        """Queue the pre-test, create, unload, copy and cleanup tasks for one table.

        Tasks are registered on ``self.task_manager`` relative to the two
        barrier tasks created in ``__init__``: cluster checks are added as a
        ``dependency_of`` the cluster barrier, resource checks and the
        optional create task sit between the two barriers, and the unload
        task depends on the resource barrier.

        Args:
            source: Resource to unload from.
            destination: Resource to copy into.
            global_config_values: Option mapping; reads ``connectionPreTest``,
                ``destinationTablePreTest``, ``sourceTablePreTest`` and
                ``destinationTableAutoCreate``.
        """
        if global_config_values['connectionPreTest']:
            # Cluster checks are only queued for sides whose table pre-test
            # is switched off.
            if not global_config_values['destinationTablePreTest']:
                destination_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(resource=destination)
                self.task_manager.add_task(destination_cluster_pre_test,
                                           dependency_of=self.barrier_after_all_cluster_pre_tests)
            if not global_config_values['sourceTablePreTest']:
                source_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(resource=source)
                self.task_manager.add_task(source_cluster_pre_test,
                                           dependency_of=self.barrier_after_all_cluster_pre_tests)
        if global_config_values['destinationTablePreTest']:
            if global_config_values['destinationTableAutoCreate']:
                # With auto-create enabled only the destination cluster is
                # checked, not the destination resource itself.
                destination_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(resource=destination)
                self.task_manager.add_task(destination_cluster_pre_test,
                                           dependency_of=self.barrier_after_all_cluster_pre_tests)
            else:
                destination_table_pre_test = FailIfResourceDoesNotExistsTask(destination)
                self.task_manager.add_task(destination_table_pre_test,
                                           dependency_of=self.barrier_after_all_resource_pre_tests,
                                           dependencies=self.barrier_after_all_cluster_pre_tests)

        if global_config_values['sourceTablePreTest']:
            source_table_pre_test = FailIfResourceDoesNotExistsTask(source)
            self.task_manager.add_task(source_table_pre_test,
                                       dependency_of=self.barrier_after_all_resource_pre_tests,
                                       dependencies=self.barrier_after_all_cluster_pre_tests)

        if global_config_values['destinationTableAutoCreate']:
            create_target = CreateIfTargetDoesNotExistTask(
                source_resource=source,
                target_resource=destination
            )
            self.task_manager.add_task(create_target,
                                       dependency_of=self.barrier_after_all_resource_pre_tests,
                                       dependencies=self.barrier_after_all_cluster_pre_tests)

        # encryptionKeyID is defined outside this class.
        s3_details = S3Details(self.config_helper, source, encryption_key_id=encryptionKeyID)
        unload_data = UnloadDataToS3Task(source, s3_details)
        self.task_manager.add_task(unload_data, dependencies=self.barrier_after_all_resource_pre_tests)

        # Chain: unload -> copy -> cleanup.
        copy_data = CopyDataFromS3Task(destination, s3_details)
        self.task_manager.add_task(copy_data, dependencies=unload_data)

        s3_cleanup = CleanupS3StagingAreaTask(s3_details)
        self.task_manager.add_task(s3_cleanup, dependencies=copy_data)
コード例 #7
0
class UnloadCopyTool:
    """Queues and runs the unload-to-S3 / copy-from-S3 tasks for one table.

    Constructing an instance does all the work: the task graph is built and
    ``task_manager.run()`` is called from ``__init__``.
    """

    # noinspection PyDefaultArgument
    def __init__(self,
                 config_file,
                 region_name,
                 global_config_values=GlobalConfigParametersReader().get_default_config_key_values()):
        """Set up the helpers, queue a single table migration, and run it.

        Args:
            config_file: Configuration location handed to ``ConfigHelper``.
            region_name: Region stored on ``self.region`` and passed to
                ``S3Helper`` and ``ResourceFactory``.
            global_config_values: Option name -> value mapping. Every entry is
                copied into ``config_parameters`` (defined outside this
                class). The default is evaluated once, at definition time.
        """
        for option_name, option_value in global_config_values.items():
            config_parameters[option_name] = option_value
        self.region = region_name
        self.s3_helper = S3Helper(self.region)

        # Parse the configuration; the S3 helper is handed to ConfigHelper.
        self.config_helper = ConfigHelper(config_file, self.s3_helper)

        source = ResourceFactory.get_source_resource_from_config_helper(self.config_helper, self.region)
        destination = ResourceFactory.get_target_resource_from_config_helper(self.config_helper, self.region)

        # Two no-op tasks are registered up front; add_table_migration hangs
        # its tasks off them.
        self.task_manager = TaskManager()
        cluster_barrier = NoOperationTask()
        self.barrier_after_all_cluster_pre_tests = cluster_barrier
        self.task_manager.add_task(cluster_barrier)
        resource_barrier = NoOperationTask()
        self.barrier_after_all_resource_pre_tests = resource_barrier
        self.task_manager.add_task(resource_barrier)

        # TODO: check whether both resources are tables and handle the other
        # scenarios, e.g. when both are schemas create the target schema and
        # migrate all tables.
        self.add_table_migration(source, destination, global_config_values)

        self.task_manager.run()

    def add_table_migration(self, source, destination, global_config_values):
        """Queue the pre-test, create, unload, copy and cleanup tasks for one table.

        Args:
            source: Resource to unload from.
            destination: Resource to copy into.
            global_config_values: Option mapping; reads ``connectionPreTest``,
                ``destinationTablePreTest``, ``sourceTablePreTest`` and
                ``destinationTableAutoCreate``.
        """
        manager = self.task_manager
        cluster_barrier = self.barrier_after_all_cluster_pre_tests
        resource_barrier = self.barrier_after_all_resource_pre_tests

        def queue_cluster_check(resource):
            # Cluster checks are registered as a dependency of the cluster barrier.
            cluster_check = FailIfResourceClusterDoesNotExistsTask(resource=resource)
            manager.add_task(cluster_check, dependency_of=cluster_barrier)

        def queue_between_barriers(task):
            # Depends on the cluster barrier and is a dependency of the
            # resource barrier.
            manager.add_task(task,
                             dependency_of=resource_barrier,
                             dependencies=cluster_barrier)

        if global_config_values['connectionPreTest']:
            # Only sides whose table pre-test is off get a cluster check here.
            if not global_config_values['destinationTablePreTest']:
                queue_cluster_check(destination)
            if not global_config_values['sourceTablePreTest']:
                queue_cluster_check(source)

        if global_config_values['destinationTablePreTest']:
            if global_config_values['destinationTableAutoCreate']:
                # With auto-create only the destination cluster is checked.
                queue_cluster_check(destination)
            else:
                queue_between_barriers(FailIfResourceDoesNotExistsTask(destination))

        if global_config_values['sourceTablePreTest']:
            queue_between_barriers(FailIfResourceDoesNotExistsTask(source))

        if global_config_values['destinationTableAutoCreate']:
            queue_between_barriers(
                CreateIfTargetDoesNotExistTask(source_resource=source,
                                               target_resource=destination))

        # encryptionKeyID is defined outside this class.
        staging_area = S3Details(self.config_helper, source, encryption_key_id=encryptionKeyID)

        # Chain: resource barrier -> unload -> copy -> cleanup.
        unload_task = UnloadDataToS3Task(source, staging_area)
        manager.add_task(unload_task, dependencies=resource_barrier)

        copy_task = CopyDataFromS3Task(destination, staging_area)
        manager.add_task(copy_task, dependencies=unload_task)

        cleanup_task = CleanupS3StagingAreaTask(staging_area)
        manager.add_task(cleanup_task, dependencies=copy_task)
コード例 #8
0
class UnloadCopyTool:
    """Queues and runs unload-to-S3 / copy-from-S3 tasks for configured tables.

    Constructing an instance does all the work: the task graph is built and
    ``task_manager.run()`` is called from ``__init__``.
    """

    # noinspection PyDefaultArgument
    def __init__(self,
                 config_file,
                 region_name,
                 global_config_values=GlobalConfigParametersReader().get_default_config_key_values()):
        """Build the migration tasks for one or several tables and run them.

        Calls ``add_src_dest_tasks`` once per source/target pair when
        ``unloadSource`` has a non-empty ``tableNames``, or once for the
        single configured table otherwise.

        Args:
            config_file: Configuration location handed to ``ConfigHelper``.
            region_name: Region stored on ``self.region`` and passed to
                ``S3Helper`` and ``ResourceFactory``.
            global_config_values: Option name -> value mapping. Every entry
                is copied into ``config_parameters`` (defined outside this
                class). The default is evaluated once, at definition time.

        Raises:
            NotImplementedError: If ``tableNames`` is set on ``unloadSource``
                but is missing, empty or of a different length on
                ``copyTarget``, or if ``add_src_dest_tasks`` rejects the
                resources.
        """
        for key, value in global_config_values.items():
            config_parameters[key] = value
        self.region = region_name
        self.s3_helper = S3Helper(self.region)

        # load the configuration
        self.config_helper = ConfigHelper(config_file, self.s3_helper)

        self.task_manager = TaskManager()
        self.barrier_after_all_cluster_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_cluster_pre_tests)
        self.barrier_after_all_resource_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_resource_pre_tests)

        src_config = self.config_helper.config['unloadSource']
        dest_config = self.config_helper.config['copyTarget']
        # 'tableNames' is optional: use .get() so a single-table config that
        # omits the key reaches the else branch instead of raising KeyError.
        src_tables = src_config.get('tableNames')
        if src_tables:
            # .get() here lets a missing target list hit the explicit
            # validation below rather than an unrelated KeyError.
            dest_tables = dest_config.get('tableNames')
            logging.info("Migrating multiple tables")
            if not dest_tables or len(src_tables) != len(dest_tables):
                message = "When migrating multiple tables 'tableNames' property must be configured in unloadSource and copyTarget, and be the same length"
                logging.fatal(message)
                raise NotImplementedError(message)
            for src_table, dest_table in zip(src_tables, dest_tables):
                # The config is updated before each factory call; presumably
                # the factory reads 'tableName' from it (verify in
                # ResourceFactory).
                src_config['tableName'] = src_table
                dest_config['tableName'] = dest_table
                source = ResourceFactory.get_source_resource_from_config_helper(self.config_helper, self.region)
                destination = ResourceFactory.get_target_resource_from_config_helper(self.config_helper, self.region)
                self.add_src_dest_tasks(source, destination, global_config_values)
        else:
            # Migrating a single table
            source = ResourceFactory.get_source_resource_from_config_helper(self.config_helper, self.region)
            destination = ResourceFactory.get_target_resource_from_config_helper(self.config_helper, self.region)
            self.add_src_dest_tasks(source, destination, global_config_values)

        self.task_manager.run()

    def add_src_dest_tasks(self, source, destination, global_config_values):
        """Validate one source/destination pair and queue its table migration.

        Args:
            source: Source resource; must be a ``TableResource``.
            destination: Target resource; must be a ``DBResource``. When it
                is not already a ``TableResource`` it is replaced by the
                result of merging it with ``source``.
            global_config_values: Option mapping. A truthy ``'tableName'``
                other than the string ``'None'`` overrides the destination
                table via ``destination.set_table``.

        Raises:
            NotImplementedError: If ``source`` is not a ``TableResource`` or
                ``destination`` is not a ``DBResource``.
        """
        if not isinstance(source, TableResource):
            # TODO: add additional scenarios, e.g. if both resources are of
            # type schema then create target schema and migrate all tables.
            message = 'Source is not a Table, this type of unload-copy is currently not supported.'
            logging.fatal(message)
            raise NotImplementedError(message)

        if not isinstance(destination, DBResource):
            message = 'Destination should be a database resource'
            logging.fatal(message)
            raise NotImplementedError(message)

        if not isinstance(destination, TableResource):
            destination = ResourceFactory.get_table_resource_from_merging_2_resources(destination, source)
        table_name = global_config_values['tableName']
        if table_name and table_name != 'None':
            destination.set_table(table_name)
        self.add_table_migration(source, destination, global_config_values)

    def add_table_migration(self, source, destination, global_config_values):
        """Queue the pre-test, create, unload, copy and cleanup tasks for one table.

        Tasks are registered on ``self.task_manager`` relative to the two
        barrier tasks created in ``__init__``: cluster checks are added as a
        ``dependency_of`` the cluster barrier, resource checks and the
        optional create task sit between the two barriers, and the unload
        task depends on the resource barrier.

        Args:
            source: Resource to unload from.
            destination: Resource to copy into.
            global_config_values: Option mapping; reads ``connectionPreTest``,
                ``destinationTablePreTest``, ``sourceTablePreTest`` and
                ``destinationTableAutoCreate``.
        """
        if global_config_values['connectionPreTest']:
            # Cluster checks are only queued for sides whose table pre-test
            # is switched off.
            if not global_config_values['destinationTablePreTest']:
                destination_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(resource=destination)
                self.task_manager.add_task(destination_cluster_pre_test,
                                           dependency_of=self.barrier_after_all_cluster_pre_tests)
            if not global_config_values['sourceTablePreTest']:
                source_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(resource=source)
                self.task_manager.add_task(source_cluster_pre_test,
                                           dependency_of=self.barrier_after_all_cluster_pre_tests)
        if global_config_values['destinationTablePreTest']:
            if global_config_values['destinationTableAutoCreate']:
                # With auto-create enabled only the destination cluster is
                # checked, not the destination resource itself.
                destination_cluster_pre_test = FailIfResourceClusterDoesNotExistsTask(resource=destination)
                self.task_manager.add_task(destination_cluster_pre_test,
                                           dependency_of=self.barrier_after_all_cluster_pre_tests)
            else:
                destination_table_pre_test = FailIfResourceDoesNotExistsTask(destination)
                self.task_manager.add_task(destination_table_pre_test,
                                           dependency_of=self.barrier_after_all_resource_pre_tests,
                                           dependencies=self.barrier_after_all_cluster_pre_tests)

        if global_config_values['sourceTablePreTest']:
            source_table_pre_test = FailIfResourceDoesNotExistsTask(source)
            self.task_manager.add_task(source_table_pre_test,
                                       dependency_of=self.barrier_after_all_resource_pre_tests,
                                       dependencies=self.barrier_after_all_cluster_pre_tests)

        if global_config_values['destinationTableAutoCreate']:
            create_target = CreateIfTargetDoesNotExistTask(
                source_resource=source,
                target_resource=destination
            )
            self.task_manager.add_task(create_target,
                                       dependency_of=self.barrier_after_all_resource_pre_tests,
                                       dependencies=self.barrier_after_all_cluster_pre_tests)

        # encryptionKeyID is defined outside this class.
        s3_details = S3Details(self.config_helper, source, encryption_key_id=encryptionKeyID)
        unload_data = UnloadDataToS3Task(source, s3_details)
        self.task_manager.add_task(unload_data, dependencies=self.barrier_after_all_resource_pre_tests)

        # Chain: unload -> copy -> cleanup.
        copy_data = CopyDataFromS3Task(destination, s3_details)
        self.task_manager.add_task(copy_data, dependencies=unload_data)

        s3_cleanup = CleanupS3StagingAreaTask(s3_details)
        self.task_manager.add_task(s3_cleanup, dependencies=copy_data)
コード例 #9
0
class UnloadCopyTool:
    """Queues and runs unload-to-S3 / copy-from-S3 tasks for a table, schema or database.

    Constructing an instance does all the work: the task graph is built and
    ``task_manager.run()`` is called from ``__init__``.
    """

    # noinspection PyDefaultArgument
    def __init__(self,
                 config_file,
                 region_name,
                 global_config_values=GlobalConfigParametersReader().
                 get_default_config_key_values()):
        """Set up the helpers, queue the migration matching the source, and run it.

        Args:
            config_file: Configuration location handed to ``ConfigHelper``.
            region_name: Region stored on ``self.region`` and passed to
                ``S3Helper`` and ``ResourceFactory``.
            global_config_values: Option name -> value mapping. Every entry is
                copied into ``config_parameters`` (defined outside this
                class). The default is evaluated once, at definition time.

        Raises:
            NotImplementedError: If the source is not a table, schema or
                database resource, or the destination is not a ``DBResource``.
        """
        for option_name, option_value in global_config_values.items():
            config_parameters[option_name] = option_value
        self.region = region_name
        self.s3_helper = S3Helper(self.region)

        # Parse the configuration; the S3 helper is handed to ConfigHelper.
        self.config_helper = ConfigHelper(config_file, self.s3_helper)

        source = ResourceFactory.get_source_resource_from_config_helper(
            self.config_helper, self.region)
        destination = ResourceFactory.get_target_resource_from_config_helper(
            self.config_helper, self.region)

        # Two no-op tasks are registered up front; add_table_migration hangs
        # its tasks off them.
        self.task_manager = TaskManager()
        cluster_barrier = NoOperationTask()
        self.barrier_after_all_cluster_pre_tests = cluster_barrier
        self.task_manager.add_task(cluster_barrier)
        resource_barrier = NoOperationTask()
        self.barrier_after_all_resource_pre_tests = resource_barrier
        self.task_manager.add_task(resource_barrier)

        # Reject unsupported source types first.
        # TODO: add additional scenarios, e.g. when both resources are schemas
        # create the target schema and migrate all tables.
        if not isinstance(source,
                          (TableResource, SchemaResource, DBResource)):
            logging.fatal(
                'Source is not a Table, this type of unload-copy is currently not supported.'
            )
            raise NotImplementedError

        if isinstance(source, TableResource):
            if not isinstance(destination, DBResource):
                logging.fatal('Destination should be a database resource')
                raise NotImplementedError
            if not isinstance(destination, TableResource):
                # Destination is not a table resource: build one by merging
                # it with the source table resource.
                destination = ResourceFactory.get_table_resource_from_merging_2_resources(
                    destination, source)
            table_override = global_config_values['tableName']
            if table_override and table_override != 'None':
                destination.set_table(table_override)
            self.add_table_migration(source, destination,
                                     global_config_values)
        elif isinstance(source, SchemaResource):
            if not isinstance(destination, DBResource):
                logging.fatal('Destination should be a database resource')
                raise NotImplementedError
            self.add_schema_migration(source, destination,
                                      global_config_values)
        else:
            # Only a DBResource source can reach this branch.
            if not isinstance(destination, DBResource):
                logging.fatal('Destination should be a database resource')
                raise NotImplementedError
            self.add_database_migration(source, destination,
                                        global_config_values)

        self.task_manager.run()

    def add_database_migration(self, source, destination,
                               global_config_values):
        """Queue a schema migration for every schema listed by ``source``.

        Args:
            source: Resource providing ``list_schemas()`` and ``get_cluster()``.
            destination: Target resource forwarded to ``add_schema_migration``.
            global_config_values: Option mapping forwarded unchanged.
        """
        for schema_name in source.list_schemas():
            schema_resource = SchemaResource(source.get_cluster(), schema_name)
            self.add_schema_migration(schema_resource, destination,
                                      global_config_values)

    def add_schema_migration(self, source, destination, global_config_values):
        """Queue a table migration for every table listed by ``source``.

        Args:
            source: Resource providing ``list_tables()``, ``get_cluster()``
                and ``get_schema()``.
            destination: Target resource merged with each source table to
                obtain the target table resource.
            global_config_values: Option mapping forwarded unchanged.
        """
        for table_name in source.list_tables():
            table_source = TableResource(source.get_cluster(),
                                         source.get_schema(), table_name)
            table_target = ResourceFactory.get_table_resource_from_merging_2_resources(
                destination, table_source)
            # A truthy 'explicit_ids' entry in copyTarget is passed on to the
            # target table.
            copy_target = self.config_helper.config['copyTarget']
            if 'explicit_ids' in copy_target and copy_target['explicit_ids']:
                table_target.set_explicit_ids(True)
            self.add_table_migration(table_source, table_target,
                                     global_config_values)

    def add_table_migration(self, source, destination, global_config_values):
        """Queue the pre-test, create, unload, copy and cleanup tasks for one table.

        Args:
            source: Resource to unload from.
            destination: Resource to copy into.
            global_config_values: Option mapping; reads ``connectionPreTest``,
                ``destinationTablePreTest``, ``sourceTablePreTest`` and
                ``destinationTableAutoCreate``.
        """
        manager = self.task_manager
        cluster_barrier = self.barrier_after_all_cluster_pre_tests
        resource_barrier = self.barrier_after_all_resource_pre_tests

        def queue_cluster_check(resource):
            # Cluster checks are registered as a dependency of the cluster barrier.
            cluster_check = FailIfResourceClusterDoesNotExistsTask(
                resource=resource)
            manager.add_task(cluster_check, dependency_of=cluster_barrier)

        def queue_between_barriers(task):
            # Depends on the cluster barrier and is a dependency of the
            # resource barrier.
            manager.add_task(task,
                             dependency_of=resource_barrier,
                             dependencies=cluster_barrier)

        if global_config_values['connectionPreTest']:
            # Only sides whose table pre-test is off get a cluster check here.
            if not global_config_values['destinationTablePreTest']:
                queue_cluster_check(destination)
            if not global_config_values['sourceTablePreTest']:
                queue_cluster_check(source)

        if global_config_values['destinationTablePreTest']:
            if global_config_values['destinationTableAutoCreate']:
                # With auto-create only the destination cluster is checked.
                queue_cluster_check(destination)
            else:
                queue_between_barriers(
                    FailIfResourceDoesNotExistsTask(destination))

        if global_config_values['sourceTablePreTest']:
            queue_between_barriers(FailIfResourceDoesNotExistsTask(source))

        if global_config_values['destinationTableAutoCreate']:
            queue_between_barriers(
                CreateIfTargetDoesNotExistTask(source_resource=source,
                                               target_resource=destination))

        # encryptionKeyID is defined outside this class.
        staging_area = S3Details(self.config_helper,
                                 source,
                                 encryption_key_id=encryptionKeyID)

        # Chain: resource barrier -> unload -> copy -> cleanup.
        unload_task = UnloadDataToS3Task(source, staging_area)
        manager.add_task(unload_task, dependencies=resource_barrier)

        copy_task = CopyDataFromS3Task(destination, staging_area)
        manager.add_task(copy_task, dependencies=unload_task)

        cleanup_task = CleanupS3StagingAreaTask(staging_area)
        manager.add_task(cleanup_task, dependencies=copy_task)
コード例 #10
0
class UnloadCopyTool:
    """Assemble and run the task graph that moves one table via S3.

    Constructing an instance loads the configuration, resolves the source
    and destination resources, registers the optional pre-test / auto-create
    tasks followed by the unload -> copy -> cleanup chain, and then runs the
    task manager.
    """

    # noinspection PyDefaultArgument
    def __init__(self,
                 config_file,
                 region_name,
                 global_config_values=GlobalConfigParametersReader().
                 get_default_config_key_values()):
        """Configure the tool, build the task graph and execute it.

        Args:
            config_file: Passed to ``ConfigHelper`` together with the S3
                helper to load the configuration.
            region_name: Region handed to ``S3Helper`` and to the resource
                factory.
            global_config_values: Mapping of global settings. Every entry is
                copied into the module-level ``config_parameters``. The
                default is computed once, when the class is defined, not on
                each call.
        """
        # Publish every setting through the module-level config_parameters.
        for name, setting in global_config_values.items():
            config_parameters[name] = setting

        self.region = region_name
        self.s3_helper = S3Helper(self.region)

        # Load the configuration.
        self.config_helper = ConfigHelper(config_file, self.s3_helper)

        source = ResourceFactory.get_source_resource_from_config_helper(
            self.config_helper, self.region)
        destination = ResourceFactory.get_target_resource_from_config_helper(
            self.config_helper, self.region)

        # A truthy 'tableName' setting overrides the table on both sides
        # while keeping each side's own cluster and schema.
        table_name = global_config_values['tableName']
        if table_name:
            source = TableResource(
                source.get_cluster(), source.get_schema(), table_name)
            destination = TableResource(
                destination.get_cluster(), destination.get_schema(),
                table_name)

        # Two no-op tasks act as synchronisation points in the graph: one
        # that the cluster-level pre-tests feed into, and one that the
        # resource-level pre-tests feed into.
        self.task_manager = TaskManager()
        self.barrier_after_all_cluster_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_cluster_pre_tests)
        self.barrier_after_all_resource_pre_tests = NoOperationTask()
        self.task_manager.add_task(self.barrier_after_all_resource_pre_tests)

        # TODO: Check whether both resources are tables; if not, handle the
        # other scenarios. For example, if both resources are schemas then
        # create the target schema and migrate all tables.
        self.add_table_migration(source, destination, global_config_values)

        self.task_manager.run()

    def _add_cluster_pre_test(self, resource):
        """Register a cluster-existence check for ``resource``.

        The check is added as a dependency of the cluster pre-test barrier.
        """
        cluster_check = FailIfResourceClusterDoesNotExistsTask(
            resource=resource)
        self.task_manager.add_task(
            cluster_check,
            dependency_of=self.barrier_after_all_cluster_pre_tests)

    def _add_between_barriers(self, task):
        """Register ``task`` between the two barriers.

        The task depends on the cluster pre-test barrier and is itself a
        dependency of the resource pre-test barrier.
        """
        self.task_manager.add_task(
            task,
            dependency_of=self.barrier_after_all_resource_pre_tests,
            dependencies=self.barrier_after_all_cluster_pre_tests)

    def add_table_migration(self, source, destination, global_config_values):
        """Register every task needed to migrate ``source`` to ``destination``.

        Args:
            source: Resource that is unloaded to S3.
            destination: Resource that the staged data is copied into.
            global_config_values: Mapping read for the flags
                ``connectionPreTest``, ``destinationTablePreTest``,
                ``sourceTablePreTest`` and ``destinationTableAutoCreate``.
        """
        flags = global_config_values

        # Connection pre-test: a side gets a cluster check here only when
        # its own table pre-test flag is off. Destination is handled first,
        # then source.
        if flags['connectionPreTest']:
            guarded_sides = (
                ('destinationTablePreTest', destination),
                ('sourceTablePreTest', source),
            )
            for table_flag, resource in guarded_sides:
                if not flags[table_flag]:
                    self._add_cluster_pre_test(resource)

        # Destination pre-test: with auto-create enabled only the cluster is
        # checked; otherwise the destination resource itself must exist.
        if flags['destinationTablePreTest']:
            if not flags['destinationTableAutoCreate']:
                destination_check = FailIfResourceDoesNotExistsTask(
                    destination)
                self._add_between_barriers(destination_check)
            else:
                self._add_cluster_pre_test(destination)

        if flags['sourceTablePreTest']:
            source_check = FailIfResourceDoesNotExistsTask(source)
            self._add_between_barriers(source_check)

        if flags['destinationTableAutoCreate']:
            auto_create = CreateIfTargetDoesNotExistTask(
                source_resource=source, target_resource=destination)
            self._add_between_barriers(auto_create)

        # Data movement chain, started only after the resource pre-test
        # barrier: unload -> copy -> staging cleanup. encryptionKeyID is a
        # name defined outside this class.
        staging = S3Details(self.config_helper,
                            source,
                            encryption_key_id=encryptionKeyID)

        unload_step = UnloadDataToS3Task(source, staging)
        self.task_manager.add_task(
            unload_step,
            dependencies=self.barrier_after_all_resource_pre_tests)

        copy_step = CopyDataFromS3Task(destination, staging)
        self.task_manager.add_task(copy_step, dependencies=unload_step)

        cleanup_step = CleanupS3StagingAreaTask(staging)
        self.task_manager.add_task(cleanup_step, dependencies=copy_step)