def execute(self):
    """Copy every readable stream straight to the writer.

    Files are moved verbatim (no item deserialization), so no item
    totals can be reported for this bypass. Progress is committed to
    the bypass state after each file so an interrupted run can resume.
    """
    # Item counts are meaningless on streamed bypasses.
    self.valid_total_count = False
    self.bypass_state = StreamBypassState(self.config, self.metadata)
    loader = ModuleLoader()
    reader = loader.load_reader(self.config.reader_options, self.metadata)
    writer = loader.load_writer(self.config.writer_options, self.metadata)
    with closing(reader), closing(writer):
        for stream in reader.get_read_streams():
            # Guard clause: files copied in a previous run are skipped.
            if stream in self.bypass_state.skipped:
                logging.info('Skip file {}'.format(stream.filename))
                continue
            stream_fd = cohere_stream(reader.open_stream(stream))
            logging.info('Starting to copy file {}'.format(stream.filename))
            try:
                writer.write_stream(stream, stream_fd)
            finally:
                # Always release the stream handle, even on write failure.
                stream_fd.close()
            logging.info('Finished copying file {}'.format(stream.filename))
            self.bypass_state.commit_copied(stream)
def meets_conditions(cls, config):
    """Return True when *config* is eligible for the stream bypass.

    The bypass copies raw files, so it only applies when no custom
    filter/transform/grouper is configured, no item or buffer limits
    are set, and the configured reader/writer pair exposes the stream
    interface (``get_read_streams``/``open_stream`` and
    ``write_stream``). Every rejection is logged via
    ``cls._log_skip_reason``.
    """
    if not config.filter_before_options['name'].endswith('NoFilter'):
        cls._log_skip_reason('custom filter configured')
        return False
    if not config.filter_after_options['name'].endswith('NoFilter'):
        cls._log_skip_reason('custom filter configured')
        return False
    if not config.transform_options['name'].endswith('NoTransform'):
        cls._log_skip_reason('custom transform configured')
        return False
    if not config.grouper_options['name'].endswith('NoGrouper'):
        cls._log_skip_reason('custom grouper configured')
        return False
    # Hoist the repeated lookup; .get() keeps a missing 'options'
    # section from raising.
    writer_opts = config.writer_options.get('options', {})
    if writer_opts.get('items_limit'):
        cls._log_skip_reason('items limit configuration (items_limit)')
        return False
    if writer_opts.get('items_per_buffer_write'):
        cls._log_skip_reason(
            'buffer limit configuration (items_per_buffer_write)')
        return False
    if writer_opts.get('size_per_buffer_write'):
        cls._log_skip_reason(
            'buffer limit configuration (size_per_buffer_write)')
        return False
    # BUGFIX: the original indexed config.writer_options['options']
    # directly here, raising KeyError when 'options' was absent even
    # though every other check tolerated that.
    write_buffer = writer_opts.get('write_buffer')
    if write_buffer and not write_buffer.endswith('base.WriteBuffer'):
        cls._log_skip_reason('custom write buffer configuration')
        return False
    module_loader = ModuleLoader()
    try:
        with closing(
                module_loader.load_class(
                    config.reader_options['name'])) as reader:
            pass
        with closing(
                module_loader.load_class(
                    config.writer_options['name'])) as writer:
            pass
    # BUGFIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
    except Exception:
        cls._log_skip_reason("Can't load reader and/or writer")
        return False
    if not callable(getattr(reader, 'get_read_streams', None)) or\
            not callable(getattr(reader, 'open_stream', None)):
        cls._log_skip_reason(
            "Reader doesn't support get_read_streams()/open_stream()")
        return False
    if not hasattr(writer, 'write_stream'):
        cls._log_skip_reason("Writer doesn't support write_stream()")
        return False
    return True
def __init__(self, config, metadata):
    """Restore (or initialize) persisted progress for the stream bypass."""
    loader = ModuleLoader()
    self.state = loader.load_persistence(config.persistence_options,
                                         metadata)
    self.state_position = self.state.get_last_position()
    # Nothing has been copied yet in *this* run either way.
    self.done = []
    if self.state_position:
        # Resuming: files recorded as done in a previous run are skipped.
        self.skipped = self.state_position['done']
        self.stats = self.state_position.get('stats', {'bytes_copied': 0})
    else:
        # Fresh run: persist an empty starting position immediately.
        self.skipped = []
        self.stats = {'bytes_copied': 0}
        self.state.commit_position(self._get_state())
def __init__(self, config, metadata, aws_key, aws_secret):
    """Load or create persisted copy state for the S3 bypass.

    On a fresh run the full key listing is fetched from S3 and an
    initial position is committed; on resume, the pending/done key
    lists are restored from the persisted position.
    """
    self.config = config
    module_loader = ModuleLoader()
    self.state = module_loader.load_persistence(config.persistence_options,
                                                metadata)
    self.state_position = self.state.get_last_position()
    if not self.state_position:
        # Fresh run: list the source bucket and persist the starting state.
        self.pending = S3BucketKeysFetcher(
            self.config.reader_options['options'],
            aws_key, aws_secret).pending_keys()
        self.done = []
        self.skipped = []
        self.stats = {'total_count': 0}
        self.state.commit_position(self._get_state())
    else:
        # Resuming: keys copied in a previous run become the skip list.
        self.pending = self.state_position['pending']
        self.done = []
        self.skipped = self.state_position['done']
        # NOTE(review): self.keys is only assigned on this resume path;
        # a fresh run leaves the attribute undefined -- confirm no
        # caller reads .keys before a restart.
        self.keys = self.pending
        self.stats = self.state_position.get('stats', {'total_count': 0})
def _get_write_buffer(self):
    """Instantiate the write buffer configured for this writer."""
    loader = ModuleLoader()
    buffer_name = self.read_option('write_buffer')
    buffer_options = {
        'name': buffer_name,
        'options': self.read_option('write_buffer_options'),
    }
    buffer_class = loader.load_class(buffer_name)
    # The files handler groups buffered items into per-group files.
    group_files_handler = self._items_group_files_handler(
        buffer_class, **buffer_options['options'])
    return loader.load_write_buffer(
        buffer_options,
        self.metadata,
        items_per_buffer_write=self.read_option('items_per_buffer_write'),
        size_per_buffer_write=self.read_option('size_per_buffer_write'),
        items_group_files_handler=group_files_handler,
        compression_format=self.compression_format,
        hash_algorithm=self.hash_algorithm,
    )
def __init__(self, configuration):
    """Wire up every pipeline component from a raw configuration dict.

    Instantiation order matters here: the logger is created early so
    later steps can log, the reader is loaded before its optional
    stream components are attached, and the formatter is loaded before
    the writer that consumes it.
    """
    self.config = ExporterConfig(configuration)
    # Whether the export should run in threaded mode (default: off).
    self.threaded = self.config.exporter_options.get('threaded', False)
    self.logger = ExportManagerLogger(self.config.log_options)
    self.module_loader = ModuleLoader()
    metadata = ExportMeta(configuration)
    self.metadata = metadata
    self.reader = self.module_loader.load_reader(
        self.config.reader_options, metadata)
    if is_stream_reader(self.reader):
        # Stream readers need deserializer/decompressor components
        # attached after construction.
        deserializer = self.module_loader.load_deserializer(
            self.config.deserializer_options, metadata)
        decompressor = self.module_loader.load_decompressor(
            self.config.decompressor_options, metadata)
        self.reader.deserializer = deserializer
        self.reader.decompressor = decompressor
    self.filter_before = self.module_loader.load_filter(
        self.config.filter_before_options, metadata)
    self.filter_after = self.module_loader.load_filter(
        self.config.filter_after_options, metadata)
    self.transform = self.module_loader.load_transform(
        self.config.transform_options, metadata)
    self.export_formatter = self.module_loader.load_formatter(
        self.config.formatter_options, metadata)
    # The writer receives the formatter so it can serialize items.
    self.writer = self.module_loader.load_writer(
        self.config.writer_options,
        metadata,
        export_formatter=self.export_formatter)
    self.persistence = self.module_loader.load_persistence(
        self.config.persistence_options, metadata)
    self.grouper = self.module_loader.load_grouper(
        self.config.grouper_options, metadata)
    self.notifiers = NotifiersList(self.config.notifiers, metadata)
    if self.config.disable_retries:
        # Process-wide switch: turn off retry decorators entirely.
        disable_retries()
    self.logger.debug('{} has been initiated'.format(
        self.__class__.__name__))
    self.stats_manager = self.module_loader.load_stats_manager(
        self.config.stats_options, metadata)
    # Populated externally/by subclasses with bypass classes to try.
    self.bypass_cases = []
def setUp(self):
    """Provide each test with a fresh ModuleLoader instance."""
    self.module_loader = ModuleLoader()
def __init__(self, options, metadata):
    """Build the list of notifier instances described by *options*."""
    self.options = options
    self.module_loader = ModuleLoader()
    # Instantiate every configured notifier up front (uses self.options
    # and self.module_loader, so this must come last).
    self.notifiers = self._populate_notifiers(metadata)