Esempio n. 1
0
 def execute(self):
     """Copy every readable stream straight from the reader to the writer.

     Streamed bypasses cannot count individual items, so the total-count
     statistic is flagged as unreliable before any copying starts.
     Streams already recorded in the persisted bypass state are skipped.
     """
     # Item totals cannot be tracked when bypassing at stream level.
     self.valid_total_count = False
     self.bypass_state = StreamBypassState(self.config, self.metadata)
     loader = ModuleLoader()
     reader = loader.load_reader(self.config.reader_options, self.metadata)
     writer = loader.load_writer(self.config.writer_options, self.metadata)
     with closing(reader), closing(writer):
         for stream in reader.get_read_streams():
             if stream in self.bypass_state.skipped:
                 logging.log(logging.INFO,
                             'Skip file {}'.format(stream.filename))
                 continue
             stream_fd = cohere_stream(reader.open_stream(stream))
             logging.log(
                 logging.INFO,
                 'Starting to copy file {}'.format(stream.filename))
             try:
                 writer.write_stream(stream, stream_fd)
             finally:
                 # Always release the stream handle, even on write failure.
                 stream_fd.close()
             logging.log(
                 logging.INFO,
                 'Finished copying file {}'.format(stream.filename))
             self.bypass_state.commit_copied(stream)
Esempio n. 2
0
 def meets_conditions(cls, config):
     """Return True when *config* is eligible for the stream bypass.

     The bypass only applies to pass-through exports: no custom filters,
     transform, grouper, item/buffer limits, or custom write buffer, and
     a reader/writer pair that supports the stream interface
     (``get_read_streams``/``open_stream`` and ``write_stream``).
     Each rejected condition is logged via ``_log_skip_reason``.
     """
     if not config.filter_before_options['name'].endswith('NoFilter'):
         cls._log_skip_reason('custom filter configured')
         return False
     if not config.filter_after_options['name'].endswith('NoFilter'):
         cls._log_skip_reason('custom filter configured')
         return False
     if not config.transform_options['name'].endswith('NoTransform'):
         cls._log_skip_reason('custom transform configured')
         return False
     if not config.grouper_options['name'].endswith('NoGrouper'):
         cls._log_skip_reason('custom grouper configured')
         return False
     if config.writer_options.get('options', {}).get('items_limit'):
         cls._log_skip_reason('items limit configuration (items_limit)')
         return False
     if config.writer_options.get('options',
                                  {}).get('items_per_buffer_write'):
         cls._log_skip_reason(
             'buffer limit configuration (items_per_buffer_write)')
         return False
     if config.writer_options.get('options',
                                  {}).get('size_per_buffer_write'):
         cls._log_skip_reason(
             'buffer limit configuration (size_per_buffer_write)')
         return False
     # Fix: use .get() like every other writer-option check above; the
     # original indexed ['options'] directly and could raise KeyError.
     write_buffer = config.writer_options.get('options',
                                              {}).get('write_buffer')
     if write_buffer and not write_buffer.endswith('base.WriteBuffer'):
         cls._log_skip_reason('custom write buffer configuration')
         return False
     module_loader = ModuleLoader()
     try:
         with closing(
                 module_loader.load_class(
                     config.reader_options['name'])) as reader:
             pass
         with closing(
                 module_loader.load_class(
                     config.writer_options['name'])) as writer:
             pass
     # Fix: a bare ``except:`` also swallowed KeyboardInterrupt and
     # SystemExit; only genuine load failures should skip the bypass.
     except Exception:
         cls._log_skip_reason("Can't load reader and/or writer")
         return False
     if not callable(getattr(reader, 'get_read_streams', None)) or\
        not callable(getattr(reader, 'open_stream', None)):
         cls._log_skip_reason(
             "Reader doesn't support get_read_streams()/open_stream()")
         return False
     if not hasattr(writer, 'write_stream'):
         cls._log_skip_reason("Writer doesn't support write_stream()")
         return False
     return True
Esempio n. 3
0
 def __init__(self, config, metadata):
     """Restore stream-bypass progress from persisted state, if any.

     A resumed run treats previously completed streams as skipped;
     a fresh run starts empty and commits its initial position.
     """
     loader = ModuleLoader()
     self.state = loader.load_persistence(config.persistence_options,
                                          metadata)
     self.state_position = self.state.get_last_position()
     if self.state_position:
         # Resuming: everything finished last run is skipped this run.
         self.done = []
         self.skipped = self.state_position['done']
         self.stats = self.state_position.get('stats', {'bytes_copied': 0})
     else:
         # Fresh run: nothing copied yet; persist the starting position.
         self.done = []
         self.skipped = []
         self.stats = {'bytes_copied': 0}
         self.state.commit_position(self._get_state())
Esempio n. 4
0
 def __init__(self, config, metadata, aws_key, aws_secret):
     """Build (or restore) the S3 copy state for a bypass run.

     On a fresh run the pending key list is fetched from the source
     bucket and the initial position is persisted; on resume, pending
     and already-done keys come from the persisted state instead.
     """
     self.config = config
     module_loader = ModuleLoader()
     self.state = module_loader.load_persistence(config.persistence_options,
                                                 metadata)
     self.state_position = self.state.get_last_position()
     if not self.state_position:
         self.pending = S3BucketKeysFetcher(
             self.config.reader_options['options'], aws_key,
             aws_secret).pending_keys()
         self.done = []
         self.skipped = []
         self.stats = {'total_count': 0}
         self.state.commit_position(self._get_state())
     else:
         self.pending = self.state_position['pending']
         self.done = []
         self.skipped = self.state_position['done']
         self.stats = self.state_position.get('stats', {'total_count': 0})
     # Fix: ``keys`` was only assigned on the resume branch, so a fresh
     # run left the object without the attribute; set it on both paths
     # for a consistent attribute set.
     self.keys = self.pending
Esempio n. 5
0
    def _get_write_buffer(self):
        """Instantiate the configured write buffer.

        Loads the buffer class named by the ``write_buffer`` option,
        builds the group-files handler with the buffer's own options,
        and hands both to the module loader together with the buffer
        write limits and compression/hash settings.
        """
        loader = ModuleLoader()
        buffer_name = self.read_option('write_buffer')
        buffer_class = loader.load_class(buffer_name)
        buffer_options = self.read_option('write_buffer_options')

        # The handler receives the buffer class plus its raw options.
        handler = self._items_group_files_handler(buffer_class,
                                                  **buffer_options)
        return loader.load_write_buffer(
            {'name': buffer_name, 'options': buffer_options},
            self.metadata,
            items_per_buffer_write=self.read_option('items_per_buffer_write'),
            size_per_buffer_write=self.read_option('size_per_buffer_write'),
            items_group_files_handler=handler,
            compression_format=self.compression_format,
            hash_algorithm=self.hash_algorithm,
        )
Esempio n. 6
0
 def __init__(self, configuration):
     """Wire up every pipeline component from a raw configuration dict.

     Builds config/metadata objects, then uses the module loader to
     instantiate reader, filters, transform, formatter, writer,
     persistence, grouper, notifiers and stats manager in dependency
     order (the formatter must exist before the writer receives it).
     """
     self.config = ExporterConfig(configuration)
     self.threaded = self.config.exporter_options.get('threaded', False)
     self.logger = ExportManagerLogger(self.config.log_options)
     self.module_loader = ModuleLoader()
     metadata = ExportMeta(configuration)
     self.metadata = metadata
     self.reader = self.module_loader.load_reader(
         self.config.reader_options, metadata)
     # Stream readers additionally need deserializer/decompressor stages.
     if is_stream_reader(self.reader):
         deserializer = self.module_loader.load_deserializer(
             self.config.deserializer_options, metadata)
         decompressor = self.module_loader.load_decompressor(
             self.config.decompressor_options, metadata)
         self.reader.deserializer = deserializer
         self.reader.decompressor = decompressor
     self.filter_before = self.module_loader.load_filter(
         self.config.filter_before_options, metadata)
     self.filter_after = self.module_loader.load_filter(
         self.config.filter_after_options, metadata)
     self.transform = self.module_loader.load_transform(
         self.config.transform_options, metadata)
     # Formatter is created first so it can be injected into the writer.
     self.export_formatter = self.module_loader.load_formatter(
         self.config.formatter_options, metadata)
     self.writer = self.module_loader.load_writer(
         self.config.writer_options,
         metadata,
         export_formatter=self.export_formatter)
     self.persistence = self.module_loader.load_persistence(
         self.config.persistence_options, metadata)
     self.grouper = self.module_loader.load_grouper(
         self.config.grouper_options, metadata)
     self.notifiers = NotifiersList(self.config.notifiers, metadata)
     if self.config.disable_retries:
         disable_retries()
     self.logger.debug('{} has been initiated'.format(
         self.__class__.__name__))
     self.stats_manager = self.module_loader.load_stats_manager(
         self.config.stats_options, metadata)
     # Bypass cases are registered later by subclasses/callers.
     self.bypass_cases = []
Esempio n. 7
0
 def setUp(self):
     """Create a fresh ModuleLoader for each test case."""
     self.module_loader = ModuleLoader()
Esempio n. 8
0
 def __init__(self, options, metadata):
     """Keep the raw options and build the notifier instances.

     The module loader is created before ``_populate_notifiers`` runs,
     since that helper relies on it to load each notifier plugin.
     """
     self.module_loader = ModuleLoader()
     self.options = options
     self.notifiers = self._populate_notifiers(metadata)