def iteritems(self):
    """Yield every record from the streams that have not been fully read.

    For each ``(file_obj, filename, size)`` entry returned by
    ``self.get_read_streams()`` the file object is wrapped with
    ``cohere_stream``.  Streams whose filename is already listed in
    ``self.last_position['readed_streams']`` are skipped; the others are
    decompressed, deserialized and their records yielded one at a time.

    A filename is appended to ``self.last_position['readed_streams']`` only
    after its stream has been consumed to the end.  ``self.finished`` is set
    to ``True`` once all streams have been processed.
    """
    for raw_file, filename, _size in self.get_read_streams():
        source = cohere_stream(raw_file)
        try:
            if filename not in self.last_position['readed_streams']:
                source = self.decompressor.decompress(source)
                source = cohere_stream(source)
                for item in self.deserializer.deserialize(source):
                    yield item
                # Only reached when the stream was read through to the end.
                self.last_position['readed_streams'].append(filename)
        finally:
            # Runs for skipped streams too, closing whichever wrapper is
            # currently bound.
            source.close()
    self.finished = True
def execute(self):
    """Copy each stream offered by the reader to the writer.

    A reader and a writer are loaded from ``self.config`` through
    ``ModuleLoader`` and both are closed when the copy loop ends.  Streams
    found in ``self.bypass_state.skipped`` are only logged; every other
    stream is opened, handed to ``writer.write_stream`` and then reported
    through ``self.bypass_state.commit_copied``.
    """
    # We can't count items on streamed bypasses
    self.valid_total_count = False
    self.bypass_state = StreamBypassState(self.config, self.metadata)

    loader = ModuleLoader()
    reader = loader.load_reader(self.config.reader_options, self.metadata)
    writer = loader.load_writer(self.config.writer_options, self.metadata)

    with closing(reader), closing(writer):
        for stream in reader.get_read_streams():
            if stream in self.bypass_state.skipped:
                skip_msg = 'Skip file {}'.format(stream.filename)
                logging.log(logging.INFO, skip_msg)
                continue

            source = cohere_stream(reader.open_stream(stream))
            start_msg = 'Starting to copy file {}'.format(stream.filename)
            logging.log(logging.INFO, start_msg)
            try:
                writer.write_stream(stream, source)
            finally:
                # Release the opened stream even if the write fails.
                source.close()
            done_msg = 'Finished copying file {}'.format(stream.filename)
            logging.log(logging.INFO, done_msg)
            self.bypass_state.commit_copied(stream)
def iteritems_retrying(self, stream_data):
    """Yield the items of one stream, skipping those already handed out.

    Nothing is yielded when ``stream_data.filename`` is already listed in
    ``self.last_position['readed_streams']``.  Otherwise the stream is
    opened, decompressed and deserialized.  The 1-based index of the last
    yielded item is stored in
    ``self.last_position['stream_offset'][filename]``, and items whose index
    does not exceed the offset recorded on entry are skipped.
    NOTE(review): presumably a retrying caller re-invokes this generator
    after a failure so that it resumes past delivered items -- confirm.

    When the stream has been read to the end, its filename is appended to
    ``readed_streams`` and its offset entry is removed.

    :param stream_data: stream descriptor; its ``filename`` attribute is the
        key used for position tracking, and the object itself is passed to
        ``self.open_stream``.
    """
    if stream_data.filename in self.last_position['readed_streams']:
        return
    stream = cohere_stream(self.open_stream(stream_data))
    try:
        stream = self.decompressor.decompress(stream)
        stream = cohere_stream(stream)
        stream_offset = self.last_position['stream_offset']
        items_offset = stream_offset.get(stream_data.filename, 0)
        items = self.deserializer.deserialize(stream)
        for items_readed, item in enumerate(items, 1):
            if items_readed > items_offset:
                # Record progress before yielding, so the stored offset
                # already counts the item being handed out.
                stream_offset[stream_data.filename] = items_readed
                yield item
    finally:
        stream.close()
    self.last_position['readed_streams'].append(stream_data.filename)
    # An empty stream never stores an offset, so a plain ``del`` would raise
    # KeyError here; ``pop`` with a default tolerates the missing key.
    stream_offset.pop(stream_data.filename, None)
def execute(self):
    """Run the stream bypass: move every non-skipped stream to the writer.

    Sets ``self.valid_total_count`` to ``False`` and creates a fresh
    ``StreamBypassState`` in ``self.bypass_state``.  The reader and writer
    built from ``self.config`` are closed when the loop finishes.  For each
    stream from ``reader.get_read_streams()`` that is not in
    ``self.bypass_state.skipped``, the stream is opened, written with
    ``writer.write_stream`` and passed to
    ``self.bypass_state.commit_copied``; skipped streams are only logged.
    """
    # We can't count items on streamed bypasses
    self.valid_total_count = False
    self.bypass_state = StreamBypassState(self.config, self.metadata)
    modules = ModuleLoader()
    reader = modules.load_reader(
        self.config.reader_options, self.metadata)
    writer = modules.load_writer(
        self.config.writer_options, self.metadata)
    with closing(reader), closing(writer):
        for stream in reader.get_read_streams():
            pending = stream not in self.bypass_state.skipped
            if pending:
                opened = cohere_stream(reader.open_stream(stream))
                logging.info(
                    'Starting to copy file {}'.format(stream.filename))
                try:
                    writer.write_stream(stream, opened)
                finally:
                    # Always close the opened stream, even on write errors.
                    opened.close()
                logging.info(
                    'Finished copying file {}'.format(stream.filename))
                self.bypass_state.commit_copied(stream)
            else:
                logging.info('Skip file {}'.format(stream.filename))