def run(self):
    """Service requests from ``self.queue`` until told to shut down.

    Blocks on the queue and dispatches on the type of each task:

    * ``ShutdownThreadRequest`` -- calls ``self._cleanup()`` and returns,
      which ends the loop.
    * ``IORequest`` -- unpacks to ``(filename, offset, data, is_stream)``.
      Stream requests are emitted through ``bytes_print``; file requests
      are written at ``offset`` into a handle that is opened on first use
      and kept in ``self.fd_descriptor_cache``.
    * ``IOCloseRequest`` -- closes and forgets the cached handle for
      ``task.filename`` (if any) and, when ``task.desired_mtime`` is not
      None, sets the file's access and modification times to it.

    A failure while handling a single request is logged and the loop
    carries on, so that one bad request cannot prevent later requests
    (in particular the shutdown request, which triggers cleanup) from
    being processed.
    """
    while True:
        task = self.queue.get(True)
        if isinstance(task, ShutdownThreadRequest):
            LOGGER.debug("Shutdown request received in IO thread, "
                         "shutting down.")
            self._cleanup()
            return
        try:
            if isinstance(task, IORequest):
                filename, offset, data, is_stream = task
                if is_stream:
                    # Streams are not seekable: the data is simply emitted
                    # via bytes_print (presumably to stdout -- confirm
                    # against its definition) and stdout is flushed below.
                    fileobj = sys.stdout
                    bytes_print(data)
                else:
                    fileobj = self.fd_descriptor_cache.get(filename)
                    if fileobj is None:
                        # 'rb+' requires the file to exist already and
                        # does not truncate it, so parts can be written
                        # at arbitrary offsets.
                        fileobj = open(filename, 'rb+')
                        self.fd_descriptor_cache[filename] = fileobj
                    fileobj.seek(offset)
                    fileobj.write(data)
                LOGGER.debug("Writing data to: %s, offset: %s",
                             filename, offset)
                fileobj.flush()
            elif isinstance(task, IOCloseRequest):
                LOGGER.debug("IOCloseRequest received for %s, closing file.",
                             task.filename)
                fileobj = self.fd_descriptor_cache.get(task.filename)
                if fileobj is not None:
                    fileobj.close()
                    del self.fd_descriptor_cache[task.filename]
                if task.desired_mtime is not None:
                    os.utime(task.filename,
                             (task.desired_mtime, task.desired_mtime))
        except Exception as e:
            # Keep servicing the queue: letting the exception escape would
            # end this loop and strand every request queued after it.
            LOGGER.error("Error processing IO request: %s", e,
                         exc_info=True)
def save_file(filename, response_data, last_update, is_stream=False):
    """
    This writes to the file upon downloading.  It reads the data in the
    response.  Makes a new directory if needed and then writes the
    data to the file.  It also modifies the last modified time to that
    of the S3 object.

    :param filename: Destination path.  Only used for error reporting
        when ``is_stream`` is true.
    :param response_data: Mapping with a ``'Body'`` entry exposing
        ``read(size)`` and an ``'ETag'`` entry whose first and last
        characters (presumably the surrounding quotes) are stripped.
    :param last_update: Object with a ``timetuple()`` method; its value
        becomes the saved file's timestamp.  Ignored for streams.
    :param is_stream: When true, nothing is written to disk; the payload
        is printed via ``bytes_print`` after validation instead.

    :raises CreateDirectoryError: if the parent directory cannot be
        created.
    :raises MD5Error: if MD5 validation is performed and the digest does
        not match the ETag.  A file written to disk is removed first.
    """
    body = response_data['Body']
    etag = response_data['ETag'][1:-1]
    if not is_stream:
        d = os.path.dirname(filename)
        try:
            # A bare filename has an empty dirname; os.makedirs('') would
            # raise, and the current directory needs no creating anyway.
            if d and not os.path.exists(d):
                os.makedirs(d)
        except OSError as e:
            # Losing a race with another creator of the same directory
            # is fine; anything else is a real failure.
            if e.errno != errno.EEXIST:
                raise CreateDirectoryError(
                    "Could not create directory %s: %s" % (d, e))
    if MD5_AVAILABLE and _can_validate_md5_with_etag(etag, response_data):
        md5 = hashlib.md5()
    else:
        md5 = None

    # Read the body in 1 MiB chunks until read() returns b''.
    file_chunks = iter(partial(body.read, 1024 * 1024), b'')
    if is_stream:
        # Need to save the data to be able to check the etag for a stream
        # because once the data is written to the stream there is no
        # undoing it.
        payload = write_to_file(None, etag, file_chunks, md5, True)
    else:
        with open(filename, 'wb') as out_file:
            write_to_file(out_file, etag, file_chunks, md5)

    if md5 is not None and etag != md5.hexdigest():
        if not is_stream:
            os.remove(filename)
        raise MD5Error(filename)

    if not is_stream:
        last_update_tuple = last_update.timetuple()
        mod_timestamp = time.mktime(last_update_tuple)
        set_file_utime(filename, int(mod_timestamp))
    else:
        # Now write the output to stdout since the md5 is correct.
        bytes_print(payload)
        sys.stdout.flush()
def _handle_stream_task(self, data):
    """Emit ``data`` through ``bytes_print`` and flush standard output.

    The reference to ``sys.stdout`` is taken before ``bytes_print`` runs,
    and that same object is the one flushed afterwards.
    """
    stdout_stream = sys.stdout
    bytes_print(data)
    stdout_stream.flush()