def _do_execute_direct(self, code):
    shell = builtins.__xonsh_shell__
    env = builtins.__xonsh_env__
    enc = env.get('XONSH_ENCODING')
    out = SpooledTemporaryFile(max_size=MAX_SIZE, mode='w+t',
                               encoding=enc, newline='\n')
    err = SpooledTemporaryFile(max_size=MAX_SIZE, mode='w+t',
                               encoding=enc, newline='\n')
    try:
        with redirect_stdout(out), redirect_stderr(err), \
             swap(builtins, '__xonsh_stdout_uncaptured__', out), \
             swap(builtins, '__xonsh_stderr_uncaptured__', err), \
             env.swap({'XONSH_STORE_STDOUT': False}):
            shell.default(code)
        interrupted = False
    except KeyboardInterrupt:
        interrupted = True

    output, error = '', ''
    if out.tell() > 0:
        out.seek(0)
        output = out.read()
    if err.tell() > 0:
        err.seek(0)
        error = err.read()

    out.close()
    err.close()
    return output, error, interrupted
def do_execute(self, code, silent, store_history=True, user_expressions=None,
               allow_stdin=False):
    """Execute user code."""
    if len(code.strip()) == 0:
        return {'status': 'ok', 'execution_count': self.execution_count,
                'payload': [], 'user_expressions': {}}
    env = builtins.__xonsh_env__
    shell = builtins.__xonsh_shell__
    hist = builtins.__xonsh_history__
    enc = env.get('XONSH_ENCODING')
    out = SpooledTemporaryFile(max_size=MAX_SIZE, mode='w+t',
                               encoding=enc, newline='\n')
    err = SpooledTemporaryFile(max_size=MAX_SIZE, mode='w+t',
                               encoding=enc, newline='\n')
    try:
        with redirect_stdout(out), redirect_stderr(err), \
             swap(builtins, '__xonsh_stdout_uncaptured__', out), \
             swap(builtins, '__xonsh_stderr_uncaptured__', err), \
             env.swap({'XONSH_STORE_STDOUT': False}):
            shell.default(code)
        interrupted = False
    except KeyboardInterrupt:
        interrupted = True

    if not silent:  # stdout response
        if out.tell() > 0:
            out.seek(0)
            self._respond_in_chunks('stdout', out.read())
        if err.tell() > 0:
            err.seek(0)
            self._respond_in_chunks('stderr', err.read())
        if hasattr(builtins, '_') and builtins._ is not None:
            # rely on sys.displayhook functionality
            self._respond_in_chunks('stdout', pformat(builtins._))
            builtins._ = None
        if hist is not None and len(hist) > 0 and out.tell() == 0 \
                and err.tell() == 0:
            self._respond_in_chunks('stdout', hist.outs[-1])

    out.close()
    err.close()

    if interrupted:
        return {'status': 'abort', 'execution_count': self.execution_count}

    rtn = 0 if (hist is None or len(hist) == 0) else hist.rtns[-1]
    if 0 < rtn:
        message = {'status': 'error', 'execution_count': self.execution_count,
                   'ename': '', 'evalue': str(rtn), 'traceback': []}
    else:
        message = {'status': 'ok', 'execution_count': self.execution_count,
                   'payload': [], 'user_expressions': {}}
    return message
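# A minimal, self-contained sketch of the capture pattern used in the two
# snippets above: stdout is redirected into a text-mode SpooledTemporaryFile,
# which stays in memory until it grows past max_size. run_captured and its
# parameters are illustrative assumptions, not part of the xonsh kernel.
from contextlib import redirect_stdout
from tempfile import SpooledTemporaryFile

def run_captured(fn, max_size=8192):
    out = SpooledTemporaryFile(max_size=max_size, mode='w+t',
                               encoding='utf-8', newline='\n')
    with redirect_stdout(out):
        fn()
    output = ''
    if out.tell() > 0:  # anything written?
        out.seek(0)
        output = out.read()
    out.close()
    return output

print(run_captured(lambda: print('hello')))  # -> hello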
def __init__(self, data=None, fp=None, length=-1):
    assert bool(data is not None) ^ bool(fp)
    if length == -1:
        if data is not None:
            length = len(data)
        else:
            length = get_size(fp)  # can be -1

    # We allow writer reuse, but if we're working with a stream, we cannot
    # seek. Copy the data to a tempfile.
    if fp and not can_seek(fp):
        newfp = SpooledTemporaryFile(MAX_INMEMORY_SIZE)
        sendfile(newfp, fp)
        length = newfp.tell()
        newfp.seek(0)
        fp = newfp

    self.data = data
    self.fp = fp
    self.fpreads = 0  # keep track of fp usage
    self.length = length
    assert length >= 0
    self.use_tempfile = length > MAX_INMEMORY_SIZE
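# Hedged sketch of the "copy a non-seekable stream into a tempfile" step
# above, with shutil.copyfileobj standing in for the project's sendfile()
# helper. spool_stream and MAX_INMEMORY_SIZE here are illustrative.
import shutil
from tempfile import SpooledTemporaryFile

MAX_INMEMORY_SIZE = 1024 * 1024

def spool_stream(fp):
    """Return a seekable, rewound copy of fp plus its measured length."""
    newfp = SpooledTemporaryFile(MAX_INMEMORY_SIZE)
    shutil.copyfileobj(fp, newfp)
    length = newfp.tell()  # bytes copied so far = total length
    newfp.seek(0)
    return newfp, length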
class RemoteFileBuffer(object):
    """File-like object providing buffer for local file operations.

    Instances of this class manage a local tempfile buffer corresponding
    to the contents of a remote file.  All reads and writes happen locally,
    with the content being copied to the remote file only on flush() or
    close().

    Instances of this class are returned by S3FS.open, but it is designed
    to be usable by any FS subclass that manages remote files.
    """

    def __init__(self, fs, path, mode):
        self.file = TempFile()
        self.fs = fs
        self.path = path
        self.mode = mode

    def __del__(self):
        if not self.closed:
            self.close()

    #  This is lifted straight from the stdlib's tempfile.py
    def __getattr__(self, name):
        file = self.__dict__['file']
        a = getattr(file, name)
        if not issubclass(type(a), type(0)):
            setattr(self, name, a)
        return a

    def __enter__(self):
        self.file.__enter__()
        return self

    def __exit__(self, exc, value, tb):
        self.close()
        return False

    def __iter__(self):
        return iter(self.file)

    def flush(self):
        self.file.flush()
        if "w" in self.mode or "a" in self.mode or "+" in self.mode:
            pos = self.file.tell()
            self.file.seek(0)
            self.fs.setcontents(self.path, self.file)
            self.file.seek(pos)

    def close(self):
        if "w" in self.mode or "a" in self.mode or "+" in self.mode:
            self.file.seek(0)
            self.fs.setcontents(self.path, self.file)
        self.file.close()
def upload_file(self, user, stream, expected_size, filename,
                force_coll_name=''):
    """Upload WARC archive.

    :param User user: user
    :param stream: file object
    :param int expected_size: expected WARC archive size
    :param str filename: WARC archive filename
    :param str force_coll_name: name of collection to upload into

    :returns: upload information
    :rtype: dict
    """
    temp_file = None
    logger.debug('Upload Begin')
    logger.debug('Expected Size: ' + str(expected_size))

    #is_anon = False

    size_rem = user.get_size_remaining()
    logger.debug('User Size Rem: ' + str(size_rem))

    if size_rem < expected_size:
        return {'error': 'out_of_space'}

    if force_coll_name and not user.has_collection(force_coll_name):
        #if is_anon:
        #    user.create_collection(force_coll, 'Temporary Collection')
        #else:
        #status = 'Collection {0} not found'.format(force_coll_name)
        return {'error': 'no_such_collection'}

    temp_file = SpooledTemporaryFile(max_size=BLOCK_SIZE)

    stream = CacheingLimitReader(stream, expected_size, temp_file)

    if filename.endswith('.har'):
        stream, expected_size = self.har2warc(filename, stream)
        temp_file.close()
        temp_file = stream

    infos = self.parse_uploaded(stream, expected_size)

    total_size = temp_file.tell()
    if total_size != expected_size:
        return {'error': 'incomplete_upload',
                'expected': expected_size,
                'actual': total_size}

    upload_id, upload_key = self._init_upload_status(user, total_size, 1,
                                                     filename=filename)

    return self.handle_upload(temp_file, upload_id, upload_key, infos,
                              filename, user, force_coll_name, total_size)
def ensure_content_length(resp):
    """Add Content-Length when it is not present.

    Streams content into a temp file, and replaces the original socket
    with it.
    """
    spool = SpooledTemporaryFile(current_app.config.get('FILES_URL_MAX_SIZE'))
    shutil.copyfileobj(resp.raw, spool)
    resp.headers['Content-Length'] = str(spool.tell())
    spool.seek(0)
    # replace the original socket with temp file
    resp.raw._fp.close()
    resp.raw._fp = spool
    return resp
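# The same trick in isolation, without the Flask config lookup: stream an
# unknown-length body into a spool, read off its size, and hand back a
# rewound replacement for the socket. measure_stream is an illustrative name.
import shutil
from tempfile import SpooledTemporaryFile

def measure_stream(raw, max_size=1024 * 1024):
    spool = SpooledTemporaryFile(max_size)
    shutil.copyfileobj(raw, spool)
    size = spool.tell()  # total bytes copied
    spool.seek(0)        # ready to be read in place of the socket
    return spool, size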
def convert(self, input_file: SpooledTemporaryFile,
            max_char_per_line: int = MAX_CHAR_PER_LINE) -> dict:
    result = ''
    input_file.seek(0, io.SEEK_END)
    if input_file.tell() > 0:
        try:
            converted = extract_text(input_file)
            result = parse_converted_pdf(converted, int(max_char_per_line),
                                         self.debug)
            msg = f'Successfully converted "{input_file.filename}"!'
            logger.info(msg)
        except Exception as err:
            msg = f'Unable to convert "{input_file.filename}"!'
            logger.exception(msg)
            raise err
    result = {'result': result}
    return result
class GCloudFile(File):
    """
    Django file object that wraps a SpooledTemporaryFile and remembers
    changes on write to reupload the file to GCS on close()
    """

    def __init__(self, blob, maxsize=1000):
        """
        :type blob: google.cloud.storage.blob.Blob
        """
        self._dirty = False
        self._tmpfile = SpooledTemporaryFile(
            max_size=maxsize,
            prefix="django_gcloud_storage_"
        )

        self._blob = blob

        super(GCloudFile, self).__init__(self._tmpfile)

    def _update_blob(self):
        # Specify explicit size to avoid problems with not yet spooled
        # temporary files. Django's File.size property already knows how
        # to handle cases like this
        if DJANGO_17 and self._tmpfile.name is None:
            # Django bug #22307
            size = self._tmpfile.tell()
        else:
            size = self.size

        self._blob.upload_from_file(self._tmpfile, size=size, rewind=True)

    def write(self, content):
        self._dirty = True
        super(GCloudFile, self).write(content)

    def close(self):
        if self._dirty:
            self._update_blob()
            self._dirty = False

        super(GCloudFile, self).close()
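# Generic sketch of the dirty-flag write-back pattern above, decoupled from
# Django and GCS: buffer writes in a spool and push them to a backend only if
# something actually changed. WriteBackFile and the upload callable are
# illustrative assumptions.
from tempfile import SpooledTemporaryFile

class WriteBackFile:
    def __init__(self, upload, maxsize=1000):
        self._upload = upload  # callable(fileobj, size)
        self._tmp = SpooledTemporaryFile(max_size=maxsize)
        self._dirty = False

    def write(self, data):
        self._dirty = True
        self._tmp.write(data)

    def close(self):
        if self._dirty:
            size = self._tmp.tell()  # explicit size, as in _update_blob above
            self._tmp.seek(0)
            self._upload(self._tmp, size)
            self._dirty = False
        self._tmp.close()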
class VersionedFile(io.BufferedIOBase):
    def __init__(self, manager, filename, mode=Perm.read, requestor=Owner.ALL,
                 meta=None, rev=None, file_info=None, **kwargs):
        io.BufferedIOBase.__init__(self)
        self.path = self.name = filename
        # manager.check_perm(self.path, owner=requestor, perm=mode)
        self.created = self.modified = None
        self.data = None
        self.meta = meta or {}
        self.mode = mode
        self._seekable = True
        self.length = 0
        self.bs = 8192
        self._cipher = None
        self.manager = manager
        self._file_info = file_info or manager.get_metadata_and_check_perm(
            filename, rev, mode=mode, owner=requestor)
        # self._file_info = manager.get_file_metadata(filename, rev, mode=mode)
        if self._file_info:
            self.update(self._file_info)
        if mode == Perm.read and not self._file_info:
            raise FileNotFoundError(self.path)
        elif mode == Perm.write:
            self.owner = requestor
        if kwargs:
            self.update(kwargs)
        self._pos = 0
        if mode == Perm.read:
            if self.data:
                self._curr_chunk = self.data
                self._curr_chunk_num = 0
            else:
                self._curr_chunk_num = None
                self._curr_chunk = None
        else:
            self._buf = SpooledTemporaryFile(
                max_size=getattr(self, 'buffer_threshold', 52428800))
        self.hash = None

    @property
    def is_dir(self):
        return self.content_type == u'application/x-directory'

    def do_hash(self, algo='sha256'):
        self.hash = algo

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        if exc:
            if self.readable():
                self.close()
            else:
                self._buf.close()
                self.mode = None
            import six
            six.reraise(exc_type, exc, tb)
        else:
            self.close()

    def close(self):
        if self.closed:
            return
        if self.writable():
            self._buf.seek(0, 2)
            length = self.length = self._buf.tell()
            self._buf.seek(0)
            hist_data = {
                u'meta': self.meta,
                u'owner': getattr(self, 'owner', None),
                u'length': length,
                u'hash': self.hash,
                u'created': self.created,
                u'modified': self.modified,
                u'file_info': self._file_info,
            }
            content_type = getattr(self, 'content_type', None)
            if not content_type:
                content_type = mimetypes.guess_type(self.path)[0]
            hist_data[u'content_type'] = content_type
            if getattr(self, 'force_rev', None) is not None:
                hist_data[u'rev'] = rev = self.force_rev
                hist_data[u'modified'] = self.created
            self.update(
                self.manager.save_file_data(self.path, hist_data, self._buf,
                                            cipher=self._cipher))
            self._buf.close()
            self._buf = None
        self.mode = None
        io.BufferedIOBase.close(self)

    # def __del__(self):
    #     self.close()

    def readable(self):
        return self.mode == Perm.read

    def writable(self):
        return self.mode == Perm.write

    def seekable(self):
        return self._seekable

    def tell(self):
        if self.readable():
            return self._pos
        else:
            return self._buf.tell()

    def seek(self, pos, whence=0):
        if self.mode == Perm.read:
            curpos = self._pos
            if whence == 0:
                abspos = pos
            elif whence == 1:
                abspos = curpos + pos
            elif whence == 2:
                abspos = self.length + pos
            self._pos = abspos
            return self._pos
        elif self.mode == Perm.write and self.seekable():
            return self._buf.seek(pos, whence)

    def read(self, size=-1):
        if self.mode != Perm.read:
            return
        elif self._pos == self.length:
            return b''
        buf = bytearray()
        if self._pos == 0 and size == -1:
            if self.data:
                self._pos = self.length
                return self.data
            else:
                # optimization for reading the whole file
                i = 0
                for chunk in self.manager.get_file_chunks(
                        self._file_info, cipher=self._cipher):
                    i += 1
                    buf.extend(chunk)
                self._pos = len(buf)
                return bytes(buf)
        length = size if size > 0 else self.length
        where, pos = divmod(self._pos, self.bs)
        if self._curr_chunk_num != where:
            self._curr_chunk = self.manager.get_file_chunk(
                self._file_info, where, cipher=self._cipher)
            self._curr_chunk_num = where
        buf += self._curr_chunk[pos:]
        while len(buf) < length:
            where += 1
            self._curr_chunk = self.manager.get_file_chunk(
                self._file_info, where, cipher=self._cipher)
            if self._curr_chunk is None:
                self._curr_chunk_num = None
                break
            buf.extend(self._curr_chunk)
            self._curr_chunk_num = where
        read = buf[:length]
        self._pos += len(read)
        return bytes(read)

    def readall(self):
        return self.read()

    def write(self, data):
        if not data:
            return
        if not self.writable():
            raise FileError()
        if isinstance(data, six.text_type):
            data = data.encode('utf8')
        wrote = len(data)
        self._buf.write(data)
        return wrote

    def update(self, kwargs):
        if kwargs:
            for k, v in kwargs.items():
                if k == 'modified' and self.mode == 'w':
                    continue
                if v is not None:
                    setattr(self, k, v)

    def set_encryption(self, password='', save_password=False):
        """Set the encryption password, optionally saving the password in
        the metadata.
        """
        try:
            from nacl.secret import SecretBox
        except ImportError:
            SecretBox = None
        if SecretBox:
            password = hashlib.sha256(password.encode('utf8')).digest()
        else:
            password = hashlib.sha512(password.encode('utf8')).digest()[:56]
        if self.writable():
            assert self._cipher is None
            if SecretBox:
                method = u'nacl'
                self.meta[u'_encryption'] = {u'method': method}
            else:
                method = u'cfb'
                self.meta[u'_encryption'] = {
                    u'method': method,
                    u'iv': os.urandom(8),
                }
            if save_password:
                self.meta[u'_encryption'][u'key'] = password
        else:
            assert u'_encryption' in self.meta
            method = self.meta[u'_encryption'][u'method']
            password = self.meta[u'_encryption'].get(u'key', None) or password
        if method == u'nacl':
            c = SecretBox(password)
            self._cipher = {'encrypt': c.encrypt, 'decrypt': c.decrypt}
        else:
            import blowfish
            c = blowfish.Cipher(password)
            iv = self.meta[u'_encryption'][u'iv']
            self._cipher = {
                'encrypt': lambda chunk: b''.join(c.encrypt_cfb(chunk, iv)),
                'decrypt': lambda chunk: b''.join(c.decrypt_cfb(chunk, iv)),
            }
        if self.data:
            self._curr_chunk = self._cipher['decrypt'](self.data)
def fetch_media(self, url, partial_fetch=False):
    """Retrieves a given media object from a remote (HTTP) location
    and returns the content-type and a file-like object containing the
    media content.

    The file-like object is a temporary file that - depending on the
    size - lives in memory or on disk. Once the file is closed, the
    contents are removed from storage.

    :param url: the URL of the media asset.
    :type url: str.
    :param partial_fetch: determines whether the complete file should
        be fetched, or if only the first 2 MB should be retrieved.
        This feature is used to prevent complete retrieval of large
        a/v material.
    :type partial_fetch: bool.
    :returns: a tuple with the ``content-type``, ``content-length`` and
        a file-like object containing the media content. The value of
        ``content-length`` will be ``None`` in case a partial fetch is
        requested and ``content-length`` is not returned by the remote
        server.
    """
    http_resp = self.http_session.get(url, stream=True, timeout=(60, 120))
    http_resp.raise_for_status()

    if not os.path.exists(TEMP_DIR_PATH):
        log.debug('Creating temp directory %s' % TEMP_DIR_PATH)
        os.makedirs(TEMP_DIR_PATH)

    # Create a temporary file to store the media item, write the file
    # to disk if it is larger than 1 MB.
    media_file = SpooledTemporaryFile(max_size=1024 * 1024, prefix='oad_m_',
                                      suffix='.tmp', dir=TEMP_DIR_PATH)

    # When a partial fetch is requested, request up to two MB
    partial_target_size = 1024 * 1024 * 2
    content_length = http_resp.headers.get('content-length')
    if content_length and int(content_length) < partial_target_size:
        partial_target_size = int(content_length)

    retrieved_bytes = 0
    for chunk in http_resp.iter_content(chunk_size=512 * 1024):
        if chunk:  # filter out keep-alive chunks
            media_file.write(chunk)
            retrieved_bytes += len(chunk)

        if partial_fetch and retrieved_bytes >= partial_target_size:
            break

    media_file.flush()
    log.debug('Fetched media item %s [%s/%s]' % (url, retrieved_bytes,
                                                 content_length))

    # If the server doesn't provide a content-length and this isn't
    # a partial fetch, determine the size by looking at the retrieved
    # content
    if not content_length and not partial_fetch:
        media_file.seek(0, 2)
        content_length = media_file.tell()

    return (http_resp.headers.get('content-type'), content_length,
            media_file)
def upload_file(self):
    stream = None
    temp_file = None
    logger.debug('Upload Begin')

    expected_size = int(request.headers['Content-Length'])
    logger.debug('Expected Size: ' + str(expected_size))

    if not expected_size:
        return {'error_message': 'No File Specified'}

    curr_user = self.manager.get_curr_user()

    if not curr_user:
        #user = self.manager.get_anon_user()
        #force_coll = 'temp'
        #is_anon = True
        return {'error_message': 'Sorry, uploads only available for '
                                 'logged-in users'}

    user = curr_user
    force_coll = request.query.getunicode('force-coll', '')
    is_anon = False

    size_rem = self.manager.get_size_remaining(user)
    logger.debug('User Size Rem: ' + str(size_rem))

    if size_rem < expected_size:
        return {'error_message': 'Sorry, not enough space to upload this file'}

    if force_coll and not self.manager.has_collection(user, force_coll):
        if is_anon:
            self.manager.create_collection(user, force_coll,
                                           'Temporary Collection')
        else:
            status = 'Collection {0} not found'.format(force_coll)
            return {'error_message': status}

    temp_file = SpooledTemporaryFile(max_size=BLOCK_SIZE)

    filename = request.query.getunicode('filename')

    stream = request.environ['wsgi.input']
    stream = CacheingLimitReader(stream, expected_size, temp_file)

    if filename.endswith('.har'):
        stream, expected_size = self.har2warc(filename, stream)
        temp_file.close()
        temp_file = stream

    infos = self.parse_uploaded(stream, expected_size)

    total_size = temp_file.tell()
    if total_size != expected_size:
        return {'error_message': 'size mismatch: expected {0}, got {1}'.format(
            expected_size, total_size)}

    upload_id = self._get_upload_id()

    upload_key = self.upload_key.format(user=user, upid=upload_id)

    with redis_pipeline(self.manager.redis) as pi:
        pi.hset(upload_key, 'size', 0)
        pi.hset(upload_key, 'total_size', total_size * 2)
        pi.hset(upload_key, 'filename', filename)
        pi.hset(upload_key, 'total_files', 1)
        pi.hset(upload_key, 'files', 1)

    return self.handle_upload(temp_file, upload_id, upload_key, infos,
                              filename, user, force_coll, total_size)
        else:
            U = urllib2.urlparse.urlparse(R.url)
            fname = os.path.basename(U.path)
            print ' Save as', fname
            # content-length arrives as a string; max_size must be an int
            F = SpooledTemporaryFile(
                max_size=int(R.info().get('content-length', 0)))
            while True:
                D = R.read(1024 * 1024)
                if len(D) == 0:
                    break
                F.write(D)
    except IOError:
        traceback.print_exc()
        continue
    else:
        FS = F.tell()
        F.seek(0, 0)
        DF = File(F)
        DF.size = FS
        I.file.save(fname, DF)
    finally:
        R.close()
print 'Done'
class Buffer(FileWrapper):
    """Class implementing buffering of input and output streams.

    This class uses a separate buffer file to hold the contents of the
    underlying file while they are being manipulated.  As data is read
    it is duplicated into the buffer, and data is written from the buffer
    back to the file on close.
    """

    def __init__(self, fileobj, mode=None, max_size_in_memory=1024 * 8):
        """Buffered file wrapper constructor."""
        self._buffer = SpooledTemporaryFile(max_size=max_size_in_memory)
        self._in_eof = False
        self._in_pos = 0
        self._was_truncated = False
        super(Buffer, self).__init__(fileobj, mode)

    def _buffer_size(self):
        try:
            return len(self._buffer.file.getvalue())
        except AttributeError:
            return os.fstat(self._buffer.fileno()).st_size

    def _buffer_chunks(self):
        chunk = self._buffer.read(16 * 1024)
        if chunk == "":
            yield chunk
        else:
            while chunk != "":
                yield chunk
                chunk = self._buffer.read(16 * 1024)

    def _write_out_buffer(self):
        if self._check_mode("r"):
            self._read_rest()
            if "a" in self.mode:
                self._buffer.seek(self._in_pos)
                self._fileobj.seek(self._in_pos)
            else:
                self._fileobj.seek(0)
                self._buffer.seek(0)
        else:
            self._buffer.seek(0)
        if self._was_truncated:
            self._fileobj.truncate(0)
            self._was_truncated = False
        for chunk in self._buffer_chunks():
            self._fileobj.write(chunk)

    def flush(self):
        # flush the buffer; we only write to the underlying file on close
        self._buffer.flush()

    def close(self):
        if self.closed:
            return
        if self._check_mode("w"):
            self._write_out_buffer()
        super(Buffer, self).close()
        self._buffer.close()

    def _read(self, sizehint=-1):
        #  First return any data available from the buffer.
        #  Since we don't flush the buffer after every write, certain OSes
        #  (guess which!) will happily read junk data from the end of it.
        #  Instead, we explicitly read only up to self._in_pos.
        if not self._in_eof:
            buffered_size = self._in_pos - self._buffer.tell()
            if sizehint >= 0:
                buffered_size = min(sizehint, buffered_size)
        else:
            buffered_size = sizehint
        data = self._buffer.read(buffered_size)
        if data != "":
            return data
        #  Then look for more data in the underlying file
        if self._in_eof:
            return None
        data = self._fileobj.read(sizehint)
        self._in_pos += len(data)
        self._buffer.write(data)
        if sizehint < 0 or len(data) < sizehint:
            self._in_eof = True
            self._buffer.flush()
        return data

    def _write(self, data, flushing=False):
        self._buffer.write(data)
        if self._check_mode("r") and not self._in_eof:
            diff = self._buffer.tell() - self._in_pos
            if diff > 0:
                junk = self._fileobj.read(diff)
                self._in_pos += len(junk)
                if len(junk) < diff:
                    self._in_eof = True
                    self._buffer.flush()

    def _seek(self, offset, whence):
        #  Ensure we've read enough to simply do the seek on the buffer
        if self._check_mode("r") and not self._in_eof:
            if whence == 0:
                if offset > self._in_pos:
                    self._read_rest()
            if whence == 1:
                if self._buffer.tell() + offset > self._in_pos:
                    self._read_rest()
            if whence == 2:
                self._read_rest()
        #  Then just do it on the buffer...
        self._buffer.seek(offset, whence)

    def _tell(self):
        return self._buffer.tell()

    def _truncate(self, size):
        if self._check_mode("r") and not self._in_eof:
            if size > self._in_pos:
                self._read_rest()
        self._in_eof = True
        try:
            self._buffer.truncate(size)
        except TypeError:
            et, ev, tb = sys.exc_info()
            #  SpooledTemporaryFile.truncate() doesn't accept a size parameter.
            try:
                self._buffer._file.truncate(size)
            except Exception:
                raise et, ev, tb
        #  StringIO objects don't truncate to larger size correctly.
        if hasattr(self._buffer, "_file"):
            _file = self._buffer._file
            if hasattr(_file, "getvalue"):
                if len(_file.getvalue()) != size:
                    curpos = _file.tell()
                    _file.seek(0, 2)
                    _file.write("\x00" * (size - len(_file.getvalue())))
                    _file.seek(curpos)
        self._was_truncated = True

    def _read_rest(self):
        """Read the rest of the input stream."""
        if self._in_eof:
            return
        pos = self._buffer.tell()
        self._buffer.seek(0, 2)
        data = self._fileobj.read(self._bufsize)
        while data:
            self._in_pos += len(data)
            self._buffer.write(data)
            data = self._fileobj.read(self._bufsize)
        self._in_eof = True
        self._buffer.flush()
        self._buffer.seek(pos)
def fetch_pack_from_origin(
    self,
    origin_url: str,
    base_repo: RepoRepresentation,
    do_activity: Callable[[bytes], None],
) -> FetchPackReturn:
    """Fetch a pack from the origin"""
    pack_buffer = SpooledTemporaryFile(max_size=self.temp_file_cutoff)

    transport_url = origin_url

    logger.debug("Transport url to communicate with server: %s", transport_url)

    client, path = dulwich.client.get_transport_and_path(
        transport_url, thin_packs=False
    )

    logger.debug("Client %s to fetch pack at %s", client, path)

    size_limit = self.pack_size_bytes

    def do_pack(data: bytes) -> None:
        cur_size = pack_buffer.tell()
        would_write = len(data)
        if cur_size + would_write > size_limit:
            raise IOError(
                f"Pack file too big for repository {origin_url}, "
                f"limit is {size_limit} bytes, current size is {cur_size}, "
                f"would write {would_write}"
            )

        pack_buffer.write(data)

    pack_result = client.fetch_pack(
        path,
        base_repo.determine_wants,
        base_repo.graph_walker(),
        do_pack,
        progress=do_activity,
    )

    remote_refs = pack_result.refs or {}
    symbolic_refs = pack_result.symrefs or {}

    pack_buffer.flush()
    pack_size = pack_buffer.tell()
    pack_buffer.seek(0)

    logger.debug("fetched_pack_size=%s", pack_size)

    # check if repository only supports the git dumb transfer protocol;
    # the fetched pack file will be empty in that case, as dulwich does
    # not support it and does not fetch any refs
    self.dumb = transport_url.startswith("http") and getattr(client, "dumb", False)

    return FetchPackReturn(
        remote_refs=utils.filter_refs(remote_refs),
        symbolic_refs=utils.filter_refs(symbolic_refs),
        pack_buffer=pack_buffer,
        pack_size=pack_size,
    )
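# Condensed sketch of the capped write callback used above: tell() gives the
# running pack size, so the cap can be enforced before each write. The limit
# and spool sizes here are illustrative, not the loader's actual defaults.
from tempfile import SpooledTemporaryFile

SIZE_LIMIT = 4 * 1024 * 1024 * 1024  # hypothetical 4 GiB cap
pack_buffer = SpooledTemporaryFile(max_size=4 * 1024 * 1024)

def write_capped(data: bytes) -> None:
    cur_size = pack_buffer.tell()
    if cur_size + len(data) > SIZE_LIMIT:
        raise IOError("pack would exceed %d bytes" % SIZE_LIMIT)
    pack_buffer.write(data)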
class TestFile(object):
    CACHE_LIMIT = 0x80000  # data cache limit per file: 512KB
    XFER_BUF = 0x10000  # transfer buffer size: 64KB

    __slots__ = ("_file_name", "_fp")

    def __init__(self, file_name):
        # This is a naive fix for a larger path issue. This is a simple sanity
        # check and does not check if invalid characters are used. If an
        # invalid file name is used an exception will be raised when trying
        # to write that file to the file system.
        if "\\" in file_name:
            file_name = file_name.replace("\\", "/")
        if file_name.startswith("/"):
            file_name = file_name.lstrip("/")
        if file_name.endswith("."):
            file_name = file_name.rstrip(".")
        if not file_name \
                or ("/" in file_name and not file_name.rsplit("/", 1)[-1]) \
                or file_name.startswith("../"):
            raise TypeError("file_name is invalid %r" % (file_name,))
        # name including path relative to wwwroot
        self._file_name = os.path.normpath(file_name)
        self._fp = SpooledTemporaryFile(dir=grz_tmp("storage"),
                                        max_size=self.CACHE_LIMIT,
                                        prefix="testfile_")

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        self.close()

    def clone(self):
        """Make a copy of the TestFile.

        Args:
            None

        Returns:
            TestFile: A copy of the TestFile instance
        """
        cloned = type(self)(self._file_name)
        self._fp.seek(0)
        shutil.copyfileobj(self._fp, cloned._fp, self.XFER_BUF)  # pylint: disable=protected-access
        return cloned

    def close(self):
        """Close the TestFile.

        Args:
            None

        Returns:
            None
        """
        self._fp.close()

    @property
    def data(self):
        """Get the data from the TestFile. Not recommended for large files.

        Args:
            None

        Returns:
            bytes: Data from the TestFile
        """
        pos = self._fp.tell()
        self._fp.seek(0)
        data = self._fp.read()
        self._fp.seek(pos)
        return data

    def dump(self, path):
        """Write TestFile data to the filesystem.

        Args:
            path (str): Path to output data.

        Returns:
            None
        """
        target_path = os.path.join(path, os.path.dirname(self._file_name))
        if not os.path.isdir(target_path):
            os.makedirs(target_path)
        self._fp.seek(0)
        with open(os.path.join(path, self._file_name), "wb") as dst_fp:
            shutil.copyfileobj(self._fp, dst_fp, self.XFER_BUF)

    @property
    def file_name(self):
        return self._file_name

    @classmethod
    def from_data(cls, data, file_name, encoding="UTF-8"):
        """Create a TestFile and add it to the test case.

        Args:
            data (bytes or str): Data to write to file. If data is of type
                                 str encoding must be given.
            file_name (str): Name for the TestFile.
            encoding (str): Encoding to be used.

        Returns:
            TestFile: A TestFile.
        """
        t_file = cls(file_name)
        if data:
            if isinstance(data, bytes) or not encoding:
                t_file.write(data)
            else:
                t_file.write(data.encode(encoding))
        return t_file

    @classmethod
    def from_file(cls, input_file, file_name=None):
        """Create a TestFile from an existing file.

        Args:
            input_file (str): Path to existing file to use.
            file_name (str): Name for the TestFile. If file_name is not
                             given the name of the input_file will be used.

        Returns:
            TestFile: A TestFile.
        """
        if file_name is None:
            file_name = os.path.basename(input_file)
        t_file = cls(file_name)
        with open(input_file, "rb") as src_fp:
            shutil.copyfileobj(src_fp, t_file._fp, cls.XFER_BUF)  # pylint: disable=protected-access
        return t_file

    @property
    def size(self):
        """Size of the file in bytes.

        Args:
            None

        Returns:
            int: Size in bytes.
        """
        pos = self._fp.tell()
        self._fp.seek(0, os.SEEK_END)
        size = self._fp.tell()
        self._fp.seek(pos)
        return size

    def write(self, data):
        """Add data to the TestFile.

        Args:
            data (bytes): Data to add to the TestFile.

        Returns:
            None
        """
        self._fp.write(data)
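# TestFile.size and TestFile.data share one idiom worth isolating: save the
# current position, seek to measure or read, then restore it so the caller's
# position is untouched. spool_size is an illustrative helper.
import os
from tempfile import SpooledTemporaryFile

def spool_size(fp):
    pos = fp.tell()
    fp.seek(0, os.SEEK_END)
    size = fp.tell()
    fp.seek(pos)  # restore the caller's position
    return size

f = SpooledTemporaryFile(max_size=64)
f.write(b'abc')
assert spool_size(f) == 3
f.close()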
def upload_file():
    stream = None
    upload = None

    try:
        upload = request.files.get('upload-file')
        if not upload:
            return {'error_message': 'No File Specified'}

        curr_user = self.manager.get_curr_user()

        if not curr_user:
            #user = self.manager.get_anon_user()
            #force_coll = 'temp'
            #is_anon = True
            return {'error_message': 'Sorry, uploads only available for '
                                     'logged-in users'}

        user = curr_user
        force_coll = request.forms.getunicode('force-coll', '')
        is_anon = False

        if force_coll and not self.manager.has_collection(user, force_coll):
            if is_anon:
                self.manager.create_collection(user, force_coll,
                                               'Temporary Collection')
            else:
                status = 'Collection {0} not found'.format(force_coll)
                return {'error_message': status}

        stream = SpooledTemporaryFile(max_size=BLOCK_SIZE)

        logger.debug('Upload Start, Saving')
        upload.save(stream)

        size_rem = self.manager.get_size_remaining(user)
        logger.debug('Size Rem: ' + str(size_rem))

        expected_size = stream.tell()
        logger.debug('Expected Size: ' + str(expected_size))

        if size_rem < expected_size:
            return {'error_message': 'Sorry, not enough space to upload '
                                     'this file'}

        filename = upload.filename
        logger.debug('Filename: ' + filename)

        new_coll, error_message = self.handle_upload(stream, filename, user,
                                                     force_coll)

        if new_coll:
            msg = 'Uploaded file <b>{1}</b> into collection <b>{0}</b>'.format(
                new_coll['title'], filename)
            self.flash_message(msg, 'success')

            return {'uploaded': 'true',
                    'user': user,
                    'coll': new_coll['id']}
        else:
            print(error_message)
            return {'error_message': error_message}

    except Exception as e:
        traceback.print_exc()
        return {'error_message': str(e)}

    finally:
        if upload:
            upload.file.close()

        if stream:
            stream.close()
class UploadFile:
    """An uploaded file included as part of the request data."""

    __slots__ = ("filename", "content_type", "file")

    spool_max_size = 1024 * 1024

    def __init__(self, filename: str, content_type: str = "") -> None:
        self.filename = filename
        self.content_type = content_type
        self.file = SpooledTemporaryFile(max_size=self.spool_max_size,
                                         mode="w+b")

    @property
    def in_memory(self) -> bool:
        rolled_to_disk = getattr(self.file, "_rolled", True)
        return not rolled_to_disk

    def write(self, data: bytes) -> None:
        self.file.write(data)

    async def awrite(self, data: bytes) -> None:
        if self.in_memory:
            self.write(data)
        else:
            await asyncio.get_event_loop().run_in_executor(
                None, self.write, data)

    def read(self, size: int = -1) -> bytes:
        return self.file.read(size)

    async def aread(self, size: int = -1) -> bytes:
        if self.in_memory:
            return self.read(size)
        return await asyncio.get_event_loop().run_in_executor(
            None, self.read, size)

    def seek(self, offset: int) -> None:
        self.file.seek(offset)

    async def aseek(self, offset: int) -> None:
        if self.in_memory:
            self.seek(offset)
        else:
            await asyncio.get_event_loop().run_in_executor(
                None, self.seek, offset)

    def close(self) -> None:
        self.file.close()

    async def aclose(self) -> None:
        if self.in_memory:
            self.close()
        else:
            await asyncio.get_event_loop().run_in_executor(None, self.close)

    def save(self, filepath: str) -> None:
        """Save file to disk."""
        # from shutil.COPY_BUFSIZE
        copy_bufsize = 1024 * 1024 if os.name == "nt" else 64 * 1024
        file_position = self.file.tell()
        self.file.seek(0, 0)
        try:
            with open(filepath, "wb+") as target_file:
                source_read = self.file.read
                target_write = target_file.write
                while True:
                    buf = source_read(copy_bufsize)
                    if not buf:
                        break
                    target_write(buf)
        finally:
            self.file.seek(file_position)

    async def asave(self, filepath: str) -> None:
        """Save file to disk, work in threading pool."""
        await asyncio.get_event_loop().run_in_executor(None, self.save,
                                                       filepath)
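# Quick demonstration of the spooling behaviour UploadFile.in_memory relies
# on: SpooledTemporaryFile holds data in memory until max_size is exceeded,
# then rolls over to a real temporary file and sets its private _rolled flag.
from tempfile import SpooledTemporaryFile

f = SpooledTemporaryFile(max_size=10, mode='w+b')
f.write(b'tiny')
print(getattr(f, '_rolled', True))  # False: still an in-memory buffer
f.write(b'x' * 100)                 # crosses max_size
print(getattr(f, '_rolled', True))  # True: now backed by a disk file
f.close()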
class TPCTemporaryStorage(object):
    __slots__ = (
        '_queue',
        '_queue_contents',
    )

    def __init__(self):
        # start with a fresh in-memory buffer instead of reusing one that
        # might already be spooled to disk.
        # TODO: An alternate idea would be a temporary sqlite database.
        self._queue = SpooledTemporaryFile(max_size=10 * 1024 * 1024)
        # {oid: (startpos, endpos, prev_tid_int)}
        self._queue_contents = OidObjectMap()

    def reset(self):
        self._queue_contents.clear()
        self._queue.seek(0)

    def store_temp(self, oid_int, state, prev_tid_int=0):
        """Queue an object for caching.

        Typically, we can't actually cache the object yet, because its
        transaction ID is not yet chosen.
        """
        queue = self._queue
        queue.seek(0, 2)  # seek to end
        startpos = queue.tell()
        queue.write(state)
        endpos = queue.tell()
        self._queue_contents[oid_int] = (startpos, endpos, prev_tid_int)

    def __len__(self):
        # How many distinct OIDs have been stored?
        # This also lets us be used in a boolean context to see
        # if we've actually stored anything or are closed.
        return len(self._queue_contents)

    @property
    def stored_oids(self):
        return self._queue_contents

    @property
    def max_stored_oid(self):
        return OidObjectMap_max_key(self._queue_contents)

    def _read_temp_state(self, startpos, endpos):
        self._queue.seek(startpos)
        length = endpos - startpos
        state = self._queue.read(length)
        if len(state) != length:
            raise AssertionError("Queued cache data is truncated")
        return state

    def read_temp(self, oid_int):
        """Return the bytes for a previously stored temporary item."""
        startpos, endpos, _ = self._queue_contents[oid_int]
        return self._read_temp_state(startpos, endpos)

    def __iter__(self):
        return self.iter_for_oids(None)

    def iter_for_oids(self, oids):
        read_temp_state = self._read_temp_state
        for startpos, endpos, oid_int, prev_tid_int in self.items(oids):
            state = read_temp_state(startpos, endpos)
            yield state, oid_int, prev_tid_int

    def items(self, oids=None):
        # Order the queue by file position, which should help
        # if the file is large and needs to be read
        # sequentially from disk.
        items = [
            (startpos, endpos, oid_int, prev_tid_int)
            for (oid_int, (startpos, endpos, prev_tid_int))
            in iteroiditems(self._queue_contents)
            if oids is None or oid_int in oids
        ]
        items.sort()
        return items

    def close(self):
        if self._queue is not None:
            self._queue.close()
            self._queue = None
            self._queue_contents = ()  # Not None so len() keeps working

    def __repr__(self):
        approx_size = 0
        if self._queue is not None:
            self._queue.seek(0, 2)  # seek to end
            # The number of bytes we stored isn't necessarily the
            # number of bytes we send to the server, if there are duplicates
            approx_size = self._queue.tell()
        return "<%s at 0x%x count=%d bytes=%d>" % (
            type(self).__name__,
            id(self),
            len(self),
            approx_size
        )

    def __str__(self):
        base = repr(self)
        if not self:
            return base

        out = NStringIO()
        div = '=' * len(base)
        headings = ['OID', 'Length', 'Previous TID']
        col_width = (len(base) - 5) // len(headings)

        print(base, file=out)
        print(div, file=out)
        print('| ', file=out, end='')
        for heading in headings:
            print('%-*s' % (col_width, heading), end='', file=out)
            print('| ', end='', file=out)
        out.seek(out.tell() - 3)
        print('|', file=out)
        print(div, file=out)

        items = sorted(
            (oid_int, endpos - startpos, prev_tid_int)
            for (startpos, endpos, oid_int, prev_tid_int)
            in self.items()
        )
        for oid_int, length, prev_tid_int in items:
            print('%*d |%*d |%*d' % (
                col_width, oid_int,
                col_width, length,
                col_width, prev_tid_int
            ), file=out)
        return out.getvalue()
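# Condensed sketch of the queue-plus-index idea in TPCTemporaryStorage:
# append each blob at the end of one shared spool and remember its
# (start, end) offsets so it can be sliced back out later. A plain dict
# stands in for OidObjectMap here.
from tempfile import SpooledTemporaryFile

queue = SpooledTemporaryFile(max_size=10 * 1024 * 1024)
index = {}  # {oid: (startpos, endpos)}

def store_temp(oid, state):
    queue.seek(0, 2)  # append at the end
    startpos = queue.tell()
    queue.write(state)
    index[oid] = (startpos, queue.tell())

def read_temp(oid):
    startpos, endpos = index[oid]
    queue.seek(startpos)
    return queue.read(endpos - startpos)

store_temp(1, b'hello')
assert read_temp(1) == b'hello'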