def _cache_store_chunks(items, key, expiration):
    """Store a list of items as chunks in the cache.

    The incoming ``items`` iterable yields ``(data, has_item, item)``
    tuples. Each ``data`` payload is accumulated into an in-memory buffer
    and flushed to the cache in ``CACHE_CHUNK_SIZE``-sized chunks as enough
    data becomes available. When ``has_item`` is true, ``item`` is yielded
    back to the caller as it's consumed from the list or generator.

    After the iterable is exhausted, any remaining partial chunk is stored,
    followed by a final entry under ``key`` recording the total number of
    chunks written.

    Args:
        items (iterable):
            An iterable of ``(data, has_item, item)`` tuples.

        key (unicode):
            The base cache key under which chunks are stored.

        expiration (int):
            The expiration time to set on each cache entry.

    Yields:
        object:
        Each ``item`` whose ``has_item`` flag was set.
    """
    chunks_data = StringIO()
    chunks_data_len = 0
    read_start = 0
    i = 0

    for data, has_item, item in items:
        if has_item:
            yield item

        chunks_data.write(data)
        chunks_data_len += len(data)

        if chunks_data_len > CACHE_CHUNK_SIZE:
            # We have enough data to fill a chunk now. Start processing
            # what we've stored and create cache keys for each chunk.
            # Anything remaining will be stored for the next round.
            chunks_data.seek(read_start)
            cached_data = {}

            while chunks_data_len > CACHE_CHUNK_SIZE:
                chunk = chunks_data.read(CACHE_CHUNK_SIZE)
                chunk_len = len(chunk)
                chunks_data_len -= chunk_len
                read_start += chunk_len

                # Note that we wrap the chunk in a list so that the cache
                # backend won't try to perform any conversion on the
                # string.
                cached_data[make_cache_key('%s-%d' % (key, i))] = [chunk]
                i += 1

            # Store the keys in the cache in a single request.
            cache.set_many(cached_data, expiration)

            # Reposition back at the end of the stream.
            chunks_data.seek(0, 2)

    if chunks_data_len > 0:
        # There's one last bit of data to store. Note that this should be
        # less than the size of a chunk.
        assert chunks_data_len <= CACHE_CHUNK_SIZE
        chunks_data.seek(read_start)
        chunk = chunks_data.read()

        cache.set(make_cache_key('%s-%d' % (key, i)), [chunk], expiration)
        i += 1

    # Record the total chunk count, so readers know how many keys to fetch.
    cache.set(make_cache_key(key), '%d' % i, expiration)
def prepend_data(self, data):
    """Insert data ahead of the existing buffer contents.

    Args:
        data (bytes):
            The data to prepend. If empty, the buffer is left untouched.
    """
    if not data:
        return

    # Build a replacement buffer with the new data first, followed by
    # everything previously written, then swap it in.
    combined = StringIO()
    combined.write(data)
    combined.write(self._data_io.getvalue())

    self._data_io.close()
    self._data_io = combined
def parse(self):
    """Parse the diff.

    Walks every line of the diff, splitting it into per-file sections.
    Lines appearing before the first file's change header are collected
    as a preamble and prepended to that file's data.

    Returns:
        list:
        A list of File objects, one per file found in the diff.
    """
    logging.debug("DiffParser.parse: Beginning parse of diff, size = %s",
                  len(self.data))

    self.files = []
    preamble = StringIO()
    current_file = None
    linenum = 0

    while linenum < len(self.lines):
        next_linenum, new_file = self.parse_change_header(linenum)

        if new_file:
            # This line is the start of a new file diff.
            #
            # First, finalize the last one.
            if self.files:
                self.files[-1].finalize()

            # Attach any preamble collected so far, then reset it for
            # the next file.
            new_file.prepend_data(preamble.getvalue())
            preamble.close()
            preamble = StringIO()

            current_file = new_file
            self.files.append(current_file)
            linenum = next_linenum
        elif current_file:
            # We're inside a file's diff; hand the line to the diff-line
            # parser, which returns the next line number to process.
            linenum = self.parse_diff_line(linenum, current_file)
        else:
            # No file seen yet; accumulate the line into the preamble.
            preamble.write(self.lines[linenum])
            preamble.write(b'\n')
            linenum += 1

    if self.files:
        self.files[-1].finalize()

    preamble.close()

    logging.debug("DiffParser.parse: Finished parsing diff.")

    return self.files
def parse(self):
    """Parse the diff.

    Walks every line of the diff, delegating to the per-diff parser and
    collecting the resulting files. Lines appearing before a file's diff
    are gathered as a preamble and prepended to that file's data.

    Returns:
        list:
        A list of File objects, one per file found in the diff.

    Raises:
        DiffParserError:
            No files were found, yet non-blank content was present,
            meaning this is probably not a git diff at all.
    """
    self.files = []
    preamble = StringIO()
    linenum = 0

    while linenum < len(self.lines):
        next_linenum, file_info, new_diff = self._parse_diff(linenum)

        if file_info:
            # A new file diff begins here; finalize the previous one.
            if self.files:
                self.files[-1].finalize()

            self._ensure_file_has_required_fields(file_info)

            # Attach any preamble collected so far, then reset it.
            file_info.prepend_data(preamble.getvalue())
            preamble.close()
            preamble = StringIO()

            self.files.append(file_info)
        elif new_diff:
            # We found a diff, but it was empty and has no file entry.
            # Reset the preamble.
            preamble.close()
            preamble = StringIO()
        else:
            # Not yet inside a diff; accumulate into the preamble.
            preamble.write(self.lines[linenum])
            preamble.write(b'\n')

        linenum = next_linenum

    try:
        if self.files:
            self.files[-1].finalize()
        elif preamble.getvalue().strip() != b'':
            # This is probably not an actual git diff file.
            raise DiffParserError('This does not appear to be a git diff', 0)
    finally:
        preamble.close()

    return self.files
class FileStream(object):
    """File stream for streaming responses.

    This buffer is intended for use as an argument to
    StreamingHTTPResponse and also as a file for TarFile to write into.

    Files are read in by chunks and written to this buffer through
    TarFile. When there is content to be read from the buffer, it is
    taken up by StreamingHTTPResponse and the buffer is cleared, to
    avoid holding large chunks of data in memory.
    """

    def __init__(self):
        """Initialize the stream with an empty buffer."""
        self.buffer = StringIO()
        self.offset = 0

    def write(self, s):
        """Write ``s`` to the buffer and advance the logical offset."""
        self.buffer.write(s)
        self.offset += len(s)

    def tell(self):
        """Return the current position of the buffer.

        This is the total length ever written, and is not reset when
        the buffer is drained via :py:meth:`pop`.
        """
        return self.offset

    def close(self):
        """Close the underlying buffer."""
        self.buffer.close()

    def pop(self):
        """Return the current contents of the buffer, then clear it."""
        contents = self.buffer.getvalue()

        # Start over with a fresh buffer, releasing the drained data.
        self.buffer.close()
        self.buffer = StringIO()

        return contents
class ParsedDiffFile(object):
    """A parsed file from a diff.

    This stores information on a single file represented in a diff,
    including the contents of that file's diff, as parsed by
    :py:class:`DiffParser` or one of its subclasses.

    Parsers should set the attributes on this based on the contents of
    the diff, and should add any data found in the diff.

    This class is meant to be used internally and by subclasses of
    :py:class:`DiffParser`.
    """

    def __init__(self):
        """Initialize the parsed file information."""
        # NOTE: The camelCase attribute names below are part of the
        # existing public interface and must be kept as-is.
        self.origFile = None
        self.newFile = None
        self.origInfo = None
        self.newInfo = None
        self.origChangesetId = None
        self.binary = False
        self.deleted = False
        self.moved = False
        self.copied = False
        self.insert_count = 0
        self.delete_count = 0

        self._data_io = StringIO()
        self._data = None

    @property
    def data(self):
        """The data for this diff.

        This must be accessed after :py:meth:`finalize` has been called;
        otherwise a :py:exc:`ValueError` is raised.
        """
        if self._data is None:
            raise ValueError('ParsedDiffFile.data cannot be accessed until '
                             'finalize() is called.')

        return self._data

    def finalize(self):
        """Finalize the parsed diff.

        This makes the diff data available to consumers and closes the
        buffer for writing.
        """
        self._data = self._data_io.getvalue()
        self._data_io.close()

    def prepend_data(self, data):
        """Prepend data to the buffer.

        Args:
            data (bytes):
                The data to prepend. If empty, nothing is done.
        """
        if not data:
            return

        # Rebuild the buffer with the new data first, then swap it in.
        combined = StringIO()
        combined.write(data)
        combined.write(self._data_io.getvalue())

        self._data_io.close()
        self._data_io = combined

    def append_data(self, data):
        """Append data to the buffer.

        Args:
            data (bytes):
                The data to append. If empty, nothing is done.
        """
        if data:
            self._data_io.write(data)