def assertFlippedBit(self, file_orig, file_modded, position):
    # Assert that exactly one bit -- the one at `position`, counted in bits
    # from the start of the file -- differs between the two files and that
    # every other bit is identical.  Python 2 helper (xrange, int division).
    len_orig = os.path.getsize(file_orig)
    len_modded = os.path.getsize(file_modded)
    self.assertEqual(len_orig, len_modded, "Files of different sizes")
    f_o = FileIO(file_orig, "r+b")
    f_m = FileIO(file_modded, "r+b")
    for i in xrange(len_orig):
        # read in a byte from each file and compare
        b_o = ord(f_o.read(1))
        b_m = ord(f_m.read(1))
        if i == (position / 8):  # the byte that contains the flipped bit
            for m in xrange(8):
                bit_m = BitwiseAnalyser.BitManipulator.getBitFromByteAt(
                    b_m, m)
                bit_o = BitwiseAnalyser.BitManipulator.getBitFromByteAt(
                    b_o, m)
                if m == (position % 8):
                    # This is the flipped bit: it must differ.
                    self.assertNotEqual(
                        bit_m, bit_o,
                        "Bits are equal when the should be different at position: "
                        + str(position))
                else:
                    self.assertEqual(
                        bit_m, bit_o,
                        "Bits are incorrectly different at position " +
                        str(i))
        else:
            self.assertEqual(
                b_o, b_m,
                "Bytes differ (when the shouldn't) at position " + str(i))
    f_o.close()
    f_m.close()
def parse_file(self):
    """Parses the video file, obtaining metadata that can be accessed thru
    this class' properties.

    :raises ValueError: File is not an MP4 format video.
    """
    the_file = FileIO(self.path, 'rb')
    # the mimetype could be incorrect
    # we'll let the file decide
    if not self.video_format in self.mimetype:
        the_file.seek(0x00, SEEK_SET)
        first_12 = the_file.read(12)
        # split the dword and the ftyp
        size_dword = struct.unpack('>I', first_12[0:4])[0]
        ftyp_val = first_12[4:]
        # validate if mp4
        if size_dword > 0:
            if ftyp_val not in self.supported_ftypes:
                the_file.close()
                raise ValueError("{} is not an MP4 video.".format(
                    self.name))
        else:
            the_file.close()
            raise ValueError("{} is not an MP4 video.".format(self.name))
    # determine the size of the `compatible_brand` field
    # this is the very first DWORD of the file
    the_file.seek(0x00, SEEK_SET)
    compat_brand_end = the_file.read(4)
    compat_brand_end = struct.unpack('>I', compat_brand_end)[0]
    compat_brand_size = compat_brand_end - 0x10
    # get the `compatible_brand` field
    the_file.seek(0x10, SEEK_SET)
    compat_brand = the_file.read(compat_brand_size)
    # PARSE THE FILE!!!
    try:
        if compat_brand in self.supported_brands:
            self._read_mp4_container(the_file, compat_brand_end)
    except NoMoovAtomException:
        #TODO: ADD LOGGING
        #FIXME: MAKE THIS INTO A LOGGER
        print("WARNING: {} has no moov atom!".format(self.name))
    except NoReadVideoHeaderException:
        print("WARNING: Couldn't get information from {}!".format(
            self.name))
    # NOTE(review): the_file is not closed if _read_mp4_container raises an
    # exception other than the two caught above -- consider try/finally;
    # confirm before changing.
    the_file.close()
    self._parsed_header = True
def __init__(self, file_handle: FileIO):
    # Parse a 10-byte ID3v2 header from the current position of file_handle.
    data = file_handle.read(10)
    if data[0:3].decode("ascii") != "ID3":
        # No "ID3" magic: mark as invalid and stop parsing.
        self.__is_id3 = False
        return
    # Tag size is a 28-bit "syncsafe" integer: four bytes of 7 bits each.
    self.__id3_size = data[6] << 21 | data[7] << 14 | data[8] << 7 | data[9]
    flags = data[5]
    self.__id3_version = int(data[3])
    # Flag bits: 0x80 unsynchronisation, 0x40 extended header, 0x20 experimental.
    self.__unsynchronisation = flags & 0x80 == 0x80
    self.__extended_header = flags & 0x40 == 0x40
    self.__experimental = flags & 0x20 == 0x20
    # The low 5 flag bits must be clear in a valid ID3v2 header.
    self.__is_id3 = flags & 0x1f == 0
    # Read the whole tag body and parse it.
    self.__data = file_handle.read(self.__id3_size)
    self.__parse_id3_data()
def _parse_config_file_or_exit(config_file: io.FileIO) -> Dict:
    """Load a YAML experiment configuration from an open file handle.

    The handle is closed after reading.  If the parsed document is empty or
    not a mapping, an error is printed and the process exits with status 1.
    """
    experiment_config = yaml.safe_load(config_file.read())
    config_file.close()
    if not (experiment_config and isinstance(experiment_config, dict)):
        print("Error: invalid experiment config file {}".format(config_file.name))
        sys.exit(1)
    return experiment_config
def get_from_file_memory_duplicate(path):
    """Copy the file at *path* into an in-memory buffer and parse it as an ELF."""
    source = FileIO(path, 'rb')
    memory = StringIO()
    memory.write(source.read())
    source.close()
    # Rewind so the parser starts at the beginning of the copy.
    memory.seek(0, os.SEEK_SET)
    return ELF(memory)
def uint7(file: io.FileIO) -> int:
    """Read one byte from *file* and return its low seven bits.

    Raises:
        IOError: if the stream is exhausted before one byte is read.
    """
    raw = file.read(1)
    if len(raw) != 1:
        raise IOError("Not enough bytes found")
    # Mask off the most-significant bit.
    return raw[0] & 0x7F
def mail_merge_from_dict(
    template_fp: FileIO,
    data_dict: dict,
) -> Dict[str, str]:
    """Render a Jinja2 template once per entry of *data_dict*.

    Args:
        template_fp: open text file (or file-like object) containing a
            Jinja2 template, ready to be read from.
        data_dict: maps each output key to a dict whose keys are variables
            in the template.

    Returns:
        An ordered mapping with the same keys as *data_dict*; each value is
        the template rendered against the corresponding inner dict.
    """
    template = Template(template_fp.read())
    return OrderedDict(
        (key, template.render(variables))
        for key, variables in data_dict.items()
    )
def get_from_file_memory_duplicate(path):
    """Read *path* fully, duplicate it into memory, and build an ELF from the copy."""
    disk_file = FileIO(path, 'rb')
    in_memory = StringIO()
    in_memory.write(disk_file.read())
    disk_file.close()
    in_memory.seek(0, os.SEEK_SET)  # parser expects the stream at offset 0
    return ELF(in_memory)
def dynamic_header(df: FileIO, hdr):
    """Parse the per-signal portion of an EDF header from *df* into *hdr*.

    Reads the 16-byte label of each of the ``hdr['ns']`` signals, then each
    fixed-width per-signal field (stored for all signals consecutively),
    and returns the updated *hdr*.
    """
    ns = hdr['ns']
    # One 16-byte ASCII label per signal.
    hdr['labels'] = [df.read(16).strip().decode('ascii') for _ in range(ns)]
    field_specs = [('transducer', 80, str),
                   ('physical_dim', 8, str),
                   ('physical_min', 8, float),
                   ('physical_max', 8, float),
                   ('digital_min', 8, float),
                   ('digital_max', 8, float),
                   ('prefiltering', 80, str),
                   ('num_samples', 8, int),
                   ('reserved_signal', 32, str)]
    for field, width, convert in field_specs:
        hdr[field] = defaultdict(convert)
        for label in hdr['labels']:
            hdr[field][label] = read_n_bytes(df, width, convert)
    return hdr
def read_signal(data_file: FileIO, header):
    """Reads EEG signal from the EDF file."""
    signal = {}
    num_records = header['num_records']
    # Read the entire data section at once; samples are little-endian int16.
    rest = bytes(data_file.read())
    offset = 0
    dt = np.dtype(np.int16)
    dt = dt.newbyteorder('<')
    # Allocate a (num_records x num_samples) array per channel label.
    for label in header['labels']:
        num_samples = header['num_samples'][label]
        signal[label] = np.zeros(num_records * num_samples).reshape(
            num_records, num_samples)
    # Records are interleaved: each record stores num_samples values for
    # every channel, in label order.
    for i in range(num_records):
        for label in header['labels']:
            num_samples = header['num_samples'][label]
            signal[label][i] = np.frombuffer(rest,
                                             dtype=dt,
                                             count=num_samples,
                                             offset=offset)
            offset += num_samples * 2  # 2 bytes per int16 sample
    # Convert raw digital values to physical units and flatten each channel.
    for label in header['labels']:
        num_samples = header['num_samples'][label]
        signal[label] = scale(
            header['physical_max'][label], header['digital_max'][label],
            np.array(signal[label].reshape(num_samples * num_records)))
    return signal
def flipByteAt(inputfile, position):
    """Flips the bits for the byte at the specified position in the input file.

    Args:
        inputfile: path of the file to modify in place.
        position: byte offset of the byte to invert.
    """
    # Context manager guarantees the file is closed even if seek/read/write
    # raises (the original leaked the handle on error).
    with FileIO(inputfile, "r+") as f:
        f.seek(position)
        byte = ord(f.read(1))
        f.seek(-1, 1)  # go back 1 byte from current position
        f.write(struct.pack("B", byte ^ 0xFF))  # write back the inverted byte
def flipByteAt(inputfile, position):
    """Flips the bits for the byte at the specified position in the input file."""
    handle = FileIO(inputfile, "r+")
    handle.seek(position)
    original = ord(handle.read(1))
    inverted = original ^ 0xFF
    handle.seek(-1, 1)  # rewind over the byte just read
    handle.write(struct.pack("B", inverted))
    handle.close()
def __execfile(name):
    """Read the Python source file *name* and exec() its contents.

    Raises:
        RuntimeError: if the file cannot be opened, read, or executed.
    """
    # Initialize f so the finally clause is safe when FileIO() itself fails
    # (the original raised an unrelated NameError/UnboundLocalError there).
    f = None
    try:
        f = FileIO(name)
        codestr = f.read()
        exec(codestr)
    except Exception as e:
        # Chain the original error instead of silently discarding it.
        raise RuntimeError('Failed to execute file %s' % name) from e
    finally:
        if f is not None:
            f.close()
def assertFlippedByte(self, file_orig, file_modded, position):
    # Assert the files are byte-identical except at `position`, where the
    # modified file must hold the bitwise inverse of the original byte.
    # Python 2 helper (xrange).
    len_orig = os.path.getsize(file_orig)
    len_modded = os.path.getsize(file_modded)
    self.assertEqual(len_orig, len_modded, "Files of different sizes")
    f_o = FileIO(file_orig, "r+b")
    f_m = FileIO(file_modded, "r+b")
    for i in xrange(len_orig):
        # read in a byte from each file and compare
        b_o = ord(f_o.read(1))
        b_m = ord(f_m.read(1))
        if i==position:
            self.assertEqual(b_m, b_o^0xff, "Flipped bytes are actually equal at position "+str(i))
        else:
            self.assertEqual(b_o, b_m, "Bytes differ (when the shouldn't) at position "+str(i))
    f_o.close()
    f_m.close()
def read_from_file(file: io.FileIO, start: int, stop: int) -> bytes:
    """Read and return the bytes in [start, stop) of *file*.

    Raises:
        ReachEndOfFile: if the file ends before *stop* is reached.
    """
    # Accept an empty range (stop == start); the original asserted
    # stop > start, inconsistent with the other read_from_file variants
    # in this project which allow a zero-length read.
    assert stop >= start
    file.seek(start)
    data = bytes()
    while file.tell() < stop:
        # read() may return fewer bytes than requested, so loop.
        read_data = file.read(stop - file.tell())
        if read_data == b'':
            raise ReachEndOfFile('Read until the end of file')
        data += read_data
    assert len(data) == stop - start
    return data
class filestream_range_iterator(Iterable):
    """
    A class that mimics FileIO and implements an iterator that returns a
    fixed-sized sequence of bytes. Beginning from `start` to `end`.

    BBB: due to a possible bug in Zope>4, <=4.1.3, couldn't be subclass of
    FileIO as Iterators.filestream_iterator
    """

    def __init__(self, name, mode='rb', bufsize=-1, streamsize=1 << 16,
                 start=0, end=None):
        # bufsize is accepted for API compatibility but not used here.
        self._io = FileIO(name, mode=mode)
        self.streamsize = streamsize
        self.start = start
        self.end = end
        # Position the stream at the first byte to serve.
        self._io.seek(start, 0)

    def __iter__(self):
        if self._io.closed:
            raise ValueError("I/O operation on closed file.")
        return self

    def __next__(self):
        if self.end is None:
            bytes = self.streamsize
        else:
            # Never read past `end`; clamp to zero when already there.
            bytes = max(min(self.end - self._io.tell(), self.streamsize), 0)
        data = self._io.read(bytes)
        if not data:
            raise StopIteration
        return data

    next = __next__  # Python 2 iterator protocol alias

    def close(self):
        self._io.close()

    # BBB: is it necessary to implement __len__ ?
    # def __len__(self)

    def read(self, size=-1):
        return self._io.read(size)
def __init__(self, data, buffer_size=DEFAULT_BUFFER_SIZE):
    # Normalize `data` into a raw binary stream for the buffered base class:
    # - a reader backed by a real file is reopened from its file name;
    # - any other reader is drained into an in-memory BytesIO;
    # - a path (str or PathLike) is opened directly;
    # - raw bytes-like data is wrapped in BytesIO.
    if isinstance(data, (BufferedReader, DataReader)):
        if isinstance(data.raw, FileIO):
            data = FileIO(data.name, 'rb')
        else:
            data = BytesIO(data.read())
    elif isinstance(data, (os.PathLike, str)):
        data = FileIO(data, 'rb')
    elif isinstance(data, (bytearray, bytes, memoryview)):
        data = BytesIO(data)
    super().__init__(data, buffer_size=buffer_size)
def flipBitAt(inputfile, position):
    """Flips the bit at the specified position in the input file.

    Args:
        inputfile: path of the file to modify in place.
        position: bit offset (0-based, MSB-first within each byte).

    Raises:
        IndexError: if position is outside the file's bit range.
    """
    if not 0<=position<(8*os.path.getsize(inputfile)):
        raise IndexError("Position "+str(position)+" is out of range")
    f = FileIO(inputfile, "r+")
    # Floor division keeps the offset an int under Python 3; the original
    # `position/8` produced a float and made seek() raise.
    f.seek(position//8)
    byte = ord(f.read(1))
    f.seek(-1, 1)  # go back 1 byte from the current position
    bitnum = position%8
    f.write(struct.pack("B", byte^(1<<(7-bitnum))))
    f.close()
def flipBitAt(inputfile, position):
    """Flips the bit at the specified position in the input file.

    Args:
        inputfile: path of the file to modify in place.
        position: bit offset (0-based, MSB-first within each byte).

    Raises:
        IndexError: if position is outside the file's bit range.
    """
    if not 0 <= position < (8 * os.path.getsize(inputfile)):
        raise IndexError("Position " + str(position) + " is out of range")
    f = FileIO(inputfile, "r+")
    # `//` keeps the seek offset an int under Python 3 (plain `/` yielded a
    # float and broke seek()).
    f.seek(position // 8)
    byte = ord(f.read(1))
    f.seek(-1, 1)  # go back 1 byte from the current position
    bitnum = position % 8
    f.write(struct.pack("B", byte ^ (1 << (7 - bitnum))))
    f.close()
def read_from_file(file_fd: io.FileIO, start: int, stop: int) -> bytes:
    """Return the bytes in [start, stop) of *file_fd*.

    Raises ReachedEndOfFile if the stream ends before *stop*.
    """
    expected = stop - start
    assert expected >= 0
    file_fd.seek(start)
    data = bytes()
    while True:
        remaining = stop - file_fd.tell()
        if remaining <= 0:
            break
        chunk = file_fd.read(remaining)
        if chunk == b'':
            raise ReachedEndOfFile('Read until the end of file')
        data += chunk
    assert len(data) == expected
    return data
def read_from_file(file_fd: io.FileIO, start: int, stop: int) -> bytes:
    """Read bytes start..stop (exclusive) from *file_fd*.

    Raises EndOfFileError when the stream ends before *stop* is reached.
    """
    length = stop - start
    assert length >= 0
    file_fd.seek(start)
    pieces = []
    collected = 0
    while start + collected < stop:
        piece = file_fd.read(stop - (start + collected))
        if piece == b'':
            raise EndOfFileError('Read until the end of file_fd')
        pieces.append(piece)
        collected += len(piece)
    data = b''.join(pieces)
    assert len(data) == length
    return data
def redeem_code_exchange(user: str, userfile: io.FileIO, code: str):
    """Redeem *code* for *user* and/or every user listed in *userfile*.

    Each non-blank line of *userfile* contributes its first
    whitespace-separated token as a user name.

    Raises:
        click.MissingParameter: if neither *user* nor *userfile* is given.
    """
    if not user and not userfile:
        raise click.MissingParameter('user or userfile required')
    user_s = []
    if user:
        user_s.append(user)
    if userfile:
        for line in userfile.read().splitlines():
            # Guard before indexing: the original did split()[0] first,
            # which raised IndexError on blank / whitespace-only lines.
            tokens = line.strip().split()
            if tokens:
                user_s.append(tokens[0])
    for user in user_s:
        _redeem_code_exchange(user, code)
def read_id3(file_handle: FileIO, skip_v1: bool = False) -> ID3Base:
    """Return the ID3 tag of *file_handle*: the v2 tag if valid, otherwise
    the trailing ID3v1 tag (unless *skip_v1* is set)."""
    tag = ID3v2(file_handle)
    if tag.is_valid_id3 or skip_v1:
        return tag
    # No usable v2 tag: an ID3v1 tag lives in the last 128 bytes.  Restore
    # the caller's file position afterwards.
    saved_position = file_handle.tell()
    file_handle.seek(-128, SEEK_END)
    trailer = file_handle.read(128)
    tag = ID3v1(trailer)
    file_handle.seek(saved_position, SEEK_SET)
    return tag
def read_from_file(file_fd: io.FileIO, start: int, end: int) -> bytes:
    """Read and return the byte range [start, end) of *file_fd*.

    Raises EndOfFileError if the stream is exhausted before *end*.
    """
    total = end - start
    assert total >= 0
    file_fd.seek(start)
    collected = bytes()
    # A raw read() may return fewer bytes than requested (it makes only one
    # system call), so keep reading until the cursor reaches *end*.
    while file_fd.tell() < end:
        fragment = file_fd.read(end - file_fd.tell())
        if fragment == b'':
            raise EndOfFileError('read until the end of file_fd')
        collected += fragment
    assert len(collected) == total
    return collected
def uint16(file: io.FileIO) -> int:
    """Read two bytes from *file* and return them as a little-endian
    unsigned 16-bit integer.

    Raises:
        IOError: if fewer than two bytes are available.
    """
    raw = file.read(2)
    if len(raw) != 2:
        raise IOError("Not enough bytes found")
    return int.from_bytes(raw, 'little')
def assertFlippedByte(self, file_orig, file_modded, position):
    # Assert the files are byte-identical except at `position`, where the
    # modified file must hold the bitwise inverse of the original byte.
    # Python 2 helper (xrange).
    len_orig = os.path.getsize(file_orig)
    len_modded = os.path.getsize(file_modded)
    self.assertEqual(len_orig, len_modded, "Files of different sizes")
    f_o = FileIO(file_orig, "r+b")
    f_m = FileIO(file_modded, "r+b")
    for i in xrange(len_orig):
        # read in a byte from each file and compare
        b_o = ord(f_o.read(1))
        b_m = ord(f_m.read(1))
        if i == position:
            self.assertEqual(
                b_m, b_o ^ 0xff,
                "Flipped bytes are actually equal at position " + str(i))
        else:
            self.assertEqual(
                b_o, b_m,
                "Bytes differ (when the shouldn't) at position " + str(i))
    f_o.close()
    f_m.close()
def read(self, file: FileIO):
    """Parse the MDX keyword-index block from *file* into ``self._data``.

    Reads ``self._length`` bytes, optionally decrypts them, inflates the
    payload, validates it against the stored adler32 checksum, then walks
    the buffer extracting one descriptor dict per key block.
    """
    keywardIndexData = file.read(self._length)
    if self._encrypt & 0x02:
        # Bit 1 of the encrypt flag means the index itself is encrypted.
        keywardIndexData = util._mdx_decrypt(keywardIndexData)
    # Payload starts after an 8-byte prefix; bytes 4..8 are its checksum.
    keywardBlockInfo = zlib.decompress(keywardIndexData[8:])
    adler32 = struct.unpack('>I', keywardIndexData[4:8])[0]
    assert (adler32 == zlib.adler32(keywardBlockInfo) & 0xffffffff)
    byte_format = '>H'
    byte_width = 2
    text_term = 1  # keys carry a one-byte terminator
    i = 0
    keyIndex = []
    while i < len(keywardBlockInfo):
        # Number of entries in this key block.
        entriesNum = struct.unpack(
            '>Q', keywardBlockInfo[i:i + BYTE_LENGTH_NUMBER])[0]
        i += BYTE_LENGTH_NUMBER
        # First keyword of the block (length-prefixed, NUL-terminated).
        firstKeyLength = struct.unpack(
            byte_format, keywardBlockInfo[i:i + byte_width])[0]
        i += byte_width
        firstKey = str(keywardBlockInfo[i:i + firstKeyLength],
                       encoding=DEFAULT_ENCODING)
        i += firstKeyLength + text_term
        # Last keyword of the block.
        lastKeyLength = struct.unpack(
            byte_format, keywardBlockInfo[i:i + byte_width])[0]
        i += byte_width
        lastKey = str(keywardBlockInfo[i:i + lastKeyLength],
                      encoding=DEFAULT_ENCODING)
        i += lastKeyLength + text_term
        # Compressed / decompressed sizes of the block's data.
        compKeyBlocksSize = struct.unpack(
            '>Q', keywardBlockInfo[i:i + BYTE_LENGTH_NUMBER])[0]
        i += BYTE_LENGTH_NUMBER
        deCompKeyBlocksSize = struct.unpack(
            '>Q', keywardBlockInfo[i:i + BYTE_LENGTH_NUMBER])[0]
        i += BYTE_LENGTH_NUMBER
        keyIndex.append({
            'entries_number': entriesNum,
            'first_keyward': firstKey,
            'first_keyward_length': firstKeyLength,
            'last_keyward': lastKey,
            'last_keyward_length': lastKeyLength,
            'compress_key_blocks_size': compKeyBlocksSize,
            'decompress_key_blocks_size': deCompKeyBlocksSize
        })
    self._data = keyIndex
def read(self, instream: io.FileIO, rsakey):
    # Parse an MBPak archive: RSA signature, entry table, then the signed
    # data blob whose integrity is verified before contents are attached.
    # read the signature
    (signlen, ) = struct.unpack("i", instream.read(4))
    signature = instream.read(signlen)
    # read the entries
    (entrylen, ) = struct.unpack("i", instream.read(4))
    for i in range(0, entrylen):
        entry = MBPakFileEntry()
        (filepathlen, ) = struct.unpack("b", instream.read(1))
        entry.filePath = instream.read(filepathlen).decode(
            encoding="utf-8")
        (isencrypted, ) = struct.unpack("b", instream.read(1))
        entry.encrypted = True if isencrypted == 1 else False
        (entry.fileOffset, ) = struct.unpack("q", instream.read(8))
        (entry.uncompressedSize, ) = struct.unpack("q", instream.read(8))
        entry.compressedContents = [
            struct.unpack("i", instream.read(4))[0]
        ]  # temporary store
        # NOTE(review): this unconditionally overwrites the isencrypted
        # flag parsed just above -- looks like a bug; confirm intent.
        entry.encrypted = True
        self.entries.append(entry)
    # Everything after the table is the signed data region.
    databuffer = instream.read()
    publickey, privatekey = rsakey
    # now verify integrity
    if self.verifysign(databuffer, publickey, signature):
        # now fill up the compressed contents and shit
        for entry in self.entries:
            datalen = entry.compressedContents[
                0]  # retrieve what we stored
            entry.compressedContents = databuffer[entry.fileOffset:entry.
                                                  fileOffset + datalen]
    else:
        raise Exception("TAMPERED DATA")
def assertFlippedBit(self, file_orig, file_modded, position):
    # Assert that only the bit at `position` (bit offset from the start of
    # the file) differs between the two files.  Python 2 helper (xrange,
    # integer division).
    len_orig = os.path.getsize(file_orig)
    len_modded = os.path.getsize(file_modded)
    self.assertEqual(len_orig, len_modded, "Files of different sizes")
    f_o = FileIO(file_orig, "r+b")
    f_m = FileIO(file_modded, "r+b")
    for i in xrange(len_orig):
        # read in a byte from each file and compare
        b_o = ord(f_o.read(1))
        b_m = ord(f_m.read(1))
        if i==(position/8):  # the byte containing the flipped bit
            for m in xrange(8):
                bit_m = BitwiseAnalyser.BitManipulator.getBitFromByteAt(b_m, m)
                bit_o = BitwiseAnalyser.BitManipulator.getBitFromByteAt(b_o, m)
                if m==(position%8):
                    # The target bit must differ between the files.
                    self.assertNotEqual(bit_m, bit_o, "Bits are equal when the should be different at position: "+str(position))
                else:
                    self.assertEqual(bit_m, bit_o, "Bits are incorrectly different at position "+str(i))
        else:
            self.assertEqual(b_o, b_m, "Bytes differ (when the shouldn't) at position "+str(i))
    f_o.close()
    f_m.close()
def load(self, file: FileIO):
    """Deserialize this index node from *file*, recording its offset and size."""
    self.ptr = file.tell()
    self.is_leaf, self.keys = load(file)
    key_count = len(self.keys)
    ptr_num = key_count
    if not self.is_leaf:
        # Interior nodes carry key_count value pointers plus
        # key_count + 1 child pointers.
        ptr_num += (ptr_num + 1)
    ptrs = unpack('Q' * ptr_num, file.read(8 * ptr_num))
    if self.is_leaf:
        self.ptrs_value = list(ptrs)
    else:
        self.ptrs_value = list(ptrs[:key_count])
        self.ptrs_child = list(ptrs[key_count:])
    self.size = file.tell() - self.ptr
def uint32(file: io.FileIO):
    """Read four bytes from *file* and return them as a little-endian
    unsigned 32-bit integer.

    Raises:
        IOError: if fewer than four bytes are available.
    """
    raw = file.read(4)
    if len(raw) != 4:
        raise IOError("Not enough bytes found")
    # bytes indexing yields non-negative ints, so assembling little-endian
    # with int.from_bytes matches the original shift-and-or construction.
    return int.from_bytes(raw, 'little')
def load(self, file: FileIO):
    """Load this B-tree node from *file*; leaves hold only value pointers,
    interior nodes hold value pointers followed by child pointers."""
    self.ptr = file.tell()
    self.is_leaf, self.keys = load(file)
    # Leaves: one pointer per key.  Interior: k values + (k + 1) children.
    ptr_num = len(self.keys) if self.is_leaf else 2 * len(self.keys) + 1
    ptrs = unpack('Q' * ptr_num, file.read(8 * ptr_num))
    if self.is_leaf:
        self.ptrs_value = list(ptrs)
    else:
        split = len(self.keys)
        self.ptrs_value = list(ptrs[:split])
        self.ptrs_child = list(ptrs[split:])
    self.size = file.tell() - self.ptr
def load(self, file: FileIO): self.ptr = file.tell() # IndexNode: [is_leaf, [..., key]] + ptrs_value + ptrs_child if not is_leaf self.is_leaf, self.keys = load(file) ptr_num = len(self.keys) if not self.is_leaf: ptr_num += (ptr_num + 1) ptrs = unpack('Q' * ptr_num, file.read(8 * ptr_num)) if self.is_leaf: self.ptrs_value = list(ptrs) else: self.ptrs_value = list(ptrs[:len(self.keys)]) self.ptrs_child = list(ptrs[len(self.keys):]) self.size = file.tell() - self.ptr
def _read_in_chunks(file_object: io.FileIO, chunk_size: int = 2 * MB) -> "Iterator[bytes]":
    """Read a file in fixed-size chunks (to minimize memory usage for large files).

    Args:
        file_object: An opened file-like object supporting read().
        chunk_size: Max size (in bytes) of each file chunk.

    Yields:
        File chunks (bytes), each of size at most chunk_size.
    """
    # NOTE(review): the original annotated the return as ``str``, but read()
    # on a binary stream yields bytes -- annotate as an iterator of bytes
    # (string annotation so no typing import is required at runtime).
    while True:
        chunk = file_object.read(chunk_size)
        if not chunk:
            return  # End of file.
        yield chunk
class FileDataReader(AbstractDataReader):
    """ A reader that can read data from a file
    """

    def __init__(self, filename):
        """
        :param filename: The file to read
        :type filename: str
        :raise spinnman.exceptions.SpinnmanIOException: If the file\
                    cannot found or opened for reading
        """
        try:
            self._fileio = FileIO(filename, "r")
        except IOError as e:
            # Wrap low-level I/O failures in the library's exception type.
            raise SpinnmanIOException(str(e))

    def read(self, n_bytes):
        """ See :py:meth:`spinnman.data.abstract_data_reader.AbstractDataReader.read`
        """
        return bytearray(self._fileio.read(n_bytes))

    def readinto(self, data):
        """ See :py:meth:`spinnman.data.abstract_data_reader.AbstractDataReader.readinto`
        """
        return self._fileio.readinto(data)

    def readall(self):
        """ See :py:meth:`spinnman.data.abstract_data_reader.AbstractDataReader.readall`
        """
        return self._fileio.readall()

    def close(self):
        """ Closes the file

        :return: Nothing is returned:
        :rtype: None
        :raise spinnman.exceptions.SpinnmanIOException: If the file\
                    cannot be closed
        """
        try:
            self._fileio.close()
        except IOError as e:
            raise SpinnmanIOException(str(e))
def read_in_chunks(file_object: FileIO, block_size: int = 4096) -> Iterable[bytes]:
    """Yield successive ``block_size``-byte chunks of *file_object* until EOF.

    Source: `read-file-in-chunks-ram-usage-read-strings-from-binary-file
    <http://stackoverflow.com/questions/17056382/
    read-file-in-chunks-ram-usage-read-strings-from-binary-files>`_

    :param file_object: File object to read in chunks.
    :type file_object: file object
    :param block_size: (optional) Chunk size.
    :type block_size: int
    :yield: The next chunk in file object.
    :yield type: `bytes`
    """
    while True:
        block = file_object.read(block_size)
        if block == b'':
            break
        yield block
def what(file, h=None):
    # Guess a file's type from its first 32 bytes by running each registered
    # test function in `tests` (imghdr-style dispatch).
    f = None
    try:
        if h is None:
            # if isinstance(file, (str, PathLike))
            if isinstance(file, str):
                # FIXME(corona10): RustPython doesn't support PathLike yet.
                f = FileIO(file, 'rb')
                h = f.read(32)
            else:
                # File-like object: read the header, then restore the
                # caller's stream position.
                location = file.tell()
                h = file.read(32)
                file.seek(location)
        for tf in tests:
            res = tf(h, f)
            if res:
                return res
    finally:
        # Only close a file we opened ourselves.
        if f:
            f.close()
    return None
def compile(self, file: io.FileIO) -> object:
    """Parse an HTML notes export into a dict of title plus highlight segments.

    Returns None for hidden files (file_name starting with '.').
    """
    if not str(self.file_name).startswith('.'):
        soup = BeautifulSoup(file.read(), features='html.parser')
        # Section headers, note headings and note bodies come back in
        # document order from this single selector.
        sections = soup.select(
            'h2.sectionHeading, h3.noteHeading, div.noteText')
        curr_section = None
        results = []
        for sec in sections:
            if 'sectionHeading' in sec['class']:
                # Remember the current section for subsequent notes.
                curr_section = list(sec.children)[0].string
            elif 'noteHeading' in sec['class']:
                highlight, heading = list(sec.children)[1:3]
                # Heading text looks like ") - [Chapter > ]Page N".
                _, note_title, page_num = re.match(
                    r'^((.*) > )?Page (\d+)', heading[len(') - '):]).groups()
                note_text = sec.find_next('div', {
                    'class': 'noteText'
                }).string
                # Undo the exporter's stray spaces before punctuation.
                note_text = note_text.replace(' .', '.').replace(
                    ' ?', '?').replace(' ;', ';').replace(' !', '!')
                note_text = note_text.replace(' ,', ',')
                results.append({
                    'section': curr_section,
                    'chapter': note_title,
                    'contents': note_text,
                    'metadata': {
                        'color': highlight.string,
                        'page_num': int(page_num),
                    },
                })
        return {
            'title': soup.select('div.bookTitle')[0].string,
            'segments': results,
        }
    return None
def load(self, file: FileIO):
    """Read this node from *file*: a one-byte indicator (0 or 1) followed by
    the serialized key/value pair; record offset and on-disk size."""
    begin = file.tell()
    self.ptr = begin
    indicator = file.read(1)
    # Only 0 and 1 are legal indicator values.
    assert unpack('B', indicator)[0] in (0, 1)
    self.key, self.value = load(file)
    self.size = file.tell() - begin
def read(self, n, *args):
    # Advance the progress indicator by the number of bytes requested, then
    # delegate to the plain FileIO read.
    # NOTE(review): progress counts bytes *requested*, not bytes actually
    # returned (a short read near EOF over-counts) -- confirm intended.
    self.prog.progressBy(n)
    return FileIO.read(self, n, *args)
class File(RawIOBase):
    'Create a file object wrapping an e[x]ploded zip file'

    # State machine for reading the reassembled zip stream in order:
    # local header -> file data -> data descriptor -> (next entry | directory).
    HEADER = 0
    DATA = 1
    DESCRIPTOR = 2
    DIRECTORY = 3

    def __init__(self, path, flags, info, fh=None, base='.', depth=0):
        super(File, self).__init__()
        self.path = path
        self.flags = flags
        self.fh = fh
        self.info = info
        self.depth = depth
        self.cursor = 0  # logical position in the reassembled zip
        self.offset = 0  # position inside the current header/descriptor
        self.state = File.HEADER
        # stream item info
        self.stream_offset = 0
        self.zip_header = b''
        self.descriptor = b''
        # data file info
        self.data = None
        self.data_name = ''
        self.data_len = 0
        # streams
        prefix = os.path.join(base, 'meta', os.path.basename(path))
        self.stream = FileIO(prefix + '.stream', 'rb')
        self.dir = FileIO(prefix + '.dir', 'rb')
        self.data_dir = os.path.join(base, 'data')
        # init
        self._load_stream_item()
        self.lock = threading.Lock()

    def _load_stream_item(self):
        'Sets the next stream item as current.'
        if self.data:
            self.data.close()
            self.data = None
        # open the header so we can know the data file to open, and the
        # length of the var fields
        raw_header = self.stream.read(STREAM_ITEM.size)
        header = StreamItem._make(STREAM_ITEM.unpack(raw_header))
        var_fields = header.filename_len + header.extra_field_len
        # I would think that b2a_hex should decode the raw bytes...
        sha1 = b2a_hex(header.sha).decode('ascii')
        # only save the zip part of the header
        self.zip_header = (raw_header[:HEADER_DIFF] +
                           self.stream.read(var_fields))
        self.descriptor = self.stream.read(header.descriptor_len)
        # Data files are sharded by the first `depth` hex chars of the sha.
        self.data_name = path.join(*([self.data_dir] +
                                     list(sha1[:self.depth]) + [sha1]))

    def _open_data_file(self):
        # Open the current entry's data file and cache its length.
        self.data = FileIO(self.data_name, 'rb')
        self.data_len = self.data.seek(0, 2)
        self.data.seek(0)

    def close(self):
        self.stream.close()
        self.dir.close()
        if self.data:
            self.data.close()

    def fileno(self):
        return self.fh

    def isatty(self):
        return False

    def read(self, count=-1):
        if count < 0:
            return self.readall()
        elif count == 0:
            return b''
        state = self.state
        if state == File.HEADER:
            # Serve bytes from the in-memory local header.
            previous_offset = self.offset
            self.offset += count
            result = self.zip_header[previous_offset:self.offset]
            self.cursor += len(result)
            if self.offset >= len(self.zip_header):
                self.state = File.DATA
                if not self.data:
                    self._open_data_file()
            return result
        elif state == File.DATA:
            result = self.data.read(count)
            self.cursor += len(result)
            if self.data.tell() >= self.data_len:
                self.state = File.DESCRIPTOR
                self.offset = 0
            # empty data file (state will now be DESCRIPTOR)
            if not result:
                return self.read(count)
            return result
        elif state == File.DESCRIPTOR:
            previous_offset = self.offset
            self.offset += count
            result = self.descriptor[previous_offset:self.offset]
            self.cursor += len(result)
            if self.offset >= len(self.descriptor):
                # Either move on to the central directory or the next entry.
                if self.cursor >= self.info.directory_offset:
                    self.state = File.DIRECTORY
                    self.dir.seek(0)
                    self.stream_offset = None
                    if self.data:
                        self.data.close()
                        self.data = None
                else:
                    self.state = File.HEADER
                    self.offset = 0
                    self.stream_offset = self.stream.tell()
                    self._load_stream_item()
            # descriptor is optional (state will now be HEADER or DIRECTORY)
            if not result:
                return self.read(count)
            return result
        elif state == File.DIRECTORY:
            result = self.dir.read(count)
            self.cursor += len(result)
            return result
        else:
            raise RuntimeError('Invalid state: %r' % self.state)

    def readable(self):
        return True

    def readinto(self, b):
        # Mirror of read(), filling the caller's buffer in place.
        count = len(b)
        if count == 0:
            return 0
        state = self.state
        if state == File.HEADER:
            header_len = len(self.zip_header)
            previous_offset = self.offset
            current_offset = self.offset = \
                min(previous_offset + count, header_len)
            read = current_offset - previous_offset
            b[:read] = self.zip_header[previous_offset:current_offset]
            self.cursor += read
            if current_offset == header_len:
                self.state = File.DATA
                if not self.data:
                    self._open_data_file()
            return read
        elif state == File.DATA:
            read = self.data.readinto(b)
            self.cursor += read
            if self.data.tell() >= self.data_len:
                self.state = File.DESCRIPTOR
                self.offset = 0
            # empty data file (state will now be DESCRIPTOR)
            if not read:
                return self.readinto(b)
            return read
        elif state == File.DESCRIPTOR:
            descriptor_len = len(self.descriptor)
            previous_offset = self.offset
            current_offset = self.offset = \
                min(previous_offset + count, descriptor_len)
            read = current_offset - previous_offset
            b[:read] = self.descriptor[previous_offset:current_offset]
            self.cursor += read
            if current_offset == descriptor_len:
                if self.cursor >= self.info.directory_offset:
                    self.state = File.DIRECTORY
                    self.dir.seek(0)
                    self.stream_offset = None
                    if self.data:
                        self.data.close()
                        self.data = None
                else:
                    self.state = File.HEADER
                    self.offset = 0
                    self.stream_offset = self.stream.tell()
                    self._load_stream_item()
            # descriptor is optional (state will now be HEADER or DIRECTORY)
            if not read:
                return self.readinto(b)
            return read
        elif state == File.DIRECTORY:
            read = self.dir.readinto(b)
            self.cursor += read
            return read
        else:
            raise RuntimeError('Invalid state: %r' % self.state)

    def seek(self, pos, offset=0):
        # Translate whence-style offsets to an absolute position.
        if offset == 1:
            pos += self.cursor
        elif offset == 2:
            pos += self.info.filesize
        if pos == self.cursor:
            return pos
        self.cursor = pos
        # skip directly to the central directory
        if pos >= self.info.directory_offset:
            if self.data:
                self.data.close()
                self.data = None
            self.state = File.DIRECTORY
            self.stream_offset = None
            self.dir.seek(pos - self.info.directory_offset)
            return pos
        # calculate the offset into the stream file
        z_offset, s_offset = self.info.jump_tree.find(pos).location
        additional = pos - z_offset
        # we're looking at a different data file
        # (load local header into memory)
        if s_offset != self.stream_offset:
            self.stream_offset = s_offset
            self.stream.seek(s_offset)
            self._load_stream_item()
        header_len = len(self.zip_header)
        if additional < header_len:
            self.state = File.HEADER
            self.offset = additional
            return pos
        # assume currently in the data file
        additional -= header_len
        self.state = File.DATA
        # if the file hasn't been opened yet, open it and find its size
        if not self.data:
            self._open_data_file()
        if additional < self.data_len:
            self.data.seek(additional)
        else:
            self.state = File.DESCRIPTOR
            self.offset = additional - self.data_len
        return pos

    def seekable(self):
        return True

    def tell(self):
        return self.cursor

    # NOTE(review): io.RawIOBase's hook is spelled ``writable``; this name
    # looks like a typo and the base class default would be used instead --
    # confirm before renaming.
    def writeable(self):
        return False
def load(self, file: FileIO):
    """Read this node from *file*: a one-byte OP/ED sentinel followed by the
    serialized key/value pair; record offset and on-disk size."""
    begin = file.tell()
    self.ptr = begin
    indicator = file.read(1)
    # The indicator byte must be one of the two sentinel values.
    assert indicator in (OP, ED)
    self.key, self.value = load(file)
    self.size = file.tell() - begin