Example #1
    def assertFlippedBit(self, file_orig, file_modded, position):
        len_orig = os.path.getsize(file_orig)
        len_modded = os.path.getsize(file_modded)
        self.assertEqual(len_orig, len_modded, "Files of different sizes")

        f_o = FileIO(file_orig, "r+b")
        f_m = FileIO(file_modded, "r+b")

        for i in range(len_orig):
            # read in a byte from each file and compare
            b_o = ord(f_o.read(1))
            b_m = ord(f_m.read(1))
            if i == (position // 8):
                for m in range(8):
                    bit_m = BitwiseAnalyser.BitManipulator.getBitFromByteAt(
                        b_m, m)
                    bit_o = BitwiseAnalyser.BitManipulator.getBitFromByteAt(
                        b_o, m)
                    if m == (position % 8):
                        self.assertNotEqual(
                            bit_m, bit_o,
                            "Bits are equal when they should be different at position: "
                            + str(position))
                    else:
                        self.assertEqual(
                            bit_m, bit_o,
                            "Bits are incorrectly different at position " +
                            str(i))
            else:
                self.assertEqual(
                    b_o, b_m,
                    "Bytes differ (when they shouldn't) at position " + str(i))
        f_o.close()
        f_m.close()
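
The test relies on BitwiseAnalyser.BitManipulator.getBitFromByteAt, which is not shown on this page. A minimal sketch of such a helper, assuming MSB-first bit indexing (consistent with the `1 << (7 - bitnum)` masking in the flipBitAt examples below):

def getBitFromByteAt(byte, position):
    """Return bit `position` (0 = most significant) of an 8-bit value."""
    if not 0 <= position <= 7:
        raise IndexError("Position " + str(position) + " is out of range")
    return (byte >> (7 - position)) & 1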
Example #2
    def parse_file(self):
        """Parses the video file, obtaining metadata that can be accessed thru
        this class' properties.

        :raises ValueError: File is not an MP4 format video.
        """
        the_file = FileIO(self.path, 'rb')

        # the mimetype could be incorrect
        # we'll let the file decide
        if self.video_format not in self.mimetype:
            the_file.seek(0x00, SEEK_SET)
            first_12 = the_file.read(12)
            # split the dword and the ftyp
            size_dword = struct.unpack('>I', first_12[0:4])[0]
            ftyp_val = first_12[4:]
            # validate if mp4
            if size_dword > 0:
                if ftyp_val not in self.supported_ftypes:
                    the_file.close()
                    raise ValueError("{} is not an MP4 video.".format(
                        self.name))
            else:
                the_file.close()
                raise ValueError("{} is not an MP4 video.".format(self.name))

        # determine the size of the `compatible_brand` field
        # this is the very first DWORD of the file
        the_file.seek(0x00, SEEK_SET)
        compat_brand_end = the_file.read(4)
        compat_brand_end = struct.unpack('>I', compat_brand_end)[0]
        compat_brand_size = compat_brand_end - 0x10
        # get the `compatible_brand` field
        the_file.seek(0x10, SEEK_SET)
        compat_brand = the_file.read(compat_brand_size)

        # PARSE THE FILE!!!
        try:
            if compat_brand in self.supported_brands:
                self._read_mp4_container(the_file, compat_brand_end)
        except NoMoovAtomException:
            # TODO: add logging
            # FIXME: make this into a logger
            print("WARNING: {} has no moov atom!".format(self.name))
        except NoReadVideoHeaderException:
            print("WARNING: Couldn't get information from {}!".format(
                self.name))

        the_file.close()
        self._parsed_header = True
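
For context, the 12-byte probe above covers the start of an MP4 `ftyp` box: a 4-byte big-endian size followed by 8 bytes of box type plus major brand. The same check as a self-contained sketch; the function name and the brand list are illustrative, not from the original class:

import struct

def looks_like_mp4(path, supported_ftypes=(b'ftypisom', b'ftypmp42')):
    with open(path, 'rb') as fh:
        first_12 = fh.read(12)
    if len(first_12) < 12:
        return False
    # [size: 4 bytes, big-endian][box type 'ftyp' + major brand: 8 bytes]
    size_dword = struct.unpack('>I', first_12[0:4])[0]
    return size_dword > 0 and first_12[4:] in supported_ftypes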
Example #3
    def __init__(self, file_handle: FileIO):
        data = file_handle.read(10)
        if data[0:3].decode("ascii") != "ID3":
            self.__is_id3 = False
            return

        self.__id3_size = data[6] << 21 | data[7] << 14 | data[8] << 7 | data[9]
        flags = data[5]
        self.__id3_version = int(data[3])
        self.__unsynchronisation = flags & 0x80 == 0x80
        self.__extended_header = flags & 0x40 == 0x40
        self.__experimental = flags & 0x20 == 0x20
        self.__is_id3 = flags & 0x1f == 0
        self.__data = file_handle.read(self.__id3_size)
        self.__parse_id3_data()
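
The size computed above is an ID3v2 "synchsafe" integer: four bytes whose most significant bit is always zero, leaving 7 payload bits each, which is exactly what the shift-and-or expression decodes. As a standalone helper (illustrative):

def synchsafe_to_int(data: bytes) -> int:
    # each byte contributes 7 bits; the MSB of every byte must be 0
    result = 0
    for byte in data:
        result = (result << 7) | (byte & 0x7F)
    return result

assert synchsafe_to_int(bytes([0x00, 0x00, 0x02, 0x01])) == 257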
Example #4
def _parse_config_file_or_exit(config_file: io.FileIO) -> Dict:
    experiment_config = yaml.safe_load(config_file.read())
    config_file.close()
    if not experiment_config or not isinstance(experiment_config, dict):
        print("Error: invalid experiment config file {}".format(config_file.name))
        sys.exit(1)
    return experiment_config
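
A typical call site, assuming the experiment config lives in a YAML file (the path is illustrative):

with open("experiment.yaml", "rb") as config_file:
    experiment_config = _parse_config_file_or_exit(config_file)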
Example #5
def get_from_file_memory_duplicate(path):
    f = FileIO(path, 'rb')
    # copy the file into an in-memory buffer; BytesIO (not StringIO),
    # since the ELF data is binary
    buf = BytesIO()
    buf.write(f.read())
    f.close()
    buf.seek(0, os.SEEK_SET)
    return ELF(buf)
Example #6
def uint7(file: io.FileIO) -> int:
    to_parse = file.read(1)
    if len(to_parse) != 1:
        raise IOError("Not enough bytes found")

    rval = (to_parse[0] & 0x7F)
    return rval
Example #7
def mail_merge_from_dict(
    template_fp: FileIO,
    data_dict: dict,
) -> Dict[str, str]:
    """Mail merges a Jinja2 template against a dictionary of dictionaries

    This function inputs a Jinja2 template file and a dictionary of
    dictionaries, each having as keys variables in the template file, and
    outputs a dictionary with the same keys as the input dictionary and as
    values the results of rendering the template against the corresponding
        entry in the input dictionary.

    Args:
        template_fp: pointer to text file or file-like object containing a
            Jinja2 template and ready to be read from
        data_dict: dictionary of dictionaries, with each inner-dictionary
            having as keys variables from the Jinja2 template

    Returns:
        A dictionary with the same keys as the input dictionary and as values
        the results of rendering the Jinja2 template against the corresponding
        entry in the input dictionary.

    """

    template_text = Template(template_fp.read())

    return_value = OrderedDict()
    for k in data_dict:
        return_value[k] = template_text.render(data_dict[k])

    return return_value
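
A usage sketch with an in-memory template via io.StringIO instead of a file on disk (the template and data are illustrative):

from io import StringIO

template = StringIO("Dear {{ name }}, your order #{{ order }} has shipped.")
data = {
    "alice": {"name": "Alice", "order": 1},
    "bob": {"name": "Bob", "order": 2},
}
letters = mail_merge_from_dict(template, data)
# letters["alice"] == "Dear Alice, your order #1 has shipped."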
Example #9
            def dynamic_header(df: FileIO, hdr):

                # read the portion of the header that describes each individual signal
                ns = hdr['ns']

                hdr['labels'] = []
                for i in range(ns):
                    hdr['labels'].append(df.read(16).strip().decode('ascii'))

                header_keys_dynamic = [('transducer', 80, str),
                                       ('physical_dim', 8, str),
                                       ('physical_min', 8, float),
                                       ('physical_max', 8, float),
                                       ('digital_min', 8, float),
                                       ('digital_max', 8, float),
                                       ('prefiltering', 80, str),
                                       ('num_samples', 8, int),
                                       ('reserved_signal', 32, str)]

                for key, n, method in header_keys_dynamic:
                    hdr[key] = defaultdict(method)
                    for label in hdr['labels']:
                        hdr[key][label] = read_n_bytes(df, n, method)

                return hdr
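
The `read_n_bytes` helper is not shown on this page. A plausible sketch, assuming fixed-width ASCII header fields padded with spaces, as in the EDF format:

def read_n_bytes(df, n, method):
    # EDF header fields are fixed-width ASCII, padded with spaces
    raw = df.read(n).strip().decode('ascii')
    return method(raw)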
Example #10
        def read_signal(data_file: FileIO, header):
            """Reads EEG signal from the EDF file."""

            signal = {}
            num_records = header['num_records']
            rest = bytes(data_file.read())
            offset = 0
            dt = np.dtype(np.int16)
            dt = dt.newbyteorder('<')

            for label in header['labels']:
                num_samples = header['num_samples'][label]
                signal[label] = np.zeros(num_records * num_samples).reshape(
                    num_records, num_samples)

            for i in range(num_records):
                for label in header['labels']:
                    num_samples = header['num_samples'][label]
                    signal[label][i] = np.frombuffer(rest,
                                                     dtype=dt,
                                                     count=num_samples,
                                                     offset=offset)
                    offset += num_samples * 2

            for label in header['labels']:
                num_samples = header['num_samples'][label]
                signal[label] = scale(
                    header['physical_max'][label],
                    header['digital_max'][label],
                    np.array(signal[label].reshape(num_samples * num_records)))

            return signal
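
The `scale` helper is also elided. Since only the physical and digital maxima are passed in, one plausible implementation is a proportional rescaling; note the full EDF conversion also involves the minima, so this is an assumption:

def scale(physical_max, digital_max, samples):
    # map raw 16-bit digital samples onto physical units proportionally
    return samples * (physical_max / digital_max)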
Example #11
def flipByteAt(inputfile, position):
    """Flips the bits for the byte at the specified position in the input file."""
    f = FileIO(inputfile, "r+")
    f.seek(position)
    byte = ord(f.read(1))
    f.seek(-1, 1)  # go back 1 byte from current position
    f.write(struct.pack("B", byte ^ 0xFF))  # write the byte back with every bit flipped
    f.close()
Example #12
def __execfile(name):
    # open outside the try block so `f` is always bound in the finally clause
    f = FileIO(name)
    try:
        codestr = f.read()
        exec(codestr)
    except Exception:
        raise RuntimeError('Failed to execute file %s' % name)
    finally:
        f.close()
Example #14
    def assertFlippedByte(self, file_orig, file_modded, position):
        len_orig = os.path.getsize(file_orig)
        len_modded = os.path.getsize(file_modded)

        self.assertEqual(len_orig, len_modded, "Files of different sizes")

        f_o = FileIO(file_orig, "r+b")
        f_m = FileIO(file_modded, "r+b")

        for i in range(len_orig):
            # read in a byte from each file and compare
            b_o = ord(f_o.read(1))
            b_m = ord(f_m.read(1))
            if i == position:
                self.assertEqual(b_m, b_o ^ 0xff,
                                 "Flipped bytes are actually equal at position " + str(i))
            else:
                self.assertEqual(b_o, b_m,
                                 "Bytes differ (when they shouldn't) at position " + str(i))
        f_o.close()
        f_m.close()
Example #15
def read_from_file(file: io.FileIO, start: int, stop: int) -> bytes:
    assert stop > start
    file.seek(start)
    data = bytes()
    while file.tell() < stop:
        read_data = file.read(stop - file.tell())
        if read_data == b'':
            raise ReachEndOfFile('Read until the end of file')
        data += read_data
    assert len(data) == stop - start
    return data
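
Usage sketch, pulling bytes 16 through 63 out of a file (the path is illustrative):

with open('blob.bin', 'rb') as fh:
    header = read_from_file(fh, 16, 64)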
Example #16
class filestream_range_iterator(Iterable):
    """
    A class that mimics FileIO and implements an iterator that returns a
    fixed-sized sequence of bytes. Beginning from `start` to `end`.

    BBB: due to a possible bug in Zope>4, <=4.1.3, couldn't be subclass of FileIO
         as Iterators.filestream_iterator
    """

    def __init__(self, name, mode='rb', bufsize=-1, streamsize=1 << 16, start=0, end=None):
        self._io = FileIO(name, mode=mode)
        self.streamsize = streamsize
        self.start = start
        self.end = end
        self._io.seek(start, 0)

    def __iter__(self):
        if self._io.closed:
            raise ValueError("I/O operation on closed file.")
        return self

    def __next__(self):
        if self.end is None:
            count = self.streamsize
        else:
            count = max(min(self.end - self._io.tell(), self.streamsize), 0)
        data = self._io.read(count)
        if not data:
            raise StopIteration
        return data

    next = __next__

    def close(self):
        self._io.close()

    # BBB: is it necessary to implement __len__ ?
    # def __len__(self)

    def read(self, size=-1):
        return self._io.read(size)
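
Iteration sketch, streaming an arbitrary byte range in the default 64 KiB chunks (the file name and consumer are illustrative):

stream = filestream_range_iterator('archive.bin', start=1024, end=1 << 20)
try:
    for chunk in stream:
        process(chunk)  # hypothetical consumer
finally:
    stream.close()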
Example #17
	def __init__(self, data, buffer_size=DEFAULT_BUFFER_SIZE):
		if isinstance(data, (BufferedReader, DataReader)):
			if isinstance(data.raw, FileIO):
				data = FileIO(data.name, 'rb')
			else:
				data = BytesIO(data.read())
		elif isinstance(data, (os.PathLike, str)):
			data = FileIO(data, 'rb')
		elif isinstance(data, (bytearray, bytes, memoryview)):
			data = BytesIO(data)

		super().__init__(data, buffer_size=buffer_size)
Example #18
def flipBitAt(inputfile, position):
    """Flips the bit at the specified position in the input file."""
    if not 0 <= position < (8 * os.path.getsize(inputfile)):
        raise IndexError("Position " + str(position) + " is out of range")

    f = FileIO(inputfile, "r+")
    f.seek(position // 8)  # integer division: locate the byte holding the bit
    byte = ord(f.read(1))
    f.seek(-1, 1)  # go back 1 byte from the current position
    bitnum = position % 8
    f.write(struct.pack("B", byte ^ (1 << (7 - bitnum))))  # flip just that bit
    f.close()
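
A round-trip sketch combining the helpers on this page (paths illustrative): copy a file, flip one bit, then verify with the assertFlippedBit test above.

import shutil

shutil.copyfile('input.bin', 'mutated.bin')  # work on a copy
flipBitAt('mutated.bin', 42)                 # bit 42 lives in byte 5, bit 2
# a test case could now call assertFlippedBit('input.bin', 'mutated.bin', 42)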
Example #20
def read_from_file(file_fd: io.FileIO, start: int, stop: int) -> bytes:
    length = stop - start
    assert length >= 0
    file_fd.seek(start)
    data = bytes()
    while file_fd.tell() < stop:
        read_data = file_fd.read(stop - file_fd.tell())
        if read_data == b'':
            raise ReachedEndOfFile('Read until the end of file')
        data += read_data
    assert len(data) == length
    return data
Example #21
def read_from_file(file_fd: io.FileIO, start: int, stop: int) -> bytes:
    length = stop - start
    assert length >= 0
    file_fd.seek(start)
    data = bytes()
    while file_fd.tell() < stop:
        read_data = file_fd.read(stop - file_fd.tell())
        if read_data == b'':
            raise EndOfFileError('Read until the end of file_fd')
        data += read_data
    assert len(data) == length
    return data
Example #22
File: user.py Project: zuzhi/rssant
def redeem_code_exchange(user: str, userfile: io.FileIO, code: str):
    if not user and not userfile:
        raise click.MissingParameter('user or userfile required')
    user_s = []
    if user:
        user_s.append(user)
    if userfile:
        for line in userfile.read().splitlines():
            parts = line.split()  # guard against blank lines before indexing
            if parts:
                user_s.append(parts[0])
    for user in user_s:
        _redeem_code_exchange(user, code)
Example #23
    def read_id3(file_handle: FileIO, skip_v1: bool = False) -> ID3Base:
        id3 = ID3v2(file_handle)
        if id3.is_valid_id3 or skip_v1:
            return id3

        # Check for an id3v1 tag
        current_file_position = file_handle.tell()
        file_handle.seek(-128, SEEK_END)
        block = file_handle.read(128)
        id3 = ID3v1(block)

        file_handle.seek(current_file_position, SEEK_SET)
        return id3
Example #24
def read_from_file(file_fd: io.FileIO, start: int, end: int) -> bytes:
    length = end - start
    assert length >= 0
    file_fd.seek(start)
    data = bytes()
    while file_fd.tell() < end:
        # The read() (when called with a positive argument), readinto() and
        # write() methods on this class will only make one system call.
        read_data = file_fd.read(end - file_fd.tell())
        if read_data == b'':
            raise EndOfFileError('read until the end of file_fd')
        data += read_data
    assert len(data) == length
    return data
Example #25
def uint16(file: io.FileIO) -> int:
    to_parse = file.read(2)
    rval = 0x00

    if len(to_parse) != 2:
        raise IOError("Not enough bytes found")

    to_parse = [c_uint16(x).value for x in to_parse]

    rval |= to_parse[0] << 0
    rval |= to_parse[1] << 8

    return rval
Example #27
    def read(self, file: FileIO):
        keywardIndexData = file.read(self._length)
        if self._encrypt & 0x02:
            keywardIndexData = util._mdx_decrypt(keywardIndexData)
        keywardBlockInfo = zlib.decompress(keywardIndexData[8:])
        adler32 = struct.unpack('>I', keywardIndexData[4:8])[0]
        assert (adler32 == zlib.adler32(keywardBlockInfo) & 0xffffffff)

        byte_format = '>H'
        byte_width = 2
        text_term = 1

        i = 0
        keyIndex = []
        while i < len(keywardBlockInfo):
            entriesNum = struct.unpack(
                '>Q', keywardBlockInfo[i:i + BYTE_LENGTH_NUMBER])[0]
            i += BYTE_LENGTH_NUMBER
            firstKeyLength = struct.unpack(
                byte_format, keywardBlockInfo[i:i + byte_width])[0]
            i += byte_width

            firstKey = str(keywardBlockInfo[i:i + firstKeyLength],
                           encoding=DEFAULT_ENCODING)
            i += firstKeyLength + text_term

            lastKeyLength = struct.unpack(
                byte_format, keywardBlockInfo[i:i + byte_width])[0]
            i += byte_width
            lastKey = str(keywardBlockInfo[i:i + lastKeyLength],
                          encoding=DEFAULT_ENCODING)
            i += lastKeyLength + text_term

            compKeyBlocksSize = struct.unpack(
                '>Q', keywardBlockInfo[i:i + BYTE_LENGTH_NUMBER])[0]
            i += BYTE_LENGTH_NUMBER

            deCompKeyBlocksSize = struct.unpack(
                '>Q', keywardBlockInfo[i:i + BYTE_LENGTH_NUMBER])[0]
            i += BYTE_LENGTH_NUMBER
            keyIndex.append({
                'entries_number': entriesNum,
                'first_keyward': firstKey,
                'first_keyward_length': firstKeyLength,
                'last_keyward': lastKey,
                'last_keyward_length': lastKeyLength,
                'compress_key_blocks_size': compKeyBlocksSize,
                'decompress_key_blocks_size': deCompKeyBlocksSize
            })
        self._data = keyIndex  # assign once the whole index has been parsed
Example #28
    def read(self, instream: io.FileIO, rsakey):
        # read the signature
        (signlen, ) = struct.unpack("i", instream.read(4))
        signature = instream.read(signlen)

        # read the entries
        (entrylen, ) = struct.unpack("i", instream.read(4))
        for i in range(0, entrylen):
            entry = MBPakFileEntry()
            (filepathlen, ) = struct.unpack("b", instream.read(1))
            entry.filePath = instream.read(filepathlen).decode(
                encoding="utf-8")
            (isencrypted, ) = struct.unpack("b", instream.read(1))
            entry.encrypted = (isencrypted == 1)
            (entry.fileOffset, ) = struct.unpack("q", instream.read(8))
            (entry.uncompressedSize, ) = struct.unpack("q", instream.read(8))
            entry.compressedContents = [
                struct.unpack("i", instream.read(4))[0]
            ]  # temporarily store the compressed length here
            entry.encrypted = True  # NB: unconditionally overwrites the flag parsed above
            self.entries.append(entry)

        databuffer = instream.read()

        publickey, privatekey = rsakey

        # now verify integrity
        if self.verifysign(databuffer, publickey, signature):
            # now fill in the compressed contents
            for entry in self.entries:
                datalen = entry.compressedContents[
                    0]  # retrieve the length we stored above
                entry.compressedContents = databuffer[entry.fileOffset:entry.
                                                      fileOffset + datalen]
        else:
            raise Exception("Tampered data: signature verification failed")
Example #31
def uint32(file: io.FileIO):
    to_parse = file.read(4)
    rval = 0x00

    if len(to_parse) != 4:
        raise IOError("Not enough bytes found")

    to_parse = [c_uint32(x).value for x in to_parse]  # convert any unwanted negatives

    rval |= to_parse[0] << 0
    rval |= to_parse[1] << 8
    rval |= to_parse[2] << 16
    rval |= to_parse[3] << 24

    return rval
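
The uint7/uint16/uint32 readers above hand-roll little-endian decoding; the standard struct module expresses the same thing more compactly. An equivalent sketch, not the original code:

import io
import struct

def read_le(file: io.FileIO, fmt: str) -> int:
    # '<B', '<H', '<I' give 8-, 16- and 32-bit little-endian unsigned ints
    size = struct.calcsize(fmt)
    data = file.read(size)
    if len(data) != size:
        raise IOError("Not enough bytes found")
    return struct.unpack(fmt, data)[0]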
Example #32
    def load(self, file: FileIO):
        self.ptr = file.tell()
        self.is_leaf, self.keys = load(file)

        ptr_num = len(self.keys)
        if not self.is_leaf:
            ptr_num += (ptr_num + 1)
        ptrs = unpack('Q' * ptr_num, file.read(8 * ptr_num))

        if self.is_leaf:
            self.ptrs_value = list(ptrs)
        else:
            ptr_num //= 2
            self.ptrs_value = list(ptrs[:ptr_num])
            self.ptrs_child = list(ptrs[ptr_num:])
        self.size = file.tell() - self.ptr
Example #33
    def load(self, file: FileIO):
        self.ptr = file.tell()
        # IndexNode: [is_leaf, [..., key]] + ptrs_value + ptrs_child if not is_leaf
        self.is_leaf, self.keys = load(file)

        ptr_num = len(self.keys)
        if not self.is_leaf:
            ptr_num += (ptr_num + 1)
        ptrs = unpack('Q' * ptr_num, file.read(8 * ptr_num))

        if self.is_leaf:
            self.ptrs_value = list(ptrs)
        else:
            self.ptrs_value = list(ptrs[:len(self.keys)])
            self.ptrs_child = list(ptrs[len(self.keys):])
        self.size = file.tell() - self.ptr
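
A worked check of the pointer arithmetic above: an internal node with n keys stores n value pointers followed by n + 1 child pointers, the usual B+-tree invariant.

keys = ['a', 'b']                        # n = 2 keys in an internal node
ptr_num = len(keys) + (len(keys) + 1)    # 2 value ptrs + 3 child ptrs
assert ptr_num == 5                      # 5 Q-words read from the file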
Example #34
def _read_in_chunks(file_object: io.FileIO, chunk_size: int = 2 * MB) -> Iterator[bytes]:
    """Read a file in fixed-size chunks (to minimize memory usage for large files).

    Args:
        file_object: An opened file-like object supporting read().
        chunk_size: Max size (in bytes) of each file chunk.

    Yields:
        File chunks, each of size at most chunk_size.
    """
    while True:
        chunk = file_object.read(chunk_size)
        if chunk:
            yield chunk
        else:
            return  # End of file.
Example #35
class FileDataReader(AbstractDataReader):
    """ A reader that can read data from a file
    """

    def __init__(self, filename):
        """
        :param filename: The file to read
        :type filename: str
        :raise spinnman.exceptions.SpinnmanIOException: If the file\
                    cannot be found or opened for reading
        """
        try:
            self._fileio = FileIO(filename, "r")
        except IOError as e:
            raise SpinnmanIOException(str(e))

    def read(self, n_bytes):
        """ See :py:meth:`spinnman.data.abstract_data_reader.AbstractDataReader.read`
        """
        return bytearray(self._fileio.read(n_bytes))

    def readinto(self, data):
        """ See :py:meth:`spinnman.data.abstract_data_reader.AbstractDataReader.readinto`
        """
        return self._fileio.readinto(data)

    def readall(self):
        """ See :py:meth:`spinnman.data.abstract_data_reader.AbstractDataReader.readall`
        """
        return self._fileio.readall()

    def close(self):
        """ Closes the file

        :return: Nothing is returned.
        :rtype: None
        :raise spinnman.exceptions.SpinnmanIOException: If the file\
                    cannot be closed
        """
        try:
            self._fileio.close()
        except IOError as e:
            raise SpinnmanIOException(str(e))
Example #36
def read_in_chunks(file_object: FileIO,
                   block_size: int = 4096) -> Iterable[bytes]:
    """Return a generator which yields data in chunks.

    Source: `read-file-in-chunks-ram-usage-read-strings-from-binary-file 
    <http://stackoverflow.com/questions/17056382/
    read-file-in-chunks-ram-usage-read-strings-from-binary-files>`_

    :param file_object: File object to read in chunks.
    :type file_object: file object

    :param block_size: (optional) Chunk size.
    :type block_size: int

    :yield: The next chunk in file object.
    :yield type: `bytes`
    """
    for chunk in iter(lambda: file_object.read(block_size), b''):
        yield chunk
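
Usage sketch, hashing a large file without loading it whole (the file name is illustrative):

import hashlib

digest = hashlib.sha256()
with open('large.iso', 'rb') as fh:
    for chunk in read_in_chunks(fh):
        digest.update(chunk)
print(digest.hexdigest())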
Example #37
def what(file, h=None):
    f = None
    try:
        if h is None:
            # if isinstance(file, (str, PathLike))
            if isinstance(file, str): # FIXME(corona10): RustPython doesn't support PathLike yet.
                f = FileIO(file, 'rb')
                h = f.read(32)
            else:
                location = file.tell()
                h = file.read(32)
                file.seek(location)
        for tf in tests:
            res = tf(h, f)
            if res:
                return res
    finally:
        if f: f.close()
    return None
Example #38
    def compile(self, file: io.FileIO) -> object:
        if not str(self.file_name).startswith('.'):
            soup = BeautifulSoup(file.read(), features='html.parser')

            sections = soup.select(
                'h2.sectionHeading, h3.noteHeading, div.noteText')
            curr_section = None
            results = []

            for sec in sections:
                if 'sectionHeading' in sec['class']:
                    curr_section = list(sec.children)[0].string
                elif 'noteHeading' in sec['class']:
                    highlight, heading = list(sec.children)[1:3]
                    _, note_title, page_num = re.match(
                        r'^((.*) > )?Page (\d+)',
                        heading[len(') - '):]).groups()
                    note_text = sec.find_next('div', {
                        'class': 'noteText'
                    }).string
                    note_text = note_text.replace(' .', '.').replace(
                        ' ?', '?').replace(' ;', ';').replace(' !', '!')
                    note_text = note_text.replace(' ,', ',')
                    results.append({
                        'section': curr_section,
                        'chapter': note_title,
                        'contents': note_text,
                        'metadata': {
                            'color': highlight.string,
                            'page_num': int(page_num),
                        },
                    })

            return {
                'title': soup.select('div.bookTitle')[0].string,
                'segments': results,
            }

        return None
Example #39
    def load(self, file: FileIO):
        self.ptr = file.tell()
        indicator = file.read(1)
        assert unpack('B', indicator)[0] in (0, 1)
        self.key, self.value = load(file)
        self.size = file.tell() - self.ptr
Example #40
File: utils.py Project: BoPeng/SOS
    def read(self, n, *args):
        self.prog.progressBy(n)
        return FileIO.read(self, n, *args)
Example #41
class File(RawIOBase):
    'Create a file object wrapping an e[x]ploded zip file'

    HEADER = 0
    DATA = 1
    DESCRIPTOR = 2
    DIRECTORY = 3

    def __init__(self, path, flags, info, fh=None, base='.', depth=0):
        super(File, self).__init__()

        self.path = path
        self.flags = flags
        self.fh = fh

        self.info = info
        self.depth = depth
        self.cursor = 0
        self.offset = 0
        self.state = File.HEADER

        # stream item info
        self.stream_offset = 0
        self.zip_header = b''
        self.descriptor = b''

        # data file info
        self.data = None
        self.data_name = ''
        self.data_len = 0

        # streams
        prefix = os.path.join(base, 'meta', os.path.basename(path))
        self.stream = FileIO(prefix + '.stream', 'rb')
        self.dir = FileIO(prefix + '.dir', 'rb')
        self.data_dir = os.path.join(base, 'data')

        # init
        self._load_stream_item()
        self.lock = threading.Lock()

    def _load_stream_item(self):
        'Sets the next stream item as current.'

        if self.data:
            self.data.close()
            self.data = None

        # open the header so we can know the data file to open, and the
        # length of the var fields
        raw_header = self.stream.read(STREAM_ITEM.size)
        header = StreamItem._make(STREAM_ITEM.unpack(raw_header))

        var_fields = header.filename_len + header.extra_field_len
        # I would think that b2a_hex should decode the raw bytes...
        sha1 = b2a_hex(header.sha).decode('ascii')

        # only save the zip part of the header
        self.zip_header = (raw_header[:HEADER_DIFF] +
                           self.stream.read(var_fields))

        self.descriptor = self.stream.read(header.descriptor_len)

        self.data_name = os.path.join(*([self.data_dir] +
                                        list(sha1[:self.depth]) + [sha1]))

    def _open_data_file(self):
        self.data = FileIO(self.data_name, 'rb')
        self.data_len = self.data.seek(0, 2)
        self.data.seek(0)

    def close(self):
        self.stream.close()
        self.dir.close()
        if self.data: self.data.close()

    def fileno(self):
        return self.fh

    def isatty(self):
        return False

    def read(self, count=-1):
        if count < 0: return self.readall()
        elif count == 0: return b''

        state = self.state
        if state == File.HEADER:
            previous_offset = self.offset
            self.offset += count

            result = self.zip_header[previous_offset:self.offset]
            self.cursor += len(result)

            if self.offset >= len(self.zip_header):
                self.state = File.DATA
                if not self.data: self._open_data_file()

            return result

        elif state == File.DATA:
            result = self.data.read(count)
            self.cursor += len(result)

            if self.data.tell() >= self.data_len:
                self.state = File.DESCRIPTOR
                self.offset = 0

            # empty data file (state will now be DESCRIPTOR)
            if not result: return self.read(count)

            return result

        elif state == File.DESCRIPTOR:
            previous_offset = self.offset
            self.offset += count

            result = self.descriptor[previous_offset:self.offset]
            self.cursor += len(result)

            if self.offset >= len(self.descriptor):
                if self.cursor >= self.info.directory_offset:
                    self.state = File.DIRECTORY
                    self.dir.seek(0)
                    self.stream_offset = None

                    if self.data:
                        self.data.close()
                        self.data = None

                else:
                    self.state = File.HEADER
                    self.offset = 0
                    self.stream_offset = self.stream.tell()
                    self._load_stream_item()

            # descriptor is optional (state will now be HEADER or DIRECTORY)
            if not result: return self.read(count)

            return result
        elif state == File.DIRECTORY:
            result = self.dir.read(count)
            self.cursor += len(result)

            return result
        else:
            raise RuntimeError('Invalid state: %r' % self.state)

    def readable(self):
        return True

    def readinto(self, b):
        count = len(b)
        if count == 0: return 0

        state = self.state
        if state == File.HEADER:
            header_len = len(self.zip_header)
            previous_offset = self.offset

            current_offset = self.offset = \
                    min(previous_offset + count, header_len)

            read = current_offset - previous_offset
            b[:read] = self.zip_header[previous_offset:current_offset]
            self.cursor += read

            if current_offset == header_len:
                self.state = File.DATA
                if not self.data: self._open_data_file()

            return read

        elif state == File.DATA:
            read = self.data.readinto(b)
            self.cursor += read

            if self.data.tell() >= self.data_len:
                self.state = File.DESCRIPTOR
                self.offset = 0

            # empty data file (state will now be DESCRIPTOR)
            if not read: return self.readinto(b)

            return read

        elif state == File.DESCRIPTOR:
            descriptor_len = len(self.descriptor)
            previous_offset = self.offset

            current_offset = self.offset = \
                    min(previous_offset + count, descriptor_len)

            read = current_offset - previous_offset
            b[:read] = self.descriptor[previous_offset:current_offset]
            self.cursor += read

            if current_offset == descriptor_len:
                if self.cursor >= self.info.directory_offset:
                    self.state = File.DIRECTORY
                    self.dir.seek(0)
                    self.stream_offset = None

                    if self.data:
                        self.data.close()
                        self.data = None

                else:
                    self.state = File.HEADER
                    self.offset = 0
                    self.stream_offset = self.stream.tell()
                    self._load_stream_item()

            # descriptor is optional (state will now be HEADER or DIRECTORY)
            if not read: return self.readinto(b)

            return read
        elif state == File.DIRECTORY:
            read = self.dir.readinto(b)
            self.cursor += read

            return read
        else:
            raise RuntimeError('Invalid state: %r' % self.state)

    def seek(self, pos, offset=0):
        if offset == 1:
            pos += self.cursor
        elif offset == 2:
            pos += self.info.filesize

        if pos == self.cursor: return pos
        self.cursor = pos

        # skip directly to the central directory
        if pos >= self.info.directory_offset:
            if self.data:
                self.data.close()
                self.data = None

            self.state = File.DIRECTORY
            self.stream_offset = None
            self.dir.seek(pos - self.info.directory_offset)
            return pos

        # calculate the offset into the stream file
        z_offset, s_offset = self.info.jump_tree.find(pos).location
        additional = pos - z_offset

        # we're looking at a different data file
        # (load local header into memory)
        if s_offset != self.stream_offset:
            self.stream_offset = s_offset
            self.stream.seek(s_offset)
            self._load_stream_item()

        header_len = len(self.zip_header)
        if additional < header_len:
            self.state = File.HEADER
            self.offset = additional
            return pos

        # assume currently in the data file
        additional -= header_len
        self.state = File.DATA

        # if the file hasn't been opened yet, open it and find its size
        if not self.data: self._open_data_file()

        if additional < self.data_len:
            self.data.seek(additional)
        else:
            self.state = File.DESCRIPTOR
            self.offset = additional - self.data_len

        return pos

    def seekable(self):
        return True

    def tell(self):
        return self.cursor

    def writable(self):
        return False
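
Usage sketch (arguments illustrative; `info` must supply directory_offset, filesize and a jump_tree as the class assumes). Since File implements readinto(), it can be wrapped in a BufferedReader for efficient small reads:

from io import BufferedReader

f = File('/archive.zip', 0, info, base='/exploded', depth=2)
reader = BufferedReader(f)
header = reader.read(4096)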
Example #42
    def load(self, file: FileIO):
        self.ptr = file.tell()
        indicator = file.read(1)
        assert indicator in (OP, ED)
        self.key, self.value = load(file)
        self.size = file.tell() - self.ptr
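
OP and ED are constants defined elsewhere in that project. Plausible one-byte definitions that make the assert mirror the raw 0/1 check of example 39 (an assumption, not the original source):

OP = b'\x00'  # file.read(1) returns bytes, so compare against bytes literals
ED = b'\x01'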