Example #1
def split_txt(txt, epub_split_size_kb=0):
    '''
    Ensure there are split points for converting
    to EPUB. A misdetected paragraph type can
    result in the entire document being one giant
    paragraph. In this case the EPUB parser will not
    be able to determine where to split the file
    to accommodate the EPUB file size limitation
    and will fail.
    '''
    # Nothing to do if no split size limit was specified
    if epub_split_size_kb > 0:
        if isinstance(txt, unicode_type):
            txt = txt.encode('utf-8')
        length_byte = len(txt)
        # Calculate an average chunk size for even splitting (+2 gives a safety margin)
        chunk_size = long_type(length_byte / (int(length_byte / (epub_split_size_kb * 1024)) + 2))
        # If any existing paragraph exceeds the chunk size, break it up further
        parts = txt.split(b'\n\n')
        lengths = tuple(map(len, parts))
        if lengths and max(lengths) > chunk_size:
            txt = b'\n\n'.join([
                split_string_separator(line, chunk_size) for line in parts
            ])
    if isbytestring(txt):
        txt = txt.decode('utf-8')

    return txt
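
For context, a minimal usage sketch. The helpers unicode_type, long_type, isbytestring and split_string_separator are assumed to come from the surrounding project (calibre's compatibility layer and text processor); the file name is illustrative.

    # Hypothetical input file; ask for split points roughly every 256 KB.
    raw = open('book.txt', 'rb').read().decode('utf-8')
    processed = split_txt(raw, epub_split_size_kb=256)
    # Paragraph breaks ('\n\n') should now occur at most ~chunk_size
    # bytes apart, so an EPUB splitter can cut on them.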
Example #2
    def get_lastrowid(self, cursor):
        # SQLite3 + Python has an integer-overflow issue on 32-bit systems with
        # the last-inserted rowid. Issue a SQL query instead, fetch the value,
        # and convert it to a Python long manually.
        query = 'SELECT last_insert_rowid()'
        cursor.execute(query)
        row = cursor.fetchone()

        return long_type(row[0])
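
The same query can be reproduced with the standard sqlite3 module; a minimal sketch (long_type is effectively int on Python 3):

    import sqlite3

    conn = sqlite3.connect(':memory:')
    cur = conn.cursor()
    cur.execute('CREATE TABLE t (x INTEGER)')
    cur.execute('INSERT INTO t (x) VALUES (1)')
    cur.execute('SELECT last_insert_rowid()')   # the query the helper issues
    rowid = int(cur.fetchone()[0])
    print(rowid)                                # 1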
Example #3
def _bytelist2longBigEndian(blist):
    "Transform a list of bytes into a list of 32-bit big-endian longs."

    imax = len(blist) // 4  # number of complete 4-byte words
    hl = [0] * imax

    j = 0
    i = 0
    while i < imax:
        b0 = long_type(blist[j]) << 24
        b1 = long_type(blist[j + 1]) << 16
        b2 = long_type(blist[j + 2]) << 8
        b3 = long_type(blist[j + 3])
        hl[i] = b0 | b1 | b2 | b3
        i = i + 1
        j = j + 4

    return hl
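
The loop is equivalent to unpacking big-endian unsigned 32-bit words with the standard struct module; a sketch for comparison (not how the original module does it):

    import struct

    def bytelist2long_struct(blist):
        nwords = len(blist) // 4
        return list(struct.unpack('>%dI' % nwords, bytes(blist[:nwords * 4])))

    data = bytearray(range(8))
    assert bytelist2long_struct(data) == _bytelist2longBigEndian(data)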
Example #4
    def build_header(self, section_lengths, out_stream):
        '''
        section_lengths: length of each section in the file.
        '''

        now = int(time.time())
        nrecords = len(section_lengths)

        out_stream.write(self.title + struct.pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0))
        out_stream.write(self.identity + struct.pack('>IIH', nrecords, 0, nrecords))

        # The PDB header is 78 bytes, followed by an 8-byte index entry per
        # record and a 2-byte gap before the first record's data.
        offset = 78 + (8 * nrecords) + 2
        for section_length in section_lengths:
            out_stream.write(struct.pack('>LBBBB', long_type(offset), 0, 0, 0, 0))
            offset += section_length
        out_stream.write(b'\x00\x00')
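
A worked example of the offset arithmetic, with hypothetical section sizes:

    lengths = [100, 200, 50]              # hypothetical section sizes
    offset = 78 + 8 * len(lengths) + 2    # 78-byte header + 8-byte entries + 2-byte gap
    starts = []
    for n in lengths:
        starts.append(offset)
        offset += n
    print(starts)                         # [104, 204, 404]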
Example #5
    def RetrieveObject(self, ui, start=-1, length=-1):
        '''Retrieve the contents of a document.

        This function takes a UnitInfo and two optional arguments, the first
        being the start address and the second the length. These define the
        amount of data to be read from the archive.
        '''
        if self.file and ui:
            # Read the whole unit unless an explicit length was given.
            nbytes = ui.length if length == -1 else length
            # Start at the beginning unless an explicit offset was given.
            offset = 0 if start == -1 else long_type(start)
            return chmlib.chm_retrieve_object(self.file, ui, offset, nbytes)
        else:
            return 0, b''
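
A hedged usage sketch against the chmlib bindings this class appears to wrap (the file path and object path are illustrative):

    import chmlib

    f = chmlib.chm_open('book.chm')
    status, ui = chmlib.chm_resolve_object(f, '/index.html')
    if status == chmlib.CHM_RESOLVE_SUCCESS:
        size, data = chmlib.chm_retrieve_object(f, ui, 0, ui.length)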
Example #6
    def update(self, inBuf):
        """Add to the current message.

        Update the mssha1 object with the string arg. Repeated calls
        are equivalent to a single call with the concatenation of all
        the arguments, i.e. s.update(a); s.update(b) is equivalent
        to s.update(a+b).

        The hash is immediately calculated for all full blocks. The final
        calculation is made in digest(). It will calculate 1-2 blocks,
        depending on how much padding we have to add. This allows us to
        keep an intermediate value for the hash, so that we only need to
        make minimal recalculation if we call update() to add more data
        to the hashed string.
        """

        inBuf = bytearray(inBuf)
        leninBuf = long_type(len(inBuf))

        # Compute number of bytes mod 64.
        index = (self.count[1] >> 3) & 0x3F

        # Update the 64-bit bit count, kept as two 32-bit halves.
        self.count[1] = self.count[1] + (leninBuf << 3)
        if self.count[1] < (leninBuf << 3):
            self.count[0] = self.count[0] + 1
        self.count[0] = self.count[0] + (leninBuf >> 29)

        partLen = 64 - index

        if leninBuf >= partLen:
            # Complete the buffered block and hash it, then hash any further
            # full 64-byte blocks directly from the input.
            self.input[index:] = inBuf[:partLen]
            self._transform(_bytelist2longBigEndian(self.input))
            i = partLen
            while i + 63 < leninBuf:
                self._transform(_bytelist2longBigEndian(inBuf[i:i + 64]))
                i = i + 64
            # Keep any trailing partial block for the next update()/digest().
            self.input = inBuf[i:leninBuf]
        else:
            i = 0
            self.input = self.input + inBuf
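
A hedged usage sketch, assuming the object follows the hashlib-style update()/digest() convention its docstring describes (the constructor name is inferred from the docstring's "mssha1 object"):

    h = mssha1()               # constructor name assumed
    h.update(b'hello ')
    h.update(b'world')         # same result as one update(b'hello world')
    print(h.digest())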