def split_txt(txt, epub_split_size_kb=0):
    '''
    Ensure there are split points for converting to EPUB. A misdetected
    paragraph type can result in the entire document being one giant
    paragraph. In this case the EPUB parser will not be able to determine
    where to split the file to accommodate the EPUB file size limitation
    and will fail.
    '''
    if epub_split_size_kb > 0:
        # Work on the UTF-8 byte representation so sizes match file sizes.
        if isinstance(txt, unicode_type):
            txt = txt.encode('utf-8')
        total_bytes = len(txt)
        # Average chunk size for even splitting; +2 keeps a safety margin
        # under the EPUB size limit.
        chunk_size = long_type(total_bytes / (int(total_bytes / (epub_split_size_kb * 1024)) + 2))
        paragraphs = txt.split(b'\n\n')
        # Only rewrite the text when some paragraph exceeds the chunk size.
        if any(len(p) > chunk_size for p in paragraphs):
            txt = b'\n\n'.join(split_string_separator(p, chunk_size) for p in paragraphs)
    if isbytestring(txt):
        txt = txt.decode('utf-8')
    return txt
def split_txt(txt, epub_split_size_kb=0):
    '''
    Ensure there are split points for converting to EPUB. A misdetected
    paragraph type can result in the entire document being one giant
    paragraph. In this case the EPUB parser will not be able to determine
    where to split the file to accommodate the EPUB file size limitation
    and will fail.
    '''
    if epub_split_size_kb > 0:
        if isinstance(txt, unicode_type):
            # Measure in bytes, since the size limit is a byte count.
            txt = txt.encode('utf-8')
        nbytes = len(txt)
        # Number of chunks the text should roughly be divided into;
        # the +2 acts as a safety margin below the limit.
        n_chunks = int(nbytes / (epub_split_size_kb * 1024)) + 2
        target = long_type(nbytes / n_chunks)
        pieces = txt.split(b'\n\n')
        longest = max(map(len, pieces)) if pieces else 0
        # Break up paragraphs only when at least one is over the target.
        if longest > target:
            pieces = [split_string_separator(p, target) for p in pieces]
            txt = b'\n\n'.join(pieces)
    if isbytestring(txt):
        txt = txt.decode('utf-8')
    return txt
def get_lastrowid(self, cursor):
    # SQLite3 + Python can overflow the DB-API lastrowid on 32-bit
    # systems, so ask SQLite directly and convert the value ourselves.
    cursor.execute('SELECT last_insert_rowid()')
    return long_type(cursor.fetchone()[0])
def _bytelist2longBigEndian(blist): "Transform a list of characters into a list of longs." imax = len(blist) // 4 hl = [0] * imax j = 0 i = 0 while i < imax: b0 = long_type(blist[j]) << 24 b1 = long_type(blist[j + 1]) << 16 b2 = long_type(blist[j + 2]) << 8 b3 = long_type(blist[j + 3]) hl[i] = b0 | b1 | b2 | b3 i = i + 1 j = j + 4 return hl
def _bytelist2longBigEndian(blist):
    "Transform a list of characters into a list of longs."
    # Consume the input in 4-byte groups, most significant byte first;
    # any trailing partial group is ignored.
    words = []
    for base in range(0, (len(blist) // 4) * 4, 4):
        word = (long_type(blist[base]) << 24) \
            | (long_type(blist[base + 1]) << 16) \
            | (long_type(blist[base + 2]) << 8) \
            | long_type(blist[base + 3])
        words.append(word)
    return words
def build_header(self, section_lengths, out_stream):
    '''
    Write a PalmDB (PDB) header for the given sections to out_stream.

    :param section_lengths: length in bytes of each section in the file.
    :param out_stream: binary stream the header bytes are written to.
    '''
    now = int(time.time())
    nrecords = len(section_lengths)
    # 32-byte title, attributes/version (0), created/modified timestamps,
    # then backup date, modnum, appinfo and sortinfo offsets all zero.
    out_stream.write(self.title + struct.pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0))
    # Type/creator identity, uniqueIDseed, nextRecordListID, record count.
    out_stream.write(self.identity + struct.pack('>IIH', nrecords, 0, nrecords))
    # First record starts after the 78-byte fixed header, the 8-byte
    # record-list entries and the 2 trailing pad bytes.
    offset = 78 + (8 * nrecords) + 2
    for section_length in section_lengths:
        # 4-byte record offset + attributes byte + 3-byte unique ID (zero).
        out_stream.write(struct.pack('>LBBBB', offset, 0, 0, 0, 0))
        offset += section_length
    # Two pad bytes terminate the record list. Must be a bytes literal:
    # writing str to a binary stream raises TypeError on Python 3.
    out_stream.write(b'\x00\x00')
def RetrieveObject(self, ui, start=-1, length=-1):
    '''
    Retrieve the contents of a document from the archive.

    :param ui: UnitInfo describing the object to read.
    :param start: byte offset to start reading from; -1 means the start.
    :param length: number of bytes to read; -1 means the whole unit.
    :return: result of chmlib.chm_retrieve_object, or ``(0, b'')`` when
             the archive is not open or ui is missing.
    '''
    if not (self.file and ui):
        return 0, b''
    # `nbytes` replaces a local that shadowed the builtin `len`.
    nbytes = ui.length if length == -1 else length
    offset = 0 if start == -1 else long_type(start)
    return chmlib.chm_retrieve_object(self.file, ui, offset, nbytes)
def update(self, inBuf):
    """Add to the current message.

    Update the mssha1 object with the string arg. Repeated calls
    are equivalent to a single call with the concatenation of all
    the arguments, i.e. s.update(a); s.update(b) is equivalent
    to s.update(a+b).

    The hash is immediately calculated for all full blocks. The final
    calculation is made in digest(). It will calculate 1-2 blocks,
    depending on how much padding we have to add. This allows us to
    keep an intermediate value for the hash, so that we only need to
    make minimal recalculation if we call update() to add more data
    to the hashed string.
    """
    # Normalize to a bytearray so indexing yields ints on both py2/py3.
    inBuf = bytearray(inBuf)
    leninBuf = long_type(len(inBuf))
    # Compute number of bytes mod 64 (position within the current block).
    index = (self.count[1] >> 3) & 0x3F
    # Update number of bits; count is a 2-word (high, low) bit counter,
    # carry into the high word on low-word overflow.
    self.count[1] = self.count[1] + (leninBuf << 3)
    if self.count[1] < (leninBuf << 3):
        self.count[0] = self.count[0] + 1
    self.count[0] = self.count[0] + (leninBuf >> 29)
    # Bytes still needed to complete the current 64-byte block.
    partLen = 64 - index
    if leninBuf >= partLen:
        # Complete the pending block and hash it.
        self.input[index:] = inBuf[:partLen]
        self._transform(_bytelist2longBigEndian(self.input))
        i = partLen
        # Hash every remaining full 64-byte block directly from inBuf.
        while i + 63 < leninBuf:
            self._transform(_bytelist2longBigEndian(inBuf[i:i + 64]))
            i = i + 64
        # NOTE: while/else — runs when the loop exits normally (always
        # here, as there is no break): stash the leftover tail bytes.
        else:
            self.input = inBuf[i:leninBuf]
    else:
        # Not enough data to fill a block; just buffer it.
        i = 0
        self.input = self.input + inBuf
def update(self, inBuf):
    """Add to the current message.

    Update the mssha1 object with the string arg. Repeated calls
    are equivalent to a single call with the concatenation of all
    the arguments, i.e. s.update(a); s.update(b) is equivalent
    to s.update(a+b).

    The hash is immediately calculated for all full blocks. The final
    calculation is made in digest(). It will calculate 1-2 blocks,
    depending on how much padding we have to add. This allows us to
    keep an intermediate value for the hash, so that we only need to
    make minimal recalculation if we call update() to add more data
    to the hashed string.
    """
    # Normalize to a bytearray so indexing yields ints on both py2/py3.
    inBuf = bytearray(inBuf)
    leninBuf = long_type(len(inBuf))
    # Compute number of bytes mod 64 (position within the current block).
    index = (self.count[1] >> 3) & 0x3F
    # Update number of bits; count is a 2-word (high, low) bit counter,
    # carry into the high word on low-word overflow.
    self.count[1] = self.count[1] + (leninBuf << 3)
    if self.count[1] < (leninBuf << 3):
        self.count[0] = self.count[0] + 1
    self.count[0] = self.count[0] + (leninBuf >> 29)
    # Bytes still needed to complete the current 64-byte block.
    partLen = 64 - index
    if leninBuf >= partLen:
        # Complete the pending block and hash it.
        self.input[index:] = inBuf[:partLen]
        self._transform(_bytelist2longBigEndian(self.input))
        i = partLen
        # Hash every remaining full 64-byte block directly from inBuf.
        while i + 63 < leninBuf:
            self._transform(_bytelist2longBigEndian(inBuf[i:i+64]))
            i = i + 64
        # NOTE: while/else — runs when the loop exits normally (always
        # here, as there is no break): stash the leftover tail bytes.
        else:
            self.input = inBuf[i:leninBuf]
    else:
        # Not enough data to fill a block; just buffer it.
        i = 0
        self.input = self.input + inBuf