def book_tbs(self, data, first):
    """Compute the trailing byte sequence for a book record.

    The encoded bytes are stored in ``self.bytestring``; nothing is returned.

    :param data: Mapping read by the keys ``'spans'``, ``'starts'``,
        ``'completes'`` and ``'ends'``. ``'spans'`` is either ``None`` or an
        object with an ``index`` attribute; the other three are sequences of
        objects with an ``index`` attribute.
    :param first: Accepted for signature compatibility; not used here.
    """
    spanning = data['spans']
    if spanning is not None:
        # A spanning node is present: encode its index with both low flags.
        self.bytestring = encode_tbs(
            spanning.index, {0b010: 0, 0b001: 0}, flag_size=3)
        return

    starts, completes, ends = (
        data['starts'], data['completes'], data['ends'])

    if not completes:
        # Exactly one node touches the record, either starting or ending.
        if len(starts) == 1 and not ends:
            self.bytestring = encode_tbs(
                starts[0].index, {0b010: 0}, flag_size=3)
            return
        if len(ends) == 1 and not starts:
            self.bytestring = encode_tbs(
                ends[0].index, {0b010: 0}, flag_size=3)
            return

    # General case: encode the lowest node index plus the total node count.
    ordered = sorted(
        [node for group in (starts, completes, ends) for node in group],
        key=lambda node: node.index)
    self.bytestring = encode_tbs(
        ordered[0].index, {0b010: 0, 0b100: len(ordered)}, flag_size=3)
def book_tbs(self, data, first):
    """Set ``self.bytestring`` to the encoded TBS of a book record.

    :param data: Mapping providing ``'spans'`` (``None`` or an object with an
        ``index`` attribute) and the sequences ``'starts'``, ``'completes'``
        and ``'ends'`` whose items have an ``index`` attribute.
    :param first: Unused by this method.
    """
    span_node = data['spans']
    if span_node is None:
        starts, completes, ends = (
            data['starts'], data['completes'], data['ends'])
        only_one_boundary = not completes and (
            (len(starts) == 1 and not ends) or
            (len(ends) == 1 and not starts))
        if only_one_boundary:
            # A single node either starts or ends in this record.
            lone = starts[0] if starts else ends[0]
            value, flags = lone.index, {0b010: 0}
        else:
            # Several nodes: lowest index first, with the node count attached.
            collected = list(starts)
            collected += completes
            collected += ends
            collected.sort(key=lambda entry: entry.index)
            value = collected[0].index
            flags = {0b010: 0, 0b100: len(collected)}
    else:
        # A spanning node is present.
        value, flags = span_node.index, {0b010: 0, 0b001: 0}

    self.bytestring = encode_tbs(value, flags, flag_size=3)
def sequences_to_bytes(sequences):
    """Encode ``(value, extra)`` pairs with ``encode_tbs`` and concatenate them.

    Only the first sequence is encoded with a flag size of 3; every later
    sequence uses 4 because it could need the ``0b1000`` flag.

    :param sequences: Iterable of ``(val, extra)`` pairs.
    :return: The concatenated encoded bytes (``b''`` for no sequences).
    """
    encoded = []
    for position, (val, extra) in enumerate(sequences):
        width = 3 if position == 0 else 4
        encoded.append(encode_tbs(val, extra, width))
    return b''.join(encoded)
def __init__(self, data, is_periodical, first=False, section_map=None,
             after_first=False):
    """Compute the trailing byte sequence of a record into ``self.bytestring``.

    :param data: Index information for the record. When falsy the record
        gets an empty byte string (or ``self.type_011`` for a periodical
        with ``after_first`` set). Otherwise it is read by the keys
        ``'starts'``, ``'ends'`` and ``'completes'`` and handed on to
        ``periodical_tbs`` or ``book_tbs``.
    :param is_periodical: Selects the periodical encoding instead of the
        book encoding.
    :param first: Passed through unchanged to ``book_tbs`` /
        ``periodical_tbs``.
    :param section_map: Stored as ``self.section_map``. When omitted (or
        ``None``) each instance gets its own new empty dict.
    :param after_first: Only consulted for a periodical record without data.
    """
    # A literal ``{}`` default is evaluated once at definition time, so it
    # would be shared by every instance created without this argument.
    self.section_map = {} if section_map is None else section_map

    if is_periodical:
        # The starting bytes.
        # The value is zero which I think indicates the periodical
        # index entry. The values for the various flags seem to be
        # unused. If the 0b100 is present, it means that the record
        # deals with section 1 (or is the final record with section
        # transitions).
        self.type_010 = encode_tbs(0, {0b010: 0}, flag_size=3)
        self.type_011 = encode_tbs(0, {0b010: 0, 0b001: 0}, flag_size=3)
        self.type_110 = encode_tbs(0, {0b100: 2, 0b010: 0}, flag_size=3)
        self.type_111 = encode_tbs(
            0, {0b100: 2, 0b010: 0, 0b001: 0}, flag_size=3)

        if not data:
            byts = b''
            if after_first:
                # This can happen if a record contains only text between
                # the periodical start and the first section
                byts = self.type_011
            self.bytestring = byts
        else:
            # Group every node touching this record by depth, each group
            # ordered by offset.
            depth_map = defaultdict(list)
            for x in ('starts', 'ends', 'completes'):
                for idx in data[x]:
                    depth_map[idx.depth].append(idx)
            for l in itervalues(depth_map):
                l.sort(key=lambda x: x.offset)
            self.periodical_tbs(data, first, depth_map)
    else:
        if not data:
            self.bytestring = b''
        else:
            self.book_tbs(data, first)
def __init__(self, data, is_periodical, first=False, section_map=None,
             after_first=False):
    """Compute the trailing byte sequence of a record into ``self.bytestring``.

    :param data: Index information for the record. When falsy the record
        gets an empty byte string (or ``self.type_011`` for a periodical
        with ``after_first`` set). Otherwise it is read by the keys
        ``'starts'``, ``'ends'`` and ``'completes'`` and handed on to
        ``periodical_tbs`` or ``book_tbs``.
    :param is_periodical: Selects the periodical encoding instead of the
        book encoding.
    :param first: Passed through unchanged to ``book_tbs`` /
        ``periodical_tbs``.
    :param section_map: Stored as ``self.section_map``. When omitted (or
        ``None``) each instance gets its own new empty dict.
    :param after_first: Only consulted for a periodical record without data.
    """
    # A literal ``{}`` default is evaluated once at definition time, so it
    # would be shared by every instance created without this argument.
    self.section_map = {} if section_map is None else section_map

    if is_periodical:
        # The starting bytes.
        # The value is zero which I think indicates the periodical
        # index entry. The values for the various flags seem to be
        # unused. If the 0b100 is present, it means that the record
        # deals with section 1 (or is the final record with section
        # transitions).
        self.type_010 = encode_tbs(0, {0b010: 0}, flag_size=3)
        self.type_011 = encode_tbs(0, {0b010: 0, 0b001: 0}, flag_size=3)
        self.type_110 = encode_tbs(0, {0b100: 2, 0b010: 0}, flag_size=3)
        self.type_111 = encode_tbs(
            0, {0b100: 2, 0b010: 0, 0b001: 0}, flag_size=3)

        if not data:
            byts = b''
            if after_first:
                # This can happen if a record contains only text between
                # the periodical start and the first section
                byts = self.type_011
            self.bytestring = byts
        else:
            # Group every node touching this record by depth, each group
            # ordered by offset.
            depth_map = defaultdict(list)
            for x in ('starts', 'ends', 'completes'):
                for idx in data[x]:
                    depth_map[idx.depth].append(idx)
            # ``values()`` exists on both Python 2 and 3 dicts, whereas
            # ``itervalues()`` is Python 2 only.
            for l in depth_map.values():
                l.sort(key=lambda x: x.offset)
            self.periodical_tbs(data, first, depth_map)
    else:
        if not data:
            self.bytestring = b''
        else:
            self.book_tbs(data, first)
def periodical_tbs(self, data, first, depth_map):
    """Build the trailing byte sequence for a periodical record.

    The bytes are accumulated in a buffer and stored in ``self.bytestring``.

    :param data: Mapping read by the keys ``'starts'``, ``'ends'``,
        ``'completes'`` and ``'spans'`` (``None`` or the single article
        spanning the record).
    :param first: Unused by this method.
    :param depth_map: Mapping from node depth to the list of nodes of that
        depth in this record. Going by the comments below, depth 0 is the
        periodical, 1 the sections and 2 the articles. It is indexed with
        keys that may be absent, so it is expected to behave like a
        ``defaultdict(list)``.
    """
    buf = io.BytesIO()

    has_section_start = (
        depth_map[1] and
        set(depth_map[1]).intersection(set(data['starts'])))
    spanner = data['spans']
    parent_section_index = -1

    if depth_map[0]:
        # We have a terminal record

        # Find the first non periodical node
        first_node = None
        for nodes in (depth_map[1], depth_map[2]):
            for node in nodes:
                if (first_node is None or
                        (node.offset, node.depth) <
                        (first_node.offset, first_node.depth)):
                    first_node = node

        typ = (self.type_110 if has_section_start else self.type_010)

        # parent_section_index is needed for the last record
        if first_node is not None and first_node.depth > 0:
            parent_section_index = (
                first_node.index if first_node.depth == 1
                else first_node.parent_index)
        else:
            parent_section_index = max(iter(self.section_map))
    else:
        # Non terminal record
        if spanner is not None:
            # record is spanned by a single article
            parent_section_index = spanner.parent_index
            typ = (self.type_110 if parent_section_index == 1
                   else self.type_010)
        elif not depth_map[1]:
            # has only article nodes, i.e. spanned by a section
            parent_section_index = depth_map[2][0].parent_index
            typ = (self.type_111 if parent_section_index == 1
                   else self.type_010)
        else:
            # has section transitions
            if depth_map[2]:
                parent_section_index = depth_map[2][0].parent_index
            else:
                parent_section_index = depth_map[1][0].index
            typ = self.type_011

    buf.write(typ)

    if (typ not in (self.type_110, self.type_111) and
            parent_section_index > 0):
        extra = {}
        # Write starting section information
        if spanner is None:
            num_articles = len([
                a for a in depth_map[1]
                if a.parent_index == parent_section_index])
            if not depth_map[1]:
                extra = {0b0001: 0}
            if num_articles > 1:
                extra = {0b0100: num_articles}
        buf.write(encode_tbs(parent_section_index, extra))

    if spanner is None:
        articles = depth_map[2]
        sections = {self.section_map[a.parent_index] for a in articles}
        sections = sorted(sections, key=lambda x: x.offset)
        section_map = {
            s: [a for a in articles if a.parent_index == s.index]
            for s in sections}
        for i, section in enumerate(sections):
            # All the articles in this record that belong to section
            section_articles = section_map[section]
            first_article = section_articles[0]
            last_article = section_articles[-1]
            num = len(section_articles)
            last_article_ends = (last_article in data['ends'] or
                                 last_article in data['completes'])

            # Explicit bounds check instead of a bare ``except:``, which
            # would also swallow KeyboardInterrupt and SystemExit.
            next_sec = sections[i + 1] if i + 1 < len(sections) else None

            extra = {}
            if num > 1:
                extra[0b0100] = num
            # NOTE: Kindlegen sometimes also writes, for the first section,
            # the offset from the start of the record to the next section
            # under flag 0b0001. It is unclear exactly when it does so,
            # therefore that entry is deliberately not emitted.

            buf.write(
                encode_tbs(first_article.index - section.index, extra))

            if next_sec is not None:
                buf.write(
                    encode_tbs(last_article.index - next_sec.index,
                               {0b1000: 0}))

            # If a section TOC starts and extends into the next record add
            # a trailing vwi. We detect this by TBS type==3, processing last
            # section present in the record, and the last article in that
            # section either ends or completes and doesn't finish
            # on the last byte of the record.
            elif (typ == self.type_011 and last_article_ends and
                  ((last_article.offset + last_article.size) %
                   RECORD_SIZE > 0)):
                buf.write(
                    encode_tbs(last_article.index - section.index - 1,
                               {0b1000: 0}))
    else:
        buf.write(
            encode_tbs(spanner.index - parent_section_index, {0b0001: 0}))

    self.bytestring = buf.getvalue()
def periodical_tbs(self, data, first, depth_map):
    """Build the trailing byte sequence for a periodical record.

    The bytes are accumulated in a buffer and stored in ``self.bytestring``.

    :param data: Mapping read by the keys ``'starts'``, ``'ends'``,
        ``'completes'`` and ``'spans'`` (``None`` or the single article
        spanning the record).
    :param first: Unused by this method.
    :param depth_map: Mapping from node depth to the list of nodes of that
        depth in this record. Going by the comments below, depth 0 is the
        periodical, 1 the sections and 2 the articles. It is indexed with
        keys that may be absent, so it is expected to behave like a
        ``defaultdict(list)``.
    """
    buf = io.BytesIO()

    has_section_start = (
        depth_map[1] and
        set(depth_map[1]).intersection(set(data['starts'])))
    spanner = data['spans']
    parent_section_index = -1

    if depth_map[0]:
        # We have a terminal record

        # Find the first non periodical node
        first_node = None
        for nodes in (depth_map[1], depth_map[2]):
            for node in nodes:
                if (first_node is None or
                        (node.offset, node.depth) <
                        (first_node.offset, first_node.depth)):
                    first_node = node

        typ = (self.type_110 if has_section_start else self.type_010)

        # parent_section_index is needed for the last record
        if first_node is not None and first_node.depth > 0:
            parent_section_index = (
                first_node.index if first_node.depth == 1
                else first_node.parent_index)
        else:
            parent_section_index = max(iter(self.section_map))
    else:
        # Non terminal record
        if spanner is not None:
            # record is spanned by a single article
            parent_section_index = spanner.parent_index
            typ = (self.type_110 if parent_section_index == 1
                   else self.type_010)
        elif not depth_map[1]:
            # has only article nodes, i.e. spanned by a section
            parent_section_index = depth_map[2][0].parent_index
            typ = (self.type_111 if parent_section_index == 1
                   else self.type_010)
        else:
            # has section transitions
            if depth_map[2]:
                parent_section_index = depth_map[2][0].parent_index
            else:
                parent_section_index = depth_map[1][0].index
            typ = self.type_011

    buf.write(typ)

    if (typ not in (self.type_110, self.type_111) and
            parent_section_index > 0):
        extra = {}
        # Write starting section information
        if spanner is None:
            num_articles = len([
                a for a in depth_map[1]
                if a.parent_index == parent_section_index])
            if not depth_map[1]:
                extra = {0b0001: 0}
            if num_articles > 1:
                extra = {0b0100: num_articles}
        buf.write(encode_tbs(parent_section_index, extra))

    if spanner is None:
        articles = depth_map[2]
        sections = {self.section_map[a.parent_index] for a in articles}
        sections = sorted(sections, key=lambda x: x.offset)
        section_map = {
            s: [a for a in articles if a.parent_index == s.index]
            for s in sections}
        for i, section in enumerate(sections):
            # All the articles in this record that belong to section
            section_articles = section_map[section]
            first_article = section_articles[0]
            last_article = section_articles[-1]
            num = len(section_articles)
            last_article_ends = (last_article in data['ends'] or
                                 last_article in data['completes'])

            # Explicit bounds check instead of a bare ``except:``, which
            # would also swallow KeyboardInterrupt and SystemExit.
            next_sec = sections[i + 1] if i + 1 < len(sections) else None

            extra = {}
            if num > 1:
                extra[0b0100] = num
            # NOTE: Kindlegen sometimes also writes, for the first section,
            # the offset from the start of the record to the next section
            # under flag 0b0001. It is unclear exactly when it does so,
            # therefore that entry is deliberately not emitted.

            buf.write(
                encode_tbs(first_article.index - section.index, extra))

            if next_sec is not None:
                buf.write(
                    encode_tbs(last_article.index - next_sec.index,
                               {0b1000: 0}))

            # If a section TOC starts and extends into the next record add
            # a trailing vwi. We detect this by TBS type==3, processing last
            # section present in the record, and the last article in that
            # section either ends or completes and doesn't finish
            # on the last byte of the record.
            elif (typ == self.type_011 and last_article_ends and
                  ((last_article.offset + last_article.size) %
                   RECORD_SIZE > 0)):
                buf.write(
                    encode_tbs(last_article.index - section.index - 1,
                               {0b1000: 0}))
    else:
        buf.write(
            encode_tbs(spanner.index - parent_section_index, {0b0001: 0}))

    self.bytestring = buf.getvalue()