Exemple #1
0
 def book_tbs(self, data, first):
     """Compute the TBS bytes for a book record and store them in
     ``self.bytestring``.

     Three cases are handled, all encoded with ``flag_size=3``:

     * ``data['spans']`` is not None: the spanning node's index is encoded
       with the 0b010 and 0b001 flags.
     * Nothing completes and exactly one node either starts or ends in the
       record: that node's index is encoded with the 0b010 flag.
     * Otherwise: the lowest index among all starting, completing and
       ending nodes is encoded with the 0b010 flag, plus the 0b100 flag
       carrying the total node count.

     ``first`` is accepted but not used here.
     """
     span_node = data['spans']
     if span_node is not None:
         self.bytestring = encode_tbs(
             span_node.index, {0b010: 0, 0b001: 0}, flag_size=3)
         return

     starts = data['starts']
     completes = data['completes']
     ends = data['ends']

     # Is there exactly one node touching this record, either starting or
     # ending (but not completing) in it?
     if completes:
         single_node = False
     elif len(starts) == 1 and not ends:
         single_node = True
     else:
         single_node = len(ends) == 1 and not starts

     if single_node:
         only = (starts or ends)[0]
         self.bytestring = encode_tbs(only.index, {0b010: 0}, flag_size=3)
         return

     # Several nodes: report the lowest index and how many there are
     ordered = sorted(
         [n for group in (starts, completes, ends) for n in group],
         key=lambda n: n.index)
     self.bytestring = encode_tbs(
         ordered[0].index, {0b010: 0, 0b100: len(ordered)}, flag_size=3)
Exemple #2
0
 def book_tbs(self, data, first):
     """Set ``self.bytestring`` to the TBS encoding for a book record.

     If a node spans the record (``data['spans']`` is not None) its index is
     encoded with flags 0b010 and 0b001. If nothing completes and a single
     node starts or ends in the record, that node's index is encoded with
     flag 0b010. In every other case the smallest node index is encoded
     with flag 0b010 and flag 0b100 set to the number of nodes. All
     encodings use ``flag_size=3``. The ``first`` argument is unused.
     """
     covering = data['spans']
     if covering is not None:
         flags = {0b010: 0, 0b001: 0}
         self.bytestring = encode_tbs(covering.index, flags, flag_size=3)
         return

     starts = data['starts']
     completes = data['completes']
     ends = data['ends']

     # Exactly one node, which either starts or ends (never completes) here
     if completes:
         lone = False
     elif len(starts) == 1 and not ends:
         lone = True
     else:
         lone = len(ends) == 1 and not starts

     if lone:
         candidates = starts if starts else ends
         self.bytestring = encode_tbs(
             candidates[0].index, {0b010: 0}, flag_size=3)
     else:
         everything = [
             node for bucket in (starts, completes, ends) for node in bucket
         ]
         by_index = sorted(everything, key=lambda node: node.index)
         self.bytestring = encode_tbs(
             by_index[0].index, {0b010: 0, 0b100: len(by_index)},
             flag_size=3)
Exemple #3
0
def sequences_to_bytes(sequences):
    """Encode every ``(value, extra)`` pair in *sequences* with
    ``encode_tbs`` and return the results concatenated as one bytestring.

    Only the first sequence is encoded with a flag size of 3; every
    subsequent sequence uses 4 because it could need the 0b1000 flag.
    """
    encoded = [
        encode_tbs(val, extra, 3 if position == 0 else 4)
        for position, (val, extra) in enumerate(sequences)
    ]
    return b''.join(encoded)
Exemple #4
0
def sequences_to_bytes(sequences):
    """Return the concatenated ``encode_tbs`` output for each
    ``(value, extra)`` pair in *sequences*.

    The flag size is 3 for the first pair only; all later pairs are encoded
    with a flag size of 4 since they could need the 0b1000 flag.
    """
    chunks = []
    for position, (value, flags) in enumerate(sequences):
        width = 4 if position else 3
        chunks.append(encode_tbs(value, flags, width))
    return b''.join(chunks)
Exemple #5
0
    def __init__(self,
                 data,
                 is_periodical,
                 first=False,
                 section_map=None,
                 after_first=False):
        """Compute the TBS bytes for one record and store them in
        ``self.bytestring``.

        :param data: Description of the index nodes touching the record.
            When truthy, its ``'starts'``, ``'ends'`` and ``'completes'``
            entries are iterated here (periodicals only) and the whole
            object is handed on to ``periodical_tbs`` / ``book_tbs``.
        :param is_periodical: Selects the periodical encoding instead of
            the book encoding.
        :param first: Passed through unchanged to ``periodical_tbs`` /
            ``book_tbs``.
        :param section_map: Stored as ``self.section_map``. Defaults to a
            new empty dict for each instance.
        :param after_first: For a periodical record with no data, selects
            the ``type_011`` bytes instead of an empty bytestring.
        """
        # Use a None sentinel rather than a ``{}`` default: a literal dict
        # default is created once and would be shared by every instance.
        self.section_map = {} if section_map is None else section_map

        if not is_periodical:
            if not data:
                self.bytestring = b''
            else:
                self.book_tbs(data, first)
            return

        # The starting bytes.
        # The value is zero which I think indicates the periodical
        # index entry. The values for the various flags seem to be
        # unused. If the 0b100 is present, it means that the record
        # deals with section 1 (or is the final record with section
        # transitions).
        self.type_010 = encode_tbs(0, {0b010: 0}, flag_size=3)
        self.type_011 = encode_tbs(0, {0b010: 0, 0b001: 0}, flag_size=3)
        self.type_110 = encode_tbs(0, {0b100: 2, 0b010: 0}, flag_size=3)
        self.type_111 = encode_tbs(
            0, {0b100: 2, 0b010: 0, 0b001: 0}, flag_size=3)

        if not data:
            # A record with no nodes after the first one can happen if it
            # contains only text between the periodical start and the
            # first section
            self.bytestring = self.type_011 if after_first else b''
            return

        # Group every node touching the record by depth, ordered by offset
        depth_map = defaultdict(list)
        for key in ('starts', 'ends', 'completes'):
            for node in data[key]:
                depth_map[node.depth].append(node)
        for nodes in itervalues(depth_map):
            nodes.sort(key=lambda x: x.offset)
        self.periodical_tbs(data, first, depth_map)
Exemple #6
0
    def __init__(self,
                 data,
                 is_periodical,
                 first=False,
                 section_map=None,
                 after_first=False):
        """Compute the TBS bytes for one record and store them in
        ``self.bytestring``.

        :param data: Description of the index nodes touching the record.
            When truthy, its ``'starts'``, ``'ends'`` and ``'completes'``
            entries are iterated here (periodicals only) and the whole
            object is handed on to ``periodical_tbs`` / ``book_tbs``.
        :param is_periodical: Selects the periodical encoding instead of
            the book encoding.
        :param first: Passed through unchanged to ``periodical_tbs`` /
            ``book_tbs``.
        :param section_map: Stored as ``self.section_map``. Defaults to a
            new empty dict for each instance.
        :param after_first: For a periodical record with no data, selects
            the ``type_011`` bytes instead of an empty bytestring.
        """
        # Use a None sentinel rather than a ``{}`` default: a literal dict
        # default is created once and would be shared by every instance.
        self.section_map = {} if section_map is None else section_map

        if not is_periodical:
            if not data:
                self.bytestring = b''
            else:
                self.book_tbs(data, first)
            return

        # The starting bytes.
        # The value is zero which I think indicates the periodical
        # index entry. The values for the various flags seem to be
        # unused. If the 0b100 is present, it means that the record
        # deals with section 1 (or is the final record with section
        # transitions).
        self.type_010 = encode_tbs(0, {0b010: 0}, flag_size=3)
        self.type_011 = encode_tbs(0, {0b010: 0, 0b001: 0}, flag_size=3)
        self.type_110 = encode_tbs(0, {0b100: 2, 0b010: 0}, flag_size=3)
        self.type_111 = encode_tbs(
            0, {0b100: 2, 0b010: 0, 0b001: 0}, flag_size=3)

        if not data:
            # A record with no nodes after the first one can happen if it
            # contains only text between the periodical start and the
            # first section
            self.bytestring = self.type_011 if after_first else b''
            return

        # Group every node touching the record by depth, ordered by offset
        depth_map = defaultdict(list)
        for key in ('starts', 'ends', 'completes'):
            for node in data[key]:
                depth_map[node.depth].append(node)
        # .values() rather than .itervalues(): the latter does not exist on
        # Python 3 dicts, while .values() works on both Python 2 and 3.
        for nodes in depth_map.values():
            nodes.sort(key=lambda x: x.offset)
        self.periodical_tbs(data, first, depth_map)
Exemple #7
0
    def periodical_tbs(self, data, first, depth_map):
        """Compute the TBS bytes for a periodical record and store them in
        ``self.bytestring``.

        :param data: Description of the nodes touching the record; the
            ``'starts'``, ``'spans'``, ``'ends'`` and ``'completes'``
            entries are read here.
        :param first: Accepted but not used in this method.
        :param depth_map: Mapping of node depth to the list of nodes at that
            depth. Depth 0 is the periodical node, depth 1 the sections and
            depth 2 the articles. Missing depths are looked up with plain
            indexing, so the mapping must tolerate that (the caller in this
            class passes a ``defaultdict(list)``).

        Reads ``self.section_map`` and the ``self.type_*`` starting bytes.
        """
        buf = io.BytesIO()

        has_section_start = (depth_map[1] and set(depth_map[1]).intersection(
            set(data['starts'])))
        spanner = data['spans']
        parent_section_index = -1

        if depth_map[0]:
            # We have a terminal record

            # Find the first non periodical node
            first_node = None
            for nodes in (depth_map[1], depth_map[2]):
                for node in nodes:
                    if (first_node is None or (node.offset, node.depth) <
                            (first_node.offset, first_node.depth)):
                        first_node = node

            typ = (self.type_110 if has_section_start else self.type_010)

            # parent_section_index is needed for the last record
            if first_node is not None and first_node.depth > 0:
                parent_section_index = (first_node.index if first_node.depth
                                        == 1 else first_node.parent_index)
            else:
                parent_section_index = max(iter(self.section_map))

        else:
            # Non terminal record

            if spanner is not None:
                # record is spanned by a single article
                parent_section_index = spanner.parent_index
                typ = (self.type_110
                       if parent_section_index == 1 else self.type_010)
            elif not depth_map[1]:
                # has only article nodes, i.e. spanned by a section
                parent_section_index = depth_map[2][0].parent_index
                typ = (self.type_111
                       if parent_section_index == 1 else self.type_010)
            else:
                # has section transitions
                if depth_map[2]:
                    parent_section_index = depth_map[2][0].parent_index
                else:
                    parent_section_index = depth_map[1][0].index
                typ = self.type_011

        buf.write(typ)

        if typ not in (self.type_110,
                       self.type_111) and parent_section_index > 0:
            extra = {}
            # Write starting section information
            if spanner is None:
                num_articles = len([
                    a for a in depth_map[1]
                    if a.parent_index == parent_section_index
                ])
                if not depth_map[1]:
                    extra = {0b0001: 0}
                if num_articles > 1:
                    extra = {0b0100: num_articles}
            buf.write(encode_tbs(parent_section_index, extra))

        if spanner is None:
            articles = depth_map[2]
            sections = {self.section_map[a.parent_index] for a in articles}
            sections = sorted(sections, key=lambda x: x.offset)
            section_map = {
                s: [a for a in articles if a.parent_index == s.index]
                for s in sections
            }
            for i, section in enumerate(sections):
                # All the articles in this record that belong to section
                articles = section_map[section]
                first_article = articles[0]
                last_article = articles[-1]
                num = len(articles)
                last_article_ends = (last_article in data['ends']
                                     or last_article in data['completes'])

                # The last section present in the record has no successor.
                # An explicit bounds check is used instead of a bare
                # ``except:`` so that unrelated exceptions are not swallowed.
                next_sec = sections[i + 1] if i + 1 < len(sections) else None

                extra = {}
                if num > 1:
                    extra[0b0100] = num
                if False and i == 0 and next_sec is not None:
                    # Write offset to next section from start of record
                    # I can't figure out exactly when Kindlegen decides to
                    # write this so I have disabled it for now.
                    extra[0b0001] = next_sec.offset - data['offset']

                buf.write(
                    encode_tbs(first_article.index - section.index, extra))

                if next_sec is not None:
                    buf.write(
                        encode_tbs(last_article.index - next_sec.index,
                                   {0b1000: 0}))

                # If a section TOC starts and extends into the next record add
                # a trailing vwi. We detect this by TBS type==3, processing last
                # section present in the record, and the last article in that
                # section either ends or completes and doesn't finish
                # on the last byte of the record.
                elif (typ == self.type_011 and last_article_ends and
                      ((last_article.offset + last_article.size) % RECORD_SIZE
                       > 0)):
                    buf.write(
                        encode_tbs(last_article.index - section.index - 1,
                                   {0b1000: 0}))

        else:
            # Record is spanned by a single article: write its index relative
            # to the parent section
            buf.write(
                encode_tbs(spanner.index - parent_section_index, {0b0001: 0}))

        self.bytestring = buf.getvalue()
Exemple #8
0
    def periodical_tbs(self, data, first, depth_map):
        """Compute the TBS bytes for a periodical record and store them in
        ``self.bytestring``.

        :param data: Description of the nodes touching the record; the
            ``'starts'``, ``'spans'``, ``'ends'`` and ``'completes'``
            entries are read here.
        :param first: Accepted but not used in this method.
        :param depth_map: Mapping of node depth to the list of nodes at that
            depth. Depth 0 is the periodical node, depth 1 the sections and
            depth 2 the articles. Missing depths are looked up with plain
            indexing, so the mapping must tolerate that (the caller in this
            class passes a ``defaultdict(list)``).

        Reads ``self.section_map`` and the ``self.type_*`` starting bytes.
        """
        buf = io.BytesIO()

        has_section_start = (depth_map[1] and
                set(depth_map[1]).intersection(set(data['starts'])))
        spanner = data['spans']
        parent_section_index = -1

        if depth_map[0]:
            # We have a terminal record

            # Find the first non periodical node
            first_node = None
            for nodes in (depth_map[1], depth_map[2]):
                for node in nodes:
                    if (first_node is None or (node.offset, node.depth) <
                            (first_node.offset, first_node.depth)):
                        first_node = node

            typ = (self.type_110 if has_section_start else self.type_010)

            # parent_section_index is needed for the last record
            if first_node is not None and first_node.depth > 0:
                parent_section_index = (first_node.index if first_node.depth == 1 else first_node.parent_index)
            else:
                parent_section_index = max(iter(self.section_map))

        else:
            # Non terminal record

            if spanner is not None:
                # record is spanned by a single article
                parent_section_index = spanner.parent_index
                typ = (self.type_110 if parent_section_index == 1 else
                        self.type_010)
            elif not depth_map[1]:
                # has only article nodes, i.e. spanned by a section
                parent_section_index = depth_map[2][0].parent_index
                typ = (self.type_111 if parent_section_index == 1 else
                        self.type_010)
            else:
                # has section transitions
                if depth_map[2]:
                    parent_section_index = depth_map[2][0].parent_index
                else:
                    parent_section_index = depth_map[1][0].index
                typ = self.type_011

        buf.write(typ)

        if typ not in (self.type_110, self.type_111) and parent_section_index > 0:
            extra = {}
            # Write starting section information
            if spanner is None:
                num_articles = len([a for a in depth_map[1] if a.parent_index == parent_section_index])
                if not depth_map[1]:
                    extra = {0b0001: 0}
                if num_articles > 1:
                    extra = {0b0100: num_articles}
            buf.write(encode_tbs(parent_section_index, extra))

        if spanner is None:
            articles = depth_map[2]
            sections = {self.section_map[a.parent_index] for a in
                articles}
            sections = sorted(sections, key=lambda x:x.offset)
            section_map = {s:[a for a in articles if a.parent_index ==
                s.index] for s in sections}
            for i, section in enumerate(sections):
                # All the articles in this record that belong to section
                articles = section_map[section]
                first_article = articles[0]
                last_article = articles[-1]
                num = len(articles)
                last_article_ends = (last_article in data['ends'] or
                        last_article in data['completes'])

                # The last section present in the record has no successor.
                # An explicit bounds check is used instead of a bare
                # ``except:`` so that unrelated exceptions are not swallowed.
                next_sec = sections[i+1] if i+1 < len(sections) else None

                extra = {}
                if num > 1:
                    extra[0b0100] = num
                if False and i == 0 and next_sec is not None:
                    # Write offset to next section from start of record
                    # I can't figure out exactly when Kindlegen decides to
                    # write this so I have disabled it for now.
                    extra[0b0001] = next_sec.offset - data['offset']

                buf.write(encode_tbs(first_article.index-section.index, extra))

                if next_sec is not None:
                    buf.write(encode_tbs(last_article.index-next_sec.index,
                        {0b1000: 0}))

                # If a section TOC starts and extends into the next record add
                # a trailing vwi. We detect this by TBS type==3, processing last
                # section present in the record, and the last article in that
                # section either ends or completes and doesn't finish
                # on the last byte of the record.
                elif (typ == self.type_011 and last_article_ends and
                      ((last_article.offset+last_article.size) % RECORD_SIZE > 0)
                     ):
                    buf.write(encode_tbs(last_article.index-section.index-1,
                        {0b1000: 0}))

        else:
            # Record is spanned by a single article: write its index relative
            # to the parent section
            buf.write(encode_tbs(spanner.index - parent_section_index,
                {0b0001: 0}))

        self.bytestring = buf.getvalue()