Example #1
0
 def read(self, lsd_decoder, bstr, known_prefix):
     """Decode one heading, record it in ``self.headings`` and return its text.

     The text is the first ``prefix_len`` characters of ``known_prefix``
     followed by ``postfix_len`` freshly decoded characters; both lengths
     come from ``lsd_decoder``.  The reference obtained from
     ``lsd_decoder.read_reference2()`` is stored on ``self.reference``.

     When the next bit of ``bstr`` is set, a list of (8-bit index, 16-bit
     character) pairs follows, preceded by an 8-bit pair count.  Pairs
     whose indexes are consecutive are merged into one string, and each
     merged run is stored in the heading's ``extensions`` list as
     ``(index_of_first_char, string)``.
     """
     heading = Heading()
     shared_len = lsd_decoder.decode_prefix_len()
     tail_len = lsd_decoder.decode_postfix_len()
     text = known_prefix[:shared_len]
     text += lsd_decoder.decode_heading(tail_len)
     heading.text = text
     self.reference = lsd_decoder.read_reference2()

     if bstr.read_bit():
         # Extra formatting items that are not visible in the header.
         pair_count = bstr.read_bits(8)
         if pair_count != 0:
             run = ""
             run_start = last_idx = 0
             for _ in range(pair_count):
                 idx = bstr.read_bits(8)
                 char = int2unichr(bstr.read_bits(16))
                 if run != "" and last_idx + 1 != idx:
                     # Index jumped: the current run is complete.
                     heading.extensions.append((run_start, run))
                     run = ""
                 if run == "":
                     # This character opens a new run.
                     run_start = idx
                 run += char
                 last_idx = idx
             # Flush the run that was still open when the input ended.
             heading.extensions.append((run_start, run))

     self.headings.append(heading)
     return heading.text
Example #2
0
 def read(self, lsd_decoder, bstr, known_prefix):
     """Read a single heading and append it to ``self.headings``.

     The heading text is built from ``known_prefix`` truncated to the
     decoded prefix length, plus a decoded postfix of the decoded postfix
     length.  ``self.reference`` is set from
     ``lsd_decoder.read_reference2()``.

     If the following bit in ``bstr`` is set, an 8-bit count and that
     many (8-bit index, 16-bit character) pairs are read.  Characters
     with consecutive indexes are joined, and every joined group is
     appended to the heading's ``extensions`` as ``(first_index, text)``.

     Returns the heading text.
     """
     entry = Heading()
     keep = lsd_decoder.decode_prefix_len()
     extra = lsd_decoder.decode_postfix_len()
     entry.text = known_prefix[:keep]
     entry.text += lsd_decoder.decode_heading(extra)
     self.reference = lsd_decoder.read_reference2()

     # The count byte is only present when the flag bit is set.
     if bstr.read_bit():
         item_count = bstr.read_bits(8)
     else:
         item_count = 0

     if item_count != 0:
         group = ""
         group_start = previous = 0
         for _ in range(item_count):
             position = bstr.read_bits(8)
             symbol = int2unichr(bstr.read_bits(16))
             continues_group = group != "" and previous + 1 == position
             if group != "" and not continues_group:
                 # Non-adjacent index: store the finished group first.
                 entry.extensions.append((group_start, group))
                 group = ""
             if group == "":
                 group_start = position
             group += symbol
             previous = position
         # Store the last group.
         entry.extensions.append((group_start, group))

     self.headings.append(entry)
     return entry.text
Example #3
0
 def decode_heading(self, size):
     """Decode ``size`` heading symbols and return them as one string.

     Each symbol index comes from ``self._ltHeadings.decode()`` and is
     looked up in ``self._heading_symbols``; the resulting code is
     converted to a character with ``int2unichr``.
     """
     chars = []
     for _ in range(size):
         code = self._heading_symbols[self._ltHeadings.decode()]
         # Heading symbols must fit in 16 bits.  LingvoEngine:2EAB84E8
         assert code <= 0xffff
         chars.append(int2unichr(code))
     return "".join(chars)
Example #4
0
 def decode_heading(self, size):
     """Return a string of ``size`` characters decoded from the heading stream.

     For every character, ``self._ltHeadings.decode()`` yields an index
     into ``self._heading_symbols``; the symbol found there is asserted
     to be at most 0xffff and then turned into a character.
     """
     pieces = []
     remaining = size
     while remaining > 0:
         index = self._ltHeadings.decode()
         symbol = self._heading_symbols[index]
         assert symbol <= 0xffff  # LingvoEngine:2EAB84E8
         pieces.append(int2unichr(symbol))
         remaining -= 1
     return "".join(pieces)
Example #5
0
 def read_unicode(self, size, big_endian=True):
     """Read ``size`` characters and return them as a string.

     Every character is obtained with ``self.read_some(2)``.  When
     ``big_endian`` is false the value is first passed through
     ``reverse16`` (presumably a 16-bit byte swap -- confirm against its
     definition) before being converted with ``int2unichr``.
     """
     chars = []
     for _ in range(size):
         code = self.read_some(2)
         if not big_endian:
             code = reverse16(code)
         chars.append(int2unichr(code))
     return "".join(chars)
Example #6
0
 def read_unicode(self, size, big_endian=True):
     """Return a string built from ``size`` values read via ``read_some(2)``.

     With ``big_endian`` left true each value is used as read; otherwise
     it goes through ``reverse16`` first.  The value is then converted to
     a character with ``int2unichr``.
     """
     decoded = []
     count = 0
     while count < size:
         raw = self.read_some(2)
         value = raw if big_endian else reverse16(raw)
         decoded.append(int2unichr(value))
         count += 1
     return "".join(decoded)
Example #7
0
 def decode_article(self, size):
     """Decode article text until it is at least ``size`` characters long.

     Each symbol taken from ``self._article_symbols`` selects one of
     three actions:

     * above 0x80: a literal character with code ``symbol - 0x80``;
     * 0x40 .. 0x80: a copy of up to ``symbol - 0x3d`` characters from
       the text decoded so far, starting at an offset read from the
       bit stream;
     * 0x00 .. 0x3F: a copy of up to ``symbol + 3`` characters from
       ``self.prefix``, starting at an offset read from the bit stream.
     """
     text = ""
     while len(text) < size:
         code = self._article_symbols[self._ltArticles.decode()]

         if code > 0x80:
             # Plain character.
             text += int2unichr(code - 0x80)
             continue

         if code > 0x3F:
             # Back-reference into what has been decoded already; the
             # offset width depends on the target size.
             offset = self.bstr.read_bits(tools.bit_length(size))
             length = code - 0x3d
             text += text[offset:offset + length]
         else:
             # Reference into the shared prefix; the offset width depends
             # on the prefix length.
             offset = self.bstr.read_bits(tools.bit_length(len(self.prefix)))
             length = code + 3
             text += self.prefix[offset:offset + length]
     return text
Example #8
0
 def decode_article(self, size):
     """Return decoded article text of at least ``size`` characters.

     Symbols are looked up in ``self._article_symbols`` using indexes
     from ``self._ltArticles.decode()``.  A symbol up to 0x3F copies
     ``symbol + 3`` characters (at most) out of ``self.prefix``; a symbol
     from 0x40 to 0x80 copies ``symbol - 0x3d`` characters (at most) out
     of the already decoded text; any larger symbol is the literal
     character ``symbol - 0x80``.  Copy offsets are read from
     ``self.bstr``.
     """
     out = ""
     while len(out) < size:
         sym_idx = self._ltArticles.decode()
         symbol = self._article_symbols[sym_idx]
         if symbol <= 0x3F:
             # Copy from the prefix.
             width = tools.bit_length(len(self.prefix))
             begin = self.bstr.read_bits(width)
             end = begin + symbol + 3
             out += self.prefix[begin:end]
         elif symbol <= 0x80:
             # Copy from the output produced so far.
             width = tools.bit_length(size)
             begin = self.bstr.read_bits(width)
             end = begin + symbol - 0x3d
             out += out[begin:end]
         else:
             # Literal character.
             out += int2unichr(symbol - 0x80)
     return out
Example #9
0
 def decode_article(self, size):
     """
     Decode User and Abrv dict article text of at least ``size`` characters.

     Each symbol taken from ``self._article_symbols`` selects one of
     three actions:

     * below 0x10000: a literal character with that code;
     * 0x10000 .. 0x1003F: a copy of up to ``symbol - 0xfffd``
       characters from ``self.prefix``;
     * 0x10040 and above: a copy of up to ``symbol - 0x1003d``
       characters from the text decoded so far.

     The start offset of a copy is read from ``self.bstr``.
     """
     text = ""
     while len(text) < size:
         code = self._article_symbols[self._ltArticles.decode()]

         if code < 0x10000:
             # Plain character.
             text += int2unichr(code)
             continue

         if code < 0x10040:
             # Reference into the shared prefix.
             offset = self.bstr.read_bits(tools.bit_length(len(self.prefix)))
             length = code - 0xfffd
             text += self.prefix[offset:offset + length]
         else:
             # Back-reference into what has been decoded already.
             offset = self.bstr.read_bits(tools.bit_length(size))
             length = code - 0x1003d
             text += text[offset:offset + length]
     return text
Example #10
0
 def decode_article(self, size):
     """
     Decode User and Abrv dict article; returns at least ``size`` characters.

     Symbols come from ``self._article_symbols`` indexed by
     ``self._ltArticles.decode()``.  Values under 0x10000 are literal
     character codes.  Values from 0x10000 up to 0x1003F copy at most
     ``symbol - 0xfffd`` characters from ``self.prefix``; values from
     0x10040 copy at most ``symbol - 0x1003d`` characters from the text
     decoded so far.  Copy offsets are read from ``self.bstr``.
     """
     out = ""
     while len(out) < size:
         sym_idx = self._ltArticles.decode()
         symbol = self._article_symbols[sym_idx]
         if symbol >= 0x10040:
             # Copy from the output produced so far.
             width = tools.bit_length(size)
             begin = self.bstr.read_bits(width)
             end = begin + symbol - 0x1003d
             out += out[begin:end]
         elif symbol >= 0x10000:
             # Copy from the prefix.
             width = tools.bit_length(len(self.prefix))
             begin = self.bstr.read_bits(width)
             end = begin + symbol - 0xfffd
             out += self.prefix[begin:end]
         else:
             # Literal character.
             out += int2unichr(symbol)
     return out
Example #11
0
 def read_xored_prefix(self, size):
     """Read ``size`` characters, each stored as 16 bits XORed with 0x879A.

     The values are read from ``self.bstr`` and the decoded characters
     are returned as a single string.
     """
     return "".join(
         int2unichr(self.bstr.read_bits(16) ^ 0x879A) for _ in range(size)
     )
Example #12
0
 def read_xored_prefix(self, size):
     """Return a ``size``-character string read from ``self.bstr``.

     Every character occupies 16 bits in the stream and is XORed with
     0x879A before being converted with ``int2unichr``.
     """
     xor_key = 0x879A
     chars = [
         int2unichr(self.bstr.read_bits(16) ^ xor_key)
         for _ in range(size)
     ]
     return "".join(chars)