def read_bits_s(self, count):
    """Read ``count`` bits one at a time and return them as an integer.

    Each bit comes from ``self.read_bit()``; the first bit read ends up as
    the most significant bit of the result.

    :param count: number of bits to read, at most 32.
    :return: the accumulated value (0 when ``count`` is not positive).
    :raises LsdError: if ``count`` is greater than 32.
    """
    if count > 32:
        raise LsdError("Many bits for read: %d" % count)

    value = 0
    for _ in range(count):
        # Make room for the next bit, then append it at the low end.
        value = (value << 1) + self.read_bit()
    return value
def read_some(self, length):
    """Read a value of ``length`` bytes using the matching fixed-size reader.

    :param length: 1 (``read_byte``), 2 (``read_word``) or 4 (``read_int``).
    :return: whatever the selected reader returns.
    :raises LsdError: for any other ``length``.
    """
    # Size -> name of the reader method; looked up lazily so that only the
    # reader actually needed is touched.
    readers = ((1, "read_byte"), (2, "read_word"), (4, "read_int"))
    for size, reader_name in readers:
        if length == size:
            return getattr(self, reader_name)()
    raise LsdError('Allow read byte, word and int length: %i' % length)
def read_bits_o(self, count):
    """Read ``count`` bits from ``self.record`` and return them as an integer.

    Bits are taken most-significant first, starting ``self.in_byte_pos``
    bits into the byte at ``self.pos``.  Both cursors are advanced past the
    bits that were consumed.  Bytes are handled whole instead of bit by bit.

    :param count: number of bits to read, at most 32.
    :return: the bits as an unsigned integer; 0 when ``count`` is not
        positive, in which case nothing is consumed.
    :raises LsdError: if ``count`` is greater than 32.
    """
    if count > 32:
        raise LsdError("Many bits for read: %d" % count)
    if count <= 0:
        # The shift/mask arithmetic below needs at least one bit; an empty
        # read must yield 0 and leave the cursor where it is.
        return 0

    # Bit offset, relative to the current byte, just past the last bit read.
    end_bit = self.in_byte_pos + count

    # Fast path: the request ends strictly inside the current byte, so the
    # byte position does not move.
    if end_bit < 8:
        value = (self.record[self.pos] >> (8 - end_bit)) & ((1 << count) - 1)
        self.in_byte_pos = end_bit
        return value

    # General case: the request reaches (or crosses) a byte boundary.
    #
    #   in_byte_pos
    #        |    count = 13    |
    #   01234567 | 01234567 | 01234567
    #
    #   in_byte_pos = 5 -> head_bits = 3, one full byte, tail_bits = 2
    pos = self.pos
    head_bits = 8 - self.in_byte_pos   # bits left in the current byte
    tail_bits = end_bit % 8            # bits taken from the final byte

    # Remainder of the current byte (its low ``head_bits`` bits).
    value = self.record[pos] & ((1 << head_bits) - 1)
    pos += 1

    # Whole bytes in the middle.
    for _ in range((count - head_bits) // 8):
        value = (value << 8) + self.record[pos]
        pos += 1

    # Leading bits of the final, partially consumed byte.
    if tail_bits:
        value = (value << tail_bits) + (self.record[pos] >> (8 - tail_bits))

    # Cursors are committed only after every byte access succeeded.
    self.in_byte_pos = tail_bits
    self.pos = pos
    return value
def parse(self):
    """Decode the overlay, the headings and every article into ``self.dict``.

    Calls ``self.read()`` first when ``self.readed`` is false.  Each decoded
    article is appended to ``self.dict`` as a ``(heading, article)`` tuple
    and ``self._parsed`` is set once all headings have been processed.
    Progress messages are printed when ``self.verbose`` is true.

    :raises LsdError: if the number of decoded headings differs from
        ``self.header.entries_count``.
    """
    if not self.readed:
        self.read()

    if self.verbose:
        print("decoding overlay..")
    self.overlay = OverlayReader(self.bstr, self.overlay_data)

    if self.verbose:
        print("decoding headings: %d" % self.header.entries_count)
    self.read_headings()

    decoded = self.headings.appended
    expected = self.header.entries_count
    if decoded != expected:
        raise LsdError("Decoded not all entries %d != %d" % (decoded, expected))

    # NOTE: merging of multi-title headings is currently disabled.

    if self.verbose:
        print("decoding articles: %d" % len(self.headings))
    for heading in self.headings:
        article = self.read_article(heading)
        self.dict.append((heading, article))

    self._parsed = True
    if self.verbose:
        print("OK")
def __init__(self, dict_file, verbose=False):
    """Load an LSD dictionary file and read its header section.

    The whole file is read into memory and wrapped in a ``BitStream``.  The
    header magic is validated, a decoder is chosen from the header version,
    and the fields following the header (name, first/last heading,
    capitals, icon, checksum, offsets) are read.  On return the stream is
    positioned at ``header.dictionary_encoder_offset``.

    :param dict_file: path of the dictionary file to open.
    :param verbose: stored as ``self.verbose``.
    :raises LsdError: if the magic is not ``LingVo`` or no decoder exists
        for the header version.
    """
    self.filename = dict_file
    self._readed = False
    self._parsed = False
    self.verbose = verbose
    with open(dict_file, 'rb') as fp:
        self.bstr = BitStream(bytearray(fp.read()))

    self.overlay = None
    self.headings = ArticleHeadingList()
    self.dict = []
    self.header = Header(self.bstr)

    # check magic
    if self.header.magic != u'LingVo':
        raise LsdError('Allow only Lsd "LingVo" ident: %s' % repr(self.header.magic))

    # Pick the decoder from the version stored in the header.
    self.decoder = None
    hi_version = self.header.hi_version
    version = self.header.version
    if hi_version in (0x11, 0x12):
        # lingvo 11 and lingvo 12 dictionaries
        self.decoder = decoder.UserDictionaryDecoder(self.bstr)
    elif hi_version == 0x13:
        # x3 dictionary: 0x131001 and 0x132001 if pages count > 1000
        self.decoder = decoder.SystemDictionaryDecoder13(self.bstr)
    elif hi_version == 0x14:
        # x5 dictionary
        if version == 0x142001:
            # user dictionaries
            self.decoder = decoder.UserDictionaryDecoder(self.bstr)
        elif version == 0x141004:
            # system dictionaries
            self.decoder = decoder.SystemDictionaryDecoder14(self.bstr)
        elif version == 0x145001:
            # abbreviation dictionaries
            self.decoder = decoder.AbbreviationDictionaryDecoder(self.bstr)
    elif hi_version == 0x15:
        # x6 dictionary
        if version == 0x152001:
            # user dictionaries
            self.decoder = decoder.UserDictionaryDecoder(self.bstr)
        elif version == 0x151005:
            # system dictionaries; the block between the two offsets is
            # xor-ed before the decoder is created.
            # NOTE(review): presumably this de-obfuscates the data in place
            # -- confirm against xor_block_x6.
            self.xor_block_x6(self.header.dictionary_encoder_offset,
                              self.header.articles_offset)
            self.decoder = decoder.SystemDictionaryDecoder14(self.bstr)
        elif version == 0x155001:
            # abbreviation dictionaries
            self.decoder = decoder.AbbreviationDictionaryDecoder(self.bstr)

    if self.decoder is None:
        # Report the problem through the package's own exception rather
        # than terminating the interpreter: a constructor must not kill the
        # host process, and ``exit`` is only a ``site`` convenience.
        self.dump()
        raise LsdError("Not supported dictionary version: %s" % hex(version))

    # Length-prefixed strings that follow the header.
    name_len = self.bstr.read_some(1)
    self.name = self.bstr.read_unicode(name_len, False)
    self.first_heading = self.bstr.read_unicode(self.bstr.read_byte(), False)
    self.last_heading = self.bstr.read_unicode(self.bstr.read_byte(), False)
    # NOTE(review): reverse16/reverse32 presumably swap the byte order of
    # the raw value -- confirm against their definitions.
    capitals_len = reverse32(self.bstr.read_int())
    self.capitals = self.bstr.read_unicode(capitals_len, False)

    # icon v12+
    if version > 0x120000:
        self.icon_size = reverse16(self.bstr.read_word())
        self.icon = self.bstr.read(self.icon_size)
    else:
        self.icon_size = 0
        self.icon = None

    if version > 0x140000:
        self.header_checksum = reverse32(self.bstr.read_int())
    else:
        self.header_checksum = 0

    if version > 0x120000:
        self.pages_end = reverse32(self.bstr.read_int())
        self.overlay_data = reverse32(self.bstr.read_int())
    else:
        self.pages_end = self.bstr.length
        self.overlay_data = self.bstr.length  # no overlay

    if version > 0x140000:
        self.dummy1 = reverse32(self.bstr.read_int())
        self.dummy2 = reverse32(self.bstr.read_int())
    else:
        self.dummy1 = 0
        self.dummy2 = 0

    # set bstr pos for decoding
    self.bstr.seek(self.header.dictionary_encoder_offset)