def __init__(self, bstr):
    """Parse the dictionary header from ``bstr``.

    All fields are consumed sequentially from the stream's current
    position, so the order of the reads below must not change.

    :param bstr: stream object providing ``read``, ``read_int``,
        ``read_word``, ``read_byte`` and ``read_unicode``.
    """
    self.bstr = bstr

    def read_u32():
        return reverse32(bstr.read_int())

    def read_u16():
        return reverse16(bstr.read_word())

    def read_short_string():
        # The string length is stored in a single byte just before the text.
        length = bstr.read_byte()
        return bstr.read_unicode(length, False)

    # 8-byte identifier with NUL bytes removed.  When the stream hands
    # back bytes/bytearray, str arguments to .replace() raise TypeError
    # on Python 3, so decode first.  latin-1 maps every byte to a code
    # point and therefore never fails on a damaged file.
    raw_magic = bstr.read(8)
    if isinstance(raw_magic, (bytes, bytearray)):
        raw_magic = raw_magic.decode('latin-1')
    self.magic = raw_magic.replace('\x00', '')

    self.version = read_u32()
    self.unk = read_u32()
    self.checksum = read_u32()
    self.entries_count = read_u32()
    self.annotation_offset = read_u32()
    self.dictionary_encoder_offset = read_u32()
    self.articles_offset = read_u32()
    self.pages_offset = read_u32()
    self.unk1 = read_u32()
    self.unk2 = read_u16()
    self.unk3 = read_u16()
    self.source_language = read_u16()
    self.target_language = read_u16()

    self.name = read_short_string()
    self.first_heading = read_short_string()
    self.last_heading = read_short_string()

    # Unlike the strings above, this one carries a 32-bit length.
    capitals_len = read_u32()
    self.capitals = bstr.read_unicode(capitals_len, False)

    # Icon: 16-bit size followed by that many raw bytes.
    self.icon_size = read_u16()
    self.icon = bstr.read(self.icon_size)

    self.header_checksum = read_u32()
    self.pages_end = read_u32()
    self.overlay_data = read_u32()
def __init__(self, bstr):
    """Read the fixed-size leading part of the header from ``bstr``.

    The values are taken one after another from the stream's current
    position: an 8-byte magic, nine values read with ``read_int`` and
    four values read with ``read_word``.

    :param bstr: stream object providing ``read``, ``read_int`` and
        ``read_word``.
    """
    self.bstr = bstr
    stream = self.bstr

    # Identifier: 8 raw bytes, decoded, with NUL characters removed.
    magic_bytes = stream.read(8)
    self.magic = magic_bytes.decode().replace('\x00', '')

    # Field order mirrors the order of the values in the stream.
    int_fields = (
        'version',
        'unk',
        'checksum',
        'entries_count',
        'annotation_offset',
        'dictionary_encoder_offset',
        'articles_offset',
        'pages_offset',
        'unk1',
    )
    for field in int_fields:
        setattr(self, field, reverse32(stream.read_int()))

    word_fields = (
        'unk2',
        'unk3',
        'source_language',
        'target_language',
    )
    for field in word_fields:
        setattr(self, field, reverse16(stream.read_word()))
def __init__(self, dict_file, verbose=False):
    """Load a dictionary file, parse its header and pick a decoder.

    The whole file is read into memory, the fixed header is parsed via
    ``Header``, a decoder is chosen from the header version, and the
    remaining variable-length header fields are read.  On return the
    stream is positioned at ``header.dictionary_encoder_offset``.

    :param dict_file: path of the dictionary file to open.
    :param verbose: stored on the instance as ``self.verbose``.
    :raises LsdError: if the magic is not ``LingVo`` or no decoder is
        known for the header version.
    """
    self.filename = dict_file
    self._readed = False
    self._parsed = False
    self.verbose = verbose
    with open(dict_file, 'rb') as fp:
        self.bstr = BitStream(bytearray(fp.read()))

    self.overlay = None
    self.headings = ArticleHeadingList()
    self.dict = []
    self.header = Header(self.bstr)

    # Check magic.
    if self.header.magic != u'LingVo':
        raise LsdError('Allow only Lsd "LingVo" ident: %s' % repr(self.header.magic))

    # Choose the decoder from the header version.
    self.decoder = None
    hi_version = self.header.hi_version
    version = self.header.version
    if hi_version in (0x11, 0x12):
        # Lingvo 11 and Lingvo 12 dictionaries
        self.decoder = decoder.UserDictionaryDecoder(self.bstr)
    elif hi_version == 0x13:
        # x3 dictionary: 0x131001, and 0x132001 if pages count > 1000
        self.decoder = decoder.SystemDictionaryDecoder13(self.bstr)
    elif hi_version == 0x14:
        # x5 dictionary
        if version == 0x142001:
            # user dictionaries
            self.decoder = decoder.UserDictionaryDecoder(self.bstr)
        elif version == 0x141004:
            # system dictionaries
            self.decoder = decoder.SystemDictionaryDecoder14(self.bstr)
        elif version == 0x145001:
            # abbreviation dictionaries
            self.decoder = decoder.AbbreviationDictionaryDecoder(self.bstr)
    elif hi_version == 0x15:
        # x6 dictionary
        if version == 0x152001:
            # user dictionaries
            self.decoder = decoder.UserDictionaryDecoder(self.bstr)
        elif version == 0x151005:
            # system dictionaries: the block between the two offsets is
            # passed through xor_block_x6 before the decoder is created
            self.xor_block_x6(self.header.dictionary_encoder_offset,
                              self.header.articles_offset)
            self.decoder = decoder.SystemDictionaryDecoder14(self.bstr)
        elif version == 0x155001:
            # abbreviation dictionaries
            self.decoder = decoder.AbbreviationDictionaryDecoder(self.bstr)

    if self.decoder is None:
        # Keep the diagnostic dump, but report the failure with an
        # exception instead of terminating the interpreter: a constructor
        # must not kill the host process, and the bare ``exit`` helper is
        # only present when the ``site`` module has been loaded.
        self.dump()
        raise LsdError("Not supported dictionary version: %s" % hex(self.header.version))

    # Variable-length header fields; they follow each other in the
    # stream, so the order of the reads matters.
    name_len = self.bstr.read_some(1)
    self.name = self.bstr.read_unicode(name_len, False)
    self.first_heading = self.bstr.read_unicode(self.bstr.read_byte(), False)
    self.last_heading = self.bstr.read_unicode(self.bstr.read_byte(), False)
    capitals_len = reverse32(self.bstr.read_int())
    self.capitals = self.bstr.read_unicode(capitals_len, False)

    after_v12 = self.header.version > 0x120000
    after_v14 = self.header.version > 0x140000

    # icon v12+
    if after_v12:
        self.icon_size = reverse16(self.bstr.read_word())
        self.icon = self.bstr.read(self.icon_size)
    else:
        self.icon_size = 0
        self.icon = None

    if after_v14:
        self.header_checksum = reverse32(self.bstr.read_int())
    else:
        self.header_checksum = 0

    if after_v12:
        self.pages_end = reverse32(self.bstr.read_int())
        self.overlay_data = reverse32(self.bstr.read_int())
    else:
        self.pages_end = self.bstr.length
        self.overlay_data = self.bstr.length  # no overlay

    if after_v14:
        self.dummy1 = reverse32(self.bstr.read_int())
        self.dummy2 = reverse32(self.bstr.read_int())
    else:
        self.dummy1 = 0
        self.dummy2 = 0

    # Set the stream position for decoding.
    self.bstr.seek(self.header.dictionary_encoder_offset)
def test_reverse16(self):
    """``reverse16(0x0102)`` is expected to yield ``0x0201``."""
    source_value = 0x0102
    expected = 0x0201
    actual = bitstream.reverse16(source_value)
    self.assertEqual(actual, expected)