def utf16InfoDecode(b_value):
    r"""
    Decode the UTF-16 message stored in a type-3 info block.

    b_value is byte array
    returns str, or None (on errors)

    block type = 3
    block format: <2 byte code1><2 byte code2>
    if code2 == 0: then the block ends
    if code2 == 1: then the block continues as follows:
    <4 byte len1> \x00 \x00 <message in utf-16>
    len1 - length of message in 2-byte chars

    Only the first two bytes are inspected as the code: byte 0 must be
    zero and byte 1 selects between "block ends" (0) and "message
    follows" (1).
    """
    if not b_value:
        # Nothing to decode; indexing an empty value would raise IndexError.
        return None

    if b_value[0] != 0:
        log.warning(
            'utf16InfoDecode: b_value=%s, null expected at 0',
            list(b_value),
        )
        return None

    if len(b_value) < 2:
        # A single zero byte carries no code byte at index 1.
        log.warning(
            'utf16InfoDecode: b_value=%s, too short',
            list(b_value),
        )
        return None

    if b_value[1] == 0:
        # The block ends here; anything after the two code bytes is unexpected.
        if len(b_value) > 2:
            log.warning(
                'utf16InfoDecode: unexpected b_value size: %s',
                len(b_value),
            )
        return None

    if b_value[1] > 1:
        log.warning(
            'utf16InfoDecode: b_value=%s, unexpected byte at 1',
            list(b_value),
        )
        return None

    # now b_value[1] == 1
    # The stored length counts 2-byte chars, hence the factor of two.
    size = 2 * binStrToInt(b_value[2:6])

    # The two checks below only warn: the message is still decoded.
    if tuple(b_value[6:8]) != (0, 0):
        log.warning(
            'utf16InfoDecode: b_value=%s, null expected at 6:8',
            list(b_value),
        )
    if size != len(b_value) - 8:
        log.warning(
            'utf16InfoDecode: b_value=%s, size does not match',
            list(b_value),
        )

    return b_value[8:].decode('utf16')  # str
def aboutInfoDecode(b_value):
    """
    Split an "about" value into its file extension and its contents.

    The value is split at the first NUL: the part before it is the
    extension, the part after it is the contents.

    b_value may be bytes/bytearray or str; the returned parts have the
    same type as b_value.
    returns (aboutExt, aboutContents), or None when b_value is empty or
    the extension part is empty
    """
    if not b_value:
        return None

    # The separator must have the same type as b_value: partitioning bytes
    # with a str separator raises TypeError.
    if isinstance(b_value, (bytes, bytearray)):
        separator = b'\x00'
    else:
        separator = '\x00'

    aboutExt, _, aboutContents = b_value.partition(separator)
    if not aboutExt:
        log.warning('read_type_3: about: no file extension')
        return None
    return (aboutExt, aboutContents)
def aboutInfoDecode(b_value):
    """
    Split an "about" value into its file extension and its contents.

    The value is split at the first NUL: the part before it is the
    extension, the part after it is the contents.

    b_value may be bytes/bytearray or str; the returned parts have the
    same type as b_value.
    returns (aboutExt, aboutContents), or None when b_value is empty or
    the extension part is empty
    """
    if not b_value:
        return None

    # bytes.partition rejects a str separator (TypeError), so pick the
    # NUL separator that matches the type of the input.
    is_binary = isinstance(b_value, (bytes, bytearray))
    separator = b'\x00' if is_binary else '\x00'

    aboutExt, _, aboutContents = b_value.partition(separator)
    if not aboutExt:
        log.warning('read_type_3: about: no file extension')
        return None
    return (aboutExt, aboutContents)
def languageInfoDecode(b_value):
    """
    Look up the language that an encoded language code refers to.

    b_value is converted to an integer with binStrToInt and used as the
    index/key into languageByCode.

    returns BabylonLanguage instance, or None (with a warning) when the
    code is not present in languageByCode
    """
    intValue = binStrToInt(b_value)
    try:
        return languageByCode[intValue]
    except (IndexError, KeyError):
        # A sequence signals a missing code with IndexError, a mapping with
        # KeyError; handle both so an unknown code never escapes as an
        # exception.
        log.warning("read_type_3: unknown language code = %s", intValue)
        return None
def languageInfoDecode(b_value):
    """
    Look up the language that an encoded language code refers to.

    b_value is converted to an integer with binStrToInt and used as the
    index/key into languageByCode.

    returns BabylonLanguage instance, or None (with a warning) when the
    code is not present in languageByCode
    """
    intValue = binStrToInt(b_value)
    try:
        return languageByCode[intValue]
    except (IndexError, KeyError):
        # Unknown code: IndexError for a sequence lookup, KeyError for a
        # mapping lookup. Both are reported the same way.
        # The code is passed as a logging argument so the message is only
        # formatted when the record is actually emitted.
        log.warning("read_type_3: unknown language code = %s", intValue)
        return None
def languageInfoDecode(b_value):
    """
    Look up the language that an encoded language code refers to.

    b_value is converted to an integer with uintFromBytes and used as the
    index/key into languageByCode.

    returns BabylonLanguage instance, or None (with a warning) when the
    code is not present in languageByCode
    """
    intValue = uintFromBytes(b_value)
    try:
        return languageByCode[intValue]
    except (IndexError, KeyError):
        # A sequence signals a missing code with IndexError, a mapping with
        # KeyError; handle both so an unknown code never escapes as an
        # exception.
        log.warning(f"read_type_3: unknown language code = {intValue}")
        return None
def aboutInfoDecode(b_value):
    """
    Split an "about" value at its first NUL byte.

    The part before the NUL is the file extension, the part after it is
    the about contents.

    returns a dict with keys "about_extension" and "about", or None when
    b_value is empty or has no extension before the NUL
    """
    if b_value:
        pieces = b_value.partition(b"\x00")
        extension = pieces[0]
        if extension:
            # pieces[1] is the separator itself; pieces[2] is what follows it.
            return {"about_extension": extension, "about": pieces[2]}
        log.warning("read_type_3: about: no file extension")
    return None
def aboutInfoDecode(b_value):
    """
    Separate the file extension from the contents of an "about" value.

    The value is cut at the first NUL byte: extension first, contents
    after.

    returns {"about_extension": ..., "about": ...}, or None when b_value
    is empty or the extension part is empty
    """
    if not b_value:
        return None

    extension, _separator, contents = b_value.partition(b"\x00")
    if extension:
        return {
            "about_extension": extension,
            "about": contents,
        }

    # Value started with NUL: there is no extension to report.
    log.warning("read_type_3: about: no file extension")
    return None
def utf16InfoDecode(b_value):
    r"""
    Decode the UTF-16 message stored in a type-3 info block.

    b_value is byte array
    returns str, or None (on errors)

    block type = 3
    block format: <2 byte code1><2 byte code2>
    if code2 == 0: then the block ends
    if code2 == 1: then the block continues as follows:
    <4 byte len1> \x00 \x00 <message in utf-16>
    len1 - length of message in 2-byte chars

    Only the first two bytes are inspected as the code: byte 0 must be
    zero and byte 1 selects between "block ends" (0) and "message
    follows" (1).
    """
    if not b_value:
        # Nothing to decode; indexing an empty value would raise IndexError.
        return None

    if b_value[0] != 0:
        log.warning(
            "utf16InfoDecode: b_value=%s, null expected at 0",
            b_value,
        )
        return None

    if len(b_value) < 2:
        # A single zero byte carries no code byte at index 1.
        log.warning(
            "utf16InfoDecode: b_value=%s, too short",
            list(b_value),
        )
        return None

    if b_value[1] == 0:
        # The block ends here; anything after the two code bytes is unexpected.
        if len(b_value) > 2:
            log.warning(
                "utf16InfoDecode: unexpected b_value size: %s",
                len(b_value),
            )
        return None

    if b_value[1] > 1:
        log.warning(
            "utf16InfoDecode: b_value=%s, unexpected byte at 1",
            list(b_value),
        )
        return None

    # now b_value[1] == 1
    # The stored length counts 2-byte chars, hence the factor of two.
    size = 2 * binStrToInt(b_value[2:6])

    # The two checks below only warn: the message is still decoded.
    if tuple(b_value[6:8]) != (0, 0):
        log.warning(
            "utf16InfoDecode: b_value=%s, null expected at 6:8",
            list(b_value),
        )
    if size != len(b_value) - 8:
        log.warning(
            "utf16InfoDecode: b_value=%s, size does not match",
            list(b_value),
        )

    return b_value[8:].decode("utf16")  # str
def charsetInfoDecode(b_value):
    """
    Look up the charset selected by the first byte of b_value.

    returns the charsetByCode entry for that byte, or None when b_value
    is empty or the byte is not a known charset code (the latter is
    logged as a warning)
    """
    if not b_value:
        # No code byte to read; b_value[0] would raise IndexError.
        return None

    value = b_value[0]
    try:
        return charsetByCode[value]
    except KeyError:
        log.warning("read_type_3: unknown charset %s", value)
        return None
def write_entries(glos, f, cleanHTML, indexes):
    """
    Write every glossary entry to ``f`` as a ``<d:entry>`` element.

    Each entry is rendered as: opening tag (id and quoted title), the
    generated index markup, the cleaned definition content, closing tag.
    Entries whose normalized long title is empty are skipped. Output is
    buffered and written out whenever the entry position is a multiple of
    1000 (this includes position 0), plus once after the last entry.

    :param glos: glossary to iterate; its default definition format is
        set to 'h' before iterating
    :param f: output object; only its write() method is used
    :param cleanHTML: when true, BeautifulSoup is requested through
        get_beautiful_soup() and a warning is logged if it is unavailable
    :param indexes: str | None
    """
    BeautifulSoup = get_beautiful_soup() if cleanHTML else None
    if cleanHTML and not BeautifulSoup:
        log.warning(
            'cleanHTML option passed but BeautifulSoup not found.  ' +
            'to fix this run `sudo pip3 install lxml beautifulsoup4 html5lib`'
        )

    # write entries
    entry_ids = id_generator()
    build_indexes = indexes_generator(indexes)
    pending = []  # output fragments not yet written to f

    xdxf.xdxf_init()
    glos.setDefaultDefiFormat('h')

    for position, entry in enumerate(glos):
        all_words = entry.getWords()
        headword = all_words[0]
        alternates = all_words[1:]
        definition = entry.getDefi()
        defi_format = entry.getDefiFormat()

        short_title = _normalize.title(headword, BeautifulSoup)
        long_title = _normalize.title_long(short_title)
        if not long_title:
            continue

        entry_id = next(entry_ids)
        if BeautifulSoup:
            # Second argument True asks for a quoted attribute value.
            title_attr = BeautifulSoup.dammit.EntitySubstitution.substitute_xml(
                long_title,
                True,
            )
        else:
            title_attr = '"%s"' % long_title

        opening_tag = '<d:entry id="%(id)s" d:title=%(title)s>\n' % dict(
            id=entry_id,
            title=title_attr,
        )

        if defi_format == 'x':
            # XDXF definitions are converted to HTML first and cleaned
            # without a title.
            content = format_clean_content(
                None,
                xdxf.xdxf_to_html(definition),
                BeautifulSoup,
            )
        else:
            content = format_clean_content(long_title, definition, BeautifulSoup)

        index_markup = build_indexes(long_title, alternates, content, BeautifulSoup)

        pending.extend((opening_tag, index_markup, content, '\n</d:entry>\n'))

        if position % 1000 == 0:
            f.write(''.join(pending))
            pending = []

    f.write(''.join(pending))
def charsetInfoDecode(b_value):
    """
    Look up the charset selected by the first byte of b_value.

    returns the charsetByCode entry for that byte, or None when b_value
    is empty or the byte is not a known charset code (the latter is
    logged as a warning)
    """
    if not b_value:
        # No code byte to read; b_value[0] would raise IndexError.
        return None

    value = b_value[0]
    try:
        return charsetByCode[value]
    except KeyError:
        # The code is passed as a logging argument so the message is only
        # formatted when the record is actually emitted.
        log.warning("read_type_3: unknown charset %s", value)
        return None