def parse_index_record(table, data, control_byte_count, tags, codec,
                       ordt_map, strict=False):
    """Parse one INDX record and add its entries to ``table``.

    The record header is read with ``parse_indx_header``. The IDXT offset
    list located at ``header['start']`` is then walked, and for each of the
    ``header['count']`` entries the ident string and its tag map are decoded
    and stored as ``table[ident] = tag_map``.

    Args:
        table: Mapping that is updated in place, keyed by entry ident.
        data: Raw bytes of the INDX record.
        control_byte_count: Passed through unchanged to ``get_tag_map``.
        tags: Passed through unchanged to ``get_tag_map``.
        codec: Codec tried first when decoding each entry's ident.
        ordt_map: Passed through unchanged to ``decode_string``.
        strict: Passed through unchanged to ``get_tag_map``.

    Returns:
        The header object returned by ``parse_indx_header``.

    A missing ``IDXT`` marker only prints a warning; parsing still proceeds
    (best effort).
    """
    header = parse_indx_header(data)
    idxt_pos = header['start']
    if data[idxt_pos:idxt_pos + 4] != b'IDXT':
        print('WARNING: Invalid INDX record')
    entry_count = header['count']

    # The IDXT tag is followed by one big-endian 16-bit start offset per
    # entry.
    idx_positions = [
        struct.unpack_from(b'>H', data, idxt_pos + 4 + (2 * j))[0]
        for j in range(entry_count)
    ]
    # The last entry ends before the IDXT tag (but there might be zero fill
    # bytes we need to ignore!)
    idx_positions.append(idxt_pos)

    # For each entry in the IDXT build up the tag map and any associated
    # text. Consecutive offsets delimit one entry.
    for start, end in zip(idx_positions, idx_positions[1:]):
        rec = data[start:end]
        # Sometimes (in the guide table if the type attribute has non ascii
        # values) the ident is UTF-16 encoded. Try to handle that.
        try:
            ident, consumed = decode_string(rec, codec=codec,
                                            ordt_map=ordt_map)
        except UnicodeDecodeError:
            ident, consumed = decode_string(rec, codec='utf-16',
                                            ordt_map=ordt_map)
        if '\x00' in ident:
            # Embedded NULs suggest the ident was really UTF-16; retry with
            # that codec before giving up.
            try:
                ident, consumed = decode_string(rec, codec='utf-16',
                                                ordt_map=ordt_map)
            except UnicodeDecodeError:
                # Last resort: drop the NUL characters themselves. (This
                # used to replace the two-character sequence 'u\x00', which
                # left the NULs in place.)
                ident = ident.replace('\x00', '')
        rec = rec[consumed:]
        table[ident] = get_tag_map(control_byte_count, tags, rec,
                                   strict=strict)
    return header
def parse_index_record(table, data, control_byte_count, tags, codec,
                       ordt_map, strict=False):
    """Read a single INDX record, filling ``table`` with its entries.

    ``parse_indx_header`` supplies the header; ``header['start']`` is the
    position of the IDXT offset list and ``header['count']`` the number of
    entries. Every entry contributes ``table[ident] = tag_map``, where the
    ident comes from ``decode_string`` and the tag map from ``get_tag_map``
    applied to the bytes that follow the ident.

    Args:
        table: Mapping updated in place, keyed by entry ident.
        data: Raw bytes of the INDX record.
        control_byte_count: Forwarded as-is to ``get_tag_map``.
        tags: Forwarded as-is to ``get_tag_map``.
        codec: First codec tried for each entry's ident.
        ordt_map: Forwarded as-is to ``decode_string``.
        strict: Forwarded as-is to ``get_tag_map``.

    Returns:
        The header object produced by ``parse_indx_header``.

    If the ``IDXT`` marker is not found at ``header['start']`` a warning is
    printed and parsing continues anyway.

    NOTE(review): this file defines ``parse_index_record`` more than once;
    the later definition is the one bound at import time -- confirm whether
    the duplicate is intentional.
    """
    header = parse_indx_header(data)
    idxt_pos = header['start']
    if data[idxt_pos:idxt_pos + 4] != b'IDXT':
        print('WARNING: Invalid INDX record')
    entry_count = header['count']

    # Collect the IDXT position starts: one big-endian 16-bit offset per
    # entry, stored right after the 4-byte IDXT tag.
    first_offset = idxt_pos + 4
    idx_positions = [
        struct.unpack_from(b'>H', data, first_offset + (2 * j))[0]
        for j in range(entry_count)
    ]
    # The last entry ends before the IDXT tag (but there might be zero fill
    # bytes we need to ignore!)
    idx_positions.append(idxt_pos)

    # For each entry in the IDXT build up the tag map and any associated
    # text. Each (start, end) pair of neighbouring offsets bounds one entry.
    for start, end in zip(idx_positions, idx_positions[1:]):
        rec = data[start:end]
        # Sometimes (in the guide table if the type attribute has non ascii
        # values) the ident is UTF-16 encoded. Try to handle that.
        try:
            ident, consumed = decode_string(rec, codec=codec,
                                            ordt_map=ordt_map)
        except UnicodeDecodeError:
            ident, consumed = decode_string(rec, codec='utf-16',
                                            ordt_map=ordt_map)
        if u'\x00' in ident:
            # NUL characters in the result hint at UTF-16 data decoded with
            # the wrong codec, so give UTF-16 a chance.
            try:
                ident, consumed = decode_string(rec, codec='utf-16',
                                                ordt_map=ordt_map)
            except UnicodeDecodeError:
                # Fall back to stripping the NULs. The pattern must be the
                # NUL character itself (u'\x00'); the previous 'u\x00'
                # matched a literal "u" followed by NUL instead.
                ident = ident.replace(u'\x00', u'')
        rec = rec[consumed:]
        tag_map = get_tag_map(control_byte_count, tags, rec, strict=strict)
        table[ident] = tag_map
    return header