def parse(self):
    """Parse ``self.infile`` as a Java class file and check the constant pool.

    The result of ``java_class.JavaClass.from_io`` is stored in
    ``self.data``. The constant pool is then walked once: the name of the
    entry that ``this_class`` refers to and the name of every
    ``StringCpInfo`` entry are resolved and decoded, to make sure the
    pointers into the constant pool are actually valid.

    Raises:
        UnpackParserException: if the file cannot be parsed, or if one of
            the checked constant pool entries has no resolvable name.
    """
    try:
        self.data = java_class.JavaClass.from_io(self.infile)
    except (Exception, ValidationFailedError) as e:
        raise UnpackParserException(e.args) from e

    # Constant pool indexes start at 1. Entries flagged with
    # is_prev_two_entries are skipped but still counted.
    for constant_pool_index, entry in enumerate(self.data.constant_pool,
                                                start=1):
        if entry.is_prev_two_entries:
            continue

        is_this_class = self.data.this_class == constant_pool_index
        is_string = isinstance(entry.cp_info,
                               java_class.JavaClass.StringCpInfo)
        if not (is_this_class or is_string):
            continue

        try:
            mutf8.decode_modified_utf8(entry.cp_info.name_as_str)
        except UnicodeDecodeError:
            # This shouldn't happen and means there
            # is an error in the mutf8 package
            pass
        except AttributeError as e:
            # The entry does not resolve to a name. Report this the same
            # way for the this_class entry as for string entries, instead
            # of letting a raw AttributeError escape.
            raise UnpackParserException(e.args) from e
def _read_utf8(read):
    """Read a length-prefixed MUTF-8 string and return it decoded.

    Args:
        read: callable invoked as ``read(fmt, size)`` whose result is
            indexable; the underlying stream is reached via ``read.src``.

    Returns:
        The value produced by ``mutf8.decode_modified_utf8`` for the bytes
        that follow the two-byte length prefix.
    """
    # NOTE(review): assumes ``read`` hands the format code to ``struct`` --
    # confirm against the caller. The length prefix has to be read unsigned
    # ('H'): with the signed code 'h' a length of 32768 or more turns
    # negative, and a negative size makes io streams read to end-of-file
    # instead of reading just the string.
    byte_length = read('H', 2)[0]
    return mutf8.decode_modified_utf8(read.src.read(byte_length))
def set_metadata_and_labels(self):
    """Gather metadata from the parsed Java class and store it with the labels."""
    labels = ['java class']

    # Recorded for a Java class:
    # * strings
    # * class name
    # * fields
    # * methods
    # * source file name
    # TODO: interfaces are not recorded yet
    metadata = {'strings': []}

    # The class name and the string constants are taken from the constant
    # pool, as they are not available some other way.
    for pool_index, entry in enumerate(self.data.constant_pool, start=1):
        if entry.is_prev_two_entries:
            continue

        if self.data.this_class == pool_index:
            try:
                class_name = mutf8.decode_modified_utf8(
                    entry.cp_info.name_as_str)
            except UnicodeDecodeError:
                # Not expected: this points at an error in the mutf8 package
                pass
            else:
                metadata['classname'] = class_name

        if type(entry.cp_info) == java_class.JavaClass.StringCpInfo:
            try:
                string_value = mutf8.decode_modified_utf8(
                    entry.cp_info.name_as_str)
            except UnicodeDecodeError:
                # Not expected: this points at an error in the mutf8 package
                pass
            else:
                metadata['strings'].append(string_value)

    field_names = []
    metadata['fields'] = field_names
    for field in self.data.fields:
        try:
            field_name = mutf8.decode_modified_utf8(field.name_as_str)
        except UnicodeDecodeError:
            # Not expected: this points at an error in the mutf8 package
            continue
        field_names.append(field_name)

    method_names = []
    metadata['methods'] = method_names
    for method in self.data.methods:
        try:
            method_name = mutf8.decode_modified_utf8(method.name_as_str)
        except UnicodeDecodeError:
            # Not expected: this points at an error in the mutf8 package
            continue
        method_names.append(method_name)

    for attribute in self.data.attributes:
        try:
            attribute_name = mutf8.decode_modified_utf8(attribute.name_as_str)
        except UnicodeDecodeError:
            # Not expected: this points at an error in the mutf8 package
            continue

        if attribute_name != 'SourceFile':
            continue

        try:
            source_file = mutf8.decode_modified_utf8(
                attribute.info.sourcefile_as_str)
        except UnicodeDecodeError:
            # Not expected: this points at an error in the mutf8 package
            continue
        metadata['sourcefile'] = source_file

    self.unpack_results.set_metadata(metadata)
    self.unpack_results.set_labels(labels)
def unpack_name(buf) -> str:
    """Read a length-prefixed name from ``buf`` and decode it as MUTF-8.

    The value returned by ``buf.unpack("H")`` is used as the size passed to
    ``buf.read``; the bytes read are decoded with ``decode_modified_utf8``.
    """
    name_length = buf.unpack("H")
    raw_name = buf.read(name_length)
    return decode_modified_utf8(raw_name)
def set_metadata_and_labels(self):
    """Gather per-class metadata from the parsed dex file and store it with the labels.

    For every class definition that has class data the following is
    recorded: the class name, the source file name (when set), the direct
    and virtual methods that have code (name, bytecode hashes and the
    strings referenced from the bytecode) and the static and instance
    fields (name, type and class).
    """
    labels = ['dex', 'android']
    metadata = {
        'version': self.data.header.version_str,
        'classes': [],
    }

    def strip_delimiters(type_name):
        # Names ending in ';' lose their first and last character
        # (presumably 'L...;' class descriptors); others are kept as is.
        return type_name[1:-1] if type_name.endswith(';') else type_name

    def collect_methods(encoded_methods, method_type):
        """Return one metadata entry per method in the list that has code."""
        entries = []
        method_id = 0
        for method in encoded_methods:
            # method_idx_diff is relative to the previous element of the
            # list, so it must be accumulated for methods without code
            # too. Otherwise every method after an abstract or native one
            # is looked up under the wrong id.
            method_id += method.method_idx_diff.value
            if method.code is None:
                continue

            insns = method.code.insns

            # compute various hashes for the bytecode and store them
            sha256 = hashlib.sha256(insns).hexdigest()
            tlsh_hash = tlsh.hash(insns)
            hashes = {
                'sha256': sha256,
                'tlsh': tlsh_hash if tlsh_hash != 'TNULL' else None,
            }

            # extract the relevant strings from the bytecode and store them
            strings = []
            for string_id in self.parse_bytecode(insns):
                try:
                    # Decoding errors shouldn't happen, but there was a bug
                    # in mutf8: https://github.com/TkTech/mutf8/issues/1
                    bytecode_string = mutf8.decode_modified_utf8(
                        self.data.string_ids[string_id].value.data)
                    # Only keep strings that can be encoded again; this
                    # guards against a likely bug in mutf8.
                    bytecode_string.encode()
                except UnicodeError:
                    continue
                strings.append(bytecode_string)

            method_name = mutf8.decode_modified_utf8(
                self.data.method_ids[method_id].method_name)
            entries.append({'name': method_name,
                            'method_type': method_type,
                            'bytecode_hashes': hashes,
                            'strings': strings})
        return entries

    def collect_fields(encoded_fields, field_kind):
        """Return one metadata entry per field in the list."""
        entries = []
        field_id = 0
        for field in encoded_fields:
            field_id += field.field_idx_diff.value
            field_ref = self.data.field_ids[field_id]

            field_type = strip_delimiters(
                mutf8.decode_modified_utf8(field_ref.type_name))
            try:
                class_type = strip_delimiters(
                    mutf8.decode_modified_utf8(field_ref.class_name))
            except UnicodeError:
                # Record an explicit None rather than leaving class_type
                # unbound or carrying over the value of the previous field.
                class_type = None
            field_name = mutf8.decode_modified_utf8(field_ref.field_name)

            entries.append({'name': field_name,
                            'type': field_type,
                            'class': class_type,
                            'field_type': field_kind})
        return entries

    for class_definition in self.data.class_defs:
        class_data = class_definition.class_data
        if class_data is None:
            continue

        class_obj = {}
        try:
            class_obj['classname'] = mutf8.decode_modified_utf8(
                class_definition.type_name[1:-1])
        except UnicodeDecodeError:
            # best effort: the class is recorded without a name
            pass

        if class_definition.sourcefile_name is not None:
            class_obj['source'] = mutf8.decode_modified_utf8(
                class_definition.sourcefile_name)

        # direct methods first, then virtual methods
        class_obj['methods'] = (
            collect_methods(class_data.direct_methods, 'direct')
            + collect_methods(class_data.virtual_methods, 'virtual'))

        # static fields first, then instance fields
        class_obj['fields'] = (
            collect_fields(class_data.static_fields, 'static')
            + collect_fields(class_data.instance_fields, 'instance'))

        metadata['classes'].append(class_obj)

    self.unpack_results.set_metadata(metadata)
    self.unpack_results.set_labels(labels)