def add_binary_feature(feature, n1, n2):
    """Record a two-operand relation feature on ``n1`` and/or ``n2``.

    ``feature`` is a format string that is filled with two positional
    arguments: a description of ``n1`` followed by a description of ``n2``.
    Features stored on ``n1`` are prefixed with ``'L'`` and features stored
    on ``n2`` with ``'R'``. A node only receives features when it is a
    ``Reg`` or an ``IndirectOffset`` instance; ``Reg`` is checked first.

    The default description of a node is ``coarse(node)`` when its exact
    type is ``GivReg``, ``Reg`` or ``Flag`` and ``fine(node)`` otherwise.
    """
    coarse_only_types = (GivReg, Reg, Flag)
    n1_info = coarse(n1) if type(n1) in coarse_only_types else fine(n1)
    n2_info = coarse(n2) if type(n2) in coarse_only_types else fine(n2)

    n1_indirect = isinstance(n1, IndirectOffset)
    n2_indirect = isinstance(n2, IndirectOffset)

    def record(target, side, first, second):
        # Store one formatted feature, tagged with the operand side.
        target.features.add(side + feature.format(first, second))

    # Features attached to the left operand.
    if isinstance(n1, Reg):
        record(n1, 'L', coarse(n1), n2_info)
        if n2_indirect:
            # NOTE(review): coarse() receives the already-computed
            # description of n2 here, not n2 itself -- confirm intended.
            record(n1, 'L', coarse(n1), coarse(n2_info))
    elif n1_indirect:
        record(n1, 'L', coarse(n1), n2_info)
        record(n1, 'L', fine(n1), n2_info)
        if n2_indirect:
            record(n1, 'L', coarse(n1), coarse(n2_info))
            record(n1, 'L', fine(n1), coarse(n2_info))

    # Features attached to the right operand.
    if isinstance(n2, Reg):
        record(n2, 'R', n1_info, coarse(n2))
        if n1_indirect:
            record(n2, 'R', coarse(n1), coarse(n2))
    elif n2_indirect:
        record(n2, 'R', n1_info, coarse(n2))
        record(n2, 'R', n1_info, fine(n2))
        if n1_indirect:
            # NOTE(review): both calls below produce the same feature; the
            # second may have been meant to use fine(n2) -- confirm before
            # changing, since it alters the emitted feature set.
            record(n2, 'R', coarse(n1), coarse(n2))
            record(n2, 'R', coarse(n1), coarse(n2))
# initialize(self): populates the graph by calling self.add_edge over the
# contents of self.binary, and runs EXP_EDGE_EXTRACTOR / STMT_EDGE_EXTRACTOR
# over each function's virtual expressions and statements.
#
# NOTE(review): the original line breaks and indentation of this method have
# been lost. As written, a compound statement follows "def ...:" on the same
# physical line (not valid Python) and everything after the first '#' on each
# physical line is lexed as a comment. The nesting of the loops and guards
# below cannot be recovered from the text alone; restore the original layout
# before relying on this method.
#
# Work performed, in textual order:
#   * For each value of self.binary.direct_offsets: a 'NAME[{}]TTYPE' edge to
#     its ttype (with coarse), a 'NODE[COARSE]TYPE' edge to
#     make_node_type(coarse(direct_offset), self.binary), and, for
#     StringArrayOffset instances, a 'STRARR[]ELEM' edge to each entry of
#     direct_offset.strings.
#   * For each pair of consecutive keys of self.binary.direct_offsets in
#     sorted order, with diff = key2 - key1: 'LOCAL{diff}[DIRECT]' and
#     'LOCAL{diff}[DIRECT]TTYPE' edges following an "if diff <= 0x10" guard,
#     and a 'SIZE[DIRECT]TTYPE' edge to make_size_node(diff, self.binary).
#     Whether the SIZE edge is inside that guard is not recoverable here.
#   * For each function in self.binary.functions.functions, following an
#     "if function.is_run_init" guard: 'NAME[{}]TTYPE' and 'NODE[COARSE]TYPE'
#     edges for the function, 'CALL' edges to function.callees, and
#     '[FUNC]NODETYPE[INDIRECT]' edges to make_node_type(fine(off), ...).
#   * Following a "not (MODE == TRAIN and not function.init_run)" guard:
#     'STR[]FUNC' and 'DIRECT[]FUNC' edges, EXP_EDGE_EXTRACTOR.visit on each
#     virtual_exp.exp, 'INDIRECT[{}]FUNC' / 'NAME[{}]TTYPE' / 'NODE[FINE]TYPE'
#     edges for indirect offsets, 'REG[{}]FUNC' / 'NAME[{}]TTYPE' /
#     'NODE[COARSE]TYPE' edges for registers (an extra fine 'REG[{}]FUNC'
#     edge when MACHINE_ARCH is 'x64' or 'ARM' and reg.var_type is
#     constants.FUN_ARG), '[{}]LOCAL[{}]' edges between indirect offsets whose
#     key[1] differs by 1..ADDRESS_BYTE_SIZE and that share an index, and
#     'LOCAL[{}]' edges between registers whose key[1] differs by 1.
#   * A second loop over the functions, with the same MODE/init_run guard,
#     calling STMT_EDGE_EXTRACTOR.visit on every stmt of every blk.
#
# NOTE(review): two different attributes are read on function, is_run_init
# and init_run -- confirm both exist and are meant to differ.
# NOTE(review): the 'NODE[FINE]TYPE' edge uses node_type, but the only nearby
# assignment to node_type is inside commented-out text, so the value appears
# to be left over from an earlier statement -- confirm against the original.
def initialize(self): for direct_offset in self.binary.direct_offsets.values(): self.add_edge(direct_offset, direct_offset.ttype, 'NAME[{}]TTYPE', coarse) node_type = make_node_type(coarse(direct_offset), self.binary) self.add_edge(direct_offset, node_type, 'NODE[COARSE]TYPE') # node_type = make_node_type(coarse(direct_offset.ttype), self.binary) # self.add_edge( # direct_offset.ttype, # node_type, # 'NODE[COARSE]TYPE' # ) if isinstance(direct_offset, StringArrayOffset): for string_const in direct_offset.strings: self.add_edge(direct_offset, string_const, 'STRARR[]ELEM') keys = list(sorted(self.binary.direct_offsets.keys())) for key1, key2 in zip(keys[:-1], keys[1:]): direct_offset1 = self.binary.direct_offsets[key1] direct_offset2 = self.binary.direct_offsets[key2] diff = key2 - key1 if diff <= 0x10: self.add_edge(direct_offset1, direct_offset2, 'LOCAL{}[DIRECT]'.format(diff)) self.add_edge(direct_offset1.ttype, direct_offset2.ttype, 'LOCAL{}[DIRECT]TTYPE'.format(diff)) self.add_edge(direct_offset1.ttype, make_size_node(diff, self.binary), 'SIZE[DIRECT]TTYPE') for function in self.binary.functions.functions: if function.is_run_init: self.add_edge(function, function.ttype, 'NAME[{}]TTYPE', coarse) node_type = make_node_type(coarse(function), self.binary) self.add_edge(function, node_type, 'NODE[COARSE]TYPE') for callee in function.callees: self.add_edge(function, callee, 'CALL') for indirect_offset in function.indirect_offsets.values(): for off in indirect_offset.values(): node_type = make_node_type(fine(off), self.binary) self.add_edge(function, node_type, '[FUNC]NODETYPE[INDIRECT]') if not (self.binary.config.MODE == self.binary.config.TRAIN and not function.init_run): for i in function.string_consts: string_const = function.binary.string_consts[i] self.add_edge(string_const, function, 'STR[]FUNC') for offset in function.direct_offsets: direct_offset = function.binary.direct_offsets[offset] self.add_edge(direct_offset, function, 'DIRECT[]FUNC') for virtual_exp in 
function.virtual_exps.values(): EXP_EDGE_EXTRACTOR.visit(virtual_exp.exp, function=function) for indirect_offset in function.indirect_offsets.values(): for off in indirect_offset.values(): # self.add_edge( # off, # function, # 'INDIRECT[{}]FUNC', # coarse # ) self.add_edge(off, function, 'INDIRECT[{}]FUNC', fine) # self.add_edge( # off, # off.ttype, # 'NAME[{}]TTYPE', # coarse # ) self.add_edge(off, off.ttype, 'NAME[{}]TTYPE', fine) # node_type = make_node_type(coarse(off), self.binary) # self.add_edge( # off, # node_type, # 'NODE[COARSE]TYPE' # ) self.add_edge(off, node_type, 'NODE[FINE]TYPE') # node_type = make_node_type(fine(off.ttype), self.binary) # self.add_edge( # off.ttype, # node_type, # 'NODE[FINE]TYPE' # ) for reg in function.regs.values(): self.add_edge(reg, function, 'REG[{}]FUNC', coarse) if self.binary.config.MACHINE_ARCH in ('x64', 'ARM') \ and reg.var_type == constants.FUN_ARG: self.add_edge(reg, function, 'REG[{}]FUNC', fine) self.add_edge(reg, reg.ttype, 'NAME[{}]TTYPE', coarse) node_type = make_node_type(coarse(reg), self.binary) self.add_edge(reg, node_type, 'NODE[COARSE]TYPE') # node_type = make_node_type(coarse(reg.ttype), self.binary) # self.add_edge( # reg.ttype, # node_type, # 'NODE[FINE]TYPE' # ) for key in function.indirect_offsets: for i in range(1, self.binary.config.ADDRESS_BYTE_SIZE + 1): key_1 = (key[0], key[1] + i) if key_1 in function.indirect_offsets: indirect_offsets = function.indirect_offsets[key] indirect_offsets_1 = function.indirect_offsets[ key_1] for index in indirect_offsets: if index in indirect_offsets_1: indirect_offset = indirect_offsets[index] indirect_offset_1 = indirect_offsets_1[ index] # self.add_edge( # indirect_offset, # indirect_offset_1, # 'LOCAL[{}]', # coarse # ) self.add_edge(indirect_offset, indirect_offset_1, '[{}]LOCAL[{}]', fine, fine) for key in function.regs: key_1 = (key[0], key[1] + 1) if key_1 in function.regs: reg = function.regs[key] reg_1 = function.regs[key_1] self.add_edge(reg, reg_1, 
'LOCAL[{}]', coarse) for function in self.binary.functions.functions: if not (self.binary.config.MODE == self.binary.config.TRAIN and not function.init_run): for blk in function.blks.values(): for stmt in blk.stmts: STMT_EDGE_EXTRACTOR.visit(stmt, function=function)
def to_json(self, clear=False):
    """Serialize the graph's nodes into a list of ordered mappings.

    Each entry is an ``OrderedDict`` with two items: ``'v'`` mapped to
    ``node.id``, and either ``'giv'`` or ``'inf'`` mapped to the node's name.

    Args:
        clear: When true, every ``'inf'`` entry is emitted with an empty
            string instead of its name.

    Returns:
        A list with one entry per recognised node in ``self.nodes``. Nodes
        whose exact type matches none of the handled types are printed and
        skipped. Previously they fell through to the append step and either
        raised ``UnboundLocalError`` or reused the previous node's
        name and kind.
    """

    def label_kind(train_name):
        # A name that is not given is 'inf', except that with UNK_GIV set
        # the unknown label is treated as 'giv'.
        if self.binary.config.UNK_GIV and train_name == UNKNOWN_LABEL:
            return 'giv'
        return 'inf'

    nodes = []
    for node in self.nodes.values():
        # Exact type equality (not isinstance) is kept on purpose so that an
        # instance is only matched by the branch naming its own class; the
        # class hierarchy is not visible from this method.
        kind = type(node)
        node_type = 'giv'

        if kind == IntConst:
            node_name = 'INT[{}][{}]'.format(node.width, node.value)
        elif kind == StringConst:
            node_name = '\"{}\"'.format(node.value)
        elif kind == SwitchTable:
            node_name = 'SwitchTable'
        elif kind == Flag:
            node_name = node.base_flag
        elif kind == CodeOffset:
            node_name = 'CodeOffset'
        elif kind == GivOffset:
            node_name = str(node.offset)
        elif kind == TempOffset:
            node_name = fine(node)
        elif kind == GivReg:
            node_name = coarse(node)
        elif kind == SizeNode:
            node_name = 'SIZE[{}]'.format(node.size)
        elif kind == UnknownNode:
            node_name = 'Unknown'
        elif kind in (Insn, VirtualElm, NodeType, OpNode, OtherVarNode):
            node_name = node.name
        elif kind in (Function, DirectOffset, StringArrayOffset):
            if node.is_name_given:
                node_name = node.name
            else:
                node_name = node.train_name
                node_type = label_kind(node_name)
        elif kind in (IndirectOffset, Reg):
            node_name = node.train_name
            if not (node.n2p_type == self.binary.config.GIV):
                node_type = label_kind(node_name)
        elif kind == Ttype:
            node_name = node.train_name
            owner = node.owner
            # Only a Reg or IndirectOffset owner marked GIV makes the type
            # node given; every other owner goes through label_kind.
            owner_given = (type(owner) in (Reg, IndirectOffset)
                           and owner.n2p_type == self.binary.config.GIV)
            if not owner_given:
                node_type = label_kind(node_name)
        else:
            # Unrecognised node type: report it and emit nothing for it
            # rather than reusing stale or unbound name/kind values.
            print(node)
            continue

        emitted_name = '' if clear and node_type == 'inf' else node_name
        nodes.append(OrderedDict([('v', node.id), (node_type, emitted_name)]))
    return nodes
def add_unary_feature(feature, node):
    """Record a single-operand feature on ``node``.

    ``feature`` is a format string filled with one positional argument, a
    description of ``node``. A ``Reg`` receives the feature built from
    ``coarse(node)``; an ``IndirectOffset`` receives one built from
    ``coarse(node)`` and one from ``fine(node)``. Any other node is left
    untouched.
    """
    if isinstance(node, Reg):
        describers = (coarse,)
    elif isinstance(node, IndirectOffset):
        describers = (coarse, fine)
    else:
        return

    for describe in describers:
        node.features.add(feature.format(describe(node)))