def sim_programs(self, prog1, prog2):
    """Compute the "fast" similarity score of two programs.

    The header, control-type, local-variable and global-variable counts of
    both programs are compared with ``self._simimilarity_dicts``.  The four
    partial scores are combined as a sum weighted by the ``fast.*_power``
    values read from ``Config``.  Every partial score and the final score
    are reported through ``self._logger.info``.

    Returns the weighted sum.
    """
    compare = self._simimilarity_dicts
    by_headers = compare(prog1.num_of_headers(), prog2.num_of_headers())
    by_ctrl_types = compare(prog1.num_of_control_types(),
                            prog2.num_of_control_types())
    by_loc_vars = compare(prog1.num_of_local_variables(),
                          prog2.num_of_local_variables())
    by_glob_vars = compare(prog1.num_of_global_variables(),
                           prog2.num_of_global_variables())

    # (log template, weight key, partial score), listed in the order in
    # which the terms are summed and logged.
    parts = (
        ("ctrl_type_sim = %f", 'fast.ctrl_type_power', by_ctrl_types),
        ("headers_sim = %f", 'fast.headers_power', by_headers),
        ("loc_vars_sim = %f", 'fast.loc_vars_power', by_loc_vars),
        ("glob_vars_sim = %f", 'fast.glob_vars_power', by_glob_vars),
    )

    weighted = [Config.get(key) * value for _, key, value in parts]
    total = weighted[0]
    for term in weighted[1:]:
        total += term

    log = self._logger.info
    for template, _, value in parts:
        log(template % (value))
    log("fast.sim_programs(%s, %s) = %f" % (prog1.name, prog2.name, total))
    return total
def sim_functions(self, func1, func2):
    """Compute the "fast" similarity score of two functions.

    The control-type counts and the local-variable counts of both functions
    are compared with ``self._simimilarity_dicts`` and combined as a sum
    weighted by ``fast.func_ctrl_type_power`` and
    ``fast.func_loc_vars_power`` from ``Config``.  Both partial scores and
    the final score are reported through ``self._logger.info``.

    Returns the weighted sum.
    """
    # control types
    ctrl_type_sim = self._simimilarity_dicts(
        func1.num_of_control_types(), func2.num_of_control_types())
    # local variables
    loc_vars_sim = self._simimilarity_dicts(
        func1.num_of_local_variables(), func2.num_of_local_variables())

    # NOTE(review): an earlier revision also compared the return-type codes
    # here but never added the result to the score or logged it.  That dead
    # computation was dropped; if the return type is meant to count in the
    # fast metric it needs its own weight -- confirm.
    sim = Config.get('fast.func_ctrl_type_power') * ctrl_type_sim
    sim += Config.get('fast.func_loc_vars_power') * loc_vars_sim

    self._logger.info("ctrl_type_sim = %f" % (ctrl_type_sim))
    self._logger.info("loc_vars_sim = %f" % (loc_vars_sim))
    self._logger.info("fast.sim_functions(%s, %s) = %f"
                      % (func1.name, func2.name, sim))
    return sim
def extract_functions(self, target_prog, target, data, out_dir):
    """Write the target function and its similar functions to C files.

    The directory ``<out_dir>/<target_prog.name>/funcs/<target.name>`` is
    created when missing and receives two files, both written through
    ``SCFileDecorator``:

    * ``src.c`` -- a generator banner followed by the extracted source of
      *target*;
    * ``sim.c`` -- a generator banner followed by the extracted source of
      every function listed in *data*.

    *data* is iterated as ``(program, function, f, s)`` tuples; ``f`` and
    ``s`` are written into each function's comment as ``f.sim`` and
    ``s.sim``.
    """
    func_dir = out_dir + "/%s/funcs/%s" % (target_prog.name, target.name)
    if not os.path.exists(func_dir):
        os.makedirs(func_dir)

    target_path = "%s/src.c" % (func_dir)
    similar_path = "%s/sim.c" % (func_dir)

    # --- src.c: the function the similar ones were searched for ---
    target_out = SCFileDecorator(target_path)
    target_out.write_comment([
        "generated with %s %s" % (Config.get('package'),
                                  Config.get('version')),
    ])
    target_out.write_comment([
        "source code of",
        "target function=%s" % (target.name),
    ])
    target_out.write(self.__extracted_function(target_prog, target))
    target_out.write_comment(["end of source code"])
    # Drop the reference before opening the next file; presumably this lets
    # SCFileDecorator finish the file when it is destroyed -- confirm.
    del target_out

    # --- sim.c: every similar function with its scores ---
    similar_out = SCFileDecorator(similar_path)
    similar_out.write_comment([
        "generated with %s %s" % (Config.get('package'),
                                  Config.get('version')),
    ])
    similar_out.write_comment(["source code of", "similar functions"])
    for entry in data:
        other_prog, other_func, f_sim, s_sim = entry
        banner = [
            "program=%s" % (other_prog.name),
            "function=%s" % (other_func.name),
            "f.sim=%f" % (f_sim),
            "s.sim=%f" % (s_sim),
        ]
        similar_out.write_comment(banner)
        similar_out.write(self.__extracted_function(other_prog, other_func))
        similar_out.write_comment(["end of function=%s" % (other_func.name)])
    del similar_out
def code(self):
    """Return the numeric code string that identifies this type.

    The string is built with ``"%d%d%d%d%d%d%02d"`` from, in order:

    * ``pointer_count``, ``func_pointer_count`` and ``array_count``, each
      capped at its ``max_*`` value from ``Config``;
    * the ``is_struct``, ``is_union`` and ``is_enum`` flags;
    * the two-digit id stored in ``self.TYPES`` under the type names joined
      with ``'_'``, or ``self.UNKNOW_TYPE`` when that name is not listed.
    """
    base_name = '_'.join(self.names)
    return "%d%d%d%d%d%d%02d" % (
        min(self.pointer_count, Config.get('max_pointer_count')),
        min(self.func_pointer_count, Config.get('max_func_pointer_count')),
        min(self.array_count, Config.get('max_array_count')),
        self.is_struct,
        self.is_union,
        self.is_enum,
        # dict.has_key() no longer exists on Python 3; get() with a default
        # is a single lookup that works on every Python version.
        self.TYPES.get(base_name, self.UNKNOW_TYPE),
    )
def code(self):
    """Return the numeric code string that identifies this type.

    The string is built with ``"%d%d%d%d%d%d%02d"`` from, in order:

    * ``pointer_count``, ``func_pointer_count`` and ``array_count``, each
      capped at its ``max_*`` value from ``Config``;
    * the ``is_struct``, ``is_union`` and ``is_enum`` flags;
    * the two-digit id stored in ``self.TYPES`` under the type names joined
      with ``'_'``, or ``self.UNKNOW_TYPE`` when that name is not listed.
    """
    base_name = '_'.join(self.names)
    return "%d%d%d%d%d%d%02d" % (
        min(self.pointer_count, Config.get('max_pointer_count')),
        min(self.func_pointer_count, Config.get('max_func_pointer_count')),
        min(self.array_count, Config.get('max_array_count')),
        self.is_struct,
        self.is_union,
        self.is_enum,
        # dict.has_key() no longer exists on Python 3; get() with a default
        # is a single lookup that works on every Python version.
        self.TYPES.get(base_name, self.UNKNOW_TYPE),
    )
def sim_functions(self, func1, func2):
    """Compute the "detailed" similarity score of two functions.

    Five partial scores are computed:

    * control-type counts, argument-variable counts and local-variable
      counts, each compared with ``self._simimilarity_dicts``;
    * the sequence of control types, compared with
      ``self._simimilarity_lists``;
    * the return type: 1 when both ``ret_type.code()`` values are equal,
      otherwise 0.

    They are combined as a sum weighted by the ``detailed.*_power`` values
    from ``Config``.  Every partial score and the final score are reported
    once through ``self._logger.info``.

    Returns the weighted sum.
    """
    # control types
    ctrl_type_sim = self._simimilarity_dicts(
        func1.num_of_control_types(), func2.num_of_control_types())
    # argument types
    args_vars_sim = self._simimilarity_dicts(
        func1.num_of_arguments_variables(),
        func2.num_of_arguments_variables())
    # local variables
    loc_vars_sim = self._simimilarity_dicts(
        func1.num_of_local_variables(), func2.num_of_local_variables())
    # control sequence
    ctrl_seq_sim = self._simimilarity_lists(
        func1.seq_of_control_types(), func2.seq_of_control_types())
    # return type
    ret_type_sim = 1 if func1.ret_type.code() == func2.ret_type.code() else 0

    sim = Config.get('detailed.ctrl_type_power') * ctrl_type_sim
    sim += Config.get('detailed.ctrl_seq_power') * ctrl_seq_sim
    sim += Config.get('detailed.loc_vars_power') * loc_vars_sim
    sim += Config.get('detailed.ret_type_power') * ret_type_sim
    sim += Config.get('detailed.args_vars_power') * args_vars_sim

    # One line per component (ctrl_type_sim used to be logged twice).
    self._logger.info("ctrl_type_sim = %f" % (ctrl_type_sim))
    self._logger.info("ctrl_seq_sim = %f" % (ctrl_seq_sim))
    self._logger.info("loc_vars_sim = %f" % (loc_vars_sim))
    self._logger.info("ret_type_sim = %f" % (ret_type_sim))
    self._logger.info("args_vars_sim = %f" % (args_vars_sim))
    self._logger.info("detailed.sim_functions(%s, %s) = %f"
                      % (func1.name, func2.name, sim))
    return sim
def sim_programs(self, prog1, prog2):
    """Compute the "detailed" similarity score of two programs.

    Three partial scores are combined as a sum weighted by the
    ``detailed.*_power`` values from ``Config``:

    * header counts and global-variable counts, each compared with
      ``self._simimilarity_dicts``;
    * a function score: every function of *prog1* is scored against every
      function of *prog2* with ``self.sim_functions``, the Munkres
      assignment picks the pairing with the highest total, and each chosen
      pair contributes its similarity weighted by the pair's share of
      control types (sum of both ``seq_of_control_types()`` lengths divided
      by the sum of both programs' ``count_of_control_blocks()``).

    When either program has no functions, or neither has any control
    block, the function score is 0 instead of raising.

    Returns the weighted sum; all partial scores and the total are logged.
    """
    # headers
    headers_sim = self._simimilarity_dicts(
        prog1.num_of_headers(), prog2.num_of_headers())
    # global variables
    glob_vars_sim = self._simimilarity_dicts(
        prog1.num_of_global_variables(), prog2.num_of_global_variables())

    # function similarity
    funcs1 = prog1.functions
    funcs2 = prog2.functions
    # A float scale keeps the later division a true division on Python 2
    # even if a similarity happens to be an int.
    precision = 10000.0
    # Real lists are built here: range() rows cannot be assigned to on
    # Python 3.
    matrix = [
        [self.sim_functions(f1, f2) * precision for f2 in funcs2]
        for f1 in funcs1
    ]

    if funcs1 and funcs2:
        # Munkres minimises cost, so profits are flipped around the largest
        # entry.  The previous constant, sys.maxint, is so large that
        # subtracting a float profit from it rounds away the low-order
        # digits of the similarity, and it is gone on Python 3.
        peak = max(max(row) for row in matrix)
        cost_matrix = make_cost_matrix(matrix, lambda profit: peak - profit)
        indexes = Munkres().compute(cost_matrix)
    else:
        # Nothing to pair up.
        indexes = []

    total_blocks = (prog1.count_of_control_blocks() +
                    prog2.count_of_control_blocks())
    # Guard against 1.0 / 0 when neither program has a control block.
    score = 1.0 / total_blocks if total_blocks else 0.0

    funcs_sim = 0
    for i, j in indexes:
        pair_blocks = (len(funcs1[i].seq_of_control_types()) +
                       len(funcs2[j].seq_of_control_types()))
        funcs_sim += (matrix[i][j] / precision) * (pair_blocks * score)

    sim = Config.get('detailed.glob_vars_power') * glob_vars_sim
    sim += Config.get('detailed.headers_power') * headers_sim
    sim += Config.get('detailed.funcs_power') * funcs_sim

    self._logger.info("headers_sim = %f" % headers_sim)
    self._logger.info("glob_vars_sim = %f" % glob_vars_sim)
    self._logger.info("funcs_sim = %f" % funcs_sim)
    # Report both program names (prog1.name used to be printed twice).
    self._logger.info("detailed.sim_programs(%s, %s) = %f"
                      % (prog1.name, prog2.name, sim))
    return sim
def sim_programs(self, prog1, prog2):
    """Compute the "detailed" similarity score of two programs.

    Three partial scores are combined as a sum weighted by the
    ``detailed.*_power`` values from ``Config``:

    * header counts and global-variable counts, each compared with
      ``self._simimilarity_dicts``;
    * a function score: every function of *prog1* is scored against every
      function of *prog2* with ``self.sim_functions``, the Munkres
      assignment picks the pairing with the highest total, and each chosen
      pair contributes its similarity weighted by the pair's share of
      control types (sum of both ``seq_of_control_types()`` lengths divided
      by the sum of both programs' ``count_of_control_blocks()``).

    When either program has no functions, or neither has any control
    block, the function score is 0 instead of raising.

    Returns the weighted sum; all partial scores and the total are logged.
    """
    # headers
    headers_sim = self._simimilarity_dicts(
        prog1.num_of_headers(), prog2.num_of_headers())
    # global variables
    glob_vars_sim = self._simimilarity_dicts(
        prog1.num_of_global_variables(), prog2.num_of_global_variables())

    # function similarity
    funcs1 = prog1.functions
    funcs2 = prog2.functions
    # A float scale keeps the later division a true division on Python 2
    # even if a similarity happens to be an int.
    precision = 10000.0
    # Real lists are built here: range() rows cannot be assigned to on
    # Python 3.
    matrix = [
        [self.sim_functions(f1, f2) * precision for f2 in funcs2]
        for f1 in funcs1
    ]

    if funcs1 and funcs2:
        # Munkres minimises cost, so profits are flipped around the largest
        # entry.  The previous constant, sys.maxint, is so large that
        # subtracting a float profit from it rounds away the low-order
        # digits of the similarity, and it is gone on Python 3.
        peak = max(max(row) for row in matrix)
        cost_matrix = make_cost_matrix(matrix, lambda profit: peak - profit)
        indexes = Munkres().compute(cost_matrix)
    else:
        # Nothing to pair up.
        indexes = []

    total_blocks = (prog1.count_of_control_blocks() +
                    prog2.count_of_control_blocks())
    # Guard against 1.0 / 0 when neither program has a control block.
    score = 1.0 / total_blocks if total_blocks else 0.0

    funcs_sim = 0
    for i, j in indexes:
        pair_blocks = (len(funcs1[i].seq_of_control_types()) +
                       len(funcs2[j].seq_of_control_types()))
        funcs_sim += (matrix[i][j] / precision) * (pair_blocks * score)

    sim = Config.get('detailed.glob_vars_power') * glob_vars_sim
    sim += Config.get('detailed.headers_power') * headers_sim
    sim += Config.get('detailed.funcs_power') * funcs_sim

    self._logger.info("headers_sim = %f" % headers_sim)
    self._logger.info("glob_vars_sim = %f" % glob_vars_sim)
    self._logger.info("funcs_sim = %f" % funcs_sim)
    # Report both program names (prog1.name used to be printed twice).
    self._logger.info("detailed.sim_programs(%s, %s) = %f"
                      % (prog1.name, prog2.name, sim))
    return sim
def sim_functions(self, func1, func2):
    """Compute the "detailed" similarity score of two functions.

    Five partial scores are computed:

    * control-type counts, argument-variable counts and local-variable
      counts, each compared with ``self._simimilarity_dicts``;
    * the sequence of control types, compared with
      ``self._simimilarity_lists``;
    * the return type: 1 when both ``ret_type.code()`` values are equal,
      otherwise 0.

    They are combined as a sum weighted by the ``detailed.*_power`` values
    from ``Config``.  Every partial score and the final score are reported
    once through ``self._logger.info``.

    Returns the weighted sum.
    """
    # control types
    ctrl_type_sim = self._simimilarity_dicts(
        func1.num_of_control_types(), func2.num_of_control_types())
    # argument types
    args_vars_sim = self._simimilarity_dicts(
        func1.num_of_arguments_variables(),
        func2.num_of_arguments_variables())
    # local variables
    loc_vars_sim = self._simimilarity_dicts(
        func1.num_of_local_variables(), func2.num_of_local_variables())
    # control sequence
    ctrl_seq_sim = self._simimilarity_lists(
        func1.seq_of_control_types(), func2.seq_of_control_types())
    # return type
    ret_type_sim = 1 if func1.ret_type.code() == func2.ret_type.code() else 0

    sim = Config.get('detailed.ctrl_type_power') * ctrl_type_sim
    sim += Config.get('detailed.ctrl_seq_power') * ctrl_seq_sim
    sim += Config.get('detailed.loc_vars_power') * loc_vars_sim
    sim += Config.get('detailed.ret_type_power') * ret_type_sim
    sim += Config.get('detailed.args_vars_power') * args_vars_sim

    # One line per component (ctrl_type_sim used to be logged twice).
    self._logger.info("ctrl_type_sim = %f" % (ctrl_type_sim))
    self._logger.info("ctrl_seq_sim = %f" % (ctrl_seq_sim))
    self._logger.info("loc_vars_sim = %f" % (loc_vars_sim))
    self._logger.info("ret_type_sim = %f" % (ret_type_sim))
    self._logger.info("args_vars_sim = %f" % (args_vars_sim))
    self._logger.info("detailed.sim_functions(%s, %s) = %f"
                      % (func1.name, func2.name, sim))
    return sim
def sim_functions(self, func1, func2):
    """Compute the "fast" similarity score of two functions.

    The control-type counts and the local-variable counts of both functions
    are compared with ``self._simimilarity_dicts`` and combined as a sum
    weighted by ``fast.func_ctrl_type_power`` and
    ``fast.func_loc_vars_power`` from ``Config``.  Both partial scores and
    the final score are reported through ``self._logger.info``.

    Returns the weighted sum.
    """
    # control types
    ctrl_type_sim = self._simimilarity_dicts(
        func1.num_of_control_types(), func2.num_of_control_types())
    # local variables
    loc_vars_sim = self._simimilarity_dicts(
        func1.num_of_local_variables(), func2.num_of_local_variables())

    # NOTE(review): an earlier revision also compared the return-type codes
    # here but never added the result to the score or logged it.  That dead
    # computation was dropped; if the return type is meant to count in the
    # fast metric it needs its own weight -- confirm.
    sim = Config.get('fast.func_ctrl_type_power') * ctrl_type_sim
    sim += Config.get('fast.func_loc_vars_power') * loc_vars_sim

    self._logger.info("ctrl_type_sim = %f" % (ctrl_type_sim))
    self._logger.info("loc_vars_sim = %f" % (loc_vars_sim))
    self._logger.info("fast.sim_functions(%s, %s) = %f"
                      % (func1.name, func2.name, sim))
    return sim
def sim_programs(self, prog1, prog2):
    """Compute the "fast" similarity score of two programs.

    The header, control-type, local-variable and global-variable counts of
    both programs are compared with ``self._simimilarity_dicts``.  The four
    partial scores are combined as a sum weighted by the ``fast.*_power``
    values read from ``Config``.  Every partial score and the final score
    are reported through ``self._logger.info``.

    Returns the weighted sum.
    """
    compare = self._simimilarity_dicts
    by_headers = compare(prog1.num_of_headers(), prog2.num_of_headers())
    by_ctrl_types = compare(prog1.num_of_control_types(),
                            prog2.num_of_control_types())
    by_loc_vars = compare(prog1.num_of_local_variables(),
                          prog2.num_of_local_variables())
    by_glob_vars = compare(prog1.num_of_global_variables(),
                           prog2.num_of_global_variables())

    # (log template, weight key, partial score), listed in the order in
    # which the terms are summed and logged.
    parts = (
        ("ctrl_type_sim = %f", 'fast.ctrl_type_power', by_ctrl_types),
        ("headers_sim = %f", 'fast.headers_power', by_headers),
        ("loc_vars_sim = %f", 'fast.loc_vars_power', by_loc_vars),
        ("glob_vars_sim = %f", 'fast.glob_vars_power', by_glob_vars),
    )

    weighted = [Config.get(key) * value for _, key, value in parts]
    total = weighted[0]
    for term in weighted[1:]:
        total += term

    log = self._logger.info
    for template, _, value in parts:
        log(template % (value))
    log("fast.sim_programs(%s, %s) = %f" % (prog1.name, prog2.name, total))
    return total
def parse_headers(filename):
    """Return the standard headers included by the source file *filename*.

    Every line whose stripped text starts with ``#include`` is inspected.
    The header name is taken from between ``<`` and ``>`` or between two
    double quotes, and is kept only when it is listed in
    ``Config.get('standart_headers')``.  Include lines without an opening
    delimiter or without the matching closing one are skipped.

    Returns the kept header names in file order (duplicates included).
    """
    headers = []
    # The with-block closes the file even if reading fails.
    with open(filename) as source:
        for line in source:
            if not line.strip().startswith('#include'):
                continue

            # Whichever opener comes first on the line decides the form, so
            # a '<' later on the line (e.g. in a trailing comment) does not
            # override a quoted include.
            angle = line.find('<')
            quote = line.find('"')
            if angle != -1 and (quote == -1 or angle < quote):
                start, closer = angle, '>'
            elif quote != -1:
                start, closer = quote, '"'
            else:
                continue  # no header name on this line

            end = line.find(closer, start + 1)
            if end == -1:
                continue  # unterminated header name

            header = line[start + 1:end]
            if header in Config.get('standart_headers'):
                headers.append(header)
    return headers