예제 #1
0
파일: fast.py 프로젝트: tmars/PyCPlagIdent
    def sim_programs(self, prog1, prog2):
        """Return the fast similarity score of two programs.

        Four count tables of the programs are compared with
        ``self._simimilarity_dicts``: headers, control types, local variables
        and global variables.  Each partial score is multiplied by its
        ``fast.*_power`` value from ``Config`` and the products are added up.
        The partial scores and the total are reported through
        ``self._logger.info``.
        """
        compare = self._simimilarity_dicts

        # Partial scores, one per compared table.
        headers_sim = compare(prog1.num_of_headers(),
                              prog2.num_of_headers())
        ctrl_type_sim = compare(prog1.num_of_control_types(),
                                prog2.num_of_control_types())
        loc_vars_sim = compare(prog1.num_of_local_variables(),
                               prog2.num_of_local_variables())
        glob_vars_sim = compare(prog1.num_of_global_variables(),
                                prog2.num_of_global_variables())

        # Weighted sum of the partial scores.
        sim = (Config.get('fast.ctrl_type_power') * ctrl_type_sim
               + Config.get('fast.headers_power') * headers_sim
               + Config.get('fast.loc_vars_power') * loc_vars_sim
               + Config.get('fast.glob_vars_power') * glob_vars_sim)

        log = self._logger.info
        log("ctrl_type_sim = %f" % (ctrl_type_sim))
        log("headers_sim = %f" % (headers_sim))
        log("loc_vars_sim = %f" % (loc_vars_sim))
        log("glob_vars_sim = %f" % (glob_vars_sim))
        log("fast.sim_programs(%s, %s) = %f" % (prog1.name, prog2.name, sim))

        return sim
예제 #2
0
파일: fast.py 프로젝트: tmars/PyCPlagIdent
    def sim_functions(self, func1, func2):
        """Return the fast similarity score of two functions.

        Only the control-type counts and the local-variable counts are
        compared (with ``self._simimilarity_dicts``); each partial score is
        multiplied by its ``fast.func_*_power`` value from ``Config`` and the
        two products are added up.  The partial scores and the total are
        reported through ``self._logger.info``.
        """
        # control types
        ctrl_type_sim = self._simimilarity_dicts(
            func1.num_of_control_types(),
            func2.num_of_control_types()
        )

        # local variables
        loc_vars_sim = self._simimilarity_dicts(
            func1.num_of_local_variables(),
            func2.num_of_local_variables()
        )

        # No return-type comparison is made here: the fast score has no term
        # that would use its result.

        sim = Config.get('fast.func_ctrl_type_power') * ctrl_type_sim
        sim += Config.get('fast.func_loc_vars_power') * loc_vars_sim

        self._logger.info("ctrl_type_sim = %f" % (ctrl_type_sim))
        self._logger.info("loc_vars_sim = %f" % (loc_vars_sim))
        self._logger.info("fast.sim_functions(%s, %s) = %f" % (func1.name, func2.name, sim))

        return sim
예제 #3
0
    def extract_functions(self, target_prog, target, data, out_dir):
        """Write a target function and its similar functions to C files.

        The files go to ``<out_dir>/<target_prog.name>/funcs/<target.name>``,
        which is created when it does not exist yet:

        * ``src.c`` receives the extracted source of ``target``;
        * ``sim.c`` receives the extracted source of every function in
          ``data``.

        ``data`` is iterated as ``(program, function, f, s)`` tuples; ``f``
        and ``s`` are written into the comment that precedes each function
        as ``f.sim`` and ``s.sim``.
        """
        func_dir = out_dir + "/%s/funcs/%s" % (target_prog.name, target.name)
        if not os.path.exists(func_dir):
            os.makedirs(func_dir)

        filename_src = "%s/src.c" % (func_dir)
        filename_sim = "%s/sim.c" % (func_dir)

        # --- src.c: the target function itself ---
        target_out = SCFileDecorator(filename_src)
        target_out.write_comment(
            ["generated with %s %s" % (Config.get('package'), Config.get('version'))])
        target_out.write_comment(
            ["source code of", "target function=%s" % (target.name)])
        target_out.write(self.__extracted_function(target_prog, target))
        target_out.write_comment(["end of source code"])
        # Drop the writer before the next one is created, as the original
        # code did.  NOTE(review): presumably this is what finalises the
        # file -- confirm against SCFileDecorator.
        del target_out

        # --- sim.c: every similar function from ``data`` ---
        similar_out = SCFileDecorator(filename_sim)
        similar_out.write_comment(
            ["generated with %s %s" % (Config.get('package'), Config.get('version'))])
        similar_out.write_comment(["source code of", "similar functions"])

        for sim_prog, sim_func, f_sim, s_sim in data:
            header = [
                "program=%s" % (sim_prog.name),
                "function=%s" % (sim_func.name),
                "f.sim=%f" % (f_sim),
                "s.sim=%f" % (s_sim)
            ]
            similar_out.write_comment(header)
            similar_out.write(self.__extracted_function(sim_prog, sim_func))
            similar_out.write_comment(["end of function=%s" % (sim_func.name)])

        del similar_out
예제 #4
0
 def code(self):
     """Return the string code that identifies this type.

     The code is built from, in order: the pointer, function-pointer and
     array counts (each capped by its ``max_*_count`` value from
     ``Config``), the ``is_struct``, ``is_union`` and ``is_enum`` flags
     formatted as integers, and a zero-padded two-digit id of the base
     type.  The base type is looked up in ``self.TYPES`` under the type
     names joined with ``_``; ``self.UNKNOW_TYPE`` is used when the name
     is not listed.
     """
     typ = '_'.join(self.names)
     # dict.get replaces the deprecated has_key() check-then-index.
     type_id = self.TYPES.get(typ, self.UNKNOW_TYPE)
     return "%d%d%d%d%d%d%02d" % (
         min(self.pointer_count, Config.get('max_pointer_count')),
         min(self.func_pointer_count, Config.get('max_func_pointer_count')),
         min(self.array_count, Config.get('max_array_count')),
         self.is_struct, self.is_union, self.is_enum,
         type_id)
예제 #5
0
 def code(self):
     """Return the string code that identifies this type.

     The code is built from, in order: the pointer, function-pointer and
     array counts (each capped by its ``max_*_count`` value from
     ``Config``), the ``is_struct``, ``is_union`` and ``is_enum`` flags
     formatted as integers, and a zero-padded two-digit id of the base
     type.  The base type is looked up in ``self.TYPES`` under the type
     names joined with ``_``; ``self.UNKNOW_TYPE`` is used when the name
     is not listed.
     """
     typ = '_'.join(self.names)
     # dict.get replaces the deprecated has_key() check-then-index.
     type_id = self.TYPES.get(typ, self.UNKNOW_TYPE)
     return "%d%d%d%d%d%d%02d" % (
         min(self.pointer_count, Config.get('max_pointer_count')),
         min(self.func_pointer_count, Config.get('max_func_pointer_count')),
         min(self.array_count, Config.get('max_array_count')),
         self.is_struct,
         self.is_union,
         self.is_enum,
         type_id
     )
예제 #6
0
    def sim_functions(self, func1, func2):
        """Return the detailed similarity score of two functions.

        Five partial scores are computed: control-type counts, argument
        variables and local variables (compared with
        ``self._simimilarity_dicts``), the sequence of control types
        (compared with ``self._simimilarity_lists``) and the return type
        (1 when both ``ret_type.code()`` values are equal, otherwise 0).
        Each partial score is multiplied by its ``detailed.*_power`` value
        from ``Config`` and the products are added up.  Every partial score
        and the total are reported once through ``self._logger.info``.
        """
        # control types
        ctrl_type_sim = self._simimilarity_dicts(
            func1.num_of_control_types(),
            func2.num_of_control_types()
        )

        # argument types
        args_vars_sim = self._simimilarity_dicts(
            func1.num_of_arguments_variables(),
            func2.num_of_arguments_variables()
        )

        # local variables
        loc_vars_sim = self._simimilarity_dicts(
            func1.num_of_local_variables(),
            func2.num_of_local_variables()
        )

        # control sequence
        ctrl_seq_sim = self._simimilarity_lists(
            func1.seq_of_control_types(),
            func2.seq_of_control_types()
        )

        # return type
        ret_type_sim = 1 if func1.ret_type.code() == func2.ret_type.code() else 0

        sim = Config.get('detailed.ctrl_type_power') * ctrl_type_sim
        sim += Config.get('detailed.ctrl_seq_power') * ctrl_seq_sim
        sim += Config.get('detailed.loc_vars_power') * loc_vars_sim
        sim += Config.get('detailed.ret_type_power') * ret_type_sim
        sim += Config.get('detailed.args_vars_power') * args_vars_sim

        self._logger.info("ctrl_type_sim = %f" % (ctrl_type_sim))
        self._logger.info("ctrl_seq_sim = %f" % (ctrl_seq_sim))
        self._logger.info("loc_vars_sim = %f" % (loc_vars_sim))
        self._logger.info("ret_type_sim = %f" % (ret_type_sim))
        self._logger.info("args_vars_sim = %f" % (args_vars_sim))
        self._logger.info("detailed.sim_functions(%s, %s) = %f" % (func1.name, func2.name, sim))

        return sim
예제 #7
0
    def sim_programs(self, prog1, prog2):
        """Return the detailed similarity score of two programs.

        The score is the sum of three partial scores, each multiplied by its
        ``detailed.*_power`` value from ``Config``:

        * header similarity and global-variable similarity, both computed
          with ``self._simimilarity_dicts``;
        * function similarity: ``self.sim_functions`` is evaluated for every
          pair of functions, the pairs are chosen with ``Munkres().compute``
          and every chosen pair contributes its score multiplied by
          ``(len(seq1) + len(seq2)) / total control blocks``, where ``seq1``
          and ``seq2`` are the pair's ``seq_of_control_types()``.

        Every partial score and the total are reported through
        ``self._logger.info``.
        """
        # headers
        headers_sim = self._simimilarity_dicts(
            prog1.num_of_headers(),
            prog2.num_of_headers()
        )

        # global variables
        glob_vars_sim = self._simimilarity_dicts(
            prog1.num_of_global_variables(),
            prog2.num_of_global_variables()
        )

        # function similarity: one row per function of prog1, one column per
        # function of prog2, each cell scaled by ``precision``.
        precision = 10000
        matrix = [
            [self.sim_functions(func1, func2) * precision
             for func2 in prog2.functions]
            for func1 in prog1.functions
        ]

        # Munkres minimises cost, so each scaled similarity is turned into
        # its integer distance from the best one.  Subtracting from a huge
        # constant such as sys.maxint would be done in floating point, where
        # values close to 2**63 are 1024 apart, so similarities differing by
        # less than roughly 0.1 would collapse into the same cost.
        best = max([int(round(max(row))) for row in matrix if row] or [0])
        cost_matrix = make_cost_matrix(
            matrix, lambda profit: best - int(round(profit)))
        m = Munkres()
        indexes = m.compute(cost_matrix)

        total_blocks = (prog1.count_of_control_blocks() +
                        prog2.count_of_control_blocks())
        # NOTE(review): with no control blocks in either program the function
        # part is scored 0 instead of raising ZeroDivisionError -- confirm
        # this is the wanted result for such programs.
        score = 1.0 / total_blocks if total_blocks else 0.0

        funcs_sim = 0
        for i, j in indexes:
            weight = (len(prog1.functions[i].seq_of_control_types()) +
                      len(prog2.functions[j].seq_of_control_types())) * score
            funcs_sim += (matrix[i][j] / precision) * weight

        sim = Config.get('detailed.glob_vars_power') * glob_vars_sim
        sim += Config.get('detailed.headers_power') * headers_sim
        sim += Config.get('detailed.funcs_power') * funcs_sim

        self._logger.info("headers_sim = %f" % headers_sim)
        self._logger.info("glob_vars_sim = %f" % glob_vars_sim)
        self._logger.info("funcs_sim = %f" % funcs_sim)
        self._logger.info("detailed.sim_programs(%s, %s) = %f" % (prog1.name, prog2.name, sim))

        return sim
예제 #8
0
    def sim_programs(self, prog1, prog2):
        """Return the detailed similarity score of two programs.

        The score is the sum of three partial scores, each multiplied by its
        ``detailed.*_power`` value from ``Config``:

        * header similarity and global-variable similarity, both computed
          with ``self._simimilarity_dicts``;
        * function similarity: ``self.sim_functions`` is evaluated for every
          pair of functions, the pairs are chosen with ``Munkres().compute``
          and every chosen pair contributes its score multiplied by
          ``(len(seq1) + len(seq2)) / total control blocks``, where ``seq1``
          and ``seq2`` are the pair's ``seq_of_control_types()``.

        Every partial score and the total are reported through
        ``self._logger.info``.
        """
        # headers
        headers_sim = self._simimilarity_dicts(prog1.num_of_headers(),
                                               prog2.num_of_headers())

        # global variables
        glob_vars_sim = self._simimilarity_dicts(
            prog1.num_of_global_variables(), prog2.num_of_global_variables())

        # function similarity: one row per function of prog1, one column per
        # function of prog2, each cell scaled by ``precision``.
        precision = 10000
        matrix = [
            [self.sim_functions(func1, func2) * precision
             for func2 in prog2.functions]
            for func1 in prog1.functions
        ]

        # Munkres minimises cost, so each scaled similarity is turned into
        # its integer distance from the best one.  Subtracting from a huge
        # constant such as sys.maxint would be done in floating point, where
        # values close to 2**63 are 1024 apart, so similarities differing by
        # less than roughly 0.1 would collapse into the same cost.
        best = max([int(round(max(row))) for row in matrix if row] or [0])
        cost_matrix = make_cost_matrix(
            matrix, lambda profit: best - int(round(profit)))
        m = Munkres()
        indexes = m.compute(cost_matrix)

        total_blocks = (prog1.count_of_control_blocks() +
                        prog2.count_of_control_blocks())
        # NOTE(review): with no control blocks in either program the function
        # part is scored 0 instead of raising ZeroDivisionError -- confirm
        # this is the wanted result for such programs.
        score = 1.0 / total_blocks if total_blocks else 0.0

        funcs_sim = 0
        for i, j in indexes:
            weight = (len(prog1.functions[i].seq_of_control_types()) +
                      len(prog2.functions[j].seq_of_control_types())) * score
            funcs_sim += (matrix[i][j] / precision) * weight

        sim = Config.get('detailed.glob_vars_power') * glob_vars_sim
        sim += Config.get('detailed.headers_power') * headers_sim
        sim += Config.get('detailed.funcs_power') * funcs_sim

        self._logger.info("headers_sim = %f" % headers_sim)
        self._logger.info("glob_vars_sim = %f" % glob_vars_sim)
        self._logger.info("funcs_sim = %f" % funcs_sim)
        self._logger.info("detailed.sim_programs(%s, %s) = %f" %
                          (prog1.name, prog2.name, sim))

        return sim
예제 #9
0
    def sim_functions(self, func1, func2):
        """Return the detailed similarity score of two functions.

        Five partial scores are computed: control-type counts, argument
        variables and local variables (compared with
        ``self._simimilarity_dicts``), the sequence of control types
        (compared with ``self._simimilarity_lists``) and the return type
        (1 when both ``ret_type.code()`` values are equal, otherwise 0).
        Each partial score is multiplied by its ``detailed.*_power`` value
        from ``Config`` and the products are added up.  Every partial score
        and the total are reported once through ``self._logger.info``.
        """
        # control types
        ctrl_type_sim = self._simimilarity_dicts(func1.num_of_control_types(),
                                                 func2.num_of_control_types())

        # argument types
        args_vars_sim = self._simimilarity_dicts(
            func1.num_of_arguments_variables(),
            func2.num_of_arguments_variables())

        # local variables
        loc_vars_sim = self._simimilarity_dicts(func1.num_of_local_variables(),
                                                func2.num_of_local_variables())

        # control sequence
        ctrl_seq_sim = self._simimilarity_lists(func1.seq_of_control_types(),
                                                func2.seq_of_control_types())

        # return type
        same_ret_type = func1.ret_type.code() == func2.ret_type.code()
        ret_type_sim = 1 if same_ret_type else 0

        sim = Config.get('detailed.ctrl_type_power') * ctrl_type_sim
        sim += Config.get('detailed.ctrl_seq_power') * ctrl_seq_sim
        sim += Config.get('detailed.loc_vars_power') * loc_vars_sim
        sim += Config.get('detailed.ret_type_power') * ret_type_sim
        sim += Config.get('detailed.args_vars_power') * args_vars_sim

        self._logger.info("ctrl_type_sim = %f" % (ctrl_type_sim))
        self._logger.info("ctrl_seq_sim = %f" % (ctrl_seq_sim))
        self._logger.info("loc_vars_sim = %f" % (loc_vars_sim))
        self._logger.info("ret_type_sim = %f" % (ret_type_sim))
        self._logger.info("args_vars_sim = %f" % (args_vars_sim))
        self._logger.info("detailed.sim_functions(%s, %s) = %f" %
                          (func1.name, func2.name, sim))

        return sim
예제 #10
0
파일: fast.py 프로젝트: tmars/PyCPlagIdent
    def sim_functions(self, func1, func2):
        """Return the fast similarity score of two functions.

        Only the control-type counts and the local-variable counts are
        compared (with ``self._simimilarity_dicts``); each partial score is
        multiplied by its ``fast.func_*_power`` value from ``Config`` and the
        two products are added up.  The partial scores and the total are
        reported through ``self._logger.info``.
        """
        # control types
        ctrl_type_sim = self._simimilarity_dicts(func1.num_of_control_types(),
                                                 func2.num_of_control_types())

        # local variables
        loc_vars_sim = self._simimilarity_dicts(func1.num_of_local_variables(),
                                                func2.num_of_local_variables())

        # No return-type comparison is made here: the fast score has no term
        # that would use its result.

        sim = Config.get('fast.func_ctrl_type_power') * ctrl_type_sim
        sim += Config.get('fast.func_loc_vars_power') * loc_vars_sim

        self._logger.info("ctrl_type_sim = %f" % (ctrl_type_sim))
        self._logger.info("loc_vars_sim = %f" % (loc_vars_sim))
        self._logger.info("fast.sim_functions(%s, %s) = %f" %
                          (func1.name, func2.name, sim))

        return sim
예제 #11
0
파일: fast.py 프로젝트: tmars/PyCPlagIdent
    def sim_programs(self, prog1, prog2):
        """Compute the fast similarity of ``prog1`` and ``prog2``.

        ``self._simimilarity_dicts`` is applied to the programs' header,
        control-type, local-variable and global-variable counts.  The
        result is the sum of those four scores, each scaled by the matching
        ``fast.*_power`` value read from ``Config``.  All four scores and
        the result are sent to ``self._logger.info``.
        """
        similarity = self._simimilarity_dicts

        headers_sim = similarity(prog1.num_of_headers(),
                                 prog2.num_of_headers())

        # control types
        ctrl_type_sim = similarity(prog1.num_of_control_types(),
                                   prog2.num_of_control_types())

        # local variables
        loc_vars_sim = similarity(prog1.num_of_local_variables(),
                                  prog2.num_of_local_variables())

        # global variables
        glob_vars_sim = similarity(prog1.num_of_global_variables(),
                                   prog2.num_of_global_variables())

        # Scale every score by its configured weight and add them up.
        ctrl_type_part = Config.get('fast.ctrl_type_power') * ctrl_type_sim
        headers_part = Config.get('fast.headers_power') * headers_sim
        loc_vars_part = Config.get('fast.loc_vars_power') * loc_vars_sim
        glob_vars_part = Config.get('fast.glob_vars_power') * glob_vars_sim
        sim = ctrl_type_part + headers_part + loc_vars_part + glob_vars_part

        info = self._logger.info
        info("ctrl_type_sim = %f" % (ctrl_type_sim))
        info("headers_sim = %f" % (headers_sim))
        info("loc_vars_sim = %f" % (loc_vars_sim))
        info("glob_vars_sim = %f" % (glob_vars_sim))
        info("fast.sim_programs(%s, %s) = %f" % (prog1.name, prog2.name, sim))

        return sim
예제 #12
0
 def parse_headers(filename):
     """Return the standard headers included by the file ``filename``.

     Every line that, after stripping, starts with ``#include`` is
     examined.  The header name is the text between ``<`` and ``>`` or,
     when the line has no ``<``, between the first two double quotes.  A
     name is kept only when it is listed in the ``standart_headers``
     setting of ``Config``.  Names are returned in file order; a header
     included several times appears several times.
     """
     standard_headers = Config.get('standart_headers')
     headers = []
     # The with-block closes the file even if reading fails.
     with open(filename) as source:
         for line in source:
             if not line.strip().startswith('#include'):
                 continue
             start = line.find('<')
             if start == -1:
                 start = line.find('"')
                 end = line.find('"', start + 1)
             else:
                 end = line.find('>')
             # When the closing delimiter is missing, ``end`` is -1 and the
             # slice stops one character before the end of the line.
             header = line[start + 1:end]
             if header in standard_headers:
                 headers.append(header)
     return headers