def check_counting_condition(self, conditions, function, expected):
    """
    Asserts that the count vectors of one function equal the expected data.

    :param conditions: String naming the condition(s) to count with; it is
                       wrapped in a Setting and handed to
                       ClangCountingConditions.counting_condition.
    :param function:   Key of the function to look up in the vectors
                       retrieved for the test file
                       (i.e. "used(int, int)").
    :param expected:   Dict mapping every variable to its expected python
                       list of counts.
    """
    condition_setting = Setting("irrelevant", conditions)
    creator = ClangCountVectorCreator(
        ClangCountingConditions.counting_condition(condition_setting))
    actual = creator.get_vectors_for_file(self.testfile)[function]

    # Compare size and key set first so that the per-variable lookups in
    # `expected` below cannot miss.
    self.assertEqual(len(actual), len(expected),
                     "Actual dict: " + str(actual))
    self.assertEqual(sorted(actual.keys()), sorted(expected.keys()))

    for variable, vector in actual.items():
        self.assertEqual(vector.count_vector,
                         expected[variable],
                         "Variable '{}' doesnt match.".format(variable))
def test_counting(self):
    # Expected two-element count vectors for main(); presumably one count
    # per condition handed to the creator below.
    main_counts = {
        # Variables
        "i": [4, 1],
        "asd": [1, 0],
        "t": [4, 1],
        "args": [2, 0],
        # Globals
        "g": [3, 1],
        # Functions
        "smile": [1, 1],
        "printf": [1, 1],
        # Constants
        "#5": [1, 0],
        '#"i is %d"': [1, 1],
    }
    expected_results = {
        # test() is expected to yield no entries at all.
        (6, "test()"): {},
        (12, "main(int, char *)"): main_counts,
    }

    self.uut = ClangCountVectorCreator([no_condition, is_call_argument])
    self.check_cv_dict(self.uut.get_vectors_for_file(self.testfile),
                       expected_results)
def test_empty_counting(self):
    # A creator built without conditions is expected to still list every
    # entity of main(), each with an empty count vector.
    main_entities = (
        # Variables
        "i", "asd", "t", "args",
        # Globals
        "g",
        # Functions
        "smile", "printf",
        # Constants
        "5", '"i is %d"',
    )
    expected_results = {
        (6, "test()"): {},
        # The comprehension creates a fresh list per key.
        (12, "main(int, char *)"): {name: [] for name in main_entities},
    }

    self.uut = ClangCountVectorCreator()
    self.check_cv_dict(self.uut.get_vectors_for_file(self.testfile),
                       expected_results)
def test_counting(self):
    # Expected two-element count vectors for main(); presumably one count
    # per condition handed to the creator below.
    counts_in_main = {
        # Variables
        'i': [4, 1],
        'asd': [1, 0],
        't': [4, 1],
        'args': [2, 0],
        # Globals
        'g': [3, 1],
        # Functions
        'smile': [1, 1],
        'printf': [1, 1],
        # Constants
        '5': [1, 0],
        '"i is %d"': [1, 1],
    }
    expected_results = {
        # test() is expected to yield no entries at all.
        (6, 'test()'): {},
        (12, 'main(int, char *)'): counts_in_main,
    }

    self.uut = ClangCountVectorCreator([no_condition, is_call_argument])
    self.check_cv_dict(self.uut.get_vectors_for_file(self.testfile),
                       expected_results)
def test_empty_counting(self):
    # A creator built without conditions is expected to still list every
    # entity of main(), each with an empty count vector.
    entities_in_main = (
        # Variables
        'i', 'asd', 't', 'args',
        # Globals
        'g',
        # Functions
        'smile', 'printf',
        # Constants
        '5', '"i is %d"',
    )
    expected_results = {
        (6, 'test()'): {},
        # The comprehension creates a fresh list per key.
        (12, 'main(int, char *)'): {name: [] for name in entities_in_main},
    }

    self.uut = ClangCountVectorCreator()
    self.check_cv_dict(self.uut.get_vectors_for_file(self.testfile),
                       expected_results)
def test_counting(self):
    # Count vectors expected for main() when counting with the two
    # conditions given to the creator further down.
    expected_for_main = {
        # Variables
        'i': [4, 1],
        'asd': [1, 0],
        't': [4, 1],
        'args': [2, 0],
        # Globals
        'g': [3, 1],
        # Functions
        'smile': [1, 1],
        'printf': [1, 1],
        # Constants
        '5': [1, 0],
        '"i is %d"': [1, 1],
    }
    expected_results = {
        # Nothing is expected to be counted inside test().
        (6, 'test()'): {},
        (12, 'main(int, char *)'): expected_for_main,
    }

    self.uut = ClangCountVectorCreator([no_condition, is_call_argument])
    found = self.uut.get_vectors_for_file(self.testfile)
    self.check_cv_dict(found, expected_results)
def test_empty_counting(self):
    # With no counting conditions every entity of main() is expected to
    # show up with an empty count vector.
    names_in_main = (
        # Variables
        'i', 'asd', 't', 'args',
        # Globals
        'g',
        # Functions
        'smile', 'printf',
        # Constants
        '5', '"i is %d"',
    )
    expected_results = {
        (6, 'test()'): {},
        # One distinct empty list per name.
        (12, 'main(int, char *)'): {name: [] for name in names_in_main},
    }

    self.uut = ClangCountVectorCreator()
    found = self.uut.get_vectors_for_file(self.testfile)
    self.check_cv_dict(found, expected_results)
class ClangCountVectorCreatorTest(unittest.TestCase):
    """
    Compares the count vectors ClangCountVectorCreator reports for the
    sample.c file located next to this module with hand-written data.
    """

    functions = sorted(["main(int, char *)", "test()"])

    def setUp(self):
        # sample.c is resolved relative to this module, not the cwd.
        here = os.path.dirname(__file__)
        self.testfile = os.path.abspath(os.path.join(here, "sample.c"))

    def check_cv_dict(self, actual, expected):
        """
        Asserts that two nested count vector dicts describe the same data.

        :param actual:   Dict mapping a function key to a dict that maps
                         each variable to an object with a count_vector
                         attribute.
        :param expected: Dict of the same layout whose innermost values
                         are the expected count vectors themselves.
        """
        self.assertEqual(len(actual), len(expected), str(actual))
        self.assertEqual(sorted(actual.keys()), sorted(expected.keys()))

        for function, found_variables in actual.items():
            wanted_variables = expected[function]
            self.assertEqual(len(found_variables), len(wanted_variables))
            self.assertEqual(sorted(found_variables.keys()),
                             sorted(wanted_variables.keys()))

            for variable, vector in found_variables.items():
                self.assertEqual(vector.count_vector,
                                 wanted_variables[variable])

    def test_empty_counting(self):
        # A creator built without conditions is expected to still list
        # every entity of main(), each with an empty count vector.
        main_entities = (
            # Variables
            "i", "asd", "t", "args",
            # Globals
            "g",
            # Functions
            "smile", "printf",
            # Constants
            "#5", '#"i is %d"',
        )
        expected_results = {
            (6, "test()"): {},
            # The comprehension creates a fresh list per key.
            (12, "main(int, char *)"): {name: [] for name in main_entities},
        }

        self.uut = ClangCountVectorCreator()
        self.check_cv_dict(self.uut.get_vectors_for_file(self.testfile),
                           expected_results)

    def test_counting(self):
        # Expected two-element count vectors for main(); presumably one
        # count per condition handed to the creator below.
        main_counts = {
            # Variables
            "i": [4, 1],
            "asd": [1, 0],
            "t": [4, 1],
            "args": [2, 0],
            # Globals
            "g": [3, 1],
            # Functions
            "smile": [1, 1],
            "printf": [1, 1],
            # Constants
            "#5": [1, 0],
            '#"i is %d"': [1, 1],
        }
        expected_results = {
            (6, "test()"): {},
            (12, "main(int, char *)"): main_counts,
        }

        self.uut = ClangCountVectorCreator([no_condition, is_call_argument])
        self.check_cv_dict(self.uut.get_vectors_for_file(self.testfile),
                           expected_results)
def run(self,
        counting_conditions: counting_condition_dict = default_cc_dict,
        average_calculation: bool = False,
        poly_postprocessing: bool = True,
        exp_postprocessing: bool = False,
        extra_include_paths: path_list = ()):
    '''
    Retrieves similarities for code clone detection. Those can be reused in
    another bear to produce results.

    Postprocessing may be done because small functions are less likely to
    be clones at the same difference value than big functions which may
    provide a better refactoring opportunity for the user.

    :param counting_conditions: A comma separated list of counting
                                conditions. Possible values are: used,
                                returned, is_condition, in_condition,
                                in_second_level_condition,
                                in_third_level_condition, is_assignee,
                                is_assigner, loop_content,
                                second_level_loop_content,
                                third_level_loop_content, is_param,
                                in_sum, in_product, in_binary_operation,
                                member_accessed.
                                Weightings can be assigned to each
                                condition due to providing a dict
                                value, i.e. having used weighted in
                                half as much as other conditions would
                                simply be: "used: 0.5, is_assignee".
                                Weightings default to 1 if unset.
    :param average_calculation: If set to true the difference calculation
                                function will take the average of all
                                variable differences as the difference,
                                else it will normalize the function as a
                                whole and thus weighting in variables
                                dependent on their size.
    :param poly_postprocessing: If set to true, the difference value of big
                                function pairs will be reduced using a
                                polynomial approach.
    :param extra_include_paths: A list containing additional include paths.
    :param exp_postprocessing:  If set to true, the difference value of big
                                function pairs will be reduced using an
                                exponential approach.
    '''
    self.debug("Using the following counting conditions:")
    for key, val in counting_conditions.items():
        self.debug(" *", key.__name__, "(weighting: {})".format(val))

    self.debug("Creating count matrices...")
    # The dict is split into parallel lists: its keys (the condition
    # callables) and its values (their weightings).
    count_matrices = get_count_matrices(
        ClangCountVectorCreator(list(counting_conditions.keys()),
                                list(counting_conditions.values())),
        list(self.file_dict.keys()),
        lambda prog: self.debug("{:2.4f}%...".format(prog)),
        self.section["files"].origin,
        collect_dirs(extra_include_paths))

    self.debug("Calculating differences...")

    function_count = len(count_matrices)
    # That's "n choose 2", hardcoded to simplify calculation. It is only
    # used as a divisor inside the loop, which does not run when there are
    # fewer than two functions.
    combination_length = function_count * (function_count - 1) / 2
    partial_get_difference = functools.partial(
        get_difference,
        count_matrices=count_matrices,
        average_calculation=average_calculation,
        poly_postprocessing=poly_postprocessing,
        exp_postprocessing=exp_postprocessing)

    differences = []
    # combinations() already yields (f1, f2) tuples, so it is passed to
    # map() directly instead of first copying every pair into a list.
    function_pairs = combinations(count_matrices, 2)
    for i, elem in enumerate(map(partial_get_difference, function_pairs)):
        # Report progress only every 50th pair to keep the debug log short.
        if i % 50 == 0:
            self.debug("{:2.4f}%...".format(100 * i / combination_length))

        differences.append(elem)

    # Two hidden results are emitted: first the differences, then the
    # count matrices.
    yield HiddenResult(self, differences)
    yield HiddenResult(self, count_matrices)
class ClangCountVectorCreatorTest(unittest.TestCase):
    """
    Checks the count vectors ClangCountVectorCreator produces for the
    sample.c file that lives beside this module.
    """

    functions = sorted(["main(int, char *)", "test()"])

    def setUp(self):
        # Resolve sample.c relative to this module rather than the cwd.
        module_dir = os.path.dirname(__file__)
        sample_path = os.path.join(module_dir, "sample.c")
        self.testfile = os.path.abspath(sample_path)

    def check_cv_dict(self, actual, expected):
        """
        Asserts that two nested count vector dicts describe the same data.

        :param actual:   Dict mapping a function key to a dict that maps
                         each variable to an object with a count_vector
                         attribute.
        :param expected: Dict of the same layout whose innermost values
                         are the expected count vectors themselves.
        """
        self.assertEqual(len(actual), len(expected), str(actual))
        self.assertEqual(sorted(actual.keys()), sorted(expected.keys()))

        for function, got in actual.items():
            want = expected[function]
            self.assertEqual(len(got), len(want))
            self.assertEqual(sorted(got.keys()), sorted(want.keys()))

            for variable, vector in got.items():
                self.assertEqual(vector.count_vector, want[variable])

    def test_empty_counting(self):
        # With no counting conditions every entity of main() is expected
        # to show up with an empty count vector.
        names_in_main = (
            # Variables
            "i", "asd", "t", "args",
            # Globals
            "g",
            # Functions
            "smile", "printf",
            # Constants
            "#5", '#"i is %d"',
        )
        expected_results = {
            (6, "test()"): {},
            # One distinct empty list per name.
            (12, "main(int, char *)"): {name: [] for name in names_in_main},
        }

        self.uut = ClangCountVectorCreator()
        found = self.uut.get_vectors_for_file(self.testfile)
        self.check_cv_dict(found, expected_results)

    def test_counting(self):
        # Count vectors expected for main() when counting with the two
        # conditions given to the creator further down.
        expected_for_main = {
            # Variables
            "i": [4, 1],
            "asd": [1, 0],
            "t": [4, 1],
            "args": [2, 0],
            # Globals
            "g": [3, 1],
            # Functions
            "smile": [1, 1],
            "printf": [1, 1],
            # Constants
            "#5": [1, 0],
            '#"i is %d"': [1, 1],
        }
        expected_results = {
            (6, "test()"): {},
            (12, "main(int, char *)"): expected_for_main,
        }

        self.uut = ClangCountVectorCreator([no_condition, is_call_argument])
        found = self.uut.get_vectors_for_file(self.testfile)
        self.check_cv_dict(found, expected_results)
class ClangCountVectorCreatorTest(unittest.TestCase):
    """
    Compares the count vectors ClangCountVectorCreator reports for the
    sample.c file located next to this module with hand-written data.
    """

    functions = sorted(['main(int, char *)', 'test()'])

    def setUp(self):
        # sample.c is resolved relative to this module, not the cwd.
        here = os.path.dirname(__file__)
        self.testfile = os.path.abspath(os.path.join(here, 'sample.c'))

    def check_cv_dict(self, actual, expected):
        """
        Asserts that two nested count vector dicts describe the same data.

        :param actual:   Dict mapping a function key to a dict that maps
                         each variable to an object with a count_vector
                         attribute.
        :param expected: Dict of the same layout whose innermost values
                         are the expected count vectors themselves.
        """
        self.assertEqual(len(actual), len(expected), str(actual))
        self.assertEqual(sorted(actual.keys()), sorted(expected.keys()))

        for function, found_variables in actual.items():
            wanted_variables = expected[function]
            self.assertEqual(len(found_variables), len(wanted_variables))
            self.assertEqual(sorted(found_variables.keys()),
                             sorted(wanted_variables.keys()))

            for variable, vector in found_variables.items():
                self.assertEqual(vector.count_vector,
                                 wanted_variables[variable])

    def test_empty_counting(self):
        # A creator built without conditions is expected to still list
        # every entity of main(), each with an empty count vector.
        main_entities = (
            # Variables
            'i', 'asd', 't', 'args',
            # Globals
            'g',
            # Functions
            'smile', 'printf',
            # Constants
            '5', '"i is %d"',
        )
        expected_results = {
            (6, 'test()'): {},
            # The comprehension creates a fresh list per key.
            (12, 'main(int, char *)'): {name: [] for name in main_entities},
        }

        self.uut = ClangCountVectorCreator()
        self.check_cv_dict(self.uut.get_vectors_for_file(self.testfile),
                           expected_results)

    def test_counting(self):
        # Expected two-element count vectors for main(); presumably one
        # count per condition handed to the creator below.
        main_counts = {
            # Variables
            'i': [4, 1],
            'asd': [1, 0],
            't': [4, 1],
            'args': [2, 0],
            # Globals
            'g': [3, 1],
            # Functions
            'smile': [1, 1],
            'printf': [1, 1],
            # Constants
            '5': [1, 0],
            '"i is %d"': [1, 1],
        }
        expected_results = {
            (6, 'test()'): {},
            (12, 'main(int, char *)'): main_counts,
        }

        self.uut = ClangCountVectorCreator([no_condition, is_call_argument])
        self.check_cv_dict(self.uut.get_vectors_for_file(self.testfile),
                           expected_results)
class ClangCountVectorCreatorTest(unittest.TestCase):
    """
    Checks the count vectors ClangCountVectorCreator produces for the
    sample.c file that lives beside this module.
    """

    functions = sorted(['main(int, char *)', 'test()'])

    def setUp(self):
        # Resolve sample.c relative to this module rather than the cwd.
        module_dir = os.path.dirname(__file__)
        sample_path = os.path.join(module_dir, 'sample.c')
        self.testfile = os.path.abspath(sample_path)

    def check_cv_dict(self, actual, expected):
        """
        Asserts that two nested count vector dicts describe the same data.

        :param actual:   Dict mapping a function key to a dict that maps
                         each variable to an object with a count_vector
                         attribute.
        :param expected: Dict of the same layout whose innermost values
                         are the expected count vectors themselves.
        """
        self.assertEqual(len(actual), len(expected), str(actual))
        self.assertEqual(sorted(actual.keys()), sorted(expected.keys()))

        for function, got in actual.items():
            want = expected[function]
            self.assertEqual(len(got), len(want))
            self.assertEqual(sorted(got.keys()), sorted(want.keys()))

            for variable, vector in got.items():
                self.assertEqual(vector.count_vector, want[variable])

    def test_empty_counting(self):
        # With no counting conditions every entity of main() is expected
        # to show up with an empty count vector.
        names_in_main = (
            # Variables
            'i', 'asd', 't', 'args',
            # Globals
            'g',
            # Functions
            'smile', 'printf',
            # Constants
            '5', '"i is %d"',
        )
        expected_results = {
            (6, 'test()'): {},
            # One distinct empty list per name.
            (12, 'main(int, char *)'): {name: [] for name in names_in_main},
        }

        self.uut = ClangCountVectorCreator()
        found = self.uut.get_vectors_for_file(self.testfile)
        self.check_cv_dict(found, expected_results)

    def test_counting(self):
        # Count vectors expected for main() when counting with the two
        # conditions given to the creator further down.
        expected_for_main = {
            # Variables
            'i': [4, 1],
            'asd': [1, 0],
            't': [4, 1],
            'args': [2, 0],
            # Globals
            'g': [3, 1],
            # Functions
            'smile': [1, 1],
            'printf': [1, 1],
            # Constants
            '5': [1, 0],
            '"i is %d"': [1, 1],
        }
        expected_results = {
            (6, 'test()'): {},
            (12, 'main(int, char *)'): expected_for_main,
        }

        self.uut = ClangCountVectorCreator([no_condition, is_call_argument])
        found = self.uut.get_vectors_for_file(self.testfile)
        self.check_cv_dict(found, expected_results)