def detect_extract_method_from_commit(old_commit, new_commit):
    """Collect Extract Method refactoring candidates between two commits.

    Blobs added between the commits whose path satisfies ``is_method_body``
    are recorded as possibly extracted methods, keyed by
    ``(class, method name, argument count)``.  Each modified method-body blob
    belonging to a class that gained such a method is then inspected:
    ``parse_added_lines`` is asked for the argument counts associated with the
    new method name in the added lines (presumably the call sites -- confirm
    against ``parse_added_lines``), and every matching added method yields one
    candidate.

    Args:
        old_commit: Commit object providing ``diff()`` and ``hexsha``.
        new_commit: Commit object the diff is taken against.

    Returns:
        A list of tuples ``(old hexsha, new hexsha, org_commit, a_package,
        b_package, class, target method, extracted method, similarity)``.
        ``similarity`` is the result of ``calculate_similarity`` or the string
        ``"N/A"`` when that call raises ``ZeroDivisionError``.
    """
    result = []
    extracted_method_candidates = defaultdict(set)
    added_lines_dict = defaultdict(list)
    diff_index = old_commit.diff(new_commit, create_patch=True)

    # Pass 1: index every added method body by class / name / arity.
    for diff in diff_index.iter_change_type('A'):
        added_path = diff.b_blob.path
        if not is_method_body(added_path):
            continue
        method = get_method(added_path)
        paren = method.index('(')
        method_name = method[:paren]
        args = method[paren:].split(',')
        num_args = len(args)
        # A single chunk is either an empty parameter list or one parameter.
        if num_args == 1 and args[0] == '()':
            num_args = 0
        c = get_class(added_path)
        extracted_method_candidates[c].add(method_name)
        _, added_lines = diff_parser.parse(diff.diff)
        added_lines_dict[(c, method_name, num_args)].append(
            (method, added_lines))

    # Pass 2: look for modified methods that lost lines and gained a
    # reference to one of the added methods of the same class.
    for diff in diff_index.iter_change_type('M'):
        b_path = diff.b_blob.path
        if not is_method_body(b_path):
            continue
        c = get_class(b_path)
        if c not in extracted_method_candidates:
            continue
        deleted_lines, added_lines = diff_parser.parse(diff.diff)
        if not (deleted_lines and added_lines):
            continue
        a_package = get_package(diff.a_blob.path, old_commit)
        b_package = get_package(b_path, new_commit)
        m = get_method(b_path)
        script = '\n'.join(line[1] for line in deleted_lines)
        for method in extracted_method_candidates[c]:
            for num_args in parse_added_lines(added_lines, method):
                key = (c, method, num_args)
                # Plain membership test: avoids creating an empty entry in
                # the defaultdict for keys that were never added.
                if key not in added_lines_dict:
                    continue
                for extracted_method, extracted_lines in added_lines_dict[key]:
                    # Ignore the first and last added line of the new method.
                    body_lines = extracted_lines[1:-1]
                    script2 = '\n'.join(line[1] for line in body_lines)
                    try:
                        sim = calculate_similarity(script, script2)
                    except ZeroDivisionError:
                        sim = "N/A"
                    org_commit = get_org_commit(new_commit)
                    result.append(
                        (old_commit.hexsha, new_commit.hexsha, org_commit,
                         a_package, b_package, c, m, extracted_method, sim))
    return result
def detect_extract_method_from_commit(old_commit, new_commit):
    """Collect Extract Method refactoring candidates between two commits.

    ``get_extracted_method_candidates`` supplies, per class, the
    ``(method, path, argument count)`` triples of possibly extracted methods
    together with a mapping from path to that method's added lines.  Every
    modified method or constructor body (same path before and after) of such a
    class that has both deleted and added lines is then checked: when
    ``parse_added_lines`` returns the candidate's argument count for the
    candidate's name, one result entry is produced.

    Args:
        old_commit: Commit object providing ``diff()`` and ``hexsha``.
        new_commit: Commit object the diff is taken against.

    Returns:
        A list of dicts with the keys ``a_commit``, ``b_commit``,
        ``b_org_commit``, ``a_package``, ``b_package``, ``target_class``,
        ``target_method``, ``extracted_method``, ``similarity``,
        ``target_before_body``, ``target_after_body``, ``extracted_body``,
        ``target_deleted_lines``, ``target_method_path`` and
        ``extracted_method_path``.  ``similarity`` is ``"N/A"`` when
        ``calculate_similarity`` raises ``ZeroDivisionError``.
    """
    result = []
    diff_index = old_commit.diff(new_commit, create_patch=True)
    extracted_method_candidates, added_lines_dict = \
        get_extracted_method_candidates(diff_index)

    for diff in diff_index.iter_change_type('M'):
        a_path = diff.a_blob.path
        b_path = diff.b_blob.path
        if a_path != b_path:
            continue
        # Evaluate once; the answer also selects the name extractor below.
        in_method = is_method_body(b_path)
        if not (in_method or is_constructor_body(b_path)):
            continue
        c = get_class(b_path)
        if c not in extracted_method_candidates:
            continue
        deleted_lines, added_lines = diff_parser.parse(diff.diff)
        if not (deleted_lines and added_lines):
            continue
        a_package = get_package(a_path, old_commit)
        b_package = get_package(b_path, new_commit)
        m = get_method(b_path) if in_method else get_constructor(b_path)
        script = '\n'.join(line[1] for line in deleted_lines)

        for method, path_of_method, num_args in extracted_method_candidates[c]:
            extracted_lines = added_lines_dict[path_of_method]
            if num_args not in parse_added_lines(added_lines, method):
                continue
            # Ignore the first and last added line of the new method.
            extracted_lines = extracted_lines[1:-1]
            script2 = '\n'.join(line[1] for line in extracted_lines)
            try:
                sim = calculate_similarity(script, script2)
            except ZeroDivisionError:
                sim = "N/A"
            result.append({
                'a_commit': old_commit.hexsha,
                'b_commit': new_commit.hexsha,
                'b_org_commit': get_org_commit(new_commit),
                'a_package': a_package,
                'b_package': b_package,
                'target_class': c,
                'target_method': m,
                'extracted_method': method,
                'similarity': sim,
                'target_before_body': diff.a_blob.data_stream.read(),
                'target_after_body': diff.b_blob.data_stream.read(),
                'extracted_body': script2,
                'target_deleted_lines': [line[1] for line in deleted_lines],
                'target_method_path': b_path,
                'extracted_method_path': path_of_method,
            })
    return result
def test_calculate_similarity():
    """Two signatures differing only in return type score exactly one third."""
    before = "public void main()"
    after = "public int main()"
    expected = 1.0 / 3.0
    assert calculate_similarity(before, after) == expected
def test_calculate_similarity2():
    """Call calculate_similarity with two empty strings.

    Nothing is asserted about the return value; the test only fails if the
    call raises.
    """
    # NOTE(review): other callers in this file wrap calculate_similarity in
    # `except ZeroDivisionError`; whether empty inputs trigger that is not
    # visible from here -- confirm the intended outcome of this test.
    empty_before, empty_after = "", ""
    calculate_similarity(empty_before, empty_after)
def detect_shingle_pullup_method(old_commit, new_commit):
    """Collect Pull Up Method candidates between two commits.

    Methods added between the commits are grouped by class name.  Deleted
    subclass methods are grouped by the class they extend, provided the
    subclass itself still exists in ``new_commit`` and the extended class
    gained at least one method.  Every (deleted subclass method, added
    superclass method) pair is then scored with ``calculate_similarity``.

    Args:
        old_commit: Commit object providing ``diff()`` and ``hexsha``.
        new_commit: Commit object the diff is taken against.

    Returns:
        A list of tuples ``(old hexsha, new hexsha, old_org_commit,
        new_org_commit, str(src_method), str(dst_method), similarity,
        is_same_parameters)``.  ``similarity`` is ``"N/A"`` when
        ``calculate_similarity`` raises ``ZeroDivisionError`` and ``0`` when
        both trimmed bodies are empty.
    """
    diff_index = old_commit.diff(new_commit, create_patch=False)

    added_methods = defaultdict(list)
    for diff in diff_index.iter_change_type('A'):
        new_method = Method.create_from_blob(diff.b_blob, new_commit)
        if new_method:
            added_methods[new_method.get_class_name()].append(new_method)

    deleted_methods = defaultdict(list)
    deleted_classes = set()
    for diff in diff_index.iter_change_type('D'):
        # NOTE change following old_commit to new_commit to detect
        # pull_up_method by same condition of UMLDiff
        subclass_method = SubclassMethod.create_from_blob(
            diff.a_blob, old_commit)
        if not subclass_method:
            continue
        if not subclass_method.extend:
            continue
        full_class_name = subclass_method.get_full_class_name()
        if full_class_name in deleted_classes:
            continue
        if not exist_class(diff.a_blob, new_commit):
            # The whole subclass was removed; remember it so its remaining
            # methods are skipped without calling exist_class again.
            deleted_classes.add(full_class_name)
            continue
        # Plain membership test: does not insert into the defaultdict.
        if subclass_method.extend in added_methods:
            deleted_methods[subclass_method.extend].append(subclass_method)

    pull_up_method_candidates = []
    old_org_commit = get_org_commit(old_commit)
    new_org_commit = get_org_commit(new_commit)
    # .items() instead of .iteritems(): the latter does not exist on Python 3.
    for super_class, src_methods in deleted_methods.items():
        # Defensive guard: keys are only added above when the class is
        # already present in added_methods.
        if super_class not in added_methods:
            print('%s does\'nt have a deleted method' % (super_class))
            continue
        for dst_method in added_methods[super_class]:
            dst_body = dst_method.get_body()
            if not dst_body:
                continue
            # Keep only the lines between the first and the last two lines.
            dst_body = '\n'.join(dst_body.split('\n')[1:-2])
            for src_method in src_methods:
                src_body = src_method.get_body()
                is_same_parameters = match_type(src_method, dst_method)
                if src_body:
                    src_body = '\n'.join(src_body.split('\n')[1:-2])
                if src_body or dst_body:
                    try:
                        sim = calculate_similarity(src_body, dst_body)
                    except ZeroDivisionError:
                        sim = "N/A"
                else:
                    sim = 0
                pull_up_method_candidates.append(
                    (old_commit.hexsha, new_commit.hexsha,
                     old_org_commit, new_org_commit,
                     str(src_method), str(dst_method),
                     sim, is_same_parameters))
    return pull_up_method_candidates
def detect_extract_method_from_commit(old_commit, new_commit):
    """Collect Extract Method refactoring candidates between two commits.

    Candidate extracted methods come from ``get_extracted_method_candidates``:
    a per-class collection of ``(method, path, argument count)`` triples and a
    mapping from path to the added lines of that method.  For each modified
    method or constructor body whose path is unchanged, whose class has
    candidates, and whose diff has both deleted and added lines, a result is
    emitted for every candidate whose argument count is among the values
    returned by ``parse_added_lines`` for that candidate's name.

    Args:
        old_commit: Commit object providing ``diff()`` and ``hexsha``.
        new_commit: Commit object the diff is taken against.

    Returns:
        A list of dicts with the keys ``a_commit``, ``b_commit``,
        ``b_org_commit``, ``a_package``, ``b_package``, ``target_class``,
        ``target_method``, ``extracted_method``, ``similarity``,
        ``target_before_body``, ``target_after_body``, ``extracted_body``,
        ``target_deleted_lines``, ``target_method_path`` and
        ``extracted_method_path``.  ``similarity`` is ``"N/A"`` when
        ``calculate_similarity`` raises ``ZeroDivisionError``.
    """
    result = []
    diff_index = old_commit.diff(new_commit, create_patch=True)
    (extracted_method_candidates,
     added_lines_dict) = get_extracted_method_candidates(diff_index)

    for diff in diff_index.iter_change_type('M'):
        a_path = diff.a_blob.path
        b_path = diff.b_blob.path
        if a_path != b_path:
            continue
        # Evaluate once; the answer also selects the name extractor below.
        target_is_method = is_method_body(b_path)
        if not (target_is_method or is_constructor_body(b_path)):
            continue
        c = get_class(b_path)
        # Direct membership test instead of materialising a list of keys
        # for every modified diff.
        if c not in extracted_method_candidates:
            continue
        deleted_lines, added_lines = diff_parser.parse(diff.diff)
        if not (deleted_lines and added_lines):
            continue
        a_package = get_package(a_path, old_commit)
        b_package = get_package(b_path, new_commit)
        if target_is_method:
            m = get_method(b_path)
        else:
            m = get_constructor(b_path)
        script = '\n'.join(line[1] for line in deleted_lines)

        for method, path_of_method, num_args in extracted_method_candidates[c]:
            extracted_lines = added_lines_dict[path_of_method]
            num_args_list = parse_added_lines(added_lines, method)
            if num_args not in num_args_list:
                continue
            # Ignore the first and last added line of the new method.
            extracted_lines = extracted_lines[1:-1]
            script2 = '\n'.join(line[1] for line in extracted_lines)
            try:
                sim = calculate_similarity(script, script2)
            except ZeroDivisionError:
                sim = "N/A"
            org_commit = get_org_commit(new_commit)
            target_before_body = diff.a_blob.data_stream.read()
            target_after_body = diff.b_blob.data_stream.read()
            target_deleted_lines = [line[1] for line in deleted_lines]
            refactoring_candidate = {
                'a_commit': old_commit.hexsha,
                'b_commit': new_commit.hexsha,
                'b_org_commit': org_commit,
                'a_package': a_package,
                'b_package': b_package,
                'target_class': c,
                'target_method': m,
                'extracted_method': method,
                'similarity': sim,
                'target_before_body': target_before_body,
                'target_after_body': target_after_body,
                'extracted_body': script2,
                'target_deleted_lines': target_deleted_lines,
                'target_method_path': b_path,
                'extracted_method_path': path_of_method,
            }
            result.append(refactoring_candidate)
    return result
def detect_shingle_pullup_method(old_commit, new_commit):
    """Collect Pull Up Method candidates between two commits.

    Added methods are indexed by class name.  Deleted subclass methods are
    indexed by the class they extend, but only when the subclass still exists
    in ``new_commit`` and the extended class received at least one added
    method.  Each deleted/added method pair under the same superclass is
    compared with ``calculate_similarity``.

    Args:
        old_commit: Commit object providing ``diff()`` and ``hexsha``.
        new_commit: Commit object the diff is taken against.

    Returns:
        A list of tuples ``(old hexsha, new hexsha, old_org_commit,
        new_org_commit, str(src_method), str(dst_method), similarity,
        is_same_parameters)``.  ``similarity`` is ``"N/A"`` when
        ``calculate_similarity`` raises ``ZeroDivisionError`` and ``0`` when
        both trimmed bodies are empty.
    """
    diff_index = old_commit.diff(new_commit, create_patch=False)
    added_methods = defaultdict(list)
    deleted_methods = defaultdict(list)

    for diff in diff_index.iter_change_type('A'):
        new_method = Method.create_from_blob(diff.b_blob, new_commit)
        if new_method:
            added_methods[new_method.get_class_name()].append(new_method)

    deleted_classes = set()
    for diff in diff_index.iter_change_type('D'):
        # NOTE change following old_commit to new_commit to detect
        # pull_up_method by same condition of UMLDiff
        subclass_method = SubclassMethod.create_from_blob(diff.a_blob,
                                                          old_commit)
        if not subclass_method or not subclass_method.extend:
            continue
        class_name = subclass_method.get_full_class_name()
        if class_name in deleted_classes:
            continue
        if not exist_class(diff.a_blob, new_commit):
            # Subclass is gone entirely; cache that so later methods of the
            # same class are skipped early.
            deleted_classes.add(class_name)
            continue
        parent = subclass_method.extend
        # Plain membership test: does not insert into the defaultdict.
        if parent in added_methods:
            deleted_methods[parent].append(subclass_method)

    pull_up_method_candidates = []
    old_org_commit = get_org_commit(old_commit)
    new_org_commit = get_org_commit(new_commit)
    # dict.iteritems() was removed in Python 3; items() works on both.
    for super_class, v in deleted_methods.items():
        # Defensive guard: keys are only added above when the class is
        # already present in added_methods.
        if super_class not in added_methods:
            print('%s does\'nt have a deleted method' % (super_class))
            continue
        for dst_method in added_methods[super_class]:
            dst_body = dst_method.get_body()
            if not dst_body:
                continue
            # Keep only the lines between the first and the last two lines.
            dst_body = '\n'.join(dst_body.split('\n')[1:-2])
            for src_method in v:
                src_body = src_method.get_body()
                is_same_parameters = match_type(src_method, dst_method)
                if src_body:
                    src_body = '\n'.join(src_body.split('\n')[1:-2])
                if src_body or dst_body:
                    try:
                        sim = calculate_similarity(src_body, dst_body)
                    except ZeroDivisionError:
                        sim = "N/A"
                else:
                    sim = 0
                pull_up_method_candidates.append((
                    old_commit.hexsha,
                    new_commit.hexsha,
                    old_org_commit,
                    new_org_commit,
                    str(src_method),
                    str(dst_method),
                    sim,
                    is_same_parameters,
                ))
    return pull_up_method_candidates