def extract_one_to_db_obj(project):
    """Collect size features for the commits of ``project``.

    The git log is loaded and indexed by commit id, the commits without
    parents are registered as the starting roots on ``GitOneCommitFeatures``,
    and ``calculate_features_for_root(SizeFeatures)`` is called repeatedly
    until no roots remain.

    Args:
        project: project identifier handed to the log and feature helpers.

    Returns:
        A list of dicts with the keys ``project``, ``commit_id``, ``la``,
        ``ld`` and ``lt``, one per extraction result.
    """
    features = GitOneCommitFeatures
    features.initialize(project)

    git_log = retrieve_git_log(project)
    records = []
    root_ids = set()
    commits_by_id = {}
    for commit in git_log:
        commits_by_id[commit.commit_id] = commit
        # A commit without parents is a starting point of the traversal.
        if len(commit.parent) == 0:
            root_ids.add(commit.commit_id)
    # The log is not needed once it has been indexed.
    del git_log

    features.current_root = root_ids
    features.calculated_commit = set()
    features.candidate_commit = set()
    features.rgcm_dict = commits_by_id

    exported = ('project', 'commit_id', 'la', 'ld', 'lt')
    # NOTE(review): the condition reads current_root through SizeFeatures while
    # the roots were assigned on GitOneCommitFeatures; presumably both names
    # resolve to the same class attribute - confirm.
    while len(SizeFeatures.current_root) > 0:
        batch = features.calculate_features_for_root(SizeFeatures)
        assert isinstance(batch, list)
        for raw in batch:
            size_obj = SizeFeaturesObj(raw)
            records.append({key: getattr(size_obj, key) for key in exported})
    return records
def extract_one_to_db_obj(project):
    """Collect history features for the commits of ``project``.

    The git log is indexed by commit id, commits without parents become the
    starting roots on ``GitOneCommitFeatures``, and
    ``calculate_features_for_root(HistoryFeatures)`` is called until the
    set of current roots is empty.

    Args:
        project: project identifier handed to the log and feature helpers.

    Returns:
        A list of dicts with the keys ``project``, ``commit_id``, ``ndev``,
        ``age`` and ``nuc``, one per extraction result.
    """
    gcf = GitOneCommitFeatures
    gcf.initialize(project, merge_all_log=True)
    rgcms = retrieve_git_log(project)

    root = set()
    rgcm_dict = {}
    for rgcm in rgcms:
        rgcm_dict[rgcm.commit_id] = rgcm
        # A commit without parents is a starting point of the traversal.
        if len(rgcm.parent) == 0:
            root.add(rgcm.commit_id)

    gcf.current_root = root
    gcf.calculated_commit = set()
    gcf.candidate_commit = set()
    gcf.rgcm_dict = rgcm_dict

    fields = ('project', 'commit_id', 'ndev', 'age', 'nuc')
    db_objs = []
    while len(gcf.current_root) > 0:
        extract_results = gcf.calculate_features_for_root(HistoryFeatures)
        assert isinstance(extract_results, list)
        for er in extract_results:
            hf_obj = HistoryFeaturesObj(er)
            db_objs.append({name: getattr(hf_obj, name) for name in fields})
    return db_objs
def extract_one_to_db_obj(project):
    """Extract diffusion features for the most recent commit of ``project``.

    The most recent commit is the one with the largest ``time_stamp`` in the
    git log.

    Args:
        project: project identifier handed to the log and feature helpers.

    Returns:
        A list holding one dict with the keys ``project``, ``commit_id``,
        ``ns``, ``nd``, ``nf`` and ``entropy``. The list is empty when the
        log has no commits or when ``extract()`` returns ``None``.
    """
    GitOneCommitFeatures.initialize(project)
    rgcms = retrieve_git_log(project)
    sorted_rgcms = sorted(rgcms, key=lambda x: x.time_stamp)
    # An empty log has no "latest" commit; indexing [-1] would raise IndexError.
    if not sorted_rgcms:
        return []

    attr_dict = DiffusionFeatures(sorted_rgcms[-1]).extract()
    # extract() may return None (a disabled guard in the original hints at
    # this); there is nothing to report for such a commit.
    if attr_dict is None:
        return []

    df_obj = DiffusionFeaturesObj(attr_dict)
    fields = ('project', 'commit_id', 'ns', 'nd', 'nf', 'entropy')
    return [{name: getattr(df_obj, name) for name in fields}]
def extract_one_to_db_obj(project):
    """Extract experience features for every commit of ``project``.

    Commits are processed in ascending ``time_stamp`` order. Commits for
    which ``ExperienceFeatures.extract()`` returns ``None`` are skipped.

    Args:
        project: project identifier handed to the log and feature helpers.

    Returns:
        A list with the ``to_db_obj()`` result of each processed commit.
    """
    GitCommitFeatures.initialize(project)
    chronological = list(retrieve_git_log(project))
    chronological.sort(key=lambda commit: commit.time_stamp)

    records = []
    for commit in chronological:
        attributes = ExperienceFeatures(commit).extract()
        if attributes is not None:
            records.append(ExperienceFeaturesObj(attributes).to_db_obj())
    return records
def extract_one_to_db_obj(project):
    """Extract purpose features for every commit of ``project``.

    Commits are processed in ascending ``time_stamp`` order; commits for
    which ``PurposeFeatures.extract()`` returns ``None`` are skipped. The
    commits classified as ``'Corrective'`` are then handed to
    ``GitCommitLinker`` together with all processed commits, and finally the
    list-like link fields are JSON-encoded before conversion to db objects.

    Args:
        project: project identifier handed to the log and feature helpers.

    Returns:
        A list with the ``to_db_obj()`` result of each processed commit.

    Raises:
        Exception: whatever the linking step raises is printed and re-raised.
    """
    GitCommitFeatures.initialize(project)
    rgcms = retrieve_git_log(project)

    pf_objs = []
    corrective_commits = {}  # key: commit_id; value: pf_obj
    all_commits = {}  # key: commit_id; value: pf_obj
    for rgcm in sorted(rgcms, key=lambda x: x.time_stamp):
        attr_dict = PurposeFeatures(rgcm).extract()
        if attr_dict is None:
            continue
        pf_obj = PurposeFeaturesObj(attr_dict)
        pf_objs.append(pf_obj)
        all_commits[pf_obj.commit_id] = pf_obj
        if pf_obj.classification == 'Corrective':
            corrective_commits[pf_obj.commit_id] = pf_obj

    # Link corrective commits to buggy commits (SZZ algorithm, per the
    # original comment). The error is printed for visibility and re-raised.
    try:
        git_commit_linker = GitCommitLinker(project, corrective_commits,
                                            all_commits)
        git_commit_linker.link_corrective_commits()
    except Exception as e:
        print(e)
        raise

    # Attributes that are serialized to JSON text when they hold a value.
    json_fields = ('fix_by', 'fixes', 'buggy_lines', 'clean_lines',
                   'bug_fix_files')
    db_objs = []
    for pf_obj in pf_objs:
        for name in json_fields:
            value = getattr(pf_obj, name)
            if value is not None:
                setattr(pf_obj, name, json.dumps(value))
        db_objs.append(pf_obj.to_db_obj())
    return db_objs
def extract_one_to_db_obj(project):
    """Extract diffusion features for every commit of ``project``.

    Commits are processed in ascending ``time_stamp`` order. Commits for
    which ``DiffusionFeatures.extract()`` returns ``None`` are skipped.

    Args:
        project: project identifier handed to the log and feature helpers.

    Returns:
        A list with the ``to_db_obj()`` result of each processed commit.
    """
    GitCommitFeatures.initialize(project)
    history = list(retrieve_git_log(project))
    history.sort(key=lambda entry: entry.time_stamp)

    collected = []
    for entry in history:
        extracted = DiffusionFeatures(entry).extract()
        if extracted is None:
            continue
        diffusion = DiffusionFeaturesObj(extracted)
        collected.append(diffusion.to_db_obj())
    return collected
def initialize(project, merge_all_log=False):
    """Reset the class-level state of ``GitOneCommitFeatures`` for ``project``.

    Loads the git log, numstat and namestat data (plain and merge variants)
    and replaces the bookkeeping containers with fresh empty ones.

    Args:
        project: project identifier handed to the retrieval helpers.
        merge_all_log: forwarded as the third argument to the merge variants
            of the namestat and numstat helpers.
    """
    state = GitOneCommitFeatures

    # Data loaded from the repository.
    state.project_logs = retrieve_git_log(project)
    state.project_numstat = get_one_numstats(project)
    state.project_namestat = retrieve_one_git_namestats(project)
    state.project_merge_namestat = retrieve_one_git_namestats(
        project, True, merge_all_log)
    state.project_merge_numstat = get_one_numstats(
        project, True, merge_all_log)

    # Fresh, independent containers for the per-run bookkeeping.
    for attr in ('committer_time', 'file_stats', 'developer_stats',
                 'parent_file_stats'):
        setattr(state, attr, {})
    state.mem_manager = MemManager(project)
    state.candidate_commit = set()
    state.rgcm_dict = {}
    for attr in ('current_root', 'calculated_commit'):
        setattr(state, attr, set())
def store_one_meta(self, project):
    """Write the meta data of the most recent commit of ``project`` to disk.

    The commit with the largest ``time_stamp`` is converted through
    ``commit_meta.CommitMeta.from_git_log`` and stored as a one-element JSON
    list in ``featuresStore/commit_meta1`` under ``conf.local_path``.

    Args:
        project: project identifier handed to ``retrieve_git_log``.
    """
    history = list(retrieve_git_log(project))
    history.sort(key=lambda entry: entry.time_stamp)
    newest = history[-1]

    meta = commit_meta.CommitMeta()
    meta.from_git_log(newest)
    exported = ('project', 'commit_id', 'is_merge', 'time_stamp',
                'author_email')
    record = {field: getattr(meta, field) for field in exported}

    target = conf.local_path + "featuresStore/commit_meta1"
    with open(target, 'w') as handle:
        json.dump([record], handle)
def extract_one_to_db_obj(project):
    """Extract experience features for the most recent commit of ``project``.

    The most recent commit is the one with the largest ``time_stamp`` in the
    git log.

    Args:
        project: project identifier handed to the log and feature helpers.

    Returns:
        A list holding one dict with the keys ``project``, ``commit_id``,
        ``exp``, ``rexp`` and ``sexp``. The list is empty when the log has
        no commits or when ``extract()`` returns ``None``.
    """
    GitOneCommitFeatures.initialize(project)
    rgcms = retrieve_git_log(project)
    sorted_rgcms = sorted(rgcms, key=lambda x: x.time_stamp)
    # An empty log has no "latest" commit; indexing [-1] would raise IndexError.
    if not sorted_rgcms:
        return []

    attr_dict = ExperienceFeatures(sorted_rgcms[-1]).extract()
    # extract() may return None (a disabled guard in the original hints at
    # this); there is nothing to report for such a commit.
    if attr_dict is None:
        return []

    ef_obj = ExperienceFeaturesObj(attr_dict)
    fields = ('project', 'commit_id', 'exp', 'rexp', 'sexp')
    return [{name: getattr(ef_obj, name) for name in fields}]
def extract_one_to_db_obj(project):
    """Extract purpose features for the most recent commit of ``project``.

    The commit with the largest ``time_stamp`` is processed, handed to
    ``GitCommitLinker`` (as a corrective commit too when its classification
    is ``'Corrective'``), and exported as a plain dict after the list-like
    link fields have been JSON-encoded.

    Args:
        project: project identifier handed to the log and feature helpers.

    Returns:
        A list holding one dict of purpose attributes. The list is empty
        when the log has no commits or when ``extract()`` returns ``None``.

    Raises:
        Exception: whatever the linking step raises is printed and re-raised.
    """
    GitOneCommitFeatures.initialize(project)
    rgcms = retrieve_git_log(project)
    sorted_rgcms = sorted(rgcms, key=lambda x: x.time_stamp)
    # An empty log has no "latest" commit; indexing [-1] would raise IndexError.
    if not sorted_rgcms:
        return []

    attr_dict = PurposeFeatures(sorted_rgcms[-1]).extract()
    # extract() may return None (a disabled guard in the original hints at
    # this); there is nothing to link or report for such a commit.
    if attr_dict is None:
        return []

    pf_obj = PurposeFeaturesObj(attr_dict)
    all_commits = {pf_obj.commit_id: pf_obj}  # key: commit_id; value: pf_obj
    corrective_commits = {}  # key: commit_id; value: pf_obj
    if pf_obj.classification == 'Corrective':
        corrective_commits[pf_obj.commit_id] = pf_obj

    # Link corrective commits to buggy commits (SZZ algorithm, per the
    # original comment). The error is printed for visibility and re-raised.
    try:
        git_commit_linker = GitCommitLinker(project, corrective_commits,
                                            all_commits)
        git_commit_linker.link_corrective_commits()
    except Exception as e:
        print(e)
        raise

    # Attributes that are serialized to JSON text when they hold a value.
    json_fields = ('fix_by', 'fixes', 'buggy_lines', 'clean_lines',
                   'bug_fix_files')
    for name in json_fields:
        value = getattr(pf_obj, name)
        if value is not None:
            setattr(pf_obj, name, json.dumps(value))

    # Attributes exported to the db dict, in the original key order.
    exported = (
        'project', 'commit_id', 'time_stamp', 'is_fix', 'classification',
        'linked', 'contains_bug', 'fix_by', 'fixes', 'buggy_lines',
        'block', 'critical', 'major', 'block_total', 'critical_total',
        'major_total', 'file_name_stat', 'find_interval', 'la', 'ld',
        'fix_file_num', 'bug_fix_files', 'rules', 'clean_lines',
    )
    return [{name: getattr(pf_obj, name) for name in exported}]