Example #1
0
def extract_one_to_db_obj(project):
    """Collect the size features (la/ld/lt) of *project* as plain dicts.

    The git log is indexed by commit id, and every commit without a parent
    seeds ``GitOneCommitFeatures.current_root``.
    ``calculate_features_for_root(SizeFeatures)`` is then called repeatedly
    until that root set is empty.

    Args:
        project: Project identifier, passed unchanged to
            ``GitOneCommitFeatures.initialize`` and ``retrieve_git_log``.

    Returns:
        list[dict]: One dict per extracted result with the keys
        ``project``, ``commit_id``, ``la``, ``ld`` and ``lt``.
    """
    gcf = GitOneCommitFeatures
    gcf.initialize(project)
    rgcms = retrieve_git_log(project)

    rgcm_dict = {}
    root = set()
    for rgcm in rgcms:
        rgcm_dict[rgcm.commit_id] = rgcm
        if not rgcm.parent:
            # A commit without parents is a starting point of the traversal.
            root.add(rgcm.commit_id)
    # The raw log has been indexed; drop the local reference to it.
    del rgcms

    gcf.current_root = root
    gcf.calculated_commit = set()
    gcf.candidate_commit = set()
    gcf.rgcm_dict = rgcm_dict

    fields = ('project', 'commit_id', 'la', 'ld', 'lt')
    db_objs = []
    # Poll the same class attribute that was seeded above.  The previous code
    # read ``SizeFeatures.current_root`` here, which only matches the seeded
    # state if SizeFeatures happens to inherit it from GitOneCommitFeatures.
    # NOTE(review): presumably calculate_features_for_root advances
    # gcf.current_root on each call -- confirm against its implementation.
    while gcf.current_root:
        extract_results = gcf.calculate_features_for_root(SizeFeatures)
        assert isinstance(extract_results, list)
        for er in extract_results:
            sf_obj = SizeFeaturesObj(er)
            db_objs.append({name: getattr(sf_obj, name) for name in fields})
    return db_objs
Example #2
0
def extract_one_to_db_obj(project):
    """Collect the history features (ndev/age/nuc) of *project* as dicts.

    ``GitOneCommitFeatures`` is initialised with ``merge_all_log=True``.
    The git log is indexed by commit id, and every commit without a parent
    seeds ``GitOneCommitFeatures.current_root``.
    ``calculate_features_for_root(HistoryFeatures)`` is then called
    repeatedly until that root set is empty.

    Args:
        project: Project identifier, passed unchanged to
            ``GitOneCommitFeatures.initialize`` and ``retrieve_git_log``.

    Returns:
        list[dict]: One dict per extracted result with the keys
        ``project``, ``commit_id``, ``ndev``, ``age`` and ``nuc``.
    """
    gcf = GitOneCommitFeatures
    gcf.initialize(project, merge_all_log=True)
    rgcms = retrieve_git_log(project)

    rgcm_dict = {}
    root = set()
    for rgcm in rgcms:
        rgcm_dict[rgcm.commit_id] = rgcm
        if not rgcm.parent:
            # A commit without parents is a starting point of the traversal.
            root.add(rgcm.commit_id)

    gcf.current_root = root
    gcf.calculated_commit = set()
    gcf.candidate_commit = set()
    gcf.rgcm_dict = rgcm_dict

    fields = ('project', 'commit_id', 'ndev', 'age', 'nuc')
    db_objs = []
    # NOTE(review): presumably calculate_features_for_root advances
    # gcf.current_root on each call -- confirm against its implementation.
    while gcf.current_root:
        extract_results = gcf.calculate_features_for_root(HistoryFeatures)
        assert isinstance(extract_results, list)
        for er in extract_results:
            hf_obj = HistoryFeaturesObj(er)
            db_objs.append({name: getattr(hf_obj, name) for name in fields})
    return db_objs
Example #3
0
def extract_one_to_db_obj(project):
    """Return the diffusion features of the latest commit of *project*.

    "Latest" means the commit with the greatest ``time_stamp`` in the log
    returned by ``retrieve_git_log``.

    Args:
        project: Project identifier, passed unchanged to
            ``GitOneCommitFeatures.initialize`` and ``retrieve_git_log``.

    Returns:
        list[dict]: A single-element list whose dict has the keys
        ``project``, ``commit_id``, ``ns``, ``nd``, ``nf`` and ``entropy``.
        The list is empty when the log has no commits or when
        ``DiffusionFeatures.extract`` returns ``None``.
    """
    GitOneCommitFeatures.initialize(project)
    rgcms = retrieve_git_log(project)
    db_objs = []

    sorted_rgcms = sorted(rgcms, key=lambda x: x.time_stamp)
    if not sorted_rgcms:
        # Nothing to extract; indexing [-1] below would raise IndexError.
        return db_objs
    rgcm = sorted_rgcms[-1]

    attr_dict = DiffusionFeatures(rgcm).extract()
    if attr_dict is None:
        # Treat "no attributes" as "no row", as the all-commits extractor
        # does when it skips such commits.
        return db_objs

    df_obj = DiffusionFeaturesObj(attr_dict)
    fields = ('project', 'commit_id', 'ns', 'nd', 'nf', 'entropy')
    db_objs.append({name: getattr(df_obj, name) for name in fields})
    return db_objs
Example #4
0
def extract_one_to_db_obj(project):
    """Build experience-feature DB objects for every commit of *project*.

    Commits are processed in ascending ``time_stamp`` order.  A commit whose
    ``ExperienceFeatures.extract`` result is ``None`` contributes nothing.

    Args:
        project: Project identifier, passed unchanged to
            ``GitCommitFeatures.initialize`` and ``retrieve_git_log``.

    Returns:
        list: The ``to_db_obj()`` result of each ``ExperienceFeaturesObj``,
        in processing order.
    """
    GitCommitFeatures.initialize(project)

    commits = list(retrieve_git_log(project))
    commits.sort(key=lambda commit: commit.time_stamp)

    results = []
    for commit in commits:
        attrs = ExperienceFeatures(commit).extract()
        if attrs is not None:
            results.append(ExperienceFeaturesObj(attrs).to_db_obj())
    return results
Example #5
0
def extract_one_to_db_obj(project):
    """Build purpose-feature DB objects for every commit of *project*.

    Steps:
      1. Extract a ``PurposeFeaturesObj`` per commit in ascending
         ``time_stamp`` order, skipping commits whose extraction is ``None``.
      2. Pass the commits classified ``'Corrective'`` and the full commit
         map to ``GitCommitLinker`` and call ``link_corrective_commits``.
      3. JSON-encode the non-``None`` link fields of every object and
         convert it with ``to_db_obj()``.

    Args:
        project: Project identifier, passed unchanged to
            ``GitCommitFeatures.initialize``, ``retrieve_git_log`` and
            ``GitCommitLinker``.

    Returns:
        list: The ``to_db_obj()`` result of each extracted object, in
        processing order.

    Raises:
        Exception: Whatever ``GitCommitLinker`` raises is printed and then
            re-raised unchanged.
    """
    GitCommitFeatures.initialize(project)
    rgcms = retrieve_git_log(project)

    pf_objs = []
    corrective_commits = {}  # key: commit_id; value: pf_obj
    all_commits = {}  # key: commit_id; value: pf_obj
    for rgcm in sorted(rgcms, key=lambda x: x.time_stamp):
        attr_dict = PurposeFeatures(rgcm).extract()
        if attr_dict is None:
            continue
        pf_obj = PurposeFeaturesObj(attr_dict)
        pf_objs.append(pf_obj)
        all_commits[pf_obj.commit_id] = pf_obj
        if pf_obj.classification == 'Corrective':
            corrective_commits[pf_obj.commit_id] = pf_obj

    # Link corrective commits to buggy commits (the original note calls this
    # the SZZ algorithm).
    # NOTE(review): presumably this fills the link fields encoded below on
    # the shared pf_obj instances -- confirm in GitCommitLinker.
    try:
        git_commit_linker = GitCommitLinker(project, corrective_commits,
                                            all_commits)
        git_commit_linker.link_corrective_commits()
    except Exception as e:
        print(e)
        raise

    # Fields stored as JSON text; the order matches the original branches.
    json_fields = ('fix_by', 'fixes', 'buggy_lines', 'clean_lines',
                   'bug_fix_files')
    db_objs = []
    for pf_obj in pf_objs:
        for name in json_fields:
            value = getattr(pf_obj, name)
            if value is not None:
                setattr(pf_obj, name, json.dumps(value))
        db_objs.append(pf_obj.to_db_obj())
    return db_objs
Example #6
0
def extract_one_to_db_obj(project):
    """Build diffusion-feature DB objects for every commit of *project*.

    Commits are processed in ascending ``time_stamp`` order.  A commit whose
    ``DiffusionFeatures.extract`` result is ``None`` contributes nothing.

    Args:
        project: Project identifier, passed unchanged to
            ``GitCommitFeatures.initialize`` and ``retrieve_git_log``.

    Returns:
        list: The ``to_db_obj()`` result of each ``DiffusionFeaturesObj``,
        in processing order.
    """
    GitCommitFeatures.initialize(project)

    commits = list(retrieve_git_log(project))
    commits.sort(key=lambda commit: commit.time_stamp)

    results = []
    for commit in commits:
        attrs = DiffusionFeatures(commit).extract()
        if attrs is not None:
            results.append(DiffusionFeaturesObj(attrs).to_db_obj())
    return results
Example #7
0
    def initialize(project, merge_all_log=False):
        """Load git data for *project* and reset the class-level state.

        Everything is stored as attributes on ``GitOneCommitFeatures``
        itself, not on an instance.

        Args:
            project: Project identifier, passed to every retrieval helper
                and to ``MemManager``.
            merge_all_log: Forwarded as the third positional argument of the
                two ``project_merge_*`` retrieval calls.
        """
        state = GitOneCommitFeatures

        # Raw git data for the project.
        state.project_logs = retrieve_git_log(project)
        state.project_numstat = get_one_numstats(project)
        state.project_namestat = retrieve_one_git_namestats(project)
        # Same helpers again, this time called with (True, merge_all_log).
        state.project_merge_namestat = retrieve_one_git_namestats(
            project, True, merge_all_log)
        state.project_merge_numstat = get_one_numstats(
            project, True, merge_all_log)

        # Fresh, empty bookkeeping containers.  They are assigned in the
        # same order as before, on either side of the MemManager creation.
        for attr in ('committer_time', 'file_stats', 'developer_stats',
                     'parent_file_stats'):
            setattr(state, attr, dict())
        state.mem_manager = MemManager(project)
        for attr, factory in (('candidate_commit', set),
                              ('rgcm_dict', dict),
                              ('current_root', set),
                              ('calculated_commit', set)):
            setattr(state, attr, factory())
Example #8
0
    def store_one_meta(self, project):
        """Write the metadata of the latest commit of *project* to disk.

        The commit with the greatest ``time_stamp`` is loaded into a
        ``CommitMeta`` and five of its attributes are dumped as a
        one-element JSON list to
        ``conf.local_path + "featuresStore/commit_meta1"``.

        Args:
            project: Project identifier passed to ``retrieve_git_log``.
        """
        ordered_logs = sorted(retrieve_git_log(project),
                              key=lambda entry: entry.time_stamp)
        latest = ordered_logs[-1]

        meta = commit_meta.CommitMeta()
        meta.from_git_log(latest)

        exported = ('project', 'commit_id', 'is_merge', 'time_stamp',
                    'author_email')
        payload = [{name: getattr(meta, name) for name in exported}]

        target = conf.local_path + "featuresStore/commit_meta1"
        with open(target, 'w') as out:
            json.dump(payload, out)
Example #9
0
def extract_one_to_db_obj(project):
    """Return the experience features of the latest commit of *project*.

    "Latest" means the commit with the greatest ``time_stamp`` in the log
    returned by ``retrieve_git_log``.

    Args:
        project: Project identifier, passed unchanged to
            ``GitOneCommitFeatures.initialize`` and ``retrieve_git_log``.

    Returns:
        list[dict]: A single-element list whose dict has the keys
        ``project``, ``commit_id``, ``exp``, ``rexp`` and ``sexp``.
        The list is empty when the log has no commits or when
        ``ExperienceFeatures.extract`` returns ``None``.
    """
    GitOneCommitFeatures.initialize(project)
    rgcms = retrieve_git_log(project)
    db_objs = []

    sorted_rgcms = sorted(rgcms, key=lambda x: x.time_stamp)
    if not sorted_rgcms:
        # Nothing to extract; indexing [-1] below would raise IndexError.
        return db_objs
    rgcm = sorted_rgcms[-1]

    attr_dict = ExperienceFeatures(rgcm).extract()
    if attr_dict is None:
        # Treat "no attributes" as "no row", as the all-commits extractor
        # does when it skips such commits.
        return db_objs

    ef_obj = ExperienceFeaturesObj(attr_dict)
    fields = ('project', 'commit_id', 'exp', 'rexp', 'sexp')
    db_objs.append({name: getattr(ef_obj, name) for name in fields})
    return db_objs
Example #10
0
def extract_one_to_db_obj(project):
    """Return the purpose features of the latest commit of *project*.

    "Latest" means the commit with the greatest ``time_stamp`` in the log
    returned by ``retrieve_git_log``.  The commit is passed through
    ``GitCommitLinker.link_corrective_commits`` (as the only entry of the
    commit map), its non-``None`` link fields are JSON-encoded, and the
    result is exported as a plain dict.

    Args:
        project: Project identifier, passed unchanged to
            ``GitOneCommitFeatures.initialize``, ``retrieve_git_log`` and
            ``GitCommitLinker``.

    Returns:
        list[dict]: A single-element list holding the exported attributes.
        The list is empty when the log has no commits or when
        ``PurposeFeatures.extract`` returns ``None``; in both cases the
        linker is not invoked.

    Raises:
        Exception: Whatever ``GitCommitLinker`` raises is printed and then
            re-raised unchanged.
    """
    GitOneCommitFeatures.initialize(project)
    rgcms = retrieve_git_log(project)
    db_objs = []

    sorted_rgcms = sorted(rgcms, key=lambda x: x.time_stamp)
    if not sorted_rgcms:
        # Nothing to extract; indexing [-1] below would raise IndexError.
        return db_objs
    rgcm = sorted_rgcms[-1]

    attr_dict = PurposeFeatures(rgcm).extract()
    if attr_dict is None:
        # Treat "no attributes" as "no row", as the all-commits extractor
        # does when it skips such commits.
        return db_objs

    pf_obj = PurposeFeaturesObj(attr_dict)
    all_commits = {pf_obj.commit_id: pf_obj}  # key: commit_id; value: pf_obj
    corrective_commits = {}  # key: commit_id; value: pf_obj
    if pf_obj.classification == 'Corrective':
        corrective_commits[pf_obj.commit_id] = pf_obj

    # Link corrective commits to buggy commits (the original note calls this
    # the SZZ algorithm).
    # NOTE(review): presumably this fills the link fields encoded below on
    # the shared pf_obj instance -- confirm in GitCommitLinker.
    try:
        git_commit_linker = GitCommitLinker(project, corrective_commits,
                                            all_commits)
        git_commit_linker.link_corrective_commits()
    except Exception as e:
        print(e)
        raise

    # Fields stored as JSON text; the order matches the original branches.
    json_fields = ('fix_by', 'fixes', 'buggy_lines', 'clean_lines',
                   'bug_fix_files')
    for name in json_fields:
        value = getattr(pf_obj, name)
        if value is not None:
            setattr(pf_obj, name, json.dumps(value))

    # Exported attributes, in the key order of the resulting dict.
    export_fields = (
        'project', 'commit_id', 'time_stamp', 'is_fix', 'classification',
        'linked', 'contains_bug', 'fix_by', 'fixes', 'buggy_lines',
        'block', 'critical', 'major', 'block_total', 'critical_total',
        'major_total', 'file_name_stat', 'find_interval', 'la', 'ld',
        'fix_file_num', 'bug_fix_files', 'rules', 'clean_lines',
    )
    db_objs.append({name: getattr(pf_obj, name) for name in export_fields})
    return db_objs