Exemple #1
0
def missing_class_gen(root_class,
                      root_test,
                      java_src,
                      log,
                      pit=None,
                      name='tmp',
                      pit2=None):
    '''
    Compare the compiled classes with the generated ESTest files and
    collect per-class information about what is missing.

    :param root_class: directory scanned for ".class" files
    :param root_test: directory scanned for "ESTest.java" files; also
        forwarded to dict_diff as path_root_test
    :param java_src: directory scanned for ".java" sources
    :param log: forwarded to make_df
    :param pit: optional argument for miss_PIT; skipped when None
    :param name: forwarded to make_df
    :param pit2: optional argument for miss_target_pit; skipped when None
    :return: the dictionary built by dict_diff, after the later steps
        have been given the chance to update it
    '''
    # Full paths of the compiled classes, the sources and the tests.
    class_files = pt.walk(root_class, ".class")
    source_files = pt.walk(java_src, ".java")
    test_files = pt.walk(root_test, "ESTest.java")
    print("classes size ={}".format(len(class_files)))
    print("tests size ={}".format(len(test_files)))

    # Turn every full path into a package-style name.
    # NOTE(review): -6 / -12 match len(".class") / len("_ESTest.java");
    # presumably they trim the suffix - confirm against pt.path_to_package.
    class_packages = []
    for class_path in class_files:
        class_packages.append(pt.path_to_package('org', class_path, -6))
    test_packages = []
    for test_path in test_files:
        test_packages.append(pt.path_to_package('org', test_path, -12))

    report = dict_diff(list_one=class_packages,
                       list_two=test_packages,
                       path_root_test=root_test)
    look_at_test(source_files, test_files, report)

    if pit is not None:
        miss_PIT(pit, report)
    if pit2 is not None:
        miss_target_pit(pit2, report)

    # The value returned by make_df is not needed here.
    make_df(report, log, name)
    return report
Exemple #2
0
def add_loc_helper(row, repo, out, prefix_name='org'):
    '''
    Check out the buggy version of a bug and write the LOC of every
    .java file that sits next to the faulty component into
    "<out>/<issue>_LOC.csv".

    :param row: record providing the 'parent', 'issue' and
        'component_path' entries
    :param repo: path to repo
    :param out: path where to write the csv
    :param prefix_name: first package segment, forwarded to
        pt.path_to_package
    '''
    # The buggy version is the parent of the fixing commit. This value used
    # to be overwritten by row['commit'] on the very next line, so the fixed
    # version was the one checked out and measured.
    commit_buggy = row['parent']
    bug_id = row['issue']
    path_to_faulty = row['component_path']
    print(path_to_faulty)
    checkout_version(commit_buggy, repo, None)

    # Component paths are backslash separated; dropping the last segment
    # leaves the directory that holds the faulty file.
    pack = '/'.join(str(path_to_faulty).split('\\')[:-1])
    klasses = pt.walk_rec('{}/{}'.format(repo, pack), [], '.java')

    # NOTE(review): the get_LOC result is stored as-is here, while
    # look_at_test reads element [0] of it - confirm which is intended.
    d_l = [{
        'name': pt.path_to_package(prefix_name, class_i, -5),
        'LOC': get_LOC(class_i),
        'bug_name': bug_id
    } for class_i in klasses]
    df = pd.DataFrame(d_l)
    df.to_csv('{}/{}_LOC.csv'.format(out, bug_id))
Exemple #3
0
def get_miss_classes_applyer(row, out_dir, repo_path):
    '''
    Check out the buggy commit of one bug and dump the list of .java
    files found under "<repo_path>/src" to
    "<out_dir>/<issue>_<index_bug>.csv".

    :param row: record providing 'parent', 'commit', 'tag_parent',
        'issue' and 'index_bug'
    :param out_dir: directory that receives the csv
    :param repo_path: path of the git repository
    '''
    commit_bug = row['parent']
    commit_fix = row['commit']  # read from the row but not used below
    bug_tag = row['tag_parent']
    issue_id = row['issue']
    index_bug = row['index_bug']

    # Move the working tree to the buggy version.
    checkout_cmd = 'git checkout {}'.format(commit_bug)
    print(ge.run_GIT_command_and_log(repo_path, checkout_cmd, None, None,
                                     False))

    # One record per source file under src.
    java_files = pt.walk_rec('{}/src'.format(repo_path), [], '.java')
    records = [{
        'class_path': java_path,
        'name': pt.path_to_package('org', java_path, -5),
        'tag_bug': bug_tag,
        'commit_bug': commit_bug
    } for java_path in java_files]

    out_csv = '{}/{}_{}.csv'.format(out_dir, issue_id, index_bug)
    pd.DataFrame(records).to_csv(out_csv)
Exemple #4
0
def path_to_package_name(p_name, path_input):
    '''
    Convert the path of a .java file into a package name.

    :param p_name: project name; 'opennlp' makes the package start at
        'opennlp', anything else makes it start at 'org'
    :param path_input: file path, backslashes are accepted
    :return: whatever pt.path_to_package returns, or None when the path
        does not end with '.java' or the conversion raises
    '''
    normalized = str(path_input).replace('\\', '/')
    if not normalized.endswith('.java'):
        return None
    root_segment = 'opennlp' if p_name == 'opennlp' else 'org'
    try:
        # The negative length is passed so the '.java' suffix can be cut.
        return pt.path_to_package(root_segment, normalized, -len('.java'))
    except Exception:
        # Best effort: any failure of the conversion is reported as None.
        return None
Exemple #5
0
def look_at_test(classes, tests, d):
    '''
    Fill the per-class dictionary d, in place, with line counts and test
    status flags.

    Every entry gets 'loc_TEST' and 'loc_class' reset to 0. Entries that
    match a source file get 'loc_class' and, when a value is available
    from get_FP_probability, 'FP'. Entries that match a test file get
    'loc_TEST', 'no_test' and 'Empty_test_case'.

    :param classes: paths of the .java source files
    :param tests: paths of the generated test files
    :param d: dict keyed by package-style class name, values are dicts
    '''
    d_FP = get_FP_probability()
    for entry in d:
        d[entry]['loc_TEST'] = 0
        d[entry]['loc_class'] = 0

    for item in classes:
        ky = pt.path_to_package('org', item, -5)
        if 'package-info' in str(ky):
            continue
        # Check membership first: writing d[ky]['FP'] for a class that has
        # an FP value but no entry in d used to raise KeyError.
        if ky not in d:
            print("the .java in not in the dict .class --> {}".format(ky))
            continue
        if ky in d_FP:
            d[ky]['FP'] = d_FP[ky]
        d[ky]['loc_class'] = int(get_LOC(p=item)[0])

    for cut in tests:
        ky = pt.path_to_package('org', cut, -12)
        if ky not in d:
            print("the .java in not in the dict .class --> {}".format(ky))
            continue
        num_line = int(get_LOC(p=cut)[0])
        d[ky]['loc_TEST'] = num_line
        # NOTE(review): 12 is presumably the size of a generated test file
        # that holds no test methods - confirm.
        # Fewer than 12 lines: no test at all; 12 or fewer: empty test case.
        d[ky]['no_test'] = 1 if num_line < 12 else 0
        d[ky]['Empty_test_case'] = 1 if num_line <= 12 else 0
Exemple #6
0
def get_results(dir_res='/home/ise/test/pom_3'):
    '''
    Summarise, for every 'TIKA' directory under dir_res, which names have
    a log file and which have a generated ESTest file, and write the
    result to "result_info_empty.csv" in the parent of dir_res.

    Each output row has the keys 'id', 'bug_name', 'log', 'name' and
    'test', where 'log' / 'test' are 1 when a log / test file was found.

    :param dir_res: root directory of the results
    :raises Exception: when two log files of one bug share the same name
    '''
    rows = []
    for item in pt.walk_rec(dir_res, [], 'TIKA', False, lv=-1):
        dir_name = str(item).split('/')[-1]
        print("----{}----".format(dir_name))
        # The directory name is "<bug_name>_<id>...".
        name_parts = dir_name.split('_')
        id_bug = name_parts[1]
        bug_name = name_parts[0]
        d_test = {}
        folder_log_evo = pt.walk_rec(item, [], 'log_evo', False)
        folder_org = pt.walk_rec(item, [], 'org', False)

        # A bug directory without a log_evo folder used to fail with
        # IndexError; treat it like a missing 'org' folder instead.
        if folder_log_evo:
            for log_t in pt.walk_rec(folder_log_evo[0], [], '.txt'):
                # file name without its 4 character extension
                log_name = str(log_t).split('/')[-1][:-4]
                if log_name in d_test:
                    msg = '[Error] duplication in the test log dir := {}'.format(
                        folder_log_evo)
                    raise Exception(msg)
                d_test[log_name] = {
                    'id': id_bug,
                    'bug_name': bug_name,
                    'log': 1,
                    'name': log_name,
                    'test': 0
                }

        if folder_org:
            for test_i in pt.walk_rec(folder_org[0], [], 'ESTest.java'):
                test_name_package = pt.path_to_package('org', test_i, -5)
                # drop the trailing 7 characters ("_ESTest")
                test_name_package = test_name_package[:-7]
                if test_name_package in d_test:
                    d_test[test_name_package]['test'] = 1
                else:
                    d_test[test_name_package] = {
                        'id': id_bug,
                        'bug_name': bug_name,
                        'log': 0,
                        'name': test_name_package,
                        'test': 1
                    }
        rows.extend(d_test.values())

    df = pd.DataFrame(rows)
    father = '/'.join(str(dir_res).split('/')[:-1])
    df.to_csv("{}/result_info_empty.csv".format(father))
Exemple #7
0
def csv_commit_db(csv_db,
                  repo,
                  out_dir_path,
                  is_max=True,
                  is_test=True,
                  only_java=True):
    '''
    Build the bug table from a csv of changed components.

    The csv is read without a header as the columns 'component_path',
    'commit', 'issue' and 'LOC_change'. Helper columns are derived from
    the path, rows are filtered, git information is attached, and the
    result is sorted by commit date. Written files in out_dir_path:
    "tmp_1.csv", "tmp.csv" and "<repo name>_bug.csv"; cut_df is also
    called with the final frame.

    :param csv_db: path of the input csv
    :param repo: path of the git repository
    :param out_dir_path: output directory
    :param is_max: keep, per issue, only the row with the largest
        'LOC_change'
    :param is_test: when true, rows flagged as test components are removed
    :param only_java: keep only rows whose extension is 'java'
    '''

    def _extension(path):
        return str(path).rpartition('.')[2]

    def _under_src_test(path):
        return int(r'src\test' in str(path))

    def _second_segment(path):
        return str(path).split('\\')[1]

    def _component_name(path):
        return str(path).rpartition('\\')[2].partition('.')[0]

    def _ends_with_test(comp):
        return int(str(comp).endswith('Test'))

    def _largest_change(group):
        keep = ['component_path', 'commit', 'LOC_change', 'is_test']
        return group.loc[group['LOC_change'].idxmax(), keep]

    def _to_dotted_name(path):
        return str(pt.path_to_package('org', path, -5)).replace('\\', '.')

    def _parent_package(dotted):
        return str(dotted).rpartition('.')[0]

    df = pd.read_csv(csv_db,
                     names=['component_path', 'commit', 'issue', 'LOC_change'])

    # Helper columns derived from the backslash separated component path.
    df['is_java_file'] = df['component_path'].apply(_extension)
    df['is_java'] = np.where(df['is_java_file'] == 'java', 1, 0)
    df['src/tset'] = df['component_path'].apply(_under_src_test)
    df['first_name'] = df['component_path'].apply(_second_segment)
    df['comp_name'] = df['component_path'].apply(_component_name)
    df['suffix_test'] = df['comp_name'].apply(_ends_with_test)
    df['is_test'] = np.where(df['first_name'] == 'test', 1, 0)
    df.to_csv('{}/tmp_1.csv'.format(out_dir_path))
    print("df_size = {}".format(len(df)))

    if only_java:
        df = df[df['is_java'] > 0]
        print("After cleaning the non java component df_size = {}".format(
            len(df)))

    if is_test:
        not_a_test = (df['is_test'] == 0) & (df['src/tset'] == 0)
        df = df[not_a_test]
        print("After cleaning the test component df_size = {}".format(len(df)))

    if is_max:
        # One row per issue; reset_index turns 'issue' back into a column.
        df = df.groupby('issue').apply(_largest_change).reset_index()

    df.to_csv('{}/tmp.csv'.format(out_dir_path))

    # Attach the git information of the commit and of its parent.
    df['parent'] = df['commit'].apply(
        lambda sha: get_the_previous_commit(sha, repo))
    df['tag_commit'] = df['commit'].apply(
        lambda sha: ge.get_Tag_name_by_commit(sha, repo))
    df['date_commit'] = df['commit'].apply(
        lambda sha: get_the_Date_commit(sha, repo))
    df['tag_parent'] = df['parent'].apply(
        lambda sha: ge.get_Tag_name_by_commit(sha, repo))
    df['date_commit'] = pd.to_datetime(df['date_commit'])
    df["module"] = np.nan

    # Path -> dotted component name -> enclosing package.
    df['fail_component'] = df['component_path'].apply(_to_dotted_name)
    df['package'] = df['fail_component'].apply(_parent_package)

    # Order by commit date and number the bugs in that order.
    df = df.sort_values("date_commit").reset_index(drop=True)
    df['index_bug'] = df.index

    repo_name = str(repo).split('/')[-1]

    # Split to train and test
    cut_df(df, out_dir_path)
    # write the whole df to disk
    df.to_csv('{}/{}_bug.csv'.format(out_dir_path, repo_name))