Ejemplo n.º 1
0
def get_test_xml_csv(dir_res='/home/ise/test/res'):
    """
    Collect all test-result XML files under ``dir_res`` into one CSV.

    For every 'Result' directory found, each .xml file is parsed with
    pars_xml_test_file() and enriched with metadata encoded in the
    directory names (test mode, iteration, time budget, date, bug id
    and bug name).

    :param dir_res: root directory holding the per-bug 'Result' dirs
    :return: path of the written res.csv (in the parent of ``dir_res``)
    """
    d_l = []
    res = pt.walk_rec(dir_res, [], 'Result', False)
    for item_dir in res:
        # the parent dir name encodes '<bug_name>_<bug_id>'
        parent_name = str(item_dir).split('/')[-2]
        bug_name = parent_name.split('_')[0]
        bug_id = parent_name.split('_')[1]
        xml_files = pt.walk_rec(item_dir, [], '.xml')
        for xml_item in xml_files:
            if str(xml_item).endswith('xml') is False:
                continue
            # dir name layout (right to left): <mode>, it=<iter>,
            # t=<budget>, preceded by a 4-part date -- TODO confirm
            name_dir = str(xml_item).split('/')[-2]
            test_mode = name_dir.split('_')[-1]
            test_it = name_dir.split('_')[-2].split('=')[1]
            test_time_b = name_dir.split('_')[-3].split('=')[1]
            test_date = '_'.join(name_dir.split('_')[3:7])
            d = pars_xml_test_file(xml_item)
            d['test_mode'] = test_mode
            d['test_it'] = test_it
            d['test_time_b'] = test_time_b
            d['test_date'] = test_date
            d['bug_id'] = bug_id
            d['bug_name'] = bug_name
            d_l.append(d)
    df = pd.DataFrame(d_l)
    dir_father = '/'.join(str(dir_res).split('/')[:-1])
    out_csv = "{}/res.csv".format(dir_father)
    df.to_csv(out_csv)
    return out_csv
Ejemplo n.º 2
0
def get_weka_info(p_path, mode):
    """
    Map each Weka tag name to its model / test / prediction files.

    Scans ``p_path`` for the 'arff_<mode>' and 'pred_1_<mode>' folders,
    then pairs every prediction sub-folder with its matching .arff model.

    :param p_path: root folder of the Weka output
    :param mode: suffix selecting which result set to read
    :return: dict  tag -> {'sort_index', 'model', 'name', 'test'}
    """
    d_tags = {}
    candidates = pt.walk_rec(p_path, [], '_{}'.format(mode), False)
    arff_path = None
    pred_1_path = None
    for cand in candidates:
        cand_str = str(cand)
        if cand_str.endswith('arff_{}'.format(mode)):
            arff_path = cand
        elif cand_str.endswith('pred_1_{}'.format(mode)):
            pred_1_path = cand
    pred_dirs = pt.walk_rec(pred_1_path, [], '', False, lv=-1)
    model_files = pt.walk_rec(arff_path, [], '.arff')
    # prediction dir name layout: '<sort_index>_<tag name parts...>'
    for pred_dir in pred_dirs:
        name_parts = str(pred_dir).split('/')[-1].split('_')
        tag = '_'.join(name_parts[1:])
        d_tags[tag] = {'sort_index': name_parts[0], 'model': None}
        for res_file in pt.walk_rec(pred_dir, [], ''):
            if str(res_file).endswith('.csv'):
                d_tags[tag]['name'] = res_file
            elif str(res_file).endswith(".arff"):
                d_tags[tag]['test'] = res_file
    # attach the model file per tag; unknown tags get a stub record
    for model_file in model_files:
        tag = str(model_file).split('/')[-1].split('.')[0]
        if tag in d_tags:
            d_tags[tag]['model'] = model_file
        else:
            d_tags[tag] = {
                'model': model_file,
                'test': None,
                'name': None,
                'sort_index': None
            }
    return d_tags
Ejemplo n.º 3
0
def get_diff_fix_buggy(root_dir_bug, root_dir_fix, if_count_tset_cases=False):
    """
    Diff the buggy vs fixed test-run logs and extract JUnit failures.

    Pairs every '.txt' log that exists in both roots (matched by file
    name), diffs each pair, and parses JUnit failure records out of each
    side's diff.  When ``if_count_tset_cases`` is True, only the number
    of test cases per side is collected instead.

    :param root_dir_bug: folder with the buggy-version logs
    :param root_dir_fix: folder with the fixed-version logs
    :param if_count_tset_cases: when True, count test cases only
    :return: list of per-record dicts
    """
    d_start = {}
    res_fix = pt.walk_rec(root_dir_fix, [], '.txt')
    res_bug = pt.walk_rec(root_dir_bug, [], '.txt')
    # index the fix logs by file name (without the '.txt' suffix) ...
    for item_fix in res_fix:
        name = str(item_fix).split('/')[-1][:-4]
        d_start[name] = {'fix': item_fix}
    # ... then attach the matching bug logs
    for item_bug in res_bug:
        name_bug = str(item_bug).split('/')[-1][:-4]
        if name_bug in d_start:
            d_start[name_bug]['bug'] = item_bug
        else:
            d_start[name_bug] = {'bug': item_bug}
    # keep only logs present on both sides
    d_both = {}
    for ky in d_start:
        if 'bug' in d_start[ky] and 'fix' in d_start[ky]:
            d_both[ky] = {'bug': d_start[ky]['bug'], 'fix': d_start[ky]['fix']}

    d_l = []
    for key_i in d_both.keys():
        diff_bug, diff_fix = diff_function(d_both[key_i]['bug'],
                                           d_both[key_i]['fix'])

        # parse the bug_id and iteration number from the path dir
        d_buggy = pars_bug_id_iter_id(d_both[key_i]['bug'])
        d_buggy['mode'] = 'buggy'
        d_fixed = pars_bug_id_iter_id(d_both[key_i]['fix'])
        d_fixed['mode'] = 'fixed'

        # test name is the leading part of the log file name
        d_buggy['name'] = str(key_i).split('_')[0]
        d_fixed['name'] = str(key_i).split('_')[0]

        if if_count_tset_cases:
            d_buggy['num_of_test_cases'] = tests_regex_count(
                d_both[key_i]['bug'])
            d_fixed['num_of_test_cases'] = tests_regex_count(
                d_both[key_i]['fix'])
            d_l.append(d_buggy)
            d_l.append(d_fixed)
            continue

        # JUnit failure records from the buggy side
        list_junit_res = get_regex_all(diff_bug, r'(test\d+.+\n\njava.lang.+)',
                                       0, False)
        info_list = pars_junit_regex(list_junit_res, d_extand=d_buggy)
        if info_list is not None:
            d_l.extend(info_list)

        # BUG FIX: this extraction used to be nested under the buggy-side
        # 'if info_list is not None', so the fixed side was skipped (or
        # parsed from stale matches) whenever the buggy side had none.
        list_junit_res = get_regex_all(diff_fix,
                                       r'(test\d+.+\n\njava.lang.+)', 0,
                                       False)
        info_list = pars_junit_regex(list_junit_res, d_extand=d_fixed)
        if info_list is not None:
            d_l.extend(info_list)

    return d_l
Ejemplo n.º 4
0
def get_static_dir(root):
    """
    Write per-bug static statistics ('static.csv') for every 'P_' dir.

    Each bug dir name encodes the time budget, project name and bug id;
    when an Evo_Test folder exists, the number of generated tests is
    counted (divided by 2 -- presumably each test ships with a
    scaffolding file; TODO confirm).

    :param root: root folder containing the 'P_' bug directories
    """
    bug_dir = pt.walk_rec(root, [], 'P_', False)
    list_d = []
    for dir_i in bug_dir:
        time_budget = str(dir_i).split('/')[-2].split('=')[1]
        proj_name = str(dir_i).split('/')[-2].split('_')[0]
        bug_id = str(dir_i).split('/')[-1].split('_')[3]
        evo_dir = os.path.isdir('{}/Evo_Test'.format(dir_i))
        if evo_dir:
            num_test_generated = len(
                pt.walk_rec('{}/Evo_Test'.format(dir_i), [], '.java'))
            num_test_generated = num_test_generated / float(2)
        else:
            # no Evo_Test folder -> mark the count as missing
            num_test_generated = -1
        d_i = {
            "time_budget": time_budget,
            'proj_name': proj_name,
            'bug_id': bug_id,
            'evo_dir': evo_dir,
            'num_test_generated': num_test_generated
        }
        list_d.append(d_i)
    df = pd.DataFrame(list_d)
    df.to_csv('{}/static.csv'.format(root))
Ejemplo n.º 5
0
def count_package_number(commit, p_name, prefix='tools'):
    repo_path = '/home/ise/bug_miner/{0}/{0}'.format(p_name)
    run_GIT_command_and_log(repo_path, 'git checkout {}'.format(commit), None,
                            None, False)
    src_folder = pt.walk_rec(repo_path, [], prefix, False)
    src_folder = [
        x for x in src_folder if str(x).__contains__('/resources/') is False
    ]
    src_folder = [
        x for x in src_folder if str(x).__contains__('/test/') is False
    ]
    src_folder = [
        x for x in src_folder if str(x).__contains__('/ftp2/') is False
    ]
    src_folder = [
        x for x in src_folder if str(x).__contains__('/opennlp/tools') is True
    ]

    #src_folder

    print src_folder
    if len(src_folder) == 1:
        folderz = pt.walk_rec(src_folder[0], [], '', False)
        num_of_package = len(folderz)
        return num_of_package
    return None
Ejemplo n.º 6
0
def get_miss_classes(project_path_repo, fp_name_dir, out_info):
    '''
    the main func that count the missing class inrespect to the bug commit and FP results tags
    '''
    project = str(project_path_repo).split('/')[-1]
    atg_path = os.getcwd()
    df_bug = pd.read_csv('{}/tmp_files/{}_bug.csv'.format(atg_path, project))
    print list(df_bug)
    res_file = pt.walk_rec(fp_name_dir, [], 'Most_names')
    d_name_tag = {}
    tag_l = []
    for item in res_file:
        tag_name = '_'.join(str(item).split('/')[-2].split('_')[1:])
        tag_index = str(item).split('/')[-2].split('_')[0]
        tag_l.append([tag_name, int(tag_index)])
        d_name_tag[tag_name] = {'csv': item, 'index': tag_index}

    # get sorted list tags
    sorted_tags = sorted(tag_l, key=lambda tup: tup[-1])
    tags_sort = []
    for item_t in sorted_tags:
        tags_sort.append(item_t[0])

    # Go over each bug commit and get the list of classes

#  df_bug.apply(get_miss_classes_applyer,out_dir=out_info,repo_path=project_path_repo,axis=1)

# make a comparison
    res_csv = pt.walk_rec(out_info, [], '.csv')

    for item in res_csv:
        df_commit = pd.read_csv(item, index_col=0)
        if len(df_commit) == 0:
            continue
        tag_bug = df_commit['tag_bug'].iloc[0]
        tag_bug = str(tag_bug).replace('-', '_')
        df_fp_res_tag_cur = pd.read_csv(d_name_tag[tag_bug]['csv'],
                                        names=['path'])
        index = tags_sort.index(tag_name)
        if index > 0:
            old_tag = tags_sort[index - 1]
            df_fp_res_tag_old = pd.read_csv(d_name_tag[old_tag]['csv'],
                                            names=['path'])
            df_fp_res_tag_old['name'] = df_fp_res_tag_old['path'].apply(
                lambda x: path_to_package_name(None, x))
        else:
            df_fp_res_tag_old = None
        df_fp_res_tag_cur['name'] = df_fp_res_tag_cur['path'].apply(
            lambda x: path_to_package_name(None, x))
        df_commit['is_exists'] = df_commit.apply(is_exists_helper,
                                                 df_cur=df_fp_res_tag_cur,
                                                 df_old=df_fp_res_tag_old,
                                                 axis=1)
        df_commit.to_csv('{}_mod.csv'.format(str(item)[:-4]))

        # get all mod file
        res_mod(out_info)
Ejemplo n.º 7
0
def dependency_getter(repo, dir_jars, m2='/home/ise/.m2/repository'):
    '''
    get all dependency jars
    '''
    res_jar2 = pt.walk_rec('/home/ise/.m2/repository', [], '.jar')
    print len(res_jar2)
    res_jar2 = [x for x in res_jar2 if str(x).split('.')[-1] == 'jar']
    print len(res_jar2)
    res_jar1 = pt.walk_rec('{}/{}'.format(repo, dir_jars), [], '.jar')
    jarz = res_jar2 + res_jar1
    str_jarz = ':'.join(jarz)
    return str_jarz
Ejemplo n.º 8
0
def make_jar_file(project_dir_path):
    '''
    Build a jar for the project's 'fixed' sources with mvn or ant.

    Detects the builder from the presence of pom.xml / build.xml in the
    'fixed' dir, runs the packaging step (tests skipped for maven),
    copies the maven dependencies into a 'jar_dir' folder and moves the
    produced jar there as well.

    :param project_dir_path: project root holding 'fixed' and 'log' dirs
    :return: original path of the produced jar, or None on failure
    '''
    fix_dir = '{}/fixed'.format(project_dir_path)
    log_dir = '{}/log'.format(project_dir_path)
    mvn_builder = False
    ant_builder = False
    # builder detection: pom.xml -> maven, build.xml -> ant
    if os.path.isfile('{}/pom.xml'.format(fix_dir)):
        mvn_builder = True
    if os.path.isfile('{}/build.xml'.format(fix_dir)):
        ant_builder = True

    os.chdir(fix_dir)
    out_jar = pt.mkdir_system(project_dir_path, 'jar_dir', False)
    if mvn_builder:
        command = 'mvn package -Dmaven.test.skip=true'
        process = Popen(shlex.split(command), stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
        loging_os_command(log_dir, 'jar_command', stdout, "stdout")
        loging_os_command(log_dir, 'jar_command', stderr, "stderr")
        # os.system(command)
        ans = pt.walk_rec("{}/target".format(fix_dir), [], '.jar')
        command = 'mvn dependency:copy-dependencies -DoutputDirectory={}'.format(
            out_jar)
        process = Popen(shlex.split(command), stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
        loging_os_command(log_dir, 'copy_dependencies', stdout, "stdout")
        loging_os_command(log_dir, 'copy_dependencies', stderr, "stderr")
        #os.system(command)
        # expect exactly one jar under target/
        if len(ans) == 1:
            cp_command = 'mv {} {}'.format(ans[0], out_jar)
            print '[OS] {}'.format(cp_command)
            os.system(cp_command)
            # NOTE(review): ans[0] is returned although the jar was just
            # moved into out_jar -- confirm callers expect the old path.
            return ans[0]
    if ant_builder:
        command = 'ant jar'
        process = Popen(shlex.split(command), stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
        loging_os_command(log_dir, 'jar_command', stdout, "stdout")
        loging_os_command(log_dir, 'jar_command', stderr, "stderr")
        # os.system(command)
        # NOTE(review): the jar is searched under 'target' here as well;
        # ant builds often emit to 'dist'/'build' -- confirm this path.
        ans = pt.walk_rec("{}/target".format(fix_dir), [], '.jar')
        if len(ans) == 1:
            cp_command = 'mv {} {}'.format(ans[0], out_jar)
            print '[OS] {}'.format(cp_command)
            os.system(cp_command)
            return ans[0]
    return None
Ejemplo n.º 9
0
def to_del(p='/home/ise/test/pom_3'):
    # Throwaway debug helper (name says "to delete"): reports which
    # 'TIKA' dirs have no matching 'org' dir beneath them, then stops
    # the interpreter via exit().
    res_tiks = pt.walk_rec(p, [], 'TIKA', False, lv=-1)
    res_org = pt.walk_rec(p, [], 'org', False, lv=-6)
    print "res_tiks =", len(res_tiks)
    print "res_org  =", len(res_org)
    # map each 'org' dir to its grand-parent so it is comparable to res_tiks
    res_org = ['/'.join(str(x).split('/')[:-2]) for x in res_org]
    dif = []
    for y in res_tiks:
        if y not in res_org:
            dif.append(y)
    ans = []
    for item in dif:
        ans.append(str(item).split('/')[-1])
        # print str(item).split('/')[-1]
    # NOTE(review): 'ans' is collected but never used, and exit() kills
    # the whole process -- this is scratch code.
    exit()
Ejemplo n.º 10
0
def make_FP_pred(dir_target='/home/ise/tmp_d4j/out_pred/out/Lang/Lang_2'):
    '''
    Concat the two csv files from the weka dir to one big Dataframe and
    make the probability for bug, by 1 - probability for a valid
    component.

    :param dir_target: weka output folder for one project bug
    :return: None; returns early (with an error print) when the two
             expected csv files are not found exactly once each
    '''
    name = str(dir_target).split('/')[-1]
    p_name = str(name).split('_')[0]
    res_test_set = pt.walk_rec(dir_target, [], 'testing__results_pred.csv')
    most_csv = pt.walk_rec(dir_target, [], 'Most_names_File.csv')
    # BUG FIX: the old check 'len(most_csv) == 1 and len(res_test_set)
    # == 1 is False' chained into '... and (1 is False)', so it was
    # always False and the error branch never fired.
    if len(most_csv) != 1 or len(res_test_set) != 1:
        print "[Error] no csv in the dir-> {}".format(dir_target)
        return None

    connect_name_pred_FP(most_csv, name, p_name, res_test_set, dir_target)
Ejemplo n.º 11
0
def mk_call_graph_raw_data(root_dir, name_find='jars_dir',
                           java_caller='/home/ise/programs/java-callgraph/target/javacg-0.1-SNAPSHOT-static.jar'):
    res = pt.walk_rec(root_dir, [], name_find, False)
    for dir_i in res:
        father_dir = '/'.join(str(dir_i).split('/')[:-1])
        jars = pt.walk_rec(dir_i, [], '.jar')
        if len(jars) != 2:
            print "[Error] in dir --> {}\nfind:\n{}".format(dir_i, jars)
            continue
        out_jars = pt.mkdir_system(father_dir, 'out_jar')
        command_java_1 = 'java -jar {} {} {} '.format(java_caller,
                                                      jars[1], father_dir)
        command_java_0 = 'java -jar {} {} {} '.format(java_caller,
                                                      jars[0], father_dir)
        util_d4j.execute_command(command_java_1, 'call_graph', out_jars)
        util_d4j.execute_command(command_java_0, 'call_graph', out_jars)
Ejemplo n.º 12
0
def get_miss_classes_applyer(row, out_dir, repo_path):
    '''
    Go over one bug commit (a row of the bug csv) and dump the classes
    found in its src tree to '<out_dir>/<issue>_<index_bug>.csv'.

    :param row: pandas row with 'parent', 'commit', 'tag_parent',
                'issue' and 'index_bug' columns
    :param out_dir: output folder for the per-commit csv
    :param repo_path: path to the git repository
    '''
    commit_bug = row['parent']
    commit_fix = row['commit']  # NOTE(review): read but never used
    bug_tag = row['tag_parent']
    issue_id = row['issue']
    index_bug = row['index_bug']

    # checkout the buggy version
    git_cmd = 'git checkout {}'.format(commit_bug)
    print ge.run_GIT_command_and_log(repo_path, git_cmd, None, None, False)

    # collect every .java class under src
    d_l = []
    res = pt.walk_rec('{}/src'.format(repo_path), [], '.java')
    for item_java in res:
        class_name = pt.path_to_package('org', item_java, -5)
        d_l.append({
            'class_path': item_java,
            'name': class_name,
            'tag_bug': bug_tag,
            'commit_bug': commit_bug
        })
    df = pd.DataFrame(d_l)
    df.to_csv('{}/{}_{}.csv'.format(out_dir, issue_id, index_bug))
Ejemplo n.º 13
0
def get_pair_fix_bug_folder(folder_path):
    """
    Pair the buggy/fixed test-suite folders per iteration and write an
    independent diff report csv.

    :param folder_path: root folder holding the 'test_suite_t*' dirs
    :return: path of the written csv, or None when nothing was paired
    """
    folder_bug = pt.walk_rec(folder_path, [],
                             'test_suite_t',
                             lv=-2,
                             file_t=False)
    d_pair = {}
    for item in folder_bug:
        folder_mode = str(item).split('/')[-2].split('_')[0]
        # skip compilation-output folders ('complie' spelling is used
        # throughout this codebase)
        if folder_mode == 'complie':
            continue
        mode = str(item).split('/')[-2].split('_')[-1]
        # renamed from 'iter' to avoid shadowing the builtin
        iter_id = str(item).split('/')[-1].split('_it_')[-1]
        if iter_id not in d_pair:
            d_pair[iter_id] = {}
        if mode == 'fixed':
            d_pair[iter_id]['fixed'] = item
        elif mode == 'buggy':
            d_pair[iter_id]['buggy'] = item
    d_l = []
    for key_i in d_pair.keys():
        # only iterations that have both sides can be diffed
        if 'buggy' in d_pair[key_i] and 'fixed' in d_pair[key_i]:
            info = get_diff_fix_buggy(d_pair[key_i]['buggy'],
                                      d_pair[key_i]['fixed'])
            d_l.extend(info)
    if len(d_l) == 0:
        return None
    df = pd.DataFrame(d_l)
    out_csv = "{}/indep_report_v1.csv".format(folder_path)
    df.to_csv(out_csv)
    return out_csv
Ejemplo n.º 14
0
def add_loc(project_name, pass_loc=False):
    '''
    Attach LOC (lines of code) info to the final buggy dataframe.

    Reads 'fin_df_buggy.csv' of the project, computes (or, when
    ``pass_loc`` is True, reloads) per-bug LOC csvs via add_loc_helper,
    merges the LOC data into the dataframe and writes 'exp.csv'.

    :param project_name: bug-miner project name
    :param pass_loc: when True, skip recomputation and reuse 'loc.csv'
    '''
    csv_p = '/home/ise/bug_miner/{}/fin_df_buggy.csv'.format(project_name)
    df_fin = pd.read_csv(csv_p, index_col=0)
    p_name = str(csv_p).split('/')[-2]
    father_dir = '/'.join(str(csv_p).split('/')[:-1])
    out_loc = pt.mkdir_system(father_dir, 'LOC', False)
    repo_path = "{}/{}".format('/'.join(str(csv_p).split('/')[:-1]), p_name)
    print repo_path
    df_info = pd.read_csv("{}/tmp_files/{}_bug.csv".format(
        os.getcwd(), p_name),
                          index_col=0)
    # only bugs that actually got generated tests are relevant
    list_bug_generated = df_fin['bug_name'].unique()
    print list(df_info)
    print len(df_info)
    df_info = df_info[df_info['issue'].isin(list_bug_generated)]
    if pass_loc is False:
        df_info.apply(add_loc_helper, repo=repo_path, out=out_loc, axis=1)
        # get all df loc from LOC folder
        res_df_loc_path = pt.walk_rec(out_loc, [], '.csv')
        all_loc_list = []
        for item_loc_path in res_df_loc_path:
            all_loc_list.append(pd.read_csv(item_loc_path, index_col=0))
        df_all_loc = pd.concat(all_loc_list)
        print list(df_all_loc)
        print list(df_fin)
        print len(df_fin)
        df_all_loc.to_csv('{}/{}.csv'.format(father_dir, 'loc'))
    else:
        # reuse the previously computed LOC table
        df_all_loc = pd.read_csv('{}/{}.csv'.format(father_dir, 'loc'),
                                 index_col=0)
    # right join keeps every row of df_fin even without LOC info
    result_df = pd.merge(df_all_loc, df_fin, 'right', on=['bug_name', 'name'])
    result_df.to_csv('{}/{}.csv'.format(father_dir, 'exp'))
    print len(result_df)
Ejemplo n.º 15
0
def add_loc_helper(row, repo, out, prefix_name='org'):
    '''
    Write the LOC info of one bug row into the LOC dir.

    :param row: pandas row with 'parent', 'commit', 'issue',
                'component_path' and 'package' columns
    :param repo: path to repo
    :param out: path where to write the csv
    :param prefix_name: package root used to derive class names
    '''
    # NOTE(review): the 'parent' value is immediately overwritten by
    # 'commit' -- one of these two assignments is likely wrong; confirm
    # which commit should actually be checked out.
    commit_buggy = row['parent']
    commit_buggy = row['commit']
    bug_id = row['issue']
    path_to_faulty = row['component_path']
    package_name = row['package']
    # if str(bug_id ) == '1261':
    #     print ""
    print path_to_faulty
    checkout_version(commit_buggy, repo, None)
    # component_path uses backslashes; drop the file part to get the package dir
    pack = '/'.join(str(path_to_faulty).split('\\')[:-1])
    # #TODO: remove it
    # pack = str(pack).split('/')
    # indx =pack.index(prefix_name)
    # pack = '/'.join(pack[:indx+1])
    # #####
    klasses = pt.walk_rec('{}/{}'.format(repo, pack), [], '.java')
    d_l = []
    for class_i in klasses:
        name = pt.path_to_package(prefix_name, class_i, -5)
        size = get_LOC(class_i)
        d_l.append({'name': name, 'LOC': size, 'bug_name': bug_id})
    df = pd.DataFrame(d_l)
    df.to_csv('{}/{}_LOC.csv'.format(out, bug_id))
Ejemplo n.º 16
0
def count_class(commit, p_name):
    repo_path = '/home/ise/bug_miner/{0}/{0}'.format(p_name)
    run_GIT_command_and_log(repo_path, 'git checkout {}'.format(commit), None,
                            None, False)
    list_java = pt.walk_rec(repo_path, [], '.java')
    print len(list_java)
    return len(list_java)
Ejemplo n.º 17
0
def compile_java_class(dir_to_compile, output_dir, dependent_dir):
    """
    this function compile the .java tests to .class
    :param dir_to_compile: path where .java files
    :param output_dir: output dir where .class will be found
    :param dependent_dir: .jar for the compilation process
    :return: output dir path
    """
    #if path.isdir(dir_to_compile) is False:
    #    msg = "no dir : {}".format(dir_to_compile)
    #    raise Exception(msg)
    out_dir = pt.mkdir_system(output_dir, 'test_classes')
    files = pt.walk_rec(dependent_dir, [], '.jar', lv=-2)
    files.append(
        '/home/ise/eran/evosuite/jar/evosuite-standalone-runtime-1.0.6.jar')
    jars_string = ':'.join(files)
    dir_to_compile = '{}*'.format(dir_to_compile)
    string_command = "javac {0} -verbose -Xlint -cp {1} -d {2} -s {2} -h {2}".format(
        dir_to_compile, jars_string, out_dir)
    print "[OS] {}".format(string_command)
    os.system(string_command)
    return
    process = Popen(shlex.split(string_command), stdout=PIPE, stderr=PIPE)
    stdout, stderr = process.communicate()
    print "----stdout----"
    print stdout
    print "----stderr----"
    print stderr
    return out_dir
Ejemplo n.º 18
0
def get_snapshot_to_jar_dir(repo, path_to_target_folder):
    res = pt.walk_rec("{}".format(repo), [], 'SNAPSHOT.jar', lv=-6)
    res = [x for x in res if str(x).__contains__('/libb/') is False]
    for item in res:
        command_cp = 'cp {} {}'.format(item, path_to_target_folder)
        print "[OS] {}".format(command_cp)
        os.system(command_cp)
Ejemplo n.º 19
0
def add_hamcrest(path,
                 jar_path='/home/ise/eran/evosuite/dep/hamcrest-all-1.3.jar'):
    """
    Replace any hamcrest jars under ``path`` with the pinned
    hamcrest-all jar.
    """
    old_jars = [x for x in pt.walk_rec(path, [], 'hamcrest')
                if str(x).endswith('.jar')]
    for old_jar in old_jars:
        os.system('rm {}'.format(old_jar))
    os.system('cp {} {}'.format(jar_path, path))
Ejemplo n.º 20
0
def get_all_self_report(res_folder):
    # Concatenate every 'report.csv' under res_folder into one frame
    # and print its column names.
    csv_files = pt.walk_rec(res_folder, [], 'report.csv')
    df_list = []
    for item_csv in csv_files:
        df_list.append(pd.read_csv(item_csv))
    df_all = pd.concat(df_list)
    father_dir = '/'.join(str(res_folder).split('/')[:-1])
    # NOTE(review): 'father_dir' is computed but never used and 'df_all'
    # is never written out -- a to_csv call seems to be missing here.
    print list(df_all)
Ejemplo n.º 21
0
def res_mod(out_info):
    res_mod = pt.walk_rec(out_info, [], 'mod.csv')
    l_df = []
    for item in res_mod:
        l_df.append(pd.read_csv(item, index_col=0))
    df_all = pd.concat(l_df)
    x = df_all['is_exists'].value_counts()
    print "missing class % \n {}".format(x)
Ejemplo n.º 22
0
def get_results(dir_res='/home/ise/test/pom_3'):
    '''
    Summarize, per TIKA bug dir, which tests have a log and/or a
    generated ESTest file, and write 'result_info_empty.csv' into the
    parent of ``dir_res``.
    '''
    res = pt.walk_rec(dir_res, [], 'TIKA', False, lv=-1)
    d_l = []  # NOTE(review): never used
    d_l_empty = []
    for item in res:
        print "----{}----".format(str(item).split('/')[-1])
        name = str(item).split('/')[-1]
        d_test = {}
        # dir name layout: '<bug_name>_<id>_...'
        id_bug = str(name).split('_')[1]
        bug_name = str(name).split('_')[0]
        folder_log_evo = pt.walk_rec(item, [], 'log_evo', False)
        folder_org = pt.walk_rec(item, [], 'org', False)
        res_log_test = pt.walk_rec(folder_log_evo[0], [], '.txt')
        # one record per log file; 'test' flips to 1 when an ESTest exists
        for log_t in res_log_test:
            name = str(log_t).split('/')[-1][:-4]
            if name not in d_test:
                d_test[name] = {
                    'id': id_bug,
                    'bug_name': bug_name,
                    'log': 1,
                    'name': name,
                    'test': 0
                }
            else:
                msg = '[Error] duplication in the test log dir := {}'.format(
                    folder_log_evo)
                raise Exception(msg)
        if len(folder_org) > 0:
            res_test = pt.walk_rec(folder_org[0], [], 'ESTest.java')
            for test_i in res_test:
                test_name_package = pt.path_to_package('org', test_i, -5)
                # strip the '_ESTest' suffix to match the log names
                test_name_package = test_name_package[:-7]
                if test_name_package not in d_test:
                    d_test[test_name_package] = {
                        'id': id_bug,
                        'bug_name': bug_name,
                        'log': 0,
                        'name': test_name_package,
                        'test': 1
                    }
                else:
                    d_test[test_name_package]['test'] = 1
        d_l_empty.extend(d_test.values())
    df = pd.DataFrame(d_l_empty)
    father = '/'.join(str(dir_res).split('/')[:-1])
    df.to_csv("{}/result_info_empty.csv".format(father))
Ejemplo n.º 23
0
def remove_junit(path, path_to_junit='/home/ise/eran/evosuite/junit-4.12.jar'):
    """
    Swap any junit jars under ``path`` for the pinned junit-4.12 jar,
    then refresh the hamcrest jar as well.
    """
    junit_jars = [x for x in pt.walk_rec(path, [], 'junit')
                  if str(x).endswith('.jar')]
    for jar_i in junit_jars:
        os.system('rm {}'.format(jar_i))
    os.system('cp {} {}'.format(path_to_junit, path))
    # junit needs a matching hamcrest jar next to it
    add_hamcrest(path)
Ejemplo n.º 24
0
def replication_table(root_p):
    '''
    Build a replication table to compare one long-budget test suite
    against several short-budget replications in Evosuite: e.g. given
    time T, is one suite with budget T better than n suites with budget
    T/n each?

    :param root_p: the path of the out_xml dir tree
    :return: csv file (written via flush_csv)
    '''
    # mutation-result labels that count as "not killed"
    arr_sign = ['NO_COVERAGE', 'SURVIVED', 'TIMED_OUT', 'RUN_ERROR']
    out_xml_dir = pit_render_test.walk_rec(root_p, [], 'out_xml', False, -2)
    for dir_out in out_xml_dir:
        print dir_out
        proj_cur = '/'.join(str(dir_out).split('/')[:-1])
        # one column per replication ('ALL' dirs)
        rep_name = pit_render_test.walk_rec(proj_cur, [], 'ALL', False, -2,
                                            False)
        list_p = pit_render_test.walk_rec(dir_out, [], '.csv', -1)
        cols = ['ID']
        acc = 0
        cols.extend(rep_name)
        big_df = pd.DataFrame(columns=cols)
        # strip the '/out_xml' suffix to reach the 't=<budget>_...' name
        dir_out = dir_out[:-8]
        name = str(dir_out).split('t=')[1]
        time_budget = str(name).split('_')[0]
        name = "replication_table_t={}".format(time_budget)
        for csv_item in list_p:
            print "csv_item =", csv_item
            df = pd.read_csv(csv_item, index_col=0)
            acc += int(len(df))
            print "---"
            print "df col :", list(df)
            print "big col:", list(big_df)
            print "--"
            # align each csv to the full column set before concat
            for col_name in cols:
                if col_name not in df:
                    df[col_name] = np.nan
            df = df[cols]
            big_df = pd.concat([big_df, df])
            # sanity check: concat must not drop rows
            if acc != int(len(big_df)):
                print "acc: {} big: {}".format(acc, int(len(big_df)))
        # binarize: KILLED -> 1, every not-killed label -> 0
        big_df[big_df == 'KILLED'] = 1
        for x in arr_sign:
            big_df[big_df == x] = 0
        size = len(rep_name)
        # cumulative 'killed by any of the first i+1 replications'
        for i in range(1, size):
            big_df['max_0-{}'.format(i)] = big_df[rep_name[:i + 1]].max(axis=1)
        flush_csv(root_p, big_df, '{}'.format(name))
Ejemplo n.º 25
0
def eval_xgb_test_dir(dir_p,name_file='FP_'):
    '''
    Evaluate every XGB prediction csv under ``dir_p`` and write a
    summary 'eval_res.csv' one level above it.

    Per csv the metrics include ROC-AUC, PR-AUC, average precision, MSE
    split by class, and recall/precision at k for several k values.

    :param dir_p: folder holding the prediction csv files
    :param name_file: file-name pattern selecting the csvs
    '''
    res = pt.walk_rec(dir_p, [], name_file, True)
    res = [x for x in res if str(x).endswith('.csv')]
    df_l = []
    for itm in res:
        # file name encodes the tag (parts -6..-3) and conf number (-2)
        tag_name = '_'.join(str(itm).split('/')[-1].split('_')[-6:-3])
        conf_num=str(itm).split('/')[-1].split('_')[-2]

        print "tag_name:\t{}".format(tag_name)
        df = pd.read_csv(itm)

        y_pred = df['test_predictions'].values
        y_test = df['hasBug'].values


        precision, recall, thresholds, Avg_PR = reacall_precision(y_test, y_pred,ploting=False,full_out=True)

        area = auc(recall, precision)
        roc = roc_auc_score(y_test,y_pred)

        df_bug = df[df['hasBug'] == 1]
        df_valid = df[df['hasBug'] == 0]
        # no buggy sample -> the per-class metrics are undefined
        if len(df_bug) == 0:
            continue
        mse_valid_test = mean_squared_error(df_valid['hasBug'], df_valid['test_predictions'])
        mse_buggy_test = mean_squared_error(df_bug['hasBug'], df_bug['test_predictions'])


        #precsion_buggy = precision_score(df_bug['hasBug'], df_bug['test_predictions'])
        #recall_buggy = recall_score(df_bug['hasBug'], df_bug['test_predictions'])
#        F1_buggy = f1_score(df_bug['hasBug'], df_bug['test_predictions'])
        d_k={}
        # precision/recall at k for several cut-offs
        for k in [10,20,30,100]:
            k_recall,k_precsion = metric_precsion_at_k(y_test,y_pred,k=k)
            d_k['k_{}_recall'.format(k)]=k_recall
            d_k['k_{}_precsion'.format(k)] = k_precsion

        print 'size buggy', len(df_bug)
        print 'size vaild', len(df_valid)
        print 'precntage vaild', float(len(df_valid))/float(len(df_bug)+len(df_valid)) *100.0
        print 'precntage buggy', float(len(df_bug))/float(len(df_bug)+len(df_valid))*100.0

        d_out = {'tag': tag_name, 'ROC': roc, 'conf': conf_num, 'MSE_Test_Bug': mse_buggy_test,'num_buggy':len(df_bug),'num_vaild':len(df_valid),'num_all':len(df_valid)+len(df_bug),
         'MSE_Test_Valid': mse_valid_test, #'F1_score':F1_buggy,'precsion_buggy':precsion_buggy,'recall_buggy':recall_buggy,
         'area-PRC (buggy)': area, 'Average precision-recall score': Avg_PR}

        for d_k_key in d_k.keys():
            d_out[d_k_key]=d_k[d_k_key]


        print "TEST:\t bug   MSE = {}".format(((mse_buggy_test)))
        print "TEST:\t valid MSE = {}".format(((mse_valid_test)))
        print Avg_PR
        df_l.append(d_out)
    df_res = pd.DataFrame(df_l)
    dir_p = '/'.join(str(dir_p).split('/')[:-1])
    df_res.to_csv('{}/eval_res.csv'.format(dir_p))
Ejemplo n.º 26
0
def helper_get_arrf_fiels(p_path='/home/ise/bug_miner/commons-lang1432698/FP/all_lang', mode='most', validtion=True):
    '''
    Collect the arff model / test / prediction files per tag, attach
    each tag's validation set (the next tag's test set in index order)
    and hand the result to manger().

    NOTE(review): the collection part duplicates get_weka_info();
    consider delegating.  The 'validtion' parameter is never read.

    :param p_path: root folder of the FP output
    :param mode: suffix selecting which result set to read
    :param validtion: unused
    '''
    d_tags = {}
    res = pt.walk_rec(p_path, [], '_{}'.format(mode), False)
    arff_path, pred_1_path = None, None
    for item in res:
        if str(item).endswith('arff_{}'.format(mode)):
            arff_path = item
        elif str(item).endswith('pred_1_{}'.format(mode)):
            pred_1_path = item
    res_minor = pt.walk_rec(pred_1_path, [], '', False, lv=-1)
    res_models = pt.walk_rec(arff_path, [], '.arff')
    # prediction dir name layout: '<sort_index>_<tag name parts...>'
    for item in res_minor:
        name = '_'.join(str(item).split('/')[-1].split('_')[1:])
        index_sort = str(item).split('/')[-1].split('_')[0]
        files_res = pt.walk_rec(item, [], '')
        d_tags[name] = {'sort_index': index_sort}
        d_tags[name]['model'] = None
        for file_i in files_res:
            if str(file_i).endswith('.csv'):
                d_tags[name]['name'] = file_i
            elif str(file_i).endswith(".arff"):
                d_tags[name]['test'] = file_i
    for item_arff in res_models:
        name = str(item_arff).split('/')[-1].split('.')[0]
        if name in d_tags:
            d_tags[name]['model'] = item_arff
        else:
            d_tags[name] = {'model': item_arff, 'test': None, 'name': None, 'sort_index': None}

    # find validation set: each tag validates on the next tag's test set
    keys_list = d_tags.keys()
    keys_list = [[x, int(d_tags[x]['sort_index'])] for x in keys_list]
    keys_list_sorted = sorted(keys_list, key=lambda tup: tup[1])
    print keys_list
    only_key_sort = [x[0] for x in keys_list_sorted]
    for ky in d_tags.keys():
        index = only_key_sort.index(ky)
        if index < len(keys_list_sorted) - 1:
            ky_son = keys_list_sorted[index + 1][0]
            d_tags[ky]['validation_set'] = d_tags[ky_son]['test']
        else:
            # the last tag has no successor to validate on
            d_tags[ky]['validation_set'] = None
    manger(d_tags)
Ejemplo n.º 27
0
def rearrange_folder_conf_xgb(
        p_path_dir='/home/ise/bug_miner/XGB/Lang_DATA/csv_res/TEST'):
    """
    Group result csv files into per-configuration sub-folders.

    Each csv file name ends with '..._<conf>_<suffix>'; the file is
    moved into a 'conf_<conf>' folder (created on demand).

    :param p_path_dir: folder holding the flat csv result files
    """
    res_csv_all = pt.walk_rec(p_path_dir, [], '.csv')
    for csv_path in res_csv_all:
        name_parts = str(csv_path).split('/')[-1].split('_')
        num_conf = name_parts[-2]
        conf_dir = pt.mkdir_system(p_path_dir, 'conf_{}'.format(num_conf),
                                   False)
        os.system('mv {} {}'.format(csv_path, conf_dir))
    # BUG FIX: a stray exit() used to terminate the whole interpreter
    # here, making this helper unusable from other code; removed.
Ejemplo n.º 28
0
def self_complie_bulider_func(repo, dir_cur, prefix, suffix='fix', bug_id=''):
    '''
    Compile and run the EVOSUITE-generated test suites of one bug dir.

    Builds the repo jars, refreshes junit, then for every test-suite
    folder compiles the tests and runs them via JUnit on the command
    line, writing results under 'complie_out_<suffix>' and
    'junit_out_<suffix>'.

    :param repo: path to the project repository to package
    :param dir_cur: bug directory holding the EVOSUITE folder
    :param prefix: package prefix dir of the generated tests
    :param suffix: 'fix' or the buggy-mode label, used in output names
    :param bug_id: unused  # NOTE(review): never read
    :return: None (also returns early on missing dirs / failed build)
    '''
    if os.path.isdir("{}/EVOSUITE".format(dir_cur)):
        d = {}
        java_dirz = pt.walk_rec("{}/EVOSUITE".format(dir_cur), [],
                                '',
                                False,
                                lv=-1)
        for item in java_dirz:
            if os.path.isdir("{}/{}".format(item, prefix)):
                name_folder = str(item).split('/')[-1]
                tmp = pt.walk_rec("{}/{}".format(item, prefix), [], '.java')
                path2 = '/'.join(str(tmp[0]).split('/')[:-1])
                # folder name encodes '..._t=<budget>_it=<iter>'
                tmp = str(name_folder).split('_')
                name_folder = 'test_suite_t_{}_it_{}'.format(
                    tmp[-2].split('=')[1], tmp[-1].split('=')[1])
                d[name_folder] = {
                    'name': name_folder,
                    'path': "{}/{}/*".format(item, prefix),
                    'path2': '{}/*'.format(path2)
                }
    else:
        print "[error] no dir {}/EVOSUITE".format(dir_cur)
        return None
    d_adder = {'bug_id': str(dir_cur).split('/')[-1], 'mode': suffix}
    res, path_jarz = package_mvn_cycle(repo)
    if path_jarz is None:
        return
    remove_junit(path_jarz)
    out_path_complie = pt.mkdir_system(dir_cur,
                                       'complie_out_{}'.format(suffix))
    out_path_junit = pt.mkdir_system(dir_cur, 'junit_out_{}'.format(suffix))
    for ky_i in d.keys():
        out_i_complie = pt.mkdir_system(out_path_complie, d[ky_i]['name'])
        out_i_junit = pt.mkdir_system(out_path_junit, d[ky_i]['name'])
        indep_bulilder.compile_java_class(d[ky_i]['path2'], out_i_complie,
                                          path_jarz)
        # NOTE(review): report_d is never used afterwards
        report_d = indep_bulilder.test_junit_commandLine("{}/{}".format(
            out_i_complie, 'test_classes'),
                                                         path_jarz,
                                                         out_i_junit,
                                                         prefix_package=prefix,
                                                         d_add=d_adder)
    print "end"
Ejemplo n.º 29
0
def mk_call_graph_df(root_dir, name_find='call_graph_stdout.txt'):
    """
    Build the call-graph matrices for every call-graph stdout file
    found under ``root_dir``.
    """
    stdout_files = pt.walk_rec(root_dir, [], name_find)
    for stdout_file in stdout_files:
        # output goes three levels above the stdout file
        out_dir = '/'.join(str(stdout_file).split('/')[:-3])
        graph = call_g.Call_g(stdout_file, out_dir)
        graph.read_and_process(False)
        graph.info_graph_csv()
        graph.step_matrix()
        graph.adj_matrix()
        graph.coverage_matrix_BFS()
Ejemplo n.º 30
0
def del_dependency_dir(repo):
    '''
    Delete the 'libb' dependency dir of ``repo`` -- directly when it
    sits at the repo root, otherwise wherever walk_rec finds it.
    '''
    libb_path = "{}/libb".format(repo)
    if os.path.isdir(libb_path):
        os.system('rm -r {}'.format(libb_path))
        return
    for found in pt.walk_rec(repo, [], 'libb', False, lv=-3):
        os.system('rm -r {}'.format(found))