Ejemplo n.º 1
0
def run_wilcoxon_test(df, name, folder):
	"""Write a table of pairwise two-sided Mann-Whitney U p-values to CSV.

	Each unordered pair of columns of ``df`` is compared once with
	``scipy.stats.mannwhitneyu``. The p-value of the pair is stored in the
	lower triangle of a square table whose rows and columns are both labelled
	with the columns of ``df``; the diagonal and the upper triangle are never
	assigned.

	:param df: DataFrame whose columns hold the samples to compare.
	:param name: name forwarded to ``write_df_to_csv``.
	:param folder: folder forwarded to ``write_df_to_csv``.
	"""
	labels = df.columns.values
	n_labels = len(labels)
	p_table = pd.DataFrame(columns=labels, index=labels)

	# The last column has no later partner, so it never acts as "first".
	for col, first_label in enumerate(labels[:-1]):
		for row in range(col + 1, n_labels):
			_, p_value = scipy.stats.mannwhitneyu(
				df[first_label].values,
				df[labels[row]].values,
				alternative='two-sided')

			# Row index is the later column: fills the lower triangle only.
			p_table.iloc[row, col] = p_value

	write_df_to_csv(folder, p_table, name)
Ejemplo n.º 2
0
def do_ground_truth_all(files=None, kws=None):
    """Compute and write the ground truth for every given feature vector.

    :param files: iterable of ``(path, name)`` pairs; ``path`` is handed to
        ``get_fv_methods`` and ``name`` is used as the output name. ``None``
        is treated as "no files".
    :param kws: forwarded unchanged to ``do_ground_truth`` and ``gt_to_df``.
    :return: ``GT_DIR``, the folder the ground truths are written to.
    """
    # Materialise once: the pairs are walked twice (listing, then processing),
    # which would silently skip the processing pass for a one-shot iterator.
    files = [] if files is None else list(files)

    print('\n> Defining ground truth for feature vector/s:')
    for entry in files:
        print("\t- " + os.path.relpath(entry[0]))

    for fv_path, fv_name in files:
        methods = get_fv_methods(fv_path)
        ground_truth = do_ground_truth(methods, kws)

        write_df_to_csv(GT_DIR, gt_to_df(ground_truth, kws), fv_name)

    print('> Ground truth/s has/ve been written to folder "%s"' %
          os.path.abspath(GT_DIR))

    return GT_DIR
Ejemplo n.º 3
0
def compute_stats(metrics, classifiers, folder):
	"""Summarise every metric per classifier and write the summary to CSV.

	For each ``(key, val)`` entry of ``metrics`` three rows are produced from
	``val`` along axis 0: mean, median and standard deviation. Rows are
	labelled with the capitalized key followed by ``' Mean'``, ``' Median'``
	or ``' Standard Deviation'``. The resulting table is printed and written
	through ``write_df_to_csv`` under the name ``'stats'``.

	:param metrics: mapping of metric name to an object exposing
		``mean``/``median``/``std`` with an ``axis`` argument
		(presumably a DataFrame with one column per classifier -- confirm
		against the caller).
	:param classifiers: column labels of the summary table.
	:param folder: folder forwarded to ``write_df_to_csv``.
	"""
	print(indent('\n- Computing metrics statistics ... '), end='')

	# Collect the rows and build the frame once: DataFrame.append was removed
	# in pandas 2.0 and copied the whole frame on every call.
	rows = []
	for key, val in metrics.items():
		name = str(capitalize(key))

		rows.append(val.mean(axis=0).rename(name+' Mean'))
		rows.append(val.median(axis=0).rename(name+' Median'))
		rows.append(val.std(axis=0).rename(name+' Standard Deviation'))

	stats = pd.DataFrame(rows)

	# Keep the requested classifier columns first (even when there are no
	# rows) and retain any column that only shows up in the data.
	known_cols = list(classifiers)
	extra_cols = [col for col in stats.columns if col not in known_cols]
	stats = stats.reindex(columns=known_cols + extra_cols)

	print('result:')

	print(indent(stats.to_string(), spaces=10))

	out = write_df_to_csv(folder, stats, 'stats')

	print(indent('\n- Statistics written to file "%s"' % out))
Ejemplo n.º 4
0
def biased_clf_metrics_to_csv(labels, folder):
	"""Score an all-ones prediction against ``labels`` and write it to CSV.

	Precision, recall and f-score are obtained from
	``precision_recall_fscore_support`` with ``average='binary'``, using a
	prediction vector of ones as long as ``labels``. They are stored as a
	single-row table written under the name ``'biased_metrics'``.

	:param labels: true labels to score against.
	:param folder: folder forwarded to ``write_df_to_csv``.
	"""
	all_ones = ones(len(labels))
	# The fourth returned value (support) is not reported.
	precision, recall, f_score, _ = precision_recall_fscore_support(
		labels, all_ones, average='binary')

	metrics_row = {'precision': precision, 'recall': recall, 'fscore': f_score}
	df = pd.DataFrame(metrics_row, index=[0])

	out = write_df_to_csv(folder, df, 'biased_metrics')

	message = '\n- Biased classifier metrics ("precision", "recall" and "fscore") written to file "%s"' % out
	print(indent(message))
Ejemplo n.º 5
0
def do_all_cluster_from_path(
			path=None,
			target=None,
			f=None,
			n=5):
	"""Cluster every feature vector found under ``path`` and save the results.

	:param path: file or folder handed to ``get_paths_and_names``
		(made absolute first).
	:param target: output folder handed to ``write_df_to_csv``
		(made absolute first).
	:param f: clustering function called as ``f(feature_vector_path, n)``;
		its result is what gets written.
	:param n: second argument passed to ``f`` (presumably the number of
		clusters -- confirm against the clustering functions).
	:return: the absolute ``target`` path.
	"""
	path = os.path.abspath(path)
	target = os.path.abspath(target)

	print('\n> Clustering God class methods in file/folder "%s"' % path)

	# Each entry carries the feature vector path first and its name second.
	for entry in get_paths_and_names(path):
		fv_path, fv_name = entry[0], entry[1]

		# apply the function "f" (either k-means or hierarchical agglomerative)
		clusters = f(fv_path, n)

		write_df_to_csv(target, clusters, fv_name)

	print('> Clusters have been written to folder "%s"' % target)
	return target
def extract_feature_vectors(god_classes):
    """Generate and write a feature vector for every listed god class.

    Each source file in ``god_classes.path_to_source`` is parsed; for every
    class declaration whose name appears in ``god_classes.class_name`` the
    result of ``generate_all`` is written to ``FV_DIR`` under the class name.

    :param god_classes: object exposing ``class_name`` and ``path_to_source``
        attributes that support ``tolist()`` (presumably a DataFrame --
        confirm against the caller).
    :return: absolute path of ``FV_DIR``.
    """
    print('\n> Starting feature vector extraction...')

    # A set keeps the per-declaration membership test O(1).
    class_names = set(god_classes.class_name.tolist())

    for src_path in god_classes.path_to_source.tolist():
        # Read the whole source first so the file is closed before parsing.
        with open(src_path, 'r') as jsc:
            source = jsc.read()

        tree = jl.parse.parse(source)

        # Walk every class declared in the file, keeping only god classes.
        for _, node in tree.filter(jl.parser.tree.ClassDeclaration):
            if node.name in class_names:
                write_df_to_csv(FV_DIR, generate_all(node), node.name)

    fv_dir = os.path.abspath(FV_DIR)
    print('> Feature vector/s has/ve been written to folder "%s"' % fv_dir)

    return fv_dir
def label_feature_vectors(fv, fv_path, buggy_classes_dir):
    """Label a feature vector with bug information and write it to CSV.

    :param fv: feature vector passed to ``get_label_feature_vector``.
    :param fv_path: path of the feature vector; used in the log message and
        to derive the suffix of the output name.
    :param buggy_classes_dir: location passed to ``get_buggy_classes``.
    :return: tuple ``(labeled feature vector, path returned by
        write_df_to_csv)``.
    """
    print('\n> Creating labels for feature vector "%s"' % fv_path)

    labeled = get_label_feature_vector(
        fv, get_buggy_classes(buggy_classes_dir))

    # The output name reuses the suffix derived from the input path.
    suffix = get_dir_time_suffix(fv_path, 'feature_vector')
    out_name = gen_name_with_suffix('label_feature_vector', suffix)
    path = write_df_to_csv(DEF_LFV_DIR, labeled, out_name)

    print('> Labeled feature vector has been written to file "%s"' %
          os.path.abspath(path))

    return labeled, path
Ejemplo n.º 8
0
def run_training(classifier, classifier_name, fv_path, tt, r_num):
	"""Run a classifier once per entry of ``r_num`` and report its scores.

	For run ``i`` the classifier is executed through ``run_classifier`` on
	the ``i``-th train/test split held in ``tt``; accuracy, precision, recall
	and f-score are collected per run, written to CSV, averaged via
	``print_averages`` and plotted via ``make_plot``.

	:param classifier: classifier object handed to ``run_classifier``.
	:param classifier_name: display name; with spaces removed it is also the
		output file name.
	:param fv_path: path used to derive the name of the results folder.
	:param tt: mapping with the keys ``'x_trains'``, ``'x_tests'``,
		``'y_trains'`` and ``'y_tests'``, each indexable by run number.
	:param r_num: sequence with one entry per run; stored in the ``r_num``
		column and passed to ``make_plot``.
	"""
	accuracies, precisions, recalls, fscores = [], [], [], []

	print(indent('\n- Training classifier "%s"...' % classifier_name))

	for run in range(len(r_num)):
		x_train = tt['x_trains'][run]
		x_test = tt['x_tests'][run]
		y_train = tt['y_trains'][run]
		y_test = tt['y_tests'][run]

		predicted, accuracy = run_classifier(
			classifier, x_train, x_test, y_train, y_test)
		precision, recall, fscore = get_prec_recall_fscore(y_test, predicted)

		accuracies.append(accuracy)
		precisions.append(precision)
		recalls.append(recall)
		fscores.append(fscore)

	columns = {
		"r_num": r_num,
		"accuracy": accuracies,
		"precision": precisions,
		"recall": recalls,
		"fscore": fscores
	}
	df = pd.DataFrame(columns)

	suffix = get_dir_time_suffix(fv_path, 'label_feature_vector-')
	tr_folder = DEF_TR_DIR + '/' + suffix

	file_name = classifier_name.replace(' ', '')
	path = write_df_to_csv(tr_folder, df, file_name)
	print(indent('\nResults written to file "%s"' % path, spaces=10))

	print_averages(df)

	score_columns = ['accuracy', 'precision', 'recall', 'fscore']
	make_plot(r_num, df[score_columns], classifier_name, tr_folder)
def extract_feature_vectors(root):
    """Build the feature vector table of a project and write it to CSV.

    One row is produced for every class returned by ``get_top_classes(root)``,
    holding the class name plus the values returned by ``get_class_metrics``,
    ``get_methods_metrics`` and ``get_npl_metrics``. Columns are ordered with
    ``df_sort_cols(df, FV_COLS)`` before writing.

    :param root: project root handed to ``get_top_classes``.
    :return: tuple ``(feature vector DataFrame, path returned by
        write_df_to_csv)``.
    """
    print('\n> Starting feature vector extraction for project "%s"' % root)

    # Collect plain dict rows and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for t_class in get_top_classes(root):

        mth, fld, rfc, ints = get_class_metrics(t_class)
        sz, cpx, ex, ret = get_methods_metrics(t_class)
        bcm, nml, wrd, dcm = get_npl_metrics(t_class)

        rows.append(
            {
                'class': t_class.name,  # class name
                'MTH': mth,
                'FLD': fld,
                'RFC': rfc,
                'INT': ints,  # CLASS METRICS
                'SZ': sz,
                'CPX': cpx,
                'EX': ex,
                'RET': ret,  # METHOD METRICS
                'BCM': bcm,
                'NML': nml,
                'WRD': wrd,
                'DCM': dcm  # NPL METRICS
            })

    df = pd.DataFrame(rows)

    # Guarantee every FV_COLS column exists (also when no class was found)
    # while keeping any row key that FV_COLS does not list.
    expected_cols = list(FV_COLS)
    extra_cols = [col for col in df.columns if col not in expected_cols]
    df = df.reindex(columns=expected_cols + extra_cols)

    df = df_sort_cols(df, FV_COLS)
    path = write_df_to_csv(DEF_FV_DIR, df,
                           gen_name_with_time('feature_vector'))
    print('> Feature vector/s has/ve been written to file "%s"' %
          os.path.abspath(path))

    return df, path