def print_averages(df):
    averages = df[['accuracy', 'precision', 'recall', 'fscore']].mean(axis=0)

    print(indent('\nPrinting averages:', spaces=10))

    for label, avg in averages.items():
        print(indent('* Average %s: %s' % (label, str(avg)), spaces=14))
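The `indent` call used throughout these snippets is not `textwrap.indent` (which takes a prefix string rather than a `spaces` keyword); it appears to be a project helper. A minimal sketch of what it might look like, with the default indentation as an assumption:

def indent(text, spaces=6):
    # prefixes every line of the text with the given number of spaces (assumed default)
    return '\n'.join(' ' * spaces + line for line in str(text).split('\n'))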
Example #2
def compute_stats(metrics, classifiers, folder):
	print(indent('\n- Computing metrics statistics ... '), end='')
	
	stats = pd.DataFrame(columns=classifiers)
	
	for key, val in metrics.items():
		name = str(capitalize(key))
		
		# computes mean, median and standard deviation across runs for each classifier
		mean = val.mean(axis=0).rename(name + ' Mean')
		median = val.median(axis=0).rename(name + ' Median')
		std = val.std(axis=0).rename(name + ' Standard Deviation')
		
		# DataFrame.append was removed in pandas 2.x, so concatenate the rows instead
		stats = pd.concat([stats, mean.to_frame().T, median.to_frame().T, std.to_frame().T])
	
	print('result:')
	
	print(indent(stats.to_string(), spaces=10))
	
	out = write_df_to_csv(folder, stats, 'stats')
	
	print(indent('\n- Statistics written to file "%s"' % out))
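`write_df_to_csv` is assumed to be a small project helper that persists a DataFrame and returns the output path; a possible sketch:

import os

def write_df_to_csv(folder, df, name):
    # writes the dataframe to <folder>/<name>.csv and returns the resulting path (assumed behaviour)
    path = os.path.join(folder, name + '.csv')
    df.to_csv(path)
    return path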
Example #3
def find_best_k_all(fvs, n=MAX_k):
    for path, name in fvs:
        print('\n> Filename: %s\n' % name)
        print(indent('cluster\tK-Means\t Agglomerative'))
        for m in find_best_k(get_fv_values(path), n + 1):
            print(indent('\t\t'.join(m)))
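`find_best_k` and `get_fv_values` are project helpers not shown here. A hedged sketch of what `find_best_k` could look like, assuming it compares K-Means and agglomerative clustering by silhouette score for each candidate k (both the metric and the k range are assumptions):

from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.metrics import silhouette_score

def find_best_k(values, n):
    # assumed behaviour: yield one printable row per candidate k, comparing both algorithms
    for k in range(2, n):
        km_labels = KMeans(n_clusters=k).fit_predict(values)
        agg_labels = AgglomerativeClustering(n_clusters=k).fit_predict(values)
        yield (str(k),
               '%.3f' % silhouette_score(values, km_labels),
               '%.3f' % silhouette_score(values, agg_labels))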
Example #4
def run_cross_validation_all(data, labels):
	print(indent('\n- Running cross validation with 5-folds ...'), end='\n\n')
	all_prec, all_recall, all_f1 = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
	
	for label, clf in CLASSIFIERS.items():
		print(indent('* Validating "%s"' % label, spaces=10))
		df = run_cross_validation(clf, data, labels)
		all_prec[label] = df['test_precision']
		all_recall[label] = df['test_recall']
		all_f1[label] = df['test_f1']

	return {
		'precision': all_prec,
		'recall': all_recall,
		'fscore': all_f1
	}
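`CLASSIFIERS` and `run_cross_validation` are defined elsewhere; given the `test_precision`/`test_recall`/`test_f1` columns read above, `run_cross_validation` plausibly wraps scikit-learn's `cross_validate`. A sketch under that assumption:

from sklearn.model_selection import cross_validate

def run_cross_validation(clf, data, labels):
    # 5-fold cross validation; cross_validate names its result columns 'test_<scorer>'
    scores = cross_validate(clf, data, labels, cv=5, scoring=('precision', 'recall', 'f1'))
    return pd.DataFrame(scores)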
def find_god_classes(source=None):
	print('\n> Looking for god classes in "%s"...' % os.path.abspath(source))
	# creates new dataframe
	df = pd.DataFrame(columns=['class_name', 'method_num', 'path_to_source'])

	# goes through the project content
	for root, dirs, files in os.walk(source, topdown=False):
		for name in files:
			# matches only java sources
			if name.endswith('.java'):
				with open(os.path.join(root, name), 'r') as jsc:

					# creates the AST
					tree = jl.parse.parse(jsc.read())

					# iterates through the AST (first filtering all the non-class declarations)
					for path, node in tree.filter(jl.parser.tree.ClassDeclaration):
						# adds the new class and its details as a row
						# (DataFrame.append was removed in pandas 2.x, so concatenate instead)
						row = pd.DataFrame([{
							'class_name': node.name,
							'path_to_source': root + '/' + node.name + '.java',
							'method_num': len(node.methods)
						}])
						df = pd.concat([df, row], ignore_index=True)
	# Filters the dataframe getting only the god classes
	god_classes = filter_all_classes(df)
	print('> Found the following god classes:\n'+indent(god_classes.to_string()))

	return god_classes
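`filter_all_classes` is the project's god-class filter and is not shown here; a hypothetical version using a common heuristic (method count well above the project mean; the exact threshold is an assumption):

def filter_all_classes(df):
    # hypothetical heuristic: flag classes whose method count is far above the project average
    threshold = df['method_num'].mean() + 6 * df['method_num'].std()
    return df[df['method_num'] > threshold]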
Example #6
def run_wilcoxon_test_all(metrics, folder):
	
	b_folder = mkdir(folder + '/wilcoxon')
		
	for key, val in metrics.items():
		run_wilcoxon_test(val, key, b_folder)
	
	print(indent('\n- Wilcoxon tests results written to folder "%s"' % b_folder))
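`run_wilcoxon_test` is not shown; a sketch assuming it runs pairwise Wilcoxon signed-rank tests between classifier columns and writes the results with the `write_df_to_csv` helper:

from itertools import combinations
from scipy.stats import wilcoxon

def run_wilcoxon_test(df, metric_name, folder):
    # pairwise Wilcoxon signed-rank tests between classifier columns (assumed behaviour)
    rows = []
    for a, b in combinations(df.columns, 2):
        stat, p = wilcoxon(df[a], df[b])
        rows.append({'pair': '%s vs %s' % (a, b), 'statistic': stat, 'p-value': p})
    return write_df_to_csv(folder, pd.DataFrame(rows), metric_name)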
Example #7
def biased_clf_metrics_to_csv(labels, folder):
	
	# the biased classifier always predicts the positive class (a vector of ones)
	prec, rec, fscore, sup = precision_recall_fscore_support(labels, ones(len(labels)), average='binary')
	df = pd.DataFrame({'precision': prec, 'recall': rec, 'fscore': fscore}, index=[0])
	
	out = write_df_to_csv(folder, df, 'biased_metrics')
	
	print(indent('\n- Biased classifier metrics ("precision", "recall" and "fscore") written to file "%s"' % out))
def make_boxplot_all(metrics, folder):

    b_folder = mkdir(folder + '/boxplots')

    for key, val in metrics.items():
        make_boxplot(val, key, b_folder)

    print(indent('\n- Boxplots written to folder "%s"' % b_folder))
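`make_boxplot` is defined elsewhere; a possible sketch, assuming one box per classifier column saved as a PNG named after the metric:

def make_boxplot(df, metric_name, folder):
    # one box per classifier column, saved as <folder>/<metric_name>.png (assumed behaviour)
    ax = df.boxplot(figsize=(12, 8))
    ax.set_title(metric_name)
    path = folder + '/' + metric_name + '.png'
    plt.savefig(path)
    plt.clf()
    return path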
Example #9
def gen_metrics_biased_clf(data, labels):
	print(indent('\n- Generating biased estimators ...'))
	l_prec, l_recall, l_fscore = [], [], []

	for i in range(0, 100):
		prec, rec, f1, sup = split_and_compute_stats(data, labels)
		l_prec.append(prec)
		l_recall.append(rec)
		l_fscore.append(f1)

	return l_prec, l_recall, l_fscore
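`split_and_compute_stats` is a project helper not shown here; a hypothetical sketch, assuming it draws a random train/test split and scores an always-positive classifier on the held-out labels (the split size is an assumption):

from numpy import ones
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import train_test_split

def split_and_compute_stats(data, labels):
    # hypothetical helper: random split, then score a classifier that always predicts the positive class
    _, X_test, _, y_test = train_test_split(data, labels, test_size=0.2)
    return precision_recall_fscore_support(y_test, ones(len(y_test)), average='binary')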
def make_plot(x, data, title, folder):
    plt.figure(figsize=(26, 10))
    plt.title(title)

    plt.xlabel('Run')
    plt.ylabel('Metrics')
    for col in data.columns:
        plt.plot(x, data[col].values, marker='o', markersize=4, label=col)

    plt.legend()
    plt.grid(True)
    path = folder + '/' + title.replace(" ", "") + '.png'
    plt.savefig(path)
    plt.clf()

    print(indent('\nPlot saved to "%s"' % path, spaces=10))
Example #11
    Populates the ontology (tree.owl) with instances of ClassDeclaration, MethodDeclaration, FieldDeclaration, 
    Statement subclasses (e.g., IfStatement, WhileStatement, etc.) and FormalParameter.

    Example usage:
        $ python3 bad_smells.py individ_creator

    flags:
    -s <path-to-source> | --source <path-to-source>:
        The path to a directory containing java files. Default is "PROJ_ROOT/lib/android-chess/app/src/main/java/jwtc/chess"
''',
    'find_bad_smells': '''
find_bad_smells:
    Runs Sparql queries to detect bad smells (Long Method, Large Class, Long Parameter List, Switch Statements, Data Class).

    Example usage:
        $ python3 bad_smells.py find_bad_smells
'''
}


def docstring_preview(text):
    return text.split('\n\n')[0]


docstring_headers = {
    key: indent(docstring_preview(value))
    for (key, value) in MODULE_DOCSTRINGS.items()
}

DOCSTRING = DOCSTRING.format(**docstring_headers)
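As a quick illustration of the preview logic, `docstring_preview` keeps only the text before the first blank line (the sample string below is made up):

sample = 'find_bad_smells:\n    Runs Sparql queries to detect bad smells.\n\n    Example usage: ...'
assert docstring_preview(sample) == 'find_bad_smells:\n    Runs Sparql queries to detect bad smells.'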