def print_averages(df):
    averages = df[['accuracy', 'precision', 'recall', 'fscore']].mean(axis=0)
    print(indent('\nPrinting averages:', spaces=10))
    # Series.iteritems() was removed in pandas 2.0; items() is the replacement
    for label, avg in averages.items():
        print(indent('* Average %s: %s' % (label, str(avg)), spaces=14))

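
# NOTE (editor's sketch): `indent` is an external helper that is not shown in
# this listing. Judging by its call sites it left-pads every line of `text`
# with `spaces` blanks; a minimal hypothetical stand-in, kept under a different
# name (and an assumed default) so it never shadows the real helper:
def _indent_sketch(text, spaces=6):
    return '\n'.join(' ' * spaces + line for line in text.split('\n'))
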
def compute_stats(metrics, classifiers, folder):
    print(indent('\n- Computing metrics statistics ... '), end='')
    stats = pd.DataFrame(columns=classifiers)
    for key, val in metrics.items():
        name = str(capitalize(key))
        # computes mean, median and standard deviation of each classifier's scores
        mean = val.mean(axis=0).rename(name + ' Mean')
        median = val.median(axis=0).rename(name + ' Median')
        std = val.std(axis=0).rename(name + ' Standard Deviation')
        # DataFrame.append() was removed in pandas 2.0; concatenate the rows instead
        stats = pd.concat([stats, mean.to_frame().T, median.to_frame().T, std.to_frame().T])
    print('result:')
    print(indent(stats.to_string(), spaces=10))
    out = write_df_to_csv(folder, stats, 'stats')
    print(indent('\n- Statistics written to file "%s"' % out))

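
# NOTE (editor's sketch): `write_df_to_csv` is defined elsewhere. Judging by
# its call sites it writes `df` to '<folder>/<name>.csv' and returns the path;
# a hypothetical stand-in (named differently so it never shadows the real one):
def _write_df_to_csv_sketch(folder, df, name):
    path = os.path.join(folder, name + '.csv')
    df.to_csv(path)
    return path
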
def find_best_k_all(fvs, n=MAX_k):
    for path, name in fvs:
        print('\n> Filename: %s\n' % name)
        print(indent('cluster\tK-Means\t Agglomerative'))
        # prints one row per candidate number of clusters
        for m in find_best_k(get_fv_values(path), n + 1):
            print(indent('\t\t'.join(m)))

def run_cross_validation_all(data, labels):
    print(indent('\n- Running cross validation with 5 folds ...'), end='\n\n')
    all_prec, all_recall, all_f1 = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
    for label, clf in CLASSIFIERS.items():
        print(indent('* Validating "%s"' % label, spaces=10))
        df = run_cross_validation(clf, data, labels)
        # collects the per-fold scores, one column per classifier
        all_prec[label] = df['test_precision']
        all_recall[label] = df['test_recall']
        all_f1[label] = df['test_f1']
    return {'precision': all_prec, 'recall': all_recall, 'fscore': all_f1}

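
# NOTE (editor's sketch): `run_cross_validation` is not shown here. The
# 'test_precision'/'test_recall'/'test_f1' columns read above match the output
# of sklearn's cross_validate with a list of scorers, so it is plausibly a
# thin wrapper along these lines (hypothetical):
def _run_cross_validation_sketch(clf, data, labels):
    from sklearn.model_selection import cross_validate
    scores = cross_validate(clf, data, labels, cv=5,
                            scoring=['precision', 'recall', 'f1'])
    return pd.DataFrame(scores)
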
def find_god_classes(source=None):
    print('\n> Looking for god classes in "%s"...' % os.path.abspath(source))
    rows = []
    # walks through the project content
    for root, dirs, files in os.walk(source, topdown=False):
        for name in files:
            # matches only java sources
            if name.endswith('.java'):
                with open(os.path.join(root, name), 'r') as jsc:
                    # creates the AST
                    tree = jl.parse.parse(jsc.read())
                    # iterates through the AST, keeping only class declarations
                    for path, node in tree.filter(jl.tree.ClassDeclaration):
                        # records the class details as a new row
                        # (assumes the file is named after the class it declares)
                        rows.append({
                            'class_name': node.name,
                            'path_to_source': os.path.join(root, node.name + '.java'),
                            'method_num': len(node.methods)
                        })
    df = pd.DataFrame(rows, columns=['class_name', 'method_num', 'path_to_source'])
    # filters the dataframe, keeping only the god classes
    god_classes = filter_all_classes(df)
    print('> Found the following god classes:\n' + indent(god_classes.to_string()))
    return god_classes

def run_wilcoxon_test_all(metrics, folder):
    b_folder = mkdir(folder + '/wilcoxon')
    # runs one test per metric ('precision', 'recall', 'fscore')
    for key, val in metrics.items():
        run_wilcoxon_test(val, key, b_folder)
    print(indent('\n- Wilcoxon test results written to folder "%s"' % b_folder))

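
# NOTE (editor's sketch): `run_wilcoxon_test` lives elsewhere. A plausible
# reading is a pairwise Wilcoxon signed-rank test between classifier columns,
# written to CSV; a hypothetical stand-in:
def _run_wilcoxon_test_sketch(df, metric_name, folder):
    from itertools import combinations
    from scipy.stats import wilcoxon
    rows = [{'pair': '%s vs %s' % (a, b), 'p-value': wilcoxon(df[a], df[b])[1]}
            for a, b in combinations(df.columns, 2)]
    return write_df_to_csv(folder, pd.DataFrame(rows), metric_name)
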
def biased_clf_metrics_to_csv(labels, folder):
    # scores a biased classifier that predicts the positive class for every sample
    prec, rec, fscore, sup = precision_recall_fscore_support(
        labels, ones(len(labels)), average='binary')
    df = pd.DataFrame({'precision': prec, 'recall': rec, 'fscore': fscore}, index=[0])
    out = write_df_to_csv(folder, df, 'biased_metrics')
    print(indent('\n- Biased classifier metrics ("precision", "recall" and '
                 '"fscore") written to file "%s"' % out))

def make_boxplot_all(metrics, folder):
    b_folder = mkdir(folder + '/boxplots')
    for key, val in metrics.items():
        make_boxplot(val, key, b_folder)
    print(indent('\n- Boxplots written to folder "%s"' % b_folder))

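
# NOTE (editor's sketch): `make_boxplot` is defined elsewhere; presumably it
# draws one box per classifier column and saves the figure under the metric's
# name. A hypothetical stand-in:
def _make_boxplot_sketch(df, metric_name, folder):
    plt.figure(figsize=(10, 6))
    plt.title(metric_name)
    df.boxplot()
    plt.savefig(os.path.join(folder, metric_name + '.png'))
    plt.close()
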
def gen_metrics_biased_clf(data, labels):
    print(indent('\n- Generating biased estimators ...'))
    l_prec, l_recall, l_fscore = [], [], []
    # repeats the split-and-score procedure over 100 runs
    for i in range(100):
        prec, rec, f1, sup = split_and_compute_stats(data, labels)
        l_prec.append(prec)
        l_recall.append(rec)
        l_fscore.append(f1)
    return l_prec, l_recall, l_fscore

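
# NOTE (editor's sketch): `split_and_compute_stats` is not shown. Given that
# it feeds the biased-classifier metrics above, it plausibly draws a fresh
# random test split and scores an always-positive predictor on it
# (hypothetical, split ratio assumed):
def _split_and_compute_stats_sketch(data, labels):
    from sklearn.model_selection import train_test_split
    _, _, _, y_test = train_test_split(data, labels, test_size=0.2)
    return precision_recall_fscore_support(
        y_test, ones(len(y_test)), average='binary')
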
def make_plot(x, data, title, folder):
    plt.figure(figsize=(26, 10))
    plt.title(title)
    plt.xlabel('Run')
    plt.ylabel('Metrics')
    # draws one line per classifier column
    for col in data.columns:
        plt.plot(x, data[col].values, marker='o', markersize=4, label=col)
    plt.legend()
    plt.grid(True)
    path = folder + '/' + title.replace(" ", "") + '.png'
    plt.savefig(path)
    # closes the figure so repeated calls do not accumulate open figures
    plt.close()
    print(indent('\nPlot saved to "%s"' % path, spaces=10))

Populates the ontology (tree.owl) with instances of ClassDeclaration,
MethodDeclaration, FieldDeclaration, Statement subclasses (e.g., IfStatement,
WhileStatement, etc.) and FormalParameter.

Example usage:
    $ python3 bad_smells.py individ_creator

flags:
    -s <path-to-source> | --source <path-to-source>:
        The path to a directory containing java files.
        Default is "PROJ_ROOT/lib/android-chess/app/src/main/java/jwtc/chess"
''',
    'find_bad_smells': '''
find_bad_smells:
    Runs Sparql queries to detect bad smells (Long Method, Large Class,
    Long Parameter List, Switch Statements, Data Class).

Example usage:
    $ python3 bad_smells.py find_bad_smells
'''
}


def docstring_preview(text):
    return text.split('\n\n')[0]


docstring_headers = {
    key: indent(docstring_preview(value))
    for (key, value) in MODULE_DOCSTRINGS.items()
}

DOCSTRING = DOCSTRING.format(**docstring_headers)