if __name__ == "__main__":
    # Build each learner exactly once and register it under the name it
    # reports via get_name().  The previous form constructed every class
    # twice per entry (one throw-away instance just to read the name).
    algorithms = {
        learner.get_name(): learner
        for learner in (
            QUESTConstructor(),
            GUIDEConstructor(),
            CARTConstructor(),
            C45Constructor(),
            RFClassification(),
            XGBClassification(),
        )
    }

    genesim = GENESIM()
    inTrees_clf = inTreesClassifier()

    # NOTE(review): presumably the number of cross-validation folds; it is
    # not used in the part of the script visible here -- confirm below.
    NR_FOLDS = 5

    for dataset in load_all_datasets():
        # Pull the pieces of the dataset record that the rest of the loop
        # body (outside this view) is expected to work with.
        df = dataset['dataframe']
        label_col = dataset['label_col']
        feature_cols = dataset['feature_cols']

        # One empty result list per method, in three parallel dicts keyed by
        # method name.  What gets appended is decided further down the loop.
        conf_matrices, avg_nodes, times = {}, {}, {}
        for algorithm in algorithms:
            conf_matrices[algorithm] = []
            avg_nodes[algorithm] = []
            times[algorithm] = []

        # These three methods are not part of `algorithms` but get the same
        # result buckets as the registered learners.
        for extra_method in ('GENESIM', 'ISM', 'inTrees'):
            conf_matrices[extra_method] = []
            avg_nodes[extra_method] = []
            times[extra_method] = []
norm_diagonal_sum = sum([ float(conf_matrix[i][i]) / float(sum(conf_matrix[i])) for i in range(len(conf_matrix)) ]) total_count = np.sum(conf_matrix) accuracies.append(float(diagonal_sum) / float(total_count)) bal_accuracies.append(float(norm_diagonal_sum) / conf_matrix.shape[0]) return { 'acc': (np.around([np.mean(accuracies)], 4)[0], np.around([np.std(accuracies)], 2)[0]), 'balacc': (np.around([np.mean(bal_accuracies)], 4)[0], np.around([np.std(bal_accuracies)], 2)[0]) } datasets = load_all_datasets() NR_FOLDS = 5 measurements = {} figures = {} for dataset in datasets: measurements[dataset['name']] = {} print dataset['name'] conf_matrices = { 'Imbalanced': [], 'RUS': [], 'Tomek': [], 'ENN': [], 'ROS': [], 'SMOTE': [], 'SMOTE SVM': [], 'SMOTE Tomek': [],