def synthesize(train_objects, train_object_labels, **kwargs):
    """
    Synthesize CF.

    Parameters
    ----------
    train_objects : list of lists of ndarray
    train_object_labels : list of int
    **kwargs
        Used for filter_type and other parameters of the filters.

    Returns
    -------
    corr_filter : ndarray
    """
    try:
        filter_type = kwargs['filter_type']
    except KeyError:
        raise KeyError("Filter type not found.")

    # Split the training objects into the true (label 1) and false classes.
    true_objects = []
    false_objects = []
    for obj, label in zip(train_objects, train_object_labels):
        if label == 1:
            true_objects.append(obj)
        else:
            false_objects.append(obj)
    true_objects = flattenList(true_objects)
    false_objects = flattenList(false_objects)

    # Look up the synthesis function by name; a missing name raises KeyError.
    try:
        filter_func = globals()[filter_type]
    except KeyError:
        print("Error! Filter {} not found! Returning None.".format(filter_type))
        return None
    corr_filter = filter_func(true_objects, false_objects, **kwargs)
    return corr_filter
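# Illustrative sketch (not part of the library): how synthesize() is meant to be
# called. The filter name 'MACE' and the 64x64 random images are assumptions made
# purely for this example; any filter function defined in this module's globals()
# can be selected via filter_type.
def _example_synthesize():
    import numpy as np
    true_class = [[np.random.rand(64, 64) for _ in range(5)]]   # label 1 objects
    false_class = [[np.random.rand(64, 64) for _ in range(5)]]  # label 0 objects
    corr_filter = synthesize(true_class + false_class,
                             [1, 0],
                             filter_type='MACE')  # assumed filter name
    return corr_filter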
def getMetrics(true_labels, pred_labels, threshold=0.5):
    """
    Return metrics for a classification experiment.

    Parameters
    ----------
    true_labels : list of float
    pred_labels : list of float
    threshold : float, default=0.5
        Threshold in CPR experiments.

    Returns
    -------
    _metric : dict
        Includes all of the calculated metrics.
    """
    def getConfusionMatrix(confusion_matrix):
        s = ("\n     |   Predicted   |\n"
             "-----+-------+-------+\n"
             "Real |   1   |   0   |\n"
             "-----+-------+-------+\n"
             "  1  |{TP: ^7d}|{FN: ^7d}|\n"
             "-----+-------+-------+\n"
             "  0  |{FP: ^7d}|{TN: ^7d}|\n"
             "-----+-------+-------+")
        TP = confusion_matrix[1, 1]
        TN = confusion_matrix[0, 0]
        FN = confusion_matrix[1, 0]
        FP = confusion_matrix[0, 1]
        return s.format(TP=TP, FN=FN, FP=FP, TN=TN)

    from sklearn import metrics as mtr

    # Binarize the raw correlation peaks with the given threshold.
    t = threshold
    pred_classes = hlp.flattenList(
        [[int(elem > t) for elem in seq] for seq in pred_labels])

    _metric = {}

    def addMetric(name, score_func):
        # Try the raw predicted labels first; fall back to the thresholded
        # classes for metrics that require discrete class labels.
        try:
            _metric[name] = score_func(true_labels, pred_labels)
        except ValueError:
            _metric[name] = score_func(true_labels, pred_classes)

    addMetric('accuracy', mtr.accuracy_score)
    addMetric('confusion_matrix',
              lambda y_true, y_pred: getConfusionMatrix(
                  mtr.confusion_matrix(y_true, y_pred)))
    addMetric('f1', mtr.f1_score)
    addMetric('precision', mtr.precision_score)
    addMetric('recall', mtr.recall_score)
    addMetric('report', mtr.classification_report)
    addMetric('ROC_AUC', mtr.roc_auc_score)
    return _metric
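# Illustrative sketch (not part of the library): getMetrics() takes the raw
# correlation peaks grouped per object and one true label per peak; peaks above
# the threshold are treated as class 1. The numbers below are invented for the
# example.
def _example_getMetrics():
    true_labels = [1, 1, 0, 0]                  # one label per peak, flattened
    pred_labels = [[0.9, 0.7], [0.2, 0.4]]      # two objects, two peaks each
    metric = getMetrics(true_labels, pred_labels, threshold=0.5)
    print(metric['accuracy'])
    print(metric['confusion_matrix'])
    return metric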
def run(self, params, index):
    """
    Start a session with fixed parameters.

    Returns a row of a DataFrame with the input and output parameters.
    """
    __start = timer()
    try:
        clf_type = params['classifier_type']
        train_object_folder = params['train_object_folder']
        train_object_labels = params['train_object_labels']
        test_object_folder = params['test_object_folder']
        test_object_labels = params['test_object_labels']
        is_save = params['classifier_is_save']
        clf_name = params['classifier_name']
    except KeyError:
        print("Error! Some of the necessary data was not found!")
        return None
    filter_type = params.get('filter_type', None)
    processing = params.get('classifier_processing', None)

    clf = classifier(clf_type, clf_name, processing, filter_type=filter_type)
    clf.fit(train_object_folder, train_object_labels, is_save)

    folders = train_object_folder + test_object_folder
    labels = train_object_labels + test_object_labels
    # Expand the per-folder labels to one label per file.
    labels_full = hlp.flattenList([
        [a] * len(b) for a, b in zip(labels, returnFiles(folders))
    ])
    predictions = getPrediction(clf, folders, False)
    names = [folder.split(os.sep)[-1] for folder in folders]
    dataset = folders[0].split(os.sep)[-2]
    getDiscrChar(predictions,
                 names=names,
                 title=clf_name,
                 is_save=is_save,
                 threshold=clf.threshold,
                 dataset=dataset)
    metric = getMetrics(labels_full, predictions, threshold=clf.threshold)
    __finish = timer()

    clf_raw = clf.type + ('' if filter_type is None else '_' + filter_type)
    if isinstance(processing, list):
        _processing = str(processing[0]) + '_' + str(processing[1])
    else:
        _processing = str(processing)
    clf_raw += '_ideal' if processing is None else '_' + _processing
    print("Dataset: {dat}, classifier: {clf}, elapsed time: {t} s".format(
        dat=dataset, clf=clf_raw, t=__finish - __start))

    df = pd.DataFrame(data=dict(
        date=datetime.datetime.today().isoformat(),
        elapsed_time=__finish - __start,
        classifier_type=clf_type,
        classifier_name=clf_name,
        classifier_is_saved=is_save,
        classifier_processing=_processing,
        classifier_args=None,
        train_object_folder=str(train_object_folder),
        train_object_labels=str(train_object_labels),
        train_object_size=None,
        train_object_num=None,
        test_object_folder=str(test_object_folder),
        test_object_labels=str(test_object_labels),
        test_object_num=None,
        metrics_accuracy=metric['accuracy'],
        metrics_confusion_matrix=metric['confusion_matrix'],
        metrics_f1=metric['f1'],
        metrics_precision=metric['precision'],
        metrics_recall=metric['recall'],
        metrics_report=metric['report'],
        metrics_ROC_AUC=metric['ROC_AUC']),
        index=[index])
    return df
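# Illustrative sketch (not part of the library): the params dict expected by
# run(). The required keys match the lookups above; the folder paths, labels,
# classifier type and filter name are assumptions made for the example only.
EXAMPLE_PARAMS = {
    'classifier_type': 'cf',                     # assumed classifier type
    'classifier_name': 'example_clf',
    'classifier_is_save': False,
    'classifier_processing': None,               # optional key
    'filter_type': 'MACE',                       # optional key, assumed name
    'train_object_folder': ['data/example/train_true', 'data/example/train_false'],
    'train_object_labels': [1, 0],
    'test_object_folder': ['data/example/test_true', 'data/example/test_false'],
    'test_object_labels': [1, 0],
}
# A session row would then be produced with something like
# (the session object itself is assumed here):
# df_row = session.run(EXAMPLE_PARAMS, index=0)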
def __setthr__(self, train_objects, train_object_labels):
    """
    Set the classifier's threshold.

    In progress...
    """
    is_holo = (self.type == 'cf_holo')
    # Split the training objects into the true (label 1) and false classes.
    true_objects = []
    false_objects = []
    for obj, label in zip(train_objects, train_object_labels):
        if label == 1:
            true_objects.append(obj)
        else:
            false_objects.append(obj)
    true_corr_outputs = cf.predict(self.data,
                                   hlp.flattenList(true_objects),
                                   0,
                                   return_class=False,
                                   is_holo=is_holo)
    false_corr_outputs = cf.predict(self.data,
                                    hlp.flattenList(false_objects),
                                    0,
                                    return_class=False,
                                    is_holo=is_holo)

    DUMMY_THRESHOLDING = True
    if DUMMY_THRESHOLDING:
        # Simple heuristic: put the threshold halfway between the mean
        # correlation peaks of the true and false classes.
        self.threshold = (np.mean(true_corr_outputs) +
                          np.mean(false_corr_outputs)) / 2
        return

    # Estimate normal distributions of the normalized correlation peaks and
    # search for a crossing point between the false and true class modes.
    norma = np.max(true_corr_outputs + false_corr_outputs)
    x = np.arange(0, 1, 1e-5)
    norm_dist_true = hlp.norm_dist(np.array(true_corr_outputs) / norma, x)
    norm_dist_false = hlp.norm_dist(np.array(false_corr_outputs) / norma, x)
    nd_difference = norm_dist_true - norm_dist_false
    x0 = np.argmax(norm_dist_false)
    x1 = np.argmax(norm_dist_true)
    try:
        threshold = norma * (np.argmin(np.abs(nd_difference[x0:x1])) + x0) * 1e-5
    except ValueError:
        # Empty slice between the modes; fall back to a point near the true mode.
        threshold = norma * 0.9 * x1 * 1e-5
    if x0 > x1:
        print("Error! Threshold can't be set.")
    else:
        # Binarize the |difference| curve: 1 where it is nearly flat, 0 elsewhere.
        y0 = np.abs(nd_difference)
        for dx, dy in enumerate(y0[1:]):
            if np.abs(dy - y0[dx - 1]) < np.max([1e-6, np.min(y0)]):
                y0[dx] = 1
            else:
                y0[dx] = 0
                y0[dx + 1] = 0
        # Fill single-sample gaps inside the plateau and zero everything before x0.
        for dx in range(len(y0) - 2):
            if dx < x0:
                y0[dx + 1] = 0
            elif (y0[dx] == 1) and (y0[dx + 2] == 1):
                y0[dx + 1] = 1
        # Take the middle of the first plateau found between the two modes.
        final_x = 0
        for dx in np.arange(x0, x1):
            if (y0[dx] == 1) and (final_x == 0):
                final_x = dx
            elif (y0[dx] == 0) and (final_x != 0) and (y0[dx - 1] == 1):
                final_x = (final_x + dx) / 2
        if final_x != 0:
            threshold = norma * final_x * 1e-5
    self.threshold = threshold
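# Illustrative sketch (not part of the library): the "dummy" thresholding rule
# used above simply splits the two classes at the midpoint of their mean
# correlation peaks. The peak values below are invented for the example.
def _example_dummy_threshold():
    import numpy as np
    true_corr_outputs = [0.9, 0.85, 0.95]
    false_corr_outputs = [0.2, 0.3, 0.25]
    threshold = (np.mean(true_corr_outputs) + np.mean(false_corr_outputs)) / 2
    return threshold  # 0.575 for these values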
def getDiscrChar(peaks, names, title=None, is_save=False, **kwargs):
    """
    Render the discriminatory characteristic (to a file or a figure).

    Parameters
    ----------
    peaks : list of lists of float
        Correlation peaks.
    names : list of str
        Names of the objects in the dataset.
    title : str, default=None
        Title of the plot.
    is_save : bool, default=False
        If True, images are saved; otherwise they are shown in figures.
    **kwargs
        Can be used to pass the dataset name, threshold and other parameters.

    Returns
    -------
    error_key : int
        If 0, everything is OK.
    """
    error_key = 0
    plt.figure()
    # Normalize all curves by the global maximum peak.
    norma = np.max(hlp.flattenList(peaks))
    x_range = max([len(cur_peaks) for cur_peaks in peaks])
    max_x = np.arange(x_range)
    for index in range(len(peaks)):
        x = np.arange(len(peaks[index]))
        cur_peaks = [peak / norma for peak in peaks[index]]
        plt.plot(x, cur_peaks, label=names[index])
    try:
        threshold = kwargs['threshold']
        plt.plot(max_x, [threshold / norma] * len(max_x), 'k--', label='Threshold')
    except KeyError:
        pass
    if title is not None:
        plt.title(title)
    plt.legend()
    plt.ylim((0, 1.05))
    if is_save:
        try:
            dataset = kwargs['dataset']
        except KeyError:
            error_key = 1
            dataset = 'Unknown'
        fig = plt.gcf()
        folder = pjoin('data', 'graph')
        try:
            os.mkdir(pjoin(folder, dataset))
        except OSError:
            pass
        fig.set_size_inches(18.5, 10.5)
        full_name = pjoin(folder, dataset, title) + '.png'
        fig.savefig(full_name, dpi=300, bbox_inches='tight')
        plt.close()
    else:
        plt.show()
    return error_key
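# Illustrative sketch (not part of the library): plotting a discriminatory
# characteristic for two objects. The peak values, names and threshold are
# invented for the example; with is_save=True the figure would instead be
# written under data/graph/<dataset>/.
def _example_getDiscrChar():
    peaks = [[0.9, 0.85, 0.95, 0.8], [0.2, 0.3, 0.25, 0.35]]
    names = ['true_object', 'false_object']
    error_key = getDiscrChar(peaks, names,
                             title='Example discriminatory characteristic',
                             is_save=False,
                             threshold=0.6)
    return error_key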