def synthesize(train_objects, train_object_labels, **kwargs):
    """\n    Synthesize CF.
    Parameters
    ----------
    train_objects : list of lists of ndarray
        Training objects, grouped per class instance.
    train_object_labels : list of int
        Class labels; 1 marks the true class, anything else the false one.
    **kwargs
        Used to pass ``filter_type`` (required) and other filter
        parameters.

    Returns
    -------
    corr_filter : ndarray or None
        The synthesized filter, or None if ``filter_type`` is unknown.
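
    Examples
    --------
    A minimal sketch; assumes a synthesis function named ``mace`` is
    defined at module level (any function reachable through ``globals()``
    will work) and that each object is a 2-D ndarray.

    >>> corr_filter = synthesize([true_imgs, false_imgs], [1, 0],
    ...                          filter_type='mace')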
    """
    try:
        filter_type = kwargs['filter_type']
    except KeyError:
        raise KeyError("filter_type not found.")
    true_objects = []
    false_objects = []
    for obj, label in zip(train_objects, train_object_labels):
        if label == 1:
            true_objects.append(obj)
        else:
            false_objects.append(obj)
    true_objects = hlp.flattenList(true_objects)
    false_objects = hlp.flattenList(false_objects)
    try:
    try:
        # Look up the synthesis function by name in the module namespace.
        corr_filter = globals()[filter_type](true_objects, false_objects,
                                             **kwargs)
        return corr_filter
    except KeyError:
        print("Error! Filter {} not found! Returning None.".format(filter_type))
        return None


def getMetrics(true_labels, pred_labels, threshold=0.5):
    """\n    Returns matrics for classification experiment.
    Parameters
    ----------
    true_labels : list of int
        Ground-truth class labels.
    pred_labels : list of lists of float
        Correlation peaks (scores) per object; thresholded into classes
        when a metric requires discrete labels.
    threshold : float, default=0.5
        Classification threshold used in CPR experiments.

    Returns
    -------
    _metric : dict
        Dictionary with all of the calculated metrics.
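
    Examples
    --------
    A minimal sketch; ``pred_labels`` holds one score sequence per object,
    ``true_labels`` one label per flattened score.

    >>> scores = [[0.9, 0.8], [0.2, 0.6]]
    >>> metric = getMetrics([1, 1, 0, 0], scores, threshold=0.5)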
    """
    def getConfusionMatrix(confusion_matrix):
        s = "\n         |   Predicted   |\n\
        -----+-------+-------+\n\
        Real |   1   |   0   |\n\
        -----+-------+-------+\n\
           1 |{TP: ^7d}|{FN: ^7d}|\n\
        -----+-------+-------+\n\
           0 |{FP: ^7d}|{TN: ^7d}|\n\
        -----+-------+-------+"

        # sklearn convention: rows are true labels, columns are predictions.
        TP = confusion_matrix[1, 1]
        TN = confusion_matrix[0, 0]
        FN = confusion_matrix[1, 0]
        FP = confusion_matrix[0, 1]
        return s.format(TP=TP, FN=FN, FP=FP, TN=TN)

    from sklearn import metrics as mtr
    # Threshold the raw scores into binary class labels.
    pred_classes = hlp.flattenList([[int(elem > threshold) for elem in seq]
                                    for seq in pred_labels])
    _metric = {}
    # Each metric is first tried on the raw scores; if sklearn rejects
    # them (ValueError), fall back to the thresholded class labels.
    metric_funcs = {
        'accuracy': mtr.accuracy_score,
        'confusion_matrix': lambda t, p: getConfusionMatrix(
            mtr.confusion_matrix(t, p)),
        'f1': mtr.f1_score,
        'precision': mtr.precision_score,
        'recall': mtr.recall_score,
        'report': mtr.classification_report,
        'ROC_AUC': mtr.roc_auc_score,
    }
    for name, func in metric_funcs.items():
        try:
            _metric[name] = func(true_labels, pred_labels)
        except ValueError:
            _metric[name] = func(true_labels, pred_classes)
    return _metric

    def run(self, params, index):
        """\n    Start session with fixed parameters. Returns row of DataFrame
        with input and output parameters."""
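        # Expected ``params`` keys (taken from the lookups below); the
        # values shown here are illustrative assumptions only:
        #     params = dict(classifier_type='cf',
        #                   classifier_name='clf_demo',
        #                   classifier_is_save=False,
        #                   train_object_folder=['data/train/obj_true'],
        #                   train_object_labels=[1],
        #                   test_object_folder=['data/test/obj_false'],
        #                   test_object_labels=[0])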
        __start = timer()
        try:
            clf_type = params['classifier_type']
            train_object_folder = params['train_object_folder']
            train_object_labels = params['train_object_labels']
            test_object_folder = params['test_object_folder']
            test_object_labels = params['test_object_labels']
            is_save = params['classifier_is_save']
            clf_name = params['classifier_name']
        except KeyError:
            print("Error! Some of necessary data is not found!")
            return None
        try:
            filter_type = params['filter_type']
        except KeyError:
            filter_type = None
        try:
            processing = params['classifier_processing']
        except KeyError:
            processing = None
        clf = classifier(clf_type,
                         clf_name,
                         processing,
                         filter_type=filter_type)
        clf.fit(train_object_folder, train_object_labels, is_save)
        folders = train_object_folder + test_object_folder
        labels = train_object_labels + test_object_labels
        # Expand per-folder labels to one label per file in that folder.
        labels_full = hlp.flattenList([
            ([a] * len(b)) for a, b in zip(labels, returnFiles(folders))
        ])
        predictions = getPrediction(clf, folders, False)
        names = [folder.split(os.sep)[-1] for folder in folders]
        dataset = folders[0].split(os.sep)[-2]
        getDiscrChar(predictions,
                     names=names,
                     title=clf_name,
                     is_save=is_save,
                     threshold=clf.threshold,
                     dataset=dataset)
        metric = getMetrics(labels_full, predictions, threshold=clf.threshold)
        __finish = timer()
        clf_raw = clf.type + ('' if filter_type is None else '_' + filter_type)
        if isinstance(processing, list):
            _processing = str(processing[0]) + '_' + str(processing[1])
        else:
            _processing = str(processing)
        clf_raw += ('_ideal' if processing is None else '_' + _processing)
        print("Dataset: {dat}, classifier: {clf}, elapsed time: {t} s".format(
            dat=dataset, clf=clf_raw, t=__finish - __start))

        df = pd.DataFrame(data=dict(
            date=datetime.datetime.today().isoformat(),
            elapsed_time=__finish - __start,
            classifier_type=clf_type,
            classifier_name=clf_name,
            classifier_is_saved=is_save,
            classifier_processing=_processing,
            classifier_args=None,
            train_object_folder=str(train_object_folder),
            train_object_labels=str(train_object_labels),
            train_object_size=None,
            train_object_num=None,
            test_object_folder=str(test_object_folder),
            test_object_labels=str(test_object_labels),
            test_object_num=None,
            metrics_accuracy=metric['accuracy'],
            metrics_confusion_matrix=metric['confusion_matrix'],
            metrics_f1=metric['f1'],
            metrics_precision=metric['precision'],
            metrics_recall=metric['recall'],
            metrics_report=metric['report'],
            metrics_ROC_AUC=metric['ROC_AUC']),
                          index=[index])
        return df

    def __setthr__(self, train_objects, train_object_labels):
        """Set the classifier's threshold.

        In progress...
        """
        is_holo = (self.type == 'cf_holo')
        true_objects = []
        false_objects = []
        for obj, label in zip(train_objects, train_object_labels):
            if label == 1:
                true_objects.append(obj)
            else:
                false_objects.append(obj)
        # Correlation responses of the current filter for each class.
        true_corr_outputs = cf.predict(self.data,
                                       hlp.flattenList(true_objects),
                                       0,
                                       return_class=False,
                                       is_holo=is_holo)
        false_corr_outputs = cf.predict(self.data,
                                        hlp.flattenList(false_objects),
                                        0,
                                        return_class=False,
                                        is_holo=is_holo)
        DUMMY_THRESHOLDING = True
        if DUMMY_THRESHOLDING:
            # Simple rule: the midpoint between the two class means.
            self.threshold = (np.mean(true_corr_outputs) +
                              np.mean(false_corr_outputs)) / 2
            return
        # Otherwise, fit normal distributions to the normalized responses
        # of both classes and put the threshold where they intersect.
        norma = np.max(true_corr_outputs + false_corr_outputs)
        x = np.arange(0, 1, 1e-5)
        norm_dist_true = hlp.norm_dist(np.array(true_corr_outputs) / norma, x)
        norm_dist_false = hlp.norm_dist(
            np.array(false_corr_outputs) / norma, x)
        nd_difference = norm_dist_true - norm_dist_false
        x0 = np.argmax(norm_dist_false)
        x1 = np.argmax(norm_dist_true)
        try:
            # Closest point of the two distributions between their peaks.
            threshold = norma * (np.argmin(np.abs(nd_difference[x0:x1])) +
                                 x0) * 1e-5
        except ValueError:
            # Empty slice (x0 >= x1): fall back to a point near the
            # true-class peak.
            threshold = norma * 0.9 * x1 * 1e-5
        if x0 > x1:
            print("Error! Threshold can't be set.")
        else:
            # Mark near-flat regions of |difference| between the peaks and
            # take the midpoint of the flat run as the threshold.
            y0 = np.abs(nd_difference)
            for dx, dy in enumerate(y0[1:]):
                if np.abs(dy - y0[dx - 1]) < np.max([1e-6, np.min(y0)]):
                    y0[dx] = 1
                else:
                    y0[dx] = 0
            y0[dx + 1] = 0
            for dx in range(len(y0) - 2):
                if dx < x0:
                    y0[dx + 1] = 0
                elif ((y0[dx] == 1) and (y0[dx + 2] == 1)):
                    y0[dx + 1] = 1
            final_x = 0
            for dx in np.arange(x0, x1):
                if (y0[dx] == 1) and (final_x == 0):
                    final_x = dx
                elif (y0[dx] == 0) and (final_x != 0) and (y0[dx - 1] == 1):
                    final_x = (final_x + dx) / 2
            if final_x != 0:
                threshold = norma * final_x * 1e-5
        self.threshold = threshold

def getDiscrChar(peaks, names, title=None, is_save=False, **kwargs):
    """\n    Returns image of discriminatory characteristic (to the file
    or figure).
    Parameters:
    -----------
    peaks : list of lists of floats
        Correlation peaks.
    names = list of str
        Names of objects in dataset.
    title : str (default=None)
        Title of plot.
    is_save : bool, default=False
        If True, images are saved, else they are shown in figures.
    **kwargs
        Can be used to pass the dataset name, threshold and other
        parameters.

    Returns
    -------
    error_key : int
        0 on success; 1 if ``is_save`` is True but no dataset name was
        given in ``kwargs``.
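
    Examples
    --------
    A minimal sketch with two objects and an assumed threshold:

    >>> getDiscrChar([[0.9, 0.8], [0.3, 0.2]], ['true_obj', 'false_obj'],
    ...              title='demo', threshold=0.5)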
    """
    error_key = 0
    plt.figure()
    norma = np.max(hlp.flattenList(peaks))
    x_range = max([len(cur_peaks) for cur_peaks in peaks])
    max_x = np.arange(x_range)

    for index, obj_peaks in enumerate(peaks):
        x = np.arange(len(obj_peaks))
        cur_peaks = [peak / norma for peak in obj_peaks]
        plt.plot(x, cur_peaks, label=names[index])
    try:
        threshold = kwargs['threshold']
        plt.plot(max_x, [threshold / norma] * len(max_x),
                 'k--',
                 label='Threshold')
    except KeyError:
        pass
    if title is not None:
        plt.title(title)
    plt.legend()
    plt.ylim((0, 1.05))

    if is_save:
        try:
            dataset = kwargs['dataset']
        except KeyError:
            error_key = 1
            dataset = 'Unknown'
        fig = plt.gcf()
        folder = pjoin('data', 'graph')
        # Create the dataset folder; ignore the OSError if it already exists.
        try:
            os.mkdir(pjoin(folder, dataset))
        except OSError:
            pass
        fig.set_size_inches(18.5, 10.5)
        full_name = pjoin(folder, dataset, title) + '.png'
        fig.savefig(full_name, dpi=300, bbox_inches='tight')
        plt.close()
    else:
        plt.show()
    return error_key