Code example #1
        (train_data_normalized, trainShiftFactor,
         trainScaleFactor) = normalizeAcrossEpoch(train_bp, 'MinMax')

        if True:  # toggle: PCA + rLDA pipeline (enabled)
            train_data_downsampled = train_data_normalized[:, :, ::
                                                           decim_factor]
            train_reshaped = train_data_downsampled.reshape(
                train_data_downsampled.shape[0],
                -1)  # merge channel and time for the pca
            pca = PCA(0.95)
            pca.fit(train_reshaped)
            pca.components_ = -pca.components_  # flip sign to be consistent with Inaki's code
            train_pcaed = pca.transform(train_reshaped)

            # train classifier
            cls = rLDA(regcoeff)
            cls.fit(train_pcaed, label)

        if False:  # toggle: Random Forest alternative (disabled)
            pca = None
            X = compute_features(train_data_normalized, sfreq, l_freq, h_freq,
                                 decim_factor, trainShiftFactor,
                                 trainScaleFactor, pca)
            # Classifier init
            RF = dict(trees=100, maxdepth=None)
            cls = RandomForestClassifier(n_estimators=RF['trees'],
                                         max_features='auto',
                                         max_depth=RF['maxdepth'],
                                         n_jobs=n_jobs)
            cls.fit(X, label)
            flen = X.shape[1]
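
The snippet above depends on the project helper normalizeAcrossEpoch, which returns the normalized training data together with the shift/scale factors that are later reapplied to test data. A minimal sketch of a 'MinMax' variant, assuming per-channel normalization computed over all training epochs (the actual pycnbi helper may differ):

import numpy as np

def normalize_across_epoch_minmax(data):
    """MinMax-normalize EEG data of shape (epochs, channels, samples).

    Returns the normalized data plus the shift/scale factors so the same
    transform can be reapplied to unseen test epochs.
    """
    shift = data.min(axis=(0, 2), keepdims=True)          # per-channel minimum
    scale = data.max(axis=(0, 2), keepdims=True) - shift  # per-channel range
    scale[scale == 0] = 1.0                               # guard flat channels
    return (data - shift) / scale, shift, scale

rng = np.random.default_rng(0)
train = rng.standard_normal((10, 4, 256))   # 10 epochs, 4 channels, 256 samples
train_norm, shift, scale = normalize_across_epoch_minmax(train)
test_norm = (rng.standard_normal((2, 4, 256)) - shift) / scale  # reuse factors

Keeping the factors is what lets compute_features transform test epochs with the statistics learned on the training split.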
Code example #2
    def apply_cv(epochs):
        count = 1
        confusion_matrixes = []
        confusion_matrixes_percent = []
        predicted = ''
        test_label = ''
        firstIterCV = True
        probabilities = np.array([[]], ndmin=2)
        predictions = np.array([])
        best_threshold = []
        cv_probabilities = []
        cv_probabilities_label = []
        for train, test in cv:
            ## Train Data processing ##
            train_data = epochs._data[train]
            train_label = label[train]

            # Online simulation flag
            if FILTER_METHOD == 'WINDOWED':  # epochs should have one epoch only
                train_bp = mne.filter.band_pass_filter(
                    train_data,
                    sfreq,
                    Fp1=2,
                    Fp2=h_freq,
                    copy=True,
                    filter_length=None,
                    method='fft',
                    iir_params=None)  # bandpass on one epoch
            if FILTER_METHOD in ('NC', 'LFILT'):
                train_bp = train_data
            train_bp = train_bp[:, :, paddingIdx:paddingIdx +
                                (int((tmax - tmin) * sfreq))]

            # Remove the per-trial, per-channel DC offset
            for trial in range(train_bp.shape[0]):
                for ch in range(train_bp.shape[1]):
                    train_bp[trial, ch, :] = train_bp[trial, ch, :] - np.mean(
                        train_bp[trial, ch, :])

            # plt.figure()
            # plt.plot(train_bp[7,:].T)
            # plt.savefig(str(FILTER_METHOD)+'.png')
            # Normalization
            (train_normalized, trainShiftFactor,
             trainScaleFactor) = normalizeAcrossEpoch(train_bp, 'MinMax')

            # Downsampling
            train_downsampling = train_normalized[:, :, ::decim_factor]

            # Merge (reshape) channel and time for the PCA
            train_reshaped = train_downsampling.reshape(
                train_downsampling.shape[0], -1)

            # PCA initialisation
            if APPLY_PCA is False:
                pca = None
                train_pcaed = train_reshaped
            else:
                pca = PCA(0.95)
                pca.fit(train_reshaped)
                pca.components_ = -pca.components_  # inversion of vector to be consistent with Inaki's code
                train_pcaed = pca.transform(train_reshaped)

            # PCA
            #			train_pcaed = train_reshaped

            ## Test data processing ##
            test_data = epochs._data[test]
            test_label = label[test]

            # compute_features applies the same steps as for train (bandpass,
            # normalization, downsampling, channel/time merge) but reuses the
            # PCA fitted on the train split
            test_pcaed = compute_features(test_data,
                                          sfreq,
                                          l_freq,
                                          h_freq,
                                          decim_factor,
                                          trainShiftFactor,
                                          trainScaleFactor,
                                          pca,
                                          FILTER_METHOD,
                                          tmin,
                                          tmax,
                                          paddingIdx,
                                          iir_params=dict(a=a, b=b))
            #			test_pcaed = compute_features(test_data,sfreq,l_freq,h_freq,decim_factor,trainShiftFactor,trainScaleFactor,pca=None)

            ## Test ##
            train_x = train_pcaed
            test_x = test_pcaed

            # Classifier init
            #			RF = dict(trees=100, maxdepth=None)
            #			cls = RandomForestClassifier(n_estimators=RF['trees'], max_features='auto', max_depth=RF['maxdepth'], n_jobs=n_jobs)
            # cls = RandomForestClassifier(n_estimators=RF['trees'], max_features='auto', max_depth=RF['maxdepth'], class_weight="balanced", n_jobs=n_jobs)
            # cls = LDA(solver='eigen')
            #			cls = QDA(reg_param=0.3) # regularized LDA

            #			cls.fit( train_x, train_label )
            # Y_pred= cls.predict( test_x )
            # prediction = Y_pred

            # Fitting
            cls = rLDA(regcoeff)
            cls.fit(train_x, train_label)

            predicted = cls.predict(test_x)
            probs = cls.predict_proba(test_x)
            prediction = np.array(predicted)

            if useLeaveOneOut is True:
                if firstIterCV is True:
                    probabilities = np.append(probabilities, probs, axis=1)
                    firstIterCV = False
                    predictions = np.append(predictions, prediction)
                else:
                    probabilities = np.append(probabilities, probs, axis=0)
                    predictions = np.append(predictions, prediction)
            else:
                predictions = np.append(predictions, prediction)
                probabilities = np.append(probabilities, probs)

            # Performance
            if useLeaveOneOut is not True:
                cm = np.array(confusion_matrix(test_label, prediction))
                cm_normalized = cm.astype('float') / cm.sum(axis=1)[:,
                                                                    np.newaxis]
                confusion_matrixes.append(cm)
                confusion_matrixes_percent.append(cm_normalized)
                avg_confusion_matrixes = np.mean(confusion_matrixes_percent,
                                                 axis=0)

            print('CV #' + str(count))
            print('Prediction: ' + str(prediction))
            print('    Actual: ' + str(test_label))

            # Append probs to the global list
            probs_np = np.array(probs)
            cv_probabilities.append(probs_np[:, 0])
            cv_probabilities_label.append(test_label)

            #			if useLeaveOneOut is not True:
            #				print('Confusion matrix')
            #				print(cm)
            #				print('Confusion matrix (normalized)')
            #				print(cm_normalized)
            #				print('---')
            #				print('True positive rate: '+str(cm_normalized[0][0]))
            #				print('True negative rate: '+str(cm_normalized[1][1]))
            print('===================')

            ## One CV done, go to the next one
            count += 1

        best_threshold = None
        cv_prob_linear = np.ravel(cv_probabilities)
        cv_prob_label_np = np.array(cv_probabilities_label)
        cv_prob_label_linear = np.ravel(cv_prob_label_np)
        threshold_list = np.linspace(0, 1, 100)

        biglist_fpr = []
        biglist_tpr = []
        biglist_thresh = []
        biglist_cms = []

        for thresh in threshold_list:
            biglist_pred = [
                4 if x < thresh else 3 for x in cv_prob_linear
            ]  # list comprehension to quickly go through the list.
            biglist_cm = confusion_matrix(cv_prob_label_linear, biglist_pred)
            biglist_cm_norm = biglist_cm.astype('float') / biglist_cm.sum(
                axis=1)[:, np.newaxis]
            biglist_cms.append(biglist_cm_norm)
            biglist_tpr.append(biglist_cm_norm[0][0])
            biglist_fpr.append(biglist_cm_norm[1][0])
            biglist_thresh.append(thresh)
        biglist_auc = auc(biglist_fpr, biglist_tpr)

        # Make a subset of data where FPR < MAX_FPR
        idx_below_maxfpr = np.where(np.array(biglist_fpr) < MAX_FPR)
        fpr_below_maxfpr = np.array(biglist_fpr)[idx_below_maxfpr[0]]
        tpr_below_maxfpr = np.array(biglist_tpr)[idx_below_maxfpr[0]]

        # Look for the best (max value) FPR in that subset
        best_tpr_below_maxfpr = np.max(tpr_below_maxfpr)
        best_tpr_below_maxfpr_idx = np.array(
            np.where(
                biglist_tpr == best_tpr_below_maxfpr)).ravel()  # get its idx

        # Get the associated TPRs
        best_tpr_below_maxfpr_associated_fpr = np.array(
            biglist_fpr)[best_tpr_below_maxfpr_idx]
        # Get the best (min value) in that subset
        best_associated_fpr = np.min(best_tpr_below_maxfpr_associated_fpr)
        # ... get its idx
        best_associated_fpr_idx = np.array(
            np.where(biglist_fpr == best_associated_fpr)).ravel()

        # The best idx is the one that is on both set
        best_idx = best_tpr_below_maxfpr_idx[np.in1d(best_tpr_below_maxfpr_idx,
                                                     best_associated_fpr_idx)]

        plt.plot(biglist_fpr, biglist_tpr)
        plt.xlabel('False positive rate')
        plt.ylabel('True positive rate')
        best_threshold = threshold_list[best_idx]
        print('#################################')
        print('Best threshold: ' + str(best_threshold))
        print('Gives a TPR of ' + str(best_tpr_below_maxfpr))
        print('And a FPR of ' + str(best_associated_fpr))
        print('CM')
        print(biglist_cms[best_idx[0]])

        return (biglist_auc, best_threshold)
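
The threshold search at the end of apply_cv scans 100 candidate thresholds, normalizes each confusion matrix, and keeps the threshold with the highest TPR whose FPR stays under MAX_FPR. A compact sketch of the same idea, assuming class-0 probabilities and the 3/4 label coding used above:

import numpy as np
from sklearn.metrics import confusion_matrix

def pick_threshold(probs_class0, labels, max_fpr=0.05):
    """Return (threshold, tpr, fpr) maximizing TPR subject to FPR < max_fpr.

    Mirrors the search above: predict 4 below the threshold, 3 otherwise.
    Assumes both classes appear in labels (otherwise a row sum is zero).
    """
    best = (None, -1.0, 1.0)  # (threshold, tpr, fpr)
    for thresh in np.linspace(0, 1, 100):
        pred = np.where(probs_class0 < thresh, 4, 3)
        cm = confusion_matrix(labels, pred, labels=[3, 4]).astype(float)
        cm /= cm.sum(axis=1, keepdims=True)  # row-normalize
        tpr, fpr = cm[0, 0], cm[1, 0]
        if fpr < max_fpr and (tpr > best[1] or (tpr == best[1] and fpr < best[2])):
            best = (thresh, tpr, fpr)
    return best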
Code example #3
File: trainer.py  Project: aizmeng/pycnbi
def cross_validate(cfg, featdata, cv_file=None):
    """
    Perform cross validation
    """
    # Init a classifier
    selected_classifier = cfg.CLASSIFIER['selected']
    if selected_classifier == 'GB':
        cls = GradientBoostingClassifier(
            loss='deviance',
            learning_rate=cfg.CLASSIFIER['GB']['learning_rate'],
            presort='auto',
            n_estimators=cfg.CLASSIFIER['GB']['trees'],
            subsample=1.0,
            max_depth=cfg.CLASSIFIER['GB']['depth'],
            random_state=cfg.CLASSIFIER['GB']['seed'],
            max_features='sqrt',
            verbose=0,
            warm_start=False)
    elif selected_classifier == 'XGB':
        cls = XGBClassifier(
            loss='deviance',
            learning_rate=cfg.CLASSIFIER['XGB']['learning_rate'],
            presort='auto',
            n_estimators=cfg.CLASSIFIER['XGB']['trees'],
            subsample=1.0,
            max_depth=cfg.CLASSIFIER['XGB']['depth'],
            random_state=cfg.CLASSIFIER['XGB']['seed'],
            max_features='sqrt',
            verbose=0,
            warm_start=False)
    elif selected_classifier == 'RF':
        cls = RandomForestClassifier(
            n_estimators=cfg.CLASSIFIER['RF']['trees'],
            max_features='auto',
            max_depth=cfg.CLASSIFIER['RF']['depth'],
            n_jobs=cfg.N_JOBS,
            random_state=cfg.CLASSIFIER['RF']['seed'],
            oob_score=False,
            class_weight='balanced_subsample')
    elif selected_classifier == 'LDA':
        cls = LDA()
    elif selected_classifier == 'rLDA':
        cls = rLDA(cfg.CLASSIFIER['rLDA']['r_coeff'])
    else:
        logger.error('Unknown classifier type %s' % selected_classifier)
        raise ValueError

    # Setup features
    X_data = featdata['X_data']
    Y_data = featdata['Y_data']
    wlen = featdata['wlen']

    # Choose CV type
    ntrials, nsamples, fsize = X_data.shape
    selected_cv = cfg.CV_PERFORM['selected']
    if selected_cv == 'LeaveOneOut':
        logger.info_green('%d-fold leave-one-out cross-validation' % ntrials)
        if SKLEARN_OLD:
            cv = LeaveOneOut(len(Y_data))
        else:
            cv = LeaveOneOut()
    elif selected_cv == 'StratifiedShuffleSplit':
        logger.info_green(
            '%d-fold stratified cross-validation with test set ratio %.2f' %
            (cfg.CV_PERFORM[selected_cv]['folds'],
             cfg.CV_PERFORM[selected_cv]['test_ratio']))
        if SKLEARN_OLD:
            cv = StratifiedShuffleSplit(
                Y_data[:, 0],
                cfg.CV_PERFORM[selected_cv]['folds'],
                test_size=cfg.CV_PERFORM[selected_cv]['test_ratio'],
                random_state=cfg.CV_PERFORM[selected_cv]['seed'])
        else:
            cv = StratifiedShuffleSplit(
                n_splits=cfg.CV_PERFORM[selected_cv]['folds'],
                test_size=cfg.CV_PERFORM[selected_cv]['test_ratio'],
                random_state=cfg.CV_PERFORM[selected_cv]['seed'])
    else:
        logger.error('%s is not supported yet. Sorry.' % selected_cv)
        raise NotImplementedError
    logger.info('%d trials, %d samples per trial, %d feature dimension' %
                (ntrials, nsamples, fsize))

    # Do it!
    timer_cv = qc.Timer()
    scores, cm_txt = crossval_epochs(cv,
                                     X_data,
                                     Y_data,
                                     cls,
                                     cfg.tdef.by_value,
                                     cfg.CV['BALANCE_SAMPLES'],
                                     n_jobs=cfg.N_JOBS,
                                     ignore_thres=cfg.CV['IGNORE_THRES'],
                                     decision_thres=cfg.CV['DECISION_THRES'])
    t_cv = timer_cv.sec()

    # Export results
    txt = 'Cross validation took %d seconds.\n' % t_cv
    txt += '\n- Class information\n'
    txt += '%d epochs, %d samples per epoch, %d feature dimension (total %d samples)\n' %\
        (ntrials, nsamples, fsize, ntrials * nsamples)
    for ev in np.unique(Y_data):
        txt += '%s: %d trials\n' % (cfg.tdef.by_value[ev],
                                    len(np.where(Y_data[:, 0] == ev)[0]))
    if cfg.CV['BALANCE_SAMPLES']:
        txt += 'The number of samples was balanced using %ssampling.\n' % \
            cfg.CV['BALANCE_SAMPLES'].lower()
    txt += '\n- Experiment condition\n'
    txt += 'Sampling frequency: %.3f Hz\n' % featdata['sfreq']
    txt += 'Spatial filter: %s (channels: %s)\n' % (cfg.SP_FILTER,
                                                    cfg.SP_CHANNELS)
    txt += 'Spectral filter: %s\n' % cfg.TP_FILTER[cfg.TP_FILTER['selected']]
    txt += 'Notch filter: %s\n' % cfg.NOTCH_FILTER[
        cfg.NOTCH_FILTER['selected']]
    txt += 'Channels: ' + ','.join(
        [str(featdata['ch_names'][p]) for p in featdata['picks']]) + '\n'
    txt += 'PSD range: %.1f - %.1f Hz\n' % (cfg.FEATURES['PSD']['fmin'],
                                            cfg.FEATURES['PSD']['fmax'])
    txt += 'Window step: %.2f msec\n' % (
        1000.0 * cfg.FEATURES['PSD']['wstep'] / featdata['sfreq'])
    if type(wlen) is list:
        for i, w in enumerate(wlen):
            txt += 'Window size: %.1f msec\n' % (w * 1000.0)
            txt += 'Epoch range: %s sec\n' % (cfg.EPOCH[i])
    else:
        txt += 'Window size: %.1f msec\n' % (cfg.FEATURES['PSD']['wlen'] *
                                             1000.0)
        txt += 'Epoch range: %s sec\n' % (cfg.EPOCH)
    txt += 'Decimation factor: %d\n' % cfg.FEATURES['PSD']['decim']

    # Compute stats
    cv_mean, cv_std = np.mean(scores), np.std(scores)
    txt += '\n- Average CV accuracy over %d epochs (random seed=%s)\n' % (
        ntrials, cfg.CV_PERFORM[selected_cv]['seed'])
    if selected_cv in ['LeaveOneOut', 'StratifiedShuffleSplit']:
        txt += "mean %.3f, std: %.3f\n" % (cv_mean, cv_std)
    txt += 'Classifier: %s, ' % selected_classifier
    if selected_classifier == 'RF':
        txt += '%d trees, %s max depth, random state %s\n' % (
            cfg.CLASSIFIER['RF']['trees'], cfg.CLASSIFIER['RF']['depth'],
            cfg.CLASSIFIER['RF']['seed'])
    elif selected_classifier in ('GB', 'XGB'):
        txt += '%d trees, %s max depth, %s learning rate, random state %s\n' % (
            cfg.CLASSIFIER[selected_classifier]['trees'],
            cfg.CLASSIFIER[selected_classifier]['depth'],
            cfg.CLASSIFIER[selected_classifier]['learning_rate'],
            cfg.CLASSIFIER[selected_classifier]['seed'])
    elif selected_classifier == 'rLDA':
        txt += 'regularization coefficient %.2f\n' % cfg.CLASSIFIER['rLDA'][
            'r_coeff']
    if cfg.CV['IGNORE_THRES'] is not None:
        txt += 'Decision threshold: %.2f\n' % cfg.CV['IGNORE_THRES']
    txt += '\n- Confusion Matrix\n' + cm_txt
    logger.info(txt)

    # Export to a file
    if 'export_result' in cfg.CV_PERFORM[selected_cv] and cfg.CV_PERFORM[
            selected_cv]['export_result'] is True:
        if cv_file is None:
            if cfg.EXPORT_CLS is True:
                qc.make_dirs('%s/classifier' % cfg.DATA_PATH)
                fout = open('%s/classifier/cv_result.txt' % cfg.DATA_PATH, 'w')
            else:
                fout = open('%s/cv_result.txt' % cfg.DATA_PATH, 'w')
        else:
            fout = open(cv_file, 'w')
        fout.write(txt)
        fout.close()
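
SKLEARN_OLD switches between the pre-0.18 cross_validation API (splitters built from the labels and iterated directly) and the modern model_selection API (splitters with an explicit split() call). A sketch of how such a flag might be derived and consumed; the version check here is an assumption, not pycnbi's actual detection code:

import numpy as np
import sklearn

X = np.zeros((40, 5))
y = np.repeat([0, 1], 20)

# Assumed detection: the model_selection module appeared in scikit-learn 0.18
_ver = sklearn.__version__.split('.')
SKLEARN_OLD = _ver[0] == '0' and int(_ver[1]) < 18

if SKLEARN_OLD:
    from sklearn.cross_validation import StratifiedShuffleSplit
    cv = StratifiedShuffleSplit(y, 10, test_size=0.2, random_state=0)
    splits = cv                      # old API: the object itself is iterable
else:
    from sklearn.model_selection import StratifiedShuffleSplit
    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    splits = cv.split(X, y)          # new API: explicit split() call

for train_idx, test_idx in splits:
    pass  # train/evaluate one fold with X[train_idx], X[test_idx]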
Code example #4
File: trainer.py  Project: aizmeng/pycnbi
def train_decoder(cfg, featdata, feat_file=None):
    """
    Train the final decoder using all data
    """
    # Init a classifier
    selected_classifier = cfg.CLASSIFIER['selected']
    if selected_classifier == 'GB':
        cls = GradientBoostingClassifier(
            loss='deviance',
            learning_rate=cfg.CLASSIFIER[selected_classifier]['learning_rate'],
            n_estimators=cfg.CLASSIFIER[selected_classifier]['trees'],
            subsample=1.0,
            max_depth=cfg.CLASSIFIER[selected_classifier]['depth'],
            random_state=cfg.CLASSIFIER[selected_classifier]['seed'],
            max_features='sqrt',
            verbose=0,
            warm_start=False,
            presort='auto')
    elif selected_classifier == 'XGB':
        cls = XGBClassifier(
            loss='deviance',
            learning_rate=cfg.CLASSIFIER[selected_classifier]['learning_rate'],
            n_estimators=cfg.CLASSIFIER[selected_classifier]['trees'],
            subsample=1.0,
            max_depth=cfg.CLASSIFIER[selected_classifier]['depth'],
            random_state=cfg.CLASSIFIER[selected_classifier]['seed'],
            max_features='sqrt',
            verbose=0,
            warm_start=False,
            presort='auto')
    elif selected_classifier == 'RF':
        cls = RandomForestClassifier(
            n_estimators=cfg.CLASSIFIER[selected_classifier]['trees'],
            max_features='auto',
            max_depth=cfg.CLASSIFIER[selected_classifier]['depth'],
            n_jobs=cfg.N_JOBS,
            random_state=cfg.CLASSIFIER[selected_classifier]['seed'],
            oob_score=False,
            class_weight='balanced_subsample')
    elif selected_classifier == 'LDA':
        cls = LDA()
    elif selected_classifier == 'rLDA':
        cls = rLDA(cfg.CLASSIFIER[selected_classifier]['r_coeff'])
    else:
        logger.error('Unknown classifier %s' % selected_classifier)
        raise ValueError

    # Setup features
    X_data = featdata['X_data']
    Y_data = featdata['Y_data']
    wlen = featdata['wlen']
    if cfg.FEATURES['PSD']['wlen'] is None:
        cfg.FEATURES['PSD']['wlen'] = wlen
    w_frames = featdata['w_frames']
    ch_names = featdata['ch_names']
    X_data_merged = np.concatenate(X_data)
    Y_data_merged = np.concatenate(Y_data)
    if cfg.CV['BALANCE_SAMPLES']:
        X_data_merged, Y_data_merged = balance_samples(
            X_data_merged,
            Y_data_merged,
            cfg.CV['BALANCE_SAMPLES'],
            verbose=True)

    # Start training the decoder
    logger.info_green('Training the decoder')
    timer = qc.Timer()
    cls.n_jobs = cfg.N_JOBS
    cls.fit(X_data_merged, Y_data_merged)
    logger.info('Trained %d samples x %d dimension in %.1f sec' %\
          (X_data_merged.shape[0], X_data_merged.shape[1], timer.sec()))
    cls.n_jobs = 1  # always set n_jobs=1 for testing

    # Export the decoder
    classes = {c: cfg.tdef.by_value[c] for c in np.unique(Y_data)}
    if cfg.FEATURES['selected'] == 'PSD':
        data = dict(cls=cls,
                    ch_names=ch_names,
                    psde=featdata['psde'],
                    sfreq=featdata['sfreq'],
                    picks=featdata['picks'],
                    classes=classes,
                    epochs=cfg.EPOCH,
                    w_frames=w_frames,
                    w_seconds=cfg.FEATURES['PSD']['wlen'],
                    wstep=cfg.FEATURES['PSD']['wstep'],
                    spatial=cfg.SP_FILTER,
                    spatial_ch=featdata['picks'],
                    spectral=cfg.TP_FILTER[cfg.TP_FILTER['selected']],
                    spectral_ch=featdata['picks'],
                    notch=cfg.NOTCH_FILTER[cfg.NOTCH_FILTER['selected']],
                    notch_ch=featdata['picks'],
                    multiplier=cfg.MULTIPLIER,
                    ref_ch=cfg.REREFERENCE[cfg.REREFERENCE['selected']],
                    decim=cfg.FEATURES['PSD']['decim'])
    # Note: 'data' is only defined in the PSD branch above; other feature
    # types would need their own export dict before saving.
    clsfile = '%s/classifier/classifier-%s.pkl' % (cfg.DATA_PATH,
                                                   platform.architecture()[0])
    qc.make_dirs('%s/classifier' % cfg.DATA_PATH)
    qc.save_obj(clsfile, data)
    logger.info('Decoder saved to %s' % clsfile)

    # Reverse-lookup frequency from FFT
    fq = 0
    if type(cfg.FEATURES['PSD']['wlen']) == list:
        fq_res = 1.0 / cfg.FEATURES['PSD']['wlen'][0]
    else:
        fq_res = 1.0 / cfg.FEATURES['PSD']['wlen']
    fqlist = []
    while fq <= cfg.FEATURES['PSD']['fmax']:
        if fq >= cfg.FEATURES['PSD']['fmin']:
            fqlist.append(fq)
        fq += fq_res

    # Show top distinctive features
    if cfg.FEATURES['selected'] == 'PSD':
        logger.info_green('Good features ordered by importance')
        if selected_classifier in ['RF', 'GB', 'XGB']:
            keys, values = qc.sort_by_value(list(cls.feature_importances_),
                                            rev=True)
        elif selected_classifier in ['LDA', 'rLDA']:
            keys, values = qc.sort_by_value(cls.w, rev=True)
        keys = np.array(keys)
        values = np.array(values)

        if cfg.EXPORT_GOOD_FEATURES:
            if feat_file is None:
                gfout = open('%s/classifier/good_features.txt' % cfg.DATA_PATH,
                             'w')
            else:
                gfout = open(feat_file, 'w')

        if type(wlen) is not list:
            ch_names = [ch_names[c] for c in featdata['picks']]
        else:
            # Build windowed channel names from the picked channels without
            # clobbering the source list while it is still being read
            ch_names = ['w%d-%s' % (w, ch_names[c])
                        for w in range(len(wlen))
                        for c in featdata['picks']]

        chlist, hzlist = features.feature2chz(keys, fqlist, ch_names=ch_names)
        valnorm = values[:cfg.FEAT_TOPN].copy()
        valsum = np.sum(valnorm)
        if valsum == 0:
            valsum = 1
        valnorm = valnorm / valsum * 100.0

        # show top-N features
        for i, (ch, hz) in enumerate(zip(chlist, hzlist)):
            if i >= cfg.FEAT_TOPN:
                break
            txt = '%-3s %5.1f Hz  normalized importance %-6s  raw importance %-6s  feature %-5d' %\
                  (ch, hz, '%.2f%%' % valnorm[i], '%.2f%%' % (values[i] * 100.0), keys[i])
            logger.info(txt)

        if cfg.EXPORT_GOOD_FEATURES:
            gfout.write('Importance(%) Channel Frequency Index\n')
            for i, (ch, hz) in enumerate(zip(chlist, hzlist)):
                gfout.write('%.3f\t%s\t%s\t%d\n' %
                            (values[i] * 100.0, ch, hz, keys[i]))
            gfout.close()
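
The fqlist loop reconstructs the PSD frequency bins from the window length, since the FFT frequency resolution is 1/wlen Hz. The same list can be built with numpy.arange under that assumption:

import numpy as np

wlen = 0.5            # analysis window in seconds
fmin, fmax = 1.0, 40.0
fq_res = 1.0 / wlen   # FFT frequency resolution in Hz

# All bins from 0 up to fmax, then keep those at or above fmin
fqlist = [fq for fq in np.arange(0.0, fmax + fq_res, fq_res)
          if fmin <= fq <= fmax]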
Code example #5
File: trainer.py  Project: aizmeng/pycnbi
def balance_tpr(cfg, featdata):
    """
    Find the threshold of class index 0 that yields equal number of true positive samples of each class.
    Currently only available for binary classes.

    Params
    ======
    cfg: config module
    featdata: feature data computed using compute_features()
    """

    n_jobs = cfg.N_JOBS
    if n_jobs is None:
        n_jobs = mp.cpu_count()
    if n_jobs > 1:
        logger.info('balance_tpr(): Using %d cores' % n_jobs)
        pool = mp.Pool(n_jobs)
        results = []

    # Init a classifier
    selected_classifier = cfg.CLASSIFIER['selected']
    if selected_classifier == 'GB':
        cls = GradientBoostingClassifier(
            loss='deviance',
            learning_rate=cfg.CLASSIFIER['GB']['learning_rate'],
            n_estimators=cfg.CLASSIFIER['GB']['trees'],
            subsample=1.0,
            max_depth=cfg.CLASSIFIER['GB']['depth'],
            random_state=cfg.CLASSIFIER[selected_classifier]['seed'],
            max_features='sqrt',
            verbose=0,
            warm_start=False,
            presort='auto')
    elif selected_classifier == 'XGB':
        cls = XGBClassifier(
            loss='deviance',
            learning_rate=cfg.CLASSIFIER['XGB']['learning_rate'],
            n_estimators=cfg.CLASSIFIER['XGB']['trees'],
            subsample=1.0,
            max_depth=cfg.CLASSIFIER['XGB']['depth'],
            random_state=cfg.CLASSIFIER['XGB']['seed'],
            max_features='sqrt',
            verbose=0,
            warm_start=False,
            presort='auto')
    elif selected_classifier == 'RF':
        cls = RandomForestClassifier(
            n_estimators=cfg.CLASSIFIER['RF']['trees'],
            max_features='auto',
            max_depth=cfg.CLASSIFIER['RF']['depth'],
            n_jobs=cfg.N_JOBS,
            random_state=cfg.CLASSIFIER['RF']['seed'],
            oob_score=False,
            class_weight='balanced_subsample')
    elif selected_classifier == 'LDA':
        cls = LDA()
    elif selected_classifier == 'rLDA':
        cls = rLDA(cfg.CLASSIFIER['rLDA']['r_coeff'])
    else:
        logger.error('Unknown classifier type %s' % selected_classifier)
        raise ValueError

    # Setup features
    X_data = featdata['X_data']
    Y_data = featdata['Y_data']
    wlen = featdata['wlen']
    if cfg.FEATURES['PSD']['wlen'] is None:
        cfg.FEATURES['PSD']['wlen'] = wlen

    # Choose CV type
    ntrials, nsamples, fsize = X_data.shape
    selected_CV = cfg.CV_PERFORM['selected']
    if selected_CV == 'LeaveOneOut':
        logger.info_green('\n%d-fold leave-one-out cross-validation' % ntrials)
        if SKLEARN_OLD:
            cv = LeaveOneOut(len(Y_data))
        else:
            cv = LeaveOneOut()
    elif selected_CV == 'StratifiedShuffleSplit':
        logger.info_green(
            '\n%d-fold stratified cross-validation with test set ratio %.2f' %
            (cfg.CV_PERFORM[selected_CV]['folds'],
             cfg.CV_PERFORM[selected_CV]['test_ratio']))
        if SKLEARN_OLD:
            cv = StratifiedShuffleSplit(
                Y_data[:, 0],
                cfg.CV_PERFORM[selected_CV]['folds'],
                test_size=cfg.CV_PERFORM[selected_CV]['test_ratio'],
                random_state=cfg.CV_PERFORM[selected_CV]['random_seed'])
        else:
            cv = StratifiedShuffleSplit(
                n_splits=cfg.CV_PERFORM[selected_CV]['folds'],
                test_size=cfg.CV_PERFORM[selected_CV]['test_ratio'],
                random_state=cfg.CV_PERFORM[selected_CV]['random_seed'])
    else:
        logger.error('%s is not supported yet. Sorry.' % selected_CV)
        raise NotImplementedError
    logger.info('%d trials, %d samples per trial, %d feature dimension' %
                (ntrials, nsamples, fsize))

    # For classifier itself, single core is usually faster
    cls.n_jobs = 1
    Y_preds = []

    if SKLEARN_OLD:
        splits = cv
    else:
        splits = cv.split(X_data, Y_data[:, 0])
    for cnum, (train, test) in enumerate(splits):
        X_train = np.concatenate(X_data[train])
        X_test = np.concatenate(X_data[test])
        Y_train = np.concatenate(Y_data[train])
        Y_test = np.concatenate(Y_data[test])
        if n_jobs > 1:
            results.append(
                pool.apply_async(
                    get_predict_proba,
                    [cls, X_train, Y_train, X_test, Y_test, cnum + 1]))
        else:
            Y_preds.append(
                get_predict_proba(cls, X_train, Y_train, X_test, Y_test,
                                  cnum + 1))

    # Aggregate predictions
    if n_jobs > 1:
        pool.close()
        pool.join()
        for r in results:
            Y_preds.append(r.get())
    Y_preds = np.concatenate(Y_preds, axis=0)

    # Find threshold for class index 0
    # (assumes Y_preds holds one class-0 probability per sample; the manual
    # midpoint below is then the median)
    Y_preds = sorted(Y_preds)
    mid_idx = int(len(Y_preds) / 2)
    if len(Y_preds) == 1:
        return 0.5  # should not reach here in normal conditions
    elif len(Y_preds) % 2 == 0:
        thres = Y_preds[mid_idx -
                        1] + (Y_preds[mid_idx] - Y_preds[mid_idx - 1]) / 2
    else:
        thres = Y_preds[mid_idx]
    return thres
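
The final block takes the median of the sorted predictions by hand. Assuming Y_preds is a 1-D array of class-0 probabilities, numpy.median gives the identical threshold (sorting 2-D probability arrays directly would raise an error):

import numpy as np

y_preds = np.array([0.2, 0.9, 0.4, 0.7])  # class-0 probabilities per sample
thres = np.median(y_preds)                 # 0.55, same as the manual midpoint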
Code example #6
File: trainer.py  Project: LSYhhhh/pycnbi
def cross_validate(cfg, featdata, cv_file=None):
    """
    Perform cross validation
    """
    # Init a classifier
    if cfg.CLASSIFIER == 'GB':
        cls = GradientBoostingClassifier(loss='deviance',
                                         learning_rate=cfg.GB['learning_rate'],
                                         n_estimators=cfg.GB['trees'],
                                         subsample=1.0,
                                         max_depth=cfg.GB['max_depth'],
                                         random_state=cfg.GB['seed'],
                                         max_features='sqrt',
                                         verbose=0,
                                         warm_start=False,
                                         presort='auto')
    elif cfg.CLASSIFIER == 'XGB':
        cls = XGBClassifier(loss='deviance',
                            learning_rate=cfg.GB['learning_rate'],
                            n_estimators=cfg.GB['trees'],
                            subsample=1.0,
                            max_depth=cfg.GB['max_depth'],
                            random_state=cfg.GB['seed'],
                            max_features='sqrt',
                            verbose=0,
                            warm_start=False,
                            presort='auto')
    elif cfg.CLASSIFIER == 'RF':
        cls = RandomForestClassifier(n_estimators=cfg.RF['trees'],
                                     max_features='auto',
                                     max_depth=cfg.RF['max_depth'],
                                     n_jobs=cfg.N_JOBS,
                                     random_state=cfg.RF['seed'],
                                     oob_score=True,
                                     class_weight='balanced_subsample')
    elif cfg.CLASSIFIER == 'LDA':
        cls = LDA()
    elif cfg.CLASSIFIER == 'rLDA':
        cls = rLDA(cfg.RLDA_REGULARIZE_COEFF)
    else:
        raise ValueError('Unknown classifier type %s' % cfg.CLASSIFIER)

    # Setup features
    X_data = featdata['X_data']
    Y_data = featdata['Y_data']
    wlen = featdata['wlen']
    if cfg.PSD['wlen'] is None:
        cfg.PSD['wlen'] = wlen

    # Choose CV type
    ntrials, nsamples, fsize = X_data.shape
    if cfg.CV_PERFORM == 'LeaveOneOut':
        print('\n>> %d-fold leave-one-out cross-validation' % ntrials)
        if SKLEARN_OLD:
            cv = LeaveOneOut(len(Y_data))
        else:
            cv = LeaveOneOut()
    elif cfg.CV_PERFORM == 'StratifiedShuffleSplit':
        print(
            '\n>> %d-fold stratified cross-validation with test set ratio %.2f'
            % (cfg.CV_FOLDS, cfg.CV_TEST_RATIO))
        if SKLEARN_OLD:
            cv = StratifiedShuffleSplit(Y_data[:, 0],
                                        cfg.CV_FOLDS,
                                        test_size=cfg.CV_TEST_RATIO,
                                        random_state=cfg.CV_RANDOM_SEED)
        else:
            cv = StratifiedShuffleSplit(n_splits=cfg.CV_FOLDS,
                                        test_size=cfg.CV_TEST_RATIO,
                                        random_state=cfg.CV_RANDOM_SEED)
    else:
        raise NotImplementedError('%s is not supported yet. Sorry.' %
                                  cfg.CV_PERFORM)
    print('%d trials, %d samples per trial, %d feature dimension' %
          (ntrials, nsamples, fsize))

    # Do it!
    timer_cv = qc.Timer()
    scores, cm_txt = crossval_epochs(cv,
                                     X_data,
                                     Y_data,
                                     cls,
                                     cfg.tdef.by_value,
                                     cfg.BALANCE_SAMPLES,
                                     n_jobs=cfg.N_JOBS,
                                     ignore_thres=cfg.CV_IGNORE_THRES,
                                     decision_thres=cfg.CV_DECISION_THRES)
    t_cv = timer_cv.sec()

    # Export results
    txt = '\n>> Cross validation took %d seconds.\n' % t_cv
    txt += '\n- Class information\n'
    txt += '%d epochs, %d samples per epoch, %d feature dimension (total %d samples)\n' %\
        (ntrials, nsamples, fsize, ntrials * nsamples)
    for ev in np.unique(Y_data):
        txt += '%s: %d trials\n' % (cfg.tdef.by_value[ev],
                                    len(np.where(Y_data[:, 0] == ev)[0]))
    if cfg.BALANCE_SAMPLES:
        txt += 'The number of samples was balanced across classes. Method: %s\n' % cfg.BALANCE_SAMPLES
    txt += '\n- Experiment conditions\n'
    txt += 'Spatial filter: %s (channels: %s)\n' % (cfg.SP_FILTER,
                                                    cfg.SP_CHANNELS)
    txt += 'Spectral filter: %s\n' % cfg.TP_FILTER
    txt += 'Notch filter: %s\n' % cfg.NOTCH_FILTER
    txt += 'Channels: ' + ','.join(
        [str(featdata['ch_names'][p]) for p in featdata['picks']]) + '\n'
    txt += 'PSD range: %.1f - %.1f Hz\n' % (cfg.PSD['fmin'], cfg.PSD['fmax'])
    txt += 'Window step: %.2f msec\n' % (1000.0 * cfg.PSD['wstep'] /
                                         featdata['sfreq'])
    if type(wlen) is list:
        for i, w in enumerate(wlen):
            txt += 'Window size: %.1f msec\n' % (w * 1000.0)
            txt += 'Epoch range: %s sec\n' % (cfg.EPOCH[i])
    else:
        txt += 'Window size: %.1f msec\n' % (cfg.PSD['wlen'] * 1000.0)
        txt += 'Epoch range: %s sec\n' % (cfg.EPOCH)

    # Compute stats
    cv_mean, cv_std = np.mean(scores), np.std(scores)
    txt += '\n- Average CV accuracy over %d epochs (random seed=%s)\n' % (
        ntrials, cfg.CV_RANDOM_SEED)
    if cfg.CV_PERFORM in ['LeaveOneOut', 'StratifiedShuffleSplit']:
        txt += "mean %.3f, std: %.3f\n" % (cv_mean, cv_std)
    txt += 'Classifier: %s, ' % cfg.CLASSIFIER
    if cfg.CLASSIFIER == 'RF':
        txt += '%d trees, %s max depth, random state %s\n' % (
            cfg.RF['trees'], cfg.RF['max_depth'], cfg.RF['seed'])
    elif cfg.CLASSIFIER == 'GB' or cfg.CLASSIFIER == 'XGB':
        txt += '%d trees, %s max depth, %s learning rate, random state %s\n' % (
            cfg.GB['trees'], cfg.GB['max_depth'], cfg.GB['learning_rate'],
            cfg.GB['seed'])
    elif cfg.CLASSIFIER == 'rLDA':
        txt += 'regularization coefficient %.2f\n' % cfg.RLDA_REGULARIZE_COEFF
    if cfg.CV_IGNORE_THRES is not None:
        txt += 'Decision threshold: %.2f\n' % cfg.CV_IGNORE_THRES
    txt += '\n- Confusion Matrix\n' + cm_txt
    print(txt)

    # Export to a file
    if hasattr(cfg, 'CV_EXPORT_RESULT') and cfg.CV_EXPORT_RESULT is True \
            and cfg.CV_PERFORM is not None:
        if cv_file is None:
            if cfg.EXPORT_CLS is True:
                qc.make_dirs('%s/classifier' % cfg.DATADIR)
                fout = open('%s/classifier/cv_result.txt' % cfg.DATADIR, 'w')
            else:
                fout = open('%s/cv_result.txt' % cfg.DATADIR, 'w')
        else:
            fout = open(cv_file, 'w')
        fout.write(txt)
        fout.close()
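
The export block opens and closes the result file manually across three branches. A sketch of the same logic with a context manager, which guarantees the handle is closed even if write() raises (function name and signature are illustrative):

import os

def export_cv_result(txt, datadir, cv_file=None, export_cls=False):
    """Write the CV report, mirroring the branch logic above."""
    if cv_file is None:
        subdir = os.path.join(datadir, 'classifier') if export_cls else datadir
        os.makedirs(subdir, exist_ok=True)
        cv_file = os.path.join(subdir, 'cv_result.txt')
    with open(cv_file, 'w') as fout:   # closed automatically
        fout.write(txt)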
Code example #7
def createClassifier(loadedraw,\
                     events,\
                     tmin,\
                     tmax,\
                     tlow,\
                     thigh,\
                     regcoeff,\
                     useLeaveOneOut,\
                     APPLY_CAR,\
                     APPLY_PCA,\
                     l_freq,\
                     h_freq,\
                     MAX_FPR,\
                     picks_feat,\
                     baselineRange,\
                     decim_factor,\
                     cv_container,\
                     FILTER_METHOD,\
                     best_threshold,\
                     verbose=False):
    tdef, sfreq, event_id, b, a, zi, t_lower, t_upper, epochs, wframes = preprocess(loadedraw=loadedraw,\
                                                                                    events=events,\
                                                                                    APPLY_CAR=APPLY_CAR,\
                                                                                    l_freq=l_freq,\
                                                                                    h_freq=h_freq,\
                                                                                    filter_method=FILTER_METHOD,\
                                                                                    tmin=tmin,\
                                                                                    tmax=tmax,\
                                                                                    tlow=tlow,\
                                                                                    thigh=thigh,\
                                                                                    n_jobs=n_jobs,\
                                                                                    picks_feat=picks_feat,\
                                                                                    baselineRange=baselineRange,
                                                                                    verbose=False)
    train_pcaed, pca, trainShiftFactor, trainScaleFactor = compute_features(signals=epochs._data,\
                                                                            dataset_type='train',\
                                                                            sfreq=sfreq,\
                                                                            l_freq=l_freq,\
                                                                            h_freq=h_freq,\
                                                                            decim_factor=decim_factor,\
                                                                            shiftFactor=None,\
                                                                            scaleFactor=None,\
                                                                            pca=None,\
                                                                            tmin=tmin,\
                                                                            tmax=tmax,\
                                                                            tlow=tlow,\
                                                                            thigh=thigh,\
                                                                            filter_method=FILTER_METHOD)

    cls = rLDA(regcoeff)
    label = epochs.events[:, 2]
    cls.fit(train_pcaed, label)
    ch_names = [loadedraw.info['ch_names'][c] for c in picks_feat]
    data = dict(apply_car=APPLY_CAR,
                sfreq=loadedraw.info['sfreq'],\
                picks=picks_feat,\
                decim_factor=decim_factor,\
                ch_names=ch_names,\
                tmin=tmin,\
                tmax=tmax,\
                tlow=tlow,\
                thigh=thigh,\
                l_freq=l_freq,\
                h_freq=h_freq,\
                baselineRange=baselineRange,\
                shiftFactor=trainShiftFactor,\
                scaleFactor=trainScaleFactor,\
                cls=cls,\
                pca=pca,\
                threshold=best_threshold[0],\
                filter_method=FILTER_METHOD,\
                wframes=wframes)
    outdir = DATADIR + '/errp_classifier'
    qc.make_dirs(outdir)
    clsfile = outdir + '/errp_classifier.pcl'
    qc.save_obj(clsfile, data)
    print('Saved as %s' % clsfile)
    print('Using ' + str(epochs._data.shape[0]) + ' epochs')
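
The classifier dictionary is persisted with the project helper qc.save_obj. Assuming that helper is a thin pickle wrapper, loading the saved .pcl back for online use would look roughly like this (the path and load idiom are assumptions):

import pickle

# Hypothetical equivalent of reading back what qc.save_obj wrote
with open('errp_classifier/errp_classifier.pcl', 'rb') as f:
    model = pickle.load(f)

cls = model['cls']                      # trained rLDA
pca = model['pca']                      # fitted PCA (or None)
shift, scale = model['shiftFactor'], model['scaleFactor']
# Online: apply the same bandpass/normalize/downsample steps, then e.g.
# probs = cls.predict_proba(pca.transform(features))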
Code example #8
def processCV(loadedraw,\
              events,\
              tmin,\
              tmax,\
              tlow,\
              thigh,\
              regcoeff,\
              useLeaveOneOut,\
              APPLY_CAR,\
              APPLY_PCA,\
              l_freq,\
              h_freq,\
              MAX_FPR,\
              picks_feat,\
              baselineRange,\
              decim_factor,\
              cv_container,\
              FILTER_METHOD,\
              verbose=False):
    tdef, sfreq, event_id, b, a, zi, t_lower, t_upper, epochs, wframes = preprocess(loadedraw=loadedraw,\
                                                                                    events=events,\
                                                                                    APPLY_CAR=APPLY_CAR,\
                                                                                    l_freq=l_freq,\
                                                                                    h_freq=h_freq,\
                                                                                    filter_method=FILTER_METHOD,\
                                                                                    tmin=tmin,\
                                                                                    tmax=tmax,\
                                                                                    tlow=tlow,\
                                                                                    thigh=thigh,\
                                                                                    n_jobs=n_jobs,\
                                                                                    picks_feat=picks_feat,\
                                                                                    baselineRange=baselineRange,
                                                                                    verbose=False)

    # %% Fold creation
    # epochs.events contains the label that we want in the third column.
    # We can then get the relevant data within a fold via epochs._data[test],
    # which returns an array of shape (n_test, n_channels, n_times)
    label = epochs.events[:, 2]
    cv = StratifiedShuffleSplit(label, n_iter=20, test_size=0.1, random_state=1337)

    if useLeaveOneOut is True:
        cv = LeaveOneOut(len(label))

    # %% Fold processing
    count = 1
    confusion_matrixes = []
    confusion_matrixes_percent = []
    predicted = ''
    test_label = ''
    firstIterCV = True
    probabilities = np.array([[]], ndmin=2)
    predictions = np.array([])
    best_threshold = []
    cv_probabilities = []
    cv_probabilities_label = []

    if (cv_container is None):
        cv_container = []
        for train, test in cv:
            train_data = epochs._data[train]
            train_label = label[train]
            test_data = epochs._data[test]
            test_label = label[test]

            ## Train data processing ##
            train_pcaed, pca, trainShiftFactor, trainScaleFactor = compute_features(signals=train_data,\
                                                                                    dataset_type='train',\
                                                                                    sfreq=sfreq,\
                                                                                    l_freq=l_freq,\
                                                                                    h_freq=h_freq,\
                                                                                    decim_factor=decim_factor,\
                                                                                    shiftFactor=None,\
                                                                                    scaleFactor=None,\
                                                                                    pca=None,\
                                                                                    tmin=tmin,\
                                                                                    tmax=tmax,\
                                                                                    tlow=tlow,\
                                                                                    thigh=thigh,\
                                                                                    filter_method=FILTER_METHOD)

            # compute_features applies the same steps as for train (bandpass,
            # normalization, downsampling, channel/time merge) but reuses the
            # PCA fitted on the train split
            test_pcaed, pca_test_unused, _, _ = compute_features(signals=test_data,\
                                                                 dataset_type='test',\
                                                                 sfreq=sfreq,\
                                                                 l_freq=l_freq,\
                                                                 h_freq=h_freq,\
                                                                 decim_factor=decim_factor,\
                                                                 shiftFactor=trainShiftFactor,\
                                                                 scaleFactor=trainScaleFactor,\
                                                                 pca=pca,\
                                                                 tmin=tmin,\
                                                                 tmax=tmax,\
                                                                 tlow=tlow,\
                                                                 thigh=thigh,\
                                                                 filter_method=FILTER_METHOD)
            ## Test ##
            train_x = train_pcaed
            test_x = test_pcaed

            cv_container.append([train_x, test_x, train_label, test_label])

    for train_x, test_x, train_label, test_label in cv_container:
        # Fitting
        cls = rLDA(regcoeff)
        cls.fit(train_x, train_label)

        # AlternativeClassifier init
        # RF = dict(trees=100, maxdepth=None)
        # cls = RandomForestClassifier(n_estimators=RF['trees'], max_features='auto', max_depth=RF['maxdepth'], n_jobs=n_jobs)
        # cls = RandomForestClassifier(n_estimators=RF['trees'], max_features='auto', max_depth=RF['maxdepth'], class_weight="balanced", n_jobs=n_jobs)
        # cls = LDA(solver='eigen')
        # cls = QDA(reg_param=0.3) # regularized LDA

        predicted = cls.predict(test_x)
        probs = cls.predict_proba(test_x)
        prediction = np.array(predicted)

        if useLeaveOneOut is True:
            if firstIterCV is True:
                probabilities = np.append(probabilities, probs, axis=1)
                firstIterCV = False
                predictions = np.append(predictions, prediction)
            else:
                probabilities = np.append(probabilities, probs, axis=0)
                predictions = np.append(predictions, prediction)
        else:
            predictions = np.append(predictions, prediction)
            probabilities = np.append(probabilities, probs)

        # Performance
        if useLeaveOneOut is not True:
            cm = np.array(confusion_matrix(test_label, prediction))
            cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
            confusion_matrixes.append(cm)
            confusion_matrixes_percent.append(cm_normalized)
            avg_confusion_matrixes = np.mean(confusion_matrixes_percent, axis=0)
        if verbose is True:
            print('CV #' + str(count))
            print('Prediction: ' + str(prediction))
            print('    Actual: ' + str(test_label))

        # Append probs to the global list
        probs_np = np.array(probs)
        cv_probabilities.append(probs_np[:, 0])
        cv_probabilities_label.append(test_label)

        #			if useLeaveOneOut is not True:
        #				print('Confusion matrix')
        #				print(cm)
        #				print('Confusion matrix (normalized)')
        #				print(cm_normalized)
        #				print('---')
        #				print('True positive rate: '+str(cm_normalized[0][0]))
        #				print('True negative rate: '+str(cm_normalized[1][1]))
        if verbose is True:
            print('===================')

        ## One CV done, go to the next one
        count += 1
    best_threshold = None
    cv_prob_linear = np.ravel(cv_probabilities)
    cv_prob_label_np = np.array(cv_probabilities_label)
    cv_prob_label_linear = np.ravel(cv_prob_label_np)
    threshold_list = np.linspace(0, 1, 100)

    biglist_fpr = []
    biglist_tpr = []
    biglist_thresh = []
    biglist_cms = []
    for thresh in threshold_list:
        biglist_pred = [4 if x < thresh else 3 for x in
                        cv_prob_linear]  # list comprehension to quickly go through the list.
        biglist_cm = confusion_matrix(cv_prob_label_linear, biglist_pred)
        biglist_cm_norm = biglist_cm.astype('float') / biglist_cm.sum(axis=1)[:, np.newaxis]
        biglist_cms.append(biglist_cm_norm)
        biglist_tpr.append(biglist_cm_norm[0][0])
        biglist_fpr.append(biglist_cm_norm[1][0])
        biglist_thresh.append(thresh)
    biglist_auc = auc(biglist_fpr, biglist_tpr)

    # Make a subset of data where FPR < MAX_FPR
    idx_below_maxfpr = np.where(np.array(biglist_fpr) < MAX_FPR)
    fpr_below_maxfpr = np.array(biglist_fpr)[idx_below_maxfpr[0]]
    tpr_below_maxfpr = np.array(biglist_tpr)[idx_below_maxfpr[0]]

    # Look for the best (max value) FPR in that subset
    best_tpr_below_maxfpr = np.max(tpr_below_maxfpr)
    best_tpr_below_maxfpr_idx = np.array(np.where(biglist_tpr == best_tpr_below_maxfpr)).ravel()  # get its idx

    # Get the associated TPRs
    best_tpr_below_maxfpr_associated_fpr = np.array(biglist_fpr)[best_tpr_below_maxfpr_idx]
    # Get the best (min value) in that subset
    best_associated_fpr = np.min(best_tpr_below_maxfpr_associated_fpr)
    # ... get its idx
    best_associated_fpr_idx = np.array(np.where(biglist_fpr == best_associated_fpr)).ravel()

    # The best idx is the one that is on both set
    best_idx = best_tpr_below_maxfpr_idx[np.in1d(best_tpr_below_maxfpr_idx, best_associated_fpr_idx)]
    best_threshold = threshold_list[best_idx]
    best_cm = biglist_cms[best_idx[0]]
    if verbose is True:
        print('#################################')
        print('FOR THIS CELL')
        plt.figure()
        plt.plot(biglist_fpr, biglist_tpr)
        plt.xlabel('False positive rate')
        plt.ylabel('True positive rate')
        print('#################################')
        print('Best threshold: ' + str(best_threshold))
        print('Gives a TPR of ' + str(best_tpr_below_maxfpr))
        print('And a FPR of ' + str(best_associated_fpr))
        print('CM')
        print(best_cm)
        print('#################################')
    return (biglist_auc, best_threshold, best_cm, best_tpr_below_maxfpr, best_associated_fpr, cv_container, biglist_cms)
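
processCV builds its folds with the pre-0.18 StratifiedShuffleSplit signature (labels and n_iter passed to the constructor, the object iterated directly). The modern model_selection equivalents would look like this:

import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit, LeaveOneOut

label = np.repeat([3, 4], 25)                 # placeholder labels
X_dummy = np.zeros((len(label), 1))           # only the sample count matters

sss = StratifiedShuffleSplit(n_splits=20, test_size=0.1, random_state=1337)
for train, test in sss.split(X_dummy, label):
    pass  # index arrays, same protocol as the old iterable splitter

loo = LeaveOneOut()                           # no longer takes len(label)
for train, test in loo.split(X_dummy):
    pass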
Code example #9
File: trainer_errp_hoang.py  Project: syzhang/pycnbi
                oversampled_train_label = np.append(train_label,
                                                    train_label[idx_offset])
                oversampled_train_x = np.concatenate(
                    (train_x, train_x[idx_offset]), 0)
                train_label = oversampled_train_label
                train_x = oversampled_train_x

            # RF = dict(trees=1000, maxdepth=None)
            # cls = RandomForestClassifier(n_estimators=RF['trees'], max_features='auto', max_depth=RF['maxdepth'], n_jobs=n_jobs)
            # cls = LDA(solver='eigen')
            # cls = QDA(reg_param=0.3) # regularized LDA

            # w,b = trainLDA(train_x,train_label, 0.3)
            # predicted, probs = testLDA(test_x, w, b)

            # Older rLDA API: the regularization coefficient goes to fit()
            # and predict_proba() returns (labels, probabilities)
            rlda = rLDA()
            rlda.fit(train_x, train_label, 0.3)
            predicted, probs = rlda.predict_proba(test_x)

            prediction = np.array(predicted)

            #            cls.fit( train_x, train_label )
            #            Y_pred= cls.predict( test_x )
            #            prediction = Y_pred

            cm = np.array(confusion_matrix(test_label, prediction))
            cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
            tp_rates.append(cm_normalized[0][0])
            tn_rates.append(cm_normalized[1][1])
            confusion_matrixes.append(cm)
            confusion_matrixes_percent.append(cm_normalized)
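
This fragment begins partway through an oversampling step: idx_offset indexes minority-class trials that are duplicated so both classes contribute equally to training. A self-contained sketch of that balancing idea, assuming random duplication with replacement (the helper name is illustrative):

import numpy as np

def oversample_minority(train_x, train_label, rng=None):
    """Duplicate randomly chosen minority-class trials (with replacement)
    until both classes have the same number of samples."""
    if rng is None:
        rng = np.random.default_rng()
    classes, counts = np.unique(train_label, return_counts=True)
    minority = classes[np.argmin(counts)]
    deficit = counts.max() - counts.min()
    idx_minority = np.where(train_label == minority)[0]
    idx_offset = rng.choice(idx_minority, size=deficit, replace=True)
    train_x = np.concatenate((train_x, train_x[idx_offset]), axis=0)
    train_label = np.append(train_label, train_label[idx_offset])
    return train_x, train_label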