Example #1
def evaluate_test(x_eval, y_eval, res_eval, model, config, test=False):
    total_pred = np.array([], dtype=np.float64)
    total_y_test = np.array([], dtype=np.int64)
    if not config['old']:
        size = 2048
    else:
        size = 1024

    transformed_target = np.zeros(shape=(1, size), dtype=np.float64)
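    # placeholder zero row so np.concatenate has an array to append to;
    # it is removed again after the loop via np.delete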
    test_batch_num = int(
        math.ceil(x_eval.shape[0] / float(config['batch_size'])))
    cum_acc = []
    cum_f1 = []
    with torch.no_grad():
        for i in range(test_batch_num):
            begin_index = i * config['batch_size']
            end_index = min((i + 1) * config['batch_size'], x_eval.shape[0])
            batch_test_x = x_eval[begin_index:end_index]
            batch_test_y = y_eval[begin_index:end_index]
            batch_test_res = res_eval[begin_index:end_index]
            if config['model_type'] in ('dan', 'dann'):
                batch_test_x = batch_test_x.reshape(-1, 1, 1,
                                                    batch_test_x.shape[1])
            elif config['model_type'] in ('jdda', 'jdda_rnn', 'jdda_ff'):
                batch_test_x = batch_test_x.reshape(-1, 1,
                                                    batch_test_x.shape[1])
            batch_test_x = torch.from_numpy(batch_test_x).float().to(
                config['device'])
            batch_test_y = torch.from_numpy(batch_test_y).long().to(
                config['device'])
            batch_test_res = torch.from_numpy(batch_test_res).float()
            output_test, hidden_test = model(batch_test_x)
            _, predicted = torch.max(output_test.data, 1)
            # loss = _loss(batch_test_res, batch_test_y, output_test)
            total_pred = np.concatenate((total_pred, predicted.cpu()))
            total_y_test = np.concatenate((total_y_test, batch_test_y.cpu()))
            batch_acc = accuracy_score(batch_test_y.cpu().numpy(),
                                       predicted.cpu().numpy())
            _, _, batch_f1, _ = prfs(batch_test_y.cpu().numpy(),
                                     predicted.cpu().numpy(),
                                     average='weighted')
            cum_acc.append(batch_acc)
            cum_f1.append(batch_f1)
            transformed_target = np.concatenate(
                (transformed_target, hidden_test.cpu().numpy()))
    if test:
        print(classification_report(total_y_test, total_pred, digits=4))
        acc = accuracy_score(total_y_test, total_pred)
        cum_acc = np.array(cum_acc)
        cum_f1 = np.array(cum_f1)
        print("Testing accuracy", acc)
        print("Average acc and std ", np.mean(cum_acc), np.std(cum_acc))
        print("Average F1 and std ", np.mean(cum_f1), np.std(cum_f1))
    _, _, f1, _ = prfs(total_y_test, total_pred, average='weighted')
    transformed_target = np.delete(transformed_target, (0), axis=0)
    print("Target - Eval", transformed_target.shape, x_eval.shape)
    return f1, transformed_target
Example #2
    def _compute_metrics(self,
                         gt_all,
                         pred_all,
                         types,
                         print_results: bool = False):
        labels = [t.index for t in types]
        per_type = prfs(gt_all,
                        pred_all,
                        labels=labels,
                        average=None,
                        zero_division=0)
        micro = prfs(gt_all,
                     pred_all,
                     labels=labels,
                     average='micro',
                     zero_division=0)[:-1]
        macro = prfs(gt_all,
                     pred_all,
                     labels=labels,
                     average='macro',
                     zero_division=0)[:-1]
        total_support = sum(per_type[-1])

        if print_results:
            self._print_results(per_type,
                                list(micro) + [total_support],
                                list(macro) + [total_support], types)

        return [m * 100 for m in micro + macro]
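A hedged standalone check of the [:-1] slicing used above: when an averaging mode is given, prfs returns (precision, recall, fscore, support) with support set to None, so dropping the last element leaves just the three scores.

from sklearn.metrics import precision_recall_fscore_support as prfs

micro = prfs([0, 1, 1], [0, 1, 0], average='micro', zero_division=0)[:-1]
print(micro)  # (precision, recall, fscore); the None support slot is dropped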
Example #3
def classify_vuamc(all_features,
                   features_to_use,
                   classifier=xgb.XGBClassifier,
                   n_dev=0,
                   seed=0,
                   classifier_args=None):
    if classifier_args is None:
        classifier_args = {}
    print("Extracting features {}".format(', '.join(features_to_use)))
    F = preprocess(all_features, features_to_use, n_dev=n_dev, seed=seed)
    print("{} total features".format(F['X_train'].shape[1]))
    print("{} train, {} dev, {} test".format(F['X_train'].shape[0],
                                             F['X_dev'].shape[0],
                                             F['X_test'].shape[0]))

    print("Training {}".format(classifier))
    clf = classifier(scale_pos_weight=sum(1 - F['y_train']) /
                     sum(F['y_train']),
                     **classifier_args)

    clf.fit(F['X_train'], F['y_train'])
    print("Scoring")
    y_pred_test = clf.predict(F['X_test'])
    y_pred_dev = clf.predict(F['X_dev'])
    p, r, f1, s = prfs(F['y_test'], y_pred_test, average='binary')
    print("Precision:", p)
    print("Recall:", r)
    print("F1 Score:", f1)
    acc = (F['y_test'] == y_pred_test).mean()
    print("Accuracy:", acc)
    n_features = F['X_train'].shape[1]
    # By genre
    genres = np.unique(F['test_genre'])
    for genre in genres:
        print("Genre: {}".format(genre))
        genre_mask = F['test_genre'] == genre
        y_pred_genre = clf.predict(F['X_test'][genre_mask])
        pg, rg, fg, sg = prfs(F['y_test'][genre_mask],
                              y_pred_genre,
                              average='binary')
        print("Precision:", pg)
        print("Recall:", rg)
        print("F1 Score:", fg)
        accg = (F['y_test'][genre_mask] == y_pred_genre).mean()
        print("Accuracy:", accg)
        print()
    stats = {
        'precision': p,
        'recall': r,
        'f1': f1,
        'accuracy': acc,
        'n_features': n_features
    }
    F['y_pred_test'] = y_pred_test
    F['y_pred_dev'] = y_pred_dev
    return F, stats
Example #4
    def evalute_one(self, metadata, predictions, batches):
        def log_loss(y, _p):
            eps = 1e-3
            p = np.clip(_p, eps, 1. - eps)
            return np.mean(-(y * np.log(p) + (1 - y) * np.log(1 - p)))

        outputs = ['y_onsets', 'y_frames', 'y_offsets']

        result = dict()
        for output in outputs:
            loss = log_loss(batches[output], predictions[output])
            y_true = (batches[output] > 0.5) * 1
            y_pred = (predictions[output] > 0.5) * 1

            p, r, f, _ = prfs(y_true, y_pred, average='micro')

            result[output] = dict(
                loss=loss,
                p=p,
                r=r,
                f=f
            )

        result['adsr'] = self.evaluate_adsr(metadata, predictions)

        return result
Example #5
    def on_epoch_end(self, epoch, logs=None):
        predict = np.argmax(self.model.predict(
            self.validation_data[:2], batch_size=self.params['batch_size']),
                            axis=1)
        targ = np.argmax(self.validation_data[2], axis=1)
        prec, rec, f1, _ = prfs(targ, predict, average='macro')
        if self.verbose:
            info("epoch: {} p: {:0.4f} r: {:0.4f} f: {:0.4f}".format(
                epoch, prec, rec, f1))
        if f1 > self.best:
            self.best = f1
            self.best_epoch = epoch
            self.wait = 0
            self.best_weights = self.model.get_weights()
        else:
            if self.wait >= self.patience:
                self.model.stop_training = True
                self.model.set_weights(self.best_weights)
                info("Stopping at epoch {}, best: e {}, f: {}.".format(
                    epoch, self.best_epoch, self.best))
            else:
                self.wait += 1
        if epoch == (self.params['epochs'] - 1):
            info("Stopping at final epoch {}, best: e {}, f: {}.".format(
                epoch, self.best_epoch, self.best))
            if self.best > f1:
                self.model.set_weights(self.best_weights)
Example #6
def fit_eval(m, trnX, trnY, tstX, tstY, average='micro', **param):
    m.set_params(**param)
    m.fit(trnX, trnY, tstX, tstY)
    pred = m.predict(tstX)
    prec, rec, f1, _ = prfs(tstY, pred, average=average)
    acc = accuracy_score(tstY, pred)
    return (acc, prec, rec, f1)
Example #7
def get_metrics(pred, pred_prob, y):
    """Compute AUC, error, precision, recall, and F1 score from the true
    labels y, the predicted labels pred, and the predicted P(y=1|x) pred_prob.

    Parameters
    ----------
    pred : iterable (list or np.array)
        Predicted labels.

    pred_prob : iterable (list or np.array)
        Predicted P(y=1|x).

    y : iterable (list or np.array)
        True labels.
    """
    precision, recall, f1, _ = list(zip(*prfs(y, pred)))[1]  # [1] selects the positive-class row
    error = 1 - accuracy_score(y, pred)
    area_under_curve = auc_score(y, pred_prob)
    metrics_dict = {
        "AUC": area_under_curve,
        "Error": error,
        "Precision": precision,
        "Recall": recall,
        "F1 score": f1,
    }
    return metrics_dict
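A minimal usage sketch for get_metrics, assuming toy binary data and that auc_score in the original module behaves like sklearn's roc_auc_score:

import numpy as np

y_true = np.array([0, 1, 1, 0, 1])
y_hat = np.array([0, 1, 0, 0, 1])
y_prob = np.array([0.2, 0.9, 0.4, 0.1, 0.8])
# metrics = get_metrics(y_hat, y_prob, y_true)
# -> {'AUC': ..., 'Error': 0.2, 'Precision': ..., 'Recall': ..., 'F1 score': ...}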
Example #8
def main():
    # Reading Data from folders
    X, y = load_data(data_path='./crop_dataset/crop_dataset/')
    print(f"Data shape: {X.shape},   Labels: {y.shape}\n")

    # Displaying random set of images from data
    display_random_set(data=X, labels=y)

    # Split the data into training and testing sets: 70% for training,
    # the remaining 30% for testing.
    x_train, x_val, y_train, y_val = train_test_split(X,
                                                      y,
                                                      test_size=0.3,
                                                      random_state=42,
                                                      shuffle=True)
    print(
        f"Training Data: {x_train.shape},   Training labels: {y_train.shape}\nValidation Data: {x_val.shape},   "
        f"Validation labels: {y_val.shape}\n")

    # Adjusting labels to be represented as categorical data.
    y_train = to_categorical(y=y_train, num_classes=len(np.unique(y)))
    y_val = to_categorical(y=y_val, num_classes=len(np.unique(y)))

    # Creating Neural network model.
    model = build_model(num_classes=len(np.unique(y)),
                        img_dim=x_train[0].shape)

    # To train the model again change train value to True, change to False to not train.
    train_model(x=x_train,
                y=y_train,
                x_val=x_val,
                y_val=y_val,
                model=model,
                train=True)

    print("[In progress] Loading H5 model and history file...")
    classifier = load_model(filepath='traffic_sign_model.h5')
    hist_loaded = load_history(file_name='traffic_sign.pickle')
    print("[Done] Loading H5 model and history file...")

    # Loading data for testing model.
    x_test, y_test = load_test_data(test_data_dir='./test_data/test_data',
                                    test_data_labels_dir='./test_labels.csv')
    predictions = classifier.predict_classes(x_test)
    accuracy = np.array([
        1 if predictions[i] == int(y_test[i]) else 0
        for i in range(len(predictions))
    ])
    print(f"Accuracy on test data: {np.mean(accuracy) * 100} %.")

    # plotting loss and mse curves for training and validation steps
    plot_curves(hist_loaded)

    # plotting accuracy bar graph per class
    labels = np.unique(y)
    precision, recall, f1, support = prfs(y_true=y_test,
                                          y_pred=predictions,
                                          average=None)
    accuracy_per_class(labels, precision, recall, f1)
Example #9
def _compute_metrics(gt_all,
                     pred_all,
                     labels,
                     labels_str,
                     print_results: bool = False):
    per_type = prfs(gt_all, pred_all, labels=labels, average=None)
    micro = prfs(gt_all, pred_all, labels=labels, average='micro')[:-1]
    macro = prfs(gt_all, pred_all, labels=labels, average='macro')[:-1]
    total_support = sum(per_type[-1])

    if print_results:
        _print_results(per_type,
                       list(micro) + [total_support],
                       list(macro) + [total_support], labels_str)

    metrics = [m * 100 for m in micro + macro]
    return dict(zip(METRIC_LABELS, metrics))
Example #10
def get_prfs(causality_truth, causality_pred):
    assert causality_pred.shape == causality_truth.shape
    assert len(causality_pred.shape) == 2
    from sklearn.metrics import precision_recall_fscore_support as prfs
    precision, recall, F1, _ = prfs(
        to_np_array(causality_truth).flatten(),
        to_np_array(causality_pred).flatten())
    return precision[1], recall[1], F1[1]
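A hedged toy check of the indexing above: with the default average=None, prfs returns per-class arrays ordered by sorted label, so index 1 picks out the positive class.

import numpy as np
from sklearn.metrics import precision_recall_fscore_support as prfs

truth = np.array([0, 1, 1, 0])
pred = np.array([0, 1, 0, 1])
p, r, f, _ = prfs(truth, pred)
print(p[1], r[1], f[1])  # precision/recall/F1 for label 1: 0.5 0.5 0.5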
Example #11
def score(train, test, opt):
    from sklearn.metrics import precision_recall_fscore_support as prfs
    pred = _predict(train, test, opt)
    gold = test.labels

    prec, rec, f1, _ = prfs(gold, pred, average='macro')
 
    print("Precision {}, Recall: {}, F-score: {}".format(
        prec, rec, f1))
Example #12
def evaluate(logger, tag_group, device, recurrent_model, output_model, loaders,
             global_step):
    recurrent_model.eval()
    output_model.eval()

    losses = defaultdict(list)
    all_prf = defaultdict(list)
    with torch.no_grad():
        loss_function_bce = nn.BCEWithLogitsLoss(reduction='mean')
        loss_function_mse = nn.MSELoss(reduction='mean')
        # each loader is for one sequence
        for midifilename, loader in loaders:
            print('evaluate midifilename', midifilename)
            y_true, y_pred = evaluate_aux(device, recurrent_model,
                                          output_model, loader, losses)
            print('y_true.shape', y_true.shape)
            print('y_pred.shape', y_pred.shape)

            y_pred = (y_pred > 0.5) * 1

            # import matplotlib.pyplot as plt
            # fig, axes = plt.subplots(nrows=2, sharex=True, sharey=True)
            # axes[0].imshow(y_true.T)
            # axes[1].imshow(y_pred.T)
            # plt.show()
            # exit()

            p, r, f, _ = prfs(y_true, y_pred, average='micro')
            print('p {:>4.2f} r {:>4.2f} f {:>4.2f}'.format(p, r, f))
            all_prf['p'].append(p)
            all_prf['r'].append(r)
            all_prf['f'].append(f)

    to_log = {
        '{}_prf/p'.format(tag_group): np.mean(all_prf['p']),
        '{}_prf/r'.format(tag_group): np.mean(all_prf['r']),
        '{}_prf/f'.format(tag_group): np.mean(all_prf['f']),
        '{}_losses/mse_frames'.format(tag_group):
        np.mean(losses['mse_frames']),
        '{}_losses/mse_velocity'.format(tag_group):
        np.mean(losses['mse_velocity']),
        '{}_losses/bce'.format(tag_group): np.mean(losses['bce'])
    }

    if logger is not None:
        for key, value in to_log.items():
            logger.add_scalar(key, value, global_step)
    return to_log
Example #13
def rfFitScore(clf, dftrain, dftrain_y, dftest, dftest_y):
    '''random forest classifier fit and score.
       clf=RandomForestClassifier, dftrain=train data,
       dftrain_y=train data Y, dftest=test data,
       dftest_y=test data Y'''
    
    clfit = clf.fit(dftrain, dftrain_y['Y'])  # clf.fit(X, y)
    
    imp = clfit.feature_importances_  # ndarray of 562    
    # clfit.fit_transform( X, y=None )  # returns X_new
    
    new_y = clfit.predict( dftest )  # returns predicted Y
    
    test_score = clfit.score( dftest, dftest_y['Y'] )
    print("test score:", test_score)  # clfit.oob_score_  
    if (clf.oob_score):
        print("oob score", clfit.oob_score_)
    
    # calculate test score by other means
    print("predict True %.3f percent, %d out of %d" % \
      ((100 * sum(dftest_y['Y'] == new_y) / dftest_y.shape[0]), \
       sum(dftest_y['Y'] == new_y), dftest_y.shape[0]))
    print("predict False %.3f percent, %d out of %d" % \
      ((100 * sum(dftest_y['Y'] != new_y) / dftest_y.shape[0]), \
       sum(dftest_y['Y'] != new_y), dftest_y.shape[0]))
    
#    new_p = clfit.predict_proba( dftest )
#    # probability of each X variable to predict each y class
#    print("test predict probabilities head:\n", new_p[:5])
    
    # cross table of variable predictions
    ptab = pd.crosstab(dftest_y['Y'], new_y, \
        rownames=['actual'], colnames=['predicted'])
    print("cross table:\n", ptab)
    
    # accuracy:  percent labeled correctly
    # precision: true positives / (true positives + false positives)
    # recall:    true positives / (true positives + false negatives)
    precision, recall, fbeta, support = prfs(dftest_y['Y'], new_y)
    print("precision", precision, "\nrecall", recall, \
        "\nfbeta", fbeta, "\nsupport", support)
    
    if (clf.oob_score):
        return test_score, imp, clfit.oob_score_
    else:
        return test_score, imp
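A hedged toy check of those definitions against prfs (toy binary labels assumed):

import numpy as np
from sklearn.metrics import precision_recall_fscore_support as prfs

y_true = np.array([1, 1, 0, 0, 1])
y_hat = np.array([1, 0, 1, 0, 1])
tp = np.sum((y_true == 1) & (y_hat == 1))  # 2
fp = np.sum((y_true == 0) & (y_hat == 1))  # 1
fn = np.sum((y_true == 1) & (y_hat == 0))  # 1
p, r, f, _ = prfs(y_true, y_hat, average='binary')
assert np.isclose(p, tp / (tp + fp)) and np.isclose(r, tp / (tp + fn))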
Example #14
def log_metrics(y_true, y_pred, metrics_write_path, average=""):
    """
    Log the precision/recall/f1-score/support

    :param y_true: (ndarray)
        True labels
    :param y_pred: (ndarray)
        Classifier predicted labels
    :param metrics_write_path: (str)
        Place to write metrics
    :param average: (str)
        Specifies micro/macro averaging

    :return:
        None
    """
    if not average:
        # By default, write both micro and macro averages
        with open(metrics_write_path, "a") as write_file:
            write_file.write("\nModel Report\n")
            timestamp = str(datetime.now())
            write_file.write("{}\n".format(timestamp))
            write_file.write("----------------------------------------\n")

        # Micro Avg
        log_metrics(y_true, y_pred, metrics_write_path, average="micro")

        # Macro Avg
        log_metrics(y_true, y_pred, metrics_write_path, average="macro")
    else:
        metrics = prfs(y_true, y_pred, average=average)
        with open(metrics_write_path, "a") as write_file:
            write_file.write("\n")
            write_file.write("\n- {} averaging -\n".format(average))
            write_file.write(classification_report(y_true, y_pred))

            write_file.write("\nPrecision: {}\n".format(metrics[0]))
            write_file.write("Recall: {}\n".format(metrics[1]))
            write_file.write("F1-Score: {}\n".format(metrics[2]))
            write_file.write("Support: {}\n".format(metrics[3]))
            write_file.write("Accuracy: {}\n".format(
                accuracy_score(y_true, y_pred)))
            write_file.write("AUC Score ({}): {}\n".format(
                average, roc_auc_score(y_true, y_pred, average=average)))
            write_file.write(str(confusion_matrix(y_true, y_pred)) + "\n")
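A minimal usage sketch for log_metrics, assuming toy binary labels and a scratch file path:

import numpy as np

y_true = np.array([0, 1, 1, 0, 1])
y_pred = np.array([0, 1, 0, 0, 1])
# log_metrics(y_true, y_pred, "metrics.txt")  # appends micro- and macro-averaged reports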
Example #15
    def eval_params(self, nparr):
        cutoff = int(self.X.shape[0] * .7)
        C = nparr[0]
        gamma = nparr[1]
        if gamma < 0:
            gamma = 1e-3
        if C < 0:
            C = 1e-3
        svm = SVC(C=C, gamma=gamma)
        X_train, y_train = self.X[:cutoff], self.labels[:cutoff]
        X_test, y_test = self.X[cutoff:], np.array(self.labels[cutoff:])
        svm.fit(X_train, y_train)
        y_pred = svm.predict(X_test)
        # fitness = (y_pred == y_test).sum() / (y_test.shape[0] + .0)
        fitness = prfs(y_test, y_pred, average='macro')[2]
        print("F-score macro: %.6f achieved with C=%.6f and gamma=%.6f" %
              (fitness, C, gamma))
        return fitness
Example #17
    def _score(self,
               gold,
               pred,
               scores={'precision', 'recall', 'f1-score'},
               negative_class=None,
               average=None):
        """ Return the score for the testset.
        """
        from sklearn.metrics import precision_recall_fscore_support as prfs

        if average is None:
            average = 'macro'
            if negative_class:
                average = 'binary'

        scores = [
            sc if ':' in sc or sc not in {'precision', 'recall', 'f1-score'}
            else ':'.join((sc, average))
            for sc in scores
        ]
        scores = {k: None for k in scores}
        for sc_avg in list(scores):
            if ':' in sc_avg:
                sc, avg = sc_avg.split(':')
            else:
                sc = sc_avg
                avg = None
            if scores[sc_avg] is not None:
                continue
            if sc not in {'precision', 'recall', 'f1-score', 'accuracy'}:
                warning("Skipping unknown score `{}'.".format(sc))
                continue
            if sc in {'precision', 'recall', 'f1-score'}:
                if avg not in {'binary', 'micro', 'macro'}:
                    warning("Skipping `{}': unknown averaging method.".format(
                        sc_avg))
                    continue
                p, r, f, _ = prfs(gold, pred, average=avg)
                scores[':'.join(('precision', avg))] = p
                scores[':'.join(('recall', avg))] = r
                scores[':'.join(('f1-score', avg))] = f
            if sc == 'accuracy':
                from sklearn.metrics import accuracy_score
                scores['accuracy'] = accuracy_score(gold, pred)
        return {k: v for k, v in scores.items()}
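A standalone sketch of the key scheme used by _score above: each bare metric name is expanded to 'name:average' before lookup (toy inputs assumed).

requested = {'precision', 'recall', 'f1-score'}
average = 'macro'
expanded = [sc if ':' in sc or sc not in {'precision', 'recall', 'f1-score'}
            else ':'.join((sc, average))
            for sc in requested]
print(sorted(expanded))  # ['f1-score:macro', 'precision:macro', 'recall:macro']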
Example #18
def evaluate_test(x_eval, y_eval, res_eval, model, config, test=False):
    total_pred = np.array([], dtype=np.float64)
    total_y_test = np.array([], dtype=np.int64)

    test_batch_num = int(
        math.ceil(x_eval.shape[0] / float(config['batch_size'])))
    with torch.no_grad():
        for i in range(test_batch_num):
            begin_index = i * config['batch_size']
            end_index = min((i + 1) * config['batch_size'], x_eval.shape[0])
            batch_test_x = x_eval[begin_index:end_index]
            batch_test_y = y_eval[begin_index:end_index]
            batch_test_res = res_eval[begin_index:end_index]
            if config['model_type'] in ('dan', 'dann'):
                batch_test_x = batch_test_x.reshape(-1, 1, 1,
                                                    batch_test_x.shape[1])

            batch_test_x = torch.from_numpy(batch_test_x).float().to(
                config['device'])
            batch_test_y = torch.from_numpy(batch_test_y).long().to(
                config['device'])
            batch_test_res = torch.from_numpy(batch_test_res).float()
            output_test, _ = model(batch_test_x, batch_test_x)
            _, predicted = torch.max(output_test.data, 1)
            # loss = _loss(batch_test_res, batch_test_y, output_test)
            total_pred = np.concatenate((total_pred, predicted.cpu()))
            total_y_test = np.concatenate((total_y_test, batch_test_y.cpu()))

    # overall_loss = _loss(res_eval, y_eval, total_pred)
    # overall_loss = criterion(total_pred, total_y_test)
    # acc = accuracy_score(total_y_test, total_pred)
    if test:
        print(classification_report(total_y_test, total_pred, digits=4))
        acc = accuracy_score(total_y_test, total_pred)
        print("Testing accuracy", acc)
    _, _, f1, _ = prfs(total_y_test, total_pred, average='weighted')

    # print ("Overall testing/evaluation F1 is ", f1)
    return f1
Example #19
def evaluate_one_loader(cuda, net, loader):
    net.eval()
    loss_function = nn.BCELoss(reduction='mean')
    smoothed_loss = 1.
    y_true = []
    y_pred = []
    for x, y in loader:
        if cuda:
            x = x.cuda()
            y = y.cuda()

        y_hat = net.predict(x)
        loss = loss_function(y_hat, y)
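        # exponential moving average of the per-batch loss (decay 0.9)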
        smoothed_loss = smoothed_loss * 0.9 + loss.detach().cpu().item() * 0.1

        y_true.append(y.detach().cpu().numpy())
        y_pred.append((y_hat.detach().cpu().numpy() > 0.5) * 1)

    y_true = np.vstack(y_true)
    y_pred = np.vstack(y_pred)
    p, r, f, _ = prfs(y_true, y_pred, average='micro')
    return smoothed_loss, p, r, f
Example #20
def success_metrics(model):
    """
    Print evaluation of the model
    parameter: Pytorch model, (dataloaders['val'])
    return: precision, recall, f1, support
    """
    model.eval()
    original_labels = []
    pred_lst = []

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            
            original_labels.extend(labels.cpu().numpy())
            pred_lst.extend(preds.cpu().numpy())
            
    precision, recall, f1, support = prfs(original_labels, pred_lst, average='weighted')

    print("Precision: {:.2%}\nRecall: {:.2%}\nF1 score: {:.2%}".format(precision, recall, f1))
Example #21
model1.add(Dense(6, activation='relu', input_dim=11))
model1.add(Dense(1, activation='sigmoid'))

model1.compile(optimizer='adam',
               loss='binary_crossentropy',
               metrics=['accuracy'])

model1.fit(X_train, y_train, batch_size=10, epochs=100)

y_pred = model1.predict(X_test)
y_pred = (y_pred > 0.5)

print('CM:', confusion_matrix(y_test, y_pred))
print('AC:', ac(y_test, y_pred))
print('F1 scores:', f1(y_test, y_pred))
print('PR:', prfs(y_test, y_pred))

# Logistic Regression
from sklearn.linear_model import LogisticRegression as lr

model2 = lr().fit(X_train, y_train)
y_pred = model2.predict(X_test)
y_pred = (y_pred > 0.5)
print('CM:', confusion_matrix(y_test, y_pred))
print('AC:', ac(y_test, y_pred))
print('F1 scores:', f1(y_test, y_pred))
print('PR:', prfs(y_test, y_pred))

from sklearn.neighbors import KNeighborsClassifier as knn

model3 = knn().fit(X_train, y_train)
Example #22
#     # best result: epsilon = 2, threshold = 0.46, F1 = 0.515 (P = 0.468, R = 0.573)
#     # y_pred = neighbourhood_difference(y_dists)
#
#     plot_thresholds(y_true, y_pred, False, 'binary')

# Measurements on test data
if __name__ == '__main__':
    data = config.get_seg_data('test')

    print("Loading the data")
    y = np.load(data['y'])
    y_true = np.load(data['y_true_lm'])

    T = 0.5
    y_pred = compute_distance(y, euclidean_distances) > T
    P, R, F, S = prfs(y_true, y_pred, average='binary')
    print('euclidean distance: threshold = %.2f' % T)
    print_measurements(y_true, y_pred)

    y_dists = compute_distance(y, cosine_distances)

    T = 0.92
    y_pred = y_dists > T
    print('cosine distance: threshold = %.2f' % T)
    print_measurements(y_true, y_pred)

    T = 0.33
    y_pred = compute_distance(y, manhattan_distances) > T
    print('manhattan distance: threshold = %.2f' % T)
    print_measurements(y_true, y_pred)
Example #23
        cd_loss = criterion(cd_preds, labels)
        loss = cd_loss
        loss.backward()
        optimizer.step()

        cd_preds = cd_preds[-1]
        _, cd_preds = torch.max(cd_preds, 1)

        # Calculate and log other batch metrics
        cd_corrects = (
            100 *
            (cd_preds.squeeze().byte() == labels.squeeze().byte()).sum() /
            (labels.size()[0] * (opt.patch_size**2)))

        cd_train_report = prfs(labels.data.cpu().numpy().flatten(),
                               cd_preds.data.cpu().numpy().flatten(),
                               average='binary',
                               pos_label=1)

        train_metrics = set_metrics(train_metrics, cd_loss, cd_corrects,
                                    cd_train_report, scheduler.get_lr())

        # log the batch mean metrics
        mean_train_metrics = get_mean_metrics(train_metrics)

        for k, v in mean_train_metrics.items():
            writer.add_scalars(str(k), {'train': v}, total_step)

        # clear batch variables from memory
        del batch_img1, batch_img2, labels

    scheduler.step()
Example #24
###########################################################
################# ConvNet Evaluation ######################
###########################################################

print("\n fitting cnn model on %d samples..." % x_train.shape[0])
# hist = cnn_100.model.fit(x_train, y_train, nb_epoch=10)
# cnn_100.save_model(hist)
# print 'saved hist: ', str(hist)
cnn_200_pred = cnn_200.model.predict(x_test)

y_true = np.array([np.argmax(i) for i in y_test])
y_pred = np.array([np.argmax(i) for i in cnn_200_pred])

cnn_acc = accuracy_score(y_true, y_pred)
p, r, f, s = prfs(y_true, y_pred, average='macro')

print()
print("cnn_200 stats: precision=%.3f, recall=%.3f, f-score=%.3f, acc: %.3f" %
      (p, r, f, cnn_acc))

###########################################################
################# LogReg/SVM Preparation ##################
###########################################################

print('reading text data...')
t = time()
text_reader = TweetCorpusReader('../data/', text_only=False)
t = time() - t
print('time elapsed: %.0fs' % t)
Example #25
        def callback(epoch, model):
            if epoch % 250 != 0:
                return

            perf = []
            for X, Z, y in ((experiment.X[ts], experiment.Z[ts],
                             experiment.y[ts]),
                            (experiment.X[tr], experiment.Z[tr],
                             experiment.y[tr])):

                yhat = model.predict(X)

                y_loss = model.loss_y(X, y)
                z_loss = model.loss_z(X, Z)
                y_perf = list(prfs(y, yhat, average='binary')[:3])

                perf.extend(y_perf + [y_loss, z_loss])

            #print(Z[0])
            #print(Z_hat[0])

            if args.experiment.startswith('color') and args.record_lime:

                similarities, dispersions, times_senn, times_lime = [], [], [], []
                for i in selection:
                    x = experiment.X[i].reshape(1, -1)

                    z_senn, t_senn = model.explain(x, return_runtime=True)
                    z_senn = z_senn.ravel()
                    z_lime, Z_lime, t_lime = \
                        experiment.explain_lime(model, tr, i,
                                                n_repeats=args.lime_repeats,
                                                n_samples=args.lime_samples,
                                                n_features=args.lime_features)
                    times_senn.append(t_senn)
                    times_lime.append(t_lime)

                    n_nonzeros = len(np.nonzero(z_lime)[0])
                    similarities.append(
                        _whatever_at_k(z_senn, z_lime, n_nonzeros))

                    n_repeats = len(Z_lime)
                    dispersions.append(1 / (n_repeats * (n_repeats - 1)) * \
                                       np.sum(pairwise_distances(Z_lime, Z_lime)))

                    path = basename + '__fold={}__instance={}__epoch={}'.format(
                        k, i, epoch)
                    experiment.dump_explanation(path + '_senn.png',
                                                experiment.X[i],
                                                experiment.Z[i], z_senn)
                    experiment.dump_explanation(path + '_lime.png',
                                                experiment.X[i],
                                                experiment.Z[i], z_lime)

                perf.extend([
                    np.mean(similarities),
                    np.mean(dispersions),
                    np.mean(times_senn),
                    np.mean(times_lime),
                ])

            print('epoch {} : {}'.format(epoch, perf))
            return perf
Example #26
print('#' * 40)
print('NO Twitter Features')
print('SVM - Linear Kernel')
print('#' * 40)
print('ACC\tPR\tRE\tF1')
print('#' * 40)
i = 1
for tr, ts in KFold(n_splits=10).split(normalized_corpus):  # modern sklearn KFold API
    train = X[tr]
    test = X[ts]
    clf = LinearSVC()
    clf.fit(train, labels[tr])
    ytrue = labels[ts]
    ypred = clf.predict(test)
    acc = (ytrue == ypred).sum() / (len(ypred)+.0)
    p, r, f, s = prfs(ytrue, ypred, average='binary')
    accs.append(acc)
    ps.append(p)
    rs.append(r)
    fs.append(f)
    print("%.2f\t%.2f\t%.2f\t%.2f KFoldRnd%d" % (acc, p, r, f, i))
    i += 1
print('#' * 40)
print('Mean accuracy: %.2f' % np.mean(accs))
print('Mean precision: %.2f' % np.mean(ps))
print('Mean recall: %.2f' % np.mean(rs))
print('Mean f-score: %.2f' % np.mean(fs))


accs = []
ps = []
Example #27
            print(
                'Loading the paragraph trained classifier trained on data processed by'
                ' threshold_half_max function')
            classifier = load_pickle(config.classifier_par_half_max)
            threshold = 0.39
            y_true = process_y(data, threshold_half_max)

        print("Loading x")
        x = load_sparse_csr(data['x'])
    else:
        threshold = 0.3

        vectorizer = load_pickle(config.vectorizer)
        binarizer = load_pickle(config.binarizer)

        print('Loading the classifier')
        classifier = load_pickle(config.classifier)

        corpus, topics = build_corpus_and_topics(config.data['test'])

        print('Transforming corpus by vectorizer')
        x = vectorizer.transform(corpus)
        print('Transforming article topics by binarizer')
        y_true = binarizer.transform(topics)
        del vectorizer, binarizer, corpus, topics

    y_pred = predict_tuned(x, classifier, threshold)

    P, R, F, S = prfs(y_true, y_pred, average='samples')
    print('F1 = %.3f (P = %.3f, R = %.3f)' % (F, P, R))
Example #28
def performanceSummary(trueLabels, predictedLabels):
    accuracy = acu(trueLabels, predictedLabels)
    precision, recall, fscore, support = prfs(trueLabels, predictedLabels)
    return ([accuracy.tolist()] + precision.tolist() + recall.tolist() +
            fscore.tolist() + support.tolist())
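A hedged toy call of performanceSummary above, assuming acu is sklearn's accuracy_score in the original module:

from sklearn.metrics import accuracy_score as acu
from sklearn.metrics import precision_recall_fscore_support as prfs

# summary = performanceSummary([0, 1, 1, 0], [0, 1, 0, 0])
# -> [accuracy, p_class0, p_class1, r_class0, r_class1, f_class0, f_class1, support0, support1]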
Example #29
# The twitter-specific tokenizer makes the parsing slow, however
#   the accuracy is much improved with it.
X_train, train_feats = pr.process(X_train, verbose=True)

# ~7min vectoring phase
X_mat = pr.fit_transform(X_train, saveVectorizer=False, saveMatrix=False, verbose=True)
X_test, test_feats = pr.process(X_test, verbose=True)
X_test = pr.transform(X_test, saveMatrix=False, verbose=True)

# Compare the accuracy with and w/o the twitter-specific features.
# Must scale the features matrix before concatenating with the ngrams matrix.
print('\nTF-IDF Unigrams and Bigrams || Logistic Regression classifier')
print('-' * 40)

clf = LR()
# Roughly 3 minutes on training
t0 = time.time()
print('Training on %d samples...' % X_mat.shape[0])
clf.fit(X_mat, y_train)
print('Training time: %.0fs' % (time.time() - t0))

print('Testing on %d samples...' % X_test.shape[0])
y_pred = clf.predict(X_test)

acc = (y_pred==y_test).sum()/(len(y_pred)+.0)
f1 = prfs(y_test, y_pred, average="macro")[-2]
roc_auc = roc_auc_score(y_test, y_pred)

print('\nReport\n' + '-' * 40)
print('Accuracy: %.4f\nMacro F-1 Score: %.4f\nROC_AUC Score: %.4f' % (acc, f1, roc_auc))
Example #30
            if classPred[bind]==0:
                voxPred = branch_1[bind]
            else:
                voxPred = branch_2[bind]
            revVoxPred = voxPred[:,:,::-1,:]
            voxPred = np.maximum(revVoxPred,voxPred)
            voxPredLabels = voxPred[grid_xyz[:,0],grid_xyz[:,1],grid_xyz[:,2],0]
            tree = ckdt(grid_xyz_pc)
            _,inds = tree.query(cloud,k=1)
            pcPred = voxPredLabels[inds]
            thresh_pr = [] ; thresh_re = []
            for thresh in np.arange(0.0,1.01,0.01):
                threshPred = np.array(pcPred)
                threshPred[threshPred >= thresh] = 1
                threshPred[threshPred < thresh] = 0
                p,r,f,s = prfs(seg.flatten(),threshPred.flatten(),average='binary')
                thresh_pr.append(p)
                thresh_re.append(r)
            prs.append(thresh_pr)
            recs.append(thresh_re)

        meanacc += acc
        if j%5==0:
            print('BATCH ' + str(j) + ' of ' + str(len(trainInstances)/args['batch_size']),' PREDICTION ACCURACY: ',meanacc/(j+1))
    except StopIteration:
        if not os.path.exists(args['outputDir']):
            os.makedirs(args['outputDir'])
        prs = np.array(prs)
        recs = np.array(recs)
        pr_mean = np.mean(prs,axis=0)
        rec_mean = np.mean(recs,axis=0)
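The manual threshold sweep above traces out a precision/recall curve; a minimal equivalent sketch using sklearn's built-in helper on toy scores:

import numpy as np
from sklearn.metrics import precision_recall_curve

y_true = np.array([0, 1, 1, 0, 1])
scores = np.array([0.1, 0.8, 0.4, 0.3, 0.9])
precision, recall, thresholds = precision_recall_curve(y_true, scores)
print(precision, recall)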
Example #31
def permutations(data, shortest, longest, iterations, kernel, prefix, suffix):

    #initialize text data vectorizer
    if (shortest == 1) & (longest == 1):
        AAs = [
            'a', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'k', 'l', 'm', 'n', 'p',
            'q', 'r', 's', 't', 'v', 'w', 'y'
        ]
        cv = CountVectorizer(analyzer='char',
                             ngram_range=(shortest, longest),
                             vocabulary=AAs)
    else:
        cv = CountVectorizer(analyzer='char', ngram_range=(shortest, longest))

    dataVect = cv.fit_transform(data.Sequence)  # fit_transform so the no-vocabulary branch is fitted too

    # repeat for each kernel type:
    for k in kernel:

        #initialize feature use arrays
        precisions = pd.DataFrame(columns=data.Genus.unique())
        recalls = pd.DataFrame(columns=data.Genus.unique())
        fbetas = pd.DataFrame(columns=data.Genus.unique())
        micros = pd.DataFrame(columns=['Precision', 'Recall', 'Fbeta'])
        macros = pd.DataFrame(columns=['Precision', 'Recall', 'Fbeta'])
        scores = pd.DataFrame(columns=("iteration", "score"))

        #initialize the genus classification for this ngram
        for i in data.Genus.unique():
            data[i] = 0

        #repeat n times
        for j in range(iterations):

            randomstate = j + 5342  #make sure the randomstate input is at least 4 digits and repeatable

            #initialize classifier
            #uses default settings for the classifier, see scikit-learn documentation sklearn.svm.SVC for details
            clf = SVC(kernel=k)

            #build training and test data sets
            X_train, X_test, y_train, y_test = train_test_split(
                dataVect,
                data.Genus,
                test_size=0.5,
                stratify=data.Genus,
                random_state=randomstate)

            #Scale the data to the training set
            StSc = StandardScaler(copy=True, with_mean=False)
            StSc.fit(X_train)
            X_sc_train = StSc.transform(X_train)
            X_sc_test = StSc.transform(X_test)
            X_scaled = StSc.transform(dataVect)

            #train the classifier
            clf.fit(X_sc_train, y_train)

            #make predictions for the original dataset
            y_pred = clf.predict(X_scaled)
            score = clf.score(X_sc_test, y_test)
            scores.loc[j] = [j, score]
            data['prediction'] = y_pred

            #increment the Genus column counter for each Genus
            for c in clf.classes_:
                data.loc[data.prediction == c, c] += 1

            #record simulation data
            metrics = prfs(data.Genus, y_pred)
            precisions.loc[j] = metrics[0]
            recalls.loc[j] = metrics[1]
            fbetas.loc[j] = metrics[2]

            metrics = prfs(data.Genus, y_pred, average='micro')
            micros.loc[j, 'Precision'] = metrics[0]
            micros.loc[j, 'Recall'] = metrics[1]
            micros.loc[j, 'Fbeta'] = metrics[2]
            metrics = prfs(data.Genus, y_pred, average='macro')
            macros.loc[j, 'Precision'] = metrics[0]
            macros.loc[j, 'Recall'] = metrics[1]
            macros.loc[j, 'Fbeta'] = metrics[2]

        data.to_csv("{0}_SVM_{1}_{2}.csv".format(prefix, k, suffix))
        scores.to_csv("{0}_SVM_{1}_{2}.scores".format(prefix, k, suffix))
        precisions.to_csv("{0}_SVM_{1}_{2}.precision".format(prefix, k, suffix))
        recalls.to_csv("{0}_SVM_{1}_{2}.recall".format(prefix, k, suffix))
        fbetas.to_csv("{0}_SVM_{1}_{2}.fbeta".format(prefix, k, suffix))
        micros.to_csv("{0}_SVM_{1}_{2}.micro".format(prefix, k, suffix))
        macros.to_csv("{0}_SVM_{1}_{2}.macro".format(prefix, k, suffix))
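For reference, a minimal sketch (toy sequences assumed) of the char n-gram vectorization that permutations builds above:

from sklearn.feature_extraction.text import CountVectorizer

cv = CountVectorizer(analyzer='char', ngram_range=(1, 2))
X = cv.fit_transform(['acdeg', 'ggcaa'])
print(X.shape, sorted(cv.vocabulary_)[:5])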
Example #32
def precision_recall(beta_true, beta_pred):
    b_true = beta_true != 0
    b_pred = beta_pred != 0
    # average='binary' so pos_label takes effect and scalars are returned
    p, r, f, s = prfs(b_true, b_pred, pos_label=1, average='binary')
    return p, r
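A hedged toy call of precision_recall above, treating nonzero coefficients as the positive class (toy vectors assumed):

import numpy as np

beta_true = np.array([0.0, 1.5, 0.0, -2.0])
beta_pred = np.array([0.1, 1.5, 0.0, 0.0])
# p, r = precision_recall(beta_true, beta_pred)
# -> p = 0.5 (one of two predicted nonzeros is truly nonzero),
#    r = 0.5 (one of two true nonzeros is recovered)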
Example #33
def train_dnn(x_t, y_t, res_t, x_eval, y_eval, res_eval, x_test, y_test,
              config):
    # Start training process
    counter = 0
    best_f1 = 0.0
    f1_total = 0.0
    epoch_no_improve = 0
    n_epoch_stop = 3
    #momentum = 0.9
    #log_interval = 10
    #l2_decay = 5e-4

    batch_num = int(x_t.shape[0] / config['batch_size'])
    if (config['model_type'] == 'dan'):
        training_model = transfer_model.DANNet(config).to(config['device'])

    else:
        training_model = transfer_model.CNNModel(config).to(config['device'])

    loss_class = torch.nn.NLLLoss().to(config['device'])
    loss_domain = torch.nn.NLLLoss().to(config['device'])

    for epoch in range(config['num_epochs']):
        # Start training process
        if (config['model_type'] == 'dan'):
            LEARNING_RATE = config['learning_rate'] / math.pow(
                (1 + 10 * (epoch - 1) / config['num_epochs']), 0.75)
            print('learning rate: {:.4f}'.format(LEARNING_RATE))
            optimizer = torch.optim.SGD([
                {
                    'params': training_model.sharedNet.parameters()
                },
                {
                    'params': training_model.cls_fc.parameters(),
                    'lr': LEARNING_RATE
                },
            ],
                                        lr=LEARNING_RATE / 10,
                                        momentum=config['momentum'],
                                        weight_decay=config['l2_decay'])
        elif (config['model_type'] == 'dann'):
            optimizer = torch.optim.Adam(training_model.parameters(),
                                         lr=config['learning_rate'])

        for batch in range(batch_num):
            training_model.train()
            batch_index = generate_batch(x_t.shape[0], config['batch_size'])
            test_batch_idx = generate_batch(x_test.shape[0],
                                            config['batch_size'])
            batch_x = x_t[batch_index]
            batch_y = y_t[batch_index]
            batch_res = res_t[batch_index]
            batch_x = batch_x.reshape(-1, 1, 1, batch_x.shape[1])
            batch_x = torch.from_numpy(batch_x).float().to(config['device'])
            batch_y = torch.from_numpy(batch_y).long().to(config['device'])
            batch_res = torch.from_numpy(np.array(batch_res)).float()
            batch_test_x = x_test[test_batch_idx].reshape(
                -1, 1, 1, x_test.shape[1])
            batch_test_x = torch.from_numpy(batch_test_x).float().to(
                config['device'])
            # Forward pass
            #print ("Shape source and target", batch_x.shape, batch_test_x.shape)
            alpha = 0.0
            if (config['model_type'] == 'dan'):
                label_pred, loss_mmd = training_model(batch_x, batch_test_x)
                #loss = criterion(outputs, batch_y)
                # loss = _loss(batch_res, batch_y, outputs)
                loss_cls = F.nll_loss(F.log_softmax(label_pred, dim=1),
                                      batch_y)
                gamma = 2 / (1 + math.exp(-10 *
                                          (epoch) / config['num_epochs'])) - 1
                err = loss_cls + gamma * loss_mmd
                # Backward and optimize
                optimizer.zero_grad()

                err.backward()
                optimizer.step()
                #print ("Label prediction", label_pred)
                tr_pred = np.argmax(label_pred.cpu().detach(), 1)
                #tr_pred = np.argmax(outputs.cpu().detach(), 1)
                counter += 1
                training_model.eval()
                _, _, f1, _ = prfs(batch_y.cpu(), tr_pred, average='weighted')
                f1_total += f1

            elif (config['model_type'] == 'dann'):

                p = float(counter + epoch *
                          x_t.shape[0]) / config['num_epochs'] / x_t.shape[0]
                alpha = 2. / (1. + np.exp(-10 * p)) - 1
                training_model.zero_grad()
                #batch_x = batch_x.reshape(-1, 1, 1, x_test.shape[1])

                #class_label = torch.LongTensor(batch_size)
                domain_label = torch.zeros(config['batch_size'])
                domain_label = domain_label.long().to(config['device'])

                # Using source data
                class_output, domain_output = training_model(batch_x, alpha)
                err_s_label = loss_class(class_output, batch_y)
                err_s_domain = loss_domain(domain_output, domain_label)

                ### Using target data
                #x_test_batch = torch.FloatTensor(batch_size, 1, x_test.shape[1], x_test.shape[1])

                domain_label = torch.ones(config['batch_size'])
                domain_label = domain_label.long().to(config['device'])

                _, domain_output = training_model(batch_test_x, alpha)
                err_t_domain = loss_domain(domain_output, domain_label)
                err = err_t_domain + err_s_domain + err_s_label
                #print ("Error is ", err)
                err.backward()
                optimizer.step()

                training_model.eval()
                class_output, _ = training_model(batch_x, alpha)
                tr_pred = class_output.data.cpu().max(1, keepdim=True)[1]
                _, _, f1, _ = prfs(batch_y.cpu(), tr_pred, average='weighted')
                f1_total += f1
                if (counter + 1) % 100 == 0:
                    print(
                        'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Best F1: {:.4f}'
                        .format(epoch + 1, config['num_epochs'], batch + 1,
                                batch_num, err.item(), f1))
                counter += 1
        print("------epoch : ", epoch, " Loss: ", err.item(), " Training F1:",
              round((f1_total / batch_num), 4))
        training_model.eval()
        eval_f1 = evaluate_test(x_eval, y_eval, res_eval, training_model,
                                config, False)
        f1_total += eval_f1
        if (eval_f1 >= best_f1):
            best_f1 = eval_f1
            torch.save(training_model.state_dict(),
                       config['ckpt_path'] + 'best_model.pth')

        print("Current F1 score (evaluation) ", eval_f1)
        print("Best F1 score (evaluation) ", best_f1)
    print("All completed")
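For reference, a standalone sketch of the warm-up schedule that gamma (and, analogously, alpha) follows above, with an assumed epoch count:

import math

num_epochs = 10  # assumed for illustration
for epoch in range(num_epochs):
    gamma = 2 / (1 + math.exp(-10 * epoch / num_epochs)) - 1
    print(epoch, round(gamma, 3))  # ramps smoothly from 0 toward 1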
Example #34
concat = Dropout(.5)(concat)
output = Dense(2, activation='softmax', name='dense')(concat)
model = Model([input1, input2], output, name='deep_lstm')
model.compile("adam", "binary_crossentropy", metrics=['accuracy'])
model.summary()

from keras.utils import plot_model
# plot_model(model, to_file='attn-cnn-lstm.png', show_layer_names=True, show_shapes=True)

############################################################
################ Training and Eval #########################
############################################################

labels = np.load("labels.npy")
x1_train, x1_test, x2_train, x2_test, labels_train, labels_test = train_test_split(
    data1, data2, labels, test_size=0.15)

N_EPOCHS = 10
history = model.fit([x1_train, x2_train],
                    labels_train,
                    batch_size=128,
                    validation_split=.15,
                    epochs=N_EPOCHS,
                    callbacks=[EarlyStopping(patience=2)])

preds = model.predict([x1_test, x2_test]).argmax(axis=-1)
y_true = labels_test.argmax(axis=-1)

print(prfs(y_true, preds))
print(accuracy_score(y_true, preds))
Example #35
    for m in range(len(mnb.class_count_)):
        temp = np.argsort(mnb.coef_[m])[-20:]
        for e in temp[-10:]:
            if features[e] in aa_top_10.columns:
                aa_top_10[features[e]][mnb.classes_[m]] += 1
            else:
                aa_top_10[features[e]] = 0
                aa_top_10[features[e]][mnb.classes_[m]] = 1
        for k in temp[:10]:
            if features[k] in aa_next_10.columns:
                aa_next_10[features[k]][mnb.classes_[m]] += 1
            else:
                aa_next_10[features[k]] = 0
                aa_next_10[features[k]][mnb.classes_[m]] = 1

    metrics = prfs(data.Genus, y_pred)
    #record simulation data
    precisions.loc[j] = metrics[0]
    recalls.loc[j] = metrics[1]
    fbetas.loc[j] = metrics[2]

    metrics = prfs(data.Genus, y_pred, average='micro')
    micros.loc[j, 'Precision'] = metrics[0]
    micros.loc[j, 'Recall'] = metrics[1]
    micros.loc[j, 'Fbeta'] = metrics[2]

    metrics = prfs(data.Genus, y_pred, average='macro')
    macros.loc[j, 'Precision'] = metrics[0]
    macros.loc[j, 'Recall'] = metrics[1]
    macros.loc[j, 'Fbeta'] = metrics[2]
Example #36
#Model fitting
clf = rfc()

#Calling feature selection methods
fs = feature_selection()
#clf,x_train,x_test,x_final_test,y_out = fs.PCASelection(x_train,y_train_binary,x_test,y_test_binary,x_final_test,clf)
clf, x_train, x_test, x_final_test, y_out = fs.KBest(x_train, y_train_binary,
                                                     x_test, y_test_binary,
                                                     x_final_test, clf)
clf.fit(x_train, y_train_binary)
y_out = clf.predict(x_test)

#Printing scores
score = clf.score(x_test, y_test_binary)
print("Score : ", score)
print("Precision recall f-score support : ", prfs(y_test_binary, y_out))

#Cross validation
folds = 2
print("\nManual ", folds, " fold cross validation score")
CV(x_orig_train, y_orig_train_binary, clf, folds)
scores = cross_val_score(clf, x_orig_train, y_orig_train_binary, cv=10)

#Checking with inbuilt CV function
print("\nChecking with inbuilt function")
skf = KFold(n_splits=folds, shuffle=False)
skfscore = cross_val_score(clf, x_orig_train, y_orig_train_binary, cv=skf)
print(skfscore)

#Manual Parameter tuning
print("\nManual parameter tuning")