Example #1
def contrast_bn(epochs, lr):
    train_loader_no_bn = DataLoader(64, data_type='train', scale=True)
    valid_loader_no_bn = DataLoader(64, data_type='valid', scale=True)
    test_loader_no_bn = DataLoader(64, data_type='test', scale=True)
    model = Model(4, [28 * 28, 512, 512, 10],
                  initializer='xavier',
                  optimizer='sgd')
    his_no_bn = model.train(train_loader_no_bn,
                            valid_loader_no_bn,
                            epochs,
                            learning_rate=lr)
    pred, label = model.predict(test_loader_no_bn)
    acc = np.sum(pred == label) / len(pred)
    print('acc', acc)
    cm = confusion_matrix(label.reshape(-1), pred.reshape(-1), 10)
    cm_plot(cm, 'no_bn_cm, acc {:.3f}'.format(acc))

    train_loader_bn = DataLoader(64, data_type='train', scale=True)
    valid_loader_bn = DataLoader(64, data_type='valid', scale=True)
    test_loader_bn = DataLoader(64, data_type='test', scale=True)
    model2 = Model(4, [28 * 28, 512, 512, 10],
                   initializer='xavier',
                   optimizer='sgd')
    his_bn = model2.train_bn(train_loader_bn,
                             valid_loader_bn,
                             epochs,
                             learning_rate=lr)
    pred, label = model2.predict(test_loader_bn, bn=True)
    acc = np.sum(pred == label) / len(pred)
    print('acc', acc)
    cm = confusion_matrix(label.reshape(-1), pred.reshape(-1), 10)
    cm_plot(cm, 'bn_cm, acc {:.3f}'.format(acc))

    plot_history(his_no_bn, his_bn, 'no bn', 'bn',
                 'bn and no bn training loss', 'bn and no bn validation loss')
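Note: the contrast_* examples call a confusion_matrix(labels, preds, num_classes) helper and a cm_plot function defined elsewhere in the project. A minimal sketch of what such a counting helper could look like, inferred from the call site (an assumption, not the project's actual implementation):

import numpy as np

def confusion_matrix(labels, preds, num_classes):
    # Count matrix with true labels on rows and predicted labels on columns.
    labels = np.asarray(labels).reshape(-1).astype(int)
    preds = np.asarray(preds).reshape(-1).astype(int)
    cm = np.zeros((num_classes, num_classes), dtype=np.int64)
    for t, p in zip(labels, preds):
        cm[t, p] += 1
    return cm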
Example #2
def contrast_scale(epochs, lr):
    train_loader_no_scale = DataLoader(64, data_type='train', scale=False)
    valid_loader_no_scale = DataLoader(64, data_type='valid', scale=False)
    test_loader_no_scale = DataLoader(64, data_type='test', scale=False)
    model = Model(4, [28 * 28, 512, 512, 10], initializer='xavier')
    his_no_scale = model.train(train_loader_no_scale,
                               valid_loader_no_scale,
                               epochs,
                               learning_rate=lr)
    pred, label = model.predict(test_loader_no_scale)
    acc = np.sum(pred == label) / len(pred)
    print('acc', acc)
    cm = confusion_matrix(label.reshape(-1), pred.reshape(-1), 10)
    cm_plot(cm, 'no_scale_cm, acc {:.3f}'.format(acc))

    train_loader_scale = DataLoader(64, data_type='train', scale=True)
    valid_loader_scale = DataLoader(64, data_type='valid', scale=True)
    test_loader_scale = DataLoader(64, data_type='test', scale=True)
    model2 = Model(4, [28 * 28, 512, 512, 10], initializer='xavier')
    his_scale = model2.train(train_loader_scale,
                             valid_loader_scale,
                             epochs,
                             learning_rate=lr)
    pred, label = model2.predict(test_loader_scale)
    acc = np.sum(pred == label) / len(pred)
    print('acc', acc)
    cm = confusion_matrix(label.reshape(-1), pred.reshape(-1), 10)
    cm_plot(cm, 'scale_cm, acc {:.3f}'.format(acc))

    plot_history(his_no_scale, his_scale, 'no scale', 'scale',
                 'scale and no scale training loss',
                 'scale and no scale validation loss')
def hierarchical(encoder,
                 tsne,
                 true_data,
                 true_labels,
                 save_name="hierarchical.png"):
    """
    1. Predicts labels using hierarchical clustering
    2. Prints confusion_matrix
    3. Prints t-SNE plot of prediction
    """
    enc_output = encoder.predict(true_data)
    # Hierarchical Clustering
    labels = HierarchicalClustering()
    predictions = labels.draw_dendogram(
        enc_output,
        title='Hierarchical Clustering Dendrogram',
        savetitle=save_name)

    # Confusion matrix of hierarchical clustering
    confusion_matrix(true_labels,
                     predictions,
                     save_name="confusion_matrix_hierarchical.png")

    # Visualize test predictions from hierarchical
    true_data = np.reshape(true_data, (len(true_data), 64, 64))
    visualize_class_predictions(true_data, true_labels, predictions)
Example #4
def contrast_dropout(epochs, lr):
    train_loader_no_dropout = DataLoader(64, data_type='train', scale=True)
    valid_loader_no_dropout = DataLoader(64, data_type='valid', scale=True)
    test_loader_no_dropout = DataLoader(64, data_type='test', scale=True)
    model = Model(4, [28 * 28, 512, 512, 10], initializer='xavier')
    his_no_dropout = model.train(train_loader_no_dropout,
                                 valid_loader_no_dropout,
                                 epochs,
                                 learning_rate=lr)
    pred, label = model.predict(test_loader_no_dropout)
    acc = np.sum(pred == label) / len(pred)
    print('acc', acc)
    cm = confusion_matrix(label.reshape(-1), pred.reshape(-1), 10)
    cm_plot(cm, 'no_dropout_cm, acc {:.3f}'.format(acc))

    train_loader_dropout = DataLoader(64, data_type='train', scale=True)
    valid_loader_dropout = DataLoader(64, data_type='valid', scale=True)
    test_loader_dropout = DataLoader(64, data_type='test', scale=True)
    model2 = Model(4, [28 * 28, 512, 512, 10], initializer='xavier')
    his_dropout = model2.train(train_loader_dropout,
                               valid_loader_dropout,
                               epochs,
                               learning_rate=lr,
                               dropout_prob=0.3)
    pred, label = model2.predict(test_loader_dropout)
    acc = np.sum(pred == label) / len(pred)
    print('acc', acc)
    cm = confusion_matrix(label.reshape(-1), pred.reshape(-1), 10)
    cm_plot(cm, 'dropout_cm, acc {:.3f}'.format(acc))

    plot_history(his_no_dropout, his_dropout, 'no dropout', 'dropout',
                 'dropout and no dropout training loss',
                 'dropout and no dropout validation loss')
def test_knn():
    data, _ = utils.read_table("combined_data_normalized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 9]
    results = knn.knn_classifier(data, class_index, predictors, 5, 5)
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(results, "Crime Rate?",
                           "KNN Classifier Prediction of Crime Rate")
def test_naive_bayes():
    data, header = utils.read_table("combined_data_normalized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 7]
    results = bayes.naive_bayes_classifier(data, header, 10, class_index,
                                           predictors, [2, 3, 5, 9])
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(results, "Crime Rate?",
                           "Naive Bayes Classifier Prediction of Crime Rate")
def test_random_forest():
    data, header = utils.read_table("combined_data_discretized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 9]
    results = rforest.random_forest_classifier(data, header, class_index,
                                               predictors, 100, 25, 3)
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(
        results, "Crime Rate?",
        "Random Forest Classifier Prediction of Crime Rate")
def test_decision_tree():
    data, header = utils.read_table("combined_data_discretized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 9]
    results = dtree.decision_tree_classifier(data, header, class_index,
                                             predictors, 30)
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(
        results, "Crime Rate?",
        "Decision Tree Classifier Prediction of Crime Rate")
def logistic(Xtrain, Ytrain, Xdev, Ydev, verbose=False, scoring='f1'):
    """
    Trains a Logistic Regression model on the provided data. Scores the model
    on the dev data and returns both the metrics and the model. It also prints
    the optimal hyperparameters. 5-fold cross validation is performed to tune
    the l1 ratio and C (inverse regularization strength).

    Inputs:
        Xtrain, Ytrain: training features and labels
        Xdev, Ydev: development (validation) features and labels
        verbose: if True, also report the confusion matrix, ROC AUC, and
            precision-recall curve on the dev data
        scoring: scoring metric used for cross validation ('f1' by default)

    Returns:
        dict: dev-set metrics (F1, accuracy, precision, recall) plus the
            selected l1_ratio, C, and number of non-zero weights.
        LogisticRegressionCV: the best trained model.
    """
    print("\n========================\nTraining Logistic Regression\n")
    if scoring == 'f1':
        scoring = metrics.make_scorer(metrics.f1_score, average='binary')
    logit = LogisticRegressionCV(l1_ratios=[.1, .5, .7, .9, .95, .99, 1],
                                 Cs=[0.1, 1, 10],
                                 max_iter=10000,
                                 solver='saga',
                                 scoring=scoring,
                                 penalty='elasticnet')
    logit.fit(Xtrain, Ytrain)
    best_score = logit.score(Xdev, Ydev)
    Ydev_pred = logit.predict(Xdev)
    num_coeff = len(logit.coef_[logit.coef_ != 0])
    results = {
        "F1": best_score,
        "l1_ratio": logit.l1_ratio_[0],
        "C": logit.C_[0],
        "n_nonzero_weights": num_coeff,
        "accuracy": metrics.accuracy_score(Ydev, Ydev_pred),
        "precision": metrics.precision_score(Ydev, Ydev_pred,
                                             average='binary'),
        "recall": metrics.recall_score(Ydev, Ydev_pred, average='binary')
    }

    try:
        print(results)
    except Exception as e:
        print(f"Error occured printing results: {e}")

    if verbose:
        print(f"There are {num_coeff} non-zero weights in the logistic " +
              "regression model.")
        utils.confusion_matrix(Ydev, Ydev_pred)
        utils.roc_auc(logit, Xdev, Ydev)
        utils.precision_recall(logit, Xdev, Ydev)
    return results, logit
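A minimal usage sketch for logistic() with synthetic data (the dataset below is illustrative only, generated with sklearn's make_classification; it is not the data used in this project):

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, n_features=20, random_state=0)
Xtrain, Xdev, Ytrain, Ydev = train_test_split(X, y, test_size=0.2, random_state=0)
results, model = logistic(Xtrain, Ytrain, Xdev, Ydev, verbose=False)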
Example #10
def heat_map(prds_all, msks_all):
    if 'grss' in opt.data_dir:
        y_labels = [
            'Road', 'Tree', 'Red roof', 'Grey roof', 'Concrete\nroof',
            'Vegetation'
        ]
        sr_heatmap = normalize_rows(
            confusion_matrix(prds_all, msks_all, opt.num_classes))

        fig = plt.figure(figsize=(6, 6))
        ax = sns.heatmap(sr_heatmap,
                         linewidth=0.5,
                         cmap='Blues',
                         annot=True,
                         yticklabels=y_labels,
                         xticklabels=False)
        fig.savefig('heat_maps/' + exp_name + '/' + 'sr.png')

    elif 'coffee' in opt.data_dir:
        y_labels = ['non-coffee', 'coffee']
        sr_heatmap = normalize_rows(
            confusion_matrix(prds_all, msks_all, opt.num_classes))

        sns.set(font_scale=1.3)
        fig = plt.figure(figsize=(3.5, 3.5))
        ax = sns.heatmap(sr_heatmap,
                         linewidth=0.5,
                         cmap='Blues',
                         annot=True,
                         yticklabels=y_labels,
                         xticklabels=False)
        fig.savefig('heat_maps/' + exp_name + '/' + 'sr.png')

    elif 'vaihingen' in opt.data_dir or 'task_test' in opt.data_dir:
        y_labels = [
            'Impervious\nsurfaces', 'Building', 'Low\nvegetation', 'Tree',
            'Car'
        ]
        sr_heatmap = normalize_rows(
            confusion_matrix(prds_all, msks_all, opt.num_classes))
        sr_heatmap = np.delete(sr_heatmap, -1, axis=0)
        sr_heatmap = np.delete(sr_heatmap, -1, axis=1)

        fig = plt.figure(figsize=(5, 5))
        ax = sns.heatmap(sr_heatmap,
                         linewidth=0.5,
                         cmap='Blues',
                         annot=True,
                         yticklabels=y_labels,
                         xticklabels=False)
        fig.savefig('heat_maps/' + exp_name + '/' + 'sr.png')
def kmean(encoder, tsne, true_data, true_label):
    """
    1. Predicts labels using k-means clustering
    2. Prints confusion_matrix
    3. Prints accuracy
    4. Prints t-SNE plot of prediction
    """
    enc_output = encoder.predict(true_data)
    kmean = KMeansClustering()
    kmean.fit(enc_output)
    pred = kmean.predict(enc_output)
    accuracy(true_label, pred)
    confusion_matrix(true_label, pred, save_name="confusion_matrix_kmean.png")
    tsne.tsne_plot(true_data, pred, save_data_dir="kmean", save_name="kmean")
Example #12
    def _pred(self, x_df, y_df=None, data='confusion_matrix', format='tensor'):
        assert isinstance(x_df, pd.DataFrame)
        if y_df is not None: assert isinstance(y_df, pd.DataFrame)
        x_tensor = utils.get_tensor(x_df)

        y_tensor = self.df_to_tensor(y_df)

        y_pred_prob = self.model(Variable(x_tensor))

        # y_pred = x_tensor.mm(w1).clamp(min=0).mm(w2)
        if data == 'loss':
            loss = self.loss_fn(y_pred_prob, Variable(y_tensor))
            result = loss.data[0]
            return result
        elif data == 'pred':
            data_tensor = y_pred_prob.data
        else:  # data == 'confusion_matrix':
            y_pred, total_type_num = utils.max_ix(y_pred_prob.data)
            # total_type_num = len(self.ix_to_label)
            data_tensor = utils.confusion_matrix(y_pred, y_tensor,
                                                 total_type_num)

        if format == 'df':
            result = pd.DataFrame(data_tensor.numpy())
        elif format == 'np':
            result = data_tensor.numpy()
        else:  # format == 'tensor':
            result = data_tensor
        return result
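Note: utils.max_ix is assumed to return the row-wise argmax together with the number of classes, based on how it is unpacked above. A plausible sketch under that assumption:

def max_ix(prob_tensor):
    # Row-wise argmax of an (N, C) score tensor, plus the number of classes C.
    _, ix = prob_tensor.max(dim=1)
    return ix, prob_tensor.size(1)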
Example #13
def eval(args):
    device = torch.device(f"cuda:{args.device_id}")
    model = AlexNet(n_cls=100)
    model.to(device)
    model.load_state_dict(torch.load(args.pretrained_path))
    model.eval()

    test_loader = getLoaders(split="eval", batch_size=args.batch_size, num_workers=args.num_workers)

    pred_arr = []
    label_arr = []
    with torch.no_grad():
        for idx, (img, label) in tqdm(enumerate(test_loader), total=len(test_loader)):
            img = img.to(device)
            pred = model.pred(img)
            # mean of softmax prob from 10 different aug
            pred = pred.view(-1, 10, 100)
            pred = pred.mean(dim=1)
            pred_arr.append(pred.detach().cpu().numpy())
            label_arr.append(label.detach().numpy())
    pred_np = np.concatenate(pred_arr)
    label_np = np.concatenate(label_arr)
    top_1 = utils.top_k_acc(k=1, pred=pred_np, label=label_np)
    top_5 = utils.top_k_acc(k=5, pred=pred_np, label=label_np)
    confusion = utils.confusion_matrix(100, pred_np, label_np)
    torch.save({
        "top_1": top_1,
        "top_5": top_5,
        "confusion": confusion,
    }, "result.pth")
    print(f"top_1: {top_1*100:.2f}, top_5: {top_5*100:.2f}")
Example #14
 def measure(self, X, y, threshold=0.5):
     y_hat = self.predict(X)
     TP, FP, FN, TN = utils.confusion_matrix(threshold, y_hat, y)
     precision = float(TP) / (TP + FP)
     recall = float(TP) / (TP + FN)
     F1 = 2 * precision * recall / (precision + recall)
     return F1
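Here utils.confusion_matrix(threshold, y_hat, y) is expected to return binary (TP, FP, FN, TN) counts. A minimal sketch under that assumption (not the project's actual helper):

import numpy as np

def confusion_matrix(threshold, y_hat, y):
    # Binary counts from scores y_hat against 0/1 targets y at the given threshold.
    pred = (np.asarray(y_hat) >= threshold).astype(int)
    y = np.asarray(y).astype(int)
    TP = int(np.sum((pred == 1) & (y == 1)))
    FP = int(np.sum((pred == 1) & (y == 0)))
    FN = int(np.sum((pred == 0) & (y == 1)))
    TN = int(np.sum((pred == 0) & (y == 0)))
    return TP, FP, FN, TN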
 def fix_dropout(self, new_weights, old_weights, mask_prev, mask_next=None):
     """
     Update old weight with new dropout weights.
     New weights don't have the same format as old ones, so updating them is pretty tricky.
     :param new_weights: Weights of the Net after dropout
     :param old_weights: Weights of the Net before dropout (Real weights)
     :param mask_prev: Mask used on previous nodes
     :param mask_next: Mask used on next nodes
     :return: New weights for the real Net
     """
     if mask_next is not None:
         new_w = old_weights
         # Confusion matrix of the two masks gives the indexes of weights to update
         conf_matrix = utils.confusion_matrix(mask_next, mask_prev)
         indexes = np.argwhere(conf_matrix)
         c = 0
         for i in range(len(new_weights)):
             for j in range(len(new_weights[i])):
                 new_w[indexes[c][0], indexes[c][1]] = new_weights[i, j]
                 c += 1
     else:
         new_w = self.fix_dropout(new_weights, old_weights, mask_prev,
                                  np.ones(old_weights.shape[1]))
     return new_w
Example #16
 def _decode(self, x, x_, attention_mask, threshold=0.5):
     mask = attention_mask == 1
     y = x.masked_select(mask).cpu().long().numpy()
     y_ = x_.masked_select(mask).cpu().numpy()
     y_ = np.where(y_ > threshold, 1, 0)
     # np.array vector float
     return confusion_matrix(y, y_)
Example #17
    def valid(self, epoch, val_loader):
        self.G.eval()
        with torch.no_grad():
            # (tn, fp, fn, tp)
            cm = utils.ConfusionMatrix()

            for i, (input_, target_, _) in enumerate(val_loader):
                input_ = input_.to(self.torch_device)
                output_ = self.G(input_)
                target_ = target_.to(self.torch_device)

                ground_truth = target_.int().squeeze(1)
                prediction = torch.argmax(output_, dim=1).int()

                cm.update(
                    utils.confusion_matrix(prediction,
                                           ground_truth,
                                           reduce=False))
            metric = 1.5 * cm.f2 + cm.accuracy
            if metric > self.best_metric:
                self.best_metric = metric
                self.save(epoch)

            self.logger.write(
                "[Val] epoch: %d accuracy: %f f05: %f f1: %f f2: %f" %
                (epoch, cm.accuracy, cm.f05, cm.f1, cm.f2))
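utils.ConfusionMatrix here is an accumulator exposing accuracy, f05, f1 and f2. A rough sketch of such a class, assuming update() receives rows of (tn, fp, fn, tp) counts (the actual utils implementation may differ):

import torch

class ConfusionMatrix:
    def __init__(self):
        self.tn = self.fp = self.fn = self.tp = 0

    def update(self, counts):
        # counts: tensor/array whose last dimension holds (tn, fp, fn, tp).
        tn, fp, fn, tp = torch.as_tensor(counts).reshape(-1, 4).sum(dim=0).tolist()
        self.tn += tn; self.fp += fp; self.fn += fn; self.tp += tp

    @property
    def accuracy(self):
        total = self.tn + self.fp + self.fn + self.tp
        return (self.tp + self.tn) / total if total else 0.0

    def _fbeta(self, beta):
        p = self.tp / (self.tp + self.fp) if (self.tp + self.fp) else 0.0
        r = self.tp / (self.tp + self.fn) if (self.tp + self.fn) else 0.0
        if p == 0.0 and r == 0.0:
            return 0.0
        b2 = beta ** 2
        return (1 + b2) * p * r / (b2 * p + r)

    @property
    def f05(self):
        return self._fbeta(0.5)

    @property
    def f1(self):
        return self._fbeta(1.0)

    @property
    def f2(self):
        return self._fbeta(2.0)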
Example #18
def validate(valloader, train_state):

    model = train_state.ema_model
    criterion = train_state.criterion

    losses = AverageMeter()
    accuracy_meter = AverageMeter()

    # switch to evaluate mode
    model.eval()

    n_classes = len(train_state.class_names)
    confusion = torch.zeros(n_classes, n_classes)

    end = time.time()
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(valloader):
            if constants['use_cuda']:
                inputs, targets = inputs.cuda(), targets.cuda(
                    non_blocking=True)

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc = accuracy(outputs, targets)
            batch_confusion = confusion_matrix(outputs, targets)
            confusion += batch_confusion
            losses.update(loss.item(), inputs.size(0))
            accuracy_meter.update(acc.item(), inputs.size(0))

    return (losses.avg, accuracy_meter.avg, confusion)
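In this example confusion_matrix(outputs, targets) must return an n_classes x n_classes tensor that can be summed across batches. A sketch of such a helper for PyTorch logits, with the interface assumed from the call site:

import torch

def confusion_matrix(outputs, targets):
    # outputs: (N, C) logits; targets: (N,) integer labels. Returns a C x C count matrix.
    n_classes = outputs.size(1)
    preds = outputs.argmax(dim=1).cpu()
    targets = targets.cpu()
    cm = torch.zeros(n_classes, n_classes)
    for t, p in zip(targets.view(-1), preds.view(-1)):
        cm[t.long(), p.long()] += 1
    return cm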
Example #19
def main():
    graph = utils.load_graph()
    position = utils.get_positions(graph)
    utils.make_dir('images/louvain')

    true_communities = utils.get_labels(graph, list(graph.nodes))
    utils.plot_communities(graph, position, true_communities, labels=True,
                           title='Butterfly Similarity Network - True Communities',
                           path='images/louvain/communities_true.png')

    communities = utils.group_communities(louvain_clustering(graph))
    utils.plot_communities(graph, position, communities, labels=False,
                           title='Butterfly Similarity Network - Louvain Communities',
                           path='images/louvain/communities_louvain.png')

    graph_nodes = sorted(list(graph.nodes))
    predictions = utils.predict_majority_class(graph, communities)
    preds = [predictions[n] for n in graph_nodes]
    labels = [graph.nodes[n]['label'] for n in graph_nodes]
    utils.accuracy(preds, labels)
    utils.confusion_matrix(preds, labels,
                           'Confusion Matrix - Majority Label Predictions from Louvain Communities',
                           'images/louvain/cm_louvain.png')
Example #20
def main(args):
    # Load and standardize data.
    embedding_file_path = 'node2vec/embeddings/' + args.input
    X_train, X_test, y_train, y_test = load_splits(embedding_file_path)
    X_train, X_test = standardize_data(X_train, X_test)

    # Train classifier and make predictions.
    optimal_svc = hyperparameter_search(SVC(), X_train, y_train)
    print('Cross Validation Accuracy:', optimal_svc.best_score_)
    print('Optimal parameters:', optimal_svc.best_params_)
    predictions = optimal_svc.predict(X_test)

    # Report results.
    utils.make_dir('images/svc')
    cm_path = 'images/svc/cm_' + args.input[:-4] + '.png'
    utils.accuracy(predictions, y_test)
    utils.confusion_matrix(predictions, y_test, 'Confusion Matrix - SVC',
                           cm_path)
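hyperparameter_search is defined elsewhere; one plausible sketch is a thin wrapper around sklearn's GridSearchCV (the parameter grid below is an assumption, not the project's actual search space):

from sklearn.model_selection import GridSearchCV

def hyperparameter_search(estimator, X_train, y_train):
    # Small grid search over SVC hyperparameters; returns the fitted search object,
    # which exposes best_score_, best_params_ and predict() as used above.
    param_grid = {'C': [0.1, 1, 10], 'gamma': ['scale', 0.01, 0.001], 'kernel': ['rbf']}
    search = GridSearchCV(estimator, param_grid, cv=5)
    search.fit(X_train, y_train)
    return search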
def getDECNetworkResults(dec, enc):
    # Load test dataset
    test_data = LoadDataset("dataset/kaggle_original_train/", 0)
    test_data, test_label, val, val_label = test_data.load_data()

    big_data = LoadDataset("dataset/kaggle_augmented_train_new/", 0)
    big_data, _, _, _ = big_data.load_data()

    # make save directory
    os.makedirs(os.path.join("dec"), exist_ok=True)
    os.chdir("dec")

    encoded = enc.predict(test_data)
    q, _ = dec.predict(test_data, verbose=0)
    y_pred = q.argmax(1)

    print(y_pred)
    confusion_matrix(test_label.astype(np.int64), y_pred)

    # Take prediction time
    for i in range(20):
        iterate = 5000 * (i + 1)
        data = big_data[0:iterate, :]
        print(data.shape)
        print("KMEAN")
        start = time.time()
        q, _ = dec.predict(data, verbose=0)
        y_pred = q.argmax(1)
        end = time.time()
        print(end - start)

    train_x = np.reshape(test_data, (3720, 64, 64))

    TSNE = TSNEAlgo()
    TSNE.tsne_fit(encoded, perplexity=35)

    TSNE.tsne_plot(train_x,
                   y_pred.astype(int),
                   save_name="Pred",
                   save_data_dir="dec")
    TSNE.tsne_plot(train_x,
                   test_label.astype(int),
                   save_name="True",
                   save_data_dir="dec")
Example #22
def main():
    graph = utils.load_graph()
    position = utils.get_positions(graph)
    utils.make_dir('images/spectral')

    true_communities = utils.get_labels(graph, list(graph.nodes))
    utils.plot_communities(graph, position, true_communities, labels=True,
                           title='Butterfly Similarity Network - True Communities',
                           path='images/spectral/communities_true.png')

    node_assignments = spectral_clustering(graph)
    nodes_to_communities = {k: v for (k, v) in zip(range(len(node_assignments)),
                                                   node_assignments)}
    communities = utils.group_communities(nodes_to_communities)
    utils.plot_communities(graph, position, communities, labels=False,
                           title='Butterfly Similarity Network - Spectral Communities',
                           path='images/spectral/communities_spectral.png')

    graph_nodes = sorted(list(graph.nodes))
    predictions = utils.predict_majority_class(graph, communities)
    preds = [predictions[n] for n in graph_nodes]
    labels = [graph.nodes[n]['label'] for n in graph_nodes]
    utils.accuracy(preds, labels)
    utils.confusion_matrix(preds, labels, 'Confusion Matrix - Spectral Clustering',
                           'images/spectral/cm_spectral.png')
def train_bottomupdnn_v1(epochs=40, init_epochs=10, lr=0.01):
    print("======Train base DNN using clean data======")
    model, train_loader, test_loader, optimizer, criterion = train_init(
        "BaseDNN", lr=lr)
    clean_loader, clean_train_loader, noisy_loader = get_dataloader_bu1()
    model.mode = "BaseModel"

    # Train base DNN model
    for epoch in range(init_epochs):
        train_epoch(model, clean_train_loader, test_loader, optimizer,
                    criterion, epoch)

    # Estimate confusion matrix for new bottom-up DNN v1 model
    y_clean, y_pred = get_predict_label(model, clean_loader)
    cmatrix_clean = confusion_matrix(y_clean, y_pred)
    y_noisy, y_pred = get_predict_label(model, noisy_loader)
    cmatrix_noisy = confusion_matrix(y_noisy, y_pred)

    rmatrix = compute_rmatrix(cmatrix_clean, cmatrix_noisy)
    cmatrix = compute_estimate_confusion(rmatrix, y_noisy)

    # Initialize new bottom-up DNN v1 model
    new_model = models.ButtomUpDNN1(model.params)
    new_model.confusion.weight = nn.Parameter(cmatrix)
    new_model.confusion.weight.requires_grad = False
    optimizer = optim.SGD(new_model.parameters(), lr=lr, momentum=0.9)

    # Concatenate all clean dataset and get data loader
    clean_dataset = torch.utils.data.ConcatDataset(
        [clean_loader.dataset, clean_train_loader.dataset])
    all_clean_loader = torch.utils.data.DataLoader(dataset=clean_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)

    # Train bottom-up DNN v1 model
    for epoch in range(init_epochs, epochs):
        train_epoch_bu1(new_model, all_clean_loader, noisy_loader, test_loader,
                        optimizer, criterion, epoch)

    # Save estimated Q matrix
    plt.matshow(cmatrix)
    plt.colorbar()
    plt.savefig("./imgs/estimated_Q.png")
 def evaluate(self, X, y, with_plot=False):
     pred = self.__fit(X, y)
     if self.classification:
         return confusion_matrix(y, pred)
     else:
         if with_plot:
             plt.figure(figsize=(12, 8))
             plt.scatter(y, pred)
             plt.xlabel('y')
             plt.ylabel('y_pred')
         return np.mean((y - pred)**2)
    def __model_evaluation(self, arr, p_dna):
        wt_data, mt_data = arr
        df_dna = confusion_matrix(wt=wt_data, mt=mt_data, predicted=p_dna)

        # Convert to amino acid
        wt_aa = map(dna_to_aa, wt_data)
        mt_aa = map(dna_to_aa, mt_data)
        p_aa = map(dna_to_aa, p_dna)
        df_aa = confusion_matrix(wt=wt_aa, mt=mt_aa, predicted=p_aa)

        df = pd.concat([df_dna, df_aa], axis=1)

        outfile = 'summary_6nodes_nopostproc.csv'
        df.to_csv(outfile)

        # Show histogram
        df['Accuracy'].hist(bins=20)
        df['TP'].hist(bins=20)
        df['#Mutate Positions'].hist(bins=20)
        plt.show()
Example #26
def train_model(train_config):
    images, labels = image_load.read_12channel_images(
        train_config['target_path'], train_config['image_resize'])
    kfold_container = image_load.KFoldContainer(images, labels, train_config['Kfold'])
    utils.write_config(train_config, train_config['save_path'] + 'config.csv')
    confuse_matrix = []
    accuracy = []
    duration = []

    for k in range(train_config['Kfold']):
        train_x, train_y, test_x, test_y = kfold_container.get_fold_k(k)
        train_x, train_y = image_load.augimage(train_x, train_y)

        print('Load Training {n:d} images'.format(n=len(train_x)))
        print('Load Testing {n:d} images'.format(n=len(test_x)))

        train_ds = tf.data.Dataset.from_tensor_slices((train_x, train_y))
        train_ds = train_ds.shuffle(
            buffer_size=train_config['shuffle_buffer_size']).repeat().batch(
                train_config['batch_size'])
        test_ds = tf.data.Dataset.from_tensor_slices((test_x, test_y))
        test_ds = test_ds.repeat(1).batch(train_config['batch_size'])

        model = models.model_7_3_12channel(train_config['image_resize'],
                                           train_config['l2_factor'])

        model.compile(optimizer=tf.keras.optimizers.Adam(lr=train_config['lr']),
                      loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                      metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

        save_name = f'{train_config["save_path"]}{train_config["model_name"]}-{k}'

        cbs = [tf.keras.callbacks.EarlyStopping(patience=train_config['early_stop_patience']),
               tf.keras.callbacks.ModelCheckpoint(monitor='val_sparse_categorical_accuracy',
                                                  filepath=save_name, save_best_only=True,
                                                  save_weights_only=True, verbose=1),
               utils.HSLRSchedular(train_config['lr'],
                                   watch_value_name=train_config['schedular_watch_name'],
                                   max_reduce_time=train_config['schedular_max_reduce_time'],
                                   reduce_factor=train_config['schedular_reduce_factor'],
                                   restart_factor=train_config['schedular_restart_factor'],
                                   patience=train_config['schedular_patience'],
                                   verbose=0),
               utils.HSTensorboard(log_dir=f'./logs/{save_name}/', embeddings_metadata=test_x)]

        ct = time.time()
        model.fit(train_ds, epochs=train_config['epochs'],
                  steps_per_epoch=train_config['steps_per_epoch'],
                  validation_data=test_ds, callbacks=cbs)
        duration.append(time.time() - ct)

        model.load_weights(save_name)
        logits = model.predict(test_x)
        cm, acc = utils.confusion_matrix(test_y, tf.argmax(logits, axis=1).numpy())
        confuse_matrix.append(cm)
        accuracy.append(acc)
        print(f'finish training. k={k}, accuracy={acc:.2f}')

    sio.savemat(train_config['save_path'] + 'result.mat',
                {'cm': np.array(confuse_matrix),
                 'accuracy': np.array(accuracy),
                 'duration': np.array(duration)})
Example #27
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    conf_mat = np.zeros((NUM_CLASSES, NUM_CLASSES))

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        current_lr = opt.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        train_loss += loss.item()

        conf_mat += utils.confusion_matrix(outputs, targets, NUM_CLASSES)

        acc = sum([conf_mat[i, i]
                   for i in range(conf_mat.shape[0])]) / conf_mat.sum()
        uacc_per_class = [
            conf_mat[i, i] / conf_mat[i].sum()
            for i in range(conf_mat.shape[0])
        ]
        unweighted_acc = sum(uacc_per_class) / len(uacc_per_class)

        prec_per_class = [
            conf_mat[i, i] / conf_mat[:, i].sum()
            for i in range(conf_mat.shape[0])
        ]
        average_precision = sum(prec_per_class) / len(prec_per_class)

        utils.progress_bar(
            batch_idx, len(trainloader),
            'Loss: %.3f | Acc: %.3f%% | unweighted_Acc: %.3f%%' %
            (train_loss / (batch_idx + 1), 100. * acc, 100. * unweighted_acc))

    Train_acc = 100. * acc
Example #28
def process_dataset(dataset, colors):
    y = np.load("./data/" + dataset + '_labels.npy')
    pred = np.load("./data/" + dataset + '_clasification.npy')
    segments = np.load("./results/" + dataset + '_segments.npy')
    test_mask = np.load("./data/" + dataset + '_test_mask.npy').reshape(
        y.shape)
    sc_pred = classify_segments(pred, segments)
    sc_score = utils.balanced_score(y[test_mask], sc_pred[test_mask])
    sc_cm = utils.confusion_matrix(y[test_mask], sc_pred[test_mask])
    utils.save_json({"sc": sc_score}, dataset + "_sc_score")
    utils.save_csv(sc_cm, dataset + "_sc_cm")
    color_map = color_true_map(sc_pred, labels_colors=colors)
    save_image(color_map, dataset + "_sc_clasification")
def spectral(encoder, tsne, true_data, true_label):
    """
    1. Predicts labels using spectral clustering
    2. Prints confusion_matrix
    3. Prints accuracy
    4. Prints t-SNE plot of prediction
    """
    enc_output = encoder.predict(true_data)
    model = SpectralClustering(n_clusters=5,
                               affinity='nearest_neighbors',
                               assign_labels='kmeans')
    pred = model.fit_predict(enc_output)
    accuracy(true_label, pred)
    confusion_matrix(true_label,
                     pred,
                     save_name="confusion_matrix_spectral.png")
    tsne.tsne_plot(true_data,
                   pred,
                   save_data_dir="spectral",
                   save_name="spectral")
    tsne.tsne_plot(true_data,
                   true_label,
                   save_data_dir="true_label",
                   save_name="true_label")
Example #30
def eval(loader, model, is_test=False, confusion_matrix=False, filename=None):
    """
    Evaluate model performance on data object (graph) in loader.

    :param - loader: torch_geometric DataLoader for BIOSNAP dataset
    :param - model: trained GNN model ready for making predictions
    :param - is_test: boolean indicating whether to evaluate on the test or validation split
    :param - confusion_matrix: boolean indicating whether to plot and save a confusion matrix
    :param - filename: suffix used for the saved confusion matrix image
    """
    model.eval()
    data = [data for data in loader][0]
    mask = data.test_mask if is_test else data.val_mask
    with torch.no_grad():
        pred = model(data).max(dim=1)[1][mask]
        label = data.y[mask]

    if confusion_matrix:
        utils.make_dir(gnn_utils.images_dir)
        title = 'Confusion Matrix - GNN'
        path = gnn_utils.images_dir + 'cm_' + filename + '.png'
        utils.confusion_matrix(pred.to('cpu'), label.to('cpu'), title, path)

    correct = pred.eq(label).sum().item()
    total = mask.sum().item()
    return correct / total