Example 1
import os

import torch
from torch import LongTensor

# `utils` is a project-local module providing save_preds.

def test_model(model, loader, is_nn):
    print(f"Testing {type(model).__name__}")
    if is_nn:
        preds_list = []
        for batch in loader.get_batch(val=False):
            msgs = LongTensor(batch[1])
            # sort messages by descending length, as packed RNN input requires
            msg_len_ordered, msg_perm = (msgs != model.pad).sum(dim=1).sort(descending=True) # yapf: disable
            _, undo_msg_perm = msg_perm.sort()
            msgs = msgs[msg_perm]

            with torch.no_grad():
                model.zero_grad()
                preds = model.forward(msgs, msg_len_ordered, is_training=False)

                preds = preds[undo_msg_perm]  # restore original batch order
                preds_list.append(preds.cpu())

        # concatenate batch outputs, take the argmax class per sample
        preds = torch.max(torch.cat(preds_list, 0), 1)[1]
        preds = preds[:len(loader.data)] + 1  # trim padding; shift to 1-based labels
    else:
        preds = model.test(loader.data)

    if model.argv.output is None:
        out_path = os.path.join(model.argv.res_root, model.argv.model_folder,
                                f'test_epoch{model.argv.checkpoint_ver}.txt')
    else:
        out_path = model.argv.output
    utils.save_preds(out_path, preds)
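
All six examples delegate the final write to a project-local `save_preds` helper whose body is not shown. A minimal sketch consistent with this first call, save_preds(out_path, preds), might simply write one label per line; the file format is an assumption:

# Hypothetical sketch of utils.save_preds; the real project helper is not shown.
def save_preds(out_path, preds):
    """Write one predicted label per line to out_path (assumed format)."""
    with open(out_path, 'w') as f:
        for p in preds:
            f.write(f"{int(p)}\n")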
Example 2
import logging

import numpy as np
import torch

# `utils` and the TEST_NAME constant come from the surrounding project.

def evaluate(net,
             criterion,
             experiment_dir,
             args,
             valid_loader,
             plot_name):
    net.eval()
    epoch_loss = 0
    nb_batches = len(valid_loader)
    nb_eval = nb_batches * args.batch_size
    # Track samples by batches for scoring
    pred_y = np.zeros((nb_eval))
    true_y = np.zeros((nb_eval))
    weights = np.zeros((nb_eval))
    evt_id = []
    f_name = []
    logging.info("Evaluating {} {} samples.".format(nb_eval,plot_name))
    with torch.no_grad():
        for i, batch in enumerate(valid_loader):
            X, y, w, adj_mask, batch_nb_nodes, evt_ids, evt_names = batch
            out = net(X, adj_mask, batch_nb_nodes)
            loss = criterion(out, y, w)
            epoch_loss += loss.item()
            # Track predictions, truth, weights over batches
            beg =     i * args.batch_size
            end = (i+1) * args.batch_size
            pred_y[beg:end] = out.data.cpu().numpy()
            true_y[beg:end] = y.data.cpu().numpy()
            weights[beg:end] = w.data.cpu().numpy()
            if plot_name==TEST_NAME:
                evt_id.extend(evt_ids)
                f_name.extend(evt_names)

            # Print running loss twice per evaluation
            if nb_batches >= 2 and (i + 1) % (nb_batches // 2) == 0:
                nb_proc = (i+1)*args.batch_size
                logging.info("  {:5d}: {:.9f}".format(nb_proc,
                                                      epoch_loss/nb_proc))

    # Score predictions, save plots, and log performance
    epoch_loss /= nb_eval # Normalize loss
    tpr, roc = utils.score_plot_preds(true_y, pred_y, weights,
                                      experiment_dir, plot_name, args.eval_tpr)
    logging.info("{}: loss {:>.3E} -- AUC {:>.3E} -- TPR {:>.3e}".format(
                                      plot_name, epoch_loss, roc, tpr))

    if plot_name == TEST_NAME:
        utils.save_test_scores(nb_eval, epoch_loss, tpr, roc, experiment_dir)
        utils.save_preds(evt_id, f_name, pred_y, experiment_dir)
    return (tpr, roc, epoch_loss)
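
The preallocated arrays above assume every batch holds exactly args.batch_size samples (i.e. the loader drops any short final batch); otherwise the trailing slots keep their zero initialization. A sketch of an alternative that avoids that assumption by collecting per-batch outputs and concatenating at the end:

# Sketch: collect per-batch outputs instead of preallocating fixed slots.
pred_chunks, true_chunks, weight_chunks = [], [], []
for batch in valid_loader:
    X, y, w, adj_mask, batch_nb_nodes, evt_ids, evt_names = batch
    out = net(X, adj_mask, batch_nb_nodes)
    pred_chunks.append(out.data.cpu().numpy())
    true_chunks.append(y.data.cpu().numpy())
    weight_chunks.append(w.data.cpu().numpy())
pred_y = np.concatenate(pred_chunks)    # handles a short final batch too
true_y = np.concatenate(true_chunks)
weights = np.concatenate(weight_chunks)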
Example 3
from keras.callbacks import ModelCheckpoint

# DataGenerator, get_model, save_numpy, and save_preds are project code.

# Generators
training_generator = DataGenerator(training_ids, labels, **params)
params['batch_size'] = 128  # validation runs with its own batch size
validation_generator = DataGenerator(validation_ids, labels, **params)

# Design model
model = get_model(params, True)
model.summary()  # summary() prints itself and returns None

# Train
checkpointer = ModelCheckpoint(filepath='model.h5', verbose=2,
                               save_best_only=True, save_weights_only=False)
model.fit_generator(generator=training_generator,
                    validation_data=validation_generator,
                    callbacks=[checkpointer],
                    epochs=1)

# Predict
params['shuffle'] = False
params['augment'] = False
params['batch_size'] = 1
params['dir_path'] = '/tmp/human_atlas/testing_data/'
testing_generator = DataGenerator(testing_ids, labels=None, **params)

preds = model.predict_generator(testing_generator,
                                steps=len(testing_ids),
                                verbose=1)
save_numpy(preds)
save_preds(preds, testing_ids)
print(preds, preds.shape, len(testing_ids))
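
DataGenerator here is project code. If it follows the standard keras.utils.Sequence pattern (an assumption), its core would look roughly like the sketch below; __len__ and __getitem__ are what fit_generator and predict_generator call:

import numpy as np
from keras.utils import Sequence

class DataGeneratorSketch(Sequence):
    """Hypothetical sketch of the project's DataGenerator."""
    def __init__(self, ids, labels, batch_size=32, **kwargs):
        self.ids, self.labels, self.batch_size = ids, labels, batch_size

    def __len__(self):
        # number of batches per epoch
        return int(np.ceil(len(self.ids) / self.batch_size))

    def __getitem__(self, idx):
        batch_ids = self.ids[idx * self.batch_size:(idx + 1) * self.batch_size]
        X = np.stack([load_image(i) for i in batch_ids])  # load_image is hypothetical
        if self.labels is None:   # test mode, as in the snippet above
            return X
        y = np.stack([self.labels[i] for i in batch_ids])
        return X, y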
Example 4
            acc = accuracy_score(val_gts, val_preds)
            i += 1
            print(f"[{i}/{len(file_list)/2}]acc: {acc*100:.2f} {file_path}")
    else:
        with open(file_path, 'r') as f:
            test_pred_all.append([int(line) for line in f])

val_pred_all = np.array(val_pred_all)
val_gts = np.array(val_gts)
test_pred_all = np.array(test_pred_all)

assert val_pred_all.shape[1] == val_gts.shape[0]
assert val_pred_all.shape[0] == test_pred_all.shape[0]
sys.stdout.flush()

# majority vote across ensemble members (mode along axis 0)
val_pred_max_freq, val_pred_max_cnt = scipy.stats.mode(val_pred_all)
target_names = [str(l) for l in range(1, 21)]
acc = accuracy_score(val_gts, val_pred_max_freq.T)
print(acc)
print(
    classification_report(val_gts,
                          val_pred_max_freq.T,
                          target_names=target_names))
out_path = os.path.join(res_root, f'ensemble_val_acc{acc*100:0.2f}.png')
utils.plot_confusion_matrix(target_names, val_gts, val_pred_max_freq.T,
                            out_path)

test_pred_max_freq, test_pred_max_cnt = scipy.stats.mode(test_pred_all)
test_out_path = os.path.join(res_root, f'ensemble_test_acc{acc*100:0.2f}.txt')
utils.save_preds(test_out_path, test_pred_max_freq.squeeze())
print()
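
The ensemble vote relies on scipy.stats.mode reducing along axis 0 by default, i.e. across ensemble members for each sample. A small worked example (SciPy >= 1.9 changes the keepdims default, so passing axis explicitly is safer):

import numpy as np
from scipy import stats

votes = np.array([[1, 2, 2, 5],    # rows: 3 ensemble members
                  [1, 3, 2, 5],    # columns: 4 samples
                  [2, 3, 2, 4]])
mode, count = stats.mode(votes, axis=0)
# mode  -> majority label per sample: 1, 3, 2, 5
# count -> members agreeing:          2, 2, 3, 2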
Example 5
import pdb

import sklearn.metrics
import torch
import torch.nn.functional as F
from tqdm import trange

# `utils`, `Metrics`, and `args` come from the surrounding project.

def runModel(datagen, model, optimizer, class_wts, process, batch_size,
             n_batches, loss_wts):
    '''
    process : 'trn', 'val' or 'tst'
    '''
    running_loss = 0
    pred_list = []
    label_list = []
    soft_pred_list = []
    all_file_list = []
    with trange(n_batches, desc=process, ncols=100) as t:
        for m in range(n_batches):
            data, labels, filenames = next(datagen)
            labels_one_hot = utils.get_one_hot(labels).cuda()
            if process == 'trn':
                optimizer.zero_grad()
                model.train()
                pred, aux_pred = model.forward(data)
                pred = F.softmax(pred, 1)
                aux_pred = F.softmax(aux_pred, 1)
                loss = 0
                for i in range(2):
                    loss += (loss_wts[0] * utils.weightedBCE(
                                 class_wts[i], pred[:, i],
                                 labels_one_hot[:, i])
                             + loss_wts[1] * utils.weightedBCE(
                                 class_wts[i], aux_pred[:, i],
                                 labels_one_hot[:, i]))
                if torch.isnan(loss):
                    pdb.set_trace()  # inspect before backprop propagates NaNs
                loss.backward()
                optimizer.step()
            elif process == 'val' or process == 'tst':
                model.eval()
                with torch.no_grad():
                    pred = F.softmax(model.forward(data), 1)
                    loss = utils.weightedBCE(class_wts[0], pred[:, 0],
                                             labels_one_hot[:, 0])\
                        + utils.weightedBCE(class_wts[1], pred[:, 1],
                                            labels_one_hot[:, 1])
            running_loss += loss.detach()  # detach so the graph is not retained
            hard_pred = torch.argmax(pred, 1)
            pred_list.append(hard_pred.cpu())
            soft_pred_list.append(pred.detach().cpu())
            label_list.append(labels.cpu())
            all_file_list += filenames
            t.set_postfix(loss=running_loss.item() /
                          (float(m + 1) * batch_size))
            t.update()
        finalLoss = running_loss / (float(m + 1) * batch_size)
        # if process != 'trn':
        #     pred_list, soft_pred_list, label_list = utils.test_time_aug(
        #                                                     all_file_list,
        #                                                     soft_pred_list,
        #                                                     label_list, 3)
        acc = utils.globalAcc(pred_list, label_list)
        # pred_list is a list of per-batch tensors here, so this branch runs
        if not isinstance(pred_list, torch.Tensor):
            f1 = sklearn.metrics.f1_score(torch.cat(label_list),
                                          torch.cat(pred_list),
                                          labels=None)
        else:
            f1 = sklearn.metrics.f1_score(label_list, pred_list, labels=None)
        auroc, auprc, fpr_tpr_arr, precision_recall_arr = utils.AUC(
            soft_pred_list, label_list)
        metrics = Metrics(finalLoss, acc, f1, auroc, auprc, fpr_tpr_arr,
                          precision_recall_arr)
        utils.save_preds(soft_pred_list, pred_list, label_list, all_file_list,
                         args.savename, process)
        return metrics
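
utils.weightedBCE is also project code. Judging from the call sites, weightedBCE(class_wt, pred, target) applies a per-class weight inside a binary cross-entropy; one plausible sketch, not the project's actual definition:

import torch

def weighted_bce_sketch(class_wt, pred, target, eps=1e-7):
    """Hypothetical weighted BCE: scales the positive term by class_wt."""
    pred = pred.clamp(eps, 1 - eps)  # guard against log(0)
    loss = -(class_wt * target * torch.log(pred)
             + (1 - target) * torch.log(1 - pred))
    return loss.mean()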
Example 6
# -*- coding: utf-8 -*-
"""
Created on Tue Nov  6 20:53:36 2018

@author: Arpit
"""

from utils import save_preds, get_songs, split_data, to_label
from data_processing import get_image_data
from model import Model

# get names of all the songs
songs = get_songs()

# split them into training and validation sets with the given fraction
songs_train, songs_valid = split_data(songs, 0.85)

# get the actual spectrogram data (2-D np.array) for the songs
X_train, Y_train = get_image_data('train', songs_train)
X_valid, Y_valid = get_image_data('valid', songs_valid)

# get names and spectrogram data for the final test set (predictions to be uploaded)
X_test, keys = get_image_data('test')

model = Model(False)
model.train(X_train, Y_train, X_valid, Y_valid, 5000)

preds = model.predict(X_test)
preds = [to_label(pred) for pred in preds]
save_preds(keys, preds, 'predictions.csv')
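
In this last example save_preds takes a third signature, save_preds(keys, preds, filename), suggesting a key/label CSV for submission. A minimal sketch of that variant; the column names are assumptions:

import csv

def save_preds(keys, preds, filename):
    """Hypothetical CSV writer pairing each song key with its predicted label."""
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['id', 'label'])  # assumed header
        for key, pred in zip(keys, preds):
            writer.writerow([key, pred])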