Esempio n. 1
0
def train(fold):
    """Train the multitask CNN on one cross-validation fold and save it.

    Creates the fold-specific checkpoint and log directories, fits the model
    one epoch at a time on freshly augmented training data, appends the
    train/test ROC AUC to ``auc.txt`` in the fold's log directory after every
    tenth epoch, and finally saves the model as ``weights.h5`` in the fold's
    checkpoint directory.

    Args:
        fold: Fold identifier. Its string form replaces the ``<FOLD>``
            placeholder in ``checkpoints_dir`` and ``logs_dir``, and it is
            passed unchanged to ``fold_data``.
    """
    fold_checkpoints_dir = checkpoints_dir.replace("<FOLD>", str(fold))
    fold_logs_dir = logs_dir.replace("<FOLD>", str(fold))

    # exist_ok=True removes the check-then-create race that occurs when
    # several folds are started concurrently and share parent directories.
    os.makedirs(fold_checkpoints_dir, exist_ok=True)
    os.makedirs(fold_logs_dir, exist_ok=True)

    x_train, y_train, x_test, y_test = fold_data(fold)

    print("Training and validation data processed.")
    print("Training data shape: {}".format(len(x_train)))
    print("Test data shape: {}".format(len(x_test)))

    model = multitask_cnn()

    optimizer = RMSprop(lr=base_lr)

    model.compile(
        optimizer=optimizer,
        loss=loss_dict,
        loss_weights=loss_weights_dict,
        metrics=["accuracy"],
    )

    training_log = TensorBoard(log_dir=os.path.join(fold_logs_dir, "log"),
                               write_graph=False)

    callbacks = [training_log]

    # NOTE(review): y_train is indexed by output name while y_test is indexed
    # by position -- confirm fold_data really returns these two layouts.
    y_train_cancer = y_train["out_cancer"]
    y_test_cancer = y_test[0]

    auc_path = os.path.join(fold_logs_dir, "auc.txt")

    for e in range(epochs):
        # Re-augment every epoch so the network sees new variations each pass.
        x_train_augmented = augment(x_train)
        # epochs=e + 1 together with initial_epoch=e runs exactly one epoch
        # per call while keeping the logged epoch counter continuous.
        model.fit(
            x={"thyroid_input": x_train_augmented},
            y=y_train,
            validation_data=(x_test, y_test),
            batch_size=batch_size,
            epochs=e + 1,
            initial_epoch=e,
            shuffle=True,
            callbacks=callbacks,
        )

        if (e + 1) % 10 == 0:
            # AUC is computed on the un-augmented training data; the first
            # model output is scored against the cancer labels.
            y_pred = model.predict(x_train, batch_size=batch_size, verbose=1)
            auc_train = roc_auc_score(y_train_cancer, y_pred[0])
            y_pred = model.predict(x_test, batch_size=batch_size, verbose=1)
            auc_test = roc_auc_score(y_test_cancer, y_pred[0])
            with open(auc_path, "a") as auc_file:
                auc_file.write("{},{}\n".format(auc_train, auc_test))

    model.save(os.path.join(fold_checkpoints_dir, "weights.h5"))

    print("Training fold {} completed.".format(fold))
Esempio n. 2
0
def run():
    """Resume training from the current generation and queue each new one.

    The generation number is taken from the first entry of
    ``<weights_dir>/current`` and the matching ``.pkl`` weights are loaded.
    The loaded data is shuffled and split 80/20 into training and test parts,
    and the model is tested once before training starts. For each of
    ``c.train_count`` rounds, ``c.train_epoch`` epochs are run; every epoch
    augments and shuffles the training part, trains, tests, saves the weights
    as generation ``gen + 1`` and creates an empty marker file for it under
    ``eval/queue``. After each round the entry in ``current`` is renamed to
    the new generation number.
    """

    def weights_file(generation):
        # Pickle file holding the weights of the given generation.
        return c.weights_dir + "/{0}.pkl".format(generation)

    def current_entry(generation):
        # Entry whose name records which generation is current.
        return c.weights_dir + "/current/{0}".format(generation)

    def queue_entry(generation):
        # Empty marker file created for a freshly saved generation.
        return c.weights_dir + "/eval/queue/{0}".format(generation)

    print("Training...")
    gen = int(os.listdir(c.weights_dir + "/current")[0])
    mod = model.Model()
    mod.load_weight(weights_file(gen))

    print("preparing data...")
    obsv, prob, result = data.load()
    count = np.size(obsv, 0)
    count_train = int(count * 0.8)
    obsv, prob, result = data.shuffle(obsv, prob, result)

    # First 80% of the shuffled samples train, the remainder evaluates.
    train_part = slice(0, count_train)
    test_part = slice(count_train, count)

    train_obsv = obsv[train_part]
    train_prob = prob[train_part]
    train_result = result[train_part]

    test_obsv = obsv[test_part]
    test_prob = prob[test_part]
    test_result = result[test_part]

    # Baseline evaluation of the loaded weights.
    mod.test(test_obsv, test_prob, test_result)

    for _ in range(c.train_count):
        next_gen = gen + 1
        for epoch in range(c.train_epoch):
            print("epoch {0} of {1}".format(epoch + 1, c.train_epoch))

            aug_obsv, aug_prob, aug_result = data.augment(
                train_obsv, train_prob, train_result)
            aug_obsv, aug_prob, aug_result = data.shuffle(
                aug_obsv, aug_prob, aug_result)

            mod.train(aug_obsv, aug_prob, aug_result)
            mod.test(test_obsv, test_prob, test_result)

            # The same generation file is overwritten after every epoch.
            mod.save_weight(weights_file(next_gen))
            with open(queue_entry(next_gen), mode="w"):
                pass

        os.rename(current_entry(gen), current_entry(next_gen))
        gen = next_gen

    print("Train Finished!")
Esempio n. 3
0
def run():
    """Train further generations starting from the current weights.

    Reads the current generation number from the first entry of
    ``<WEIGHTS_DIRECTORY>/current`` and loads that generation's ``.pkl``
    weights. The loaded data is shuffled, split 80/20 into training and test
    parts, and the model is tested once up front. ``C.WEIGHT_COUNT`` rounds
    follow, each consisting of ``C.WEIGHT_EPOCH`` epochs of
    augment -> shuffle -> train -> test; after every epoch the weights are
    saved as generation ``gen + 1`` and an empty marker file is created for
    it under ``eval/queue``. At the end of each round the entry in
    ``current`` is renamed to the new generation number.
    """

    def pkl_path(generation):
        # Weight file for one generation.
        return C.WEIGHTS_DIRECTORY + "/{0}.pkl".format(generation)

    def current_path(generation):
        # Entry in "current" named after the active generation.
        return C.WEIGHTS_DIRECTORY + "/current/{0}".format(generation)

    def queue_path(generation):
        # Marker file announcing a generation under "eval/queue".
        return C.WEIGHTS_DIRECTORY + "/eval/queue/{0}".format(generation)

    gen = int(os.listdir(C.WEIGHTS_DIRECTORY + "/current")[0])
    mod = model.Model()
    mod.load_weight(pkl_path(gen))

    print("preparing data...")
    obsv, prob, result = data.load()
    count = np.size(obsv, 0)
    count_train = int(count * 0.8)
    obsv, prob, result = data.shuffle(obsv, prob, result)

    # Leading 80% for training, trailing 20% for testing.
    head = slice(0, count_train)
    tail = slice(count_train, count)

    train_obsv, train_prob, train_result = obsv[head], prob[head], result[head]
    test_obsv, test_prob, test_result = obsv[tail], prob[tail], result[tail]

    # Evaluate the loaded weights before any further training.
    mod.test(test_obsv, test_prob, test_result)

    for _ in range(C.WEIGHT_COUNT):
        upcoming = gen + 1
        for epoch in range(C.WEIGHT_EPOCH):
            print("epoch {0} of {1}".format(epoch + 1, C.WEIGHT_EPOCH))

            batch_obsv, batch_prob, batch_result = data.augment(
                train_obsv, train_prob, train_result)
            batch_obsv, batch_prob, batch_result = data.shuffle(
                batch_obsv, batch_prob, batch_result)

            mod.train(batch_obsv, batch_prob, batch_result)
            mod.test(test_obsv, test_prob, test_result)

            # Each epoch overwrites the upcoming generation's weight file.
            mod.save_weight(pkl_path(upcoming))
            with open(queue_path(upcoming), mode="w"):
                pass

        os.rename(current_path(gen), current_path(upcoming))
        gen = upcoming
Esempio n. 4
0
import data

# Notebook-style walkthrough (cells separated by "# %%"): build the training
# dataset, display one sample, display one augmented sample, then display a
# fixed-weight mixup of two augmented samples.

# Fetch the training images and labels from the problem definition.
image_train, label_train = problem.get_data(problem.TRAIN,
                                            train_size=default_train_size)

# Wrap images and labels into a dataset object
# (presumably a tf.data.Dataset, given the .map/.skip calls below -- confirm).
ds_train = data.get_standard_ds(image_train, label_train)

# Bare expression: only has an effect as the displayed output of a cell.
ds_train
# %%

# Show the first dataset element: component 0 of the element, index 0 along
# its third axis (presumably the channel axis).
plt.imshow(next(iter(ds_train))[0][:, :, 0], cmap='gray')
plt.show()
# %%

# Apply data.augment to the image of every element; labels pass through.
ds_train_augmented = ds_train.map(lambda image, label:
                                  (data.augment(image), label))
plt.imshow(next(iter(ds_train_augmented))[0][:, :, 0], cmap='gray')
plt.show()
# %%

# Mixup with defined weights

# Each next(iter(...)) starts a fresh iteration, so image1 comes from the
# first element and image2 from the element reached after skipping two.
# NOTE(review): assumes the dataset order is stable between iterations.
image1 = next(iter(ds_train_augmented))[0][:, :, 0]
image2 = next(iter(ds_train_augmented.skip(2)))[0][:, :, 0]

# Convex combination: 80% of image1 and 20% of image2.
q = 0.8
mixup_image = image1 * q + image2 * (1 - q)

plt.imshow(mixup_image, cmap='gray')
plt.show()
# %%
Esempio n. 5
0
    patience=4,
    verbose=1,
    mode='max',
    min_delta=1e-4,
    min_lr=5e-4
)

# Checkpoint callback: monitors 'val_accuracy', keeps only the best result
# (save_best_only) and stores weights only. The epoch number and the
# validation accuracy are formatted into the file name.
checkpoint = ModelCheckpoint(
    filepath='weights.{epoch:02d}-{val_accuracy:.2f}.h5',
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=True
)

# Train from the augmenting generator and validate on the preprocessed test
# data, with the early-stopping, LR-reduction and checkpoint callbacks active.
model.fit_generator(
    data.augment(x=x_train, y=y_train, batch_size=BATCH_SIZE, shuffle=True),
    steps_per_epoch=N_TRAIN_BATCHES,
    epochs=EPOCHS,
    validation_data=data.preprocess(x=x_test, y=y_test, batch_size=BATCH_SIZE),
    validation_steps=N_TEST_BATCHES,
    verbose=2,
    callbacks=[early_stopping, reduce_lr, checkpoint]
)

# Truncate the test set to the largest multiple of BATCH_SIZE so that it
# divides into whole batches; the trailing remainder is dropped.
n_test = (x_test.shape[0] // BATCH_SIZE) * BATCH_SIZE
x_test_ = x_test[:n_test]
y_test_ = y_test[:n_test]

print('Normal Inference:')
# Predict on the truncated test set in its original order (shuffle=False).
# `steps` is the number of whole batches, i.e. n_test // BATCH_SIZE.
y_pred_ = model.predict_generator(data.datagen.flow(x_test_, batch_size=BATCH_SIZE, shuffle=False),
                                 steps=x_test.shape[0] // BATCH_SIZE)
Esempio n. 6
0
def outlier_histogram(model, test_set=False):
    """Plot histograms of WAIC scores for several groups of inputs.

    Scores are computed for the in-distribution data (split into correctly
    and incorrectly classified samples), for the Fashion-MNIST test split,
    and for the adversarial and reference images stored as ``.npy`` files in
    ``./adv_examples``. All histograms are drawn as step plots into one new
    matplotlib figure. The model is switched to training mode while scoring
    and is always left in eval mode afterwards.

    Args:
        model: Called as ``model(x, y, loss_mean=False)``; the returned
            mapping must provide ``'nll_joint_tr'`` and ``'logits_tr'``.
            Must also offer ``train()`` and ``eval()``.
        test_set: If true, ``data.test_loader`` is scored; otherwise the
            validation tensors ``data.val_x`` / ``data.val_y`` are used as a
            single batch.
    """

    data_generator = (data.test_loader if test_set else
                      [(data.val_x, torch.argmax(data.val_y, dim=1))])

    score, correct_pred = [], []

    from torchvision.datasets import FashionMNIST
    from torch.utils.data import DataLoader
    fashion_generator = DataLoader(FashionMNIST('./fashion_mnist',
                                                download=True,
                                                train=False,
                                                transform=data.transform),
                                   batch_size=data.batch_size,
                                   num_workers=8)

    def waic(x):
        """Return mean + variance of the joint NLL over repeated passes."""
        # With a single sample the variance term is zero, so the score
        # reduces to the joint NLL of one forward pass.
        waic_samples = 1
        ll_joint = []
        for _ in range(waic_samples):
            losses = model(x, y=None, loss_mean=False)
            ll_joint.append(losses['nll_joint_tr'].cpu().numpy())

        ll_joint = np.stack(ll_joint, axis=1)
        return np.mean(ll_joint, axis=1) + np.var(ll_joint, axis=1)

    # continue using dropout for WAIC
    model.train()
    try:
        with torch.no_grad():
            for x, y in data_generator:
                x, y = x.cuda(), data.onehot(y.cuda())
                score.append(waic(x))
                losses = model(x, y, loss_mean=False)
                correct_pred.append(
                    (torch.argmax(y, dim=1) == torch.argmax(
                        losses['logits_tr'], dim=1)).cpu().numpy())

            score_fashion = []
            for x, _ in fashion_generator:
                x = x.cuda()
                x = data.augment(x)
                score_fashion.append(waic(x))

            adv_images = np.stack(
                [np.load(f) for f in glob.glob('./adv_examples/adv_*.npy')],
                axis=0)
            ref_images = np.stack(
                [np.load(f) for f in glob.glob('./adv_examples/img_*.npy')],
                axis=0)

            adv_images = data.augment(torch.Tensor(adv_images).cuda())
            ref_images = data.augment(torch.Tensor(ref_images).cuda())

            score_adv = waic(adv_images)
            score_ref = waic(ref_images)
    finally:
        # Hand the model back in eval mode even if scoring raised.
        model.eval()

    score = np.concatenate(score, axis=0)
    correct_pred = np.concatenate(correct_pred, axis=0)
    score_fashion = np.concatenate(score_fashion, axis=0)

    # Fixed plotting window, widened by 3% of its width on each side. The
    # margin is computed once so that both sides get the same padding.
    val_range = [-8, 0]
    margin = 0.03 * (val_range[1] - val_range[0])
    val_range = [val_range[0] - margin, val_range[1] + margin]

    bins = 40

    # (values, number of bins, legend label, colour) for each histogram.
    histograms = [
        (score[correct_pred == 1], bins, 'correct', 'green'),
        (score[correct_pred == 0], 3 * bins, 'not correct', 'red'),
        # Raw string: '\m' is not a valid escape in a normal string literal.
        (score_fashion, bins, r'$\mathcal{Fashion}$', 'magenta'),
        (score_adv, bins, 'Adv attacks', 'blue'),
        (score_ref, bins, 'Non-attacked images', 'gray'),
    ]

    plt.figure()
    for values, n_bins, label, color in histograms:
        plt.hist(values,
                 bins=n_bins,
                 range=val_range,
                 histtype='step',
                 label=label,
                 density=True,
                 color=color)

    plt.legend()