# NOTE: assumed imports for these snippets; `model`, `network`, and
# `iter_funcs` are project-local modules referenced below.
from os.path import join
from time import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
import theano
from lasagne import layers
from lasagne.utils import floatX

import model
import network
import iter_funcs


def inference(X_test, X_mean, init_file, outdir):
    bs = 128
    fixed_bs = True

    print('building model...')
    l_out = model.build(bs, 10)

    print('initializing weights from %s...' % (init_file))
    network.init_weights(l_out, init_file)

    test_iter = iter_funcs.create_iter_funcs_test(l_out, bs, N=50)
    for test_idx in network.get_batch_idx(X_test.shape[0],
                                          bs,
                                          fixed=fixed_bs,
                                          shuffle=False):

        X_test_batch = X_test[test_idx]

        y_hat = test_iter(X_test_batch)

        # save the test images that were classified with high certainty
        for X, y in zip(X_test_batch, y_hat):
            if y.max() > 0.9:
                # undo the initial transformations: shift, scale, and transpose
                img = ((X + X_mean) * 255.).transpose(1, 2, 0)
                fname = '-'.join('%.5f' % p for p in y)
                cv2.imwrite(join(outdir, '%s.png' % fname), img)
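
# A minimal usage sketch, not part of the original example; the .npy and
# weight-file paths below are hypothetical placeholders.
def demo_inference():
    X_test = np.load('data/X_test.npy')   # preprocessed, mean-subtracted test images
    X_mean = np.load('data/X_mean.npy')   # per-pixel mean subtracted during preprocessing
    # the output directory is assumed to already exist
    inference(X_test, X_mean, init_file='weights/best.pkl', outdir='high_confidence')
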
def train(X_train,
          X_valid,
          y_train,
          y_valid,
          weights_file=None,
          init_file=None):

    # model parameters
    wd = 0.0005
    bs = 128
    base_lr = 0.01
    gamma = 0.0001
    p = 0.75
    mntm = 0.9
    fixed_bs = True
    mc_dropout = True
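    # inverse-decay learning rate schedule: lr = base_lr * (1 + gamma*itr)^(-p)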
    lr_update = lambda itr: base_lr * (1 + gamma * itr)**(-p)
    snapshot_every = 5
    max_epochs = 100000

    print('building model...')
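    # the output layer size is the number of distinct classes in y_train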
    l_out = model.build(bs, np.unique(y_train).shape[0])
    network.print_layers(l_out)

    # check if we need to load pre-trained weights
    if init_file is not None:
        print('initializing weights from %s...' % (init_file))
        network.init_weights(l_out, init_file)
    else:
        print('initializing weights randomly...')

    # create Theano shared variables
    print('creating shared variables...')
    lr_shared = theano.shared(floatX(base_lr))

    print('compiling theano functions...')
    # pass the shared learning rate (rather than the raw base_lr float) so the
    # set_value updates in the training loop actually take effect
    train_iter = iter_funcs.create_iter_funcs_train(l_out, lr_shared, mntm, wd)
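    # N=50 with mc_dropout=True: validation presumably averages 50 stochastic
    # forward passes per batch (Monte Carlo dropout)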
    valid_iter = iter_funcs.create_iter_funcs_valid(l_out,
                                                    bs,
                                                    N=50,
                                                    mc_dropout=mc_dropout)

    # prepare to start training
    best_epoch = -1
    best_weights = None
    best_train_losses_mean, best_valid_losses_mean = np.inf, np.inf
    print('starting training at %s' % (network.get_current_time()))
    epoch_train_losses, epoch_valid_losses = [], []
    gradient_updates = 0
    epochs = []

    # start training
    try:
        for epoch in range(1, max_epochs + 1):
            t_epoch_start = time()

            train_losses, train_accs = [], []
            # run training for each batch
            for train_idx in network.get_batch_idx(X_train.shape[0],
                                                   bs,
                                                   fixed=fixed_bs,
                                                   shuffle=True):
                X_train_batch = X_train[train_idx]
                y_train_batch = y_train[train_idx]

                train_loss, train_acc = train_iter(X_train_batch,
                                                   y_train_batch)

                # learning rate policy
                gradient_updates += 1
                lr = lr_update(gradient_updates)
                lr_shared.set_value(floatX(lr))

                train_losses.append(train_loss)
                train_accs.append(train_acc)

            # run validation for each batch
            valid_losses, valid_accs = [], []
            for valid_idx in network.get_batch_idx(X_valid.shape[0],
                                                   bs,
                                                   fixed=fixed_bs,
                                                   shuffle=False):

                X_valid_batch = X_valid[valid_idx]
                y_valid_batch = y_valid[valid_idx]

                valid_loss, valid_acc = valid_iter(X_valid_batch,
                                                   y_valid_batch)

                valid_losses.append(valid_loss)
                valid_accs.append(valid_acc)

            # average over the batches
            train_losses_mean = np.mean(train_losses)
            train_accs_mean = np.mean(train_accs)
            valid_losses_mean = np.mean(valid_losses)
            valid_accs_mean = np.mean(valid_accs)

            epochs.append(epoch)
            epoch_train_losses.append(train_losses_mean)
            epoch_valid_losses.append(valid_losses_mean)

            # display useful info
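            # ANSI escape codes highlight the epoch number in green whenever
            # the validation loss improves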
            epoch_color = ('', '')
            if valid_losses_mean < best_valid_losses_mean:
                best_epoch = epoch
                best_train_losses_mean = train_losses_mean
                best_valid_losses_mean = valid_losses_mean
                best_weights = layers.get_all_param_values(l_out)
                epoch_color = ('\033[32m', '\033[0m')

            t_epoch_end = time()
            duration = t_epoch_end - t_epoch_start
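            # columns: epoch | train loss | valid loss | train acc |
            # valid acc | learning rate | epoch duration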
            print('{}{:>4}{} | {:>10.6f} | {:>10.6f} | '
                  '{:>3.2f}% | {:>3.2f}% | '
                  '{:>1.8} | {:>4.2f}s | '.format(
                      epoch_color[0], epoch, epoch_color[1], train_losses_mean,
                      valid_losses_mean, 100 * train_accs_mean,
                      100 * valid_accs_mean, lr, duration))

            if best_weights is not None and (epoch % snapshot_every) == 0:
                network.save_weights(best_weights, weights_file)

    except KeyboardInterrupt:
        print('caught ctrl-c... stopped training.')

    # display final results and save weights
    print('training finished at %s\n' % (network.get_current_time()))
    print('lowest validation loss at epoch %d' % (best_epoch))
    print('  train loss = %.6f' % (best_train_losses_mean))
    print('  valid loss = %.6f' % (best_valid_losses_mean))
    if best_weights is not None:
        print('saving best weights to %s' % (weights_file))
        network.save_weights(best_weights, weights_file)

    # plot the train/val loss over epochs (skipped if no epoch completed)
    if epochs:
        print('plotting training/validation loss...')
        plt.plot(epochs, epoch_train_losses, 'b')
        plt.plot(epochs, epoch_valid_losses, 'g')
        plt.legend(('training', 'validation'))
        plt.ylabel('loss')
        plt.xlabel('epochs')
        plt.xlim((1, epochs[-1]))
        train_val_log = join('logs', '%s.png' % network.get_current_time())
        plt.savefig(train_val_log, bbox_inches='tight')
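
# A minimal usage sketch, not part of the original example; the data paths and
# the simple 90/10 split below are hypothetical placeholders for however the
# project actually loads and splits its data.
def demo_train():
    X = np.load('data/X.npy')   # preprocessed images, float32
    y = np.load('data/y.npy')   # integer class labels
    n_valid = max(1, X.shape[0] // 10)   # hold out ~10% for validation
    X_train, X_valid = X[:-n_valid], X[-n_valid:]
    y_train, y_valid = y[:-n_valid], y[-n_valid:]
    train(X_train, X_valid, y_train, y_valid,
          weights_file='weights/best.pkl', init_file=None)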