Example #1
def generate_pred(subset):

    predictions_paths = glob.glob("predictions/"+subset+"--"+config+"-*")

    if len(predictions_paths) < times_to_run:
        for path in run_paths:  # run_paths, config and times_to_run are defined in the enclosing script
            cmd = "python predict_convnet.py %s %s %s" % (config, path, subset)
            print(cmd)
            os.system(cmd)

        predictions_paths = glob.glob("predictions/"+subset+"--"+config+"-*")

    assert len(predictions_paths) == times_to_run

    print "Loading %s set predictions"%subset
    predictions_list = [np.load(path) for path in predictions_paths]
    predictions_stack = np.array(predictions_list).astype("float32") # num_sources x num_datapoints x 121

    uniform_blend = predictions_stack.mean(0)

    if subset=="valid":
        t_valid = data.labels_train[np.load("validation_split_v1.pkl")['indices_valid']]
        loss_uniform_selected = utils.log_loss(uniform_blend, t_valid)
        print()
        print(config)
        print("%s score: %.6f" % (subset, loss_uniform_selected))
        print()

    target_path = "predictions/%s--%s--%s--%s.npy" % (subset, "blend_"+config, config, "avg-prob")
    np.save(target_path, uniform_blend)
    print "saving in", target_path
Example #2
def forwardPropagation(input, weights, bias, originalOutput,
                       binarizedTruePrediction, prediction, numberOfSamples,
                       numberOfNeuronsInLayers, classes, optimizer):
    #computing output of first hidden layer
    h1In = numpy.dot(input[:, :3], weights[0]) + numpy.repeat(
        numpy.array([bias[0]]), repeats=[numberOfSamples], axis=0)
    h1Output = utils.relu(h1In)

    #computing output of second hidden layer
    h2In = numpy.dot(h1Output, weights[1]) + numpy.repeat(
        numpy.array([bias[1]]), repeats=[numberOfSamples], axis=0)
    h2Output = utils.relu(h2In)

    #computing output of the output layer
    OIn = numpy.dot(h2Output, weights[2]) + numpy.repeat(
        numpy.array([bias[2]]), repeats=[numberOfSamples], axis=0)
    OOutput = utils.softmax(OIn)

    myPredictedValueListAsIntegers = numpy.argmax(OOutput, axis=1)
    # Computing overall error only for plotting graph
    # errorForPlottingGraphList and accuracyScoreForGraph are module-level lists
    if not prediction:
        errorForGraph = utils.log_loss(binarizedTruePrediction,
                                       OOutput[:] + 0.00001)
        errorForPlottingGraphList.append(errorForGraph)
        accuracyScoreForGraph.append(
            utils.accuracy_score(originalOutput,
                                 myPredictedValueListAsIntegers))
        backPropagation(input, OOutput, originalOutput,
                        binarizedTruePrediction, h1Output, h2Output,
                        numberOfNeuronsInLayers, classes, h1In, h2In, OIn,
                        optimizer)
    else:
        return myPredictedValueListAsIntegers
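This snippet leans on utils.relu and utils.softmax, whose implementations are not shown. A minimal sketch of what such helpers typically look like (assumed, not the project's actual code):

import numpy

def relu(z):
    # element-wise max(0, z)
    return numpy.maximum(0.0, z)

def softmax(z):
    # subtract the row-wise max for numerical stability, then normalize
    exp = numpy.exp(z - z.max(axis=1, keepdims=True))
    return exp / exp.sum(axis=1, keepdims=True)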
Example #3
def main(verbose: int = 1,
         print_freq: int = 100,
         restore: Union[bool, str] = True,
         val_freq: int = 1,
         run_id: str = "model",
         dset_name: str = "memento_frames",
         model_name: str = "frames",
         freeze_until_it: int = 1000,
         additional_metrics: Mapping[str, Callable] = {'rc': rc},
         debug_n: Optional[int] = None,
         batch_size: int = cfg.BATCH_SIZE,
         require_strict_model_load: bool = False,
         restore_optimizer=True,
         optim_string='adam',
         lr=0.01) -> None:

    print("TRAINING MODEL {} ON DATASET {}".format(model_name, dset_name))

    ckpt_savedir = os.path.join(cfg.DATA_SAVEDIR, run_id, cfg.CKPT_DIR)
    print("Saving ckpts to {}".format(ckpt_savedir))
    logs_savepath = os.path.join(cfg.DATA_SAVEDIR, run_id, cfg.LOGDIR)
    print("Saving logs to {}".format(logs_savepath))
    utils.makedirs([ckpt_savedir, logs_savepath])
    last_ckpt_path = os.path.join(ckpt_savedir, "last_model.pth")

    device = utils.set_device()

    print('DEVICE', device)

    # model
    model = get_model(model_name, device)
    # print("model", model)
    model = DataParallel(model)

    # must call this before constructing the optimizer:
    # https://pytorch.org/docs/stable/optim.html
    model.to(device)

    # set up training
    # TODO better one?

    if optim_string == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    elif optim_string == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=0.9,
                                    weight_decay=0.0001)
    else:
        raise RuntimeError(
            "Unrecognized optimizer string {}".format(optim_string))

    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=5,
                                                   gamma=0.1)
    # criterion = MemAlphaLoss(device=device)
    # criterion = MemMSELoss()
    # criterion = lambda x, y: MemMSELoss()(x, y) +
    # CaptionsLoss(device=device)(x, y)
    losses = {
        'mem_mse': MemMSELoss(device=device,
                              weights=np.load("memento_weights.npy")),
        'captions': CaptionsLoss(device=device,
                                 class_weights=cap_utils.get_vocab_weights())
    }

    initial_epoch = 0
    iteration = 0
    unfrozen = False

    if restore:
        ckpt_path = restore if isinstance(restore, str) else last_ckpt_path

        if os.path.exists(ckpt_path):

            print("Restoring weights from {}".format(ckpt_path))

            ckpt = torch.load(ckpt_path)
            utils.try_load_state_dict(model, ckpt['model_state_dict'],
                                      require_strict_model_load)

            if restore_optimizer:
                utils.try_load_optim_state(optimizer,
                                           ckpt['optimizer_state_dict'],
                                           require_strict_model_load)
            initial_epoch = ckpt['epoch']
            iteration = ckpt['it']
    else:
        ckpt_path = last_ckpt_path

    # dataset
    train_ds, val_ds, test_ds = get_dataset(dset_name)
    assert val_ds or test_ds

    if debug_n is not None:
        train_ds = Subset(train_ds, range(debug_n))
        test_ds = Subset(test_ds, range(debug_n))

    train_dl = DataLoader(train_ds,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=cfg.NUM_WORKERS)
    test_dl = DataLoader(test_ds,
                         batch_size=batch_size,
                         shuffle=False,
                         num_workers=cfg.NUM_WORKERS)

    # training loop
    start = time.time()

    try:
        for epoch in range(initial_epoch, cfg.NUM_EPOCHS):
            logger = SummaryWriter(logs_savepath)

            # effectively puts the model in train mode.
            # Opposite of model.eval()
            model.train()

            print("Epoch {}".format(epoch))

            for i, (x, y_) in tqdm(enumerate(train_dl),
                                   total=len(train_dl)):

                y: ModelOutput[MemModelFields] = ModelOutput(y_)
                iteration += 1

                if not unfrozen and iteration > freeze_until_it:
                    print("Unfreezing encoder")
                    unfrozen = True

                    for param in model.parameters():
                        param.requires_grad = True

                logger.add_scalar('DataTime', time.time() - start, iteration)

                x = x.to(device)
                y = y.to_device(device)

                out = ModelOutput(model(x, y.get_data()))
                loss_vals = {name: l(out, y) for name, l in losses.items()}
                # print("loss_vals", loss_vals)
                loss = torch.stack(list(loss_vals.values()))

                if verbose:
                    print("stacked loss", loss)
                loss = loss.sum()
                # loss = criterion(out, y)

                # zero out gradients accumulated from the previous step
                # (PyTorch accumulates gradients by default)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # logging
                utils.log_loss(logger, loss, loss_vals, iteration)
                logger.add_scalar('ItTime', time.time() - start, iteration)
                start = time.time()

                # display metrics

            # do some validation

            if (epoch + 1) % val_freq == 0:
                print("Validating...")
                model.eval()  # puts model in validation mode
                val_iteration = iteration

                with torch.no_grad():

                    labels: Optional[ModelOutput[MemModelFields]] = None
                    preds: Optional[ModelOutput[MemModelFields]] = None
                    val_losses = []

                    for i, (x, y_) in tqdm(enumerate(test_dl),
                                           total=len(test_dl)):
                        val_iteration += 1

                        y = ModelOutput(y_)
                        y_numpy = y.to_numpy()

                        labels = y_numpy if labels is None else labels.merge(
                            y_numpy)

                        x = x.to(device)
                        y = y.to_device(device)

                        out = ModelOutput(model(x, y.get_data()))
                        out_numpy = out.to_device('cpu').to_numpy()
                        preds = out_numpy if preds is None else preds.merge(
                            out_numpy)

                        loss_vals = {
                            name: l(out, y)
                            for name, l in losses.items()
                        }
                        loss = torch.stack(list(loss_vals.values())).sum()
                        utils.log_loss(logger,
                                       loss,
                                       loss_vals,
                                       val_iteration,
                                       phase='val')

                        val_losses.append(loss)

                    print("Calculating validation metric...")
                    # print("preds", {k: v.shape for k, v in preds.items()})
                    # assert False
                    metrics = {
                        fname: f(labels, preds, losses)
                        for fname, f in additional_metrics.items()
                    }
                    print("Validation metrics", metrics)

                    for k, v in metrics.items():
                        if isinstance(v, numbers.Number):
                            logger.add_scalar('Metric_{}'.format(k), v,
                                              iteration)

                    metrics['total_val_loss'] = sum(val_losses)

                    ckpt_path = os.path.join(
                        ckpt_savedir, utils.get_ckpt_path(epoch, metrics))
                    save_ckpt(ckpt_path, model, epoch, iteration, optimizer,
                              dset_name, model_name, metrics)

            # end of epoch
            lr_scheduler.step()

            save_ckpt(last_ckpt_path, model, epoch, iteration, optimizer,
                      dset_name, model_name)

    except KeyboardInterrupt:
        print('Got keyboard interrupt, saving model...')
        save_ckpt(last_ckpt_path, model, epoch, iteration, optimizer,
                  dset_name, model_name)
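The inner loop above evaluates several named loss terms, stacks them, and sums them into one scalar before the backward pass. A stripped-down sketch of that pattern with stock PyTorch losses (the names 'mse' and 'l1' are placeholders, not the MemMSELoss/CaptionsLoss used above):

import torch

losses = {'mse': torch.nn.MSELoss(), 'l1': torch.nn.L1Loss()}

pred = torch.randn(4, 3, requires_grad=True)
target = torch.randn(4, 3)

# evaluate each term separately so it can be logged under its own name,
# then sum for a single backward pass
loss_vals = {name: fn(pred, target) for name, fn in losses.items()}
loss = torch.stack(list(loss_vals.values())).sum()
loss.backward()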
Example #4
import sys

import numpy as np

import data
import utils

VALIDATION_SPLIT_PATH = "validation_split_v1.pkl"

if len(sys.argv) != 2:
    sys.exit("Usage: eval_predictions.py <validation_predictions_path>")

path = sys.argv[1]
predictions = np.load(path)

split = np.load(VALIDATION_SPLIT_PATH)
labels_valid = data.labels_train[split['indices_valid']]

loss = utils.log_loss(predictions, labels_valid)
acc = utils.accuracy(predictions, labels_valid)
loss_std = utils.log_loss_std(predictions, labels_valid)

print("Validation loss:\t\t\t%.6f" % loss)
print("Classification accuracy:\t\t%.2f%%" % (acc * 100))
print("Validation loss std:\t%.6f" % loss_std)
print()
for k in range(5):
    acc_k = utils.accuracy_topn(predictions, labels_valid, n=k + 1)
    print("Top-%d accuracy:\t\t%.2f%%" % (k + 1, acc_k * 100))
Example #5
    def _backprop(self, X, y, activations, deltas, coef_grads,
                  intercept_grads):
        """Compute the MLP loss function and its corresponding derivatives
        with respect to each parameter: weights and bias vectors.
        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The input data.
        y : array-like, shape (n_samples,)
            The target values.
        activations : list, length = n_layers - 1
             The ith element of the list holds the values of the ith layer.
        deltas : list, length = n_layers - 1
            The ith element of the list holds the difference between the
            activations of the i + 1 layer and the backpropagated error.
            More specifically, deltas are gradients of loss with respect to z
            in each layer, where z = wx + b is the value of a particular layer
            before passing through the activation function
        coef_grads : list, length = n_layers - 1
            The ith element contains the amount of change used to update the
            coefficient parameters of the ith layer in an iteration.
        intercept_grads : list, length = n_layers - 1
            The ith element contains the amount of change used to update the
            intercept parameters of the ith layer in an iteration.
        Returns
        -------
        loss : float
        coef_grads : list, length = n_layers - 1
        intercept_grads : list, length = n_layers - 1
        """
        n_samples = X.shape[0]

        # Forward propagate
        activations = self._forward_pass(activations)

        # Get loss

        loss = log_loss(y, activations[-1])

        # Backward propagate
        last = self.n_layers_ - 2

        # The calculation of delta[last] here works with following
        # combinations of output activation and loss function:
        # sigmoid and binary cross entropy, softmax and categorical cross
        # entropy, and identity with squared loss
        deltas[last] = activations[-1] - y

        # Compute gradient for the last layer
        coef_grads, intercept_grads = self._compute_loss_grad(
            last, n_samples, activations, deltas, coef_grads, intercept_grads)

        # Iterate over the hidden layers
        for i in range(self.n_layers_ - 2, 0, -1):
            deltas[i - 1] = np.dot(deltas[i], self.coefs_[i].T)
            inplace_relu_derivative(activations[i], deltas[i - 1])

            coef_grads, intercept_grads = self._compute_loss_grad(
                i - 1, n_samples, activations, deltas, coef_grads,
                intercept_grads)

        return loss, coef_grads, intercept_grads
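The line deltas[last] = activations[-1] - y uses the standard identity that, for a softmax output trained with categorical cross-entropy, the gradient of the loss with respect to the pre-activations is p - y. A self-contained numerical check of that identity (not part of the original code):

import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

def xent(z, y):
    return -np.sum(y * np.log(softmax(z)))

rng = np.random.default_rng(0)
z = rng.normal(size=5)
y = np.zeros(5)
y[2] = 1.0  # one-hot target

analytic = softmax(z) - y  # the delta used above

# central finite differences
eps = 1e-6
numeric = np.zeros_like(z)
for i in range(len(z)):
    zp, zm = z.copy(), z.copy()
    zp[i] += eps
    zm[i] -= eps
    numeric[i] = (xent(zp, y) - xent(zm, y)) / (2 * eps)

assert np.allclose(analytic, numeric, atol=1e-5)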
Example #6
predictions_list = [np.load(path) for path in paths]
predictions_stack = np.array(predictions_list).astype("float32") # num_sources x num_datapoints x 121
uniform_blend = predictions_stack.mean(0)

print(paths[0])

target_path = paths[0].replace("haralick","blend").replace("momentsinfo","blend")

# target_path = "predictions/%s--%s--%s--%s.npy" % (subset, "blend_"+config, config, "avg-prob")
if os.path.isfile(target_path):
    sys.exit("file %s already exists"%target_path)

if subset=="valid":
    t_valid = data.labels_train[np.load("validation_split_v1.pkl")['indices_valid']]
    loss_uniform_selected = utils.log_loss(uniform_blend, t_valid)
    print
    print "%s score: %.6f"%(subset,loss_uniform_selected)
    print
    
np.save(target_path, uniform_blend)
print "saving in", target_path


# ["valid--blend_featharalick_convroll4_1024_lesswd--featharalick_convroll4_1024_lesswd--avg-prob.npy"
# ,"valid--blend_featharalick_convroll4_big_wd_maxout512--featharalick_convroll4_big_wd_maxout512--avg-prob.npy"
# ,"valid--blend_featharalick_convroll4_big_weightdecay--featharalick_convroll4_big_weightdecay--avg-prob.npy"
# ,"valid--blend_featharalick_convroll4_doublescale_fs5--featharalick_convroll4_doublescale_fs5--avg-prob.npy"
# ,"valid--blend_featharalick_convroll5_preinit_resume_drop@420--featharalick_convroll5_preinit_resume_drop@420--avg-prob.npy"
# ,"valid--blend_featharalick_convroll_all_broaden_7x7_weightdecay_resume--featharalick_convroll_all_broaden_7x7_weightdecay_resume--avg-prob.npy"
# ,"valid--blend_featharalick_cp8--featharalick_cp8--avg-prob.npy"
Example #7
if missing_predictions:
    print('\tPlease generate the following predictions:\n\t%s' %
          '\n\t'.join(missing_predictions))
    sys.exit(0)

# loading validation predictions
s = np.load("validation_split_v1.pkl")
t_valid = data.labels_train[s['indices_valid']]

predictions_list = [np.load(path) for path in valid_predictions_paths]
predictions_stack = np.array(predictions_list).astype(
    theano.config.floatX)  # num_sources x num_datapoints x 121

print("Individual prediction errors")
individual_prediction_errors = [
    utils.log_loss(p, t_valid) for p in predictions_list
]
del predictions_list
for i in range(n_models):
    print(individual_prediction_errors[i],
          os.path.basename(valid_predictions_paths[i]))
print()

# optimizing weights
X = theano.shared(predictions_stack)  # source predictions
t = theano.shared(utils.one_hot(t_valid))  # targets
W = T.vector('W')

s = T.nnet.softmax(W).reshape((W.shape[0], 1, 1))
weighted_avg_predictions = T.sum(X * s,
                                 axis=0)  # T.tensordot(X, s, [[0], [0]])
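The snippet stops just before the objective; Example #12 below shows the continuation, where the log loss of the weighted average is differentiated with respect to W. For readers without a Theano setup, a minimal NumPy/SciPy sketch of the same idea, softmax-parameterized blend weights fitted by minimizing log loss (all data below is toy data):

import numpy as np
from scipy.optimize import minimize

def softmax(w):
    e = np.exp(w - w.max())
    return e / e.sum()

def blend_loss(w, stack, targets_one_hot):
    # stack: (num_sources, num_datapoints, num_classes)
    weights = softmax(w).reshape(-1, 1, 1)
    blend = (stack * weights).sum(axis=0)
    return -np.mean(np.sum(targets_one_hot * np.log(blend + 1e-15), axis=1))

# toy data: 3 sources, 10 datapoints, 4 classes
rng = np.random.default_rng(0)
stack = rng.dirichlet(np.ones(4), size=(3, 10))
targets = np.eye(4)[rng.integers(0, 4, size=10)]

result = minimize(blend_loss, x0=np.zeros(3), args=(stack, targets))
print("blend weights:", softmax(result.x))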
Example #8
    def train(self, learning_schedule={0: 0.015, 500: 0.0015, 800: 0.00015, 1000: 0.000015},
              momentum=0.9, max_epochs=3000, save_every=20, save_path=os.getcwd()):

        self.save_every = save_every
        self.metadata_tmp_path = save_path+"/model_params.pkl"
        self.learning_rate_schedule = learning_schedule
        self.learning_rate = theano.shared(np.float32(self.learning_rate_schedule[0]))
        self.momentum = momentum

        #for trainer
        self.updates = nn.updates.nesterov_momentum(self.loss, self.all_params, self.learning_rate, self.momentum)


        train_fn = self.nesterov_trainer() #nesterov with momentum.
        train_set_iterator = DataLoader(os.getcwd(),train_test_valid='train')
        best_dev_loss = numpy.inf
        dev_set_iterator = DataLoader(os.getcwd(), train_test_valid='valid')
        dev_set_iterator.build_unequal_samples_map()
        
        #for loading the data onto the gpu
        #create_train_gen = lambda: train_set_iterator.create_gen(max_epochs)

        patience = 1000  
        patience_increase = 2.
        improvement_threshold = 0.995
        done_looping = False
        print('... training the model')
        start_time = time.time()  # time.clock() was removed in Python 3
        epoch = 0
        timer = None

        #for plotting
        self._costs = []
        self._train_errors = []
        self._dev_errors = []

        while (epoch < max_epochs) and (not done_looping):
            losses_train = []
            losses = []
            avg_costs = []
            timer = time.time()
            for iteration, (x, y) in enumerate(train_set_iterator):

                if iteration in self.learning_rate_schedule:
                    lr = np.float32(self.learning_rate_schedule[iteration])
                    print "  setting learning rate to %.7f" % lr
                    self.learning_rate.set_value(lr)


                print "  load training data onto GPU"
                avg_cost = train_fn(x, y)
                if np.isnan(avg_cost):
                    raise RuntimeError("NaN DETECTED.")
                
                if isinstance(avg_cost, list):
                    avg_costs.append(avg_cost[0])
                else:
                    avg_costs.append(avg_cost)
            
                #for saving the batch
                if ((iteration + 1) % save_every) == 0:
                    print()
                    print("Saving metadata, parameters")

                    # pickle requires a binary-mode file handle
                    with open(self.metadata_tmp_path, 'wb') as f:
                        pickle.dump({'losses_train': avg_costs,
                                     'param_values': nn.layers.get_all_param_values(self.output_layer)},
                                    f, pickle.HIGHEST_PROTOCOL)

                mean_train_loss = numpy.mean(avg_costs)
                #print "  mean training loss:\t\t%.6f" % mean_train_loss
                #losses_train.append(mean_train_loss)

                #accuracy assessment
                output = utils.one_hot(self.predict_(x)(),m=20)
                train_loss = utils.log_loss(output, y)
                acc = 1 - utils.accuracy(output, y)
                losses.append(train_loss)
                del output
                del x
                del y

                print('  epoch %i took %f seconds' %
                    (epoch, time.time() - timer))
                print('  epoch %i, avg costs %f' %
                    (epoch, mean_train_loss))
                print('  epoch %i, training error %f' %
                    (epoch, acc))

                #for plotting
                self._costs.append(mean_train_loss)
                self._train_errors.append(acc)
                
                #valid accuracy
                xd,yd = dev_set_iterator.random_batch()

                valid_output = utils.one_hot(self.predict_(xd)(),m=20)
                valid_acc = 1 - utils.accuracy(valid_output, yd)
                self._dev_errors.append(valid_acc)
                del valid_output
                del xd
                del yd

                if valid_acc < best_dev_loss:
                    # check for a significant improvement against the
                    # previous best before overwriting it
                    if valid_acc < best_dev_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)
                    best_dev_loss = valid_acc
                    best_params = copy.deepcopy(self.all_params)
                    print('!!!  epoch %i, validation error of best model %f' %
                          (epoch, valid_acc))
                    print()
                    print("Saving best performance parameters")
                    with open(self.metadata_tmp_path, 'wb') as f:
                        pickle.dump({'losses_train': avg_costs,
                                     'param_values': nn.layers.get_all_param_values(self.output_layer)},
                                    f, pickle.HIGHEST_PROTOCOL)
                if patience <= iteration:
                    done_looping = True
                    break
                epoch += 1
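nn.updates.nesterov_momentum is Lasagne's update rule. A minimal NumPy sketch of one common formulation of the Nesterov momentum step (illustrative; not Lasagne's exact code):

import numpy as np

def nesterov_momentum_step(param, grad, velocity, lr=0.015, momentum=0.9):
    # update the velocity with the current gradient, then apply the
    # update with a momentum lookahead
    velocity = momentum * velocity - lr * grad
    param = param + momentum * velocity - lr * grad
    return param, velocity

# toy usage: minimize f(w) = ||w||^2, whose gradient is 2w
w = np.array([1.0, -2.0])
v = np.zeros_like(w)
for _ in range(100):
    w, v = nesterov_momentum_step(w, 2 * w, v, lr=0.05)
print(w)  # approaches [0, 0]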
Example #9
                for x_shared, x_chunk_eval in zip(xs_shared, xs_chunk_eval):
                    x_shared.set_value(x_chunk_eval)

                outputs_chunk = []
                for b in range(num_batches_chunk_eval):
                    out = compute_output(b)
                    outputs_chunk.append(out)

                outputs_chunk = np.vstack(outputs_chunk)
                outputs_chunk = outputs_chunk[:chunk_length_eval]  # truncate to the right length
                outputs.append(outputs_chunk)

            outputs = np.vstack(outputs)
            loss = utils.log_loss(outputs, labels)
            acc = utils.accuracy(outputs, labels)
            print "    loss:\t%.6f" % loss
            print "    acc:\t%.2f%%" % (acc * 100)
            print

            losses.append(loss)
            del outputs

    now = time.time()
    time_since_start = now - start_time
    time_since_prev = now - prev_time
    prev_time = now
    est_time_left = time_since_start * (float(config.num_chunks_train -
                                              (e + 1)) /
                                        float(e + 1 - chunks_train_idcs[0]))
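The chunk-and-truncate evaluation pattern above (run a fixed number of batches per chunk, stack the batch outputs, cut back to the true chunk length) can be restated compactly in plain NumPy; predict_fn and the names below are illustrative:

import numpy as np

def evaluate_in_batches(predict_fn, data, batch_size):
    n = len(data)
    num_batches = int(np.ceil(n / float(batch_size)))
    outputs = []
    for b in range(num_batches):
        outputs.append(predict_fn(data[b * batch_size:(b + 1) * batch_size]))
    return np.vstack(outputs)[:n]  # truncate to the right length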
Example #10
    def train(self,
              learning_schedule={
                  0: 0.015,
                  500: 0.0015,
                  800: 0.00015,
                  1000: 0.000015
              },
              momentum=0.9,
              max_epochs=3000,
              save_every=20,
              save_path=os.getcwd()):

        self.save_every = save_every
        self.metadata_tmp_path = save_path + "/model_params.pkl"
        self.learning_rate_schedule = learning_schedule
        self.learning_rate = theano.shared(
            np.float32(self.learning_rate_schedule[0]))
        self.momentum = momentum

        #for trainer
        self.updates = nn.updates.nesterov_momentum(self.loss, self.all_params,
                                                    self.learning_rate,
                                                    self.momentum)

        train_fn = self.nesterov_trainer()  #nesterov with momentum.
        train_set_iterator = DataLoader(os.getcwd(), train_test_valid='train')
        best_dev_loss = numpy.inf
        dev_set_iterator = DataLoader(os.getcwd(), train_test_valid='valid')
        dev_set_iterator.build_unequal_samples_map()

        #for loading the data onto the gpu
        #create_train_gen = lambda: train_set_iterator.create_gen(max_epochs)

        patience = 1000
        patience_increase = 2.
        improvement_threshold = 0.995
        done_looping = False
        print('... training the model')
        start_time = time.time()  # time.clock() was removed in Python 3
        epoch = 0
        timer = None

        #for plotting
        self._costs = []
        self._train_errors = []
        self._dev_errors = []

        while (epoch < max_epochs) and (not done_looping):
            losses_train = []
            losses = []
            avg_costs = []
            timer = time.time()
            for iteration, (x, y) in enumerate(train_set_iterator):

                if iteration in self.learning_rate_schedule:
                    lr = np.float32(self.learning_rate_schedule[iteration])
                    print "  setting learning rate to %.7f" % lr
                    self.learning_rate.set_value(lr)

                print "  load training data onto GPU"
                avg_cost = train_fn(x, y)
                if np.isnan(avg_cost):
                    raise RuntimeError("NaN DETECTED.")

                if isinstance(avg_cost, list):
                    avg_costs.append(avg_cost[0])
                else:
                    avg_costs.append(avg_cost)

                #for saving the batch
                if ((iteration + 1) % save_every) == 0:
                    print()
                    print("Saving metadata, parameters")

                    # pickle requires a binary-mode file handle
                    with open(self.metadata_tmp_path, 'wb') as f:
                        pickle.dump(
                            {
                                'losses_train': avg_costs,
                                'param_values':
                                nn.layers.get_all_param_values(
                                    self.output_layer)
                            }, f, pickle.HIGHEST_PROTOCOL)

                mean_train_loss = numpy.mean(avg_costs)
                #print "  mean training loss:\t\t%.6f" % mean_train_loss
                #losses_train.append(mean_train_loss)

                #accuracy assessment
                output = utils.one_hot(self.predict_(x)(), m=20)
                train_loss = utils.log_loss(output, y)
                acc = 1 - utils.accuracy(output, y)
                losses.append(train_loss)
                del output
                del x
                del y

                print('  epoch %i took %f seconds' %
                      (epoch, time.time() - timer))
                print('  epoch %i, avg costs %f' % (epoch, mean_train_loss))
                print('  epoch %i, training error %f' % (epoch, acc))

                #for plotting
                self._costs.append(mean_train_loss)
                self._train_errors.append(acc)

                #valid accuracy
                xd, yd = dev_set_iterator.random_batch()

                valid_output = utils.one_hot(self.predict_(xd)(), m=20)
                valid_acc = 1 - utils.accuracy(valid_output, yd)
                self._dev_errors.append(valid_acc)
                del valid_output
                del xd
                del yd

                if valid_acc < best_dev_loss:
                    # check for a significant improvement against the
                    # previous best before overwriting it
                    if valid_acc < best_dev_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)
                    best_dev_loss = valid_acc
                    best_params = copy.deepcopy(self.all_params)
                    print('!!!  epoch %i, validation error of best model %f' %
                          (epoch, valid_acc))
                    print()
                    print("Saving best performance parameters")
                    with open(self.metadata_tmp_path, 'wb') as f:
                        pickle.dump(
                            {
                                'losses_train': avg_costs,
                                'param_values':
                                nn.layers.get_all_param_values(
                                    self.output_layer)
                            }, f, pickle.HIGHEST_PROTOCOL)
                if patience <= iteration:
                    done_looping = True
                    break
                epoch += 1
Example #11
import sys

import numpy as np

import data
import utils


VALIDATION_SPLIT_PATH = "validation_split_v1.pkl"

if len(sys.argv) != 2:
    sys.exit("Usage: eval_predictions.py <validation_predictions_path>")

path = sys.argv[1]
predictions = np.load(path)

split = np.load(VALIDATION_SPLIT_PATH)
labels_valid = data.labels_train[split["indices_valid"]]


loss = utils.log_loss(predictions, labels_valid)
acc = utils.accuracy(predictions, labels_valid)
loss_std = utils.log_loss_std(predictions, labels_valid)

print "Validation loss:\t\t\t%.6f" % loss
print "Classification accuracy:\t\t%.2f%%" % (acc * 100)
print "Validation loss std:\t%.6f" % loss_std
print
for k in xrange(5):
    acc_k = utils.accuracy_topn(predictions, labels_valid, n=k + 1)
    print "Top-%d accuracy:\t\t%.2f%%" % (k + 1, acc_k * 100)
Example #12
    if not os.path.isfile(path):
        missing_predictions.append(path)

if missing_predictions:
    print('\tPlease generate the following predictions:\n\t%s' %
          '\n\t'.join(missing_predictions))
    sys.exit(0)

# loading validation predictions
s = np.load("validation_split_v1.pkl")
t_valid = data.labels_train[s['indices_valid']]

predictions_list = [np.load(path) for path in valid_predictions_paths]
predictions_stack = np.array(predictions_list).astype(theano.config.floatX)  # num_sources x num_datapoints x 121

print "Individual prediction errors"
individual_prediction_errors = [utils.log_loss(p, t_valid) for p in predictions_list]
del predictions_list
for i in range(n_models):
    print(individual_prediction_errors[i],
          os.path.basename(valid_predictions_paths[i]))
print()

# optimizing weights
X = theano.shared(predictions_stack)  # source predictions
t = theano.shared(utils.one_hot(t_valid))  # targets
W = T.vector('W')

s = T.nnet.softmax(W).reshape((W.shape[0], 1, 1))
weighted_avg_predictions = T.sum(X * s, axis=0)  # T.tensordot(X, s, [[0], [0]])
error = nn_plankton.log_loss(weighted_avg_predictions, t)
grad = T.grad(error, W)
Example #13
    def fit(self, X_train, y_train, layers, alpha, epochs):

        """
        Main function that trans/fits the neural net by running through multiple epochs
        :param X_train: training data feautures
        :param y_train: training data labels; y_actual
        :param layers: number of hidden layers and neurons in each of them
        :param alpha: learning rate initailized value; later changed by adam optimizer
        :param epochs: number of iterations to be run
        :return: trained neural net parameters, to be used for predicting on test data
        """

        # Initialize Weights and Biases
        parameters = self.initialize(layers)

        cost_function, learning_curve = [], []

        # Binarize Labels
        classes = list(set(y_train))
        y_bin = ut.label_binarize(y_train, classes)  # Y = [1,0,0,0], [0,1,0,0]...

        for j in range(epochs):

            # Making batches of data
            # X = X_train
            # Y = y_train
            #
            # batch_slice = ut.gen_batches(X_train.shape[0], X_train.shape[0] / 1000)
            # for i in batch_slice:
            #     X_train = X[int(i.start):int(i.stop) + 1]
            #     y_train = Y[int(i.start):int(i.stop) + 1]

            # ---------------------------------------------------
            y_hat, parameters = self.forward_prop(X_train, parameters, layers)
            # y_hat is the predicted label probability by softmax

            # Utils: log_loss(y_true, y_prob)
            log_loss = ut.log_loss(y_bin, y_hat)

            # Back Propagation
            parameters = self.back_prop(X_train, y_bin, parameters, layers)

            # Prep variables for adam optimizer
            params, grads = self.prep_vars(layers, parameters)

            # Initialize constructor of adam optimizer
            learning_rate_init = alpha
            optimizer = AdamOptimizer(params, learning_rate_init)

            # updates weights with grads
            params = optimizer.update_params(grads)  # update weights

            # Unpack results from Adam Optimizer
            parameters = self.params_unpack(params, parameters, layers)

            # Append log loss, to plot curve later
            cost_function.append(log_loss)

            # ---------------------------------------------------
            # Mapping
            if j == 0:
                class_dict = dict()
                for i in range(len(y_bin)):
                    class_dict[str(y_bin[i])] = y_train[i]
            # ---------------------------------------------------

        # Making pyplots
        # print("cost_function 0, -1", cost_function[0], cost_function[-1])
        # print("learning_curve 0, -1", learning_curve[0], learning_curve[-1])
        #
        # learning_curve = pd.DataFrame(learning_curve)
        # learning_curve.to_csv("learning_curve_blackbox21.csv", header=False, index=False)
        # print("learning_curve", learning_curve)
        #
        # plt.plot(learning_curve)
        # plt.title("Learning Curve")
        # plt.xlabel("Epochs")
        # plt.ylabel("Accuracy")
        # plt.show()
        #
        # cost_function = pd.DataFrame(cost_function)
        #
        # plt.plot(cost_function)
        # plt.title("Logistic Loss")
        # plt.xlabel("Epochs")
        # plt.ylabel("Loss")
        # plt.show()

        return parameters, class_dict
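AdamOptimizer here is the project's own class, with its own constructor and update_params signature. The update it presumably performs is the standard Adam rule; a self-contained sketch for reference:

import numpy as np

class Adam:
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
        self.lr, self.beta1, self.beta2, self.eps = lr, beta1, beta2, eps
        self.m = self.v = None
        self.t = 0

    def step(self, param, grad):
        if self.m is None:
            self.m = np.zeros_like(param)
            self.v = np.zeros_like(param)
        self.t += 1
        # exponentially decayed first and second moment estimates
        self.m = self.beta1 * self.m + (1 - self.beta1) * grad
        self.v = self.beta2 * self.v + (1 - self.beta2) * grad ** 2
        # bias-corrected moments
        m_hat = self.m / (1 - self.beta1 ** self.t)
        v_hat = self.v / (1 - self.beta2 ** self.t)
        return param - self.lr * m_hat / (np.sqrt(v_hat) + self.eps)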
Example #14
                num_batches_chunk_eval = int(np.ceil(chunk_length_eval / float(config.batch_size)))

                for x_shared, x_chunk_eval in zip(xs_shared, xs_chunk_eval):
                    x_shared.set_value(x_chunk_eval)

                outputs_chunk = []
                for b in range(num_batches_chunk_eval):
                    out = compute_output(b)
                    outputs_chunk.append(out)

                outputs_chunk = np.vstack(outputs_chunk)
                outputs_chunk = outputs_chunk[:chunk_length_eval] # truncate to the right length
                outputs.append(outputs_chunk)

            outputs = np.vstack(outputs)
            loss = utils.log_loss(outputs, labels)
            acc = utils.accuracy(outputs, labels)
            print "    loss:\t%.6f" % loss
            print "    acc:\t%.2f%%" % (acc * 100)
            print

            losses.append(loss)
            del outputs


    now = time.time()
    time_since_start = now - start_time
    time_since_prev = now - prev_time
    prev_time = now
    est_time_left = time_since_start * (float(config.num_chunks_train - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
    eta = datetime.now() + timedelta(seconds=est_time_left)
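The closing lines estimate remaining run time by scaling elapsed time by the ratio of remaining to completed chunks; the same arithmetic as a self-contained helper (names illustrative):

import time
from datetime import datetime, timedelta

def estimate_eta(start_time, chunks_done, chunks_total):
    elapsed = time.time() - start_time
    est_time_left = elapsed * (chunks_total - chunks_done) / float(chunks_done)
    return datetime.now() + timedelta(seconds=est_time_left)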