def _build_experiment(self):
    """Set up SGD with a DecreasingLearningRate modifier on a 4-D quadratic.

    Returns (trainer, logger, direction_modifier); the logger monitors the
    modifier's first parameter (the learning rate).
    """
    # The Nd gaussian is deliberately ill-conditioned: no single gradient
    # step converges, so the learning-rate decay is observable over epochs.
    ndim = 4
    target = 5 * np.ones((1, ndim)).astype(floatX)
    weights = sharedX(np.zeros((1, ndim)))
    residual = weights - target
    objective = T.sum(
        0.5 * T.dot(T.dot(residual, np.diag(1. / np.arange(1, ndim + 1))),
                    residual.T))
    loss = DummyLossWithGradient(objective, weights)

    optimizer = SGD(loss)
    lr_modifier = DecreasingLearningRate(lr=self.lr, dc=self.dc)
    optimizer.append_direction_modifier(lr_modifier)
    trainer = Trainer(optimizer, DummyBatchScheduler())

    # Monitor the (decreasing) learning rate via the modifier's parameter.
    logger = tasks.Logger(
        views.MonitorVariable(next(iter(lr_modifier.parameters.values()))))
    trainer.append_task(logger)

    return trainer, logger, lr_modifier
def test_max_epoch_stopping():
    """The trainer must halt exactly when MaxEpochStopping's limit is hit."""
    epoch_limit = 7
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(stopping_criteria.MaxEpochStopping(epoch_limit))
    trainer.train()

    assert_equal(trainer.status.current_epoch, epoch_limit)
Exemple #3
0
    def _build_experiment(self, threshold=1):
        """Build SGD on a 4-D ill-conditioned quadratic with gradient clipping.

        Parameters
        ----------
        threshold : float, optional
            Clipping threshold handed to `DirectionClipping`.

        Returns
        -------
        (trainer, logger, gradient_clipping) — the logger monitors the update
        direction, the clipped and original gradients, and the gradient norm.
        """
        # Ill-conditioned Nd gaussian: no single gradient step converges,
        # which makes the clipping effect visible over several updates.
        ndim = 4
        target = 5 * np.ones((1, ndim)).astype(floatX)
        weights = sharedX(np.zeros((1, ndim)))
        residual = weights - target
        objective = T.sum(
            0.5 * T.dot(T.dot(residual, np.diag(1. / np.arange(1, ndim + 1))),
                        residual.T))
        loss = DummyLossWithGradient(objective, weights)
        clipper = DirectionClipping(threshold=threshold)
        loss.append_gradient_modifier(clipper)

        optimizer = SGD(loss)
        trainer = Trainer(optimizer, DummyBatchScheduler())

        # Track direction, clipped/original gradients and the gradient norm.
        logger = tasks.Logger(
            views.MonitorVariable(next(iter(optimizer.directions.values()))),
            views.MonitorVariable(next(iter(loss.gradients.values()))),
            views.MonitorVariable(next(iter(loss.orig_gradients.values()))),
            views.MonitorVariable(clipper.grad_norm))
        trainer.append_task(logger)

        return trainer, logger, clipper
def test_adagrad():
    """AdaGrad reaches the optimum of ill-conditioned quadratics in 15 epochs,
    even with an enormous base learning rate."""
    nb_epochs = 15

    # Nd gaussians whose curvature differs per dimension: there is no single
    # perfect gradient step, so per-dimension adaptivity is required.
    for ndim in range(1, 5):
        target = 5 * np.ones((1, ndim)).astype(floatX)
        weights = sharedX(np.zeros((1, ndim)))
        residual = weights - target
        objective = T.sum(0.5 * T.dot(T.dot(residual, np.diag(1. / np.arange(1, ndim + 1))), residual.T))
        loss = DummyLossWithGradient(objective, weights)

        # Even with a really high gradient step, AdaGrad still converges —
        # faster, in fact, than SGD with its optimal fixed step.
        optimizer = AdaGrad(loss, lr=100, eps=1e-1)
        trainer = Trainer(optimizer, DummyBatchScheduler())
        trainer.append_task(stopping_criteria.MaxEpochStopping(nb_epochs))

        # Record the gradient of `loss` w.r.t. `weights` at every update.
        grad_tracker = tasks.Tracker(loss.gradients[weights])
        trainer.append_task(grad_tracker)
        trainer.train()

        # The parameters should sit at the optimum with near-zero gradients.
        assert_array_almost_equal(weights.get_value(), target)
        assert_array_almost_equal(grad_tracker[0], 0.)
    def _build_experiment(self):
        """Return (trainer, logger, direction_modifier) where SGD with a
        ConstantLearningRate modifier optimizes a 4-D ill-conditioned
        quadratic and the logger monitors the modifier's parameter."""
        # Ill-conditioned Nd gaussian — convergence takes several steps,
        # so the learning rate's effect can be observed.
        ndim = 4
        target = 5 * np.ones((1, ndim)).astype(floatX)
        weights = sharedX(np.zeros((1, ndim)))
        residual = weights - target
        objective = T.sum(0.5 * T.dot(T.dot(residual, np.diag(1. / np.arange(1, ndim + 1))), residual.T))
        loss = DummyLossWithGradient(objective, weights)

        optimizer = SGD(loss)
        lr_modifier = ConstantLearningRate(lr=self.lr)
        optimizer.append_direction_modifier(lr_modifier)
        trainer = Trainer(optimizer, DummyBatchScheduler())

        # Monitor the (constant) learning rate.
        logger = tasks.Logger(views.MonitorVariable(next(iter(lr_modifier.parameters.values()))))
        trainer.append_task(logger)

        return trainer, logger, lr_modifier
Exemple #6
0
def test_max_epoch_stopping():
    """Training with MaxEpochStopping(7) stops at exactly epoch 7."""
    nb_epochs = 7
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(stopping_criteria.MaxEpochStopping(nb_epochs))
    trainer.train()

    assert_equal(trainer.status.current_epoch, nb_epochs)
    def _build_trainer(nb_epochs, optimizer_cls):
        """Build a trainer that trains a Perceptron for `nb_epochs` epochs.

        Uses the closed-over `trainset`/`validset` datasets and `nb_classes`
        (assumed defined in the enclosing scope — TODO confirm).
        `optimizer_cls` must accept a `loss=` keyword argument.

        Returns
        -------
        (trainer, nll, logger) : `nll` is a LossView of the validation NLL;
        `logger` records the per-update loss, its per-epoch average and the
        validation NLL mean.
        """
        # Message fixed: "a trainer is going to" -> "a trainer that is going to".
        print(
            "Will build a trainer that is going to train a Perceptron for {0} epochs."
            .format(nb_epochs))

        print("Building model")
        model = Perceptron(trainset.input_size, nb_classes)
        model.initialize(initer.UniformInitializer(random_seed=1234))

        print("Building optimizer")
        loss = NLL(model, trainset)
        optimizer = optimizer_cls(loss=loss)
        print("Optimizer: {}".format(type(optimizer).__name__))

        # Use mini batches of 100 examples.
        batch_scheduler = MiniBatchScheduler(trainset, 100)

        print("Building trainer")
        trainer = Trainer(optimizer, batch_scheduler)

        # Print time for one epoch and for the whole training.
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error (per update) and its per-epoch average.
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)

        # Print NLL mean/stderror on the validation set.
        nll = views.LossView(loss=NLL(model, validset),
                             batch_scheduler=FullBatchScheduler(validset))
        logger = tasks.Logger(loss_monitor, avg_loss, nll.mean)
        trainer.append_task(logger, avg_loss)

        # Train for `nb_epochs` epochs (stopping criteria should be added at the end).
        trainer.append_task(stopping_criteria.MaxEpochStopping(nb_epochs))

        return trainer, nll, logger
    def _build_experiment(self, threshold=1):
        """Set up SGD on a 4-D anisotropic quadratic with gradient clipping
        at `threshold`; return (trainer, logger, gradient_clipping)."""
        # The quadratic is ill-conditioned on purpose: no single gradient
        # step converges, so the clipping behaviour shows over updates.
        dims = 4
        optimum = 5 * np.ones((1, dims)).astype(floatX)
        params = sharedX(np.zeros((1, dims)))
        diff = params - optimum
        quad_cost = T.sum(0.5 * T.dot(T.dot(diff, np.diag(1. / np.arange(1, dims + 1))), diff.T))
        loss = DummyLossWithGradient(quad_cost, params)
        clipping = DirectionClipping(threshold=threshold)
        loss.append_gradient_modifier(clipping)

        optimizer = SGD(loss)
        trainer = Trainer(optimizer, DummyBatchScheduler())

        # Log the direction, both gradients, and the clipper's gradient norm.
        monitored = [next(iter(optimizer.directions.values())),
                     next(iter(loss.gradients.values())),
                     next(iter(loss.orig_gradients.values())),
                     clipping.grad_norm]
        logger = tasks.Logger(*[views.MonitorVariable(v) for v in monitored])
        trainer.append_task(logger)

        return trainer, logger, clipping
    def _build_trainer(nb_epochs, optimizer_cls):
        """Build a trainer that trains a Perceptron for `nb_epochs` epochs
        with an instance of `optimizer_cls` (must accept a `loss=` keyword).

        Relies on closed-over `trainset`, `validset` and `nb_classes` —
        assumed defined in the enclosing scope; TODO confirm.

        Returns (trainer, nll, logger).
        """
        # Message fixed: "a trainer is going to" -> "a trainer that is going to".
        print("Will build a trainer that is going to train a Perceptron for {0} epochs.".format(nb_epochs))

        print("Building model")
        model = Perceptron(trainset.input_size, nb_classes)
        model.initialize(initer.UniformInitializer(random_seed=1234))

        print("Building optimizer")
        loss = NLL(model, trainset)
        optimizer = optimizer_cls(loss=loss)
        print("Optimizer: {}".format(type(optimizer).__name__))

        # Use mini batches of 100 examples.
        batch_scheduler = MiniBatchScheduler(trainset, 100)

        print("Building trainer")
        trainer = Trainer(optimizer, batch_scheduler)

        # Print time for one epoch and for the whole training.
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error (per update) and its per-epoch average.
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)

        # Print NLL mean/stderror on the validation set.
        nll = views.LossView(loss=NLL(model, validset), batch_scheduler=FullBatchScheduler(validset))
        logger = tasks.Logger(loss_monitor, avg_loss, nll.mean)
        trainer.append_task(logger, avg_loss)

        # Train for `nb_epochs` epochs (stopping criteria should be added at the end).
        trainer.append_task(stopping_criteria.MaxEpochStopping(nb_epochs))

        return trainer, nll, logger
def test_simple_perceptron():
    """End-to-end smoke test: train a Perceptron on MNIST for 10 epochs with
    SGD and a constant learning rate, then report validation NLL and
    classification error."""
    # Load data.
    trainset, validset, testset = load_mnist()

    # Build the model (uniform initialization by default).
    nb_classes = 10
    model = Perceptron(trainset.input_size, nb_classes)
    model.initialize()

    # SGD with a fixed learning rate of 0.1.
    loss = NLL(model, trainset)
    optimizer = SGD(loss=loss)
    optimizer.append_direction_modifier(ConstantLearningRate(0.1))

    # Mini batches of 100 examples; stop after 10 epochs.
    batch_scheduler = MiniBatchScheduler(trainset, 100)
    trainer = Trainer(optimizer, batch_scheduler)
    trainer.append_task(stopping_criteria.MaxEpochStopping(10))

    # Report per-epoch and total training time.
    trainer.append_task(tasks.PrintEpochDuration())
    trainer.append_task(tasks.PrintTrainingDuration())

    # Track the training loss: per update, per-epoch average, accumulated.
    loss_monitor = views.MonitorVariable(loss.loss)
    avg_loss = tasks.AveragePerEpoch(loss_monitor)
    accum = tasks.Accumulator(loss_monitor)
    logger = tasks.Logger(loss_monitor, avg_loss)
    trainer.append_task(logger, avg_loss, accum)

    # Validation NLL mean ± stderror.
    nll = views.LossView(loss=NLL(model, validset), batch_scheduler=FullBatchScheduler(validset))
    trainer.append_task(tasks.Print("Validset - NLL          : {0:.1%} ± {1:.1%}",
                                    nll.mean, nll.stderror))

    # Validation classification error mean ± stderror.
    classif_error = views.LossView(loss=ClassificationError(model, validset),
                                   batch_scheduler=FullBatchScheduler(validset))
    trainer.append_task(tasks.Print("Validset - Classif error: {0:.1%} ± {1:.1%}",
                                    classif_error.mean, classif_error.stderror))

    trainer.train()
Exemple #11
0
def test_early_stopping():
    """Exercise `stopping_criteria.EarlyStopping` over five scenarios:
    constant costs, a late improvement, strictly increasing costs,
    `min_nb_epochs`, and restoration of the best model parameters.

    Scenarios share and rebind `lookahead`, `callback`, etc., so the
    statement order below is significant.
    """
    MAX_EPOCH = 100  # Add a max epoch just in case we got an infinite loop.

    class DummyCost(View):
        """View returning `initial_cost` at update 0, then scripted `costs`
        one per subsequent call; `cpt` counts how many were consumed."""
        def __init__(self, initial_cost, costs):
            super().__init__()
            self.initial_cost = initial_cost  # Cost reported before training starts.
            self.costs = costs                # Scripted sequence of per-epoch costs.
            self.cpt = 0                      # Index of the next cost to serve.

        def update(self, status):
            # Before any update has happened, report the initial cost.
            if status.current_update == 0:
                return self.initial_cost

            cost = self.costs[self.cpt]
            self.cpt += 1
            return cost

    # Scenario 1: 20 identical costs but should stop after 9 unchanged epochs.
    constant_cost = DummyCost(1, np.ones(20))
    lookahead = 9

    def callback(task, status):
        # This callback function should not be called.
        raise NameError("This callback function should not be called.")

    early_stopping = stopping_criteria.EarlyStopping(constant_cost,
                                                     lookahead,
                                                     callback=callback)

    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(
        stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    # No improvement ever happens, so training stops after `lookahead` epochs
    # with the initial cost still the best one.
    assert_equal(trainer.status.current_epoch, lookahead)
    assert_equal(early_stopping.best_epoch, 0)
    assert_equal(early_stopping.best_cost, 1.)
    assert_equal(constant_cost.cpt, lookahead)

    # Scenario 2:
    # `lookahead` identical costs followed by `lookahead` lower identical costs.
    lookahead = 9
    costs = np.r_[np.ones(lookahead - 1), np.zeros(lookahead + 1)]
    simple_cost = DummyCost(1, costs)

    def callback(task, status):
        # This callback function should be called once after `lookahead` epoch.
        if status.current_epoch != lookahead:
            msg = "Callback should be fired up at epoch #{} not #{}.".format(
                lookahead, status.current_epoch)
            raise NameError(msg)

    early_stopping = stopping_criteria.EarlyStopping(simple_cost,
                                                     lookahead,
                                                     callback=callback)

    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(
        stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    # The drop to 0 at epoch `lookahead` resets the patience window.
    assert_equal(trainer.status.current_epoch, 2 * lookahead)
    assert_equal(early_stopping.best_epoch, lookahead)
    assert_equal(early_stopping.best_cost, 0.)

    # Scenario 3: 20 increasing costs but should stop after 9 increasing epochs.
    lookahead = 9
    costs = range(20)
    increasing_cost = DummyCost(0, costs)

    def callback(task, status):
        # This callback function should not be called.
        raise NameError("This callback function should not be called.")

    early_stopping = stopping_criteria.EarlyStopping(increasing_cost,
                                                     lookahead,
                                                     callback=callback)

    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(
        stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, lookahead)
    assert_equal(early_stopping.best_epoch, 0)
    assert_equal(early_stopping.best_cost, 0.)

    # Scenario 4: test `min_nb_epochs` — stopping is delayed until at least
    # `min_nb_epochs` epochs have run, even though costs only increase.
    lookahead = 9
    min_nb_epochs = 5
    costs = range(20)
    increasing_cost = DummyCost(0, costs)
    early_stopping = stopping_criteria.EarlyStopping(
        increasing_cost, lookahead, min_nb_epochs=min_nb_epochs)

    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(
        stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, lookahead + min_nb_epochs)

    # Scenario 5: test that at the end the model is the best one.
    # `lookahead` decreasing costs followed by `lookahead+1` constant identical costs.
    lookahead = 9
    costs = np.r_[-np.arange(lookahead), np.zeros(lookahead + 1)]
    simple_cost = DummyCost(1, costs)

    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    model = trainer._optimizer.loss.model
    # Add some parameters to the model.
    model.parameters.extend([sharedX(np.zeros(4)), sharedX(np.zeros((3, 5)))])

    # Callback that will change model parameters after each epoch.
    def callback(task, status):
        for param in model.parameters:
            param.set_value(param.get_value() + 1)

    trainer.append_task(tasks.Callback(callback))

    early_stopping = stopping_criteria.EarlyStopping(simple_cost, lookahead)
    trainer.append_task(early_stopping)
    trainer.append_task(
        stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    # The best epoch was `lookahead` (last decrease), so EarlyStopping should
    # have restored the parameters incremented exactly `lookahead` times.
    for param in model.parameters:
        assert_array_equal(param.get_value(),
                           lookahead * np.ones_like(param.get_value()))
Exemple #12
0
def test_simple_perceptron():
    """Train a Perceptron on MNIST for 10 epochs and print the validation
    classification error, timing each stage with `Timer`."""
    with Timer("Loading dataset"):
        trainset, validset, testset = load_mnist()

    with Timer("Creating model"):
        # TODO: the number of targets should come from the dataset itself,
        #       but it is unclear how to do that cleanly (regression case?).
        output_size = 10
        model = Perceptron(trainset.input_size, output_size)
        model.initialize()  # Uniform initialization by default.

    with Timer("Building optimizer"):
        # NOTE(review): this variant uses `append_update_rule`, unlike the
        # `append_direction_modifier` API used elsewhere — likely a different
        # library version; confirm before unifying.
        optimizer = SGD(loss=NLL(model, trainset))
        optimizer.append_update_rule(ConstantLearningRate(0.0001))

    with Timer("Building trainer"):
        # Mini batches of 100 examples; train for 10 epochs.
        scheduler = MiniBatchScheduler(trainset, 100)

        trainer = Trainer(optimizer, scheduler)
        trainer.append_task(stopping_criteria.MaxEpochStopping(10))

        # Report per-epoch and total training time.
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Report validation classification error mean ± stderror.
        classif_error = views.ClassificationError(model.use, validset)
        trainer.append_task(
            tasks.Print("Validset - Classif error: {0:.1%} ± {1:.1%}",
                        classif_error.mean, classif_error.stderror))

    with Timer("Training"):
        trainer.train()
Exemple #13
0
def main():
    """CLI entry point: train a Deep Convolutional NADE on a binarized-image
    dataset, resuming from an existing experiment folder when one matching
    the hyperparameters exists.

    Side effects: creates/overwrites ./experiments/<name>/, writes
    hyperparams.json and trainer/model checkpoints, and calls sys.exit(1)
    when provided hyperparameters mismatch the saved ones.
    """
    parser = buildArgsParser()
    args = parser.parse_args()

    # Extract experiments hyperparameters
    hyperparams = dict(vars(args))

    # Remove hyperparams that should not be part of the hash
    del hyperparams['max_epoch']
    del hyperparams['keep']
    del hyperparams['force']
    del hyperparams['name']

    # Get/generate experiment name
    experiment_name = args.name
    if experiment_name is None:
        # Deterministic uid: identical hyperparams map to the same folder.
        experiment_name = utils.generate_uid_from_string(repr(hyperparams))

    # Create experiment folder
    experiment_path = pjoin(".", "experiments", experiment_name)
    resuming = False
    if os.path.isdir(experiment_path) and not args.force:
        resuming = True
        print("### Resuming experiment ({0}). ###\n".format(experiment_name))
        # Check if provided hyperparams match those in the experiment folder
        hyperparams_loaded = utils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))
        if hyperparams != hyperparams_loaded:
            print("{\n" + "\n".join(["{}: {}".format(k, hyperparams[k]) for k in sorted(hyperparams.keys())]) + "\n}")
            print("{\n" + "\n".join(["{}: {}".format(k, hyperparams_loaded[k]) for k in sorted(hyperparams_loaded.keys())]) + "\n}")
            print("The arguments provided are different than the one saved. Use --force if you are certain.\nQuitting.")
            sys.exit(1)
    else:
        # --force (or a fresh run): wipe any stale folder and start clean.
        if os.path.isdir(experiment_path):
            shutil.rmtree(experiment_path)

        os.makedirs(experiment_path)
        utils.save_dict_to_json_file(pjoin(experiment_path, "hyperparams.json"), hyperparams)

    with Timer("Loading dataset"):
        trainset, validset, testset = datasets.load(args.dataset)

        image_shape = (28, 28)
        # An extra input channel carries the autoregressive mask when requested.
        nb_channels = 1 + (args.use_mask_as_input is True)

        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(trainset, args.batch_size,
                                                                   use_mask_as_input=args.use_mask_as_input,
                                                                   seed=args.ordering_seed)
        print("{} updates per epoch.".format(len(batch_scheduler)))

    with Timer("Building model"):
        # Four variants: Lasagne vs. raw Theano implementation, each with or
        # without residual connections; the blueprint builder is the default.
        if args.use_lasagne:
            if args.with_residual:
                model = DeepConvNadeWithResidualUsingLasagne(image_shape=image_shape,
                                                             nb_channels=nb_channels,
                                                             convnet_blueprint=args.convnet_blueprint,
                                                             fullnet_blueprint=args.fullnet_blueprint,
                                                             hidden_activation=args.hidden_activation,
                                                             use_mask_as_input=args.use_mask_as_input)
            else:
                model = DeepConvNadeUsingLasagne(image_shape=image_shape,
                                                 nb_channels=nb_channels,
                                                 convnet_blueprint=args.convnet_blueprint,
                                                 fullnet_blueprint=args.fullnet_blueprint,
                                                 hidden_activation=args.hidden_activation,
                                                 use_mask_as_input=args.use_mask_as_input,
                                                 use_batch_norm=args.batch_norm)

        elif args.with_residual:
            model = DeepConvNADEWithResidual(image_shape=image_shape,
                                             nb_channels=nb_channels,
                                             convnet_blueprint=args.convnet_blueprint,
                                             fullnet_blueprint=args.fullnet_blueprint,
                                             hidden_activation=args.hidden_activation,
                                             use_mask_as_input=args.use_mask_as_input)

        else:
            builder = DeepConvNADEBuilder(image_shape=image_shape,
                                          nb_channels=nb_channels,
                                          hidden_activation=args.hidden_activation,
                                          use_mask_as_input=args.use_mask_as_input)

            if args.blueprints_seed is not None:
                # NOTE(review): the guard reads `args.blueprints_seed` but the
                # call below reads `args.blueprint_seed` (no "s") — one of the
                # two is likely a typo; confirm against buildArgsParser().
                convnet_blueprint, fullnet_blueprint = generate_blueprints(args.blueprint_seed, image_shape[0])
                builder.build_convnet_from_blueprint(convnet_blueprint)
                builder.build_fullnet_from_blueprint(fullnet_blueprint)
            else:
                if args.convnet_blueprint is not None:
                    builder.build_convnet_from_blueprint(args.convnet_blueprint)

                if args.fullnet_blueprint is not None:
                    builder.build_fullnet_from_blueprint(args.fullnet_blueprint)

            model = builder.build()
            # print(str(model.convnet))
            # print(str(model.fullnet))

        # NOTE(review): `weigths_initializer_factory` spelling comes from the
        # project API — renaming it here would break the import.
        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))
        print(str(model))

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, trainset)
        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        if args.max_epoch is not None:
            trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss:     : {}", avg_loss))

        # Print NLL mean/stderror.
        model.deterministic = True  # For batch normalization, see https://github.com/Lasagne/Lasagne/blob/master/lasagne/layers/normalization.py#L198
        nll = views.LossView(loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, validset),
                             batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(validset, batch_size=0.1*len(validset),
                                                                                      use_mask_as_input=args.use_mask_as_input,
                                                                                      keep_mask=True,
                                                                                      seed=args.ordering_seed+1))
        # trainer.append_task(tasks.Print("Validset - NLL          : {0:.2f} ± {1:.2f}", nll.mean, nll.stderror, each_k_update=100))
        trainer.append_task(tasks.Print("Validset - NLL          : {0:.2f} ± {1:.2f}", nll.mean, nll.stderror))

        # direction_norm = views.MonitorVariable(T.sqrt(sum(map(lambda d: T.sqr(d).sum(), loss.gradients.values()))))
        # trainer.append_task(tasks.Print("||d|| : {0:.4f}", direction_norm, each_k_update=50))

        # Save training progression
        def save_model(*args):
            # Checkpoint whenever validation NLL improves (EarlyStopping callback).
            trainer.save(experiment_path)

        trainer.append_task(stopping_criteria.EarlyStopping(nll.mean, lookahead=args.lookahead, eps=args.lookahead_eps, callback=save_model))

        trainer.build_theano_graph()

    if resuming:
        with Timer("Loading"):
            trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()

    # Final checkpoint of both trainer state and model weights.
    trainer.save(experiment_path)
    model.save(experiment_path)
def test_simple_convnade():
    """Train a small Convolutional Deep NADE on 4x4 center patches of
    binarized MNIST for 3 epochs, then verify the model defines a proper
    distribution: p(x) summed over all 2^16 binary inputs must be ~1.
    """
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "sigmoid"
    use_mask_as_input = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 3
    nb_orderings = 1

    # NOTE(review): "Convoluational" is a typo in this runtime message;
    # left untouched here (doc-only change).
    print("Will train Convoluational Deep NADE for a total of {0} epochs.".
          format(max_epoch))

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [
            348, 349, 350, 351, 376, 377, 378, 379, 404, 405, 406, 407, 432,
            433, 434, 435
        ]

        # Inputs double as targets (autoregressive density estimation).
        trainset = Dataset(trainset.inputs.get_value()[:, indices_to_keep],
                           trainset.inputs.get_value()[:, indices_to_keep],
                           name="trainset")
        validset = Dataset(validset.inputs.get_value()[:, indices_to_keep],
                           validset.inputs.get_value()[:, indices_to_keep],
                           name="validset")
        testset = Dataset(testset.inputs.get_value()[:, indices_to_keep],
                          testset.inputs.get_value()[:, indices_to_keep],
                          name="testset")

        image_shape = (4, 4)
        nb_channels = 1

    with Timer("Building model"):
        builder = DeepConvNADEBuilder(image_shape=image_shape,
                                      nb_channels=nb_channels,
                                      use_mask_as_input=use_mask_as_input)

        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        builder.build_convnet_from_blueprint(convnet_blueprint)
        builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = builder.build()
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
            model, trainset)

        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(
            trainset, batch_size)

        trainer = Trainer(optimizer, batch_scheduler)

        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(
            tasks.Print("Avg. training loss:     : {}", avg_loss))

        # Print NLL mean/stderror.
        nll = views.LossView(
            loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(
                model, validset),
            batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(
                validset, batch_size=len(validset)))
        trainer.append_task(
            tasks.Print("Validset - NLL          : {0:.2f} ± {1:.2f}",
                        nll.mean, nll.stderror))

        trainer.build_theano_graph()

    with Timer("Training"):
        trainer.train()

    with Timer("Checking the probs for all possible inputs sum to 1"):
        rng = np.random.RandomState(ordering_seed)
        D = np.prod(image_shape)
        # All 2^D binary vectors of length D (D = 16 here).
        inputs = cartesian([[0, 1]] * int(D), dtype=np.float32)
        ordering = np.arange(D, dtype=np.int32)
        rng.shuffle(ordering)

        symb_input = T.vector("input")
        symb_input.tag.test_value = inputs[-len(inputs) // 4]
        symb_ordering = T.ivector("ordering")
        symb_ordering.tag.test_value = ordering
        nll_of_x_given_o = theano.function([symb_input, symb_ordering],
                                           model.nll_of_x_given_o(
                                               symb_input, symb_ordering),
                                           name="nll_of_x_given_o")
        #theano.printing.pydotprint(nll_of_x_given_o, '{0}_nll_of_x_given_o_{1}'.format(model.__class__.__name__, theano.config.device), with_ids=True)

        for i in range(nb_orderings):
            print("Ordering:", ordering)
            ordering = np.arange(D, dtype=np.int32)
            rng.shuffle(ordering)

            nlls = []
            for no, input in enumerate(inputs):
                print("{}/{}".format(no, len(inputs)), end='\r')
                nlls.append(nll_of_x_given_o(input, ordering))

            print("{}/{} Done".format(len(inputs), len(inputs)))

            # Sum probabilities in log-space for numerical stability.
            p_x = np.exp(np.logaddexp.reduce(-np.array(nlls)))
            print("Sum of p(x) for all x:", p_x)
            assert_almost_equal(p_x, 1., decimal=5)
    def _build_trainer(nb_epochs):
        """Build a trainer for a small Convolutional Deep NADE.

        Uses the closed-over `trainset`, `validset`, `image_shape`,
        `nb_channels`, `use_mask_as_input` and `batch_size` (assumed defined
        in the enclosing scope — TODO confirm).

        Returns (trainer, nll, logger).
        """
        # Typo fixed in the user-facing message: "Convoluational" -> "Convolutional".
        print("Will train Convolutional Deep NADE for a total of {0} epochs.".
              format(nb_epochs))

        with Timer("Building model"):
            builder = DeepConvNADEBuilder(image_shape=image_shape,
                                          nb_channels=nb_channels,
                                          use_mask_as_input=use_mask_as_input)

            convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
            fullnet_blueprint = "5 -> 16"
            print("Convnet:", convnet_blueprint)
            print("Fullnet:", fullnet_blueprint)
            builder.build_convnet_from_blueprint(convnet_blueprint)
            builder.build_fullnet_from_blueprint(fullnet_blueprint)

            model = builder.build()
            # Seeded initialization keeps the test deterministic.
            model.initialize(initer.UniformInitializer(random_seed=1234))

        with Timer("Building optimizer"):
            loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
                model, trainset)

            optimizer = SGD(loss=loss)
            optimizer.append_direction_modifier(ConstantLearningRate(0.001))

        with Timer("Building trainer"):
            batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(
                trainset, batch_size)

            trainer = Trainer(optimizer, batch_scheduler)

            # Print time for one epoch and for the whole training.
            trainer.append_task(tasks.PrintEpochDuration())
            trainer.append_task(tasks.PrintTrainingDuration())

            # Log training error: per update, per-epoch average, accumulated.
            loss_monitor = views.MonitorVariable(loss.loss)
            avg_loss = tasks.AveragePerEpoch(loss_monitor)
            accum = tasks.Accumulator(loss_monitor)
            logger = tasks.Logger(loss_monitor, avg_loss)
            trainer.append_task(logger, avg_loss, accum)

            # Print average training loss.
            trainer.append_task(
                tasks.Print("Avg. training loss:     : {}", avg_loss))

            # Print NLL mean/stderror on the validation set.
            nll = views.LossView(
                loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(
                    model, validset),
                batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(
                    validset, batch_size=len(validset), keep_mask=True))
            trainer.append_task(
                tasks.Print("Validset - NLL          : {0:.2f} ± {1:.2f}",
                            nll.mean, nll.stderror))

            trainer.append_task(stopping_criteria.MaxEpochStopping(nb_epochs))

            return trainer, nll, logger
Exemple #16
0
def main():
    """Entry point: train a streamline-tracking model.

    Parses the command line, creates (or resumes) an experiment folder,
    loads the tractography datasets, builds the model/optimizer/trainer,
    and runs the training loop with early stopping on the validation error.
    """
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    # These options do not change what is being trained, so they are left
    # out of the hyperparameters used to identify the experiment folder.
    hyperparams_to_exclude = ['max_epoch', 'force', 'name', 'view', 'shuffle_streamlines']
    # Use this for hyperparams added in a new version, but nonexistent from older versions
    retrocompatibility_defaults = {'feed_previous_direction': False,
                                   'normalize': False}
    experiment_path, hyperparams, resuming = utils.maybe_create_experiment_folder(args, exclude=hyperparams_to_exclude,
                                                                                  retrocompatibility_defaults=retrocompatibility_defaults)

    # Log the command currently running.
    with open(pjoin(experiment_path, 'cmd.txt'), 'a') as f:
        f.write(" ".join(sys.argv) + "\n")

    print("Resuming:" if resuming else "Creating:", experiment_path)

    with Timer("Loading dataset", newline=True):
        # Two volume managers so the model can be pointed at either the
        # train or the valid subjects (see the volume_manager HACK below).
        trainset_volume_manager = VolumeManager()
        validset_volume_manager = VolumeManager()
        trainset = datasets.load_tractography_dataset(args.train_subjects, trainset_volume_manager, name="trainset", use_sh_coeffs=args.use_sh_coeffs)
        validset = datasets.load_tractography_dataset(args.valid_subjects, validset_volume_manager, name="validset", use_sh_coeffs=args.use_sh_coeffs)
        print("Dataset sizes:", len(trainset), " |", len(validset))

        if args.view:
            # Visualization-only mode: display the t-SNE view and quit.
            # NOTE: because of this exit, the `if args.view:` plotting
            # branch further below can never be reached.
            tsne_view(trainset, trainset_volume_manager)
            sys.exit(0)

        batch_scheduler = batch_scheduler_factory(hyperparams, dataset=trainset, train_mode=True)
        print("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print(trainset_volume_manager.data_dimension, args.hidden_sizes, batch_scheduler.target_size)

    with Timer("Creating model"):
        input_size = trainset_volume_manager.data_dimension
        if hyperparams['feed_previous_direction']:
            input_size += 3  # The previous direction is a 3D vector appended to the input.

        model = model_factory(hyperparams,
                              input_size=input_size,
                              output_size=batch_scheduler.target_size,
                              volume_manager=trainset_volume_manager)
        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))

    with Timer("Building optimizer"):
        loss = loss_factory(hyperparams, model, trainset)

        if args.clip_gradient is not None:
            loss.append_gradient_modifier(DirectionClipping(threshold=args.clip_gradient))

        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        trainer.append_task(avg_loss)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss:         : {}", avg_loss))

        # HACK: To make sure all subjects in the volume_manager are used in a batch, we have to split the trainset/validset in 2 volume managers
        model.volume_manager = validset_volume_manager
        valid_loss = loss_factory(hyperparams, model, validset)
        valid_batch_scheduler = batch_scheduler_factory(hyperparams,
                                                        dataset=validset,
                                                        train_mode=False)

        valid_error = views.LossView(loss=valid_loss, batch_scheduler=valid_batch_scheduler)
        trainer.append_task(tasks.Print("Validset - Error        : {0:.2f} | {1:.2f}", valid_error.sum, valid_error.mean))

        # HACK: Restore trainset volume manager
        model.volume_manager = trainset_volume_manager

        # Early stopping monitors the summed validation error.
        lookahead_loss = valid_error.sum

        # L2 norm of the gradient direction, ||d||, for monitoring.
        direction_norm = views.MonitorVariable(T.sqrt(sum(map(lambda d: T.sqr(d).sum(), loss.gradients.values()))))

        # Logged history: index 0 = valid mean, 1 = valid sum, 2 = ||d||.
        logger = tasks.Logger(valid_error.mean, valid_error.sum, direction_norm)
        trainer.append_task(logger)

        if args.view:  # NOTE: dead code — `args.view` already exited above.
            import pylab as plt

            def _plot(*args, **kwargs):
                plt.figure(1)
                plt.clf()
                plt.show(False)
                plt.subplot(121)
                # BUG FIX: the logger holds only 3 variables (valid mean,
                # valid sum, ||d||); the previous code read history index 3
                # (out of range) and mislabeled index 0 as "Train".
                plt.plot(np.array(logger.get_variable_history(0)).flatten(), label="Valid (mean)")
                plt.plot(np.array(logger.get_variable_history(1)).flatten(), label="Valid (sum)")
                plt.legend()

                plt.subplot(122)
                plt.plot(np.array(logger.get_variable_history(2)).flatten(), label="||d'||")
                plt.draw()

            trainer.append_task(tasks.Callback(_plot))

        # Callback function to stop training if NaN is detected.
        def detect_nan(obj, status):
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Stopping training now.")
                sys.exit()

        trainer.append_task(tasks.Callback(detect_nan, each_k_update=1))

        # Callback function to save training progression.
        def save_training(obj, status):
            trainer.save(experiment_path)

        trainer.append_task(tasks.Callback(save_training))

        # Early stopping with a callback for saving every time model improves.
        def save_improvement(obj, status):
            """ Save best model and training progression. """
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Not saving the model. Crashing now.")
                sys.exit()

            print("*** Best epoch: {0} ***\n".format(obj.best_epoch))
            model.save(experiment_path)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())
        trainer.append_task(tasks.PrintTime(each_k_update=100))  # Profiling

        # Add stopping criteria
        trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))
        early_stopping = stopping_criteria.EarlyStopping(lookahead_loss, lookahead=args.lookahead, eps=args.lookahead_eps, callback=save_improvement)
        trainer.append_task(early_stopping)

    with Timer("Compiling Theano graph"):
        trainer.build_theano_graph()

    if resuming:
        # Only reload trainer state if a previous run got far enough to
        # write the 'training/' folder.
        if not os.path.isdir(pjoin(experiment_path, 'training')):
            print("No 'training/' folder. Assuming it failed before"
                  " the end of the first epoch. Starting a new training.")
        else:
            with Timer("Loading"):
                trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()
def test_early_stopping():
    """Exercise `stopping_criteria.EarlyStopping` on scripted cost sequences.

    Scenarios covered:
      * constant costs     -> stops after `lookahead` epochs, callback never fires;
      * late improvement   -> callback fires exactly at epoch `lookahead`;
      * increasing costs   -> stops after `lookahead` epochs, callback never fires;
      * `min_nb_epochs`    -> stopping is postponed until that epoch;
      * best-model restore -> parameters revert to the best epoch's values.
    """
    MAX_EPOCH = 100  # Add a max epoch just in case we got an infinite loop.

    # View that replays a scripted sequence of costs, one per call.
    class DummyCost(View):
        def __init__(self, initial_cost, costs):
            super().__init__()
            self.initial_cost = initial_cost
            self.costs = costs
            self.cpt = 0  # Number of scripted costs consumed so far.

        def update(self, status):
            # Before any update happened, report the initial cost.
            if status.current_update == 0:
                return self.initial_cost

            cost = self.costs[self.cpt]
            self.cpt += 1
            return cost

    # 20 identical costs but should stop after 9 unchanged epochs.
    constant_cost = DummyCost(1, np.ones(20))
    lookahead = 9

    def callback(task, status):
        # This callback function should not be called.
        raise NameError("This callback function should not be called.")

    early_stopping = stopping_criteria.EarlyStopping(constant_cost, lookahead, callback=callback)

    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, lookahead)
    assert_equal(early_stopping.best_epoch, 0)
    assert_equal(early_stopping.best_cost, 1.)
    assert_equal(constant_cost.cpt, lookahead)

    # `lookahead` identical costs followed by `lookahead` lower identical costs.
    lookahead = 9
    costs = np.r_[np.ones(lookahead-1), np.zeros(lookahead+1)]
    simple_cost = DummyCost(1, costs)

    def callback(task, status):
        # This callback function should be called once after `lookahead` epoch.
        if status.current_epoch != lookahead:
            msg = "Callback should be fired up at epoch #{} not #{}.".format(lookahead, status.current_epoch)
            raise NameError(msg)

    early_stopping = stopping_criteria.EarlyStopping(simple_cost, lookahead, callback=callback)

    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    # Improvement at epoch `lookahead` resets the patience counter, so
    # training runs for another `lookahead` epochs after it.
    assert_equal(trainer.status.current_epoch, 2*lookahead)
    assert_equal(early_stopping.best_epoch, lookahead)
    assert_equal(early_stopping.best_cost, 0.)

    # 20 increasing costs but should stop after 9 increasing epochs.
    lookahead = 9
    costs = range(20)
    increasing_cost = DummyCost(0, costs)

    def callback(task, status):
        # This callback function should not be called.
        raise NameError("This callback function should not be called.")

    early_stopping = stopping_criteria.EarlyStopping(increasing_cost, lookahead, callback=callback)

    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, lookahead)
    assert_equal(early_stopping.best_epoch, 0)
    assert_equal(early_stopping.best_cost, 0.)

    # Test `min_nb_epochs`
    lookahead = 9
    min_nb_epochs = 15
    costs = range(20)
    increasing_cost = DummyCost(0, costs)
    early_stopping = stopping_criteria.EarlyStopping(increasing_cost, lookahead, min_nb_epochs=min_nb_epochs)

    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, min_nb_epochs)

    # Test that at the end the model is the best one.
    # `lookahead` decreasing costs followed by `lookahead+1` constant identical costs.
    lookahead = 9
    costs = np.r_[-np.arange(lookahead), np.zeros(lookahead+1)]
    simple_cost = DummyCost(1, costs)

    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    model = trainer._optimizer.loss.model
    # Add some parameters to the model.
    model.parameters.extend([sharedX(np.zeros(4)), sharedX(np.zeros((3, 5)))])

    # Callback that will change model parameters after each epoch.
    def callback(task, status):
        for param in model.parameters:
            param.set_value(param.get_value() + 1)

    trainer.append_task(tasks.Callback(callback))

    early_stopping = stopping_criteria.EarlyStopping(simple_cost, lookahead)
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    # EarlyStopping should have restored the parameters from the best
    # epoch, i.e. after exactly `lookahead` callback increments.
    for param in model.parameters:
        assert_array_equal(param.get_value(), lookahead*np.ones_like(param.get_value()))
def test_new_fprop_matches_old_fprop():
    """Train a tiny Conv Deep NADE on 4x4 MNIST center patches, then check
    that the pre-activation returned by `model.fprop` equals the output of
    `model.get_output` fed with [masked input, mask] concatenated.
    """
    # NOTE(review): nb_kernels, kernel_shape, hidden_activation,
    # ordering_seed and nb_orderings are never used below.
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "sigmoid"
    use_mask_as_input = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 10
    nb_orderings = 1

    print("Will train Convoluational Deep NADE for a total of {0} epochs.".
          format(max_epoch))

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [
            348, 349, 350, 351, 376, 377, 378, 379, 404, 405, 406, 407, 432,
            433, 434, 435
        ]

        # Inputs double as targets (autoregressive reconstruction).
        trainset = Dataset(trainset.inputs.get_value()[:, indices_to_keep],
                           trainset.inputs.get_value()[:, indices_to_keep],
                           name="trainset")
        validset = Dataset(validset.inputs.get_value()[:, indices_to_keep],
                           validset.inputs.get_value()[:, indices_to_keep],
                           name="validset")
        testset = Dataset(testset.inputs.get_value()[:, indices_to_keep],
                          testset.inputs.get_value()[:, indices_to_keep],
                          name="testset")

        image_shape = (4, 4)
        # One extra input channel when the mask is fed to the network.
        nb_channels = 1 + (use_mask_as_input is True)

    with Timer("Building model"):
        builder = DeepConvNADEBuilder(image_shape=image_shape,
                                      nb_channels=nb_channels,
                                      use_mask_as_input=use_mask_as_input)

        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        builder.build_convnet_from_blueprint(convnet_blueprint)
        builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = builder.build()
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
            model, trainset)

        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(
            trainset, batch_size, use_mask_as_input=use_mask_as_input)

        trainer = Trainer(optimizer, batch_scheduler)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(
            tasks.Print("Avg. training loss:     : {}", avg_loss))

        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        trainer.build_theano_graph()

    with Timer("Training"):
        trainer.train()

    # Reuse the scheduler's shared mask so both code paths see the same
    # mask o<d when comparing outputs.
    mask_o_lt_d = batch_scheduler._shared_batch_mask
    fprop_output, fprop_pre_output = model.fprop(
        trainset.inputs, mask_o_lt_d, return_output_preactivation=True)
    model_output = model.get_output(
        T.concatenate([trainset.inputs * mask_o_lt_d, mask_o_lt_d], axis=1))
    assert_array_equal(model_output.eval(), fprop_pre_output.eval())
    print(np.sum(abs(model_output.eval() - fprop_pre_output.eval())))
# ---- Exemple #19 (0) — pasted-example separator (scrape artifact) ----
def main():
    """Entry point: train a feed-forward mask classifier (FFNN).

    Parses the command line, creates (or resumes) an experiment folder,
    loads the mask-classifier datasets, builds model/optimizer/trainer,
    and trains with early stopping on the summed validation error.
    """
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    # These options do not affect the experiment identity, so they are
    # excluded from the hyperparameters used to name the folder.
    hyperparams_to_exclude = ['max_epoch', 'force', 'name', 'view']
    # Use this for hyperparams added in a new version, but nonexistent from older versions
    retrocompatibility_defaults = {'use_layer_normalization': False}
    experiment_path, hyperparams, resuming = utils.maybe_create_experiment_folder(
        args,
        exclude=hyperparams_to_exclude,
        retrocompatibility_defaults=retrocompatibility_defaults)

    # Log the command currently running.
    with open(pjoin(experiment_path, 'cmd.txt'), 'a') as f:
        f.write(" ".join(sys.argv) + "\n")

    print("Resuming:" if resuming else "Creating:", experiment_path)

    with Timer("Loading dataset", newline=True):
        # Two volume managers so the model can be pointed at either the
        # train or the valid subjects (see the volume_manager HACK below).
        trainset_volume_manager = VolumeManager()
        validset_volume_manager = VolumeManager()
        trainset = datasets.load_mask_classifier_dataset(
            args.train_subjects,
            trainset_volume_manager,
            name="trainset",
            use_sh_coeffs=args.use_sh_coeffs)
        validset = datasets.load_mask_classifier_dataset(
            args.valid_subjects,
            validset_volume_manager,
            name="validset",
            use_sh_coeffs=args.use_sh_coeffs)
        print("Dataset sizes:", len(trainset), " |", len(validset))

        batch_scheduler = MaskClassifierBatchScheduler(
            trainset, hyperparams['batch_size'], seed=hyperparams['seed'])
        print("An epoch will be composed of {} updates.".format(
            batch_scheduler.nb_updates_per_epoch))
        print(trainset_volume_manager.data_dimension, args.hidden_sizes,
              batch_scheduler.target_size)

    with Timer("Creating model"):
        input_size = trainset_volume_manager.data_dimension

        model = FFNN_Classification(trainset_volume_manager, input_size,
                                    hyperparams['hidden_sizes'])
        model.initialize(
            weigths_initializer_factory(args.weights_initialization,
                                        seed=args.initialization_seed))

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropy(model, trainset)

        # Optional gradient clipping to stabilize training.
        if args.clip_gradient is not None:
            loss.append_gradient_modifier(
                DirectionClipping(threshold=args.clip_gradient))

        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        trainer.append_task(avg_loss)

        # Print average training loss.
        trainer.append_task(
            tasks.Print("Avg. training loss:         : {}", avg_loss))

        # HACK: To make sure all subjects in the volume_manager are used in a batch, we have to split the trainset/validset in 2 volume managers
        model.volume_manager = validset_volume_manager
        valid_loss = BinaryCrossEntropy(model, validset)
        valid_batch_scheduler = MaskClassifierBatchScheduler(
            validset, hyperparams['batch_size'], seed=hyperparams['seed'])

        valid_error = views.LossView(loss=valid_loss,
                                     batch_scheduler=valid_batch_scheduler)
        trainer.append_task(
            tasks.Print("Validset - Error        : {0:.2f} | {1:.2f}",
                        valid_error.sum, valid_error.mean))

        # HACK: Restore trainset volume manager
        model.volume_manager = trainset_volume_manager

        # Early stopping monitors the summed validation error.
        lookahead_loss = valid_error.sum

        # L2 norm of the gradient direction, ||d||, for monitoring.
        direction_norm = views.MonitorVariable(
            T.sqrt(sum(map(lambda d: T.sqr(d).sum(),
                           loss.gradients.values()))))
        # trainer.append_task(tasks.Print("||d|| : {0:.4f}", direction_norm))

        # Logged history: index 0 = valid mean, 1 = valid sum, 2 = ||d||.
        logger = tasks.Logger(valid_error.mean, valid_error.sum,
                              direction_norm)
        trainer.append_task(logger)

        # Callback function to stop training if NaN is detected.
        def detect_nan(obj, status):
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Stopping training now.")
                sys.exit()

        trainer.append_task(tasks.Callback(detect_nan, each_k_update=1))

        # Callback function to save training progression.
        def save_training(obj, status):
            trainer.save(experiment_path)

        trainer.append_task(tasks.Callback(save_training))

        # Early stopping with a callback for saving every time model improves.
        def save_improvement(obj, status):
            """ Save best model and training progression. """
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Not saving the model. Crashing now.")
                sys.exit()

            print("*** Best epoch: {0} ***\n".format(obj.best_epoch))
            model.save(experiment_path)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())
        trainer.append_task(tasks.PrintTime(each_k_update=100))  # Profiling

        # Add stopping criteria
        trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))
        early_stopping = stopping_criteria.EarlyStopping(
            lookahead_loss,
            lookahead=args.lookahead,
            eps=args.lookahead_eps,
            callback=save_improvement)
        trainer.append_task(early_stopping)

    with Timer("Compiling Theano graph"):
        trainer.build_theano_graph()

    if resuming:
        # Only reload trainer state if a previous run got far enough to
        # write the 'training/' folder.
        if not os.path.isdir(pjoin(experiment_path, 'training')):
            print("No 'training/' folder. Assuming it failed before"
                  " the end of the first epoch. Starting a new training.")
        else:
            with Timer("Loading"):
                trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()
def test_simple_perceptron():
    """Smoke test: fit a 10-class Perceptron on MNIST for 10 epochs and
    print the validset classification error (no assertions)."""
    with Timer("Loading dataset"):
        trainset, validset, testset = load_mnist()

    with Timer("Creating model"):
        # TODO: infer the number of classes from the dataset instead of
        #       hard-coding it (careful: that would break for regression).
        nb_classes = 10
        perceptron = Perceptron(trainset.input_size, nb_classes)
        perceptron.initialize()  # Uniform initialization by default.

    with Timer("Building optimizer"):
        nll_loss = NLL(perceptron, trainset)
        optimizer = SGD(loss=nll_loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.0001))

    with Timer("Building trainer"):
        # Mini-batches of 100 examples; stop after 10 epochs.
        scheduler = MiniBatchScheduler(trainset, 100)

        trainer = Trainer(optimizer, scheduler)
        trainer.append_task(stopping_criteria.MaxEpochStopping(10))

        # Report how long each epoch (and the whole run) took.
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Report mean/stderror of the validset classification error.
        error_view = views.ClassificationError(perceptron.use, validset)
        trainer.append_task(tasks.Print("Validset - Classif error: {0:.1%} ± {1:.1%}", error_view.mean, error_view.stderror))

    with Timer("Training"):
        trainer.train()
def test_simple_perceptron():
    """Smoke test: train a 10-class Perceptron on MNIST for 10 epochs while
    logging the training loss and printing validset NLL / classification
    error (no assertions)."""
    # Load MNIST (the test split is unused here).
    trainset, validset, testset = load_mnist()

    # Build and initialize the model.
    nb_classes = 10
    perceptron = Perceptron(trainset.input_size, nb_classes)
    perceptron.initialize()  # Uniform initialization by default.

    # Plain SGD on the negative log-likelihood with a fixed learning rate.
    train_loss = NLL(perceptron, trainset)
    sgd = SGD(loss=train_loss)
    sgd.append_direction_modifier(ConstantLearningRate(0.1))

    # Mini-batches of 100 examples.
    scheduler = MiniBatchScheduler(trainset, 100)

    # Assemble the trainer and its tasks.
    trainer = Trainer(sgd, scheduler)

    # Timing reports.
    trainer.append_task(tasks.PrintEpochDuration())
    trainer.append_task(tasks.PrintTrainingDuration())

    # Track the training loss plus its per-epoch average and full history.
    monitored_loss = views.MonitorVariable(train_loss.loss)
    epoch_avg = tasks.AveragePerEpoch(monitored_loss)
    history = tasks.Accumulator(monitored_loss)
    log = tasks.Logger(monitored_loss, epoch_avg)
    trainer.append_task(log, epoch_avg, history)

    # Validset NLL (mean ± stderror).
    valid_nll = views.LossView(loss=NLL(perceptron, validset),
                               batch_scheduler=FullBatchScheduler(validset))
    trainer.append_task(
        tasks.Print("Validset - NLL          : {0:.1%} ± {1:.1%}", valid_nll.mean,
                    valid_nll.stderror))

    # Validset classification error (mean ± stderror).
    valid_error = views.LossView(
        loss=ClassificationError(perceptron, validset),
        batch_scheduler=FullBatchScheduler(validset))
    trainer.append_task(
        tasks.Print("Validset - Classif error: {0:.1%} ± {1:.1%}",
                    valid_error.mean, valid_error.stderror))

    # Stopping criterion goes last; run for 10 epochs.
    trainer.append_task(stopping_criteria.MaxEpochStopping(10))
    trainer.train()
def test_sgd():
    """SGD sanity checks on quadratic bowls: a perfectly conditioned family
    (converges in ~2 epochs with step 1/L) and an ill-conditioned 4-D case
    (needs many epochs)."""
    # Create simple Nd gaussian functions to optimize. These functions are
    # (perfectly) well-conditioned so it should take only one gradient step
    # to converge using 1/L, where L is the largest eigenvalue of the hessian.
    max_epoch = 2
    for N in range(1, 5):
        center = np.arange(1, N+1)[None, :].astype(floatX)
        param = sharedX(np.zeros((1, N)))
        cost = T.sum(0.5*T.dot(T.dot((param-center), T.eye(N)), (param-center).T))
        loss = DummyLossWithGradient(cost, param)

        trainer = Trainer(SGD(loss), DummyBatchScheduler())

        # Monitor the gradient of `loss` w.r.t. to `param`.
        tracker = tasks.Tracker(loss.gradients[param])
        trainer.append_task(tracker)

        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))
        trainer.train()

        # Since the problem is well-conditioned and we use an optimal gradient step 1/L,
        # two epochs should be enough for `param` to be around `center` and the gradients near 0.
        assert_array_almost_equal(param.get_value(), center)
        assert_array_almost_equal(tracker[0], 0.)

    # Create an Nd gaussian function to optimize. This function is not
    # well-conditioned and there exists no perfect gradient step to converge in
    # only one iteration.
    max_epoch = 80
    N = 4
    center = 5*np.ones((1, N)).astype(floatX)
    param = sharedX(np.zeros((1, N)))
    cost = T.sum(0.5*T.dot(T.dot((param-center), np.diag(1./np.arange(1, N+1))), (param-center).T))
    loss = DummyLossWithGradient(cost, param)

    trainer = Trainer(SGD(loss), DummyBatchScheduler())
    trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

    # Monitor the gradient of `loss` w.r.t. to `param`.
    tracker = tasks.Tracker(loss.gradients[param])
    trainer.append_task(tracker)
    trainer.train()

    # Unlike above, this problem is ill-conditioned, so there is no single
    # optimal step size; after `max_epoch` (80) epochs SGD should still have
    # brought `param` close to `center` with near-zero gradients.
    assert_array_almost_equal(param.get_value(), center, decimal=6)
    assert_array_almost_equal(tracker[0], 0.)
# ---- Exemple #23 (0) — pasted-example separator (scrape artifact) ----
def test_simple_convnade():
    """Train a small Conv Deep NADE on 4x4 MNIST center patches, report the
    validset NLL estimate, then verify the model defines a valid
    distribution: p(x) summed over all 2^16 binary inputs must be ~1.
    """
    # NOTE(review): nb_kernels, kernel_shape, hidden_activation,
    # ordering_seed, nb_orderings and consider_mask_as_channel are never
    # used below (the builder receives a literal True instead).
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "sigmoid"
    consider_mask_as_channel = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 3
    nb_orderings = 1

    print("Will train Convoluational Deep NADE for a total of {0} epochs.".
          format(max_epoch))

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [
            348, 349, 350, 351, 376, 377, 378, 379, 404, 405, 406, 407, 432,
            433, 434, 435
        ]

        # Inputs double as targets (autoregressive reconstruction).
        trainset = Dataset(trainset.inputs.get_value()[:, indices_to_keep],
                           trainset.inputs.get_value()[:, indices_to_keep],
                           name="trainset")
        validset = Dataset(validset.inputs.get_value()[:, indices_to_keep],
                           validset.inputs.get_value()[:, indices_to_keep],
                           name="validset")
        testset = Dataset(testset.inputs.get_value()[:, indices_to_keep],
                          testset.inputs.get_value()[:, indices_to_keep],
                          name="testset")

        image_shape = (4, 4)
        nb_channels = 1

    with Timer("Building model"):
        builder = DeepConvNADEBuilder(image_shape=image_shape,
                                      nb_channels=nb_channels,
                                      consider_mask_as_channel=True)

        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        builder.build_convnet_from_blueprint(convnet_blueprint)
        builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = builder.build()
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
            model, trainset)

        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(
            trainset, batch_size)

        trainer = Trainer(optimizer, batch_scheduler)

        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(
            tasks.Print("Avg. training loss:     : {}", avg_loss))

        # Print NLL mean/stderror.
        nll = views.LossView(
            loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(
                model, validset),
            batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(
                validset, batch_size=len(validset)))
        trainer.append_task(
            tasks.Print("Validset - NLL          : {0:.2f} ± {1:.2f}",
                        nll.mean, nll.stderror))

        trainer.build_theano_graph()

    with Timer("Training"):
        trainer.train()

    with Timer("Checking the probs for all possible inputs sum to 1"):
        D = np.prod(image_shape)  # Number of pixels (16).

        # Per-dimension conditional NLLs on the validset for a fixed
        # ordering (ordering_id=0, seed=42).
        batch_scheduler = BatchSchedulerWithAutoregressiveMasks(
            validset,
            batch_size=len(validset),
            batch_id=0,
            ordering_id=0,
            concatenate_mask=model.nb_channels == 2,
            seed=42)
        nll = views.LossView(
            loss=NllUsingBinaryCrossEntropyWithAutoRegressiveMask(
                model, validset, batch_scheduler.mod),
            batch_scheduler=batch_scheduler)
        nlls_xod_given_xoltd = nll.losses.view(Status())
        # Sum conditional NLLs over the D dimensions -> NLL per example.
        nlls = np.sum(nlls_xod_given_xoltd.reshape(-1, len(validset)), axis=0)
        nll_validset = np.mean(nlls)
        print("Sum of NLL for validset:", nll_validset)

        # Enumerate every binary image of D pixels and check sum_x p(x) ~= 1.
        inputs = cartesian([[0, 1]] * int(D), dtype=np.float32)
        dataset = ReconstructionDataset(inputs)
        batch_scheduler = BatchSchedulerWithAutoregressiveMasks(
            dataset,
            batch_size=len(dataset),
            batch_id=0,
            ordering_id=0,
            concatenate_mask=model.nb_channels == 2,
            seed=42)
        nll = views.LossView(
            loss=NllUsingBinaryCrossEntropyWithAutoRegressiveMask(
                model, dataset, batch_scheduler.mod),
            batch_scheduler=batch_scheduler)
        nlls_xod_given_xoltd = nll.losses.view(Status())
        nlls = np.sum(nlls_xod_given_xoltd.reshape(-1, len(dataset)), axis=0)
        # logaddexp.reduce(-nlls) = log sum_x p(x); exponentiate for the sum.
        p_x = np.exp(np.logaddexp.reduce(-nlls))
        print("Sum of p(x) for all x:", p_x)
        assert_almost_equal(p_x, 1., decimal=5)
# ---- Exemple #24 (0) — pasted-example separator (scrape artifact) ----
def main():
    """Command-line entry point: train a tractography/streamline model.

    Parses CLI arguments, loads the train/valid datasets, builds the model,
    loss and optimizer from hyperparameter factories, wires monitoring,
    checkpointing and stopping tasks onto a ``Trainer``, compiles the Theano
    graph, optionally resumes a previous run, then trains.

    Side effects: creates or reuses an experiment folder on disk, appends the
    launching command line to ``cmd.txt``, and saves model/training state
    during training via the registered callbacks.
    """
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    # These options do not change the learned model, so they are excluded from
    # the hyperparameter set used to identify/name the experiment folder.
    hyperparams_to_exclude = ['max_epoch', 'force', 'name', 'view', 'shuffle_streamlines']
    # Use this for hyperparams added in a new version, but nonexistent from older versions
    retrocompatibility_defaults = {'feed_previous_direction': False,
                                   'predict_offset': False,
                                   'normalize': False,
                                   'sort_streamlines': False,
                                   'keep_step_size': False,
                                   'use_layer_normalization': False,
                                   'drop_prob': 0.,
                                   'use_zoneout': False,
                                   'skip_connections': False}
    experiment_path, hyperparams, resuming = utils.maybe_create_experiment_folder(args, exclude=hyperparams_to_exclude,
                                                                                  retrocompatibility_defaults=retrocompatibility_defaults)

    # Log the command currently running (append mode, so resumed runs keep
    # the full invocation history).
    with open(pjoin(experiment_path, 'cmd.txt'), 'a') as f:
        f.write(" ".join(sys.argv) + "\n")

    print("Resuming:" if resuming else "Creating:", experiment_path)

    with Timer("Loading dataset", newline=True):
        # Two separate volume managers: the model is pointed at one or the
        # other depending on whether it is training or being evaluated (see
        # the HACK notes further down).
        trainset_volume_manager = VolumeManager()
        validset_volume_manager = VolumeManager()
        trainset = datasets.load_tractography_dataset(args.train_subjects, trainset_volume_manager, name="trainset",
                                                      use_sh_coeffs=args.use_sh_coeffs)
        validset = datasets.load_tractography_dataset(args.valid_subjects, validset_volume_manager, name="validset",
                                                      use_sh_coeffs=args.use_sh_coeffs)
        print("Dataset sizes:", len(trainset), " |", len(validset))

        batch_scheduler = batch_scheduler_factory(hyperparams, dataset=trainset, train_mode=True)
        print("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print(trainset_volume_manager.data_dimension, args.hidden_sizes, batch_scheduler.target_size)

    with Timer("Creating model"):
        input_size = trainset_volume_manager.data_dimension
        if hyperparams['feed_previous_direction']:
            # Presumably room for a 3D previous-direction vector appended to
            # each input -- TODO confirm against the batch scheduler.
            input_size += 3

        model = model_factory(hyperparams,
                              input_size=input_size,
                              output_size=batch_scheduler.target_size,
                              volume_manager=trainset_volume_manager)
        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))

    with Timer("Building optimizer"):
        loss = loss_factory(hyperparams, model, trainset)

        # Optional gradient clipping by direction norm.
        if args.clip_gradient is not None:
            loss.append_gradient_modifier(DirectionClipping(threshold=args.clip_gradient))

        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        trainer.append_task(avg_loss)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss:         : {}", avg_loss))

        # if args.learn_to_stop:
        #     l2err_monitor = views.MonitorVariable(T.mean(loss.mean_sqr_error))
        #     avg_l2err = tasks.AveragePerEpoch(l2err_monitor)
        #     trainer.append_task(avg_l2err)
        #
        #     crossentropy_monitor = views.MonitorVariable(T.mean(loss.cross_entropy))
        #     avg_crossentropy = tasks.AveragePerEpoch(crossentropy_monitor)
        #     trainer.append_task(avg_crossentropy)
        #
        #     trainer.append_task(tasks.Print("Avg. training L2 err:       : {}", avg_l2err))
        #     trainer.append_task(tasks.Print("Avg. training stopping:     : {}", avg_crossentropy))
        #     trainer.append_task(tasks.Print("L2 err : {0:.4f}", l2err_monitor, each_k_update=100))
        #     trainer.append_task(tasks.Print("stopping : {0:.4f}", crossentropy_monitor, each_k_update=100))

        # Print NLL mean/stderror.
        # train_loss = L2DistanceForSequences(model, trainset)
        # train_batch_scheduler = StreamlinesBatchScheduler(trainset, batch_size=1000,
        #                                                   noisy_streamlines_sigma=None,
        #                                                   nb_updates_per_epoch=None,
        #                                                   seed=1234)

        # train_error = views.LossView(loss=train_loss, batch_scheduler=train_batch_scheduler)
        # trainer.append_task(tasks.Print("Trainset - Error        : {0:.2f} | {1:.2f}", train_error.sum, train_error.mean))

        # HACK: To make sure all subjects in the volume_manager are used in a batch, we have to split the trainset/validset in 2 volume managers
        model.volume_manager = validset_volume_manager
        model.drop_prob = 0.  # Do not use dropout/zoneout for evaluation
        valid_loss = loss_factory(hyperparams, model, validset)
        valid_batch_scheduler = batch_scheduler_factory(hyperparams,
                                                        dataset=validset,
                                                        train_mode=False)

        valid_error = views.LossView(loss=valid_loss, batch_scheduler=valid_batch_scheduler)
        trainer.append_task(tasks.Print("Validset - Error        : {0:.2f} | {1:.2f}", valid_error.sum, valid_error.mean))

        if hyperparams['model'] == 'ffnn_regression':
            # Extra monitoring for the regression model: also report the
            # expected-value (L2-style) loss on the validset.
            valid_batch_scheduler2 = batch_scheduler_factory(hyperparams,
                                                             dataset=validset,
                                                             train_mode=False)

            valid_l2 = loss_factory(hyperparams, model, validset, loss_type="expected_value")
            valid_l2_error = views.LossView(loss=valid_l2, batch_scheduler=valid_batch_scheduler2)
            trainer.append_task(tasks.Print("Validset - {}".format(valid_l2.__class__.__name__) + "\t: {0:.2f} | {1:.2f}", valid_l2_error.sum, valid_l2_error.mean))

        # HACK: Restore trainset volume manager
        model.volume_manager = trainset_volume_manager
        model.drop_prob = hyperparams['drop_prob']  # Restore dropout

        # Early stopping monitors the summed validset error.
        lookahead_loss = valid_error.sum

        # L2 norm of the full gradient (over all parameters), for monitoring.
        direction_norm = views.MonitorVariable(T.sqrt(sum(map(lambda d: T.sqr(d).sum(), loss.gradients.values()))))
        # trainer.append_task(tasks.Print("||d|| : {0:.4f}", direction_norm))

        # logger = tasks.Logger(train_error.mean, valid_error.mean, valid_error.sum, direction_norm)
        logger = tasks.Logger(valid_error.mean, valid_error.sum, direction_norm)
        trainer.append_task(logger)

        if args.view:
            import pylab as plt

            def _plot(*args, **kwargs):
                # Live-refresh two subplots after each epoch: loss curves and
                # the gradient norm.
                plt.figure(1)
                plt.clf()
                # NOTE(review): positional block arg is deprecated; newer
                # matplotlib expects plt.show(block=False) -- verify version.
                plt.show(False)
                plt.subplot(121)
                # NOTE(review): with the train_error logger line commented
                # out above, indices 0/1 hold validset mean/sum, so the
                # "Train" label is misleading -- verify intent.
                plt.plot(np.array(logger.get_variable_history(0)).flatten(), label="Train")
                plt.plot(np.array(logger.get_variable_history(1)).flatten(), label="Valid")
                plt.legend()

                plt.subplot(122)
                # NOTE(review): the logger was built with 3 variables
                # (indices 0-2); index 3 looks out of range here -- likely
                # should be 2 (direction_norm). Verify.
                plt.plot(np.array(logger.get_variable_history(3)).flatten(), label="||d'||")
                plt.draw()

            trainer.append_task(tasks.Callback(_plot))

        # Callback function to stop training if NaN is detected.
        # NOTE(review): only checks parameters[0]; NaNs confined to other
        # parameters would go unnoticed here.
        def detect_nan(obj, status):
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Stopping training now.")
                sys.exit()

        trainer.append_task(tasks.Callback(detect_nan, each_k_update=1))

        # Callback function to save training progression.
        def save_training(obj, status):
            trainer.save(experiment_path)

        trainer.append_task(tasks.Callback(save_training))

        # Early stopping with a callback for saving every time model improves.
        def save_improvement(obj, status):
            """ Save best model and training progression. """
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Not saving the model. Crashing now.")
                sys.exit()

            print("*** Best epoch: {0} ***\n".format(obj.best_epoch))
            model.save(experiment_path)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())
        trainer.append_task(tasks.PrintTime(each_k_update=100))  # Profiling

        # Add stopping criteria
        trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))
        early_stopping = stopping_criteria.EarlyStopping(lookahead_loss, lookahead=args.lookahead, eps=args.lookahead_eps, callback=save_improvement)
        trainer.append_task(early_stopping)

    with Timer("Compiling Theano graph"):
        trainer.build_theano_graph()

    if resuming:
        # A missing 'training/' folder means the previous run died before the
        # first checkpoint; in that case start fresh instead of loading.
        if not os.path.isdir(pjoin(experiment_path, 'training')):
            print("No 'training/' folder. Assuming it failed before"
                  " the end of the first epoch. Starting a new training.")
        else:
            with Timer("Loading"):
                trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()