def _build_trainer(nb_epochs):
        print("Will train Convoluational Deep NADE for a total of {0} epochs.".
              format(nb_epochs))

        with Timer("Building model"):
            builder = DeepConvNADEBuilder(image_shape=image_shape,
                                          nb_channels=nb_channels,
                                          use_mask_as_input=use_mask_as_input)

            convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
            fullnet_blueprint = "5 -> 16"
            print("Convnet:", convnet_blueprint)
            print("Fullnet:", fullnet_blueprint)
            builder.build_convnet_from_blueprint(convnet_blueprint)
            builder.build_fullnet_from_blueprint(fullnet_blueprint)

            model = builder.build()
            model.initialize(initer.UniformInitializer(random_seed=1234))

        with Timer("Building optimizer"):
            loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
                model, trainset)
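            # Presumably the orderless-NADE estimator: for a random ordering o
            # and pivot d, NLL(x) is approximated by the cross-entropy of the
            # masked-out pixels rescaled by D / (D - d + 1) (Uria et al., 2014).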

            optimizer = SGD(loss=loss)
            optimizer.append_direction_modifier(ConstantLearningRate(0.001))

        with Timer("Building trainer"):
            batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(
                trainset, batch_size)
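            # As its name suggests, this scheduler pairs each minibatch with a
            # freshly sampled autoregressive mask m = 1[o < d]: pixels preceding
            # a random pivot d in a random ordering o are visible, and the loss
            # is estimated on the remaining ones.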

            trainer = Trainer(optimizer, batch_scheduler)

            # Print the duration of each epoch and the total training time.
            trainer.append_task(tasks.PrintEpochDuration())
            trainer.append_task(tasks.PrintTrainingDuration())

            # Log training error
            loss_monitor = views.MonitorVariable(loss.loss)
            avg_loss = tasks.AveragePerEpoch(loss_monitor)
            accum = tasks.Accumulator(loss_monitor)
            logger = tasks.Logger(loss_monitor, avg_loss)
            trainer.append_task(logger, avg_loss, accum)

            # Print average training loss.
            trainer.append_task(
                tasks.Print("Avg. training loss      : {}", avg_loss))

            # Print NLL mean/stderror.
            nll = views.LossView(
                loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(
                    model, validset),
                batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(
                    validset, batch_size=len(validset), keep_mask=True))
            trainer.append_task(
                tasks.Print("Validset - NLL          : {0:.2f} ± {1:.2f}",
                            nll.mean, nll.stderror))

            trainer.append_task(stopping_criteria.MaxEpochStopping(nb_epochs))

            return trainer, nll, logger
Example #2
def main():
    parser = build_argparser()
    args = parser.parse_args()

    # Load experiment hyperparameters (fall back to the parent directory).
    try:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(args.experiment, "hyperparams.json"))
    except IOError:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(args.experiment, '..', "hyperparams.json"))

    model = load_model(args.experiment)
    print(str(model))

    with Timer("Loading dataset"):
        trainset, validset, testset = datasets.load(hyperparams['dataset'],
                                                    keep_on_cpu=True)
        print(" (data: {:,}; {:,}; {:,}) ".format(len(trainset), len(validset),
                                                  len(testset)),
              end="")

    # Result files.
    result_file = pjoin(args.experiment, "results_estimate.json")

    if not os.path.isfile(result_file) or args.force:
        with Timer("Evaluating NLL estimate"):
            results = {"seed": args.seed}
            results['trainset'] = estimate_NLL(model,
                                               trainset,
                                               seed=args.seed,
                                               batch_size=args.batch_size)
            results['validset'] = estimate_NLL(model,
                                               validset,
                                               seed=args.seed,
                                               batch_size=args.batch_size)
            results['testset'] = estimate_NLL(model,
                                              testset,
                                              seed=args.seed,
                                              batch_size=args.batch_size)
            utils.save_dict_to_json_file(result_file,
                                         {"NLL_estimate": results})
    else:
        print("Loading saved results... (use --force to re-run evaluation)")
        results = utils.load_dict_from_json_file(result_file)['NLL_estimate']

    for dataset in ['trainset', 'validset', 'testset']:
        print("NLL estimate on {}: {:.2f} ± {:.2f}".format(
            dataset, results[dataset]['mean'], results[dataset]['stderror']))
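
# A hedged sketch of the results_estimate.json layout implied by the reads and
# writes above (the key structure follows the code; the numbers are purely
# illustrative):
#
# {
#     "NLL_estimate": {
#         "seed": 1234,
#         "trainset": {"mean": 89.1, "stderror": 0.3},
#         "validset": {"mean": 90.4, "stderror": 0.4},
#         "testset": {"mean": 90.0, "stderror": 0.4}
#     }
# }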
def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    # Load experiment hyperparameters (fall back to the parent directory).
    try:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(args.experiment, "hyperparams.json"))
    except IOError:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(args.experiment, '..', "hyperparams.json"))

    model = load_model(args.experiment)
    print(str(model))

    with Timer("Generating {} samples from Conv Deep NADE".format(args.count)):
        sample = model.build_sampling_function(seed=args.seed)
        samples, probs = sample(args.count, return_probs=True, ordering_seed=args.seed)

    if args.out is not None:
        outfile = pjoin(args.experiment, args.out)
        with Timer("Saving {0} samples to '{1}'".format(args.count, outfile)):
            np.save(outfile, samples)

    if args.view:
        import pylab as plt
        from convnade import vizu
        if hyperparams["dataset"] == "binarized_mnist":
            image_shape = (28, 28)
        else:
            raise ValueError("Unknown dataset: {0}".format(hyperparams["dataset"]))

        plt.figure()
        data = vizu.concatenate_images(samples, shape=image_shape, border_size=1, clim=(0, 1))
        plt.imshow(data, cmap=plt.cm.gray, interpolation='nearest')
        plt.title("Samples")

        plt.figure()
        data = vizu.concatenate_images(probs, shape=image_shape, border_size=1, clim=(0, 1))
        plt.imshow(data, cmap=plt.cm.gray, interpolation='nearest')
        plt.title("Probs")

        plt.show()
def load_model(experiment_path):

    with Timer("Loading model"):
        from convnade import DeepConvNadeUsingLasagne, DeepConvNadeWithResidualUsingLasagne
        from convnade import DeepConvNADE, DeepConvNADEWithResidual

        for model_class in [DeepConvNadeUsingLasagne, DeepConvNadeWithResidualUsingLasagne, DeepConvNADE, DeepConvNADEWithResidual]:
            try:
                model = model_class.create(experiment_path)
                return model
            except Exception as e:
                print(e)

    raise NameError("No model found!")
Example #5
def main():
    parser = build_argparser()
    args = parser.parse_args()

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    if not os.path.isdir(pjoin(experiment_path, "evaluation")):
        parser.error('Cannot find evaluations for experiment: {0}!'.format(
            experiment_path))

    results_file = pjoin(experiment_path, "results.json")

    if not os.path.isfile(results_file) or args.force:
        with Timer("Merging NLL evaluations"):
            results = merge_NLL_evaluations(
                evaluation_dir=pjoin(experiment_path, "evaluation"))
            smartutils.save_dict_to_json_file(results_file, {"NLL": results})

    else:
        print("Loading saved losses... (use --force to re-run evaluation)")
        results = smartutils.load_dict_from_json_file(results_file)["NLL"]

    nb_orderings = results['nb_orderings']
    for dataset in ['validset', 'testset']:
        print("NLL estimate on {} ({} orderings): {:.2f} ± {:.2f}".format(
            dataset, nb_orderings, results[dataset]['mean'],
            results[dataset]['stderror']))

        if results[dataset]['incomplete']:
            print(
                "** Warning **: {} evaluation is incomplete. Missing some orderings or batches."
                .format(dataset))
Example #6
def test_simple_convnade():
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "sigmoid"
    consider_mask_as_channel = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 3
    nb_orderings = 1

    print("Will train Convoluational Deep NADE for a total of {0} epochs.".
          format(max_epoch))

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [
            348, 349, 350, 351, 376, 377, 378, 379, 404, 405, 406, 407, 432,
            433, 434, 435
        ]

        trainset = Dataset(trainset.inputs.get_value()[:, indices_to_keep],
                           trainset.inputs.get_value()[:, indices_to_keep],
                           name="trainset")
        validset = Dataset(validset.inputs.get_value()[:, indices_to_keep],
                           validset.inputs.get_value()[:, indices_to_keep],
                           name="validset")
        testset = Dataset(testset.inputs.get_value()[:, indices_to_keep],
                          testset.inputs.get_value()[:, indices_to_keep],
                          name="testset")

        image_shape = (4, 4)
        nb_channels = 1

    with Timer("Building model"):
        builder = DeepConvNADEBuilder(image_shape=image_shape,
                                      nb_channels=nb_channels,
                                      consider_mask_as_channel=True)

        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        builder.build_convnet_from_blueprint(convnet_blueprint)
        builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = builder.build()
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
            model, trainset)

        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(
            trainset, batch_size)

        trainer = Trainer(optimizer, batch_scheduler)

        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        # Print the duration of each epoch and the total training time.
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(
            tasks.Print("Avg. training loss      : {}", avg_loss))

        # Print NLL mean/stderror.
        nll = views.LossView(
            loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(
                model, validset),
            batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(
                validset, batch_size=len(validset)))
        trainer.append_task(
            tasks.Print("Validset - NLL          : {0:.2f} ± {1:.2f}",
                        nll.mean, nll.stderror))

        trainer.build_theano_graph()

    with Timer("Training"):
        trainer.train()

    with Timer("Checking the probs for all possible inputs sum to 1"):
        # rng = np.random.RandomState(ordering_seed)
        D = np.prod(image_shape)

        batch_scheduler = BatchSchedulerWithAutoregressiveMasks(
            validset,
            batch_size=len(validset),
            batch_id=0,
            ordering_id=0,
            concatenate_mask=model.nb_channels == 2,
            seed=42)
        nll = views.LossView(
            loss=NllUsingBinaryCrossEntropyWithAutoRegressiveMask(
                model, validset, batch_scheduler.mod),
            batch_scheduler=batch_scheduler)
        nlls_xod_given_xoltd = nll.losses.view(Status())
        nlls = np.sum(nlls_xod_given_xoltd.reshape(-1, len(validset)), axis=0)
        nll_validset = np.mean(nlls)
        print("Sum of NLL for validset:", nll_validset)

        inputs = cartesian([[0, 1]] * int(D), dtype=np.float32)
        dataset = ReconstructionDataset(inputs)
        batch_scheduler = BatchSchedulerWithAutoregressiveMasks(
            dataset,
            batch_size=len(dataset),
            batch_id=0,
            ordering_id=0,
            concatenate_mask=model.nb_channels == 2,
            seed=42)
        nll = views.LossView(
            loss=NllUsingBinaryCrossEntropyWithAutoRegressiveMask(
                model, dataset, batch_scheduler.mod),
            batch_scheduler=batch_scheduler)
        nlls_xod_given_xoltd = nll.losses.view(Status())
        nlls = np.sum(nlls_xod_given_xoltd.reshape(-1, len(dataset)), axis=0)
        p_x = np.exp(np.logaddexp.reduce(-nlls))
        print("Sum of p(x) for all x:", p_x)
        assert_almost_equal(p_x, 1., decimal=5)
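
# Why np.logaddexp.reduce above: summing p(x) = exp(-nll) directly can
# underflow for large NLLs, so the sum is accumulated in log-space. A minimal
# equivalent sketch (same value up to floating-point round-off):
#
#   def log_sum_exp(a):
#       m = np.max(a)
#       return m + np.log(np.sum(np.exp(a - m)))
#
#   p_x = np.exp(log_sum_exp(-nlls))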
Example #7
def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    # Extract experiments hyperparameters
    hyperparams = dict(vars(args))

    # Remove hyperparams that should not be part of the hash
    del hyperparams['max_epoch']
    del hyperparams['keep']
    del hyperparams['force']
    del hyperparams['name']

    # Get/generate experiment name
    experiment_name = args.name
    if experiment_name is None:
        experiment_name = utils.generate_uid_from_string(repr(hyperparams))

    # Create experiment folder
    experiment_path = pjoin(".", "experiments", experiment_name)
    resuming = False
    if os.path.isdir(experiment_path) and not args.force:
        resuming = True
        print("### Resuming experiment ({0}). ###\n".format(experiment_name))
        # Check if provided hyperparams match those in the experiment folder
        hyperparams_loaded = utils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))
        if hyperparams != hyperparams_loaded:
            print("{\n" + "\n".join(["{}: {}".format(k, hyperparams[k]) for k in sorted(hyperparams.keys())]) + "\n}")
            print("{\n" + "\n".join(["{}: {}".format(k, hyperparams_loaded[k]) for k in sorted(hyperparams_loaded.keys())]) + "\n}")
            print("The arguments provided are different than the one saved. Use --force if you are certain.\nQuitting.")
            sys.exit(1)
    else:
        if os.path.isdir(experiment_path):
            shutil.rmtree(experiment_path)

        os.makedirs(experiment_path)
        utils.save_dict_to_json_file(pjoin(experiment_path, "hyperparams.json"), hyperparams)

    with Timer("Loading dataset"):
        trainset, validset, testset = datasets.load(args.dataset)

        image_shape = (28, 28)
        nb_channels = 1 + int(args.use_mask_as_input)  # the mask adds a channel

        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(trainset, args.batch_size,
                                                                   use_mask_as_input=args.use_mask_as_input,
                                                                   seed=args.ordering_seed)
        print("{} updates per epoch.".format(len(batch_scheduler)))

    with Timer("Building model"):
        if args.use_lasagne:
            if args.with_residual:
                model = DeepConvNadeWithResidualUsingLasagne(image_shape=image_shape,
                                                             nb_channels=nb_channels,
                                                             convnet_blueprint=args.convnet_blueprint,
                                                             fullnet_blueprint=args.fullnet_blueprint,
                                                             hidden_activation=args.hidden_activation,
                                                             use_mask_as_input=args.use_mask_as_input)
            else:
                model = DeepConvNadeUsingLasagne(image_shape=image_shape,
                                                 nb_channels=nb_channels,
                                                 convnet_blueprint=args.convnet_blueprint,
                                                 fullnet_blueprint=args.fullnet_blueprint,
                                                 hidden_activation=args.hidden_activation,
                                                 use_mask_as_input=args.use_mask_as_input,
                                                 use_batch_norm=args.batch_norm)

        elif args.with_residual:
            model = DeepConvNADEWithResidual(image_shape=image_shape,
                                             nb_channels=nb_channels,
                                             convnet_blueprint=args.convnet_blueprint,
                                             fullnet_blueprint=args.fullnet_blueprint,
                                             hidden_activation=args.hidden_activation,
                                             use_mask_as_input=args.use_mask_as_input)

        else:
            builder = DeepConvNADEBuilder(image_shape=image_shape,
                                          nb_channels=nb_channels,
                                          hidden_activation=args.hidden_activation,
                                          use_mask_as_input=args.use_mask_as_input)

            if args.blueprints_seed is not None:
                convnet_blueprint, fullnet_blueprint = generate_blueprints(args.blueprints_seed, image_shape[0])
                builder.build_convnet_from_blueprint(convnet_blueprint)
                builder.build_fullnet_from_blueprint(fullnet_blueprint)
            else:
                if args.convnet_blueprint is not None:
                    builder.build_convnet_from_blueprint(args.convnet_blueprint)

                if args.fullnet_blueprint is not None:
                    builder.build_fullnet_from_blueprint(args.fullnet_blueprint)

            model = builder.build()
            # print(str(model.convnet))
            # print(str(model.fullnet))

        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))
        print(str(model))

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, trainset)
        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        if args.max_epoch is not None:
            trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))

        # Print the duration of each epoch and the total training time.
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss:     : {}", avg_loss))

        # Print NLL mean/stderror.
        model.deterministic = True  # For batch normalization, see https://github.com/Lasagne/Lasagne/blob/master/lasagne/layers/normalization.py#L198
        nll = views.LossView(loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, validset),
                             batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(validset, batch_size=int(0.1 * len(validset)),
                                                                                      use_mask_as_input=args.use_mask_as_input,
                                                                                      keep_mask=True,
                                                                                      seed=args.ordering_seed + 1))
        # trainer.append_task(tasks.Print("Validset - NLL          : {0:.2f} ± {1:.2f}", nll.mean, nll.stderror, each_k_update=100))
        trainer.append_task(tasks.Print("Validset - NLL          : {0:.2f} ± {1:.2f}", nll.mean, nll.stderror))

        # direction_norm = views.MonitorVariable(T.sqrt(sum(map(lambda d: T.sqr(d).sum(), loss.gradients.values()))))
        # trainer.append_task(tasks.Print("||d|| : {0:.4f}", direction_norm, each_k_update=50))

        # Save training progression
        def save_model(*args):
            trainer.save(experiment_path)

        trainer.append_task(stopping_criteria.EarlyStopping(nll.mean, lookahead=args.lookahead, eps=args.lookahead_eps, callback=save_model))

        trainer.build_theano_graph()

    if resuming:
        with Timer("Loading"):
            trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()

    trainer.save(experiment_path)
    model.save(experiment_path)
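
# A hedged invocation sketch; the script name and flag spellings are guesses
# inferred from the argparse attributes used above (buildArgsParser itself is
# not shown):
#
#   python train_convnade.py --dataset binarized_mnist --batch_size 128 \
#       --max_epoch 100 --convnet_blueprint "64@2x2(valid) -> 1@2x2(full)" \
#       --fullnet_blueprint "5 -> 16" --lookahead 10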
def test_new_fprop_matches_old_fprop():
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "sigmoid"
    use_mask_as_input = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 10
    nb_orderings = 1

    print("Will train Convoluational Deep NADE for a total of {0} epochs.".
          format(max_epoch))

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [
            348, 349, 350, 351, 376, 377, 378, 379, 404, 405, 406, 407, 432,
            433, 434, 435
        ]

        trainset = Dataset(trainset.inputs.get_value()[:, indices_to_keep],
                           trainset.inputs.get_value()[:, indices_to_keep],
                           name="trainset")
        validset = Dataset(validset.inputs.get_value()[:, indices_to_keep],
                           validset.inputs.get_value()[:, indices_to_keep],
                           name="validset")
        testset = Dataset(testset.inputs.get_value()[:, indices_to_keep],
                          testset.inputs.get_value()[:, indices_to_keep],
                          name="testset")

        image_shape = (4, 4)
        nb_channels = 1 + int(use_mask_as_input)  # the mask adds a channel

    with Timer("Building model"):
        builder = DeepConvNADEBuilder(image_shape=image_shape,
                                      nb_channels=nb_channels,
                                      use_mask_as_input=use_mask_as_input)

        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        builder.build_convnet_from_blueprint(convnet_blueprint)
        builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = builder.build()
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
            model, trainset)

        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(
            trainset, batch_size, use_mask_as_input=use_mask_as_input)

        trainer = Trainer(optimizer, batch_scheduler)

        # Print the duration of each epoch and the total training time.
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(
            tasks.Print("Avg. training loss      : {}", avg_loss))

        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        trainer.build_theano_graph()

    with Timer("Training"):
        trainer.train()

    mask_o_lt_d = batch_scheduler._shared_batch_mask
    fprop_output, fprop_pre_output = model.fprop(
        trainset.inputs, mask_o_lt_d, return_output_preactivation=True)
    model_output = model.get_output(
        T.concatenate([trainset.inputs * mask_o_lt_d, mask_o_lt_d], axis=1))
    assert_array_equal(model_output.eval(), fprop_pre_output.eval())
    print(np.sum(abs(model_output.eval() - fprop_pre_output.eval())))
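
# Note: with use_mask_as_input, the masked image and the mask itself are
# concatenated along the feature axis (the T.concatenate call above), so
# get_output receives 2 * D = 32 values per 4x4 patch, matching nb_channels == 2.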
def test_save_load_convnade():
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "hinge"
    use_mask_as_input = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 5
    nb_orderings = 1

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [
            348, 349, 350, 351, 376, 377, 378, 379, 404, 405, 406, 407, 432,
            433, 434, 435
        ]

        trainset = Dataset(trainset.inputs.get_value()[:, indices_to_keep],
                           trainset.inputs.get_value()[:, indices_to_keep],
                           name="trainset")
        validset = Dataset(validset.inputs.get_value()[:, indices_to_keep],
                           validset.inputs.get_value()[:, indices_to_keep],
                           name="validset")
        testset = Dataset(testset.inputs.get_value()[:, indices_to_keep],
                          testset.inputs.get_value()[:, indices_to_keep],
                          name="testset")

        image_shape = (4, 4)
        nb_channels = 1

    # Nested function to build a trainer.
    def _build_trainer(nb_epochs):
        print("Will train Convoluational Deep NADE for a total of {0} epochs.".
              format(nb_epochs))

        with Timer("Building model"):
            builder = DeepConvNADEBuilder(image_shape=image_shape,
                                          nb_channels=nb_channels,
                                          use_mask_as_input=use_mask_as_input)

            convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
            fullnet_blueprint = "5 -> 16"
            print("Convnet:", convnet_blueprint)
            print("Fullnet:", fullnet_blueprint)
            builder.build_convnet_from_blueprint(convnet_blueprint)
            builder.build_fullnet_from_blueprint(fullnet_blueprint)

            model = builder.build()
            model.initialize(initer.UniformInitializer(random_seed=1234))

        with Timer("Building optimizer"):
            loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
                model, trainset)

            optimizer = SGD(loss=loss)
            optimizer.append_direction_modifier(ConstantLearningRate(0.001))

        with Timer("Building trainer"):
            batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(
                trainset, batch_size)

            trainer = Trainer(optimizer, batch_scheduler)

            # Print the duration of each epoch and the total training time.
            trainer.append_task(tasks.PrintEpochDuration())
            trainer.append_task(tasks.PrintTrainingDuration())

            # Log training error
            loss_monitor = views.MonitorVariable(loss.loss)
            avg_loss = tasks.AveragePerEpoch(loss_monitor)
            accum = tasks.Accumulator(loss_monitor)
            logger = tasks.Logger(loss_monitor, avg_loss)
            trainer.append_task(logger, avg_loss, accum)

            # Print average training loss.
            trainer.append_task(
                tasks.Print("Avg. training loss      : {}", avg_loss))

            # Print NLL mean/stderror.
            nll = views.LossView(
                loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(
                    model, validset),
                batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(
                    validset, batch_size=len(validset), keep_mask=True))
            trainer.append_task(
                tasks.Print("Validset - NLL          : {0:.2f} ± {1:.2f}",
                            nll.mean, nll.stderror))

            trainer.append_task(stopping_criteria.MaxEpochStopping(nb_epochs))

            return trainer, nll, logger

    trainer1, nll1, logger1 = _build_trainer(nb_epochs=10)
    with Timer("Compiling training graph"):
        trainer1.build_theano_graph()

    with Timer("Training"):
        trainer1.train()

    trainer2a, nll2a, logger2a = _build_trainer(5)
    with Timer("Compiling training graph"):
        trainer2a.build_theano_graph()

    with Timer("Training"):
        trainer2a.train()

    # Save model halfway during training and resume it.
    with tempfile.TemporaryDirectory() as experiment_dir:
        with Timer("Saving"):
            # Save current state of the model (i.e. after 5 epochs).
            trainer2a.save(experiment_dir)

        with Timer("Loading"):
            # Load previous state from which training will resume.
            trainer2b, nll2b, logger2b = _build_trainer(10)
            trainer2b.load(experiment_dir)

            # Check we correctly reloaded the model.
            assert_equal(len(trainer2a._optimizer.loss.model.parameters),
                         len(trainer2b._optimizer.loss.model.parameters))
            for param1, param2 in zip(
                    trainer2a._optimizer.loss.model.parameters,
                    trainer2b._optimizer.loss.model.parameters):
                assert_array_equal(param1.get_value(),
                                   param2.get_value(),
                                   err_msg=param1.name)

    with Timer("Compiling training graph"):
        trainer2b.build_theano_graph()

    with Timer("Training"):
        trainer2b.train()

    # Check we correctly resumed training.
    assert_equal(len(trainer1._optimizer.loss.model.parameters),
                 len(trainer2b._optimizer.loss.model.parameters))
    for param1, param2 in zip(trainer1._optimizer.loss.model.parameters,
                              trainer2b._optimizer.loss.model.parameters):

        # I tested it, they are exactly equal when using float64.
        assert_array_almost_equal(param1.get_value(),
                                  param2.get_value(),
                                  err_msg=param1.name)

    # I tested it, they are exactly equal when using float64.
    assert_array_almost_equal(nll1.mean.view(trainer1.status),
                              nll2b.mean.view(trainer2b.status))
    assert_array_almost_equal(nll1.stderror.view(trainer1.status),
                              nll2b.stderror.view(trainer2b.status))

    # I tested it, they are exactly equal when using float64.
    assert_array_almost_equal(
        logger1.get_variable_history(0),
        logger2a.get_variable_history(0) + logger2b.get_variable_history(0))
    assert_array_almost_equal(
        logger1.get_variable_history(1),
        logger2a.get_variable_history(1) + logger2b.get_variable_history(1))
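
    # Note: the `+` above is list concatenation: the per-epoch history of the
    # first 5 epochs plus that of the resumed run should reproduce the
    # uninterrupted 10-epoch history (exactly equal under float64, as noted).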
def test_simple_convnade():
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "sigmoid"
    use_mask_as_input = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 3
    nb_orderings = 1

    print("Will train Convoluational Deep NADE for a total of {0} epochs.".
          format(max_epoch))

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [
            348, 349, 350, 351, 376, 377, 378, 379, 404, 405, 406, 407, 432,
            433, 434, 435
        ]

        trainset = Dataset(trainset.inputs.get_value()[:, indices_to_keep],
                           trainset.inputs.get_value()[:, indices_to_keep],
                           name="trainset")
        validset = Dataset(validset.inputs.get_value()[:, indices_to_keep],
                           validset.inputs.get_value()[:, indices_to_keep],
                           name="validset")
        testset = Dataset(testset.inputs.get_value()[:, indices_to_keep],
                          testset.inputs.get_value()[:, indices_to_keep],
                          name="testset")

        image_shape = (4, 4)
        nb_channels = 1

    with Timer("Building model"):
        builder = DeepConvNADEBuilder(image_shape=image_shape,
                                      nb_channels=nb_channels,
                                      use_mask_as_input=use_mask_as_input)

        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        builder.build_convnet_from_blueprint(convnet_blueprint)
        builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = builder.build()
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
            model, trainset)

        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(
            trainset, batch_size)

        trainer = Trainer(optimizer, batch_scheduler)

        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        # Print the duration of each epoch and the total training time.
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(
            tasks.Print("Avg. training loss      : {}", avg_loss))

        # Print NLL mean/stderror.
        nll = views.LossView(
            loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(
                model, validset),
            batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(
                validset, batch_size=len(validset)))
        trainer.append_task(
            tasks.Print("Validset - NLL          : {0:.2f} ± {1:.2f}",
                        nll.mean, nll.stderror))

        trainer.build_theano_graph()

    with Timer("Training"):
        trainer.train()

    with Timer("Checking the probs for all possible inputs sum to 1"):
        rng = np.random.RandomState(ordering_seed)
        D = np.prod(image_shape)
        inputs = cartesian([[0, 1]] * int(D), dtype=np.float32)
        ordering = np.arange(D, dtype=np.int32)
        rng.shuffle(ordering)

        symb_input = T.vector("input")
        symb_input.tag.test_value = inputs[-len(inputs) // 4]
        symb_ordering = T.ivector("ordering")
        symb_ordering.tag.test_value = ordering
        nll_of_x_given_o = theano.function([symb_input, symb_ordering],
                                           model.nll_of_x_given_o(
                                               symb_input, symb_ordering),
                                           name="nll_of_x_given_o")
        #theano.printing.pydotprint(nll_of_x_given_o, '{0}_nll_of_x_given_o_{1}'.format(model.__class__.__name__, theano.config.device), with_ids=True)

        for i in range(nb_orderings):
            ordering = np.arange(D, dtype=np.int32)
            rng.shuffle(ordering)
            print("Ordering:", ordering)

            nlls = []
            for no, input in enumerate(inputs):
                print("{}/{}".format(no, len(inputs)), end='\r')
                nlls.append(nll_of_x_given_o(input, ordering))

            print("{}/{} Done".format(len(inputs), len(inputs)))

            p_x = np.exp(np.logaddexp.reduce(-np.array(nlls)))
            print("Sum of p(x) for all x:", p_x)
            assert_almost_equal(p_x, 1., decimal=5)
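
# The exhaustive check enumerates all 2**16 = 65,536 binary 4x4 patches; it is
# tractable only because the test restricts images to the 4x4 center patch.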