Example #1
0
def run(discriminative_regularization=True):
    """Train a CelebA VAE, optionally with discriminative regularization.

    Monitors the NLL upper bound, KL term and reconstruction term on both
    the training and validation streams every 5 epochs, and checkpoints the
    main loop every 5 epochs.
    """
    # First three streams: training, train-monitoring, valid-monitoring.
    stream_tuple = create_celeba_streams(training_batch_size=100,
                                         monitoring_batch_size=500,
                                         include_targets=False)
    training_stream, train_mon_stream, valid_mon_stream = stream_tuple[:3]

    # Two graphs are returned: a plain one for monitoring and a
    # batch-normalized one used for training.
    plain_graph, bn_graph, variance_parameters = (
        create_training_computation_graphs(discriminative_regularization))

    # Batch-normalization population statistics track an exponential moving
    # average of the minibatch statistics.
    decay_rate = 0.05
    population_updates = list(
        set(get_batch_normalization_updates(bn_graph, allow_duplicates=True)))
    extra_updates = [
        (pop_stat, batch_stat * decay_rate + pop_stat * (1 - decay_rate))
        for pop_stat, batch_stat in population_updates]

    model = Model(bn_graph.outputs[0])
    # Optimize only the encoder/decoder bricks plus the variance parameters.
    encoder_decoder_bricks = find_bricks(
        model.top_bricks,
        lambda brick: brick.name in ('encoder_convnet', 'encoder_mlp',
                                     'decoder_convnet', 'decoder_mlp'))
    selector = Selector(encoder_decoder_bricks)
    parameters = list(selector.get_parameters().values()) + variance_parameters

    # Adam on the training cost (first output of the batch-normalized graph).
    algorithm = GradientDescent(cost=bn_graph.outputs[0],
                                parameters=parameters,
                                step_rule=Adam())
    algorithm.add_updates(extra_updates)

    # Per-graph monitored quantities: index 0 -> bn (training) graph,
    # index 1 -> plain (inference) graph.
    monitored_quantities_list = []
    for graph in (bn_graph, plain_graph):
        cost, kl_term, reconstruction_term = graph.outputs
        cost.name = 'nll_upper_bound'
        avg_kl_term = kl_term.mean(axis=0)
        avg_kl_term.name = 'avg_kl_term'
        avg_reconstruction_term = -reconstruction_term.mean(axis=0)
        avg_reconstruction_term.name = 'avg_reconstruction_term'
        monitored_quantities_list.append(
            [cost, avg_kl_term, avg_reconstruction_term])

    train_monitoring = DataStreamMonitoring(
        monitored_quantities_list[0], train_mon_stream, prefix="train",
        updates=extra_updates, after_epoch=False, before_first_epoch=False,
        every_n_epochs=5)
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities_list[1], valid_mon_stream, prefix="valid",
        after_epoch=False, before_first_epoch=False, every_n_epochs=5)

    # The save path records whether regularization was used.
    save_path = 'celeba_vae_{}regularization.zip'.format(
        '' if discriminative_regularization else 'no_')
    checkpoint = Checkpoint(save_path, every_n_epochs=5, use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=75), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    MainLoop(data_stream=training_stream,
             algorithm=algorithm,
             extensions=extensions).run()
Example #2
0
def run():
    """Train the CelebA classifier (used for discriminative regularization).

    Monitors the training cost at the end of training, validation cost and
    accuracy every 5 epochs, and checkpoints every 5 epochs.
    """
    train_stream, train_mon_stream, valid_mon_stream = create_celeba_streams(
        training_batch_size=100,
        monitoring_batch_size=500,
        include_targets=True)[:3]

    # Two graphs: a plain inference graph and one with batch normalization
    # and dropout applied, used for training.
    inference_cg, bn_dropout_cg = create_training_computation_graphs()

    # Batch-normalization population statistics track an exponential moving
    # average of the minibatch statistics.
    decay_rate = 0.05
    extra_updates = []
    for pop_stat, batch_stat in get_batch_normalization_updates(
            bn_dropout_cg):
        extra_updates.append(
            (pop_stat, batch_stat * decay_rate + pop_stat * (1 - decay_rate)))

    training_cost = bn_dropout_cg.outputs[0]
    algorithm = GradientDescent(cost=training_cost,
                                parameters=bn_dropout_cg.parameters,
                                step_rule=Adam())
    algorithm.add_updates(extra_updates)

    # Monitor the training cost on the training stream.
    training_cost.name = 'cost'
    train_monitoring = DataStreamMonitoring(
        [training_cost], train_mon_stream, prefix="train",
        before_first_epoch=False, after_epoch=False, after_training=True,
        updates=extra_updates)

    # Monitor cost and accuracy from the inference graph on validation data.
    valid_cost, valid_accuracy = inference_cg.outputs
    valid_cost.name = 'cost'
    valid_accuracy.name = 'accuracy'
    valid_monitoring = DataStreamMonitoring(
        [valid_cost, valid_accuracy], valid_mon_stream, prefix="valid",
        before_first_epoch=False, after_epoch=False, every_n_epochs=5)

    checkpoint = Checkpoint('celeba_classifier.zip', every_n_epochs=5,
                            use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=50), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    MainLoop(data_stream=train_stream,
             algorithm=algorithm,
             extensions=extensions).run()
Example #3
0
def run(batch_size, save_path, z_dim, oldmodel, discriminative_regularization,
        classifier, vintage, monitor_every, monitor_before, checkpoint_every,
        dataset, color_convert, image_size, net_depth, subdir,
        reconstruction_factor, kl_factor, discriminative_factor, disc_weights,
        num_epochs):
    """Train a VAE, optionally with discriminative regularization.

    Parameters
    ----------
    batch_size : int
        Batch size for both training and monitoring streams.
    save_path : str
        Path for the main-loop checkpoint archive.
    z_dim : int
        Latent dimensionality passed to the computation graphs.
    oldmodel : str or None
        Path to a previously saved main loop to warm-start from.
    dataset : str or None
        Custom dataset filename; falls back to CelebA when falsy.
    num_epochs : int
        Number of training epochs before finishing.

    Remaining arguments are forwarded to
    ``create_training_computation_graphs`` or to the monitoring /
    checkpointing extensions.
    """
    # Use a custom dataset when one is given, otherwise fall back to CelebA.
    if dataset:
        streams = create_custom_streams(filename=dataset,
                                        training_batch_size=batch_size,
                                        monitoring_batch_size=batch_size,
                                        include_targets=False,
                                        color_convert=color_convert)
    else:
        streams = create_celeba_streams(training_batch_size=batch_size,
                                        monitoring_batch_size=batch_size,
                                        include_targets=False)

    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams[:3]

    # cg is the plain (inference) graph; bn_cg is the batch-normalized
    # graph used for training.
    cg, bn_cg, variance_parameters = create_training_computation_graphs(
        z_dim, image_size, net_depth, discriminative_regularization,
        classifier, vintage, reconstruction_factor, kl_factor,
        discriminative_factor, disc_weights)

    # Batch-normalization population statistics are updated with an
    # exponential moving average of the minibatch statistics.
    pop_updates = list(
        set(get_batch_normalization_updates(bn_cg, allow_duplicates=True)))
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    model = Model(bn_cg.outputs[0])

    # Train only the encoder/decoder bricks (plus the variance parameters);
    # other bricks in the graph (e.g. a classifier) are left untouched.
    selector = Selector(
        find_bricks(
            model.top_bricks,
            lambda brick: brick.name in ('encoder_convnet', 'encoder_mlp',
                                         'decoder_convnet', 'decoder_mlp')))
    parameters = list(selector.get_parameters().values()) + variance_parameters

    # Prepare algorithm: Adam on the training cost.
    algorithm = GradientDescent(cost=bn_cg.outputs[0],
                                parameters=parameters,
                                step_rule=Adam())
    algorithm.add_updates(extra_updates)

    # Pickling the main loop recurses deeply through the computation graph.
    sys.setrecursionlimit(1000000)

    # Graph outputs: cost, KL term, reconstruction term, total
    # discriminative term, then one discriminative term per layer.
    monitored_quantities_list = []
    for graph in [bn_cg, cg]:
        cost, kl_term, reconstruction_term, discriminative_term = \
            graph.outputs[:4]
        discriminative_layer_terms = graph.outputs[4:]

        cost.name = 'nll_upper_bound'
        avg_kl_term = kl_term.mean(axis=0)
        avg_kl_term.name = 'avg_kl_term'
        avg_reconstruction_term = -reconstruction_term.mean(axis=0)
        avg_reconstruction_term.name = 'avg_reconstruction_term'
        avg_discriminative_term = discriminative_term.mean(axis=0)
        avg_discriminative_term.name = 'avg_discriminative_term'

        avg_discriminative_layer_terms = []
        for i, layer_term in enumerate(discriminative_layer_terms):
            avg_layer_term = layer_term.mean(axis=0)
            avg_layer_term.name = (
                "avg_discriminative_term_layer_{:02d}".format(i))
            avg_discriminative_layer_terms.append(avg_layer_term)

        monitored_quantities_list.append(
            [cost, avg_kl_term, avg_reconstruction_term,
             avg_discriminative_term] + avg_discriminative_layer_terms)

    train_monitoring = DataStreamMonitoring(
        monitored_quantities_list[0], train_monitor_stream, prefix="train",
        updates=extra_updates, after_epoch=False,
        before_first_epoch=monitor_before, every_n_epochs=monitor_every)
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities_list[1], valid_monitor_stream, prefix="valid",
        after_epoch=False, before_first_epoch=monitor_before,
        every_n_epochs=monitor_every)

    # Prepare checkpoint
    checkpoint = Checkpoint(save_path, every_n_epochs=checkpoint_every,
                            before_training=True, use_cpickle=True)

    # FIX: ``z_dim / 2`` is a float under Python 3's true division, but a
    # dimensionality must be integral; floor division gives the same value
    # for int inputs on Python 2 and the correct int on Python 3.
    # NOTE(review): the sampling interface apparently uses half the latent
    # dimensionality -- presumably mean/log-variance pairs; TODO confirm.
    sample_checkpoint = SampleCheckpoint(interface=DiscGenModel,
                                         z_dim=z_dim // 2,
                                         image_size=(image_size, image_size),
                                         channels=3,
                                         dataset=dataset, split="valid",
                                         save_subdir=subdir,
                                         before_training=True,
                                         after_epoch=True)
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  checkpoint,
                  sample_checkpoint,
                  train_monitoring, valid_monitoring,
                  Printing(),
                  ProgressBar()]
    main_loop = MainLoop(model=model, data_stream=main_loop_stream,
                         algorithm=algorithm, extensions=extensions)

    # Optionally warm-start parameters from a previously saved main loop.
    if oldmodel is not None:
        print("Initializing parameters with old model {}".format(oldmodel))
        try:
            saved_model = load(oldmodel)
        except AttributeError:
            # Newer Blocks versions require a file object rather than a path.
            with open(oldmodel, 'rb') as src:
                saved_model = load(src)
        main_loop.model.set_parameter_values(
            saved_model.model.get_parameter_values())
        del saved_model  # release the duplicate parameter arrays

    main_loop.run()
Example #4
0
def run(discriminative_regularization=True):
    """Train a CelebA VAE, optionally with discriminative regularization.

    Both training and validation streams are monitored every 5 epochs for
    the NLL upper bound, KL term and reconstruction term; a checkpoint is
    written every 5 epochs.
    """
    # Training stream plus the two monitoring streams.
    all_streams = create_celeba_streams(training_batch_size=100,
                                        monitoring_batch_size=500,
                                        include_targets=False)
    main_stream, train_stream, valid_stream = all_streams[:3]

    # cg: plain graph for monitoring; bn_cg: batch-normalized training graph.
    cg, bn_cg, variance_parameters = create_training_computation_graphs(
        discriminative_regularization)

    # Population statistics for batch normalization follow an exponential
    # moving average of the minibatch statistics.
    decay = 0.05
    pop_updates = list(
        set(get_batch_normalization_updates(bn_cg, allow_duplicates=True)))
    extra_updates = []
    for pop_stat, batch_stat in pop_updates:
        extra_updates.append(
            (pop_stat, batch_stat * decay + pop_stat * (1 - decay)))

    model = Model(bn_cg.outputs[0])
    # Select only the encoder/decoder bricks for optimization.
    wanted_names = ('encoder_convnet', 'encoder_mlp',
                    'decoder_convnet', 'decoder_mlp')
    selector = Selector(
        find_bricks(model.top_bricks,
                    lambda brick: brick.name in wanted_names))
    parameters = list(selector.get_parameters().values()) + variance_parameters

    # Gradient descent with Adam on the training cost.
    algorithm = GradientDescent(cost=bn_cg.outputs[0],
                                parameters=parameters,
                                step_rule=Adam())
    algorithm.add_updates(extra_updates)

    def monitored_quantities(graph):
        # Name and average the three graph outputs for monitoring.
        cost, kl_term, reconstruction_term = graph.outputs
        cost.name = 'nll_upper_bound'
        avg_kl = kl_term.mean(axis=0)
        avg_kl.name = 'avg_kl_term'
        avg_recon = -reconstruction_term.mean(axis=0)
        avg_recon.name = 'avg_reconstruction_term'
        return [cost, avg_kl, avg_recon]

    train_monitoring = DataStreamMonitoring(monitored_quantities(bn_cg),
                                            train_stream,
                                            prefix="train",
                                            updates=extra_updates,
                                            after_epoch=False,
                                            before_first_epoch=False,
                                            every_n_epochs=5)
    valid_monitoring = DataStreamMonitoring(monitored_quantities(cg),
                                            valid_stream,
                                            prefix="valid",
                                            after_epoch=False,
                                            before_first_epoch=False,
                                            every_n_epochs=5)

    # Checkpoint name records whether regularization was enabled.
    suffix = '' if discriminative_regularization else 'no_'
    checkpoint = Checkpoint(
        'celeba_vae_{}regularization.zip'.format(suffix),
        every_n_epochs=5, use_cpickle=True)

    main_loop = MainLoop(
        data_stream=main_stream,
        algorithm=algorithm,
        extensions=[Timing(), FinishAfter(after_n_epochs=75),
                    train_monitoring, valid_monitoring, checkpoint,
                    Printing(), ProgressBar()])
    main_loop.run()
Example #5
0
def run(batch_size, save_path, z_dim, oldmodel, discriminative_regularization,
        classifier, vintage, monitor_every, monitor_before, checkpoint_every,
        dataset, color_convert, image_size, net_depth, subdir,
        reconstruction_factor, kl_factor, discriminative_factor, disc_weights,
        num_epochs):
    """Train a VAE, optionally with discriminative regularization.

    Parameters
    ----------
    batch_size : int
        Batch size for both the training and monitoring streams.
    save_path : str
        Path for the main-loop checkpoint archive.
    z_dim : int
        Latent dimensionality passed to the computation graphs.
    oldmodel : str or None
        Path to a previously saved main loop to warm-start from.
    dataset : str or None
        Custom dataset filename; falls back to CelebA when falsy.
    num_epochs : int
        Number of training epochs before finishing.

    The remaining arguments are forwarded to
    ``create_training_computation_graphs`` or to the monitoring /
    checkpointing extensions.
    """
    # Use a custom dataset when one is given, otherwise fall back to CelebA.
    if dataset:
        streams = create_custom_streams(filename=dataset,
                                        training_batch_size=batch_size,
                                        monitoring_batch_size=batch_size,
                                        include_targets=False,
                                        color_convert=color_convert)
    else:
        streams = create_celeba_streams(training_batch_size=batch_size,
                                        monitoring_batch_size=batch_size,
                                        include_targets=False)

    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams[:3]

    # cg is the plain (inference) graph; bn_cg is the batch-normalized
    # graph used for training.
    cg, bn_cg, variance_parameters = create_training_computation_graphs(
        z_dim, image_size, net_depth, discriminative_regularization,
        classifier, vintage, reconstruction_factor, kl_factor,
        discriminative_factor, disc_weights)

    # Batch-normalization population statistics are updated with an
    # exponential moving average of the minibatch statistics.
    pop_updates = list(
        set(get_batch_normalization_updates(bn_cg, allow_duplicates=True)))
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    model = Model(bn_cg.outputs[0])

    # Train only the encoder/decoder bricks (plus the variance parameters);
    # other bricks in the graph (e.g. a classifier) are left untouched.
    selector = Selector(
        find_bricks(
            model.top_bricks,
            lambda brick: brick.name in ('encoder_convnet', 'encoder_mlp',
                                         'decoder_convnet', 'decoder_mlp')))
    parameters = list(selector.get_parameters().values()) + variance_parameters

    # Prepare algorithm: Adam on the training cost.
    algorithm = GradientDescent(cost=bn_cg.outputs[0],
                                parameters=parameters,
                                step_rule=Adam())
    algorithm.add_updates(extra_updates)

    # Pickling the main loop recurses deeply through the computation graph.
    sys.setrecursionlimit(1000000)

    # Graph outputs: cost, KL term, reconstruction term, total
    # discriminative term, then one discriminative term per layer.
    monitored_quantities_list = []
    for graph in [bn_cg, cg]:
        cost, kl_term, reconstruction_term, discriminative_term = \
            graph.outputs[:4]
        discriminative_layer_terms = graph.outputs[4:]

        cost.name = 'nll_upper_bound'
        avg_kl_term = kl_term.mean(axis=0)
        avg_kl_term.name = 'avg_kl_term'
        avg_reconstruction_term = -reconstruction_term.mean(axis=0)
        avg_reconstruction_term.name = 'avg_reconstruction_term'
        avg_discriminative_term = discriminative_term.mean(axis=0)
        avg_discriminative_term.name = 'avg_discriminative_term'

        avg_discriminative_layer_terms = []
        for i, layer_term in enumerate(discriminative_layer_terms):
            avg_layer_term = layer_term.mean(axis=0)
            avg_layer_term.name = (
                "avg_discriminative_term_layer_{:02d}".format(i))
            avg_discriminative_layer_terms.append(avg_layer_term)

        monitored_quantities_list.append(
            [cost, avg_kl_term, avg_reconstruction_term,
             avg_discriminative_term] + avg_discriminative_layer_terms)

    train_monitoring = DataStreamMonitoring(
        monitored_quantities_list[0], train_monitor_stream, prefix="train",
        updates=extra_updates, after_epoch=False,
        before_first_epoch=monitor_before, every_n_epochs=monitor_every)
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities_list[1], valid_monitor_stream, prefix="valid",
        after_epoch=False, before_first_epoch=monitor_before,
        every_n_epochs=monitor_every)

    # Prepare checkpoint
    checkpoint = Checkpoint(save_path, every_n_epochs=checkpoint_every,
                            before_training=True, use_cpickle=True)

    # FIX: ``z_dim / 2`` is a float under Python 3's true division, but a
    # dimensionality must be integral; floor division gives the same value
    # for int inputs on Python 2 and the correct int on Python 3.
    # NOTE(review): the sampling interface apparently uses half the latent
    # dimensionality -- presumably mean/log-variance pairs; TODO confirm.
    sample_checkpoint = SampleCheckpoint(interface=DiscGenModel,
                                         z_dim=z_dim // 2,
                                         image_size=(image_size, image_size),
                                         channels=3,
                                         dataset=dataset, split="valid",
                                         save_subdir=subdir,
                                         before_training=True,
                                         after_epoch=True)
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  checkpoint,
                  sample_checkpoint,
                  train_monitoring, valid_monitoring,
                  Printing(),
                  ProgressBar()]
    main_loop = MainLoop(model=model, data_stream=main_loop_stream,
                         algorithm=algorithm, extensions=extensions)

    # Optionally warm-start parameters from a previously saved main loop.
    if oldmodel is not None:
        print("Initializing parameters with old model {}".format(oldmodel))
        try:
            saved_model = load(oldmodel)
        except AttributeError:
            # Newer Blocks versions require a file object rather than a path.
            with open(oldmodel, 'rb') as src:
                saved_model = load(src)
        main_loop.model.set_parameter_values(
            saved_model.model.get_parameter_values())
        del saved_model  # release the duplicate parameter arrays

    main_loop.run()