Example #1
def run(discriminative_regularization=True):
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=False)
    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams[:3]

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    rval = create_training_computation_graphs(discriminative_regularization)
    cg, bn_cg, variance_parameters = rval
    pop_updates = list(
        set(get_batch_normalization_updates(bn_cg, allow_duplicates=True)))
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    model = Model(bn_cg.outputs[0])
    selector = Selector(
        find_bricks(
            model.top_bricks,
            lambda brick: brick.name in ('encoder_convnet', 'encoder_mlp',
                                         'decoder_convnet', 'decoder_mlp')))
    parameters = list(selector.get_parameters().values()) + variance_parameters

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_cg.outputs[0],
                                parameters=parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    monitored_quantities_list = []
    for graph in [bn_cg, cg]:
        cost, kl_term, reconstruction_term = graph.outputs
        cost.name = 'nll_upper_bound'
        avg_kl_term = kl_term.mean(axis=0)
        avg_kl_term.name = 'avg_kl_term'
        avg_reconstruction_term = -reconstruction_term.mean(axis=0)
        avg_reconstruction_term.name = 'avg_reconstruction_term'
        monitored_quantities_list.append(
            [cost, avg_kl_term, avg_reconstruction_term])
    train_monitoring = DataStreamMonitoring(
        monitored_quantities_list[0], train_monitor_stream, prefix="train",
        updates=extra_updates, after_epoch=False, before_first_epoch=False,
        every_n_epochs=5)
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities_list[1], valid_monitor_stream, prefix="valid",
        after_epoch=False, before_first_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    save_path = 'celeba_vae_{}regularization.zip'.format(
        '' if discriminative_regularization else 'no_')
    checkpoint = Checkpoint(save_path, every_n_epochs=5, use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=75), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    main_loop = MainLoop(data_stream=main_loop_stream,
                         algorithm=algorithm, extensions=extensions)
    main_loop.run()
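
A note on the extra_updates above: they apply an exponential moving average to the batch-normalization population statistics, nudging each population statistic p toward the corresponding minibatch statistic m. A minimal NumPy sketch of the same update rule (the array shape and variable names are illustrative, not taken from the example):

import numpy as np

def ema_update(pop_stat, batch_stat, decay_rate=0.05):
    # Mirrors the update pair (p, m * decay_rate + p * (1 - decay_rate)):
    # blend 5% of the minibatch statistic into the running estimate.
    return batch_stat * decay_rate + pop_stat * (1 - decay_rate)

pop_mean = np.zeros(16)            # running population estimate
batch_mean = np.random.randn(16)   # statistic computed on one minibatch
pop_mean = ema_update(pop_mean, batch_mean)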
Example #2
def test_selector():
    b1 = MockBrickBottom(name="b1")
    b2 = MockBrickBottom(name="b2")
    b3 = MockBrickBottom(name="b3")
    t1 = MockBrickTop([b1, b2], name="t1")
    t2 = MockBrickTop([b2, b3], name="t2")

    s1 = Selector([t1])
    s11 = s1.select("/t1/b1")
    assert s11.bricks[0] == b1
    assert len(s11.bricks) == 1
    s12 = s1.select("/t1")
    assert s12.bricks[0] == t1
    assert len(s12.bricks) == 1

    s2 = Selector([t1, t2])
    s21 = s2.select("/t2/b2")
    assert s21.bricks[0] == b2
    assert len(s21.bricks) == 1

    assert s2.select("/t2/b2.V")[0] == b2.parameters[0]

    parameters = list(s1.get_parameters().items())
    assert parameters[0][0] == "/t1/b1.V"
    assert parameters[0][1] == b1.parameters[0]
    assert parameters[1][0] == "/t1/b1.W"
    assert parameters[1][1] == b1.parameters[1]
    assert parameters[2][0] == "/t1/b2.V"
    assert parameters[2][1] == b2.parameters[0]
    assert parameters[3][0] == "/t1/b2.W"
    assert parameters[3][1] == b2.parameters[1]
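
The assertions above exercise Selector's path syntax: bricks are addressed as /top/child and parameters as /top/child.name. A minimal sketch of the same naming with a standard Blocks MLP instead of the mock bricks (assuming blocks.bricks and blocks.select are available; the key strings shown are expectations, not output copied from a run):

from blocks.bricks import MLP, Tanh, Identity
from blocks.select import Selector

# The MLP's Linear children are auto-named linear_0 and linear_1.
mlp = MLP(activations=[Tanh(), Identity()], dims=[3, 4, 2], name='mlp')
mlp.allocate()  # create the parameter shared variables

params = Selector([mlp]).get_parameters()
print(list(params.keys()))
# Expected keys of the form:
# ['/mlp/linear_0.W', '/mlp/linear_0.b', '/mlp/linear_1.W', '/mlp/linear_1.b']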
Example #3
def build_model(images, labels):
    
    # Construct a bottom convolutional sequence
    bottom_conv_sequence = convolutional_sequence((3,3), 16, (160, 160))
    bottom_conv_sequence._push_allocation_config()
    
    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    #top_mlp = MLP([Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')], [conv_out_dim, 1024, 10], weights_init=IsotropicGaussian(), biases_init=Constant(0))
    top_mlp = BatchNormalizedMLP(
        [Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')],
        [conv_out_dim, 1024, 10],
        weights_init=IsotropicGaussian(), biases_init=Constant(0))
    
    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([bottom_conv_sequence.apply, flattener.apply, top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()
    
    prediction = ss_seq.apply(images)
    cost_noreg = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    # add regularization
    selector = Selector([top_mlp])
    Ws = selector.get_parameters('W')
    mlp_brick_name = 'batchnormalizedmlp'
    W0 = Ws['/%s/linear_0.W' % mlp_brick_name]
    W1 = Ws['/%s/linear_1.W' % mlp_brick_name]

    cost = cost_noreg + .01 * (W0 ** 2).mean() + .01 * (W1 ** 2).mean()


    return cost
Example #4
def build_model(images, labels):
    
    vgg = VGG(layer='conv4_4')
    vgg.push_initialization_config()
    vgg.initialize()

    tdb = top_direction_block()
    tdb.push_initialization_config()
    tdb.initialize()

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([vgg.apply, tdb.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()
    
    prediction = ss_seq.apply(images)
    cost       = StructuredCost().apply(labels, theano.tensor.clip(prediction, 1e-5, 1 - 1e-5))

    cg           = ComputationGraph(cost)
    cg_dropout   = apply_dropout(cg, [VariableFilter(roles=[OUTPUT])(cg.variables)[0]], .5)
    cost_dropout = cg_dropout.outputs[0]

    # define learned parameters
    selector = Selector([ss_seq])
    parameters = list(selector.get_parameters().values())

    return cost_dropout, parameters
Example #5
def build_model(images, labels):

    vgg = VGG(layer='conv3_4')
    vgg.push_initialization_config()
    vgg.initialize()

    sb = SubstractBatch()

    # Construct a bottom convolutional sequence
    layers = [
        Convolutional(filter_size=(3, 3),
                      num_filters=100,
                      use_bias=True,
                      tied_biases=True,
                      name='final_conv0'),
        BatchNormalization(name='batchnorm_1'),
        Rectifier(name='final_conv0_act'),
        Convolutional(filter_size=(3, 3),
                      num_filters=100,
                      use_bias=True,
                      tied_biases=True,
                      name='final_conv1'),
        BatchNormalization(name='batchnorm_2'),
        Rectifier(name='final_conv1_act'),
        MaxPooling(pooling_size=(2, 2), name='maxpool_final')
    ]
    bottom_conv_sequence = ConvolutionalSequence(
        layers,
        num_channels=256,
        image_size=(40, 40),
        biases_init=Constant(0.),
        weights_init=IsotropicGaussian(0.01))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    print('dim output conv:', bottom_conv_sequence.get_dim('output'))
    # conv_out_dim = 20 * 40 * 40
    top_mlp = BatchNormalizedMLP(
        [Rectifier(name='non_linear_9'),
         Softmax(name='non_linear_11')], [conv_out_dim, 1024, 10],
        weights_init=IsotropicGaussian(),
        biases_init=Constant(0))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([
        vgg.apply, bottom_conv_sequence.apply, flattener.apply, top_mlp.apply
    ])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost_noreg = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    # add regularization
    selector = Selector([top_mlp])
    Ws = selector.get_parameters('W')
    mlp_brick_name = 'batchnormalizedmlp'
    W0 = Ws['/%s/linear_0.W' % mlp_brick_name]
    W1 = Ws['/%s/linear_1.W' % mlp_brick_name]
    cost = cost_noreg + .0001 * (W0**2).sum() + .001 * (W1**2).sum()

    # define learned parameters
    selector = Selector([ss_seq])
    Ws = selector.get_parameters('W')
    bs = selector.get_parameters('b')
    BNSCs = selector.get_parameters('batch_norm_scale')
    BNSHs = selector.get_parameters('batch_norm_shift')

    parameters_top = (list(Ws.values()) + list(bs.values()) +
                      list(BNSCs.values()) + list(BNSHs.values()))

    selector = Selector([vgg])
    convs = selector.get_parameters()

    parameters_all = parameters_top + list(convs.values())

    return cost, [parameters_top, parameters_all]
Example #6
def run(batch_size, save_path, z_dim, oldmodel, discriminative_regularization,
        classifier, vintage, monitor_every, monitor_before, checkpoint_every, dataset, color_convert,
        image_size, net_depth, subdir,
        reconstruction_factor, kl_factor, discriminative_factor, disc_weights,
        num_epochs):

    if dataset:
        streams = create_custom_streams(filename=dataset,
                                        training_batch_size=batch_size,
                                        monitoring_batch_size=batch_size,
                                        include_targets=False,
                                        color_convert=color_convert)
    else:
        streams = create_celeba_streams(training_batch_size=batch_size,
                                        monitoring_batch_size=batch_size,
                                        include_targets=False)

    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams[:3]

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    rval = create_training_computation_graphs(
                z_dim, image_size, net_depth, discriminative_regularization, classifier,
                vintage, reconstruction_factor, kl_factor, discriminative_factor, disc_weights)
    cg, bn_cg, variance_parameters = rval

    pop_updates = list(
        set(get_batch_normalization_updates(bn_cg, allow_duplicates=True)))
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    model = Model(bn_cg.outputs[0])

    selector = Selector(
        find_bricks(
            model.top_bricks,
            lambda brick: brick.name in ('encoder_convnet', 'encoder_mlp',
                                         'decoder_convnet', 'decoder_mlp')))
    parameters = list(selector.get_parameters().values()) + variance_parameters

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_cg.outputs[0],
                                parameters=parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    sys.setrecursionlimit(1000000)

    monitored_quantities_list = []
    for graph in [bn_cg, cg]:
        # cost, kl_term, reconstruction_term, discriminative_term = graph.outputs
        cost, kl_term, reconstruction_term, discriminative_term = graph.outputs[:4]
        discriminative_layer_terms = graph.outputs[4:]

        cost.name = 'nll_upper_bound'
        avg_kl_term = kl_term.mean(axis=0)
        avg_kl_term.name = 'avg_kl_term'
        avg_reconstruction_term = -reconstruction_term.mean(axis=0)
        avg_reconstruction_term.name = 'avg_reconstruction_term'
        avg_discriminative_term = discriminative_term.mean(axis=0)
        avg_discriminative_term.name = 'avg_discriminative_term'

        num_layer_terms = len(discriminative_layer_terms)
        avg_discriminative_layer_terms = [None] * num_layer_terms
        for i, term in enumerate(discriminative_layer_terms):
            avg_discriminative_layer_terms[i] = discriminative_layer_terms[i].mean(axis=0)
            avg_discriminative_layer_terms[i].name = "avg_discriminative_term_layer_{:02d}".format(i)

        monitored_quantities_list.append(
            [cost, avg_kl_term, avg_reconstruction_term,
             avg_discriminative_term] + avg_discriminative_layer_terms)

    train_monitoring = DataStreamMonitoring(
        monitored_quantities_list[0], train_monitor_stream, prefix="train",
        updates=extra_updates, after_epoch=False, before_first_epoch=monitor_before,
        every_n_epochs=monitor_every)
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities_list[1], valid_monitor_stream, prefix="valid",
        after_epoch=False, before_first_epoch=monitor_before,
        every_n_epochs=monitor_every)

    # Prepare checkpoint
    checkpoint = Checkpoint(save_path, every_n_epochs=checkpoint_every,
                            before_training=True, use_cpickle=True)

    sample_checkpoint = SampleCheckpoint(interface=DiscGenModel, z_dim=z_dim/2,
                            image_size=(image_size, image_size), channels=3,
                            dataset=dataset, split="valid", save_subdir=subdir,
                            before_training=True, after_epoch=True)
    # TODO: why does z_dim=foo become foo/2?
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  checkpoint,
                  sample_checkpoint,
                  train_monitoring, valid_monitoring, 
                  Printing(),
                  ProgressBar()]
    main_loop = MainLoop(model=model, data_stream=main_loop_stream,
                         algorithm=algorithm, extensions=extensions)

    if oldmodel is not None:
        print("Initializing parameters with old model {}".format(oldmodel))
        try:
            saved_model = load(oldmodel)
        except AttributeError:
            # newer version of blocks
            with open(oldmodel, 'rb') as src:
                saved_model = load(src)
        main_loop.model.set_parameter_values(
            saved_model.model.get_parameter_values())
        del saved_model

    main_loop.run()
Example #7
def run(discriminative_regularization=True):
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=False)
    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams[:3]

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    rval = create_training_computation_graphs(discriminative_regularization)
    cg, bn_cg, variance_parameters = rval
    pop_updates = list(
        set(get_batch_normalization_updates(bn_cg, allow_duplicates=True)))
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    model = Model(bn_cg.outputs[0])
    selector = Selector(
        find_bricks(
            model.top_bricks,
            lambda brick: brick.name in ('encoder_convnet', 'encoder_mlp',
                                         'decoder_convnet', 'decoder_mlp')))
    parameters = list(selector.get_parameters().values()) + variance_parameters

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_cg.outputs[0],
                                parameters=parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    monitored_quantities_list = []
    for graph in [bn_cg, cg]:
        cost, kl_term, reconstruction_term = graph.outputs
        cost.name = 'nll_upper_bound'
        avg_kl_term = kl_term.mean(axis=0)
        avg_kl_term.name = 'avg_kl_term'
        avg_reconstruction_term = -reconstruction_term.mean(axis=0)
        avg_reconstruction_term.name = 'avg_reconstruction_term'
        monitored_quantities_list.append(
            [cost, avg_kl_term, avg_reconstruction_term])
    train_monitoring = DataStreamMonitoring(monitored_quantities_list[0],
                                            train_monitor_stream,
                                            prefix="train",
                                            updates=extra_updates,
                                            after_epoch=False,
                                            before_first_epoch=False,
                                            every_n_epochs=5)
    valid_monitoring = DataStreamMonitoring(monitored_quantities_list[1],
                                            valid_monitor_stream,
                                            prefix="valid",
                                            after_epoch=False,
                                            before_first_epoch=False,
                                            every_n_epochs=5)

    # Prepare checkpoint
    save_path = 'celeba_vae_{}regularization.zip'.format(
        '' if discriminative_regularization else 'no_')
    checkpoint = Checkpoint(save_path, every_n_epochs=5, use_cpickle=True)

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=75), train_monitoring, valid_monitoring,
        checkpoint,
        Printing(),
        ProgressBar()
    ]
    main_loop = MainLoop(data_stream=main_loop_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
Example #8
def run(batch_size, save_path, z_dim, oldmodel, discriminative_regularization,
        classifier, vintage, monitor_every, monitor_before, checkpoint_every,
        dataset, color_convert, image_size, net_depth, subdir,
        reconstruction_factor, kl_factor, discriminative_factor, disc_weights,
        num_epochs):

    if dataset:
        streams = create_custom_streams(filename=dataset,
                                        training_batch_size=batch_size,
                                        monitoring_batch_size=batch_size,
                                        include_targets=False,
                                        color_convert=color_convert)
    else:
        streams = create_celeba_streams(training_batch_size=batch_size,
                                        monitoring_batch_size=batch_size,
                                        include_targets=False)

    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams[:3]

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    rval = create_training_computation_graphs(z_dim, image_size, net_depth,
                                              discriminative_regularization,
                                              classifier, vintage,
                                              reconstruction_factor, kl_factor,
                                              discriminative_factor,
                                              disc_weights)
    cg, bn_cg, variance_parameters = rval

    pop_updates = list(
        set(get_batch_normalization_updates(bn_cg, allow_duplicates=True)))
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    model = Model(bn_cg.outputs[0])

    selector = Selector(
        find_bricks(
            model.top_bricks,
            lambda brick: brick.name in ('encoder_convnet', 'encoder_mlp',
                                         'decoder_convnet', 'decoder_mlp')))
    parameters = list(selector.get_parameters().values()) + variance_parameters

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_cg.outputs[0],
                                parameters=parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    sys.setrecursionlimit(1000000)

    monitored_quantities_list = []
    for graph in [bn_cg, cg]:
        # cost, kl_term, reconstruction_term, discriminative_term = graph.outputs
        cost, kl_term, reconstruction_term, discriminative_term = graph.outputs[:4]
        discriminative_layer_terms = graph.outputs[4:]

        cost.name = 'nll_upper_bound'
        avg_kl_term = kl_term.mean(axis=0)
        avg_kl_term.name = 'avg_kl_term'
        avg_reconstruction_term = -reconstruction_term.mean(axis=0)
        avg_reconstruction_term.name = 'avg_reconstruction_term'
        avg_discriminative_term = discriminative_term.mean(axis=0)
        avg_discriminative_term.name = 'avg_discriminative_term'

        num_layer_terms = len(discriminative_layer_terms)
        avg_discriminative_layer_terms = [None] * num_layer_terms
        for i, term in enumerate(discriminative_layer_terms):
            avg_discriminative_layer_terms[i] = discriminative_layer_terms[i].mean(axis=0)
            avg_discriminative_layer_terms[i].name = "avg_discriminative_term_layer_{:02d}".format(i)

        monitored_quantities_list.append([
            cost, avg_kl_term, avg_reconstruction_term, avg_discriminative_term
        ] + avg_discriminative_layer_terms)

    train_monitoring = DataStreamMonitoring(monitored_quantities_list[0],
                                            train_monitor_stream,
                                            prefix="train",
                                            updates=extra_updates,
                                            after_epoch=False,
                                            before_first_epoch=monitor_before,
                                            every_n_epochs=monitor_every)
    valid_monitoring = DataStreamMonitoring(monitored_quantities_list[1],
                                            valid_monitor_stream,
                                            prefix="valid",
                                            after_epoch=False,
                                            before_first_epoch=monitor_before,
                                            every_n_epochs=monitor_every)

    # Prepare checkpoint
    checkpoint = Checkpoint(save_path,
                            every_n_epochs=checkpoint_every,
                            before_training=True,
                            use_cpickle=True)

    sample_checkpoint = SampleCheckpoint(interface=DiscGenModel,
                                         z_dim=z_dim / 2,
                                         image_size=(image_size, image_size),
                                         channels=3,
                                         dataset=dataset,
                                         split="valid",
                                         save_subdir=subdir,
                                         before_training=True,
                                         after_epoch=True)
    # TODO: why does z_dim=foo become foo/2?
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs), checkpoint, sample_checkpoint,
        train_monitoring, valid_monitoring,
        Printing(),
        ProgressBar()
    ]
    main_loop = MainLoop(model=model,
                         data_stream=main_loop_stream,
                         algorithm=algorithm,
                         extensions=extensions)

    if oldmodel is not None:
        print("Initializing parameters with old model {}".format(oldmodel))
        try:
            saved_model = load(oldmodel)
        except AttributeError:
            # newer version of blocks
            with open(oldmodel, 'rb') as src:
                saved_model = load(src)
        main_loop.model.set_parameter_values(
            saved_model.model.get_parameter_values())
        del saved_model

    main_loop.run()