Example #1
    def apply(self, input_lb, input_un, target):
        batch_size = input_lb.shape[0]
        get_labeled = lambda x: x[:batch_size] if x is not None else x
        input = T.concatenate([input_lb, input_un], axis=0)
        self.layer_dims = {0: self.input_dim}
        self.lr = self.shared(self.default_lr, "learning_rate", role=None)
        top = len(self.layers) - 1

        clean = self.encoder(input, noise_std=[0])
        corr = self.encoder(input, noise_std=self.noise_std)

        ests, costs = self.decoder(clean, corr, batch_size)

        # Costs
        y = target.flatten()

        costs.class_clean = CategoricalCrossEntropy().apply(y, get_labeled(clean.h[top]))
        costs.class_clean.name = "CE_clean"

        costs.class_corr = CategoricalCrossEntropy().apply(y, get_labeled(corr.h[top]))
        costs.class_corr.name = "CE_corr"

        costs.total = costs.class_corr * 1.0
        for i in range(len(self.layers)):
            costs.total += costs.denois[i] * self.denoising_cost_x[i]
        costs.total.name = "Total_cost"

        self.costs = costs

        # Classification error
        mr = MisclassificationRate()
        self.error = mr.apply(y, get_labeled(clean.h[top])) * np.float32(100.0)
        self.error.name = "Error_rate"
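
The apply method above concatenates the labeled and unlabeled batches before encoding and computes the classification terms only on the labeled slice. A tiny numpy illustration of that slicing (shapes are placeholders, not taken from the example):

import numpy as np

input_lb = np.zeros((4, 3))   # 4 labeled examples, 3 features each
input_un = np.ones((6, 3))    # 6 unlabeled examples
batch_size = input_lb.shape[0]

joint = np.concatenate([input_lb, input_un], axis=0)   # what the encoder sees
labeled_part = joint[:batch_size]                      # what get_labeled(...) recovers
assert (labeled_part == input_lb).all()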
Example #2
def setup_model(configs):
    tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
    # shape: T x B x C x X x Y
    input_ = tensor5("features")
    tensor3 = theano.tensor.TensorType(config.floatX, (False,) * 3)
    locs = tensor3("locs")
    # shape: B x Classes
    target = T.ivector("targets")

    model = LSTMAttention(configs, weights_init=Glorot(), biases_init=Constant(0))
    model.initialize()

    (h, c, location, scale, alpha, patch, downn_sampled_input, conved_part_1, conved_part_2, pre_lstm) = model.apply(
        input_, locs
    )

    model.location = location
    model.scale = scale
    model.alpha = alpha
    model.patch = patch

    classifier = MLP(
        [Rectifier(), Softmax()], configs["classifier_dims"], weights_init=Glorot(), biases_init=Constant(0)
    )
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    cost.name = "CE"
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = "ER"
    model.cost = cost
    model.error_rate = error_rate
    model.probabilities = probabilities

    if configs["load_pretrained"]:
        blocks_model = Model(model.cost)
        all_params = blocks_model.parameters
        with open("VGG_CNN_params.npz") as f:
            loaded = np.load(f)
            all_conv_params = loaded.keys()
            for param in all_params:
                if param.name in loaded.keys():
                    assert param.get_value().shape == loaded[param.name].shape
                    param.set_value(loaded[param.name])
                    all_conv_params.pop(all_conv_params.index(param.name))
        print "the following parameters did not match: " + str(all_conv_params)

    if configs["test_model"]:
        print "TESTING THE MODEL: CHECK THE INPUT SIZE!"
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.cost], on_unused_input="ignore", allow_input_downcast=True)
        data = configs["get_streams"](configs["batch_size"])[0].get_epoch_iterator().next()
        f(data[1], data[0], data[2])

        print "Test passed! ;)"

    model.monitorings = [cost, error_rate]

    return model
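
The tensor5 helper at the top of this example builds a 5-dimensional symbolic input (time x batch x channel x height x width). A minimal standalone sketch of the same pattern, for illustration only:

import theano
from theano import tensor

# a 5D floatX variable with no broadcastable axes
tensor5 = tensor.TensorType(theano.config.floatX, (False,) * 5)
video = tensor5('features')   # symbolic; shape T x B x C x X x Y
print(video.ndim)             # -> 5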
def maxout_vae_mnist_test(path_vae_mnist):

    # load vae model on mnist
    vae_mnist = load(path_vae_mnist)
    maxout = Maxout()
    x = T.matrix('features')
    y = T.imatrix('targets')
    batch_size = 128
    z, _ = vae_mnist.sampler.sample(vae_mnist.encoder_mlp.apply(x))
    predict = maxout.apply(z)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict)
    y_hat = Softmax().apply(predict)
    cost.name = 'cost'
    cg = ComputationGraph(cost)

    temp = cg.parameters
    for i, t in enumerate(temp):
        t.name = t.name + str(i) + "maxout"

    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y.flatten(), y_hat)

    # training
    step_rule = RMSProp(0.01, 0.9)
    #step_rule = Momentum(0.2, 0.9)
    train_set = MNIST('train')
    test_set = MNIST("test")

    data_stream_train = Flatten(DataStream.default_stream(
            train_set, iteration_scheme=SequentialScheme(train_set.num_examples, batch_size)))

    data_stream_test = Flatten(DataStream.default_stream(
            test_set, iteration_scheme=SequentialScheme(test_set.num_examples, batch_size)))

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], data_stream=data_stream_train, prefix="train")
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test, prefix="test")


    extensions = [  monitor_train,
                    monitor_valid,
                    FinishAfter(after_n_epochs=50),
                    Printing(every_n_epochs=1)
                  ]

    main_loop = MainLoop(data_stream=data_stream_train,
                        algorithm=algorithm, model = Model(cost),
                        extensions=extensions)
    main_loop.run()

    # save here
    from blocks.serialization import dump
    with closing(open('../data_mnist/maxout', 'wb')) as f:
        dump(maxout, f)
Example #4
def setup_model(configs):

    tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
    # shape: T x B x C x X x Y
    input_ = tensor5('features')
    # shape: B x Classes
    target = T.lmatrix('targets')

    model = LSTMAttention(
        configs,
        weights_init=Glorot(),
        biases_init=Constant(0))
    model.initialize()

    (h, c, location, scale, patch, downn_sampled_input,
        conved_part_1, conved_part_2, pre_lstm) = model.apply(input_)

    classifier = MLP(
        [Rectifier(), Logistic()],
        configs['classifier_dims'],
        weights_init=Glorot(),
        biases_init=Constant(0))
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = BinaryCrossEntropy().apply(target, probabilities)
    cost.name = 'CE'
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = 'ER'
    model.cost = cost

    if configs['load_pretrained']:
        blocks_model = Model(model.cost)
        all_params = blocks_model.parameters
        with open('VGG_CNN_params.npz') as f:
            loaded = np.load(f)
            all_conv_params = loaded.keys()
            for param in all_params:
                if param.name in loaded.keys():
                    assert param.get_value().shape == loaded[param.name].shape
                    param.set_value(loaded[param.name])
                    all_conv_params.pop(all_conv_params.index(param.name))
        print "the following parameters did not match: " + str(all_conv_params)

    if configs['test_model']:
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.cost],
                            on_unused_input='ignore',
                            allow_input_downcast=True)
        data = np.random.randn(10, 40, 3, 224, 224)
        targs = np.random.randn(40, 101)
        f(data, targs)
        print "Test passed! ;)"

    model.monitorings = [cost, error_rate]

    return model
Example #5
def test_misclassification_rate():
    y = tensor.vector(dtype="int32")
    yhat = tensor.matrix(theano.config.floatX)
    top1_brick = MisclassificationRate()
    top2_brick = MisclassificationRate(top_k=2)
    top3_brick = MisclassificationRate(top_k=3)
    f = theano.function([y, yhat], [top1_brick.apply(y, yhat), top2_brick.apply(y, yhat), top3_brick.apply(y, yhat)])
    y_ = numpy.array([2, 1, 0, 1, 2], dtype="int32")
    yhat_ = numpy.array([[3, 2, 1, 0], [1, 8, 2, 1], [3, 8, 1, 2], [1, 6, 4, 2], [9, 7, 5, 5]], dtype="float32")
    top1_error = 0.6
    top2_error = 0.4
    top3_error = 0.2
    assert_allclose([top1_error, top2_error, top3_error], f(y_, yhat_))
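
The expected error rates above can be sanity-checked with plain numpy. The sketch below is illustrative only and mirrors argsort-style tie-breaking, which is what yields the 0.2 top-3 error for the tied scores in the last row:

import numpy

def topk_error(y, yhat, k):
    topk = numpy.argsort(yhat, axis=1)[:, -k:]        # indices of the k largest scores
    hits = numpy.any(topk == y[:, None], axis=1)      # target found among the top k?
    return 1.0 - hits.mean()

y_ = numpy.array([2, 1, 0, 1, 2], dtype="int32")
yhat_ = numpy.array([[3, 2, 1, 0], [1, 8, 2, 1], [3, 8, 1, 2],
                     [1, 6, 4, 2], [9, 7, 5, 5]], dtype="float32")
print([topk_error(y_, yhat_, k) for k in (1, 2, 3)])  # -> [0.6, 0.4, 0.2]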
Example #6
    def apply(self, input_labeled, target_labeled, input_unlabeled):
        self.layer_counter = 0
        self.layer_dims = {0: self.input_dim}
        self.lr = self.shared(self.default_lr, 'learning_rate', role=None)
        top = len(self.layers) - 1

        num_labeled = input_labeled.shape[0]
        self.join = lambda l, u: T.concatenate([l, u], axis=0)
        self.labeled = lambda x: x[:num_labeled] if x is not None else x
        self.unlabeled = lambda x: x[num_labeled:] if x is not None else x
        self.split_lu = lambda x: (self.labeled(x), self.unlabeled(x))

        input_concat = self.join(input_labeled, input_unlabeled)

        clean = self.encoder(input_concat, 'clean',
                             input_noise_std=0.0,
                             noise_std=[])
        corr = self.encoder(input_concat, 'corr',
                            input_noise_std=self.super_noise_std,
                            noise_std=self.f_local_noise_std)

        est, costs = self.decoder(clean, corr)

        # Costs
        y = target_labeled.flatten()

        costs.class_clean = CategoricalCrossEntropy().apply(
            y, clean.labeled.h[top])
        costs.class_clean.name = 'CE_clean'

        costs.class_corr = CategoricalCrossEntropy().apply(
            y, corr.labeled.h[top])
        costs.class_corr.name = 'CE_corr'

        costs.total = costs.class_corr * 1.0
        for i in range(len(self.layers)):
            costs.total += costs.denois[i] * self.denoising_cost_x[i]
        costs.total.name = 'Total_cost'

        self.costs = costs

        # Classification error
        mr = MisclassificationRate()
        self.error = mr.apply(y, clean.labeled.h[top]) * np.float32(100.)
        self.error.name = 'Error_rate'
def main(name, epochs, batch_size, learning_rate, window_size, conv_sizes, num_filters, fc_dim, enc_dim, dec_dim, step, num_digits, num_classes,
         oldmodel, live_plotting):
    channels, img_height, img_width = 1, 100, 100

    rnninits = {
        'weights_init': Uniform(width=0.02),
        'biases_init': Constant(0.),
    }

    inits = {
        'weights_init': IsotropicGaussian(0.001),
        'biases_init': Constant(0.),
    }

    rec_inits = {
        'weights_init': IsotropicGaussian(0.001),
        'biases_init': Constant(0.),
    }

    convinits = {
        'weights_init': Uniform(width=.2),
        'biases_init': Constant(0.),
    }

    n_iter = step * num_digits
    filter_size1, filter_size2 = zip(conv_sizes, conv_sizes)[:]
    w_height, w_width = window_size.split(',')
    w_height = int(w_height)
    w_width = int(w_width)

    subdir = time.strftime("%Y-%m-%d") + "-" + name
    if not os.path.exists(subdir):
        os.makedirs(subdir)

    lines = ["\n                Running experiment",
             "          subdirectory: %s" % subdir,
             "         learning rate: %g" % learning_rate,
             "        attention size: %s" % window_size,
             "          n_iterations: %d" % n_iter,
             "     encoder dimension: %d" % enc_dim,
             "     decoder dimension: %d" % dec_dim,
             "            batch size: %d" % batch_size,
             "                epochs: %d" % epochs,
             ]
    for line in lines:
        print(line)
    print()

    rectifier = Rectifier()
    conv1 = Convolutional(filter_size=filter_size2, num_filters=int(num_filters / 2), num_channels=channels, image_size=(w_height, w_width), border_mode='half',
                          name='conv1', **convinits)
    conv1_bn = SpatialBatchNormalization(input_dim=(64, 26, 26), conserve_memory=False, n_iter=n_iter, name='conv1_bn')
    conv2 = Convolutional(filter_size=filter_size2, num_channels=int(num_filters / 2), num_filters=int(num_filters / 2), image_size=(26, 26), name='conv2',
                          **convinits)
    conv2_bn = SpatialBatchNormalization(input_dim=(64, 24, 24), conserve_memory=False, n_iter=n_iter, name='conv2_bn')
    max_pooling = MaxPooling(pooling_size=(2, 2), step=(2, 2))
    conv3 = Convolutional(filter_size=filter_size2, num_filters=num_filters, num_channels=int(num_filters / 2), image_size=(12, 12), border_mode='half',
                          name='conv3', **convinits)
    conv3_bn = SpatialBatchNormalization(input_dim=(128, 12, 12), conserve_memory=False, n_iter=n_iter, name='conv3_bn')
    conv4 = Convolutional(filter_size=filter_size2, num_filters=num_filters, num_channels=num_filters, image_size=(12, 12), border_mode='half', name='conv4',
                          **convinits)
    conv4_bn = SpatialBatchNormalization(input_dim=(128, 12, 12), conserve_memory=False, n_iter=n_iter, name='conv4_bn')
    # Max Pooling
    conv5 = Convolutional(filter_size=filter_size2, num_filters=160, num_channels=num_filters, image_size=(6, 6), border_mode='half', name='conv5',
                          **convinits)
    conv5_bn = SpatialBatchNormalization(input_dim=(160, 6, 6), conserve_memory=False, n_iter=n_iter, name='conv5_bn')
    conv6 = Convolutional(filter_size=filter_size2, num_filters=192, num_channels=160, image_size=(6, 6), name='conv6', **convinits)
    conv6_bn = SpatialBatchNormalization(input_dim=(192, 4, 4), conserve_memory=False, n_iter=n_iter, name='conv6_bn')

    conv_mlp = MLP(activations=[Identity()], dims=[3072, fc_dim], name="MLP_conv", **inits)
    conv_mlp_bn = BatchNormalization(input_dim=fc_dim, conserve_memory=False, n_iter=n_iter, name='conv_mlp_bn')
    loc_mlp = MLP(activations=[Identity()], dims=[6, fc_dim], name="MLP_loc", **inits)
    loc_mlp_bn = BatchNormalization(input_dim=fc_dim, conserve_memory=False, n_iter=n_iter, name='loc_mlp_bn')
    encoder_mlp = MLP([Identity()], [fc_dim, 4 * enc_dim], name="MLP_enc", **rec_inits)
    decoder_mlp = MLP([Identity()], [enc_dim, 4 * dec_dim], name="MLP_dec", **rec_inits)
    encoder_rnn = LSTM(activation=Tanh(), dim=enc_dim, name="RNN_enc", **rnninits)

    conv_init = ConvolutionalSequence(
        [Convolutional(filter_size=filter_size1, num_filters=int(num_filters / 8), name='conv1_init'),
         SpatialBatchNormalization(conserve_memory=False, name='conv1_bn_init'),
         Convolutional(filter_size=filter_size2, num_filters=int(num_filters / 8), name='conv2_init'),
         SpatialBatchNormalization(conserve_memory=False, name='conv2_bn_init'),
         Convolutional(filter_size=filter_size2, num_filters=int(num_filters / 4), name='conv3_init'),
         SpatialBatchNormalization(conserve_memory=False, name='conv3_bn_init'),
         ], image_size=(12, 12), num_channels=channels, name='conv_seq_init', **convinits)

    decoder_rnn = LSTM(activation=Tanh(), dim=dec_dim, name="RNN_dec", **rnninits)
    emit_mlp = MLP(activations=[Tanh()], dims=[dec_dim, 6], name='emit_mlp', weights_init=Constant(0.),
                   biases_init=Constant((1., 0., 0., 0., 1., 0.)))

    classification_mlp1 = MLP(activations=[Identity()], dims=[enc_dim, fc_dim], name='MPL_class1', **inits)
    classification_mlp1_bn = BatchNormalization(input_dim=fc_dim, conserve_memory=False, n_iter=n_iter, name='classification_mlp1_bn')
    classification_mlp2 = MLP(activations=[Identity()], dims=[fc_dim, fc_dim], name='MPL_class2', **inits)
    classification_mlp2_bn = BatchNormalization(input_dim=fc_dim, conserve_memory=False, n_iter=n_iter, name='classification_mlp2_bn')
    classification_mlp3 = MLP(activations=[Softmax()], dims=[fc_dim, num_classes], name='MPL_class3', **inits)

    edram = EDRAM(channels=channels, out_height=w_height, out_width=w_width, n_iter=n_iter, num_classes=num_classes, rectifier=rectifier, conv1=conv1,
                  conv1_bn=conv1_bn, conv2=conv2, conv2_bn=conv2_bn, max_pooling=max_pooling, conv3=conv3, conv3_bn=conv3_bn, conv4=conv4, conv4_bn=conv4_bn,
                  conv5=conv5, conv5_bn=conv5_bn, conv6=conv6, conv6_bn=conv6_bn, conv_mlp=conv_mlp, conv_mlp_bn=conv_mlp_bn,
                  loc_mlp=loc_mlp, loc_mlp_bn=loc_mlp_bn, conv_init=conv_init, encoder_mlp=encoder_mlp, encoder_rnn=encoder_rnn, decoder_mlp=decoder_mlp,
                  decoder_rnn=decoder_rnn, classification_mlp1=classification_mlp1, classification_mlp1_bn=classification_mlp1_bn,
                  classification_mlp2=classification_mlp2, classification_mlp2_bn=classification_mlp2_bn, classification_mlp3=classification_mlp3,
                  emit_mlp=emit_mlp)
    edram.initialize()

    # ------------------------------------------------------------------------
    x = T.ftensor4('features')
    x_coarse = T.ftensor4('features_coarse')
    y = T.ivector('labels')
    wr = T.fmatrix('locations')

    with batch_normalization(edram):
        bn_p, bn_l, m_c1_bn, s_c1_bn, m_c2_bn, s_c2_bn, m_c3_bn, s_c3_bn, m_c4_bn, s_c4_bn, m_c5_bn, s_c5_bn, m_c6_bn, s_c6_bn, \
        m_c_bn, s_c_bn, m_l_bn, s_l_bn, m_cl1_bn, s_cl1_bn, m_cl2_bn, s_cl2_bn = edram.calculate_train(x, x_coarse)

    def compute_cost(p, wr, y, l):
        cost_where = T.dot(T.sqr(wr - l), [1, 0.5, 1, 0.5, 1, 1])
        cost_y = T.stack([T.nnet.categorical_crossentropy(T.maximum(p[i, :], 1e-7), y) for i in range(0, n_iter)])

        return cost_where, cost_y
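
    # compute_cost returns two terms: a weighted squared error between the
    # ground-truth transformation parameters `wr` and the emitted ones `l`
    # (the vector [1, 0.5, 1, 0.5, 1, 1] weights the six components), and the
    # per-glimpse categorical cross-entropy of the class predictions, stacked
    # over the n_iter steps.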

    cost_where, cost_y = compute_cost(bn_p, wr, y, bn_l)
    bn_cost = cost_y + cost_where
    bn_cost = bn_cost.sum(axis=0)
    bn_cost = bn_cost.mean()
    bn_cost.name = 'cost'

    bn_error_rate = MisclassificationRate().apply(y, bn_p[-1])
    bn_error_rate.name = 'error_rate'

    # ------------------------------------------------------------
    bn_cg = ComputationGraph([bn_cost, bn_error_rate])

    # Prepare algorithm
    algorithm = GradientDescent(
        cost=bn_cg.outputs[0],
        on_unused_sources='ignore',
        parameters=bn_cg.parameters,
        step_rule=CompositeRule([
            RemoveNotFinite(),
            StepClipping(10.),
            Adam(learning_rate)
        ])
    )

    pop_updates = get_batch_normalization_updates(bn_cg)
    update_params = [conv1_bn.population_mean, conv1_bn.population_stdev, conv2_bn.population_mean, conv2_bn.population_stdev, conv3_bn.population_mean,
                     conv3_bn.population_stdev, conv4_bn.population_mean, conv4_bn.population_stdev, conv5_bn.population_mean, conv5_bn.population_stdev,
                     conv6_bn.population_mean, conv6_bn.population_stdev, conv_mlp_bn.population_mean, conv_mlp_bn.population_stdev,
                     loc_mlp_bn.population_mean, loc_mlp_bn.population_stdev, classification_mlp1_bn.population_mean, classification_mlp1_bn.population_stdev,
                     classification_mlp2_bn.population_mean, classification_mlp2_bn.population_stdev]
    update_values = [m_c1_bn, s_c1_bn, m_c2_bn, s_c2_bn, m_c3_bn, s_c3_bn, m_c4_bn, s_c4_bn, m_c5_bn, s_c5_bn, m_c6_bn, s_c6_bn, m_c_bn, s_c_bn, m_l_bn, s_l_bn,
                     m_cl1_bn, s_cl1_bn, m_cl2_bn, s_cl2_bn]

    pop_updates.extend([(p, m) for p, m in zip(update_params, update_values)])

    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate)) for p, m in pop_updates]
    algorithm.add_updates(extra_updates)
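
    # The extra updates above keep an exponential moving average of the
    # batch-normalization population statistics:
    #     population <- decay_rate * batch_estimate + (1 - decay_rate) * population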

    # ------------------------------------------------------------------------
    # Setup monitors

    p, l = edram.calculate_test(x, x_coarse)
    cost_where, cost_y = compute_cost(p, wr, y, l)
    cost = cost_y + cost_where
    cost = cost.sum(axis=0)
    cost = cost.mean()
    cost.name = 'cost'

    error_rate = MisclassificationRate().apply(y, p[-1])
    error_rate.name = 'error_rate'
    monitors = [cost, error_rate]

    plotting_extensions = []
    # Live plotting...
    if live_plotting:
        plot_channels = [
            ['train_cost', 'test_cost'],
            ['train_error_rate', 'test_error_rate'],
        ]
        plotting_extensions = [
            Plot(subdir, channels=plot_channels, server_url='http://155.69.150.60:80/')
        ]

    # ------------------------------------------------------------

    mnist_cluttered_train = MNISTCluttered(which_sets=['train'], sources=('features', 'locations', 'labels'))
    mnist_cluttered_test = MNISTCluttered(which_sets=['test'], sources=('features', 'locations', 'labels'))

    main_loop = MainLoop(
        model=Model([bn_cost]),
        data_stream=DataStream.default_stream(mnist_cluttered_train, iteration_scheme=ShuffledScheme(mnist_cluttered_train.num_examples, batch_size)),
        algorithm=algorithm,
        extensions=[Timing(),
                    FinishAfter(after_n_epochs=epochs),
                    DataStreamMonitoring(monitors,
                                         DataStream.default_stream(mnist_cluttered_train, iteration_scheme=SequentialScheme(mnist_cluttered_train.num_examples,
                                                                                                                            batch_size)), prefix='train'),
                    DataStreamMonitoring(monitors,
                                         DataStream.default_stream(mnist_cluttered_test,
                                                                   iteration_scheme=SequentialScheme(mnist_cluttered_test.num_examples, batch_size)),
                                         prefix="test"),
                    PartsOnlyCheckpoint("{}/{}".format(subdir, name), before_training=False, after_epoch=True, save_separately=['log', ]),
                    TrackTheBest('test_error_rate', 'best_test_error_rate'),
                    BestCheckpount("{}/{}".format(subdir, name), 'best_test_error_rate', save_separately=['model', ]),
                    Printing(),
                    ProgressBar(),
                    PrintingTo("\n".join(lines), "{}/{}_log.txt".format(subdir, name)), ] + plotting_extensions)
    if oldmodel is not None:
        print("Initializing parameters with old model %s" % oldmodel)
        with open(oldmodel, "rb") as f:
            oldmodel = pickle.load(f)
            main_loop.model.set_parameter_values(oldmodel.get_parameter_values())

            # Copy the batch-normalization population statistics brick by brick.
            new_top = main_loop.model.get_top_bricks()[0]
            old_top = oldmodel.get_top_bricks()[0]
            bn_names = ['conv1_bn', 'conv2_bn', 'conv3_bn', 'conv4_bn',
                        'conv5_bn', 'conv6_bn', 'loc_mlp_bn', 'conv_mlp_bn',
                        'classification_mlp1_bn', 'classification_mlp2_bn']
            bn_bricks = [(getattr(new_top, name), getattr(old_top, name))
                         for name in bn_names]
            bn_bricks += [(new_top.conv_init.layers[i], old_top.conv_init.layers[i])
                          for i in (1, 3, 5)]
            for new_bn, old_bn in bn_bricks:
                new_bn.population_mean.set_value(old_bn.population_mean.get_value())
                new_bn.population_stdev.set_value(old_bn.population_stdev.get_value())
        del oldmodel
    main_loop.run()
Example #8
    biases_init=IsotropicGaussian(),
    prototype=input_mlp,
)
parallel_nets.initialize()
l_h, r_h = parallel_nets.apply(l_x=l_x, r_x=r_x)

# Concatenate the inputs from the two hidden subnets into a single variable
# for input into the next layer.
merge = tensor.concatenate([l_h, r_h], axis=1)

y_hat = output_mlp.apply(merge)

# Define a cost function to optimize, and a classification error rate:
# Also apply the outputs from the net, and corresponding targets:
cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
error = MisclassificationRate().apply(y.flatten(), y_hat)
error.name = 'error'

# Need to define the computation graph:
graph = ComputationGraph(cost)

# This returns a list of weight vectors for each layer
W = VariableFilter(roles=[WEIGHT])(graph.variables)

# Add some regularization to this model:
lam = 0.001
cost += lam * l2_norm(W)
cost.name = 'entropy'

# This is the model without dropout, but with l2 reg.
model = Model(cost)
Example #9
def main(job_id, params):

    config = ConfigParser.ConfigParser()
    config.readfp(open('./params'))

    max_epoch = int(config.get('hyperparams', 'max_iter', 100))
    base_lr = float(config.get('hyperparams', 'base_lr', 0.01))
    train_batch = int(config.get('hyperparams', 'train_batch', 256))
    valid_batch = int(config.get('hyperparams', 'valid_batch', 512))
    test_batch = int(config.get('hyperparams', 'valid_batch', 512))
    hidden_units = int(config.get('hyperparams', 'hidden_units', 16))


    W_sd = float(config.get('hyperparams', 'W_sd', 0.01))
    W_mu = float(config.get('hyperparams', 'W_mu', 0.0))
    b_sd = float(config.get('hyperparams', 'b_sd', 0.01))
    b_mu = float(config.get('hyperparams', 'b_mu', 0.0))

    dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2))
    weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001))
    max_norm = float(config.get('hyperparams', 'max_norm', 100.0))
    solver = config.get('hyperparams', 'solver_type', 'rmsprop')
    data_file = config.get('hyperparams', 'data_file')
    fine_tune = config.getboolean('hyperparams', 'fine_tune')

    # Spearmint optimization parameters:
    if params:
        base_lr = float(params['base_lr'][0])
        dropout_ratio = float(params['dropout_ratio'][0])
        hidden_units = params['hidden_units'][0]
        weight_decay = params['weight_decay'][0]

    if 'adagrad' in solver:
        solver_type = CompositeRule([AdaGrad(learning_rate=base_lr), VariableClipping(threshold=max_norm)])
    else:
        solver_type = CompositeRule([RMSProp(learning_rate=base_lr), VariableClipping(threshold=max_norm)])

    rn_file = '/projects/francisco/repositories/NI-ML/models/deepnets/blocks/ff/models/rnet/2015-06-25-18:13'
    ln_file = '/projects/francisco/repositories/NI-ML/models/deepnets/blocks/ff/models/lnet/2015-06-29-11:45'

    right_dim = 10519
    left_dim = 11427

    train = H5PYDataset(data_file, which_set='train')
    valid = H5PYDataset(data_file, which_set='valid')
    test = H5PYDataset(data_file, which_set='test')

    l_x = tensor.matrix('l_features')
    r_x = tensor.matrix('r_features')
    y = tensor.lmatrix('targets')

    lnet = load(ln_file).model.get_top_bricks()[0]
    rnet = load(rn_file).model.get_top_bricks()[0]

    # Pre-trained layers:

    # Inputs -> hidden_1 -> hidden 2
    for side, net in zip(['l', 'r'], [lnet, rnet]):
        for child in net.children:
            child.name = side + '_' + child.name

    ll1 = lnet.children[0]
    lr1 = lnet.children[1]
    ll2 = lnet.children[2]
    lr2 = lnet.children[3]

    rl1 = rnet.children[0]
    rr1 = rnet.children[1]
    rl2 = rnet.children[2]
    rr2 = rnet.children[3]

    l_h = lr2.apply(ll2.apply(lr1.apply(ll1.apply(l_x))))
    r_h = rr2.apply(rl2.apply(rr1.apply(rl1.apply(r_x))))

    input_dim = ll2.output_dim + rl2.output_dim

    # hidden_2 -> hidden_3 -> hidden_4 -> Logistic output
    output_mlp = MLP(activations=[
        Rectifier(name='h3'),
        Rectifier(name='h4'),
        Softmax(name='output'),
    ],
                     dims=[
                         input_dim,
                         hidden_units,
                         hidden_units,
                         2,
                     ],
                     weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
                     biases_init=IsotropicGaussian(std=W_sd, mean=W_mu))

    output_mlp.initialize()

    # # Concatenate the inputs from the two hidden subnets into a single variable
    # # for input into the next layer.
    merge = tensor.concatenate([l_h, r_h], axis=1)
    #
    y_hat = output_mlp.apply(merge)

    # Define a cost function to optimize, and a classification error rate.
    # Also apply the outputs from the net and corresponding targets:
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'error'

    # This is the model: before applying dropout
    model = Model(cost)

    # Need to define the computation graph for the cost func:
    cost_graph = ComputationGraph([cost])

    # This returns a list of weight vectors for each layer
    W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)

    # Add some regularization to this model:
    cost += weight_decay * l2_norm(W)
    cost.name = 'entropy'

    # computational graph with l2 reg
    cost_graph = ComputationGraph([cost])

    # Apply dropout to inputs:
    inputs = VariableFilter([INPUT])(cost_graph.variables)
    dropout_inputs = [input for input in inputs if input.name.startswith('linear_')]
    dropout_graph = apply_dropout(cost_graph, [dropout_inputs[0]], 0.2)
    dropout_graph = apply_dropout(dropout_graph, dropout_inputs[1:], dropout_ratio)
    dropout_cost = dropout_graph.outputs[0]
    dropout_cost.name = 'dropout_entropy'
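
    # Dropout is applied in two stages: a fixed 0.2 rate on the first linear
    # input and the configurable dropout_ratio on the remaining linear inputs.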

    # If no fine-tuning of l-r models is wanted, find the params for only
    # the joint layers:
    if fine_tune:
        params_to_update = dropout_graph.parameters
    else:
        params_to_update = VariableFilter(roles=[PARAMETER], bricks=output_mlp.children)(cost_graph.parameters)

    # Learning Algorithm:
    algo = GradientDescent(
        step_rule=solver_type,
        params=params_to_update,
        cost=dropout_cost)

    # algo.step_rule.learning_rate.name = 'learning_rate'

    # Data stream used for training model:
    training_stream = Flatten(
        DataStream.default_stream(
            dataset=train,
            iteration_scheme=ShuffledScheme(
                train.num_examples,
                batch_size=train_batch)))

    training_monitor = TrainingDataMonitoring([dropout_cost,
                                               aggregation.mean(error),
                                               aggregation.mean(algo.total_gradient_norm)],
                                              after_batch=True)


    # Use the 'valid' set for validation during training:
    validation_stream = Flatten(
        DataStream.default_stream(
            dataset=valid,
            iteration_scheme=ShuffledScheme(
                valid.num_examples,
                batch_size=valid_batch)))

    validation_monitor = DataStreamMonitoring(
        variables=[cost, error],
        data_stream=validation_stream,
        prefix='validation',
        after_epoch=True)

    test_stream = Flatten(
        DataStream.default_stream(
            dataset=test,
            iteration_scheme=ShuffledScheme(
                test.num_examples,
                batch_size=test_batch)))

    test_monitor = DataStreamMonitoring(
        variables=[error],
        data_stream=test_stream,
        prefix='test',
        after_training=True)

    plotting = Plot('AdniNet_LeftRight',
                    channels=[
                        ['dropout_entropy'],
                        ['error', 'validation_error'],
                    ],
    )

    # Checkpoint class used to save model and log:
    stamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d-%H:%M')
    checkpoint = Checkpoint('./models/{}'.format(stamp),
                            save_separately=['model', 'log'],
                            every_n_epochs=1)

    # The main loop will train the network and output reports, etc
    main_loop = MainLoop(
        data_stream=training_stream,
        model=model,
        algorithm=algo,
        extensions=[
            validation_monitor,
            training_monitor,
            plotting,
            FinishAfter(after_n_epochs=max_epoch),
            FinishIfNoImprovementAfter(notification_name='validation_error', epochs=1),
            Printing(),
            ProgressBar(),
            checkpoint,
            test_monitor,
        ])
    main_loop.run()
    ve = float(main_loop.log.last_epoch_row['validation_error'])
    te = float(main_loop.log.last_epoch_row['error'])
    spearmint_loss = ve + abs(te - ve)
    print 'Spearmint Loss: {}'.format(spearmint_loss)
    return spearmint_loss
from blocks.bricks import Linear, Logistic, Softmax


# In[10]:

hidden_layer_size = 100
input_to_hidden = Linear(name='input_to_hidden', input_dim=117, output_dim=hidden_layer_size)
h = Logistic().apply(input_to_hidden.apply(x))
hidden_to_output = Linear(name='hidden_to_output', input_dim=hidden_layer_size, output_dim=2)
y_hat = Softmax().apply(hidden_to_output.apply(h))

y = tensor.lmatrix('targets')
from blocks.bricks.cost import CategoricalCrossEntropy, MisclassificationRate
cost = CategoricalCrossEntropy().apply(y, y_hat)
error_rate = MisclassificationRate().apply(y.argmax(axis=1), y_hat)
error_rate.name = "error_rate"

# >>> from blocks.roles import WEIGHT
from blocks.graph import ComputationGraph
# >>> from blocks.filter import VariableFilter
cg = ComputationGraph(cost)
# >>> W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
# >>> cost = cost + 0.005 * (W1 ** 2).sum() + 0.005 * (W2 ** 2).sum()
# >>> cost.name = 'cost_with_regularization'
cost.name = 'cost_simple_xentropy'

from blocks.initialization import IsotropicGaussian, Constant
input_to_hidden.weights_init = hidden_to_output.weights_init = IsotropicGaussian(0.01)
input_to_hidden.biases_init = hidden_to_output.biases_init = Constant(0)
input_to_hidden.initialize()
hidden_to_output.initialize()
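
MisclassificationRate compares the argmax of the predicted class probabilities against the integer targets. A minimal numpy sketch of the same quantity (illustrative only, not the Blocks implementation):

import numpy as np

def misclassification_rate(targets, probabilities):
    # fraction of rows whose highest-probability class differs from the target
    return float(np.mean(np.argmax(probabilities, axis=1) != targets))

# misclassification_rate(np.array([0, 1]), np.array([[0.9, 0.1], [0.2, 0.8]])) -> 0.0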
def training_model_mnist(learning_rate, momentum, iteration, batch_size, epoch_end, iter_batch):

    x = T.tensor4('features')
    y = T.imatrix('targets')

    classifier = build_model_mnist()

    predict = classifier.apply(x)
    y_hat = Softmax().apply(predict)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict)
    cost.name = "cost"
    cg = ComputationGraph(cost)
    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y.flatten(), y_hat)
    error_rate.name = "error"


    train_set = MNIST(('train', ))
    test_set = MNIST(("test",))

    if iteration == "slice":
        data_stream = DataStream.default_stream(
                train_set, iteration_scheme=SequentialScheme_slice(train_set.num_examples,
                                                            batch_size))
        data_stream_test = DataStream.default_stream(
                test_set, iteration_scheme=SequentialScheme_slice(test_set.num_examples,
                                                            batch_size))
    else:
        data_stream = DataStream.default_stream(
                train_set, iteration_scheme=SequentialScheme(train_set.num_examples,
                                                            batch_size))

        data_stream_test = DataStream.default_stream(
                test_set, iteration_scheme=SequentialScheme(test_set.num_examples,
                                                            batch_size))

    step_rule = Momentum(learning_rate=learning_rate,
                         momentum=momentum)

    start = time.clock()
    time_spent = shared_floatx(np.float32(0.), name="time_spent")
    time_extension = Time_reference(start, time_spent, every_n_batches=1)

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], prefix="train", every_n_epochs=iter_batch)
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate, time_spent], data_stream=data_stream_test, prefix="valid",
        every_n_epochs=iter_batch)

    # add a monitor variable about the time
    extensions = [  monitor_train,
                    monitor_valid,
                    FinishAfter(after_n_epochs=epoch_end),
                    Printing(every_n_epochs=iter_batch),
                    time_extension
                  ]

    main_loop = MainLoop(data_stream=data_stream,
                        algorithm=algorithm, model = Model(cost),
                        extensions=extensions)
    main_loop.run()
Example #12
def main(save_to,
         num_epochs,
         subset=None,
         num_batches=None,
         batch_size=None,
         regularization=None,
         annealing=None,
         histogram=None,
         resume=False):
    output_size = 10
    convnet = create_noisy_all_conv_net(batch_size, True)

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    test_cost = (CategoricalCrossEntropy().apply(y.flatten(),
                                                 probs).copy(name='cost'))
    test_components = (ComponentwiseCrossEntropy().apply(
        y.flatten(), probs).copy(name='components'))
    test_error_rate = (MisclassificationRate().apply(
        y.flatten(), probs).copy(name='error_rate'))
    test_confusion = (ConfusionMatrix().apply(y.flatten(),
                                              probs).copy(name='confusion'))
    test_confusion.tag.aggregation_scheme = Sum(test_confusion)

    test_cg = ComputationGraph([test_cost, test_error_rate, test_components])

    # Apply dropout to all layer outputs except final softmax
    # dropout_vars = VariableFilter(
    #         roles=[OUTPUT], bricks=[Convolutional],
    #         theano_name_regex="^conv_[25]_apply_output$")(test_cg.variables)
    # drop_cg = apply_dropout(test_cg, dropout_vars, 0.5)

    # Apply 0.2 dropout to the pre-averaging layer
    # dropout_vars_2 = VariableFilter(
    #         roles=[OUTPUT], bricks=[Convolutional],
    #         theano_name_regex="^conv_8_apply_output$")(test_cg.variables)
    # train_cg = apply_dropout(test_cg, dropout_vars_2, 0.2)

    # Apply 0.2 dropout to the input, as in the paper
    # train_cg = apply_dropout(test_cg, [x], 0.2)
    # train_cg = drop_cg
    train_cg = test_cg

    train_cost, train_error_rate, train_components = (test_cost,
                                                      test_error_rate,
                                                      test_components)

    # Apply regularization to the cost

    trainable_parameters = VariableFilter(roles=[WEIGHT, BIAS])(
        train_cg.parameters)
    noise_parameters = VariableFilter(roles=[NOISE])(train_cg.parameters)
    biases = VariableFilter(roles=[BIAS])(train_cg.parameters)
    weights = VariableFilter(roles=[WEIGHT])(train_cg.variables)
    logsigma = VariableFilter(roles=[LOG_SIGMA])(train_cg.variables)

    test_nits = VariableFilter(roles=[NITS])(train_cg.auxiliary_variables)
    test_nit_rate = tensor.concatenate([n.flatten() for n in test_nits]).mean()
    test_nit_rate.name = 'nit_rate'

    train_nit_rate = test_nit_rate

    l2_norm = sum([(W**2).sum() for W in weights])
    l2_norm.name = 'l2_norm'
    l2_regularization = 0.0001 * l2_norm
    l2_regularization.name = 'l2_regularization'
    # test_cost = test_cost + l2_regularization
    # test_cost.name = 'cost_with_regularization'

    mean_log_sigma = tensor.concatenate([n.flatten() for n in logsigma]).mean()
    mean_log_sigma.name = 'log_sigma'

    # Training version of cost
    nit_penalty = theano.shared(
        numpy.asarray(regularization, dtype=theano.config.floatX))
    nit_penalty.name = 'nit_penalty'
    train_cost_without_regularization = train_cost
    train_cost_without_regularization.name = 'cost_without_regularization'
    nit_regularization = nit_penalty * train_nit_rate
    nit_regularization.name = 'nit_regularization'
    train_cost = train_cost + nit_regularization + l2_regularization
    train_cost.name = 'cost_with_regularization'

    cifar10_train = CIFAR10(("train", ))
    cifar10_train_stream = RandomPadCropFlip(NormalizeBatchLevels(
        DataStream.default_stream(cifar10_train,
                                  iteration_scheme=ShuffledScheme(
                                      cifar10_train.num_examples, batch_size)),
        which_sources=('features', )), (32, 32),
                                             pad=5,
                                             which_sources=('features', ))
    # cifar10_train_stream = NormalizeBatchLevels(DataStream.default_stream(
    #        cifar10_train, iteration_scheme=ShuffledScheme(
    #             cifar10_train.num_examples, batch_size)),
    #     which_sources=('features',))

    cifar10_test = CIFAR10(("test", ))
    cifar10_test_stream = NormalizeBatchLevels(DataStream.default_stream(
        cifar10_test,
        iteration_scheme=ShuffledScheme(cifar10_test.num_examples,
                                        batch_size)),
                                               which_sources=('features', ))

    momentum = Momentum(0.002, 0.9)

    # Create a step rule that doubles the learning rate of biases, like Caffe.
    # scale_bias = Restrict(Scale(2), biases)
    # step_rule = CompositeRule([scale_bias, momentum])
    step_rule = CompositeRule([StepClipping(10), momentum])

    # Train with simple SGD
    algorithm = GradientDescent(cost=train_cost,
                                parameters=trainable_parameters,
                                step_rule=step_rule)

    add_noise = NoiseExtension(noise_parameters=noise_parameters)

    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = [
        Timing(), add_noise,
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        EpochSchedule(momentum.learning_rate, [(1, 0.005), (3, 0.01),
                                               (5, 0.02), (200, 0.002),
                                               (250, 0.0002), (300, 0.00002)]),
        NoisyDataStreamMonitoring(
            [test_cost, test_error_rate, test_nit_rate, test_confusion],
            cifar10_test_stream,
            noise_parameters=noise_parameters,
            prefix="test"),
        TrainingDataMonitoring([
            train_cost, train_error_rate, train_nit_rate,
            train_cost_without_regularization, l2_norm, nit_penalty,
            l2_regularization, nit_regularization, mean_log_sigma,
            momentum.learning_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               every_n_batches=100,
                               after_epoch=True),
        Plot('Training performance for ' + save_to,
             channels=[
                 [
                     'train_cost_with_regularization',
                     'train_cost_without_regularization',
                     'train_l2_regularization', 'train_nit_regularization'
                 ],
                 ['train_error_rate'],
                 ['train_total_gradient_norm'],
             ],
             every_n_batches=100),
        Plot('Test performance for ' + save_to,
             channels=[[
                 'train_error_rate',
                 'test_error_rate',
             ]],
             after_epoch=True),
        Checkpoint(save_to),
        ProgressBar(),
        Printing()
    ]

    if annealing:
        extensions.append(EpochExponentiation(nit_penalty, 1 - annealing))

    if histogram:
        attribution = AttributionExtension(components=train_components,
                                           parameters=trainable_parameters,
                                           components_size=output_size,
                                           after_batch=True)
        extensions.insert(0, attribution)

    if resume:
        extensions.append(Load(save_to, True, True))

    model = Model(train_cost)

    main_loop = MainLoop(algorithm,
                         cifar10_train_stream,
                         model=model,
                         extensions=extensions)

    main_loop.run()

    if histogram:
        save_attributions(attribution, filename=histogram)

    with open('execution-log.json', 'w') as outfile:
        json.dump(main_loop.log, outfile, cls=NumpyEncoder)
def main(feature_maps=None, mlp_hiddens=None,
         conv_sizes=None, pool_sizes=None, batch_size=None,
         num_batches=None):
    if feature_maps is None:
        feature_maps = [32, 48, 64, 80, 128, 128]
    if mlp_hiddens is None:
        mlp_hiddens = [1000]
    if conv_sizes is None:
        conv_sizes = [7, 5, 5, 5, 5, 4]
    if pool_sizes is None:
        pool_sizes = [3, 2, 2, 2, 2, 1]
    if batch_size is None:
        batch_size = 64
    conv_steps = [2, 1, 1, 1, 1, 1]  # same as stride
    image_size = (256, 256)
    output_size = 2
    learningRate = 0.001
    drop_prob = 0.5
    weight_noise = 0.75
    num_epochs = 250
    num_batches = None
    host_plot = 'http://*****:*****'

    # (The remaining model, cost, algorithm, extensions and data-stream setup of
    # this function is elided in the source.)

        try:
            # (the Plot import guarded by this try/except is elided in the source)
            extensions.append(Plot('%s %s @ %s' % (graph_name, datetime.datetime.now(), socket.gethostname()),
                                   channels=[['train_error_rate', 'valid_error_rate'],
                                             ['train_total_gradient_norm']],
                                   after_epoch=True, server_url=host_plot))
            PLOT_AVAILABLE = True
        except ImportError:
            PLOT_AVAILABLE = False
        extensions.append(Checkpoint(save_to, after_epoch=True, after_training=True, save_separately=['log']))


    logger.info("Building the model")

    model = Model(cost)

    ########### Loading images #####################
    main_loop = MainLoop(
        algorithm,
        stream_data_train,
        model=model,
        extensions=extensions)

    main_loop.run()
Example #14
def training_committee_member(instance, learning_rate, train, batch_size, valid, valid_full=1.):
	valid_full+=0.1
	x = T.tensor4('x')
	y = T.imatrix('y')
	y_prev = instance.apply(x)
	#cost = CategoricalCrossEntropy().apply(y.flatten(), y_prev).mean()
	cost = Softmax().categorical_cross_entropy(y.flatten(), y_prev).mean()
    	error = MisclassificationRate().apply(y.flatten(), Softmax().apply(y_prev)).mean()
	# take only the last parameters to avoid having the same members among the committee
	W, B = get_Params(y_prev) # take all the parameters for now !!!!
	params=W[:2]+B[:2]
	"""
	w = W[0];
	layer_number=None
	for w_tmp in W:
		if layer_number is None:
			(_, layer_number, _) = analyze_param_name(w.name)
		(_, layer_number_tmp, _) = analyze_param_name(w_tmp.name)
		if layer_number_tmp > layer_number:
			w = w_tmp
			layer_number=layer_number_tmp
	for b_tmp in B:
		(_, layer_number_tmp, _) = analyze_param_name(b_tmp.name)
		if layer_number == layer_number_tmp:
			b = b_tmp
		break
	params = [w,b]
	"""
	#updates, _ = Adam(cost, params, learning_rate)
	updates,_ = RMSProp(cost, params, learning_rate, decay_rate=0.9)
	#updates, _ = Sgd(cost, params, learning_rate)
	train_function = theano.function([x,y], cost, updates=updates,
			allow_input_downcast=True)
	
	test_function = theano.function([x,y], cost,
			allow_input_downcast=True)
	error_function = theano.function([x,y], error,
			allow_input_downcast=True)

	
	x_train, y_train = train
	x_valid, y_valid = valid
	n_train = len(y_train)/batch_size
	stop=False
	init_increment = 5
	increment = init_increment
	error_cost=[]; train_cost=[]
	n_valid = len(y_valid)/batch_size
	for minibatch in range(n_valid):
			x_value = x_valid[minibatch*batch_size:(minibatch+1)*batch_size]
			y_value = y_valid[minibatch*batch_size:(minibatch+1)*batch_size]
			error_cost.append(error_function(x_value, y_value))
			train_cost.append(test_function(x_value, y_value))

	before = np.mean(error_cost)
	if before <=valid_full:
		stop=True
	best_score = np.mean(train_cost)
	best_error = before

	while not stop:
		train_cost=[]; error_cost=[]
		for minibatch in range(n_train):
			x_value = x_train[minibatch*batch_size:(minibatch+1)*batch_size]
			y_value = y_train[minibatch*batch_size:(minibatch+1)*batch_size]
			train_function(x_value, y_value)
			if minibatch ==10:
				if increment !=0:
					for minibatch in range(n_valid):
						x_value = x_valid[minibatch*batch_size:(minibatch+1)*batch_size]
						y_value = y_valid[minibatch*batch_size:(minibatch+1)*batch_size]
						train_cost.append(test_function(x_value, y_value))
						error_cost.append(error_function(x_value, y_value))
					#print np.mean(train_cost)*100
					error = np.mean(error_cost)
					score = np.mean(train_cost)
					if error <= valid_full:
						#print 'A'
						stop=True
						break
					elif score < best_score*0.995:
						best_score = score
						best_error = error
						increment = init_increment
						#print (best_score, np.mean(error_cost)*100)
					else:
						#print 'hihi', increment
						increment-=1
						#print (best_score, score, np.mean(error_cost)*100)
				else:
					stop=True
					break

	# evaluation validation !
	"""
	error_cost=[]
	n_valid = len(y_valid)/batch_size
	for minibatch in range(n_valid):
			x_value = x_valid[minibatch*batch_size:(minibatch+1)*batch_size]
			y_value = y_valid[minibatch*batch_size:(minibatch+1)*batch_size]
			error_cost.append(error_function(x_value, y_value))
	"""
	# new_round with more precision
	if np.mean(error_cost) <=valid_full or learning_rate <=1e-5:
		print (before, best_error*100)
		print '#####'
		return instance
	else:
		#print 'restart'
		return training_committee_member(instance, learning_rate*0.1, train, batch_size, valid, valid_full)
def main(save_to,
         num_epochs,
         feature_maps=None,
         mlp_hiddens=None,
         conv_sizes=None,
         pool_sizes=None,
         batch_size=200,
         num_batches=None):
    if feature_maps is None:
        feature_maps = [32, 32, 64, 64, 128, 128]
    if mlp_hiddens is None:
        mlp_hiddens = [1000]
    if conv_sizes is None:
        conv_sizes = [7, 5, 5, 5, 3, 3]
    if pool_sizes is None:
        pool_sizes = [2, 2, 2, 2, 2, 2]
    image_size = (128, 128)
    batch_size = 64
    output_size = 2
    learningRate = 0.01
    drop_prob = 0.4
    weight_noise = 0.75
    num_epochs = 150
    num_batches = None

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    convnet = LeNet(conv_activations,
                    3,
                    image_size,
                    filter_sizes=zip(conv_sizes, conv_sizes),
                    feature_maps=feature_maps,
                    pooling_sizes=zip(pool_sizes, pool_sizes),
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='full',
                    weights_init=Uniform(width=.2),
                    biases_init=Constant(0))
    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()
    logging.info(
        "Input dim: {} {} {}".format(*convnet.children[0].get_dim('input_')))
    for i, layer in enumerate(convnet.layers):
        if isinstance(layer, Activation):
            logging.info("Layer {} ({})".format(i, layer.__class__.__name__))
        else:
            logging.info("Layer {} ({}) dim: {} {} {}".format(
                i, layer.__class__.__name__, *layer.get_dim('output')))
    x = tensor.tensor4('image_features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    # Save on csv
    # numpy.save(probs)
    cost = (CategoricalCrossEntropy().apply(y.flatten(),
                                            probs).copy(name='cost'))
    error_rate = (MisclassificationRate().apply(y.flatten(),
                                                probs).copy(name='error_rate'))
    error_rate2 = error_rate.copy(name='error_rate2')

    cg = ComputationGraph([cost, error_rate])
    weights = VariableFilter(roles=[FILTER, WEIGHT])(cg.variables)

    ############# Dropout #############

    logger.info('Applying dropout')
    cg = apply_dropout(cg, weights[0:3], drop_prob)
    dropped_out = VariableFilter(roles=[DROPOUT])(cg.variables)

    ############# Guaussian Noise #############

    logger.info('Applying Gaussian noise')
    cg = apply_noise(cg, weights, weight_noise)

    ########### Loading images #####################

    from fuel.datasets.dogs_vs_cats import DogsVsCats
    from fuel.streams import DataStream, ServerDataStream
    from fuel.schemes import ShuffledScheme
    from fuel.transformers.image import RandomFixedSizeCrop, MinimumImageDimensions, Random2DRotation
    from fuel.transformers import Flatten, Cast, ScaleAndShift

    def create_data(data):
        stream = DataStream(data,
                            iteration_scheme=ShuffledScheme(
                                data.num_examples, batch_size))
        stream = MinimumImageDimensions(stream,
                                        image_size,
                                        which_sources=('image_features', ))
        stream = MaximumImageDimensions(stream,
                                        image_size,
                                        which_sources=('image_features', ))
        stream = RandomHorizontalSwap(stream,
                                      which_sources=('image_features', ))
        stream = Random2DRotation(stream, which_sources=('image_features', ))
        #stream = ScikitResize(stream, image_size, which_sources=('image_features',))
        stream = ScaleAndShift(stream,
                               1. / 255,
                               0,
                               which_sources=('image_features', ))
        stream = Cast(stream,
                      dtype='float32',
                      which_sources=('image_features', ))
        return stream

    stream_data_train = create_data(
        DogsVsCats(('train', ), subset=slice(0, 22500)))
    stream_data_test = create_data(
        DogsVsCats(('train', ), subset=slice(22500, 25000)))
    #stream_data_train = create_data(DogsVsCats(('train',), subset=slice(0, 10)))
    #stream_data_test = create_data(DogsVsCats(('train',), subset=slice(10, 12)))

    # Train with simple SGD
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Momentum(learning_rate=learningRate,
                                                   momentum=0.7))
    #algorithm = GradientDescent(cost=cost, parameters=cg.parameters,step_rule=Scale(learning_rate=learningRate))
    #algorithm = GradientDescent(cost=cost, parameters=cg.parameters,step_rule=Adam(0.001))

    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = []
    extensions.append(Timing())
    extensions.append(
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches))
    extensions.append(
        DataStreamMonitoring([cost, error_rate],
                             stream_data_test,
                             prefix="valid"))
    extensions.append(
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True))
    extensions.append(
        Checkpoint("Model1_uniform_init.pkl",
                   after_epoch=True,
                   after_training=True,
                   save_separately=['log']))
    extensions.append(ProgressBar())
    extensions.append(Printing())

    host_plot = 'http://hades.calculquebec.ca:5090'
    extensions.append(
        Plot('5C 3*3C 2*2P 204080...F 004LR 09Mom %s %s @ %s' %
             ('CNN ', datetime.datetime.now(), socket.gethostname()),
             channels=[['train_error_rate', 'valid_error_rate'],
                       ['train_total_gradient_norm']],
             after_epoch=True,
             server_url=host_plot))
    logger.info("Building the model")

    model = Model(cost)

    ########### Main loop #####################
    main_loop = MainLoop(algorithm,
                         stream_data_train,
                         model=model,
                         extensions=extensions)

    main_loop.run()
    def error(self, x, y):
        y_pred = Softmax().apply(self.apply(x))
        return MisclassificationRate().apply(y.flatten(), y_pred).mean()
Beispiel #17
0
def main(job_id, params):

    config = ConfigParser.ConfigParser()
    config.readfp(open('./params'))

    max_epoch = int(config.get('hyperparams', 'max_iter', 100))
    base_lr = float(config.get('hyperparams', 'base_lr', 0.01))
    train_batch = int(config.get('hyperparams', 'train_batch', 256))
    valid_batch = int(config.get('hyperparams', 'valid_batch', 512))
    test_batch = int(config.get('hyperparams', 'valid_batch', 512))  # note: reuses the 'valid_batch' setting
    hidden_units = int(config.get('hyperparams', 'hidden_units', 16))

    W_sd = float(config.get('hyperparams', 'W_sd', 0.01))
    W_mu = float(config.get('hyperparams', 'W_mu', 0.0))
    b_sd = float(config.get('hyperparams', 'b_sd', 0.01))
    b_mu = float(config.get('hyperparams', 'b_mu', 0.0))

    dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2))
    weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001))
    max_norm = float(config.get('hyperparams', 'max_norm', 100.0))
    solver = config.get('hyperparams', 'solver_type', 'rmsprop')
    data_file = config.get('hyperparams', 'data_file')
    fine_tune = config.getboolean('hyperparams', 'fine_tune')
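    # Note: ConfigParser.get()'s third positional argument is `raw`, not a
    # default value, so every option read above must be present in './params'.
    # A hypothetical minimal file (illustrative values only) could look like:
    #
    #   [hyperparams]
    #   max_iter = 100
    #   base_lr = 0.01
    #   train_batch = 256
    #   valid_batch = 512
    #   hidden_units = 16
    #   W_sd = 0.01
    #   W_mu = 0.0
    #   b_sd = 0.01
    #   b_mu = 0.0
    #   dropout_ratio = 0.2
    #   weight_decay = 0.001
    #   max_norm = 100.0
    #   solver_type = rmsprop
    #   data_file = /path/to/features.h5
    #   fine_tune = False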

    # Spearmint optimization parameters:
    if params:
        base_lr = float(params['base_lr'][0])
        dropout_ratio = float(params['dropout_ratio'][0])
        hidden_units = params['hidden_units'][0]
        weight_decay = params['weight_decay'][0]

    if 'adagrad' in solver:
        solver_type = CompositeRule([
            AdaGrad(learning_rate=base_lr),
            VariableClipping(threshold=max_norm)
        ])
    else:
        solver_type = CompositeRule([
            RMSProp(learning_rate=base_lr),
            VariableClipping(threshold=max_norm)
        ])

    rn_file = '/projects/francisco/repositories/NI-ML/models/deepnets/blocks/ff/models/rnet/2015-06-25-18:13'
    ln_file = '/projects/francisco/repositories/NI-ML/models/deepnets/blocks/ff/models/lnet/2015-06-29-11:45'

    right_dim = 10519
    left_dim = 11427

    train = H5PYDataset(data_file, which_set='train')
    valid = H5PYDataset(data_file, which_set='valid')
    test = H5PYDataset(data_file, which_set='test')

    l_x = tensor.matrix('l_features')
    r_x = tensor.matrix('r_features')
    y = tensor.lmatrix('targets')

    lnet = load(ln_file).model.get_top_bricks()[0]
    rnet = load(rn_file).model.get_top_bricks()[0]

    # Pre-trained layers:

    # Inputs -> hidden_1 -> hidden 2
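    # Prefix each child brick's name with its side so the two pre-trained nets
    # do not produce duplicate brick names in the joint graph.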
    for side, net in zip(['l', 'r'], [lnet, rnet]):
        for child in net.children:
            child.name = side + '_' + child.name

    ll1 = lnet.children[0]
    lr1 = lnet.children[1]
    ll2 = lnet.children[2]
    lr2 = lnet.children[3]

    rl1 = rnet.children[0]
    rr1 = rnet.children[1]
    rl2 = rnet.children[2]
    rr2 = rnet.children[3]

    l_h = lr2.apply(ll2.apply(lr1.apply(ll1.apply(l_x))))
    r_h = rr2.apply(rl2.apply(rr1.apply(rl1.apply(r_x))))

    input_dim = ll2.output_dim + rl2.output_dim

    # hidden_2 -> hidden_3 -> hidden_4 -> Logistic output
    output_mlp = MLP(activations=[
        Rectifier(name='h3'),
        Rectifier(name='h4'),
        Softmax(name='output'),
    ],
                     dims=[
                         input_dim,
                         hidden_units,
                         hidden_units,
                         2,
                     ],
                     weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
                     biases_init=IsotropicGaussian(std=b_sd, mean=b_mu))

    output_mlp.initialize()

    # # Concatenate the inputs from the two hidden subnets into a single variable
    # # for input into the next layer.
    merge = tensor.concatenate([l_h, r_h], axis=1)
    #
    y_hat = output_mlp.apply(merge)

    # Define a cost function to optimize, and a classification error rate.
    # Also apply the outputs from the net and corresponding targets:
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'error'

    # This is the model: before applying dropout
    model = Model(cost)

    # Need to define the computation graph for the cost func:
    cost_graph = ComputationGraph([cost])

    # This returns a list of weight vectors for each layer
    W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)

    # Add some regularization to this model:
    cost += weight_decay * l2_norm(W)
    cost.name = 'entropy'

    # computational graph with l2 reg
    cost_graph = ComputationGraph([cost])

    # Apply dropout to inputs:
    inputs = VariableFilter([INPUT])(cost_graph.variables)
    dropout_inputs = [
        input for input in inputs if input.name.startswith('linear_')
    ]
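    # The first linear layer's input is dropped at a fixed 20% rate; all deeper
    # linear-layer inputs use the tunable dropout_ratio.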
    dropout_graph = apply_dropout(cost_graph, [dropout_inputs[0]], 0.2)
    dropout_graph = apply_dropout(dropout_graph, dropout_inputs[1:],
                                  dropout_ratio)
    dropout_cost = dropout_graph.outputs[0]
    dropout_cost.name = 'dropout_entropy'

    # If no fine-tuning of l-r models is wanted, find the params for only
    # the joint layers:
    if fine_tune:
        params_to_update = dropout_graph.parameters
    else:
        params_to_update = VariableFilter(
            [PARAMETER], bricks=output_mlp.children)(cost_graph.variables)

    # Learning Algorithm:
    algo = GradientDescent(step_rule=solver_type,
                           params=params_to_update,
                           cost=dropout_cost)

    # algo.step_rule.learning_rate.name = 'learning_rate'

    # Data stream used for training model:
    training_stream = Flatten(
        DataStream.default_stream(dataset=train,
                                  iteration_scheme=ShuffledScheme(
                                      train.num_examples,
                                      batch_size=train_batch)))

    training_monitor = TrainingDataMonitoring([
        dropout_cost,
        aggregation.mean(error),
        aggregation.mean(algo.total_gradient_norm)
    ],
                                              after_batch=True)

    # Use the 'valid' set for validation during training:
    validation_stream = Flatten(
        DataStream.default_stream(dataset=valid,
                                  iteration_scheme=ShuffledScheme(
                                      valid.num_examples,
                                      batch_size=valid_batch)))

    validation_monitor = DataStreamMonitoring(variables=[cost, error],
                                              data_stream=validation_stream,
                                              prefix='validation',
                                              after_epoch=True)

    test_stream = Flatten(
        DataStream.default_stream(
            dataset=test,
            iteration_scheme=ShuffledScheme(test.num_examples,
                                            batch_size=test_batch)))

    test_monitor = DataStreamMonitoring(variables=[error],
                                        data_stream=test_stream,
                                        prefix='test',
                                        after_training=True)

    plotting = Plot(
        'AdniNet_LeftRight',
        channels=[
            ['dropout_entropy'],
            ['error', 'validation_error'],
        ],
    )

    # Checkpoint class used to save model and log:
    stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d-%H:%M')
    checkpoint = Checkpoint('./models/{}'.format(stamp),
                            save_separately=['model', 'log'],
                            every_n_epochs=1)

    # The main loop will train the network and output reports, etc
    main_loop = MainLoop(data_stream=training_stream,
                         model=model,
                         algorithm=algo,
                         extensions=[
                             validation_monitor,
                             training_monitor,
                             plotting,
                             FinishAfter(after_n_epochs=max_epoch),
                             FinishIfNoImprovementAfter(
                                 notification_name='validation_error',
                                 epochs=1),
                             Printing(),
                             ProgressBar(),
                             checkpoint,
                             test_monitor,
                         ])
    main_loop.run()
    ve = float(main_loop.log.last_epoch_row['validation_error'])
    te = float(main_loop.log.last_epoch_row['error'])
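    # Spearmint minimizes the validation error plus the train/validation gap,
    # penalizing overfit models: e.g. ve=0.20, te=0.12 gives 0.20 + 0.08 = 0.28.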
    spearmint_loss = ve + abs(te - ve)
    print 'Spearmint Loss: {}'.format(spearmint_loss)
    return spearmint_loss
Beispiel #18
0
    biases_init=IsotropicGaussian(),
    prototype=input_mlp,
)
parallel_nets.initialize()
l_h, r_h = parallel_nets.apply(l_x=l_x, r_x=r_x)

# Concatenate the inputs from the two hidden subnets into a single variable
# for input into the next layer.
merge = tensor.concatenate([l_h, r_h], axis=1)

y_hat = output_mlp.apply(merge)

# Define a cost function to optimize, and a classification error rate:
# Also apply the outputs from the net, and corresponding targets:
cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
error = MisclassificationRate().apply(y.flatten(), y_hat)
error.name = 'error'

# Need to define the computation graph:
graph = ComputationGraph(cost)

# This returns a list of weight vectors for each layer
W = VariableFilter(roles=[WEIGHT])(graph.variables)

# Add some regularization to this model:
lam = 0.001
cost += lam * l2_norm(W)
cost.name = 'entropy'

# This is the model without dropout, but with l2 reg.
model = Model(cost)
Beispiel #19
0
def main(job_id, params, config_file='params.ec'):
    config = ConfigParser.ConfigParser()
    config.readfp(open('./configs/{}'.format(config_file)))

    pr = pprint.PrettyPrinter(indent=4)
    pr.pprint(config)

    net_name = config.get('hyperparams', 'net_name', 'adni')
    struct_name = net_name.split('_')[0]

    max_epoch = int(config.get('hyperparams', 'max_iter', 100))
    base_lr = float(config.get('hyperparams', 'base_lr', 0.01))
    train_batch = int(config.get('hyperparams', 'train_batch', 256))
    valid_batch = int(config.get('hyperparams', 'valid_batch', 512))
    test_batch = int(config.get('hyperparams', 'valid_batch', 512))

    W_sd = float(config.get('hyperparams', 'W_sd', 0.01))
    W_mu = float(config.get('hyperparams', 'W_mu', 0.0))
    b_sd = float(config.get('hyperparams', 'b_sd', 0.01))
    b_mu = float(config.get('hyperparams', 'b_mu', 0.0))

    hidden_units = int(config.get('hyperparams', 'hidden_units', 32))
    input_dropout_ratio = float(config.get('hyperparams', 'input_dropout_ratio', 0.2))
    dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2))
    weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001))
    max_norm = float(config.get('hyperparams', 'max_norm', 100.0))
    solver = config.get('hyperparams', 'solver_type', 'rmsprop')
    data_file = config.get('hyperparams', 'data_file')
    side = config.get('hyperparams', 'side', 'b')

    input_dim = input_dims[struct_name]

    # Spearmint optimization parameters:
    if params:
        base_lr = float(params['base_lr'][0])
        dropout_ratio = float(params['dropout_ratio'][0])
        hidden_units = params['hidden_units'][0]
        weight_decay = params['weight_decay'][0]

    if 'adagrad' in solver:
        solver_type = CompositeRule([AdaGrad(learning_rate=base_lr), VariableClipping(threshold=max_norm)])
    else:
        solver_type = CompositeRule([RMSProp(learning_rate=base_lr), VariableClipping(threshold=max_norm)])


    data_file = config.get('hyperparams', 'data_file')

    if 'b' in side:
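        # 'b' (both): concatenate the left and right hemisphere features.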
        train = H5PYDataset(data_file, which_set='train')
        valid = H5PYDataset(data_file, which_set='valid')
        test = H5PYDataset(data_file, which_set='test')
        x_l = tensor.matrix('l_features')
        x_r = tensor.matrix('r_features')
        x = tensor.concatenate([x_l, x_r], axis=1)

    else:
        train = H5PYDataset(data_file, which_set='train', sources=['{}_features'.format(side), 'targets'])
        valid = H5PYDataset(data_file, which_set='valid', sources=['{}_features'.format(side), 'targets'])
        test = H5PYDataset(data_file, which_set='test', sources=['{}_features'.format(side), 'targets'])
        x = tensor.matrix('{}_features'.format(side))

    y = tensor.lmatrix('targets')


    # Define a feed-forward net with an input, two hidden layers, and a softmax output:
    model = MLP(activations=[
        Rectifier(name='h1'),
        Rectifier(name='h2'),
        Softmax(name='output'),
    ],
                dims=[
                    input_dim[side],
                    hidden_units,
                    hidden_units,
                    2],
                weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
                biases_init=IsotropicGaussian(b_sd, b_mu))

    # Don't forget to initialize params:
    model.initialize()

    # y_hat is the output of the neural net with x as its inputs
    y_hat = model.apply(x)

    # Define a cost function to optimize, and a classification error rate.
    # Also apply the outputs from the net and corresponding targets:
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'error'

    # This is the model: before applying dropout
    model = Model(cost)

    # Need to define the computation graph for the cost func:
    cost_graph = ComputationGraph([cost])

    # This returns a list of weight vectors for each layer
    W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)

    # Add some regularization to this model:
    cost += weight_decay * l2_norm(W)
    cost.name = 'entropy'

    # computational graph with l2 reg
    cost_graph = ComputationGraph([cost])

    # Apply dropout to inputs:
    inputs = VariableFilter([INPUT])(cost_graph.variables)
    dropout_inputs = [input for input in inputs if input.name.startswith('linear_')]
    dropout_graph = apply_dropout(cost_graph, [dropout_inputs[0]], input_dropout_ratio)
    dropout_graph = apply_dropout(dropout_graph, dropout_inputs[1:], dropout_ratio)
    dropout_cost = dropout_graph.outputs[0]
    dropout_cost.name = 'dropout_entropy'

    # Learning Algorithm (notice: we use the dropout cost for learning):
    algo = GradientDescent(
        step_rule=solver_type,
        params=dropout_graph.parameters,
        cost=dropout_cost)

    # algo.step_rule.learning_rate.name = 'learning_rate'

    # Data stream used for training model:
    training_stream = Flatten(
        DataStream.default_stream(
            dataset=train,
            iteration_scheme=ShuffledScheme(
                train.num_examples,
                batch_size=train_batch)))

    training_monitor = TrainingDataMonitoring([dropout_cost,
                                               aggregation.mean(error),
                                               aggregation.mean(algo.total_gradient_norm)],
                                              after_batch=True)

    # Use the 'valid' set for validation during training:
    validation_stream = Flatten(
        DataStream.default_stream(
            dataset=valid,
            iteration_scheme=ShuffledScheme(
                valid.num_examples,
                batch_size=valid_batch)))

    validation_monitor = DataStreamMonitoring(
        variables=[cost, error],
        data_stream=validation_stream,
        prefix='validation',
        after_epoch=True)

    test_stream = Flatten(
        DataStream.default_stream(
            dataset=test,
            iteration_scheme=ShuffledScheme(
                test.num_examples,
                batch_size=test_batch)))

    test_monitor = DataStreamMonitoring(
        variables=[error],
        data_stream=test_stream,
        prefix='test',
        after_training=True)


    plotting = Plot('{}_{}'.format(net_name, side),
                    channels=[
                        ['dropout_entropy'],
                        ['error', 'validation_error'],
                    ],
                    after_batch=False)

    # Checkpoint class used to save model and log:
    stamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d-%H:%M')
    checkpoint = Checkpoint('./models/{}/{}/{}'.format(struct_name, side, stamp),
                            save_separately=['model', 'log'],
                            every_n_epochs=1)

    # Home-brewed class for early stopping when we detect we have started to overfit:
    # presumably, if the gap between the mean validation error and the mean training
    # error over the previous `epochs` epochs exceeds `threshold` (after `burn_in`),
    # we are overfitting and training is stopped.
    early_stopper = FinishIfOverfitting(error_name='error',
                                        validation_name='validation_error',
                                        threshold=0.05,
                                        epochs=5,
                                        burn_in=100)

    # The main loop will train the network and output reports, etc
    main_loop = MainLoop(
        data_stream=training_stream,
        model=model,
        algorithm=algo,
        extensions=[
            validation_monitor,
            training_monitor,
            plotting,
            FinishAfter(after_n_epochs=max_epoch),
            early_stopper,
            Printing(),
            ProgressBar(),
            checkpoint,
            test_monitor,
        ])
    main_loop.run()

    ve = float(main_loop.log.last_epoch_row['validation_error'])
    te = float(main_loop.log.last_epoch_row['error'])
    spearmint_loss = ve + abs(te - ve)
    print 'Spearmint Loss: {}'.format(spearmint_loss)
    return spearmint_loss
Beispiel #20
0
def train_net(net,
              train_stream,
              test_stream,
              L1=None,
              L2=None,
              early_stopping=False,
              finish=None,
              dropout=False,
              jobid=None,
              update=None,
              duration=None,
              **ignored):
    x = tensor.tensor4('image_features')
    y = tensor.lmatrix('targets')

    y_hat = net.apply(x)

    #Cost
    cost_before = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    cost_before.name = "cost_without_regularization"

    #Error
    #Taken from brodesf
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = "Misclassification rate"

    #Regularization
    cg = ComputationGraph(cost_before)
    WS = VariableFilter(roles=[WEIGHT])(cg.variables)

    if dropout:
        print("Dropout")
        cg = apply_dropout(cg, WS, 0.5)

    if L1:
        print("L1 with lambda ", L1)
        L1_reg = L1 * sum([abs(W).sum() for W in WS])
        L1_reg.name = "L1 regularization"
        cost_before += L1_reg

    if L2:
        print("L2 with lambda ", L2)
        L2_reg = L2 * sum([(W**2).sum() for W in WS])
        L2_reg.name = "L2 regularization"
        cost_before += L2_reg

    cost = cost_before
    cost.name = 'cost_with_regularization'

    #Initialization
    print("Initialization")
    net.initialize()

    #Algorithm
    step_rule = Scale(learning_rate=0.1)
    if update is not None:
        if update == "rmsprop":
            print("Using RMSProp")
            step_rule = RMSProp()

    remove_not_finite = RemoveNotFinite(0.9)
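    # RemoveNotFinite(0.9) guards against NaN/Inf updates: if a step is not
    # finite it is replaced so the parameter is merely scaled down instead of
    # being corrupted.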
    step_rule = CompositeRule([step_rule, remove_not_finite])
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=step_rule)

    print("Extensions")
    extensions = []

    #Monitoring
    monitor = DataStreamMonitoring(variables=[cost, error],
                                   data_stream=test_stream,
                                   prefix="test")
    extensions.append(monitor)

    def filename(suffix=""):
        prefix = jobid if jobid else str(os.getpid())
        ctime = str(time.time())
        return "checkpoints/" + prefix + "_" + ctime + "_" + suffix + ".zip"

    #Serialization
    #serialization = Checkpoint(filename())
    #extensions.append(serialization)

    notification = "test_" + error.name
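    # Track the best test misclassification rate; SaveBest and
    # FinishIfNoImprovementAfterPlus are assumed to be project-local extensions
    # that checkpoint on improvement and stop training once progress stalls.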
    track = TrackTheBest(notification)
    best_notification = track.notification_name
    checkpointbest = SaveBest(best_notification, filename("best"))
    extensions.extend([track, checkpointbest])

    if early_stopping:
        print("Early stopping")
        stopper = FinishIfNoImprovementAfterPlus(best_notification)
        extensions.append(stopper)

    #Other extensions
    if finish != None:
        print("Force finish ", finish)
        extensions.append(FinishAfter(after_n_epochs=finish))

    if duration != None:
        print("Stop after ", duration, " seconds")
        extensions.append(FinishAfterTime(duration))

    extensions.extend([Timing(), Printing()])

    #Main loop
    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)

    print("Main loop start")
    main_loop.run()
Beispiel #21
0
    def apply(self, input_labeled, target_labeled, input_unlabeled):
        self.target_labeled = target_labeled
        self.layer_counter = 0
        input_dim = self.p.encoder_layers[0]

        # Store the dimension tuples in the same order as layers.
        layers = self.layers
        self.layer_dims = {0: input_dim}

        self.lr = self.default_lr

        self.costs = costs = AttributeDict()
        self.costs.denois = AttributeDict()

        self.act = AttributeDict()
        self.error = AttributeDict()

        top = len(layers) - 1

        N = input_labeled.shape[0]
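        # Labeled and unlabeled examples are stacked into one batch; the first
        # N rows of any activation belong to the labeled part.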
        self.join = lambda l, u: T.concatenate([l, u], axis=0)
        self.labeled = lambda x: x[:N] if x is not None else x
        self.unlabeled = lambda x: x[N:] if x is not None else x
        self.split_lu = lambda x: (self.labeled(x), self.unlabeled(x))

        input_concat = self.join(input_labeled, input_unlabeled)

        def encoder(input_, path_name, input_noise_std=0, noise_std=[]):
            h = input_

            logger.info('  0: noise %g' % input_noise_std)
            if input_noise_std > 0.:
                h = h + self.noise_like(h) * input_noise_std

            d = AttributeDict()
            d.unlabeled = self.new_activation_dict()
            d.labeled = self.new_activation_dict()
            d.labeled.z[0] = self.labeled(h)
            d.unlabeled.z[0] = self.unlabeled(h)
            prev_dim = input_dim
            for i, (spec, _, act_f) in layers[1:]:
                d.labeled.h[i - 1], d.unlabeled.h[i - 1] = self.split_lu(h)
                noise = noise_std[i] if i < len(noise_std) else 0.
                curr_dim, z, m, s, h = self.f(h, prev_dim, spec, i, act_f,
                                              path_name=path_name,
                                              noise_std=noise)
                assert self.layer_dims.get(i) in (None, curr_dim)
                self.layer_dims[i] = curr_dim
                d.labeled.z[i], d.unlabeled.z[i] = self.split_lu(z)
                d.unlabeled.s[i] = s
                d.unlabeled.m[i] = m
                prev_dim = curr_dim
            d.labeled.h[i], d.unlabeled.h[i] = self.split_lu(h)
            return d

        # Clean, supervised
        logger.info('Encoder: clean, labeled')
        clean = self.act.clean = encoder(input_concat, 'clean')

        # Corrupted, supervised
        logger.info('Encoder: corr, labeled')
        corr = self.act.corr = encoder(input_concat, 'corr',
                                       input_noise_std=self.p.super_noise_std,
                                       noise_std=self.p.f_local_noise_std)
        est = self.act.est = self.new_activation_dict()

        # Decoder path in opposite order
        logger.info('Decoder: z_corr -> z_est')
        for i, ((_, spec), l_type, act_f) in layers[::-1]:
            z_corr = corr.unlabeled.z[i]
            z_clean = clean.unlabeled.z[i]
            z_clean_s = clean.unlabeled.s.get(i)
            z_clean_m = clean.unlabeled.m.get(i)
            fspec = layers[i+1][1][0] if len(layers) > i+1 else (None, None)

            if i == top:
                ver = corr.unlabeled.h[i]
                ver_dim = self.layer_dims[i]
                top_g = True
            else:
                ver = est.z.get(i + 1)
                ver_dim = self.layer_dims.get(i + 1)
                top_g = False

            z_est = self.g(z_lat=z_corr,
                           z_ver=ver,
                           in_dims=ver_dim,
                           out_dims=self.layer_dims[i],
                           l_type=l_type,
                           num=i,
                           fspec=fspec,
                           top_g=top_g)

            if z_est is not None:
                # Denoising cost
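                # With the 'bugfix' option the reconstruction z_est is
                # normalized with the clean encoder's batch mean/std before the
                # squared-error denoising cost is computed.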

                if z_clean_s and self.p.zestbn == 'bugfix':
                    z_est_norm = (z_est - z_clean_m) / T.sqrt(z_clean_s + np.float32(1e-10))
                elif z_clean_s is None or self.p.zestbn == 'no':
                    z_est_norm = z_est
                else:
                    assert False, 'Not supported path'

                se = SquaredError('denois' + str(i))
                costs.denois[i] = se.apply(z_est_norm.flatten(2),
                                           z_clean.flatten(2)) \
                    / np.prod(self.layer_dims[i], dtype=floatX)
                costs.denois[i].name = 'denois' + str(i)
                denois_print = 'denois %.2f' % self.p.denoising_cost_x[i]
            else:
                denois_print = ''

            # Store references for later use
            est.h[i] = self.apply_act(z_est, act_f)
            est.z[i] = z_est
            est.s[i] = None
            est.m[i] = None
            logger.info('  g%d: %10s, %s, dim %s -> %s' % (
                i, l_type,
                denois_print,
                self.layer_dims.get(i+1),
                self.layer_dims.get(i)
                ))

        # Costs
        y = target_labeled.flatten()

        costs.class_clean = CategoricalCrossEntropy().apply(y, clean.labeled.h[top])
        costs.class_clean.name = 'cost_class_clean'

        costs.class_corr = CategoricalCrossEntropy().apply(y, corr.labeled.h[top])
        costs.class_corr.name = 'cost_class_corr'

        # This will be used for training
        costs.total = costs.class_corr * 1.0
        for i in range(top + 1):
            if costs.denois.get(i) and self.p.denoising_cost_x[i] > 0:
                costs.total += costs.denois[i] * self.p.denoising_cost_x[i]
        costs.total.name = 'cost_total'

        # Classification error
        mr = MisclassificationRate()
        self.error.clean = mr.apply(y, clean.labeled.h[top]) * np.float32(100.)
        self.error.clean.name = 'error_rate_clean'
Beispiel #22
0
mlp.weights_init = Uniform(0.0, 0.01)
mlp.biases_init = Constant(0.0)
mlp.initialize()

lin = Linear(200, 10, use_bias=True)
lin.weights_init = Uniform(0.0, 0.01)
lin.biases_init = Constant(0.0)
lin.initialize()

train_out = lin.apply(mlp.apply(flat_x))
test_out = lin.apply(mlp.apply(flat_x))
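# train_out and test_out are currently identical; keeping two application paths
# lets training-only graph transformations (e.g. dropout) be applied later to
# the training graph without touching the test graph.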

sm = Softmax(name='softmax')
loss = sm.categorical_cross_entropy(flat_y, train_out).mean()
loss.name = 'nll'
misclass = MisclassificationRate().apply(flat_y, train_out)
misclass.name = 'misclass'

test_loss = sm.categorical_cross_entropy(flat_y, test_out).mean()
test_loss.name = 'nll'
test_misclass = MisclassificationRate().apply(flat_y, test_out)
test_misclass.name = 'misclass'

model = Model(loss)

######################
# Data
######################
import numpy 
#from mnist import MNIST
from fuel.datasets.mnist import MNIST
Beispiel #23
0
def train(train_set, test_set):
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')

    l1 = Linear(
            name='input_to_hidden',
            input_dim=2,
            output_dim=3,
            weights_init=IsotropicGaussian(0.1),
            biases_init=Constant(0)
    )
    l1.initialize()
    h = Logistic().apply(l1.apply(x))

    l2 = Linear(
            name='hidden_to_output',
            input_dim=l1.output_dim,
            output_dim=2,
            weights_init=IsotropicGaussian(0.1),
            biases_init=Constant(0)
    )
    l2.initialize()
    y_hat = Softmax().apply(l2.apply(h))

    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)

    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'misclassification_rate'

    cg = ComputationGraph(cost)

    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + 1e-8 * (W1 ** 2).sum() + 1e-8 * (W2 ** 2).sum()
    cost.name = 'cost_with_regularization'

    print('W1', W1.get_value())
    print('W2', W2.get_value())

    algorithm = GradientDescent(
            cost=cost,
            parameters=cg.parameters,
            step_rule=RMSProp()
    )

    data_stream_train = Flatten(
            DataStream.default_stream(
                    train_set,
                    iteration_scheme=ShuffledScheme(train_set.num_examples, batch_size=4)
            )
    )

    data_stream_test = Flatten(
            DataStream.default_stream(
                    test_set,
                    iteration_scheme=SequentialScheme(test_set.num_examples, batch_size=1)
            )
    )

    monitor = DataStreamMonitoring(
            variables=[cost, error],
            data_stream=data_stream_test,
            prefix="test"
    )

    main_loop = MainLoop(
            data_stream=data_stream_train,
            algorithm=algorithm,
            extensions=[
                monitor,
                FinishAfter(after_n_epochs=100),
                Printing(),
                # ProgressBar()
            ]
    )

    main_loop.run()
Beispiel #24
0
def train_snli_model(new_training_job,
                     config,
                     save_path,
                     params,
                     fast_start,
                     fuel_server,
                     seed,
                     model='simple'):
    if config['exclude_top_k'] > config['num_input_words'] and config[
            'num_input_words'] > 0:
        raise Exception("Some words have neither word nor def embedding")
    c = config
    logger = configure_logger(name="snli_baseline_training",
                              log_file=os.path.join(save_path, "log.txt"))
    if not os.path.exists(save_path):
        logger.info("Start a new job")
        os.mkdir(save_path)
    else:
        logger.info("Continue an existing job")
    with open(os.path.join(save_path, "cmd.txt"), "w") as f:
        f.write(" ".join(sys.argv))

    # Make data paths nice
    for path in [
            'dict_path', 'embedding_def_path', 'embedding_path', 'vocab',
            'vocab_def', 'vocab_text'
    ]:
        if c.get(path, ''):
            if not os.path.isabs(c[path]):
                c[path] = os.path.join(fuel.config.data_path[0], c[path])

    main_loop_path = os.path.join(save_path, 'main_loop.tar')
    main_loop_best_val_path = os.path.join(save_path, 'main_loop_best_val.tar')
    stream_path = os.path.join(save_path, 'stream.pkl')

    # Save config to save_path
    json.dump(config, open(os.path.join(save_path, "config.json"), "w"))

    if model == 'simple':
        nli_model, data, used_dict, used_retrieval, _ = _initialize_simple_model_and_data(
            c)
    elif model == 'esim':
        nli_model, data, used_dict, used_retrieval, _ = _initialize_esim_model_and_data(
            c)
    else:
        raise NotImplementedError()

    # Compute cost
    s1, s2 = T.lmatrix('sentence1'), T.lmatrix('sentence2')

    if c['dict_path']:
        assert os.path.exists(c['dict_path'])
        s1_def_map, s2_def_map = T.lmatrix('sentence1_def_map'), T.lmatrix(
            'sentence2_def_map')
        def_mask = T.fmatrix("def_mask")
        defs = T.lmatrix("defs")
    else:
        s1_def_map, s2_def_map = None, None
        def_mask = None
        defs = None

    s1_mask, s2_mask = T.fmatrix('sentence1_mask'), T.fmatrix('sentence2_mask')
    y = T.ivector('label')

    cg = {}
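    # Build two graphs over the same bricks: with train_phase=True batch
    # normalization uses minibatch statistics, with train_phase=False it uses
    # the population statistics kept for inference.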
    for train_phase in [True, False]:
        # NOTE: Please don't change outputs of cg
        if train_phase:
            with batch_normalization(nli_model):
                pred = nli_model.apply(s1,
                                       s1_mask,
                                       s2,
                                       s2_mask,
                                       def_mask=def_mask,
                                       defs=defs,
                                       s1_def_map=s1_def_map,
                                       s2_def_map=s2_def_map,
                                       train_phase=train_phase)
        else:
            pred = nli_model.apply(s1,
                                   s1_mask,
                                   s2,
                                   s2_mask,
                                   def_mask=def_mask,
                                   defs=defs,
                                   s1_def_map=s1_def_map,
                                   s2_def_map=s2_def_map,
                                   train_phase=train_phase)

        cost = CategoricalCrossEntropy().apply(y.flatten(), pred)
        error_rate = MisclassificationRate().apply(y.flatten(), pred)
        cg[train_phase] = ComputationGraph([cost, error_rate])

    # Weight decay (TODO: Make it less bug prone)
    if model == 'simple':
        weights_to_decay = VariableFilter(
            bricks=[dense for dense, relu, bn in nli_model._mlp],
            roles=[WEIGHT])(cg[True].variables)
        weight_decay = np.float32(c['l2']) * sum(
            (w**2).sum() for w in weights_to_decay)
    elif model == 'esim':
        weight_decay = 0.0
    else:
        raise NotImplementedError()

    final_cost = cg[True].outputs[0] + weight_decay
    final_cost.name = 'final_cost'

    # Add updates for population parameters
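    # Population (inference-time) batch-norm statistics are maintained as an
    # exponential moving average of the minibatch statistics:
    # new = 0.1 * minibatch + 0.9 * old.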

    if c.get("bn", True):
        pop_updates = get_batch_normalization_updates(cg[True])
        extra_updates = [(p, m * 0.1 + p * (1 - 0.1)) for p, m in pop_updates]
    else:
        pop_updates = []
        extra_updates = []

    if params:
        logger.debug("Load parameters from {}".format(params))
        with open(params) as src:
            loaded_params = load_parameters(src)
            cg[True].set_parameter_values(loaded_params)
            for param, m in pop_updates:
                param.set_value(loaded_params[get_brick(
                    param).get_hierarchical_name(param)])

    if os.path.exists(os.path.join(save_path, "main_loop.tar")):
        logger.warning("Manually loading BN stats :(")
        with open(os.path.join(save_path, "main_loop.tar")) as src:
            loaded_params = load_parameters(src)

        for param, m in pop_updates:
            param.set_value(
                loaded_params[get_brick(param).get_hierarchical_name(param)])

    if theano.config.compute_test_value != 'off':
        test_value_data = next(
            data.get_stream('train', batch_size=4).get_epoch_iterator())
        s1.tag.test_value = test_value_data[0]
        s1_mask.tag.test_value = test_value_data[1]
        s2.tag.test_value = test_value_data[2]
        s2_mask.tag.test_value = test_value_data[3]
        y.tag.test_value = test_value_data[4]

    # Freeze embeddings
    if not c['train_emb']:
        frozen_params = [
            p for E in nli_model.get_embeddings_lookups() for p in E.parameters
        ]
        train_params = [p for p in cg[True].parameters]
        assert len(set(frozen_params) & set(train_params)) > 0
    else:
        frozen_params = []
    if not c.get('train_def_emb', 1):
        frozen_params_def = [
            p for E in nli_model.get_def_embeddings_lookups()
            for p in E.parameters
        ]
        train_params = [p for p in cg[True].parameters]
        assert len(set(frozen_params_def) & set(train_params)) > 0
        frozen_params += frozen_params_def
    train_params = [p for p in cg[True].parameters if p not in frozen_params]
    train_params_keys = [
        get_brick(p).get_hierarchical_name(p) for p in train_params
    ]

    # Optimizer
    algorithm = GradientDescent(cost=final_cost,
                                on_unused_sources='ignore',
                                parameters=train_params,
                                step_rule=Adam(learning_rate=c['lr']))
    algorithm.add_updates(extra_updates)
    m = Model(final_cost)

    parameters = m.get_parameter_dict()  # Blocks version mismatch
    logger.info("Trainable parameters" + "\n" +
                pprint.pformat([(key, parameters[key].get_value().shape)
                                for key in sorted(train_params_keys)],
                               width=120))
    logger.info("# of parameters {}".format(
        sum([
            np.prod(parameters[key].get_value().shape)
            for key in sorted(train_params_keys)
        ])))

    ### Monitored args ###
    train_monitored_vars = [final_cost] + cg[True].outputs
    monitored_vars = cg[False].outputs
    val_acc = monitored_vars[1]
    to_monitor_names = [
        'def_unk_ratio', 's1_merged_input_rootmean2', 's1_def_mean_rootmean2',
        's1_gate_rootmean2', 's1_compose_gate_rootmean2'
    ]
    for k in to_monitor_names:
        train_v, valid_v = VariableFilter(name=k)(
            cg[True]), VariableFilter(name=k)(cg[False])
        if len(train_v):
            logger.info("Adding {} tracking".format(k))
            train_monitored_vars.append(train_v[0])
            monitored_vars.append(valid_v[0])
        else:
            logger.warning("Didn't find {} in cg".format(k))

    if c['monitor_parameters']:
        for name in train_params_keys:
            param = parameters[name]
            num_elements = numpy.product(param.get_value().shape)
            norm = param.norm(2) / num_elements
            grad_norm = algorithm.gradients[param].norm(2) / num_elements
            step_norm = algorithm.steps[param].norm(2) / num_elements
            stats = tensor.stack(norm, grad_norm, step_norm,
                                 step_norm / grad_norm)
            stats.name = name + '_stats'
            train_monitored_vars.append(stats)

    regular_training_stream = data.get_stream('train',
                                              batch_size=c['batch_size'],
                                              seed=seed)

    if fuel_server:
        # the port will be configured by the StartFuelServer extension
        training_stream = ServerDataStream(
            sources=regular_training_stream.sources,
            hwm=100,
            produces_examples=regular_training_stream.produces_examples)
    else:
        training_stream = regular_training_stream

    ### Build extensions ###

    extensions = [
        # Load(main_loop_path, load_iteration_state=True, load_log=True)
        #     .set_conditions(before_training=not new_training_job),
        StartFuelServer(regular_training_stream,
                        stream_path,
                        hwm=100,
                        script_path=os.path.join(
                            os.path.dirname(__file__),
                            "../bin/start_fuel_server.py"),
                        before_training=fuel_server),
        Timing(every_n_batches=c['mon_freq']),
        ProgressBar(),
        RetrievalPrintStats(retrieval=used_retrieval,
                            every_n_batches=c['mon_freq_valid'],
                            before_training=not fast_start),
        Timestamp(),
        TrainingDataMonitoring(train_monitored_vars,
                               prefix="train",
                               every_n_batches=c['mon_freq']),
    ]

    if c['layout'] == 'snli':
        validation = DataStreamMonitoring(monitored_vars,
                                          data.get_stream('valid',
                                                          batch_size=14,
                                                          seed=seed),
                                          before_training=not fast_start,
                                          on_resumption=True,
                                          after_training=True,
                                          every_n_batches=c['mon_freq_valid'],
                                          prefix='valid')
        extensions.append(validation)
    elif c['layout'] == 'mnli':
        validation = DataStreamMonitoring(monitored_vars,
                                          data.get_stream('valid_matched',
                                                          batch_size=14,
                                                          seed=seed),
                                          every_n_batches=c['mon_freq_valid'],
                                          on_resumption=True,
                                          after_training=True,
                                          prefix='valid_matched')
        validation_mismatched = DataStreamMonitoring(
            monitored_vars,
            data.get_stream('valid_mismatched', batch_size=14, seed=seed),
            every_n_batches=c['mon_freq_valid'],
            before_training=not fast_start,
            on_resumption=True,
            after_training=True,
            prefix='valid_mismatched')
        extensions.extend([validation, validation_mismatched])
    else:
        raise NotImplementedError()

    # Similarity trackers for embeddings
    if len(c.get('vocab_def', '')):
        retrieval_vocab = Vocabulary(c['vocab_def'])
    else:
        retrieval_vocab = data.vocab

    retrieval_all = Retrieval(vocab_text=retrieval_vocab,
                              dictionary=used_dict,
                              max_def_length=c['max_def_length'],
                              exclude_top_k=0,
                              max_def_per_word=c['max_def_per_word'])

    for name in [
            's1_word_embeddings', 's1_dict_word_embeddings',
            's1_translated_word_embeddings'
    ]:
        variables = VariableFilter(name=name)(cg[False])
        if len(variables):
            s1_emb = variables[0]
            logger.info("Adding similarity tracking for " + name)
            # A bit sloppy about downcast

            if "dict" in name:
                embedder = construct_dict_embedder(theano.function(
                    [s1, defs, def_mask, s1_def_map],
                    s1_emb,
                    allow_input_downcast=True),
                                                   vocab=data.vocab,
                                                   retrieval=retrieval_all)
                extensions.append(
                    SimilarityWordEmbeddingEval(
                        embedder=embedder,
                        prefix=name,
                        every_n_batches=c['mon_freq_valid'],
                        before_training=not fast_start))
            else:
                embedder = construct_embedder(theano.function(
                    [s1], s1_emb, allow_input_downcast=True),
                                              vocab=data.vocab)
                extensions.append(
                    SimilarityWordEmbeddingEval(
                        embedder=embedder,
                        prefix=name,
                        every_n_batches=c['mon_freq_valid'],
                        before_training=not fast_start))

    track_the_best = TrackTheBest(validation.record_name(val_acc),
                                  before_training=not fast_start,
                                  every_n_epochs=c['save_freq_epochs'],
                                  after_training=not fast_start,
                                  every_n_batches=c['mon_freq_valid'],
                                  choose_best=min)
    extensions.append(track_the_best)

    # Special care for serializing embeddings
    if len(c.get('embedding_path', '')) or len(c.get('embedding_def_path',
                                                     '')):
        extensions.insert(
            0,
            LoadNoUnpickling(main_loop_path,
                             load_iteration_state=True,
                             load_log=True).set_conditions(
                                 before_training=not new_training_job))
        extensions.append(
            Checkpoint(main_loop_path,
                       parameters=train_params + [p for p, m in pop_updates],
                       save_main_loop=False,
                       save_separately=['log', 'iteration_state'],
                       before_training=not fast_start,
                       every_n_epochs=c['save_freq_epochs'],
                       after_training=not fast_start).add_condition(
                           ['after_batch', 'after_epoch'],
                           OnLogRecord(track_the_best.notification_name),
                           (main_loop_best_val_path, )))
    else:
        extensions.insert(
            0,
            Load(main_loop_path, load_iteration_state=True,
                 load_log=True).set_conditions(
                     before_training=not new_training_job))
        extensions.append(
            Checkpoint(main_loop_path,
                       parameters=cg[True].parameters +
                       [p for p, m in pop_updates],
                       before_training=not fast_start,
                       every_n_epochs=c['save_freq_epochs'],
                       after_training=not fast_start).add_condition(
                           ['after_batch', 'after_epoch'],
                           OnLogRecord(track_the_best.notification_name),
                           (main_loop_best_val_path, )))

    extensions.extend([
        DumpCSVSummaries(save_path,
                         every_n_batches=c['mon_freq_valid'],
                         after_training=True),
        DumpTensorflowSummaries(save_path,
                                after_epoch=True,
                                every_n_batches=c['mon_freq_valid'],
                                after_training=True),
        Printing(every_n_batches=c['mon_freq_valid']),
        PrintMessage(msg="save_path={}".format(save_path),
                     every_n_batches=c['mon_freq']),
        FinishAfter(after_n_batches=c['n_batches']).add_condition(
            ['after_batch'],
            OnLogStatusExceed('iterations_done', c['n_batches']))
    ])

    logger.info(extensions)

    ### Run training ###

    if "VISDOM_SERVER" in os.environ:
        print("Running visdom server")
        ret = subprocess.Popen([
            os.path.join(os.path.dirname(__file__), "../visdom_plotter.py"),
            "--visdom-server={}".format(os.environ['VISDOM_SERVER']),
            "--folder={}".format(save_path)
        ])
        time.sleep(0.1)
        if ret.returncode is not None:
            raise Exception()
        atexit.register(lambda: os.kill(ret.pid, signal.SIGINT))

    model = Model(cost)
    for p, m in pop_updates:
        model._parameter_dict[get_brick(p).get_hierarchical_name(p)] = p

    main_loop = MainLoop(algorithm,
                         training_stream,
                         model=model,
                         extensions=extensions)

    assert os.path.exists(save_path)
    main_loop.run()
Beispiel #25
0
from blocks.roles import WEIGHT
from blocks.bricks.cost import MisclassificationRate


x = tensor.matrix('features')
y = tensor.lmatrix('targets')

lin1 = Linear(name='lin1', input_dim=126, output_dim=50, weights_init=Constant(0.005), biases_init=Constant(0))
act1_sigmoid = Logistic().apply(lin1.apply(x))
lin2 = Linear(name='lin2', input_dim=50, output_dim=2, weights_init=Constant(0.001), biases_init=Constant(0))
act2_softmax = Softmax().apply(lin2.apply(act1_sigmoid))

lin1.initialize()
lin2.initialize()

missclass = MisclassificationRate().apply(y.argmax(axis=1), act2_softmax)
missclass.name = 'misclassification'

cost = CategoricalCrossEntropy().apply(y, act2_softmax)

comp_graph = ComputationGraph([cost])

W1, W2 = VariableFilter(roles=[WEIGHT])(comp_graph.variables)

cost = cost + 0.005 * (W1**2).sum() + 0.005 * (W2**2).sum()
cost.name = 'cost'

from blocks.algorithms import GradientDescent, Scale
from blocks.extensions import FinishAfter, Printing, ProgressBar
from blocks.extensions.monitoring import DataStreamMonitoring
from fuel.transformers import Flatten
Beispiel #26
0
    def apply(self, input_labeled, target_labeled, input_unlabeled):
        self.layer_counter = 0
        input_dim = self.p.encoder_layers[0]

        # Store the dimension tuples in the same order as layers.
        layers = self.layers
        self.layer_dims = {0: input_dim}

        self.lr = self.shared(self.default_lr, 'learning_rate', role=None)

        self.costs = costs = AttributeDict()
        self.costs.denois = AttributeDict()

        self.act = AttributeDict()
        self.error = AttributeDict()

        top = len(layers) - 1

        N = input_labeled.shape[0]
        self.join = lambda l, u: T.concatenate([l, u], axis=0)
        self.labeled = lambda x: x[:N] if x is not None else x
        self.unlabeled = lambda x: x[N:] if x is not None else x
        self.split_lu = lambda x: (self.labeled(x), self.unlabeled(x))

        input_concat = self.join(input_labeled, input_unlabeled)

        def encoder(input_, path_name, input_noise_std=0, noise_std=[]):

            h = input_
            logger.info('  0: noise %g' % input_noise_std)

            if input_noise_std > 0.:
                h = h + self.noise_like(h) * input_noise_std

            d = AttributeDict()
            d.unlabeled = self.new_activation_dict()
            d.labeled = self.new_activation_dict()
            d.labeled.z[0] = self.labeled(h)
            d.unlabeled.z[0] = self.unlabeled(h)
            prev_dim = input_dim

            for i, (spec, _, act_f) in layers[1:]:
                d.labeled.h[i - 1], d.unlabeled.h[i - 1] = self.split_lu(h)
                noise = noise_std[i] if i < len(noise_std) else 0.
                curr_dim, z, m, s, h = self.f(h,
                                              prev_dim,
                                              spec,
                                              i,
                                              act_f,
                                              path_name=path_name,
                                              noise_std=noise)
                assert self.layer_dims.get(i) in (None, curr_dim)
                self.layer_dims[i] = curr_dim
                d.labeled.z[i], d.unlabeled.z[i] = self.split_lu(z)
                d.unlabeled.s[i] = s
                d.unlabeled.m[i] = m
                prev_dim = curr_dim

            d.labeled.h[i], d.unlabeled.h[i] = self.split_lu(h)

            return d

        # Clean, supervised
        logger.info('Encoder: clean, labeled')
        clean = self.act.clean = encoder(input_concat, 'clean')

        # Corrupted, supervised
        logger.info('Encoder: corr, labeled')
        corr = self.act.corr = encoder(input_concat,
                                       'corr',
                                       input_noise_std=self.p.super_noise_std,
                                       noise_std=self.p.f_local_noise_std)
        est = self.act.est = self.new_activation_dict()

        # Decoder path in opposite order
        logger.info('Decoder: z_corr -> z_est')
        for i, ((_, spec), l_type, act_f) in layers[::-1]:
            z_corr = corr.unlabeled.z[i]
            z_clean = clean.unlabeled.z[i]
            z_clean_s = clean.unlabeled.s.get(i)
            z_clean_m = clean.unlabeled.m.get(i)
            fspec = layers[i + 1][1][0] if len(layers) > i + 1 else (None,
                                                                     None)

            if i == top:
                ver = corr.unlabeled.h[i]
                ver_dim = self.layer_dims[i]
                top_g = True
            else:
                ver = est.z.get(i + 1)
                ver_dim = self.layer_dims.get(i + 1)
                top_g = False

            z_est = self.g(z_lat=z_corr,
                           z_ver=ver,
                           in_dims=ver_dim,
                           out_dims=self.layer_dims[i],
                           l_type=l_type,
                           num=i,
                           fspec=fspec,
                           top_g=top_g)

            if z_est is not None:
                # Denoising cost

                if z_clean_s and self.p.zestbn == 'bugfix':
                    z_est_norm = (z_est - z_clean_m
                                  ) / T.sqrt(z_clean_s + np.float32(1e-10))
                elif z_clean_s is None or self.p.zestbn == 'no':
                    z_est_norm = z_est
                else:
                    assert False, 'Not supported path'

                se = SquaredError('denois' + str(i))
                costs.denois[i] = se.apply(z_est_norm.flatten(2),
                                           z_clean.flatten(2)) \
                    / np.prod(self.layer_dims[i], dtype=floatX)
                costs.denois[i].name = 'denois' + str(i)
                denois_print = 'denois %.2f' % self.p.denoising_cost_x[i]
            else:
                denois_print = ''

            # Store references for later use
            est.h[i] = self.apply_act(z_est, act_f)
            est.z[i] = z_est
            est.s[i] = None
            est.m[i] = None
            logger.info('  g%d: %10s, %s, dim %s -> %s' %
                        (i, l_type, denois_print, self.layer_dims.get(i + 1),
                         self.layer_dims.get(i)))

        # Costs
        y = target_labeled.flatten()

        costs.class_clean = CategoricalCrossEntropy().apply(
            y, clean.labeled.h[top])
        costs.class_clean.name = 'cost_class_clean'

        costs.class_corr = CategoricalCrossEntropy().apply(
            y, corr.labeled.h[top])
        costs.class_corr.name = 'cost_class_corr'

        # This will be used for training
        costs.total = costs.class_corr * 1.0
        for i in range(top + 1):
            if costs.denois.get(i) and self.p.denoising_cost_x[i] > 0:
                costs.total += costs.denois[i] * self.p.denoising_cost_x[i]

        costs.total.name = 'cost_total'

        # Classification error
        mr = MisclassificationRate()
        self.error.clean = mr.apply(y, clean.labeled.h[top]) * np.float32(100.)
        self.error.clean.name = 'error_rate_clean'
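
For reference, a minimal NumPy sketch (not part of the original code; all numbers and names are illustrative) of what the decoder loop and the total cost above compute: each layer's reconstruction is normalized with the clean path's batch statistics before the squared error, and the training cost is the corrupted-path classification cost plus the weighted sum of per-layer denoising costs.

import numpy as np

def denoising_cost(z_est, z_clean, m_clean, v_clean, eps=1e-10):
    # normalize the decoder estimate with the clean path's batch mean/variance
    # (the 'bugfix' branch above), then average the squared error over units
    z_est_norm = (z_est - m_clean) / np.sqrt(v_clean + eps)
    return np.mean((z_est_norm - z_clean) ** 2)

rng = np.random.RandomState(0)
z_clean = rng.randn(4, 8).astype('float32')                 # clean-path z
z_est = z_clean + 0.1 * rng.randn(4, 8).astype('float32')   # decoder estimate
m, v = z_clean.mean(axis=0), z_clean.var(axis=0)

class_cost = 0.7                      # stand-in for the corrupted-path CE
lambdas = [1000.0, 10.0, 0.1]         # stand-in for denoising_cost_x
denois = [denoising_cost(z_est, z_clean, m, v) for _ in lambdas]
total = class_cost + sum(l * d for l, d in zip(lambdas, denois))
print(total)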
Example #27
def train_paired_dnn(train_x, train_y, dev_x, dev_y, test_x, test_y):
    train_y = train_y.flatten().astype(int)
    dev_y = dev_y.flatten().astype(int)
    test_y = test_y.flatten().astype(int)

    batch_size = 256

    n_train, in_dim = train_x.shape
    n_dev = dev_x.shape[0]
    n_test = test_x.shape[0]

    hid_dims = 2 * np.array([512, 512, 512, 512]) 
    out_dim = 1

    ds_train = make_ds(train_x, train_y, batch_size, n_train, SequentialScheme)
    ds_dev = make_ds(dev_x, dev_y, batch_size, n_dev, SequentialScheme)
    ds_test = make_ds(test_x, test_y, batch_size, n_test, SequentialScheme)

    mlp = MLP(
      activations=[Rectifier(), Rectifier(), Rectifier(), Rectifier(), Logistic()],
      dims=[in_dim, hid_dims[0], hid_dims[1], hid_dims[2], hid_dims[3], out_dim],
      weights_init=Uniform(mean=0, width=1/32),
      biases_init=Constant(0)
    )
    mlp.initialize()

    x = tensor.matrix('features')
    y = tensor.matrix('targets', dtype='int64')
    y_hat = mlp.apply(x)
    model = Model(y_hat)

    cost = MyBinaryCrossEntropy().apply(y, y_hat)
    cost.name = 'cost'
    misrate = MisclassificationRate().apply(y.flatten(), y_hat)
    misrate.name = 'misclassification'

    cg = ComputationGraph([cost, misrate])
    drop_vars = VariableFilter(
        roles=[INPUT], 
        bricks=mlp.linear_transformations[1:]
    )(cg.variables)
    cg_dropout = apply_dropout(cg, drop_vars, 0.2)
    cost_dropout, error_rate_dropout = cg_dropout.outputs

    learning_rate = 0.0015
    momentum = 0.9
    step_rule = CompositeRule([
        Momentum(learning_rate=learning_rate, momentum=momentum),
        AdaGrad(learning_rate=learning_rate)
    ])
    algorithm = GradientDescent(cost=cost_dropout, 
                                parameters=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost_dropout, 
                   error_rate_dropout, 
                   aggregation.mean(algorithm.total_gradient_norm)],
        after_epoch=True,
        prefix="train"
    )
    monitor_dev = DataStreamMonitoring(
    #    variables=[cost_dropout, error_rate_dropout],
        variables=[cost, misrate],
        data_stream=ds_dev, 
        prefix="dev"
    )
    monitor_test = DataStreamMonitoring(
    #    variables=[cost_dropout, error_rate_dropout],
        variables=[cost, misrate],
        data_stream=ds_test, 
        prefix="test"
    )
    track_str = 'train_{0}'.format(cost_dropout.name)
    track_best_str = '{0}_best_so_far'.format(track_str)
    print track_str, track_best_str

    n_epochs = 2
    print 'n_epochs:', n_epochs
    main_loop = MainLoop(
       model=model,
       data_stream=ds_train, 
       algorithm=algorithm,
       extensions=[Timing(),
                   monitor_train,
                   monitor_dev,
                   monitor_test,
                   TrackTheBest(track_str),
                   Checkpoint("best_model.pkl",
                       use_cpickle = True
                   ).add_condition(['after_epoch'],
                       predicate=OnLogRecord(track_best_str)),
                   FinishAfter(after_n_epochs=n_epochs), 
                   # FinishIfNoImprovementAfter(track_best_str, epochs=n_epochs),
                   Printing()]
    )
    main_loop.run() 

    acc([x], y_hat, train_x, train_y, 'train')
    acc([x], y_hat, dev_x, dev_y, 'dev')
    acc([x], y_hat, test_x, test_y, 'test')
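
A compressed sketch of the dropout pattern used above: dropout is applied to the inputs of the hidden linear layers to obtain a training cost, while the original (noise-free) cost is kept for dev/test monitoring. This mirrors the calls in the example; the dimensions and brick choices below are made up, and it assumes Blocks and Theano are installed.

from theano import tensor
from blocks.bricks import MLP, Rectifier, Logistic
from blocks.bricks.cost import BinaryCrossEntropy
from blocks.initialization import Uniform, Constant
from blocks.graph import ComputationGraph, apply_dropout
from blocks.filter import VariableFilter
from blocks.roles import INPUT

x = tensor.matrix('features')
y = tensor.matrix('targets')
mlp = MLP(activations=[Rectifier(), Logistic()], dims=[100, 64, 1],
          weights_init=Uniform(mean=0, width=1. / 32), biases_init=Constant(0))
mlp.initialize()
y_hat = mlp.apply(x)

cost = BinaryCrossEntropy().apply(y, y_hat)        # clean graph: used for monitoring
cg = ComputationGraph([cost])
drop_inputs = VariableFilter(
    roles=[INPUT], bricks=mlp.linear_transformations[1:])(cg.variables)
cg_dropout = apply_dropout(cg, drop_inputs, 0.2)
cost_dropout = cg_dropout.outputs[0]               # noisy graph: used for training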
Example #28
                weights_init=Uniform(width=0.2),
                biases_init=Constant(0))
########## hyper parameters###########################################
# We push initialization config to set different initialization schemes
convnet.push_initialization_config()
convnet.layers[0].weights_init = Uniform(width=0.2)
convnet.layers[1].weights_init = Uniform(width=0.09)
convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=0.08)
convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=0.11)
convnet.initialize()
#########################################################
#Generate output and error signal
predict = convnet.apply(x)

cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost')
error = MisclassificationRate().apply(y.flatten(), predict)
#Little trick to plot the error rate in two different plots (we can't use the same variable twice in a plot, for an unknown reason)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')
cg = ComputationGraph([cost, error_rate])
########### ALGORITHM of training#############
algorithm = GradientDescent(cost=cost,
                            parameters=cg.parameters,
                            step_rule=Scale(learning_rate=0.1))
extensions = [
    Timing(),
    FinishAfter(after_n_epochs=num_epochs),
    DataStreamMonitoring([cost, error_rate, error_rate2],
                         stream_valid,
                         prefix="valid"),
    TrainingDataMonitoring(
def test_communication(path_vae_mnist,
                       path_maxout_mnist):
                       
    # load models
    vae_mnist = load(path_vae_mnist)
    # get params : to be removed from the computation graph

    # write an object maxout
    classifier = Maxout()
    # get params : to be removed from the computation graph

    # vae whose prior is a zero mean unit variance normal distribution
    activation = Rectifier()
    full_weights_init = Orthogonal()
    weights_init = full_weights_init

    # SVHN in grayscale
    layers = [32*32, 200, 200, 200, 50]
    encoder_layers = layers[:-1]
    encoder_mlp = MLP([activation] * (len(encoder_layers)-1),
              encoder_layers,
              name="MLP_SVHN_encode", biases_init=Constant(0.), weights_init=weights_init)

    enc_dim = encoder_layers[-1]
    z_dim = layers[-1]
    sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, biases_init=Constant(0.), weights_init=full_weights_init)
    decoder_layers = layers[:]  ## includes z_dim as first layer
    decoder_layers.reverse()
    decoder_mlp = MLP([activation] * (len(decoder_layers)-2) + [Rectifier()],
              decoder_layers,
              name="MLP_SVHN_decode", biases_init=Constant(0.), weights_init=weights_init)

    
    vae_svhn = VAEModel(encoder_mlp, sampler, decoder_mlp)
    vae_svhn.initialize()

    # do the connection
    
    x = T.tensor4('x') # SVHN samples preprocessed with local contrast normalization
    x_ = (T.sum(x, axis=1)).flatten(ndim=2)
    y = T.imatrix('y')
    batch_size = 512

    svhn_z, _ = vae_svhn.sampler.sample(vae_svhn.encoder_mlp.apply(x_))
    mnist_decode = vae_mnist.decoder_mlp.apply(svhn_z)
    # reshape
    shape = mnist_decode.shape
    mnist_decode = mnist_decode.reshape((shape[0], 1, 28, 28))
    prediction = classifier.apply(mnist_decode)
    y_hat = Softmax().apply(prediction)

    x_recons, kl_terms = vae_svhn.reconstruct(x_)
    recons_term = BinaryCrossEntropy().apply(x_, T.clip(x_recons, 1e-4, 1 - 1e-4))
    recons_term.name = "recons_term"

    cost_A = recons_term + kl_terms.mean()
    cost_A.name = "cost_A"

    cost_B = Softmax().categorical_cross_entropy(y.flatten(), prediction)
    cost_B.name = 'cost_B'

    cost = cost_B
    cost.name = "cost"
    cg = ComputationGraph(cost) # probably discard some of the parameters
    parameters = cg.parameters
    params = []
    for t in parameters:
        if not re.match(".*mnist", t.name):
            params.append(t)

    """
    f = theano.function([x], cost_A)
    value_x = np.random.ranf((1, 3, 32, 32)).astype("float32")
    print f(value_x)
    
    return
    """
    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y.flatten(), y_hat)
    error_rate.name = "error_rate"
    
    # training here
    step_rule = RMSProp(0.001,0.99)

    dataset_hdf5_file="/Tmp/ducoffem/SVHN/"
    train_set = H5PYDataset(os.path.join(dataset_hdf5_file, "all.h5"), which_set='train')
    test_set = H5PYDataset(os.path.join(dataset_hdf5_file, "all.h5"), which_set='valid')
    
    data_stream = DataStream.default_stream(
        train_set, iteration_scheme=SequentialScheme(train_set.num_examples, batch_size))
        
    data_stream_test = DataStream.default_stream(
        test_set, iteration_scheme=SequentialScheme(2000, batch_size))


    algorithm = GradientDescent(cost=cost, params=params,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], prefix="train", every_n_batches=10)
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test, prefix="valid", every_n_batches=10)

    # drawing_samples = ImagesSamplesSave("../data_svhn", vae, (3, 32, 32), every_n_epochs=1)
    extensions = [  monitor_train,
                    monitor_valid,
                    FinishAfter(after_n_batches=10000),
                    Printing(every_n_batches=10)
                  ]

    main_loop = MainLoop(data_stream=data_stream,
                        algorithm=algorithm, model = Model(cost),
                        extensions=extensions)
    main_loop.run()
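
The parameter selection above (training only the new SVHN branch while freezing the pretrained MNIST decoder) reduces to a regex screen over parameter names. A self-contained sketch of just that step, with made-up parameter names:

import re

class FakeParam(object):
    def __init__(self, name):
        self.name = name
    def __repr__(self):
        return self.name

parameters = [FakeParam('MLP_SVHN_encode/linear_0.W'),
              FakeParam('mnist_decoder/linear_0.W'),
              FakeParam('maxout_classifier.b')]

# keep only parameters whose name does not mention the frozen mnist model
params = [p for p in parameters if not re.match(".*mnist", p.name)]
print(params)   # [MLP_SVHN_encode/linear_0.W, maxout_classifier.b]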
Example #30
def build_and_run(experimentconfig, modelconfig, save_to=None): #modelconfig, 
    """ part of this is adapted from lasagne tutorial""" 
    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('image_features')
    target_var = T.lmatrix('targets')
    target_vec = T.extra_ops.to_one_hot(target_var[:,0],2)

    # Create vgg model
    print("Building model...")

    image_size = modelconfig['image_size']
    network = vgg16.build_small_model()
    prediction = lasagne.utils.as_theano_expression(lasagne.layers.get_output(network["prob"],input_var))
#    test_prediction = lasagne.layers.get_output(network["prob"],input_var,deterministic=True)

    # Loss function -> The objective to minimize 
    print("Instanciation of loss function...")
 
 #  loss = lasagne.objectives.categorical_crossentropy(prediction, target_var.flatten())
    loss = lasagne.objectives.squared_error(prediction,target_vec)
 #   test_loss = lasagne.objectives.squared_error(test_prediction,target_vec)
    loss = loss.mean()

   # layers = network.values()  
    #l1 and l2 regularization
   # pondlayers = {x:0.01 for x in layers}
   # l1_penality = lasagne.regularization.regularize_layer_params_weighted(pondlayers, lasagne.regularization.l2)
   # l2_penality = lasagne.regularization.regularize_layer_params(layers[len(layers)/4:], lasagne.regularization.l1) * 1e-4
   # reg_penalty = l1_penality + l2_penality
   # reg_penalty.name = 'reg_penalty'
    #loss = loss + reg_penalty
    loss.name = 'loss'

    error_rate = MisclassificationRate().apply(target_var.flatten(), prediction).copy(
            name='error_rate')

    # Load the dataset
    print("Loading data...")
    if 'test' in experimentconfig.keys() and experimentconfig['test'] is True:
        train_stream, valid_stream, test_stream = get_stream(experimentconfig['batch_size'],image_size,test=True)
    else:
        train_stream, valid_stream, test_stream = get_stream(experimentconfig['batch_size'],image_size,test=False)

    # Defining step rule and algorithm
    if 'step_rule' in experimentconfig.keys() and experimentconfig['step_rule'] is not None:
        step_rule = experimentconfig['step_rule'](learning_rate=experimentconfig['learning_rate'])
    else:
        step_rule = Scale(learning_rate=experimentconfig['learning_rate'])

    params = map(lasagne.utils.as_theano_expression,lasagne.layers.get_all_params(network['prob'], trainable=True))

    algorithm = GradientDescent(
                cost=loss, gradients={var:T.grad(loss,var) for var in params},
                step_rule=step_rule)

    grad_norm = aggregation.mean(algorithm.total_gradient_norm) 
    grad_norm.name='grad_norm'   

    print("Initializing extensions...")
    plot = Plot(save_to, channels=[['train_loss','valid_loss','train_grad_norm'],['train_error_rate','valid_error_rate']], server_url='http://hades.calculquebec.ca:5042')    
    checkpoint = Checkpoint('models/best_'+save_to+'.tar')
  #  checkpoint.add_condition(['after_n_batches=25'],
    checkpoint.add_condition(['after_epoch'],
                         predicate=OnLogRecord('valid_error_rate_best_so_far'))

    #Defining extensions
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=experimentconfig['num_epochs'],
                              after_n_batches=experimentconfig['num_batches']),
                  TrainingDataMonitoring([loss, error_rate, grad_norm], prefix="train", after_epoch=True), #after_n_epochs=1
                  DataStreamMonitoring([loss, error_rate],valid_stream,prefix="valid", after_epoch=True), #after_n_epochs=1
                  #Checkpoint(save_to,after_n_epochs=5),
                  #ProgressBar(),
                  plot,
                  #       after_batch=True),
                  Printing(after_epoch=True),
                  TrackTheBest('valid_error_rate',min), #Keep best
                  checkpoint,  #Save best
                  FinishIfNoImprovementAfter('valid_error_rate_best_so_far', epochs=5)] # Early-stopping

   # model = Model(ComputationGraph(network))

    main_loop = MainLoop(
        algorithm,
        train_stream,
      #  model=model,
        extensions=extensions)
    print("Starting main loop...")

    main_loop.run()
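
The objective above is a squared error between the softmax output and a one-hot encoding of the binary targets (built with T.extra_ops.to_one_hot), rather than the commented-out categorical cross-entropy. A small NumPy sketch of what the two objectives compute on toy values (illustrative only):

import numpy as np

labels = np.array([0, 1, 1, 0])                # target_var[:, 0]
one_hot = np.eye(2)[labels]                    # what to_one_hot produces
probs = np.array([[0.9, 0.1],
                  [0.2, 0.8],
                  [0.6, 0.4],
                  [0.7, 0.3]])                 # network "prob" outputs

squared_error = np.mean((probs - one_hot) ** 2)
cross_entropy = -np.mean(np.log(probs[np.arange(len(labels)), labels]))
print(squared_error, cross_entropy)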
Example #31
def main(save_to, cost_name, learning_rate, momentum, num_epochs):
    mlp = MLP([None], [784, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    scores = mlp.apply(x)

    batch_size = y.shape[0]
    indices = tensor.arange(y.shape[0])
    target_scores = tensor.set_subtensor(
        tensor.zeros((batch_size, 10))[indices, y.flatten()],
        1)
    score_diff = scores - target_scores

    # Logistic Regression
    if cost_name == 'lr':
        cost = Softmax().categorical_cross_entropy(y.flatten(), scores).mean()
    # MSE
    elif cost_name == 'mse':
        cost = (score_diff ** 2).mean()
    # Perceptron
    elif cost_name == 'perceptron':
        cost = (scores.max(axis=1) - scores[indices, y.flatten()]).mean()
    # TLE
    elif cost_name == 'minmin':
        cost = abs(score_diff[indices, y.flatten()]).mean()
        cost += abs(score_diff[indices, scores.argmax(axis=1)]).mean()
    # TLEcut
    elif cost_name == 'minmin_cut':
        # Score of the groundtruth should be greater or equal than its target score
        cost = tensor.maximum(0, -score_diff[indices, y.flatten()]).mean()
        # Score of the prediction should be less or equal than its actual score
        cost += tensor.maximum(0, score_diff[indices, scores.argmax(axis=1)]).mean()
    # TLE2
    elif cost_name == 'minmin2':
        cost = ((score_diff[tensor.arange(y.shape[0]), y.flatten()]) ** 2).mean()
        cost += ((score_diff[tensor.arange(y.shape[0]), scores.argmax(axis=1)]) ** 2).mean()
    # Direct loss minimization
    elif cost_name == 'direct':
        epsilon = 0.1
        cost = (- scores[indices, (scores + epsilon * target_scores).argmax(axis=1)]
                + scores[indices, scores.argmax(axis=1)]).mean()
        cost /= epsilon
    elif cost_name == 'svm':
        cost = (scores[indices, (scores - 1 * target_scores).argmax(axis=1)]
                - scores[indices, y.flatten()]).mean()
    else:
        raise ValueError("Unknown cost " + cost_name)

    error_rate = MisclassificationRate().apply(y.flatten(), scores)
    error_rate.name = 'error_rate'

    cg = ComputationGraph([cost])
    cost.name = 'cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    if learning_rate is None:
        learning_rate = 0.0001
    if momentum is None:
        momentum = 0.0
    rule = Momentum(learning_rate=learning_rate,
                    momentum=momentum)
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=rule)
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      Flatten(
                          DataStream.default_stream(
                              mnist_test,
                              iteration_scheme=SequentialScheme(
                                  mnist_test.num_examples, 500)),
                          which_sources=('features',)),
                      prefix="test"),
                  # CallbackExtension(
                  #    lambda: rule.learning_rate.set_value(rule.learning_rate.get_value() * 0.9),
                  #    after_epoch=True),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm),
                       rule.learning_rate],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_cost',
                 'test_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()

    df = pandas.DataFrame.from_dict(main_loop.log, orient='index')
    res = {'cost' : cost_name,
           'learning_rate' : learning_rate,
           'momentum' : momentum,
           'train_cost' : df.train_cost.iloc[-1],
           'test_cost' : df.test_cost.iloc[-1],
           'best_test_cost' : df.test_cost.min(),
           'train_error' : df.train_error_rate.iloc[-1],
           'test_error' : df.test_error_rate.iloc[-1],
           'best_test_error' : df.test_error_rate.min()}
    res = {k: float(v) if isinstance(v, numpy.ndarray) else v for k, v in res.items()}
    json.dump(res, sys.stdout)
    sys.stdout.flush()
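
To make the cost variants above concrete, here is a tiny NumPy evaluation of three of them (the softmax cross-entropy 'lr', the 'mse' against one-hot target scores, and the 'perceptron' cost) on made-up scores; the numbers are purely illustrative.

import numpy as np

scores = np.array([[2.0, 1.0, -1.0],
                   [0.5, 1.5,  0.0]])          # network outputs for 2 examples
y = np.array([0, 2])                           # ground-truth classes
idx = np.arange(len(y))
target_scores = np.zeros_like(scores)
target_scores[idx, y] = 1.0
score_diff = scores - target_scores

# 'lr': softmax cross-entropy on the raw scores
log_probs = scores - np.log(np.exp(scores).sum(axis=1, keepdims=True))
lr_cost = -log_probs[idx, y].mean()
# 'mse': squared distance to the one-hot target scores
mse_cost = (score_diff ** 2).mean()
# 'perceptron': best score minus the ground-truth score
perceptron_cost = (scores.max(axis=1) - scores[idx, y]).mean()
print(lr_cost, mse_cost, perceptron_cost)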
Example #32
def main(save_to,
         save_freq,
         num_epochs,
         feature_maps=None,
         mlp_hiddens=None,
         conv_sizes=None,
         pool_sizes=None,
         batch_size=500,
         num_batches=None):

    if feature_maps is None:
        feature_maps = [20, 50]
    if mlp_hiddens is None:
        mlp_hiddens = [500]
    if conv_sizes is None:
        conv_sizes = [5, 5]
    if pool_sizes is None:
        pool_sizes = [2, 2]
    image_size = (28, 28)
    output_size = 10

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    convnet = LeNet(conv_activations,
                    1,
                    image_size,
                    filter_sizes=zip(conv_sizes, conv_sizes),
                    feature_maps=feature_maps,
                    pooling_sizes=zip(pool_sizes, pool_sizes),
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='full',
                    weights_init=Uniform(width=.2),
                    biases_init=Constant(0))
    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()
    logging.info(
        "Input dim: {} {} {}".format(*convnet.children[0].get_dim('input_')))
    for i, layer in enumerate(convnet.layers):
        if (isinstance(layer, Activation)):
            logging.info("Layer {} ({})".format(i, layer.__class__.__name__))
        else:
            logging.info("Layer {} ({}) dim: {} {} {}".format(
                i, layer.__class__.__name__, *layer.get_dim('output')))

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(),
                                           probs).copy(name='cost')
    error_rate = MisclassificationRate().apply(y.flatten(),
                                               probs).copy(name='error_rate')

    cg = ComputationGraph([cost, error_rate])

    mnist_train = MNIST(("train", ))
    mnist_train_stream = DataStream.default_stream(
        mnist_train,
        iteration_scheme=ShuffledScheme(mnist_train.num_examples, batch_size))

    mnist_test = MNIST(("test", ))
    mnist_test_stream = DataStream.default_stream(
        mnist_test,
        iteration_scheme=ShuffledScheme(mnist_test.num_examples, batch_size))

    # Train with simple SGD
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        DataStreamMonitoring([cost, error_rate],
                             mnist_test_stream,
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        CheckpointBlock(save_to, every_n_batches=save_freq),
        ProgressBar(),
        Printing()
    ]

    model = Model(cost)

    main_loop = MainLoop(algorithm,
                         mnist_train_stream,
                         model=model,
                         extensions=extensions)

    main_loop.run()
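
The per-layer initialization override above follows the usual Blocks pattern: push the parent brick's config down to its children, replace individual children's weights_init, then call initialize(). A minimal sketch of the same pattern on a plain MLP (widths are made up; assumes Blocks is installed):

from blocks.bricks import MLP, Rectifier, Softmax
from blocks.initialization import Uniform, Constant

mlp = MLP([Rectifier(), Softmax()], [784, 100, 10],
          weights_init=Uniform(width=0.2),      # default for every layer
          biases_init=Constant(0))
mlp.push_initialization_config()                # copy config to the children
mlp.linear_transformations[0].weights_init = Uniform(width=0.08)
mlp.linear_transformations[1].weights_init = Uniform(width=0.11)
mlp.initialize()                                # each layer now uses its own scheme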
Example #33
def train_net(net, train_stream, test_stream, L1 = None, L2=None, early_stopping=False,
        finish=None, dropout=False, jobid=None, update=None,
        duration= None,
        **ignored):
    x = tensor.tensor4('image_features')
    y = tensor.lmatrix('targets')

    y_hat = net.apply(x)

    #Cost
    cost_before = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    cost_before.name = "cost_without_regularization"

    #Error
    #Taken from brodesf
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = "Misclassification rate"

    #Regularization
    cg = ComputationGraph(cost_before)
    WS = VariableFilter(roles=[WEIGHT])(cg.variables)

    if dropout:
        print("Dropout")
        cg = apply_dropout(cg, WS, 0.5)

    if L1:
        print("L1 with lambda ",L1)
        L1_reg = L1 * sum([abs(W).sum() for W in WS])
        L1_reg.name = "L1 regularization"
        cost_before += L1_reg

    if L2:
        print("L2 with lambda ",L2)
        L2_reg = L2 * sum([(W ** 2).sum() for W in WS])
        L2_reg.name = "L2 regularization"
        cost_before += L2_reg

    cost = cost_before
    cost.name = 'cost_with_regularization'

    #Initialization
    print("Initilization")
    net.initialize()

    #Algorithm
    step_rule = Scale(learning_rate=0.1)
    if update is not None:
        if update == "rmsprop":
            print("Using RMSProp")
            step_rule = RMSProp()

    remove_not_finite = RemoveNotFinite(0.9)
    step_rule = CompositeRule([step_rule, remove_not_finite])
    algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=step_rule)

    print("Extensions")
    extensions = []

    #Monitoring
    monitor = DataStreamMonitoring(variables=[cost, error], data_stream=test_stream, prefix="test")
    extensions.append(monitor)

    def filename(suffix=""):
        prefix = jobid if jobid else str(os.getpid())
        ctime = str(time.time())
        return "checkpoints/" + prefix + "_" + ctime + "_" + suffix + ".zip"

    #Serialization
    #serialization = Checkpoint(filename())
    #extensions.append(serialization)

    notification = "test_"+error.name
    track = TrackTheBest(notification)
    best_notification = track.notification_name
    checkpointbest = SaveBest(best_notification, filename("best"))
    extensions.extend([track, checkpointbest])

    if early_stopping:
        print("Early stopping")
        stopper = FinishIfNoImprovementAfterPlus(best_notification)
        extensions.append(stopper)

    #Other extensions
    if finish != None:
        print("Force finish ", finish)
        extensions.append(FinishAfter(after_n_epochs=finish))

    if duration != None:
        print("Stop after " , duration, " seconds")
        extensions.append(FinishAfterTime(duration))

    extensions.extend([
        Timing(),
        Printing()
        ])

    #Main loop
    main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm, extensions=extensions)

    print("Main loop start")
    main_loop.run()
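
Numerically, the optional L1/L2 terms above are nothing more than scaled norms of the WEIGHT-role variables added to the data cost; a NumPy sketch with made-up weight matrices and lambdas:

import numpy as np

rng = np.random.RandomState(0)
WS = [rng.randn(4, 3), rng.randn(3, 2)]        # stand-ins for the weight matrices
L1, L2 = 1e-4, 1e-3

l1_penalty = L1 * sum(np.abs(W).sum() for W in WS)
l2_penalty = L2 * sum((W ** 2).sum() for W in WS)
cost = 0.42 + l1_penalty + l2_penalty          # 0.42 stands in for the data cost
print(cost)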
Example #34
def main(num_epochs=50, batch_normalized=True, alpha=0.1):
    """Run the example.

    Parameters
    ----------
    num_epochs : int, optional
        Number of epochs for which to train.

    batch_normalized : bool, optional
        Batch-normalize the training graph. Defaults to `True`.

    alpha : float, optional
        Weight to apply to a new sample when calculating running
        averages for population statistics (1 - alpha weight is
        given to the existing average).

    """
    if batch_normalized:
        # Add an extra keyword argument that only BatchNormalizedMLP takes,
        # in order to speed things up at the cost of a bit of extra memory.
        mlp_class = BatchNormalizedMLP
        extra_kwargs = {'conserve_memory': False}
    else:
        mlp_class = MLP
        extra_kwargs = {}
    mlp = mlp_class([Logistic(), Logistic(), Logistic(), Softmax()],
                    [2, 5, 5, 5, 3],
                    weights_init=IsotropicGaussian(0.2),
                    biases_init=Constant(0.), **extra_kwargs)
    mlp.initialize()

    # Generate a dataset with 3 spiral arms, using 8000 examples for
    # training and 2000 for testing.
    dataset = Spiral(num_examples=10000, classes=3,
                     sources=['features', 'label'],
                     noise=0.05)
    train_stream = DataStream(dataset,
                              iteration_scheme=ShuffledScheme(examples=8000,
                                                              batch_size=20))
    test_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(
                                 examples=list(range(8000, 10000)),
                                 batch_size=2000))

    # Build a cost graph; this contains BatchNormalization bricks that will
    # by default run in inference mode.
    features = tensor.matrix('features')
    label = tensor.lvector('label')
    prediction = mlp.apply(features)
    cost = CategoricalCrossEntropy().apply(label, prediction)
    misclass = MisclassificationRate().apply(label, prediction)
    misclass.name = 'misclass'  # The default name for this is annoyingly long
    original_cg = ComputationGraph([cost, misclass])

    if batch_normalized:
        cg = apply_batch_normalization(original_cg)
        # Add updates for population parameters
        pop_updates = get_batch_normalization_updates(cg)
        extra_updates = [(p, m * alpha + p * (1 - alpha))
                         for p, m in pop_updates]
    else:
        cg = original_cg
        extra_updates = []

    algorithm = GradientDescent(step_rule=Adam(0.001),
                                cost=cg.outputs[0],
                                parameters=cg.parameters)
    algorithm.add_updates(extra_updates)

    main_loop = MainLoop(algorithm=algorithm,
                         data_stream=train_stream,
                         # Use the original cost and misclass variables so
                         # that we monitor the (original) inference-mode graph.
                         extensions=[DataStreamMonitoring([cost, misclass],
                                                          train_stream,
                                                          prefix='train'),
                                     DataStreamMonitoring([cost, misclass],
                                                          test_stream,
                                                          prefix='test'),
                                     Printing(),
                                     FinishAfter(after_n_epochs=num_epochs)])
    main_loop.run()
    return main_loop
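
The extra_updates above implement an exponential moving average: each population statistic p moves a fraction alpha toward the current minibatch statistic m. A short NumPy sketch of the same update rule (values illustrative):

import numpy as np

alpha = 0.1
pop_mean = np.zeros(5)                             # population statistic p
for _ in range(100):
    batch_mean = np.random.randn(5) * 0.1 + 2.0    # minibatch statistic m
    pop_mean = batch_mean * alpha + pop_mean * (1 - alpha)
print(pop_mean)                                    # drifts toward ~2.0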
def main(feature_maps=None, mlp_hiddens=None, conv_sizes=None, pool_sizes=None, batch_size=None, num_batches=None):
    if feature_maps is None:
        feature_maps = [32, 48, 64, 80, 128, 128]
    if mlp_hiddens is None:
        mlp_hiddens = [1000]
    if conv_sizes is None:
        conv_sizes = [7, 5, 5, 5, 5, 4]
    if pool_sizes is None:
        pool_sizes = [3, 2, 2, 2, 2, 1]
    if batch_size is None:
        batch_size = 64
    conv_steps = [2, 1, 1, 1, 1, 1]  # same as stride
    image_size = (256, 256)
    output_size = 2
    learningRate = 0.001
    drop_prob = 0.5
    weight_noise = 0.75
    num_epochs = 250
    num_batches = None
    host_plot = "http://*****:*****@ %s" % (graph_name, datetime.datetime.now(), socket.gethostname()),
                    channels=[["train_error_rate", "valid_error_rate"], ["train_total_gradient_norm"]],
                    after_epoch=True,
                    server_url=host_plot,
                )
            )
            PLOT_AVAILABLE = True
        except ImportError:
            PLOT_AVAILABLE = False
        extensions.append(Checkpoint(save_to, after_epoch=True, after_training=True, save_separately=["log"]))

    logger.info("Building the model")

    model = Model(cost)

    ########### Loading images #####################
    main_loop = MainLoop(algorithm, stream_data_train, model=model, extensions=extensions)

    main_loop.run()
Example #36
class LookUpTrain(Initializable, Feedforward):
    @lazy(allocation=['dwin', 'n_mot', 'vect_size', 'n_hidden'])
    def __init__(self, dwin, n_mot, vect_size, n_hidden, n_out=2, **kwargs):
        self.dwin = dwin
        self.n_mot = n_mot
        self.vect_size = vect_size
        if isinstance(n_hidden, int):
            self.n_hidden = [n_hidden]
        else:
            self.n_hidden = n_hidden
        self.n_out = n_out
        self.window = Window(self.dwin,
                             self.n_mot,
                             self.vect_size,
                             self.n_hidden,
                             self.n_out,
                             weights_init=IsotropicGaussian(0.001))
        super(LookUpTrain, self).__init__(**kwargs)
        self.softmax = Softmax()
        self.error = MisclassificationRate()
        self.children = [self.window, self.softmax, self.error]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return self.window.apply(input_)

    @application(inputs=['x', 'y'], outputs=['output'])
    def cost(self, x, y):
        return self.softmax.categorical_cross_entropy(y, self.apply(x))

    @application(inputs=['x', 'y'], outputs=['output'])
    def errors(self, x, y):
        return self.error.apply(y, self.apply(x))

    @application(inputs=['x'], outputs=['output'])
    def predict(self, x):
        return T.argmax(self.apply(x), axis=1)

    @application(inputs=['x'], outputs=['output'])
    def predict_confidency(self, x):
        return T.max(self.apply(x), axis=1)

    def update_lookup_weights(self):
        self.window.update_lookup_weights()

    @application(inputs=['input_', 'input_corrupt'], outputs=['output'])
    def score(self, input_, input_corrupt):
        # modify the input_ with an incorrect central word ?
        return (1 - -self.apply(input_)).norm(2) + (
            self.apply(input_corrupt)).norm(2)
        #return T.maximum(0,1 - self.apply(input_)+self.apply(input_corrupt) )[0]
        return T.maximum(0, 1 - self.apply(input_))[0] + 0.1 * T.maximum(
            0, 1 + self.apply(input_corrupt))[0] + 0.1 * T.maximum(
                0, 1 - self.apply(input_) + self.apply(input_corrupt))[0]
        # change that !!!!

    def _initialize(self):
        self.window.initialize()

    @application(inputs=['input_'], outputs=['output'])
    def embedding(self, input_):
        return self.window.embedding(input_)

    def _allocate(self):
        self.window.allocate()

    def load(self, repo, filename):
        params = getParams(self, T.itensor3())
        with closing(open(os.path.join(repo, filename), 'rb')) as f:
            params_value = pickle.load(f)
        for p, p_value in zip(params, params_value):
            p.set_value(p_value.get_value())

    def get_Params(self):
        return self.window.get_Params()

    def save(self, repo, filename):
        params = getParams(self, T.itensor3())
        index = 0
        while os.path.isfile(os.path.join(repo, filename + "_" + str(index))):
            index += 1
        filename = filename + "_" + str(index)
        with closing(open(os.path.join(repo, filename), 'wb')) as f:
            pickle.dump(params, f, protocol=pickle.HIGHEST_PROTOCOL)
Example #37
def build_submodel(input_shape,
                   output_dim,
                   L_dim_conv_layers,
                   L_filter_size,
                   L_pool_size,
                   L_activation_conv,
                   L_dim_full_layers,
                   L_activation_full,
                   L_exo_dropout_conv_layers,
                   L_exo_dropout_full_layers,
                   L_endo_dropout_conv_layers,
                   L_endo_dropout_full_layers,
                   L_border_mode=None,
                   L_filter_step=None,
                   L_pool_step=None):


    # TO DO : target size and name of the features

    x = T.tensor4('features')
    y = T.imatrix('targets')

    assert len(input_shape) == 3, "input_shape must be a 3d tensor"

    num_channels = input_shape[0]
    image_size = tuple(input_shape[1:])
    print image_size
    print num_channels
    prediction = output_dim

    # CONVOLUTION
    output_conv = x
    output_dim = num_channels*np.prod(image_size)
    conv_layers = []
    assert len(L_dim_conv_layers) == len(L_filter_size)
    if L_filter_step is None:
        L_filter_step = [None] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_pool_size)
    if L_pool_step is None:
        L_pool_step = [None] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_pool_step)
    assert len(L_dim_conv_layers) == len(L_activation_conv)
    if L_border_mode is None:
        L_border_mode = ["valid"] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_border_mode)
    assert len(L_dim_conv_layers) == len(L_endo_dropout_conv_layers)
    assert len(L_dim_conv_layers) == len(L_exo_dropout_conv_layers)

    # regarding the batch dropout : the dropout is applied on the filter
    # which is equivalent to the output dimension
    # you have to look at the dropout_rate of the next layer
    # that is why we need to have the first dropout value of L_exo_dropout_full_layers
    
    # the first value has to be 0.0 in this context, and we'll
    # assume that it is, but let's have an assert
    assert L_exo_dropout_conv_layers[0] == 0.0, "L_exo_dropout_conv_layers[0] has to be 0.0 in this context. There are ways to make it work, of course, but we don't support this with these scripts."

    # here modifitication of L_exo_dropout_conv_layers
    L_exo_dropout_conv_layers = L_exo_dropout_conv_layers[1:] + [L_exo_dropout_full_layers[0]]

    if len(L_dim_conv_layers):
        for (num_filters, filter_size, filter_step,
            pool_size, pool_step, activation_str, border_mode,
            dropout, index) in zip(L_dim_conv_layers,
                                  L_filter_size,
                                  L_filter_step,
                                  L_pool_size,
                                  L_pool_step,
                                  L_activation_conv,
                                  L_border_mode,
                                  L_exo_dropout_conv_layers,
                                  xrange(len(L_dim_conv_layers))
                                  ):

            # convert filter_size and pool_size in tuple
            filter_size = tuple(filter_size)

            if filter_step is None:
                filter_step = (1, 1)
            else:
                filter_step = tuple(filter_step)

            if pool_size is None:
                pool_size = (0,0)
            else:
                pool_size = tuple(pool_size)

            # TO DO : leaky relu
            if activation_str.lower() == 'rectifier':
                activation = Rectifier().apply
            elif activation_str.lower() == 'tanh':
                activation = Tanh().apply
            elif activation_str.lower() in ['sigmoid', 'logistic']:
                activation = Logistic().apply
            elif activation_str.lower() in ['id', 'identity']:
                activation = Identity().apply
            else:
                raise Exception("unknown activation function: %s" % activation_str)

            assert 0.0 <= dropout and dropout < 1.0
            num_filters = num_filters - int(num_filters*dropout)

            print "border_mode : %s" % border_mode

            # filter_step
            # http://blocks.readthedocs.org/en/latest/api/bricks.html#module-blocks.bricks.conv

            kwargs = {}
            if filter_step is None or filter_step == (1,1):
                pass
            else:
                # there's a bit of a mix of names because `Convolutional` takes
                # a "step" argument, but `ConvolutionalActivation` takes a "conv_step" argument
                kwargs['conv_step'] = filter_step

            if (pool_size[0] == 0 and pool_size[1] == 0):
                layer_conv = ConvolutionalActivation(activation=activation,
                                                filter_size=filter_size,
                                                num_filters=num_filters,
                                                border_mode=border_mode,
                                                name="layer_%d" % index,
                                                **kwargs)
            else:
                if pool_step is None:
                    pass
                else:
                    kwargs['pooling_step'] = tuple(pool_step)

                layer_conv = ConvolutionalLayer(activation=activation,
                                                filter_size=filter_size,
                                                num_filters=num_filters,
                                                border_mode=border_mode,
                                                pooling_size=pool_size,
                                                name="layer_%d" % index,
                                                **kwargs)

            conv_layers.append(layer_conv)

        convnet = ConvolutionalSequence(conv_layers, num_channels=num_channels,
                                    image_size=image_size,
                                    weights_init=Uniform(width=0.1),
                                    biases_init=Constant(0.0),
                                    name="conv_section")
        convnet.push_allocation_config()
        convnet.initialize()
        output_dim = np.prod(convnet.get_dim('output'))
        output_conv = convnet.apply(output_conv)
        


    output_conv = Flattener().apply(output_conv)

    # FULLY CONNECTED
    output_mlp = output_conv
    full_layers = []
    assert len(L_dim_full_layers) == len(L_activation_full)
    assert len(L_dim_full_layers) + 1 == len(L_endo_dropout_full_layers)
    assert len(L_dim_full_layers) + 1 == len(L_exo_dropout_full_layers)

    # regarding the batch dropout: the dropout is applied on the filter
    # which is equivalent to the output dimension
    # you have to look at the dropout_rate of the next layer
    # that is why we throw away the first value of L_exo_dropout_full_layers
    L_exo_dropout_full_layers = L_exo_dropout_full_layers[1:]
    pre_dim = output_dim
    print "When constructing the model, the output_dim of the conv section is %d." % output_dim
    if len(L_dim_full_layers):
        for (dim, activation_str,
            dropout, index) in zip(L_dim_full_layers,
                                  L_activation_full,
                                  L_exo_dropout_full_layers,
                                  range(len(L_dim_conv_layers),
                                        len(L_dim_conv_layers)+ 
                                        len(L_dim_full_layers))
                                   ):
                                          
                # TO DO : leaky relu
                if activation_str.lower() == 'rectifier':
                    activation = Rectifier().apply
                elif activation_str.lower() == 'tanh':
                    activation = Tanh().apply
                elif activation_str.lower() in ['sigmoid', 'logistic']:
                    activation = Logistic().apply
                elif activation_str.lower() in ['id', 'identity']:
                    activation = Identity().apply
                else:
                    raise Exception("unknown activation function: %s" % activation_str)

                assert 0.0 <= dropout and dropout < 1.0
                dim = dim - int(dim*dropout)
                print "When constructing the fully-connected section, we apply dropout %f to add an MLP going from pre_dim %d to dim %d." % (dropout, pre_dim, dim)

                layer_full = MLP(activations=[activation], dims=[pre_dim, dim],
                                 weights_init=Uniform(width=0.1),
                                 biases_init=Constant(0.0),
                                name="layer_%d" % index)
                layer_full.initialize()
                full_layers.append(layer_full)
                pre_dim = dim

        for layer in full_layers:
            output_mlp = layer.apply(output_mlp)

        output_dim = L_dim_full_layers[-1] - int(L_dim_full_layers[-1]*L_exo_dropout_full_layers[-1])

    # COST FUNCTION
    output_layer = Linear(output_dim, prediction,
                          weights_init=Uniform(width=0.1),
                          biases_init=Constant(0.0),
                          name="layer_"+str(len(L_dim_conv_layers)+ 
                                            len(L_dim_full_layers))
                          )
    output_layer.initialize()
    full_layers.append(output_layer)
    y_pred = output_layer.apply(output_mlp)
    y_hat = Softmax().apply(y_pred)
    # SOFTMAX and log likelihood
    y_pred = Softmax().apply(y_pred)
    # be careful. one version expects the output of a softmax; the other expects just the
    # output of the network
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_pred)
    #cost = Softmax().categorical_cross_entropy(y.flatten(), y_pred)
    cost.name = "cost"

    # Misclassification
    error_rate_brick = MisclassificationRate()
    error_rate = error_rate_brick.apply(y.flatten(), y_hat)
    error_rate.name = "error_rate"

    # put names

    D_params, D_kind = build_params(x, T.matrix(), conv_layers, full_layers)
    # test computation graph
    

    cg = ComputationGraph(cost)

    # DROPOUT
    L_endo_dropout = L_endo_dropout_conv_layers + L_endo_dropout_full_layers

    cg_dropout = cg
    inputs = VariableFilter(roles=[INPUT])(cg.variables)

    for (index, drop_rate) in enumerate(L_endo_dropout):
        for input_ in inputs:
            m = re.match(r"layer_(\d+)_apply.*", input_.name)
            if m and index == int(m.group(1)):
                if drop_rate < 0.0001:
                    print "Skipped applying dropout on %s because the dropout rate was under 0.0001." % input_.name
                    break
                else:
                    cg_dropout = apply_dropout(cg, [input_], drop_rate)
                    print "Applied dropout %f on %s." % (drop_rate, input_.name)
                    break


    cg = cg_dropout

    return (cg, error_rate, cost, D_params, D_kind)
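
On the "be careful" comment above: CategoricalCrossEntropy().apply expects probabilities (a softmax output), while Softmax().categorical_cross_entropy expects the raw network scores and applies the softmax internally. The two formulations agree, as this NumPy sketch with toy numbers shows:

import numpy as np

logits = np.array([[2.0, 0.5, -1.0],
                   [0.1, 0.2,  3.0]])
y = np.array([0, 2])
idx = np.arange(len(y))

probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
ce_from_probs = -np.log(probs[idx, y]).mean()                     # CE on probabilities
ce_from_logits = -(logits[idx, y]
                   - np.log(np.exp(logits).sum(axis=1))).mean()   # softmax CE on raw scores
print(ce_from_probs, ce_from_logits)                              # identical up to rounding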
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    #attention --->
    patch_shape = (16, 16)
    image_shape = (784, 100)
    import numpy
    import theano.tensor as T
    n_spatial_dims = 2
    cropper = SoftRectangularCropper(n_spatial_dims=n_spatial_dims,
                                     patch_shape=patch_shape,
                                     image_shape=image_shape,
                                     kernel=Gaussian())

    batch_size = 10
    scales = 1.3**numpy.arange(-7, 6)
    n_patches = len(scales)
    locations = (numpy.ones(
        (n_patches, batch_size, 2)) * image_shape / 2).astype(numpy.float32)
    scales = numpy.tile(scales[:, numpy.newaxis, numpy.newaxis],
                        (1, batch_size, 2)).astype(numpy.float32)
    Tpatches = T.stack(*[
        cropper.apply(x, T.constant(location), T.constant(scale))[0]
        for location, scale in zip(locations, scales)
    ])
    patches = theano.function([x], Tpatches)(batch['features'])

    import ipdb as pdb
    pdb.set_trace()
    probs = mlp.apply(tensor.flatten(patches, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                     which_sources=('features', )),
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                                 which_sources=('features', )),
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
Example #39
statistics_list=[(M1,S1,a1), (M2,S2,a2), (M3,S3,a3), (M4,S4,a4), (M5,S5,a5)]

# initialize_variables
# for variable (M,S) in variables:
# 	compute M and S in the whole data.

if normalization == 'bn2':
    for m,s,var in statistics_list:
        var.tag.aggregation_scheme = MeanAndVariance(var, var.shape[0], axis = 0)
        init_mn, init_var = DatasetEvaluator([var]).evaluate(stream_train)[var.name]
        m.set_value(init_mn.astype(floatX))
        s.set_value(sqrt(init_var).astype(floatX))

cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
cost.name = 'cost'
error_rate = MisclassificationRate().apply(y.flatten(), probs)
error_rate.name = 'error_rate'

cg = ComputationGraph([cost])
    
parameters = cg.parameters
# add gradient descent to M,S
if normalization == 'bn2':
    for m,s,var in statistics_list:
        parameters.extend([m,s])

algorithm = GradientDescent(
    cost=cost, parameters=parameters, step_rule=Adam(0.01))

#update the M and S with batch statistics
alpha = 0.1
Example #40
    def train(self, X, Y, idx_folds, hyper_params, model_prefix, verbose=False):

        import os
        from collections import OrderedDict
        from fuel.datasets import IndexableDataset
        from blocks.model import Model
        from blocks.bricks import Linear, Softmax
        from blocks.bricks.conv import MaxPooling
        from blocks.initialization import Uniform
        from deepthought.bricks.cost import HingeLoss
        import numpy as np
        import theano
        from theano import tensor

        assert model_prefix is not None

        fold_weights_filename = '{}_weights.npy'.format(model_prefix)

        # convert Y to one-hot encoding
        n_classes = len(set(Y))
        Y = np.eye(n_classes, dtype=int)[Y]

        features = tensor.matrix('features', dtype=theano.config.floatX)
        targets = tensor.lmatrix('targets')

        input_ = features

        dim = X.shape[-1]
        
        # optional additional layers
        if self.pipeline_factory is not None:
            # need to re-shape flattened input to restore bc01 format
            input_shape = (input_.shape[0],) + hyper_params['classifier_input_shape']  # tuple, uses actual batch size
            input_ = input_.reshape(input_shape)

            pipeline = self.pipeline_factory.build_pipeline(input_shape, hyper_params)
            input_ = pipeline.apply(input_)                        
            input_ = input_.flatten(ndim=2)
            
            # this is very hacky, but there seems to be no elegant way to obtain a value for dim
            dummy_fn = theano.function(inputs=[features], outputs=input_)
            dummy_out = dummy_fn(X[:1])
            dim = dummy_out.shape[-1]
            
            
        if hyper_params['classifier_pool_width'] > 1:
            # FIXME: this is probably broken!
            
    #        c = hyper_params['num_components']
    #        input_ = input_.reshape((input_.shape[0], c, input_.shape[-1] // c, 1))  # restore bc01
            # need to re-shape flattened input to restore bc01 format
            input_shape = hyper_params['classifier_pool_input_shape']  # tuple
            input_ = input_.reshape(input_shape)

            pool = MaxPooling(name='pool',
                              input_dim=input_shape[1:],  # (c, X.shape[-1] // c, 1),
                              pooling_size=(hyper_params['classifier_pool_width'], 1),
                              step=(hyper_params['classifier_pool_stride'], 1))
            input_ = pool.apply(input_)
            input_ = input_.reshape((input_.shape[0], tensor.prod(input_.shape[1:])))

            dim = np.prod(pool.get_dim('output'))


        linear = Linear(name='linear',
                        input_dim=dim,
                        output_dim=n_classes,
                        weights_init=Uniform(mean=0, std=0.01),
                        use_bias=False)
        linear.initialize()

        softmax = Softmax('softmax')

        probs = softmax.apply(linear.apply(input_))
        prediction = tensor.argmax(probs, axis=1)

        model = Model(probs)  # classifier with raw probability outputs
        predict = theano.function([features], prediction)  # ready-to-use predict function

        if os.path.isfile(fold_weights_filename):
            # load filter weights from existing file
            fold_weights = np.load(fold_weights_filename)
            print 'loaded filter weights from', fold_weights_filename
        else:
            # train model

            from blocks.bricks.cost import MisclassificationRate
            from blocks.filter import VariableFilter
            from blocks.graph import ComputationGraph
            from blocks.roles import WEIGHT
            from blocks.bricks import Softmax
            from blocks.model import Model
            from blocks.algorithms import GradientDescent, Adam
            from blocks.extensions import FinishAfter, Timing, Printing, ProgressBar
            from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
            from blocks.extensions.predicates import OnLogRecord
            from fuel.streams import DataStream
            from fuel.schemes import SequentialScheme, ShuffledScheme
            from blocks.monitoring import aggregation
            from blocks.main_loop import MainLoop
            from blocks.extensions.training import TrackTheBest
            from deepthought.extensions.parameters import BestParams
            # from deepthought.datasets.selection import DatasetMetaDB

            init_param_values = model.get_parameter_values()

            cost = HingeLoss().apply(targets, probs)
            # Note: this requires just the class labels, not a one-hot encoding
            error_rate = MisclassificationRate().apply(targets.argmax(axis=1), probs)
            error_rate.name = 'error_rate'

            cg = ComputationGraph([cost])

            # L1 regularization
            if hyper_params['classifier_l1wdecay'] > 0:
                weights = VariableFilter(roles=[WEIGHT])(cg.variables)
                cost = cost + hyper_params['classifier_l1wdecay'] * sum([abs(W).sum() for W in weights])

            cost.name = 'cost'

            # iterate over trial folds
            fold_weights = []
            fold_errors = []

            # for ifi, ifold in fold_generator.get_inner_cv_folds(outer_fold):
            #
            #     train_selectors = fold_generator.get_fold_selectors(outer_fold=outer_fold, inner_fold=ifold['train'])
            #     valid_selectors = fold_generator.get_fold_selectors(outer_fold=outer_fold, inner_fold=ifold['valid'])
            #
            #     metadb = DatasetMetaDB(meta, train_selectors.keys())
            #
            #     # get selected trial IDs
            #     train_idx = metadb.select(train_selectors)
            #     valid_idx = metadb.select(valid_selectors)

            for train_idx, valid_idx in idx_folds:

                # print train_idx
                # print valid_idx

                trainset = IndexableDataset(indexables=OrderedDict(
                    [('features', X[train_idx]), ('targets', Y[train_idx])]))

                validset = IndexableDataset(indexables=OrderedDict(
                    [('features', X[valid_idx]), ('targets', Y[valid_idx])]))

                model.set_parameter_values(init_param_values)

                best_params = BestParams()
                best_params.add_condition(['after_epoch'],
                                          predicate=OnLogRecord('error_rate_valid_best_so_far'))

                algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Adam())

                extensions = [Timing(),
                              FinishAfter(after_n_epochs=hyper_params['classifier_max_epochs']),
                              DataStreamMonitoring(
                                  [cost, error_rate],
                                  DataStream.default_stream(
                                      validset,
                                      iteration_scheme=SequentialScheme(
                                          validset.num_examples, hyper_params['classifier_batch_size'])),
                                  suffix="valid"),
                              TrainingDataMonitoring(
                                  [cost, error_rate,
                                   aggregation.mean(algorithm.total_gradient_norm)],
                                  suffix="train",
                                  after_epoch=True),
                              TrackTheBest('error_rate_valid'),
                              best_params  # after TrackTheBest!
                              ]

                if verbose:
                    extensions.append(Printing())  # optional
                    extensions.append(ProgressBar())

                main_loop = MainLoop(
                    algorithm,
                    DataStream.default_stream(
                        trainset,
                        iteration_scheme=ShuffledScheme(trainset.num_examples, hyper_params['classifier_batch_size'])),
                    model=model,
                    extensions=extensions)

                main_loop.run()

                fold_weights.append(best_params.values['/linear.W'])
                fold_errors.append(main_loop.status['best_error_rate_valid'])
                # break # FIXME

            fold_errors = np.asarray(fold_errors).squeeze()
            print 'simple NN fold classification errors:', fold_errors

            fold_weights = np.asarray(fold_weights)

            # store filter weights for later analysis
            np.save(fold_weights_filename, fold_weights)

        weights = fold_weights.mean(axis=0)

        linear.parameters[0].set_value(weights)  # install the fold-averaged weights so the compiled predict function uses them

        return model, predict
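
For reference, the fold averaging above (np.asarray(fold_weights).mean(axis=0)) just takes an element-wise mean over the stacked per-fold weight matrices. A minimal standalone NumPy sketch with made-up shapes:

    import numpy as np

    fold_weights = np.stack([np.ones((4, 3)), 3 * np.ones((4, 3))])  # two folds of 4x3 weights
    averaged = fold_weights.mean(axis=0)                             # element-wise mean -> all 2.0
    assert averaged.shape == (4, 3)
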
def main(num_epochs=100):
    x = tensor.matrix('features')
    m = tensor.matrix('features_mask')

    x_int = x.astype(dtype='int32').T
    train_dataset = TextFile('inspirational.txt')
    train_dataset.indexables[0] = numpy.array(sorted(
        train_dataset.indexables[0], key=len
    ))

    n_voc = len(train_dataset.dict.keys())

    # empirical distribution of sentence-initial words, used to seed the sampler below
    first_words = [s[0] for s in train_dataset.indexables[
        train_dataset.sources.index('features')]]
    init_probs = numpy.array(
        [first_words.count(w) for w in xrange(n_voc)],
        dtype=theano.config.floatX
    )
    init_probs = init_probs / init_probs.sum()

    n_h = 100
    linear_embedding = LookupTable(
        length=n_voc,
        dim=n_h,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    linear_embedding.initialize()
    lstm_biases = numpy.zeros(4 * n_h).astype(dtype=theano.config.floatX)
    lstm_biases[n_h:(2 * n_h)] = 4.
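    # note: these LSTM-style biases (forget-gate slice set to 4) are never passed to the
    # SimpleRecurrent below, so they have no effect in this example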
    rnn = SimpleRecurrent(
        dim=n_h,
        activation=Tanh(),
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    rnn.initialize()
    score_layer = Linear(
        input_dim=n_h,
        output_dim=n_voc,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    score_layer.initialize()

    embedding = (linear_embedding.apply(x_int[:-1])
                 * tensor.shape_padright(m.T[1:]))
    rnn_out = rnn.apply(inputs=embedding, mask=m.T[1:])
    probs = softmax(
        sequence_map(score_layer.apply, rnn_out, mask=m.T[1:])[0]
    )
    idx_mask = m.T[1:].nonzero()
    cost = CategoricalCrossEntropy().apply(
        x_int[1:][idx_mask[0], idx_mask[1]],
        probs[idx_mask[0], idx_mask[1]]
    )
    cost.name = 'cost'
    misclassification = MisclassificationRate().apply(
        x_int[1:][idx_mask[0], idx_mask[1]],
        probs[idx_mask[0], idx_mask[1]]
    )
    misclassification.name = 'misclassification'

    cg = ComputationGraph([cost])
    params = cg.parameters

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=Adam()
    )

    train_data_stream = Padding(
        data_stream=DataStream(
            dataset=train_dataset,
            iteration_scheme=BatchwiseShuffledScheme(
                examples=train_dataset.num_examples,
                batch_size=10,
            )
        ),
        mask_sources=('features',)
    )

    model = Model(cost)

    extensions = []
    extensions.append(Timing())
    extensions.append(FinishAfter(after_n_epochs=num_epochs))
    extensions.append(TrainingDataMonitoring(
        [cost, misclassification],
        prefix='train',
        after_epoch=True))

    batch_size = 10
    length = 30
    trng = MRG_RandomStreams(18032015)
    u = trng.uniform(size=(length, batch_size, n_voc))
    gumbel_noise = -tensor.log(-tensor.log(u))
    init_samples = (tensor.log(init_probs).dimshuffle(('x', 0))
                    + gumbel_noise[0]).argmax(axis=-1)
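    # Gumbel-max trick: adding -log(-log(U)) noise to log-probabilities and taking the
    # argmax draws an exact sample from the corresponding categorical distribution
    # (a standalone sketch follows after this example)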
    init_states = rnn.initial_state('states', batch_size)

    def sampling_step(g_noise, states, samples_step):
        embedding_step = linear_embedding.apply(samples_step)
        next_states = rnn.apply(inputs=embedding_step,
                                states=states,
                                iterate=False)
        probs_step = softmax(score_layer.apply(next_states))
        next_samples = (tensor.log(probs_step)
                        + g_noise).argmax(axis=-1)

        return next_states, next_samples

    [_, samples], _ = theano.scan(
        fn=sampling_step,
        sequences=[gumbel_noise[1:]],
        outputs_info=[init_states, init_samples]
    )

    sampling = theano.function([], samples.owner.inputs[0].T)

    plotters = []
    plotters.append(Plotter(
        channels=[['train_cost', 'train_misclassification']],
        titles=['Costs']))

    extensions.append(PlotManager('Language modelling example',
                                  plotters=plotters,
                                  after_epoch=True,
                                  after_training=True))
    extensions.append(Printing())
    extensions.append(PrintSamples(sampler=sampling,
                                   voc=train_dataset.inv_dict))

    main_loop = MainLoop(model=model,
                         data_stream=train_data_stream,
                         algorithm=algorithm,
                         extensions=extensions)

    main_loop.run()
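
The sampler above relies on the Gumbel-max trick: adding -log(-log(U)) noise to log-probabilities and taking the argmax draws an exact sample from the corresponding categorical distribution. A self-contained NumPy sketch of the same idea (the function name and values are illustrative only):

    import numpy as np

    def gumbel_max_sample(log_probs, rng=np.random):
        # log_probs: (possibly unnormalized) log-probabilities, shape (..., n_classes)
        u = rng.uniform(size=log_probs.shape)        # U(0, 1) noise
        gumbel = -np.log(-np.log(u))                 # Gumbel(0, 1) noise
        return (log_probs + gumbel).argmax(axis=-1)  # one categorical sample per row

    # draw 5 samples from the distribution [0.1, 0.2, 0.7]
    samples = gumbel_max_sample(np.log([0.1, 0.2, 0.7]) + np.zeros((5, 3)))
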
Beispiel #42
0
def build_and_run(label, config):
    ############## CREATE THE NETWORK ###############
    #Define the parameters
    num_epochs, num_batches = config['num_epochs'], config['num_batches']
    num_channels, image_shape = config['num_channels'], config['image_shape']
    filter_size, num_filter = config['filter_size'], config['num_filter']
    pooling_sizes, mlp_hiddens = config['pooling_sizes'], config['mlp_hiddens']
    output_size, batch_size = config['output_size'], config['batch_size']
    activation, mlp_activation = config['activation'], config['mlp_activation']
#    print(num_epochs, num_channels, image_shape, filter_size, num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size, activation, mlp_activation)
    lambda_l1 = 0.000025
    lambda_l2 = 0.000025

    print("Building model")
    #Create the symbolics variable
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    #Get the parameters
    conv_parameters = zip(filter_size, num_filter)

    #Create the convolutions layers
    conv_layers = list(interleave([(Convolutional(
                                      filter_size=filter_size,
                                      num_filters=num_filter,
                                      name='conv_{}'.format(i))
                    for i, (filter_size, num_filter)
                    in enumerate(conv_parameters)),
                  (activation),
            (MaxPooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))
        #    (AveragePooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))

    #Create the sequence
    conv_sequence = ConvolutionalSequence(conv_layers, num_channels, image_size=image_shape, weights_init=Uniform(width=0.2), biases_init=Constant(0.))
    #Initialize the convnet
    conv_sequence.initialize()
    #Add the MLP
    top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size]
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation, top_mlp_dims, weights_init=Uniform(0, 0.2),
              biases_init=Constant(0.))
    #Initialize the MLP
    mlp.initialize()
    #Get the output
    predict = mlp.apply(out)

    cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)

    #Little trick to plot the error rate in two different plots (the same variable can't be reused in two plots, for an unknown reason)
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')

    ########### REGULARIZATION ##################
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
  # # l2_penalty_weights = T.sum([i*lambda_l2/len(weights) * (W ** 2).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer
    l2_penalty = T.sum([lambda_l2 * (W ** 2).sum() for W in weights + biases])  # flat L2 penalty on all weights and biases
  # # #l2_penalty_bias = T.sum([lambda_l2*(B **2).sum() for B in biases])
  # # #l2_penalty = l2_penalty_weights + l2_penalty_bias
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1 * T.abs_(z).sum() for z in weights + biases])  # flat L1 penalty on all weights and biases
  #  l1_penalty_weights = T.sum([i*lambda_l1/len(weights) * T.abs_(W).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer    
  #  l1_penalty_biases = T.sum([lambda_l1 * T.abs_(B).sum() for B in biases])
  #  l1_penalty = l1_penalty_biases + l1_penalty_weights
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'
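    # the training algorithm below optimizes the regularized cost; the plain cost is
    # still monitored separately on the validation stream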
    
    ########### DEFINE THE ALGORITHM #############
  #  algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Momentum())
    algorithm = GradientDescent(cost=costreg, parameters=cg.parameters, step_rule=Adam())

    ########### GET THE DATA #####################
    istest = 'test' in config.keys()
    train_stream, valid_stream, test_stream = get_stream(batch_size,image_shape,test=istest)
    

    ########### INITIALIZING EXTENSIONS ##########
    checkpoint = Checkpoint('models/best_'+label+'.tar')
    checkpoint.add_condition(['after_epoch'],
                         predicate=OnLogRecord('valid_error_rate_best_so_far'))
    #Adding a live plot with the bokeh server
    plot = Plot(label,
        channels=[['train_error_rate', 'valid_error_rate'],
                  ['valid_cost', 'valid_error_rate2'],
                 # ['train_costreg','train_grad_norm']], #  
                 ['train_costreg','train_grad_norm','train_l2_penalty','train_l1_penalty']],
                  server_url="http://hades.calculquebec.ca:5042")  
   
    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'

    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs,
                  after_n_batches=num_batches),
                  DataStreamMonitoring([cost, error_rate, error_rate2], valid_stream, prefix="valid"),
                  TrainingDataMonitoring([costreg, error_rate, error_rate2,
                    grad_norm,l2_penalty,l1_penalty],
                     prefix="train", after_epoch=True),
                  plot,
                  ProgressBar(),
                  Printing(),
                  TrackTheBest('valid_error_rate',min), #Keep best
                  checkpoint,  #Save best
                  FinishIfNoImprovementAfter('valid_error_rate_best_so_far', epochs=4)] # Early-stopping                  
    model = Model(cost)
    main_loop = MainLoop(algorithm,data_stream=train_stream,model=model,extensions=extensions)
    main_loop.run()
if mode is "CPU_test":
    data_train_stream = create_data(DogsVsCats(('train',), subset=slice(0, 100)))
    data_valid_stream = create_data(DogsVsCats(('train',), subset=slice(100, 110)))
if mode is  "GPU_run":
    data_train_stream = create_data(DogsVsCats(('train',), subset=slice(0, 22500)))
    data_valid_stream = create_data(DogsVsCats(('train',), subset=slice(22500, 25000)))
if mode is "data_server":
    data_train_stream = ServerDataStream(('image_features','targets'), False, port=5560)
    data_valid_stream = ServerDataStream(('image_features','targets'), False, port=5561)
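    # (this mode assumes a separate fuel-server process is already publishing the
    #  'image_features' and 'targets' sources on ports 5560 and 5561)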


### Setting up the model
probs = top_mlp.apply(conv_out)

cost = CategoricalCrossEntropy().apply(y.flatten(), probs).copy(name='cost')
error = MisclassificationRate().apply(y.flatten(), probs)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')
cg = ComputationGraph([cost, error_rate])

### Gradient Descent
algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Scale(learning_rate=learning_rate))

extensions = [Timing(),
              FinishAfter(after_n_epochs=num_epochs),
              DataStreamMonitoring(
                  [cost, error_rate, error_rate2],
                  data_valid_stream,
                  prefix="valid"),
              TrainingDataMonitoring(
                  [cost, error_rate,
Beispiel #44
0
def train(train_set, test_set, l2_weight=1e-18):
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')

    n_classifiers = 3
    n_classes = 2
    l1 = Linear(
            name='l1',
            input_dim=2,
            output_dim=10,
            weights_init=IsotropicGaussian(0.1),
            biases_init=Constant(0)
    )
    l1.initialize()
    h1 = Logistic().apply(l1.apply(x))

    l2 = Linear(
            name='l2',
            input_dim=l1.output_dim,
            output_dim=n_classes * n_classifiers,
            weights_init=IsotropicGaussian(0.1),
            biases_init=Constant(0)
    )
    l2.initialize()
    h2 = l2.apply(h1)
    y_hat = MultiTargetSoftmax().apply(h2, n_classes, n_classifiers)

    cost = MultiTargetCategoricalCrossEntropy().apply(y, y_hat)
    error = MisclassificationRate().apply(y, y_hat)
    error.name = 'misclassification_rate'

    cg = ComputationGraph(cost)

    for w in VariableFilter(roles=[WEIGHT])(cg.variables):
        cost += l2_weight * (w ** 2).sum()
    cost.name = 'cost_with_regularization'

    # print('W1', W1.get_value())
    # print('W2', W2.get_value())

    algorithm = GradientDescent(
            cost=cost,
            parameters=cg.parameters,
            step_rule=RMSProp()
    )

    data_stream_train = Flatten(
            DataStream.default_stream(
                    train_set,
                    iteration_scheme=ShuffledScheme(train_set.num_examples, batch_size=80)
            )
    )

    data_stream_test = Flatten(
            DataStream.default_stream(
                    test_set,
                    iteration_scheme=SequentialScheme(test_set.num_examples, batch_size=1)
            )
    )

    monitor = DataStreamMonitoring(
            variables=[cost, error],
            data_stream=data_stream_test,
            prefix="test"
    )

    main_loop = MainLoop(
            data_stream=data_stream_train,
            algorithm=algorithm,
            extensions=[
                monitor,
                FinishAfter(after_n_epochs=100),
                Printing(),
                # ProgressBar()
            ]
    )

    main_loop.run()

    return x, y_hat
Beispiel #45
0
def main(save_to, num_epochs, flag, ksize):
    batch_size = 128
    dim = 100
    n_steps = 20
    i2h1 = MLP([Identity()], [784, dim], biases_init=Constant(0.), weights_init=IsotropicGaussian(.001))
    h2o1 = MLP([Rectifier(), Softmax()], [dim, dim, 10], biases_init=Constant(0.), weights_init=IsotropicGaussian(.001))
    rec1 = SimpleRecurrent(dim=dim, activation=Tanh(), weights_init=Orthogonal())
    i2h1.initialize()
    h2o1.initialize()
    rec1.initialize()

    x = tensor.tensor3('features')
    y = tensor.lmatrix('targets')

    preproc = i2h1.apply(x)
    h1 = rec1.apply(preproc)
    probs = tensor.flatten(h2o1.apply(h1[-1]), outdim=2)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)
    cost.name = 'final_cost'
    error_rate.name = 'error_rate'

    cg = ComputationGraph([cost, error_rate])

    mnist_train = MNIST("train", subset=slice(0, 50000))
    mnist_valid = MNIST("train", subset=slice(50000, 60000))
    mnist_test = MNIST("test")
    trainstream = Mapping(Flatten(DataStream(mnist_train,
                          iteration_scheme=SequentialScheme(50000, batch_size))),
                          _meanize(n_steps, flag, ksize))
    validstream = Mapping(Flatten(DataStream(mnist_valid,
                                             iteration_scheme=SequentialScheme(10000,
                                                                               batch_size))),
                          _meanize(n_steps, flag, ksize))
    teststream = Mapping(Flatten(DataStream(mnist_test,
                                            iteration_scheme=SequentialScheme(10000,
                                                                              batch_size))),
                         _meanize(n_steps, flag, ksize))

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=CompositeRule([Adam(), StepClipping(100)]))
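    # CompositeRule applies its rules in order: Adam proposes the update, then
    # StepClipping rescales any step whose norm exceeds 100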
    main_loop = MainLoop(
        algorithm,
        trainstream,
        extensions=[Timing(),
                    FinishAfter(after_n_epochs=num_epochs),
                    DataStreamMonitoring(
                        [cost, error_rate],
                        teststream,
                        prefix="test"),
                    DataStreamMonitoringAndSaving(
                    [cost, error_rate],
                    validstream,
                    [i2h1, h2o1, rec1],
                    'best_'+save_to+'.pkl',
                    cost_name=error_rate.name,
                    after_epoch=True,
                    prefix='valid'
                    ),
                    TrainingDataMonitoring(
                        [cost,
                         aggregation.mean(algorithm.total_gradient_norm)],
                        prefix="train",
                        after_epoch=True),
                    # Plot(
                    #     save_to,
                    #     channels=[
                    #         ['test_final_cost',
                    #          'test_misclassificationrate_apply_error_rate'],
                    #         ['train_total_gradient_norm']]),
                    Printing()])
    main_loop.run()
Beispiel #46
0
def main():
    print("Build the network")
    input_of_image = tensor.matrix('features')
    input_to_hidden = Linear(name='input_to_hidden',
                             input_dim=784,
                             output_dim=100)
    h = Tanh().apply(input_to_hidden.apply(input_of_image))
    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=100,
                              output_dim=10)
    output_hat = Softmax().apply(hidden_to_output.apply(h))

    output = tensor.lmatrix('targets')
    cost = CategoricalCrossEntropy().apply(output.flatten(), output_hat)
    correct_rate = 1 - MisclassificationRate().apply(output.flatten(),
                                                     output_hat)
    correct_rate.name = 'correct_rate'
    print(type(correct_rate))
    cost.name = 'cost'

    cg = ComputationGraph(cost)

    # Initialize the parameters
    input_to_hidden.weights_init = hidden_to_output.weights_init = IsotropicGaussian(
        0.01)
    input_to_hidden.biases_init = hidden_to_output.biases_init = Constant(0)
    input_to_hidden.initialize()
    hidden_to_output.initialize()

    # Train
    print("Prepare the data.")
    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    ## Carve the data into lots of batches.
    data_stream_train = DataStream(mnist_train,
                                   iteration_scheme=SequentialScheme(
                                       mnist_train.num_examples,
                                       batch_size=256))

    ## Set the algorithm for the training.
    algorithm = GradientDescent(cost=cost,
                                params=cg.parameters,
                                step_rule=CompositeRule(
                                    [Scale(0.9),
                                     StepSwitcher(0.05, 0.1)]))

    ## Add a monitor extension for the training.
    data_stream_test = DataStream(
        mnist_test,
        iteration_scheme=SequentialScheme(mnist_test.num_examples,
                                          batch_size=1024))

    test_monitor = DataStreamMonitoring(variables=[cost, correct_rate],
                                        data_stream=data_stream_test,
                                        prefix="test",
                                        after_every_epoch=True)

    train_monitor = TrainingDataMonitoring(
        variables=[cost, correct_rate, algorithm.total_step_norm],
        prefix='train',
        after_every_batch=True)

    ## Add a plot monitor.
    plot = Plot(document='new',
                channels=[['train_correct_rate', 'test_correct_rate']],
                start_server=True,
                after_every_batch=True)

    print("Start training")
    main_loop = MainLoop(algorithm=algorithm,
                         data_stream=data_stream_train,
                         extensions=[
                             plot, test_monitor, train_monitor,
                             FinishAfter(after_n_epochs=20),
                             Printing()
                         ])
    main_loop.run()