Example #1
def setup_model(configs):
    tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
    # shape: T x B x C x X x Y
    input_ = tensor5("features")
    tensor3 = theano.tensor.TensorType(config.floatX, (False,) * 3)
    locs = tensor3("locs")
    # shape: B x Classes
    target = T.ivector("targets")

    model = LSTMAttention(configs, weights_init=Glorot(), biases_init=Constant(0))
    model.initialize()

    (h, c, location, scale, alpha, patch, downn_sampled_input, conved_part_1, conved_part_2, pre_lstm) = model.apply(
        input_, locs
    )

    model.location = location
    model.scale = scale
    model.alpha = alpha
    model.patch = patch

    classifier = MLP(
        [Rectifier(), Softmax()], configs["classifier_dims"], weights_init=Glorot(), biases_init=Constant(0)
    )
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    cost.name = "CE"
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = "ER"
    model.cost = cost
    model.error_rate = error_rate
    model.probabilities = probabilities

    if configs["load_pretrained"]:
        blocks_model = Model(model.cost)
        all_params = blocks_model.parameters
        with open("VGG_CNN_params.npz") as f:
            loaded = np.load(f)
            all_conv_params = loaded.keys()
            for param in all_params:
                if param.name in loaded.keys():
                    assert param.get_value().shape == loaded[param.name].shape
                    param.set_value(loaded[param.name])
                    all_conv_params.pop(all_conv_params.index(param.name))
        print "the following parameters did not match: " + str(all_conv_params)

    if configs["test_model"]:
        print "TESTING THE MODEL: CHECK THE INPUT SIZE!"
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.cost], on_unused_input="ignore", allow_input_downcast=True)
        data = configs["get_streams"](configs["batch_size"])[0].get_epoch_iterator().next()
        f(data[1], data[0], data[2])

        print "Test passed! ;)"

    model.monitorings = [cost, error_rate]

    return model
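
A minimal usage sketch for the function above. Only the configs keys that setup_model itself reads are shown; the full LSTMAttention configuration is project-specific, so the values here are illustrative assumptions, not part of the original example.

# Illustrative configs; the real LSTMAttention class expects more keys.
configs = {
    "classifier_dims": [256, 128, 10],   # assumed; first entry must match the dim of h[-1]
    "load_pretrained": False,
    "test_model": False,
    "get_streams": None,
    "batch_size": 100,
}
model = setup_model(configs)

# Compile monitoring and prediction functions from the returned graph.
cg = ComputationGraph(model.cost)
monitor_fn = theano.function(cg.inputs, model.monitorings,
                             on_unused_input="ignore",
                             allow_input_downcast=True)
predict_fn = theano.function(cg.inputs, model.probabilities,
                             on_unused_input="ignore",
                             allow_input_downcast=True)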
Example #2
def setup_model(configs):

    tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
    # shape: T x B x C x X x Y
    input_ = tensor5('features')
    # shape: B x Classes
    target = T.lmatrix('targets')

    model = LSTMAttention(
        configs,
        weights_init=Glorot(),
        biases_init=Constant(0))
    model.initialize()

    (h, c, location, scale, patch, downn_sampled_input,
        conved_part_1, conved_part_2, pre_lstm) = model.apply(input_)

    classifier = MLP(
        [Rectifier(), Logistic()],
        configs['classifier_dims'],
        weights_init=Glorot(),
        biases_init=Constant(0))
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = BinaryCrossEntropy().apply(target, probabilities)
    cost.name = 'CE'
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = 'ER'
    model.cost = cost

    if configs['load_pretrained']:
        blocks_model = Model(model.cost)
        all_params = blocks_model.parameters
        with open('VGG_CNN_params.npz', 'rb') as f:
            loaded = np.load(f)
            all_conv_params = list(loaded.keys())
            for param in all_params:
                if param.name in loaded.keys():
                    assert param.get_value().shape == loaded[param.name].shape
                    param.set_value(loaded[param.name])
                    all_conv_params.pop(all_conv_params.index(param.name))
        print "the following parameters did not match: " + str(all_conv_params)

    if configs['test_model']:
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.cost],
                            on_unused_input='ignore',
                            allow_input_downcast=True)
        data = np.random.randn(10, 40, 3, 224, 224)
        targs = np.random.randn(40, 101)
        f(data, targs)
        print "Test passed! ;)"

    model.monitorings = [cost, error_rate]

    return model
Example #3
    def pretrain(model,
                 hyper_params,
                 full_hdf5,
                 full_meta,
                 train_selectors,
                 valid_selectors=None):
        """
        generic training method for siamese networks;
        works with any network structure
        :return:
        """
        from theano import tensor
        from blocks.bricks.cost import MisclassificationRate
        from deepthought.datasets.triplet import TripletsIndexDataset
        from deepthought.bricks.cost import HingeLoss

        train_data = TripletsIndexDataset(
            full_hdf5,
            full_meta,
            train_selectors,
            targets_source=hyper_params['pretrain_target_source'],
            group_attribute=hyper_params['group_attribute'])

        if valid_selectors is not None:
            if hyper_params['use_ext_dataset_for_validation']:
                ext_selectors = train_selectors
            else:
                ext_selectors = None

            valid_data = TripletsIndexDataset(
                full_hdf5,
                full_meta,
                valid_selectors,
                ext_selectors=ext_selectors,
                targets_source=hyper_params['pretrain_target_source'],
                group_attribute=hyper_params['group_attribute'])
        else:
            valid_data = None

        # Note: this has to match the sources defined in the dataset
        #y = tensor.lvector('targets')
        y = tensor.lmatrix('targets')

        # Note: this requires a one-hot encoding of the targets
        probs = model.outputs[0]
        cost = HingeLoss().apply(y, probs)
        # Note: this requires just the class labels, not in a one-hot encoding
        error_rate = MisclassificationRate().apply(y.argmax(axis=1), probs)
        error_rate.name = 'error_rate'

        return GenericNNEncoderExperiment.run_pretrain(model, hyper_params,
                                                       cost, train_data,
                                                       valid_data,
                                                       [error_rate])
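
A small worked example of the two notes above: HingeLoss is given one-hot targets, while MisclassificationRate wants plain class labels, which targets.argmax(axis=1) recovers from the one-hot matrix. The numbers below are made up for illustration.

import numpy as np

labels = np.array([0, 2, 1])              # plain class labels
one_hot = np.eye(3, dtype=int)[labels]    # one-hot encoding, as fed to HingeLoss
# one_hot == [[1, 0, 0],
#             [0, 0, 1],
#             [0, 1, 0]]
assert (one_hot.argmax(axis=1) == labels).all()   # argmax recovers the labels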
def main(name, epochs, batch_size, learning_rate, window_size, conv_sizes, num_filters, fc_dim, enc_dim, dec_dim, step, num_digits, num_classes,
         oldmodel, live_plotting):
    channels, img_height, img_width = 1, 100, 100

    rnninits = {
        'weights_init': Uniform(width=0.02),
        'biases_init': Constant(0.),
    }

    inits = {
        'weights_init': IsotropicGaussian(0.001),
        'biases_init': Constant(0.),
    }

    rec_inits = {
        'weights_init': IsotropicGaussian(0.001),
        'biases_init': Constant(0.),
    }

    convinits = {
        'weights_init': Uniform(width=.2),
        'biases_init': Constant(0.),
    }

    n_iter = step * num_digits
    filter_size1, filter_size2 = list(zip(conv_sizes, conv_sizes))
    w_height, w_width = window_size.split(',')
    w_height = int(w_height)
    w_width = int(w_width)

    subdir = time.strftime("%Y-%m-%d") + "-" + name
    if not os.path.exists(subdir):
        os.makedirs(subdir)

    lines = ["\n                Running experiment",
             "          subdirectory: %s" % subdir,
             "         learning rate: %g" % learning_rate,
             "        attention size: %s" % window_size,
             "          n_iterations: %d" % n_iter,
             "     encoder dimension: %d" % enc_dim,
             "     decoder dimension: %d" % dec_dim,
             "            batch size: %d" % batch_size,
             "                epochs: %d" % epochs,
             ]
    for line in lines:
        print(line)
    print()

    rectifier = Rectifier()
    conv1 = Convolutional(filter_size=filter_size2, num_filters=int(num_filters / 2), num_channels=channels, image_size=(w_height, w_width), border_mode='half',
                          name='conv1', **convinits)
    conv1_bn = SpatialBatchNormalization(input_dim=(64, 26, 26), conserve_memory=False, n_iter=n_iter, name='conv1_bn')
    conv2 = Convolutional(filter_size=filter_size2, num_channels=int(num_filters / 2), num_filters=int(num_filters / 2), image_size=(26, 26), name='conv2',
                          **convinits)
    conv2_bn = SpatialBatchNormalization(input_dim=(64, 24, 24), conserve_memory=False, n_iter=n_iter, name='conv2_bn')
    max_pooling = MaxPooling(pooling_size=(2, 2), step=(2, 2))
    conv3 = Convolutional(filter_size=filter_size2, num_filters=num_filters, num_channels=int(num_filters / 2), image_size=(12, 12), border_mode='half',
                          name='conv3', **convinits)
    conv3_bn = SpatialBatchNormalization(input_dim=(128, 12, 12), conserve_memory=False, n_iter=n_iter, name='conv3_bn')
    conv4 = Convolutional(filter_size=filter_size2, num_filters=num_filters, num_channels=num_filters, image_size=(12, 12), border_mode='half', name='conv4',
                          **convinits)
    conv4_bn = SpatialBatchNormalization(input_dim=(128, 12, 12), conserve_memory=False, n_iter=n_iter, name='conv4_bn')
    # Max Pooling
    conv5 = Convolutional(filter_size=filter_size2, num_filters=160, num_channels=num_filters, image_size=(6, 6), border_mode='half', name='conv5',
                          **convinits)
    conv5_bn = SpatialBatchNormalization(input_dim=(160, 6, 6), conserve_memory=False, n_iter=n_iter, name='conv5_bn')
    conv6 = Convolutional(filter_size=filter_size2, num_filters=192, num_channels=160, image_size=(6, 6), name='conv6', **convinits)
    conv6_bn = SpatialBatchNormalization(input_dim=(192, 4, 4), conserve_memory=False, n_iter=n_iter, name='conv6_bn')

    conv_mlp = MLP(activations=[Identity()], dims=[3072, fc_dim], name="MLP_conv", **inits)
    conv_mlp_bn = BatchNormalization(input_dim=fc_dim, conserve_memory=False, n_iter=n_iter, name='conv_mlp_bn')
    loc_mlp = MLP(activations=[Identity()], dims=[6, fc_dim], name="MLP_loc", **inits)
    loc_mlp_bn = BatchNormalization(input_dim=fc_dim, conserve_memory=False, n_iter=n_iter, name='loc_mlp_bn')
    encoder_mlp = MLP([Identity()], [fc_dim, 4 * enc_dim], name="MLP_enc", **rec_inits)
    decoder_mlp = MLP([Identity()], [enc_dim, 4 * dec_dim], name="MLP_dec", **rec_inits)
    encoder_rnn = LSTM(activation=Tanh(), dim=enc_dim, name="RNN_enc", **rnninits)

    conv_init = ConvolutionalSequence(
        [Convolutional(filter_size=filter_size1, num_filters=int(num_filters / 8), name='conv1_init'),
         SpatialBatchNormalization(conserve_memory=False, name='conv1_bn_init'),
         Convolutional(filter_size=filter_size2, num_filters=int(num_filters / 8), name='conv2_init'),
         SpatialBatchNormalization(conserve_memory=False, name='conv2_bn_init'),
         Convolutional(filter_size=filter_size2, num_filters=int(num_filters / 4), name='conv3_init'),
         SpatialBatchNormalization(conserve_memory=False, name='conv3_bn_init'),
         ], image_size=(12, 12), num_channels=channels, name='conv_seq_init', **convinits)

    decoder_rnn = LSTM(activation=Tanh(), dim=dec_dim, name="RNN_dec", **rnninits)
    emit_mlp = MLP(activations=[Tanh()], dims=[dec_dim, 6], name='emit_mlp', weights_init=Constant(0.),
                   biases_init=Constant((1., 0., 0., 0., 1., 0.)))

    classification_mlp1 = MLP(activations=[Identity()], dims=[enc_dim, fc_dim], name='MPL_class1', **inits)
    classification_mlp1_bn = BatchNormalization(input_dim=fc_dim, conserve_memory=False, n_iter=n_iter, name='classification_mlp1_bn')
    classification_mlp2 = MLP(activations=[Identity()], dims=[fc_dim, fc_dim], name='MPL_class2', **inits)
    classification_mlp2_bn = BatchNormalization(input_dim=fc_dim, conserve_memory=False, n_iter=n_iter, name='classification_mlp2_bn')
    classification_mlp3 = MLP(activations=[Softmax()], dims=[fc_dim, num_classes], name='MPL_class3', **inits)

    edram = EDRAM(channels=channels, out_height=w_height, out_width=w_width, n_iter=n_iter, num_classes=num_classes, rectifier=rectifier, conv1=conv1,
                  conv1_bn=conv1_bn, conv2=conv2, conv2_bn=conv2_bn, max_pooling=max_pooling, conv3=conv3, conv3_bn=conv3_bn, conv4=conv4, conv4_bn=conv4_bn,
                  conv5=conv5, conv5_bn=conv5_bn, conv6=conv6, conv6_bn=conv6_bn, conv_mlp=conv_mlp, conv_mlp_bn=conv_mlp_bn,
                  loc_mlp=loc_mlp, loc_mlp_bn=loc_mlp_bn, conv_init=conv_init, encoder_mlp=encoder_mlp, encoder_rnn=encoder_rnn, decoder_mlp=decoder_mlp,
                  decoder_rnn=decoder_rnn, classification_mlp1=classification_mlp1, classification_mlp1_bn=classification_mlp1_bn,
                  classification_mlp2=classification_mlp2, classification_mlp2_bn=classification_mlp2_bn, classification_mlp3=classification_mlp3,
                  emit_mlp=emit_mlp)
    edram.initialize()

    # ------------------------------------------------------------------------
    x = T.ftensor4('features')
    x_coarse = T.ftensor4('features_coarse')
    y = T.ivector('labels')
    wr = T.fmatrix('locations')

    with batch_normalization(edram):
        bn_p, bn_l, m_c1_bn, s_c1_bn, m_c2_bn, s_c2_bn, m_c3_bn, s_c3_bn, m_c4_bn, s_c4_bn, m_c5_bn, s_c5_bn, m_c6_bn, s_c6_bn, \
        m_c_bn, s_c_bn, m_l_bn, s_l_bn, m_cl1_bn, s_cl1_bn, m_cl2_bn, s_cl2_bn = edram.calculate_train(x, x_coarse)

    def compute_cost(p, wr, y, l):
        cost_where = T.dot(T.sqr(wr - l), [1, 0.5, 1, 0.5, 1, 1])
        cost_y = T.stack([T.nnet.categorical_crossentropy(T.maximum(p[i, :], 1e-7), y) for i in range(0, n_iter)])

        return cost_where, cost_y

    cost_where, cost_y = compute_cost(bn_p, wr, y, bn_l)
    bn_cost = cost_y + cost_where
    bn_cost = bn_cost.sum(axis=0)
    bn_cost = bn_cost.mean()
    bn_cost.name = 'cost'

    bn_error_rate = MisclassificationRate().apply(y, bn_p[-1])
    bn_error_rate.name = 'error_rate'

    # ------------------------------------------------------------
    bn_cg = ComputationGraph([bn_cost, bn_error_rate])

    # Prepare algorithm
    algorithm = GradientDescent(
        cost=bn_cg.outputs[0],
        on_unused_sources='ignore',
        parameters=bn_cg.parameters,
        step_rule=CompositeRule([
            RemoveNotFinite(),
            StepClipping(10.),
            Adam(learning_rate)
        ])
    )

    pop_updates = get_batch_normalization_updates(bn_cg)
    update_params = [conv1_bn.population_mean, conv1_bn.population_stdev, conv2_bn.population_mean, conv2_bn.population_stdev, conv3_bn.population_mean,
                     conv3_bn.population_stdev, conv4_bn.population_mean, conv4_bn.population_stdev, conv5_bn.population_mean, conv5_bn.population_stdev,
                     conv6_bn.population_mean, conv6_bn.population_stdev, conv_mlp_bn.population_mean, conv_mlp_bn.population_stdev,
                     loc_mlp_bn.population_mean, loc_mlp_bn.population_stdev, classification_mlp1_bn.population_mean, classification_mlp1_bn.population_stdev,
                     classification_mlp2_bn.population_mean, classification_mlp2_bn.population_stdev]
    update_values = [m_c1_bn, s_c1_bn, m_c2_bn, s_c2_bn, m_c3_bn, s_c3_bn, m_c4_bn, s_c4_bn, m_c5_bn, s_c5_bn, m_c6_bn, s_c6_bn, m_c_bn, s_c_bn, m_l_bn, s_l_bn,
                     m_cl1_bn, s_cl1_bn, m_cl2_bn, s_cl2_bn]

    pop_updates.extend([(p, m) for p, m in zip(update_params, update_values)])

    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate)) for p, m in pop_updates]
    algorithm.add_updates(extra_updates)

    # ------------------------------------------------------------------------
    # Setup monitors

    p, l = edram.calculate_test(x, x_coarse)
    cost_where, cost_y = compute_cost(p, wr, y, l)
    cost = cost_y + cost_where
    cost = cost.sum(axis=0)
    cost = cost.mean()
    cost.name = 'cost'

    error_rate = MisclassificationRate().apply(y, p[-1])
    error_rate.name = 'error_rate'
    monitors = [cost, error_rate]

    plotting_extensions = []
    # Live plotting...
    if live_plotting:
        plot_channels = [
            ['train_cost', 'test_cost'],
            ['train_error_rate', 'test_error_rate'],
        ]
        plotting_extensions = [
            Plot(subdir, channels=plot_channels, server_url='http://155.69.150.60:80/')
        ]

    # ------------------------------------------------------------

    mnist_cluttered_train = MNISTCluttered(which_sets=['train'], sources=('features', 'locations', 'labels'))
    mnist_cluttered_test = MNISTCluttered(which_sets=['test'], sources=('features', 'locations', 'labels'))

    main_loop = MainLoop(
        model=Model([bn_cost]),
        data_stream=DataStream.default_stream(mnist_cluttered_train, iteration_scheme=ShuffledScheme(mnist_cluttered_train.num_examples, batch_size)),
        algorithm=algorithm,
        extensions=[Timing(),
                    FinishAfter(after_n_epochs=epochs),
                    DataStreamMonitoring(monitors,
                                         DataStream.default_stream(mnist_cluttered_train, iteration_scheme=SequentialScheme(mnist_cluttered_train.num_examples,
                                                                                                                            batch_size)), prefix='train'),
                    DataStreamMonitoring(monitors,
                                         DataStream.default_stream(mnist_cluttered_test,
                                                                   iteration_scheme=SequentialScheme(mnist_cluttered_test.num_examples, batch_size)),
                                         prefix="test"),
                    PartsOnlyCheckpoint("{}/{}".format(subdir, name), before_training=False, after_epoch=True, save_separately=['log', ]),
                    TrackTheBest('test_error_rate', 'best_test_error_rate'),
                    BestCheckpount("{}/{}".format(subdir, name), 'best_test_error_rate', save_separately=['model', ]),
                    Printing(),
                    ProgressBar(),
                    PrintingTo("\n".join(lines), "{}/{}_log.txt".format(subdir, name)), ] + plotting_extensions)
    if oldmodel is not None:
        print("Initializing parameters with old model %s" % oldmodel)
        with open(oldmodel, "rb") as f:
            oldmodel = pickle.load(f)
            main_loop.model.set_parameter_values(oldmodel.get_parameter_values())

            main_loop.model.get_top_bricks()[0].conv1_bn.population_mean.set_value(oldmodel.get_top_bricks()[0].conv1_bn.population_mean.get_value())
            main_loop.model.get_top_bricks()[0].conv1_bn.population_stdev.set_value(oldmodel.get_top_bricks()[0].conv1_bn.population_stdev.get_value())

            main_loop.model.get_top_bricks()[0].conv2_bn.population_mean.set_value(oldmodel.get_top_bricks()[0].conv2_bn.population_mean.get_value())
            main_loop.model.get_top_bricks()[0].conv2_bn.population_stdev.set_value(oldmodel.get_top_bricks()[0].conv2_bn.population_stdev.get_value())

            main_loop.model.get_top_bricks()[0].conv3_bn.population_mean.set_value(oldmodel.get_top_bricks()[0].conv3_bn.population_mean.get_value())
            main_loop.model.get_top_bricks()[0].conv3_bn.population_stdev.set_value(oldmodel.get_top_bricks()[0].conv3_bn.population_stdev.get_value())

            main_loop.model.get_top_bricks()[0].conv4_bn.population_mean.set_value(oldmodel.get_top_bricks()[0].conv4_bn.population_mean.get_value())
            main_loop.model.get_top_bricks()[0].conv4_bn.population_stdev.set_value(oldmodel.get_top_bricks()[0].conv4_bn.population_stdev.get_value())

            main_loop.model.get_top_bricks()[0].conv5_bn.population_mean.set_value(oldmodel.get_top_bricks()[0].conv5_bn.population_mean.get_value())
            main_loop.model.get_top_bricks()[0].conv5_bn.population_stdev.set_value(oldmodel.get_top_bricks()[0].conv5_bn.population_stdev.get_value())

            main_loop.model.get_top_bricks()[0].conv6_bn.population_mean.set_value(oldmodel.get_top_bricks()[0].conv6_bn.population_mean.get_value())
            main_loop.model.get_top_bricks()[0].conv6_bn.population_stdev.set_value(oldmodel.get_top_bricks()[0].conv6_bn.population_stdev.get_value())

            main_loop.model.get_top_bricks()[0].loc_mlp_bn.population_mean.set_value(oldmodel.get_top_bricks()[0].loc_mlp_bn.population_mean.get_value())
            main_loop.model.get_top_bricks()[0].loc_mlp_bn.population_stdev.set_value(oldmodel.get_top_bricks()[0].loc_mlp_bn.population_stdev.get_value())

            main_loop.model.get_top_bricks()[0].conv_mlp_bn.population_mean.set_value(oldmodel.get_top_bricks()[0].conv_mlp_bn.population_mean.get_value())
            main_loop.model.get_top_bricks()[0].conv_mlp_bn.population_stdev.set_value(oldmodel.get_top_bricks()[0].conv_mlp_bn.population_stdev.get_value())

            main_loop.model.get_top_bricks()[0].classification_mlp1_bn.population_mean.set_value(
                oldmodel.get_top_bricks()[0].classification_mlp1_bn.population_mean.get_value())
            main_loop.model.get_top_bricks()[0].classification_mlp1_bn.population_stdev.set_value(
                oldmodel.get_top_bricks()[0].classification_mlp1_bn.population_stdev.get_value())

            main_loop.model.get_top_bricks()[0].classification_mlp2_bn.population_mean.set_value(
                oldmodel.get_top_bricks()[0].classification_mlp2_bn.population_mean.get_value())
            main_loop.model.get_top_bricks()[0].classification_mlp2_bn.population_stdev.set_value(
                oldmodel.get_top_bricks()[0].classification_mlp2_bn.population_stdev.get_value())

            main_loop.model.get_top_bricks()[0].conv_init.layers[1].population_mean.set_value(
                oldmodel.get_top_bricks()[0].conv_init.layers[1].population_mean.get_value())
            main_loop.model.get_top_bricks()[0].conv_init.layers[1].population_stdev.set_value(
                oldmodel.get_top_bricks()[0].conv_init.layers[1].population_stdev.get_value())

            main_loop.model.get_top_bricks()[0].conv_init.layers[3].population_mean.set_value(
                oldmodel.get_top_bricks()[0].conv_init.layers[3].population_mean.get_value())
            main_loop.model.get_top_bricks()[0].conv_init.layers[3].population_stdev.set_value(
                oldmodel.get_top_bricks()[0].conv_init.layers[3].population_stdev.get_value())

            main_loop.model.get_top_bricks()[0].conv_init.layers[5].population_mean.set_value(
                oldmodel.get_top_bricks()[0].conv_init.layers[5].population_mean.get_value())
            main_loop.model.get_top_bricks()[0].conv_init.layers[5].population_stdev.set_value(
                oldmodel.get_top_bricks()[0].conv_init.layers[5].population_stdev.get_value())
        del oldmodel
    main_loop.run()
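
The extra_updates above fold each freshly computed batch statistic into the stored population statistic with a fixed decay_rate. A small NumPy sketch of that exponential moving average, with made-up values:

import numpy as np

decay_rate = 0.05
population_mean = np.zeros(3)                    # stored statistic p
for batch_mean in [np.array([1.0, 1.0, 1.0]),    # per-batch statistic m
                   np.array([0.8, 1.2, 1.0])]:
    population_mean = batch_mean * decay_rate + population_mean * (1 - decay_rate)
print(population_mean)   # slowly tracks the batch statistics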
Example #5
    prototype=input_mlp,
)
parallel_nets.initialize()
l_h, r_h = parallel_nets.apply(l_x=l_x, r_x=r_x)

# Concatenate the inputs from the two hidden subnets into a single variable
# for input into the next layer.
merge = tensor.concatenate([l_h, r_h], axis=1)

y_hat = output_mlp.apply(merge)

# Define a cost function to optimize, and a classification error rate:
# Also apply the outputs from the net, and corresponding targets:
cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
error = MisclassificationRate().apply(y.flatten(), y_hat)
error.name = 'error'

# Need to define the computation graph:
graph = ComputationGraph(cost)

# This returns a list of weight vectors for each layer
W = VariableFilter(roles=[WEIGHT])(graph.variables)

# Add some regularization to this model:
lam = 0.001
cost += lam * l2_norm(W)
cost.name = 'entropy'

# This is the model without dropout, but with l2 reg.
model = Model(cost)
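
l2_norm is not defined in this snippet; it presumably refers to blocks.theano_expressions.l2_norm, which collapses a list of tensors into a single L2 norm. An equivalent expression, as a sketch under that assumption:

# Assumes l2_norm(W) == sqrt(sum of squared elements over all weight matrices);
# this mirrors the penalty term added to the cost above.
penalty = lam * tensor.sqrt(sum((w ** 2).sum() for w in W))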
Example #6
def train_net(net, train_stream, test_stream, L1 = None, L2=None, early_stopping=False,
        finish=None, dropout=False, jobid=None, update=None,
        duration= None,
        **ignored):
    x = tensor.tensor4('image_features')
    y = tensor.lmatrix('targets')

    y_hat = net.apply(x)

    #Cost
    cost_before = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    cost_before.name = "cost_without_regularization"

    #Error
    #Taken from brodesf
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = "Misclassification rate"

    #Regularization
    cg = ComputationGraph(cost_before)
    WS = VariableFilter(roles=[WEIGHT])(cg.variables)

    if dropout:
        print("Dropout")
        cg = apply_dropout(cg, WS, 0.5)

    if L1:
        print("L1 with lambda ",L1)
        L1_reg = L1 * sum([abs(W).sum() for W in WS])
        L1_reg.name = "L1 regularization"
        cost_before += L1_reg

    if L2:
        print("L2 with lambda ",L2)
        L2_reg = L2 * sum([(W ** 2).sum() for W in WS])
        L2_reg.name = "L2 regularization"
        cost_before += L2_reg

    cost = cost_before
    cost.name = 'cost_with_regularization'

    #Initialization
    print("Initilization")
    net.initialize()

    #Algorithm
    step_rule = Scale(learning_rate=0.1)
    if update is not None:
        if update == "rmsprop":
            print("Using RMSProp")
            step_rule = RMSProp()

    remove_not_finite = RemoveNotFinite(0.9)
    step_rule = CompositeRule([step_rule, remove_not_finite])
    algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=step_rule)

    print("Extensions")
    extensions = []

    #Monitoring
    monitor = DataStreamMonitoring(variables=[cost, error], data_stream=test_stream, prefix="test")
    extensions.append(monitor)

    def filename(suffix=""):
        prefix = jobid if jobid else str(os.getpid())
        ctime = str(time.time())
        return "checkpoints/" + prefix + "_" + ctime + "_" + suffix + ".zip"

    #Serialization
    #serialization = Checkpoint(filename())
    #extensions.append(serialization)

    notification = "test_"+error.name
    track = TrackTheBest(notification)
    best_notification = track.notification_name
    checkpointbest = SaveBest(best_notification, filename("best"))
    extensions.extend([track, checkpointbest])

    if early_stopping:
        print("Early stopping")
        stopper = FinishIfNoImprovementAfterPlus(best_notification)
        extensions.append(stopper)

    #Other extensions
    if finish != None:
        print("Force finish ", finish)
        extensions.append(FinishAfter(after_n_epochs=finish))

    if duration != None:
        print("Stop after " , duration, " seconds")
        extensions.append(FinishAfterTime(duration))

    extensions.extend([
        Timing(),
        Printing()
        ])

    #Main loop
    main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm, extensions=extensions)

    print("Main loop start")
    main_loop.run()
Example #7
def train_net(net,
              train_stream,
              test_stream,
              L1=None,
              L2=None,
              early_stopping=False,
              finish=None,
              dropout=False,
              jobid=None,
              update=None,
              duration=None,
              **ignored):
    x = tensor.tensor4('image_features')
    y = tensor.lmatrix('targets')

    y_hat = net.apply(x)

    #Cost
    cost_before = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    cost_before.name = "cost_without_regularization"

    #Error
    #Taken from brodesf
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = "Misclassification rate"

    #Regularization
    cg = ComputationGraph(cost_before)
    WS = VariableFilter(roles=[WEIGHT])(cg.variables)

    if dropout:
        print("Dropout")
        cg = apply_dropout(cg, WS, 0.5)

    if L1:
        print("L1 with lambda ", L1)
        L1_reg = L1 * sum([abs(W).sum() for W in WS])
        L1_reg.name = "L1 regularization"
        cost_before += L1_reg

    if L2:
        print("L2 with lambda ", L2)
        L2_reg = L2 * sum([(W**2).sum() for W in WS])
        L2_reg.name = "L2 regularization"
        cost_before += L2_reg

    cost = cost_before
    cost.name = 'cost_with_regularization'

    #Initialization
    print("Initilization")
    net.initialize()

    #Algorithm
    step_rule = Scale(learning_rate=0.1)
    if update is not None:
        if update == "rmsprop":
            print("Using RMSProp")
            step_rule = RMSProp()

    remove_not_finite = RemoveNotFinite(0.9)
    step_rule = CompositeRule([step_rule, remove_not_finite])
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=step_rule)

    print("Extensions")
    extensions = []

    #Monitoring
    monitor = DataStreamMonitoring(variables=[cost, error],
                                   data_stream=test_stream,
                                   prefix="test")
    extensions.append(monitor)

    def filename(suffix=""):
        prefix = jobid if jobid else str(os.getpid())
        ctime = str(time.time())
        return "checkpoints/" + prefix + "_" + ctime + "_" + suffix + ".zip"

    #Serialization
    #serialization = Checkpoint(filename())
    #extensions.append(serialization)

    notification = "test_" + error.name
    track = TrackTheBest(notification)
    best_notification = track.notification_name
    checkpointbest = SaveBest(best_notification, filename("best"))
    extensions.extend([track, checkpointbest])

    if early_stopping:
        print("Early stopping")
        stopper = FinishIfNoImprovementAfterPlus(best_notification)
        extensions.append(stopper)

    #Other extensions
    if finish != None:
        print("Force finish ", finish)
        extensions.append(FinishAfter(after_n_epochs=finish))

    if duration != None:
        print("Stop after ", duration, " seconds")
        extensions.append(FinishAfterTime(duration))

    extensions.extend([Timing(), Printing()])

    #Main loop
    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)

    print("Main loop start")
    main_loop.run()
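
FinishAfterTime is not one of the stock Blocks extensions; a minimal sketch of how such an extension might be written against the SimpleExtension interface (the body below is an assumption, not the project's implementation):

import time
from blocks.extensions import SimpleExtension

class FinishAfterTime(SimpleExtension):
    """Request that training stop once `duration` seconds have elapsed."""
    def __init__(self, duration, **kwargs):
        kwargs.setdefault("after_batch", True)
        super(FinishAfterTime, self).__init__(**kwargs)
        self.duration = duration
        self.start_time = time.time()

    def do(self, which_callback, *args):
        if time.time() - self.start_time > self.duration:
            # The same mechanism FinishAfter uses to stop the main loop.
            self.main_loop.log.current_row['training_finish_requested'] = True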
Example #8
def main(job_id, params):

    config = ConfigParser.ConfigParser()
    config.readfp(open('./params'))

    max_epoch = int(config.get('hyperparams', 'max_iter', 100))
    base_lr = float(config.get('hyperparams', 'base_lr', 0.01))
    train_batch = int(config.get('hyperparams', 'train_batch', 256))
    valid_batch = int(config.get('hyperparams', 'valid_batch', 512))
    test_batch = int(config.get('hyperparams', 'valid_batch', 512))
    hidden_units = int(config.get('hyperparams', 'hidden_units', 16))


    W_sd = float(config.get('hyperparams', 'W_sd', 0.01))
    W_mu = float(config.get('hyperparams', 'W_mu', 0.0))
    b_sd = float(config.get('hyperparams', 'b_sd', 0.01))
    b_mu = float(config.get('hyperparams', 'b_mu', 0.0))

    dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2))
    weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001))
    max_norm = float(config.get('hyperparams', 'max_norm', 100.0))
    solver = config.get('hyperparams', 'solver_type', 'rmsprop')
    data_file = config.get('hyperparams', 'data_file')
    fine_tune = config.getboolean('hyperparams', 'fine_tune')

    # Spearmint optimization parameters:
    if params:
        base_lr = float(params['base_lr'][0])
        dropout_ratio = float(params['dropout_ratio'][0])
        hidden_units = params['hidden_units'][0]
        weight_decay = params['weight_decay'][0]

    if 'adagrad' in solver:
        solver_type = CompositeRule([AdaGrad(learning_rate=base_lr), VariableClipping(threshold=max_norm)])
    else:
        solver_type = CompositeRule([RMSProp(learning_rate=base_lr), VariableClipping(threshold=max_norm)])

    rn_file = '/projects/francisco/repositories/NI-ML/models/deepnets/blocks/ff/models/rnet/2015-06-25-18:13'
    ln_file = '/projects/francisco/repositories/NI-ML/models/deepnets/blocks/ff/models/lnet/2015-06-29-11:45'

    right_dim = 10519
    left_dim = 11427

    train = H5PYDataset(data_file, which_set='train')
    valid = H5PYDataset(data_file, which_set='valid')
    test = H5PYDataset(data_file, which_set='test')

    l_x = tensor.matrix('l_features')
    r_x = tensor.matrix('r_features')
    y = tensor.lmatrix('targets')

    lnet = load(ln_file).model.get_top_bricks()[0]
    rnet = load(rn_file).model.get_top_bricks()[0]

    # Pre-trained layers:

    # Inputs -> hidden_1 -> hidden 2
    for side, net in zip(['l', 'r'], [lnet, rnet]):
        for child in net.children:
            child.name = side + '_' + child.name

    ll1 = lnet.children[0]
    lr1 = lnet.children[1]
    ll2 = lnet.children[2]
    lr2 = lnet.children[3]

    rl1 = rnet.children[0]
    rr1 = rnet.children[1]
    rl2 = rnet.children[2]
    rr2 = rnet.children[3]

    l_h = lr2.apply(ll2.apply(lr1.apply(ll1.apply(l_x))))
    r_h = rr2.apply(rl2.apply(rr1.apply(rl1.apply(r_x))))

    input_dim = ll2.output_dim + rl2.output_dim

    # hidden_2 -> hidden_3 -> hidden_4 -> Logistic output
    output_mlp = MLP(activations=[
        Rectifier(name='h3'),
        Rectifier(name='h4'),
        Softmax(name='output'),
    ],
                     dims=[
                         input_dim,
                         hidden_units,
                         hidden_units,
                         2,
                     ],
                     weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
                     biases_init=IsotropicGaussian(std=W_sd, mean=W_mu))

    output_mlp.initialize()

    # # Concatenate the inputs from the two hidden subnets into a single variable
    # # for input into the next layer.
    merge = tensor.concatenate([l_h, r_h], axis=1)
    #
    y_hat = output_mlp.apply(merge)

    # Define a cost function to optimize, and a classification error rate.
    # Also apply the outputs from the net and corresponding targets:
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'error'

    # This is the model: before applying dropout
    model = Model(cost)

    # Need to define the computation graph for the cost func:
    cost_graph = ComputationGraph([cost])

    # This returns a list of weight vectors for each layer
    W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)

    # Add some regularization to this model:
    cost += weight_decay * l2_norm(W)
    cost.name = 'entropy'

    # computational graph with l2 reg
    cost_graph = ComputationGraph([cost])

    # Apply dropout to inputs:
    inputs = VariableFilter([INPUT])(cost_graph.variables)
    dropout_inputs = [input for input in inputs if input.name.startswith('linear_')]
    dropout_graph = apply_dropout(cost_graph, [dropout_inputs[0]], 0.2)
    dropout_graph = apply_dropout(dropout_graph, dropout_inputs[1:], dropout_ratio)
    dropout_cost = dropout_graph.outputs[0]
    dropout_cost.name = 'dropout_entropy'

    # If no fine-tuning of l-r models is wanted, find the params for only
    # the joint layers:
    if fine_tune:
        params_to_update = dropout_graph.parameters
    else:
        params_to_update = VariableFilter([PARAMETER], bricks=output_mlp.children)(cost_graph)

    # Learning Algorithm:
    algo = GradientDescent(
        step_rule=solver_type,
        params=params_to_update,
        cost=dropout_cost)

    # algo.step_rule.learning_rate.name = 'learning_rate'

    # Data stream used for training model:
    training_stream = Flatten(
        DataStream.default_stream(
            dataset=train,
            iteration_scheme=ShuffledScheme(
                train.num_examples,
                batch_size=train_batch)))

    training_monitor = TrainingDataMonitoring([dropout_cost,
                                               aggregation.mean(error),
                                               aggregation.mean(algo.total_gradient_norm)],
                                              after_batch=True)


    # Use the 'valid' set for validation during training:
    validation_stream = Flatten(
        DataStream.default_stream(
            dataset=valid,
            iteration_scheme=ShuffledScheme(
                valid.num_examples,
                batch_size=valid_batch)))

    validation_monitor = DataStreamMonitoring(
        variables=[cost, error],
        data_stream=validation_stream,
        prefix='validation',
        after_epoch=True)

    test_stream = Flatten(
        DataStream.default_stream(
            dataset=test,
            iteration_scheme=ShuffledScheme(
                test.num_examples,
                batch_size=test_batch)))

    test_monitor = DataStreamMonitoring(
        variables=[error],
        data_stream=test_stream,
        prefix='test',
        after_training=True)

    plotting = Plot('AdniNet_LeftRight',
                    channels=[
                        ['dropout_entropy'],
                        ['error', 'validation_error'],
                    ],
    )

    # Checkpoint class used to save model and log:
    stamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d-%H:%M')
    checkpoint = Checkpoint('./models/{}'.format(stamp),
                            save_separately=['model', 'log'],
                            every_n_epochs=1)

    # The main loop will train the network and output reports, etc
    main_loop = MainLoop(
        data_stream=training_stream,
        model=model,
        algorithm=algo,
        extensions=[
            validation_monitor,
            training_monitor,
            plotting,
            FinishAfter(after_n_epochs=max_epoch),
            FinishIfNoImprovementAfter(notification_name='validation_error', epochs=1),
            Printing(),
            ProgressBar(),
            checkpoint,
            test_monitor,
        ])
    main_loop.run()
    ve = float(main_loop.log.last_epoch_row['validation_error'])
    te = float(main_loop.log.last_epoch_row['error'])
    spearmint_loss = ve + abs(te - ve)
    print 'Spearmint Loss: {}'.format(spearmint_loss)
    return spearmint_loss
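
The value reported to Spearmint is the validation error plus the absolute train/validation gap, so a model that overfits is penalised even when its validation error looks acceptable. A quick illustration with made-up numbers:

# Hypothetical error rates, for illustration only.
ve, te = 0.20, 0.10               # overfit: large train/validation gap
print(ve + abs(te - ve))          # 0.30
ve, te = 0.20, 0.19               # similar validation error, small gap
print(ve + abs(te - ve))          # 0.21  -> preferred by Spearmint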
Example #9
mlp.biases_init = Constant(0.0)
mlp.initialize()

lin = Linear(200, 10, use_bias=True)
lin.weights_init = Uniform(0.0, 0.01)
lin.biases_init = Constant(0.0)
lin.initialize()

train_out = lin.apply(mlp.apply(flat_x))
test_out = lin.apply(mlp.apply(flat_x))

sm = Softmax(name='softmax')
loss = sm.categorical_cross_entropy(flat_y, train_out).mean()
loss.name = 'nll'
misclass = MisclassificationRate().apply(flat_y, train_out)
misclass.name = 'misclass'

test_loss = sm.categorical_cross_entropy(flat_y, test_out).mean()
test_loss.name = 'nll'
test_misclass = MisclassificationRate().apply(flat_y, test_out)
test_misclass.name = 'misclass'

model = Model(loss)

######################
# Data
######################
import numpy
#from mnist import MNIST
from fuel.datasets.mnist import MNIST
from fuel.transformers import ScaleAndShift, ForceFloatX
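
The snippet stops right after the data imports. A minimal sketch of how these transformers are commonly chained for MNIST, scaling the uint8 pixels into [0, 1]; the constants and batch size are assumptions, not taken from the original:

from fuel.streams import DataStream
from fuel.schemes import ShuffledScheme

mnist_train = MNIST(('train',))
train_stream = ForceFloatX(
    ScaleAndShift(
        DataStream(mnist_train,
                   iteration_scheme=ShuffledScheme(mnist_train.num_examples, 128)),
        scale=1.0 / 255.0, shift=0.0,          # map uint8 pixels to [0, 1]
        which_sources=('features',)))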
Example #10
def main(job_id, params, config_file='params.ec'):
    config = ConfigParser.ConfigParser()
    config.readfp(open('./configs/{}'.format(config_file)))

    pr = pprint.PrettyPrinter(indent=4)
    pr.pprint(config)

    net_name = config.get('hyperparams', 'net_name', 'adni')
    struct_name = net_name.split('_')[0]

    max_epoch = int(config.get('hyperparams', 'max_iter', 100))
    base_lr = float(config.get('hyperparams', 'base_lr', 0.01))
    train_batch = int(config.get('hyperparams', 'train_batch', 256))
    valid_batch = int(config.get('hyperparams', 'valid_batch', 512))
    test_batch = int(config.get('hyperparams', 'valid_batch', 512))

    W_sd = float(config.get('hyperparams', 'W_sd', 0.01))
    W_mu = float(config.get('hyperparams', 'W_mu', 0.0))
    b_sd = float(config.get('hyperparams', 'b_sd', 0.01))
    b_mu = float(config.get('hyperparams', 'b_mu', 0.0))

    hidden_units = int(config.get('hyperparams', 'hidden_units', 32))
    input_dropout_ratio = float(config.get('hyperparams', 'input_dropout_ratio', 0.2))
    dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2))
    weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001))
    max_norm = float(config.get('hyperparams', 'max_norm', 100.0))
    solver = config.get('hyperparams', 'solver_type', 'rmsprop')
    data_file = config.get('hyperparams', 'data_file')
    side = config.get('hyperparams', 'side', 'b')

    input_dim = input_dims[struct_name]

    # Spearmint optimization parameters:
    if params:
        base_lr = float(params['base_lr'][0])
        dropout_ratio = float(params['dropout_ratio'][0])
        hidden_units = params['hidden_units'][0]
        weight_decay = params['weight_decay'][0]

    if 'adagrad' in solver:
        solver_type = CompositeRule([AdaGrad(learning_rate=base_lr), VariableClipping(threshold=max_norm)])
    else:
        solver_type = CompositeRule([RMSProp(learning_rate=base_lr), VariableClipping(threshold=max_norm)])


    data_file = config.get('hyperparams', 'data_file')

    if 'b' in side:
        train = H5PYDataset(data_file, which_set='train')
        valid = H5PYDataset(data_file, which_set='valid')
        test = H5PYDataset(data_file, which_set='test')
        x_l = tensor.matrix('l_features')
        x_r = tensor.matrix('r_features')
        x = tensor.concatenate([x_l, x_r], axis=1)

    else:
        train = H5PYDataset(data_file, which_set='train', sources=['{}_features'.format(side), 'targets'])
        valid = H5PYDataset(data_file, which_set='valid', sources=['{}_features'.format(side), 'targets'])
        test = H5PYDataset(data_file, which_set='test', sources=['{}_features'.format(side), 'targets'])
        x = tensor.matrix('{}_features'.format(side))

    y = tensor.lmatrix('targets')


    # Define a feed-forward net with an input, two hidden layers, and a softmax output:
    model = MLP(activations=[
        Rectifier(name='h1'),
        Rectifier(name='h2'),
        Softmax(name='output'),
    ],
                dims=[
                    input_dim[side],
                    hidden_units,
                    hidden_units,
                    2],
                weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
                biases_init=IsotropicGaussian(b_sd, b_mu))

    # Don't forget to initialize params:
    model.initialize()

    # y_hat is the output of the neural net with x as its inputs
    y_hat = model.apply(x)

    # Define a cost function to optimize, and a classification error rate.
    # Also apply the outputs from the net and corresponding targets:
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'error'

    # This is the model: before applying dropout
    model = Model(cost)

    # Need to define the computation graph for the cost func:
    cost_graph = ComputationGraph([cost])

    # This returns a list of weight vectors for each layer
    W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)

    # Add some regularization to this model:
    cost += weight_decay * l2_norm(W)
    cost.name = 'entropy'

    # computational graph with l2 reg
    cost_graph = ComputationGraph([cost])

    # Apply dropout to inputs:
    inputs = VariableFilter([INPUT])(cost_graph.variables)
    dropout_inputs = [input for input in inputs if input.name.startswith('linear_')]
    dropout_graph = apply_dropout(cost_graph, [dropout_inputs[0]], input_dropout_ratio)
    dropout_graph = apply_dropout(dropout_graph, dropout_inputs[1:], dropout_ratio)
    dropout_cost = dropout_graph.outputs[0]
    dropout_cost.name = 'dropout_entropy'

    # Learning Algorithm (notice: we use the dropout cost for learning):
    algo = GradientDescent(
        step_rule=solver_type,
        params=dropout_graph.parameters,
        cost=dropout_cost)

    # algo.step_rule.learning_rate.name = 'learning_rate'

    # Data stream used for training model:
    training_stream = Flatten(
        DataStream.default_stream(
            dataset=train,
            iteration_scheme=ShuffledScheme(
                train.num_examples,
                batch_size=train_batch)))

    training_monitor = TrainingDataMonitoring([dropout_cost,
                                               aggregation.mean(error),
                                               aggregation.mean(algo.total_gradient_norm)],
                                              after_batch=True)

    # Use the 'valid' set for validation during training:
    validation_stream = Flatten(
        DataStream.default_stream(
            dataset=valid,
            iteration_scheme=ShuffledScheme(
                valid.num_examples,
                batch_size=valid_batch)))

    validation_monitor = DataStreamMonitoring(
        variables=[cost, error],
        data_stream=validation_stream,
        prefix='validation',
        after_epoch=True)

    test_stream = Flatten(
        DataStream.default_stream(
            dataset=test,
            iteration_scheme=ShuffledScheme(
                test.num_examples,
                batch_size=test_batch)))

    test_monitor = DataStreamMonitoring(
        variables=[error],
        data_stream=test_stream,
        prefix='test',
        after_training=True)


    plotting = Plot('{}_{}'.format(net_name, side),
                    channels=[
                        ['dropout_entropy'],
                        ['error', 'validation_error'],
                    ],
                    after_batch=False)

    # Checkpoint class used to save model and log:
    stamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d-%H:%M')
    checkpoint = Checkpoint('./models/{}/{}/{}'.format(struct_name, side, stamp),
                            save_separately=['model', 'log'],
                            every_n_epochs=1)

    # Home-brewed class for early stopping when we detect we have started to overfit:
    # And by that I mean if the means of the val error and training error over the
    # previous 'epochs' is greater than the 'threshold', we are overfitting.
    early_stopper = FinishIfOverfitting(error_name='error',
                                        validation_name='validation_error',
                                        threshold=0.05,
                                        epochs=5,
                                        burn_in=100)

    # The main loop will train the network and output reports, etc
    main_loop = MainLoop(
        data_stream=training_stream,
        model=model,
        algorithm=algo,
        extensions=[
            validation_monitor,
            training_monitor,
            plotting,
            FinishAfter(after_n_epochs=max_epoch),
            early_stopper,
            Printing(),
            ProgressBar(),
            checkpoint,
            test_monitor,
        ])
    main_loop.run()

    ve = float(main_loop.log.last_epoch_row['validation_error'])
    te = float(main_loop.log.last_epoch_row['error'])
    spearmint_loss = ve + abs(te - ve)
    print 'Spearmint Loss: {}'.format(spearmint_loss)
    return spearmint_loss
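
FinishIfOverfitting is described in the comment above but not shown; a minimal sketch of one way to express that rule with SimpleExtension (the class body is an assumption based on the comment, not the original code):

from blocks.extensions import SimpleExtension

class FinishIfOverfitting(SimpleExtension):
    """Stop when validation error pulls away from training error."""
    def __init__(self, error_name, validation_name, threshold, epochs, burn_in,
                 **kwargs):
        kwargs.setdefault('after_epoch', True)
        super(FinishIfOverfitting, self).__init__(**kwargs)
        self.error_name = error_name
        self.validation_name = validation_name
        self.threshold = threshold
        self.epochs = epochs
        self.burn_in = burn_in
        self.gaps = []
        self.epochs_seen = 0

    def do(self, which_callback, *args):
        row = self.main_loop.log.current_row
        if self.error_name not in row or self.validation_name not in row:
            return
        self.epochs_seen += 1
        gap = row[self.validation_name] - row[self.error_name]
        self.gaps = (self.gaps + [gap])[-self.epochs:]
        mean_gap = sum(self.gaps) / float(len(self.gaps))
        if self.epochs_seen > self.burn_in and mean_gap > self.threshold:
            row['training_finish_requested'] = True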
Example #11
def main(num_epochs=50, batch_normalized=True, alpha=0.1):
    """Run the example.

    Parameters
    ----------
    num_epochs : int, optional
        Number of epochs for which to train.

    batch_normalized : bool, optional
        Batch-normalize the training graph. Defaults to `True`.

    alpha : float, optional
        Weight to apply to a new sample when calculating running
        averages for population statistics (1 - alpha weight is
        given to the existing average).

    """
    if batch_normalized:
        # Add an extra keyword argument that only BatchNormalizedMLP takes,
        # in order to speed things up at the cost of a bit of extra memory.
        mlp_class = BatchNormalizedMLP
        extra_kwargs = {'conserve_memory': False}
    else:
        mlp_class = MLP
        extra_kwargs = {}
    mlp = mlp_class([Logistic(), Logistic(),
                     Logistic(), Softmax()], [2, 5, 5, 5, 3],
                    weights_init=IsotropicGaussian(0.2),
                    biases_init=Constant(0.),
                    **extra_kwargs)
    mlp.initialize()

    # Generate a dataset with 3 spiral arms, using 8000 examples for
    # training and 2000 for testing.
    dataset = Spiral(num_examples=10000,
                     classes=3,
                     sources=['features', 'label'],
                     noise=0.05)
    train_stream = DataStream(dataset,
                              iteration_scheme=ShuffledScheme(examples=8000,
                                                              batch_size=20))
    test_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(
                                 examples=list(range(8000, 10000)),
                                 batch_size=2000))

    # Build a cost graph; this contains BatchNormalization bricks that will
    # by default run in inference mode.
    features = tensor.matrix('features')
    label = tensor.lvector('label')
    prediction = mlp.apply(features)
    cost = CategoricalCrossEntropy().apply(label, prediction)
    misclass = MisclassificationRate().apply(label, prediction)
    misclass.name = 'misclass'  # The default name for this is annoyingly long
    original_cg = ComputationGraph([cost, misclass])

    if batch_normalized:
        cg = apply_batch_normalization(original_cg)
        # Add updates for population parameters
        pop_updates = get_batch_normalization_updates(cg)
        extra_updates = [(p, m * alpha + p * (1 - alpha))
                         for p, m in pop_updates]
    else:
        cg = original_cg
        extra_updates = []

    algorithm = GradientDescent(step_rule=Adam(0.001),
                                cost=cg.outputs[0],
                                parameters=cg.parameters)
    algorithm.add_updates(extra_updates)

    main_loop = MainLoop(
        algorithm=algorithm,
        data_stream=train_stream,
        # Use the original cost and misclass variables so
        # that we monitor the (original) inference-mode graph.
        extensions=[
            DataStreamMonitoring([cost, misclass],
                                 train_stream,
                                 prefix='train'),
            DataStreamMonitoring([cost, misclass], test_stream, prefix='test'),
            Printing(),
            FinishAfter(after_n_epochs=num_epochs)
        ])
    main_loop.run()
    return main_loop
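
Because the BatchNormalization bricks default to inference mode (using the population statistics maintained via extra_updates), a prediction function can be compiled straight from the brick after training. A short sketch, assuming main() is modified to also expose the mlp brick it builds:

import numpy
import theano
from theano import tensor

# `mlp` stands for the trained brick constructed inside main(); main() would
# need to return it for this to run as written.
features = tensor.matrix('features')
predict = theano.function([features], mlp.apply(features).argmax(axis=1))
points = numpy.random.rand(5, 2).astype(theano.config.floatX)
print(predict(points))   # predicted spiral-arm indices in {0, 1, 2}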
Example #12
from blocks.bricks.cost import MisclassificationRate


x = tensor.matrix('features')
y = tensor.lmatrix('targets')

lin1 = Linear(name='lin1', input_dim=126, output_dim=50, weights_init=Constant(0.005), biases_init=Constant(0))
act1_sigmoid = Logistic().apply(lin1.apply(x))
lin2 = Linear(name='lin2', input_dim=50, output_dim=2, weights_init=Constant(0.001), biases_init=Constant(0))
act2_softmax = Softmax().apply(lin2.apply(act1_sigmoid))

lin1.initialize()
lin2.initialize()

missclass = MisclassificationRate().apply(y.argmax(axis=1), act2_softmax)
missclass.name = 'missclassification'

cost = CategoricalCrossEntropy().apply(y, act2_softmax)

comp_graph = ComputationGraph([cost])

W1, W2 = VariableFilter(roles=[WEIGHT])(comp_graph.variables)

cost = cost + 0.005 * (W1**2).sum() + 0.005 * (W2**2).sum()
cost.name = 'cost'

from blocks.algorithms import GradientDescent, Scale
from blocks.extensions import FinishAfter, Printing, ProgressBar
from blocks.extensions.monitoring import DataStreamMonitoring
from fuel.transformers import Flatten
from fuel.streams import DataStream
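
The snippet breaks off after the imports; a minimal sketch of how the imported pieces are typically wired together in Blocks. The toy dataset below is a stand-in invented to match the 126-dimensional input and two-class one-hot targets, not the original data:

import numpy
import theano
from collections import OrderedDict
from fuel.datasets import IndexableDataset
from fuel.schemes import SequentialScheme
from blocks.main_loop import MainLoop

# Random stand-in data with the feature/target shapes expected by lin1 and y.
rng = numpy.random.RandomState(0)
toy = IndexableDataset(OrderedDict([
    ('features', rng.rand(512, 126).astype(theano.config.floatX)),
    ('targets', numpy.eye(2, dtype='int64')[rng.randint(0, 2, 512)]),
]))
train_stream = Flatten(DataStream(
    toy, iteration_scheme=SequentialScheme(toy.num_examples, 64)))

algorithm = GradientDescent(cost=cost, parameters=comp_graph.parameters,
                            step_rule=Scale(learning_rate=0.1))
main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                     extensions=[FinishAfter(after_n_epochs=5),
                                 ProgressBar(), Printing()])
main_loop.run()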
Example #13
    def train(self, X, Y, idx_folds, hyper_params, model_prefix, verbose=False):

        import os
        from collections import OrderedDict
        from fuel.datasets import IndexableDataset
        from blocks.model import Model
        from blocks.bricks import Linear, Softmax
        from blocks.bricks.conv import MaxPooling
        from blocks.initialization import Uniform
        from deepthought.bricks.cost import HingeLoss
        import numpy as np
        import theano
        from theano import tensor

        assert model_prefix is not None

        fold_weights_filename = '{}_weights.npy'.format(model_prefix)

        # convert Y to one-hot encoding
        n_classes = len(set(Y))
        Y = np.eye(n_classes, dtype=int)[Y]

        features = tensor.matrix('features', dtype=theano.config.floatX)
        targets = tensor.lmatrix('targets')

        input_ = features

        dim = X.shape[-1]
        
        # optional additional layers
        if self.pipeline_factory is not None:
            # need to re-shape flattened input to restore bc01 format
            input_shape = (input_.shape[0],) + hyper_params['classifier_input_shape']  # tuple, uses actual batch size
            input_ = input_.reshape(input_shape)

            pipeline = self.pipeline_factory.build_pipeline(input_shape, hyper_params)
            input_ = pipeline.apply(input_)                        
            input_ = input_.flatten(ndim=2)
            
            # this is very hacky, but there seems to be no elegant way to obtain a value for dim
            dummy_fn = theano.function(inputs=[features], outputs=input_)
            dummy_out = dummy_fn(X[:1])
            dim = dummy_out.shape[-1]
            
            
        if hyper_params['classifier_pool_width'] > 1:
            # FIXME: this is probably broken!
            
    #        c = hyper_params['num_components']
    #        input_ = input_.reshape((input_.shape[0], c, input_.shape[-1] // c, 1))  # restore bc01
            # need to re-shape flattened input to restore bc01 format
            input_shape = hyper_params['classifier_pool_input_shape']  # tuple
            input_ = input_.reshape(input_shape)

            pool = MaxPooling(name='pool',
                              input_dim=input_shape[1:],  # (c, X.shape[-1] // c, 1),
                              pooling_size=(hyper_params['classifier_pool_width'], 1),
                              step=(hyper_params['classifier_pool_stride'], 1))
            input_ = pool.apply(input_)
            input_ = input_.reshape((input_.shape[0], tensor.prod(input_.shape[1:])))

            dim = np.prod(pool.get_dim('output'))


        linear = Linear(name='linear',
                        input_dim=dim,
                        output_dim=n_classes,
                        weights_init=Uniform(mean=0, std=0.01),
                        use_bias=False)
        linear.initialize()

        softmax = Softmax('softmax')

        probs = softmax.apply(linear.apply(input_))
        prediction = tensor.argmax(probs, axis=1)

        model = Model(probs)  # classifier with raw probability outputs
        predict = theano.function([features], prediction)  # ready-to-use predict function

        if os.path.isfile(fold_weights_filename):
            # load filter weights from existing file
            fold_weights = np.load(fold_weights_filename)
            print 'loaded filter weights from', fold_weights_filename
        else:
            # train model

            from blocks.bricks.cost import MisclassificationRate
            from blocks.filter import VariableFilter
            from blocks.graph import ComputationGraph
            from blocks.roles import WEIGHT
            from blocks.bricks import Softmax
            from blocks.model import Model
            from blocks.algorithms import GradientDescent, Adam
            from blocks.extensions import FinishAfter, Timing, Printing, ProgressBar
            from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
            from blocks.extensions.predicates import OnLogRecord
            from fuel.streams import DataStream
            from fuel.schemes import SequentialScheme, ShuffledScheme
            from blocks.monitoring import aggregation
            from blocks.main_loop import MainLoop
            from blocks.extensions.training import TrackTheBest
            from deepthought.extensions.parameters import BestParams
            # from deepthought.datasets.selection import DatasetMetaDB

            init_param_values = model.get_parameter_values()

            cost = HingeLoss().apply(targets, probs)
            # Note: this requires just the class labels, not in a one-hot encoding
            error_rate = MisclassificationRate().apply(targets.argmax(axis=1), probs)
            error_rate.name = 'error_rate'

            cg = ComputationGraph([cost])

            # L1 regularization
            if hyper_params['classifier_l1wdecay'] > 0:
                weights = VariableFilter(roles=[WEIGHT])(cg.variables)
                cost = cost + hyper_params['classifier_l1wdecay'] * sum([abs(W).sum() for W in weights])

            cost.name = 'cost'

            # iterate over trial folds
            fold_weights = []
            fold_errors = []

            # for ifi, ifold in fold_generator.get_inner_cv_folds(outer_fold):
            #
            #     train_selectors = fold_generator.get_fold_selectors(outer_fold=outer_fold, inner_fold=ifold['train'])
            #     valid_selectors = fold_generator.get_fold_selectors(outer_fold=outer_fold, inner_fold=ifold['valid'])
            #
            #     metadb = DatasetMetaDB(meta, train_selectors.keys())
            #
            #     # get selected trial IDs
            #     train_idx = metadb.select(train_selectors)
            #     valid_idx = metadb.select(valid_selectors)

            for train_idx, valid_idx in idx_folds:

                # print train_idx
                # print valid_idx

                trainset = IndexableDataset(indexables=OrderedDict(
                    [('features', X[train_idx]), ('targets', Y[train_idx])]))

                validset = IndexableDataset(indexables=OrderedDict(
                    [('features', X[valid_idx]), ('targets', Y[valid_idx])]))

                model.set_parameter_values(init_param_values)

                best_params = BestParams()
                best_params.add_condition(['after_epoch'],
                                          predicate=OnLogRecord('error_rate_valid_best_so_far'))

                algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Adam())

                extensions = [Timing(),
                              FinishAfter(after_n_epochs=hyper_params['classifier_max_epochs']),
                              DataStreamMonitoring(
                                  [cost, error_rate],
                                  DataStream.default_stream(
                                      validset,
                                      iteration_scheme=SequentialScheme(
                                          validset.num_examples, hyper_params['classifier_batch_size'])),
                                  suffix="valid"),
                              TrainingDataMonitoring(
                                  [cost, error_rate,
                                   aggregation.mean(algorithm.total_gradient_norm)],
                                  suffix="train",
                                  after_epoch=True),
                              TrackTheBest('error_rate_valid'),
                              best_params  # after TrackTheBest!
                              ]

                if verbose:
                    extensions.append(Printing())  # optional
                    extensions.append(ProgressBar())

                main_loop = MainLoop(
                    algorithm,
                    DataStream.default_stream(
                        trainset,
                        iteration_scheme=ShuffledScheme(trainset.num_examples, hyper_params['classifier_batch_size'])),
                    model=model,
                    extensions=extensions)

                main_loop.run()

                fold_weights.append(best_params.values['/linear.W'])
                fold_errors.append(main_loop.status['best_error_rate_valid'])
                # break # FIXME

            fold_errors = np.asarray(fold_errors).squeeze()
            print 'simple NN fold classification errors:', fold_errors

            fold_weights = np.asarray(fold_weights)

            # store filter weights for later analysis
            np.save(fold_weights_filename, fold_weights)

        weights = fold_weights.mean(axis=0)

        linear.parameters[0].set_value(weights)

        return model, predict
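# Illustration (added, not from the original source): the fold averaging above
# simply takes the element-wise mean of the per-fold weight matrices before the
# result is loaded into the Linear brick. A minimal numpy sketch with made-up
# shapes (5 folds, 64 inputs, 3 classes):
import numpy as np
fold_ws = np.random.randn(5, 64, 3)   # one weight matrix per fold
avg_w = fold_ws.mean(axis=0)          # element-wise mean across folds
assert avg_w.shape == (64, 3)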
Example #14
0
def train(train_set, test_set, l2_weight=1e-18):
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')

    n_classifiers = 3
    n_classes = 2
    l1 = Linear(
            name='l1',
            input_dim=2,
            output_dim=10,
            weights_init=IsotropicGaussian(0.1),
            biases_init=Constant(0)
    )
    l1.initialize()
    h1 = Logistic().apply(l1.apply(x))

    l2 = Linear(
            name='l2',
            input_dim=l1.output_dim,
            output_dim=n_classes * n_classifiers,
            weights_init=IsotropicGaussian(0.1),
            biases_init=Constant(0)
    )
    l2.initialize()
    l2 = l2.apply(h1)
    y_hat = MultiTargetSoftmax().apply(l2, n_classes, n_classifiers)

    cost = MultiTargetCategoricalCrossEntropy().apply(y, y_hat)
    error = MisclassificationRate().apply(y, y_hat)
    error.name = 'misclassification_rate'

    cg = ComputationGraph(cost)

    for w in VariableFilter(roles=[WEIGHT])(cg.variables):
        cost += l2_weight * (w ** 2).sum()
    cost.name = 'cost_with_regularization'

    # print('W1', W1.get_value())
    # print('W2', W2.get_value())

    algorithm = GradientDescent(
            cost=cost,
            parameters=cg.parameters,
            step_rule=RMSProp()
    )

    data_stream_train = Flatten(
            DataStream.default_stream(
                    train_set,
                    iteration_scheme=ShuffledScheme(train_set.num_examples, batch_size=80)
            )
    )

    data_stream_test = Flatten(
            DataStream.default_stream(
                    test_set,
                    iteration_scheme=SequentialScheme(test_set.num_examples, batch_size=1)
            )
    )

    monitor = DataStreamMonitoring(
            variables=[cost, error],
            data_stream=data_stream_test,
            prefix="test"
    )

    main_loop = MainLoop(
            data_stream=data_stream_train,
            algorithm=algorithm,
            extensions=[
                monitor,
                FinishAfter(after_n_epochs=100),
                Printing(),
                # ProgressBar()
            ]
    )

    main_loop.run()

    return x, y_hat
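# Usage sketch (added, not from the original source): train() returns the symbolic
# input x and the MultiTargetSoftmax output y_hat, so a forward pass can be
# compiled from them; features_batch below is a hypothetical numpy array.
# import theano
# forward = theano.function([x], y_hat)
# probs = forward(features_batch)   # per-classifier class probabilities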
Example #15
0
def main(num_epochs=50, batch_normalized=True, alpha=0.1):
    """Run the example.

    Parameters
    ----------
    num_epochs : int, optional
        Number of epochs for which to train.

    batch_normalized : bool, optional
        Batch-normalize the training graph. Defaults to `True`.

    alpha : float, optional
        Weight to apply to a new sample when calculating running
        averages for population statistics (1 - alpha weight is
        given to the existing average).

    """
    if batch_normalized:
        # Add an extra keyword argument that only BatchNormalizedMLP takes,
        # in order to speed things up at the cost of a bit of extra memory.
        mlp_class = BatchNormalizedMLP
        extra_kwargs = {'conserve_memory': False}
    else:
        mlp_class = MLP
        extra_kwargs = {}
    mlp = mlp_class([Logistic(), Logistic(), Logistic(), Softmax()],
                    [2, 5, 5, 5, 3],
                    weights_init=IsotropicGaussian(0.2),
                    biases_init=Constant(0.), **extra_kwargs)
    mlp.initialize()

    # Generate a dataset with 3 spiral arms, using 8000 examples for
    # training and 2000 for testing.
    dataset = Spiral(num_examples=10000, classes=3,
                     sources=['features', 'label'],
                     noise=0.05)
    train_stream = DataStream(dataset,
                              iteration_scheme=ShuffledScheme(examples=8000,
                                                              batch_size=20))
    test_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(
                                 examples=list(range(8000, 10000)),
                                 batch_size=2000))

    # Build a cost graph; this contains BatchNormalization bricks that will
    # by default run in inference mode.
    features = tensor.matrix('features')
    label = tensor.lvector('label')
    prediction = mlp.apply(features)
    cost = CategoricalCrossEntropy().apply(label, prediction)
    misclass = MisclassificationRate().apply(label, prediction)
    misclass.name = 'misclass'  # The default name for this is annoyingly long
    original_cg = ComputationGraph([cost, misclass])

    if batch_normalized:
        cg = apply_batch_normalization(original_cg)
        # Add updates for population parameters
        pop_updates = get_batch_normalization_updates(cg)
        extra_updates = [(p, m * alpha + p * (1 - alpha))
                         for p, m in pop_updates]
    else:
        cg = original_cg
        extra_updates = []

    algorithm = GradientDescent(step_rule=Adam(0.001),
                                cost=cg.outputs[0],
                                parameters=cg.parameters)
    algorithm.add_updates(extra_updates)

    main_loop = MainLoop(algorithm=algorithm,
                         data_stream=train_stream,
                         # Use the original cost and misclass variables so
                         # that we monitor the (original) inference-mode graph.
                         extensions=[DataStreamMonitoring([cost, misclass],
                                                          train_stream,
                                                          prefix='train'),
                                     DataStreamMonitoring([cost, misclass],
                                                          test_stream,
                                                          prefix='test'),
                                     Printing(),
                                     FinishAfter(after_n_epochs=num_epochs)])
    main_loop.run()
    return main_loop
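# Worked illustration (added, not from the original source) of the running-average
# update used above for the batch-normalization population statistics: each
# population statistic p is moved towards the freshly computed minibatch statistic
# m by p <- alpha * m + (1 - alpha) * p.
import numpy as np
alpha = 0.1
pop_mean = 0.0
for batch_mean in [1.0, 1.2, 0.9]:   # minibatch statistics (made-up numbers)
    pop_mean = alpha * batch_mean + (1 - alpha) * pop_mean
print pop_mean                        # slowly tracks the batch means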
Example #16
0
def main(save_to, cost_name, learning_rate, momentum, num_epochs):
    mlp = MLP([None], [784, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    scores = mlp.apply(x)

    batch_size = y.shape[0]
    indices = tensor.arange(y.shape[0])
    target_scores = tensor.set_subtensor(
        tensor.zeros((batch_size, 10))[indices, y.flatten()],
        1)
    score_diff = scores - target_scores

    # Logistic Regression
    if cost_name == 'lr':
        cost = Softmax().categorical_cross_entropy(y.flatten(), scores).mean()
    # MSE
    elif cost_name == 'mse':
        cost = (score_diff ** 2).mean()
    # Perceptron
    elif cost_name == 'perceptron':
        cost = (scores.max(axis=1) - scores[indices, y.flatten()]).mean()
    # TLE
    elif cost_name == 'minmin':
        cost = abs(score_diff[indices, y.flatten()]).mean()
        cost += abs(score_diff[indices, scores.argmax(axis=1)]).mean()
    # TLEcut
    elif cost_name == 'minmin_cut':
        # Score of the ground truth should be greater than or equal to its target score
        cost = tensor.maximum(0, -score_diff[indices, y.flatten()]).mean()
        # Score of the prediction should be less than or equal to its actual score
        cost += tensor.maximum(0, score_diff[indices, scores.argmax(axis=1)]).mean()
    # TLE2
    elif cost_name == 'minmin2':
        cost = ((score_diff[tensor.arange(y.shape[0]), y.flatten()]) ** 2).mean()
        cost += ((score_diff[tensor.arange(y.shape[0]), scores.argmax(axis=1)]) ** 2).mean()
    # Direct loss minimization
    elif cost_name == 'direct':
        epsilon = 0.1
        cost = (- scores[indices, (scores + epsilon * target_scores).argmax(axis=1)]
                + scores[indices, scores.argmax(axis=1)]).mean()
        cost /= epsilon
    elif cost_name == 'svm':
        cost = (scores[indices, (scores - 1 * target_scores).argmax(axis=1)]
                - scores[indices, y.flatten()]).mean()
    else:
        raise ValueError("Unknown cost " + cost)

    error_rate = MisclassificationRate().apply(y.flatten(), scores)
    error_rate.name = 'error_rate'

    cg = ComputationGraph([cost])
    cost.name = 'cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    if learning_rate is None:
        learning_rate = 0.0001
    if momentum is None:
        momentum = 0.0
    rule = Momentum(learning_rate=learning_rate,
                    momentum=momentum)
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=rule)
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      Flatten(
                          DataStream.default_stream(
                              mnist_test,
                              iteration_scheme=SequentialScheme(
                                  mnist_test.num_examples, 500)),
                          which_sources=('features',)),
                      prefix="test"),
                  # CallbackExtension(
                  #    lambda: rule.learning_rate.set_value(rule.learning_rate.get_value() * 0.9),
                  #    after_epoch=True),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm),
                       rule.learning_rate],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_cost',
                 'test_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()

    df = pandas.DataFrame.from_dict(main_loop.log, orient='index')
    res = {'cost' : cost_name,
           'learning_rate' : learning_rate,
           'momentum' : momentum,
           'train_cost' : df.train_cost.iloc[-1],
           'test_cost' : df.test_cost.iloc[-1],
           'best_test_cost' : df.test_cost.min(),
           'train_error' : df.train_error_rate.iloc[-1],
           'test_error' : df.test_error_rate.iloc[-1],
           'best_test_error' : df.test_error_rate.min()}
    res = {k: float(v) if isinstance(v, numpy.ndarray) else v for k, v in res.items()}
    json.dump(res, sys.stdout)
    sys.stdout.flush()
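# Worked numpy illustration (added, not from the original source) of the one-hot
# target_scores construction and the perceptron cost used above, on made-up scores:
import numpy as np
scores = np.array([[0.2, 0.7, 0.1],
                   [0.6, 0.1, 0.3]])
y = np.array([1, 2])                                    # true class indices
target_scores = np.zeros_like(scores)
target_scores[np.arange(len(y)), y] = 1                 # same effect as set_subtensor above
perceptron_cost = (scores.max(axis=1) - scores[np.arange(len(y)), y]).mean()
print perceptron_cost                                   # 0.0 for the first row, 0.3 for the second -> 0.15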
Example #17
0
mlp.biases_init = Constant(0.0)
mlp.initialize()

lin = Linear(200, 10, use_bias=True)
lin.weights_init = Uniform(0.0, 0.01)
lin.biases_init = Constant(0.0)
lin.initialize()

train_out = lin.apply(mlp.apply(flat_x))
test_out = lin.apply(mlp.apply(flat_x))

sm = Softmax(name='softmax')
loss = sm.categorical_cross_entropy(flat_y, train_out).mean()
loss.name = 'nll'
misclass = MisclassificationRate().apply(flat_y, train_out)
misclass.name = 'misclass'

test_loss = sm.categorical_cross_entropy(flat_y, test_out).mean()
test_loss.name = 'nll'
test_misclass = MisclassificationRate().apply(flat_y, test_out)
test_misclass.name = 'misclass'

model = Model(loss)

######################
# Data
######################
import numpy 
#from mnist import MNIST
from fuel.datasets.mnist import MNIST
from fuel.transformers import ScaleAndShift, ForceFloatX
Example #18
0
def main(save_to, cost_name, learning_rate, momentum, num_epochs):
    mlp = MLP([None], [784, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    scores = mlp.apply(x)

    batch_size = y.shape[0]
    indices = tensor.arange(y.shape[0])
    target_scores = tensor.set_subtensor(
        tensor.zeros((batch_size, 10))[indices, y.flatten()], 1)
    score_diff = scores - target_scores

    # Logistic Regression
    if cost_name == 'lr':
        cost = Softmax().categorical_cross_entropy(y.flatten(), scores).mean()
    # MSE
    elif cost_name == 'mse':
        cost = (score_diff**2).mean()
    # Perceptron
    elif cost_name == 'perceptron':
        cost = (scores.max(axis=1) - scores[indices, y.flatten()]).mean()
    # TLE
    elif cost_name == 'minmin':
        cost = abs(score_diff[indices, y.flatten()]).mean()
        cost += abs(score_diff[indices, scores.argmax(axis=1)]).mean()
    # TLEcut
    elif cost_name == 'minmin_cut':
        # Score of the ground truth should be greater than or equal to its target score
        cost = tensor.maximum(0, -score_diff[indices, y.flatten()]).mean()
        # Score of the prediction should be less than or equal to its actual score
        cost += tensor.maximum(0, score_diff[indices,
                                             scores.argmax(axis=1)]).mean()
    # TLE2
    elif cost_name == 'minmin2':
        cost = ((score_diff[tensor.arange(y.shape[0]), y.flatten()])**2).mean()
        cost += ((score_diff[tensor.arange(y.shape[0]),
                             scores.argmax(axis=1)])**2).mean()
    # Direct loss minimization
    elif cost_name == 'direct':
        epsilon = 0.1
        cost = (-scores[indices,
                        (scores + epsilon * target_scores).argmax(axis=1)] +
                scores[indices, scores.argmax(axis=1)]).mean()
        cost /= epsilon
    elif cost_name == 'svm':
        cost = (scores[indices, (scores - 1 * target_scores).argmax(axis=1)] -
                scores[indices, y.flatten()]).mean()
    else:
        raise ValueError("Unknown cost " + cost)

    error_rate = MisclassificationRate().apply(y.flatten(), scores)
    error_rate.name = 'error_rate'

    cg = ComputationGraph([cost])
    cost.name = 'cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    if learning_rate is None:
        learning_rate = 0.0001
    if momentum is None:
        momentum = 0.0
    rule = Momentum(learning_rate=learning_rate, momentum=momentum)
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=rule)
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                     which_sources=('features', )),
                             prefix="test"),
        # CallbackExtension(
        #    lambda: rule.learning_rate.set_value(rule.learning_rate.get_value() * 0.9),
        #    after_epoch=True),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm), rule.learning_rate
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[['test_cost', 'test_error_rate'],
                           ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                                 which_sources=('features', )),
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()

    df = pandas.DataFrame.from_dict(main_loop.log, orient='index')
    res = {
        'cost': cost_name,
        'learning_rate': learning_rate,
        'momentum': momentum,
        'train_cost': df.train_cost.iloc[-1],
        'test_cost': df.test_cost.iloc[-1],
        'best_test_cost': df.test_cost.min(),
        'train_error': df.train_error_rate.iloc[-1],
        'test_error': df.test_error_rate.iloc[-1],
        'best_test_error': df.test_error_rate.min()
    }
    res = {
        k: float(v) if isinstance(v, numpy.ndarray) else v
        for k, v in res.items()
    }
    json.dump(res, sys.stdout)
    sys.stdout.flush()
Example #19
0
# initialize variables:
# for each (m, s, var) in statistics_list,
#     compute the mean m and standard deviation s over the whole dataset.

if normalization == 'bn2':
    for m,s,var in statistics_list:
        var.tag.aggregation_scheme = MeanAndVariance(var, var.shape[0], axis = 0)
        init_mn, init_var = DatasetEvaluator([var]).evaluate(stream_train)[var.name]
        m.set_value(init_mn.astype(floatX))
        s.set_value(sqrt(init_var).astype(floatX))

cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
cost.name = 'cost'
error_rate = MisclassificationRate().apply(y.flatten(), probs)
error_rate.name = 'error_rate'

cg = ComputationGraph([cost])
    
parameters = cg.parameters
# add gradient descent to M,S
if normalization == 'bn2':
    for m,s,var in statistics_list:
        parameters.extend([m,s])

algorithm = GradientDescent(
    cost=cost, parameters=parameters, step_rule=Adam(0.01))

#update the M and S with batch statistics
alpha = 0.1
updates = []
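# The snippet above is truncated after "updates = []". A plausible continuation
# (an assumption, not recovered from the source) would add an exponential
# moving-average update for each (m, s) pair, mirroring the alpha defined above:
#
# for m, s, var in statistics_list:
#     batch_mean = var.mean(axis=0)
#     batch_std = var.std(axis=0)
#     updates.append((m, alpha * batch_mean + (1 - alpha) * m))
#     updates.append((s, alpha * batch_std + (1 - alpha) * s))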
Example #20
0
def setup_model(configs):
    tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
    # shape: T x B x C x X x Y
    input_ = tensor5('features')
    tensor3 = theano.tensor.TensorType(config.floatX, (False,) * 3)
    locs = tensor3('locs')
    # shape: B x Classes
    target = T.ivector('targets')

    model = LSTMAttention(
        configs,
        weights_init=Glorot(),
        biases_init=Constant(0))
    model.initialize()

    (h, c, location, scale, alpha, patch, downn_sampled_input,
        conved_part_1, conved_part_2, pre_lstm) = model.apply(input_, locs)

    model.location = location
    model.scale = scale
    model.alpha = alpha
    model.patch = patch

    classifier = MLP(
        [Rectifier(), Softmax()],
        configs['classifier_dims'],
        weights_init=Glorot(),
        biases_init=Constant(0))
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    cost.name = 'CE'
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = 'ER'
    model.cost = cost
    model.error_rate = error_rate
    model.probabilities = probabilities

    if configs['load_pretrained']:
        blocks_model = Model(model.cost)
        all_params = blocks_model.parameters
        with open('VGG_CNN_params.npz') as f:
            loaded = np.load(f)
            all_conv_params = loaded.keys()
            for param in all_params:
                if param.name in loaded.keys():
                    assert param.get_value().shape == loaded[param.name].shape
                    param.set_value(loaded[param.name])
                    all_conv_params.pop(all_conv_params.index(param.name))
        print "the following parameters did not match: " + str(all_conv_params)

    if configs['test_model']:
        print "TESTING THE MODEL: CHECK THE INPUT SIZE!"
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.cost],
                            on_unused_input='ignore',
                            allow_input_downcast=True)
        data = configs['get_streams'](configs[
            'batch_size'])[0].get_epoch_iterator().next()
        f(data[1], data[0], data[2])

        print "Test passed! ;)"

    model.monitorings = [cost, error_rate]

    return model
Example #21
0
def main(num_epochs=100):
    x = tensor.matrix('features')
    m = tensor.matrix('features_mask')

    x_int = x.astype(dtype='int32').T
    train_dataset = TextFile('inspirational.txt')
    train_dataset.indexables[0] = numpy.array(sorted(
        train_dataset.indexables[0], key=len
    ))

    n_voc = len(train_dataset.dict.keys())

    init_probs = numpy.array(
        [len(filter(lambda idx: idx == w,
                    [s[0] for s in train_dataset.indexables[
                        train_dataset.sources.index('features')]]
                    )) for w in xrange(n_voc)],
        dtype=theano.config.floatX
    )
    init_probs = init_probs / init_probs.sum()

    n_h = 100
    linear_embedding = LookupTable(
        length=n_voc,
        dim=n_h,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    linear_embedding.initialize()
    lstm_biases = numpy.zeros(4 * n_h).astype(dtype=theano.config.floatX)
    lstm_biases[n_h:(2 * n_h)] = 4.
    rnn = SimpleRecurrent(
        dim=n_h,
        activation=Tanh(),
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    rnn.initialize()
    score_layer = Linear(
        input_dim=n_h,
        output_dim=n_voc,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    score_layer.initialize()

    embedding = (linear_embedding.apply(x_int[:-1])
                 * tensor.shape_padright(m.T[1:]))
    rnn_out = rnn.apply(inputs=embedding, mask=m.T[1:])
    probs = softmax(
        sequence_map(score_layer.apply, rnn_out, mask=m.T[1:])[0]
    )
    idx_mask = m.T[1:].nonzero()
    cost = CategoricalCrossEntropy().apply(
        x_int[1:][idx_mask[0], idx_mask[1]],
        probs[idx_mask[0], idx_mask[1]]
    )
    cost.name = 'cost'
    misclassification = MisclassificationRate().apply(
        x_int[1:][idx_mask[0], idx_mask[1]],
        probs[idx_mask[0], idx_mask[1]]
    )
    misclassification.name = 'misclassification'

    cg = ComputationGraph([cost])
    params = cg.parameters

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=Adam()
    )

    train_data_stream = Padding(
        data_stream=DataStream(
            dataset=train_dataset,
            iteration_scheme=BatchwiseShuffledScheme(
                examples=train_dataset.num_examples,
                batch_size=10,
            )
        ),
        mask_sources=('features',)
    )

    model = Model(cost)

    extensions = []
    extensions.append(Timing())
    extensions.append(FinishAfter(after_n_epochs=num_epochs))
    extensions.append(TrainingDataMonitoring(
        [cost, misclassification],
        prefix='train',
        after_epoch=True))

    batch_size = 10
    length = 30
    trng = MRG_RandomStreams(18032015)
    u = trng.uniform(size=(length, batch_size, n_voc))
    gumbel_noise = -tensor.log(-tensor.log(u))
    init_samples = (tensor.log(init_probs).dimshuffle(('x', 0))
                    + gumbel_noise[0]).argmax(axis=-1)
    init_states = rnn.initial_state('states', batch_size)

    def sampling_step(g_noise, states, samples_step):
        embedding_step = linear_embedding.apply(samples_step)
        next_states = rnn.apply(inputs=embedding_step,
                                states=states,
                                iterate=False)
        probs_step = softmax(score_layer.apply(next_states))
        next_samples = (tensor.log(probs_step)
                        + g_noise).argmax(axis=-1)

        return next_states, next_samples

    [_, samples], _ = theano.scan(
        fn=sampling_step,
        sequences=[gumbel_noise[1:]],
        outputs_info=[init_states, init_samples]
    )

    sampling = theano.function([], samples.owner.inputs[0].T)

    plotters = []
    plotters.append(Plotter(
        channels=[['train_cost', 'train_misclassification']],
        titles=['Costs']))

    extensions.append(PlotManager('Language modelling example',
                                  plotters=plotters,
                                  after_epoch=True,
                                  after_training=True))
    extensions.append(Printing())
    extensions.append(PrintSamples(sampler=sampling,
                                   voc=train_dataset.inv_dict))

    main_loop = MainLoop(model=model,
                         data_stream=train_data_stream,
                         algorithm=algorithm,
                         extensions=extensions)

    main_loop.run()
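# Illustration (added, not from the original source) of the Gumbel-max trick used
# in sampling_step above: adding -log(-log(U)) noise to log-probabilities and
# taking the argmax draws a sample from the categorical distribution given by probs.
import numpy as np
probs = np.array([0.1, 0.6, 0.3])
u = np.random.uniform(size=(100000, 3))
gumbel = -np.log(-np.log(u))
samples = (np.log(probs) + gumbel).argmax(axis=1)
print np.bincount(samples, minlength=3) / float(len(samples))   # roughly [0.1, 0.6, 0.3]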
Example #22
0
def main(job_id, params):
    config = ConfigParser.ConfigParser()
    config.readfp(open('./params'))
    max_epoch = int(config.get('hyperparams', 'max_iter', 100))
    base_lr = float(config.get('hyperparams', 'base_lr', 0.01))
    train_batch = int(config.get('hyperparams', 'train_batch', 256))
    valid_batch = int(config.get('hyperparams', 'valid_batch', 512))
    test_batch = int(config.get('hyperparams', 'valid_batch', 512))

    W_sd = float(config.get('hyperparams', 'W_sd', 0.01))
    W_mu = float(config.get('hyperparams', 'W_mu', 0.0))
    b_sd = float(config.get('hyperparams', 'b_sd', 0.01))
    b_mu = float(config.get('hyperparams', 'b_mu', 0.0))

    hidden_units = int(config.get('hyperparams', 'hidden_units', 32))
    input_dropout_ratio = float(
        config.get('hyperparams', 'input_dropout_ratio', 0.2))
    dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2))
    weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001))
    max_norm = float(config.get('hyperparams', 'max_norm', 100.0))
    solver = config.get('hyperparams', 'solver_type', 'rmsprop')
    data_file = config.get('hyperparams', 'data_file')
    side = config.get('hyperparams', 'side', 'b')

    # Spearmint optimization parameters:
    if params:
        base_lr = float(params['base_lr'][0])
        dropout_ratio = float(params['dropout_ratio'][0])
        hidden_units = params['hidden_units'][0]
        weight_decay = params['weight_decay'][0]

    if 'adagrad' in solver:
        solver_type = CompositeRule([
            AdaGrad(learning_rate=base_lr),
            VariableClipping(threshold=max_norm)
        ])
    else:
        solver_type = CompositeRule([
            RMSProp(learning_rate=base_lr),
            VariableClipping(threshold=max_norm)
        ])

    input_dim = {'l': 11427, 'r': 10519, 'b': 10519 + 11427}
    data_file = config.get('hyperparams', 'data_file')

    if 'b' in side:
        train = H5PYDataset(data_file, which_set='train')
        valid = H5PYDataset(data_file, which_set='valid')
        test = H5PYDataset(data_file, which_set='test')
        x_l = tensor.matrix('l_features')
        x_r = tensor.matrix('r_features')
        x = tensor.concatenate([x_l, x_r], axis=1)

    else:
        train = H5PYDataset(data_file,
                            which_set='train',
                            sources=['{}_features'.format(side), 'targets'])
        valid = H5PYDataset(data_file,
                            which_set='valid',
                            sources=['{}_features'.format(side), 'targets'])
        test = H5PYDataset(data_file,
                           which_set='test',
                           sources=['{}_features'.format(side), 'targets'])
        x = tensor.matrix('{}_features'.format(side))

    y = tensor.lmatrix('targets')

    # Define a feed-forward net with an input, two hidden layers, and a softmax output:
    model = MLP(activations=[
        Rectifier(name='h1'),
        Rectifier(name='h2'),
        Softmax(name='output'),
    ],
                dims=[input_dim[side], hidden_units, hidden_units, 2],
                weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
                biases_init=IsotropicGaussian(b_sd, b_mu))

    # Don't forget to initialize params:
    model.initialize()

    # y_hat is the output of the neural net with x as its inputs
    y_hat = model.apply(x)

    # Define a cost function to optimize, and a classification error rate.
    # Also apply the outputs from the net and corresponding targets:
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'error'

    # This is the model: before applying dropout
    model = Model(cost)

    # Need to define the computation graph for the cost func:
    cost_graph = ComputationGraph([cost])

    # This returns a list of weight vectors for each layer
    W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)

    # Add some regularization to this model:
    cost += weight_decay * l2_norm(W)
    cost.name = 'entropy'

    # computational graph with l2 reg
    cost_graph = ComputationGraph([cost])

    # Apply dropout to inputs:
    inputs = VariableFilter([INPUT])(cost_graph.variables)
    dropout_inputs = [
        input for input in inputs if input.name.startswith('linear_')
    ]
    dropout_graph = apply_dropout(cost_graph, [dropout_inputs[0]],
                                  input_dropout_ratio)
    dropout_graph = apply_dropout(dropout_graph, dropout_inputs[1:],
                                  dropout_ratio)
    dropout_cost = dropout_graph.outputs[0]
    dropout_cost.name = 'dropout_entropy'

    # Learning Algorithm (notice: we use the dropout cost for learning):
    algo = GradientDescent(step_rule=solver_type,
                           params=dropout_graph.parameters,
                           cost=dropout_cost)

    # algo.step_rule.learning_rate.name = 'learning_rate'

    # Data stream used for training model:
    training_stream = Flatten(
        DataStream.default_stream(dataset=train,
                                  iteration_scheme=ShuffledScheme(
                                      train.num_examples,
                                      batch_size=train_batch)))

    training_monitor = TrainingDataMonitoring([
        dropout_cost,
        aggregation.mean(error),
        aggregation.mean(algo.total_gradient_norm)
    ],
                                              after_batch=True)

    # Use the 'valid' set for validation during training:
    validation_stream = Flatten(
        DataStream.default_stream(dataset=valid,
                                  iteration_scheme=ShuffledScheme(
                                      valid.num_examples,
                                      batch_size=valid_batch)))

    validation_monitor = DataStreamMonitoring(variables=[cost, error],
                                              data_stream=validation_stream,
                                              prefix='validation',
                                              after_epoch=True)

    test_stream = Flatten(
        DataStream.default_stream(
            dataset=test,
            iteration_scheme=ShuffledScheme(test.num_examples,
                                            batch_size=test_batch)))

    test_monitor = DataStreamMonitoring(variables=[error],
                                        data_stream=test_stream,
                                        prefix='test',
                                        after_training=True)

    plotting = Plot('AdniNet_{}'.format(side),
                    channels=[
                        ['dropout_entropy', 'validation_entropy'],
                        ['error', 'validation_error'],
                    ],
                    after_batch=False)

    # Checkpoint class used to save model and log:
    stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d-%H:%M')
    checkpoint = Checkpoint('./models/{}net/{}'.format(side, stamp),
                            save_separately=['model', 'log'],
                            every_n_epochs=1)

    # Home-brewed class for early stopping when we detect we have started to overfit
    early_stopper = FinishIfOverfitting(error_name='error',
                                        validation_name='validation_error',
                                        threshold=0.1,
                                        epochs=5,
                                        burn_in=100)

    # The main loop will train the network and output reports, etc
    main_loop = MainLoop(data_stream=training_stream,
                         model=model,
                         algorithm=algo,
                         extensions=[
                             validation_monitor,
                             training_monitor,
                             plotting,
                             FinishAfter(after_n_epochs=max_epoch),
                             early_stopper,
                             Printing(),
                             ProgressBar(),
                             checkpoint,
                             test_monitor,
                         ])
    main_loop.run()

    ve = float(main_loop.log.last_epoch_row['validation_error'])
    te = float(main_loop.log.last_epoch_row['error'])
    spearmint_loss = ve + abs(te - ve)
    print 'Spearmint Loss: {}'.format(spearmint_loss)
    return spearmint_loss
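# Worked example (added, not from the original source) of the Spearmint objective
# above: spearmint_loss = ve + abs(te - ve) penalizes both a high validation error
# and a large train/validation gap. E.g. ve = 0.20, te = 0.05 gives 0.20 + 0.15 = 0.35,
# while ve = 0.20, te = 0.18 gives only 0.22.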
Example #23
0
    prototype=input_mlp,
)
parallel_nets.initialize()
l_h, r_h = parallel_nets.apply(l_x=l_x, r_x=r_x)

# Concatenate the inputs from the two hidden subnets into a single variable
# for input into the next layer.
merge = tensor.concatenate([l_h, r_h], axis=1)

y_hat = output_mlp.apply(merge)

# Define a cost function to optimize, and a classification error rate:
# Also apply the outputs from the net, and corresponding targets:
cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
error = MisclassificationRate().apply(y.flatten(), y_hat)
error.name = 'error'

# Need to define the computation graph:
graph = ComputationGraph(cost)

# This returns a list of weight vectors for each layer
W = VariableFilter(roles=[WEIGHT])(graph.variables)

# Add some regularization to this model:
lam = 0.001
cost += lam * l2_norm(W)
cost.name = 'entropy'

# This is the model without dropout, but with l2 reg.
model = Model(cost)
Example #24
0
from blocks.bricks import Linear, Logistic, Softmax


# In[10]:

hidden_layer_size = 100
input_to_hidden = Linear(name='input_to_hidden', input_dim=117, output_dim=hidden_layer_size)
h = Logistic().apply(input_to_hidden.apply(x))
hidden_to_output = Linear(name='hidden_to_output', input_dim=hidden_layer_size, output_dim=2)
y_hat = Softmax().apply(hidden_to_output.apply(h))

y = tensor.lmatrix('targets')
from blocks.bricks.cost import CategoricalCrossEntropy, MisclassificationRate
cost = CategoricalCrossEntropy().apply(y, y_hat)
error_rate = MisclassificationRate().apply(y.argmax(axis=1), y_hat)
error_rate.name = "error_rate"

# >>> from blocks.roles import WEIGHT
from blocks.graph import ComputationGraph
# >>> from blocks.filter import VariableFilter
cg = ComputationGraph(cost)
# >>> W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
# >>> cost = cost + 0.005 * (W1 ** 2).sum() + 0.005 * (W2 ** 2).sum()
# >>> cost.name = 'cost_with_regularization'
cost.name = 'cost_simple_xentropy'

from blocks.initialization import IsotropicGaussian, Constant
input_to_hidden.weights_init = hidden_to_output.weights_init = IsotropicGaussian(0.01)
input_to_hidden.biases_init = hidden_to_output.biases_init = Constant(0)
input_to_hidden.initialize()
hidden_to_output.initialize()
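# Follow-up sketch (added, not from the original source): with the bricks now
# initialized, the cost and error rate above can be compiled into a plain function
# for spot-checking on numpy arrays. This assumes x is the 'features' matrix
# defined earlier in the notebook this snippet was exported from, and
# features_batch / targets_batch are hypothetical numpy arrays.
# import theano
# f = theano.function([x, y], [cost, error_rate], allow_input_downcast=True)
# c, e = f(features_batch, targets_batch)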
Example #25
0
def main(job_id, params):

    config = ConfigParser.ConfigParser()
    config.readfp(open('./params'))

    max_epoch = int(config.get('hyperparams', 'max_iter', 100))
    base_lr = float(config.get('hyperparams', 'base_lr', 0.01))
    train_batch = int(config.get('hyperparams', 'train_batch', 256))
    valid_batch = int(config.get('hyperparams', 'valid_batch', 512))
    test_batch = int(config.get('hyperparams', 'valid_batch', 512))
    hidden_units = int(config.get('hyperparams', 'hidden_units', 16))

    W_sd = float(config.get('hyperparams', 'W_sd', 0.01))
    W_mu = float(config.get('hyperparams', 'W_mu', 0.0))
    b_sd = float(config.get('hyperparams', 'b_sd', 0.01))
    b_mu = float(config.get('hyperparams', 'b_mu', 0.0))

    dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2))
    weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001))
    max_norm = float(config.get('hyperparams', 'max_norm', 100.0))
    solver = config.get('hyperparams', 'solver_type', 'rmsprop')
    data_file = config.get('hyperparams', 'data_file')
    fine_tune = config.getboolean('hyperparams', 'fine_tune')

    # Spearmint optimization parameters:
    if params:
        base_lr = float(params['base_lr'][0])
        dropout_ratio = float(params['dropout_ratio'][0])
        hidden_units = params['hidden_units'][0]
        weight_decay = params['weight_decay'][0]

    if 'adagrad' in solver:
        solver_type = CompositeRule([
            AdaGrad(learning_rate=base_lr),
            VariableClipping(threshold=max_norm)
        ])
    else:
        solver_type = CompositeRule([
            RMSProp(learning_rate=base_lr),
            VariableClipping(threshold=max_norm)
        ])

    rn_file = '/projects/francisco/repositories/NI-ML/models/deepnets/blocks/ff/models/rnet/2015-06-25-18:13'
    ln_file = '/projects/francisco/repositories/NI-ML/models/deepnets/blocks/ff/models/lnet/2015-06-29-11:45'

    right_dim = 10519
    left_dim = 11427

    train = H5PYDataset(data_file, which_set='train')
    valid = H5PYDataset(data_file, which_set='valid')
    test = H5PYDataset(data_file, which_set='test')

    l_x = tensor.matrix('l_features')
    r_x = tensor.matrix('r_features')
    y = tensor.lmatrix('targets')

    lnet = load(ln_file).model.get_top_bricks()[0]
    rnet = load(rn_file).model.get_top_bricks()[0]

    # Pre-trained layers:

    # Inputs -> hidden_1 -> hidden 2
    for side, net in zip(['l', 'r'], [lnet, rnet]):
        for child in net.children:
            child.name = side + '_' + child.name

    ll1 = lnet.children[0]
    lr1 = lnet.children[1]
    ll2 = lnet.children[2]
    lr2 = lnet.children[3]

    rl1 = rnet.children[0]
    rr1 = rnet.children[1]
    rl2 = rnet.children[2]
    rr2 = rnet.children[3]

    l_h = lr2.apply(ll2.apply(lr1.apply(ll1.apply(l_x))))
    r_h = rr2.apply(rl2.apply(rr1.apply(rl1.apply(r_x))))

    input_dim = ll2.output_dim + rl2.output_dim

    # hidden_2 -> hidden_3 -> hidden_4 -> Logistic output
    output_mlp = MLP(activations=[
        Rectifier(name='h3'),
        Rectifier(name='h4'),
        Softmax(name='output'),
    ],
                     dims=[
                         input_dim,
                         hidden_units,
                         hidden_units,
                         2,
                     ],
                     weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
                     biases_init=IsotropicGaussian(std=W_sd, mean=W_mu))

    output_mlp.initialize()

    # # Concatenate the inputs from the two hidden subnets into a single variable
    # # for input into the next layer.
    merge = tensor.concatenate([l_h, r_h], axis=1)
    #
    y_hat = output_mlp.apply(merge)

    # Define a cost function to optimize, and a classification error rate.
    # Also apply the outputs from the net and corresponding targets:
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'error'

    # This is the model: before applying dropout
    model = Model(cost)

    # Need to define the computation graph for the cost func:
    cost_graph = ComputationGraph([cost])

    # This returns a list of weight vectors for each layer
    W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)

    # Add some regularization to this model:
    cost += weight_decay * l2_norm(W)
    cost.name = 'entropy'

    # computational graph with l2 reg
    cost_graph = ComputationGraph([cost])

    # Apply dropout to inputs:
    inputs = VariableFilter([INPUT])(cost_graph.variables)
    dropout_inputs = [
        input for input in inputs if input.name.startswith('linear_')
    ]
    dropout_graph = apply_dropout(cost_graph, [dropout_inputs[0]], 0.2)
    dropout_graph = apply_dropout(dropout_graph, dropout_inputs[1:],
                                  dropout_ratio)
    dropout_cost = dropout_graph.outputs[0]
    dropout_cost.name = 'dropout_entropy'

    # If no fine-tuning of l-r models is wanted, find the params for only
    # the joint layers:
    if fine_tune:
        params_to_update = dropout_graph.parameters
    else:
        params_to_update = VariableFilter(
            [PARAMETER], bricks=output_mlp.children)(cost_graph.variables)

    # Learning Algorithm:
    algo = GradientDescent(step_rule=solver_type,
                           params=params_to_update,
                           cost=dropout_cost)

    # algo.step_rule.learning_rate.name = 'learning_rate'

    # Data stream used for training model:
    training_stream = Flatten(
        DataStream.default_stream(dataset=train,
                                  iteration_scheme=ShuffledScheme(
                                      train.num_examples,
                                      batch_size=train_batch)))

    training_monitor = TrainingDataMonitoring([
        dropout_cost,
        aggregation.mean(error),
        aggregation.mean(algo.total_gradient_norm)
    ],
                                              after_batch=True)

    # Use the 'valid' set for validation during training:
    validation_stream = Flatten(
        DataStream.default_stream(dataset=valid,
                                  iteration_scheme=ShuffledScheme(
                                      valid.num_examples,
                                      batch_size=valid_batch)))

    validation_monitor = DataStreamMonitoring(variables=[cost, error],
                                              data_stream=validation_stream,
                                              prefix='validation',
                                              after_epoch=True)

    test_stream = Flatten(
        DataStream.default_stream(
            dataset=test,
            iteration_scheme=ShuffledScheme(test.num_examples,
                                            batch_size=test_batch)))

    test_monitor = DataStreamMonitoring(variables=[error],
                                        data_stream=test_stream,
                                        prefix='test',
                                        after_training=True)

    plotting = Plot(
        'AdniNet_LeftRight',
        channels=[
            ['dropout_entropy'],
            ['error', 'validation_error'],
        ],
    )

    # Checkpoint class used to save model and log:
    stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d-%H:%M')
    checkpoint = Checkpoint('./models/{}'.format(stamp),
                            save_separately=['model', 'log'],
                            every_n_epochs=1)

    # The main loop will train the network and output reports, etc
    main_loop = MainLoop(data_stream=training_stream,
                         model=model,
                         algorithm=algo,
                         extensions=[
                             validation_monitor,
                             training_monitor,
                             plotting,
                             FinishAfter(after_n_epochs=max_epoch),
                             FinishIfNoImprovementAfter(
                                 notification_name='validation_error',
                                 epochs=1),
                             Printing(),
                             ProgressBar(),
                             checkpoint,
                             test_monitor,
                         ])
    main_loop.run()
    ve = float(main_loop.log.last_epoch_row['validation_error'])
    te = float(main_loop.log.last_epoch_row['error'])
    spearmint_loss = ve + abs(te - ve)
    print 'Spearmint Loss: {}'.format(spearmint_loss)
    return spearmint_loss
Example #26
0
def train_paired_dnn(train_x, train_y, dev_x, dev_y, test_x, test_y):
    train_y = train_y.flatten().astype(int)
    dev_y = dev_y.flatten().astype(int)
    test_y = test_y.flatten().astype(int)

    batch_size = 256

    n_train, in_dim = train_x.shape
    n_dev = dev_x.shape[0]
    n_test = test_x.shape[0]

    hid_dims = 2 * np.array([512, 512, 512, 512]) 
    out_dim = 1

    ds_train = make_ds(train_x, train_y, batch_size, n_train, SequentialScheme)
    ds_dev = make_ds(dev_x, dev_y, batch_size, n_dev, SequentialScheme)
    ds_test = make_ds(test_x, test_y, batch_size, n_test, SequentialScheme)

    mlp = MLP(
      activations=[Rectifier(), Rectifier(), Rectifier(), Rectifier(), Logistic()],
      dims=[in_dim, hid_dims[0], hid_dims[1], hid_dims[2], hid_dims[3], out_dim],
      weights_init=Uniform(mean=0, width=1.0 / 32),  # 1/32 would be 0 under Python 2 integer division
      biases_init=Constant(0)
    )
    mlp.initialize()

    x = tensor.matrix('features')
    y = tensor.matrix('targets', dtype='int64')
    y_hat = mlp.apply(x)
    model = Model(y_hat)

    cost = MyBinaryCrossEntropy().apply(y, y_hat)
    cost.name = 'cost'
    misrate = MisclassificationRate().apply(y.flatten(), y_hat)
    misrate.name = 'misclassification'

    cg = ComputationGraph([cost, misrate])
    drop_vars = VariableFilter(
        roles=[INPUT], 
        bricks=mlp.linear_transformations[1:]
    )(cg.variables)
    cg_dropout = apply_dropout(cg, drop_vars, 0.2)
    cost_dropout, error_rate_dropout = cg_dropout.outputs

    learning_rate = 0.0015
    momentum = 0.9
    step_rule = CompositeRule([
        Momentum(learning_rate=learning_rate, momentum=momentum),
        AdaGrad(learning_rate=learning_rate)
    ])
    algorithm = GradientDescent(cost=cost_dropout, 
                                parameters=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost_dropout, 
                   error_rate_dropout, 
                   aggregation.mean(algorithm.total_gradient_norm)],
        after_epoch=True,
        prefix="train"
    )
    monitor_dev = DataStreamMonitoring(
    #    variables=[cost_dropout, error_rate_dropout],
        variables=[cost, misrate],
        data_stream=ds_dev, 
        prefix="dev"
    )
    monitor_test = DataStreamMonitoring(
    #    variables=[cost_dropout, error_rate_dropout],
        variables=[cost, misrate],
        data_stream=ds_test, 
        prefix="test"
    )
    track_str = 'train_{0}'.format(cost_dropout.name)
    track_best_str = '{0}_best_so_far'.format(track_str)
    print track_str, track_best_str

    n_epochs = 2
    print 'n_epochs:', n_epochs
    main_loop = MainLoop(
       model=model,
       data_stream=ds_train, 
       algorithm=algorithm,
       extensions=[Timing(),
                   monitor_train,
                   monitor_dev,
                   monitor_test,
                   TrackTheBest(track_str),
                   Checkpoint("best_model.pkl",
                       use_cpickle = True
                   ).add_condition(['after_epoch'],
                       predicate=OnLogRecord(track_best_str)),
                   FinishAfter(after_n_epochs=n_epochs), 
                   # FinishIfNoImprovementAfter(track_best_str, epochs=n_epochs),
                   Printing()]
    )
    main_loop.run() 

    acc([x], y_hat, train_x, train_y, 'train')
    acc([x], y_hat, dev_x, dev_y, 'dev')
    acc([x], y_hat, test_x, test_y, 'test')
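# The acc helper used above is not defined in this snippet. A hedged sketch of what
# such an accuracy report might look like (an assumption, not the original helper):
# import theano
# import numpy as np
# def acc(inputs, output_var, data_x, data_y, label):
#     f = theano.function(inputs, output_var, allow_input_downcast=True)
#     preds = (f(data_x) > 0.5).flatten()          # Logistic output, threshold at 0.5
#     print label, 'accuracy:', np.mean(preds == data_y.flatten())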
Example #27
0
hidden_layer_size = 100
input_to_hidden = Linear(name='input_to_hidden',
                         input_dim=117,
                         output_dim=hidden_layer_size)
h = Logistic().apply(input_to_hidden.apply(x))
hidden_to_output = Linear(name='hidden_to_output',
                          input_dim=hidden_layer_size,
                          output_dim=2)
y_hat = Softmax().apply(hidden_to_output.apply(h))

y = tensor.lmatrix('targets')
from blocks.bricks.cost import CategoricalCrossEntropy, MisclassificationRate
cost = CategoricalCrossEntropy().apply(y, y_hat)
error_rate = MisclassificationRate().apply(y.argmax(axis=1), y_hat)
error_rate.name = "error_rate"

# >>> from blocks.roles import WEIGHT
from blocks.graph import ComputationGraph
# >>> from blocks.filter import VariableFilter
cg = ComputationGraph(cost)
# >>> W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
# >>> cost = cost + 0.005 * (W1 ** 2).sum() + 0.005 * (W2 ** 2).sum()
# >>> cost.name = 'cost_with_regularization'
cost.name = 'cost_simple_xentropy'

from blocks.initialization import IsotropicGaussian, Constant
input_to_hidden.weights_init = hidden_to_output.weights_init = IsotropicGaussian(
    0.01)
input_to_hidden.biases_init = hidden_to_output.biases_init = Constant(0)
input_to_hidden.initialize()
Example #28
0
def main(save_to, num_epochs, flag, ksize):
    batch_size = 128
    dim = 100
    n_steps = 20
    i2h1 = MLP([Identity()], [784, dim], biases_init=Constant(0.), weights_init=IsotropicGaussian(.001))
    h2o1 = MLP([Rectifier(), Softmax()], [dim, dim, 10], biases_init=Constant(0.), weights_init=IsotropicGaussian(.001))
    rec1 = SimpleRecurrent(dim=dim, activation=Tanh(), weights_init=Orthogonal())
    i2h1.initialize()
    h2o1.initialize()
    rec1.initialize()

    x = tensor.tensor3('features')
    y = tensor.lmatrix('targets')

    preproc = i2h1.apply(x)
    h1 = rec1.apply(preproc)
    probs = tensor.flatten(h2o1.apply(h1[-1],), outdim=2)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)
    cost.name = 'final_cost'
    error_rate.name = 'error_rate'

    cg = ComputationGraph([cost, error_rate])

    mnist_train = MNIST("train", subset=slice(0, 50000))
    mnist_valid = MNIST("train", subset=slice(50000, 60000))
    mnist_test = MNIST("test")
    trainstream = Mapping(Flatten(DataStream(mnist_train,
                          iteration_scheme=SequentialScheme(50000, batch_size))),
                          _meanize(n_steps, flag, ksize))
    validstream = Mapping(Flatten(DataStream(mnist_valid,
                                             iteration_scheme=SequentialScheme(10000,
                                                                               batch_size))),
                          _meanize(n_steps, flag, ksize))
    teststream = Mapping(Flatten(DataStream(mnist_test,
                                            iteration_scheme=SequentialScheme(10000,
                                                                              batch_size))),
                         _meanize(n_steps, flag, ksize))

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=CompositeRule([Adam(), StepClipping(100)]))
    main_loop = MainLoop(
        algorithm,
        trainstream,
        extensions=[Timing(),
                    FinishAfter(after_n_epochs=num_epochs),
                    DataStreamMonitoring(
                        [cost, error_rate],
                        validstream,
                        prefix="test"),
                    DataStreamMonitoringAndSaving(
                    [cost, error_rate],
                    teststream,
                    [i2h1, h2o1, rec1],
                    'best_'+save_to+'.pkl',
                    cost_name=error_rate.name,
                    after_epoch=True,
                    prefix='valid'
                    ),
                    TrainingDataMonitoring(
                        [cost,
                         aggregation.mean(algorithm.total_gradient_norm)],
                        prefix="train",
                        after_epoch=True),
                    # Plot(
                    #     save_to,
                    #     channels=[
                    #         ['test_final_cost',
                    #          'test_misclassificationrate_apply_error_rate'],
                    #         ['train_total_gradient_norm']]),
                    Printing()])
    main_loop.run()
Example #29
0
def train(train_set, test_set):
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')

    l1 = Linear(
            name='input_to_hidden',
            input_dim=2,
            output_dim=3,
            weights_init=IsotropicGaussian(0.1),
            biases_init=Constant(0)
    )
    l1.initialize()
    h = Logistic().apply(l1.apply(x))

    l2 = Linear(
            name='hidden_to_output',
            input_dim=l1.output_dim,
            output_dim=2,
            weights_init=IsotropicGaussian(0.1),
            biases_init=Constant(0)
    )
    l2.initialize()
    y_hat = Softmax().apply(l2.apply(h))

    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)

    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'misclassification_rate'

    cg = ComputationGraph(cost)

    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + 1e-8 * (W1 ** 2).sum() + 1e-8 * (W2 ** 2).sum()
    cost.name = 'cost_with_regularization'

    print('W1', W1.get_value())
    print('W2', W2.get_value())

    algorithm = GradientDescent(
            cost=cost,
            parameters=cg.parameters,
            step_rule=RMSProp()
    )

    data_stream_train = Flatten(
            DataStream.default_stream(
                    train_set,
                    iteration_scheme=ShuffledScheme(train_set.num_examples, batch_size=4)
            )
    )

    data_stream_test = Flatten(
            DataStream.default_stream(
                    test_set,
                    iteration_scheme=SequentialScheme(test_set.num_examples, batch_size=1)
            )
    )

    monitor = DataStreamMonitoring(
            variables=[cost, error],
            data_stream=data_stream_test,
            prefix="test"
    )

    main_loop = MainLoop(
            data_stream=data_stream_train,
            algorithm=algorithm,
            extensions=[
                monitor,
                FinishAfter(after_n_epochs=100),
                Printing(),
                # ProgressBar()
            ]
    )

    main_loop.run()
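# Usage sketch (added, not from the original source): train() above expects Fuel
# datasets with 'features' and 'targets' sources, e.g. a toy XOR problem wrapped in
# IndexableDataset (names and shapes here are assumptions for illustration only).
# import numpy as np
# from collections import OrderedDict
# from fuel.datasets import IndexableDataset
# X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype='float32')
# Y = np.array([[0], [1], [1], [0]], dtype='int64')
# dataset = IndexableDataset(indexables=OrderedDict([('features', X), ('targets', Y)]))
# train(dataset, dataset)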