예제 #1
0
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                     which_sources=('features', )),
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                                 which_sources=('features', )),
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
예제 #2
0
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(input_dim=10000,
                          dim=500,
                          mlp_hidden_dims=[2000, 500, 4],
                          batch_size=100,
                          image_shape=(100, 100),
                          patch_shape=(28, 28),
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
    model.initialize()
    h, c = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [500, 100, 10],
                     weights_init=IsotropicGaussian(0.01),
                     biases_init=Constant(0))
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)

    return cost, error_rate
예제 #3
0
def build_classifier(dimension):
  mlp = MLP([Tanh(),Tanh(), Softmax()], [784, 100,50, 10],
          weights_init=IsotropicGaussian(0.01),
          biases_init=Constant(0))
          
  mlp.initialize()
  return mlp
예제 #4
0
def setup_model(configs):
    tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
    # shape: T x B x C x X x Y
    input_ = tensor5("features")
    tensor3 = theano.tensor.TensorType(config.floatX, (False,) * 3)
    locs = tensor3("locs")
    # shape: B x Classes
    target = T.ivector("targets")

    model = LSTMAttention(configs, weights_init=Glorot(), biases_init=Constant(0))
    model.initialize()

    (h, c, location, scale, alpha, patch, downn_sampled_input, conved_part_1, conved_part_2, pre_lstm) = model.apply(
        input_, locs
    )

    model.location = location
    model.scale = scale
    model.alpha = location
    model.patch = patch

    classifier = MLP(
        [Rectifier(), Softmax()], configs["classifier_dims"], weights_init=Glorot(), biases_init=Constant(0)
    )
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    cost.name = "CE"
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = "ER"
    model.cost = cost
    model.error_rate = error_rate
    model.probabilities = probabilities

    if configs["load_pretrained"]:
        blocks_model = Model(model.cost)
        all_params = blocks_model.parameters
        with open("VGG_CNN_params.npz") as f:
            loaded = np.load(f)
            all_conv_params = loaded.keys()
            for param in all_params:
                if param.name in loaded.keys():
                    assert param.get_value().shape == loaded[param.name].shape
                    param.set_value(loaded[param.name])
                    all_conv_params.pop(all_conv_params.index(param.name))
        print "the following parameters did not match: " + str(all_conv_params)

    if configs["test_model"]:
        print "TESTING THE MODEL: CHECK THE INPUT SIZE!"
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.cost], on_unused_input="ignore", allow_input_downcast=True)
        data = configs["get_streams"](configs["batch_size"])[0].get_epoch_iterator().next()
        f(data[1], data[0], data[2])

        print "Test passed! ;)"

    model.monitorings = [cost, error_rate]

    return model
def build_model_mnist():

    # CNN
    filter_size = (5, 5)
    activation = Rectifier().apply
    pooling_size = (2, 2)
    num_filters = 50
    layer0 = ConvolutionalLayer(activation=activation, filter_size=filter_size, num_filters=num_filters,
                              pooling_size=pooling_size,
                              weights_init=Uniform(width=0.1),
                              biases_init=Uniform(width=0.01), name="layer_0")

    filter_size = (3, 3)
    activation = Rectifier().apply
    num_filters = 20
    layer1 = ConvolutionalLayer(activation=activation, filter_size=filter_size, num_filters=num_filters,
                              pooling_size=pooling_size,
                              weights_init=Uniform(width=0.1),
                              biases_init=Uniform(width=0.01), name="layer_1")

    conv_layers = [layer0, layer1]
    convnet = ConvolutionalSequence(conv_layers, num_channels= 1,
                                    image_size=(28, 28))

    convnet.initialize()
    output_dim = np.prod(convnet.get_dim('output'))
    mlp = MLP(activations=[Identity()], dims=[output_dim, 10],
                        weights_init=Uniform(width=0.1),
                        biases_init=Uniform(width=0.01), name="layer_2")
    mlp.initialize()

    classifier = Classifier(convnet, mlp)
    classifier.initialize()
    return classifier
예제 #6
0
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means, labels):

    prediction, _, _, _, = \
            build_mlp_onlyloc(features_car_cat, features_car_int,
                              features_nocar_cat, features_nocar_int, features_cp, features_hascar,
                              means, labels)

    mlp_crm = MLP(activations=[None],
                  dims=[1, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_crm')
    mlp_crm.initialize()
    crm = features_nocar_int[:, 0][:, None]

    prediction = prediction * mlp_crm.apply(crm)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    print input_var

    cg_dropout = apply_dropout(cg, [input_var[7], input_var[5]], .4)
    cost_dropout = cg_dropout.outputs[0]

    return prediction, cost_dropout, cg_dropout.parameters, cost
예제 #7
0
def build_mlp(features_int, features_cat, labels, labels_mean):

    inputs = tensor.concatenate([features_int, features_cat], axis=1)

    mlp = MLP(activations=[Rectifier(),
                           Rectifier(),
                           Rectifier(), None],
              dims=[337, 800, 1200, 1],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(1))
    mlp.initialize()

    prediction = mlp.apply(inputs)
    cost = MAPECost().apply(prediction, labels, labels_mean)

    cg = ComputationGraph(cost)
    #cg_dropout0   = apply_dropout(cg, [VariableFilter(roles=[INPUT])(cg.variables)[1]], .2)
    cg_dropout1 = apply_dropout(cg, [
        VariableFilter(roles=[OUTPUT])(cg.variables)[1],
        VariableFilter(roles=[OUTPUT])(cg.variables)[3],
        VariableFilter(roles=[OUTPUT])(cg.variables)[5]
    ], .2)
    cost_dropout1 = cg_dropout1.outputs[0]

    return cost_dropout1, cg_dropout1.parameters, cost  #cost, cg.parameters, cost #
예제 #8
0
def create_vae(x=None, batch=batch_size):
    x = T.matrix('features') if x is None else x
    x = x / 255.

    encoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[img_dim**2, hidden_dim, 2*latent_dim],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='encoder'
    )
    encoder.initialize()
    z_param = encoder.apply(x)
    z_mean, z_log_std = z_param[:,latent_dim:], z_param[:,:latent_dim]
    z = Sampling(theano_seed=seed).apply([z_mean, z_log_std], batch=batch_size)

    decoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[latent_dim, hidden_dim, img_dim**2],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='decoder'
    )
    decoder.initialize()
    x_reconstruct = decoder.apply(z)

    cost = VAEloss().apply(x, x_reconstruct, z_mean, z_log_std)
    cost.name = 'vae_cost'
    return cost
예제 #9
0
    def create_base_model(self, x, y, input_dim, interim_dim=30):

        # Create the output of the MLP
        mlp = MLP([Tanh(), Tanh(), Tanh()], [input_dim, 60, 60, interim_dim],
                  weights_init=IsotropicGaussian(0.001),
                  biases_init=Constant(0))
        mlp.initialize()
        inter = mlp.apply(x)

        fine_tuner = MLP([Logistic()], [interim_dim, 1],
                         weights_init=IsotropicGaussian(0.001),
                         biases_init=Constant(0))
        fine_tuner.initialize()
        probs = fine_tuner.apply(inter)
        #sq_err = BinaryCrossEntropy()
        err = T.sqr(y.flatten() - probs.flatten())
        # cost = T.mean(err * y.flatten() * (1 - self.p) + err *
        #              (1 - y.flatten()) * self.p)
        cost = T.mean(err)
        #cost = sq_err.apply(probs.flatten(), y.flatten())
        # cost = T.mean(y.flatten() * T.log(probs.flatten()) +
        #              (1 - y.flatten()) * T.log(1 - probs.flatten()))
        cost.name = 'cost'
        pred_out = probs > 0.5
        mis_cost = T.sum(T.neq(y.flatten(), pred_out.flatten()))
        mis_cost.name = 'MisclassificationRate'
        return mlp, fine_tuner, cost, mis_cost
예제 #10
0
    def create_model(self, x, y, input_dim, tol=10e-5):

        # Create the output of the MLP
        mlp = MLP(
            [Rectifier(), Rectifier(), Logistic()], [input_dim, 100, 100, 1],
            weights_init=IsotropicGaussian(0.01),
            biases_init=Constant(0))
        mlp.initialize()
        probs = mlp.apply(x)
        y = y.dimshuffle(0, 'x')
        # Create the if-else cost function
        true_p = (T.sum(y * probs) + tol) * 1.0 / (T.sum(y) + tol)
        true_n = (T.sum((1 - y) * (1 - probs)) + tol) * \
            1.0 / (T.sum(1 - y) + tol)
        #p = (T.sum(y) + tol) / (y.shape[0] + tol)
        theta = (1 - self.p) / self.p
        numerator = (1 + self.beta**2) * true_p
        denominator = self.beta**2 + theta + true_p - theta * true_n

        Fscore = numerator / denominator

        cost = -1 * Fscore
        cost.name = "cost"

        return mlp, cost, probs
예제 #11
0
def test_pylearn2_trainin():
    # Construct the model
    mlp = MLP(activations=[Sigmoid(), Sigmoid()],
              dims=[784, 100, 784],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0.01))
    mlp.initialize()
    cost = SquaredError()

    block_cost = BlocksCost(cost)
    block_model = BlocksModel(mlp, (VectorSpace(dim=784), 'features'))

    # Load the data
    rng = numpy.random.RandomState(14)
    train_dataset = random_dense_design_matrix(rng, 1024, 784, 10)
    valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10)

    # Silence Pylearn2's logger
    logger = logging.getLogger(pylearn2.__name__)
    logger.setLevel(logging.ERROR)

    # Training algorithm
    sgd = SGD(learning_rate=0.01,
              cost=block_cost,
              batch_size=128,
              monitoring_dataset=valid_dataset)
    train = Train(train_dataset, block_model, algorithm=sgd)
    train.main_loop(time_budget=3)
예제 #12
0
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0),
              seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(cost=cost,
                        params=ComputationGraph(cost).parameters,
                        step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) + [
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)), prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()
        ])
    main_loop.run()
    return main_loop
예제 #13
0
파일: sqrt.py 프로젝트: basaundi/blocks
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, params=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) +
        [Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()])
    main_loop.run()
    return main_loop
예제 #14
0
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means, labels):

    features = tensor.concatenate([
        features_hascar, means['cp'][features_cp[:, 0]],
        means['dep'][features_cp[:, 1]]
    ],
                                  axis=1)

    mlp = MLP(activations=[Rectifier(), Rectifier(), None],
              dims=[5, 50, 50, 1],
              weights_init=IsotropicGaussian(.1),
              biases_init=Constant(0),
              name='mlp')
    mlp.initialize()

    prediction = mlp.apply(features)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    print input_var

    cg_dropout1 = apply_dropout(cg, [input_var[3], input_var[5]], .4)
    cost_dropout1 = cg_dropout1.outputs[0]

    return prediction, cost_dropout1, cg_dropout1.parameters, cost
예제 #15
0
def test_pylearn2_training():
    # Construct the model
    mlp = MLP(activations=[Sigmoid(), Sigmoid()], dims=[784, 100, 784],
              weights_init=IsotropicGaussian(), biases_init=Constant(0.01))
    mlp.initialize()
    cost = SquaredError()

    # Load the data
    rng = numpy.random.RandomState(14)
    train_dataset = random_dense_design_matrix(rng, 1024, 784, 10)
    valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10)

    x = tensor.matrix('features')
    block_cost = Pylearn2Cost(cost.apply(x, mlp.apply(x)))
    block_model = Pylearn2Model(mlp)

    # Silence Pylearn2's logger
    logger = logging.getLogger(pylearn2.__name__)
    logger.setLevel(logging.ERROR)

    # Training algorithm
    sgd = SGD(learning_rate=0.01, cost=block_cost, batch_size=128,
              monitoring_dataset=valid_dataset)
    train = Pylearn2Train(train_dataset, block_model, algorithm=sgd)
    train.main_loop(time_budget=3)
예제 #16
0
def setupNN(NNParam):
    NNWidth = NNParam['NNWidth']
    WeightStdDev = NNParam['WeightStdDev']
    L2Weight = NNParam['L2Weight']
    DropOutProb = NNParam['DropOutProb']
    InitialLearningRate = NNParam['InitialLearningRate']
    x = theano.tensor.concatenate([x0, x1, x2, x3], axis=1)
    mlp = MLP(activations=[Rectifier(), Rectifier(), Rectifier(), Rectifier(), Rectifier()], dims=[69*4, NNWidth, NNWidth, NNWidth, NNWidth, 100],
           weights_init=IsotropicGaussian(WeightStdDev),
           biases_init=Constant(0))

    x_forward = mlp.apply(x)
    mlp_sm = MLP(activations=[None], dims=[100, 39],
           weights_init=IsotropicGaussian(WeightStdDev),
           biases_init=Constant(0))
    y_hat_b = Softmax().apply(mlp_sm.apply(x_forward))
    mlp.initialize()
    mlp_sm.initialize()
    cg = blocks.graph.ComputationGraph(y_hat_b)
    parameters = list()
    for p in cg.parameters:
        parameters.append(p)
    weights = VariableFilter(roles=[blocks.roles.WEIGHT])(cg.variables)
    cg_dropout = blocks.graph.apply_dropout(cg,[weights[3]] , DropOutProb)
    y_hat_b_do = cg_dropout.outputs[0]
    pred_b = theano.tensor.argmax(cg.outputs[0],axis=1)
    err_b = theano.tensor.mean(theano.tensor.eq(pred_b,y_b))
    cW = 0
    for W in weights:
        cW += (W**2).sum()
    cost = theano.tensor.mean(theano.tensor.nnet.categorical_crossentropy(y_hat_b_do, y_b))  + cW*L2Weight


    Learning_Rate_Decay = numpy.float32(0.98)
    learning_rate_theano = theano.shared(numpy.float32(InitialLearningRate), name='learning_rate')

    learning_rate_update = theano.function(inputs=[],outputs=learning_rate_theano,updates=[(learning_rate_theano,learning_rate_theano*Learning_Rate_Decay)])
    update_proc = momentum_sgd(cost,parameters,0.8, learning_rate_theano)

    #train
    training_proc = theano.function(
        	inputs=[shuffIdx], outputs=cost, updates=update_proc,
        	givens={x0:tX[theano.tensor.flatten(shuffIdx[:,0])],
                x1:tX[theano.tensor.flatten(shuffIdx[:,1])],
                x2:tX[theano.tensor.flatten(shuffIdx[:,2])],
                x3:tX[theano.tensor.flatten(shuffIdx[:,3])],
                y_b:tYb[theano.tensor.flatten(shuffIdx[:,1])]}) 
    #test
    test_on_testing_proc = theano.function(
        	inputs=[shuffIdx], outputs=[err_b], 
        	givens={x0:vX[shuffIdx[:,0]],x1:vX[shuffIdx[:,1]],x2:vX[shuffIdx[:,2]],x3:vX[shuffIdx[:,3]],y_b:vYb[shuffIdx[:,1]]}) 
       
    test_on_training_proc = theano.function(
        	inputs=[shuffIdx], outputs=[err_b], 
        	givens={x0:tX[shuffIdx[:,0]],x1:tX[shuffIdx[:,1]],x2:tX[shuffIdx[:,2]],x3:tX[shuffIdx[:,3]],y_b:tYb[shuffIdx[:,1]]}) 

    forward_proc = theano.function(inputs=[x0,x1,x2,x3],outputs=[x_forward])
    return (learning_rate_update, training_proc, test_on_testing_proc,test_on_training_proc,forward_proc)
예제 #17
0
파일: main.py 프로젝트: mohammadpz/rna
def setup_model(configs):

    tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
    # shape: T x B x C x X x Y
    input_ = tensor5('features')
    # shape: B x Classes
    target = T.lmatrix('targets')

    model = LSTMAttention(
        configs,
        weights_init=Glorot(),
        biases_init=Constant(0))
    model.initialize()

    (h, c, location, scale, patch, downn_sampled_input,
        conved_part_1, conved_part_2, pre_lstm) = model.apply(input_)

    classifier = MLP(
        [Rectifier(), Logistic()],
        configs['classifier_dims'],
        weights_init=Glorot(),
        biases_init=Constant(0))
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = BinaryCrossEntropy().apply(target, probabilities)
    cost.name = 'CE'
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = 'ER'
    model.cost = cost

    if configs['load_pretrained']:
        blocks_model = Model(model.cost)
        all_params = blocks_model.parameters
        with open('VGG_CNN_params.npz') as f:
            loaded = np.load(f)
            all_conv_params = loaded.keys()
            for param in all_params:
                if param.name in loaded.keys():
                    assert param.get_value().shape == loaded[param.name].shape
                    param.set_value(loaded[param.name])
                    all_conv_params.pop(all_conv_params.index(param.name))
        print "the following parameters did not match: " + str(all_conv_params)

    if configs['test_model']:
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.cost],
                            on_unused_input='ignore',
                            allow_input_downcast=True)
        data = np.random.randn(10, 40, 3, 224, 224)
        targs = np.random.randn(40, 101)
        f(data, targs)
        print "Test passed! ;)"

    model.monitorings = [cost, error_rate]

    return model
예제 #18
0
def task_ID_layers(x, recurrent_in_size):
    mlp = MLP([Rectifier()] * (len(task_ID_FF_dims)-1), task_ID_FF_dims, name='task_ID_mlp', weights_init=Uniform(width=.2), biases_init=Constant(0))
    mlp.push_initialization_config()
    mlp.initialize()
    out_size = task_ID_FF_dims[-1] + recurrent_in_size - len(game_tasks)
    zero_padded_task_IDs = T.concatenate([x[:,:,-len(game_tasks):], T.zeros((x.shape[0], x.shape[1], task_ID_FF_dims[0] - len(game_tasks)))], axis=2)
    mlp_out = mlp.apply(zero_padded_task_IDs)
    task_ID_out = T.concatenate([x[:,:,:-len(game_tasks)]] + [mlp_out], axis=2)
    return task_ID_out, out_size
예제 #19
0
def test_snapshot():
    x = tensor.matrix('x')
    linear = MLP([Identity(), Identity()], [10, 10, 10],
                 weights_init=Constant(1), biases_init=Constant(2))
    linear.initialize()
    y = linear.apply(x)
    cg = ComputationGraph(y)
    snapshot = cg.get_snapshot(dict(x=numpy.zeros((1, 10), dtype=floatX)))
    assert len(snapshot) == 14
예제 #20
0
def test_serialization():
    # Create a simple brick with two parameters
    mlp = MLP(activations=[None, None],
              dims=[10, 10, 10],
              weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Check the data using numpy.load
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear_0.W', 'mlp-linear_1.W', 'pkl'])
    assert_allclose(numpy_data['mlp-linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['mlp-linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled
    mlp = load(f.name)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that only parameters are saved as NPY files
    mlp.random_data = numpy.random.rand(10)
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear_0.W', 'mlp-linear_1.W', 'pkl'])

    # Ensure that parameters can be loaded with correct names
    parameter_values = load_parameter_values(f.name)
    assert set(parameter_values.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])

    # Ensure that duplicate names are dealt with
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear.W', 'mlp-linear.W_2', 'pkl'])

    # Ensure warnings are raised when __main__ namespace objects are dumped
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)
예제 #21
0
 def create_model(self):
     x = self.x
     input_dim = self.input_dim
     mlp = MLP([Logistic(), Logistic(), Tanh()], [input_dim, 100, 100, 1],
               weights_init=IsotropicGaussian(0.001),
               biases_init=Constant(0))
     mlp.initialize()
     self.mlp = mlp
     probs = mlp.apply(x)
     return probs
def test_fully_layer():
	batch_size=2
	x = T.tensor4();
	y = T.ivector()
	V = 200
	layer_conv = Convolutional(filter_size=(5,5),num_filters=V,
				name="toto",
				weights_init=IsotropicGaussian(0.01),
				biases_init=Constant(0.0))
	# try with no bias
	activation = Rectifier()
	pool = MaxPooling(pooling_size=(2,2))

	convnet = ConvolutionalSequence([layer_conv, activation, pool], num_channels=15,
					image_size=(10,10),
					name="conv_section")
	convnet.push_allocation_config()
	convnet.initialize()
	output=convnet.apply(x)
	batch_size=output.shape[0]
	output_dim=np.prod(convnet.get_dim('output'))
	result_conv = output.reshape((batch_size, output_dim))
	mlp=MLP(activations=[Rectifier().apply], dims=[output_dim, 10],
				weights_init=IsotropicGaussian(0.01),
				biases_init=Constant(0.0))
	mlp.initialize()
	output=mlp.apply(result_conv)
	cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output))
	cg = ComputationGraph(cost)
	W = VariableFilter(roles=[WEIGHT])(cg.variables)
	B = VariableFilter(roles=[BIAS])(cg.variables)
	W = W[0]; b = B[0]

	inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg)
	outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg)
	var_input=inputs_fully[0]
	var_output=outputs_fully[0]
	
	[d_W,d_S,d_b] = T.grad(cost, [W, var_output, b])

	d_b = d_b.dimshuffle(('x',0))
	d_p = T.concatenate([d_W, d_b], axis=0)
	x_value = 1e3*np.random.ranf((2,15, 10, 10))
	f = theano.function([x,y], [var_input, d_S, d_p], allow_input_downcast=True, on_unused_input='ignore')
	A, B, C= f(x_value, [5, 0])
	A = np.concatenate([A, np.ones((2,1))], axis=1)
	print 'A', A.shape
	print 'B', B.shape
	print 'C', C.shape

	print lin.norm(C - np.dot(np.transpose(A), B), 'fro')

	return
	
	"""
예제 #23
0
def main(save_to, num_epochs, bokeh=False):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      DataStream(mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if bokeh:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_final_cost',
                 'test_misclassificationrate_apply_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(
                       mnist_train.num_examples, 50)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()
예제 #24
0
    def create_model(self):
        input_dim = self.input_dim
        x = self.x
        y = self.y
        p = self.p
        mask = self.mask
        hidden_dim = self.hidden_dim
        embedding_dim = self.embedding_dim
        lookup = LookupTable(self.dict_size,
                             embedding_dim,
                             weights_init=IsotropicGaussian(0.001),
                             name='LookupTable')
        x_to_h = Linear(embedding_dim,
                        hidden_dim * 4,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(0.001),
                        biases_init=Constant(0.0))
        lstm = LSTM(hidden_dim,
                    name='lstm',
                    weights_init=IsotropicGaussian(0.001),
                    biases_init=Constant(0.0))
        h_to_o = MLP([Logistic()], [hidden_dim, 1],
                     weights_init=IsotropicGaussian(0.001),
                     biases_init=Constant(0),
                     name='h_to_o')

        lookup.initialize()
        x_to_h.initialize()
        lstm.initialize()
        h_to_o.initialize()

        embed = lookup.apply(x).reshape(
            (x.shape[0], x.shape[1], self.embedding_dim))
        embed.name = "embed_vec"
        x_transform = x_to_h.apply(embed.transpose(1, 0, 2))
        x_transform.name = "Transformed X"
        self.lookup = lookup
        self.x_to_h = x_to_h
        self.lstm = lstm
        self.h_to_o = h_to_o

        #if mask is None:
        h, c = lstm.apply(x_transform)
        #else:
        #h, c = lstm.apply(x_transform, mask=mask)
        h.name = "hidden_state"
        c.name = "cell state"
        # only values of hidden units of the last timeframe are used for
        # the classification
        indices = T.sum(mask, axis=0) - 1
        rel_hid = h[indices, T.arange(h.shape[1])]
        out = self.h_to_o.apply(rel_hid)

        probs = out
        return probs
예제 #25
0
def test_add_to_dump():

    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None],
              dims=[10, 10, 10],
              weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)
    mlp2 = MLP(activations=[None, None],
               dims=[10, 10, 10],
               weights_init=Constant(1.),
               use_bias=False,
               name='mlp2')
    mlp2.initialize()

    # Ensure that adding to dump is working.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb+') as ff:
        add_to_dump(mlp.children[0],
                    ff,
                    'child_0',
                    parameters=[mlp.children[0].W])
        add_to_dump(mlp.children[1], ff, 'child_1')
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(
            ['_pkl', '_parameters', 'child_0', 'child_1'])

    # Ensure that we can load any object from the tarball.
    with open(f.name, 'rb') as ff:
        saved_children_0 = load(ff, 'child_0')
        saved_children_1 = load(ff, 'child_1')
        assert_allclose(saved_children_0.W.get_value(), numpy.ones((10, 10)))
        assert_allclose(saved_children_1.W.get_value(),
                        numpy.ones((10, 10)) * 2)

    # Check the error if using a reserved name.
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp.children[0], ff, '_pkl'])

    # Check the error if saving an object with other parameters
    with open(f.name, 'rb+') as ff:
        assert_raises(
            ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
            **dict(parameters=[mlp2.children[0].W, mlp2.children[1].W]))

    # Check the warning if adding to a dump with no parameters
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    with open(f.name, 'rb+') as ff:
        assert_raises(
            ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
            **dict(parameters=[mlp2.children[0].W, mlp2.children[1].W]))
예제 #26
0
 def build_model(self, hidden_dim):
     board_input = T.vector('input')
     mlp = MLP(activations=[LeakyRectifier(0.1), LeakyRectifier(0.1)],
               dims=[9, hidden_dim,  9],
               weights_init=IsotropicGaussian(0.00001),
               biases_init=Constant(0.01))
     output = mlp.apply(board_input)
     masked_output = Softmax().apply(output * T.eq(board_input, 0) * 1000)
     mlp.initialize()
     cost, chosen = self.get_cost(masked_output)
     return board_input, mlp, cost, chosen, output
예제 #27
0
def test_serialization():
    # Create a simple brick with two parameters
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Check the data using numpy.load
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear_0.W', 'mlp-linear_1.W', 'pkl'])
    assert_allclose(numpy_data['mlp-linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['mlp-linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled
    mlp = load(f.name)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that only parameters are saved as NPY files
    mlp.random_data = numpy.random.rand(10)
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear_0.W', 'mlp-linear_1.W', 'pkl'])

    # Ensure that parameters can be loaded with correct names
    parameter_values = load_parameter_values(f.name)
    assert set(parameter_values.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])

    # Ensure that duplicate names are dealt with
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear.W', 'mlp-linear.W_2', 'pkl'])

    # Ensure warnings are raised when __main__ namespace objects are dumped
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)
예제 #28
0
 def build_model(self, hidden_dim):
     board_input = T.vector('input')
     mlp = MLP(activations=[LeakyRectifier(0.1),
                            LeakyRectifier(0.1)],
               dims=[9, hidden_dim, 9],
               weights_init=IsotropicGaussian(0.00001),
               biases_init=Constant(0.01))
     output = mlp.apply(board_input)
     masked_output = Softmax().apply(output * T.eq(board_input, 0) * 1000)
     mlp.initialize()
     cost, chosen = self.get_cost(masked_output)
     return board_input, mlp, cost, chosen, output
예제 #29
0
def test_serialization():

    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None],
              dims=[10, 10, 10],
              weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Ensure warnings are raised when __main__ namespace objects are dumped.
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp.foo, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)

    # Check the parameters.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])
    assert_allclose(numpy_data['/mlp/linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['/mlp/linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled.
    with open(f.name, 'rb') as ff:
        mlp = load(ff)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that duplicate names are dealt with.
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear.W', '/mlp/linear.W_2'])

    # Check when we don't dump the main object.
    with NamedTemporaryFile(delete=False) as f:
        dump(None, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_parameters'])
예제 #30
0
 def apply(self, input_, target):
     mlp = MLP(self.non_lins, self.dims,
               weights_init=IsotropicGaussian(0.01),
               biases_init=Constant(0),
               name=self.name)
     mlp.initialize()
     probs = mlp.apply(T.flatten(input_, outdim=2))
     probs.name = 'probs'
     cost = CategoricalCrossEntropy().apply(target.flatten(), probs)
     cost.name = "CE"
     self.outputs = {}
     self.outputs['probs'] = probs
     self.outputs['cost'] = cost
예제 #31
0
def test_serialization():

    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Ensure warnings are raised when __main__ namespace objects are dumped.
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp.foo, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)

    # Check the parameters.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])
    assert_allclose(numpy_data['/mlp/linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['/mlp/linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled.
    with open(f.name, 'rb') as ff:
        mlp = load(ff)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that duplicate names are dealt with.
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear.W', '/mlp/linear.W_2'])

    # Check when we don't dump the main object.
    with NamedTemporaryFile(delete=False) as f:
        dump(None, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_parameters'])
예제 #32
0
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHTS])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(cost=cost,
                                step_rule=SteepestDescent(learning_rate=0.1))
    main_loop = MainLoop(
        mlp,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(mnist_train.num_examples,
                                                     50)),
        algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=num_epochs),
            DataStreamMonitoring([cost, error_rate],
                                 DataStream(mnist_test,
                                            iteration_scheme=SequentialScheme(
                                                mnist_test.num_examples, 500)),
                                 prefix="test"),
            TrainingDataMonitoring([
                cost, error_rate,
                aggregation.mean(algorithm.total_gradient_norm)
            ],
                                   prefix="train",
                                   after_every_epoch=True),
            SerializeMainLoop(save_to),
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]),
            Printing()
        ])
    main_loop.run()
예제 #33
0
 def apply(self, input_, target):
     mlp = MLP(self.non_lins,
               self.dims,
               weights_init=IsotropicGaussian(0.01),
               biases_init=Constant(0),
               name=self.name)
     mlp.initialize()
     probs = mlp.apply(T.flatten(input_, outdim=2))
     probs.name = 'probs'
     cost = CategoricalCrossEntropy().apply(target.flatten(), probs)
     cost.name = "CE"
     self.outputs = {}
     self.outputs['probs'] = probs
     self.outputs['cost'] = cost
예제 #34
0
def test_add_to_dump():

    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)
    mlp2 = MLP(activations=[None, None], dims=[10, 10, 10],
               weights_init=Constant(1.), use_bias=False,
               name='mlp2')
    mlp2.initialize()

    # Ensure that adding to dump is working.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb+') as ff:
        add_to_dump(mlp.children[0], ff, 'child_0',
                    parameters=[mlp.children[0].W])
        add_to_dump(mlp.children[1], ff, 'child_1')
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_pkl', '_parameters',
                                               'child_0', 'child_1'])

    # Ensure that we can load any object from the tarball.
    with open(f.name, 'rb') as ff:
        saved_children_0 = load(ff, 'child_0')
        saved_children_1 = load(ff, 'child_1')
        assert_allclose(saved_children_0.W.get_value(),
                        numpy.ones((10, 10)))
        assert_allclose(saved_children_1.W.get_value(),
                        numpy.ones((10, 10)) * 2)
    
    # Check the error if using a reserved name.
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp.children[0], ff, '_pkl'])

    # Check the error if saving an object with other parameters
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
                      **dict(parameters=[mlp2.children[0].W,
                                         mlp2.children[1].W]))

    # Check the warning if adding to a dump with no parameters
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
                      **dict(parameters=[mlp2.children[0].W,
                                         mlp2.children[1].W]))
예제 #35
0
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(dim=256,
                          mlp_hidden_dims=[256, 4],
                          batch_size=100,
                          image_shape=(64, 64),
                          patch_shape=(16, 16),
                          weights_init=Glorot(),
                          biases_init=Constant(0))
    model.initialize()
    h, c, location, scale = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [256 * 2, 200, 10],
                     weights_init=Glorot(),
                     biases_init=Constant(0))
    model.h = h
    model.c = c
    model.location = location
    model.scale = scale
    classifier.initialize()

    probabilities = classifier.apply(T.concatenate([h[-1], c[-1]], axis=1))
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)
    model.cost = cost

    location_x_0_avg = T.mean(location[0, :, 0])
    location_x_0_avg.name = 'location_x_0_avg'
    location_x_10_avg = T.mean(location[10, :, 0])
    location_x_10_avg.name = 'location_x_10_avg'
    location_x_20_avg = T.mean(location[-1, :, 0])
    location_x_20_avg.name = 'location_x_20_avg'

    scale_x_0_avg = T.mean(scale[0, :, 0])
    scale_x_0_avg.name = 'scale_x_0_avg'
    scale_x_10_avg = T.mean(scale[10, :, 0])
    scale_x_10_avg.name = 'scale_x_10_avg'
    scale_x_20_avg = T.mean(scale[-1, :, 0])
    scale_x_20_avg.name = 'scale_x_20_avg'

    monitorings = [
        error_rate, location_x_0_avg, location_x_10_avg, location_x_20_avg,
        scale_x_0_avg, scale_x_10_avg, scale_x_20_avg
    ]
    model.monitorings = monitorings

    return model
예제 #36
0
파일: cifar_mlp.py 프로젝트: oplatek/ALI
def main(save_to, num_epochs, batch_size):
    mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()], [3072, 4096, 1024, 512, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tt.tensor4('features', dtype='float32')
    y = tt.vector('label', dtype='int32')

    probs = mlp.apply(x.reshape((-1, 3072)))
    cost = CategoricalCrossEntropy().apply(y, probs)
    error_rate = MisclassificationRate().apply(y, probs)

    cg = ComputationGraph([cost])
    ws = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * sum(([(w**2).sum() for w in ws]))
    cost.name = 'final_cost'

    train_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=True)
    valid_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=False)

    train_stream = train_dataset.get_stream(batch_size)
    valid_stream = valid_dataset.get_stream(batch_size)

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Adam(learning_rate=0.001))
    extensions = [
        Timing(),
        LogExtension('/home/belohlavek/ALI/mlp.log'),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate], valid_stream, prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    main_loop = MainLoop(algorithm,
                         train_stream,
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
def create_model():
    """Create the deep autoencoder model with Blocks, and load MNIST."""
    mlp = MLP(activations=[Logistic(), Logistic(), Logistic(), None,
                           Logistic(), Logistic(), Logistic(), Logistic()],
              dims=[784, 1000, 500, 250, 30, 250, 500, 1000, 784],
              weights_init=Sparse(15, IsotropicGaussian()),
              biases_init=Constant(0))
    mlp.initialize()

    x = tensor.matrix('features')
    x_hat = mlp.apply(tensor.flatten(x, outdim=2))
    squared_err = SquaredError().apply(tensor.flatten(x, outdim=2), x_hat)
    cost = BinaryCrossEntropy().apply(tensor.flatten(x, outdim=2), x_hat)

    return x, cost, squared_err
예제 #38
0
    def create_model(self, x, y, input_dim, p):

        # Create the output of the MLP
        mlp = MLP([Tanh(), Tanh(), Logistic()], [input_dim, 200, 100, 1],
                  weights_init=IsotropicGaussian(0.01),
                  biases_init=Constant(0))
        mlp.initialize()
        probs = mlp.apply(x).sum()

        # Create the if-else cost function
        reward = (probs * y * 1.0) / p + (1 - probs) * (1 - y) * 1.0 / (1 - p)
        cost = -reward  # Negative of reward
        cost.name = "cost"

        return mlp, cost
예제 #39
0
    def create_base_model(self, x, y, input_dim):

        # Create the output of the MLP
        mlp = MLP([Tanh(), Tanh(), Logistic()], [input_dim, 100, 100, 1],
                  weights_init=IsotropicGaussian(0.001),
                  biases_init=Constant(0))
        mlp.initialize()
        probs = mlp.apply(x)
        #sq_err = SquaredError()
        cost = T.mean(T.sqr(y.flatten() - probs.flatten()))
        cost.name = 'cost'
        pred_out = probs > 0.5
        mis_cost = T.mean(T.neq(y.flatten(), pred_out.flatten()))
        mis_cost.name = 'MisclassificationRate'
        return mlp, cost, mis_cost
예제 #40
0
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(dim=256,
                          mlp_hidden_dims=[256, 4],
                          batch_size=100,
                          image_shape=(64, 64),
                          patch_shape=(16, 16),
                          weights_init=Glorot(),
                          biases_init=Constant(0))
    model.initialize()
    h, c, location, scale = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [256 * 2, 200, 10],
                     weights_init=Glorot(),
                     biases_init=Constant(0))
    model.h = h
    model.c = c
    model.location = location
    model.scale = scale
    classifier.initialize()

    probabilities = classifier.apply(T.concatenate([h[-1], c[-1]], axis=1))
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)
    model.cost = cost

    location_x_0_avg = T.mean(location[0, :, 0])
    location_x_0_avg.name = 'location_x_0_avg'
    location_x_10_avg = T.mean(location[10, :, 0])
    location_x_10_avg.name = 'location_x_10_avg'
    location_x_20_avg = T.mean(location[-1, :, 0])
    location_x_20_avg.name = 'location_x_20_avg'

    scale_x_0_avg = T.mean(scale[0, :, 0])
    scale_x_0_avg.name = 'scale_x_0_avg'
    scale_x_10_avg = T.mean(scale[10, :, 0])
    scale_x_10_avg.name = 'scale_x_10_avg'
    scale_x_20_avg = T.mean(scale[-1, :, 0])
    scale_x_20_avg.name = 'scale_x_20_avg'

    monitorings = [error_rate,
                   location_x_0_avg, location_x_10_avg, location_x_20_avg,
                   scale_x_0_avg, scale_x_10_avg, scale_x_20_avg]
    model.monitorings = monitorings

    return model
예제 #41
0
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(dim=500,
                          mlp_hidden_dims=[400, 4],
                          batch_size=100,
                          image_shape=(100, 100),
                          patch_shape=(28, 28),
                          weights_init=Glorot(),
                          biases_init=Constant(0))
    model.initialize()
    h, c, location, scale = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [500, 100, 10],
                     weights_init=Glorot(),
                     biases_init=Constant(0))
    model.h = h
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)

    location_x_avg = T.mean(location[:, 0])
    location_x_avg.name = 'location_x_avg'
    location_y_avg = T.mean(location[:, 1])
    location_y_avg.name = 'location_y_avg'
    scale_x_avg = T.mean(scale[:, 0])
    scale_x_avg.name = 'scale_x_avg'
    scale_y_avg = T.mean(scale[:, 1])
    scale_y_avg.name = 'scale_y_avg'

    location_x_std = T.std(location[:, 0])
    location_x_std.name = 'location_x_std'
    location_y_std = T.std(location[:, 1])
    location_y_std.name = 'location_y_std'
    scale_x_std = T.std(scale[:, 0])
    scale_x_std.name = 'scale_x_std'
    scale_y_std = T.std(scale[:, 1])
    scale_y_std.name = 'scale_y_std'

    monitorings = [error_rate,
                   location_x_avg, location_y_avg, scale_x_avg, scale_y_avg,
                   location_x_std, location_y_std, scale_x_std, scale_y_std]

    return cost, monitorings
예제 #42
0
def test_load():
    # Create a main loop and checkpoint it
    mlp = MLP(activations=[None],
              dims=[10, 10],
              weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()

    main_loop = MainLoop(data_stream=data_stream,
                         algorithm=GradientDescent(cost=cost, parameters=[W]),
                         extensions=[
                             FinishAfter(after_n_batches=5),
                             Checkpoint('myweirdmodel.picklebarrel')
                         ])
    main_loop.run()

    # Load the parameters, log and iteration state
    old_value = W.get_value()
    W.set_value(old_value * 2)
    main_loop = MainLoop(model=Model(cost),
                         data_stream=data_stream,
                         algorithm=GradientDescent(cost=cost, parameters=[W]),
                         extensions=[
                             Load('myweirdmodel.picklebarrel',
                                  load_iteration_state=True,
                                  load_log=True)
                         ])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Make sure things work too if the model was never saved before
    main_loop = MainLoop(model=Model(cost),
                         data_stream=data_stream,
                         algorithm=GradientDescent(cost=cost, parameters=[W]),
                         extensions=[
                             Load('mynonexisting.picklebarrel',
                                  load_iteration_state=True,
                                  load_log=True)
                         ])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
예제 #43
0
def test_checkpointing():
    # Create a main loop and checkpoint it
    mlp = MLP(activations=[None], dims=[10, 10], weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()

    main_loop = MainLoop(
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[FinishAfter(after_n_batches=5),
                    Checkpoint('myweirdmodel.tar', parameters=[W])]
    )
    main_loop.run()

    # Load it again
    old_value = W.get_value()
    W.set_value(old_value * 2)
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('myweirdmodel.tar')]
    )
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Make sure things work too if the model was never saved before
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('mynonexisting.tar')]
    )
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')

    # Cleaning
    if os.path.exists('myweirdmodel.tar'):
        os.remove('myweirdmodel.tar')
예제 #44
0
def test_mlp():
    x = tensor.matrix()
    x_val = numpy.random.rand(2, 16).astype(theano.config.floatX)
    mlp = MLP(activations=[Tanh(), None], dims=[16, 8, 4],
              weights_init=Constant(1), biases_init=Constant(1))
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(
        numpy.tanh(x_val.dot(numpy.ones((16, 8))) + numpy.ones((2, 8))).dot(
            numpy.ones((8, 4))) + numpy.ones((2, 4)),
        y.eval({x: x_val}), rtol=1e-06)

    mlp = MLP(activations=[None], weights_init=Constant(1), use_bias=False)
    mlp.dims = [16, 8]
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(x_val.dot(numpy.ones((16, 8))),
                    y.eval({x: x_val}), rtol=1e-06)
예제 #45
0
    def create_model(self, x, y, input_dim, p):

        # Create the output of the MLP
        mlp = MLP(
            [Rectifier(), Rectifier(), Logistic()], [input_dim, 150, 100, 1],
            weights_init=IsotropicGaussian(0.01),
            biases_init=Constant(0))
        mlp.initialize()
        probs = 1 - mlp.apply(x)
        y = y.dimshuffle(0, 'x')
        # Create the if-else cost function
        pos_ex = (y * probs) / p
        neg_ex = (1 - y) * (1 - probs) / np.float32(1 - p)
        reward = pos_ex + neg_ex
        cost = reward  # Negative of reward
        cost.name = "cost"

        return mlp, cost
예제 #46
0
 def setUp(self):
     """Create main loop and run it."""
     mlp = MLP(activations=[None], dims=[10, 10], weights_init=Constant(1.0), use_bias=False)
     mlp.initialize()
     self.W = mlp.linear_transformations[0].W
     x = tensor.vector("data")
     cost = mlp.apply(x).mean()
     data = numpy.random.rand(10, 10).astype(theano.config.floatX)
     self.data_stream = IterableDataset(data).get_example_stream()
     self.model = Model(cost)
     self.algorithm = GradientDescent(cost=cost, parameters=[self.W])
     self.main_loop = MainLoop(
         model=self.model,
         data_stream=self.data_stream,
         algorithm=self.algorithm,
         extensions=[FinishAfter(after_n_batches=5), Checkpoint("myweirdmodel.tar", save_separately=["log"])],
     )
     self.main_loop.run()
예제 #47
0
파일: test_bricks.py 프로젝트: CVML/blocks
def test_mlp_apply():
    x = tensor.matrix()
    x_val = numpy.random.rand(2, 16).astype(theano.config.floatX)
    mlp = MLP(activations=[Tanh().apply, None], dims=[16, 8, 4],
              weights_init=Constant(1), biases_init=Constant(1))
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(
        numpy.tanh(x_val.dot(numpy.ones((16, 8))) + numpy.ones((2, 8))).dot(
            numpy.ones((8, 4))) + numpy.ones((2, 4)),
        y.eval({x: x_val}), rtol=1e-06)

    mlp = MLP(activations=[None], weights_init=Constant(1), use_bias=False)
    mlp.dims = [16, 8]
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(x_val.dot(numpy.ones((16, 8))),
                    y.eval({x: x_val}), rtol=1e-06)
    assert mlp.rng == mlp.linear_transformations[0].rng
예제 #48
0
파일: cifar_mlp.py 프로젝트: oplatek/ALI
def main(save_to, num_epochs, batch_size):
    mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()], [3072, 4096, 1024, 512, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tt.tensor4('features', dtype='float32')
    y = tt.vector('label', dtype='int32')

    probs = mlp.apply(x.reshape((-1,3072)))
    cost = CategoricalCrossEntropy().apply(y, probs)
    error_rate = MisclassificationRate().apply(y, probs)

    cg = ComputationGraph([cost])
    ws = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * sum(([(w**2).sum() for w in ws]))
    cost.name = 'final_cost'

    train_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10', is_train=True)
    valid_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10', is_train=False)

    train_stream = train_dataset.get_stream(batch_size)
    valid_stream = valid_dataset.get_stream(batch_size)

    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Adam(learning_rate=0.001))
    extensions = [Timing(),
                  LogExtension('/home/belohlavek/ALI/mlp.log'),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring([cost, error_rate], valid_stream, prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate, aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    main_loop = MainLoop(algorithm,
                         train_stream,
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
예제 #49
0
def test_apply_batch_normalization_nested():
    x = tensor.matrix()
    eps = 1e-8
    batch_dims = (3, 9)
    bn = BatchNormalization(input_dim=5, epsilon=eps)
    mlp = MLP([Sequence([bn.apply, Tanh().apply])], [9, 5],
              weights_init=Constant(0.4), biases_init=Constant(1))
    mlp.initialize()
    y = mlp.apply(x)
    cg = apply_batch_normalization(ComputationGraph([y]))
    y_bn = cg.outputs[0]
    rng = numpy.random.RandomState((2016, 1, 18))
    x_ = rng.uniform(size=batch_dims).astype(theano.config.floatX)
    y_ = y_bn.eval({x: x_})
    W_, b_ = map(lambda s: (getattr(mlp.linear_transformations[0], s)
                            .get_value(borrow=True)), ['W', 'b'])
    z_ = numpy.dot(x_, W_) + b_
    y_expected = numpy.tanh((z_ - z_.mean(axis=0)) /
                            numpy.sqrt(z_.var(axis=0) + eps))
    assert_allclose(y_, y_expected, rtol=1e-3)
예제 #50
0
 def create_model(self):
     x = self.x
     y = self.y
     input_dim = self.input_dim
     p = self.p
     mlp = MLP(
         [Rectifier(), Rectifier(), Logistic()], [input_dim, 100, 80, 1],
         weights_init=IsotropicGaussian(0.001),
         biases_init=Constant(0))
     mlp.initialize()
     self.mlp = mlp
     probs = mlp.apply(x)
     probs.name = "score"
     y = y.dimshuffle(0, 'x')
     # Create the if-else cost function
     pos_ex = (y * probs) / p
     neg_ex = (1 - y) * (1 - probs) / np.float32(1 - p)
     reward = pos_ex + neg_ex
     cost = reward  # Negative of reward
     cost.name = "cost"
     return cost, probs
예제 #51
0
파일: gated.py 프로젝트: csmfindling/kaggle
def build_mlp(features_cat, features_int, labels):

    mlp_int = MLP(activations=[Rectifier(), Rectifier()],
                  dims=[19, 50, 50],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(0),
                  name='mlp_interval')
    mlp_int.initialize()
    mlp_cat = MLP(activations=[Logistic()],
                  dims=[320, 50],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(0),
                  name='mlp_categorical')
    mlp_cat.initialize()

    mlp = MLP(activations=[Rectifier(), None],
              dims=[50, 50, 1],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0))
    mlp.initialize()

    gated = mlp_cat.apply(features_cat) * mlp_int.apply(features_int)
    prediction = mlp.apply(gated)
    cost = MAPECost().apply(prediction, labels)

    cg = ComputationGraph(cost)
    print cg.variables

    cg_dropout1   = apply_dropout(cg, [VariableFilter(roles=[OUTPUT])(cg.variables)[1], VariableFilter(roles=[OUTPUT])(cg.variables)[3]], .2)
    cost_dropout1 = cg_dropout1.outputs[0]

    return cost_dropout1, cg_dropout1.parameters, cost
예제 #52
0
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(input_dim=10000, dim=500,
                          mlp_hidden_dims=[2000, 500, 4],
                          batch_size=100,
                          image_shape=(100, 100),
                          patch_shape=(28, 28),
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
    model.initialize()
    h, c = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [500, 100, 10],
                     weights_init=IsotropicGaussian(0.01),
                     biases_init=Constant(0))
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)

    return cost, error_rate
예제 #53
0
def construct_model(vocab_size, embedding_dim, ngram_order, hidden_dims,
                    activations):
    # Construct the model
    x = tensor.lmatrix('features')
    y = tensor.lvector('targets')

    lookup = LookupTable(length=vocab_size, dim=embedding_dim, name='lookup')
    hidden = MLP(activations=activations + [None],
                 dims=[ngram_order * embedding_dim] + hidden_dims +
                 [vocab_size])

    embeddings = lookup.apply(x)
    embeddings = embeddings.flatten(ndim=2)  # Concatenate embeddings
    activations = hidden.apply(embeddings)
    cost = Softmax().categorical_cross_entropy(y, activations)

    # Initialize parameters
    lookup.weights_init = IsotropicGaussian(0.001)
    hidden.weights_init = IsotropicGaussian(0.01)
    hidden.biases_init = Constant(0.001)
    lookup.initialize()
    hidden.initialize()

    return cost
예제 #54
0
def main(save_to, num_epochs):
    batch_size = 128
    dim = 100
    n_steps = 20
    i2h1 = MLP([Identity()], [784, dim], biases_init=Constant(0.), weights_init=IsotropicGaussian(.001))
    h2o1 = MLP([Rectifier(), Logistic()], [dim, dim, 784],
               biases_init=Constant(0.), weights_init=IsotropicGaussian(.001))
    rec1 = SimpleRecurrent(dim=dim, activation=Tanh(), weights_init=Orthogonal())
    i2h1.initialize()
    h2o1.initialize()
    rec1.initialize()

    x = tensor.tensor3('features')
    x1 = x[1:, :, :]
    x2 = x[:-1, :, :]

    preproc = i2h1.apply(x1)
    h1 = rec1.apply(preproc)
    x_hat = h2o1.apply(h1)
    cost = tensor.nnet.binary_crossentropy(x_hat, x2).mean()
    # cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    cost.name = 'final_cost'

    cg = ComputationGraph([cost, ])

    mnist_train = MNIST("train", subset=slice(0, 50000), sources=('features', ))
    mnist_valid = MNIST("train", subset=slice(50000, 60000), sources=('features',))
    mnist_test = MNIST("test")
    trainstream = Mapping(Flatten(DataStream(mnist_train,
                          iteration_scheme=SequentialScheme(50000, batch_size))),
                          _meanize(n_steps))
    validstream = Mapping(Flatten(DataStream(mnist_valid,
                                             iteration_scheme=SequentialScheme(10000,
                                                                               batch_size))),
                          _meanize(n_steps))
    teststream = Mapping(Flatten(DataStream(mnist_test,
                                            iteration_scheme=SequentialScheme(10000,
                                                                              batch_size))),
                         _meanize(n_steps))

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=CompositeRule([Adam(), StepClipping(100)]))
    main_loop = MainLoop(
        algorithm,
        trainstream,
        extensions=[Timing(),
                    FinishAfter(after_n_epochs=num_epochs),
                    # DataStreamMonitoring(
                    #     [cost, ],
                    #     teststream,
                    #     prefix="test"),
                    DataStreamMonitoringAndSaving(
                    [cost, ],
                    validstream,
                    [i2h1, h2o1, rec1],
                    'best_'+save_to+'.pkl',
                    cost_name=cost.name,
                    after_epoch=True,
                    prefix='valid'),
                    TrainingDataMonitoring(
                        [cost,
                         aggregation.mean(algorithm.total_gradient_norm)],
                        prefix="train",
                        after_epoch=True),
                    # Plot(
                    #     save_to,
                    #     channels=[
                    #         ['test_final_cost',
                    #          'test_misclassificationrate_apply_error_rate'],
                    #         ['train_total_gradient_norm']]),
                    Printing()])
    main_loop.run()
예제 #55
0
파일: GAE_Adam.py 프로젝트: capybaralet/GAE
    #step_rule = RMSProp(learning_rate=lr, decay_rate=0.95)
    #step_rule = Momentum(learning_rate=lr, momentum=0.9)
    batch_size = 1000
    nvis, nhid, nlat = 784, 200, 144
    theano_rng = MRG_RandomStreams(134663)

    # Initialize prior
    prior_mu = shared_floatx(numpy.zeros(nlat), name='prior_mu')
    prior_log_sigma = shared_floatx(numpy.zeros(nlat), name='prior_log_sigma')

    # Initialize encoding network
    encoder = MLP(activations=[Rectifier(), Identity()],
                           dims=[nvis, nhid, nlat],
                           weights_init=IsotropicGaussian(std=0.001),
                           biases_init=Constant(0))
    encoder.initialize()

    # Initialize decoding network
    decoder = MLP(activations=[Rectifier(), Identity()],
                           dims=[nlat, nhid, nvis],
                           weights_init=IsotropicGaussian(std=0.001),
                           biases_init=Constant(0))
    decoder.initialize()

    # Encode / decode
    x = tensor.matrix('features')
    x.tag.test_value = np.random.randn(batch_size, 784).astype("float32") # doesn't work (need to change theano flags?)
    #ztest = T.dot(theano.shared(np.random.randn(batch_size, batch_size)), x)
    #print ztest.tag.test_value.shape
    z = encoder.apply(x)
    z.name = 'z'
예제 #56
0
KL_term = -0.5 * ((1 + z1_lognu -T.exp(z1_lognu) - z1_mu ** 2).sum(axis=1) 
          + (1 + z2_lognu -T.exp(z2_lognu) - z2_mu ** 2).sum(axis=1))
reconstruction_term = (x * T.log(decoder_p) + 
                       (1 - x) * T.log(1 - decoder_p)).sum(axis=1) 
cost = (KL_term -reconstruction_term).mean()
cost.name = 'negative_log_likelihood'


# Initialize the parameters

encoder_network1._push_initialization_config()
for layer in encoder_network1.linear_transformations:
    layer.weights_init = Uniform(
        width=12. / (layer.input_dim + layer.output_dim))
encoder_network1.initialize()

encoder_network2._push_initialization_config()
for layer in encoder_network2.linear_transformations:
    layer.weights_init = Uniform(
        width=12. / (layer.input_dim + layer.output_dim))
encoder_network2.initialize()

encoder_network3._push_initialization_config()
for layer in encoder_network3.linear_transformations:
    layer.weights_init = Uniform(
        width=12. / (layer.input_dim + layer.output_dim))
encoder_network3.initialize()

encoder_network4._push_initialization_config()
for layer in encoder_network4.linear_transformations:
        x1 = data_preprocessing1(x).copy(name='x_clean'))
        x2 = data_preprocessing2(x).copy(name='x_dirty'))
        out1 = conv_sequence2.apply(x1)
        out2 = conv_sequence2.apply(x2)


### Flattening data
conv_out1 = Flattener(name='flattener1').apply(out1)
conv_out2 = Flattener(name='flattener2').apply(out2)
conv_out = tensor.concatenate([conv_out1,conv_out2],axis=1)

### MLP
mlp_hiddens = 1000
top_mlp_dims = [numpy.prod(conv_sequence1.get_dim('output'))+numpy.prod(conv_sequence1.get_dim('output'))] + [mlp_hiddens] + [output_size]
top_mlp = MLP(mlp_activation, top_mlp_dims,weights_init=Uniform(width=0.2),biases_init=Constant(0.))
top_mlp.initialize()



### Getting the data

from fuel.datasets.dogs_vs_cats import DogsVsCats
from fuel.streams import DataStream, ServerDataStream
from fuel.schemes import ShuffledScheme
from fuel.transformers.image import RandomFixedSizeCrop, MinimumImageDimensions, Random2DRotation
from fuel.transformers import Flatten, Cast, ScaleAndShift


def create_data(data):

    stream = DataStream(data, iteration_scheme=ShuffledScheme(data.num_examples, batch_size))
예제 #58
0
                                biases_init=Constant(0.))


convnet.initialize()
conv_features = Flattener().apply(convnet.apply(X))


# MLP

mlp = MLP(activations=[Logistic(name='sigmoid_0'),
          Softmax(name='softmax_1')], dims=[ 256, 256, 256, 2],
          weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
[child.name for child in mlp.children]
['linear_0', 'sigmoid_0', 'linear_1', 'softmax_1']
Y = mlp.apply(conv_features)
mlp.initialize()


# Setting up the cost function
from blocks.bricks.cost import CategoricalCrossEntropy
cost = CategoricalCrossEntropy().apply(T.flatten(), Y)

from blocks.roles import WEIGHT
from blocks.graph import ComputationGraph
from blocks.filter import VariableFilter
cg = ComputationGraph(cost)
print(VariableFilter(roles=[WEIGHT])(cg.variables))
W1, W2, W3  = VariableFilter(roles=[WEIGHT])(cg.variables)
# cost with L2 regularization
cost = cost + 0.005 * (W2 ** 2).sum() + 0.005 * (W3 ** 2).sum()
cost.name = 'cost_with_regularization'