def __init__(self, config, **kwargs):
        super(Model, self).__init__(config, **kwargs)

        self.dest_mlp = MLP(
            activations=[Rectifier()
                         for _ in config.dim_hidden_dest] + [Softmax()],
            dims=[config.dim_hidden[-1]] + config.dim_hidden_dest +
            [config.dim_output_dest],
            name='dest_mlp')
        self.time_mlp = MLP(
            activations=[Rectifier()
                         for _ in config.dim_hidden_time] + [Softmax()],
            dims=[config.dim_hidden[-1]] + config.dim_hidden_time +
            [config.dim_output_time],
            name='time_mlp')

        self.dest_classes = theano.shared(numpy.array(
            config.dest_tgtcls, dtype=theano.config.floatX),
                                          name='dest_classes')
        self.time_classes = theano.shared(numpy.array(
            config.time_tgtcls, dtype=theano.config.floatX),
                                          name='time_classes')

        self.inputs.append('input_time')
        self.children.extend([self.dest_mlp, self.time_mlp])
Example #2
def build_training(lr=0.002, model=None):
    x = T.tensor4('x')
    y = T.imatrix()
    if model is None:
        model = build_model()
    y_prev = model.apply(x)
    y_softmax = Softmax().apply(y_prev)
    ##### prediction #####
    # cost = CategoricalCrossEntropy().apply(y.flatten(), y_prev).mean()
    cost = Softmax().categorical_cross_entropy(y.flatten(), y_prev).mean()
    error = MisclassificationRate().apply(y.flatten(), y_softmax).mean()
    W, B = get_Params(y_prev)
    params = W + B
    regularizer_full = sum([w.norm(2) for w in W[0:2]])
    regularizer_conv = sum([w.norm(2) for w in W[2:]])
    cost = cost  # + 0.01 * regularizer_conv  # + 0.001 * regularizer_conv
    updates, updates_init = RMSProp(cost, params, lr)
    # updates, updates_init = Adam(cost, params, lr)
    # updates = Sgd(cost, params, lr)
    train_function = theano.function([x, y], cost, updates=updates,
                                     allow_input_downcast=True)
    valid_function = theano.function([x, y], cost,
                                     allow_input_downcast=True)
    test_function = theano.function([x, y], error,
                                     allow_input_downcast=True)
    reinit = theano.function([], T.zeros((1,)), updates=updates_init)
    observation = theano.function([], [w.norm(2) for w in W])
    """
    reg_function = theano.function([], T.zeros((1,)), updates=clip(W),
                                   allow_input_downcast=True)

    observation = theano.function([], [w.norm(2) for w in W])
    """
    return train_function, valid_function, test_function, model, reinit
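Below is a minimal, hypothetical driver for the functions returned by build_training above; the data arrays, batch size, and epoch count are assumptions added for illustration and are not part of the original example.

# Hypothetical usage sketch (not from the original repository).
# x_* are assumed to be 4-D batches (batch, channels, height, width) and
# y_* integer matrices of shape (batch, 1), matching T.tensor4 / T.imatrix above.
train_f, valid_f, test_f, model, reinit = build_training(lr=0.002)
for epoch in range(10):
    for start in range(0, len(x_train), 64):
        batch_cost = train_f(x_train[start:start + 64],
                             y_train[start:start + 64])
    valid_cost = valid_f(x_valid, y_valid)
    test_error = test_f(x_test, y_test)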
Example #3
def get_config(config):
    config1 = {}

    if config == '5layers':
        config1['num_epochs'] = 150
        config1['num_channels'] = 3
        config1['image_shape'] = (192, 192)
        config1['filter_size'] = [(5, 5), (5, 5), (5, 5), (5, 5), (5, 5)]
        config1['num_filter'] = [32, 48, 64, 128, 256]
        config1['pooling_sizes'] = [(2, 2), (2, 2), (2, 2), (2, 2), (2, 2)]
        config1['mlp_hiddens'] = [1000, 100]
        config1['output_size'] = 2
        config1['batch_size'] = 16
        config1['activation'] = [Rectifier() for _ in config1['num_filter']]
        config1['mlp_activation'] = [
            Rectifier().apply for _ in config1['mlp_hiddens']
        ] + [Softmax().apply]
        config1['num_batches'] = None
    elif config == '4layers':
        config1['num_epochs'] = 100
        config1['num_channels'] = 3
        config1['image_shape'] = (160, 160)
        config1['filter_size'] = [(5, 5), (5, 5), (5, 5), (5, 5)]
        config1['num_filter'] = [32, 64, 128, 128]
        config1['pooling_sizes'] = [(2, 2), (2, 2), (2, 2), (2, 2)]
        config1['mlp_hiddens'] = [1000, 100]
        config1['output_size'] = 2
        config1['batch_size'] = 32
        config1['activation'] = [Rectifier() for _ in config1['num_filter']]
        config1['mlp_activation'] = [
            Rectifier().apply for _ in config1['mlp_hiddens']
        ] + [Softmax().apply]
        config1['num_batches'] = None
    else:
        config1['num_epochs'] = 100
        config1['num_channels'] = 3
        config1['image_shape'] = (128, 128)
        config1['filter_size'] = [(5, 5), (5, 5), (5, 5)]
        config1['num_filter'] = [20, 50, 80]
        config1['pooling_sizes'] = [(2, 2), (2, 2), (2, 2)]
        config1['mlp_hiddens'] = [1000]
        config1['output_size'] = 2
        config1['batch_size'] = 64
        config1['activation'] = [Rectifier() for _ in config1['num_filter']]
        config1['mlp_activation'] = [
            Rectifier().apply for _ in config1['mlp_hiddens']
        ] + [Softmax().apply]
        config1['num_batches'] = 11000

        if config == 'test':
            print("Test run...")
            config1['test'] = True
        else:
            print("Using default config..")

    return config1
Example #4
    def __init__(self, hidden_dim, n_classes, **kwargs):
        super(SingleSoftmax, self).__init__(**kwargs)

        self.hidden_dim = hidden_dim
        self.n_classes = n_classes

        self.mlp = MLP(activations=[Rectifier(), Softmax()],
                       dims=[hidden_dim, hidden_dim // 2, self.n_classes],
                       weights_init=Orthogonal(),
                       biases_init=Constant(0))
        self.softmax = Softmax()

        self.children = [self.mlp, self.softmax]
Example #5
def build_model(images, labels):

    # Construct a bottom convolutional sequence
    bottom_conv_sequence = convolutional_sequence((3, 3), 64, (150, 150))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    top_mlp = MLP([
        LeakyRectifier(name='non_linear_9'),
        LeakyRectifier(name='non_linear_10'),
        Softmax(name='non_linear_11')
    ], [conv_out_dim, 2048, 612, 10],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(1))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence(
        [bottom_conv_sequence.apply, flattener.apply, top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    return cost
Example #6
    def __init__(self, config):
        self.X = T.tensor4("features")
        c = config

        seq = BrickSequence(
            input_dim=(3, 32, 32),
            bricks=[
                conv3(c['n_l1']),
                conv3(c['n_l2']),
                max_pool(),
                conv3(c['n_l3']),
                conv3(c['n_l4']),
                max_pool(),
                #conv3(10),
                #conv3(10),
                Flattener(),
                linear(c['n_l5']),
                Softmax()
            ])

        seq.initialize()

        self.pred = seq.apply(self.X)
        self.Y = T.imatrix("targets")

        self.cost = CategoricalCrossEntropy().apply(self.Y.flatten(),
                                                    self.pred)
        self.cost.name = "cost"

        self.accur = 1.0 - MisclassificationRate().apply(
            self.Y.flatten(), self.pred)
        self.accur.name = "accur"
Example #7
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                     which_sources=('features', )),
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                                 which_sources=('features', )),
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
Example #8
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(input_dim=10000,
                          dim=500,
                          mlp_hidden_dims=[2000, 500, 4],
                          batch_size=100,
                          image_shape=(100, 100),
                          patch_shape=(28, 28),
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
    model.initialize()
    h, c = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [500, 100, 10],
                     weights_init=IsotropicGaussian(0.01),
                     biases_init=Constant(0))
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)

    return cost, error_rate
Example #9
def create_lenet_5():
    feature_maps = [6, 16]
    mlp_hiddens = [120, 84]
    conv_sizes = [5, 5]
    pool_sizes = [2, 2]
    image_size = (28, 28)
    output_size = 10

    # The above are from LeCun's paper. The blocks example had:
    #    feature_maps = [20, 50]
    #    mlp_hiddens = [500]

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    convnet = LeNet(conv_activations, 1, image_size,
                    filter_sizes=zip(conv_sizes, conv_sizes),
                    feature_maps=feature_maps,
                    pooling_sizes=zip(pool_sizes, pool_sizes),
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='valid',
                    weights_init=Uniform(width=.2),
                    biases_init=Constant(0))
    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()

    return convnet
Example #10
def test_activations():
    x = tensor.vector()
    x_val = numpy.random.rand(8).astype(theano.config.floatX)
    exp_x_val = numpy.exp(x_val)

    assert_allclose(x_val, Identity().apply(x).eval({x: x_val}))
    assert_allclose(numpy.tanh(x_val),
                    Tanh().apply(x).eval({x: x_val}),
                    rtol=1e-06)
    assert_allclose(numpy.log(1 + exp_x_val),
                    Softplus().apply(x).eval({x: x_val}),
                    rtol=1e-6)
    assert_allclose(exp_x_val / numpy.sum(exp_x_val),
                    Softmax().apply(x).eval({x: x_val}).flatten(),
                    rtol=1e-6)
    assert_allclose(1.0 / (1.0 + numpy.exp(-x_val)),
                    Logistic().apply(x).eval({x: x_val}),
                    rtol=1e-6)
    leaky_out_1 = x_val - 0.5
    leaky_out_1[leaky_out_1 < 0] *= 0.01
    assert_allclose(leaky_out_1,
                    LeakyRectifier().apply(x).eval({x: x_val - 0.5}),
                    rtol=1e-5)
    leaky_out_2 = x_val - 0.5
    leaky_out_2[leaky_out_2 < 0] *= 0.05
    assert_allclose(leaky_out_2,
                    LeakyRectifier(leak=0.05).apply(x).eval({x: x_val - 0.5}),
                    rtol=1e-5)
Example #11
def training(repo, learning_rate, batch_size, filenames):

    print 'LOAD DATA'
    (x_train,
     y_train), (x_valid,
                y_valid), (x_test,
                           y_test) = load_datasets_mnist(repo, filenames)

    print 'BUILD MODEL'
    train_f, valid_f, test_f, model, fisher, params = build_training()
    x_train = x_train[:1000]
    y_train = y_train[:1000]

    x = T.tensor4()
    y = T.imatrix()
    output = model.apply(x)
    output = output.reshape(
        (x.shape[0],
         model.get_dim('output')))  #TO DO : get_dim('name') for Architecture
    cost = Softmax().categorical_cross_entropy(y.flatten(), output).mean()
    cg = ComputationGraph(cost)

    inputs_conv = VariableFilter(roles=[INPUT], bricks=[Convolutional])(cg)
    outputs_conv = VariableFilter(roles=[OUTPUT], bricks=[Convolutional])(cg)
    inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg)
    outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg)
    dico = OrderedDict([('conv_output', outputs_conv[0])])
    [grad_s] = T.grad(cost, outputs_conv)
    dico['conv_output'] = grad_s

    f = theano.function([x, y],
                        grad_s,
                        allow_input_downcast=True,
                        on_unused_input='ignore')
    print np.mean(f(x_train[:10], y_train[:10]))
Example #12
def build_model(images, labels):
    
    # Construct a bottom convolutional sequence
    bottom_conv_sequence = convolutional_sequence((3,3), 16, (160, 160))
    bottom_conv_sequence._push_allocation_config()
    
    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    # top_mlp = MLP([Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')],
    #               [conv_out_dim, 1024, 10],
    #               weights_init=IsotropicGaussian(), biases_init=Constant(0))
    top_mlp = BatchNormalizedMLP(
        [Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')],
        [conv_out_dim, 1024, 10],
        weights_init=IsotropicGaussian(),
        biases_init=Constant(0))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence(
        [bottom_conv_sequence.apply, flattener.apply, top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()
    
    prediction = ss_seq.apply(images)
    cost_noreg = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    # add regularization
    selector = Selector([top_mlp])
    Ws = selector.get_parameters('W')
    mlp_brick_name = 'batchnormalizedmlp'
    W0 = Ws['/%s/linear_0.W' % mlp_brick_name]
    W1 = Ws['/%s/linear_1.W' % mlp_brick_name]

    cost = cost_noreg + .01 * (W0 ** 2).mean() + .01 * (W1 ** 2).mean()


    return cost
Example #13
    def __init__(self,
                 image_shape=None,
                 output_size=None,
                 noise_batch_size=None,
                 noise_without_rectifier=False,
                 noise_after_rectifier=False,
                 **kwargs):
        self.num_channels = 3
        self.image_shape = image_shape or (32, 32)
        self.output_size = output_size or 10
        self.noise_batch_size = noise_batch_size
        conv_parameters = [(96, 3, 1, 'half', Convolutional),
                           (96, 3, 1, 'half', Convolutional),
                           (96, 3, 2, 'half', NoisyConvolutional),
                           (192, 3, 1, 'half', Convolutional),
                           (192, 3, 1, 'half', Convolutional),
                           (192, 3, 2, 'half', NoisyConvolutional),
                           (192, 3, 1, 'half', Convolutional),
                           (192, 1, 1, 'valid', Convolutional),
                           (10, 1, 1, 'valid', Convolutional)]
        fc_layer = 10

        self.convolutions = []
        layers = []
        for i, (num_filters, filter_size, conv_step, border_mode,
                cls) in enumerate(conv_parameters):
            if cls == NoisyConvolutional and noise_after_rectifier:
                cls = NoisyConvolutional2
            layer = cls(filter_size=(filter_size, filter_size),
                        num_filters=num_filters,
                        step=(conv_step, conv_step),
                        border_mode=border_mode,
                        tied_biases=True,
                        name='conv_{}'.format(i))
            if cls == NoisyConvolutional or cls == NoisyConvolutional2:
                layer.noise_batch_size = self.noise_batch_size
            self.convolutions.append(layer)
            layers.append(layer)
            if cls != NoisyConvolutional2 and not noise_without_rectifier:
                layers.append(Rectifier())

        self.conv_sequence = ConvolutionalSequence(layers,
                                                   self.num_channels,
                                                   image_size=self.image_shape)

        # The AllConvNet applies average pooling to combine top-level
        # features across the image.
        self.flattener = GlobalAverageFlattener()

        # Then it inserts one final 10-way FC layer before softmax
        # self.top_mlp = MLP([Rectifier(), Softmax()],
        #     [conv_parameters[-1][0], fc_layer, self.output_size])
        self.top_softmax = Softmax()

        application_methods = [
            self.conv_sequence.apply, self.flattener.apply,
            self.top_softmax.apply
        ]

        super(NoisyAllConvNet, self).__init__(application_methods, **kwargs)
Example #14
    def __init__(self, batch_size, output_length,
                 visual_dim, word_dim,
                 visual_feature_dim,
                 question_feature_dim,
                 joint_dim,
                 memory_dim,
                 output_dim,
                 fc1_dim,
                 fc2_dim,
                 voc_size):
        # the video encoder
        self.video_encoder = visualEncoder(
            visual_dim,
            visual_feature_dim)
        self.sentence_encoder = questionEncoder(
            word_dim,
            question_feature_dim)
        self.toJoint = embeddingLayer(
            2 * question_feature_dim,
            2 * visual_feature_dim,
            joint_dim)
        self.rewatcher = videoAttentionLayer(
            joint_dim,
            memory_dim,
            output_dim)

        self.seq_gen = seqDecoder(
            joint_dim,
            output_dim,
            fc1_dim,
            fc2_dim)
        self.softmax_layer = Softmax()
        self.bs = batch_size
        self.output_length = output_length
        self.voc_size = voc_size
Example #15
def construct_model(input_dim, output_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[input_dim] + hidden_dims + [2])

    weights = mlp.apply(r)

    final = tensor.dot(x, weights)

    cost = Softmax().categorical_cross_entropy(y, final).mean()

    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply noise
    cg = ComputationGraph([cost, error_rate])
    noise_vars = VariableFilter(roles=[WEIGHT])(cg)
    cg = apply_noise(cg, noise_vars, noise_std)
    [cost, error_rate] = cg.outputs

    return cost, error_rate
Example #16
    def __init__(self, **kwargs):
        conv_layers = [
            Convolutional(filter_size=(3, 3), num_filters=64,
                          border_mode=(1, 1), name='conv_1'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=64,
                          border_mode=(1, 1), name='conv_2'),
            Rectifier(),
            MaxPooling((2, 2), step=(2, 2), name='pool_2'),

            Convolutional(filter_size=(3, 3), num_filters=128,
                          border_mode=(1, 1), name='conv_3'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=128,
                          border_mode=(1, 1), name='conv_4'),
            Rectifier(),
            MaxPooling((2, 2), step=(2, 2), name='pool_4'),

            Convolutional(filter_size=(3, 3), num_filters=256,
                          border_mode=(1, 1), name='conv_5'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=256,
                          border_mode=(1, 1), name='conv_6'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=256,
                          border_mode=(1, 1), name='conv_7'),
            Rectifier(),
            MaxPooling((2, 2), step=(2, 2), name='pool_7'),

            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_8'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_9'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_10'),
            Rectifier(),
            MaxPooling((2, 2), step=(2, 2), name='pool_10'),

            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_11'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_12'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_13'),
            Rectifier(),
            MaxPooling((2, 2), step=(2, 2), name='pool_13'),
        ]

        mlp = MLP([Rectifier(name='fc_14'), Rectifier('fc_15'), Softmax()],
                  [25088, 4096, 4096, 1000],
                  )
        conv_sequence = ConvolutionalSequence(
            conv_layers, 3, image_size=(224, 224))

        super(VGGNet, self).__init__(
            [conv_sequence.apply, Flattener().apply, mlp.apply], **kwargs)
Example #17
    def __init__(self, feature_dim, hidden_dim, output_dim):
        self.image_embed = Linear(input_dim=feature_dim,
                                  output_dim=hidden_dim,
                                  weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0),
                                  use_bias=False,
                                  name='image_embed')
        self.word_embed = Linear(input_dim=feature_dim,
                                 output_dim=hidden_dim,
                                 weights_init=IsotropicGaussian(0.01),
                                 biases_init=Constant(0),
                                 use_bias=False,
                                 name='word_embed')
        self.r_embed = Linear(input_dim=feature_dim,
                              output_dim=hidden_dim,
                              weights_init=IsotropicGaussian(0.01),
                              biases_init=Constant(0),
                              use_bias=False,
                              name='r_embed')
        self.m_to_s = Linear(input_dim=hidden_dim,
                             output_dim=1,
                             weights_init=IsotropicGaussian(0.01),
                             biases_init=Constant(0),
                             use_bias=False,
                             name='m_to_s')
        self.attention_dist = Softmax(name='attention_dist_softmax')
        self.r_to_r = Linear(input_dim=feature_dim,
                             output_dim=feature_dim,
                             weights_init=IsotropicGaussian(0.01),
                             biases_init=Constant(0),
                             use_bias=False,
                             name='r_to_r')
        # self.r_to_g = Linear(input_dim=feature_dim,
        #                      output_dim=output_dim,
        #                      weights_init=IsotropicGaussian(0.01),
        #                      biases_init=Constant(0),
        #                      use_bias=False,
        #                      name='r_to_g')
        self.image_embed.initialize()
        self.word_embed.initialize()
        self.r_embed.initialize()
        self.m_to_s.initialize()
        self.r_to_r.initialize()
        # self.r_to_g.initialize()

        # the sequence to sequence LSTM
        self.seq = LSTM(output_dim,
                        name='rewatcher_seq',
                        weights_init=IsotropicGaussian(0.01),
                        biases_init=Constant(0))
        self.seq_embed = Linear(feature_dim,
                                output_dim * 4,
                                name='rewatcher_seq_embed',
                                weights_init=IsotropicGaussian(0.01),
                                biases_init=Constant(0),
                                use_bias=False)

        self.seq.initialize()
        self.seq_embed.initialize()
Example #18
def construct_model(activation_function, r_dim, hidden_dim, out_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    nx = x.shape[0]
    nj = x.shape[1]  # also is r.shape[0]
    nr = r.shape[1]

    # r is nj x nr
    # x is nx x nj
    # y is nx

    # Get a representation of r of size r_dim
    r = DAE(r)

    # r is now nj x r_dim

    # r_rep is nx x nj x r_dim
    r_rep = r[None, :, :].repeat(axis=0, repeats=nx)
    # x3 is nx x nj x 1
    x3 = x[:, :, None]

    # concat is nx x nj x (r_dim + 1)
    concat = tensor.concatenate([r_rep, x3], axis=2)

    # Change concat from Batch x Time x Features to T X B x F
    rnn_input = concat.dimshuffle(1, 0, 2)

    linear = Linear(input_dim=r_dim + 1,
                    output_dim=4 * hidden_dim,
                    name="input_linear")
    lstm = LSTM(dim=hidden_dim,
                activation=activation_function,
                name="hidden_recurrent")
    top_linear = Linear(input_dim=hidden_dim,
                        output_dim=out_dim,
                        name="out_linear")

    pre_rnn = linear.apply(rnn_input)
    states = lstm.apply(pre_rnn)[0]
    activations = top_linear.apply(states)
    activations = tensor.mean(activations, axis=0)

    cost = Softmax().categorical_cross_entropy(y, activations)

    pred = activations.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters

    for brick in (linear, lstm, top_linear):
        brick.weights_init = IsotropicGaussian(0.1)
        brick.biases_init = Constant(0.)
        brick.initialize()

    return cost, error_rate
Example #19
        def onestepEncAttn(hEncAttn):

            preEncattn = attentionmlpEnc.apply(hEncAttn)
            attEncsoft = Softmax()
            attEncpyx = attEncsoft.apply(preEncattn.flatten())
            attEncpred = attEncpyx.flatten()
            attenc = T.mul(hEncAttn.dimshuffle(1,0), attEncpred).dimshuffle(1,0)

            return attenc
Example #20
        def onestepContextAttn(hContextAttn):

            preContextatt = attentionmlpContext.apply(hContextAttn)
            attContextsoft = Softmax()
            attContextpyx = attContextsoft.apply(preContextatt.flatten())
            attContextpred = attContextpyx.flatten()
            attcontext = T.mul(hContextAttn.dimshuffle(1,0), attContextpred).dimshuffle(1,0)

            return attcontext
Example #21
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(config,
                                    output_dim=config.tgtcls.shape[0],
                                    **kwargs)
        self.classes = theano.shared(numpy.array(config.tgtcls,
                                                 dtype=theano.config.floatX),
                                     name='classes')
        self.softmax = Softmax()
        self.children.append(self.softmax)
Example #22
def get_costs(presoft, args):

    if has_indices(args.dataset):
        # Targets: (Time X Batch)
        y = tensor.lmatrix('targets')
        y_mask = tensor.ones_like(y, dtype=floatX)
        y_mask = tensor.set_subtensor(
            y_mask[:args.context, :],
            tensor.zeros_like(y_mask[:args.context, :], dtype=floatX))

        time, batch, feat = presoft.shape
        cross_entropy = Softmax().categorical_cross_entropy(
            (y.flatten() * y_mask.reshape((batch * time, ))), (presoft.reshape(
                (batch * time, feat)) * y_mask.reshape((batch * time, 1))))

        # renormalization
        renormalized_cross_entropy = cross_entropy * (
            tensor.sum(tensor.ones_like(y_mask)) / tensor.sum(y_mask))

        # BPC: Bits Per Character
        unregularized_cost = renormalized_cross_entropy / tensor.log(2)
        unregularized_cost.name = "cross_entropy"

    else:
        # Targets: (Time X Batch X Features)
        y = tensor.tensor3('targets', dtype=floatX)
        y_mask = tensor.ones_like(y[:, :, 0], dtype=floatX)
        y_mask = tensor.set_subtensor(
            y_mask[:args.context, :],
            tensor.zeros_like(y_mask[:args.context, :], dtype=floatX))

        if args.used_inputs is not None:
            y_mask = tensor.set_subtensor(
                y_mask[:args.used_inputs, :],
                tensor.zeros_like(y_mask[:args.used_inputs, :], dtype=floatX))
        # SquaredError does not work on 3D tensor
        target = (y * y_mask.dimshuffle(0, 1, 'x'))
        values = (presoft[:-1, :, :] * y_mask.dimshuffle(0, 1, 'x'))

        target = target.reshape(
            (target.shape[0] * target.shape[1], target.shape[2]))

        values = values.reshape(
            (values.shape[0] * values.shape[1], values.shape[2]))

        unregularized_cost = SquaredError().apply(target, values)
        # renormalization
        unregularized_cost = unregularized_cost * (
            tensor.sum(tensor.ones_like(y_mask)) / tensor.sum(y_mask))
        unregularized_cost.name = "mean_squared_error"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = unregularized_cost + tensor.log(1)
    cost.name = "regularized_cost"
    return cost, unregularized_cost
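As a side note on the bits-per-character cost computed above, dividing a cross-entropy measured in nats by ln 2 converts it to bits; a tiny self-contained illustration (added here, not part of the original function):

# Illustration only: nats -> bits conversion used for the BPC cost above.
import numpy
cross_entropy_nats = numpy.log(4.0)                   # e.g. 4 equally likely symbols
bits_per_char = cross_entropy_nats / numpy.log(2)     # == 2.0 bits per character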
Example #23
def test_fully_layer():
	batch_size=2
	x = T.tensor4();
	y = T.ivector()
	V = 200
	layer_conv = Convolutional(filter_size=(5,5),num_filters=V,
				name="toto",
				weights_init=IsotropicGaussian(0.01),
				biases_init=Constant(0.0))
	# try with no bias
	activation = Rectifier()
	pool = MaxPooling(pooling_size=(2,2))

	convnet = ConvolutionalSequence([layer_conv, activation, pool], num_channels=15,
					image_size=(10,10),
					name="conv_section")
	convnet.push_allocation_config()
	convnet.initialize()
	output=convnet.apply(x)
	batch_size=output.shape[0]
	output_dim=np.prod(convnet.get_dim('output'))
	result_conv = output.reshape((batch_size, output_dim))
	mlp=MLP(activations=[Rectifier().apply], dims=[output_dim, 10],
				weights_init=IsotropicGaussian(0.01),
				biases_init=Constant(0.0))
	mlp.initialize()
	output=mlp.apply(result_conv)
	cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output))
	cg = ComputationGraph(cost)
	W = VariableFilter(roles=[WEIGHT])(cg.variables)
	B = VariableFilter(roles=[BIAS])(cg.variables)
	W = W[0]; b = B[0]

	inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg)
	outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg)
	var_input=inputs_fully[0]
	var_output=outputs_fully[0]
	
	[d_W,d_S,d_b] = T.grad(cost, [W, var_output, b])

	d_b = d_b.dimshuffle(('x',0))
	d_p = T.concatenate([d_W, d_b], axis=0)
	x_value = 1e3*np.random.ranf((2,15, 10, 10))
	f = theano.function([x,y], [var_input, d_S, d_p], allow_input_downcast=True, on_unused_input='ignore')
	A, B, C= f(x_value, [5, 0])
	A = np.concatenate([A, np.ones((2,1))], axis=1)
	print 'A', A.shape
	print 'B', B.shape
	print 'C', C.shape

	print lin.norm(C - np.dot(np.transpose(A), B), 'fro')

	return
	
	"""
Example #24
def build_intermediate_var_unlabelled(model):
    # build cost function and computational
    x = T.tensor4()
    output = model.apply(x)
    output = output.reshape((x.shape[0], model.get_dim('output')))
    labels = T.argmax(output, axis=1).reshape((x.shape[0], 1))
    cost = Softmax().categorical_cross_entropy(labels.flatten(), output).mean()
    return theano.function([x],
                           build_dictionnary(cost),
                           allow_input_downcast=True,
                           on_unused_input='ignore')
Example #25
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(config,
                                    rec_input_len=4,
                                    output_dim=config.tgtcls.shape[0],
                                    **kwargs)
        self.classes = theano.shared(numpy.array(config.tgtcls,
                                                 dtype=theano.config.floatX),
                                     name='classes')
        self.softmax = Softmax()
        self.sequences.extend(['latitude_lag', 'longitude_lag'])
        self.children.append(self.softmax)
Example #26
    def build_model(self, hidden_dim):
        board_input = T.vector('input')
        mlp = MLP(activations=[LeakyRectifier(0.1),
                               LeakyRectifier(0.1)],
                  dims=[9, hidden_dim, 9],
                  weights_init=IsotropicGaussian(0.00001),
                  biases_init=Constant(0.01))
        output = mlp.apply(board_input)
        masked_output = Softmax().apply(output * T.eq(board_input, 0) * 1000)
        mlp.initialize()
        cost, chosen = self.get_cost(masked_output)
        return board_input, mlp, cost, chosen, output
Example #27
def put_labels(model,
               data_labelled,
               data_unlabelled,
               f_predict=None,
               f_loss=None):
    x = T.tensor4('x')
    y = T.imatrix()
    y_prev = model.apply(x)

    if f_predict is None:
        # define prediction function
        y_softmax = Softmax().apply(y_prev)
        prediction = T.argmax(y_softmax, axis=1)
        f_predict = theano.function([x], prediction, allow_input_downcast=True)
    if f_loss is None:
        cost = Softmax().categorical_cross_entropy(y.flatten(), y_prev).mean()
        f_loss = theano.function([x, y], cost, allow_input_downcast=True)

    # now proceed to the mean loss
    batch_size = 64
    x_train_L, y_train_L = data_labelled
    y_train_U = []
    x_train_U = data_unlabelled
    n_train_U = len(x_train_U) // batch_size
    # pay attention to the shape of y_train !!!!!!!!!!
    for index in range(n_train_U):
        y_train_batch = f_predict(x_train_U[batch_size * index:(index + 1) *
                                            batch_size])
        y_train_U.append(y_train_batch)  # what is the type of predict
    if n_train_U * batch_size < len(x_train_U):
        x_batch = x_train_U[n_train_U * batch_size:]
        y_train_U.append(f_predict(x_batch))

    y_train_U = np.concatenate(y_train_U, axis=0)[:, None]
    assert len(y_train_U) == len(
        x_train_U), "problem : length does not match for unlabelled data"

    y_train = np.concatenate([y_train_L, y_train_U], axis=0)
    x_train = np.concatenate([x_train_L, x_train_U], axis=0)
    return (x_train, y_train), f_loss
Example #28
def test_softmax_vector():
    x = tensor.matrix('x')
    y = tensor.lvector('y')

    softmax_out = Softmax().apply(x)
    cost = CategoricalCrossEntropy().apply(y, softmax_out)

    cost_stable = Softmax().categorical_cross_entropy(y, x)

    softmax_cost_func = function([x, y], cost)
    softmax_cost_stable_func = function([x, y], cost_stable)

    batch_size = 100
    x_size = 10

    rng = numpy.random.RandomState(1)
    x_val = rng.randn(batch_size, x_size).astype(theano.config.floatX)
    y_val = rng.randint(low=0, high=x_size, size=(batch_size))
    softmax_cost = softmax_cost_func(x_val, y_val)
    softmax_cost_stable = softmax_cost_stable_func(x_val, y_val)

    assert_allclose(softmax_cost, softmax_cost_stable)
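For reference, a short NumPy sketch (added here for illustration, not part of the original test) of the log-sum-exp computation that makes the fused categorical_cross_entropy call numerically stable:

import numpy

def reference_cross_entropy(x, y):
    # Stable log-softmax: subtract the row maximum before exponentiating.
    shifted = x - x.max(axis=1, keepdims=True)
    log_probs = shifted - numpy.log(numpy.exp(shifted).sum(axis=1, keepdims=True))
    # Negative log-likelihood of the target class for each row.
    return -log_probs[numpy.arange(len(y)), y]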
Example #29
def build_intermediate_var_labelled(model):
    # build cost function and computational
    x = T.tensor4()
    y = T.imatrix()
    output = model.apply(x)
    output = output.reshape(
        (x.shape[0],
         model.get_dim('output')))  #TO DO : get_dim('name') for Architecture
    cost = Softmax().categorical_cross_entropy(y.flatten(), output).mean()
    return theano.function([x, y],
                           build_dictionnary(cost),
                           allow_input_downcast=True,
                           on_unused_input='ignore')
Example #30
    def __init__(self, config, prefix_encoder, candidate_encoder, **kwargs):
        super(MemoryNetworkBase, self).__init__(**kwargs)

        self.prefix_encoder = prefix_encoder
        self.candidate_encoder = candidate_encoder
        self.config = config

        self.softmax = Softmax()
        self.children = [self.softmax, prefix_encoder, candidate_encoder]

        self.inputs = self.prefix_encoder.apply.inputs \
                      + ['candidate_%s'%x for x in self.candidate_encoder.apply.inputs] \
                      + ['candidate_destination_latitude', 'candidate_destination_longitude']