    def __init__(self, stack_dim=500, **kwargs):
        """Sole constructor.
        
        Args:
            stack_dim (int): Size of vectors on the stack.
        """
        super(PushDownSequenceContentAttention, self).__init__(**kwargs)
        self.stack_dim = stack_dim
        self.max_stack_depth = 25

        self.stack_op_names = self.state_names + ['weighted_averages']

        self.stack_pop_transformer = MLP(activations=[Logistic()], dims=None)
        self.stack_pop_transformers = Parallel(
            input_names=self.stack_op_names,
            prototype=self.stack_pop_transformer,
            name="stack_pop")

        self.stack_push_transformer = MLP(activations=[Logistic()], dims=None)
        self.stack_push_transformers = Parallel(
            input_names=self.stack_op_names,
            prototype=self.stack_push_transformer,
            name="stack_push")

        self.stack_input_transformer = Linear()
        self.stack_input_transformers = Parallel(
            input_names=self.stack_op_names,
            prototype=self.stack_input_transformer,
            name="stack_input")
        self.children.append(self.stack_pop_transformers)
        self.children.append(self.stack_push_transformers)
        self.children.append(self.stack_input_transformers)
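A hypothetical instantiation sketch (the keyword names and dimensions below are assumptions based on the usual Blocks SequenceContentAttention interface, not taken from the original repository); stack_dim only controls the width of the vectors kept on the neural stack:

attention = PushDownSequenceContentAttention(
    state_names=['states'],   # decoder state(s) the attention conditions on (assumed)
    attended_dim=1000,        # dimensionality of the encoder annotations (assumed)
    match_dim=1000,           # dimensionality of the match/energy space (assumed)
    stack_dim=500)            # width of the vectors pushed onto the stack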
Example 2
def create_vae(x=None, batch=batch_size):
    x = T.matrix('features') if x is None else x
    x = x / 255.

    encoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[img_dim**2, hidden_dim, 2*latent_dim],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='encoder'
    )
    encoder.initialize()
    z_param = encoder.apply(x)
    z_mean, z_log_std = z_param[:,latent_dim:], z_param[:,:latent_dim]
    z = Sampling(theano_seed=seed).apply([z_mean, z_log_std], batch=batch)

    decoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[latent_dim, hidden_dim, img_dim**2],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='decoder'
    )
    decoder.initialize()
    x_reconstruct = decoder.apply(z)

    cost = VAEloss().apply(x, x_reconstruct, z_mean, z_log_std)
    cost.name = 'vae_cost'
    return cost
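A minimal usage sketch, assuming create_vae and the module-level constants it relies on (batch_size, img_dim, and friends) are importable from the snippet's module: compile the returned cost into a Theano function and evaluate it on a random pixel batch.

import numpy
import theano
from theano import tensor as T

x = T.matrix('features')
cost = create_vae(x)                 # builds encoder/decoder and returns the VAE cost
f_cost = theano.function([x], cost)  # compile the cost graph

pixels = numpy.random.randint(0, 256, size=(batch_size, img_dim ** 2))
print(f_cost(pixels.astype(theano.config.floatX)))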
Example 3
    def __init__(self, dim, activation=None, **kwargs):
        super(LSTM, self).__init__(**kwargs)
        self.dim = dim

        if not activation:
            activation = Tanh()

        self.in_activation = masonry.NormalizedActivation(
            shape=(self.dim, ),
            broadcastable=(False, ),
            activation=Logistic(),
            batch_normalize=True,
            name="in_activation")
        self.forget_activation = masonry.NormalizedActivation(
            shape=(self.dim, ),
            broadcastable=(False, ),
            activation=Logistic(),
            batch_normalize=True,
            name="forget_activation")
        self.out_activation = masonry.NormalizedActivation(
            shape=(self.dim, ),
            broadcastable=(False, ),
            activation=Logistic(),
            batch_normalize=True,
            name="out_activation")
        self.recurrent_activation = activation

        self.children = [
            self.in_activation, self.forget_activation, self.out_activation,
            self.recurrent_activation
        ]
Example 4
def decoder_network(latent_sample, latent_dim=J):
  # bernoulli case
  hidden2 = get_typical_layer(latent_sample, latent_dim, 500, Logistic())
  hidden2_to_output = Linear(name="last", input_dim=500, output_dim=784)
  hidden2_to_output.weights_init = IsotropicGaussian(0.01)
  hidden2_to_output.biases_init = Constant(0)
  hidden2_to_output.initialize()
  return Logistic().apply(hidden2_to_output.apply(hidden2))
Example 5
    def create_model(self):
        x = self.x
        input_dim = self.input_dim
        mlp = MLP([Logistic(), Logistic(), Tanh()], [input_dim, 100, 100, 1],
                  weights_init=IsotropicGaussian(0.001),
                  biases_init=Constant(0))
        mlp.initialize()
        self.mlp = mlp
        probs = mlp.apply(x)
        return probs
Example 6
    def __init__(self, emb_dim, dim, dropout=0.0,
            def_word_gating="none",
            dropout_type="per_unit", compose_type="sum",
            word_dropout_weighting="no_weighting",
            shortcut_unk_and_excluded=False,
            num_input_words=-1, exclude_top_k=-1, vocab=None,
            **kwargs):

        self._dropout = dropout
        self._num_input_words = num_input_words
        self._exclude_top_K = exclude_top_k
        self._dropout_type = dropout_type
        self._compose_type = compose_type
        self._vocab = vocab
        self._shortcut_unk_and_excluded = shortcut_unk_and_excluded
        self._word_dropout_weighting = word_dropout_weighting
        self._def_word_gating = def_word_gating

        if def_word_gating not in {"none", "self_attention"}:
            raise NotImplementedError()

        if word_dropout_weighting not in {"no_weighting"}:
            raise NotImplementedError("Not implemented " + word_dropout_weighting)

        if dropout_type not in {"per_unit", "per_example", "per_word"}:
            raise NotImplementedError()

        children = []

        if self._def_word_gating=="self_attention":
            self._gate_mlp = Linear(dim, dim)
            self._gate_act = Logistic()
            children.extend([self._gate_mlp, self._gate_act])

        if compose_type == 'fully_connected_linear':
            self._def_state_compose = MLP(activations=[None],
                dims=[emb_dim + dim, emb_dim])
            children.append(self._def_state_compose)
        if compose_type == "gated_sum" or compose_type == "gated_transform_and_sum":
            if dropout_type == "per_word" or dropout_type == "per_example":
                raise RuntimeError("I dont think this combination makes much sense")

            self._compose_gate_mlp = Linear(dim + emb_dim, emb_dim,
                                            name='gate_linear')
            self._compose_gate_act = Logistic()
            children.extend([self._compose_gate_mlp, self._compose_gate_act])
        if compose_type == 'sum':
            if not emb_dim == dim:
                raise ValueError("Embedding has different dim! Cannot use compose_type='sum'")
        if compose_type == 'transform_and_sum' or compose_type == "gated_transform_and_sum":
            self._def_state_transform = Linear(dim, emb_dim, name='state_transform')
            children.append(self._def_state_transform)

        super(MeanPoolCombiner, self).__init__(children=children, **kwargs)
Example 7
    def build_network(self,
                      num_labels,
                      features,
                      max_len=None,
                      hidden_units=None,
                      l2=None,
                      use_cnn=None,
                      cnn_filter_size=None,
                      cnn_pool_size=None,
                      cnn_num_filters=None,
                      cnn_filter_sizes=None,
                      embedding_size=None,
                      DEBUG=False):
        """ Build the neural network used for training.

        :param num_labels:      Number of labels to classify
        :param features:        the input features we use
        :param max_len:     Configured window-size
        :param hidden_units:    Number of units in the MLP's hidden layer
        :returns:               The cost function, the misclassification rate
                                function, the computation graph of the cost
                                function and the prediction function
        """
        logger.info(
            'building the network, with one CNN for left and one for right')
        hidden_units = hidden_units or self._config['hidden_units']
        logger.info('#hidden units: %d', hidden_units)
        # building the feature vector from input.
        mlp_in_e1, mlp_in_e2, mlp_in_dim = self.build_feature_vector_noMention(
            features)
        logger.info('feature vector size: %d', mlp_in_dim)

        mlp = MLP(activations=[Rectifier()],
                  dims=[mlp_in_dim, hidden_units],
                  seed=self.curSeed)
        initialize([mlp])
        before_out_e1 = mlp.apply(mlp_in_e1)
        before_out_e2 = mlp.apply(mlp_in_e2)
        hidden_to_output = Linear(name='hidden_to_output',
                                  input_dim=hidden_units,
                                  output_dim=num_labels)
        initialize([hidden_to_output])
        linear_output_e1 = hidden_to_output.apply(before_out_e1)
        linear_output_e2 = hidden_to_output.apply(before_out_e2)
        linear_output_e1.name = 'linear_output_e1'
        linear_output_e2.name = 'linear_output_e2'

        y_hat_e1 = Logistic(name='logistic1').apply(linear_output_e1)
        y_hat_e2 = Logistic(name='logistic2').apply(linear_output_e2)
        y_hat_e1.name = 'y_hat_e1'
        y_hat_e2.name = 'y_hat_e2'
        y_hat_e1 = debug_print(y_hat_e1, 'y_1', DEBUG)
        return y_hat_e1, y_hat_e2, before_out_e1, before_out_e2
Example 8
    def __init__(self, x_dim, hidden_layers, hidden_act, z_dim, batch_norm=False, l2reg=1e-3, **kwargs):
        super(VAE, self).__init__([], [], **kwargs)

        self.l2reg = l2reg

        inits = {
            'weights_init': IsotropicGaussian(std=0.1),
            #'weights_init': RWSInitialization(factor=1.),
            'biases_init': Constant(0.0),
        }

        if batch_norm:
            mlp_class = BatchNormalizedMLP
        else:
            mlp_class = MLP

        hidden_act = [hidden_act] * len(hidden_layers)

        q_mlp = mlp_class(hidden_act, [x_dim] + hidden_layers, **inits)
        p_mlp = mlp_class(hidden_act + [Logistic()], [z_dim] + hidden_layers + [x_dim], **inits)

        self.q = GaussianLayer(z_dim, q_mlp, **inits)
        self.p = BernoulliLayer(p_mlp, **inits)

        self.prior_log_sigma = numpy.zeros(z_dim)
        self.prior_mu = numpy.zeros(z_dim)

        self.children = [self.p, self.q]
Example 9
def main():
  x = tensor.matrix("features")
  input_to_hidden1 = get_typical_layer(x, 784, 500)
  #hidden1_to_hidden2 = get_typical_layer(input_to_hidden1, 500, 300)
  hidden1_to_latent = get_typical_layer(input_to_hidden1, 500, 20)

  latent_to_hidden2 = get_typical_layer(hidden1_to_latent, 20, 500)
  #hidden3_to_hidden4 = get_typical_layer(latent_to_hidden3, 300, 500)
  hidden2_to_output = get_typical_layer(latent_to_hidden2, 500, 784, Logistic())
  hidden2_to_output.name = "last_before_output"

  from blocks.bricks.cost import SquaredError, AbsoluteError, BinaryCrossEntropy
  from blocks.graph import ComputationGraph
  from blocks.algorithms import Adam, GradientDescent, Scale
  from blocks.roles import WEIGHT

  cost = BinaryCrossEntropy(name="error").apply(x, hidden2_to_output)
  cg = ComputationGraph(cost)
  weights = VariableFilter(roles=[WEIGHT]) (cg.variables)
#  cost += 0.0001 * tensor.sum(map(lambda x: (x**2).sum(), weights))
#  cost.name = "regularized error"
  gd = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Adam())

  from blocks.main_loop import MainLoop
  from blocks.extensions import FinishAfter, Printing, ProgressBar
  from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
  monitor = TrainingDataMonitoring([cost], after_epoch=True)
  main_loop = MainLoop(data_stream=get_data_stream(), algorithm=gd, extensions=[monitor, FinishAfter(after_n_epochs=5),  ProgressBar(), Printing()])

  main_loop.run()
  showcase(cg, "last_before_output")
Example 10
    def create_base_model(self, x, y, input_dim, interim_dim=30):

        # Create the output of the MLP
        mlp = MLP([Tanh(), Tanh(), Tanh()], [input_dim, 60, 60, interim_dim],
                  weights_init=IsotropicGaussian(0.001),
                  biases_init=Constant(0))
        mlp.initialize()
        inter = mlp.apply(x)

        fine_tuner = MLP([Logistic()], [interim_dim, 1],
                         weights_init=IsotropicGaussian(0.001),
                         biases_init=Constant(0))
        fine_tuner.initialize()
        probs = fine_tuner.apply(inter)
        #sq_err = BinaryCrossEntropy()
        err = T.sqr(y.flatten() - probs.flatten())
        # cost = T.mean(err * y.flatten() * (1 - self.p) + err *
        #              (1 - y.flatten()) * self.p)
        cost = T.mean(err)
        #cost = sq_err.apply(probs.flatten(), y.flatten())
        # cost = T.mean(y.flatten() * T.log(probs.flatten()) +
        #              (1 - y.flatten()) * T.log(1 - probs.flatten()))
        cost.name = 'cost'
        pred_out = probs > 0.5
        mis_cost = T.sum(T.neq(y.flatten(), pred_out.flatten()))
        mis_cost.name = 'MisclassificationRate'
        return mlp, fine_tuner, cost, mis_cost
Example 11
def build_mlp(features_cat, features_int, labels):

    mlp_int = MLP(activations=[Rectifier(), Rectifier()],
                  dims=[19, 50, 50],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(0),
                  name='mlp_interval')
    mlp_int.initialize()
    mlp_cat = MLP(activations=[Logistic()],
                  dims=[320, 50],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(0),
                  name='mlp_categorical')
    mlp_cat.initialize()

    mlp = MLP(activations=[Rectifier(), None],
              dims=[50, 50, 1],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0))
    mlp.initialize()

    gated = mlp_cat.apply(features_cat) * mlp_int.apply(features_int)
    prediction = mlp.apply(gated)
    cost = MAPECost().apply(prediction, labels)

    cg = ComputationGraph(cost)
    print cg.variables

    cg_dropout1   = apply_dropout(cg, [VariableFilter(roles=[OUTPUT])(cg.variables)[1], VariableFilter(roles=[OUTPUT])(cg.variables)[3]], .2)
    cost_dropout1 = cg_dropout1.outputs[0]

    return cost_dropout1, cg_dropout1.parameters, cost
Example 12
    def create_model(self, x, y, input_dim, tol=10e-5):

        # Create the output of the MLP
        mlp = MLP(
            [Rectifier(), Rectifier(), Logistic()], [input_dim, 100, 100, 1],
            weights_init=IsotropicGaussian(0.01),
            biases_init=Constant(0))
        mlp.initialize()
        probs = mlp.apply(x)
        y = y.dimshuffle(0, 'x')
        # Create the if-else cost function
        true_p = (T.sum(y * probs) + tol) * 1.0 / (T.sum(y) + tol)
        true_n = (T.sum((1 - y) * (1 - probs)) + tol) * \
            1.0 / (T.sum(1 - y) + tol)
        #p = (T.sum(y) + tol) / (y.shape[0] + tol)
        theta = (1 - self.p) / self.p
        numerator = (1 + self.beta**2) * true_p
        denominator = self.beta**2 + theta + true_p - theta * true_n

        Fscore = numerator / denominator

        cost = -1 * Fscore
        cost.name = "cost"

        return mlp, cost, probs
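For reference, the soft F-beta surrogate built above can be checked numerically; the sketch below is an assumption-labelled stand-in (p plays the role of self.p and beta of self.beta) that mirrors the same formula in plain numpy:

import numpy as np

def soft_fbeta(y, probs, p=0.5, beta=1.0, tol=10e-5):
    # soft true-positive and true-negative rates, smoothed by tol
    true_p = (np.sum(y * probs) + tol) / (np.sum(y) + tol)
    true_n = (np.sum((1 - y) * (1 - probs)) + tol) / (np.sum(1 - y) + tol)
    theta = (1 - p) / p
    return ((1 + beta ** 2) * true_p) / (beta ** 2 + theta + true_p - theta * true_n)

y = np.array([1., 0., 1., 0.])
print(soft_fbeta(y, np.array([0.9, 0.1, 0.8, 0.2])))  # about 0.85; reaches 1 for perfect predictions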
Example 13
    def __init__(self, mlp, frame_size=259, k=20, const=1e-5, **kwargs):
        super(SPF0Emitter, self).__init__(**kwargs)
        self.mlp = mlp
        input_dim = self.mlp.output_dim
        self.const = const
        self.frame_size = frame_size

        mlp_gmm = GMMMLP(mlp=mlp, dim=(frame_size - 2) * k, k=k, const=const)

        self.gmm_emitter = GMMEmitter(gmmmlp=mlp_gmm,
                                      output_size=frame_size - 2,
                                      k=k,
                                      name="gmm_emitter")

        self.mu = MLP(activations=[Identity()],
                      dims=[input_dim, 1],
                      name=self.name + "_mu")
        self.sigma = MLP(activations=[SoftPlus()],
                         dims=[input_dim, 1],
                         name=self.name + "_sigma")
        self.binary = MLP(activations=[Logistic()],
                          dims=[input_dim, 1],
                          name=self.name + "_binary")

        self.children = [
            self.mlp, self.mu, self.sigma, self.binary, self.gmm_emitter
        ]
Example 14
    def __init__(self,
                 dim,
                 num_copies,
                 use_W_xu,
                 activation=None,
                 gate_activation=None,
                 **kwargs):
        self.dim = dim
        self.num_copies = num_copies
        self.use_W_xu = use_W_xu

        # shape: C x F/2
        permutations = []
        indices = numpy.arange(self.dim / 2)
        for i in range(self.num_copies):
            numpy.random.shuffle(indices)
            permutations.append(
                numpy.concatenate(
                    [indices, [ind + self.dim / 2 for ind in indices]]))
        # C x F (numpy)
        self.permutations = numpy.vstack(permutations)

        if not activation:
            activation = Tanh()
        if not gate_activation:
            gate_activation = Logistic()
        self.activation = activation
        self.gate_activation = gate_activation

        children = ([self.activation, self.gate_activation] +
                    kwargs.get('children', []))
        super(AssociativeLSTM, self).__init__(children=children, **kwargs)
Example 15
def softmax_layer(h, y, hidden_size, num_targets, cost_fn='cross'):
    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=hidden_size,
                              output_dim=num_targets)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    y_pred = T.argmax(linear_output, axis=1)
    label_of_predicted = debug_print(y[T.arange(y.shape[0]), y_pred],
                                     'label_of_predicted', False)
    pat1 = T.mean(label_of_predicted)
    updates = None
    if 'ranking' in cost_fn:
        cost, updates = ranking_loss(linear_output, y)
        print 'using ranking loss function!'
    else:
        y_hat = Logistic().apply(linear_output)
        y_hat.name = 'y_hat'
        cost = cross_entropy_loss(y_hat, y)
    cost.name = 'cost'
    pat1.name = 'precision@1'
    misclassify_rate = MultiMisclassificationRate().apply(
        y, T.ge(linear_output, 0.5))
    misclassify_rate.name = 'error_rate'
    return cost, pat1, updates, misclassify_rate
Example 16
def test_activations():
    x = tensor.vector()
    x_val = numpy.random.rand(8).astype(theano.config.floatX)
    exp_x_val = numpy.exp(x_val)

    assert_allclose(x_val, Identity().apply(x).eval({x: x_val}))
    assert_allclose(numpy.tanh(x_val),
                    Tanh().apply(x).eval({x: x_val}),
                    rtol=1e-06)
    assert_allclose(numpy.log(1 + exp_x_val),
                    Softplus(x).apply(x).eval({x: x_val}),
                    rtol=1e-6)
    assert_allclose(exp_x_val / numpy.sum(exp_x_val),
                    Softmax(x).apply(x).eval({
                        x: x_val
                    }).flatten(),
                    rtol=1e-6)
    assert_allclose(1.0 / (1.0 + numpy.exp(-x_val)),
                    Logistic(x).apply(x).eval({x: x_val}),
                    rtol=1e-6)
    leaky_out_1 = x_val - 0.5
    leaky_out_1[leaky_out_1 < 0] *= 0.01
    assert_allclose(leaky_out_1,
                    LeakyRectifier().apply(x).eval({x: x_val - 0.5}),
                    rtol=1e-5)
    leaky_out_2 = x_val - 0.5
    leaky_out_2[leaky_out_2 < 0] *= 0.05
    assert_allclose(leaky_out_2,
                    LeakyRectifier(leak=0.05).apply(x).eval({x: x_val - 0.5}),
                    rtol=1e-5)
Example 17
def create_model_bricks():
    convnet = ConvolutionalSequence(
        layers=[
            Convolutional(
                filter_size=(4, 4),
                num_filters=32,
                name='conv1'),
            SpatialBatchNormalization(name='batch_norm1'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                step=(2, 2),
                num_filters=32,
                name='conv2'),
            SpatialBatchNormalization(name='batch_norm2'),
            Rectifier(),
            Convolutional(
                filter_size=(4, 4),
                num_filters=64,
                name='conv3'),
            SpatialBatchNormalization(name='batch_norm3'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                step=(2, 2),
                num_filters=64,
                name='conv4'),
            SpatialBatchNormalization(name='batch_norm4'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                num_filters=128,
                name='conv5'),
            SpatialBatchNormalization(name='batch_norm5'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                step=(2, 2),
                num_filters=128,
                name='conv6'),
            SpatialBatchNormalization(name='batch_norm6'),
            Rectifier(),
        ],
        num_channels=3,
        image_size=(64, 64),
        use_bias=False,
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='convnet')
    convnet.initialize()

    mlp = BatchNormalizedMLP(
        activations=[Rectifier(), Logistic()],
        dims=[numpy.prod(convnet.get_dim('output')), 1000, 40],
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='mlp')
    mlp.initialize()

    return convnet, mlp
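A minimal usage sketch (assumed, not part of the original repo): chain the two returned bricks on a batch of 64x64 RGB images; the Logistic output layer yields 40 independent probabilities per image.

import theano
from theano import tensor
from blocks.bricks.conv import Flattener

convnet, mlp = create_model_bricks()
x = tensor.tensor4('features')        # shape: (batch, 3, 64, 64)
probs = mlp.apply(Flattener().apply(convnet.apply(x)))
predict = theano.function([x], probs)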
Example 18
    def __init__(self, input_dim, output_dim, hidden_size, init_ranges,
                 **kwargs):
        linear1 = LinearMaxout(input_dim=input_dim,
                               output_dim=hidden_size,
                               num_pieces=2,
                               name='linear1')
        linear2 = LinearMaxout(input_dim=hidden_size,
                               output_dim=hidden_size,
                               num_pieces=2,
                               name='linear2')
        linear3 = Linear(input_dim=hidden_size, output_dim=output_dim)
        logistic = Logistic()
        bricks = [
            linear1,
            BatchNormalization(input_dim=hidden_size, name='bn2'), linear2,
            BatchNormalization(input_dim=hidden_size, name='bnl'), linear3,
            logistic
        ]
        for init_range, b in zip(init_ranges, (linear1, linear2, linear3)):
            b.biases_init = initialization.Constant(0)
            b.weights_init = initialization.Uniform(width=init_range)

        kwargs.setdefault('use_bias', False)
        super(ConcatenateClassifier, self).__init__([b.apply for b in bricks],
                                                    **kwargs)
Example 19
    def __init__(self, **kwargs):

        children = []

        self.layers_numerical = []
        self.layers_numerical.append(
            Linear(name='input_to_numerical_linear',
                   input_dim=5000,
                   output_dim=17,
                   weights_init=IsotropicGaussian(),
                   biases_init=Constant(1)))

        self.layers_categorical = []
        self.layers_categorical.append(
            Linear(name='input_to_categorical_linear',
                   input_dim=5000,
                   output_dim=24016,
                   weights_init=IsotropicGaussian(),
                   biases_init=Constant(1)))
        self.layers_categorical.append(
            Logistic(name='input_to_categorical_sigmoid'))

        children += self.layers_numerical
        children += self.layers_categorical
        kwargs.setdefault('children', []).extend(children)

        super(build_top_mlp, self).__init__(**kwargs)
Example 20
    def __init__(self,
                 input_dim,
                 output_activation=None,
                 transform_activation=None,
                 **kwargs):
        super(Highway, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.output_dim = input_dim

        if output_activation == None:
            output_activation = Rectifier()

        if transform_activation == None:
            transform_activation = Logistic()

        self._linear_h = Linear(name="linear_h",
                                input_dim=input_dim,
                                output_dim=input_dim)
        self._linear_t = Linear(name="linear_t",
                                input_dim=input_dim,
                                output_dim=input_dim)
        self._output_activation = output_activation
        self._transform_activation = transform_activation
        self.children = [
            self._linear_h, self._linear_t, self._output_activation,
            self._transform_activation
        ]
Example 21
def generation(z_list, n_latent, hu_decoder, n_out, y):
    logger.info('in generation: n_latent: %d, hu_decoder: %d', n_latent,
                hu_decoder)
    if hu_decoder == 0:
        return generation_simple(z_list, n_latent, n_out, y)
    mlp1 = MLP(activations=[Rectifier()],
               dims=[n_latent, hu_decoder],
               name='latent_to_hidDecoder')
    initialize([mlp1])
    hid_to_out = Linear(name='hidDecoder_to_output',
                        input_dim=hu_decoder,
                        output_dim=n_out)
    initialize([hid_to_out])
    mysigmoid = Logistic(name='y_hat_vae')
    agg_logpy_xz = 0.
    agg_y_hat = 0.
    for i, z in enumerate(z_list):
        y_hat = mysigmoid.apply(hid_to_out.apply(
            mlp1.apply(z)))  #reconstructed x
        agg_logpy_xz += cross_entropy_loss(y_hat, y)
        agg_y_hat += y_hat

    agg_logpy_xz /= len(z_list)
    agg_y_hat /= len(z_list)
    return agg_y_hat, agg_logpy_xz
Example 22
    def __init__(self, image_dimension, **kwargs):

        layers = []

        #############################################
        # a first block with 2 convolutions of 32 (3, 3) filters
        layers.append(Convolutional((3, 3), 32, border_mode='half'))
        layers.append(Rectifier())
        layers.append(Convolutional((3, 3), 32, border_mode='half'))
        layers.append(Rectifier())

        # maxpool with size=(2, 2)
        layers.append(MaxPooling((2, 2)))

        #############################################
        # a 2nd block with 3 convolutions of 64 (3, 3) filters
        layers.append(Convolutional((3, 3), 64, border_mode='half'))
        layers.append(Rectifier())
        layers.append(Convolutional((3, 3), 64, border_mode='half'))
        layers.append(Rectifier())
        layers.append(Convolutional((3, 3), 64, border_mode='half'))
        layers.append(Rectifier())

        # maxpool with size=(2, 2)
        layers.append(MaxPooling((2, 2)))

        #############################################
        # a 3rd block with 4 convolutions of 128 (3, 3) filters
        layers.append(Convolutional((3, 3), 128, border_mode='half'))
        layers.append(Rectifier())
        layers.append(Convolutional((3, 3), 128, border_mode='half'))
        layers.append(Rectifier())
        layers.append(Convolutional((3, 3), 128, border_mode='half'))
        layers.append(Rectifier())
        layers.append(Convolutional((3, 3), 128, border_mode='half'))
        layers.append(Rectifier())

        # maxpool with size=(2, 2)
        layers.append(MaxPooling((2, 2)))

        self.conv_sequence = ConvolutionalSequence(layers,
                                                   3,
                                                   image_size=image_dimension)

        flattener = Flattener()

        self.top_mlp = MLP(activations=[Rectifier(), Logistic()],
                           dims=[500, 1])

        application_methods = [
            self.conv_sequence.apply, flattener.apply, self.top_mlp.apply
        ]

        super(VGGNet, self).__init__(application_methods,
                                     biases_init=Constant(0),
                                     weights_init=Uniform(width=.1),
                                     **kwargs)
Example 23
    def setUp(self):
        self.mlp = MLP([
            Sequence([Identity(name='id1').apply,
                      Tanh(name='tanh1').apply],
                     name='sequence1'),
            Sequence([
                Logistic(name='logistic1').apply,
                Identity(name='id2').apply,
                Tanh(name='tanh2').apply
            ],
                     name='sequence2'),
            Logistic(name='logistic2'),
            Sequence([
                Sequence([Logistic(name='logistic3').apply],
                         name='sequence4').apply
            ],
                     name='sequence3')
        ], [10, 5, 9, 5, 9])
Example 24
    def create_model(self):
        input_dim = self.input_dim
        x = self.x
        y = self.y
        p = self.p
        mask = self.mask
        hidden_dim = self.hidden_dim
        embedding_dim = self.embedding_dim
        lookup = LookupTable(self.dict_size,
                             embedding_dim,
                             weights_init=IsotropicGaussian(0.001),
                             name='LookupTable')
        x_to_h = Linear(embedding_dim,
                        hidden_dim * 4,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(0.001),
                        biases_init=Constant(0.0))
        lstm = LSTM(hidden_dim,
                    name='lstm',
                    weights_init=IsotropicGaussian(0.001),
                    biases_init=Constant(0.0))
        h_to_o = MLP([Logistic()], [hidden_dim, 1],
                     weights_init=IsotropicGaussian(0.001),
                     biases_init=Constant(0),
                     name='h_to_o')

        lookup.initialize()
        x_to_h.initialize()
        lstm.initialize()
        h_to_o.initialize()

        embed = lookup.apply(x).reshape(
            (x.shape[0], x.shape[1], self.embedding_dim))
        embed.name = "embed_vec"
        x_transform = x_to_h.apply(embed.transpose(1, 0, 2))
        x_transform.name = "Transformed X"
        self.lookup = lookup
        self.x_to_h = x_to_h
        self.lstm = lstm
        self.h_to_o = h_to_o

        #if mask is None:
        h, c = lstm.apply(x_transform)
        #else:
        #h, c = lstm.apply(x_transform, mask=mask)
        h.name = "hidden_state"
        c.name = "cell state"
        # only values of hidden units of the last timeframe are used for
        # the classification
        indices = T.sum(mask, axis=0) - 1
        rel_hid = h[indices, T.arange(h.shape[1])]
        out = self.h_to_o.apply(rel_hid)

        probs = out
        return probs
Example 25
def test_collect():
    x = tensor.matrix()
    mlp = MLP(activations=[Logistic(), Logistic()], dims=[784, 100, 784],
              use_bias=False)
    cost = SquaredError().apply(x, mlp.apply(x))
    cg = ComputationGraph(cost)
    var_filter = VariableFilter(roles=[PARAMETER])
    W1, W2 = var_filter(cg.variables)
    for i, W in enumerate([W1, W2]):
        W.set_value(numpy.ones_like(W.get_value()) * (i + 1))
    new_cg = collect_parameters(cg, cg.shared_variables)
    collected_parameters, = new_cg.shared_variables
    assert numpy.all(collected_parameters.get_value()[:784 * 100] == 1.)
    assert numpy.all(collected_parameters.get_value()[784 * 100:] == 2.)
    assert collected_parameters.ndim == 1
    W1, W2 = VariableFilter(roles=[COLLECTED])(new_cg.variables)
    assert W1.eval().shape == (784, 100)
    assert numpy.all(W1.eval() == 1.)
    assert W2.eval().shape == (100, 784)
    assert numpy.all(W2.eval() == 2.)
Example 26
    def __init__(self, dim, activation=None, gate_activation=None, **kwargs):
        self.dim = dim

        if not activation:
            activation = Tanh()
        if not gate_activation:
            gate_activation = Logistic()
        self.activation = activation
        self.gate_activation = gate_activation

        children = [activation, gate_activation] + kwargs.get('children', [])
        super(GatedRecurrent, self).__init__(children=children, **kwargs)
Example 27
    def __init__(self, dim, activation=None, gate_activation=None, **kwargs):
        self.dim = dim

        if not activation:
            activation = Tanh()
        if not gate_activation:
            gate_activation = Logistic()
        self.activation = activation
        self.gate_activation = gate_activation
        children = [activation, gate_activation]
        kwargs.setdefault('children', []).extend(children)
        super(ZoneoutGRU, self).__init__(**kwargs)
Example 28
    def __init__(self, dim, activation=None, gate_activation=None, **kwargs):
        super(GRU, self).__init__(**kwargs)
        self.dim = dim

        if not activation:
            activation = Tanh()
        if not gate_activation:
            gate_activation = Logistic()
        self.activation = activation
        self.gate_activation = gate_activation

        self.children = [activation, gate_activation]
Example 29
def test_activations():
    x = tensor.vector()
    x_val = numpy.random.rand(8).astype(theano.config.floatX)
    exp_x_val = numpy.exp(x_val)

    assert_allclose(x_val, Identity().apply(x).eval({x: x_val}))
    assert_allclose(numpy.tanh(x_val), Tanh().apply(x).eval({x: x_val}),
                    rtol=1e-06)
    assert_allclose(numpy.log(1 + exp_x_val),
                    Softplus(x).apply(x).eval({x: x_val}), rtol=1e-6)
    assert_allclose(exp_x_val / numpy.sum(exp_x_val),
                    Softmax(x).apply(x).eval({x: x_val}).flatten(), rtol=1e-6)
    assert_allclose(1.0 / (1.0 + numpy.exp(-x_val)),
                    Logistic(x).apply(x).eval({x: x_val}), rtol=1e-6)
Example 30
    def __init__(self, x_dim, hidden_layers, hidden_act, z_dim, **kwargs):
        super(DVAE, self).__init__([], [], **kwargs)

        inits = {
            #'weights_init': IsotropicGaussian(std=0.1),
            'weights_init': RWSInitialization(factor=1.),
            'biases_init': Constant(0.0),
        }

        hidden_act = [hidden_act] * len(hidden_layers)

        q_mlp = BatchNormalizedMLP(hidden_act + [Logistic()],
                                   [x_dim] + hidden_layers + [z_dim], **inits)
        #q_mlp = MLP(hidden_act+[Logistic()], [x_dim]+hidden_layers+[z_dim], **inits)
        p_mlp = BatchNormalizedMLP(hidden_act + [Logistic()],
                                   [z_dim] + hidden_layers + [x_dim], **inits)
        #p_mlp = MLP(hidden_act+[Logistic()], [z_dim]+hidden_layers+[x_dim], **inits)

        self.q = BernoulliLayer(q_mlp, name="q")
        self.p = BernoulliLayer(p_mlp, name="p")
        self.p_top = BernoulliTopLayer(z_dim, biases_init=Constant(0.0))

        self.children = [self.p_top, self.p, self.q]