Example 1
class AttentionEUTHM(EUTHM):
    '''
    EUTHM with user attention
    '''
    def __init__(self, config, dataset, *args, **kwargs):
        super(EUTHM, self).__init__(config, dataset)

    def _get_doc_embed(self, *args, **kwargs):
        text_vec = self._get_text_vec()
        user_vec = self.user_embed.apply(self.user)
        text_vec = tensor.concatenate([
            text_vec, user_vec[None, :, :][tensor.zeros(
                shape=(text_vec.shape[0], ), dtype='int32')]
        ],
                                      axis=2)
        return self._encode_text_vec(text_vec)

    def _build_bricks(self, *args, **kwargs):
        super(AttentionEUTHM, self)._build_bricks()
        self.mlstm_ins = Linear(input_dim=self.config.word_embed_dim +
                                self.config.user_embed_dim,
                                output_dim=4 * self.config.lstm_dim,
                                name='mlstm_in')
        self.mlstm_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim +
                       self.config.user_embed_dim + self.config.lstm_dim))
        self.mlstm_ins.biases_init = Constant(0)
        self.mlstm_ins.initialize()
def make_bidir_lstm_stack(seq, seq_dim, mask, sizes, skip=True, name=''):
    bricks = []

    curr_dim = [seq_dim]
    curr_hidden = [seq]

    hidden_list = []
    for k, dim in enumerate(sizes):
        fwd_lstm_ins = [Linear(input_dim=d, output_dim=4*dim, name='%s_fwd_lstm_in_%d_%d'%(name,k,l)) for l, d in enumerate(curr_dim)]
        fwd_lstm = LSTM(dim=dim, activation=Tanh(), name='%s_fwd_lstm_%d'%(name,k))

        bwd_lstm_ins = [Linear(input_dim=d, output_dim=4*dim, name='%s_bwd_lstm_in_%d_%d'%(name,k,l)) for l, d in enumerate(curr_dim)]
        bwd_lstm = LSTM(dim=dim, activation=Tanh(), name='%s_bwd_lstm_%d'%(name,k))

        bricks = bricks + [fwd_lstm, bwd_lstm] + fwd_lstm_ins + bwd_lstm_ins

        fwd_tmp = sum(x.apply(v) for x, v in zip(fwd_lstm_ins, curr_hidden))
        bwd_tmp = sum(x.apply(v) for x, v in zip(bwd_lstm_ins, curr_hidden))
        fwd_hidden, _ = fwd_lstm.apply(fwd_tmp, mask=mask)
        bwd_hidden, _ = bwd_lstm.apply(bwd_tmp[::-1], mask=mask[::-1])
        hidden_list = hidden_list + [fwd_hidden, bwd_hidden]
        if skip:
            curr_hidden = [seq, fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [seq_dim, dim, dim]
        else:
            curr_hidden = [fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [dim, dim]

    return bricks, hidden_list
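A minimal usage sketch for make_bidir_lstm_stack, assuming the same Theano/Blocks imports the function already relies on (tensor, Linear, LSTM, Tanh) plus IsotropicGaussian and Constant from blocks.initialization; all dimensions below are placeholders.

seq = tensor.tensor3('seq')    # (time, batch, seq_dim)
mask = tensor.matrix('mask')   # (time, batch)

bricks, hiddens = make_bidir_lstm_stack(seq, seq_dim=50, mask=mask,
                                        sizes=[100, 100], name='enc')

# The bricks' parameters are created when apply() is called inside the
# function; set an initialization scheme and initialize them afterwards,
# the same pattern used by other examples in this collection.
for brick in bricks:
    brick.weights_init = IsotropicGaussian(0.01)
    brick.biases_init = Constant(0)
    brick.initialize()

# hiddens is [fwd_1, bwd_1, fwd_2, bwd_2]; re-align the top backward pass
# in time and concatenate both directions for the final representation.
top = tensor.concatenate([hiddens[-2], hiddens[-1][::-1]], axis=2)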
Example 3
    def __init__(self, dim_in, dim_hidden, dim_out, **kwargs):

        self.dim_in = dim_in
        self.dim_hidden = dim_hidden
        self.dim_out = dim_out

        self.input_layer = Linear(input_dim=self.dim_in, output_dim=self.dim_hidden,
                                weights_init=initialization.IsotropicGaussian(),
                                biases_init=initialization.Constant(0))
        self.input_layer.initialize()

        sparse_init = initialization.Sparse(num_init=15, weights_init=initialization.IsotropicGaussian())
        self.recurrent_layer = SimpleRecurrent(
                                dim=self.dim_hidden, activation=Tanh(), name="first_recurrent_layer",
                                weights_init=sparse_init,
                                biases_init=initialization.Constant(0.01))
        '''
        self.recurrent_layer = LSTM(dim=self.dim_hidden, activation=Tanh(),
                                    weights_init=initialization.IsotropicGaussian(std=0.001),
                                    biases_init=initialization.Constant(0.01))
        '''
        self.recurrent_layer.initialize()

        self.output_layer = Linear(input_dim=self.dim_hidden, output_dim=self.dim_out,
                                weights_init=initialization.Uniform(width=0.01),
                                biases_init=initialization.Constant(0.01))
        self.output_layer.initialize()

        self.children = [self.input_layer, self.recurrent_layer, self.output_layer]
    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 state_dim,
                 representation_dim,
                 theano_seed=None,
                 **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.representation_dim = representation_dim
        self.theano_seed = theano_seed

        # Initialize gru with special initial state.
        self.transition = GRUInitialState(attended_dim=state_dim,
                                          dim=state_dim,
                                          activation=Tanh(),
                                          name='decoder')

        # Initialize the attention mechanism.
        self.attention = SequenceContentAttention2(
            state_names=self.transition.apply.states,
            attended_dim=representation_dim,
            match_dim=state_dim,
            name="attention")

        readout = Readout(source_names=[
            'states', 'feedback', self.attention.take_glimpses.outputs[0]
        ],
                          readout_dim=self.vocab_size,
                          emitter=NewSoftmaxEmitter(initial_output=-1,
                                                    theano_seed=theano_seed),
                          feedback_brick=NewLookupFeedback(
                              vocab_size, embedding_dim),
                          post_merge=InitializableFeedforwardSequence([
                              Bias(dim=state_dim, name='maxout_bias').apply,
                              Maxout(num_pieces=2, name='maxout').apply,
                              Linear(input_dim=state_dim / 2,
                                     output_dim=embedding_dim,
                                     use_bias=False,
                                     name='softmax0').apply,
                              Linear(input_dim=embedding_dim,
                                     name='softmax1').apply
                          ]),
                          merged_dim=state_dim)

        # Build sequence generator accordingly.
        self.sequence_generator = SequenceGenerator(
            readout=readout,
            transition=self.transition,
            attention=self.attention,
            fork=Fork([
                name
                for name in self.transition.apply.sequences if name != 'mask'
            ],
                      prototype=Linear()),
            cost_type='categorical_cross_entropy')

        self.children = [self.sequence_generator]
Example 5
    def __init__(self, dim, mini_dim, summary_dim, **kwargs):
        super(RNNwMini, self).__init__(**kwargs)
        self.dim = dim
        self.mini_dim = mini_dim
        self.summary_dim = summary_dim

        self.recurrent_layer = SimpleRecurrent(
            dim=self.summary_dim,
            activation=Rectifier(),
            name='recurrent_layer',
            weights_init=IsotropicGaussian(),
            biases_init=Constant(0.0))
        self.mini_recurrent_layer = SimpleRecurrent(
            dim=self.mini_dim,
            activation=Rectifier(),
            name='mini_recurrent_layer',
            weights_init=IsotropicGaussian(),
            biases_init=Constant(0.0))

        self.mini_to_main = Linear(self.dim + self.mini_dim,
                                   self.summary_dim,
                                   name='mini_to_main',
                                   weights_init=IsotropicGaussian(),
                                   biases_init=Constant(0.0))
        self.children = [
            self.recurrent_layer, self.mini_recurrent_layer, self.mini_to_main
        ]
Example 6
    def __init__(self, activations=None, dims=None, **kwargs):
        if activations is None:
            raise ValueError("activations must be specified.")
        if dims is None:
            raise ValueError("dims must be specified.")
        if not (len(dims) == (len(activations) + 2)):
            raise ValueError("len(dims) != len(activations) + 2.")
        super(CondNet, self).__init__(**kwargs)
        
        self.dims = dims
        self.shared_acts = activations

        # construct the shared linear transforms for feedforward
        self.shared_linears = []
        for i in range(len(dims)-2):
            self.shared_linears.append( \
                Linear(dims[i], dims[i+1], name='shared_linear_{}'.format(i)))

        self.mean_linear = Linear(dims[-2], dims[-1], name='mean_linear')
        self.logvar_linear = Linear(dims[-2], dims[-1], name='logvar_linear',
                                    weights_init=Constant(0.))

        self.children = self.shared_linears + self.shared_acts
        self.children.append(self.mean_linear)
        self.children.append(self.logvar_linear)
        return
Example 7
    def __init__(self,
                 input_dim,
                 hidden_dim,
                 inputs_weights_init=None,
                 inputs_biases_init=None,
                 reset_weights_init=None,
                 reset_biases_init=None,
                 update_weights_init=None,
                 update_biases_init=None,
                 **kwargs):
        super(GatedRecurrentFork, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        self.inputs_weights_init = inputs_weights_init
        self.inputs_biases_init = inputs_biases_init

        self.reset_weights_init = reset_weights_init
        self.reset_biases_init = reset_biases_init

        self.update_weights_init = update_weights_init
        self.update_biases_init = update_biases_init

        self.input_to_inputs = Linear(input_dim=input_dim,
                                      output_dim=self.hidden_dim,
                                      name="input_to_inputs")
        self.input_to_gate_inputs = Linear(input_dim=input_dim,
                                           output_dim=self.hidden_dim * 2,
                                           name="input_to_gate_inputs")

        self.children = [self.input_to_inputs, self.input_to_gate_inputs]
Example 8
def example2():
    """GRU"""
    x = tensor.tensor3('x')
    dim = 3

    fork = Fork(input_dim=dim,
                output_dims=[dim, dim * 2],
                name='fork',
                output_names=["linear", "gates"],
                weights_init=initialization.Identity(),
                biases_init=Constant(0))
    gru = GatedRecurrent(dim=dim,
                         weights_init=initialization.Identity(),
                         biases_init=Constant(0))

    fork.initialize()
    gru.initialize()

    linear, gate_inputs = fork.apply(x)
    h = gru.apply(linear, gate_inputs)

    f = theano.function([x], h)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))

    doubler = Linear(input_dim=dim,
                     output_dim=dim,
                     weights_init=initialization.Identity(2),
                     biases_init=initialization.Constant(0))
    doubler.initialize()

    lin, gate = fork.apply(doubler.apply(x))
    h_doubler = gru.apply(lin, gate)

    f = theano.function([x], h_doubler)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
Example 9
def example():
    """ Simple reccurent example. Taken from : https://github.com/mdda/pycon.sg-2015_deep-learning/blob/master/ipynb/blocks-recurrent-docs.ipynb """
    x = tensor.tensor3('x')

    rnn = SimpleRecurrent(dim=3,
                          activation=Identity(),
                          weights_init=initialization.Identity())
    rnn.initialize()
    h = rnn.apply(x)

    f = theano.function([x], h)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX)))

    doubler = Linear(input_dim=3,
                     output_dim=3,
                     weights_init=initialization.Identity(2),
                     biases_init=initialization.Constant(0))
    doubler.initialize()
    h_doubler = rnn.apply(doubler.apply(x))

    f = theano.function([x], h_doubler)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX)))

    #Initial State
    h0 = tensor.matrix('h0')
    h = rnn.apply(inputs=x, states=h0)

    f = theano.function([x, h0], h)
    print(
        f(np.ones((3, 1, 3), dtype=theano.config.floatX),
          np.ones((1, 3), dtype=theano.config.floatX)))
Example 10
    def __init__(self, match_dim,
                 use_local_attention=False, window_size=10, sigma=None,
                 state_transformer=None, local_state_transformer=None,
                 local_predictor=None, attended_transformer=None,
                 energy_computer=None, **kwargs):
        super(SequenceContentAttention, self).__init__(**kwargs)
        if not state_transformer:
            state_transformer = Linear(use_bias=False, name="state_trans")
        if not local_state_transformer:
            local_state_transformer = Linear(use_bias=False,
                                             name="local_state_trans")
        if not local_predictor:
            local_predictor = Linear(use_bias=False, name="local_pred")
        if sigma is None:
            sigma = window_size * 1.0 / 2
        self.use_local_attention = use_local_attention
        self.sigma = sigma * sigma
        self.match_dim = match_dim
        self.state_name = self.state_names[0]

        self.state_transformer = state_transformer
        self.local_state_transformer = local_state_transformer
        self.local_predictor = local_predictor

        if not attended_transformer:
            attended_transformer = Linear(name="preprocess")
        if not energy_computer:
            energy_computer = SumMatchFunction(name="energy_comp")
        self.attended_transformer = attended_transformer
        self.energy_computer = energy_computer

        self.children = [self.state_transformer, self.local_state_transformer,
                         self.local_predictor, self.attended_transformer,
                         energy_computer]
Example 11
def generation(z_list, n_latent, hu_decoder, n_out, y):
    logger.info('in generation: n_latent: %d, hu_decoder: %d', n_latent,
                hu_decoder)
    if hu_decoder == 0:
        return generation_simple(z_list, n_latent, n_out, y)
    mlp1 = MLP(activations=[Rectifier()],
               dims=[n_latent, hu_decoder],
               name='latent_to_hidDecoder')
    initialize([mlp1])
    hid_to_out = Linear(name='hidDecoder_to_output',
                        input_dim=hu_decoder,
                        output_dim=n_out)
    initialize([hid_to_out])
    mysigmoid = Logistic(name='y_hat_vae')
    agg_logpy_xz = 0.
    agg_y_hat = 0.
    for i, z in enumerate(z_list):
        y_hat = mysigmoid.apply(hid_to_out.apply(
            mlp1.apply(z)))  #reconstructed x
        agg_logpy_xz += cross_entropy_loss(y_hat, y)
        agg_y_hat += y_hat

    agg_logpy_xz /= len(z_list)
    agg_y_hat /= len(z_list)
    return agg_y_hat, agg_logpy_xz
Example 12
    def __init__(self,
                 match_dim,
                 state_transformer=None,
                 attended_transformer=None,
                 energy_computer=None,
                 **kwargs):
        super(SequenceContentAttention, self).__init__(**kwargs)
        if not state_transformer:
            state_transformer = Linear(use_bias=False)
        self.match_dim = match_dim
        self.state_transformer = state_transformer

        self.state_transformers = Parallel(input_names=self.state_names,
                                           prototype=state_transformer,
                                           name="state_trans")
        if not attended_transformer:
            attended_transformer = Linear(name="preprocess")
        if not energy_computer:
            energy_computer = ShallowEnergyComputer(name="energy_comp")
        self.attended_transformer = attended_transformer
        self.energy_computer = energy_computer

        self.children = [
            self.state_transformers, attended_transformer, energy_computer
        ]
Example 13
    def __init__(self, inner_recurrent, inner_dim, **kwargs):
        self.inner_recurrent = inner_recurrent
        self.linear_map = Linear(input_dim=inner_dim, output_dim=1)

        super(OuterLinear, self).__init__(**kwargs)

        self.children = [self.inner_recurrent, self.linear_map]
Example 14
    def __init__(self, emb_dim, dim, num_input_words, 
                 num_output_words, vocab, 
                 **kwargs):
        if emb_dim == 0:
            emb_dim = dim
        if num_input_words == 0:
            num_input_words = vocab.size()
        if num_output_words == 0:
            num_output_words = vocab.size()

        self._num_input_words = num_input_words
        self._num_output_words = num_output_words
        self._vocab = vocab

        self._word_to_id = WordToIdOp(self._vocab)

        children = []

        self._main_lookup = LookupTable(self._num_input_words, emb_dim, name='main_lookup')
        self._encoder_fork = Linear(emb_dim, 4 * dim, name='encoder_fork')
        self._encoder_rnn = LSTM(dim, name='encoder_rnn')
        self._decoder_fork = Linear(emb_dim, 4 * dim, name='decoder_fork')
        self._decoder_rnn = LSTM(dim, name='decoder_rnn')
        children.extend([self._main_lookup,
                         self._encoder_fork, self._encoder_rnn,
                         self._decoder_fork, self._decoder_rnn])
        self._pre_softmax = Linear(dim, self._num_output_words)
        self._softmax = NDimensionalSoftmax()
        children.extend([self._pre_softmax, self._softmax])

        super(LanguageModel, self).__init__(children=children, **kwargs)
    def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        self.lookup = LookupTable(name='embeddings')
        self.bidir = BidirectionalWMT15(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                             prototype=Linear(),
                             name='fwd_fork')
        self.back_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                              prototype=Linear(),
                              name='back_fork')

        self.children = [
            self.lookup, self.bidir, self.fwd_fork, self.back_fork
        ]
Example 16
    def __init__(self, vocab_size, embedding_dim, dgru_state_dim, dgru_depth,
                 **kwargs):
        super(Decimator, self).__init__(**kwargs)

        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.dgru_state_dim = dgru_state_dim
        self.embedding_dim = embedding_dim
        self.lookup = LookupTable(name='embeddings')
        self.dgru_depth = dgru_depth
        # representation
        self.dgru = RecurrentStack([
            DGRU(activation=Tanh(), dim=self.dgru_state_dim)
            for _ in range(dgru_depth)
        ],
                                   skip_connections=True)
        # importance of this representation
        self.bidir_w = Bidirectional(RecurrentWithFork(
            DGRU(activation=Tanh(), dim=self.dgru_state_dim // 2),
            self.embedding_dim,
            name='src_word_with_fork'),
                                     name='bidir_src_word_encoder')

        self.gru_fork = Fork(
            [name for name in self.dgru.apply.sequences if name != 'mask'],
            prototype=Linear(),
            name='gru_fork')
        # map to a energy scalar
        self.wl = Linear(input_dim=dgru_state_dim, output_dim=1)

        self.children = [
            self.lookup, self.dgru, self.gru_fork, self.bidir_w, self.wl
        ]
Example 18
    def __init__(self,
                 input_dim,
                 output_activation=None,
                 transform_activation=None,
                 **kwargs):
        super(Highway, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.output_dim = input_dim

        if output_activation == None:
            output_activation = Rectifier()

        if transform_activation == None:
            transform_activation = Logistic()

        self._linear_h = Linear(name="linear_h",
                                input_dim=input_dim,
                                output_dim=input_dim)
        self._linear_t = Linear(name="linear_t",
                                input_dim=input_dim,
                                output_dim=input_dim)
        self._output_activation = output_activation
        self._transform_activation = transform_activation
        self.children = [
            self._linear_h, self._linear_t, self._output_activation,
            self._transform_activation
        ]
Example 19
File: draw.py Project: zan2434/draw
    def __init__(self, input_dim, output_dim, **kwargs):
        super(Qsampler, self).__init__(**kwargs)

        self.prior_mean = 0.0
        self.prior_log_sigma = 0.0

        self.mean_transform = Linear(
            name=self.name + "_mean",
            input_dim=input_dim,
            output_dim=output_dim,
            weights_init=self.weights_init,
            biases_init=self.biases_init,
            use_bias=True,
        )

        self.log_sigma_transform = Linear(
            name=self.name + "_log_sigma",
            input_dim=input_dim,
            output_dim=output_dim,
            weights_init=self.weights_init,
            biases_init=self.biases_init,
            use_bias=True,
        )

        self.children = [self.mean_transform, self.log_sigma_transform]
Example 20
def lstm_layer(in_dim, h, h_dim, n, pref=""):
    linear = Linear(input_dim=in_dim,
                    output_dim=h_dim * 4,
                    name='linear' + str(n) + pref)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n) + pref)
    initialize([linear, lstm])
    return lstm.apply(linear.apply(h))[0]
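A minimal usage sketch for lstm_layer, assuming the same Linear/LSTM imports and the project-specific initialize() helper used inside it; the input tensor and sizes are placeholders.

embedded = tensor.tensor3('embedded')            # (time, batch, 100)
h1 = lstm_layer(in_dim=100, h=embedded, h_dim=256, n=1)
h2 = lstm_layer(in_dim=256, h=h1, h_dim=256, n=2, pref='_stack')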
Example 21
 def __init__(self,
              recurrent,
              dims,
              activations=[Identity(), Identity()],
              **kwargs):
     super(MyRecurrent, self).__init__(**kwargs)
     self.dims = dims
     self.recurrent = recurrent
     self.activations = activations
     if isinstance(self.recurrent,
                   (SimpleRecurrent, SimpleRecurrentBatchNorm)):
         output_dim = dims[1]
     elif isinstance(self.recurrent, (LSTM, LSTMBatchNorm)):
         output_dim = 4 * dims[1]
     else:
         raise NotImplementedError
     self.input_trans = Linear(name='input_trans',
                               input_dim=dims[0],
                               output_dim=output_dim,
                               weights_init=NormalizedInitialization(),
                               biases_init=Constant(0))
     self.output_trans = Linear(name='output_trans',
                                input_dim=dims[1],
                                output_dim=dims[2],
                                weights_init=NormalizedInitialization(),
                                biases_init=Constant(0))
     self.children = (
         [self.input_trans, self.recurrent, self.output_trans] +
         self.activations)
Example 22
def test_rng():
    Brick.lazy = True
    linear = Linear()
    assert isinstance(linear.rng, numpy.random.RandomState)
    assert linear.rng.rand() == numpy.random.RandomState(DEFAULT_SEED).rand()
    linear = Linear(rng=numpy.random.RandomState(1))
    assert linear.rng.rand() == numpy.random.RandomState(1).rand()
Example 23
def softmax_layer(h, y, x_mask, y_mask, lens, vocab_size, hidden_size,
                  boosting):
    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=hidden_size,
                              output_dim=vocab_size)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    softmax = NDimensionalSoftmax()

    #y_hat = softmax.apply(linear_output, extra_ndim=1)
    #y_hat.name = 'y_hat'
    cost_a = softmax.categorical_cross_entropy(y, linear_output, extra_ndim=1)
    #produces correct average
    cost_a = cost_a * y_mask

    if boosting:
        #boosting step, must divide by length here
        lensMat = T.tile(lens, (y.shape[0], 1))
        cost_a = cost_a / lensMat

    #only count cost of correctly masked entries
    cost = cost_a.sum() / y_mask.sum()

    cost.name = 'cost'

    return (linear_output, cost)
Example 24
class Embedder(Initializable):
    """
    Linear Embedding Brick
    Parameters
    ----------
    dim_in: :class:`int`
        Dimensionality of the input
    dim_out: :class:`int`
        Dimensionality of the output
    output_type: :class:`str`
        fc for fully connected. conv for convolutional
    """
    def __init__(self, dim_in, dim_out, output_type='fc', **kwargs):

        self.dim_in = dim_in
        self.dim_out = dim_out
        self.output_type = output_type
        self.linear = Linear(dim_in, dim_out, name='embed_layer')
        children = [self.linear]
        kwargs.setdefault('children', []).extend(children)
        super(Embedder, self).__init__(**kwargs)

    @application(inputs=['y'], outputs=['outputs'])
    def apply(self, y):
        embedding = self.linear.apply(y)
        if self.output_type == 'fc':
            return embedding
        if self.output_type == 'conv':
            return embedding.reshape((-1, embedding.shape[-1], 1, 1))

    def get_dim(self, name):
        if self.output_type == 'fc':
            return self.linear.get_dim(name)
        if self.output_type == 'conv':
            return (self.linear.get_dim(name), 1, 1)
Example 25
    def __init__(self, nvis, nhid, encoding_mlp, encoding_lstm, decoding_mlp,
                 decoding_lstm, T=1, **kwargs):
        super(DRAW, self).__init__(**kwargs)

        self.nvis = nvis
        self.nhid = nhid
        self.T = T

        self.encoding_mlp = encoding_mlp
        self.encoding_mlp.name = 'encoder_mlp'
        for i, child in enumerate(self.encoding_mlp.children):
            child.name = '{}_{}'.format(self.encoding_mlp.name, i)
        self.encoding_lstm = encoding_lstm
        self.encoding_lstm.name = 'encoder_lstm'
        self.encoding_parameter_mapping = Fork(
            output_names=['mu_phi', 'log_sigma_phi'], prototype=Linear())

        self.decoding_mlp = decoding_mlp
        self.decoding_mlp.name = 'decoder_mlp'
        for i, child in enumerate(self.decoding_mlp.children):
            child.name = '{}_{}'.format(self.decoding_mlp.name, i)
        self.decoding_lstm = decoding_lstm
        self.decoding_lstm.name = 'decoder_lstm'
        self.decoding_parameter_mapping = Linear(name='mu_theta')

        self.prior_mu = tensor.zeros((self.nhid,))
        self.prior_mu.name = 'prior_mu'
        self.prior_log_sigma = tensor.zeros((self.nhid,))
        self.prior_log_sigma.name = 'prior_log_sigma'

        self.children = [self.encoding_mlp, self.encoding_lstm,
                         self.encoding_parameter_mapping,
                         self.decoding_mlp, self.decoding_lstm,
                         self.decoding_parameter_mapping]
Example 26
def test_sequence_variable_inputs():
    x, y = tensor.matrix(), tensor.matrix()

    parallel_1 = Parallel(input_names=['input_1', 'input_2'],
                          input_dims=dict(input_1=4, input_2=5),
                          output_dims=dict(input_1=3, input_2=2),
                          prototype=Linear(), weights_init=Constant(2),
                          biases_init=Constant(1))
    parallel_2 = Parallel(input_names=['input_1', 'input_2'],
                          input_dims=dict(input_1=3, input_2=2),
                          output_dims=dict(input_1=5, input_2=4),
                          prototype=Linear(), weights_init=Constant(2),
                          biases_init=Constant(1))
    sequence = Sequence([parallel_1.apply, parallel_2.apply])
    sequence.initialize()
    new_x, new_y = sequence.apply(x, y)
    x_val = numpy.ones((4, 4), dtype=theano.config.floatX)
    y_val = numpy.ones((4, 5), dtype=theano.config.floatX)
    assert_allclose(
        new_x.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((4, 3))) + numpy.ones((4, 3))).dot(
            2 * numpy.ones((3, 5))) + numpy.ones((4, 5)))
    assert_allclose(
        new_y.eval({y: y_val}),
        (y_val.dot(2 * numpy.ones((5, 2))) + numpy.ones((4, 2))).dot(
            2 * numpy.ones((2, 4))) + numpy.ones((4, 4)))
Example 27
def bilstm_layer(in_dim, inp, h_dim, n, pref=""):
    linear = Linear(input_dim=in_dim, output_dim=h_dim * 4, name='linear' + str(n) + pref)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n) + pref)
    bilstm = Bidirectional(prototype=lstm)
    bilstm.name = 'bilstm' + str(n) + pref
    initialize([linear, bilstm])
    return bilstm.apply(linear.apply(inp))[0]
Example 28
    def __init__(self, input_dim, output_dim, width, height, N, **kwargs):
        super(AttentionWriter, self).__init__(name="writer", **kwargs)

        self.img_width = width
        self.img_height = height
        self.N = N
        self.input_dim = input_dim
        self.output_dim = output_dim

        assert output_dim == width * height

        self.zoomer = ZoomableAttentionWindow(height, width, N)
        self.z_trafo = Linear(name=self.name + '_ztrafo',
                              input_dim=input_dim,
                              output_dim=5,
                              weights_init=self.weights_init,
                              biases_init=self.biases_init,
                              use_bias=True)

        self.w_trafo = Linear(name=self.name + '_wtrafo',
                              input_dim=input_dim,
                              output_dim=N * N,
                              weights_init=self.weights_init,
                              biases_init=self.biases_init,
                              use_bias=True)

        self.children = [self.z_trafo, self.w_trafo]
Example 29
    def __init__(self, image_feature_dim, embedding_dim, **kwargs):
        super(Encoder, self).__init__(**kwargs)

        self.image_embedding = Linear(
              input_dim=image_feature_dim
            , output_dim=embedding_dim
            # , weights_init=IsotropicGaussian(0.02)
            # , biases_init=Constant(0.)
            , name="image_embedding"
            )

        self.to_inputs = Linear(
              input_dim=embedding_dim
            , output_dim=embedding_dim*4 # gate_inputs = vstack(input, forget, cell, hidden)
            # , weights_init=IsotropicGaussian(0.02)
            # , biases_init=Constant(0.)
            , name="to_inputs"
            )

        # This dim doesn't have to match embedding_dim; the choice is fairly arbitrary
        self.transition = LSTM(
            dim=embedding_dim, name="transition")

        self.children = [ self.image_embedding
                        , self.to_inputs
                        , self.transition
                        ]
Example 30
    def __init__(self, **kwargs):

        children = []

        self.layers_numerical = []
        self.layers_numerical.append(
            Linear(name='input_to_numerical_linear',
                   input_dim=5000,
                   output_dim=17,
                   weights_init=IsotropicGaussian(),
                   biases_init=Constant(1)))

        self.layers_categorical = []
        self.layers_categorical.append(
            Linear(name='input_to_categorical_linear',
                   input_dim=5000,
                   output_dim=24016,
                   weights_init=IsotropicGaussian(),
                   biases_init=Constant(1)))
        self.layers_categorical.append(
            Logistic(name='input_to_categorical_sigmoid'))

        children += self.layers_numerical
        children += self.layers_categorical
        kwargs.setdefault('children', []).extend(children)

        super(build_top_mlp, self).__init__(**kwargs)
Example 32
 def __init__(self, embedding_dim, state_dim, **kwargs):
     """Constructor. Note that this implementation only supports
     single layer architectures.
     
     Args:
         embedding_dim (int): Dimensionality of the word vectors
                              defined by the sparse feature map.
         state_dim (int): Size of the recurrent layer.
     """
     super(NoLookupEncoder, self).__init__(**kwargs)
     self.embedding_dim = embedding_dim
     self.state_dim = state_dim
     self.bidir = BidirectionalWMT15(
         GatedRecurrent(activation=Tanh(), dim=state_dim))
     self.fwd_fork = Fork([
         name
         for name in self.bidir.prototype.apply.sequences if name != 'mask'
     ],
                          prototype=Linear(),
                          name='fwd_fork')
     self.back_fork = Fork([
         name
         for name in self.bidir.prototype.apply.sequences if name != 'mask'
     ],
                           prototype=Linear(),
                           name='back_fork')
     self.children = [self.bidir, self.fwd_fork, self.back_fork]
Example 33
    def __init__(self, input_dim, output_dim, hidden_dim=None, **kwargs):
        super(Qsampler, self).__init__(**kwargs)

        if hidden_dim is None:
            hidden_dim = (input_dim + output_dim) // 2
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim

        self.h_transform = Linear(name=self.name + '_h',
                                  input_dim=input_dim,
                                  output_dim=hidden_dim,
                                  weights_init=self.weights_init,
                                  biases_init=self.biases_init,
                                  use_bias=True)
        self.mean_transform = Linear(name=self.name + '_mean',
                                     input_dim=hidden_dim,
                                     output_dim=output_dim,
                                     weights_init=self.weights_init,
                                     biases_init=self.biases_init,
                                     use_bias=True)
        self.ls_transform = Linear(name=self.name + '_log_sigma',
                                   input_dim=hidden_dim,
                                   output_dim=output_dim,
                                   weights_init=self.weights_init,
                                   biases_init=self.biases_init,
                                   use_bias=True)

        self.children = [
            self.h_transform, self.mean_transform, self.ls_transform
        ]
Example 34
    def _build_bricks(self, *args, **kwargs):
        # Build lookup tables
        self.word_embed = self._embed(len(self.dataset.word2index),
                                      self.config.word_embed_dim,
                                      name='word_embed')

        self.user_embed = self._embed(len(self.dataset.user2index),
                                      self.config.user_embed_dim,
                                      name="user_embed")

        self.hashtag_embed = self._embed(len(self.dataset.hashtag2index),
                                         self.config.lstm_dim +
                                         self.config.user_embed_dim,
                                         name='hashtag_embed')
        # Build text encoder
        self.mlstm_ins = Linear(input_dim=self.config.word_embed_dim,
                                output_dim=4 * self.config.lstm_dim,
                                name='mlstm_in')
        self.mlstm_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm_ins.biases_init = Constant(0)
        self.mlstm_ins.initialize()
        self.mlstm = MLSTM(self.config.lstm_time,
                           self.config.lstm_dim,
                           shared=False)
        self.mlstm.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm.biases_init = Constant(0)
        self.mlstm.initialize()
Example 35
def softmax_layer(h, y, hidden_size, num_targets, cost_fn='cross'):
    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=hidden_size,
                              output_dim=num_targets)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    y_pred = T.argmax(linear_output, axis=1)
    label_of_predicted = debug_print(y[T.arange(y.shape[0]), y_pred],
                                     'label_of_predicted', False)
    pat1 = T.mean(label_of_predicted)
    updates = None
    if 'ranking' in cost_fn:
        cost, updates = ranking_loss(linear_output, y)
        print('using ranking loss function!')
    else:
        y_hat = Logistic().apply(linear_output)
        y_hat.name = 'y_hat'
        cost = cross_entropy_loss(y_hat, y)
    cost.name = 'cost'
    pat1.name = 'precision@1'
    misclassify_rate = MultiMisclassificationRate().apply(
        y, T.ge(linear_output, 0.5))
    misclassify_rate.name = 'error_rate'
    return cost, pat1, updates, misclassify_rate
Example 36
def bilstm_layer(in_dim, inp, h_dim, n):
    linear = Linear(input_dim=in_dim, output_dim=h_dim * 4, name='linear' + str(n)+inp.name)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n)+inp.name)
    bilstm = Bidirectional(prototype=lstm)
    bilstm.name = 'bilstm' + str(n) + inp.name
    initialize([linear, bilstm])
    return bilstm.apply(linear.apply(inp))[0]
Example 37
    def apply(self, input_, target):
        x_to_h = Linear(name='x_to_h',
                        input_dim=self.dims[0],
                        output_dim=self.dims[1] * 4)
        pre_rnn = x_to_h.apply(input_)
        pre_rnn.name = 'pre_rnn'
        rnn = LSTM(activation=Tanh(),
                   dim=self.dims[1], name=self.name)
        h, _ = rnn.apply(pre_rnn)
        h.name = 'h'
        h_to_y = Linear(name='h_to_y',
                        input_dim=self.dims[1],
                        output_dim=self.dims[2])
        y_hat = h_to_y.apply(h)
        y_hat.name = 'y_hat'

        cost = SquaredError().apply(target, y_hat)
        cost.name = 'MSE'

        self.outputs = {}
        self.outputs['y_hat'] = y_hat
        self.outputs['cost'] = cost
        self.outputs['pre_rnn'] = pre_rnn
        self.outputs['h'] = h

        # Initialization
        for brick in (rnn, x_to_h, h_to_y):
            brick.weights_init = IsotropicGaussian(0.01)
            brick.biases_init = Constant(0)
            brick.initialize()
Example 38
    def __init__(self, input_dim, output_dim, channels, width, height, N, **kwargs):
        super(AttentionWriter, self).__init__(name="writer", **kwargs)

        self.channels = channels
        self.img_width = width
        self.img_height = height
        self.N = N
        self.input_dim = input_dim
        self.output_dim = output_dim

        assert output_dim == channels*width*height

        self.zoomer = ZoomableAttentionWindow(channels, height, width, N)
        self.z_trafo = Linear(
                name=self.name+'_ztrafo',
                input_dim=input_dim, output_dim=5, 
                weights_init=self.weights_init, biases_init=self.biases_init,
                use_bias=True)

        self.w_trafo = Linear(
                name=self.name+'_wtrafo',
                input_dim=input_dim, output_dim=channels*N*N, 
                weights_init=self.weights_init, biases_init=self.biases_init,
                use_bias=True)

        self.children = [self.z_trafo, self.w_trafo]
Example 39
class AttentionWriter(Initializable):
    def __init__(self, input_dim, output_dim, channels, width, height, N, **kwargs):
        super(AttentionWriter, self).__init__(name="writer", **kwargs)

        self.channels = channels
        self.img_width = width
        self.img_height = height
        self.N = N
        self.input_dim = input_dim
        self.output_dim = output_dim

        assert output_dim == channels*width*height

        self.zoomer = ZoomableAttentionWindow(channels, height, width, N)
        self.z_trafo = Linear(
                name=self.name+'_ztrafo',
                input_dim=input_dim, output_dim=5, 
                weights_init=self.weights_init, biases_init=self.biases_init,
                use_bias=True)

        self.w_trafo = Linear(
                name=self.name+'_wtrafo',
                input_dim=input_dim, output_dim=channels*N*N, 
                weights_init=self.weights_init, biases_init=self.biases_init,
                use_bias=True)

        self.children = [self.z_trafo, self.w_trafo]

    @application(inputs=['h'], outputs=['c_update'])
    def apply(self, h):
        w = self.w_trafo.apply(h)
        l = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        c_update = 1./gamma * self.zoomer.write(w, center_y, center_x, delta, sigma)

        return c_update

    @application(inputs=['h'], outputs=['c_update', 'center_y', 'center_x', 'delta'])
    def apply_detailed(self, h):
        w = self.w_trafo.apply(h)
        l = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        c_update = 1./gamma * self.zoomer.write(w, center_y, center_x, delta, sigma)

        return c_update, center_y, center_x, delta

    @application(inputs=['x','h'], outputs=['c_update', 'center_y', 'center_x', 'delta'])
    def apply_circular(self,x,h):
        #w = self.w_trafo.apply(h)
        l = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        c_update = 1./gamma * self.zoomer.write(x, center_y, center_x, delta, sigma)

        return c_update, center_y, center_x, delta
Example 40
    def __init__(self,
                 num_input_words,
                 emb_dim,
                 dim,
                 vocab,
                 lookup=None,
                 fork_and_rnn=None,
                 **kwargs):

        if num_input_words > 0:
            logger.info("Restricting def vocab to " + str(num_input_words))
            self._num_input_words = num_input_words
        else:
            self._num_input_words = vocab.size()

        self._vocab = vocab

        children = []

        if lookup is None:
            self._def_lookup = LookupTable(self._num_input_words,
                                           emb_dim,
                                           name='def_lookup')
        else:
            self._def_lookup = lookup

        if fork_and_rnn is None:
            self._def_fork = Linear(emb_dim, 4 * dim, name='def_fork')
            self._def_rnn = LSTM(dim, name='def_rnn')
        else:
            self._def_fork, self._def_rnn = fork_and_rnn

        children.extend([self._def_lookup, self._def_fork, self._def_rnn])

        super(LSTMReadDefinitions, self).__init__(children=children, **kwargs)
Example 41
    def __init__(self, visible_dim, hidden_dim, rnn_dimensions=(128, 128), **kwargs):
        super(Rnnrbm, self).__init__(**kwargs)
        self.rnn_dimensions = rnn_dimensions
        self.visible_dim = visible_dim
        self.hidden_dim = hidden_dim

        # self.in_layer = Linear(input_dim=input_dim, output_dim=rnn_dimension * 4,
        # weights_init=IsotropicGaussian(0.01),
        # biases_init=Constant(0.0),
        # use_bias=False,
        # name="in_layer")

        self.rbm = Rbm(visible_dim=visible_dim, hidden_dim=hidden_dim,
                       activation=Sigmoid(), weights_init=IsotropicGaussian(0.1),
                       biases_init=Constant(0.1),
                       name='rbm')

        self.uv = Linear(input_dim=rnn_dimensions[-1], output_dim=visible_dim,
                         weights_init=IsotropicGaussian(0.0001),
                         biases_init=Constant(0.001),
                         use_bias=True, name='uv')

        self.uh = Linear(input_dim=rnn_dimensions[-1], output_dim=hidden_dim,
                         weights_init=IsotropicGaussian(0.0001),
                         biases_init=Constant(0.001),
                         use_bias=True, name='uh')

        self.rnn = Rnn([visible_dim] + list(rnn_dimensions), name='rnn')

        self.children = [self.rbm, self.uv, self.uh, self.rnn] + self.rnn.children._items
Example 42
def test_variable_filter_applications_error():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name="linear1")

    x = tensor.vector()
    h1 = brick1.apply(x)
    cg = ComputationGraph(h1)
    VariableFilter(applications=brick1.apply)(cg.variables)
Example 43
def MSEloss_layer(h, y, frame_length, hidden_size):
    hidden_to_output = Linear(name="hidden_to_output", input_dim=hidden_size, output_dim=frame_length)
    initialize([hidden_to_output])
    y_hat = hidden_to_output.apply(h)
    y_hat.name = "y_hat"
    cost = squared_error(y_hat, y).mean()
    cost.name = "cost"
    # import ipdb; ipdb.set_trace()
    return y_hat, cost
Example 44
def create_rnn(hidden_dim, vocab_dim, mode="rnn"):
    # input
    x = tensor.imatrix('inchar')
    y = tensor.imatrix('outchar')

    # 
    W = LookupTable(
        name = "W1",
        #dim = hidden_dim*4,  # needed when mode == "lstm": LSTM.apply expects inputs of size 4*dim
        dim = hidden_dim,
        length = vocab_dim,
        weights_init = initialization.IsotropicGaussian(0.01),
        biases_init = initialization.Constant(0)
    )
    if mode == "lstm":
        # Long Short Term Memory
        H = LSTM(
            hidden_dim, 
            name = 'H',
            weights_init = initialization.IsotropicGaussian(0.01),
            biases_init = initialization.Constant(0.0)
        )
    else:
        # recurrent history weight
        H = SimpleRecurrent(
            name = "H",
            dim = hidden_dim,
            activation = Tanh(),
            weights_init = initialization.IsotropicGaussian(0.01)
        )
    # 
    S = Linear(
        name = "W2",
        input_dim = hidden_dim,
        output_dim = vocab_dim,
        weights_init = initialization.IsotropicGaussian(0.01),
        biases_init = initialization.Constant(0)
    )

    A = NDimensionalSoftmax(
        name = "softmax"
    )

    initLayers([W,H,S])
    activations = W.apply(x)
    hiddens = H.apply(activations)#[0]
    activations2 = S.apply(hiddens)
    y_hat = A.apply(activations2, extra_ndim=1)
    cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean()

    cg = ComputationGraph(cost)
    #print VariableFilter(roles=[WEIGHT])(cg.variables)
    #W1,H,W2 = VariableFilter(roles=[WEIGHT])(cg.variables)

    layers = (x, W, H, S, A, y)

    return  cg, layers, y_hat, cost
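A minimal usage sketch for create_rnn, assuming the imports used inside it (theano, blocks.initialization, LookupTable, SimpleRecurrent/LSTM, Linear, NDimensionalSoftmax) and the project's initLayers() helper; the sizes are placeholders.

cg, layers, y_hat, cost = create_rnn(hidden_dim=512, vocab_dim=128, mode="rnn")
x, W, H, S, A, y = layers

# Compile the scalar cost; x ('inchar') and y ('outchar') are integer matrices.
f = theano.function([x, y], cost)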
Example 45
def test_variable_filter():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name='linear1')
    brick2 = Bias(2, name='bias1')
    activation = Sigmoid(name='sigm')

    x = tensor.vector()
    h1 = brick1.apply(x)
    h2 = activation.apply(h1)
    y = brick2.apply(h2)
    cg = ComputationGraph(y)

    parameters = [brick1.W, brick1.b, brick2.params[0]]
    bias = [brick1.b, brick2.params[0]]
    brick1_bias = [brick1.b]

    # Testing filtering by role
    role_filter = VariableFilter(roles=[PARAMETER])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[FILTER])
    assert [] == role_filter(cg.variables)

    # Testing filtering by role using each_role flag
    role_filter = VariableFilter(roles=[PARAMETER, BIAS])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[PARAMETER, BIAS], each_role=True)
    assert not parameters == role_filter(cg.variables)
    assert bias == role_filter(cg.variables)

    # Testing filtering by bricks classes
    brick_filter = VariableFilter(roles=[BIAS], bricks=[Linear])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by bricks instances
    brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by brick instance
    brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by name
    name_filter = VariableFilter(name='W_norm')
    assert [cg.variables[2]] == name_filter(cg.variables)

    # Testing filtering by name regex
    name_filter_regex = VariableFilter(name_regex='W_no.?m')
    assert [cg.variables[2]] == name_filter_regex(cg.variables)

    # Testing filtering by application
    appli_filter = VariableFilter(applications=[brick1.apply])
    variables = [cg.variables[1], cg.variables[8]]
    assert variables == appli_filter(cg.variables)

    # Testing filtering by application
    appli_filter_list = VariableFilter(applications=[brick1.apply])
    assert variables == appli_filter_list(cg.variables)
Example 46
def test_variable_filter_roles_error():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name="linear1")

    x = tensor.vector()
    h1 = brick1.apply(x)
    cg = ComputationGraph(h1)
    # testing role error
    VariableFilter(roles=PARAMETER)(cg.variables)
Example 47
def add_lstm(input_dim, input_var):
    linear = Linear(input_dim=input_dim,output_dim=input_dim*4,name="linear_layer")
    lstm = LSTM(dim=input_dim, name="lstm_layer")

    testing_init(linear)
    #linear.initialize()
    default_init(lstm)

    h = linear.apply(input_var)
    return lstm.apply(h)
Example 48
def test_protocol0_regression():
    """Check for a regression where protocol 0 dumps fail on load."""
    brick = Linear(5, 10)
    brick.allocate()
    buf = BytesIO()
    dump(brick, buf, parameters=list(brick.parameters), protocol=0)
    try:
        load(buf)
    except TypeError:
        assert False  # Regression
def construct_model(activation_function, r_dim, hidden_dim, out_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    nx = x.shape[0]
    nj = x.shape[1]  # also is r.shape[0]
    nr = r.shape[1]

    # r is nj x nr
    # x is nx x nj
    # y is nx

    # Get a representation of r of size r_dim
    r = DAE(r)

    # r is now nj x r_dim

    # r_rep is nx x nj x r_dim
    r_rep = r[None, :, :].repeat(axis=0, repeats=nx)
    # x3 is nx x nj x 1
    x3 = x[:, :, None]

    # concat is nx x nj x (r_dim + 1)
    concat = tensor.concatenate([r_rep, x3], axis=2)

    # Change concat from Batch x Time x Features to T X B x F
    rnn_input = concat.dimshuffle(1, 0, 2)

    linear = Linear(input_dim=r_dim + 1, output_dim=4 * hidden_dim,
                    name="input_linear")
    lstm = LSTM(dim=hidden_dim, activation=activation_function,
                name="hidden_recurrent")
    top_linear = Linear(input_dim=hidden_dim, output_dim=out_dim,
                        name="out_linear")

    pre_rnn = linear.apply(rnn_input)
    states = lstm.apply(pre_rnn)[0]
    activations = top_linear.apply(states)
    activations = tensor.mean(activations, axis=0)

    cost = Softmax().categorical_cross_entropy(y, activations)

    pred = activations.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters

    for brick in (linear, lstm, top_linear):
        brick.weights_init = IsotropicGaussian(0.1)
        brick.biases_init = Constant(0.)
        brick.initialize()

    return cost, error_rate
Example 50
File: draw.py Project: zan2434/draw
class AttentionWriter(Initializable):
    def __init__(self, input_dim, output_dim, width, height, N, **kwargs):
        super(AttentionWriter, self).__init__(name="writer", **kwargs)

        self.img_width = width
        self.img_height = height
        self.N = N
        self.input_dim = input_dim
        self.output_dim = output_dim

        assert output_dim == width * height

        self.zoomer = ZoomableAttentionWindow(height, width, N)
        self.z_trafo = Linear(
            name=self.name + "_ztrafo",
            input_dim=input_dim,
            output_dim=5,
            weights_init=self.weights_init,
            biases_init=self.biases_init,
            use_bias=True,
        )

        self.w_trafo = Linear(
            name=self.name + "_wtrafo",
            input_dim=input_dim,
            output_dim=N * N,
            weights_init=self.weights_init,
            biases_init=self.biases_init,
            use_bias=True,
        )

        self.children = [self.z_trafo, self.w_trafo]

    @application(inputs=["h"], outputs=["c_update"])
    def apply(self, h):
        w = self.w_trafo.apply(h)
        l = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        c_update = 1.0 / gamma * self.zoomer.write(w, center_y, center_x, delta, sigma)

        return c_update

    @application(inputs=["h"], outputs=["c_update", "center_y", "center_x", "delta"])
    def apply_detailed(self, h):
        w = self.w_trafo.apply(h)
        l = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        c_update = 1.0 / gamma * self.zoomer.write(w, center_y, center_x, delta, sigma)

        return c_update, center_y, center_x, delta
Example 51
def softmax_layer(h, y, frame_length, hidden_size):
    hidden_to_output = Linear(name="hidden_to_output", input_dim=hidden_size, output_dim=frame_length)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = "linear_output"
    softmax = NDimensionalSoftmax()
    y_hat = softmax.apply(linear_output, extra_ndim=1)
    y_hat.name = "y_hat"
    cost = softmax.categorical_cross_entropy(y, linear_output, extra_ndim=1).mean()
    cost.name = "cost"
    return y_hat, cost
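A minimal usage sketch for this softmax_layer, assuming the same Linear/NDimensionalSoftmax imports and the project's initialize() helper; the tensors and sizes are placeholders.

h = tensor.tensor3('h')       # recurrent states, (time, batch, hidden_size)
y = tensor.imatrix('y')       # integer targets, (time, batch)
y_hat, cost = softmax_layer(h, y, frame_length=88, hidden_size=512)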
Example 52
def test_linear_nan_allocation():
    x = tensor.matrix()

    linear = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                    biases_init=Constant(1))
    linear.apply(x)
    w1 = numpy.nan * numpy.zeros((16, 8))
    w2 = linear.params[0].get_value()
    b1 = numpy.nan * numpy.zeros(8)
    b2 = linear.params[1].get_value()
    numpy.testing.assert_equal(w1, w2)
    numpy.testing.assert_equal(b1, b2)
 def lstm_layer(self, h, n):
     """
     Performs the LSTM update for a batch of word sequences
     :param h The word embeddings for this update
     :param n The number of layers of the LSTM
     """
     # Maps the word embedding to a dimensionality to be used in the LSTM
     linear = Linear(input_dim=self.hidden_size, output_dim=self.hidden_size * 4, name='linear_lstm' + str(n))
     initialize(linear, sqrt(6.0 / (5 * self.hidden_size)))
     lstm = LSTM(dim=self.hidden_size, name='lstm' + str(n))
     initialize(lstm, 0.08)
     return lstm.apply(linear.apply(h))
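The n argument above only distinguishes the brick names, so several such layers can be stacked. A rough sketch of how that might look (deep_lstm and num_layers are hypothetical names, assuming a class that exposes this lstm_layer method):

 def deep_lstm(self, embeddings, num_layers):
     # lstm_layer returns (hidden states, cell states); feeding the hidden
     # states of layer n into layer n + 1 builds a deep LSTM.
     h = embeddings
     for n in range(num_layers):
         h, _ = self.lstm_layer(h, n)
     return h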
Example No. 54
 def lllistool(i, inp, func):
     l = Linear(input_dim=DIMS[i], output_dim=DIMS[i+1] * NUMS[i+1], 
                weights_init=IsotropicGaussian(std=DIMS[i]**(-0.5)), 
                biases_init=IsotropicGaussian(std=DIMS[i]**(-0.5)),
                name='Lin{}'.format(i))
     l.initialize()
     func.name='Fun{}'.format(i)
     if func == SimpleRecurrent:
         gong = func(dim=DIMS[i+1], activation=Rectifier(), weights_init=IsotropicGaussian(std=(DIMS[i]+DIMS[i+1])**(-0.5)))
     else:
         gong = func()
     ret = gong.apply(l.apply(inp))
     return ret
Example No. 55
    def apply_layer(self, layer_type, input_, in_dim, out_dim, layer_name):
        # Since this path is built twice (clean and corrupted encoder), we
        # want to make sure that the parameters of both passes are shared.
        layer = self.shareds.get(layer_name)
        if layer is None:
            if layer_type == "fc":
                linear = Linear(use_bias=False, name=layer_name, input_dim=in_dim, output_dim=out_dim, seed=1)
                linear.weights_init = Glorot(self.rng, in_dim, out_dim)
                linear.initialize()
                layer = linear
                self.shareds[layer_name] = layer

        return layer.apply(input_)
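The comment above is the key point: because the brick is stored in self.shareds under its layer name, the clean and the corrupted encoder pass apply the very same Linear instance and therefore share its weights. A standalone sketch of that idea (fc, shareds and the IsotropicGaussian initializer are illustrative choices, since the Glorot helper used above appears to be project-specific):

from theano import tensor
from blocks.bricks import Linear
from blocks.initialization import IsotropicGaussian, Constant

shareds = {}

def fc(name, x, in_dim, out_dim):
    layer = shareds.get(name)
    if layer is None:
        layer = Linear(name=name, input_dim=in_dim, output_dim=out_dim,
                       weights_init=IsotropicGaussian(0.01),
                       biases_init=Constant(0))
        layer.initialize()
        shareds[name] = layer
    return layer.apply(x)

clean = fc('enc_1', tensor.matrix('x_clean'), 100, 50)
corr = fc('enc_1', tensor.matrix('x_corr'), 100, 50)   # reuses the same brick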
Example No. 56
def prior_network(x, n_input, hu_encoder, n_latent):
    logger.info('In prior_network: n_input: %d, hu_encoder: %d', n_input, hu_encoder)
    mlp1 = MLP(activations=[Rectifier()], dims=[n_input, hu_encoder], name='prior_in_to_hidEncoder')
    initialize([mlp1])
    h_encoder = mlp1.apply(x)
    h_encoder = debug_print(h_encoder, 'h_encoder', False)
    lin1 = Linear(name='prior_hiddEncoder_to_latent_mu', input_dim=hu_encoder, output_dim=n_latent)
    lin2 = Linear(name='prior_hiddEncoder_to_latent_sigma', input_dim=hu_encoder, output_dim=n_latent)
    initialize([lin1])
    initialize([lin2], rndstd=0.001)
    mu = lin1.apply(h_encoder)
    log_sigma = lin2.apply(h_encoder)
    return mu, log_sigma
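prior_network only produces the Gaussian parameters; drawing a latent sample would typically use the reparameterization trick. A minimal sketch, assuming log_sigma is the log standard deviation and using Theano's MRG random streams (sample_latent is a hypothetical helper, not part of the original code):

from theano import tensor
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(seed=1234)

def sample_latent(mu, log_sigma):
    # z = mu + sigma * eps with eps ~ N(0, 1)
    epsilon = srng.normal(size=mu.shape)
    return mu + tensor.exp(log_sigma) * epsilon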
Example No. 57
    def __init__(self, input_dim, output_dim, noise_batch_size,
            prior_mean=0, prior_noise_level=0, **kwargs):
        self.linear = Linear()
        self.mask = Linear(name='mask')
        children = [self.linear, self.mask]
        kwargs.setdefault('children', []).extend(children)
        super(NoisyLinear, self).__init__(**kwargs)

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.noise_batch_size = noise_batch_size
        self.prior_mean = prior_mean
        self.prior_noise_level = prior_noise_level
Example No. 58
def softmax_layer(h, y, vocab_size, hidden_size):
    hidden_to_output = Linear(name='hidden_to_output', input_dim=hidden_size,
                              output_dim=vocab_size)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    softmax = NDimensionalSoftmax()
    y_hat = softmax.apply(linear_output, extra_ndim=1)
    y_hat.name = 'y_hat'
    cost = softmax.categorical_cross_entropy(
        y, linear_output, extra_ndim=1).mean()
    cost.name = 'cost'
    return y_hat, cost
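A hedged usage sketch for the helper above (the dimensions, variable names and the integer-target assumption are illustrative, not from the original project): h is the (time, batch, hidden) output of a recurrent stack and y holds one word index per timestep, so extra_ndim=1 lets the softmax treat the time axis as an extra batch dimension.

from theano import tensor

h = tensor.tensor3('hidden_states')
y = tensor.imatrix('targets')        # shape: (time, batch), integer word ids
y_hat, cost = softmax_layer(h, y, vocab_size=10000, hidden_size=512)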
Example No. 59
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        x = tensor.tensor3('x', dtype=floatX)
        y = tensor.tensor3('y', dtype=floatX)

        x_to_lstm = Linear(name="x_to_lstm", input_dim=input_size, output_dim=4 * hidden_size,
                           weights_init=IsotropicGaussian(), biases_init=Constant(0))
        lstm = LSTM(dim=hidden_size, name="lstm", weights_init=IsotropicGaussian(), biases_init=Constant(0))
        lstm_to_output = Linear(name="lstm_to_output", input_dim=hidden_size, output_dim=output_size,
                                weights_init=IsotropicGaussian(), biases_init=Constant(0))

        x_transform = x_to_lstm.apply(x)
        h, c = lstm.apply(x_transform)

        y_hat = lstm_to_output.apply(h)
        y_hat = Logistic(name="y_hat").apply(y_hat)

        self.cost = BinaryCrossEntropy(name="cost").apply(y, y_hat)

        x_to_lstm.initialize()
        lstm.initialize()
        lstm_to_output.initialize()

        self.computation_graph = ComputationGraph(self.cost)
Example No. 60
def main(max_seq_length, lstm_dim, batch_size, num_batches, num_epochs):
    dataset_train = IterableDataset(generate_data(max_seq_length, batch_size,
                                                  num_batches))
    dataset_test = IterableDataset(generate_data(max_seq_length, batch_size,
                                                 100))

    stream_train = DataStream(dataset=dataset_train)
    stream_test = DataStream(dataset=dataset_test)

    x = T.tensor3('x')
    y = T.matrix('y')

    # we need to provide data for the LSTM layer of size 4 * lstm_dim, see
    # the LSTM layer documentation for the explanation
    x_to_h = Linear(1, lstm_dim * 4, name='x_to_h',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
    lstm = LSTM(lstm_dim, name='lstm',
                weights_init=IsotropicGaussian(),
                biases_init=Constant(0.0))
    h_to_o = Linear(lstm_dim, 1, name='h_to_o',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))

    x_transform = x_to_h.apply(x)
    h, c = lstm.apply(x_transform)

    # only the hidden-unit values from the last time step are used for
    # the classification
    y_hat = h_to_o.apply(h[-1])
    y_hat = Logistic().apply(y_hat)

    cost = BinaryCrossEntropy().apply(y, y_hat)
    cost.name = 'cost'

    lstm.initialize()
    x_to_h.initialize()
    h_to_o.initialize()

    cg = ComputationGraph(cost)

    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Adam())
    test_monitor = DataStreamMonitoring(variables=[cost],
                                        data_stream=stream_test, prefix="test")
    train_monitor = TrainingDataMonitoring(variables=[cost], prefix="train",
                                           after_epoch=True)

    main_loop = MainLoop(algorithm, stream_train,
                         extensions=[test_monitor, train_monitor,
                                     FinishAfter(after_n_epochs=num_epochs),
                                     Printing(), ProgressBar()])
    main_loop.run()

    print('Learned weights:')
    for layer in (x_to_h, lstm, h_to_o):
        print("Layer '%s':" % layer.name)
        for param in layer.parameters:
            print(param.name, ':', param.get_value())
        print()
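An illustrative invocation of main (all hyperparameter values below are assumptions, not taken from the original project):

if __name__ == '__main__':
    main(max_seq_length=100, lstm_dim=10, batch_size=32,
         num_batches=200, num_epochs=5)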