Example #1
    def __init__(self, feature_dim, memory_dim, fc1_dim, fc2_dim):
        self.W = Linear(input_dim=feature_dim,
                        output_dim=memory_dim * 4,
                        weights_init=IsotropicGaussian(0.01),
                        biases_init=Constant(0),
                        use_bias=False,
                        name='seqDecoder_W')
        self.GRU_A = LSTM(feature_dim,
                          name='seqDecoder_A',
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
        self.GRU_B = LSTM(memory_dim,
                          name='seqDecoder_B',
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
        self.W.initialize()
        self.GRU_A.initialize()
        self.GRU_B.initialize()
        self.fc1 = Linear(input_dim=memory_dim,
                          output_dim=fc1_dim,
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0),
                          name='fc1')
        self.fc2 = Linear(input_dim=fc1_dim,
                          output_dim=fc2_dim,
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0),
                          name='fc2')

        self.fc1.initialize()
        self.fc2.initialize()
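
The snippet above follows the usual Blocks brick lifecycle: construct a brick with its dimensions and initialization schemes, call initialize(), then apply() it to a Theano variable. A minimal self-contained sketch of that pattern (the dimensions and the 'demo_linear' name are illustrative, not taken from the snippet):

import numpy
import theano
from theano import tensor
from blocks.bricks import Linear
from blocks.initialization import IsotropicGaussian, Constant

# Construct the brick, then allocate and initialize its parameters.
linear = Linear(input_dim=8, output_dim=4,
                weights_init=IsotropicGaussian(0.01),
                biases_init=Constant(0),
                name='demo_linear')
linear.initialize()

# apply() builds the symbolic affine transform x.dot(W) + b.
x = tensor.matrix('x')
f = theano.function([x], linear.apply(x))
print(f(numpy.ones((2, 8), dtype=theano.config.floatX)).shape)  # (2, 4)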
Example #2
    def __init__(self, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        # Dimension of the word embeddings taken as input
        self.embedding_dim = embedding_dim
        # Hidden state dimension
        self.state_dim = state_dim

        # The bidir GRU
        self.bidir = BidirectionalFromDict(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        # Forks to administer the inputs of GRU gates
        self.fwd_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                             prototype=Linear(),
                             name='fwd_fork')
        self.back_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                              prototype=Linear(),
                              name='back_fork')

        self.children = [self.bidir, self.fwd_fork, self.back_fork]
Example #3
    def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        self.lookup = LookupTable(name='embeddings')
        self.bidir = NewBidirectional(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                             prototype=Linear(),
                             name='fwd_fork')
        self.back_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                              prototype=Linear(),
                              name='back_fork')

        self.children = [
            self.lookup, self.bidir, self.fwd_fork, self.back_fork
        ]
Example #4
    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 state_dim,
                 representation_dim,
                 theano_seed=None,
                 **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.representation_dim = representation_dim
        self.theano_seed = theano_seed

        # Initialize gru with special initial state.
        self.transition = GRUInitialState(attended_dim=state_dim,
                                          dim=state_dim,
                                          activation=Tanh(),
                                          name='decoder')

        # Initialize the attention mechanism.
        self.attention = SequenceContentAttention2(
            state_names=self.transition.apply.states,
            attended_dim=representation_dim,
            match_dim=state_dim,
            name="attention")

        readout = Readout(source_names=[
            'states', 'feedback', self.attention.take_glimpses.outputs[0]
        ],
                          readout_dim=self.vocab_size,
                          emitter=NewSoftmaxEmitter(initial_output=-1,
                                                    theano_seed=theano_seed),
                          feedback_brick=NewLookupFeedback(
                              vocab_size, embedding_dim),
                          post_merge=InitializableFeedforwardSequence([
                              Bias(dim=state_dim, name='maxout_bias').apply,
                              Maxout(num_pieces=2, name='maxout').apply,
                              Linear(input_dim=state_dim / 2,
                                     output_dim=embedding_dim,
                                     use_bias=False,
                                     name='softmax0').apply,
                              Linear(input_dim=embedding_dim,
                                     name='softmax1').apply
                          ]),
                          merged_dim=state_dim)

        # Build sequence generator accordingly.
        self.sequence_generator = SequenceGenerator(
            readout=readout,
            transition=self.transition,
            attention=self.attention,
            fork=Fork([
                name
                for name in self.transition.apply.sequences if name != 'mask'
            ],
                      prototype=Linear()),
            cost_type='categorical_cross_entropy')

        self.children = [self.sequence_generator]
Example #5
    def __init__(self, input_dim, output_dim, channels, width, height, N,
                 **kwargs):
        super(AttentionWriter, self).__init__(name="writer", **kwargs)

        self.channels = channels
        self.img_width = width
        self.img_height = height
        self.N = N
        self.input_dim = input_dim
        self.output_dim = output_dim

        assert output_dim == channels * width * height

        self.zoomer = ZoomableAttentionWindow(channels, height, width, N)
        self.z_trafo = Linear(name=self.name + '_ztrafo',
                              input_dim=input_dim,
                              output_dim=5,
                              weights_init=self.weights_init,
                              biases_init=self.biases_init,
                              use_bias=True)

        self.w_trafo = Linear(name=self.name + '_wtrafo',
                              input_dim=input_dim,
                              output_dim=channels * N * N,
                              weights_init=self.weights_init,
                              biases_init=self.biases_init,
                              use_bias=True)

        self.children = [self.z_trafo, self.w_trafo]
Example #6
    def __init__(self,
                 input_dim,
                 output_activation=None,
                 transform_activation=None,
                 **kwargs):
        super(Highway, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.output_dim = input_dim

        if output_activation is None:
            output_activation = Rectifier()

        if transform_activation is None:
            transform_activation = Logistic()

        self._linear_h = Linear(name="linear_h",
                                input_dim=input_dim,
                                output_dim=input_dim)
        self._linear_t = Linear(name="linear_t",
                                input_dim=input_dim,
                                output_dim=input_dim)
        self._output_activation = output_activation
        self._transform_activation = transform_activation
        self.children = [
            self._linear_h, self._linear_t, self._output_activation,
            self._transform_activation
        ]
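
The apply method of this Highway brick is not shown above; for reference, here is a plain NumPy sketch of the standard highway combination that linear_h and linear_t typically feed, y = t * h + (1 - t) * x with the default Rectifier and Logistic activations (an assumption about the missing apply, not the author's exact code):

import numpy as np

def highway_forward(x, W_h, b_h, W_t, b_t):
    h = np.maximum(0.0, x.dot(W_h) + b_h)            # candidate output (Rectifier)
    t = 1.0 / (1.0 + np.exp(-(x.dot(W_t) + b_t)))    # transform gate (Logistic)
    return t * h + (1.0 - t) * x                     # carry gate is 1 - t

x = np.random.randn(2, 5)
W = np.random.randn(5, 5) * 0.1
print(highway_forward(x, W, np.zeros(5), W, np.zeros(5)).shape)  # (2, 5)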
Example #7
    def __init__(self,
                 n_att_weights,
                 match_dim,
                 state_transformer=None,
                 attended_transformer=None,
                 energy_computer=None,
                 **kwargs):
        super(SequenceContentAttention, self).__init__(**kwargs)
        self.n_att_weights = n_att_weights
        if not state_transformer:
            state_transformer = Linear(use_bias=False)
        self.match_dim = match_dim
        self.state_transformer = state_transformer

        self.state_transformers = Parallel(input_names=self.state_names,
                                           prototype=state_transformer,
                                           name="state_trans")
        if not attended_transformer:
            attended_transformer = Linear(name="preprocess")
        if not energy_computer:
            energy_computer = MultiShallowEnergyComputer(n_att_weights,
                                                         name="energy_comp")
        self.attended_transformer = attended_transformer
        self.energy_computer = energy_computer

        self.children = [
            self.state_transformers, attended_transformer, energy_computer
        ]
Example #8
def test_defaults_sequence2():
    seq = DefaultsSequence(input_dim=(3, 4, 4),
                           lists=[
                               Convolutional(num_filters=10,
                                             stride=(2, 2),
                                             filter_size=(3, 3)),
                               BatchNormalization(),
                               Rectifier(),
                               Flattener(),
                               Linear(output_dim=10),
                               BatchNormalization(),
                               Rectifier(),
                               Linear(output_dim=12),
                               BatchNormalization(),
                               Rectifier()
                           ])
    seq.weights_init = Constant(1.0)
    seq.biases_init = Constant(0.0)
    seq.push_allocation_config()
    seq.push_initialization_config()
    seq.initialize()

    x = T.tensor4('input')
    y = seq.apply(x)
    func_ = theano.function([x], [y])

    x_val = np.ones((1, 3, 4, 4), dtype=theano.config.floatX)
    res = func_(x_val)[0]
    assert_allclose(res.shape, (1, 12))
Example #9
    def create_model(self):
        input_dim = self.input_dim
        x = self.x
        x_to_h = Linear(input_dim,
                        input_dim * 4,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))
        lstm = LSTM(input_dim,
                    name='lstm',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
        h_to_o = Linear(input_dim,
                        1,
                        name='h_to_o',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))

        x_transform = x_to_h.apply(x)
        self.x_to_h = x_to_h
        self.lstm = lstm
        self.h_to_o = h_to_o

        h, c = lstm.apply(x_transform)

        # only values of hidden units of the last timeframe are used for
        # the classification
        probs = h_to_o.apply(h[-1])
        return probs
Example #10
    def __init__(self, dim, mini_dim, summary_dim, **kwargs):
        super(LSTMwMini, self).__init__(**kwargs)
        self.dim = dim
        self.mini_dim = mini_dim
        self.summary_dim = summary_dim

        self.recurrent_layer = LSTM(dim=self.summary_dim,
                                    activation=Rectifier(),
                                    name='recurrent_layer',
                                    weights_init=IsotropicGaussian(),
                                    biases_init=Constant(0.0))
        self.mini_recurrent_layer = LSTM(dim=self.mini_dim,
                                         activation=Rectifier(),
                                         name='mini_recurrent_layer',
                                         weights_init=IsotropicGaussian(),
                                         biases_init=Constant(0.0))

        self.mini_to_main = Linear(self.dim + self.mini_dim,
                                   self.summary_dim,
                                   name='mini_to_main',
                                   weights_init=IsotropicGaussian(),
                                   biases_init=Constant(0.0))
        self.mini_to_main2 = Linear(self.summary_dim,
                                    self.summary_dim * 4,
                                    name='mini_to_main2',
                                    weights_init=IsotropicGaussian(),
                                    biases_init=Constant(0.0))

        self.children = [
            self.recurrent_layer, self.mini_recurrent_layer, self.mini_to_main,
            self.mini_to_main2
        ]
Example #11
    def __init__(self, networks, dims, **kwargs):
        super(DropMultiLayerEncoder, self).__init__(**kwargs)
        self.dims = dims
        self.networks = networks
        self.use_bias = True

        self.hid_linear_trans_forw = [
            Fork([
                name for name in networks[i].prototype.apply.sequences if name
                not in ['mask', 'drops_states', 'drops_cells', 'drops_igates']
            ],
                 name='fork_forw_{}'.format(i),
                 prototype=Linear(),
                 **kwargs) for i in range(len(networks))
        ]

        self.hid_linear_trans_back = [
            Fork([
                name for name in networks[i].prototype.apply.sequences if name
                not in ['mask', 'drops_states', 'drops_cells', 'drops_igates']
            ],
                 name='fork_back_{}'.format(i),
                 prototype=Linear(),
                 **kwargs) for i in range(len(networks))
        ]

        self.out_linear_trans = Linear(name='out_linear', **kwargs)
        self.children = (networks + self.hid_linear_trans_forw +
                         self.hid_linear_trans_back + [self.out_linear_trans])
        self.num_layers = len(networks)
Example #12
def make_bidir_lstm_stack(seq, seq_dim, mask, sizes, skip=True, name=''):
    bricks = []

    curr_dim = [seq_dim]
    curr_hidden = [seq]

    hidden_list = []
    for k, dim in enumerate(sizes):
        fwd_lstm_ins = [Linear(input_dim=d, output_dim=4*dim, name='%s_fwd_lstm_in_%d_%d'%(name,k,l)) for l, d in enumerate(curr_dim)]
        fwd_lstm = LSTM(dim=dim, activation=Tanh(), name='%s_fwd_lstm_%d'%(name,k))

        bwd_lstm_ins = [Linear(input_dim=d, output_dim=4*dim, name='%s_bwd_lstm_in_%d_%d'%(name,k,l)) for l, d in enumerate(curr_dim)]
        bwd_lstm = LSTM(dim=dim, activation=Tanh(), name='%s_bwd_lstm_%d'%(name,k))

        bricks = bricks + [fwd_lstm, bwd_lstm] + fwd_lstm_ins + bwd_lstm_ins

        fwd_tmp = sum(x.apply(v) for x, v in zip(fwd_lstm_ins, curr_hidden))
        bwd_tmp = sum(x.apply(v) for x, v in zip(bwd_lstm_ins, curr_hidden))
        fwd_hidden, _ = fwd_lstm.apply(fwd_tmp, mask=mask)
        bwd_hidden, _ = bwd_lstm.apply(bwd_tmp[::-1], mask=mask[::-1])
        hidden_list = hidden_list + [fwd_hidden, bwd_hidden]
        if skip:
            curr_hidden = [seq, fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [seq_dim, dim, dim]
        else:
            curr_hidden = [fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [dim, dim]

    return bricks, hidden_list
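
A minimal usage sketch for make_bidir_lstm_stack (the dimensions, variable names and the IsotropicGaussian/Constant initialization are illustrative; it assumes the same blocks/theano imports the function itself relies on):

from theano import tensor
from blocks.initialization import IsotropicGaussian, Constant

seq = tensor.tensor3('seq')    # (time, batch, seq_dim)
mask = tensor.matrix('mask')   # (time, batch)

bricks, hiddens = make_bidir_lstm_stack(seq, seq_dim=16, mask=mask,
                                        sizes=[32, 32], skip=True, name='enc')

# The bricks come back uninitialized; the caller chooses the schemes.
for brick in bricks:
    brick.weights_init = IsotropicGaussian(0.01)
    brick.biases_init = Constant(0.)
    brick.initialize()

# hiddens alternates forward/backward states per layer; the last two entries
# are the top layer's forward and (time-reversed) backward sequences.
top_fwd, top_bwd = hiddens[-2], hiddens[-1]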
Example #13
def test_sequence_variable_inputs():
    x, y = tensor.matrix(), tensor.matrix()

    parallel_1 = Parallel(input_names=['input_1', 'input_2'],
                          input_dims=[4, 5],
                          output_dims=[3, 2],
                          prototype=Linear(),
                          weights_init=Constant(2),
                          biases_init=Constant(1))
    parallel_2 = Parallel(input_names=['input_1', 'input_2'],
                          input_dims=[3, 2],
                          output_dims=[5, 4],
                          prototype=Linear(),
                          weights_init=Constant(2),
                          biases_init=Constant(1))
    sequence = Sequence([parallel_1.apply, parallel_2.apply])
    sequence.initialize()
    new_x, new_y = sequence.apply(x, y)
    x_val = numpy.ones((4, 4), dtype=theano.config.floatX)
    y_val = numpy.ones((4, 5), dtype=theano.config.floatX)
    assert_allclose(new_x.eval({x: x_val}), (x_val.dot(2 * numpy.ones(
        (4, 3))) + numpy.ones((4, 3))).dot(2 * numpy.ones(
            (3, 5))) + numpy.ones((4, 5)))
    assert_allclose(new_y.eval({y: y_val}), (y_val.dot(2 * numpy.ones(
        (5, 2))) + numpy.ones((4, 2))).dot(2 * numpy.ones(
            (2, 4))) + numpy.ones((4, 4)))
Example #14
    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 igru_state_dim,
                 emitter=None,
                 feedback_brick=None,
                 merge=None,
                 merge_prototype=None,
                 post_merge=None,
                 merged_dim=None,
                 igru=None,
                 **kwargs):
        self.igru = igru
        self.lookup = LookupTable(name='embeddings')
        self.vocab_size = vocab_size
        self.igru_state_dim = igru_state_dim
        self.gru_to_softmax = Linear(input_dim=igru_state_dim,
                                     output_dim=vocab_size)
        self.embedding_dim = embedding_dim
        self.gru_fork = Fork([
            name for name in self.igru.apply.sequences
            if name != 'mask' and name != 'input_states'
        ],
                             prototype=Linear(),
                             name='gru_fork')
        kwargs['children'] = [
            self.igru, self.lookup, self.gru_to_softmax, self.gru_fork
        ]
        super(Interpolator, self).__init__(emitter=emitter,
                                           feedback_brick=feedback_brick,
                                           merge=merge,
                                           merge_prototype=merge_prototype,
                                           post_merge=post_merge,
                                           merged_dim=merged_dim,
                                           **kwargs)
Example #15
    def __init__(self,
                 match_dim,
                 state_transformer=None,
                 attended_transformer=None,
                 energy_computer=None,
                 **kwargs):
        if not state_transformer:
            state_transformer = Linear(use_bias=False)
        self.match_dim = match_dim
        self.state_transformer = state_transformer

        self.state_transformers = Parallel(input_names=kwargs['state_names'],
                                           prototype=state_transformer,
                                           name="state_trans")
        if not attended_transformer:
            attended_transformer = Linear(name="preprocess")
        if not energy_computer:
            energy_computer = ShallowEnergyComputer(name="energy_comp")
        self.attended_transformer = attended_transformer
        self.energy_computer = energy_computer

        children = [
            self.state_transformers, attended_transformer, energy_computer
        ]
        kwargs.setdefault('children', []).extend(children)
        super(SequenceContentAttention, self).__init__(**kwargs)
Example #16
def test_rng():
    linear = Linear()
    assert isinstance(linear.rng, numpy.random.RandomState)
    linear = Linear(seed=1)
    assert linear.rng.rand() == numpy.random.RandomState(1).rand()
    linear = Linear()
    linear2 = Linear()
    assert linear.seed != linear2.seed
Example #17
def construct_model(activation_function, r_dim, hidden_dim, out_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    nx = x.shape[0]
    nj = x.shape[1]  # also is r.shape[0]
    nr = r.shape[1]

    # r is nj x nr
    # x is nx x nj
    # y is nx

    # Get a representation of r of size r_dim
    r = DAE(r)

    # r is now nj x r_dim

    # r_rep is nx x nj x r_dim
    r_rep = r[None, :, :].repeat(axis=0, repeats=nx)
    # x3 is nx x nj x 1
    x3 = x[:, :, None]

    # concat is nx x nj x (r_dim + 1)
    concat = tensor.concatenate([r_rep, x3], axis=2)

    # Change concat from Batch x Time x Features to T X B x F
    rnn_input = concat.dimshuffle(1, 0, 2)

    linear = Linear(input_dim=r_dim + 1,
                    output_dim=4 * hidden_dim,
                    name="input_linear")
    lstm = LSTM(dim=hidden_dim,
                activation=activation_function,
                name="hidden_recurrent")
    top_linear = Linear(input_dim=hidden_dim,
                        output_dim=out_dim,
                        name="out_linear")

    pre_rnn = linear.apply(rnn_input)
    states = lstm.apply(pre_rnn)[0]
    activations = top_linear.apply(states)
    activations = tensor.mean(activations, axis=0)

    cost = Softmax().categorical_cross_entropy(y, activations)

    pred = activations.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters

    for brick in (linear, lstm, top_linear):
        brick.weights_init = IsotropicGaussian(0.1)
        brick.biases_init = Constant(0.)
        brick.initialize()

    return cost, error_rate
Example #18
    def __init__(self, vocab_size, embedding_dim, state_dim,
                 representation_dim, topical_dim, theano_seed=None, **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.representation_dim = representation_dim
        self.theano_seed = theano_seed

        #self.topical_dim=topical_dim;

        # Initialize gru with special initial state
        self.transition = GRUInitialState(
            attended_dim=state_dim, dim=state_dim,
            activation=Tanh(), name='decoder')


        # Initialize the attention mechanism
        self.attention = SequenceContentAttention(
            state_names=self.transition.apply.states,
            attended_dim=representation_dim,
            match_dim=state_dim, name="attention")

        self.topical_attention = SequenceContentAttention(
            state_names=self.transition.apply.states,
            attended_dim=topical_dim,
            match_dim=state_dim,
            name="topical_attention")  # not sure whether the match dim would be correct


        # Initialize the readout, note that SoftmaxEmitter emits -1 for
        # initial outputs which is used by LookupFeedBackWMT15
        readout = Readout(
            source_names=['states', 'feedback',
                          self.attention.take_glimpses.outputs[0]],#check!
            readout_dim=self.vocab_size,
            emitter=SoftmaxEmitter(initial_output=-1, theano_seed=theano_seed),
            feedback_brick=LookupFeedbackWMT15(vocab_size, embedding_dim),
            post_merge=InitializableFeedforwardSequence(
                [Bias(dim=state_dim, name='maxout_bias').apply,
                 Maxout(num_pieces=2, name='maxout').apply,
                 Linear(input_dim=state_dim / 2, output_dim=embedding_dim,
                        use_bias=False, name='softmax0').apply,
                 Linear(input_dim=embedding_dim, name='softmax1').apply]),
            merged_dim=state_dim)

        # Build sequence generator accordingly
        self.sequence_generator = SequenceGenerator(
            readout=readout,
            transition=self.transition,
            attention=self.attention,
            topical_attention=self.topical_attention,
            topical_name='topical_embeddingq',
            content_name='content_embedding',
            fork=Fork([name for name in self.transition.apply.sequences
                       if name != 'mask'], prototype=Linear())
        )

        self.children = [self.sequence_generator]
Example #19
    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 igru_state_dim,
                 igru_depth,
                 trg_dgru_depth,
                 emitter,
                 feedback_brick,
                 merge=None,
                 merge_prototype=None,
                 post_merge=None,
                 **kwargs):
        merged_dim = igru_state_dim
        if not merge:
            merge = Merge(input_names=kwargs['source_names'],
                          prototype=merge_prototype)
        if not post_merge:
            post_merge = Bias(dim=merged_dim)

        # for compatibility
        if igru_depth == 1:
            self.igru = IGRU(dim=igru_state_dim)
        else:
            self.igru = RecurrentStack(
                [IGRU(dim=igru_state_dim, name='igru')] + [
                    UpperIGRU(dim=igru_state_dim,
                              activation=Tanh(),
                              name='upper_igru' + str(i))
                    for i in range(1, igru_depth)
                ],
                skip_connections=True)
        self.embedding_dim = embedding_dim
        self.emitter = emitter
        self.feedback_brick = feedback_brick
        self.merge = merge
        self.post_merge = post_merge
        self.merged_dim = merged_dim
        self.igru_depth = igru_depth
        self.trg_dgru_depth = trg_dgru_depth
        self.lookup = LookupTable(name='embeddings')
        self.vocab_size = vocab_size
        self.igru_state_dim = igru_state_dim
        self.gru_to_softmax = Linear(input_dim=igru_state_dim,
                                     output_dim=vocab_size)
        self.gru_fork = Fork([
            name for name in self.igru.apply.sequences
            if name != 'mask' and name != 'input_states'
        ],
                             prototype=Linear(),
                             name='gru_fork')

        children = [
            self.emitter, self.feedback_brick, self.merge, self.post_merge,
            self.igru, self.lookup, self.gru_to_softmax, self.gru_fork
        ]
        kwargs.setdefault('children', []).extend(children)
        super(Interpolator, self).__init__(**kwargs)
Example #20
    def __init__(self,
                 k=20,
                 rec_h_dim=400,
                 att_size=10,
                 num_letters=68,
                 sampling_bias=0.,
                 attention_type="graves",
                 epsilon=1e-6,
                 attention_alignment=1.,
                 **kwargs):
        super(Scribe, self).__init__(**kwargs)

        # For now only softmax and graves are supported.
        assert attention_type in ["graves", "softmax"]

        readouts_dim = 1 + 6 * k

        self.k = k
        self.rec_h_dim = rec_h_dim
        self.att_size = att_size
        self.num_letters = num_letters
        self.sampling_bias = sampling_bias
        self.attention_type = attention_type
        self.epsilon = epsilon
        self.attention_alignment = attention_alignment

        self.cell1 = GatedRecurrent(dim=rec_h_dim, name='cell1')

        self.inp_to_h1 = Fork(output_names=['cell1_inputs', 'cell1_gates'],
                              input_dim=3,
                              output_dims=[rec_h_dim, 2 * rec_h_dim],
                              name='inp_to_h1')

        self.h1_to_readout = Linear(input_dim=rec_h_dim,
                                    output_dim=readouts_dim,
                                    name='h1_to_readout')

        self.h1_to_att = Fork(output_names=['alpha', 'beta', 'kappa'],
                              input_dim=rec_h_dim,
                              output_dims=[att_size] * 3,
                              name='h1_to_att')

        self.att_to_h1 = Fork(output_names=['cell1_inputs', 'cell1_gates'],
                              input_dim=num_letters,
                              output_dims=[rec_h_dim, 2 * rec_h_dim],
                              name='att_to_h1')

        self.att_to_readout = Linear(input_dim=num_letters,
                                     output_dim=readouts_dim,
                                     name='att_to_readout')

        self.emitter = BivariateGMMEmitter(k=k, sampling_bias=sampling_bias)

        self.children = [
            self.cell1, self.inp_to_h1, self.h1_to_readout, self.h1_to_att,
            self.att_to_h1, self.att_to_readout, self.emitter
        ]
Example #21
def lstm_layer(in_size, dim, x, h, n, first_layer=False):
    if connect_h_to_h == 'all-previous':
        if first_layer:
            lstm_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
        elif connect_x_to_h:
            lstm_input = T.concatenate([x] + [hidden for hidden in h], axis=2)
            linear = Linear(input_dim=in_size + dim * (n),
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
        else:
            lstm_input = T.concatenate([hidden for hidden in h], axis=2)
            linear = Linear(input_dim=dim * (n + 1),
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
    elif connect_h_to_h == 'two-previous':
        if first_layer:
            lstm_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
        elif connect_x_to_h:
            lstm_input = T.concatenate([x] + h[max(0, n - 2):n], axis=2)
            linear = Linear(input_dim=in_size + dim * 2 if n > 1 else in_size +
                            dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
        else:
            lstm_input = T.concatenate(h[max(0, n - 2):n], axis=2)
            linear = Linear(input_dim=dim * 2 if n > 1 else dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
    elif connect_h_to_h == 'one-previous':
        if first_layer:
            lstm_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
        elif connect_x_to_h:
            lstm_input = T.concatenate([x] + [h[n - 1]], axis=2)
            linear = Linear(input_dim=in_size + dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
        else:
            lstm_input = h[n - 1]
            # linear = LN_LSTM(input_dim=dim, output_dim=dim * 4, name='linear' + str(n) + '-' )
            linear = Linear(input_dim=dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-')
    lstm = LN_LSTM(dim=dim, name=layer_models[network_mode][n] + str(n) + '-')
    initialize([linear, lstm])
    if layer_models[network_mode][n] == 'lstm':
        return lstm.apply(linear.apply(lstm_input))
        # return lstm.apply(linear.apply(lstm_input), mask=x_mask)
    elif layer_models[network_mode][n] == 'mt_lstm':
        return lstm.apply(linear.apply(lstm_input),
                          time_scale=layer_resolutions[n],
                          time_offset=layer_execution_time_offset[n])
Example #22
def rnn_layer(in_size, dim, x, h, n, first_layer=False):
    if connect_h_to_h == 'all-previous':
        if first_layer:
            rnn_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
        elif connect_x_to_h:
            rnn_input = T.concatenate([x] + [hidden for hidden in h], axis=2)
            linear = Linear(input_dim=in_size + dim * n,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
        else:
            rnn_input = T.concatenate([hidden for hidden in h], axis=2)
            linear = Linear(input_dim=dim * n,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
    elif connect_h_to_h == 'two-previous':
        if first_layer:
            rnn_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
        elif connect_x_to_h:
            rnn_input = T.concatenate([x] + h[max(0, n - 2):n], axis=2)
            linear = Linear(input_dim=in_size + dim * 2 if n > 1 else in_size +
                            dim,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
        else:
            rnn_input = T.concatenate(h[max(0, n - 2):n], axis=2)
            linear = Linear(input_dim=dim * 2 if n > 1 else dim,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
    elif connect_h_to_h == 'one-previous':
        if first_layer:
            rnn_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
        elif connect_x_to_h:
            rnn_input = T.concatenate([x] + [h[n - 1]], axis=2)
            linear = Linear(input_dim=in_size + dim,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
        else:
            rnn_input = h[n - 1]
            linear = Linear(input_dim=dim,
                            output_dim=dim,
                            name='linear' + str(n) + '-')
    rnn = SimpleRecurrent(dim=dim,
                          activation=Tanh(),
                          name=layer_models[n] + str(n) + '-')
    initialize([linear, rnn])
    if layer_models[n] == 'rnn':
        return rnn.apply(linear.apply(rnn_input))
    elif layer_models[n] == 'mt_rnn':
        return rnn.apply(linear.apply(rnn_input),
                         time_scale=layer_resolutions[n],
                         time_offset=layer_execution_time_offset[n])
Example #23
    def _build_bricks(self, *args, **kwargs):
        # Build lookup tables
        self.word_embed = self._embed(len(self.dataset.word2index),
                                      self.config.word_embed_dim,
                                      name='word_embed')

        self.hashtag_embed = self._embed(len(self.dataset.hashtag2index),
                                         self.config.lstm_dim,
                                         name='hashtag_embed')
        # Build text encoder
        self.mlstm_ins = Linear(input_dim=self.config.word_embed_dim,
                                output_dim=4 * self.config.lstm_dim,
                                name='mlstm_in')
        self.mlstm_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm_ins.biases_init = Constant(0)
        self.mlstm_ins.initialize()
        self.mlstm = MLSTM(self.config.lstm_time,
                           self.config.lstm_dim,
                           shared=False)
        self.mlstm.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm.biases_init = Constant(0)
        self.mlstm.initialize()
        self.hashtag2word = MLP(
            activations=[Tanh('hashtag2word_tanh')],
            dims=[self.config.lstm_dim, self.config.word_embed_dim],
            name='hashtag2word_mlp')
        self.hashtag2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.hashtag2word.biases_init = Constant(0)
        self.hashtag2word.initialize()
        self.hashtag2word_bias = Bias(dim=1, name='hashtag2word_bias')
        self.hashtag2word_bias.biases_init = Constant(0)
        self.hashtag2word_bias.initialize()
        #Build character embedding
        self.char_embed = self._embed(len(self.dataset.char2index),
                                      self.config.char_embed_dim,
                                      name='char_embed')
        # Build sparse word encoder
        self.rnn_ins = Linear(input_dim=self.config.char_embed_dim,
                              output_dim=self.config.word_embed_dim,
                              name='rnn_in')
        self.rnn_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) / numpy.sqrt(self.config.char_embed_dim +
                                           self.config.word_embed_dim))
        self.rnn_ins.biases_init = Constant(0)
        self.rnn_ins.initialize()
        self.rnn = SimpleRecurrent(dim=self.config.word_embed_dim,
                                   activation=Tanh())
        self.rnn.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.rnn.initialize()
Example #24
def MDN_output_layer(x, h, y, in_size, out_size, hidden_size, pred):
    if connect_h_to_o:
        hiddens = T.concatenate([hidden for hidden in h], axis=2)
        hidden_out_size = hidden_size * len(h)
    else:
        hiddens = h[-1]
        hidden_out_size = hidden_size

    mu_linear = Linear(name='mu_linear' + str(pred),
                       input_dim=hidden_out_size,
                       output_dim=out_size * components_size[network_mode])
    sigma_linear = Linear(name='sigma_linear' + str(pred),
                          input_dim=hidden_out_size,
                          output_dim=components_size[network_mode])
    mixing_linear = Linear(name='mixing_linear' + str(pred),
                           input_dim=hidden_out_size,
                           output_dim=components_size[network_mode])
    initialize([mu_linear, sigma_linear, mixing_linear])

    mu = mu_linear.apply(hiddens)
    mu = mu.reshape(
        (mu.shape[0], mu.shape[1], out_size, components_size[network_mode]))

    sigma_orig = sigma_linear.apply(hiddens)
    sigma = T.nnet.softplus(sigma_orig)

    mixing_orig = mixing_linear.apply(hiddens)
    e_x = T.exp(mixing_orig - mixing_orig.max(axis=2, keepdims=True))
    mixing = e_x / e_x.sum(axis=2, keepdims=True)

    exponent = -0.5 * T.inv(sigma) * T.sum(
        (y.dimshuffle(0, 1, 2, 'x') - mu)**2, axis=2)
    normalizer = (2 * np.pi * sigma)
    exponent = exponent + T.log(mixing) - (out_size * .5) * T.log(normalizer)

    # LogSumExp(x)
    max_exponent = T.max(exponent, axis=2, keepdims=True)
    mod_exponent = exponent - max_exponent
    gauss_mix = T.sum(T.exp(mod_exponent), axis=2, keepdims=True)
    log_gauss = T.log(gauss_mix) + max_exponent
    cost = -T.mean(log_gauss)

    srng = RandomStreams(seed=seed)
    mixing = mixing_orig * (1 + sampling_bias)
    sigma = T.nnet.softplus(sigma_orig - sampling_bias)
    e_x = T.exp(mixing - mixing.max(axis=2, keepdims=True))
    mixing = e_x / e_x.sum(axis=2, keepdims=True)
    component = srng.multinomial(pvals=mixing)
    component_mean = T.sum(mu * component.dimshuffle(0, 1, 'x', 2), axis=3)
    component_std = T.sum(sigma * component, axis=2, keepdims=True)
    linear_output = srng.normal(avg=component_mean, std=component_std)
    linear_output.name = 'linear_output'

    return linear_output, cost
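
The cost above depends on the log-sum-exp trick (subtracting the per-component maximum before exponentiating) to keep the mixture likelihood numerically stable. A small NumPy illustration of why that matters (the values are illustrative):

import numpy as np

def log_sum_exp(x, axis):
    m = np.max(x, axis=axis, keepdims=True)
    return np.log(np.sum(np.exp(x - m), axis=axis, keepdims=True)) + m

exponent = np.array([[1000.0, 1001.0, 1002.0]])    # large log-densities
print(log_sum_exp(exponent, axis=1))               # ~[[1002.41]], stable
print(np.log(np.sum(np.exp(exponent), axis=1)))    # overflows to inf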
Example #25
    def __init__(self, emb_dim, dim, dropout=0.0,
            def_word_gating="none",
            dropout_type="per_unit", compose_type="sum",
            word_dropout_weighting="no_weighting",
            shortcut_unk_and_excluded=False,
            num_input_words=-1, exclude_top_k=-1, vocab=None,
            **kwargs):

        self._dropout = dropout
        self._num_input_words = num_input_words
        self._exclude_top_K = exclude_top_k
        self._dropout_type = dropout_type
        self._compose_type = compose_type
        self._vocab = vocab
        self._shortcut_unk_and_excluded = shortcut_unk_and_excluded
        self._word_dropout_weighting = word_dropout_weighting
        self._def_word_gating = def_word_gating

        if def_word_gating not in {"none", "self_attention"}:
            raise NotImplementedError()

        if word_dropout_weighting not in {"no_weighting"}:
            raise NotImplementedError("Not implemented " + word_dropout_weighting)

        if dropout_type not in {"per_unit", "per_example", "per_word"}:
            raise NotImplementedError()

        children = []

        if self._def_word_gating == "self_attention":
            self._gate_mlp = Linear(dim, dim)
            self._gate_act = Logistic()
            children.extend([self._gate_mlp, self._gate_act])

        if compose_type == 'fully_connected_linear':
            self._def_state_compose = MLP(activations=[None],
                dims=[emb_dim + dim, emb_dim])
            children.append(self._def_state_compose)
        if compose_type == "gated_sum" or compose_type == "gated_transform_and_sum":
            if dropout_type == "per_word" or dropout_type == "per_example":
                raise RuntimeError("I dont think this combination makes much sense")

            self._compose_gate_mlp = Linear(dim + emb_dim, emb_dim,
                                            name='gate_linear')
            self._compose_gate_act = Logistic()
            children.extend([self._compose_gate_mlp, self._compose_gate_act])
        if compose_type == 'sum':
            if not emb_dim == dim:
                raise ValueError("Embedding has different dim! Cannot use compose_type='sum'")
        if compose_type == 'transform_and_sum' or compose_type == "gated_transform_and_sum":
            self._def_state_transform = Linear(dim, emb_dim, name='state_transform')
            children.append(self._def_state_transform)

        super(MeanPoolCombiner, self).__init__(children=children, **kwargs)
Example #26
    def __init__(self, **kwargs):
        super(TreeAttention, self).__init__(**kwargs)
        state_transformer = Linear()
        self.state_transformers = Parallel(input_names=self.state_names,
                                           prototype=state_transformer,
                                           name="state_trans")
        self.parent1_transformer = Linear(name="parent1_trans")
        self.parent2_transformer = Linear(name="parent2_trans")

        self.children = [self.state_transformers,
                         self.parent1_transformer,
                         self.parent2_transformer]
Example #27
    def __init__(self,
                 match_dim,
                 conv_n,
                 conv_num_filters=1,
                 state_transformer=None,
                 attended_transformer=None,
                 energy_computer=None,
                 prior=None,
                 energy_normalizer=None,
                 **kwargs):
        super(SequenceContentAndConvAttention, self).__init__(**kwargs)
        if not state_transformer:
            state_transformer = Linear(use_bias=False)

        self.match_dim = match_dim
        self.state_transformer = state_transformer

        self.state_transformers = Parallel(input_names=self.state_names,
                                           prototype=state_transformer,
                                           name="state_trans")
        if not attended_transformer:
            # Only this contributor to the match vector
            # is allowed to have biases
            attended_transformer = Linear(name="preprocess")

        if not energy_normalizer:
            energy_normalizer = 'softmax'
        self.energy_normalizer = energy_normalizer

        if not energy_computer:
            energy_computer = ShallowEnergyComputer(
                name="energy_comp",
                use_bias=self.energy_normalizer != 'softmax')
        self.filter_handler = Linear(name="handler", use_bias=False)
        self.attended_transformer = attended_transformer
        self.energy_computer = energy_computer

        if not prior:
            prior = dict(type='expanding',
                         initial_begin=0,
                         initial_end=10000,
                         min_speed=0,
                         max_speed=0)
        self.prior = prior

        self.conv_n = conv_n
        self.conv_num_filters = conv_num_filters
        self.conv = Conv1D(conv_num_filters, 2 * conv_n + 1)

        self.children = [
            self.state_transformers, self.attended_transformer,
            self.energy_computer, self.filter_handler, self.conv
        ]
Example #28
def linear_layer(in_size, dim, x, h, n, first_layer=False):
    if first_layer:
        input = x
        linear = Linear(input_dim=in_size, output_dim=dim, name='feedforward' + str(n))
    elif connect_x_to_h:
        input = T.concatenate([x] + [h[n - 1]], axis=1)
        linear = Linear(input_dim=in_size + dim, output_dim=dim, name='feedforward' + str(n))
    else:
        input = h[n - 1]
        linear = Linear(input_dim=dim, output_dim=dim, name='feedforward' + str(n))
    initialize([linear])
    return linear.apply(input)
Example #29
    def __init__(self, word_dim, visual_dim, joint_dim):
        self.word_embed = Linear(word_dim,
                                 joint_dim,
                                 name='word_to_joint',
                                 weights_init=IsotropicGaussian(0.01),
                                 biases_init=Constant(0))
        self.visual_embed = Linear(visual_dim,
                                   joint_dim,
                                   name='visual_to_joint',
                                   weights_init=IsotropicGaussian(0.01),
                                   biases_init=Constant(0))
        self.word_embed.initialize()
        self.visual_embed.initialize()
Example #30
    def __init__(self, dimension, input_size, embed_input=False, **kwargs):
        super(LSTMEncoder, self).__init__(**kwargs)
        if embed_input:
            self.embedder = LookupTable(input_size, dimension)
        else:
            self.embedder = Linear(input_size, dimension)
        self.fork = Fork(['inputs'],
                         dimension,
                         output_dims=[dimension],
                         prototype=Linear(dimension, 4 * dimension))
        encoder = Bidirectional(LSTM(dim=dimension, activation=Tanh()))

        self.encoder = encoder
        self.children = [encoder, self.embedder, self.fork]