Example 1
def test_lstm():
    from layers import LSTM
    lstm = LSTM(input_size, layer_size)
    lstm.set_state(batch_size)

    x = T.tensor3()
    f = theano.function([x], lstm(x), updates=lstm.updates)

    X = np.ones((batch_size, time_steps, input_size), dtype=np.float32)
    assert f(X).shape == (batch_size, time_steps, layer_size)
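The test relies on imports and constants that the excerpt leaves out. A minimal sketch of the assumed fixture (the values below are illustrative, not taken from the original module):

import numpy as np
import theano
import theano.tensor as T

# Illustrative fixture values; the original test module defines its own.
input_size, layer_size = 16, 32
batch_size, time_steps = 4, 10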
Example 3
    def __init__(self, vocab_size, embedding_size, hidden_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size

        self.embedding = Embedding(vocab_size, embedding_size)
        self.lstm = LSTM(embedding_size, hidden_size)
        self.layers = [self.embedding, self.lstm]
        self.params = list(
            itertools.chain(*[
                layer.params for layer in self.layers
                if hasattr(layer, 'params')
            ]))
Example 4
class Decoder(Sequential):
    def __init__(self, vocab_size, embedding_size, hidden_size, output_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.lstm = LSTM(embedding_size, hidden_size)
        self.lstm_output = TimeDistributed(hidden_size,
                                           output_size,
                                           activation='tanh')
        self.softmax = TimeDistributed(output_size,
                                       vocab_size,
                                       activation='softmax')
        self.embedding = Embedding(vocab_size, embedding_size)

        self.layers = [
            self.lstm, self.lstm_output, self.softmax, self.embedding
        ]
        self.params = list(
            itertools.chain(*[
                layer.params for layer in self.layers
                if hasattr(layer, 'params')
            ]))

    def forward(self, ec_H, ec_C, mask):
        (sens_size, batch_size) = T.shape(mask)

        def step(m, prev_Y, prev_H, prev_C):
            """Forward a time step of the decoder."""
            # LSTM forward time step
            (H, C) = self.lstm.step(prev_Y, m, prev_H, prev_C)
            # LSTM output
            O = self.lstm_output.forward(H)
            # Apply softmax to LSTM output
            P = self.softmax.forward(O)
            # Predicted token indices (argmax over the softmax output)
            one_hot_Y = T.argmax(P, axis=1)
            # Embed the prediction and feed it in as the next step's input
            Y = self.embedding.forward(one_hot_Y)
            # FIXME: deal with differing sequence lengths?
            return (P, Y, H, C)

        results, updates = theano.scan(fn=step,
                                       sequences=[mask],
                                       outputs_info=[
                                           None,
                                           dict(initial=T.zeros(
                                               (batch_size,
                                                self.embedding_size)),
                                                taps=[-1]),
                                           dict(initial=ec_H, taps=[-1]),
                                           dict(initial=ec_C, taps=[-1])
                                       ])

        # return np.swapaxes(results[0], 0, 1)       # returns the softmax probabilities
        return results[0]
Example 5
    def __init__(self, vocab_size, embedding_size, hidden_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size

        self.embedding = Embedding(vocab_size, embedding_size)
        self.lstm = LSTM(embedding_size, hidden_size)
        self.layers = [self.embedding, self.lstm]
        self.params = list(itertools.chain(*[layer.params for layer in self.layers if hasattr(layer, 'params')]))
Example 6
 def build_test_model(self, data):
     rng = np.random.RandomState(3435)
     lstm_params, hidden_params, hidden_relu_params, full_connect_params = self.load_trained_params()
     data_x, data_y, maxlen = data
     test_len = len(data_x)
     n_test_batches = test_len // self.batch_size
     x = T.matrix('x')
     y = T.ivector('y')
     index = T.lscalar()
     Words = theano.shared(value=self.word_vectors, name="Words", borrow=True)
     input_width = self.hidden_sizes[0]
     layer0_input = T.cast(Words[T.cast(x.flatten(), dtype="int32")], dtype=floatX).reshape((self.batch_size, maxlen, input_width))
     lstm = LSTM(dim=input_width, batch_size=self.batch_size, number_step=maxlen, params=lstm_params)
     layer0_input = lstm.feed_foward(layer0_input)
     lstm.mean_pooling_input(layer0_input)
     hidden_sizes = [self.hidden_sizes[0], self.hidden_sizes[0]]
     hidden_layer = HiddenLayer(rng, hidden_sizes=hidden_sizes, input_vectors=lstm.output, activation=utils.Tanh, name="Hidden_Tanh", W=hidden_params[0], b=hidden_params[1]) 
     hidden_layer.predict()
     hidden_layer_relu = HiddenLayer(rng, hidden_sizes=hidden_sizes, input_vectors=hidden_layer.output, W=hidden_relu_params[0], b=hidden_relu_params[1])
     hidden_layer_relu.predict()
     # hidden_layer_dropout = HiddenLayerDropout(rng, hidden_sizes=self.hidden_sizes[:2], input_vectors=lstm.output, W=hidden_layer.W, b=hidden_layer.b)
     full_connect = FullConnectLayer(rng, layers_size=[self.hidden_sizes[0], self.hidden_sizes[-1]], 
                                     input_vector=hidden_layer_relu.output, W=full_connect_params[0], b=full_connect_params[1])
     full_connect.predict()
     test_data_x = theano.shared(np.asarray(data_x, dtype=floatX), borrow=True)
     test_data_y = theano.shared(np.asarray(data_y, dtype='int32'), borrow=True)
   
     errors = 0.
     if test_len == 1:
         test_model = theano.function([index],outputs=full_connect.get_predict(), on_unused_input='ignore', givens={
             x: test_data_x[index * self.batch_size: (index + 1) * self.batch_size],
             y: test_data_y[index * self.batch_size: (index + 1) * self.batch_size]
         })
         index = 0
         avg_errors = test_model(index)
     else:
         test_model = theano.function([index], outputs=full_connect.errors(y), givens={
             x: test_data_x[index * self.batch_size: (index + 1) * self.batch_size],
             y: test_data_y[index * self.batch_size: (index + 1) * self.batch_size]
         })
         for i in xrange(n_test_batches):
             errors += test_model(i)
         avg_errors = errors / n_test_batches
     return avg_errors
Example 7
    def __init__(self, dmemory, daddress, nstates, dinput, doutput):
        self.layers = {}

        self.layers['INPUT'] = Dense(dinput, dmemory)
        self.layers['PREVIOUS_READ'] = Dense(dmemory, dmemory)
        self.layers['CONTROL_KEY'] = LSTM(dmemory + dmemory, nstates)
        self.layers['OUTPUT'] = Dense(doutput, doutput)

        self.daddress = daddress
        self.dmemory = dmemory
        self.doutput = doutput
Example 8
    def __init__(self, vocabulary_size: int, hidden_size: int, n_layers: int,
                 embedding_size: int, dropout: float):
        super().__init__()

        self.vocabulary_size = vocabulary_size
        self.dropout = dropout
        self.embedding = torch.nn.Embedding(vocabulary_size + 1,
                                            embedding_size)
        self.lstm = LSTM(embedding_size,
                         hidden_size,
                         n_layers,
                         dropout=dropout)
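Example 8 shows only the constructor. A hedged sketch of a forward pass consistent with these layers, assuming the custom LSTM follows the torch.nn.LSTM calling convention (this is not the author's code):

    def forward(self, tokens):
        # tokens: tensor of token ids; dropout is already handled inside the LSTM
        embedded = self.embedding(tokens)
        outputs, (hidden, cell) = self.lstm(embedded)
        return outputs, (hidden, cell)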
Example 9
    def run_lstm_alt(self, context_out, question_pool, context_len, is_train):
        # tile pooled question rep and concat with context
        q_rep = tf.expand_dims(question_pool, 1)  # (batch_size, 1, D)
        context_shape = tf.shape(context_out)[1]
        q_rep = tf.tile(q_rep, [1, context_shape, 1])

        q_c_rep = tf.concat([context_out, q_rep], axis=-1)

        with tf.variable_scope('lstm_') as scope:
            lstm_out_fw = LSTM(q_c_rep,
                               context_len,
                               self.hidden_units,
                               tf.cond(is_train, lambda: self.output_keep_prob,
                                       lambda: 1.0),
                               tf.cond(is_train, lambda: self.input_keep_prob,
                                       lambda: 1.0),
                               tf.cond(is_train, lambda: self.state_keep_prob,
                                       lambda: 1.0),
                               use_last=False,
                               seed=self.seed,
                               reuse=False)
            q_c_rep_rev = _reverse(q_c_rep, context_len, 1, 0)
            lstm_out_rev = LSTM(q_c_rep_rev,
                                context_len,
                                self.hidden_units,
                                tf.cond(is_train,
                                        lambda: self.output_keep_prob,
                                        lambda: 1.0),
                                tf.cond(is_train, lambda: self.input_keep_prob,
                                        lambda: 1.0),
                                tf.cond(is_train, lambda: self.state_keep_prob,
                                        lambda: 1.0),
                                use_last=False,
                                seed=self.seed,
                                reuse=True)
            lstm_out_bw = _reverse(lstm_out_rev, context_len, 1, 0)
            lstm_out = tf.concat([lstm_out_fw, lstm_out_bw],
                                 2,
                                 name='lstm_out')
        return lstm_out
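The `_reverse` helper used above is not part of the snippet. A plausible implementation, assuming it mirrors the reverse-by-true-length idiom of tf.nn.bidirectional_dynamic_rnn (an assumption, not the original code):

def _reverse(input_, seq_lengths, seq_axis, batch_axis):
    # Reverse each sequence only up to its true length so that padding
    # positions stay at the end after the reversal.
    return tf.reverse_sequence(input_,
                               seq_lengths=seq_lengths,
                               seq_axis=seq_axis,
                               batch_axis=batch_axis)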
Example 10
    def __init__(self, vocab_size, embedding_size, hidden_size, output_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.lstm = LSTM(embedding_size, hidden_size)
        self.lstm_output = TimeDistributed(hidden_size, output_size, activation='tanh')
        self.softmax = TimeDistributed(output_size, vocab_size, activation='softmax')
        self.embedding = Embedding(vocab_size, embedding_size)

        self.layers = [self.lstm, self.lstm_output, self.softmax, self.embedding]
        self.params = list(itertools.chain(*[layer.params for layer in self.layers if hasattr(layer, 'params')]))
Example 11
    def __init__(self, vocab_size, embedding_size, hidden_size, output_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.lstm = LSTM(embedding_size, hidden_size)
        self.lstm_output = TimeDistributed(hidden_size,
                                           output_size,
                                           activation='tanh')
        self.softmax = TimeDistributed(output_size,
                                       vocab_size,
                                       activation='softmax')
        self.embedding = Embedding(vocab_size, embedding_size)

        self.layers = [
            self.lstm, self.lstm_output, self.softmax, self.embedding
        ]
        self.params = list(
            itertools.chain(*[
                layer.params for layer in self.layers
                if hasattr(layer, 'params')
            ]))
Example 12
def main():
    input_size, output_size = 3, 3
    rnn = RNN()
    rnn.add_layer(LSTM(input_size, output_size))

    X_train = [[[1, 0, 0]], [[0, 1, 0]], [[0, 0, 1]]]
    Y_train = [[[0, 1, 0]], [[0, 0, 1]], [[1, 0, 0]]]

    epochs = 1000
    rnn.train(X_train, Y_train, epochs=epochs)
    for p, y in zip(rnn.predict(X_train), Y_train):
        _p = np.zeros_like(p).astype(int)
        _p[:, np.argmax(p)] = 1
        print('%30s %10s %10s' % (p.reshape(1, -1), _p, np.array(y)))
Example 13
class Encoder(Sequential):
    def __init__(self, vocab_size, embedding_size, hidden_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size

        self.embedding = Embedding(vocab_size, embedding_size)
        self.lstm = LSTM(embedding_size, hidden_size)
        self.layers = [self.embedding, self.lstm]
        self.params = list(itertools.chain(*[layer.params for layer in self.layers if hasattr(layer, 'params')]))

    def forward(self, batch, mask):
        # ``batch`` is a matrix whose rows are sentences of token ids, e.g. x = [1, 4, 5, 2, 0]
        # ``emb`` is a list of embedding matrices; emb[i].shape = (sent_size, embedding_size)
        emb = self.embedding.forward(batch)
        (H, C) = self.lstm.forward(emb, mask)
        return (H[-1], C[-1])
Example 14
class Decoder(Sequential):
    def __init__(self, vocab_size, embedding_size, hidden_size, output_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.lstm = LSTM(embedding_size, hidden_size)
        self.lstm_output = TimeDistributed(hidden_size, output_size, activation='tanh')
        self.softmax = TimeDistributed(output_size, vocab_size, activation='softmax')
        self.embedding = Embedding(vocab_size, embedding_size)

        self.layers = [self.lstm, self.lstm_output, self.softmax, self.embedding]
        self.params = list(itertools.chain(*[layer.params for layer in self.layers if hasattr(layer, 'params')]))

    def forward(self, ec_H, ec_C, mask):
        (sens_size, batch_size) = T.shape(mask)

        def step(m, prev_Y, prev_H, prev_C):
            """Forward a time step of the decoder."""
            # LSTM forward time step
            (H, C) = self.lstm.step(prev_Y, m, prev_H, prev_C)
            # LSTM output
            O = self.lstm_output.forward(H)
            # Apply softmax to LSTM output
            P = self.softmax.forward(O)
            # Predicted token indices (argmax over the softmax output)
            one_hot_Y = T.argmax(P, axis=1)
            # Embed the prediction and feed it in as the next step's input
            Y = self.embedding.forward(one_hot_Y)
            # FIXME: deal with differing sequence lengths?
            return (P, Y, H, C)

        results, updates = theano.scan(
            fn=step,
            sequences=[mask],
            outputs_info=[
                None,
                dict(initial=T.zeros((batch_size, self.embedding_size)), taps=[-1]),
                dict(initial=ec_H, taps=[-1]),
                dict(initial=ec_C, taps=[-1])
            ]
        )

        # return np.swapaxes(results[0], 0, 1)       # returns the softmax probabilities
        return results[0]
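Together with the Encoder in Example 13, a hedged sketch of how the two classes might be wired into one seq2seq forward pass (the sizes and the symbolic inputs ``batch`` and ``mask`` are assumed, not from the original):

# Hypothetical wiring; vocab_size, embedding_size, hidden_size, output_size
# and the Theano variables ``batch`` / ``mask`` are assumed to exist.
encoder = Encoder(vocab_size, embedding_size, hidden_size)
decoder = Decoder(vocab_size, embedding_size, hidden_size, output_size)

ec_H, ec_C = encoder.forward(batch, mask)   # final hidden and cell states
probs = decoder.forward(ec_H, ec_C, mask)   # per-step softmax probabilities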
Example 15
    def __init__(self, vocabulary_size: int, hidden_size: int, n_layers: int,
                 embedding_size: int, dropout: float, max_out_len: int):
        super().__init__()
        self.dropout = dropout
        self.vocabulary_size = vocabulary_size

        self.embedding = torch.nn.Embedding(vocabulary_size + 1,
                                            embedding_size)
        self.lstm = LSTM(embedding_size,
                         hidden_size,
                         n_layers,
                         dropout=dropout)
        self.output_projection = Linear(hidden_size, vocabulary_size + 1)

        self.max_out_len = max_out_len
        self.sos_token = self.vocabulary_size
        self.eos_token = self.vocabulary_size
Example 16
def main():
    x = tensor.tensor3()
    t = tensor.matrix()

    task = TempOrder(args.low,
                     args.high,
                     args.length,
                     batch_size=config.batch_size,
                     long_sequences=False)
    train_db = SyntheticDatabase(task,
                                 number_of_batches=config.number_of_batches)
    valid_db = SyntheticDatabase(task,
                                 number_of_batches=config.test_sequences //
                                 config.batch_size,
                                 phase='valid')

    model = TempOrderModel(x, t, [
        LSTM(task.input_size,
             config.layer_size,
             config.batch_size,
             args.hid_dropout_rate,
             args.drop_candidates,
             args.per_step,
             weight_init=Uniform(config.scale),
             persistent=False),
        LastStepPooling(),
        Linear(config.layer_size, task.output_size)
    ])

    if args.finetune:
        model.load('exp/temp_order/opt.pkl')

    opt = SGDOptimizer(model,
                       x,
                       t,
                       train_db,
                       test_db=valid_db,
                       name="temp_order",
                       clip_gradients=True,
                       clip_threshold=5,
                       print_norms=True)

    opt.train(train_db, test_db=valid_db, learning_rate=args.lr, epochs=1000)
Example 17
class Encoder(Sequential):
    def __init__(self, vocab_size, embedding_size, hidden_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size

        self.embedding = Embedding(vocab_size, embedding_size)
        self.lstm = LSTM(embedding_size, hidden_size)
        self.layers = [self.embedding, self.lstm]
        self.params = list(
            itertools.chain(*[
                layer.params for layer in self.layers
                if hasattr(layer, 'params')
            ]))

    def forward(self, batch, mask):
        # ``batch`` is a matrix whose rows are sentences of token ids, e.g. x = [1, 4, 5, 2, 0]
        # ``emb`` is a list of embedding matrices; emb[i].shape = (sent_size, embedding_size)
        emb = self.embedding.forward(batch)
        (H, C) = self.lstm.forward(emb, mask)
        return (H[-1], C[-1])
Example 18
    def train(self, train_data, dev_data, test_data, maxlen):
        # tr = tracker.SummaryTracker()
        rng = np.random.RandomState(3435)
        train_x, train_y = self.shared_dataset(train_data)
        dev_x, dev_y = self.shared_dataset(dev_data)
        test_x, test_y = self.shared_dataset(test_data)
        test_len = len(test_data[0])
        n_train_batches = len(train_data[0]) // self.batch_size
        n_val_batches = len(dev_data[0]) // self.batch_size
        n_test_batches = test_len // self.batch_size
        input_width = self.hidden_sizes[0]
        x = T.matrix('x')
        y = T.ivector('y')
        index = T.lscalar()
        Words = theano.shared(value=self.word_vectors, name="Words", borrow=True)
        layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape((self.batch_size, maxlen, input_width))
        lstm = LSTM(dim=input_width, batch_size=self.batch_size, number_step=maxlen)
        layer0_output = lstm.feed_foward(layer0_input)
        lstm.mean_pooling_input(layer0_output)
        hidden_sizes = [self.hidden_sizes[0], self.hidden_sizes[0]]
        hidden_layer = HiddenLayer(rng, hidden_sizes=hidden_sizes, input_vectors=lstm.output, activation=utils.Tanh, name="Hidden_Tanh") 
        hidden_layer.predict()
        hidden_layer_relu = HiddenLayer(rng, hidden_sizes=hidden_sizes, input_vectors=hidden_layer.output)
        hidden_layer_relu.predict()
        # hidden_layer_dropout = HiddenLayerDropout(rng, hidden_sizes=self.hidden_sizes[:2], input_vectors=lstm.output, W=hidden_layer.W, b=hidden_layer.b)
        full_connect = FullConnectLayer(rng, layers_size=[self.hidden_sizes[0], self.hidden_sizes[-1]], input_vector=hidden_layer_relu.output)
        full_connect.predict()

        cost = full_connect.negative_log_likelihood(y)
        
        params = lstm.params + hidden_layer.params + hidden_layer_relu.params + full_connect.params
        # params = hidden_layer.params + hidden_layer_relu.params + full_connect.params
        params_length = len(params)
        # initial values for e_grad, e_delta and delta at time step 0
        e_grad, e_delta_prev, delta = self.init_hyper_values(params_length)
        # e_grad_d, e_delta_prev_d, delta_d = self.init_hyper_values(params_length, name="D")
        #apply gradient
        grads = T.grad(cost, params)
        #dropout hidden layer
        # hidden_layer_dropout.dropout()
        # hidden_layer_dropout.predict()
        # full_connect.setInput(hidden_layer_dropout.output)
        # full_connect.predict()
        # cost_d = full_connect.negative_log_likelihood(y)
        #apply gradient to cost_d
        # grads_d = T.grad(cost_d, params)
        e_grad, e_delta_prev, delta = self.adadelta(grads, e_grad, e_delta_prev)
        # e_grad_d, e_delta_prev_d, delta_d = self.adadelta(grads_d, e_grad_d, e_delta_prev_d, delta_d)
        grads = delta
        # grad_d = delta_d
        updates = [(p, p - d) for p, d in zip(params, grads)]
        # updates = [(p, p - self.learning_rate * d) for p, d in zip(params, grads)]
        train_model = theano.function([index], cost, updates=updates, givens={
            x: train_x[(index * self.batch_size):((index + 1) * self.batch_size)],
            y: train_y[(index * self.batch_size):((index + 1) * self.batch_size)]
        })
        val_model = theano.function([index], full_connect.errors(y), givens={
            x: dev_x[index * self.batch_size: (index + 1) * self.batch_size],
            y: dev_y[index * self.batch_size: (index + 1) * self.batch_size],
        })
        test_model = theano.function(inputs=[index], outputs=full_connect.errors(y), givens={
            x: test_x[index * self.batch_size: (index + 1) * self.batch_size],
            y: test_y[index * self.batch_size: (index + 1) * self.batch_size]
        })
        validation_frequency = min(n_train_batches, self.patience // 2)
        val_batch_lost = 1.
        best_batch_lost = 1.
        best_test_lost = 1.
        stop_count = 0
        epoch = 0
        done_loop = False
        current_time_step = 0
        improve_threshold = 0.995
        iter_list = range(n_train_batches)
        while epoch < self.epochs and not done_loop:
            epoch_cost_train = 0.
            epoch += 1
            batch_train = 0
            print("Start epoch: %i" % epoch)
            start = time.time()
            random.shuffle(iter_list)
            for mini_batch, m_b_i in zip(iter_list, xrange(n_train_batches)):
                current_time_step = (epoch - 1) * n_train_batches + m_b_i
                epoch_cost_train += train_model(mini_batch)
                batch_train += 1
                if (current_time_step + 1) % validation_frequency == 0:
                    val_losses = [val_model(i) for i in xrange(n_val_batches)]
                    val_losses = np.array(val_losses)
                    val_batch_lost = np.mean(val_losses)
                    if val_batch_lost < best_batch_lost:
                        if best_batch_lost * improve_threshold > val_batch_lost:
                            self.patience = max(self.patience, current_time_step * self.patience_frq)
                            best_batch_lost = val_batch_lost
                            # test it on the test set
                            test_losses = [
                                test_model(i)
                                for i in range(n_test_batches)
                            ]
                            current_test_lost = np.mean(test_losses)
                            print(('epoch %i minibatch %i test accuracy of %i example is: %.5f') % (epoch, m_b_i, test_len, (1 - current_test_lost) * 100.))
                            if best_test_lost > current_test_lost:
                                best_test_lost = current_test_lost
                if self.patience <= current_time_step:
                    print(self.patience)
                    done_loop = True
                    break
            print('epoch: %i, training time: %.2f secs; with avg cost: %.5f' % (epoch, time.time() - start, epoch_cost_train / batch_train))
        print('Best test accuracy is: %.5f' % (1 - best_test_lost))
        utils.save_layer_params(lstm, 'lstm')
        utils.save_layer_params(hidden_layer, 'hidden_lstm')
        utils.save_layer_params(hidden_layer_relu, 'hidden_relu_lstm')
        utils.save_layer_params(full_connect, 'full_connect_lstm')
        return lstm.params
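The update step above delegates to self.adadelta, which the excerpt does not define. A hedged sketch of what such a helper typically computes (Zeiler's Adadelta expressed with symbolic Theano ops; rho and eps are illustrative defaults):

def adadelta(grads, e_grad, e_delta_prev, rho=0.95, eps=1e-6):
    # Returns updated running averages and the parameter deltas,
    # matching the (e_grad, e_delta_prev, delta) order used above.
    new_e_grad, new_e_delta, deltas = [], [], []
    for g, eg, ed in zip(grads, e_grad, e_delta_prev):
        eg_t = rho * eg + (1. - rho) * T.sqr(g)         # running avg of squared gradients
        d = T.sqrt(ed + eps) / T.sqrt(eg_t + eps) * g   # rescaled update
        ed_t = rho * ed + (1. - rho) * T.sqr(d)         # running avg of squared updates
        new_e_grad.append(eg_t)
        deltas.append(d)
        new_e_delta.append(ed_t)
    return new_e_grad, new_e_delta, deltas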
Example 19
 def _build_layers(self):
     self.emb = HRRWordEmbedding(self.vocab_size, self.cell_dim, self.num_roles, self.num_fillers)
     self.rnn = LSTM(self.num_layers, self.cell_dim, keep_prob=self.keep_prob)
Example 20
    char_to_int[chars[i]] = i
    int_to_char[i] = chars[i]
    vec = np.zeros((len(chars), 1))
    vec[i] = 1.
    char_to_vec[chars[i]] = vec

source = np.array(list(source))[:(len(source) // BATCH_SIZE) * BATCH_SIZE]
source = np.array(np.split(source, BATCH_SIZE))

EMBEDDING_LENGTH = len(chars)

# Creating the model.
model = Network(
    LSTM(size=512,
         input_size=EMBEDDING_LENGTH,
         batch_size=BATCH_SIZE,
         backprop_depth=SEQUENCE_LENGTH,
         stateful=True),
    LSTM(size=512,
         input_size=512,
         batch_size=BATCH_SIZE,
         backprop_depth=SEQUENCE_LENGTH,
         stateful=True),
    TimeDistributed(
        Dense(size=EMBEDDING_LENGTH,
              input_size=512,
              activation=SparseSoftmax())))

if RESTORE_MODEL_PATH:
    model.loadParams(RESTORE_MODEL_PATH)
Example 21
	def _build(self):
		"""
		Build the model.
		Input has shape BatchSize x Num_Time_Steps x Num_Channels.

		Params:
			None
		Returns:
			None
		"""
		self.layers.append(
			Conv1D(num_in_channels=self.num_input_channels,
				num_out_channels=16,
				filter_size=3,
				strides=1,
				padding="SAME",
				dropout=0.0,
				bias=True,
				act=leak_relu
			)
		)

		self.layers.append(
			Conv1D(num_in_channels=16,
				num_out_channels=32,
				filter_size=3,
				strides=1,
				padding="SAME",
				dropout=0.0,
				bias=True,
				act=leak_relu
			)
		)

		self.layers.append(
			LSTM(input_dim=32,
				num_units=128,
				length=self.num_time_steps,
				batch_size=32,
				return_sequece=False,
				bias=True
			)
		)
		
		self.layers.append(
			Dense(input_dim=128,
				output_dim=64,
				dropout=0.0,
				sparse_inputs=False,
				act=leak_relu,
				bias=True
			)
		)

		self.layers.append(CenterLoss(num_classes=self.num_classes,
									num_feas=64, learning_rate=0.5))

		
		self.layers.append(
			Dense(input_dim=64,
				output_dim=self.num_classes,
				dropout=0.0,
				sparse_inputs=False,
				act=leak_relu,
				bias=True
			)
		)
Example 22
    char_to_int[chars[i]] = i
    int_to_char[i] = chars[i]
    vec = np.zeros((len(chars), 1))
    vec[i] = 1.
    char_to_vec[chars[i]] = vec

# The length of the vector that represents a character
# is equivalent to the number of different characters
# in the text.
EMBEDDING_LENGTH = len(chars)

# Creating the model.
model = Network(
    LSTM(size=512,
         input_size=EMBEDDING_LENGTH,
         batch_size=1,
         backprop_depth=1,
         stateful=True),
    LSTM(size=512,
         input_size=512,
         batch_size=1,
         backprop_depth=1,
         stateful=True),
    TimeDistributed(
        Dense(size=EMBEDDING_LENGTH,
              input_size=512,
              activation=SparseSoftmax(TEMPERATURE))))
model.loadParams(MODEL)

# optimizer = Adam(learning_rate=lambda n: 0.001, beta_1=0.9, beta_2=0.999)
Example 23
train_data_iter = data_iterator_simple(load_train_func,
                                       len(x_train),
                                       batch_size,
                                       shuffle=True,
                                       with_file_cache=False)
valid_data_iter = data_iterator_simple(load_valid_func,
                                       len(x_valid),
                                       batch_size,
                                       shuffle=True,
                                       with_file_cache=False)

x = nn.Variable((batch_size, sentence_length))
t = nn.Variable((batch_size, sentence_length, 1))
h = PF.embed(x, vocab_size, embedding_size)
h = LSTM(h, hidden, return_sequences=True)
h = TimeDistributed(PF.affine)(h, hidden, name='hidden')
y = TimeDistributed(PF.affine)(h, vocab_size, name='output')

mask = F.sum(F.sign(t), axis=2)  # do not predict 'pad'.
entropy = TimeDistributedSoftmaxCrossEntropy(y, t) * mask
count = F.sum(mask, axis=1)
loss = F.mean(F.div2(F.sum(entropy, axis=1), count))

# Create solver.
solver = S.Momentum(1e-2, momentum=0.9)
solver.set_parameters(nn.get_parameters())

# Create monitor.
from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
monitor = Monitor('./tmp-lstmlm')
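The excerpt stops right after the monitor is created. A hedged sketch of the training loop that would typically follow for this graph (the iteration count and monitor names are illustrative; the NNabla calls themselves are standard):

monitor_loss = MonitorSeries('training loss', monitor, interval=100)
monitor_time = MonitorTimeElapsed('training time', monitor, interval=100)

for i in range(max_iter):                      # max_iter is assumed to be defined
    x_batch, t_batch = train_data_iter.next()  # batches from the iterator above
    x.d, t.d = x_batch, t_batch
    loss.forward()
    solver.zero_grad()
    loss.backward()
    solver.update()
    monitor_loss.add(i, loss.d.copy())
    monitor_time.add(i)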
Example 24
 def _build_layers(self):
     self.emb = HRRWordEmbedding(self.vocab_size, self.cell_dim, self.num_roles, self.num_fillers)
     self.rnn = LSTM(self.num_layers, self.cell_dim, keep_prob=self.keep_prob, proj_dim=self.cell_dim * 2)
     self.chunk_layer = HRRChunkLayer(self.cell_dim, self.num_roles)
Example 25
def main():
    with open(LOOKUP_FILE, 'r') as file:
        chars = json.load(file)

    # Here we make dictionaries that can be used to convert
    # between characters, integer id-s of characters, and one-hot
    # vectors that will be used to represent the characters.
    char_to_int = dict()
    int_to_char = dict()
    char_to_vec = dict()

    for i in range(len(chars)):
        char_to_int[chars[i]] = i
        int_to_char[i] = chars[i]
        vec = np.zeros((len(chars), 1))
        vec[i] = 1.
        char_to_vec[chars[i]] = vec

    # The length of the vector that represents a character
    # is equivalent to the number of different characters
    # in the text.
    EMBEDDING_LENGTH = len(chars)
    # Create the LSTM layers only. We don't use the Network class,
    # since we are only interested in the activations of the recurrent
    # layers.
    first_layer = LSTM(size=512,
                       input_size=EMBEDDING_LENGTH,
                       batch_size=1,
                       backprop_depth=1,
                       stateful=True)
    second_layer = LSTM(size=512,
                        input_size=512,
                        batch_size=1,
                        backprop_depth=1,
                        stateful=True)

    # Load the weights.
    with open(MODEL, 'r') as file:
        weights = json.load(file)
    first_layer.loadParams(weights[0])
    second_layer.loadParams(weights[1])

    # Loading in the file.
    with open(TEXT_FILE, 'r', encoding='utf8') as file:
        text = file.read()
        source = list(text)

    for i in range(len(source)):
        source[i] = char_to_vec[source[i]]

    # Feed the text to the network.
    # Here we look at the activation of the neurons of the
    # hidden state at the 2nd LSTM layer.
    # We take the first element of the output as there is only one
    # batch.
    out = second_layer.forward(first_layer.forward(np.array([source])))[0]

    # ###############---TKINTER---#############################################
    class Wrap:
        NEURON_INDEX = 0

    def showNeuron():
        for j in range(out.shape[0]):
            # We will leave the background of the newline characters white,
            # regardless of its activation. The reason for that is that the color
            # would fill the entire remainder of the line, which is very disturbing to look at.
            intensity = 255 if text[j] == '\n' else 255 - int(
                (out[j, Wrap.NEURON_INDEX, 0] + 1) * 127.5)
            text_box.tag_config(str(j),
                                background="#%02x%02x%02x" %
                                (255, intensity, intensity))

    def inputFromEntry(evt):
        Wrap.NEURON_INDEX = int(entry.get())
        entry.delete(0, "end")
        showNeuron()

    def nextButtonClicked():
        Wrap.NEURON_INDEX += 1
        entry.delete(0, "end")
        entry.insert(tk.INSERT, str(Wrap.NEURON_INDEX))
        showNeuron()

    # Making the tkinter window.
    root = tk.Tk()
    text_box = tk.Text(root, height=35)
    text_box.insert(tk.INSERT, text)
    text_box.pack()
    current_line = 1
    current_char = 0
    for i in range(out.shape[0]):
        text_box.tag_add(str(i), f"{current_line}.{current_char}")
        current_char += 1
        if text[i] == '\n':
            current_line += 1
            current_char = 0

    # Making the entry box.
    entry = tk.Entry(root, width=5)
    entry.pack()
    entry.bind("<Return>", inputFromEntry)

    # Buttons
    up = tk.Button(text="Next", command=nextButtonClicked)
    up.pack()

    # Show the first neuron by default.
    showNeuron()

    root.mainloop()
Example 26
from loss import CrossEntropyLoss
from optimizer import SGDOptimizer, RMSpropOptimizer
from network import Network
from data_preparation import load_data
from solve_rnn import solve_rnn

import theano.tensor as T

X_train, y_train, X_test, y_test = load_data()

HIDDEN_DIM = 32
INPUT_DIM = 20
OUTPUT_DIM = 10

model = Network()
model.add(LSTM('rnn1', HIDDEN_DIM, INPUT_DIM,
               0.1))  # output shape: 4 x HIDDEN_DIM
model.add(Linear('fc', HIDDEN_DIM, OUTPUT_DIM,
                 0.1))  # output shape: 4 x OUTPUT_DIM
model.add(Softmax('softmax'))

loss = CrossEntropyLoss('xent')

optim = RMSpropOptimizer(learning_rate=0.01, rho=0.9)
input_placeholder = T.fmatrix('input')
label_placeholder = T.fmatrix('label')

model.compile(input_placeholder, label_placeholder, loss, optim)

MAX_EPOCH = 6
DISP_FREQ = 1000
TEST_FREQ = 10000
Example 27
output = []
for f, f_size in zip(filters, filster_sizes):
    _h = PF.convolution(h,
                        f,
                        kernel=(1, f_size),
                        pad=(0, f_size // 2),
                        name='conv_{}'.format(f_size))
    _h = F.max_pooling(_h, kernel=(1, word_length))
    output.append(_h)
h = F.concatenate(*output, axis=1)
h = F.transpose(h, (0, 2, 1, 3))
h = F.reshape(h, (batch_size, sentence_length, sum(filters)))
# h = PF.batch_normalization(h, axes=[2])
h = TimeDistributed(Highway)(h, name='highway1')
h = TimeDistributed(Highway)(h, name='highway2')
h = LSTM(h, lstm_size, return_sequences=True, name='lstm1')
h = LSTM(h, lstm_size, return_sequences=True, name='lstm2')
h = TimeDistributed(PF.affine)(h, lstm_size, name='hidden')
y = TimeDistributed(PF.affine)(h, word_vocab_size, name='output')
t = nn.Variable((batch_size, sentence_length, 1))

mask = F.sum(F.sign(t), axis=2)  # do not predict 'pad'.
entropy = TimeDistributedSoftmaxCrossEntropy(y, t) * mask
count = F.sum(mask, axis=1)
loss = F.mean(F.div2(F.sum(entropy, axis=1), count))

# Create solver.
solver = S.Momentum(1e-2, momentum=0.9)
solver.set_parameters(nn.get_parameters())

# Create monitor.
Example 28
 def _build_layers(self):
     emb_cls = TiedIOEmbedding if self.tied_io else EmbeddingLayer
     self.emb = emb_cls(self.vocab_size, self.cell_dim)
     self.rnn = LSTM(self.num_layers, self.cell_dim, keep_prob=self.keep_prob)
Example 29
    def __init__(self,
                 word_V,
                 dep_V,
                 word_d=100,
                 pos_d=25,
                 mlp_d=100,
                 mlp_label_d=100,
                 num_lstm_layers=2,
                 lstm_d=125,
                 embeddings_init=None,
                 pos_V=None,
                 seed=0,
                 verbose=False):
        '''
        word_V - size of word vocab
        dep_V - size of relation label vocab
        word_d - dimension of word embeddings
        pos_d - dimension of POS embeddings
        mlp_d - dimension of hidden layer for arc prediction MLP
        mlp_label_d - dimension of hidden layer for label prediction MLP
        num_lstm_layers - number of bi-directional LSTM layers to stack
        lstm_d - dimension of hidden state in the LSTM
        embeddings_init - use pre-trained embeddings
        pos_V - size of POS vocab
        seed - random seed for initialization
        verbose - whether to print information about these parameters
        '''

        if verbose:
            print('Word vocabulary size: {}'.format(word_V))
            print('Dependency relation vocabulary size: {}'.format(dep_V))
            print('POS vocabulary size: {}'.format(pos_V))

        self.word_V = word_V
        self.dep_V = dep_V
        self.pos_V = pos_V

        self.word_d = word_d
        self.pos_d = pos_d
        self.mlp_d = mlp_d
        self.mlp_label_d = mlp_label_d
        self.lstm_layers = num_lstm_layers
        self.lstm_d = lstm_d

        np.random.seed(seed)

        self.model = dynet.Model()

        #embedding layers for words and POS
        self.embeddings = self.model.add_lookup_parameters(
            (self.word_V, self.word_d))
        if pos_V is not None:
            self.pos_embeddings = self.model.add_lookup_parameters(
                (self.pos_V, self.pos_d))

        #bi-directional LSTM layers
        #embeddings -> layer1 -> layer2
        lstm_layers = []
        for i in range(num_lstm_layers):
            input_d = word_d
            if i:
                input_d = 2 * lstm_d
            elif pos_V is not None:
                input_d += pos_d

            fwd_lstm_layer = LSTM(self.model, input_d, lstm_d)
            rev_lstm_layer = LSTM(self.model, input_d, lstm_d, reverse=True)
            lstm_layers.append((fwd_lstm_layer, rev_lstm_layer))

        #arc prediction MLP
        #layer2(i), layer2(j) -> concatenate -> score
        mlp_layer = MLP(self.model, lstm_d * 4, mlp_d, 1)
        #label prediction MLP
        if mlp_label_d:
            mlp_label_layer = MLP(self.model, lstm_d * 4, mlp_label_d, dep_V)
        else:
            mlp_label_layer = None

        #train the model using Adam optimizer
        self.trainer = dynet.AdamTrainer(self.model)

        #take in word and pos_indices, return the output of the 2nd layer
        def get_lstm_output(indices, pos_indices=None):
            embeddings_out = [self.embeddings[w] for w in indices]
            x = embeddings_out

            if pos_V is not None and pos_indices is not None:
                x = []
                for i, input in enumerate(embeddings_out):
                    x.append(
                        dynet.concatenate(
                            [input, self.pos_embeddings[pos_indices[i]]]))

            for i in range(num_lstm_layers):
                x_1 = lstm_layers[i][0].get_output(x)[0]
                x_2 = lstm_layers[i][1].get_output(x)[0]
                x = [
                    dynet.concatenate([x_1[i], x_2[i]])
                    for i in range(len(indices))
                ]

            return x

        self.states = get_lstm_output

        #score all arcs from i to j using the arc prediction MLP
        def score_arcs(states, value=True):
            length = len(states)
            scores = [[None for i in range(length)] for j in range(length)]

            for i in range(length):
                for j in range(length):
                    score = mlp_layer.get_output(
                        dynet.concatenate([states[i], states[j]]))
                    if value:
                        scores[i][j] = score.scalar_value()
                    else:
                        scores[i][j] = score

            return scores

        self.score_arcs = score_arcs

        #score all labels at i using the label prediction MLP
        def score_labels(states, arcs, value=True):
            scores = []

            for i in range(len(states)):
                score = mlp_label_layer.get_output(
                    dynet.concatenate([states[i], states[arcs[i]]]))
                if value:
                    scores.append(score.value())
                else:
                    scores.append(score)

            return scores

        self.score_labels = score_labels
Example 30
    def build_test_model(self, data):
        rng = np.random.RandomState(3435)
        lstm_params, hidden_params, hidden_relu_params, full_connect_params, convs = self.load_trained_params(
        )
        data_x, data_y, maxlen = data
        test_len = len(data_x)
        n_test_batches = test_len // self.batch_size
        x = T.matrix('x')
        y = T.ivector('y')
        index = T.lscalar()
        Words = theano.shared(value=self.word_vectors,
                              name="Words",
                              borrow=True)
        input_width = self.hidden_sizes[0]
        layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
            (self.batch_size, maxlen, input_width))
        lstm = LSTM(dim=input_width,
                    batch_size=self.batch_size,
                    number_step=maxlen,
                    params=lstm_params)
        layer0_output = lstm.feed_foward(layer0_input)
        conv_outputs = list()
        conv_nnets = list()
        params = list()
        output = T.cast(layer0_input.flatten(), dtype=floatX)
        conv_input = output.reshape((self.batch_size, 1, maxlen, input_width))
        for it, p_conv in enumerate(convs):
            pheight = maxlen - self.filter_sizes[it] + 1
            conv = ConvolutionLayer(rng=rng,
                                    filter_shape=(self.kernel, 1,
                                                  self.filter_sizes[it],
                                                  input_width),
                                    input_shape=(self.batch_size, 1, maxlen,
                                                 input_width),
                                    poolsize=(pheight, 1),
                                    name="conv" + str(self.filter_sizes[it]),
                                    W=p_conv[0],
                                    b=p_conv[1])
            #=>batch size * 1 * 100 * width
            output = conv.predict(conv_input)
            layer1_input = output.flatten(2)
            params += conv.params
            conv_outputs.append(layer1_input)
            conv_nnets.append(conv)
        conv_nnets_output = T.concatenate(conv_outputs, axis=1)
        hidden_layer = HiddenLayer(
            rng,
            hidden_sizes=[self.kernel * 3, self.hidden_sizes[0]],
            input_vectors=conv_nnets_output,
            activation=utils.Tanh,
            name="Hidden_Tanh",
            W=hidden_params[0],
            b=hidden_params[1])
        hidden_layer.predict()
        hidden_layer_relu = HiddenLayer(
            rng,
            hidden_sizes=[self.hidden_sizes[0], self.hidden_sizes[0]],
            input_vectors=hidden_layer.output,
            W=hidden_relu_params[0],
            b=hidden_relu_params[1])
        hidden_layer_relu.predict()
        # hidden_layer_dropout = HiddenLayerDropout(rng, hidden_sizes=self.hidden_sizes[:2], input_vectors=lstm.output, W=hidden_layer.W, b=hidden_layer.b)
        full_connect = FullConnectLayer(
            rng,
            layers_size=[self.hidden_sizes[0], self.hidden_sizes[-1]],
            input_vector=hidden_layer_relu.output,
            W=full_connect_params[0],
            b=full_connect_params[1])
        full_connect.predict()
        test_data_x = theano.shared(np.asarray(data_x, dtype=floatX),
                                    borrow=True)
        test_data_y = theano.shared(np.asarray(data_y, dtype='int32'),
                                    borrow=True)

        errors = 0.
        if test_len == 1:
            test_model = theano.function(
                [index],
                outputs=full_connect.get_predict(),
                on_unused_input='ignore',
                givens={
                    x:
                    test_data_x[index * self.batch_size:(index + 1) *
                                self.batch_size],
                    y:
                    test_data_y[index * self.batch_size:(index + 1) *
                                self.batch_size]
                })
            index = 0
            avg_errors = test_model(index)
        else:
            test_model = theano.function(
                [index],
                outputs=full_connect.errors(y),
                givens={
                    x:
                    test_data_x[index * self.batch_size:(index + 1) *
                                self.batch_size],
                    y:
                    test_data_y[index * self.batch_size:(index + 1) *
                                self.batch_size]
                })
            for i in xrange(n_test_batches):
                errors += test_model(i)
            avg_errors = errors / n_test_batches
        return avg_errors