import itertools
import unittest

import numpy
import theano
from numpy.testing import assert_allclose
from theano import tensor

from blocks.bricks import Tanh
from blocks.bricks.recurrent import Bidirectional, SimpleRecurrent
from blocks.initialization import Orthogonal

floatX = theano.config.floatX


class TestBidirectional(unittest.TestCase):
    def setUp(self):
        self.bidir = Bidirectional(
            weights_init=Orthogonal(),
            prototype=SimpleRecurrent(dim=3, activation=Tanh()))
        self.simple = SimpleRecurrent(dim=3, weights_init=Orthogonal(),
                                      activation=Tanh(), seed=1)
        self.bidir.allocate()
        self.simple.initialize()
        # `parameters` is the current Blocks name for the old `params`
        # attribute used in the original snippet.
        self.bidir.children[0].parameters[0].set_value(
            self.simple.parameters[0].get_value())
        self.bidir.children[1].parameters[0].set_value(
            self.simple.parameters[0].get_value())
        self.x_val = 0.1 * numpy.asarray(
            list(itertools.permutations(range(4))), dtype=floatX)
        self.x_val = (numpy.ones((24, 4, 3), dtype=floatX) *
                      self.x_val[..., None])
        self.mask_val = numpy.ones((24, 4), dtype=floatX)
        self.mask_val[12:24, 3] = 0

    def test(self):
        x = tensor.tensor3('x')
        mask = tensor.matrix('mask')
        calc_bidir = theano.function([x, mask],
                                     [self.bidir.apply(x, mask=mask)])
        calc_simple = theano.function([x, mask],
                                      [self.simple.apply(x, mask=mask)])
        h_bidir = calc_bidir(self.x_val, self.mask_val)[0]
        h_simple = calc_simple(self.x_val, self.mask_val)[0]
        h_simple_rev = calc_simple(self.x_val[::-1], self.mask_val[::-1])[0]

        # The forward half of the bidirectional features must match the
        # forward pass, the backward half the time-reversed pass.
        assert_allclose(h_simple, h_bidir[..., :3], rtol=1e-04)
        assert_allclose(h_simple_rev, h_bidir[::-1, ..., 3:], rtol=1e-04)
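# To run this test on its own (a standard unittest entry point, not part
# of the original snippet):
if __name__ == '__main__':
    unittest.main()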
import numpy as np
import theano
from theano import tensor

from blocks import initialization
from blocks.bricks import Identity, Linear
from blocks.bricks.recurrent import SimpleRecurrent


def example():
    """Simple recurrent example. Taken from:
    https://github.com/mdda/pycon.sg-2015_deep-learning/blob/master/ipynb/blocks-recurrent-docs.ipynb
    """
    x = tensor.tensor3('x')

    rnn = SimpleRecurrent(dim=3, activation=Identity(),
                          weights_init=initialization.Identity())
    rnn.initialize()
    h = rnn.apply(x)
    f = theano.function([x], h)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX)))

    doubler = Linear(input_dim=3, output_dim=3,
                     weights_init=initialization.Identity(2),
                     biases_init=initialization.Constant(0))
    doubler.initialize()
    h_doubler = rnn.apply(doubler.apply(x))
    f = theano.function([x], h_doubler)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX)))

    # Initial state
    h0 = tensor.matrix('h0')
    h = rnn.apply(inputs=x, states=h0)
    f = theano.function([x, h0], h)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX),
            np.ones((1, 3), dtype=theano.config.floatX)))
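# A numpy cross-check of the first call above (our sketch, not from the
# notebook): with Identity activation and identity recurrent weights,
# h_t = h_{t-1} + x_t, so the hidden states are cumulative sums of the
# inputs: [[1 1 1]], [[2 2 2]], [[3 3 3]].
import numpy as np

x_val = np.ones((3, 1, 3), dtype='float32')  # (time, batch, dim)
h = np.zeros((1, 3), dtype='float32')
for t in range(x_val.shape[0]):
    h = h.dot(np.eye(3, dtype='float32')) + x_val[t]
    print(h)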
import itertools

import numpy as np
import theano
from theano import tensor

from blocks.bricks import Tanh
from blocks.bricks.recurrent import Bidirectional, SimpleRecurrent
from blocks.initialization import Orthogonal


def example5():
    """Bidirectional + SimpleRecurrent. Adapted from a unit test in Blocks."""
    bidir = Bidirectional(weights_init=Orthogonal(),
                          prototype=SimpleRecurrent(dim=3, activation=Tanh()))
    simple = SimpleRecurrent(dim=3, weights_init=Orthogonal(),
                             activation=Tanh(), seed=1)
    bidir.allocate()
    simple.initialize()
    bidir.children[0].parameters[0].set_value(
        simple.parameters[0].get_value())
    bidir.children[1].parameters[0].set_value(
        simple.parameters[0].get_value())

    # Initialize Theano variables and functions
    x = tensor.tensor3('x')
    mask = tensor.matrix('mask')
    calc_bidir = theano.function([x, mask], [bidir.apply(x, mask=mask)])
    calc_simple = theano.function([x, mask], [simple.apply(x, mask=mask)])

    # Testing time
    x_val = 0.1 * np.asarray(list(itertools.permutations(range(4))),
                             dtype=theano.config.floatX)
    x_val = (np.ones((24, 4, 3), dtype=theano.config.floatX) *
             x_val[..., None])
    mask_val = np.ones((24, 4), dtype=theano.config.floatX)
    mask_val[12:24, 3] = 0

    h_bidir = calc_bidir(x_val, mask_val)[0]
    h_simple = calc_simple(x_val, mask_val)[0]
    h_simple_rev = calc_simple(x_val[::-1], mask_val[::-1])[0]

    print(h_bidir)
    print(h_simple)
    print(h_simple_rev)
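# The printed arrays can be checked the way the original unit test does
# (a sketch; assert_allclose is numpy.testing.assert_allclose):
#     assert_allclose(h_simple, h_bidir[..., :3], rtol=1e-04)
#     assert_allclose(h_simple_rev, h_bidir[::-1, ..., 3:], rtol=1e-04)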
import itertools
import unittest

import numpy
import theano
from numpy.testing import assert_allclose
from theano import tensor

from blocks.bricks import Tanh
from blocks.bricks.recurrent import SimpleRecurrent
from blocks.filter import VariableFilter
from blocks.graph import ComputationGraph
from blocks.initialization import Constant
from blocks.roles import INITIAL_STATE
from blocks.utils import is_shared_variable


class TestSimpleRecurrent(unittest.TestCase):
    def setUp(self):
        self.simple = SimpleRecurrent(dim=3, weights_init=Constant(2),
                                      activation=Tanh())
        self.simple.initialize()

    def test_one_step(self):
        h0 = tensor.matrix('h0')
        x = tensor.matrix('x')
        mask = tensor.vector('mask')
        h1 = self.simple.apply(x, h0, mask=mask, iterate=False)
        next_h = theano.function(inputs=[h0, x, mask], outputs=[h1])

        h0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                                   dtype=theano.config.floatX)
        x_val = 0.1 * numpy.array([[1, 2, 3], [4, 5, 6]],
                                  dtype=theano.config.floatX)
        mask_val = numpy.array([1, 0]).astype(theano.config.floatX)
        h1_val = numpy.tanh(h0_val.dot(2 * numpy.ones((3, 3))) + x_val)
        h1_val = mask_val[:, None] * h1_val + (1 - mask_val[:, None]) * h0_val

        assert_allclose(h1_val, next_h(h0_val, x_val, mask_val)[0])

    def test_many_steps(self):
        x = tensor.tensor3('x')
        mask = tensor.matrix('mask')
        h = self.simple.apply(x, mask=mask, iterate=True)
        calc_h = theano.function(inputs=[x, mask], outputs=[h])

        x_val = 0.1 * numpy.asarray(list(itertools.permutations(range(4))),
                                    dtype=theano.config.floatX)
        x_val = numpy.ones((24, 4, 3),
                           dtype=theano.config.floatX) * x_val[..., None]
        mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        mask_val[12:24, 3] = 0
        h_val = numpy.zeros((25, 4, 3), dtype=theano.config.floatX)
        for i in range(1, 25):
            h_val[i] = numpy.tanh(h_val[i - 1].dot(2 * numpy.ones((3, 3))) +
                                  x_val[i - 1])
            h_val[i] = (mask_val[i - 1, :, None] * h_val[i] +
                        (1 - mask_val[i - 1, :, None]) * h_val[i - 1])
        h_val = h_val[1:]

        assert_allclose(h_val, calc_h(x_val, mask_val)[0], rtol=1e-04)

        # Also test that the initial state is a parameter
        initial_state, = VariableFilter(roles=[INITIAL_STATE])(
            ComputationGraph(h))
        assert is_shared_variable(initial_state)
        assert initial_state.name == 'initial_state'
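# The masked update rule exercised by both tests, as a standalone numpy
# helper (a sketch; W stands for the recurrent weight matrix, which
# Constant(2) initializes to all twos):
def masked_step(h_prev, x_t, mask_t, W):
    """One SimpleRecurrent step; masked-out rows keep their old state."""
    h_new = numpy.tanh(h_prev.dot(W) + x_t)
    return mask_t[:, None] * h_new + (1 - mask_t[:, None]) * h_prev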
class TextRNN(object):
    def __init__(self, dim_in, dim_hidden, dim_out, **kwargs):
        self.dim_in = dim_in
        self.dim_hidden = dim_hidden
        self.dim_out = dim_out

        self.input_layer = Linear(
            input_dim=self.dim_in, output_dim=self.dim_hidden,
            weights_init=initialization.IsotropicGaussian(),
            biases_init=initialization.Constant(0))
        self.input_layer.initialize()

        sparse_init = initialization.Sparse(
            num_init=15, weights_init=initialization.IsotropicGaussian())
        self.recurrent_layer = SimpleRecurrent(
            dim=self.dim_hidden, activation=Tanh(),
            name="first_recurrent_layer",
            weights_init=sparse_init,
            biases_init=initialization.Constant(0.01))
        '''
        self.recurrent_layer = LSTM(
            dim=self.dim_hidden, activation=Tanh(),
            weights_init=initialization.IsotropicGaussian(std=0.001),
            biases_init=initialization.Constant(0.01))
        '''
        self.recurrent_layer.initialize()

        self.output_layer = Linear(
            input_dim=self.dim_hidden, output_dim=self.dim_out,
            weights_init=initialization.Uniform(width=0.01),
            biases_init=initialization.Constant(0.01))
        self.output_layer.initialize()

        self.children = [self.input_layer, self.recurrent_layer,
                         self.output_layer]

    '''
    @recurrent(sequences=['inputs'], states=['states'], contexts=[],
               outputs=['states', 'output'])
    '''
    def run(self, inputs):
        output = self.output_layer.apply(
            self.recurrent_layer.apply(self.input_layer.apply(inputs)))
        return output
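# Hypothetical usage sketch for TextRNN (the variable names and dimensions
# here are ours): build the symbolic graph once with run(), then compile
# it with Theano.
import theano
from theano import tensor

x = tensor.tensor3('inputs')  # (time, batch, dim_in)
net = TextRNN(dim_in=50, dim_hidden=100, dim_out=50)
f = theano.function([x], net.run(x))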
def main(num_epochs=100):
    x = tensor.matrix('features')
    m = tensor.matrix('features_mask')

    x_int = x.astype(dtype='int32').T

    train_dataset = TextFile('inspirational.txt')
    train_dataset.indexables[0] = numpy.array(sorted(
        train_dataset.indexables[0], key=len))

    n_voc = len(train_dataset.dict.keys())

    # Empirical distribution over sentence-initial words. (The original
    # summed the matching indices themselves via sum(filter(...)), which
    # weights each word by its index; counting occurrences is what the
    # initial probabilities need.)
    first_words = [s[0] for s in train_dataset.indexables[
        train_dataset.sources.index('features')]]
    init_probs = numpy.array([first_words.count(w) for w in range(n_voc)],
                             dtype=theano.config.floatX)
    init_probs = init_probs / init_probs.sum()

    n_h = 100
    linear_embedding = LookupTable(length=n_voc, dim=n_h,
                                   weights_init=Uniform(std=0.01),
                                   biases_init=Constant(0.))
    linear_embedding.initialize()
    lstm_biases = numpy.zeros(4 * n_h).astype(dtype=theano.config.floatX)
    lstm_biases[n_h:(2 * n_h)] = 4.
    rnn = SimpleRecurrent(dim=n_h, activation=Tanh(),
                          weights_init=Uniform(std=0.01),
                          biases_init=Constant(0.))
    rnn.initialize()
    score_layer = Linear(input_dim=n_h, output_dim=n_voc,
                         weights_init=Uniform(std=0.01),
                         biases_init=Constant(0.))
    score_layer.initialize()

    embedding = (linear_embedding.apply(x_int[:-1]) *
                 tensor.shape_padright(m.T[1:]))
    rnn_out = rnn.apply(inputs=embedding, mask=m.T[1:])
    probs = softmax(
        sequence_map(score_layer.apply, rnn_out, mask=m.T[1:])[0])

    idx_mask = m.T[1:].nonzero()
    cost = CategoricalCrossEntropy().apply(
        x_int[1:][idx_mask[0], idx_mask[1]],
        probs[idx_mask[0], idx_mask[1]])
    cost.name = 'cost'
    misclassification = MisclassificationRate().apply(
        x_int[1:][idx_mask[0], idx_mask[1]],
        probs[idx_mask[0], idx_mask[1]])
    misclassification.name = 'misclassification'

    cg = ComputationGraph([cost])
    params = cg.parameters
    algorithm = GradientDescent(cost=cost, params=params, step_rule=Adam())

    train_data_stream = Padding(
        data_stream=DataStream(
            dataset=train_dataset,
            iteration_scheme=BatchwiseShuffledScheme(
                examples=train_dataset.num_examples,
                batch_size=10)),
        mask_sources=('features',))

    model = Model(cost)

    extensions = []
    extensions.append(Timing())
    extensions.append(FinishAfter(after_n_epochs=num_epochs))
    extensions.append(TrainingDataMonitoring(
        [cost, misclassification], prefix='train', after_epoch=True))

    batch_size = 10
    length = 30
    trng = MRG_RandomStreams(18032015)
    u = trng.uniform(size=(length, batch_size, n_voc))
    gumbel_noise = -tensor.log(-tensor.log(u))
    init_samples = (tensor.log(init_probs).dimshuffle(('x', 0)) +
                    gumbel_noise[0]).argmax(axis=-1)
    init_states = rnn.initial_state('states', batch_size)

    def sampling_step(g_noise, states, samples_step):
        embedding_step = linear_embedding.apply(samples_step)
        next_states = rnn.apply(inputs=embedding_step, states=states,
                                iterate=False)
        probs_step = softmax(score_layer.apply(next_states))
        next_samples = (tensor.log(probs_step) + g_noise).argmax(axis=-1)
        return next_states, next_samples

    [_, samples], _ = theano.scan(
        fn=sampling_step,
        sequences=[gumbel_noise[1:]],
        outputs_info=[init_states, init_samples])

    sampling = theano.function([], samples.owner.inputs[0].T)

    plotters = []
    plotters.append(Plotter(
        channels=[['train_cost', 'train_misclassification']],
        titles=['Costs']))

    extensions.append(PlotManager('Language modelling example',
                                  plotters=plotters,
                                  after_epoch=True,
                                  after_training=True))
    extensions.append(Printing())
    extensions.append(PrintSamples(sampler=sampling,
                                   voc=train_dataset.inv_dict))

    main_loop = MainLoop(model=model,
                         data_stream=train_data_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
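# The sampler above relies on the Gumbel-max trick: argmax(log p + g) with
# g ~ Gumbel(0, 1) draws an index with probability p. A numpy sketch:
import numpy as np

rng = np.random.RandomState(0)
p = np.array([0.2, 0.5, 0.3])
u = rng.uniform(size=(100000, 3))
g = -np.log(-np.log(u))
samples = (np.log(p) + g).argmax(axis=-1)
print(np.bincount(samples) / float(len(samples)))  # approx [0.2, 0.5, 0.3]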
    weights_init=initialization.Uniform(width=0.01),
    biases_init=Constant(0))
lookup_input.initialize()

linear_input = Linear(name='linear_input',
                      input_dim=hidden_layer_dim,
                      output_dim=hidden_layer_dim,
                      weights_init=initialization.Uniform(width=0.01),
                      biases_init=Constant(0))
linear_input.initialize()

rnn = SimpleRecurrent(name='hidden',
                      dim=hidden_layer_dim,
                      activation=Tanh(),
                      weights_init=initialization.Uniform(width=0.01))
rnn.initialize()

linear_output = Linear(name='linear_output',
                       input_dim=hidden_layer_dim,
                       output_dim=train_dataset.durations_vocab_size(),
                       weights_init=initialization.Uniform(width=0.01),
                       biases_init=Constant(0))
linear_output.initialize()

softmax = NDimensionalSoftmax(name='ndim_softmax')

activation_input = lookup_input.apply(x)
hidden = rnn.apply(linear_input.apply(activation_input))
activation_output = linear_output.apply(hidden)
y_est = softmax.apply(activation_output, extra_ndim=1)
lookup_input.initialize()

linear_input = Linear(name='linear_input',
                      input_dim=hidden_layer_dim,
                      output_dim=hidden_layer_dim,
                      weights_init=initialization.Uniform(width=0.01),
                      biases_init=Constant(0))
linear_input.initialize()

rnn = SimpleRecurrent(name='hidden',
                      dim=hidden_layer_dim,
                      activation=Tanh(),
                      weights_init=initialization.Uniform(width=0.01))
rnn.initialize()

linear_output = Linear(name='linear_output',
                       input_dim=hidden_layer_dim,
                       output_dim=charset_size,
                       weights_init=initialization.Uniform(width=0.01),
                       biases_init=Constant(0))
linear_output.initialize()

softmax = NDimensionalSoftmax(name='ndim_softmax')

activation_input = lookup_input.apply(x)
hidden = rnn.apply(linear_input.apply(activation_input))
activation_output = linear_output.apply(hidden)
y_est = softmax.apply(activation_output, extra_ndim=1)
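# What `extra_ndim=1` buys in NDimensionalSoftmax (a numpy sketch): the
# softmax is taken over the last axis of a 3-D (time, batch, vocabulary)
# activation instead of requiring a flattened 2-D matrix.
import numpy as np

def softmax_last_axis(a):
    e = np.exp(a - a.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

a = np.random.randn(7, 2, 5)  # e.g. (time, batch, charset_size)
assert np.allclose(softmax_last_axis(a).sum(axis=-1), 1.0)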
class ETHM(EUTHM):
    '''Model with only textual-hashtag information'''

    def __init__(self, config, dataset, *args, **kwargs):
        super(ETHM, self).__init__(config, dataset)

    def _build_model(self, *args, **kwargs):
        # Define inputs
        self._define_inputs()
        self._build_bricks()
        self._set_OV_value()
        # Transpose text
        self.text = self.text.dimshuffle(1, 0)
        self.text_mask = self.text_mask.dimshuffle(1, 0)
        self.sparse_word = self.sparse_word.dimshuffle(1, 0)
        self.sparse_word_mask = self.sparse_word_mask.dimshuffle(1, 0)
        # Turn words and hashtags into vector representations
        text_vec = self.word_embed.apply(self.text)
        # Apply hashtag-word and sparse-word substitution
        text_vec = self._apply_hashtag_word(text_vec)
        text_vec = self._apply_sparse_word(text_vec)
        # Encode text
        mlstm_hidden, mlstm_cell = self.mlstm.apply(
            inputs=self.mlstm_ins.apply(text_vec),
            mask=self.text_mask.astype(theano.config.floatX))
        text_encodes = mlstm_hidden[-1]
        input_vec = text_encodes
        self._get_cost(input_vec, None, None)

    def _define_inputs(self, *args, **kwargs):
        self.hashtag = tensor.ivector('hashtag')
        self.text = tensor.imatrix('text')
        self.text_mask = tensor.matrix('text_mask',
                                       dtype=theano.config.floatX)
        self.hashtag_word = tensor.ivector('hashtag_word')
        self.hashtag_sparse_mask = tensor.vector('hashtag_word_sparse_mask',
                                                 dtype=theano.config.floatX)
        self.hashtag_word_left_idx = tensor.ivector(
            'hashtag_word_idx_left_idx')
        self.hashtag_word_right_idx = tensor.ivector(
            'hashtag_word_idx_right_idx')
        self.sparse_word = tensor.imatrix('sparse_word')
        self.sparse_word_sparse_mask = tensor.vector(
            'sparse_word_sparse_mask', dtype=theano.config.floatX)
        self.sparse_word_mask = tensor.matrix('sparse_word_mask',
                                              dtype=theano.config.floatX)
        self.sparse_word_left_idx = tensor.ivector('sparse_word_idx_left_idx')
        self.sparse_word_right_idx = tensor.ivector(
            'sparse_word_idx_right_idx')

    def _build_bricks(self, *args, **kwargs):
        # Build lookup tables
        self.word_embed = self._embed(len(self.dataset.word2index),
                                      self.config.word_embed_dim,
                                      name='word_embed')
        self.hashtag_embed = self._embed(len(self.dataset.hashtag2index),
                                         self.config.lstm_dim,
                                         name='hashtag_embed')
        # Build text encoder
        self.mlstm_ins = Linear(input_dim=self.config.word_embed_dim,
                                output_dim=4 * self.config.lstm_dim,
                                name='mlstm_in')
        self.mlstm_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm_ins.biases_init = Constant(0)
        self.mlstm_ins.initialize()
        self.mlstm = MLSTM(self.config.lstm_time, self.config.lstm_dim,
                           shared=False)
        self.mlstm.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm.biases_init = Constant(0)
        self.mlstm.initialize()
        self.hashtag2word = MLP(
            activations=[Tanh('hashtag2word_tanh')],
            dims=[self.config.lstm_dim, self.config.word_embed_dim],
            name='hashtag2word_mlp')
        self.hashtag2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.hashtag2word.biases_init = Constant(0)
        self.hashtag2word.initialize()
        self.hashtag2word_bias = Bias(dim=1, name='hashtag2word_bias')
        self.hashtag2word_bias.biases_init = Constant(0)
        self.hashtag2word_bias.initialize()
        # Build character embedding
        self.char_embed = self._embed(len(self.dataset.char2index),
                                      self.config.char_embed_dim,
                                      name='char_embed')
        # Build sparse word encoder
        self.rnn_ins = Linear(input_dim=self.config.char_embed_dim,
                              output_dim=self.config.word_embed_dim,
                              name='rnn_in')
        self.rnn_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.char_embed_dim +
                       self.config.word_embed_dim))
        self.rnn_ins.biases_init = Constant(0)
        self.rnn_ins.initialize()
        self.rnn = SimpleRecurrent(dim=self.config.word_embed_dim,
                                   activation=Tanh())
        self.rnn.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.rnn.initialize()

    def _apply_dropout(self, outputs, *args, **kwargs):
        variables = [self.word_embed.W, self.hashtag_embed.W]
        cgs = ComputationGraph(outputs)
        cg_dropouts = apply_dropout(cgs, variables,
                                    drop_prob=self.config.dropout_prob,
                                    seed=123).outputs
        return cg_dropouts

    def _apply_reg(self, cost, params=None, *args, **kwargs):
        try:
            if self.config.l2_norm > 0:
                cost = cost + self.config.l2_norm * \
                    theano_expressions.l2_norm(
                        tensors=[self.hashtag_embed.W,
                                 self.word_embed.W]) ** 2
        except Exception:
            pass
        return cost
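# The regularizer in _apply_reg, restated in numpy (a sketch): l2_norm
# returns the square root of the summed squares, so squaring it yields a
# plain L2 penalty over the two embedding matrices.
import numpy as np

def l2_penalty(weight, matrices):
    return weight * sum((m ** 2).sum() for m in matrices)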
class EUTHM(UTHM):
    '''UTH model with extended information'''

    def __init__(self, config, dataset, *args, **kwargs):
        super(EUTHM, self).__init__(config, dataset)

    def _define_inputs(self, *args, **kwargs):
        super(EUTHM, self)._define_inputs()
        self.user_word = tensor.ivector('user_word')
        self.user_word_sparse_mask = tensor.vector(
            'user_word_sparse_mask', dtype=theano.config.floatX)
        self.user_word_left_idx = tensor.ivector('user_word_idx_left_idx')
        self.user_word_right_idx = tensor.ivector('user_word_idx_right_idx')
        self.hashtag_word = tensor.ivector('hashtag_word')
        self.hashtag_sparse_mask = tensor.vector('hashtag_word_sparse_mask',
                                                 dtype=theano.config.floatX)
        self.hashtag_word_left_idx = tensor.ivector(
            'hashtag_word_idx_left_idx')
        self.hashtag_word_right_idx = tensor.ivector(
            'hashtag_word_idx_right_idx')
        self.sparse_word = tensor.imatrix('sparse_word')
        self.sparse_word_sparse_mask = tensor.vector(
            'sparse_word_sparse_mask', dtype=theano.config.floatX)
        self.sparse_word_mask = tensor.matrix('sparse_word_mask',
                                              dtype=theano.config.floatX)
        self.sparse_word_left_idx = tensor.ivector('sparse_word_idx_left_idx')
        self.sparse_word_right_idx = tensor.ivector(
            'sparse_word_idx_right_idx')

    def _build_bricks(self, *args, **kwargs):
        # Build lookup tables
        super(EUTHM, self)._build_bricks()
        self.user2word = MLP(
            activations=[Tanh('user2word_tanh')],
            dims=[self.config.user_embed_dim, self.config.word_embed_dim],
            name='user2word_mlp')
        self.user2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.user2word.biases_init = Constant(0)
        self.user2word.initialize()
        self.hashtag2word = MLP(
            activations=[Tanh('hashtag2word_tanh')],
            dims=[self.config.user_embed_dim + self.config.word_embed_dim,
                  self.config.word_embed_dim],
            name='hashtag2word_mlp')
        self.hashtag2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.hashtag2word.biases_init = Constant(0)
        self.hashtag2word.initialize()
        self.user2word_bias = Bias(dim=1, name='user2word_bias')
        self.user2word_bias.biases_init = Constant(0)
        self.user2word_bias.initialize()
        self.hashtag2word_bias = Bias(dim=1, name='hashtag2word_bias')
        self.hashtag2word_bias.biases_init = Constant(0)
        self.hashtag2word_bias.initialize()
        # Build character embedding
        self.char_embed = self._embed(len(self.dataset.char2index),
                                      self.config.char_embed_dim,
                                      name='char_embed')
        # Build sparse word encoder
        self.rnn_ins = Linear(input_dim=self.config.char_embed_dim,
                              output_dim=self.config.word_embed_dim,
                              name='rnn_in')
        self.rnn_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.char_embed_dim +
                       self.config.word_embed_dim))
        self.rnn_ins.biases_init = Constant(0)
        self.rnn_ins.initialize()
        self.rnn = SimpleRecurrent(dim=self.config.word_embed_dim,
                                   activation=Tanh())
        self.rnn.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.rnn.initialize()

    def _set_OV_value(self, *args, **kwargs):
        '''Initialize the <unk> character representation to zeros.

        Note: the original called tensor.set_subtensor and discarded the
        result, which is a no-op on the shared parameter; updating the
        shared value in place is presumably what was intended.
        '''
        W = self.char_embed.W.get_value()
        W[self.dataset.char2index['<unk>']] = numpy.zeros(
            self.config.char_embed_dim, dtype=theano.config.floatX)
        self.char_embed.W.set_value(W)

    def _get_text_vec(self, *args, **kwargs):
        # Transpose text
        self.text = self.text.dimshuffle(1, 0)
        self.text_mask = self.text_mask.dimshuffle(1, 0)
        self.sparse_word = self.sparse_word.dimshuffle(1, 0)
        self.sparse_word_mask = self.sparse_word_mask.dimshuffle(1, 0)
        # Turn words, users and hashtags into vector representations
        text_vec = self.word_embed.apply(self.text)
        # Apply user-word, hashtag-word and sparse-word substitution
        text_vec = self._apply_user_word(text_vec)
        text_vec = self._apply_hashtag_word(text_vec)
        text_vec = self._apply_sparse_word(text_vec)
        return text_vec

    def _apply_user_word(self, text_vec, *args, **kwargs):
        '''Replace @a tokens with the transformed author vector.'''
        user_word_vec = self.user2word.apply(
            self.user_embed.apply(self.user_word)) + \
            self.user2word_bias.parameters[0][0]
        text_vec = tensor.set_subtensor(
            text_vec[self.user_word_right_idx, self.user_word_left_idx],
            text_vec[self.user_word_right_idx, self.user_word_left_idx] *
            (1 - self.user_word_sparse_mask[:, None]) +
            user_word_vec * self.user_word_sparse_mask[:, None])
        return text_vec

    def _apply_hashtag_word(self, text_vec, *args, **kwargs):
        '''Replace #h tokens with the transformed hashtag vector.'''
        hashtag_word_vec = self.hashtag2word.apply(
            self.hashtag_embed.apply(self.hashtag_word)) + \
            self.hashtag2word_bias.parameters[0][0]
        text_vec = tensor.set_subtensor(
            text_vec[self.hashtag_word_right_idx,
                     self.hashtag_word_left_idx],
            text_vec[self.hashtag_word_right_idx,
                     self.hashtag_word_left_idx] *
            (1 - self.hashtag_sparse_mask[:, None]) +
            hashtag_word_vec * self.hashtag_sparse_mask[:, None])
        return text_vec

    def _apply_sparse_word(self, text_vec, *args, **kwargs):
        '''Replace sparse-word tokens with an RNN encoding of their
        characters (the last hidden state).'''
        sparse_word_vec = self.char_embed.apply(self.sparse_word)
        sparse_word_hiddens = self.rnn.apply(
            inputs=self.rnn_ins.apply(sparse_word_vec),
            mask=self.sparse_word_mask)
        tmp = sparse_word_hiddens[-1]
        text_vec = tensor.set_subtensor(
            text_vec[self.sparse_word_right_idx, self.sparse_word_left_idx],
            text_vec[self.sparse_word_right_idx,
                     self.sparse_word_left_idx] *
            (1 - self.sparse_word_sparse_mask[:, None]) +
            tmp * self.sparse_word_sparse_mask[:, None])
        return text_vec
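# The set_subtensor blend used by all three _apply_* methods, as a numpy
# sketch (shapes ours): positions flagged by the sparse mask take the
# substitute vectors; unflagged positions keep the word embedding.
import numpy as np

text_vec = np.zeros((4, 3, 8))   # (time, batch, word_embed_dim)
rows = np.array([1, 2])          # time indices of the tokens to replace
cols = np.array([0, 2])          # batch indices of the tokens to replace
replacement = np.ones((2, 8))
sparse_mask = np.array([1., 1.])
text_vec[rows, cols] = (text_vec[rows, cols] * (1 - sparse_mask[:, None]) +
                        replacement * sparse_mask[:, None])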
def __init__(self, rnn_dims, num_actions, data_X_np=None, data_y_np=None,
             width=32, height=32):
    ###################################################################
    #
    # Network and data setup
    #
    ###################################################################
    RNN_DIMS = 100
    NUM_ACTIONS = num_actions
    tensor5 = T.TensorType('float32', [False, True, True, True, True])
    self.x = T.tensor4('features')
    self.reward = T.tensor3('targets', dtype='float32')
    self.state = T.matrix('states', dtype='float32')
    self.hidden_states = []  # holds hidden states in np array form

    # data_X & data_y are supplied in the init function now...
    if data_X_np is None or data_y_np is None:
        print('you did not supply data at init')
        data_X_np = np.float32(np.random.normal(
            size=(1280, 1, 1, width, height)))
        data_y_np = np.float32(np.random.normal(size=(1280, 1, 1, 1)))

    state_shape = (data_X_np.shape[0], rnn_dims)
    self.data_states_np = np.float32(np.zeros(state_shape))

    self.datastream = IterableDataset(dict(
        features=data_X_np,
        targets=data_y_np,
        states=self.data_states_np)).get_example_stream()
    self.datastream_test = IterableDataset(dict(
        features=data_X_np,
        targets=data_y_np,
        states=self.data_states_np)).get_example_stream()
    data_X = self.datastream

    # Convolutional inputs: convert the sequence of input images to
    # convolutional representations.
    conv_layers = [
        ConvolutionalLayer(Rectifier().apply, (3, 3), 16, (2, 2), name='l1'),
        ConvolutionalLayer(Rectifier().apply, (3, 3), 32, (2, 2), name='l2'),
        ConvolutionalLayer(Rectifier().apply, (3, 3), 64, (2, 2), name='l3'),
        ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l4'),
        ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l5'),
        ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l6')]
    convnet = ConvolutionalSequence(
        conv_layers, num_channels=4, image_size=(width, height),
        weights_init=init.Uniform(0, 0.01),
        biases_init=init.Constant(0.0),
        tied_biases=False, border_mode='full')
    convnet.initialize()
    output_dim = np.prod(convnet.get_dim('output'))

    conv_out = convnet.apply(self.x)
    reshape_dims = (conv_out.shape[0],
                    conv_out.shape[1] * conv_out.shape[2] *
                    conv_out.shape[3])
    hidden_repr = conv_out.reshape(reshape_dims)
    conv2rnn = Linear(input_dim=output_dim, output_dim=RNN_DIMS,
                      weights_init=init.Uniform(width=0.01),
                      biases_init=init.Constant(0.))
    conv2rnn.initialize()
    conv2rnn_output = conv2rnn.apply(hidden_repr)

    # RNN hidden layer: feed the conv representations into an RNN.
    rnn = SimpleRecurrent(dim=RNN_DIMS, activation=Rectifier(),
                          weights_init=init.Uniform(width=0.01))
    rnn.initialize()
    self.learned_state = rnn.apply(inputs=conv2rnn_output,
                                   states=self.state, iterate=False)

    # Linear output from the hidden layer. The RNN has two outputs, but
    # only this one has a target: the "expected return", and the network
    # minimizes the difference between expected and actual return.
    lin_output = Linear(input_dim=RNN_DIMS, output_dim=1,
                        weights_init=init.Uniform(width=0.01),
                        biases_init=init.Constant(0.))
    lin_output.initialize()
    self.exp_reward = lin_output.apply(self.learned_state)
    self.get_exp_reward = theano.function([self.x, self.state],
                                          self.exp_reward)

    # Softmax output from the hidden layer: a softmax over recommended
    # actions. The hypothesis is that adjusting the other outputs
    # influences this set of outputs to suggest smarter (or more
    # realistic?) moves.
    action_output = Linear(input_dim=RNN_DIMS, output_dim=NUM_ACTIONS,
                           weights_init=init.Constant(.001),
                           biases_init=init.Constant(0.))
    action_output.initialize()
    self.suggested_actions = Softmax().apply(
        action_output.apply(self.learned_state[-1]))

    # Use this to get suggested actions; it requires the state of the
    # hidden units from the previous timestep.
    self.get_suggested_actions = theano.function(
        [self.x, self.state],
        [self.suggested_actions, self.learned_state])
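# Hypothetical driver (the names `agent` and `frame` are ours): carry the
# hidden state across calls, since get_suggested_actions expects the
# previous RNN state alongside the current frame.
import numpy as np

state = np.zeros((1, 100), dtype='float32')        # (batch, RNN_DIMS)
frame = np.zeros((1, 4, 32, 32), dtype='float32')  # (batch, channels, H, W)
actions, state = agent.get_suggested_actions(frame, state)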
def main(save_to, num_epochs):
    batch_size = 128
    dim = 100
    n_steps = 20
    i2h1 = MLP([Identity()], [784, dim],
               biases_init=Constant(0.),
               weights_init=IsotropicGaussian(.001))
    h2o1 = MLP([Rectifier(), Logistic()], [dim, dim, 784],
               biases_init=Constant(0.),
               weights_init=IsotropicGaussian(.001))
    rec1 = SimpleRecurrent(dim=dim, activation=Tanh(),
                           weights_init=Orthogonal())
    i2h1.initialize()
    h2o1.initialize()
    rec1.initialize()

    x = tensor.tensor3('features')
    x1 = x[1:, :, :]
    x2 = x[:-1, :, :]

    preproc = i2h1.apply(x1)
    h1 = rec1.apply(preproc)
    x_hat = h2o1.apply(h1)
    cost = tensor.nnet.binary_crossentropy(x_hat, x2).mean()
    # cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    cost.name = 'final_cost'

    cg = ComputationGraph([cost, ])

    mnist_train = MNIST("train", subset=slice(0, 50000),
                        sources=('features', ))
    mnist_valid = MNIST("train", subset=slice(50000, 60000),
                        sources=('features',))
    mnist_test = MNIST("test")
    trainstream = Mapping(
        Flatten(DataStream(mnist_train,
                           iteration_scheme=SequentialScheme(
                               50000, batch_size))),
        _meanize(n_steps))
    validstream = Mapping(
        Flatten(DataStream(mnist_valid,
                           iteration_scheme=SequentialScheme(
                               10000, batch_size))),
        _meanize(n_steps))
    teststream = Mapping(
        Flatten(DataStream(mnist_test,
                           iteration_scheme=SequentialScheme(
                               10000, batch_size))),
        _meanize(n_steps))

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=CompositeRule([Adam(), StepClipping(100)]))
    main_loop = MainLoop(
        algorithm,
        trainstream,
        extensions=[Timing(),
                    FinishAfter(after_n_epochs=num_epochs),
                    # DataStreamMonitoring(
                    #     [cost, ],
                    #     teststream,
                    #     prefix="test"),
                    DataStreamMonitoringAndSaving(
                        [cost, ],
                        validstream,
                        [i2h1, h2o1, rec1],
                        'best_' + save_to + '.pkl',
                        cost_name=cost.name,
                        after_epoch=True,
                        prefix='valid'),
                    TrainingDataMonitoring(
                        [cost,
                         aggregation.mean(algorithm.total_gradient_norm)],
                        prefix="train",
                        after_epoch=True),
                    # Plot(
                    #     save_to,
                    #     channels=[
                    #         ['test_final_cost',
                    #          'test_misclassificationrate_apply'
                    #          '_error_rate'],
                    #         ['train_total_gradient_norm']]),
                    Printing()])
    main_loop.run()
lstm = SimpleRecurrent(dim=h_dim, activation=Tanh())
# lstm = GatedRecurrent(dim=h_dim,
#                       activation=Tanh())
decode = Linear(name='decode', input_dim=h_dim, output_dim=1)

for brick in (encode, gates, decode):
    brick.weights_init = IsotropicGaussian(0.01)
    brick.biases_init = Constant(0.)
    brick.initialize()
lstm.weights_init = IsotropicGaussian(0.01)
# lstm.weights_init = Orthogonal()
lstm.biases_init = Constant(0.)
lstm.initialize()

# ComputationGraph(encode.apply(x)).get_theano_function()(features_test)[0].shape
# ComputationGraph(lstm.apply(encoded)).get_theano_function()(features_test)
# ComputationGraph(decode.apply(hiddens[-1])).get_theano_function()(features_test)[0].shape
# ComputationGraph(SquaredError().apply(y, y_hat.flatten())).get_theano_function()(features_test, targets_test)[0].shape

encoded = encode.apply(x)
# hiddens = lstm.apply(encoded, gates.apply(x))
hiddens = lstm.apply(encoded)
y_hat = decode.apply(hiddens[-1])
cost = SquaredError().apply(y, y_hat)
cost.name = 'cost'
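# For context, the fragment above assumes bricks and variables defined
# earlier in the script. A hypothetical reconstruction (all dimensions
# and names here are ours): `gates` would feed the commented-out
# GatedRecurrent, whose gate inputs have twice the state dimension.
x_dim, h_dim = 10, 50
x = tensor.tensor3('features')   # (time, batch, x_dim)
y = tensor.matrix('targets')
encode = Linear(name='encode', input_dim=x_dim, output_dim=h_dim)
gates = Linear(name='gates', input_dim=x_dim, output_dim=2 * h_dim)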