def create_model(self):
    input_dim = self.input_dim
    x = self.x
    x_to_h = Linear(input_dim, input_dim * 4, name='x_to_h',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
    lstm = LSTM(input_dim, name='lstm',
                weights_init=IsotropicGaussian(),
                biases_init=Constant(0.0))
    h_to_o = Linear(input_dim, 1, name='h_to_o',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
    x_transform = x_to_h.apply(x)
    self.x_to_h = x_to_h
    self.lstm = lstm
    self.h_to_o = h_to_o
    h, c = lstm.apply(x_transform)
    # only values of hidden units of the last timeframe are used for
    # the classification
    probs = h_to_o.apply(h[-1])
    return probs
def __init__(self, feature_dim, memory_dim, fc1_dim, fc2_dim):
    self.W = Linear(input_dim=feature_dim, output_dim=memory_dim * 4,
                    weights_init=IsotropicGaussian(0.01),
                    biases_init=Constant(0), use_bias=False,
                    name='seqDecoder_W')
    # note: despite the GRU_* names, both recurrent bricks are LSTMs
    self.GRU_A = LSTM(feature_dim, name='seqDecoder_A',
                      weights_init=IsotropicGaussian(0.01),
                      biases_init=Constant(0))
    self.GRU_B = LSTM(memory_dim, name='seqDecoder_B',
                      weights_init=IsotropicGaussian(0.01),
                      biases_init=Constant(0))
    self.W.initialize()
    self.GRU_A.initialize()
    self.GRU_B.initialize()
    self.fc1 = Linear(input_dim=memory_dim, output_dim=fc1_dim,
                      weights_init=IsotropicGaussian(0.01),
                      biases_init=Constant(0), name='fc1')
    self.fc2 = Linear(input_dim=fc1_dim, output_dim=fc2_dim,
                      weights_init=IsotropicGaussian(0.01),
                      biases_init=Constant(0), name='fc2')
    self.fc1.initialize()
    self.fc2.initialize()
def __init__(self, emb_dim, dim, num_input_words,
             num_output_words, vocab, **kwargs):
    if emb_dim == 0:
        emb_dim = dim
    if num_input_words == 0:
        num_input_words = vocab.size()
    if num_output_words == 0:
        num_output_words = vocab.size()

    self._num_input_words = num_input_words
    self._num_output_words = num_output_words
    self._vocab = vocab

    self._word_to_id = WordToIdOp(self._vocab)

    children = []

    self._main_lookup = LookupTable(self._num_input_words, emb_dim,
                                    name='main_lookup')
    self._encoder_fork = Linear(emb_dim, 4 * dim, name='encoder_fork')
    self._encoder_rnn = LSTM(dim, name='encoder_rnn')
    self._decoder_fork = Linear(emb_dim, 4 * dim, name='decoder_fork')
    self._decoder_rnn = LSTM(dim, name='decoder_rnn')
    children.extend([self._main_lookup,
                     self._encoder_fork, self._encoder_rnn,
                     self._decoder_fork, self._decoder_rnn])
    self._pre_softmax = Linear(dim, self._num_output_words)
    self._softmax = NDimensionalSoftmax()
    children.extend([self._pre_softmax, self._softmax])

    super(LanguageModel, self).__init__(children=children, **kwargs)
def __init__(self, input_size, hidden_size, output_size):
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    x = tensor.tensor3('x', dtype=floatX)
    y = tensor.tensor3('y', dtype=floatX)
    x_to_lstm = Linear(name="x_to_lstm", input_dim=input_size,
                       output_dim=4 * hidden_size,
                       weights_init=IsotropicGaussian(),
                       biases_init=Constant(0))
    lstm = LSTM(dim=hidden_size, name="lstm",
                weights_init=IsotropicGaussian(), biases_init=Constant(0))
    lstm_to_output = Linear(name="lstm_to_output", input_dim=hidden_size,
                            output_dim=output_size,
                            weights_init=IsotropicGaussian(),
                            biases_init=Constant(0))
    x_transform = x_to_lstm.apply(x)
    h, c = lstm.apply(x_transform)
    y_hat = lstm_to_output.apply(h)
    y_hat = Logistic(name="y_hat").apply(y_hat)
    self.cost = BinaryCrossEntropy(name="cost").apply(y, y_hat)
    x_to_lstm.initialize()
    lstm.initialize()
    lstm_to_output.initialize()
    self.computation_graph = ComputationGraph(self.cost)
def apply(self, input_, target):
    x_to_h = Linear(name='x_to_h',
                    input_dim=self.dims[0],
                    output_dim=self.dims[1] * 4)
    pre_rnn = x_to_h.apply(input_)
    pre_rnn.name = 'pre_rnn'
    rnn = LSTM(activation=Tanh(),
               dim=self.dims[1], name=self.name)
    h, _ = rnn.apply(pre_rnn)
    h.name = 'h'
    h_to_y = Linear(name='h_to_y',
                    input_dim=self.dims[1],
                    output_dim=self.dims[2])
    y_hat = h_to_y.apply(h)
    y_hat.name = 'y_hat'

    cost = SquaredError().apply(target, y_hat)
    cost.name = 'MSE'

    self.outputs = {}
    self.outputs['y_hat'] = y_hat
    self.outputs['cost'] = cost
    self.outputs['pre_rnn'] = pre_rnn
    self.outputs['h'] = h

    # Initialization
    for brick in (rnn, x_to_h, h_to_y):
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0)
        brick.initialize()
def make_bidir_lstm_stack(seq, seq_dim, mask, sizes, skip=True, name=''):
    bricks = []

    curr_dim = [seq_dim]
    curr_hidden = [seq]

    hidden_list = []
    for k, dim in enumerate(sizes):
        fwd_lstm_ins = [Linear(input_dim=d, output_dim=4 * dim,
                               name='%s_fwd_lstm_in_%d_%d' % (name, k, l))
                        for l, d in enumerate(curr_dim)]
        fwd_lstm = LSTM(dim=dim, activation=Tanh(),
                        name='%s_fwd_lstm_%d' % (name, k))

        bwd_lstm_ins = [Linear(input_dim=d, output_dim=4 * dim,
                               name='%s_bwd_lstm_in_%d_%d' % (name, k, l))
                        for l, d in enumerate(curr_dim)]
        bwd_lstm = LSTM(dim=dim, activation=Tanh(),
                        name='%s_bwd_lstm_%d' % (name, k))

        bricks = bricks + [fwd_lstm, bwd_lstm] + fwd_lstm_ins + bwd_lstm_ins

        fwd_tmp = sum(x.apply(v) for x, v in zip(fwd_lstm_ins, curr_hidden))
        bwd_tmp = sum(x.apply(v) for x, v in zip(bwd_lstm_ins, curr_hidden))
        fwd_hidden, _ = fwd_lstm.apply(fwd_tmp, mask=mask)
        bwd_hidden, _ = bwd_lstm.apply(bwd_tmp[::-1], mask=mask[::-1])
        hidden_list = hidden_list + [fwd_hidden, bwd_hidden]
        if skip:
            curr_hidden = [seq, fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [seq_dim, dim, dim]
        else:
            curr_hidden = [fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [dim, dim]

    return bricks, hidden_list
def __init__(self, config, **kwargs):
    super(Model, self).__init__(**kwargs)
    self.config = config

    self.pre_context_embedder = ContextEmbedder(
        config.pre_embedder, name='pre_context_embedder')
    self.post_context_embedder = ContextEmbedder(
        config.post_embedder, name='post_context_embedder')

    in1 = 2 + sum(x[2] for x in config.pre_embedder.dim_embeddings)
    self.input_to_rec = MLP(activations=[Tanh()],
                            dims=[in1, config.hidden_state_dim],
                            name='input_to_rec')

    self.rec = LSTM(dim=config.hidden_state_dim, name='recurrent')

    in2 = config.hidden_state_dim + sum(
        x[2] for x in config.post_embedder.dim_embeddings)
    self.rec_to_output = MLP(activations=[Tanh()], dims=[in2, 2],
                             name='rec_to_output')

    self.sequences = ['latitude', 'latitude_mask', 'longitude']
    self.context = (self.pre_context_embedder.inputs +
                    self.post_context_embedder.inputs)
    self.inputs = self.sequences + self.context
    self.children = [self.pre_context_embedder, self.post_context_embedder,
                     self.input_to_rec, self.rec, self.rec_to_output]

    self.initial_state_ = shared_floatx_zeros((config.hidden_state_dim,),
                                              name="initial_state")
    self.initial_cells = shared_floatx_zeros((config.hidden_state_dim,),
                                             name="initial_cells")
def __init__(self, dim, mini_dim, summary_dim, **kwargs):
    super(LSTMwMini, self).__init__(**kwargs)
    self.dim = dim
    self.mini_dim = mini_dim
    self.summary_dim = summary_dim

    self.recurrent_layer = LSTM(dim=self.summary_dim,
                                activation=Rectifier(),
                                name='recurrent_layer',
                                weights_init=IsotropicGaussian(),
                                biases_init=Constant(0.0))
    self.mini_recurrent_layer = LSTM(dim=self.mini_dim,
                                     activation=Rectifier(),
                                     name='mini_recurrent_layer',
                                     weights_init=IsotropicGaussian(),
                                     biases_init=Constant(0.0))
    self.mini_to_main = Linear(self.dim + self.mini_dim, self.summary_dim,
                               name='mini_to_main',
                               weights_init=IsotropicGaussian(),
                               biases_init=Constant(0.0))
    self.mini_to_main2 = Linear(self.summary_dim, self.summary_dim * 4,
                                name='mini_to_main2',
                                weights_init=IsotropicGaussian(),
                                biases_init=Constant(0.0))
    self.children = [self.recurrent_layer, self.mini_recurrent_layer,
                     self.mini_to_main, self.mini_to_main2]
def lstm_layer(in_dim, h, h_dim, n, pref=""): linear = Linear(input_dim=in_dim, output_dim=h_dim * 4, name='linear' + str(n) + pref) lstm = LSTM(dim=h_dim, name='lstm' + str(n) + pref) initialize([linear, lstm]) return lstm.apply(linear.apply(h))[0]
def __init__(self, num_input_words, emb_dim, dim,
             vocab, lookup=None, fork_and_rnn=None, **kwargs):
    if num_input_words > 0:
        logger.info("Restricting def vocab to " + str(num_input_words))
        self._num_input_words = num_input_words
    else:
        self._num_input_words = vocab.size()

    self._vocab = vocab

    children = []

    if lookup is None:
        self._def_lookup = LookupTable(self._num_input_words, emb_dim,
                                       name='def_lookup')
    else:
        self._def_lookup = lookup

    if fork_and_rnn is None:
        self._def_fork = Linear(emb_dim, 4 * dim, name='def_fork')
        self._def_rnn = LSTM(dim, name='def_rnn')
    else:
        self._def_fork, self._def_rnn = fork_and_rnn

    children.extend([self._def_lookup, self._def_fork, self._def_rnn])

    super(LSTMReadDefinitions, self).__init__(children=children, **kwargs)
def main(max_seq_length, lstm_dim, batch_size, num_batches, num_epochs):
    dataset_train = IterableDataset(generate_data(max_seq_length, batch_size,
                                                  num_batches))
    dataset_test = IterableDataset(generate_data(max_seq_length, batch_size,
                                                 100))

    stream_train = DataStream(dataset=dataset_train)
    stream_test = DataStream(dataset=dataset_test)

    x = T.tensor3('x')
    y = T.matrix('y')

    # we need to provide data for the LSTM layer of size 4 * lstm_dim, see
    # LSTM layer documentation for the explanation
    x_to_h = Linear(1, lstm_dim * 4, name='x_to_h',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
    lstm = LSTM(lstm_dim, name='lstm',
                weights_init=IsotropicGaussian(),
                biases_init=Constant(0.0))
    h_to_o = Linear(lstm_dim, 1, name='h_to_o',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))

    x_transform = x_to_h.apply(x)
    h, c = lstm.apply(x_transform)

    # only values of hidden units of the last timeframe are used for
    # the classification
    y_hat = h_to_o.apply(h[-1])
    y_hat = Logistic().apply(y_hat)

    cost = BinaryCrossEntropy().apply(y, y_hat)
    cost.name = 'cost'

    lstm.initialize()
    x_to_h.initialize()
    h_to_o.initialize()

    cg = ComputationGraph(cost)

    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Adam())
    test_monitor = DataStreamMonitoring(variables=[cost],
                                        data_stream=stream_test,
                                        prefix="test")
    train_monitor = TrainingDataMonitoring(variables=[cost], prefix="train",
                                           after_epoch=True)

    main_loop = MainLoop(algorithm, stream_train,
                         extensions=[test_monitor, train_monitor,
                                     FinishAfter(after_n_epochs=num_epochs),
                                     Printing(), ProgressBar()])
    main_loop.run()

    print 'Learned weights:'
    for layer in (x_to_h, lstm, h_to_o):
        print "Layer '%s':" % layer.name
        for param in layer.parameters:
            print param.name, ': ', param.get_value()
        print
def __init__(self, feature_dim, hidden_dim, output_dim): self.image_embed = Linear(input_dim=feature_dim, output_dim=hidden_dim, weights_init=IsotropicGaussian(0.01), biases_init=Constant(0), use_bias=False, name='image_embed') self.word_embed = Linear(input_dim=feature_dim, output_dim=hidden_dim, weights_init=IsotropicGaussian(0.01), biases_init=Constant(0), use_bias=False, name='word_embed') self.r_embed = Linear(input_dim=feature_dim, output_dim=hidden_dim, weights_init=IsotropicGaussian(0.01), biases_init=Constant(0), use_bias=False, name='r_embed') self.m_to_s = Linear(input_dim=hidden_dim, output_dim=1, weights_init=IsotropicGaussian(0.01), biases_init=Constant(0), use_bias=False, name='m_to_s') self.attention_dist = Softmax(name='attention_dist_softmax') self.r_to_r = Linear(input_dim=feature_dim, output_dim=feature_dim, weights_init=IsotropicGaussian(0.01), biases_init=Constant(0), use_bias=False, name='r_to_r') # self.r_to_g = Linear(input_dim=feature_dim, # output_dim=output_dim, # weights_init=IsotropicGaussian(0.01), # biases_init=Constant(0), # use_bias=False, # name='r_to_g') self.image_embed.initialize() self.word_embed.initialize() self.r_embed.initialize() self.m_to_s.initialize() self.r_to_r.initialize() # self.r_to_g.initialize() # the sequence to sequence LSTM self.seq = LSTM(output_dim, name='rewatcher_seq', weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) self.seq_embed = Linear(feature_dim, output_dim * 4, name='rewatcher_seq_embed', weights_init=IsotropicGaussian(0.01), biases_init=Constant(0), use_bias=False) self.seq.initialize() self.seq_embed.initialize()
def lstm_layer(in_size, dim, x, h, n, task, first_layer=False): if connect_h_to_h == 'all-previous': if first_layer: lstm_input = x linear = Linear(input_dim=in_size, output_dim=dim * 4, name='linear' + str(n) + '-' + str(task)) elif connect_x_to_h: lstm_input = T.concatenate([x] + [hidden for hidden in h], axis=2) linear = Linear(input_dim=in_size + dim * (n), output_dim=dim * 4, name='linear' + str(n) + '-' + str(task)) else: lstm_input = T.concatenate([hidden for hidden in h], axis=2) linear = Linear(input_dim=dim * (n + 1), output_dim=dim * 4, name='linear' + str(n) + '-' + str(task)) elif connect_h_to_h == 'two-previous': if first_layer: lstm_input = x linear = Linear(input_dim=in_size, output_dim=dim * 4, name='linear' + str(n) + '-' + str(task)) elif connect_x_to_h: lstm_input = T.concatenate([x] + h[max(0, n - 2):n], axis=2) linear = Linear(input_dim=in_size + dim * 2 if n > 1 else in_size + dim, output_dim=dim * 4, name='linear' + str(n) + '-' + str(task)) else: lstm_input = T.concatenate(h[max(0, n - 2):n], axis=2) linear = Linear(input_dim=dim * 2 if n > 1 else dim, output_dim=dim * 4, name='linear' + str(n) + '-' + str(task)) elif connect_h_to_h == 'one-previous': if first_layer: lstm_input = x linear = Linear(input_dim=in_size, output_dim=dim * 4, name='linear' + str(n) + '-' + str(task)) elif connect_x_to_h: lstm_input = T.concatenate([x] + [h[n - 1]], axis=2) linear = Linear(input_dim=in_size + dim, output_dim=dim * 4, name='linear' + str(n) + '-' + str(task)) else: lstm_input = h[n - 1] linear = Linear(input_dim=dim, output_dim=dim * 4, name='linear' + str(n) + '-' + str(task)) lstm = LSTM(dim=dim, name=layer_models[n] + str(n) + '-' + str(task)) initialize([linear, lstm]) if layer_models[n] == 'lstm': return lstm.apply(linear.apply(lstm_input)) elif layer_models[n] == 'mt_lstm': return lstm.apply(linear.apply(lstm_input), time_scale=layer_resolutions[n], time_offset=layer_execution_time_offset[n])
def construct_model(activation_function, r_dim, hidden_dim, out_dim): # Construct the model r = tensor.fmatrix('r') x = tensor.fmatrix('x') y = tensor.ivector('y') nx = x.shape[0] nj = x.shape[1] # also is r.shape[0] nr = r.shape[1] # r is nj x nr # x is nx x nj # y is nx # Get a representation of r of size r_dim r = DAE(r) # r is now nj x r_dim # r_rep is nx x nj x r_dim r_rep = r[None, :, :].repeat(axis=0, repeats=nx) # x3 is nx x nj x 1 x3 = x[:, :, None] # concat is nx x nj x (r_dim + 1) concat = tensor.concatenate([r_rep, x3], axis=2) # Change concat from Batch x Time x Features to T X B x F rnn_input = concat.dimshuffle(1, 0, 2) linear = Linear(input_dim=r_dim + 1, output_dim=4 * hidden_dim, name="input_linear") lstm = LSTM(dim=hidden_dim, activation=activation_function, name="hidden_recurrent") top_linear = Linear(input_dim=hidden_dim, output_dim=out_dim, name="out_linear") pre_rnn = linear.apply(rnn_input) states = lstm.apply(pre_rnn)[0] activations = top_linear.apply(states) activations = tensor.mean(activations, axis=0) cost = Softmax().categorical_cross_entropy(y, activations) pred = activations.argmax(axis=1) error_rate = tensor.neq(y, pred).mean() # Initialize parameters for brick in (linear, lstm, top_linear): brick.weights_init = IsotropicGaussian(0.1) brick.biases_init = Constant(0.) brick.initialize() return cost, error_rate
def create_rnn(hidden_dim, vocab_dim,mode="rnn"): # input x = tensor.imatrix('inchar') y = tensor.imatrix('outchar') # W = LookupTable( name = "W1", #dim = hidden_dim*4, dim = hidden_dim, length = vocab_dim, weights_init = initialization.IsotropicGaussian(0.01), biases_init = initialization.Constant(0) ) if mode == "lstm": # Long Short Term Memory H = LSTM( hidden_dim, name = 'H', weights_init = initialization.IsotropicGaussian(0.01), biases_init = initialization.Constant(0.0) ) else: # recurrent history weight H = SimpleRecurrent( name = "H", dim = hidden_dim, activation = Tanh(), weights_init = initialization.IsotropicGaussian(0.01) ) # S = Linear( name = "W2", input_dim = hidden_dim, output_dim = vocab_dim, weights_init = initialization.IsotropicGaussian(0.01), biases_init = initialization.Constant(0) ) A = NDimensionalSoftmax( name = "softmax" ) initLayers([W,H,S]) activations = W.apply(x) hiddens = H.apply(activations)#[0] activations2 = S.apply(hiddens) y_hat = A.apply(activations2, extra_ndim=1) cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean() cg = ComputationGraph(cost) #print VariableFilter(roles=[WEIGHT])(cg.variables) #W1,H,W2 = VariableFilter(roles=[WEIGHT])(cg.variables) layers = (x, W, H, S, A, y) return cg, layers, y_hat, cost
def add_lstm(input_dim, input_var):
    linear = Linear(input_dim=input_dim, output_dim=input_dim * 4,
                    name="linear_layer")
    lstm = LSTM(dim=input_dim, name="lstm_layer")
    testing_init(linear)
    # linear.initialize()
    default_init(lstm)
    h = linear.apply(input_var)
    return lstm.apply(h)
def create_model(self): input_dim = self.input_dim x = self.x y = self.y p = self.p mask = self.mask hidden_dim = self.hidden_dim embedding_dim = self.embedding_dim lookup = LookupTable(self.dict_size, embedding_dim, weights_init=IsotropicGaussian(0.001), name='LookupTable') x_to_h = Linear(embedding_dim, hidden_dim * 4, name='x_to_h', weights_init=IsotropicGaussian(0.001), biases_init=Constant(0.0)) lstm = LSTM(hidden_dim, name='lstm', weights_init=IsotropicGaussian(0.001), biases_init=Constant(0.0)) h_to_o = MLP([Logistic()], [hidden_dim, 1], weights_init=IsotropicGaussian(0.001), biases_init=Constant(0), name='h_to_o') lookup.initialize() x_to_h.initialize() lstm.initialize() h_to_o.initialize() embed = lookup.apply(x).reshape( (x.shape[0], x.shape[1], self.embedding_dim)) embed.name = "embed_vec" x_transform = x_to_h.apply(embed.transpose(1, 0, 2)) x_transform.name = "Transformed X" self.lookup = lookup self.x_to_h = x_to_h self.lstm = lstm self.h_to_o = h_to_o #if mask is None: h, c = lstm.apply(x_transform) #else: #h, c = lstm.apply(x_transform, mask=mask) h.name = "hidden_state" c.name = "cell state" # only values of hidden units of the last timeframe are used for # the classification indices = T.sum(mask, axis=0) - 1 rel_hid = h[indices, T.arange(h.shape[1])] out = self.h_to_o.apply(rel_hid) probs = out return probs
def lstm_layer(dim, h, n, x_mask, first, **kwargs):
    linear = Linear(input_dim=dim, output_dim=dim * 4, name='linear' + str(n))
    lstm = LSTM(dim=dim, activation=Rectifier(), name='lstm' + str(n))
    initialize([linear, lstm])
    applyLin = linear.apply(h)
    if first:
        lstmApply = lstm.apply(applyLin, mask=x_mask, **kwargs)[0]
    else:
        lstmApply = lstm.apply(applyLin, **kwargs)[0]
    return lstmApply
def lstm_layer(self, h, n):
    """
    Performs the LSTM update for a batch of word sequences
    :param h The word embeddings for this update
    :param n The number of layers of the LSTM
    """
    # Maps the word embedding to a dimensionality to be used in the LSTM
    linear = Linear(input_dim=self.hidden_size,
                    output_dim=self.hidden_size * 4,
                    name='linear_lstm' + str(n))
    initialize(linear, sqrt(6.0 / (5 * self.hidden_size)))
    lstm = LSTM(dim=self.hidden_size, name='lstm' + str(n))
    initialize(lstm, 0.08)
    return lstm.apply(linear.apply(h))
class CoreNetwork(BaseRecurrent, Initializable):

    def __init__(self, input_dim, dim, **kwargs):
        super(CoreNetwork, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.dim = dim
        self.lstm = LSTM(dim=dim, name=self.name + '_lstm',
                         weights_init=self.weights_init,
                         biases_init=self.biases_init)
        self.proj = Linear(input_dim=input_dim, output_dim=dim * 4,
                           name=self.name + '_proj',
                           weights_init=self.weights_init,
                           biases_init=self.biases_init)
        self.children = [self.lstm, self.proj]

    def get_dim(self, name):
        if name == 'inputs':
            return self.input_dim
        elif name in ['state', 'cell']:
            return self.dim
        else:
            raise ValueError

    @recurrent(sequences=['inputs'], states=['state', 'cell'],
               contexts=[], outputs=['state', 'cell'])
    def apply(self, inputs, state, cell):
        state, cell = self.lstm.apply(self.proj.apply(inputs),
                                      state, cell, iterate=False)
        return state, cell
def __init__(self, batch_size, num_subwords, num_words,
             subword_embedding_size, input_vocab_size,
             subword_RNN_hidden_state_size, table_width=0.08,
             init_type='xavier', **kwargs):
    super(LSTMCompositionalLayer, self).__init__(**kwargs)

    self.batch_size = batch_size
    self.num_subwords = num_subwords  # number of subwords which make up a word
    self.num_words = num_words  # number of words in the sentence
    self.subword_embedding_size = subword_embedding_size
    self.input_vocab_size = input_vocab_size
    self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size
    self.table_width = table_width

    # create the look up table
    self.lookup = LookupTable(length=self.input_vocab_size,
                              dim=self.subword_embedding_size,
                              name='input_lookup')
    self.lookup.weights_init = Uniform(width=table_width)
    self.lookup.biases_init = Constant(0)

    if init_type == 'xavier':
        linear_init = XavierInitializationOriginal(
            self.subword_embedding_size, self.subword_RNN_hidden_state_size)
        lstm_init = XavierInitializationOriginal(
            self.subword_embedding_size, self.subword_RNN_hidden_state_size)
    else:  # default is gaussian
        linear_init = IsotropicGaussian()
        lstm_init = IsotropicGaussian()

    # The `inputs` are then split in this order: Input gates, forget gates,
    # cells and output gates
    self.linear_forward = Linear(
        input_dim=self.subword_embedding_size,
        output_dim=self.subword_RNN_hidden_state_size * 4,
        name='linear_forward',
        weights_init=linear_init,
        biases_init=Constant(0.0))

    self.compositional_subword_to_word_RNN_forward = LSTM(
        dim=self.subword_RNN_hidden_state_size, activation=Tanh(),
        name='subword_RNN_forward', weights_init=lstm_init,
        biases_init=Constant(0.0))

    self.children = [self.lookup, self.linear_forward,
                     self.compositional_subword_to_word_RNN_forward]
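# Illustration (a sketch, mirroring the gate ordering noted in the comment
# above): the 4*dim pre-activation produced by the Linear brick is consumed
# by the LSTM in fixed quarters.
import numpy

dim = 5
pre_activation = numpy.zeros((2, 4 * dim))  # (batch, 4 * dim)
input_gates = pre_activation[:, 0 * dim:1 * dim]
forget_gates = pre_activation[:, 1 * dim:2 * dim]
cell_candidates = pre_activation[:, 2 * dim:3 * dim]
output_gates = pre_activation[:, 3 * dim:4 * dim]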
def __init__(self, image_shape, patch_shape, hidden_dim, n_spatial_dims,
             whatwhere_interaction, prefork_area_transform,
             postmerge_area_transform, patch_transform, batch_normalize,
             response_transform, location_std, scale_std, cutoff,
             batched_window, initargs, emitter, **kwargs):
    self.rnn = LSTM(activation=Tanh(), dim=hidden_dim, name="recurrent",
                    weights_init=IsotropicGaussian(1e-4),
                    biases_init=Constant(0))
    self.locator = masonry.Locator(hidden_dim, n_spatial_dims,
                                   area_transform=prefork_area_transform,
                                   location_std=location_std,
                                   scale_std=scale_std,
                                   **initargs)
    self.cropper = crop.LocallySoftRectangularCropper(
        n_spatial_dims=n_spatial_dims,
        image_shape=image_shape, patch_shape=patch_shape,
        kernel=crop.Gaussian(), cutoff=cutoff,
        batched_window=batched_window)
    self.merger = masonry.Merger(
        patch_transform=patch_transform,
        area_transform=postmerge_area_transform,
        response_transform=response_transform,
        n_spatial_dims=n_spatial_dims,
        whatwhere_interaction=whatwhere_interaction,
        batch_normalize=batch_normalize,
        **initargs)
    self.attention = masonry.SpatialAttention(
        self.locator, self.cropper, self.merger, name="sa")
    self.emitter = emitter
    self.model = masonry.RecurrentAttentionModel(
        self.rnn, self.attention, self.emitter, name="ram")
def bilstm_layer(in_dim, inp, h_dim, n):
    linear = Linear(input_dim=in_dim, output_dim=h_dim * 4,
                    name='linear' + str(n) + inp.name)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n) + inp.name)
    bilstm = Bidirectional(prototype=lstm)
    bilstm.name = 'bilstm' + str(n) + inp.name
    initialize([linear, bilstm])
    return bilstm.apply(linear.apply(inp))[0]
class LinearLSTM(Initializable):

    def __init__(self, input_dim, output_dim, lstm_dim,
                 print_intermediate=False, print_attrs=['__str__'], **kwargs):
        super(LinearLSTM, self).__init__(**kwargs)

        self.x_to_h = Linear(input_dim, lstm_dim * 4, name='x_to_h',
                             weights_init=IsotropicGaussian(),
                             biases_init=Constant(0.0))
        self.lstm = LSTM(lstm_dim, name='lstm',
                         weights_init=IsotropicGaussian(),
                         biases_init=Constant(0.0))
        self.h_to_o = Linear(lstm_dim, output_dim, name='h_to_o',
                             weights_init=IsotropicGaussian(),
                             biases_init=Constant(0.0))
        self.children = [self.x_to_h, self.lstm, self.h_to_o]

        self.print_intermediate = print_intermediate
        self.print_attrs = print_attrs

    @application
    def apply(self, source):
        x_linear = self.x_to_h.apply(
            source.reshape((source.shape[1], source.shape[0],
                            source.shape[2])))
        x_linear.name = 'x_linear'
        if self.print_intermediate:
            x_linear = Print(message='x_linear info',
                             attrs=self.print_attrs)(x_linear)

        h, c = self.lstm.apply(x_linear)
        if self.print_intermediate:
            h = Print(message="hidden states info",
                      attrs=self.print_attrs)(h)

        y_hat = self.h_to_o.apply(h)
        y_hat.name = 'y_hat'
        if self.print_intermediate:
            y_hat = Print(message="y_hat info",
                          attrs=self.print_attrs)(y_hat)
        return y_hat

    def initialize(self):
        for child in self.children:
            child.initialize()

    def reset_allocation(self):
        for child in self.children:
            child.allocated = False
def __init__(self, input1_size, input2_size, lookup1_dim=200, lookup2_dim=200, hidden_size=512): self.hidden_size = hidden_size self.input1_size = input1_size self.input2_size = input2_size self.lookup1_dim = lookup1_dim self.lookup2_dim = lookup2_dim x1 = tensor.lmatrix('durations') x2 = tensor.lmatrix('syllables') y = tensor.lmatrix('pitches') lookup1 = LookupTable(dim=self.lookup1_dim, length=self.input1_size, name='lookup1', weights_init=initialization.Uniform(width=0.01), biases_init=Constant(0)) lookup1.initialize() lookup2 = LookupTable(dim=self.lookup2_dim, length=self.input2_size, name='lookup2', weights_init=initialization.Uniform(width=0.01), biases_init=Constant(0)) lookup2.initialize() merge = Merge(['lookup1', 'lookup2'], [self.lookup1_dim, self.lookup2_dim], self.hidden_size, weights_init=initialization.Uniform(width=0.01), biases_init=Constant(0)) merge.initialize() recurrent_block = LSTM(dim=self.hidden_size, activation=Tanh(), weights_init=initialization.Uniform(width=0.01)) #RecurrentStack([LSTM(dim=self.hidden_size, activation=Tanh())] * 3) recurrent_block.initialize() linear = Linear(input_dim=self.hidden_size, output_dim=self.input1_size, weights_init=initialization.Uniform(width=0.01), biases_init=Constant(0)) linear.initialize() softmax = NDimensionalSoftmax() l1 = lookup1.apply(x1) l2 = lookup2.apply(x2) m = merge.apply(l1, l2) h = recurrent_block.apply(m) a = linear.apply(h) y_hat = softmax.apply(a, extra_ndim=1) # ValueError: x must be 1-d or 2-d tensor of floats. Got TensorType(float64, 3D) self.Cost = softmax.categorical_cross_entropy(y, a, extra_ndim=1).mean() self.ComputationGraph = ComputationGraph(self.Cost) self.Model = Model(y_hat)
def __init__(self, dims=(88, 100, 100), **kwargs):
    super(Rnn, self).__init__(**kwargs)
    self.dims = dims

    self.input_transform = Linear(input_dim=dims[0], output_dim=dims[1],
                                  weights_init=IsotropicGaussian(0.01),
                                  # biases_init=Constant(0.0),
                                  use_bias=False,
                                  name="input_transform")

    self.gru_layer = SimpleRecurrent(dim=dims[1], activation=Tanh(),
                                     weights_init=IsotropicGaussian(0.01),
                                     biases_init=Constant(0.0),
                                     use_bias=True,
                                     name="gru_rnn_layer")

    # TODO: find a way to automatically set the output dim in case of
    # lstm vs normal rnn
    self.linear_trans = Linear(input_dim=dims[1], output_dim=dims[2] * 4,
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0),
                               use_bias=False,
                               name="h2h_transform")

    self.lstm_layer = LSTM(dim=dims[2], activation=Tanh(),
                           weights_init=IsotropicGaussian(0.01),
                           biases_init=Constant(0.0),
                           use_bias=True,
                           name="lstm_rnn_layer")

    self.out_transform = MLP(activations=[Sigmoid()], dims=[dims[2], dims[0]],
                             weights_init=IsotropicGaussian(0.01),
                             use_bias=True,
                             biases_init=Constant(0.0),
                             name="out_layer")

    self.children = [self.input_transform, self.gru_layer, self.linear_trans,
                     self.lstm_layer, self.out_transform]
def example4():
    """LSTM -> crashes when initializing the LSTM."""
    x = tensor.tensor3('x')
    dim = 3

    # gate_inputs = theano.function([x], x*4)
    gate_inputs = Linear(input_dim=dim, output_dim=dim * 4, name="linear",
                         weights_init=initialization.Identity(),
                         biases_init=Constant(2))
    lstm = LSTM(dim=dim, activation=Tanh(),
                weights_init=IsotropicGaussian(), biases_init=Constant(0))

    gate_inputs.initialize()
    hg = gate_inputs.apply(x)

    # print(gate_inputs.parameters)
    # print(gate_inputs.parameters[1].get_value())

    lstm.initialize()
    h, cells = lstm.apply(hg)
    print(lstm.parameters)

    f = theano.function([x], h)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
    print(f(4 * np.ones((dim, 1, dim), dtype=theano.config.floatX)))

    print("Good Job!")

    # lstm_output =

    # Initial State
    h0 = tensor.matrix('h0')
    c = tensor.matrix('cells')
    h, c1 = lstm.apply(inputs=x, states=h0, cells=c)
    # lstm.apply(states=h0, cells=cells, inputs=gate_inputs)

    f = theano.function([x, h0, c], h)
    print("a")
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX),
            np.ones((1, 3), dtype=theano.config.floatX),
            np.ones((1, 3), dtype=theano.config.floatX)))
def getBidir2(input_dim, input_var):
    """
    LSTM-based bidirectional
    """
    bidir = Bidirectional(weights_init=Orthogonal(),
                          prototype=LSTM(dim=input_dim, name='lstm'))
    # bidir.allocate()
    bidir.initialize()
    h = bidir.apply(input_var)
    net = add_softmax_layer(h, input_dim, 2)
    return net
def __init__(self, dim, activation=None, depth=2, name=None,
             lstm_name=None, **kwargs):
    super(LSTMstack, self).__init__(name=name, **kwargs)
    # use the name already processed by the superclass
    name = self.name

    self.dim = dim
    self.children = []
    self.depth = depth

    for d in range(self.depth):
        layer_node = LSTM(dim, activation, name=lstm_name)
        layer_node.name = '%s_%s_%d' % (name, layer_node.name, d)
        if d > 0:
            # convert states of previous layer to inputs of new layer
            layer_name = '%s_%d_%d' % (name, d - 1, d)
            input_dim = layer_node.get_dim('inputs')
            self.children.append(Linear(dim, input_dim, use_bias=True,
                                        name=layer_name))
        self.children.append(layer_node)
def __init__(self, layers_no, dim, alphabet_size, batch_size):
    # characters -> 1-of-N embedder -> N-to-dim -> LSTM#0 -> ...
    # -> LSTM#(layers_no-1) -> dim-to-N -> softmax
    # TODO define the error (cost)
    # TODO first_resizer

    # LSTM stack
    self.stack = []
    lstms = map(lambda _: LSTM(dim=dim), range(layers_no))
    for lstm in lstms:
        state, cell = lstm.initial_states(batch_size)
        # store each layer together with its initial state and cell
        self.stack.append((lstm, state, cell))
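# Alternative sketch (an assumption, not the author's code): Blocks'
# RecurrentStack builds the same LSTM#0 -> ... -> LSTM#(layers_no-1) chain
# and manages the per-layer states and cells itself.
from blocks.bricks.recurrent import LSTM, RecurrentStack
from blocks.initialization import IsotropicGaussian, Constant

layers_no, dim = 3, 100  # example sizes
stack = RecurrentStack([LSTM(dim=dim) for _ in range(layers_no)],
                       weights_init=IsotropicGaussian(0.01),
                       biases_init=Constant(0.0))
stack.initialize()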
def __init__(self, dimension, input_size, embed_input=False, **kwargs):
    super(LSTMEncoder, self).__init__(**kwargs)
    if embed_input:
        self.embedder = LookupTable(input_size, dimension)
    else:
        self.embedder = Linear(input_size, dimension)
    self.fork = Fork(['inputs'], dimension, output_dims=[dimension],
                     prototype=Linear(dimension, 4 * dimension))
    encoder = Bidirectional(LSTM(dim=dimension, activation=Tanh()))
    self.encoder = encoder
    self.children = [encoder, self.embedder, self.fork]
def setUp(self):
    n_iter = 2
    x_dim = 8
    z_dim = 10
    dec_dim = 12
    enc_dim = 16
    read_dim = 2 * x_dim

    rnninits = {
        # 'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        # 'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
    writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)

    encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
    decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)
    encoder_mlp = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim],
                      name="MLP_enc", **inits)
    decoder_mlp = MLP([Identity()], [z_dim, 4 * dec_dim],
                      name="MLP_dec", **inits)
    q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

    self.draw = DrawModel(n_iter, reader, encoder_mlp, encoder_rnn,
                          q_sampler, decoder_mlp, decoder_rnn, writer)
    self.draw.initialize()
def __init__(self, word_dim, hidden_dim):
    self.forward_lstm = LSTM(hidden_dim,
                             name='question_forward_lstm',
                             weights_init=IsotropicGaussian(0.01),
                             biases_init=Constant(0))
    self.backward_lstm = LSTM(hidden_dim,
                              name='question_backward_lstm',
                              weights_init=IsotropicGaussian(0.01),
                              biases_init=Constant(0))
    self.x_to_h_forward = Linear(word_dim, hidden_dim * 4,
                                 name='word_x_to_h_forward',
                                 weights_init=IsotropicGaussian(0.01),
                                 biases_init=Constant(0))
    self.x_to_h_backward = Linear(word_dim, hidden_dim * 4,
                                  name='word_x_to_h_backward',
                                  weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0))

    self.forward_lstm.initialize()
    self.backward_lstm.initialize()
    self.x_to_h_forward.initialize()
    self.x_to_h_backward.initialize()
def setUp(self):
    depth = 4
    self.depth = depth
    dim = 3  # don't change, hardwired in the code
    transitions = [LSTM(dim=dim) for _ in range(depth)]
    self.stack0 = RecurrentStack(transitions,
                                 weights_init=Constant(2),
                                 biases_init=Constant(0))
    self.stack0.initialize()

    self.stack2 = RecurrentStack(transitions,
                                 weights_init=Constant(2),
                                 biases_init=Constant(0),
                                 skip_connections=True)
    self.stack2.initialize()
def build_theano_functions(self) : #import pdb ; pdb.set_trace() x = T.fmatrix('x') s = T.fvector('s') mu = T.fvector('mu') mu = T.reshape(mu,(self.number_of_mix,1)) pi = T.fvector('pi') lstm = LSTM( dim=self.input_dim/4, weights_init=IsotropicGaussian(0.5), biases_init=Constant(1)) lstm.initialize() h, c = lstm.apply(x) h = h[0][0][-1] LL = T.sum(pi*(1./(T.sqrt(2.*np.pi)*s))*T.exp(\ -0.5*(h-mu)**2/T.reshape(s,(self.number_of_mix,1))**2.).sum(axis=1)) cost = -T.log(LL) #cg = ComputationGraph(cost) #self.cg = cg #parameters = cg.parameters model = Model(cost) self.model = model parameters = model.parameters grads = T.grad(cost, parameters) updates = [] for i in range(len(grads)) : updates.append(tuple([parameters[i], parameters[i] - self.lr*grads[i]])) gradf = theano.function([x,s,mu,pi],[cost],updates=updates) f = theano.function([x],[h]) return gradf, f
class Encoder(Initializable):

    def __init__(self, image_feature_dim, embedding_dim, **kwargs):
        super(Encoder, self).__init__(**kwargs)

        self.image_embedding = Linear(
            input_dim=image_feature_dim,
            output_dim=embedding_dim,
            name="image_embedding")

        self.to_inputs = Linear(
            input_dim=embedding_dim,
            # times 4 cuz vstack(input, forget, cell, hidden)
            output_dim=embedding_dim * 4,
            name="to_inputs")

        self.transition = LSTM(dim=embedding_dim, name="transition")

        self.children = [self.image_embedding, self.to_inputs,
                         self.transition]

    @application(inputs=['image_vects', 'word_vects'],
                 outputs=['image_embedding', 'sentence_embedding'])
    def apply(self, image_vects, word_vects):
        image_embedding = self.image_embedding.apply(image_vects)

        inputs = self.to_inputs.apply(word_vects)
        # shuffle dimensions to correspond to (sequence, batch, features)
        inputs = inputs.dimshuffle(1, 0, 2)
        hidden, cells = self.transition.apply(inputs=inputs, mask=None)

        # last hidden state represents the accumulation of word embeddings
        # (i.e. the sentence embedding)
        sentence_embedding = hidden[-1]
        return image_embedding, sentence_embedding
def __init__(self, input_dim, state_dim, activation=Tanh(),
             state_weights_init=None, input_weights_init=None,
             biases_init=init.Constant(0), **kwargs):
    super(LSTMLayer, self).__init__(biases_init=biases_init, **kwargs)
    if state_weights_init is None:
        state_weights_init = init.IsotropicGaussian(0.01)
    if input_weights_init is None:
        input_weights_init = init.IsotropicGaussian(0.01)
    if biases_init is None:
        biases_init = init.Constant(0)

    self.input_transformation = Linear(input_dim=input_dim,
                                       output_dim=state_dim * 4,
                                       weights_init=input_weights_init,
                                       biases_init=biases_init)
    self.lstm = LSTM(dim=state_dim, activation=activation,
                     weights_init=state_weights_init)
    self.children = [self.input_transformation, self.lstm]
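# The wrapper above only shows the constructor; this is a hedged sketch (an
# assumption, not the author's code) of the apply method such a layer would
# typically expose, wired the same way as the other snippets here. The class
# name LSTMLayerWithApply exists only for this sketch.
from blocks.bricks.base import application


class LSTMLayerWithApply(LSTMLayer):
    @application
    def apply(self, inputs, mask=None):
        # map inputs to the 4 * state_dim gate pre-activations, then run the
        # LSTM; returns (hidden states, cells), each (time, batch, state_dim)
        projected = self.input_transformation.apply(inputs)
        return self.lstm.apply(projected, mask=mask)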
class TestLSTM(unittest.TestCase): def setUp(self): self.lstm = LSTM(dim=3, weights_init=Constant(2), biases_init=Constant(0)) self.lstm.initialize() def test_one_step(self): h0 = tensor.matrix('h0') c0 = tensor.matrix('c0') x = tensor.matrix('x') h1, c1 = self.lstm.apply(x, h0, c0, iterate=False) next_h = theano.function(inputs=[x, h0, c0], outputs=[h1]) h0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]], dtype=theano.config.floatX) c0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]], dtype=theano.config.floatX) x_val = 0.1 * numpy.array([range(12), range(12, 24)], dtype=theano.config.floatX) W_state_val = 2 * numpy.ones((3, 12), dtype=theano.config.floatX) W_cell_to_in = 2 * numpy.ones((3,), dtype=theano.config.floatX) W_cell_to_out = 2 * numpy.ones((3,), dtype=theano.config.floatX) W_cell_to_forget = 2 * numpy.ones((3,), dtype=theano.config.floatX) # omitting biases because they are zero activation = numpy.dot(h0_val, W_state_val) + x_val def sigmoid(x): return 1. / (1. + numpy.exp(-x)) i_t = sigmoid(activation[:, :3] + c0_val * W_cell_to_in) f_t = sigmoid(activation[:, 3:6] + c0_val * W_cell_to_forget) next_cells = f_t * c0_val + i_t * numpy.tanh(activation[:, 6:9]) o_t = sigmoid(activation[:, 9:12] + next_cells * W_cell_to_out) h1_val = o_t * numpy.tanh(next_cells) assert_allclose(h1_val, next_h(x_val, h0_val, c0_val)[0], rtol=1e-6) def test_many_steps(self): x = tensor.tensor3('x') mask = tensor.matrix('mask') h, c = self.lstm.apply(x, mask=mask, iterate=True) calc_h = theano.function(inputs=[x, mask], outputs=[h]) x_val = (0.1 * numpy.asarray( list(itertools.islice(itertools.permutations(range(12)), 0, 24)), dtype=theano.config.floatX)) x_val = numpy.ones((24, 4, 12), dtype=theano.config.floatX) * x_val[:, None, :] mask_val = numpy.ones((24, 4), dtype=theano.config.floatX) mask_val[12:24, 3] = 0 h_val = numpy.zeros((25, 4, 3), dtype=theano.config.floatX) c_val = numpy.zeros((25, 4, 3), dtype=theano.config.floatX) W_state_val = 2 * numpy.ones((3, 12), dtype=theano.config.floatX) W_cell_to_in = 2 * numpy.ones((3,), dtype=theano.config.floatX) W_cell_to_out = 2 * numpy.ones((3,), dtype=theano.config.floatX) W_cell_to_forget = 2 * numpy.ones((3,), dtype=theano.config.floatX) def sigmoid(x): return 1. / (1. + numpy.exp(-x)) for i in range(1, 25): activation = numpy.dot(h_val[i-1], W_state_val) + x_val[i-1] i_t = sigmoid(activation[:, :3] + c_val[i-1] * W_cell_to_in) f_t = sigmoid(activation[:, 3:6] + c_val[i-1] * W_cell_to_forget) c_val[i] = f_t * c_val[i-1] + i_t * numpy.tanh(activation[:, 6:9]) o_t = sigmoid(activation[:, 9:12] + c_val[i] * W_cell_to_out) h_val[i] = o_t * numpy.tanh(c_val[i]) h_val[i] = (mask_val[i - 1, :, None] * h_val[i] + (1 - mask_val[i - 1, :, None]) * h_val[i - 1]) c_val[i] = (mask_val[i - 1, :, None] * c_val[i] + (1 - mask_val[i - 1, :, None]) * c_val[i - 1]) h_val = h_val[1:] assert_allclose(h_val, calc_h(x_val, mask_val)[0], rtol=1e-04) # Also test that initial state is a parameter initial1, initial2 = VariableFilter(roles=[INITIAL_STATE])( ComputationGraph(h)) assert is_shared_variable(initial1) assert is_shared_variable(initial2) assert {initial1.name, initial2.name} == { 'initial_state', 'initial_cells'}
def __init__(self): inp = tensor.tensor3('input') inp = inp.dimshuffle(1,0,2) target = tensor.matrix('target') target = target.reshape((target.shape[0],)) product = tensor.lvector('product') missing = tensor.eq(inp, 0) train_input_mean = 1470614.1 train_input_std = 3256577.0 trans_1 = tensor.concatenate((inp[1:,:,:],tensor.zeros((1,inp.shape[1],inp.shape[2]))), axis=0) trans_2 = tensor.concatenate((tensor.zeros((1,inp.shape[1],inp.shape[2])), inp[:-1,:,:]), axis=0) inp = tensor.switch(missing,(trans_1+trans_2)/2, inp) lookup = LookupTable(length = 352, dim=4*hidden_dim) product_embed= lookup.apply(product) salut = tensor.concatenate((inp, missing),axis =2) linear = Linear(input_dim=input_dim+1, output_dim=4*hidden_dim, name="lstm_in") inter = linear.apply(salut) inter = inter + product_embed[None,:,:] lstm = LSTM(dim=hidden_dim, activation=activation_function, name="lstm") hidden, cells = lstm.apply(inter) linear2= Linear(input_dim = hidden_dim, output_dim = out_dim, name="ouput_linear") pred = linear2.apply(hidden[-1])*train_input_std + train_input_mean pred = pred.reshape((product.shape[0],)) cost = tensor.mean(abs((pred-target)/target)) # Initialize all bricks for brick in [linear, linear2, lstm, lookup]: brick.weights_init = IsotropicGaussian(0.1) brick.biases_init = Constant(0.) brick.initialize() # Apply noise and dropout cg = ComputationGraph([cost]) if w_noise_std > 0: noise_vars = VariableFilter(roles=[WEIGHT])(cg) cg = apply_noise(cg, noise_vars, w_noise_std) if i_dropout > 0: cg = apply_dropout(cg, [hidden], i_dropout) [cost_reg] = cg.outputs cost_reg += 1e-20 if cost_reg is not cost: self.cost = cost self.cost_reg = cost_reg cost_reg.name = 'cost_reg' cost.name = 'cost' self.sgd_cost = cost_reg self.monitor_vars = [[cost, cost_reg]] else: self.cost = cost cost.name = 'cost' self.sgd_cost = cost self.monitor_vars = [[cost]] self.pred = pred pred.name = 'pred'
def main(nvis, nhid, encoding_lstm_dim, decoding_lstm_dim, T=1): x = tensor.matrix('features') # Construct and initialize model encoding_mlp = MLP([Tanh()], [None, None]) decoding_mlp = MLP([Tanh()], [None, None]) encoding_lstm = LSTM(dim=encoding_lstm_dim) decoding_lstm = LSTM(dim=decoding_lstm_dim) draw = DRAW(nvis=nvis, nhid=nhid, T=T, encoding_mlp=encoding_mlp, decoding_mlp=decoding_mlp, encoding_lstm=encoding_lstm, decoding_lstm=decoding_lstm, biases_init=Constant(0), weights_init=Orthogonal()) draw.push_initialization_config() encoding_lstm.weights_init = IsotropicGaussian(std=0.001) decoding_lstm.weights_init = IsotropicGaussian(std=0.001) draw.initialize() # Compute cost cost = -draw.log_likelihood_lower_bound(x).mean() cost.name = 'nll_upper_bound' model = Model(cost) # Datasets and data streams mnist_train = BinarizedMNIST('train') train_loop_stream = ForceFloatX(DataStream( dataset=mnist_train, iteration_scheme=SequentialScheme(mnist_train.num_examples, 100))) train_monitor_stream = ForceFloatX(DataStream( dataset=mnist_train, iteration_scheme=SequentialScheme(mnist_train.num_examples, 500))) mnist_valid = BinarizedMNIST('valid') valid_monitor_stream = ForceFloatX(DataStream( dataset=mnist_valid, iteration_scheme=SequentialScheme(mnist_valid.num_examples, 500))) mnist_test = BinarizedMNIST('test') test_monitor_stream = ForceFloatX(DataStream( dataset=mnist_test, iteration_scheme=SequentialScheme(mnist_test.num_examples, 500))) # Get parameters and monitoring channels computation_graph = ComputationGraph([cost]) params = VariableFilter(roles=[PARAMETER])(computation_graph.variables) monitoring_channels = dict([ ('avg_' + channel.tag.name, channel.mean()) for channel in VariableFilter(name='.*term$')(computation_graph.auxiliary_variables)]) for name, channel in monitoring_channels.items(): channel.name = name monitored_quantities = monitoring_channels.values() + [cost] # Training loop step_rule = RMSProp(learning_rate=1e-3, decay_rate=0.95) algorithm = GradientDescent(cost=cost, params=params, step_rule=step_rule) algorithm.add_updates(computation_graph.updates) main_loop = MainLoop( model=model, data_stream=train_loop_stream, algorithm=algorithm, extensions=[ Timing(), SerializeMainLoop('vae.pkl', save_separately=['model']), FinishAfter(after_n_epochs=200), DataStreamMonitoring( monitored_quantities, train_monitor_stream, prefix="train", updates=computation_graph.updates), DataStreamMonitoring( monitored_quantities, valid_monitor_stream, prefix="valid", updates=computation_graph.updates), DataStreamMonitoring( monitored_quantities, test_monitor_stream, prefix="test", updates=computation_graph.updates), ProgressBar(), Printing()]) main_loop.run()
def build_theano_functions(self): x = T.fmatrix('time_sequence') x = x.reshape((self.batch_dim, self.sequence_dim, self.time_dim)) y = x[:,1:self.sequence_dim,:] x = x[:,:self.sequence_dim-1,:] # if we try to include the spectrogram features spec_dims = 0 if self.image_size is not None : print "Convolution activated" self.init_conv() spec = T.ftensor4('spectrogram') spec_features, spec_dims = self.conv.build_conv_layers(spec) print "Conv final dims =", spec_dims spec_dims = np.prod(spec_dims) spec_features = spec_features.reshape( (self.batch_dim, self.sequence_dim-1, spec_dims)) x = T.concatenate([x, spec_features], axis=2) layers_input = [x] dims =np.array([self.time_dim + spec_dims]) for dim in self.lstm_layers_dim : dims = np.append(dims, dim) print "Dimensions =", dims # layer is just an index of the layer for layer in range(len(self.lstm_layers_dim)) : # before the cell, input, forget and output gates, x needs to # be transformed linear = Linear(dims[layer], dims[layer+1]*4, weights_init=Orthogonal(self.orth_scale), biases_init=Constant(0), name="linear"+str(layer)) linear.initialize() lstm_input = linear.apply(layers_input[layer]) # the lstm wants batch X sequence X time lstm = LSTM( dim=dims[layer+1], weights_init=IsotropicGaussian(mean=0.,std=0.5), biases_init=Constant(1), name="lstm"+str(layer)) lstm.initialize() # hack to use Orthogonal on lstm w_state lstm.W_state.set_value( self.orth_scale*Orthogonal().generate(np.random, lstm.W_state.get_value().shape)) h, _dummy = lstm.apply(lstm_input) layers_input.append(h) # this is where Alex Graves' paper starts print "Last linear transform dim :", dims[1:].sum() output_transform = Linear(dims[1:].sum(), self.output_dim, weights_init=Orthogonal(self.orth_scale), use_bias=False, name="output_transform") output_transform.initialize() if len(self.lstm_layers_dim) == 1 : print "hallo there, only one layer speaking" y_hat = output_transform.apply(layers_input[-1]) else : y_hat = output_transform.apply(T.concatenate(layers_input[1:], axis=2)) # transforms to find each gmm params (mu, pi, sig) # small hack to softmax a 3D tensor pis = T.reshape( T.nnet.softmax( T.reshape(y_hat[:,:,:self.gmm_dim], ((self.sequence_dim-1)*self.batch_dim, self.gmm_dim))), (self.batch_dim, (self.sequence_dim-1), self.gmm_dim)) sig = T.exp(y_hat[:,:,self.gmm_dim:self.gmm_dim*2])+1e-6 mus = y_hat[:,:,self.gmm_dim*2:] pis = pis[:,:,:,np.newaxis] mus = mus[:,:,:,np.newaxis] sig = sig[:,:,:,np.newaxis] y = y[:,:,np.newaxis,:] y = T.patternbroadcast(y, (False, False, True, False)) mus = T.patternbroadcast(mus, (False, False, False, True)) sig = T.patternbroadcast(sig, (False, False, False, True)) # sum likelihood with targets # see blog for this crazy Pr() = sum log sum prod # axes :: (batch, sequence, mixture, time) expo_term = -0.5*((y-mus)**2)/sig**2 coeff = T.log(T.maximum(1./(T.sqrt(2.*np.pi)*sig), EPS)) #coeff = T.log(1./(T.sqrt(2.*np.pi)*sig)) sequences = coeff + expo_term log_sequences = T.log(pis + EPS) + T.sum(sequences, axis=3, keepdims=True) log_sequences_max = T.max(log_sequences, axis=2, keepdims=True) LL = -(log_sequences_max + T.log(EPS + T.sum(T.exp(log_sequences - log_sequences_max), axis=2, keepdims=True))).mean() LL.name = "summed_likelihood" model = Model(LL) self.model = model parameters = model.parameters algorithm = GradientDescent( cost=LL, parameters=model.parameters, step_rule=Adam()) f = theano.function([x],[pis, sig, mus]) return algorithm, f
class LanguageModel(Initializable):
    """
    Takes the word embeddings from an LSTM compositional layer and creates
    sentence embeddings using an LSTM.

    compositional_layer_type can be:
    1) 'BidirectionalLSTMCompositionalLayer'
    2) 'UnidirectionalLSTMCompositionalLayer'
    3) 'BaselineLSTMCompositionalLayer'

    Input is a 3d tensor of subword ids with dimensions (num_words, num_subwords, batch_size)
    and a 3d tensor mask of the same size (num_words, num_subwords, batch_size).

    All hidden state sizes are the same as the subword embedding size.

    This returns a 3d tensor with dimensions of
    (num_words = num RNN states, batch_size,
     sentence embedding size = LM_RNN_hidden_state_size
                             = subword_RNN_hidden_state_size * 2)
    """

    def __init__(self, batch_size, num_subwords, num_words, subword_embedding_size,
                 input_vocab_size, subword_RNN_hidden_state_size, LM_RNN_hidden_state_size,
                 table_width=0.08,
                 compositional_layer_type='BidirectionalLSTMCompositionalLayer',
                 init_type='xavier', **kwargs):
        super(LanguageModel, self).__init__(**kwargs)
        self.batch_size = batch_size
        self.num_subwords = num_subwords  # number of subwords which make up a word
        self.num_words = num_words        # number of words in the sentence
        self.subword_embedding_size = subword_embedding_size
        self.input_vocab_size = input_vocab_size
        self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size  # i.e. word embedding size
        self.LM_RNN_hidden_state_size = LM_RNN_hidden_state_size            # i.e. sentence embedding size
        self.table_width = table_width
        self.name = 'Language_Model'

        if init_type == 'xavier':
            linear_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size,
                                                       self.LM_RNN_hidden_state_size)
            lstm_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size,
                                                     self.LM_RNN_hidden_state_size)
        else:  # default is Gaussian
            linear_init = IsotropicGaussian()
            lstm_init = IsotropicGaussian()

        self.compositional_layer = None
        self.linear = None

        if compositional_layer_type == 'BidirectionalLSTMCompositionalLayer':
            self.compositional_layer = BidirectionalLSTMCompositionalLayer(
                self.batch_size, self.num_subwords, self.num_words,
                self.subword_embedding_size, self.input_vocab_size,
                self.subword_RNN_hidden_state_size, self.table_width,
                init_type=init_type, name='compositional_layer')

            if init_type == 'xavier':
                linear_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size * 2,
                                                           self.LM_RNN_hidden_state_size)
                lstm_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size * 2,
                                                         self.LM_RNN_hidden_state_size)
            else:  # default is Gaussian
                linear_init = IsotropicGaussian()
                lstm_init = IsotropicGaussian()

            self.linear = Linear(input_dim=self.subword_RNN_hidden_state_size * 2,  # * 2 for the bidirectional output
                                 output_dim=self.LM_RNN_hidden_state_size * 4,
                                 name='linear',
                                 weights_init=linear_init,
                                 biases_init=Constant(0.0))

        elif compositional_layer_type == 'UnidirectionalLSTMCompositionalLayer':
            self.compositional_layer = LSTMCompositionalLayer(
                self.batch_size, self.num_subwords, self.num_words,
                self.subword_embedding_size, self.input_vocab_size,
                self.subword_RNN_hidden_state_size, self.table_width,
                init_type=init_type, name='compositional_layer')

            self.linear = Linear(input_dim=self.subword_RNN_hidden_state_size,
                                 output_dim=self.LM_RNN_hidden_state_size * 4,
                                 name='linear',
                                 weights_init=linear_init,
                                 biases_init=Constant(0.0))

        elif compositional_layer_type == 'BaselineLSTMCompositionalLayer':
            self.compositional_layer = BaselineLSTMCompositionalLayer(
                self.batch_size, self.num_subwords, self.num_words,
                self.subword_embedding_size, self.input_vocab_size,
                self.subword_RNN_hidden_state_size, self.table_width,
                init_type=init_type, name='compositional_layer')

            self.linear = Linear(input_dim=self.subword_RNN_hidden_state_size,
                                 output_dim=self.LM_RNN_hidden_state_size * 4,
                                 name='linear',
                                 weights_init=linear_init,
                                 biases_init=Constant(0.0))

        else:
            raise ValueError('compositional_layer_type = ' + compositional_layer_type + ' is invalid')

        # one RNN which reads the word embeddings into a sentence embedding,
        # or partial sentence embeddings
        self.language_model_RNN = LSTM(
            dim=self.LM_RNN_hidden_state_size, activation=Identity(),
            name='language_model_RNN',
            weights_init=lstm_init, biases_init=Constant(0.0))

        self.children = [self.compositional_layer, self.linear, self.language_model_RNN]

    @application(inputs=['subword_id_input_', 'subword_id_input_mask_'],
                 outputs=['sentence_embeddings', 'word_embeddings_mask'])
    def apply(self, subword_id_input_, subword_id_input_mask_):
        """
        subword_id_input_ is a 3d tensor of shape (num_words, num_subwords, batch_size).
        It is expected as a dtype=uint16 or equivalent.

        subword_id_input_mask_ is a 3d tensor of shape (num_words, num_subwords, batch_size).
        It is expected as a dtype=uint8 or equivalent with binary values:
        1 where there is data and 0 otherwise.

        Returned is a 3d tensor of size
        (num_words = num RNN states, batch_size, sentence embedding size),
        together with the word-level mask from the compositional layer
        (a per-sentence 1d mask of size (batch_size) is left commented out below).
        """
        word_embeddings, word_embeddings_mask = self.compositional_layer.apply(
            subword_id_input_, subword_id_input_mask_)

        sentence_embeddings = self.language_model_RNN.apply(
            self.linear.apply(word_embeddings),
            mask=word_embeddings_mask)[0]  # [0] = hidden states, [1] = cells

        # sentence_embeddings_mask = word_embeddings_mask.max(axis=0).T

        return sentence_embeddings, word_embeddings_mask
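The id/mask layout expected by LanguageModel.apply is easiest to see on a toy batch. The sketch below only follows the shapes and dtypes in the docstring; the sentences, ids, and sizes are invented for illustration.

# Illustration only: packing a toy batch into (num_words, num_subwords, batch_size)
# id and mask tensors of the dtypes described above.
import numpy as np

num_words, num_subwords, batch_size = 4, 6, 2

# two "sentences", each a list of words, each word a list of subword ids
batch = [
    [[3, 7], [12, 4, 9]],             # sentence 0: 2 words
    [[5], [8, 2, 2, 6], [11, 1]],     # sentence 1: 3 words
]

ids  = np.zeros((num_words, num_subwords, batch_size), dtype=np.uint16)
mask = np.zeros((num_words, num_subwords, batch_size), dtype=np.uint8)

for b, sentence in enumerate(batch):
    for w, subwords in enumerate(sentence):
        ids[w, :len(subwords), b] = subwords
        mask[w, :len(subwords), b] = 1   # 1 where there is data, 0 elsewhere

print(ids.shape, mask.shape)   # (4, 6, 2) (4, 6, 2)
print(mask[:, :, 1])           # padding pattern for sentence 1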
class Model(Initializable):
    @lazy()
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(**kwargs)
        self.config = config

        self.pre_context_embedder = ContextEmbedder(config.pre_embedder, name='pre_context_embedder')
        self.post_context_embedder = ContextEmbedder(config.post_embedder, name='post_context_embedder')

        in1 = 2 + sum(x[2] for x in config.pre_embedder.dim_embeddings)
        self.input_to_rec = MLP(activations=[Tanh()], dims=[in1, config.hidden_state_dim],
                                name='input_to_rec')

        self.rec = LSTM(dim=config.hidden_state_dim, name='recurrent')

        in2 = config.hidden_state_dim + sum(x[2] for x in config.post_embedder.dim_embeddings)
        self.rec_to_output = MLP(activations=[Tanh()], dims=[in2, 2], name='rec_to_output')

        self.sequences = ['latitude', 'latitude_mask', 'longitude']
        self.context = self.pre_context_embedder.inputs + self.post_context_embedder.inputs
        self.inputs = self.sequences + self.context
        self.children = [self.pre_context_embedder, self.post_context_embedder,
                         self.input_to_rec, self.rec, self.rec_to_output]

        self.initial_state_ = shared_floatx_zeros((config.hidden_state_dim,), name="initial_state")
        self.initial_cells = shared_floatx_zeros((config.hidden_state_dim,), name="initial_cells")

    def _push_initialization_config(self):
        for mlp in [self.input_to_rec, self.rec_to_output]:
            mlp.weights_init = self.config.weights_init
            mlp.biases_init = self.config.biases_init
        self.rec.weights_init = self.config.weights_init

    def get_dim(self, name):
        return self.rec.get_dim(name)

    @application
    def initial_state(self, *args, **kwargs):
        return self.rec.initial_state(*args, **kwargs)

    @recurrent(states=['states', 'cells'],
               outputs=['destination', 'states', 'cells'],
               sequences=['latitude', 'longitude', 'latitude_mask'])
    def predict_all(self, latitude, longitude, latitude_mask, **kwargs):
        # normalize the GPS coordinates
        latitude = (latitude - data.train_gps_mean[0]) / data.train_gps_std[0]
        longitude = (longitude - data.train_gps_mean[1]) / data.train_gps_std[1]

        pre_emb = tuple(self.pre_context_embedder.apply(**kwargs))
        latitude = tensor.shape_padright(latitude)
        longitude = tensor.shape_padright(longitude)
        itr = self.input_to_rec.apply(tensor.concatenate(pre_emb + (latitude, longitude), axis=1))
        # replicate for the four LSTM gates (the LSTM expects 4 * dim inputs)
        itr = itr.repeat(4, axis=1)
        (next_states, next_cells) = self.rec.apply(itr, kwargs['states'], kwargs['cells'],
                                                   mask=latitude_mask, iterate=False)

        post_emb = tuple(self.post_context_embedder.apply(**kwargs))
        rto = self.rec_to_output.apply(tensor.concatenate(post_emb + (next_states,), axis=1))
        # un-normalize back to GPS coordinates
        rto = (rto * data.train_gps_std) + data.train_gps_mean
        return (rto, next_states, next_cells)

    @predict_all.property('contexts')
    def predict_all_inputs(self):
        return self.context

    @application(outputs=['destination'])
    def predict(self, latitude, longitude, latitude_mask, **kwargs):
        latitude = latitude.T
        longitude = longitude.T
        latitude_mask = latitude_mask.T
        res = self.predict_all(latitude, longitude, latitude_mask, **kwargs)[0]
        return res[-1]

    @predict.property('inputs')
    def predict_inputs(self):
        return self.inputs

    @application(outputs=['cost_matrix'])
    def cost_matrix(self, latitude, longitude, latitude_mask, **kwargs):
        latitude = latitude.T
        longitude = longitude.T
        latitude_mask = latitude_mask.T

        res = self.predict_all(latitude, longitude, latitude_mask, **kwargs)[0]
        target = tensor.concatenate(
            (kwargs['destination_latitude'].dimshuffle('x', 0, 'x'),
             kwargs['destination_longitude'].dimshuffle('x', 0, 'x')),
            axis=2)
        target = target.repeat(latitude.shape[0], axis=0)
        ce = error.erdist(target.reshape((-1, 2)), res.reshape((-1, 2)))
        ce = ce.reshape(latitude.shape)
        return ce * latitude_mask

    @cost_matrix.property('inputs')
    def cost_matrix_inputs(self):
        return self.inputs + ['destination_latitude', 'destination_longitude']

    @application(outputs=['cost'])
    def cost(self, latitude_mask, **kwargs):
        return self.cost_matrix(latitude_mask=latitude_mask, **kwargs).sum() / latitude_mask.sum()

    @cost.property('inputs')
    def cost_inputs(self):
        return self.inputs + ['destination_latitude', 'destination_longitude']

    @application(outputs=['cost'])
    def valid_cost(self, **kwargs):
        # only works when batch_size is 1
        return self.cost_matrix(**kwargs)[-1, 0]

    @valid_cost.property('inputs')
    def valid_cost_inputs(self):
        return self.inputs + ['destination_latitude', 'destination_longitude']
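The cost above delegates the distance between predicted and true destinations to error.erdist, which is not defined in this snippet. As a hedged stand-in only (not necessarily the same formula), the sketch below computes an equirectangular approximation of the great-circle distance between (latitude, longitude) pairs in degrees; the function name, radius constant, and coordinates are invented for illustration, and the (N, 2) input layout mirrors the reshape((-1, 2)) calls in cost_matrix.

# Hypothetical stand-in for error.erdist: equirectangular distance on (N, 2) arrays.
import numpy as np

EARTH_RADIUS_KM = 6371.0

def equirect_dist(target, prediction):
    t = np.radians(np.asarray(target, dtype=float))
    p = np.radians(np.asarray(prediction, dtype=float))
    dlat = p[:, 0] - t[:, 0]
    dlon = (p[:, 1] - t[:, 1]) * np.cos(0.5 * (p[:, 0] + t[:, 0]))
    return EARTH_RADIUS_KM * np.sqrt(dlat ** 2 + dlon ** 2)

target     = np.array([[41.1579, -8.6291]])   # made-up coordinates
prediction = np.array([[41.1500, -8.6100]])
print(equirect_dist(target, prediction))      # per-row distance in km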