def __init__(self, dim, mini_dim, summary_dim, **kwargs):
    super(LSTMwMini, self).__init__(**kwargs)
    self.dim = dim
    self.mini_dim = mini_dim
    self.summary_dim = summary_dim

    self.recurrent_layer = LSTM(dim=self.summary_dim, activation=Rectifier(),
                                name='recurrent_layer',
                                weights_init=IsotropicGaussian(),
                                biases_init=Constant(0.0))
    self.mini_recurrent_layer = LSTM(dim=self.mini_dim, activation=Rectifier(),
                                     name='mini_recurrent_layer',
                                     weights_init=IsotropicGaussian(),
                                     biases_init=Constant(0.0))
    self.mini_to_main = Linear(self.dim + self.mini_dim, self.summary_dim,
                               name='mini_to_main',
                               weights_init=IsotropicGaussian(),
                               biases_init=Constant(0.0))
    self.mini_to_main2 = Linear(self.summary_dim, self.summary_dim * 4,
                                name='mini_to_main2',
                                weights_init=IsotropicGaussian(),
                                biases_init=Constant(0.0))
    self.children = [self.recurrent_layer, self.mini_recurrent_layer,
                     self.mini_to_main, self.mini_to_main2]
def __init__(self, feature_dim, memory_dim, fc1_dim, fc2_dim):
    self.W = Linear(input_dim=feature_dim, output_dim=memory_dim * 4,
                    weights_init=IsotropicGaussian(0.01),
                    biases_init=Constant(0), use_bias=False,
                    name='seqDecoder_W')
    # Note: despite the GRU_* names, both transitions are LSTM bricks.
    self.GRU_A = LSTM(feature_dim, name='seqDecoder_A',
                      weights_init=IsotropicGaussian(0.01),
                      biases_init=Constant(0))
    self.GRU_B = LSTM(memory_dim, name='seqDecoder_B',
                      weights_init=IsotropicGaussian(0.01),
                      biases_init=Constant(0))
    self.W.initialize()
    self.GRU_A.initialize()
    self.GRU_B.initialize()
    self.fc1 = Linear(input_dim=memory_dim, output_dim=fc1_dim,
                      weights_init=IsotropicGaussian(0.01),
                      biases_init=Constant(0), name='fc1')
    self.fc2 = Linear(input_dim=fc1_dim, output_dim=fc2_dim,
                      weights_init=IsotropicGaussian(0.01),
                      biases_init=Constant(0), name='fc2')
    self.fc1.initialize()
    self.fc2.initialize()
def __init__(self, emb_dim, dim, num_input_words, num_output_words, vocab,
             **kwargs):
    if emb_dim == 0:
        emb_dim = dim
    if num_input_words == 0:
        num_input_words = vocab.size()
    if num_output_words == 0:
        num_output_words = vocab.size()

    self._num_input_words = num_input_words
    self._num_output_words = num_output_words
    self._vocab = vocab
    self._word_to_id = WordToIdOp(self._vocab)

    children = []
    self._main_lookup = LookupTable(self._num_input_words, emb_dim,
                                    name='main_lookup')
    self._encoder_fork = Linear(emb_dim, 4 * dim, name='encoder_fork')
    self._encoder_rnn = LSTM(dim, name='encoder_rnn')
    self._decoder_fork = Linear(emb_dim, 4 * dim, name='decoder_fork')
    self._decoder_rnn = LSTM(dim, name='decoder_rnn')
    children.extend([self._main_lookup,
                     self._encoder_fork, self._encoder_rnn,
                     self._decoder_fork, self._decoder_rnn])
    self._pre_softmax = Linear(dim, self._num_output_words)
    self._softmax = NDimensionalSoftmax()
    children.extend([self._pre_softmax, self._softmax])
    super(LanguageModel, self).__init__(children=children, **kwargs)
def make_bidir_lstm_stack(seq, seq_dim, mask, sizes, skip=True, name=''):
    bricks = []
    curr_dim = [seq_dim]
    curr_hidden = [seq]
    hidden_list = []
    for k, dim in enumerate(sizes):
        fwd_lstm_ins = [Linear(input_dim=d, output_dim=4 * dim,
                               name='%s_fwd_lstm_in_%d_%d' % (name, k, l))
                        for l, d in enumerate(curr_dim)]
        fwd_lstm = LSTM(dim=dim, activation=Tanh(),
                        name='%s_fwd_lstm_%d' % (name, k))

        bwd_lstm_ins = [Linear(input_dim=d, output_dim=4 * dim,
                               name='%s_bwd_lstm_in_%d_%d' % (name, k, l))
                        for l, d in enumerate(curr_dim)]
        bwd_lstm = LSTM(dim=dim, activation=Tanh(),
                        name='%s_bwd_lstm_%d' % (name, k))

        bricks = bricks + [fwd_lstm, bwd_lstm] + fwd_lstm_ins + bwd_lstm_ins

        fwd_tmp = sum(x.apply(v) for x, v in zip(fwd_lstm_ins, curr_hidden))
        bwd_tmp = sum(x.apply(v) for x, v in zip(bwd_lstm_ins, curr_hidden))
        fwd_hidden, _ = fwd_lstm.apply(fwd_tmp, mask=mask)
        # The backward pass runs on the time-reversed sequence and mask.
        bwd_hidden, _ = bwd_lstm.apply(bwd_tmp[::-1], mask=mask[::-1])
        hidden_list = hidden_list + [fwd_hidden, bwd_hidden]
        if skip:
            curr_hidden = [seq, fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [seq_dim, dim, dim]
        else:
            curr_hidden = [fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [dim, dim]
    return bricks, hidden_list
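# Hedged usage sketch for make_bidir_lstm_stack above (not from the original
# source): assumes time-major Theano inputs and that the returned bricks are
# initialized before compiling, e.g. with the IsotropicGaussian/Constant
# scheme used throughout these snippets.
from theano import tensor
from blocks.initialization import IsotropicGaussian, Constant

seq = tensor.tensor3('seq')    # (time, batch, seq_dim)
mask = tensor.matrix('mask')   # (time, batch)
bricks, hiddens = make_bidir_lstm_stack(seq, seq_dim=100, mask=mask,
                                        sizes=[128, 128], skip=True,
                                        name='ctx')
for brick in bricks:
    brick.weights_init = IsotropicGaussian(0.01)
    brick.biases_init = Constant(0.0)
    brick.initialize()
# hiddens is [fwd_0, bwd_0, fwd_1, bwd_1]; note the backward states are
# returned in reversed time order.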
def __init__(self, input_dim, output_dim, lstm_dim, print_intermediate=False,
             print_attrs=['__str__'], **kwargs):
    super(LinearLSTM, self).__init__(**kwargs)

    self.x_to_h = Linear(input_dim, lstm_dim * 4, name='x_to_h',
                         weights_init=IsotropicGaussian(),
                         biases_init=Constant(0.0))
    self.lstm = LSTM(lstm_dim, name='lstm',
                     weights_init=IsotropicGaussian(),
                     biases_init=Constant(0.0))
    self.h_to_o = Linear(lstm_dim, output_dim, name='h_to_o',
                         weights_init=IsotropicGaussian(),
                         biases_init=Constant(0.0))
    self.children = [self.x_to_h, self.lstm, self.h_to_o]

    self.print_intermediate = print_intermediate
    self.print_attrs = print_attrs
def create_model(self):
    input_dim = self.input_dim
    x = self.x
    x_to_h = Linear(input_dim, input_dim * 4, name='x_to_h',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
    lstm = LSTM(input_dim, name='lstm',
                weights_init=IsotropicGaussian(),
                biases_init=Constant(0.0))
    h_to_o = Linear(input_dim, 1, name='h_to_o',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
    x_transform = x_to_h.apply(x)
    self.x_to_h = x_to_h
    self.lstm = lstm
    self.h_to_o = h_to_o
    h, c = lstm.apply(x_transform)
    # only values of hidden units of the last timeframe are used for
    # the classification
    probs = h_to_o.apply(h[-1])
    return probs
def bilstm_layer(in_dim, inp, h_dim, n):
    linear = Linear(input_dim=in_dim, output_dim=h_dim * 4,
                    name='linear' + str(n) + inp.name)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n) + inp.name)
    bilstm = Bidirectional(prototype=lstm)
    bilstm.name = 'bilstm' + str(n) + inp.name
    initialize([linear, bilstm])
    return bilstm.apply(linear.apply(inp))[0]
def lstm_layer(in_dim, h, h_dim, n, pref=""): linear = Linear(input_dim=in_dim, output_dim=h_dim * 4, name='linear' + str(n) + pref) lstm = LSTM(dim=h_dim, name='lstm' + str(n) + pref) initialize([linear, lstm]) return lstm.apply(linear.apply(h))[0]
def __init__(self, config, **kwargs):
    super(Model, self).__init__(**kwargs)
    self.config = config

    self.pre_context_embedder = ContextEmbedder(
        config.pre_embedder, name='pre_context_embedder')
    self.post_context_embedder = ContextEmbedder(
        config.post_embedder, name='post_context_embedder')

    in1 = 2 + sum(x[2] for x in config.pre_embedder.dim_embeddings)
    self.input_to_rec = MLP(activations=[Tanh()],
                            dims=[in1, config.hidden_state_dim],
                            name='input_to_rec')

    self.rec = LSTM(dim=config.hidden_state_dim, name='recurrent')

    in2 = config.hidden_state_dim + sum(
        x[2] for x in config.post_embedder.dim_embeddings)
    self.rec_to_output = MLP(activations=[Tanh()], dims=[in2, 2],
                             name='rec_to_output')

    self.sequences = ['latitude', 'latitude_mask', 'longitude']
    self.context = (self.pre_context_embedder.inputs
                    + self.post_context_embedder.inputs)
    self.inputs = self.sequences + self.context
    self.children = [self.pre_context_embedder, self.post_context_embedder,
                     self.input_to_rec, self.rec, self.rec_to_output]

    self.initial_state_ = shared_floatx_zeros((config.hidden_state_dim,),
                                              name="initial_state")
    self.initial_cells = shared_floatx_zeros((config.hidden_state_dim,),
                                             name="initial_cells")
def apply(self, input_, target):
    x_to_h = Linear(name='x_to_h', input_dim=self.dims[0],
                    output_dim=self.dims[1] * 4)
    pre_rnn = x_to_h.apply(input_)
    pre_rnn.name = 'pre_rnn'
    rnn = LSTM(activation=Tanh(), dim=self.dims[1], name=self.name)
    h, _ = rnn.apply(pre_rnn)
    h.name = 'h'
    h_to_y = Linear(name='h_to_y', input_dim=self.dims[1],
                    output_dim=self.dims[2])
    y_hat = h_to_y.apply(h)
    y_hat.name = 'y_hat'

    cost = SquaredError().apply(target, y_hat)
    cost.name = 'MSE'

    self.outputs = {}
    self.outputs['y_hat'] = y_hat
    self.outputs['cost'] = cost
    self.outputs['pre_rnn'] = pre_rnn
    self.outputs['h'] = h

    # Initialization
    for brick in (rnn, x_to_h, h_to_y):
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0)
        brick.initialize()
def __init__(self, image_shape, patch_shape, hidden_dim, n_spatial_dims,
             whatwhere_interaction, prefork_area_transform,
             postmerge_area_transform, patch_transform, batch_normalize,
             response_transform, location_std, scale_std, cutoff,
             batched_window, initargs, emitter, **kwargs):
    self.rnn = LSTM(activation=Tanh(), dim=hidden_dim, name="recurrent",
                    weights_init=IsotropicGaussian(1e-4),
                    biases_init=Constant(0))
    self.locator = masonry.Locator(hidden_dim, n_spatial_dims,
                                   area_transform=prefork_area_transform,
                                   location_std=location_std,
                                   scale_std=scale_std,
                                   **initargs)
    self.cropper = crop.LocallySoftRectangularCropper(
        n_spatial_dims=n_spatial_dims,
        image_shape=image_shape, patch_shape=patch_shape,
        kernel=crop.Gaussian(), cutoff=cutoff,
        batched_window=batched_window)
    self.merger = masonry.Merger(
        patch_transform=patch_transform,
        area_transform=postmerge_area_transform,
        response_transform=response_transform,
        n_spatial_dims=n_spatial_dims,
        whatwhere_interaction=whatwhere_interaction,
        batch_normalize=batch_normalize,
        **initargs)
    self.attention = masonry.SpatialAttention(
        self.locator, self.cropper, self.merger, name="sa")
    self.emitter = emitter
    self.model = masonry.RecurrentAttentionModel(
        self.rnn, self.attention, self.emitter, name="ram")
def __init__(self, num_input_words, emb_dim, dim, vocab,
             lookup=None, fork_and_rnn=None, **kwargs):
    if num_input_words > 0:
        logger.info("Restricting def vocab to " + str(num_input_words))
        self._num_input_words = num_input_words
    else:
        self._num_input_words = vocab.size()

    self._vocab = vocab

    children = []

    if lookup is None:
        self._def_lookup = LookupTable(self._num_input_words, emb_dim,
                                       name='def_lookup')
    else:
        self._def_lookup = lookup

    if fork_and_rnn is None:
        self._def_fork = Linear(emb_dim, 4 * dim, name='def_fork')
        self._def_rnn = LSTM(dim, name='def_rnn')
    else:
        self._def_fork, self._def_rnn = fork_and_rnn

    children.extend([self._def_lookup, self._def_fork, self._def_rnn])

    super(LSTMReadDefinitions, self).__init__(children=children, **kwargs)
def __init__(self, feature_dim, hidden_dim, output_dim):
    self.image_embed = Linear(input_dim=feature_dim, output_dim=hidden_dim,
                              weights_init=IsotropicGaussian(0.01),
                              biases_init=Constant(0), use_bias=False,
                              name='image_embed')
    self.word_embed = Linear(input_dim=feature_dim, output_dim=hidden_dim,
                             weights_init=IsotropicGaussian(0.01),
                             biases_init=Constant(0), use_bias=False,
                             name='word_embed')
    self.r_embed = Linear(input_dim=feature_dim, output_dim=hidden_dim,
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0), use_bias=False,
                          name='r_embed')
    self.m_to_s = Linear(input_dim=hidden_dim, output_dim=1,
                         weights_init=IsotropicGaussian(0.01),
                         biases_init=Constant(0), use_bias=False,
                         name='m_to_s')
    self.attention_dist = Softmax(name='attention_dist_softmax')
    self.r_to_r = Linear(input_dim=feature_dim, output_dim=feature_dim,
                         weights_init=IsotropicGaussian(0.01),
                         biases_init=Constant(0), use_bias=False,
                         name='r_to_r')
    # self.r_to_g = Linear(input_dim=feature_dim, output_dim=output_dim,
    #                      weights_init=IsotropicGaussian(0.01),
    #                      biases_init=Constant(0), use_bias=False,
    #                      name='r_to_g')
    self.image_embed.initialize()
    self.word_embed.initialize()
    self.r_embed.initialize()
    self.m_to_s.initialize()
    self.r_to_r.initialize()
    # self.r_to_g.initialize()

    # the sequence to sequence LSTM
    self.seq = LSTM(output_dim, name='rewatcher_seq',
                    weights_init=IsotropicGaussian(0.01),
                    biases_init=Constant(0))
    self.seq_embed = Linear(feature_dim, output_dim * 4,
                            name='rewatcher_seq_embed',
                            weights_init=IsotropicGaussian(0.01),
                            biases_init=Constant(0),
                            use_bias=False)
    self.seq.initialize()
    self.seq_embed.initialize()
def lstm_layer(in_size, dim, x, h, n, task, first_layer=False):
    if connect_h_to_h == 'all-previous':
        if first_layer:
            lstm_input = x
            linear = Linear(input_dim=in_size, output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        elif connect_x_to_h:
            lstm_input = T.concatenate([x] + [hidden for hidden in h], axis=2)
            linear = Linear(input_dim=in_size + dim * n, output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        else:
            lstm_input = T.concatenate([hidden for hidden in h], axis=2)
            linear = Linear(input_dim=dim * (n + 1), output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
    elif connect_h_to_h == 'two-previous':
        if first_layer:
            lstm_input = x
            linear = Linear(input_dim=in_size, output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        elif connect_x_to_h:
            lstm_input = T.concatenate([x] + h[max(0, n - 2):n], axis=2)
            linear = Linear(
                input_dim=in_size + dim * 2 if n > 1 else in_size + dim,
                output_dim=dim * 4,
                name='linear' + str(n) + '-' + str(task))
        else:
            lstm_input = T.concatenate(h[max(0, n - 2):n], axis=2)
            linear = Linear(input_dim=dim * 2 if n > 1 else dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
    elif connect_h_to_h == 'one-previous':
        if first_layer:
            lstm_input = x
            linear = Linear(input_dim=in_size, output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        elif connect_x_to_h:
            lstm_input = T.concatenate([x] + [h[n - 1]], axis=2)
            linear = Linear(input_dim=in_size + dim, output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        else:
            lstm_input = h[n - 1]
            linear = Linear(input_dim=dim, output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
    lstm = LSTM(dim=dim, name=layer_models[n] + str(n) + '-' + str(task))
    initialize([linear, lstm])
    if layer_models[n] == 'lstm':
        return lstm.apply(linear.apply(lstm_input))
    elif layer_models[n] == 'mt_lstm':
        return lstm.apply(linear.apply(lstm_input),
                          time_scale=layer_resolutions[n],
                          time_offset=layer_execution_time_offset[n])
def construct_model(activation_function, r_dim, hidden_dim, out_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    nx = x.shape[0]
    nj = x.shape[1]  # also is r.shape[0]
    nr = r.shape[1]

    # r is nj x nr
    # x is nx x nj
    # y is nx

    # Get a representation of r of size r_dim
    r = DAE(r)

    # r is now nj x r_dim
    # r_rep is nx x nj x r_dim
    r_rep = r[None, :, :].repeat(axis=0, repeats=nx)
    # x3 is nx x nj x 1
    x3 = x[:, :, None]

    # concat is nx x nj x (r_dim + 1)
    concat = tensor.concatenate([r_rep, x3], axis=2)

    # Change concat from Batch x Time x Features to T X B x F
    rnn_input = concat.dimshuffle(1, 0, 2)

    linear = Linear(input_dim=r_dim + 1, output_dim=4 * hidden_dim,
                    name="input_linear")
    lstm = LSTM(dim=hidden_dim, activation=activation_function,
                name="hidden_recurrent")
    top_linear = Linear(input_dim=hidden_dim, output_dim=out_dim,
                        name="out_linear")

    pre_rnn = linear.apply(rnn_input)
    states = lstm.apply(pre_rnn)[0]
    activations = top_linear.apply(states)
    activations = tensor.mean(activations, axis=0)

    cost = Softmax().categorical_cross_entropy(y, activations)

    pred = activations.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in (linear, lstm, top_linear):
        brick.weights_init = IsotropicGaussian(0.1)
        brick.biases_init = Constant(0.)
        brick.initialize()

    return cost, error_rate
def add_lstm(input_dim, input_var):
    linear = Linear(input_dim=input_dim, output_dim=input_dim * 4,
                    name="linear_layer")
    lstm = LSTM(dim=input_dim, name="lstm_layer")
    testing_init(linear)
    # linear.initialize()
    default_init(lstm)
    h = linear.apply(input_var)
    return lstm.apply(h)
def create_model(self):
    input_dim = self.input_dim
    x = self.x
    y = self.y
    p = self.p
    mask = self.mask
    hidden_dim = self.hidden_dim
    embedding_dim = self.embedding_dim
    lookup = LookupTable(self.dict_size, embedding_dim,
                         weights_init=IsotropicGaussian(0.001),
                         name='LookupTable')
    x_to_h = Linear(embedding_dim, hidden_dim * 4, name='x_to_h',
                    weights_init=IsotropicGaussian(0.001),
                    biases_init=Constant(0.0))
    lstm = LSTM(hidden_dim, name='lstm',
                weights_init=IsotropicGaussian(0.001),
                biases_init=Constant(0.0))
    h_to_o = MLP([Logistic()], [hidden_dim, 1],
                 weights_init=IsotropicGaussian(0.001),
                 biases_init=Constant(0),
                 name='h_to_o')

    lookup.initialize()
    x_to_h.initialize()
    lstm.initialize()
    h_to_o.initialize()

    embed = lookup.apply(x).reshape(
        (x.shape[0], x.shape[1], self.embedding_dim))
    embed.name = "embed_vec"
    x_transform = x_to_h.apply(embed.transpose(1, 0, 2))
    x_transform.name = "Transformed X"

    self.lookup = lookup
    self.x_to_h = x_to_h
    self.lstm = lstm
    self.h_to_o = h_to_o

    # if mask is None:
    h, c = lstm.apply(x_transform)
    # else:
    #     h, c = lstm.apply(x_transform, mask=mask)
    h.name = "hidden_state"
    c.name = "cell_state"

    # Only the hidden state at each sequence's last valid timestep is used
    # for the classification; summing the mask gives that index per sequence.
    indices = T.sum(mask, axis=0) - 1
    rel_hid = h[indices, T.arange(h.shape[1])]
    out = self.h_to_o.apply(rel_hid)
    probs = out
    return probs
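# A small numpy illustration (not from the original source) of the gather
# used above to pick each sequence's final valid hidden state from a
# time-major tensor:
import numpy as np

h = np.arange(24).reshape(4, 2, 3)                 # (time, batch, hidden)
mask = np.array([[1, 1], [1, 1], [1, 0], [0, 0]])  # (time, batch) validity
indices = mask.sum(axis=0) - 1                     # last valid step: [2, 1]
last = h[indices, np.arange(h.shape[1])]           # shape (batch, hidden)
print(last)  # rows h[2, 0] and h[1, 1]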
def lstm_layer(dim, h, n, x_mask, first, **kwargs):
    linear = Linear(input_dim=dim, output_dim=dim * 4, name='linear' + str(n))
    lstm = LSTM(dim=dim, activation=Rectifier(), name='lstm' + str(n))
    initialize([linear, lstm])
    applyLin = linear.apply(h)
    if first:
        lstmApply = lstm.apply(applyLin, mask=x_mask, **kwargs)[0]
    else:
        lstmApply = lstm.apply(applyLin, **kwargs)[0]
    return lstmApply
def getBidir2(input_dim, input_var):
    """LSTM-based bidirectional layer."""
    bidir = Bidirectional(weights_init=Orthogonal(),
                          prototype=LSTM(dim=input_dim, name='lstm'))
    # bidir.allocate()
    bidir.initialize()
    h = bidir.apply(input_var)
    net = add_softmax_layer(h, input_dim, 2)
    return net
def lstm_layer(self, h, n):
    """
    Performs the LSTM update for a batch of word sequences.

    :param h: The word embeddings for this update
    :param n: The index of this LSTM layer in the stack
    """
    # Maps the word embedding to the 4 * hidden_size pre-activations
    # expected by the LSTM gates.
    linear = Linear(input_dim=self.hidden_size,
                    output_dim=self.hidden_size * 4,
                    name='linear_lstm' + str(n))
    initialize(linear, sqrt(6.0 / (5 * self.hidden_size)))
    lstm = LSTM(dim=self.hidden_size, name='lstm' + str(n))
    initialize(lstm, 0.08)
    return lstm.apply(linear.apply(h))
def __init__(self, layers_no, dim, alphabet_size, batch_size):
    # characters -> 1-of-N embedder -> N-to-dim -> LSTM#0 -> ...
    #   -> LSTM#(layers_no-1) -> dim-to-N -> softmax
    # TODO: define the loss
    # TODO: first_resizer

    # LSTM stack
    self.stack = []
    lstms = [LSTM(dim=dim) for _ in range(layers_no)]
    for lstm in lstms:
        state, cell = lstm.initial_states(batch_size)
        # list.append takes a single argument, so store a tuple per layer
        self.stack.append((lstm, state, cell))
def __init__(self, dimension, input_size, embed_input=False, **kwargs):
    super(LSTMEncoder, self).__init__(**kwargs)
    if embed_input:
        self.embedder = LookupTable(input_size, dimension)
    else:
        self.embedder = Linear(input_size, dimension)
    self.fork = Fork(['inputs'], dimension, output_dims=[dimension],
                     prototype=Linear(dimension, 4 * dimension))
    encoder = Bidirectional(LSTM(dim=dimension, activation=Tanh()))
    self.encoder = encoder
    self.children = [encoder, self.embedder, self.fork]
def example4():
    """LSTM -> crashes during initialization of the LSTM."""
    x = tensor.tensor3('x')
    dim = 3

    # gate_inputs = theano.function([x], x * 4)
    gate_inputs = Linear(input_dim=dim, output_dim=dim * 4, name="linear",
                         weights_init=initialization.Identity(),
                         biases_init=Constant(2))
    lstm = LSTM(dim=dim, activation=Tanh(),
                weights_init=IsotropicGaussian(),
                biases_init=Constant(0))

    gate_inputs.initialize()
    hg = gate_inputs.apply(x)

    # print(gate_inputs.parameters)
    # print(gate_inputs.parameters[1].get_value())

    lstm.initialize()
    h, cells = lstm.apply(hg)
    print(lstm.parameters)

    f = theano.function([x], h)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
    print(f(4 * np.ones((dim, 1, dim), dtype=theano.config.floatX)))

    print("Good Job!")

    # lstm_output =

    # Initial State
    h0 = tensor.matrix('h0')
    c = tensor.matrix('cells')
    h, c1 = lstm.apply(inputs=x, states=h0, cells=c)
    # lstm.apply(states=h0, cells=cells, inputs=gate_inputs)

    f = theano.function([x, h0, c], h)
    print("a")
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX),
            np.ones((1, 3), dtype=theano.config.floatX),
            np.ones((1, 3), dtype=theano.config.floatX)))
def setUp(self):
    n_iter = 2
    x_dim = 8
    z_dim = 10
    dec_dim = 12
    enc_dim = 16
    read_dim = 2 * x_dim

    rnninits = {
        # 'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        # 'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
    writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)

    encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
    decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)
    encoder_mlp = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim],
                      name="MLP_enc", **inits)
    decoder_mlp = MLP([Identity()], [z_dim, 4 * dec_dim],
                      name="MLP_dec", **inits)
    q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

    self.draw = DrawModel(n_iter, reader, encoder_mlp, encoder_rnn,
                          q_sampler, decoder_mlp, decoder_rnn, writer)
    self.draw.initialize()
def create_rnn(hidden_dim, vocab_dim, mode="rnn"): # input x = tensor.imatrix('inchar') y = tensor.imatrix('outchar') # W = LookupTable( name="W1", #dim = hidden_dim*4, dim=hidden_dim, length=vocab_dim, weights_init=initialization.IsotropicGaussian(0.01), biases_init=initialization.Constant(0)) if mode == "lstm": # Long Short Term Memory H = LSTM(hidden_dim, name='H', weights_init=initialization.IsotropicGaussian(0.01), biases_init=initialization.Constant(0.0)) else: # recurrent history weight H = SimpleRecurrent( name="H", dim=hidden_dim, activation=Tanh(), weights_init=initialization.IsotropicGaussian(0.01)) # S = Linear(name="W2", input_dim=hidden_dim, output_dim=vocab_dim, weights_init=initialization.IsotropicGaussian(0.01), biases_init=initialization.Constant(0)) A = NDimensionalSoftmax(name="softmax") initLayers([W, H, S]) activations = W.apply(x) hiddens = H.apply(activations) #[0] activations2 = S.apply(hiddens) y_hat = A.apply(activations2, extra_ndim=1) cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean() cg = ComputationGraph(cost) #print VariableFilter(roles=[WEIGHT])(cg.variables) #W1,H,W2 = VariableFilter(roles=[WEIGHT])(cg.variables) layers = (x, W, H, S, A, y) return cg, layers, y_hat, cost
def setUp(self):
    depth = 4
    self.depth = depth
    dim = 3  # don't change, hardwired in the code
    transitions = [LSTM(dim=dim) for _ in range(depth)]
    self.stack0 = RecurrentStack(transitions,
                                 weights_init=Constant(2),
                                 biases_init=Constant(0))
    self.stack0.initialize()

    self.stack2 = RecurrentStack(transitions,
                                 weights_init=Constant(2),
                                 biases_init=Constant(0),
                                 skip_connections=True)
    self.stack2.initialize()
def __init__(self, word_dim, hidden_dim):
    self.forward_lstm = LSTM(hidden_dim,
                             name='question_forward_lstm',
                             weights_init=IsotropicGaussian(0.01),
                             biases_init=Constant(0))
    self.backward_lstm = LSTM(hidden_dim,
                              name='question_backward_lstm',
                              weights_init=IsotropicGaussian(0.01),
                              biases_init=Constant(0))
    self.x_to_h_forward = Linear(word_dim, hidden_dim * 4,
                                 name='word_x_to_h_forward',
                                 weights_init=IsotropicGaussian(0.01),
                                 biases_init=Constant(0))
    self.x_to_h_backward = Linear(word_dim, hidden_dim * 4,
                                  name='word_x_to_h_backward',
                                  weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0))
    self.forward_lstm.initialize()
    self.backward_lstm.initialize()
    self.x_to_h_forward.initialize()
    self.x_to_h_backward.initialize()
def __init__(self, image_feature_dim, embedding_dim, **kwargs):
    super(Encoder, self).__init__(**kwargs)

    self.image_embedding = Linear(input_dim=image_feature_dim,
                                  output_dim=embedding_dim,
                                  name="image_embedding")
    self.to_inputs = Linear(input_dim=embedding_dim,
                            # times 4 because the LSTM input stacks the
                            # input, forget, cell, and output gate values
                            output_dim=embedding_dim * 4,
                            name="to_inputs")
    self.transition = LSTM(dim=embedding_dim, name="transition")

    self.children = [self.image_embedding, self.to_inputs, self.transition]
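# A minimal shape check (a sketch, not from the original source) of the
# Blocks convention used above: the input transform must emit 4 * dim
# features, which LSTM.apply splits into the four gate pre-activations.
import numpy
import theano
from theano import tensor
from blocks.bricks import Linear, Tanh
from blocks.bricks.recurrent import LSTM
from blocks.initialization import IsotropicGaussian, Constant

dim = 8
x = tensor.tensor3('x')  # (time, batch, dim)
fork = Linear(input_dim=dim, output_dim=4 * dim,
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
lstm = LSTM(dim=dim, activation=Tanh(),
            weights_init=IsotropicGaussian(0.01),
            biases_init=Constant(0))
fork.initialize()
lstm.initialize()
states, cells = lstm.apply(fork.apply(x))
f = theano.function([x], states)
print(f(numpy.ones((5, 2, dim), dtype=theano.config.floatX)).shape)
# -> (5, 2, 8)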
def __init__(self, dims=(88, 100, 100), **kwargs):
    super(Rnn, self).__init__(**kwargs)
    self.dims = dims

    self.input_transform = Linear(input_dim=dims[0],
                                  output_dim=dims[1],
                                  weights_init=IsotropicGaussian(0.01),
                                  # biases_init=Constant(0.0),
                                  use_bias=False,
                                  name="input_transform")
    # Despite the name, this layer is a plain SimpleRecurrent, not a GRU.
    self.gru_layer = SimpleRecurrent(dim=dims[1], activation=Tanh(),
                                     weights_init=IsotropicGaussian(0.01),
                                     biases_init=Constant(0.0),
                                     use_bias=True,
                                     name="gru_rnn_layer")
    # TODO: find a way to automatically set the output dim
    # in case of lstm vs normal rnn
    self.linear_trans = Linear(input_dim=dims[1],
                               output_dim=dims[2] * 4,
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0),
                               use_bias=False,
                               name="h2h_transform")
    self.lstm_layer = LSTM(dim=dims[2], activation=Tanh(),
                           weights_init=IsotropicGaussian(0.01),
                           biases_init=Constant(0.0),
                           use_bias=True,
                           name="lstm_rnn_layer")
    self.out_transform = MLP(activations=[Sigmoid()],
                             dims=[dims[2], dims[0]],
                             weights_init=IsotropicGaussian(0.01),
                             use_bias=True,
                             biases_init=Constant(0.0),
                             name="out_layer")
    self.children = [self.input_transform, self.gru_layer,
                     self.linear_trans, self.lstm_layer,
                     self.out_transform]
def __init__(self, dims, **kwargs):
    super(ModularRnn, self).__init__(**kwargs)
    self.layers = []
    self.transforms = []
    for i, (dim1, dim2) in enumerate(zip(dims[:-1], dims[1:])):
        self.layers.append(
            LSTM(dim=dim1, activation=Tanh(),
                 weights_init=IsotropicGaussian(0.01),
                 biases_init=Constant(0.0),
                 use_bias=True,
                 name="rnn_layer%s" % i))
        # `multiplier` is presumably a module-level constant; for LSTM
        # transitions it must be 4 (one slab of pre-activations per gate).
        self.transforms.append(
            Linear(input_dim=dim1, output_dim=dim2 * multiplier,
                   weights_init=IsotropicGaussian(0.01),
                   biases_init=Constant(0.0),
                   use_bias=False,
                   name="linear_transform%s" % i))