class AttentionEUTHM(EUTHM):
    '''EUTHM with user attention'''

    def __init__(self, config, dataset, *args, **kwargs):
        # Pass the subclass itself to super() so that EUTHM.__init__ runs.
        super(AttentionEUTHM, self).__init__(config, dataset)

    def _get_doc_embed(self, *args, **kwargs):
        text_vec = self._get_text_vec()
        user_vec = self.user_embed.apply(self.user)
        # Tile the user embedding along the time axis and concatenate it
        # to every word vector.
        text_vec = tensor.concatenate(
            [text_vec,
             user_vec[None, :, :][tensor.zeros(shape=(text_vec.shape[0],),
                                               dtype='int32')]],
            axis=2)
        return self._encode_text_vec(text_vec)

    def _build_bricks(self, *args, **kwargs):
        super(AttentionEUTHM, self)._build_bricks()
        self.mlstm_ins = Linear(input_dim=self.config.word_embed_dim +
                                self.config.user_embed_dim,
                                output_dim=4 * self.config.lstm_dim,
                                name='mlstm_in')
        self.mlstm_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim +
                       self.config.user_embed_dim + self.config.lstm_dim))
        self.mlstm_ins.biases_init = Constant(0)
        self.mlstm_ins.initialize()
def make_bidir_lstm_stack(seq, seq_dim, mask, sizes, skip=True, name=''):
    bricks = []
    curr_dim = [seq_dim]
    curr_hidden = [seq]
    hidden_list = []
    for k, dim in enumerate(sizes):
        fwd_lstm_ins = [Linear(input_dim=d, output_dim=4 * dim,
                               name='%s_fwd_lstm_in_%d_%d' % (name, k, l))
                        for l, d in enumerate(curr_dim)]
        fwd_lstm = LSTM(dim=dim, activation=Tanh(),
                        name='%s_fwd_lstm_%d' % (name, k))
        bwd_lstm_ins = [Linear(input_dim=d, output_dim=4 * dim,
                               name='%s_bwd_lstm_in_%d_%d' % (name, k, l))
                        for l, d in enumerate(curr_dim)]
        bwd_lstm = LSTM(dim=dim, activation=Tanh(),
                        name='%s_bwd_lstm_%d' % (name, k))
        bricks = bricks + [fwd_lstm, bwd_lstm] + fwd_lstm_ins + bwd_lstm_ins
        fwd_tmp = sum(x.apply(v) for x, v in zip(fwd_lstm_ins, curr_hidden))
        bwd_tmp = sum(x.apply(v) for x, v in zip(bwd_lstm_ins, curr_hidden))
        fwd_hidden, _ = fwd_lstm.apply(fwd_tmp, mask=mask)
        bwd_hidden, _ = bwd_lstm.apply(bwd_tmp[::-1], mask=mask[::-1])
        hidden_list = hidden_list + [fwd_hidden, bwd_hidden]
        if skip:
            curr_hidden = [seq, fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [seq_dim, dim, dim]
        else:
            curr_hidden = [fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [dim, dim]
    return bricks, hidden_list
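# Usage sketch for the stack above (not from the source; the dimensions,
# variable names and initializers are illustrative assumptions). `seq` is
# time x batch x features, `mask` is time x batch.
seq = tensor.tensor3('seq')
mask = tensor.matrix('seq_mask')
bricks, hiddens = make_bidir_lstm_stack(seq, 100, mask, [128, 128],
                                        skip=True, name='enc')
for brick in bricks:
    brick.weights_init = IsotropicGaussian(0.01)
    brick.biases_init = Constant(0.)
    brick.initialize()
# hiddens[-2] / hiddens[-1] hold the forward / backward states of the top layer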
def __init__(self, dim_in, dim_hidden, dim_out, **kwargs): self.dim_in = dim_in self.dim_hidden = dim_hidden self.dim_out = dim_out self.input_layer = Linear(input_dim=self.dim_in, output_dim=self.dim_hidden, weights_init=initialization.IsotropicGaussian(), biases_init=initialization.Constant(0)) self.input_layer.initialize() sparse_init = initialization.Sparse(num_init=15, weights_init=initialization.IsotropicGaussian()) self.recurrent_layer = SimpleRecurrent( dim=self.dim_hidden, activation=Tanh(), name="first_recurrent_layer", weights_init=sparse_init, biases_init=initialization.Constant(0.01)) ''' self.recurrent_layer = LSTM(dim=self.dim_hidden, activation=Tanh(), weights_init=initialization.IsotropicGaussian(std=0.001), biases_init=initialization.Constant(0.01)) ''' self.recurrent_layer.initialize() self.output_layer = Linear(input_dim=self.dim_hidden, output_dim=self.dim_out, weights_init=initialization.Uniform(width=0.01), biases_init=initialization.Constant(0.01)) self.output_layer.initialize() self.children = [self.input_layer, self.recurrent_layer, self.output_layer]
def __init__(self, vocab_size, embedding_dim, state_dim, representation_dim, theano_seed=None, **kwargs): super(Decoder, self).__init__(**kwargs) self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.state_dim = state_dim self.representation_dim = representation_dim self.theano_seed = theano_seed # Initialize gru with special initial state. self.transition = GRUInitialState(attended_dim=state_dim, dim=state_dim, activation=Tanh(), name='decoder') # Initialize the attention mechanism. self.attention = SequenceContentAttention2( state_names=self.transition.apply.states, attended_dim=representation_dim, match_dim=state_dim, name="attention") readout = Readout(source_names=[ 'states', 'feedback', self.attention.take_glimpses.outputs[0] ], readout_dim=self.vocab_size, emitter=NewSoftmaxEmitter(initial_output=-1, theano_seed=theano_seed), feedback_brick=NewLookupFeedback( vocab_size, embedding_dim), post_merge=InitializableFeedforwardSequence([ Bias(dim=state_dim, name='maxout_bias').apply, Maxout(num_pieces=2, name='maxout').apply, Linear(input_dim=state_dim / 2, output_dim=embedding_dim, use_bias=False, name='softmax0').apply, Linear(input_dim=embedding_dim, name='softmax1').apply ]), merged_dim=state_dim) # Build sequence generator accordingly. self.sequence_generator = SequenceGenerator( readout=readout, transition=self.transition, attention=self.attention, fork=Fork([ name for name in self.transition.apply.sequences if name != 'mask' ], prototype=Linear()), cost_type='categorical_cross_entropy') self.children = [self.sequence_generator]
def __init__(self, dim, mini_dim, summary_dim, **kwargs):
    super(RNNwMini, self).__init__(**kwargs)
    self.dim = dim
    self.mini_dim = mini_dim
    self.summary_dim = summary_dim

    self.recurrent_layer = SimpleRecurrent(
        dim=self.summary_dim, activation=Rectifier(),
        name='recurrent_layer',
        weights_init=IsotropicGaussian(), biases_init=Constant(0.0))
    self.mini_recurrent_layer = SimpleRecurrent(
        dim=self.mini_dim, activation=Rectifier(),
        name='mini_recurrent_layer',
        weights_init=IsotropicGaussian(), biases_init=Constant(0.0))
    self.mini_to_main = Linear(self.dim + self.mini_dim, self.summary_dim,
                               name='mini_to_main',
                               weights_init=IsotropicGaussian(),
                               biases_init=Constant(0.0))
    self.children = [self.recurrent_layer, self.mini_recurrent_layer,
                     self.mini_to_main]
def __init__(self, activations=None, dims=None, **kwargs):
    if activations is None:
        raise ValueError("activations must be specified.")
    if dims is None:
        raise ValueError("dims must be specified.")
    if not (len(dims) == (len(activations) + 2)):
        raise ValueError("len(dims) != len(activations) + 2.")
    super(CondNet, self).__init__(**kwargs)

    self.dims = dims
    self.shared_acts = activations

    # construct the shared linear transforms for feedforward
    self.shared_linears = []
    for i in range(len(dims) - 2):
        self.shared_linears.append(
            Linear(dims[i], dims[i + 1], name='shared_linear_{}'.format(i)))

    self.mean_linear = Linear(dims[-2], dims[-1], name='mean_linear')
    self.logvar_linear = Linear(dims[-2], dims[-1], name='logvar_linear',
                                weights_init=Constant(0.))

    self.children = self.shared_linears + self.shared_acts
    self.children.append(self.mean_linear)
    self.children.append(self.logvar_linear)
    return
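# Hedged sketch of a matching forward pass (an assumption, not from the
# source): run the shared Linear/activation stack, then the two output heads
# built in the constructor above.
@application(inputs=['x'], outputs=['mean', 'logvar'])
def apply(self, x):
    h = x
    for linear, act in zip(self.shared_linears, self.shared_acts):
        h = act.apply(linear.apply(h))
    return self.mean_linear.apply(h), self.logvar_linear.apply(h)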
def __init__(self, input_dim, hidden_dim,
             inputs_weights_init=None, inputs_biases_init=None,
             reset_weights_init=None, reset_biases_init=None,
             update_weights_init=None, update_biases_init=None,
             **kwargs):
    super(GatedRecurrentFork, self).__init__(**kwargs)
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim

    self.inputs_weights_init = inputs_weights_init
    self.inputs_biases_init = inputs_biases_init
    self.reset_weights_init = reset_weights_init
    self.reset_biases_init = reset_biases_init
    self.update_weights_init = update_weights_init
    self.update_biases_init = update_biases_init

    self.input_to_inputs = Linear(input_dim=input_dim,
                                  output_dim=self.hidden_dim,
                                  name="input_to_inputs")
    self.input_to_gate_inputs = Linear(input_dim=input_dim,
                                       output_dim=self.hidden_dim * 2,
                                       name="input_to_gate_inputs")
    self.children = [self.input_to_inputs, self.input_to_gate_inputs]
def example2(): """GRU""" x = tensor.tensor3('x') dim = 3 fork = Fork(input_dim=dim, output_dims=[dim, dim * 2], name='fork', output_names=["linear", "gates"], weights_init=initialization.Identity(), biases_init=Constant(0)) gru = GatedRecurrent(dim=dim, weights_init=initialization.Identity(), biases_init=Constant(0)) fork.initialize() gru.initialize() linear, gate_inputs = fork.apply(x) h = gru.apply(linear, gate_inputs) f = theano.function([x], h) print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX))) doubler = Linear(input_dim=dim, output_dim=dim, weights_init=initialization.Identity(2), biases_init=initialization.Constant(0)) doubler.initialize() lin, gate = fork.apply(doubler.apply(x)) h_doubler = gru.apply(lin, gate) f = theano.function([x], h_doubler) print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
def example(): """ Simple reccurent example. Taken from : https://github.com/mdda/pycon.sg-2015_deep-learning/blob/master/ipynb/blocks-recurrent-docs.ipynb """ x = tensor.tensor3('x') rnn = SimpleRecurrent(dim=3, activation=Identity(), weights_init=initialization.Identity()) rnn.initialize() h = rnn.apply(x) f = theano.function([x], h) print(f(np.ones((3, 1, 3), dtype=theano.config.floatX))) doubler = Linear(input_dim=3, output_dim=3, weights_init=initialization.Identity(2), biases_init=initialization.Constant(0)) doubler.initialize() h_doubler = rnn.apply(doubler.apply(x)) f = theano.function([x], h_doubler) print(f(np.ones((3, 1, 3), dtype=theano.config.floatX))) #Initial State h0 = tensor.matrix('h0') h = rnn.apply(inputs=x, states=h0) f = theano.function([x, h0], h) print( f(np.ones((3, 1, 3), dtype=theano.config.floatX), np.ones((1, 3), dtype=theano.config.floatX)))
def __init__(self, match_dim, use_local_attention=False, window_size=10, sigma=None, state_transformer=None, local_state_transformer=None, local_predictor=None, attended_transformer=None, energy_computer=None, **kwargs): super(SequenceContentAttention, self).__init__(**kwargs) if not state_transformer: state_transformer = Linear(use_bias=False, name="state_trans") if not local_state_transformer: local_state_transformer = Linear(use_bias=False, name="local_state_trans") if not local_predictor: local_predictor = Linear(use_bias=False, name="local_pred") if sigma is None: sigma = window_size * 1.0 / 2 self.use_local_attention = use_local_attention self.sigma = sigma * sigma self.match_dim = match_dim self.state_name = self.state_names[0] self.state_transformer = state_transformer self.local_state_transformer = local_state_transformer self.local_predictor = local_predictor if not attended_transformer: attended_transformer = Linear(name="preprocess") if not energy_computer: energy_computer = SumMatchFunction(name="energy_comp") self.attended_transformer = attended_transformer self.energy_computer = energy_computer self.children = [self.state_transformer, self.local_state_transformer, self.local_predictor, self.attended_transformer, energy_computer]
def generation(z_list, n_latent, hu_decoder, n_out, y): logger.info('in generation: n_latent: %d, hu_decoder: %d', n_latent, hu_decoder) if hu_decoder == 0: return generation_simple(z_list, n_latent, n_out, y) mlp1 = MLP(activations=[Rectifier()], dims=[n_latent, hu_decoder], name='latent_to_hidDecoder') initialize([mlp1]) hid_to_out = Linear(name='hidDecoder_to_output', input_dim=hu_decoder, output_dim=n_out) initialize([hid_to_out]) mysigmoid = Logistic(name='y_hat_vae') agg_logpy_xz = 0. agg_y_hat = 0. for i, z in enumerate(z_list): y_hat = mysigmoid.apply(hid_to_out.apply( mlp1.apply(z))) #reconstructed x agg_logpy_xz += cross_entropy_loss(y_hat, y) agg_y_hat += y_hat agg_logpy_xz /= len(z_list) agg_y_hat /= len(z_list) return agg_y_hat, agg_logpy_xz
def __init__(self, match_dim, state_transformer=None, attended_transformer=None, energy_computer=None, **kwargs): super(SequenceContentAttention, self).__init__(**kwargs) if not state_transformer: state_transformer = Linear(use_bias=False) self.match_dim = match_dim self.state_transformer = state_transformer self.state_transformers = Parallel(input_names=self.state_names, prototype=state_transformer, name="state_trans") if not attended_transformer: attended_transformer = Linear(name="preprocess") if not energy_computer: energy_computer = ShallowEnergyComputer(name="energy_comp") self.attended_transformer = attended_transformer self.energy_computer = energy_computer self.children = [ self.state_transformers, attended_transformer, energy_computer ]
def __init__(self, inner_recurrent, inner_dim, **kwargs):
    self.inner_recurrent = inner_recurrent
    self.linear_map = Linear(input_dim=inner_dim, output_dim=1)
    super(OuterLinear, self).__init__(**kwargs)
    self.children = [self.inner_recurrent, self.linear_map]
def __init__(self, emb_dim, dim, num_input_words, num_output_words, vocab, **kwargs): if emb_dim == 0: emb_dim = dim if num_input_words == 0: num_input_words = vocab.size() if num_output_words == 0: num_output_words = vocab.size() self._num_input_words = num_input_words self._num_output_words = num_output_words self._vocab = vocab self._word_to_id = WordToIdOp(self._vocab) children = [] self._main_lookup = LookupTable(self._num_input_words, emb_dim, name='main_lookup') self._encoder_fork = Linear(emb_dim, 4 * dim, name='encoder_fork') self._encoder_rnn = LSTM(dim, name='encoder_rnn') self._decoder_fork = Linear(emb_dim, 4 * dim, name='decoder_fork') self._decoder_rnn = LSTM(dim, name='decoder_rnn') children.extend([self._main_lookup, self._encoder_fork, self._encoder_rnn, self._decoder_fork, self._decoder_rnn]) self._pre_softmax = Linear(dim, self._num_output_words) self._softmax = NDimensionalSoftmax() children.extend([self._pre_softmax, self._softmax]) super(LanguageModel, self).__init__(children=children, **kwargs)
def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs): super(BidirectionalEncoder, self).__init__(**kwargs) self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.state_dim = state_dim self.lookup = LookupTable(name='embeddings') self.bidir = BidirectionalWMT15( GatedRecurrent(activation=Tanh(), dim=state_dim)) self.fwd_fork = Fork([ name for name in self.bidir.prototype.apply.sequences if name != 'mask' ], prototype=Linear(), name='fwd_fork') self.back_fork = Fork([ name for name in self.bidir.prototype.apply.sequences if name != 'mask' ], prototype=Linear(), name='back_fork') self.children = [ self.lookup, self.bidir, self.fwd_fork, self.back_fork ]
def __init__(self, vocab_size, embedding_dim, dgru_state_dim, dgru_depth, **kwargs): super(Decimator, self).__init__(**kwargs) self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.dgru_state_dim = dgru_state_dim self.embedding_dim = embedding_dim self.lookup = LookupTable(name='embeddings') self.dgru_depth = dgru_depth # representation self.dgru = RecurrentStack([ DGRU(activation=Tanh(), dim=self.dgru_state_dim) for _ in range(dgru_depth) ], skip_connections=True) # importance of this representation self.bidir_w = Bidirectional(RecurrentWithFork( DGRU(activation=Tanh(), dim=self.dgru_state_dim // 2), self.embedding_dim, name='src_word_with_fork'), name='bidir_src_word_encoder') self.gru_fork = Fork( [name for name in self.dgru.apply.sequences if name != 'mask'], prototype=Linear(), name='gru_fork') # map to a energy scalar self.wl = Linear(input_dim=dgru_state_dim, output_dim=1) self.children = [ self.lookup, self.dgru, self.gru_fork, self.bidir_w, self.wl ]
def __init__(self, input_dim, output_activation=None,
             transform_activation=None, **kwargs):
    super(Highway, self).__init__(**kwargs)
    self.input_dim = input_dim
    self.output_dim = input_dim

    if output_activation is None:
        output_activation = Rectifier()
    if transform_activation is None:
        transform_activation = Logistic()

    self._linear_h = Linear(name="linear_h",
                            input_dim=input_dim, output_dim=input_dim)
    self._linear_t = Linear(name="linear_t",
                            input_dim=input_dim, output_dim=input_dim)
    self._output_activation = output_activation
    self._transform_activation = transform_activation
    self.children = [self._linear_h, self._linear_t,
                     self._output_activation, self._transform_activation]
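# Hedged sketch (an assumption, not from the source): the standard highway
# combination y = H(x) * T(x) + x * (1 - T(x)) wired from the bricks above.
@application(inputs=['input_'], outputs=['output'])
def apply(self, input_):
    h = self._output_activation.apply(self._linear_h.apply(input_))
    t = self._transform_activation.apply(self._linear_t.apply(input_))
    return h * t + input_ * (1 - t)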
def __init__(self, input_dim, output_dim, **kwargs):
    super(Qsampler, self).__init__(**kwargs)

    self.prior_mean = 0.0
    self.prior_log_sigma = 0.0

    self.mean_transform = Linear(
        name=self.name + "_mean",
        input_dim=input_dim, output_dim=output_dim,
        weights_init=self.weights_init, biases_init=self.biases_init,
        use_bias=True)
    self.log_sigma_transform = Linear(
        name=self.name + "_log_sigma",
        input_dim=input_dim, output_dim=output_dim,
        weights_init=self.weights_init, biases_init=self.biases_init,
        use_bias=True)

    self.children = [self.mean_transform, self.log_sigma_transform]
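# Hedged sketch (assumption): a DRAW-style reparameterised sample built on
# the two transforms above; `u` is standard Gaussian noise supplied by the
# caller (e.g. from theano.sandbox.rng_mrg.MRG_RandomStreams).
@application(inputs=['x', 'u'], outputs=['z'])
def sample(self, x, u):
    mean = self.mean_transform.apply(x)
    log_sigma = self.log_sigma_transform.apply(x)
    return mean + tensor.exp(log_sigma) * u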
def lstm_layer(in_dim, h, h_dim, n, pref=""):
    linear = Linear(input_dim=in_dim, output_dim=h_dim * 4,
                    name='linear' + str(n) + pref)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n) + pref)
    initialize([linear, lstm])
    return lstm.apply(linear.apply(h))[0]
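# Illustrative call (variable names are assumptions): stack two LSTM layers
# on top of word embeddings of shape time x batch x embed_dim.
h1 = lstm_layer(embed_dim, embeddings, hidden_dim, 1)
h2 = lstm_layer(hidden_dim, h1, hidden_dim, 2)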
def __init__(self, recurrent, dims, activations=[Identity(), Identity()], **kwargs): super(MyRecurrent, self).__init__(**kwargs) self.dims = dims self.recurrent = recurrent self.activations = activations if isinstance(self.recurrent, (SimpleRecurrent, SimpleRecurrentBatchNorm)): output_dim = dims[1] elif isinstance(self.recurrent, (LSTM, LSTMBatchNorm)): output_dim = 4 * dims[1] else: raise NotImplementedError self.input_trans = Linear(name='input_trans', input_dim=dims[0], output_dim=output_dim, weights_init=NormalizedInitialization(), biases_init=Constant(0)) self.output_trans = Linear(name='output_trans', input_dim=dims[1], output_dim=dims[2], weights_init=NormalizedInitialization(), biases_init=Constant(0)) self.children = ( [self.input_trans, self.recurrent, self.output_trans] + self.activations)
def test_rng():
    Brick.lazy = True
    linear = Linear()
    assert isinstance(linear.rng, numpy.random.RandomState)
    assert linear.rng.rand() == numpy.random.RandomState(DEFAULT_SEED).rand()
    linear = Linear(rng=numpy.random.RandomState(1))
    assert linear.rng.rand() == numpy.random.RandomState(1).rand()
def softmax_layer(h, y, x_mask, y_mask, lens, vocab_size, hidden_size, boosting): hidden_to_output = Linear(name='hidden_to_output', input_dim=hidden_size, output_dim=vocab_size) initialize([hidden_to_output]) linear_output = hidden_to_output.apply(h) linear_output.name = 'linear_output' softmax = NDimensionalSoftmax() #y_hat = softmax.apply(linear_output, extra_ndim=1) #y_hat.name = 'y_hat' cost_a = softmax.categorical_cross_entropy(y, linear_output, extra_ndim=1) #produces correct average cost_a = cost_a * y_mask if boosting: #boosting step, must divide by length here lensMat = T.tile(lens, (y.shape[0], 1)) cost_a = cost_a / lensMat #only count cost of correctly masked entries cost = cost_a.sum() / y_mask.sum() cost.name = 'cost' return (linear_output, cost)
class Embedder(Initializable): """ Linear Embedding Brick Parameters ---------- dim_in: :class:`int` Dimensionality of the input dim_out: :class:`int` Dimensionality of the output output_type: :class:`str` fc for fully connected. conv for convolutional """ def __init__(self, dim_in, dim_out, output_type='fc', **kwargs): self.dim_in = dim_in self.dim_out = dim_out self.output_type = output_type self.linear = Linear(dim_in, dim_out, name='embed_layer') children = [self.linear] kwargs.setdefault('children', []).extend(children) super(Embedder, self).__init__(**kwargs) @application(inputs=['y'], outputs=['outputs']) def apply(self, y): embedding = self.linear.apply(y) if self.output_type == 'fc': return embedding if self.output_type == 'conv': return embedding.reshape((-1, embedding.shape[-1], 1, 1)) def get_dim(self, name): if self.output_type == 'fc': return self.linear.get_dim(name) if self.output_type == 'conv': return (self.linear.get_dim(name), 1, 1)
def __init__(self, nvis, nhid, encoding_mlp, encoding_lstm, decoding_mlp, decoding_lstm, T=1, **kwargs): super(DRAW, self).__init__(**kwargs) self.nvis = nvis self.nhid = nhid self.T = T self.encoding_mlp = encoding_mlp self.encoding_mlp.name = 'encoder_mlp' for i, child in enumerate(self.encoding_mlp.children): child.name = '{}_{}'.format(self.encoding_mlp.name, i) self.encoding_lstm = encoding_lstm self.encoding_lstm.name = 'encoder_lstm' self.encoding_parameter_mapping = Fork( output_names=['mu_phi', 'log_sigma_phi'], prototype=Linear()) self.decoding_mlp = decoding_mlp self.decoding_mlp.name = 'decoder_mlp' for i, child in enumerate(self.decoding_mlp.children): child.name = '{}_{}'.format(self.decoding_mlp.name, i) self.decoding_lstm = decoding_lstm self.decoding_lstm.name = 'decoder_lstm' self.decoding_parameter_mapping = Linear(name='mu_theta') self.prior_mu = tensor.zeros((self.nhid,)) self.prior_mu.name = 'prior_mu' self.prior_log_sigma = tensor.zeros((self.nhid,)) self.prior_log_sigma.name = 'prior_log_sigma' self.children = [self.encoding_mlp, self.encoding_lstm, self.encoding_parameter_mapping, self.decoding_mlp, self.decoding_lstm, self.decoding_parameter_mapping]
def test_sequence_variable_inputs(): x, y = tensor.matrix(), tensor.matrix() parallel_1 = Parallel(input_names=['input_1', 'input_2'], input_dims=dict(input_1=4, input_2=5), output_dims=dict(input_1=3, input_2=2), prototype=Linear(), weights_init=Constant(2), biases_init=Constant(1)) parallel_2 = Parallel(input_names=['input_1', 'input_2'], input_dims=dict(input_1=3, input_2=2), output_dims=dict(input_1=5, input_2=4), prototype=Linear(), weights_init=Constant(2), biases_init=Constant(1)) sequence = Sequence([parallel_1.apply, parallel_2.apply]) sequence.initialize() new_x, new_y = sequence.apply(x, y) x_val = numpy.ones((4, 4), dtype=theano.config.floatX) y_val = numpy.ones((4, 5), dtype=theano.config.floatX) assert_allclose( new_x.eval({x: x_val}), (x_val.dot(2 * numpy.ones((4, 3))) + numpy.ones((4, 3))).dot( 2 * numpy.ones((3, 5))) + numpy.ones((4, 5))) assert_allclose( new_y.eval({y: y_val}), (y_val.dot(2 * numpy.ones((5, 2))) + numpy.ones((4, 2))).dot( 2 * numpy.ones((2, 4))) + numpy.ones((4, 4)))
def bilstm_layer(in_dim, inp, h_dim, n, pref=""):
    linear = Linear(input_dim=in_dim, output_dim=h_dim * 4,
                    name='linear' + str(n) + pref)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n) + pref)
    bilstm = Bidirectional(prototype=lstm)
    bilstm.name = 'bilstm' + str(n) + pref
    initialize([linear, bilstm])
    return bilstm.apply(linear.apply(inp))[0]
def __init__(self, input_dim, output_dim, width, height, N, **kwargs): super(AttentionWriter, self).__init__(name="writer", **kwargs) self.img_width = width self.img_height = height self.N = N self.input_dim = input_dim self.output_dim = output_dim assert output_dim == width * height self.zoomer = ZoomableAttentionWindow(height, width, N) self.z_trafo = Linear(name=self.name + '_ztrafo', input_dim=input_dim, output_dim=5, weights_init=self.weights_init, biases_init=self.biases_init, use_bias=True) self.w_trafo = Linear(name=self.name + '_wtrafo', input_dim=input_dim, output_dim=N * N, weights_init=self.weights_init, biases_init=self.biases_init, use_bias=True) self.children = [self.z_trafo, self.w_trafo]
def __init__(self, image_feature_dim, embedding_dim, **kwargs): super(Encoder, self).__init__(**kwargs) self.image_embedding = Linear( input_dim=image_feature_dim , output_dim=embedding_dim # , weights_init=IsotropicGaussian(0.02) # , biases_init=Constant(0.) , name="image_embedding" ) self.to_inputs = Linear( input_dim=embedding_dim , output_dim=embedding_dim*4 # gate_inputs = vstack(input, forget, cell, hidden) # , weights_init=IsotropicGaussian(0.02) # , biases_init=Constant(0.) , name="to_inputs" ) # Don't think this dim has to also be dimension, more arbitrary self.transition = LSTM( dim=embedding_dim, name="transition") self.children = [ self.image_embedding , self.to_inputs , self.transition ]
def __init__(self, **kwargs): children = [] self.layers_numerical = [] self.layers_numerical.append( Linear(name='input_to_numerical_linear', input_dim=5000, output_dim=17, weights_init=IsotropicGaussian(), biases_init=Constant(1))) self.layers_categorical = [] self.layers_categorical.append( Linear(name='input_to_categorical_linear', input_dim=5000, output_dim=24016, weights_init=IsotropicGaussian(), biases_init=Constant(1))) self.layers_categorical.append( Logistic(name='input_to_categorical_sigmoid')) children += self.layers_numerical children += self.layers_categorical kwargs.setdefault('children', []).extend(children) super(build_top_mlp, self).__init__(**kwargs)
def __init__(self, embedding_dim, state_dim, **kwargs): """Constructor. Note that this implementation only supports single layer architectures. Args: embedding_dim (int): Dimensionality of the word vectors defined by the sparse feature map. state_dim (int): Size of the recurrent layer. """ super(NoLookupEncoder, self).__init__(**kwargs) self.embedding_dim = embedding_dim self.state_dim = state_dim self.bidir = BidirectionalWMT15( GatedRecurrent(activation=Tanh(), dim=state_dim)) self.fwd_fork = Fork([ name for name in self.bidir.prototype.apply.sequences if name != 'mask' ], prototype=Linear(), name='fwd_fork') self.back_fork = Fork([ name for name in self.bidir.prototype.apply.sequences if name != 'mask' ], prototype=Linear(), name='back_fork') self.children = [self.bidir, self.fwd_fork, self.back_fork]
def __init__(self, input_dim, output_dim, hidden_dim=None, **kwargs): super(Qsampler, self).__init__(**kwargs) if hidden_dim is None: hidden_dim = (input_dim + output_dim) // 2 self.input_dim = input_dim self.output_dim = output_dim self.hidden_dim = hidden_dim self.h_transform = Linear(name=self.name + '_h', input_dim=input_dim, output_dim=hidden_dim, weights_init=self.weights_init, biases_init=self.biases_init, use_bias=True) self.mean_transform = Linear(name=self.name + '_mean', input_dim=hidden_dim, output_dim=output_dim, weights_init=self.weights_init, biases_init=self.biases_init, use_bias=True) self.ls_transform = Linear(name=self.name + '_log_sigma', input_dim=hidden_dim, output_dim=output_dim, weights_init=self.weights_init, biases_init=self.biases_init, use_bias=True) self.children = [ self.h_transform, self.mean_transform, self.ls_transform ]
def _build_bricks(self, *args, **kwargs): # Build lookup tables self.word_embed = self._embed(len(self.dataset.word2index), self.config.word_embed_dim, name='word_embed') self.user_embed = self._embed(len(self.dataset.user2index), self.config.user_embed_dim, name="user_embed") self.hashtag_embed = self._embed(len(self.dataset.hashtag2index), self.config.lstm_dim + self.config.user_embed_dim, name='hashtag_embed') # Build text encoder self.mlstm_ins = Linear(input_dim=self.config.word_embed_dim, output_dim=4 * self.config.lstm_dim, name='mlstm_in') self.mlstm_ins.weights_init = IsotropicGaussian( std=numpy.sqrt(2) / numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim)) self.mlstm_ins.biases_init = Constant(0) self.mlstm_ins.initialize() self.mlstm = MLSTM(self.config.lstm_time, self.config.lstm_dim, shared=False) self.mlstm.weights_init = IsotropicGaussian( std=numpy.sqrt(2) / numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim)) self.mlstm.biases_init = Constant(0) self.mlstm.initialize()
def softmax_layer(h, y, hidden_size, num_targets, cost_fn='cross'): hidden_to_output = Linear(name='hidden_to_output', input_dim=hidden_size, output_dim=num_targets) initialize([hidden_to_output]) linear_output = hidden_to_output.apply(h) linear_output.name = 'linear_output' y_pred = T.argmax(linear_output, axis=1) label_of_predicted = debug_print(y[T.arange(y.shape[0]), y_pred], 'label_of_predicted', False) pat1 = T.mean(label_of_predicted) updates = None if 'ranking' in cost_fn: cost, updates = ranking_loss(linear_output, y) print 'using ranking loss function!' else: y_hat = Logistic().apply(linear_output) y_hat.name = 'y_hat' cost = cross_entropy_loss(y_hat, y) cost.name = 'cost' pat1.name = 'precision@1' misclassify_rate = MultiMisclassificationRate().apply( y, T.ge(linear_output, 0.5)) misclassify_rate.name = 'error_rate' return cost, pat1, updates, misclassify_rate
def bilstm_layer(in_dim, inp, h_dim, n):
    linear = Linear(input_dim=in_dim, output_dim=h_dim * 4,
                    name='linear' + str(n) + inp.name)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n) + inp.name)
    bilstm = Bidirectional(prototype=lstm)
    bilstm.name = 'bilstm' + str(n) + inp.name
    initialize([linear, bilstm])
    return bilstm.apply(linear.apply(inp))[0]
def apply(self, input_, target): x_to_h = Linear(name='x_to_h', input_dim=self.dims[0], output_dim=self.dims[1] * 4) pre_rnn = x_to_h.apply(input_) pre_rnn.name = 'pre_rnn' rnn = LSTM(activation=Tanh(), dim=self.dims[1], name=self.name) h, _ = rnn.apply(pre_rnn) h.name = 'h' h_to_y = Linear(name='h_to_y', input_dim=self.dims[1], output_dim=self.dims[2]) y_hat = h_to_y.apply(h) y_hat.name = 'y_hat' cost = SquaredError().apply(target, y_hat) cost.name = 'MSE' self.outputs = {} self.outputs['y_hat'] = y_hat self.outputs['cost'] = cost self.outputs['pre_rnn'] = pre_rnn self.outputs['h'] = h # Initialization for brick in (rnn, x_to_h, h_to_y): brick.weights_init = IsotropicGaussian(0.01) brick.biases_init = Constant(0) brick.initialize()
def __init__(self, input_dim, output_dim, channels, width, height, N, **kwargs): super(AttentionWriter, self).__init__(name="writer", **kwargs) self.channels = channels self.img_width = width self.img_height = height self.N = N self.input_dim = input_dim self.output_dim = output_dim assert output_dim == channels*width*height self.zoomer = ZoomableAttentionWindow(channels, height, width, N) self.z_trafo = Linear( name=self.name+'_ztrafo', input_dim=input_dim, output_dim=5, weights_init=self.weights_init, biases_init=self.biases_init, use_bias=True) self.w_trafo = Linear( name=self.name+'_wtrafo', input_dim=input_dim, output_dim=channels*N*N, weights_init=self.weights_init, biases_init=self.biases_init, use_bias=True) self.children = [self.z_trafo, self.w_trafo]
class AttentionWriter(Initializable): def __init__(self, input_dim, output_dim, channels, width, height, N, **kwargs): super(AttentionWriter, self).__init__(name="writer", **kwargs) self.channels = channels self.img_width = width self.img_height = height self.N = N self.input_dim = input_dim self.output_dim = output_dim assert output_dim == channels*width*height self.zoomer = ZoomableAttentionWindow(channels, height, width, N) self.z_trafo = Linear( name=self.name+'_ztrafo', input_dim=input_dim, output_dim=5, weights_init=self.weights_init, biases_init=self.biases_init, use_bias=True) self.w_trafo = Linear( name=self.name+'_wtrafo', input_dim=input_dim, output_dim=channels*N*N, weights_init=self.weights_init, biases_init=self.biases_init, use_bias=True) self.children = [self.z_trafo, self.w_trafo] @application(inputs=['h'], outputs=['c_update']) def apply(self, h): w = self.w_trafo.apply(h) l = self.z_trafo.apply(h) center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l) c_update = 1./gamma * self.zoomer.write(w, center_y, center_x, delta, sigma) return c_update @application(inputs=['h'], outputs=['c_update', 'center_y', 'center_x', 'delta']) def apply_detailed(self, h): w = self.w_trafo.apply(h) l = self.z_trafo.apply(h) center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l) c_update = 1./gamma * self.zoomer.write(w, center_y, center_x, delta, sigma) return c_update, center_y, center_x, delta @application(inputs=['x','h'], outputs=['c_update', 'center_y', 'center_x', 'delta']) def apply_circular(self,x,h): #w = self.w_trafo.apply(h) l = self.z_trafo.apply(h) center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l) c_update = 1./gamma * self.zoomer.write(x, center_y, center_x, delta, sigma) return c_update, center_y, center_x, delta
def __init__(self, num_input_words, emb_dim, dim, vocab, lookup=None, fork_and_rnn=None, **kwargs): if num_input_words > 0: logger.info("Restricting def vocab to " + str(num_input_words)) self._num_input_words = num_input_words else: self._num_input_words = vocab.size() self._vocab = vocab children = [] if lookup is None: self._def_lookup = LookupTable(self._num_input_words, emb_dim, name='def_lookup') else: self._def_lookup = lookup if fork_and_rnn is None: self._def_fork = Linear(emb_dim, 4 * dim, name='def_fork') self._def_rnn = LSTM(dim, name='def_rnn') else: self._def_fork, self._def_rnn = fork_and_rnn children.extend([self._def_lookup, self._def_fork, self._def_rnn]) super(LSTMReadDefinitions, self).__init__(children=children, **kwargs)
def __init__(self, visible_dim, hidden_dim, rnn_dimensions=(128, 128), **kwargs): super(Rnnrbm, self).__init__(**kwargs) self.rnn_dimensions = rnn_dimensions self.visible_dim = visible_dim self.hidden_dim = hidden_dim # self.in_layer = Linear(input_dim=input_dim, output_dim=rnn_dimension * 4, # weights_init=IsotropicGaussian(0.01), # biases_init=Constant(0.0), # use_bias=False, # name="in_layer") self.rbm = Rbm(visible_dim=visible_dim, hidden_dim=hidden_dim, activation=Sigmoid(), weights_init=IsotropicGaussian(0.1), biases_init=Constant(0.1), name='rbm') self.uv = Linear(input_dim=rnn_dimensions[-1], output_dim=visible_dim, weights_init=IsotropicGaussian(0.0001), biases_init=Constant(0.001), use_bias=True, name='uv') self.uh = Linear(input_dim=rnn_dimensions[-1], output_dim=hidden_dim, weights_init=IsotropicGaussian(0.0001), biases_init=Constant(0.001), use_bias=True, name='uh') self.rnn = Rnn([visible_dim] + list(rnn_dimensions), name='rnn') self.children = [self.rbm, self.uv, self.uh, self.rnn] + self.rnn.children._items
def test_variable_filter_applications_error():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name="linear1")
    x = tensor.vector()
    h1 = brick1.apply(x)
    cg = ComputationGraph(h1)
    VariableFilter(applications=brick1.apply)(cg.variables)
def MSEloss_layer(h, y, frame_length, hidden_size):
    hidden_to_output = Linear(name="hidden_to_output",
                              input_dim=hidden_size,
                              output_dim=frame_length)
    initialize([hidden_to_output])
    y_hat = hidden_to_output.apply(h)
    y_hat.name = "y_hat"
    cost = squared_error(y_hat, y).mean()
    cost.name = "cost"
    # import ipdb; ipdb.set_trace()
    return y_hat, cost
def create_rnn(hidden_dim, vocab_dim,mode="rnn"): # input x = tensor.imatrix('inchar') y = tensor.imatrix('outchar') # W = LookupTable( name = "W1", #dim = hidden_dim*4, dim = hidden_dim, length = vocab_dim, weights_init = initialization.IsotropicGaussian(0.01), biases_init = initialization.Constant(0) ) if mode == "lstm": # Long Short Term Memory H = LSTM( hidden_dim, name = 'H', weights_init = initialization.IsotropicGaussian(0.01), biases_init = initialization.Constant(0.0) ) else: # recurrent history weight H = SimpleRecurrent( name = "H", dim = hidden_dim, activation = Tanh(), weights_init = initialization.IsotropicGaussian(0.01) ) # S = Linear( name = "W2", input_dim = hidden_dim, output_dim = vocab_dim, weights_init = initialization.IsotropicGaussian(0.01), biases_init = initialization.Constant(0) ) A = NDimensionalSoftmax( name = "softmax" ) initLayers([W,H,S]) activations = W.apply(x) hiddens = H.apply(activations)#[0] activations2 = S.apply(hiddens) y_hat = A.apply(activations2, extra_ndim=1) cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean() cg = ComputationGraph(cost) #print VariableFilter(roles=[WEIGHT])(cg.variables) #W1,H,W2 = VariableFilter(roles=[WEIGHT])(cg.variables) layers = (x, W, H, S, A, y) return cg, layers, y_hat, cost
def test_variable_filter(): # Creating computation graph brick1 = Linear(input_dim=2, output_dim=2, name='linear1') brick2 = Bias(2, name='bias1') activation = Sigmoid(name='sigm') x = tensor.vector() h1 = brick1.apply(x) h2 = activation.apply(h1) y = brick2.apply(h2) cg = ComputationGraph(y) parameters = [brick1.W, brick1.b, brick2.params[0]] bias = [brick1.b, brick2.params[0]] brick1_bias = [brick1.b] # Testing filtering by role role_filter = VariableFilter(roles=[PARAMETER]) assert parameters == role_filter(cg.variables) role_filter = VariableFilter(roles=[FILTER]) assert [] == role_filter(cg.variables) # Testing filtering by role using each_role flag role_filter = VariableFilter(roles=[PARAMETER, BIAS]) assert parameters == role_filter(cg.variables) role_filter = VariableFilter(roles=[PARAMETER, BIAS], each_role=True) assert not parameters == role_filter(cg.variables) assert bias == role_filter(cg.variables) # Testing filtering by bricks classes brick_filter = VariableFilter(roles=[BIAS], bricks=[Linear]) assert brick1_bias == brick_filter(cg.variables) # Testing filtering by bricks instances brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1]) assert brick1_bias == brick_filter(cg.variables) # Testing filtering by brick instance brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1]) assert brick1_bias == brick_filter(cg.variables) # Testing filtering by name name_filter = VariableFilter(name='W_norm') assert [cg.variables[2]] == name_filter(cg.variables) # Testing filtering by name regex name_filter_regex = VariableFilter(name_regex='W_no.?m') assert [cg.variables[2]] == name_filter_regex(cg.variables) # Testing filtering by application appli_filter = VariableFilter(applications=[brick1.apply]) variables = [cg.variables[1], cg.variables[8]] assert variables == appli_filter(cg.variables) # Testing filtering by application appli_filter_list = VariableFilter(applications=[brick1.apply]) assert variables == appli_filter_list(cg.variables)
def test_variable_filter_roles_error():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name="linear1")
    x = tensor.vector()
    h1 = brick1.apply(x)
    cg = ComputationGraph(h1)
    # testing role error
    VariableFilter(roles=PARAMETER)(cg.variables)
def add_lstm(input_dim, input_var):
    linear = Linear(input_dim=input_dim, output_dim=input_dim * 4,
                    name="linear_layer")
    lstm = LSTM(dim=input_dim, name="lstm_layer")
    testing_init(linear)
    # linear.initialize()
    default_init(lstm)
    h = linear.apply(input_var)
    return lstm.apply(h)
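# Minimal usage sketch (assumes the helpers above handle initialization):
# LSTM.apply returns (states, cells); keep only the hidden states.
x = tensor.tensor3('x')
states, cells = add_lstm(16, x)
f = theano.function([x], states)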
def test_protocol0_regression():
    """Check for a regression where protocol 0 dumps fail on load."""
    brick = Linear(5, 10)
    brick.allocate()
    buf = BytesIO()
    dump(brick, buf, parameters=list(brick.parameters), protocol=0)
    try:
        load(buf)
    except TypeError:
        assert False  # Regression
def construct_model(activation_function, r_dim, hidden_dim, out_dim): # Construct the model r = tensor.fmatrix('r') x = tensor.fmatrix('x') y = tensor.ivector('y') nx = x.shape[0] nj = x.shape[1] # also is r.shape[0] nr = r.shape[1] # r is nj x nr # x is nx x nj # y is nx # Get a representation of r of size r_dim r = DAE(r) # r is now nj x r_dim # r_rep is nx x nj x r_dim r_rep = r[None, :, :].repeat(axis=0, repeats=nx) # x3 is nx x nj x 1 x3 = x[:, :, None] # concat is nx x nj x (r_dim + 1) concat = tensor.concatenate([r_rep, x3], axis=2) # Change concat from Batch x Time x Features to T X B x F rnn_input = concat.dimshuffle(1, 0, 2) linear = Linear(input_dim=r_dim + 1, output_dim=4 * hidden_dim, name="input_linear") lstm = LSTM(dim=hidden_dim, activation=activation_function, name="hidden_recurrent") top_linear = Linear(input_dim=hidden_dim, output_dim=out_dim, name="out_linear") pre_rnn = linear.apply(rnn_input) states = lstm.apply(pre_rnn)[0] activations = top_linear.apply(states) activations = tensor.mean(activations, axis=0) cost = Softmax().categorical_cross_entropy(y, activations) pred = activations.argmax(axis=1) error_rate = tensor.neq(y, pred).mean() # Initialize parameters for brick in (linear, lstm, top_linear): brick.weights_init = IsotropicGaussian(0.1) brick.biases_init = Constant(0.) brick.initialize() return cost, error_rate
class AttentionWriter(Initializable): def __init__(self, input_dim, output_dim, width, height, N, **kwargs): super(AttentionWriter, self).__init__(name="writer", **kwargs) self.img_width = width self.img_height = height self.N = N self.input_dim = input_dim self.output_dim = output_dim assert output_dim == width * height self.zoomer = ZoomableAttentionWindow(height, width, N) self.z_trafo = Linear( name=self.name + "_ztrafo", input_dim=input_dim, output_dim=5, weights_init=self.weights_init, biases_init=self.biases_init, use_bias=True, ) self.w_trafo = Linear( name=self.name + "_wtrafo", input_dim=input_dim, output_dim=N * N, weights_init=self.weights_init, biases_init=self.biases_init, use_bias=True, ) self.children = [self.z_trafo, self.w_trafo] @application(inputs=["h"], outputs=["c_update"]) def apply(self, h): w = self.w_trafo.apply(h) l = self.z_trafo.apply(h) center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l) c_update = 1.0 / gamma * self.zoomer.write(w, center_y, center_x, delta, sigma) return c_update @application(inputs=["h"], outputs=["c_update", "center_y", "center_x", "delta"]) def apply_detailed(self, h): w = self.w_trafo.apply(h) l = self.z_trafo.apply(h) center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l) c_update = 1.0 / gamma * self.zoomer.write(w, center_y, center_x, delta, sigma) return c_update, center_y, center_x, delta
def softmax_layer(h, y, frame_length, hidden_size):
    hidden_to_output = Linear(name="hidden_to_output",
                              input_dim=hidden_size,
                              output_dim=frame_length)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = "linear_output"
    softmax = NDimensionalSoftmax()
    y_hat = softmax.apply(linear_output, extra_ndim=1)
    y_hat.name = "y_hat"
    cost = softmax.categorical_cross_entropy(y, linear_output,
                                             extra_ndim=1).mean()
    cost.name = "cost"
    return y_hat, cost
def test_linear_nan_allocation():
    x = tensor.matrix()
    linear = Linear(input_dim=16, output_dim=8,
                    weights_init=Constant(2), biases_init=Constant(1))
    linear.apply(x)
    w1 = numpy.nan * numpy.zeros((16, 8))
    w2 = linear.params[0].get_value()
    b1 = numpy.nan * numpy.zeros(8)
    b2 = linear.params[1].get_value()
    numpy.testing.assert_equal(w1, w2)
    numpy.testing.assert_equal(b1, b2)
def lstm_layer(self, h, n):
    """
    Performs the LSTM update for a batch of word sequences

    :param h The word embeddings for this update
    :param n The number of layers of the LSTM
    """
    # Maps the word embedding to a dimensionality to be used in the LSTM
    linear = Linear(input_dim=self.hidden_size,
                    output_dim=self.hidden_size * 4,
                    name='linear_lstm' + str(n))
    initialize(linear, sqrt(6.0 / (5 * self.hidden_size)))
    lstm = LSTM(dim=self.hidden_size, name='lstm' + str(n))
    initialize(lstm, 0.08)
    return lstm.apply(linear.apply(h))
def lllistool(i, inp, func):
    l = Linear(input_dim=DIMS[i], output_dim=DIMS[i + 1] * NUMS[i + 1],
               weights_init=IsotropicGaussian(std=DIMS[i] ** (-0.5)),
               biases_init=IsotropicGaussian(std=DIMS[i] ** (-0.5)),
               name='Lin{}'.format(i))
    l.initialize()
    func.name = 'Fun{}'.format(i)
    if func == SimpleRecurrent:
        gong = func(dim=DIMS[i + 1], activation=Rectifier(),
                    weights_init=IsotropicGaussian(
                        std=(DIMS[i] + DIMS[i + 1]) ** (-0.5)))
    else:
        gong = func()
    ret = gong.apply(l.apply(inp))
    return ret
def apply_layer(self, layer_type, input_, in_dim, out_dim, layer_name):
    # Since we pass this path twice (clean and corr encoder), we
    # want to make sure that parameters of both layers are shared.
    layer = self.shareds.get(layer_name)
    if layer is None:
        if layer_type == "fc":
            linear = Linear(use_bias=False, name=layer_name,
                            input_dim=in_dim, output_dim=out_dim, seed=1)
            linear.weights_init = Glorot(self.rng, in_dim, out_dim)
            linear.initialize()
            layer = linear
            self.shareds[layer_name] = layer
    return layer.apply(input_)
def prior_network(x, n_input, hu_encoder, n_latent):
    logger.info('In prior_network: n_input: %d, hu_encoder: %d',
                n_input, hu_encoder)
    mlp1 = MLP(activations=[Rectifier()], dims=[n_input, hu_encoder],
               name='prior_in_to_hidEncoder')
    initialize([mlp1])
    h_encoder = mlp1.apply(x)
    h_encoder = debug_print(h_encoder, 'h_encoder', False)
    lin1 = Linear(name='prior_hiddEncoder_to_latent_mu',
                  input_dim=hu_encoder, output_dim=n_latent)
    lin2 = Linear(name='prior_hiddEncoder_to_latent_sigma',
                  input_dim=hu_encoder, output_dim=n_latent)
    initialize([lin1])
    initialize([lin2], rndstd=0.001)
    mu = lin1.apply(h_encoder)
    log_sigma = lin2.apply(h_encoder)
    return mu, log_sigma
def __init__(self, input_dim, output_dim, noise_batch_size,
             prior_mean=0, prior_noise_level=0, **kwargs):
    self.linear = Linear()
    self.mask = Linear(name='mask')
    children = [self.linear, self.mask]
    kwargs.setdefault('children', []).extend(children)
    super(NoisyLinear, self).__init__(**kwargs)
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.noise_batch_size = noise_batch_size
    self.prior_mean = prior_mean
    self.prior_noise_level = prior_noise_level
def softmax_layer(h, y, vocab_size, hidden_size):
    hidden_to_output = Linear(name='hidden_to_output', input_dim=hidden_size,
                              output_dim=vocab_size)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    softmax = NDimensionalSoftmax()
    y_hat = softmax.apply(linear_output, extra_ndim=1)
    y_hat.name = 'y_hat'
    cost = softmax.categorical_cross_entropy(y, linear_output,
                                             extra_ndim=1).mean()
    cost.name = 'cost'
    return y_hat, cost
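# Illustrative call (assumed shapes): `h` is time x batch x hidden_size from
# a recurrent encoder and `y` is a time x batch matrix of target indices.
y_hat, cost = softmax_layer(h, y, vocab_size=10000, hidden_size=512)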
def __init__(self, input_size, hidden_size, output_size): self.input_size = input_size self.hidden_size = hidden_size self.output_size = output_size x = tensor.tensor3('x', dtype=floatX) y = tensor.tensor3('y', dtype=floatX) x_to_lstm = Linear(name="x_to_lstm", input_dim=input_size, output_dim=4 * hidden_size, weights_init=IsotropicGaussian(), biases_init=Constant(0)) lstm = LSTM(dim=hidden_size, name="lstm", weights_init=IsotropicGaussian(), biases_init=Constant(0)) lstm_to_output = Linear(name="lstm_to_output", input_dim=hidden_size, output_dim=output_size, weights_init=IsotropicGaussian(), biases_init=Constant(0)) x_transform = x_to_lstm.apply(x) h, c = lstm.apply(x_transform) y_hat = lstm_to_output.apply(h) y_hat = Logistic(name="y_hat").apply(y_hat) self.cost = BinaryCrossEntropy(name="cost").apply(y, y_hat) x_to_lstm.initialize() lstm.initialize() lstm_to_output.initialize() self.computation_graph = ComputationGraph(self.cost)
def main(max_seq_length, lstm_dim, batch_size, num_batches, num_epochs): dataset_train = IterableDataset(generate_data(max_seq_length, batch_size, num_batches)) dataset_test = IterableDataset(generate_data(max_seq_length, batch_size, 100)) stream_train = DataStream(dataset=dataset_train) stream_test = DataStream(dataset=dataset_test) x = T.tensor3('x') y = T.matrix('y') # we need to provide data for the LSTM layer of size 4 * ltsm_dim, see # LSTM layer documentation for the explanation x_to_h = Linear(1, lstm_dim * 4, name='x_to_h', weights_init=IsotropicGaussian(), biases_init=Constant(0.0)) lstm = LSTM(lstm_dim, name='lstm', weights_init=IsotropicGaussian(), biases_init=Constant(0.0)) h_to_o = Linear(lstm_dim, 1, name='h_to_o', weights_init=IsotropicGaussian(), biases_init=Constant(0.0)) x_transform = x_to_h.apply(x) h, c = lstm.apply(x_transform) # only values of hidden units of the last timeframe are used for # the classification y_hat = h_to_o.apply(h[-1]) y_hat = Logistic().apply(y_hat) cost = BinaryCrossEntropy().apply(y, y_hat) cost.name = 'cost' lstm.initialize() x_to_h.initialize() h_to_o.initialize() cg = ComputationGraph(cost) algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Adam()) test_monitor = DataStreamMonitoring(variables=[cost], data_stream=stream_test, prefix="test") train_monitor = TrainingDataMonitoring(variables=[cost], prefix="train", after_epoch=True) main_loop = MainLoop(algorithm, stream_train, extensions=[test_monitor, train_monitor, FinishAfter(after_n_epochs=num_epochs), Printing(), ProgressBar()]) main_loop.run() print 'Learned weights:' for layer in (x_to_h, lstm, h_to_o): print "Layer '%s':" % layer.name for param in layer.parameters: print param.name, ': ', param.get_value() print