def __init__(self, emitter=None, feedback_brick=None, merge=None,
             merge_prototype=None, post_merge=None, merged_dim=None,
             **kwargs):
    super(Readout, self).__init__(**kwargs)
    if not emitter:
        emitter = TrivialEmitter(self.readout_dim)
    if not feedback_brick:
        feedback_brick = TrivialFeedback(self.readout_dim)
    if not merge:
        merge = Merge(input_names=self.source_names,
                      prototype=merge_prototype)
    if not post_merge:
        post_merge = Bias(dim=self.readout_dim)
    if not merged_dim:
        merged_dim = self.readout_dim
    self.emitter = emitter
    self.feedback_brick = feedback_brick
    self.merge = merge
    self.post_merge = post_merge
    self.merged_dim = merged_dim
    self.children = [self.emitter, self.feedback_brick, self.merge,
                     self.post_merge]

def __init__(self, merge_dim, post_merge_dim, merge_names=None,
             merge=None, merge_prototype=None, post_merge=None, **kwargs):
    super(MergeReadout, self).__init__(**kwargs)
    if not merge_dim:
        merge_dim = post_merge_dim
    if not merge_names:
        merge_names = kwargs['input_names']
    if not merge:
        merge = Merge(input_names=merge_names, prototype=merge_prototype)
    if not post_merge:
        post_merge = Bias(dim=post_merge_dim)
    self.merge_names = merge_names
    self.merge_dim = merge_dim
    self.merge_brick = merge
    self.post_merge = post_merge
    self.post_merge_dim = post_merge_dim
    self.children = [self.merge_brick, self.post_merge]

def __init__(self, emitter=None, feedback_brick=None, merge=None,
             merge_prototype=None, post_merge=None, merged_dim=None,
             **kwargs):
    if not emitter:
        emitter = TrivialEmitter(kwargs['readout_dim'])
    if not feedback_brick:
        feedback_brick = TrivialFeedback(kwargs['readout_dim'])
    if not merge:
        merge = Merge(input_names=kwargs['source_names'],
                      prototype=merge_prototype)
    if not post_merge:
        post_merge = Bias(dim=kwargs['readout_dim'])
    if not merged_dim:
        merged_dim = kwargs['readout_dim']
    self.emitter = emitter
    self.feedback_brick = feedback_brick
    self.merge = merge
    self.post_merge = post_merge
    self.merged_dim = merged_dim
    children = [self.emitter, self.feedback_brick, self.merge,
                self.post_merge]
    kwargs.setdefault('children', []).extend(children)
    super(Readout, self).__init__(**kwargs)

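# All three Readout constructors above fall back to a default Merge brick
# over the readout's source names. Below is a minimal, self-contained
# sketch of what that Merge does (the input names and dimensions are
# assumptions for illustration, not values from the snippets above): each
# named input is passed through its own child transformation and the
# results are summed into a single output.
import numpy
from theano import tensor, function
from blocks.bricks.parallel import Merge
from blocks.initialization import Constant

merge = Merge(input_names=['states', 'feedback'], input_dims=[4, 3],
              output_dim=2, weights_init=Constant(1.0),
              biases_init=Constant(0.0))
merge.initialize()

states = tensor.matrix('states')
feedback = tensor.matrix('feedback')
merged = merge.apply(states, feedback)  # shape (batch, 2)

f = function([states, feedback], merged)
print(f(numpy.ones((1, 4), dtype=states.dtype),
        numpy.ones((1, 3), dtype=feedback.dtype)))  # -> [[7. 7.]]
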
def __init__(self, path, nn_char_map, no_transition_cost=1e12, **kwargs):
    # Only FST language models are supported so far, so the model type is
    # not configurable.
    fst = FST(path)
    fst_char_map = dict(fst.fst.isyms.items())
    del fst_char_map['<eps>']
    if not len(fst_char_map) == len(nn_char_map):
        raise ValueError(
            "FST and network character maps have different sizes")
    remap_table = {nn_char_map[character]: fst_code
                   for character, fst_code in fst_char_map.items()}
    transition = FSTTransition(fst, remap_table, no_transition_cost)

    # This SequenceGenerator will be used only in a very limited way.
    # That's why it is sufficient to equip it with a completely
    # fake readout.
    dummy_readout = Readout(
        source_names=['add'], readout_dim=len(remap_table),
        merge=Merge(input_names=['costs'], prototype=Identity()),
        post_merge=Identity(), emitter=SoftmaxEmitter())
    super(LanguageModel, self).__init__(
        transition=transition,
        fork=Fork(output_names=[name for name in transition.apply.sequences
                                if name != 'mask'],
                  prototype=Identity()),
        readout=dummy_readout, **kwargs)

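# A plain-Python illustration of the remap_table construction above: the
# keys of nn_char_map are characters with their network output indices,
# fst_char_map gives each character's FST symbol code, and the table maps
# network indices to FST codes. The two maps below are invented for the
# example.
nn_char_map = {'a': 0, 'b': 1, 'c': 2}
fst_char_map = {'a': 5, 'b': 3, 'c': 9}
remap_table = {nn_char_map[character]: fst_code
               for character, fst_code in fst_char_map.items()}
assert remap_table == {0: 5, 1: 3, 2: 9}
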
def __init__(self, input1_size, input2_size, lookup1_dim=200,
             lookup2_dim=200, hidden_size=512):
    self.hidden_size = hidden_size
    self.input1_size = input1_size
    self.input2_size = input2_size
    self.lookup1_dim = lookup1_dim
    self.lookup2_dim = lookup2_dim

    x1 = tensor.lmatrix('durations')
    x2 = tensor.lmatrix('syllables')
    y = tensor.lmatrix('pitches')

    lookup1 = LookupTable(dim=self.lookup1_dim, length=self.input1_size,
                          name='lookup1',
                          weights_init=initialization.Uniform(width=0.01),
                          biases_init=Constant(0))
    lookup1.initialize()
    lookup2 = LookupTable(dim=self.lookup2_dim, length=self.input2_size,
                          name='lookup2',
                          weights_init=initialization.Uniform(width=0.01),
                          biases_init=Constant(0))
    lookup2.initialize()

    # The LSTM brick expects inputs of four times its dimension (one
    # slice per gate), so the merged representation is projected to
    # 4 * hidden_size.
    merge = Merge(['lookup1', 'lookup2'],
                  [self.lookup1_dim, self.lookup2_dim],
                  self.hidden_size * 4,
                  weights_init=initialization.Uniform(width=0.01),
                  biases_init=Constant(0))
    merge.initialize()

    recurrent_block = LSTM(dim=self.hidden_size, activation=Tanh(),
                           weights_init=initialization.Uniform(width=0.01))
    # Alternative, deeper model:
    # RecurrentStack([LSTM(dim=self.hidden_size, activation=Tanh())] * 3)
    recurrent_block.initialize()

    linear = Linear(input_dim=self.hidden_size,
                    output_dim=self.input1_size,
                    weights_init=initialization.Uniform(width=0.01),
                    biases_init=Constant(0))
    linear.initialize()
    softmax = NDimensionalSoftmax()

    l1 = lookup1.apply(x1)
    l2 = lookup2.apply(x2)
    m = merge.apply(l1, l2)
    # LSTM.apply returns both states and cells; only the states feed the
    # output layer.
    h, c = recurrent_block.apply(m)
    a = linear.apply(h)
    # extra_ndim=1 is required because `a` is a 3-D (time, batch, classes)
    # tensor; a plain Softmax fails with "x must be 1-d or 2-d tensor of
    # floats. Got TensorType(float64, 3D)".
    y_hat = softmax.apply(a, extra_ndim=1)
    self.Cost = softmax.categorical_cross_entropy(y, a,
                                                  extra_ndim=1).mean()

    self.ComputationGraph = ComputationGraph(self.Cost)
    self.Model = Model(y_hat)

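# A self-contained sketch of what extra_ndim=1 does above (shapes are
# assumptions for illustration): NDimensionalSoftmax flattens the extra
# leading dimension, applies the 2-D softmax / cross-entropy, and
# reshapes the result back.
import numpy
from theano import tensor, function
from blocks.bricks import NDimensionalSoftmax

energies = tensor.tensor3('energies')  # (time, batch, classes)
targets = tensor.lmatrix('targets')    # (time, batch)
softmax = NDimensionalSoftmax()
cost = softmax.categorical_cross_entropy(targets, energies,
                                         extra_ndim=1).mean()
f = function([energies, targets], cost)
print(f(numpy.random.rand(5, 2, 4).astype(energies.dtype),
        numpy.zeros((5, 2), dtype='int64')))
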
def __init__(self, vocab_size, embedding_dim, igru_state_dim, igru_depth,
             trg_dgru_depth, emitter, feedback_brick, merge=None,
             merge_prototype=None, post_merge=None, **kwargs):
    merged_dim = igru_state_dim
    if not merge:
        merge = Merge(input_names=kwargs['source_names'],
                      prototype=merge_prototype)
    if not post_merge:
        post_merge = Bias(dim=merged_dim)
    # For compatibility with a single-layer IGRU, only wrap the layers in
    # a RecurrentStack when the depth is greater than one.
    if igru_depth == 1:
        self.igru = IGRU(dim=igru_state_dim)
    else:
        self.igru = RecurrentStack(
            [IGRU(dim=igru_state_dim, name='igru')] +
            [UpperIGRU(dim=igru_state_dim, activation=Tanh(),
                       name='upper_igru' + str(i))
             for i in range(1, igru_depth)],
            skip_connections=True)
    self.embedding_dim = embedding_dim
    self.emitter = emitter
    self.feedback_brick = feedback_brick
    self.merge = merge
    self.post_merge = post_merge
    self.merged_dim = merged_dim
    self.igru_depth = igru_depth
    self.trg_dgru_depth = trg_dgru_depth
    self.lookup = LookupTable(name='embeddings')
    self.vocab_size = vocab_size
    self.igru_state_dim = igru_state_dim
    self.gru_to_softmax = Linear(input_dim=igru_state_dim,
                                 output_dim=vocab_size)
    self.gru_fork = Fork(
        [name for name in self.igru.apply.sequences
         if name != 'mask' and name != 'input_states'],
        prototype=Linear(), name='gru_fork')

    children = [self.emitter, self.feedback_brick, self.merge,
                self.post_merge, self.igru, self.lookup,
                self.gru_to_softmax, self.gru_fork]
    kwargs.setdefault('children', []).extend(children)
    super(Interpolator, self).__init__(**kwargs)

def test_variable_filter():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name="linear1")
    brick2 = Bias(2, name="bias1")
    activation = Logistic(name="sigm")

    x = tensor.vector()
    h1 = brick1.apply(x)
    h2 = activation.apply(h1)
    h2.name = "h2act"
    y = brick2.apply(h2)
    cg = ComputationGraph(y)

    parameters = [brick1.W, brick1.b, brick2.parameters[0]]
    bias = [brick1.b, brick2.parameters[0]]
    brick1_bias = [brick1.b]

    # Testing filtering by role
    role_filter = VariableFilter(roles=[PARAMETER])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[FILTER])
    assert [] == role_filter(cg.variables)

    # Testing filtering by role using each_role flag
    role_filter = VariableFilter(roles=[PARAMETER, BIAS])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[PARAMETER, BIAS], each_role=True)
    assert not parameters == role_filter(cg.variables)
    assert bias == role_filter(cg.variables)

    # Testing filtering by brick classes
    brick_filter = VariableFilter(roles=[BIAS], bricks=[Linear])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by brick instances
    brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by name
    name_filter = VariableFilter(name="W_norm")
    assert [cg.variables[2]] == name_filter(cg.variables)

    # Testing filtering by name regex
    name_filter_regex = VariableFilter(name_regex="W_no.?m")
    assert [cg.variables[2]] == name_filter_regex(cg.variables)

    # Testing filtering by theano name
    theano_name_filter = VariableFilter(theano_name="h2act")
    assert [cg.variables[11]] == theano_name_filter(cg.variables)

    # Testing filtering by theano name regex
    theano_name_filter_regex = VariableFilter(theano_name_regex="h2a.?t")
    assert [cg.variables[11]] == theano_name_filter_regex(cg.variables)

    # Testing filtering by application
    appli_filter = VariableFilter(applications=[brick1.apply])
    variables = [cg.variables[1], cg.variables[8]]
    assert variables == appli_filter(cg.variables)

    input1 = tensor.matrix("input1")
    input2 = tensor.matrix("input2")
    merge = Merge(["input1", "input2"], [5, 6], 2)
    merged = merge.apply(input1, input2)
    merge_cg = ComputationGraph(merged)
    outputs = VariableFilter(
        roles=[OUTPUT], bricks=[merge])(merge_cg.variables)
    assert merged in outputs
    assert len(outputs) == 3

    outputs_application = VariableFilter(
        roles=[OUTPUT], applications=[merge.apply])(merge_cg.variables)
    assert outputs_application == [merged]

def test_variable_filter():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name='linear1')
    brick2 = Bias(2, name='bias1')
    activation = Logistic(name='sigm')

    x = tensor.vector()
    h1 = brick1.apply(x, call_id='brick1_call_id')
    h2 = activation.apply(h1, call_id='act')
    h2.name = 'h2act'
    y = brick2.apply(h2)
    cg = ComputationGraph(y)

    parameters = [brick1.W, brick1.b, brick2.parameters[0]]
    bias = [brick1.b, brick2.parameters[0]]
    brick1_bias = [brick1.b]

    # Testing filtering by role
    role_filter = VariableFilter(roles=[PARAMETER])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[FILTER])
    assert [] == role_filter(cg.variables)

    # Testing filtering by role using each_role flag
    role_filter = VariableFilter(roles=[PARAMETER, BIAS])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[PARAMETER, BIAS], each_role=True)
    assert not parameters == role_filter(cg.variables)
    assert bias == role_filter(cg.variables)

    # Testing filtering by brick classes
    brick_filter = VariableFilter(roles=[BIAS], bricks=[Linear])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by brick instances
    brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by name
    name_filter = VariableFilter(name='W_norm')
    assert [cg.variables[2]] == name_filter(cg.variables)

    # Testing filtering by name regex
    name_filter_regex = VariableFilter(name_regex='W_no.?m')
    assert [cg.variables[2]] == name_filter_regex(cg.variables)

    # Testing filtering by theano name
    theano_name_filter = VariableFilter(theano_name='h2act')
    assert [cg.variables[11]] == theano_name_filter(cg.variables)

    # Testing filtering by theano name regex
    theano_name_filter_regex = VariableFilter(theano_name_regex='h2a.?t')
    assert [cg.variables[11]] == theano_name_filter_regex(cg.variables)

    brick1_apply_variables = [cg.variables[1], cg.variables[8]]
    # Testing filtering by application
    appli_filter = VariableFilter(applications=[brick1.apply])
    assert brick1_apply_variables == appli_filter(cg.variables)

    # Testing filtering by unbound application
    unbound_appli_filter = VariableFilter(applications=[Linear.apply])
    assert brick1_apply_variables == unbound_appli_filter(cg.variables)

    # Testing filtering by call identifier
    call_id_filter = VariableFilter(call_id='brick1_call_id')
    assert brick1_apply_variables == call_id_filter(cg.variables)

    input1 = tensor.matrix('input1')
    input2 = tensor.matrix('input2')
    merge = Merge(['input1', 'input2'], [5, 6], 2)
    merged = merge.apply(input1, input2)
    merge_cg = ComputationGraph(merged)
    outputs = VariableFilter(
        roles=[OUTPUT], bricks=[merge])(merge_cg.variables)
    assert merged in outputs
    assert len(outputs) == 3

    outputs_application = VariableFilter(
        roles=[OUTPUT], applications=[merge.apply])(merge_cg.variables)
    assert outputs_application == [merged]

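# A minimal, self-contained VariableFilter example in the same spirit as
# the tests above (the brick name and dimensions are assumptions for
# illustration): filtering a graph's variables down to the WEIGHT-role
# parameters of a single Linear brick.
from theano import tensor
from blocks.bricks import Linear
from blocks.filter import VariableFilter
from blocks.graph import ComputationGraph
from blocks.roles import WEIGHT

linear = Linear(input_dim=3, output_dim=2, name='demo')
y = linear.apply(tensor.matrix('x'))
weights = VariableFilter(roles=[WEIGHT])(ComputationGraph(y).variables)
assert weights == [linear.W]
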
class PoemBlock(Initializable):
    """The block that generates one sentence of a poem."""

    def __init__(self, config, blockid, theano_seed=None, **kwargs):
        super(PoemBlock, self).__init__(**kwargs)
        self.theano_seed = theano_seed
        self.config = config
        self.blockid = blockid

        self.encoder = Encoder(self.blockid,
                               self.config['src_vocab_size'],
                               self.config['enc_embed'],
                               self.config['enc_nhids'])
        self.decoder = Decoder(self.blockid,
                               self.config['trg_vocab_size'],
                               self.config['dec_embed'],
                               self.config['dec_nhids'],
                               self.config['enc_nhids'])
        self.mergerep = Merge(input_names=['representation', 'hstates'],
                              input_dims=[self.config['enc_nhids'],
                                          self.config['enc_nhids']],
                              output_dim=self.config['enc_nhids'],
                              name='blockmerge' + self.blockid)
        self.children = [self.encoder, self.decoder, self.mergerep]

    def set_initw(self, initialMethod):
        self.encoder.weights_init = self.decoder.weights_init = initialMethod
        self.mergerep.weights_init = initialMethod

    def set_initb(self, initialMethod):
        self.encoder.biases_init = self.decoder.biases_init = initialMethod

    def set_specialinit(self, im1, im2):
        self.encoder.gru.weights_init = im1
        self.decoder.transition.weights_init = im2

    def _push_allocation_config(self):
        self.encoder.push_allocation_config()
        self.decoder.push_allocation_config()
        self.mergerep.push_allocation_config()

    @application(inputs=['source_sentence', 'source_sentence_mask0',
                         'source_sentence_mask1', 'source_sentence_mask',
                         'target_sentence', 'target_sentence_mask',
                         'hstates', 'lastrep0', 'lastrep1'],
                 outputs=['costs', 'hstates', 'lastrepresentation'])
    def cost(self, application_call, source_sentence, source_sentence_mask0,
             source_sentence_mask1, source_sentence_mask, target_sentence,
             target_sentence_mask, hstates, lastrep0=None, lastrep1=None):
        representation = self.encoder.apply(source_sentence,
                                            source_sentence_mask)
        # `lastrep0`/`lastrep1` are either None or symbolic tensors, so
        # test for None explicitly instead of truth-testing a tensor.
        if lastrep0 is not None and lastrep1 is not None:
            globalrep = theano.tensor.concatenate(
                [lastrep0, lastrep1, representation], axis=0)
            source_mask = theano.tensor.concatenate(
                [source_sentence_mask0, source_sentence_mask1,
                 source_sentence_mask], axis=1)
        elif lastrep0 is not None:
            globalrep = theano.tensor.concatenate(
                [lastrep0, representation], axis=0)
            source_mask = theano.tensor.concatenate(
                [source_sentence_mask0, source_sentence_mask], axis=1)
        else:
            globalrep = representation
            source_mask = source_sentence_mask
        # Tile the decoder hidden states across the time axis so they can
        # be merged with the concatenated representations.
        repeatTime = globalrep.shape[0]
        hstatesRepeat = theano.tensor.tile(hstates, (repeatTime, 1, 1))
        newrep = self.mergerep.apply(representation=globalrep,
                                     hstates=hstatesRepeat)
        costs, lasthstates = self.decoder.cost(newrep, source_mask,
                                               target_sentence,
                                               target_sentence_mask)
        return costs, lasthstates, representation

    @application(inputs=['source_sentence', 'hstates', 'lastrep0',
                         'lastrep1'])
    def mygenerate(self, source_sentence, hstates, lastrep0=None,
                   lastrep1=None, **kwargs):
        representation = self.encoder.apply(
            source_sentence, theano.tensor.ones(source_sentence.shape))
        if lastrep0 is not None and lastrep1 is not None:
            globalrep = theano.tensor.concatenate(
                [lastrep0, lastrep1, representation[:lastrep0.shape[0]]],
                axis=0)
        elif lastrep0 is not None:
            globalrep = theano.tensor.concatenate(
                [lastrep0, representation[:lastrep0.shape[0]]], axis=0)
        else:
            globalrep = representation
        repeatTime = globalrep.shape[0]
        hstatesRepeat = theano.tensor.tile(hstates, (repeatTime, 1, 1))
        newrep = self.mergerep.apply(representation=globalrep,
                                     hstates=hstatesRepeat)
        states, outputs, _2, _3, costs = self.decoder.generate(
            source_sentence=source_sentence, representation=newrep)
        return states, outputs, _2, _3, costs, representation

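# A small NumPy sketch (shapes are assumptions) of the representation
# handling in PoemBlock.cost: encoder outputs are (time, batch, dim),
# previous representations are concatenated along the time axis, and the
# decoder hidden states are tiled across that axis before the Merge.
import numpy

time_steps, batch, dim = 5, 2, 4
lastrep0 = numpy.zeros((3, batch, dim))
representation = numpy.ones((time_steps, batch, dim))
globalrep = numpy.concatenate([lastrep0, representation], axis=0)

hstates = numpy.zeros((batch, dim))
hstates_repeat = numpy.tile(hstates, (globalrep.shape[0], 1, 1))
assert hstates_repeat.shape == globalrep.shape == (8, batch, dim)
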
def __init__(self, input_sources_list, input_sources_vocab_size_list,
             output_source, output_source_vocab_size, lookup_dim=200,
             hidden_size=256, recurrent_stack_size=1):
    self.InputSources = input_sources_list
    self.InputSourcesVocab = input_sources_vocab_size_list
    self.OutputSource = output_source
    self.OutputSourceVocab = output_source_vocab_size

    inputs = [tensor.lmatrix(source) for source in input_sources_list]
    output = tensor.lmatrix(output_source)

    lookups = self.get_lookups(lookup_dim, input_sources_vocab_size_list)
    for lookup in lookups:
        lookup.initialize()

    merge = Merge([lookup.name for lookup in lookups],
                  [lookup.dim for lookup in lookups], hidden_size,
                  weights_init=initialization.Uniform(width=0.01),
                  biases_init=Constant(0))
    merge.initialize()

    linear0 = Linear(input_dim=hidden_size, output_dim=hidden_size,
                     weights_init=initialization.Uniform(width=0.01),
                     biases_init=Constant(0), name='linear0')
    linear0.initialize()

    recurrent_blocks = []
    for i in range(recurrent_stack_size):
        recurrent_blocks.append(SimpleRecurrent(
            dim=hidden_size, activation=Tanh(),
            weights_init=initialization.Uniform(width=0.01),
            use_bias=False))
    for i, recurrent_block in enumerate(recurrent_blocks):
        recurrent_block.name = 'recurrent' + str(i + 1)
        recurrent_block.initialize()

    linear_out = Linear(input_dim=hidden_size,
                        output_dim=output_source_vocab_size,
                        weights_init=initialization.Uniform(width=0.01),
                        biases_init=Constant(0), name='linear_out')
    linear_out.initialize()
    softmax = NDimensionalSoftmax(name='softmax')

    lookup_outputs = [lookup.apply(input)
                      for lookup, input in zip(lookups, inputs)]
    m = merge.apply(*lookup_outputs)
    r = linear0.apply(m)
    for block in recurrent_blocks:
        r = block.apply(r)
    a = linear_out.apply(r)

    self.Cost = softmax.categorical_cross_entropy(output, a,
                                                  extra_ndim=1).mean()
    self.Cost.name = 'cost'

    y_hat = softmax.apply(a, extra_ndim=1)
    y_hat.name = 'y_hat'

    self.ComputationGraph = ComputationGraph(self.Cost)
    self.Function = None
    self.MainLoop = None
    self.Model = Model(y_hat)

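# The constructor above calls self.get_lookups, which is not part of this
# snippet. A plausible sketch of it (an assumption, not the original
# code): one LookupTable per input source, named so that the names passed
# to Merge line up with the lookup bricks.
def get_lookups(self, lookup_dim, vocab_sizes):
    return [LookupTable(dim=lookup_dim, length=size,
                        name='lookup' + str(i + 1),
                        weights_init=initialization.Uniform(width=0.01),
                        biases_init=Constant(0))
            for i, size in enumerate(vocab_sizes)]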