# Assumes the surrounding module already imports what this method uses:
#   from collections import OrderedDict
#   import theano
# plus the library's own list helpers, `flatten` and `tolist`.
def build_recurrent_graph(self, n_steps=None, reverse=False, **kwargs):
    self.nonseq_args = kwargs.pop('nonseq_args', None)
    self.output_args = kwargs.pop('output_args', None)
    self.context_args = kwargs.pop('context_args', None)
    self.iterators = kwargs.pop('iterators', None)
    self.nonseq_inputs = kwargs.pop('nonseq_inputs', None)
    self.nNone = 0
    inputs = list(self.inputs.values())  # list() so .pop(i) works on Python 3
    seqs = []
    outputs = []
    nonseqs = []
    # Inputs flagged as non-sequences are moved out of the scan sequences.
    if self.nonseq_inputs is not None:
        for i in self.nonseq_inputs:
            nonseqs.append(inputs.pop(i))
    self.input_args = OrderedDict()
    self.recur_args = OrderedDict()
    # Every node that owns an initial state is recurrent; its state seeds
    # scan's outputs_info.
    for name, node in self.nodes.items():
        if hasattr(node, 'get_init_state'):
            self.recur_args[name] = node
            state = node.get_init_state()
            outputs.append(state)
    self.nrecur = len(self.recur_args)
    # Substitute user-provided contexts for the matching initial states.
    if self.context_args is not None:
        for i, (nname, node) in enumerate(self.output_args.items()):
            for aname, arg in self.context_args.items():
                if nname == aname:
                    outputs[i] = arg
    if self.iterators is None:
        seqs += inputs
    else:
        # Iterators are fed back step-to-step rather than scanned over.
        seqs += inputs[len(self.iterators):]
        outputs += inputs[:len(self.iterators)]
    if self.output_args is not None:
        # Non-recurrent outputs get a None slot in outputs_info.
        self.nNone = len(self.output_args)
        outputs = flatten(outputs + [None] * self.nNone)
    if self.nonseq_args is not None:
        for arg in self.nonseq_args:
            nonseqs.append(arg)
    self.nseqs = len(seqs)
    self.noutputs = len(outputs)
    self.nnonseqs = len(nonseqs)
    result, updates = theano.scan(fn=self.scan_fn,
                                  sequences=seqs,
                                  outputs_info=outputs,
                                  non_sequences=nonseqs,
                                  n_steps=n_steps,
                                  go_backwards=reverse)
    result = tolist(result)
    if self.output_args is None and self.iterators is None:
        return result
    if len(updates) == 0:
        return result[-self.nNone:]
    for k, v in updates.items():  # .iteritems() is Python 2 only
        k.default_update = v
    return result[-self.nNone:], updates
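# For orientation, a minimal, self-contained sketch of the scan pattern the
# method above assembles: sequence inputs go to `sequences`, initial
# recurrent states to `outputs_info`, and fixed arguments to
# `non_sequences`. All names here are illustrative, not part of the library.
import numpy as np
import theano
import theano.tensor as T

X = T.tensor3('X')  # (time, batch, 4), illustrative shape
W = theano.shared(np.eye(4, dtype='float32'), name='W')
s0 = T.zeros((X.shape[1], 4))  # one initial state per batch element

def step(x_t, s_tm1, W):
    # One recurrence step: new state from current input and previous state.
    return T.tanh(x_t + T.dot(s_tm1, W))

states, updates = theano.scan(fn=step,
                              sequences=[X],
                              outputs_info=[s0],
                              non_sequences=[W])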
                           init_b=init_b)  # (definitions of h1, h2, h3 elided above)
output = FullyConnectedLayer(name='output',
                             parent=['h1', 'h2', 'h3'],
                             parent_dim=[200, 200, 200],
                             nout=205,
                             unit='softmax',
                             init_W=init_W,
                             init_b=init_b)
nodes = [h1, h2, h3, output]
for node in nodes:
    node.initialize()
params = flatten([node.get_params().values() for node in nodes])
# Shared variables carry the hidden states across mini-batches;
# step_count forces a reset every reset_freq updates.
step_count = sharedX(0, name='step_count')
last_h = np.zeros((batch_size, 400), dtype=np.float32)
h1_tm1 = sharedX(last_h, name='h1_tm1')
h2_tm1 = sharedX(last_h, name='h2_tm1')
h3_tm1 = sharedX(last_h, name='h3_tm1')
update_list = [step_count, h1_tm1, h2_tm1, h3_tm1]
step_count = T.switch(T.le(step_count, reset_freq), step_count + 1, 0)
# Use the carried state unless the counter wrapped or the state is still
# all zeros; in either case fall back to the layer's fresh initial state.
s1_0 = T.switch(T.or_(T.cast(T.eq(step_count, 0), 'int32'),
                      T.cast(T.eq(T.sum(h1_tm1), 0.), 'int32')),
                h1.get_init_state(), h1_tm1)
s2_0 = T.switch(T.or_(T.cast(T.eq(step_count, 0), 'int32'),
                      T.cast(T.eq(T.sum(h2_tm1), 0.), 'int32')),
                h2.get_init_state(), h2_tm1)
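# The shared variables above implement truncated-BPTT state carry-over:
# after each update the final hidden states are written back into the
# h*_tm1 shareds, and step_count periodically forces a reset. A minimal
# self-contained sketch of that pattern (all names illustrative):
import numpy as np
import theano
import theano.tensor as T

h_tm1 = theano.shared(np.zeros((2, 4), dtype='float32'), name='h_tm1')
cnt = theano.shared(np.int64(0), name='cnt')
reset_every = 5

x = T.matrix('x')
cnt_next = T.switch(T.lt(cnt, reset_every), cnt + 1, 0)
# Start from zeros whenever the counter wraps; otherwise carry the state.
h0 = T.switch(T.eq(cnt_next, 0), T.zeros_like(h_tm1), h_tm1)
h_t = T.tanh(x + h0)

step = theano.function([x], h_t,
                       updates=[(h_tm1, h_t), (cnt, cnt_next)])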
                           init_b=init_b)  # (definitions of h1, h2, h3 elided above)
output = FullyConnectedLayer(name='output',
                             parent=['h1', 'h2', 'h3'],
                             parent_dim=[200, 200, 200],
                             nout=frame_size,
                             unit='sigmoid',
                             init_W=init_W,
                             init_b=init_b)
nodes = [h1, h2, h3, output]
for node in nodes:
    node.initialize()
params = flatten([node.get_params().values() for node in nodes])
s1_0 = h1.get_init_state(batch_size)
s2_0 = h2.get_init_state(batch_size)
s3_0 = h3.get_init_state(batch_size)

def inner_fn(x_t, s1_tm1, s2_tm1, s3_tm1):
    # One step of the three-layer stacked RNN: each layer sees the layer
    # below (or the input) together with its own previous state.
    s1_t = h1.fprop([[x_t], [s1_tm1]])
    s2_t = h2.fprop([[s1_t], [s2_tm1]])
    s3_t = h3.fprop([[s2_t], [s3_tm1]])
    y_hat_t = output.fprop([s1_t, s2_t, s3_t])
    return s1_t, s2_t, s3_t, y_hat_t
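# inner_fn is then driven by theano.scan over the input sequence; a sketch
# of the call, assuming `x` is the (time, batch, frame_size) sequence
# variable used by this example:
(s1, s2, s3, y_hat), updates = theano.scan(
    fn=inner_fn,
    sequences=[x],
    outputs_info=[s1_0, s2_0, s3_0, None])
# y_hat_t is not fed back into the recurrence, so its outputs_info slot is
# None and scan simply collects it at every step.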
def get_params(self):
    # Collect every node's parameters into one flat list.
    return flatten([node.get_params().values()
                    for node in self.nodes.values()])
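# A typical consumer of such a flat parameter list is the gradient and
# update computation. A self-contained sketch with two hand-made shared
# parameters standing in for what get_params() would return:
import numpy as np
import theano
import theano.tensor as T

W = theano.shared(np.zeros((3, 3), dtype='float32'), name='W')
b = theano.shared(np.zeros(3, dtype='float32'), name='b')
params = [W, b]  # stand-in for get_params()

x = T.vector('x')
cost = T.sum((T.dot(W, x) + b) ** 2)
grads = T.grad(cost, wrt=params)
# Plain SGD: one update pair per parameter in the flat list.
train = theano.function([x], cost,
                        updates=[(p, p - 1e-2 * g)
                                 for p, g in zip(params, grads)])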