def __init__(self, name, data, model, optimizer, cost, outputs,
             debug_print=0, trainlog=None, extension=None):
    """Set up the training driver and compile the cost function.

    Parameters
    ----------
    name : str
        Identifier for this training run.
    data : iterable
        Yields mini-batches; each batch is unpacked as the arguments of
        the compiled cost function.
    model : object
        Provides ``inputs``, ``updates`` and (elsewhere in this file)
        ``params`` used to build the training graph.
    optimizer : object
        Supplies ``get_updates(grads)`` mapping parameters to updates.
    cost : theano expression
        Scalar training objective to differentiate.
    outputs : expression or list
        Extra graph outputs; normalized to a list via ``tolist``.
    debug_print : int, optional
        Non-zero dumps the compiled function with Theano's debugprint.
    trainlog : TrainLog, optional
        Existing log to resume into; a fresh one is created when None.
    extension : list, optional
        Extension objects dispatched by name via ``run_extension``.
    """
    self.name = name
    self.data = data
    self.model = model
    self.optimizer = optimizer
    self.inputs = model.inputs
    self.cost = cost
    self.outputs = tolist(outputs)
    # Start from the model's own updates, then merge optimizer updates
    # later in build_training_graph.
    self.updates = OrderedDict()
    self.updates.update(model.updates)
    self.extension = extension
    self.debug_print = debug_print
    t0 = time.time()
    self.cost_fn = self.build_training_graph()
    # Parenthesized print works identically under Python 2 and 3
    # (the original Py2-only print statement was a portability defect).
    print("Elapsed compilation time: %f" % (time.time() - t0))
    if self.debug_print:
        # Imported lazily so the dependency is only paid when debugging.
        from theano.printing import debugprint
        debugprint(self.cost_fn)
    if trainlog is None:
        self.trainlog = TrainLog()
    else:
        self.trainlog = trainlog
    # Flag flipped (presumably by an extension) to terminate run().
    self.endloop = 0
def build_training_graph(self):
    """Differentiate the cost, collect optimizer updates and compile.

    Runs the regularization/gradient extension hooks around the core
    steps, then returns the compiled Theano function produced by
    ``build_theano_graph``.
    """
    self.run_extension('ext_regularize_pre_grad')
    # BUG FIX: the original referenced self.params, which is never
    # assigned anywhere in this class; the sibling definition of this
    # method uses the model's parameters, so do the same here.
    params = self.model.params.values()
    self.grads = OrderedDict(izip(params, T.grad(self.cost, params)))
    self.run_extension('ext_grad')
    # Merge optimizer-produced updates into the shared update dict.
    grads = self.optimizer.get_updates(self.grads)
    for key, val in grads.items():
        self.updates[key] = val
    self.run_extension('ext_regularize_post_grad')
    return self.build_theano_graph(self.inputs, self.outputs, self.updates)
unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b) output = FullyConnectedLayer(name='output', parent=['h1', 'h2', 'h3'], parent_dim=[200, 200, 200], nout=frame_size, unit='sigmoid', init_W=init_W, init_b=init_b) nodes = [h1, h2, h3, output] params = OrderedDict() for node in nodes: params.update(node.initialize()) params = init_tparams(params) s1_0 = h1.get_init_state(batch_size) s2_0 = h2.get_init_state(batch_size) s3_0 = h3.get_init_state(batch_size) def inner_fn(x_t, s1_tm1, s2_tm1, s3_tm1): s1_t = h1.fprop([[x_t], [s1_tm1, s2_tm1, s3_tm1]], params) s2_t = h2.fprop([[s1_t], [s2_tm1, s1_tm1, s3_tm1]], params) s3_t = h3.fprop([[s2_t], [s3_tm1], s1_tm1, s2_tm1], params) y_hat_t = output.fprop([s1_t, s2_t, s3_t], params)
class Training(PickleMixin, TheanoMixin):
    """Main training loop: compiles the cost and iterates over the data.

    Extensions (objects with a ``name`` and an ``exe(self)`` method) are
    dispatched by name at fixed points of the loop: monitoring, saving,
    gradient hooks, regularization hooks and termination.

    Parameters
    ----------
    .. todo::
    """
    def __init__(self, name, data, model, optimizer, cost, outputs,
                 debug_print=0, trainlog=None, extension=None):
        """Store configuration and compile the training function."""
        self.name = name
        self.data = data
        self.model = model
        self.optimizer = optimizer
        self.inputs = model.inputs
        self.cost = cost
        self.outputs = tolist(outputs)
        # Seed the update dict with the model's own updates; optimizer
        # updates are merged in build_training_graph.
        self.updates = OrderedDict()
        self.updates.update(model.updates)
        self.extension = extension
        self.debug_print = debug_print
        t0 = time.time()
        self.cost_fn = self.build_training_graph()
        # Parenthesized print works under both Python 2 and 3.
        print("Elapsed compilation time: %f" % (time.time() - t0))
        if self.debug_print:
            # Lazy import: only needed when debug printing is requested.
            from theano.printing import debugprint
            debugprint(self.cost_fn)
        if trainlog is None:
            self.trainlog = TrainLog()
        else:
            self.trainlog = trainlog
        # Set non-zero (e.g. by an extension) to stop run().
        self.endloop = 0

    def build_training_graph(self):
        """Differentiate the cost w.r.t. the model parameters, gather
        optimizer updates and return the compiled Theano function."""
        self.run_extension('ext_regularize_pre_grad')
        self.grads = OrderedDict(izip(self.model.params.values(),
                                      T.grad(self.cost,
                                             self.model.params.values())))
        self.run_extension('ext_grad')
        grads = self.optimizer.get_updates(self.grads)
        for key, val in grads.items():
            self.updates[key] = val
        self.run_extension('ext_regularize_post_grad')
        return self.build_theano_graph(self.inputs, self.outputs,
                                       self.updates)

    def run(self):
        """Run epochs until run_epoch reports termination."""
        logger.info("Entering main loop")
        while self.run_epoch():
            pass
        logger.info("Terminating main loop")

    def run_epoch(self):
        """Train on one pass over the data.

        Returns
        -------
        bool
            False when training should terminate, True otherwise.
        """
        for batch in self.data:
            self.run_extension('ext_monitor')
            batch_t0 = time.time()
            this_cost = self.cost_fn(*batch)
            # Record wall-clock time and cost for every batch.
            self.trainlog._times.append(time.time() - batch_t0)
            self.trainlog._batches.append(this_cost)
            self.trainlog._batch_seen += 1
            self.run_extension('ext_save')
        self.trainlog._epoch_seen += 1
        self.run_extension('ext_term')
        if self.end_training():
            # Final monitor/save before leaving the loop.
            self.run_extension('ext_monitor')
            self.run_extension('ext_save')
            return False
        return True

    def find_extension(self, name):
        """Collect the registered extensions matching *name*.

        Returns
        -------
        (int, list or None)
            ``(1, matches)`` when at least one extension matches,
            ``(0, [])`` when none match, and ``(0, None)`` when no
            extension list was supplied (or it is malformed).
        """
        # Explicit guard replaces the original bare ``except:`` which
        # silently swallowed every error, masking real bugs.
        if self.extension is None:
            return 0, None
        try:
            exts = [ext for ext in self.extension if ext.name == name]
        except (TypeError, AttributeError):
            # Not iterable, or an entry without a ``name`` attribute —
            # same fallback the original produced via its bare except.
            return 0, None
        return int(len(exts) > 0), exts

    def run_extension(self, name):
        """Execute every registered extension matching *name*."""
        tok, exts = self.find_extension(name)
        if tok:
            for ext in exts:
                ext.exe(self)

    def end_training(self):
        """Truthy when the termination flag has been raised."""
        return self.endloop