def main(self):
    print_mem('start')
    # if test_data is not None:
    #     self.test()
    print 'Took', (time.time() - self.start_time) / 60., 'min'
    avg_step = self.timings['time_step'][:self.step].mean()
    avg_cost2expl = self.timings['log2_p_expl'][:self.step].mean()
    print "Average step took {}".format(avg_step)
    print "That amounts to {} sentences in a day".format(
        1 / avg_step * 86400 * self.state['bs'])
    print "Average log2 per example is {}".format(avg_cost2expl)
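# A quick sanity check of the throughput line printed above: one update
# takes avg_step seconds and consumes state['bs'] sentences, so a day of
# training covers (86400 / avg_step) * bs sentences. A minimal sketch of
# that arithmetic; the figures are hypothetical, purely for illustration.

def sentences_per_day(avg_step, bs):
    # seconds per day / seconds per update * sentences per update
    return 1. / avg_step * 86400 * bs

assert sentences_per_day(0.5, 80) == 13824000.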
def validate(self):
    rvals = self.model.validate(self.valid_data)
    msg = '** %d validation:' % self.valid_id
    print_mem('validate')
    self.valid_id += 1
    self.batch_start_time = time.time()
    pos = self.step // self.state['validFreq']
    for k, v in rvals:
        msg = msg + ' ' + k + ':%f ' % float(v)
        self.timings['valid' + k][pos] = float(v)
        self.state['valid' + k] = float(v)
    msg += 'whole time %s' % print_time(time.time() - self.start_time)
    msg += ' patience %d' % self.patience
    print msg
    if self.train_cost:
        valid_rvals = rvals
        rvals = self.model.validate(self.train_data, True)
        msg = '** %d train:' % (self.valid_id - 1)
        for k, v in rvals:
            msg = msg + ' ' + k + ':%6.3f ' % float(v)
            self.timings['fulltrain' + k] = float(v)
            self.state['fulltrain' + k] = float(v)
        print msg
        rvals = valid_rvals
    self.state['validtime'] = float(time.time() - self.start_time) / 60.
    # Just pick the first thing that the cost returns
    cost = rvals[0][1]
    if self.state['bvalidcost'] > cost:
        self.state['bvalidcost'] = float(cost)
        for k, v in rvals:
            self.state['bvalid' + k] = float(v)
        self.state['bstep'] = int(self.step)
        self.state['btime'] = int(time.time() - self.start_time)
        self.test()
    elif numpy.random.rand(1) > self.state['rand_test_inclusion']:
        print "Shouldn't test, but you got lucky", cost, '>', self.state['bvalidcost']
        for k, v in self.state.items():
            if 'test' in k:
                print k, v
        self.test()
    else:
        print 'No testing', cost, '>', self.state['bvalidcost']
        for k, v in self.state.items():
            if 'test' in k:
                print k, v
    print_mem('validate')
    if self.validate_postprocess:
        return self.validate_postprocess(cost)
    return cost
def validate(self):
    rvals = self.model.validate(self.valid_data)
    msg = "** %d validation:" % self.valid_id
    self.valid_id += 1
    self.batch_start_time = time.time()
    pos = self.step // self.state["validFreq"]
    for k, v in rvals:
        msg = msg + " " + k + ":%f " % float(v)
        self.timings["valid" + k][pos] = float(v)
        self.state["valid" + k] = float(v)
    msg += "whole time %s" % print_time(time.time() - self.start_time)
    msg += " patience %d" % self.patience
    print msg
    if self.train_cost:
        valid_rvals = rvals
        rvals = self.model.validate(self.train_data, True)
        msg = "** %d train:" % (self.valid_id - 1)
        for k, v in rvals:
            msg = msg + " " + k + ":%6.3f " % float(v)
            self.timings["fulltrain" + k] = float(v)
            self.state["fulltrain" + k] = float(v)
        print msg
        rvals = valid_rvals
    self.state["validtime"] = float(time.time() - self.start_time) / 60.0
    # Just pick the first thing that the cost returns
    cost = rvals[0][1]
    if self.state["bvalidcost"] > cost:
        self.state["bvalidcost"] = float(cost)
        for k, v in rvals:
            self.state["bvalid" + k] = float(v)
        self.state["bstep"] = int(self.step)
        self.state["btime"] = int(time.time() - self.start_time)
        self.test()
    else:
        print "No testing", cost, ">", self.state["bvalidcost"]
        for k, v in self.state.items():
            if "test" in k:
                print k, v
    print_mem("validate")
    if self.validate_postprocess:
        return self.validate_postprocess(cost)
    return cost
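# Both validate() variants above follow the same pattern: log the validation
# metrics, remember the best cost seen so far in state, and run the
# expensive test pass only when the best cost improves. A minimal sketch of
# that bookkeeping, with a hypothetical run_test callback standing in for
# self.test().

def track_best(state, cost, run_test):
    # state['bvalidcost'] holds the best validation cost seen so far
    if state['bvalidcost'] > cost:
        state['bvalidcost'] = float(cost)
        run_test()  # test only when validation improves
        return True
    return False

_state = {'bvalidcost': float('inf')}
assert track_best(_state, 1.5, lambda: None)      # first cost is always best
assert not track_best(_state, 2.0, lambda: None)  # no improvement, no test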
def main(self):
    assert self.reset == -1

    print_mem('start')
    self.state['gotNaN'] = 0
    start_time = time.time()
    self.start_time = start_time
    self.batch_start_time = time.time()

    self.step = int(self.timings['step'])
    self.algo.step = self.step

    self.save_iter = 0
    self.save()
    if self.channel is not None:
        self.channel.save()
    self.save_time = time.time()

    last_cost = 1.
    self.state['clr'] = self.state['lr']
    self.train_data.start(self.timings['next_offset']
                          if 'next_offset' in self.timings
                          else -1)

    while (self.step < self.state['loopIters'] and
           last_cost > .1 * self.state['minerr'] and
           (time.time() - start_time) / 60. < self.state['timeStop'] and
           self.state['lr'] > self.state['minlr']):
        print 'step:', self.step
        if self.step > 0 and \
                (time.time() - self.save_time) / 60. >= self.state['saveFreq']:
            self.save()
            if self.channel is not None:
                self.channel.save()
            self.save_time = time.time()
        if self.state['save_by_iter'] and \
                self.step % self.state['saveiter'] == 0:
            self.save_DIY()
        st = time.time()
        try:
            rvals = self.algo()
            self.state['traincost'] = float(rvals['cost'])
            self.state['step'] = self.step
            last_cost = rvals['cost']
            for name in rvals.keys():
                self.timings[name][self.step] = float(numpy.array(rvals[name]))
            if self.l2_params:
                for param in self.model.params:
                    self.timings["l2_" + param.name][self.step] = \
                        numpy.mean(param.get_value() ** 2) ** 0.5

            if (numpy.isinf(rvals['cost']) or numpy.isnan(rvals['cost'])) and \
                    self.state['on_nan'] == 'raise':
                self.state['gotNaN'] = 1
                self.save()
                if self.channel:
                    self.channel.save()
                print 'Got NaN while training'
                last_cost = 0
            if self.valid_data is not None and \
                    self.step % self.state['validFreq'] == 0 and \
                    self.step > 1:
                valcost = self.validate()
                if valcost > self.old_cost * self.state['cost_threshold']:
                    self.patience -= 1
                    if 'lr_start' in self.state and \
                            self.state['lr_start'] == 'on_error':
                        self.state['lr_start'] = self.step
                elif valcost < self.old_cost:
                    self.patience = self.state['patience']
                    self.old_cost = valcost
                if self.state['divide_lr'] and self.patience < 1:
                    # Divide lr by 2
                    self.algo.lr = self.algo.lr / self.state['divide_lr']
                    bparams = dict(self.model.best_params)
                    self.patience = self.state['patience']
                    for p in self.model.params:
                        p.set_value(bparams[p.name])
            if self.state['hookFreq'] > 0 and \
                    self.step % self.state['hookFreq'] == 0 and \
                    self.hooks:
                [fn() for fn in self.hooks]
            if self.reset > 0 and self.step > 1 and \
                    self.step % self.reset == 0:
                print 'Resetting the data iterator'
                self.train_data.reset()

            self.step += 1
            self.timings['step'] = self.step
            self.timings['next_offset'] = self.train_data.next_offset
        except KeyboardInterrupt:
            break

    self.state['wholetime'] = float(time.time() - start_time)
    if self.valid_data is not None:
        self.validate()
    self.save()
    if self.channel:
        self.channel.save()
    print 'Took', (time.time() - start_time) / 60., 'min'
    avg_step = self.timings['time_step'][:self.step].mean()
    avg_cost2expl = self.timings['log2_p_expl'][:self.step].mean()
    print "Average step took {}".format(avg_step)
    print "That amounts to {} sentences in a day".format(
        1 / avg_step * 86400 * self.state['bs'])
    print "Average log2 per example is {}".format(avg_cost2expl)
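# The validation branch above implements patience-based annealing: a
# validation cost worse than old_cost * cost_threshold costs one unit of
# patience, an improvement restores it, and once patience runs out the
# learning rate is divided by state['divide_lr'] and the best parameters
# are restored. A self-contained sketch of just that logic; the names and
# default values here are illustrative stand-ins, not the trainer's
# attributes.

def patience_step(valcost, old_cost, patience, lr,
                  cost_threshold=1.003, max_patience=5, divide_lr=2.):
    if valcost > old_cost * cost_threshold:
        patience -= 1            # noticeably worse: lose one unit of patience
    elif valcost < old_cost:
        patience = max_patience  # improved: reset patience
        old_cost = valcost
    if patience < 1:
        lr /= divide_lr          # anneal, then start over with full patience
        patience = max_patience
    return old_cost, patience, lr

_old, _patience, _lr = patience_step(2.0, 1.0, 1, 1.0)
assert (_patience, _lr) == (5, 0.5)  # patience exhausted, lr divided by 2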
def main(self):
    assert self.reset == -1

    print_mem("start")
    self.state["gotNaN"] = 0
    start_time = time.time()
    self.start_time = start_time
    self.batch_start_time = time.time()

    self.step = int(self.timings["step"])
    self.algo.step = self.step

    if self.state["save_iter"] < 0:
        self.save_iter = 0
        self.state["save_iter"] = 0
        self.save()
        if self.channel is not None:
            self.channel.save()
    else:
        # Fake saving
        self.save_iter += 1
        self.state["save_iter"] = self.save_iter
    self.save_time = time.time()

    last_cost = 1.0
    self.state["clr"] = self.state["lr"]
    self.train_data.start(self.timings["next_offset"]
                          if "next_offset" in self.timings
                          else -1)
    if self.state["rolling_vocab"]:
        for i in xrange(self.timings["step"] - self.timings["super_step"]):
            self.train_data.next()

    if self.state["rolling_vocab"]:
        # Make sure the dictionary is current. If training is interrupted
        # while the vocabularies are being exchanged, things may get broken.
        step_modulo = self.step % self.model.total_num_batches
        if step_modulo in self.model.rolling_vocab_dict:  # 0 always in.
            cur_key = step_modulo
        else:
            cur_key = 0
            for key in self.model.rolling_vocab_dict:
                # Find the largest key smaller than step_modulo
                if (key < step_modulo) and (key > cur_key):
                    cur_key = key
        new_large2small_src = self.model.Dx_shelve[str(cur_key)]
        new_large2small_trgt = self.model.Dy_shelve[str(cur_key)]
        self.roll_vocab_update_dicts(new_large2small_src, new_large2small_trgt)
        self.zero_or_reload = True

    while (self.step < self.state["loopIters"] and
           last_cost > 0.1 * self.state["minerr"] and
           (time.time() - start_time) / 60.0 < self.state["timeStop"] and
           self.state["lr"] > self.state["minlr"]):
        if self.step > 0 and \
                (time.time() - self.save_time) / 60.0 >= self.state["saveFreq"]:
            self.save()
            if self.channel is not None:
                self.channel.save()
            self.save_time = time.time()
        st = time.time()
        try:
            if self.state["rolling_vocab"]:
                step_modulo = self.step % self.model.total_num_batches
                if step_modulo in self.model.rolling_vocab_dict:
                    if not self.zero_or_reload:
                        # Not necessary for 0 or when reloading a properly saved model
                        self.roll_vocab_small2large()
                        new_large2small_src = self.model.Dx_shelve[str(step_modulo)]
                        new_large2small_trgt = self.model.Dy_shelve[str(step_modulo)]
                        # Done above for 0 or a reloaded model
                        self.roll_vocab_update_dicts(new_large2small_src,
                                                     new_large2small_trgt)
                    self.roll_vocab_large2small()
                    try:
                        tmp_batch = self.train_data.next(peek=True)
                    except StopIteration:
                        if self.state["reprocess_each_iteration"]:
                            logger.info("Reached end of file; re-preprocessing")
                            subprocess.check_call(
                                self.state["reprocess_each_iteration"], shell=True)
                            if self.state["rolling_vocab"]:
                                os.remove(self.state["Dx_file"])
                                os.remove(self.state["Dy_file"])
                                tmp_state = copy.deepcopy(self.state)
                                rolling_dicts.main(tmp_state)
                                with open(self.state["rolling_vocab_dict"], "rb") as f:
                                    self.model.rolling_vocab_dict = cPickle.load(f)
                                self.model.total_num_batches = max(self.model.rolling_vocab_dict)
                                self.model.Dx_shelve = shelve.open(self.state["Dx_file"])
                                self.model.Dy_shelve = shelve.open(self.state["Dy_file"])
                                # Round the step count up or down so the modulo is 0
                                # (hack because total_num_batches can change).
                                logger.debug("step before restart: {0}".format(self.step))
                                if self.step % self.model.total_num_batches < \
                                        self.model.total_num_batches / 2:
                                    self.step -= self.step % self.model.total_num_batches
                                else:
                                    self.step += self.model.total_num_batches - \
                                        (self.step % self.model.total_num_batches)
                                logger.debug("step after restart: {0}".format(self.step))
                            logger.debug("Load data")
                            self.train_data = get_batch_iterator(
                                self.state, numpy.random.RandomState(self.state["seed"]))
                            self.train_data.start(-1)
                            self.timings["next_offset"] = -1
                            step_modulo = self.step % self.model.total_num_batches
                            if step_modulo in self.model.rolling_vocab_dict:
                                if not self.zero_or_reload:
                                    # Not necessary for 0 or when reloading a properly saved model
                                    self.roll_vocab_small2large()
                                    new_large2small_src = self.model.Dx_shelve[str(step_modulo)]
                                    new_large2small_trgt = self.model.Dy_shelve[str(step_modulo)]
                                    # Done above for 0 or a reloaded model
                                    self.roll_vocab_update_dicts(new_large2small_src,
                                                                 new_large2small_trgt)
                                self.roll_vocab_large2small()
                            self.algo.data = self.train_data
                            self.algo.step = self.step
                            tmp_batch = self.train_data.next(peek=True)
                            if self.hooks:
                                self.hooks[0].train_iter = self.train_data
                        else:
                            self.save()
                            raise
                    if (tmp_batch["x"][:, 0].tolist(),
                            tmp_batch["y"][:, 0].tolist()) == \
                            self.model.rolling_vocab_dict[step_modulo]:
                        logger.debug("Identical first sentences. OK")
                    else:
                        logger.error("Batches do not correspond.")
            elif self.state["hookFreq"] > 0 and \
                    self.step % self.state["hookFreq"] == 0 and \
                    self.hooks:
                # Hook first so that the peeked batch is the same as the one
                # used in algo. Use elif not to peek twice.
                [fn() for fn in self.hooks]
            try:
                rvals = self.algo()
            except StopIteration:
                if self.state["reprocess_each_iteration"]:
                    logger.info("Reached end of file; re-preprocessing")
                    subprocess.check_call(
                        self.state["reprocess_each_iteration"], shell=True)
                    logger.debug("Load data")
                    self.train_data = get_batch_iterator(
                        self.state, numpy.random.RandomState(self.state["seed"]))
                    self.train_data.start(-1)
                    self.timings["next_offset"] = -1
                    self.algo.data = self.train_data
                    self.algo.step = self.step
                    rvals = self.algo()
                    if self.hooks:
                        self.hooks[0].train_iter = self.train_data
                else:
                    self.save()
                    raise
            self.state["traincost"] = float(rvals["cost"])
            self.state["step"] = self.step
            last_cost = rvals["cost"]
            for name in rvals.keys():
                self.timings[name][self.step] = float(numpy.array(rvals[name]))
            if self.l2_params:
                for param in self.model.params:
                    self.timings["l2_" + param.name][self.step] = \
                        numpy.mean(param.get_value() ** 2) ** 0.5

            if (numpy.isinf(rvals["cost"]) or numpy.isnan(rvals["cost"])) and \
                    self.state["on_nan"] == "raise":
                self.state["gotNaN"] = 1
                self.save()
                if self.channel:
                    self.channel.save()
                print "Got NaN while training"
                last_cost = 0
            if self.valid_data is not None and \
                    self.step % self.state["validFreq"] == 0 and \
                    self.step > 1:
                valcost = self.validate()
                if valcost > self.old_cost * self.state["cost_threshold"]:
                    self.patience -= 1
                    if "lr_start" in self.state and \
                            self.state["lr_start"] == "on_error":
                        self.state["lr_start"] = self.step
                elif valcost < self.old_cost:
                    self.patience = self.state["patience"]
                    self.old_cost = valcost
                if self.state["divide_lr"] and self.patience < 1:
                    # Divide lr by 2
                    self.algo.lr = self.algo.lr / self.state["divide_lr"]
                    bparams = dict(self.model.best_params)
                    self.patience = self.state["patience"]
                    for p in self.model.params:
                        p.set_value(bparams[p.name])
            if not self.state["rolling_vocab"]:
                # Standard use of hooks
                if self.state["hookFreq"] > 0 and \
                        self.step % self.state["hookFreq"] == 0 and \
                        self.hooks:
                    [fn() for fn in self.hooks]
            if self.reset > 0 and self.step > 1 and \
                    self.step % self.reset == 0:
                print "Resetting the data iterator"
                self.train_data.reset()

            self.step += 1
            if self.state["rolling_vocab"]:
                self.zero_or_reload = False
                self.timings["step"] = self.step  # Step now
                if (self.step % self.model.total_num_batches) % \
                        self.state["sort_k_batches"] == 0:
                    # Start of a super_batch. This log should appear just before
                    # 'logger.debug("Start of a super batch")' in
                    # 'get_homogeneous_batch_iter()'.
                    logger.debug("Set super_step and next_offset")
                    # Step at the start of the superbatch. super_step < step
                    self.timings["super_step"] = self.step
                    # Where to start after a reload. Will need to call next() a few times
                    self.timings["next_offset"] = self.train_data.next_offset
            else:
                self.timings["step"] = self.step
                self.timings["next_offset"] = self.train_data.next_offset
        except KeyboardInterrupt:
            break

    if self.state["rolling_vocab"]:
        self.roll_vocab_small2large()
    self.state["wholetime"] = float(time.time() - start_time)
    if self.valid_data is not None:
        self.validate()
    self.save()
    if self.channel:
        self.channel.save()
    print "Took", (time.time() - start_time) / 60.0, "min"
    avg_step = self.timings["time_step"][:self.step].mean()
    avg_cost2expl = self.timings["log2_p_expl"][:self.step].mean()
    print "Average step took {}".format(avg_step)
    print "That amounts to {} sentences in a day".format(
        1 / avg_step * 86400 * self.state["bs"])
    print "Average log2 per example is {}".format(avg_cost2expl)
def main(self):
    assert self.reset == -1

    print_mem('start')
    self.state['gotNaN'] = 0
    start_time = time.time()
    self.start_time = start_time
    self.batch_start_time = time.time()

    self.step = int(self.timings['step'])
    self.algo.step = self.step

    self.save_iter = 0
    self.save()
    if self.channel is not None:
        self.channel.save()
    self.save_time = time.time()
    print 'syscomb'

    last_cost = 1.
    self.state['clr'] = self.state['lr']
    self.train_data.start(self.timings['next_offset']
                          if 'next_offset' in self.timings
                          else -1)

    while (self.step < self.state['loopIters'] and
           last_cost > .1 * self.state['minerr'] and
           (time.time() - start_time) / 60. < self.state['timeStop'] and
           self.state['lr'] > self.state['minlr']):
        print 'step:', self.step
        if self.step > 0 and \
                (time.time() - self.save_time) / 60. >= self.state['saveFreq']:
            self.save()
            if self.channel is not None:
                self.channel.save()
            self.save_time = time.time()
        if self.state['save_by_iter'] and \
                self.step % self.state['saveiter'] == 0:
            self.save_DIY()
        st = time.time()
        try:
            rvals = self.algo()
            self.state['traincost'] = float(rvals['cost'])
            self.state['step'] = self.step
            last_cost = rvals['cost']
            for name in rvals.keys():
                self.timings[name][self.step] = float(numpy.array(rvals[name]))
            if self.l2_params:
                for param in self.model.params:
                    self.timings["l2_" + param.name][self.step] = \
                        numpy.mean(param.get_value() ** 2) ** 0.5

            if (numpy.isinf(rvals['cost']) or numpy.isnan(rvals['cost'])) and \
                    self.state['on_nan'] == 'raise':
                self.state['gotNaN'] = 1
                self.save()
                if self.channel:
                    self.channel.save()
                print 'Got NaN while training'
                last_cost = 0
            if self.valid_data is not None and \
                    self.step % self.state['validFreq'] == 0 and \
                    self.step > 1:
                valcost = self.validate()
                if valcost > self.old_cost * self.state['cost_threshold']:
                    self.patience -= 1
                    if 'lr_start' in self.state and \
                            self.state['lr_start'] == 'on_error':
                        self.state['lr_start'] = self.step
                elif valcost < self.old_cost:
                    self.patience = self.state['patience']
                    self.old_cost = valcost
                if self.state['divide_lr'] and self.patience < 1:
                    # Divide lr by 2
                    self.algo.lr = self.algo.lr / self.state['divide_lr']
                    bparams = dict(self.model.best_params)
                    self.patience = self.state['patience']
                    for p in self.model.params:
                        p.set_value(bparams[p.name])
            if self.state['hookFreq'] > 0 and \
                    self.step % self.state['hookFreq'] == 0 and \
                    self.hooks:
                try:
                    [fn() for fn in self.hooks]
                except:
                    print 'sample failed'
            if self.reset > 0 and self.step > 1 and \
                    self.step % self.reset == 0:
                print 'Resetting the data iterator'
                self.train_data.reset()

            self.step += 1
            self.timings['step'] = self.step
            self.timings['next_offset'] = self.train_data.next_offset
        except KeyboardInterrupt:
            break

    self.state['wholetime'] = float(time.time() - start_time)
    if self.valid_data is not None:
        self.validate()
    self.save()
    if self.channel:
        self.channel.save()
    print 'Took', (time.time() - start_time) / 60., 'min'
    avg_step = self.timings['time_step'][:self.step].mean()
    avg_cost2expl = self.timings['log2_p_expl'][:self.step].mean()
    print "Average step took {}".format(avg_step)
    print "That amounts to {} sentences in a day".format(
        1 / avg_step * 86400 * self.state['bs'])
    print "Average log2 per example is {}".format(avg_cost2expl)
def main(self):
    assert self.reset == -1

    print_mem('start')
    self.state['gotNaN'] = 0
    start_time = time.time()
    self.start_time = start_time
    self.batch_start_time = time.time()

    self.step = int(self.timings['step'])
    self.algo.step = self.step

    self.save_iter = 0
    self.save()
    if self.channel is not None:
        self.channel.save()
    self.save_time = time.time()

    last_cost = 1.
    self.state['clr'] = self.state['lr']
    self.train_data.start(self.timings['next_offset']
                          if 'next_offset' in self.timings
                          else -1)

    # added by Zhaopeng Tu, 2016-01-08
    # for halving the learning rate every full epoch after the 10th epoch:
    # in our experience the best performance is achieved around the 10th
    # epoch and decreases after that, which is why we introduce the
    # strategy of halving the learning rate
    epoch_batch_number = int(self.train_data.data_len / self.state['bs'])
    print 'Iterations per epoch', epoch_batch_number

    while (self.step < self.state['loopIters'] and
           last_cost > .1 * self.state['minerr'] and
           (time.time() - start_time) / 60. < self.state['timeStop'] and
           self.state['lr'] > self.state['minlr']):
        if self.step > 0 and \
                (time.time() - self.save_time) / 60. >= self.state['saveFreq']:
            self.save()
            if self.channel is not None:
                self.channel.save()
            self.save_time = time.time()
        st = time.time()
        try:
            # print 'Doubly cost weight:', self.model.cost_layer.DC.get_value(), \
            #     'ori_cost:', self.model.cost_layer.ori_cost.get_value(), \
            #     'doubly_cost:', self.model.cost_layer.doubly_cost.get_value(), \
            #     'cost:', self.model.cost_layer.cost.get_value()
            # print 'Doubly cost weight:', self.model.cost_layer.DC.get_value()
            # print_mem('iter%d' % self.step)
            rvals = self.algo()
            self.state['traincost'] = float(rvals['cost'])
            self.state['step'] = self.step
            last_cost = rvals['cost']
            for name in rvals.keys():
                self.timings[name][self.step] = float(numpy.array(rvals[name]))
            if self.l2_params:
                for param in self.model.params:
                    self.timings["l2_" + param.name][self.step] = \
                        numpy.mean(param.get_value() ** 2) ** 0.5

            if (numpy.isinf(rvals['cost']) or numpy.isnan(rvals['cost'])) and \
                    self.state['on_nan'] == 'raise':
                self.state['gotNaN'] = 1
                self.save()
                if self.channel:
                    self.channel.save()
                print 'Got NaN while training'
                last_cost = 0
            if self.valid_data is not None and \
                    self.step % self.state['validFreq'] == 0 and \
                    self.step > 1:
                valcost = self.validate()
                if valcost > self.old_cost * self.state['cost_threshold']:
                    self.patience -= 1
                    if 'lr_start' in self.state and \
                            self.state['lr_start'] == 'on_error':
                        self.state['lr_start'] = self.step
                elif valcost < self.old_cost:
                    self.patience = self.state['patience']
                    self.old_cost = valcost
                if self.state['divide_lr'] and self.patience < 1:
                    # Divide lr by 2
                    self.algo.lr = self.algo.lr / self.state['divide_lr']
                    bparams = dict(self.model.best_params)
                    self.patience = self.state['patience']
                    for p in self.model.params:
                        p.set_value(bparams[p.name])
            if self.state['hookFreq'] > 0 and \
                    self.step % self.state['hookFreq'] == 0 and \
                    self.hooks:
                [fn() for fn in self.hooks]
            if self.reset > 0 and self.step > 1 and \
                    self.step % self.reset == 0:
                print 'Resetting the data iterator'
                self.train_data.reset()

            self.step += 1
            self.timings['step'] = self.step
            self.timings['next_offset'] = self.train_data.next_offset

            # added by Zhaopeng Tu, 2016-01-08
            # for halving the learning rate every full epoch after the 10th epoch
            # if self.step % self.train_data.data_len == 0 and self.step / self.train_data.data_len >= 10:
            if self.step % epoch_batch_number == 0 and \
                    self.step / epoch_batch_number >= 10:
                # Divide lr by 2
                self.algo.lr = self.algo.lr / self.state['divide_lr']
        except KeyboardInterrupt:
            break

    self.state['wholetime'] = float(time.time() - start_time)
    if self.valid_data is not None:
        self.validate()
    self.save()
    if self.channel:
        self.channel.save()
    print 'Took', (time.time() - start_time) / 60., 'min'
    avg_step = self.timings['time_step'][:self.step].mean()
    avg_cost2expl = self.timings['log2_p_expl'][:self.step].mean()
    print "Average step took {}".format(avg_step)
    print "That amounts to {} sentences in a day".format(
        1 / avg_step * 86400 * self.state['bs'])
    print "Average log2 per example is {}".format(avg_cost2expl)
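# The schedule added above halves the learning rate once per epoch from the
# 10th epoch on, so after finishing epoch 10 + k the rate is
# lr0 / divide_lr ** (k + 1). A closed-form sketch of the same schedule;
# the function and its arguments are illustrative stand-ins.

def scheduled_lr(step, lr0, epoch_batch_number, divide_lr=2., start_epoch=10):
    epoch = step // epoch_batch_number
    if epoch < start_epoch:
        return lr0
    # one division at the end of each full epoch from start_epoch onwards
    return lr0 / divide_lr ** (epoch - start_epoch + 1)

assert scheduled_lr(9000, 1.0, 1000) == 1.0     # epoch 9: unchanged
assert scheduled_lr(10000, 1.0, 1000) == 0.5    # end of epoch 10: halved once
assert scheduled_lr(12000, 1.0, 1000) == 0.125  # halved three times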
def main(self):
    assert self.reset == -1

    print_mem('start')
    self.state['gotNaN'] = 0
    start_time = time.time()
    self.start_time = start_time
    self.batch_start_time = time.time()

    self.step = int(self.timings['step'])
    self.algo.step = self.step

    if self.state['save_iter'] < 0:
        self.save_iter = 0
        self.state['save_iter'] = 0
        self.save()
        if self.channel is not None:
            self.channel.save()
    else:
        # Fake saving
        self.save_iter += 1
        self.state['save_iter'] = self.save_iter
    self.save_time = time.time()

    last_cost = 1.
    self.state['clr'] = self.state['lr']
    self.train_data.start(self.timings['next_offset']
                          if 'next_offset' in self.timings
                          else -1)
    if self.state['rolling_vocab']:
        for i in xrange(self.timings['step'] - self.timings['super_step']):
            self.train_data.next()

    if self.state['rolling_vocab']:
        # Make sure the dictionary is current. If training is interrupted
        # while the vocabularies are being exchanged, things may get broken.
        step_modulo = self.step % self.model.total_num_batches
        if step_modulo in self.model.rolling_vocab_dict:  # 0 always in.
            cur_key = step_modulo
        else:
            cur_key = 0
            for key in self.model.rolling_vocab_dict:
                # Find the largest key smaller than step_modulo
                if (key < step_modulo) and (key > cur_key):
                    cur_key = key
        new_large2small_src = self.model.Dx_shelve[str(cur_key)]
        new_large2small_trgt = self.model.Dy_shelve[str(cur_key)]
        self.roll_vocab_update_dicts(new_large2small_src, new_large2small_trgt)
        self.zero_or_reload = True

    while (self.step < self.state['loopIters'] and
           last_cost > .1 * self.state['minerr'] and
           (time.time() - start_time) / 60. < self.state['timeStop'] and
           self.state['lr'] > self.state['minlr']):
        if self.step > 0 and \
                (time.time() - self.save_time) / 60. >= self.state['saveFreq']:
            self.save()
            if self.channel is not None:
                self.channel.save()
            self.save_time = time.time()
        st = time.time()
        try:
            if self.state['rolling_vocab']:
                step_modulo = self.step % self.model.total_num_batches
                if step_modulo in self.model.rolling_vocab_dict:
                    if not self.zero_or_reload:
                        # Not necessary for 0 or when reloading a properly saved model
                        self.roll_vocab_small2large()
                        new_large2small_src = self.model.Dx_shelve[str(step_modulo)]
                        new_large2small_trgt = self.model.Dy_shelve[str(step_modulo)]
                        # Done above for 0 or a reloaded model
                        self.roll_vocab_update_dicts(new_large2small_src,
                                                     new_large2small_trgt)
                    self.roll_vocab_large2small()
                    tmp_batch = self.train_data.next(peek=True)
                    if (tmp_batch['x'][:, 0].tolist(),
                            tmp_batch['y'][:, 0].tolist()) == \
                            self.model.rolling_vocab_dict[step_modulo]:
                        logger.debug("Identical first sentences. OK")
                    else:
                        logger.error("Batches do not correspond.")
            elif self.state['hookFreq'] > 0 and \
                    self.step % self.state['hookFreq'] == 0 and \
                    self.hooks:
                # Hook first so that the peeked batch is the same as the one
                # used in algo. Use elif not to peek twice.
                [fn() for fn in self.hooks]
            rvals = self.algo()
            self.state['traincost'] = float(rvals['cost'])
            self.state['step'] = self.step
            last_cost = rvals['cost']
            for name in rvals.keys():
                self.timings[name][self.step] = float(numpy.array(rvals[name]))
            if self.l2_params:
                for param in self.model.params:
                    self.timings["l2_" + param.name][self.step] = \
                        numpy.mean(param.get_value() ** 2) ** 0.5

            if (numpy.isinf(rvals['cost']) or numpy.isnan(rvals['cost'])) and \
                    self.state['on_nan'] == 'raise':
                self.state['gotNaN'] = 1
                self.save()
                if self.channel:
                    self.channel.save()
                print 'Got NaN while training'
                last_cost = 0
            if self.valid_data is not None and \
                    self.step % self.state['validFreq'] == 0 and \
                    self.step > 1:
                valcost = self.validate()
                if valcost > self.old_cost * self.state['cost_threshold']:
                    self.patience -= 1
                    if 'lr_start' in self.state and \
                            self.state['lr_start'] == 'on_error':
                        self.state['lr_start'] = self.step
                elif valcost < self.old_cost:
                    self.patience = self.state['patience']
                    self.old_cost = valcost
                if self.state['divide_lr'] and self.patience < 1:
                    # Divide lr by 2
                    self.algo.lr = self.algo.lr / self.state['divide_lr']
                    bparams = dict(self.model.best_params)
                    self.patience = self.state['patience']
                    for p in self.model.params:
                        p.set_value(bparams[p.name])
            if not self.state['rolling_vocab']:
                # Standard use of hooks
                if self.state['hookFreq'] > 0 and \
                        self.step % self.state['hookFreq'] == 0 and \
                        self.hooks:
                    [fn() for fn in self.hooks]
            if self.reset > 0 and self.step > 1 and \
                    self.step % self.reset == 0:
                print 'Resetting the data iterator'
                self.train_data.reset()

            self.step += 1
            if self.state['rolling_vocab']:
                self.zero_or_reload = False
                self.timings['step'] = self.step  # Step now
                if (self.step % self.model.total_num_batches) % \
                        self.state['sort_k_batches'] == 0:
                    # Start of a super_batch. This log should appear just before
                    # 'logger.debug("Start of a super batch")' in
                    # 'get_homogeneous_batch_iter()'.
                    logger.debug("Set super_step and next_offset")
                    # Step at the start of the superbatch. super_step < step
                    self.timings['super_step'] = self.step
                    # Where to start after a reload. Will need to call next() a few times
                    self.timings['next_offset'] = self.train_data.next_offset
            else:
                self.timings['step'] = self.step
                self.timings['next_offset'] = self.train_data.next_offset
        except KeyboardInterrupt:
            break

    if self.state['rolling_vocab']:
        self.roll_vocab_small2large()
    self.state['wholetime'] = float(time.time() - start_time)
    if self.valid_data is not None:
        self.validate()
    self.save()
    if self.channel:
        self.channel.save()
    print 'Took', (time.time() - start_time) / 60., 'min'
    avg_step = self.timings['time_step'][:self.step].mean()
    avg_cost2expl = self.timings['log2_p_expl'][:self.step].mean()
    print "Average step took {}".format(avg_step)
    print "That amounts to {} sentences in a day".format(
        1 / avg_step * 86400 * self.state['bs'])
    print "Average log2 per example is {}".format(avg_cost2expl)
def main(self):
    print_mem('start')
    self.state['gotNaN'] = 0
    self.start_time = time.time()
    self.batch_start_time = time.time()

    self.step = int(self.timings['step'])
    self.algo.step = self.step

    self.save_iter = 0
    self.save()
    if self.channel is not None:
        self.channel.save()
    self.save_time = time.time()

    last_cost = 1.
    start_time = time.time()
    self.start_time = start_time
    self.state['clr'] = self.state['lr']

    while (self.step < self.state['loopIters'] and
           last_cost > .1 * self.state['minerr'] and
           (time.time() - start_time) / 60. < self.state['timeStop'] and
           self.state['lr'] > self.state['minlr']):
        if (time.time() - self.save_time) / 60. >= self.state['saveFreq']:
            self.save()
            if self.channel is not None:
                self.channel.save()
            self.save_time = time.time()
        st = time.time()
        try:
            rvals = self.algo()
            self.state['traincost'] = float(rvals['cost'])
            self.state['step'] = self.step
            last_cost = rvals['cost']
            for name in rvals.keys():
                pos = self.step // self.state['trainFreq']
                self.timings[name][pos] = float(numpy.array(rvals[name]))
            if (numpy.isinf(rvals['cost']) or numpy.isnan(rvals['cost'])) and \
                    self.state['on_nan'] == 'raise':
                self.state['gotNaN'] = 1
                self.save()
                if self.channel:
                    self.channel.save()
                print 'Got NaN while training'
                last_cost = 0
            if self.valid_data is not None and \
                    self.step % self.state['validFreq'] == 0 and \
                    self.step > 1:
                valcost = self.validate()
                if valcost > self.old_cost * self.state['cost_threshold']:
                    self.patience -= 1
                    if 'lr_start' in self.state and \
                            self.state['lr_start'] == 'on_error':
                        self.state['lr_start'] = self.step
                elif valcost < self.old_cost:
                    self.patience = self.state['patience']
                    self.old_cost = valcost
                if self.state['divide_lr'] and self.patience < 1:
                    # Divide lr by 2
                    self.algo.lr = self.algo.lr / self.state['divide_lr']
                    bparams = dict(self.model.best_params)
                    self.patience = self.state['patience']
                    for p in self.model.params:
                        p.set_value(bparams[p.name])
            if self.state['hookFreq'] > 0 and \
                    self.step % self.state['hookFreq'] == 0 and \
                    self.hooks:
                [fn() for fn in self.hooks]
            if self.reset > 0 and self.step > 1 and \
                    self.step % self.reset == 0:
                print 'Resetting the data iterator'
                self.train_data.reset()
            self.step += 1
            self.timings['step'] = self.step
            print "took {}".format(time.time() - st)
        except:
            self.state['wholetime'] = float(time.time() - start_time)
            self.save()
            if self.channel:
                self.channel.save()
            last_cost = 0
            print 'Error in running algo (lr issue)'
            print 'Took', (time.time() - start_time) / 60., 'min'
            raise

    self.state['wholetime'] = float(time.time() - start_time)
    if self.valid_data is not None:
        self.validate()
    self.save()
    if self.channel:
        self.channel.save()
    print 'Took', (time.time() - start_time) / 60., 'min'
def main(self):
    print_mem('start')
    self.state['gotNaN'] = 0
    self.start_time = time.time()
    self.batch_start_time = time.time()

    self.step = 0
    self.save_iter = 0
    self.save()
    if self.channel is not None:
        self.channel.save()
    self.save_time = time.time()

    last_cost = 1.
    start_time = time.time()
    self.start_time = start_time
    self.state['clr'] = self.state['lr']

    while (self.step < self.state['loopIters'] and
           last_cost > .1 * self.state['minerr'] and
           (time.time() - start_time) / 60. < self.state['timeStop'] and
           self.state['lr'] > self.state['minlr']):
        if (time.time() - self.save_time) / 60. > self.state['saveFreq']:
            self.save()
            if self.channel is not None:
                self.channel.save()
            self.save_time = time.time()
        st = time.time()
        try:
            rvals = self.algo()
            self.state['traincost'] = float(rvals['cost'])
            self.state['step'] = self.step
            last_cost = rvals['cost']
            for name in rvals.keys():
                pos = self.step // self.state['trainFreq']
                self.timings[name][pos] = float(numpy.array(rvals[name]))
            if (numpy.isinf(rvals['cost']) or numpy.isnan(rvals['cost'])) and \
                    self.state['on_nan'] == 'raise':
                self.state['gotNaN'] = 1
                self.save()
                if self.channel:
                    self.channel.save()
                print 'Got NaN while training'
                last_cost = 0
            if self.valid_data is not None and \
                    self.step % self.state['validFreq'] == 0 and \
                    self.step > 1:
                valcost = self.validate()
                if valcost > self.old_cost * self.state['cost_threshold']:
                    self.patience -= 1
                    if 'lr_start' in self.state and \
                            self.state['lr_start'] == 'on_error':
                        self.state['lr_start'] = self.step
                elif valcost < self.old_cost:
                    self.patience = self.state['patience']
                    self.old_cost = valcost
                if self.state['divide_lr'] and self.patience < 1:
                    # Divide lr by 2
                    self.algo.lr = self.algo.lr / self.state['divide_lr']
                    bparams = dict(self.model.best_params)
                    self.patience = self.state['patience']
                    for p in self.model.params:
                        p.set_value(bparams[p.name])
            if self.state['hookFreq'] > 0 and \
                    self.step % self.state['hookFreq'] == 0 and \
                    self.hooks:
                [fn() for fn in self.hooks]
            if self.reset > 0 and self.step > 1 and \
                    self.step % self.reset == 0:
                print 'Resetting the data iterator'
                self.train_data.reset()
            self.step += 1
        except:
            self.state['wholetime'] = float(time.time() - start_time)
            self.save()
            if self.channel:
                self.channel.save()
            last_cost = 0
            print 'Error in running algo (lr issue)'
            print 'Took', (time.time() - start_time) / 60., 'min'
            raise

    self.state['wholetime'] = float(time.time() - start_time)
    if self.valid_data is not None:
        self.validate()
    self.save()
    if self.channel:
        self.channel.save()
    print 'Took', (time.time() - start_time) / 60., 'min'
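# Both loops above guard each update with the same divergence check: if the
# cost goes to NaN or Inf and state['on_nan'] is 'raise', checkpoint
# immediately and zero last_cost so the while condition stops the loop. The
# essence of that guard, with a hypothetical checkpoint callback in place of
# self.save().

import numpy

def guard_cost(cost, checkpoint, on_nan='raise'):
    if (numpy.isinf(cost) or numpy.isnan(cost)) and on_nan == 'raise':
        checkpoint()  # save state before bailing out
        return 0.     # sentinel: fails the `last_cost > .1 * minerr` test
    return cost

assert guard_cost(1.25, lambda: None) == 1.25
assert guard_cost(float('nan'), lambda: None) == 0.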
def main(self):
    assert self.reset == -1

    print_mem('start')
    self.state['gotNaN'] = 0
    start_time = time.time()
    self.start_time = start_time
    self.batch_start_time = time.time()

    self.step = int(self.timings['step'])
    self.algo.step = self.step

    self.save_iter = 0
    self.save()
    if self.channel is not None:
        self.channel.save()
    self.save_time = time.time()

    last_cost = 1.
    self.state['clr'] = self.state['lr']
    ###########################################################################
    # by He Wei
    if 'next_offset' in self.state and self.state['next_offset'] <= 0:
        self.train_data.start(-1)
    elif 'next_offset' in self.timings:
        self.train_data.start(self.timings['next_offset'])
    else:
        self.train_data.start(-1)
    # self.train_data.start(self.timings['next_offset']
    #                       if 'next_offset' in self.timings
    #                       else -1)
    # if 'next_offset' in self.state and self.state['next_offset'] <= 0:
    #     self.train_data.reset()

    while (self.step < self.state['loopIters'] and
           last_cost > .1 * self.state['minerr'] and
           (time.time() - start_time) / 60. < self.state['timeStop'] and
           self.state['lr'] > self.state['minlr']):
        #######################################################################
        # added by He Wei
        if self.step % int(self.state['copy_model_freq']) == 0:
            self.save()
            if self.channel is not None:
                self.channel.save()
            try:
                copy_model_path = "%s/iter_%d" % (self.state['copy_model_path'],
                                                  self.step)
                cmd = "mkdir -p %s" % copy_model_path
                if not os.path.exists(copy_model_path):
                    print >> sys.stderr, cmd
                    os.system(cmd)
                cmd = "cp %s* %s" % (self.state['prefix'], copy_model_path)
                print >> sys.stderr, cmd
                os.system(cmd)
            except Exception:
                print 'mainLoop: failed to copy model files to %s' % copy_model_path
                traceback.print_exc()
        #######################################################################
        if self.step > 0 and \
                (time.time() - self.save_time) / 60. >= self.state['saveFreq']:
            self.save()
            if self.channel is not None:
                self.channel.save()
            self.save_time = time.time()
        st = time.time()
        try:
            rvals = self.algo()
            self.state['traincost'] = float(rvals['cost'])
            self.state['step'] = self.step
            last_cost = rvals['cost']
            for name in rvals.keys():
                self.timings[name][self.step] = float(numpy.array(rvals[name]))
            if self.l2_params:
                for param in self.model.params:
                    self.timings["l2_" + param.name][self.step] = \
                        numpy.mean(param.get_value() ** 2) ** 0.5

            if (numpy.isinf(rvals['cost']) or numpy.isnan(rvals['cost'])) and \
                    self.state['on_nan'] == 'raise':
                self.state['gotNaN'] = 1
                self.save()
                if self.channel:
                    self.channel.save()
                print 'Got NaN while training'
                last_cost = 0
            if self.valid_data is not None and \
                    self.step % self.state['validFreq'] == 0 and \
                    self.step > 1:
                valcost = self.validate()
                if valcost > self.old_cost * self.state['cost_threshold']:
                    self.patience -= 1
                    if 'lr_start' in self.state and \
                            self.state['lr_start'] == 'on_error':
                        self.state['lr_start'] = self.step
                elif valcost < self.old_cost:
                    self.patience = self.state['patience']
                    self.old_cost = valcost
                if self.state['divide_lr'] and self.patience < 1:
                    # Divide lr by 2
                    self.algo.lr = self.algo.lr / self.state['divide_lr']
                    bparams = dict(self.model.best_params)
                    self.patience = self.state['patience']
                    for p in self.model.params:
                        p.set_value(bparams[p.name])
            if self.state['hookFreq'] > 0 and \
                    self.step % self.state['hookFreq'] == 0 and \
                    self.hooks:
                [fn() for fn in self.hooks]
            if self.reset > 0 and self.step > 1 and \
                    self.step % self.reset == 0:
                print 'Resetting the data iterator'
                self.train_data.reset()

            self.step += 1
            self.timings['step'] = self.step
            self.timings['next_offset'] = self.train_data.next_offset
        except KeyboardInterrupt:
            break

    self.state['wholetime'] = float(time.time() - start_time)
    if self.valid_data is not None:
        self.validate()
    self.save()
    if self.channel:
        self.channel.save()
    print 'Took', (time.time() - start_time) / 60., 'min'
    avg_step = self.timings['time_step'][:self.step].mean()
    avg_cost2expl = self.timings['log2_p_expl'][:self.step].mean()
    print "Average step took {}".format(avg_step)
    print "That amounts to {} sentences in a day".format(
        1 / avg_step * 86400 * self.state['bs'])
    print "Average log2 per example is {}".format(avg_cost2expl)
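# The He Wei block above snapshots the model files every copy_model_freq
# steps by shelling out to mkdir -p and cp. The same snapshotting can be
# done with the standard library instead of os.system; a sketch under that
# assumption, with illustrative path arguments.

import glob
import os
import shutil

def copy_model_snapshot(prefix, copy_model_path, step):
    dest = "%s/iter_%d" % (copy_model_path, step)
    if not os.path.exists(dest):
        os.makedirs(dest)                 # stands in for "mkdir -p"
    for path in glob.glob(prefix + "*"):  # stands in for "cp prefix* dest"
        shutil.copy(path, dest)
    return dest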