def train(self):
    config = self.config

    layer_config = LayerConfig()
    layer_config.learn_rate = config.learn_rate
    layer_config.momentum = config.momentum
    layer_config.weight_decay = config.weight_decay

    nnstore = NNStore()
    nnstore.init_from_net(self)

    for epoch in range(0, config.num_epochs):
        # shuffle the data cases
        idx = np.random.permutation(self.num_total_cases)
        train_X = self.train_data.X[idx]
        train_T = self.train_data.T[idx]

        loss = 0
        for batch in range(0, self.num_minibatches):
            i_start = batch * config.minibatch_size
            if batch != self.num_minibatches - 1:
                i_end = i_start + config.minibatch_size
            else:
                i_end = self.num_total_cases
            X = train_X[i_start:i_end]
            T = train_T[i_start:i_end]

            Xbelow = X

            # forward pass
            for i in range(0, self.num_layers):
                Xbelow = self.layer[i].forward(Xbelow)
            self.output.forward(Xbelow)

            # compute loss
            loss += self.output.loss(T)

            # backprop
            dLdXabove = self.output.backprop(layer_config)
            for i in range(self.num_layers - 1, -1, -1):
                dLdXabove = self.layer[i].backprop(dLdXabove, layer_config)

        # statistics
        avg_loss = 1.0 * loss / self.num_total_cases
        if (epoch + 1) % config.epoch_to_display == 0:
            print 'epoch ' + str(epoch + 1) + ', loss = ' + str(avg_loss)
        if (epoch + 1) % config.epoch_to_save == 0:
            nnstore.update_from_net(self)
            nnstore.write(config.output_dir + '/m' + str(epoch + 1) + '.pdata')
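
# NOTE: the revisions below call self.output.act_type.label_vec_to_mat(t, K)
# to turn an integer label vector into a 1-of-K target matrix. That helper is
# not shown in this listing; the standalone sketch below is an assumed
# implementation, reconstructed only from how it is called, not the actual
# library code.
import numpy as np

def label_vec_to_mat(t, K):
    """Convert a length-N vector of integer labels in [0, K) into an
    N x K one-of-K (one-hot) target matrix."""
    t = np.asarray(t, dtype=int)
    T = np.zeros((t.shape[0], K))
    T[np.arange(t.shape[0]), t] = 1.0
    return T

# e.g. label_vec_to_mat([2, 0, 1], 3) gives
# [[0, 0, 1],
#  [1, 0, 0],
#  [0, 1, 0]]
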
def train(self):
    config = self.config

    # convert t into a matrix in 1-of-K representation if it is a vector
    t = self.train_data.T
    if not self.config.is_regression:
        T_matrix = self.output.act_type.label_vec_to_mat(t, self.train_data.K)
    else:
        T_matrix = t

    layer_config = LayerConfig()
    layer_config.learn_rate = config.learn_rate
    layer_config.momentum = config.momentum
    layer_config.weight_decay = config.weight_decay

    nnstore = NNStore()
    nnstore.init_from_net(self)

    self.display_training_info(-1, 0, 0)
    t_start = time.time()

    for epoch in range(0, config.num_epochs):
        # shuffle the dataset
        idx = np.random.permutation(self.num_total_cases)
        train_X = self.train_data.X[idx]
        train_T = T_matrix[idx]

        loss = 0
        for batch in range(0, self.num_minibatches):
            i_start = batch * config.minibatch_size
            if batch != self.num_minibatches - 1:
                i_end = i_start + config.minibatch_size
            else:
                i_end = self.num_total_cases
            X = train_X[i_start:i_end]
            T = train_T[i_start:i_end]

            Xbelow = X

            # forward pass
            for i in range(0, self.num_layers):
                Xbelow = self.layer[i].forward(Xbelow)
            self.output.forward(Xbelow)

            # compute loss
            loss += self.output.loss(T)

            # backprop
            dLdXabove = self.output.backprop(layer_config)
            for i in range(self.num_layers - 1, -1, -1):
                dLdXabove = self.layer[i].backprop(dLdXabove, layer_config)

        # statistics
        avg_loss = 1.0 * loss / self.num_total_cases
        if (epoch + 1) % config.epoch_to_display == 0:
            self.display_training_info(epoch, avg_loss, time.time() - t_start)
            t_start = time.time()
        if (epoch + 1) % config.epoch_to_save == 0:
            nnstore.update_from_net(self)
            nnstore.write(config.output_dir + '/m' + str(epoch + 1) + '.pdata')
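
# NOTE: every revision of train() slices the shuffled data into fixed-size
# minibatches, with the final batch absorbing any remainder (i_end jumps to
# num_total_cases). A standalone illustration of that index arithmetic,
# assuming num_minibatches is the floor of num_total_cases / minibatch_size
# (how the class actually computes it is not shown in this listing):
num_total_cases, minibatch_size = 10, 3
num_minibatches = num_total_cases // minibatch_size   # 3
for batch in range(num_minibatches):
    i_start = batch * minibatch_size
    if batch != num_minibatches - 1:
        i_end = i_start + minibatch_size
    else:
        i_end = num_total_cases
    # slices: [0:3], [3:6], [6:10] -- the last batch holds 4 cases
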
def train(self):
    config = self.config

    # convert t into a matrix in 1-of-K representation if it is a vector
    t = self.train_data.T
    T_matrix = self.output.act_type.label_vec_to_mat(t, self.train_data.K)

    layer_config = LayerConfig()
    layer_config.learn_rate = config.learn_rate
    layer_config.momentum = config.init_momentum
    layer_config.weight_decay = config.weight_decay

    nnstore = NNStore()
    nnstore.init_from_net(self)
    best_net = NNStore()
    best_net.init_from_net(self)

    train_acc, val_acc, test_acc = self.display_training_info(
        -1,
        self._compute_loss(self.train_data.X, T_matrix, config.minibatch_size),
        0)

    acc_rec = np.zeros((config.num_epochs / config.epoch_to_display + 1, 4))
    acc_rec[0, 0] = 0
    acc_rec[0, 1] = train_acc
    if config.is_val:
        acc_rec[0, 2] = val_acc
    if config.is_test:
        acc_rec[0, 3] = test_acc

    t_start = time.time()

    best_acc = val_acc
    if self.config.is_test:
        best_test_acc = test_acc
    best_epoch = -1

    for epoch in range(0, config.num_epochs):
        gnp.free_reuse_cache()

        # decrease learning rate over time
        layer_config.learn_rate = config.learn_rate / \
            (epoch / config.lr_drop_rate + 1)

        # TODO [dirty] special for Lnsvm
        if isinstance(self.output.act_type, act.LnsvmVariantOutput):
            # self.output.act_type.n = 3.0 - (3.0 - 0.5) / 50 * epoch
            self.output.act_type.n = 0.5
            if self.output.act_type.n < 0.5:
                self.output.act_type.n = 0.5
            if (epoch + 1) % config.epoch_to_display == 0:
                print 'n %.4f' % self.output.act_type.n,

        if epoch >= config.switch_epoch:
            layer_config.momentum = config.final_momentum

        # shuffle the dataset
        idx = np.random.permutation(self.num_total_cases)
        # idx = np.arange(self.num_total_cases)
        train_X = self.train_data.X[idx]
        train_T = T_matrix[idx]

        if config.input_noise > 0:
            train_X = train_X * (gnp.rand(train_X.shape) > config.input_noise)
            # train_X = train_X + gnp.randn(train_X.shape) * config.input_noise

        loss = 0
        for batch in range(0, self.num_minibatches):
            i_start = batch * config.minibatch_size
            if batch != self.num_minibatches - 1:
                i_end = i_start + config.minibatch_size
            else:
                i_end = self.num_total_cases
            X = train_X[i_start:i_end]
            T = train_T[i_start:i_end]

            # forward pass
            self._forward(X)

            # compute loss
            loss += self.output.loss(T)

            if self.output.Y.isnan().any():
                import ipdb
                ipdb.set_trace()
                print 'batch #%d <-- nan' % batch

            # backprop
            dLdXabove = self.output.backprop(layer_config)
            for i in range(self.num_layers - 1, -1, -1):
                dLdXabove = self.layer[i].backprop(dLdXabove, layer_config)

        # statistics
        avg_loss = 1.0 * loss / self.num_total_cases
        if (epoch + 1) % config.epoch_to_display == 0:
            train_acc, val_acc, test_acc = self.display_training_info(
                epoch, avg_loss, time.time() - t_start)
            if val_acc is None:
                val_acc = train_acc
            if (config.show_task_loss and val_acc < best_acc) or \
                    (not config.show_task_loss and val_acc > best_acc):
                best_acc = val_acc
                best_net.update_from_net(self)
                if config.is_test:
                    best_test_acc = test_acc
                best_epoch = epoch
            t_start = time.time()

            acc_rec[(epoch + 1) / config.epoch_to_display, 0] = epoch + 1
            acc_rec[(epoch + 1) / config.epoch_to_display, 1] = train_acc
            if config.is_val:
                acc_rec[(epoch + 1) / config.epoch_to_display, 2] = val_acc
            if config.is_test:
                acc_rec[(epoch + 1) / config.epoch_to_display, 3] = test_acc

        if (epoch + 1) % config.epoch_to_save == 0:
            nnstore.update_from_net(self)
            nnstore.write(config.output_dir + '/m' + str(epoch + 1) + '.pdata')

    print '----------------------------------------------------------------'
    if config.show_task_loss:
        s = 'loss'
    else:
        s = 'acc'
    if config.is_val:
        print 'Best val_%s %.4f' % (s, best_acc),
    else:
        print 'Best train_%s %.4f' % (s, best_acc),
    if config.is_test:
        print '--> test_%s %.4f' % (s, best_test_acc),
    print 'at epoch %d' % (best_epoch + 1)

    if config.is_output:
        f = open('%s/acc_rec.pdata' % config.output_dir, 'w')
        pickle.dump(acc_rec, f, -1)
        f.close()
        self.write_config('%s/cfg.txt' % config.output_dir)

        # save the best net
        fname = config.output_dir + '/best_net.pdata'
        print 'Saving the best model to ' + fname
        best_net.write(fname)

    if config.is_test:
        return (best_acc, best_test_acc)
    else:
        return best_acc
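
# NOTE: the final revision anneals the learning rate with
#     learn_rate / (epoch / lr_drop_rate + 1)
# where '/' on two ints is floor division in Python 2, so the rate drops in
# discrete steps, and momentum jumps from init_momentum to final_momentum at
# switch_epoch. A standalone sketch of that schedule; the concrete values
# (0.1, 50, 0.5, 0.9, 20) are made up for illustration:
learn_rate, lr_drop_rate = 0.1, 50
init_momentum, final_momentum, switch_epoch = 0.5, 0.9, 20
for epoch in range(150):
    lr = learn_rate / (epoch // lr_drop_rate + 1)
    momentum = final_momentum if epoch >= switch_epoch else init_momentum
    # epochs 0-49:    lr = 0.1
    # epochs 50-99:   lr = 0.05
    # epochs 100-149: lr = 0.0333...
    # momentum is 0.5 before epoch 20 and 0.9 from epoch 20 on
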