def __init__(self, model, data_loader, optimizer):
    """Initialize the trainer and, when enabled, a TensorBoard logger.

    The logger writes under logs/<ModelClassName>.
    """
    super().__init__(model, data_loader, optimizer)
    self.current_epoch = 0
    if self.config.LOGGING_ENABLE:
        # Imported lazily so the TensorBoard dependency is only needed
        # when logging is switched on.
        from tensor_board_logger import TensorBoardLogger
        log_dir = os.path.join("logs", model.__class__.__name__)
        self.logger = TensorBoardLogger(log_dir)
def __init__(self, model, optimizer):
    """Set up evaluation: optional logging, eval mode, checkpoint restore."""
    super().__init__(model, optimizer)
    if self.config.LOGGING_ENABLE:
        # Lazy import: only pull in TensorBoard when logging is enabled.
        from tensor_board_logger import TensorBoardLogger
        self.logger = TensorBoardLogger(
            os.path.join("logs", model.__class__.__name__))
    self.current_epoch = 0
    # Inference only: disable dropout / put batch-norm in eval behavior.
    self.model.eval()
    # Load model & optimizer.
    state = self.load_checkpoint()
    try:
        self.optimizer.load_state_dict(state["optimizer"])  # 89527, 100 -> 271, 100
        self.model.load_state_dict(state["model"])
    except KeyError:
        pass  # There is no checkpoint; keep the freshly initialized weights.
class RNNTrainer(Trainer):
    """Trainer for RNN models with optional TensorBoard logging and
    checkpoint save/resume support."""

    config = ConfigRNN.instance()

    def __init__(self, model, data_loader, optimizer):
        super().__init__(model, data_loader, optimizer)
        if self.config.LOGGING_ENABLE:
            # Imported lazily so TensorBoard is only required when logging.
            from tensor_board_logger import TensorBoardLogger
            self.logger = TensorBoardLogger(
                os.path.join("logs", model.__class__.__name__))
        self.current_epoch = 0

    def train(self, max_epoch, batch_size):
        """Train for up to `max_epoch` epochs, resuming from a checkpoint
        when CHECKPOINT_ENABLE is set.

        `batch_size` is unused here; it is kept for interface compatibility.
        """
        print("Training started")
        if torch.cuda.is_available():
            self.model = self.model.cuda()
        # Set model to train mode.
        self.model.train()
        epoch_resume = 0
        if self.config.CHECKPOINT_ENABLE:
            checkpoint = self.load_checkpoint()
            try:
                epoch_resume = checkpoint["epoch"]
                self.optimizer.load_state_dict(checkpoint["optimizer"])
                self.model.load_state_dict(checkpoint["model"])
            except KeyError:
                # There is no checkpoint
                pass
        for epoch in range(epoch_resume, max_epoch):
            accuracy_sum = 0
            loss_sum = 0
            self.current_epoch = epoch
            for batch_idx, (_data, target) in enumerate(self.data_loader):
                # Transpose vector to make it (num of words / batch size) * batch size * index size(1).
                _data = np.transpose(_data, (1, 0, 2))
                _data, target = _data.to(device=self.device), target.to(
                    device=self.device)
                # Initialize the gradient of model
                self.optimizer.zero_grad()
                output, hidden, cell = self.model(_data)
                loss = self.config.CRITERION(output, target)
                loss.backward()
                self.optimizer.step()
                if self.config.DEBUG_MODE:
                    # BUGFIX: after the transpose, dim 0 is the sequence
                    # length; the batch size is _data.shape[1] (this matches
                    # the newer RNNTrainer implementation).
                    print("Train Epoch: {}/{} [{}/{} ({:.0f}%)]".format(
                        epoch, max_epoch, batch_idx * _data.shape[1],
                        len(self.data_loader.dataset),
                        100. * batch_idx / len(self.data_loader)))
                    print("Loss: {:.6f}".format(loss.item()))
                    print("target : ", target)
                    print("output : ", output, end="\n\n")
                accuracy = self.get_accuracy(target, output)
                accuracy_sum += accuracy
                # BUGFIX: accumulate the Python scalar, not the loss tensor;
                # summing tensors keeps every batch's autograd graph alive
                # for the whole epoch and leaks memory.
                loss_sum += loss.item()
            if self.config.LOGGING_ENABLE:
                if len(self.data_loader) == 0:
                    raise Exception("Data size is smaller than batch size.")
                loss_avg = loss_sum / len(self.data_loader)
                accuracy_avg = accuracy_sum / len(self.data_loader)
                # TODO(kyungsoo): Make Tensorboard automatically execute when train.py runs if it is possible
                self.logger.log(loss_avg, accuracy_avg,
                                self.model.named_parameters(),
                                self.current_epoch)
            self.save_checkpoint({
                "epoch": epoch + 1,
                "model": self.model.state_dict(),
                "optimizer": self.optimizer.state_dict(),
            })
        print("End")

    def evaluate(self, batch_size):
        """Evaluate on the data loader and print average loss and accuracy."""
        print("Evaluation started")
        # Set model to eval mode.
        self.model.eval()
        if self.config.CHECKPOINT_ENABLE:
            checkpoint = self.load_checkpoint()
            try:
                self.optimizer.load_state_dict(checkpoint["optimizer"])
                self.model.load_state_dict(checkpoint["model"])
            except KeyError:
                # There is no checkpoint
                pass
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for _data, target in self.data_loader:
                _data, target = _data.to(self.device), target.to(self.device)
                # NOTE(review): train() transposes instead of view(); confirm
                # both produce the same (seq_len, batch, 1) layout for this
                # data loader.
                input_data = _data.view(
                    -1, batch_size, 1
                )  # (num of words / batch size) * batch size * index size(1)
                output, _, _ = self.model(input_data)
                test_loss += self.config.CRITERION(
                    output, target).item()  # sum up batch loss
                prediction = output.max(1, keepdim=True)[
                    1]  # get the index of the max log-probability
                correct += prediction.eq(
                    target.view_as(prediction)).sum().item()
        test_loss /= len(self.data_loader.dataset)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.
              format(test_loss, correct, len(self.data_loader.dataset),
                     100. * correct / len(self.data_loader.dataset)))
        print("End")
class RNNTrainer(Trainer):
    """Trainer for an RNN model whose forward pass sorts the batch and
    returns the matching (sorted) targets alongside its outputs."""

    config = ConfigRNN.instance()

    def __init__(self, model, data_loader, optimizer):
        super().__init__(model, data_loader, optimizer)
        if self.config.BOARD_LOGGING:
            # Imported lazily so TensorBoard is only required when logging.
            from tensor_board_logger import TensorBoardLogger
            self.logger = TensorBoardLogger(
                os.path.join("logs", model.__class__.__name__))
        self.current_epoch = 0

    def train(self, max_epoch):
        """Train for up to `max_epoch` epochs, resuming from a checkpoint
        when SAVE_CHECKPOINT is set; logs per-batch loss/accuracy."""
        print("Training started")
        # Set model to train mode.
        self.model.train()
        epoch_resume = 0
        if self.config.SAVE_CHECKPOINT:
            checkpoint = self.load_checkpoint()
            try:
                epoch_resume = checkpoint["epoch"]
                self.optimizer.load_state_dict(checkpoint["optimizer"])
                self.model.load_state_dict(checkpoint["model"])
            except KeyError:
                # There is no checkpoint
                pass
        for epoch in range(epoch_resume, max_epoch):
            self.current_epoch = epoch
            for batch_idx, (_data, target) in enumerate(self.data_loader):
                # Transpose vector to make it (num of words / batch size) * batch size * index size(1).
                _data = np.transpose(_data, (1, 0, 2))
                _data, target = _data.to(device=self.device), target.to(
                    device=self.device)
                # Initialize the gradient of model
                self.optimizer.zero_grad()
                # The model reorders the batch internally (e.g. for packed
                # sequences) and returns the targets in the same order.
                output, hidden, cell, sorted_target = self.model(_data, target)
                loss = self.config.CRITERION(output, sorted_target)
                loss.backward()
                self.optimizer.step()
                if self.config.CONSOLE_LOGGING:
                    print("Train Epoch: {}/{} [{}/{} ({:.0f}%)]".format(
                        epoch, max_epoch, batch_idx * _data.shape[1],
                        len(self.data_loader.dataset),
                        100. * batch_idx / len(self.data_loader)))
                    print("Loss: {:.6f}".format(loss.item()))
                    print("target : ", target)
                    print("output : ", output, end="\n\n")
                accuracy = self.get_accuracy(sorted_target, output)
                if self.config.BOARD_LOGGING:
                    if len(self.data_loader) == 0:
                        raise Exception(
                            "Data size is smaller than batch size.")
                    # TODO(kyungsoo): Make Tensorboard automatically execute when train.py runs if it is possible
                    self.logger.log(loss.item(), accuracy,
                                    self.model.named_parameters(),
                                    self.current_epoch)
            self.save_checkpoint({
                "epoch": epoch + 1,
                "model": self.model.state_dict(),
                "optimizer": self.optimizer.state_dict(),
            })
        print("End")

    def evaluate(self):
        """Evaluate the checkpointed model; prints overall accuracy.

        Raises Exception when no checkpoint exists to evaluate.
        """
        print("Evaluation started")
        # Set model to eval mode.
        self.model.eval()
        checkpoint = self.load_checkpoint()
        try:
            self.optimizer.load_state_dict(checkpoint["optimizer"])
            self.model.load_state_dict(checkpoint["model"])
        except KeyError:
            # BUGFIX: removed the unreachable `pass` that followed this raise.
            raise Exception("No checkpoint to evaluate.")
        correct = 0
        with torch.no_grad():
            for _data, target in self.data_loader:
                # Transpose vector to make it (num of words / batch size) * batch size * index size(1).
                _data = np.transpose(_data, (1, 0, 2))
                _data, target = _data.to(device=self.device), target.to(
                    device=self.device)
                # BUGFIX: dropped optimizer.zero_grad() here — no gradients
                # are produced inside torch.no_grad(), so the call only
                # mutated optimizer state for no reason during evaluation.
                output, hidden, cell, sorted_target = self.model(_data, target)
                _, argmax = torch.max(output, 1)
                correct += (
                    sorted_target == argmax.squeeze()).nonzero().size(0)
        size = len(self.data_loader.dataset)
        print('\nAccuracy: {}/{} ({:.2f})%\n'.format(correct, size,
                                                     100. * correct / size))
        print("End")
def train(env, env_eval, model, max_steps, name):
    """Run the main DQN training loop for `max_steps` environment steps.

    Uses a frozen target network, an experience-replay buffer, and
    Hindsight Experience Replay (HER) goal relabeling (STRATEGY 'final'
    or 'future').  Periodically snapshots the model, logs episode stats
    to TensorBoard, and evaluates on `env_eval`.  Ctrl-C saves the model
    and exits the loop cleanly.
    """
    target_model = create_atari_model(env)
    replay = ReplayBuffer(REPLAY_BUFFER_SIZE)
    # done=True forces the episode-reset branch on the very first step,
    # which initializes trajectory/goal/obs/episode_return/epsilon before
    # they are read below.
    done = True
    episode = 0
    logdir = '{}-log'.format(name)
    board = TensorBoardLogger(logdir)
    print('Created {}'.format(logdir))
    steps_after_logging = 0
    loss = 0.0
    for step in range(1, max_steps + 1):
        try:
            if step % SNAPSHOT_EVERY == 0:
                save_model(model, step, logdir, name)
            if done:
                if episode > 0:
                    # A finished episode: flush its trajectory into replay.
                    if STRATEGY == 'final':
                        # 'final' relabels every transition with the goal
                        # actually reached at the end of the episode.
                        extra_goals = [final_goal(trajectory)]
                    for i, experience in enumerate(trajectory):
                        goal, obs, action, reward, next_obs, done = experience
                        replay.add(goal, obs, action, reward, next_obs, done)
                        # Hindsight Experience Replay - add experiences with extra goals that were reached
                        if STRATEGY == 'future':
                            extra_goals = future_goals(i, trajectory)
                        # NOTE(review): for STRATEGY values other than
                        # 'final'/'future', extra_goals would be undefined
                        # here — confirm STRATEGY is one of those two.
                        if extra_goals:
                            for extra_goal in extra_goals:
                                replay.add(extra_goal, obs, action,
                                           goal_reward(next_obs, extra_goal),
                                           next_obs, done)
                    if steps_after_logging >= LOG_EVERY:
                        steps_after_logging = 0
                        episode_end = time.time()
                        episode_seconds = episode_end - episode_start
                        episode_steps = step - episode_start_step
                        steps_per_second = episode_steps / episode_seconds
                        memory = psutil.virtual_memory()
                        to_gb = lambda in_bytes: in_bytes / 1024 / 1024 / 1024
                        print("episode {} "
                              "steps {}/{} "
                              "loss {:.7f} "
                              "return {} "
                              "in {:.2f}s "
                              "{:.1f} steps/s "
                              "{:.1f}/{:.1f} GB RAM".format(
                                  episode,
                                  episode_steps,
                                  step,
                                  loss,
                                  episode_return,
                                  episode_seconds,
                                  steps_per_second,
                                  to_gb(memory.used),
                                  to_gb(memory.total),
                              ))
                        board.log_scalar('episode_return', episode_return,
                                         step)
                        board.log_scalar('episode_steps', episode_steps, step)
                        board.log_scalar('episode_seconds', episode_seconds,
                                         step)
                        board.log_scalar('steps_per_second', steps_per_second,
                                         step)
                        board.log_scalar('epsilon', epsilon_for_step(step),
                                         step)
                        board.log_scalar('memory_used', to_gb(memory.used),
                                         step)
                        board.log_scalar('loss', loss, step)
                # Start a new episode with a freshly sampled goal.
                trajectory = []
                goal = sample_goal()
                episode_start = time.time()
                episode_start_step = step
                obs = env.reset()
                episode += 1
                episode_return = 0.0
                epsilon = epsilon_for_step(step)
            else:
                obs = next_obs
            action = epsilon_greedy_action(env, model, goal, obs, epsilon)
            next_obs, _, done, _ = env.step(action)
            # The environment's own reward is discarded; reward is derived
            # from proximity to the current goal.
            reward = goal_reward(next_obs, goal)
            episode_return += reward
            trajectory.append((goal, obs, action, reward, next_obs, done))
            if step >= TRAIN_START and step % UPDATE_EVERY == 0:
                if step % TARGET_UPDATE_EVERY == 0:
                    # Sync the frozen target network with the online model.
                    target_model.set_weights(model.get_weights())
                batch = replay.sample(BATCH_SIZE)
                loss = fit_batch(env, model, target_model, batch)
            if step == TRAIN_START:
                # Fixed validation set, sampled once, used below to track
                # the average max-Q value over training.
                validation_goals, validation_observations, _, _, _, _ = replay.sample(
                    VALIDATION_SIZE)
            if step >= TRAIN_START and step % EVAL_EVERY == 0:
                episode_return_avg, episode_return_min, episode_return_max = evaluate(
                    env_eval, model)
                q_values = predict(env, model, validation_goals,
                                   validation_observations)
                max_q_values = np.max(q_values, axis=1)
                avg_max_q_value = np.mean(max_q_values)
                print("episode {} "
                      "step {} "
                      "episode_return_avg {:.1f} "
                      "episode_return_min {:.1f} "
                      "episode_return_max {:.1f} "
                      "avg_max_q_value {:.1f}".format(
                          episode,
                          step,
                          episode_return_avg,
                          episode_return_min,
                          episode_return_max,
                          avg_max_q_value,
                      ))
                board.log_scalar('episode_return_avg', episode_return_avg,
                                 step)
                board.log_scalar('episode_return_min', episode_return_min,
                                 step)
                board.log_scalar('episode_return_max', episode_return_max,
                                 step)
                board.log_scalar('avg_max_q_value', avg_max_q_value, step)
            steps_after_logging += 1
        except KeyboardInterrupt:
            # Manual stop: persist the model before leaving the loop.
            save_model(model, step, logdir, name)
            break