def write_weight_statistics(writer: torch.utils.tensorboard.SummaryWriter, module: torch.nn.Module, epoch: int):
    # Log the norm, mean absolute value, and histogram of every parameter tensor of the model
    for weight_name, w in module.state_dict().items():
        norm = w.norm().item()
        writer.add_scalar(f'Norm/{weight_name}', norm, epoch)
        avg = w.abs().mean().item()
        writer.add_scalar(f'avg/{weight_name}', avg, epoch)
        writer.add_histogram(f'hist/{weight_name}', w, epoch)
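# Usage sketch for write_weight_statistics (the model, log directory, and epoch
# count below are illustrative assumptions, not part of the code above): logs
# per-parameter norms, mean absolute values, and histograms once per epoch.
def _demo_write_weight_statistics():
    import torch
    from torch.utils.tensorboard import SummaryWriter

    model = torch.nn.Sequential(torch.nn.Linear(8, 4), torch.nn.ReLU(), torch.nn.Linear(4, 2))
    writer = SummaryWriter(log_dir='runs/weight_stats_demo')
    for epoch in range(3):
        # ... a real training step would go here ...
        write_weight_statistics(writer, model, epoch)
    writer.close()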
def log_results(writer: torch.utils.tensorboard.SummaryWriter, accuracy_train: float, loss_train: float,
                accuracy_validation: float, loss_validation: float, epoch: int):
    """
    Log accuracies and losses for training and validation in the given epoch.

    Arguments
    ---------
    writer : torch.utils.tensorboard.SummaryWriter
        Entry point to log data for consumption and visualization by TensorBoard
    accuracy_train : float
        Training accuracy
    loss_train : float
        Training loss
    accuracy_validation : float
        Validation accuracy
    loss_validation : float
        Validation loss
    epoch : int
        Epoch at which the above results were obtained
    """
    writer.add_scalar('Train/Accuracy', accuracy_train, epoch)
    writer.add_scalar('Train/Loss', loss_train, epoch)
    writer.add_scalar('Validation/Accuracy', accuracy_validation, epoch)
    writer.add_scalar('Validation/Loss', loss_validation, epoch)
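# Usage sketch for log_results (hypothetical loop; the metric values are
# placeholders): writes train/validation accuracy and loss under the
# 'Train/*' and 'Validation/*' tags once per epoch.
def _demo_log_results():
    from torch.utils.tensorboard import SummaryWriter

    writer = SummaryWriter(log_dir='runs/log_results_demo')
    for epoch in range(1, 4):
        # Placeholder numbers standing in for real epoch results
        log_results(writer,
                    accuracy_train=0.80 + 0.01 * epoch, loss_train=0.50 / epoch,
                    accuracy_validation=0.75 + 0.01 * epoch, loss_validation=0.60 / epoch,
                    epoch=epoch)
    writer.close()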
def train_pcoders(net: torch.nn.Module, optimizer: torch.optim.Optimizer, loss_function: Callable, epoch: int,
                  train_loader: torch.utils.data.DataLoader, device: str,
                  writer: torch.utils.tensorboard.SummaryWriter = None):
    r"""
    Trains the feedback modules of PCoders using a distance between the prediction of a PCoder
    and the representation of the PCoder below.

    Args:
        net (torch.nn.Module): Predified network including all the PCoders
        optimizer (torch.optim.Optimizer): PyTorch-compatible optimizer object
        loss_function (Callable): A callable function that receives two tensors and returns the distance between them
        epoch (int): Training epoch number
        train_loader (torch.utils.data.DataLoader): DataLoader for training samples
        device (str): Training device (e.g. 'cpu', 'cuda:0')
        writer (torch.utils.tensorboard.SummaryWriter, optional): TensorBoard summary writer to track training history. Default: None
    """
    net.train()
    net.backbone.eval()

    nb_trained_samples = 0
    for batch_index, (images, _) in enumerate(train_loader):
        net.reset()
        images = images.to(device)
        optimizer.zero_grad()
        outputs = net(images)

        # Accumulate the prediction error of every PCoder: PCoder1 predicts the
        # input image, PCoder i+1 predicts the representation of PCoder i.
        for i in range(net.number_of_pcoders):
            if i == 0:
                a = loss_function(net.pcoder1.prd, images)
                loss = a
            else:
                pcoder_pre = getattr(net, f"pcoder{i}")
                pcoder_curr = getattr(net, f"pcoder{i+1}")
                a = loss_function(pcoder_curr.prd, pcoder_pre.rep)
                loss += a
            if writer is not None:
                writer.add_scalar(f"MSE Train/PCoder{i+1}", a.item(),
                                  (epoch - 1) * len(train_loader) + batch_index)

        nb_trained_samples += images.shape[0]

        loss.backward()
        optimizer.step()

        print('Training Epoch: {epoch} [{trained_samples}/{total_samples}]\tLoss: {loss:0.4f}'.format(
            epoch=epoch,
            trained_samples=nb_trained_samples,
            total_samples=len(train_loader.dataset),
            loss=loss.item(),
        ))
        if writer is not None:
            writer.add_scalar("MSE Train/Sum", loss.item(),
                              (epoch - 1) * len(train_loader) + batch_index)
def eval_pcoders(net: torch.nn.Module, loss_function: Callable, epoch: int,
                 eval_loader: torch.utils.data.DataLoader, device: str,
                 writer: torch.utils.tensorboard.SummaryWriter = None):
    r"""
    Evaluates the feedback modules of PCoders using a distance between the prediction of a PCoder
    and the representation of the PCoder below.

    Args:
        net (torch.nn.Module): Predified network including all the PCoders
        loss_function (Callable): A callable function that receives two tensors and returns the distance between them
        epoch (int): Evaluation epoch number
        eval_loader (torch.utils.data.DataLoader): DataLoader for evaluation samples
        device (str): Evaluation device (e.g. 'cpu', 'cuda:0')
        writer (torch.utils.tensorboard.SummaryWriter, optional): TensorBoard summary writer to track evaluation history. Default: None
    """
    net.eval()

    final_loss = [0 for _ in range(net.number_of_pcoders)]
    for batch_index, (images, _) in enumerate(eval_loader):
        net.reset()
        images = images.to(device)
        with torch.no_grad():
            outputs = net(images)
        for i in range(net.number_of_pcoders):
            if i == 0:
                final_loss[i] += loss_function(net.pcoder1.prd, images).item()
            else:
                pcoder_pre = getattr(net, f"pcoder{i}")
                pcoder_curr = getattr(net, f"pcoder{i+1}")
                final_loss[i] += loss_function(pcoder_curr.prd, pcoder_pre.rep).item()

    # Average the per-PCoder losses over batches and log them
    loss_sum = 0
    for i in range(net.number_of_pcoders):
        final_loss[i] /= len(eval_loader)
        loss_sum += final_loss[i]
        if writer is not None:
            writer.add_scalar(f"MSE Eval/PCoder{i+1}", final_loss[i], epoch - 1)

    print('Evaluation Epoch: {epoch} [{evaluated_samples}/{total_samples}]\tLoss: {loss:0.4f}'.format(
        epoch=epoch,
        evaluated_samples=len(eval_loader.dataset),
        total_samples=len(eval_loader.dataset),
        loss=loss_sum,
    ))
    if writer is not None:
        writer.add_scalar("MSE Eval/Sum", loss_sum, epoch - 1)
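# Usage sketch for train_pcoders / eval_pcoders. The predified network, data
# loaders, and optimizer setup below are assumptions (a predify-style model
# exposing pcoder1..pcoderN, .backbone, and .reset()); only the call pattern
# is illustrated.
def _demo_pcoder_training(net, train_loader, eval_loader, device='cpu', epochs=2):
    import torch
    from torch.utils.tensorboard import SummaryWriter

    # Optimize only the trainable (feedback) parameters; the backbone stays frozen
    feedback_params = [p for p in net.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(feedback_params, lr=1e-4)
    loss_function = torch.nn.MSELoss()
    writer = SummaryWriter(log_dir='runs/pcoder_demo')

    for epoch in range(1, epochs + 1):
        train_pcoders(net, optimizer, loss_function, epoch, train_loader, device, writer)
        eval_pcoders(net, loss_function, epoch, eval_loader, device, writer)
    writer.close()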
def run_environment(args: argparse.Namespace, device: str = 'cpu',
                    logger: torch.utils.tensorboard.SummaryWriter = None):
    # ==
    # Set up environment
    env = gym.make(args.env_name)
    env = MiniGridFlatWrapper(env, use_tensor=False,
                              scale_observation=True,
                              scale_min=0, scale_max=10)

    # ==
    # Set up agent
    agent = init_agent(args, env, device=device)

    # ==
    # Start training
    print(f'Starting training, {args.num_episode} episodes')
    for episode_idx in range(args.num_episode):
        # Reset environment and agent
        observation = env.reset()
        action = agent.begin_episode(observation)

        # Counters
        cumu_reward = 0.0
        timestep = 0

        # (optional) Record video
        video = None
        max_vid_len = 200
        if args.video_freq is not None:
            if episode_idx % int(args.video_freq) == 0:
                # Render first frame and insert to video array
                frame = env.render()
                video = np.zeros(shape=((max_vid_len,) + frame.shape),
                                 dtype=np.uint8)  # (max_vid_len, C, W, H)
                video[0] = frame

        while True:
            # ==
            # Interact with environment
            observation, reward, done, info = env.step(action)
            action = agent.step(observation, reward, done)

            # ==
            # Counters
            cumu_reward += reward
            timestep += 1

            # ==
            # Optional video
            if video is not None:
                if timestep < max_vid_len:
                    video[timestep] = env.render()

            # ==
            # Episode done
            if done:
                # Logging
                if args.log_dir is not None:
                    # Add reward
                    logger.add_scalar('Reward', cumu_reward, global_step=episode_idx)
                    # Optionally add video
                    if video is not None:
                        # Determine last frame
                        last_frame_idx = timestep + 2
                        if last_frame_idx > max_vid_len:
                            last_frame_idx = max_vid_len
                        # Change to tensor
                        vid_tensor = torch.tensor(video[:last_frame_idx, :, :, :],
                                                  dtype=torch.uint8)
                        vid_tensor = vid_tensor.unsqueeze(0)
                        # Add to tensorboard
                        logger.add_video('Run_Video', vid_tensor,
                                         global_step=episode_idx, fps=8)
                    # Occasional print
                    if episode_idx % 100 == 0:
                        print(f'Epis {episode_idx}, Timesteps: {timestep}, Return: {cumu_reward}')
                else:
                    print(f'Epis {episode_idx}, Timesteps: {timestep}, Return: {cumu_reward}')

                # Agent logging TODO: not sure if this is the best practice
                agent.report(logger=logger, episode_idx=episode_idx)
                break

    # TODO: have some debugging print-out (e.g. every 100 episode) to make sure times and
    # things are good and training is happening
    env.close()
    if args.log_dir is not None:
        logger.close()
def run_environment(args: argparse.Namespace,
                    logger: torch.utils.tensorboard.SummaryWriter = None):
    # ==
    # Initialize environment and agent
    env, agent = _init_env_agent(args)

    # ==
    # Save the transition matrix for later comparison
    env_trans = env.get_transition_matrix()

    # ==
    # Start training
    print(f'Start training for {args.num_episode} episodes')
    for episode_idx in range(args.num_episode):
        # Reset counter variables
        cumulative_reward = 0.0
        steps = 0

        # Reset environment and agent
        observation = env.reset()
        action = agent.begin_episode(observation)

        # ==
        # Run episode
        while True:
            # Interaction
            observation, reward, done, info = env.step(action)
            action = agent.step(observation, reward, done)

            # Counter variables
            cumulative_reward += reward
            steps += 1

            # TODO: need some way of recording the *recent* state occupancy
            #       to evaluate the agent behaviour

            # ==
            # If done
            if done or steps >= args.max_steps:
                # ==
                # Compute error (re-estimated every 100 episodes; the most
                # recently computed value is logged for the episodes in between)
                if episode_idx % 100 == 0:
                    t_err = get_transition_estimation_error(env, agent)

                # ==
                # Log
                if logger is None:
                    print(episode_idx, steps, cumulative_reward)
                else:
                    logger.add_scalar('Cumulative_reward', cumulative_reward,
                                      global_step=episode_idx)
                    logger.add_scalar('Steps', steps,
                                      global_step=episode_idx)
                    logger.add_scalar('Trans_l2_error', t_err,
                                      global_step=episode_idx)
                    if episode_idx % 100 == 0:
                        print(episode_idx, steps, cumulative_reward)

                # Agent self-report
                agent.report(logger=logger, episode_idx=episode_idx)
                break

    env.close()
def forward(self, x, opt: optim.Optimizer, step,
            summary_writer: torch.utils.tensorboard.SummaryWriter = None,
            sample_gpu=None):
    """Train inside forward: runs one optimization step on batch x and returns a dict of logs."""
    opt.zero_grad()
    batch_size, num_pts = x.shape[:2]
    z_mu, z_sigma = self.encoder(x)

    # Compute Q(z|X) and entropy H[Q(z|X)]
    if self.use_deterministic_encoder:
        z = z_mu + 0 * z_sigma  # ? why, the original code added this 0 multiplier
        entropy = torch.zeros(batch_size).to(z)
    else:
        z = self.reparametrized_gaussian(z_mu, z_sigma)
        entropy = self.gaussian_entropy(z_sigma)

    # Compute prior P(z)
    if self.use_latent_flow:
        w, dlog_pw = self.latentCNF(z, None, torch.zeros(batch_size, 1).to(z))
        log_pw = standard_normal_logp(w).view(batch_size, -1).sum(dim=1, keepdim=True)
        dlog_pw = dlog_pw.view(batch_size, 1).to(z)
        log_pz = log_pw - dlog_pw
    else:
        log_pz = torch.zeros(batch_size, 1).to(z)

    # Compute reconstruction P(X|z)
    z_new = z.view(z.shape) + (log_pz * 0.).mean()  # ? why
    y, dlog_py = self.pointCNF(x, z_new, torch.zeros(batch_size, num_pts, 1).to(x))
    log_py = standard_normal_logp(y).view(batch_size, -1).sum(dim=1, keepdim=True)
    dlog_py = dlog_py.view(batch_size, num_pts, 1).to(x)
    log_px = log_py - dlog_py

    # Loss
    entropy_loss = -entropy.mean() * self.entropy_w
    recon_loss = -log_px.mean() * self.recon_w
    prior_loss = -log_pz.mean() * self.prior_w
    loss = entropy_loss + recon_loss + prior_loss
    loss.backward()
    opt.step()

    # Write logs
    if self.distributed:
        raise NotImplementedError("Distributed training not implemented!")
    else:
        entropy_log = entropy.mean()
        recon_log = -log_px.mean()
        prior_log = -log_pz.mean()

    recon_nats = recon_log / float(x.size(1) * x.size(2))
    prior_nats = prior_log / float(self.fz)

    # Reconstruct to save
    with torch.no_grad():
        recon_pc = self.reconstruct(x, truncate_std=True)
        recon_im = visualize(recon_pc, path='/home/tmp/screenshot.png', samples=1)

    # Sample to save (only possible when the latent flow is used)
    sample_im = None
    if self.use_latent_flow:
        with torch.no_grad():
            sample_pc = self.sample(1, 1024, gpu=sample_gpu)
            sample_im = visualize(sample_pc, samples=1, path='/home/tmp/screenshot.png')

    record_dict = {
        'train/entropy': entropy_log.cpu().detach().item() if not isinstance(entropy_log, float) else entropy_log,
        'train/prior': prior_log,
        'train/recon': recon_log,
        'train/recon-nats': recon_nats,
        'train/prior-nats': prior_nats,
        # 'train/sample-reconstructed': recon_pc
    }

    if summary_writer is not None:
        for key, value in record_dict.items():
            summary_writer.add_scalar(key, value, step)
        record_dict['train/sample-reconstructed'] = recon_im
        summary_writer.add_images('train/sample-reconstructed', recon_im, step, dataformats='NHWC')
        if sample_im is not None:
            record_dict['train/sample-sampled'] = sample_im
            summary_writer.add_images('train/sample-sampled', sample_im, step, dataformats='NHWC')

    return record_dict
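# Usage sketch for the training-in-forward pattern above. The model and the
# data loader are assumptions (a flow-based point-cloud module whose forward()
# performs one optimization step and returns a dict of logs); only the outer
# training loop is illustrated.
def _demo_flow_training(model, train_loader, device='cuda:0', epochs=1):
    import torch.optim as optim
    from torch.utils.tensorboard import SummaryWriter

    opt = optim.Adam(model.parameters(), lr=1e-3)
    writer = SummaryWriter(log_dir='runs/flow_demo')

    step = 0
    for _ in range(epochs):
        for x in train_loader:  # assumed: x is a (batch_size, num_pts, 3) point-cloud batch
            x = x.to(device)
            logs = model(x, opt, step, summary_writer=writer)
            step += 1
    writer.close()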
def run_environment(config: configparser.ConfigParser, device: str = 'cpu',
                    logger: torch.utils.tensorboard.SummaryWriter = None):
    # =========
    # Set up environment
    config_env_name = config['Training']['env_name']
    config_seed = config['Training'].getint('seed')

    env = gym.make(config_env_name)
    env = MiniGridFlatWrapper(env, use_tensor=False,
                              scale_observation=False,
                              scale_min=0, scale_max=10)
    env.seed(config_seed)

    # =========
    # Set up agent
    agent = init_agent(config, env, device=device)

    # =========
    # Start training

    # Extract training variables
    config_num_episodes = config['Training'].getint('num_episode')
    config_record_video = config['Video'].getboolean('record')
    config_video_freq = config['Video'].getint('frequency')
    config_video_maxlen = config['Video'].getint('max_length')
    config_video_fps = config['Video'].getint('fps')

    # Train
    print(f'Starting training, {config_num_episodes} episodes')
    for episode_idx in range(config_num_episodes):
        # ==
        # Reset environment and agent
        observation = env.reset()
        action = agent.begin_episode(observation)

        # Counters
        cumu_reward = 0.0
        timestep = 0

        # ==
        # (optional) Record video
        video = None
        if config_record_video:
            if episode_idx % int(config_video_freq) == 0:
                # Render first frame and insert to video array
                frame = env.render()
                video = np.zeros(shape=((config_video_maxlen,) + frame.shape),
                                 dtype=np.uint8)  # (max_vid_len, C, W, H)
                video[0] = frame

        # ==
        # Run episode
        while True:
            # ==
            # Interact with environment
            observation, reward, done, info = env.step(action)
            action = agent.step(observation, reward, done)

            # ==
            # Counters
            cumu_reward += reward
            timestep += 1

            # ==
            # Optional video
            if video is not None:
                if timestep < config_video_maxlen:
                    video[timestep] = env.render()

            # ==
            # Episode done
            if done:
                # Logging
                if logger is not None:
                    # Add reward
                    logger.add_scalar('Reward', cumu_reward, global_step=episode_idx)
                    # Optionally add video
                    if video is not None:
                        # Determine last frame
                        last_frame_idx = timestep + 2
                        if last_frame_idx > config_video_maxlen:
                            last_frame_idx = config_video_maxlen
                        # Change to tensor
                        vid_tensor = torch.tensor(video[:last_frame_idx, :, :, :],
                                                  dtype=torch.uint8)
                        vid_tensor = vid_tensor.unsqueeze(0)
                        # Add to tensorboard
                        logger.add_video('Run_Video', vid_tensor,
                                         global_step=episode_idx,
                                         fps=config_video_fps)
                    # Occasional print
                    if episode_idx % 100 == 0:
                        print(f'Epis {episode_idx}, Timesteps: {timestep}, Return: {cumu_reward}')
                else:
                    print(f'Epis {episode_idx}, Timesteps: {timestep}, Return: {cumu_reward}')

                # Agent logging TODO: not sure if this is the best practice
                agent.report(logger=logger, episode_idx=episode_idx)
                break

    # TODO: have some debugging print-out (e.g. every 100 episode) to make sure times and
    # things are good and training is happening
    env.close()
    if logger is not None:
        logger.close()
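# Usage sketch for the config-driven run_environment (hypothetical config file
# path and log directory; the section/key names mirror those read above).
def _demo_run_from_config(config_path='configs/minigrid.ini', log_dir='runs/minigrid_demo'):
    import configparser
    from torch.utils.tensorboard import SummaryWriter

    config = configparser.ConfigParser()
    config.read(config_path)

    logger = SummaryWriter(log_dir=log_dir)
    # run_environment closes the logger itself once training finishes
    run_environment(config, device='cpu', logger=logger)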