# Module-level imports required by the methods below. `compute_loss`,
# `draw_person_pose`, and the `params` config dict are defined elsewhere
# in the repo.
import math
import os

import cv2
import matplotlib.pyplot as plt
import torch
from tqdm import tqdm


def evaluate(self, num=50):
    """Run up to `num` validation batches and return the mean total,
    PAF, and heatmap losses."""
    self.model.eval()
    count = 0
    running_loss = 0.
    running_paf_log = 0.
    running_heatmap_log = 0.
    with torch.no_grad():
        for imgs, pafs, heatmaps, ignore_mask in self.val_loader:
            imgs, pafs, heatmaps, ignore_mask = (
                imgs.to(self.device), pafs.to(self.device),
                heatmaps.to(self.device), ignore_mask.to(self.device))
            pafs_ys, heatmaps_ys = self.model(imgs)
            total_loss, paf_loss_log, heatmap_loss_log = compute_loss(
                pafs_ys, heatmaps_ys, pafs, heatmaps, ignore_mask)
            running_loss += total_loss.item()
            running_paf_log += paf_loss_log
            running_heatmap_log += heatmap_loss_log
            count += 1
            if count >= num:
                break
    # Average over the batches actually seen; the loader may hold fewer
    # than `num` batches.
    return (running_loss / count, running_paf_log / count,
            running_heatmap_log / count)
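# `compute_loss` is imported from elsewhere in the repo. For readers, here is
# a minimal sketch of the usual multi-stage masked-MSE formulation it is
# assumed to follow; the stage layout, mask polarity, and broadcasting below
# are assumptions, not the repo's actual implementation.
import torch.nn.functional as F


def compute_loss_sketch(pafs_ys, heatmaps_ys, pafs_t, heatmaps_t, ignore_mask):
    """Sum masked MSE over every intermediate-supervision stage."""
    total_loss = 0.
    paf_loss_log = 0.
    heatmap_loss_log = 0.
    # Pixels covered by `ignore_mask` (unannotated people) contribute no loss;
    # the mask is broadcast over the channel dimension.
    mask = (~ignore_mask.bool()).float().unsqueeze(1)
    for paf_y, heatmap_y in zip(pafs_ys, heatmaps_ys):
        paf_loss = F.mse_loss(paf_y * mask, pafs_t * mask)
        heatmap_loss = F.mse_loss(heatmap_y * mask, heatmaps_t * mask)
        total_loss = total_loss + paf_loss + heatmap_loss
        paf_loss_log += paf_loss.item()
        heatmap_loss_log += heatmap_loss.item()
    return total_loss, paf_loss_log, heatmap_loss_log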
def train(self, resume=False):
    running_loss = 0.
    running_paf_log = 0.
    running_heatmap_log = 0.
    # Defined before the loop so the first save_state() call has a value
    # even if it precedes the first evaluation.
    val_loss = float('inf')
    if resume:
        self.resume_training_load()
    for epoch in range(60):
        for imgs, pafs, heatmaps, ignore_mask in tqdm(self.train_loader):
            # Unfreeze the VGG backbone after 2000 warm-up steps and train it
            # with a reduced learning rate.
            if self.step == 2000:
                for para in self.model.base.vgg_base.parameters():
                    para.requires_grad = True
                self.optimizer.add_param_group({
                    'params': [*self.model.base.vgg_base.parameters()],
                    'lr': params['lr'] / 4
                })
            if self.step == 100000 or self.step == 200000:
                self.lr_schedule()
            imgs, pafs, heatmaps, ignore_mask = (
                imgs.to(self.device), pafs.to(self.device),
                heatmaps.to(self.device), ignore_mask.to(self.device))
            self.optimizer.zero_grad()
            pafs_ys, heatmaps_ys = self.model(imgs)
            total_loss, paf_loss_log, heatmap_loss_log = compute_loss(
                pafs_ys, heatmaps_ys, pafs, heatmaps, ignore_mask)
            total_loss.backward()
            self.optimizer.step()
            running_loss += total_loss.item()
            running_paf_log += paf_loss_log
            running_heatmap_log += heatmap_loss_log
            if self.step % self.board_loss_every == 0 and self.step != 0:
                self.board_scalars(
                    'train', running_loss / self.board_loss_every,
                    running_paf_log / self.board_loss_every,
                    running_heatmap_log / self.board_loss_every)
                running_loss = 0.
                running_paf_log = 0.
                running_heatmap_log = 0.
            if self.step % self.evaluate_every == 0 and self.step != 0:
                val_loss, paf_loss_val_log, heatmap_loss_val_log = self.evaluate(
                    num=params['eva_num'])
                self.model.train()
                self.board_scalars('val', val_loss, paf_loss_val_log,
                                   heatmap_loss_val_log)
            if self.step % self.board_pred_image_every == 0 and self.step != 0:
                self.model.eval()
                with torch.no_grad():
                    for i in range(20):
                        img_id = self.val_loader.dataset.imgIds[i]
                        img_path = os.path.join(
                            params['coco_dir'], 'val2017',
                            self.val_loader.dataset.coco.loadImgs(
                                [img_id])[0]['file_name'])
                        img = cv2.imread(img_path)
                        # inference
                        poses, _ = self.detect(img)
                        # Draw the detected poses and log the image (HWC -> CHW,
                        # still in OpenCV's BGR channel order) to TensorBoard.
                        img = draw_person_pose(img, poses)
                        img = torch.tensor(img.transpose(2, 0, 1))
                        self.writer.add_image('pred_image_{}'.format(i),
                                              img,
                                              global_step=self.step)
                self.model.train()
            if self.step % self.save_every == 0 and self.step != 0:
                self.save_state(val_loss)
            self.step += 1
            if self.step > 300000:
                break
        # Stop the outer epoch loop as well once the step budget is reached.
        if self.step > 300000:
            break
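# `lr_schedule` is called above but defined elsewhere in the class. Given the
# fixed decay points at steps 100000 and 200000, a plausible sketch is the
# conventional step decay (divide every group's lr by 10); the decay factor
# here is an assumption, not the repo's confirmed schedule.
def lr_schedule_sketch(optimizer, gamma=0.1):
    """Multiply the learning rate of every param group by `gamma`."""
    for param_group in optimizer.param_groups:
        param_group['lr'] *= gamma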
def find_lr(self,
            init_value=1e-8,
            final_value=10.,
            beta=0.98,
            exploding_scale=4.,
            num=None):
    """Learning-rate range test: sweep the lr geometrically from
    `init_value` to `final_value` over `num` batches and record the
    smoothed loss at each step."""
    if not num:
        num = len(self.train_loader)
    mult = (final_value / init_value)**(1 / num)
    lr = init_value
    for param_group in self.optimizer.param_groups:
        param_group['lr'] = lr
    self.model.train()
    avg_loss = 0.
    best_loss = 0.
    batch_num = 0
    losses = []
    log_lrs = []
    for imgs, pafs, heatmaps, ignore_mask in tqdm(self.train_loader,
                                                  total=num):
        imgs, pafs, heatmaps, ignore_mask = (
            imgs.to(self.device), pafs.to(self.device),
            heatmaps.to(self.device), ignore_mask.to(self.device))
        self.optimizer.zero_grad()
        batch_num += 1
        pafs_ys, heatmaps_ys = self.model(imgs)
        loss, _, _ = compute_loss(pafs_ys, heatmaps_ys, pafs, heatmaps,
                                  ignore_mask)
        # Compute the smoothed loss (exponential moving average with
        # bias correction).
        avg_loss = beta * avg_loss + (1 - beta) * loss.item()
        self.writer.add_scalar('avg_loss', avg_loss, batch_num)
        smoothed_loss = avg_loss / (1 - beta**batch_num)
        self.writer.add_scalar('smoothed_loss', smoothed_loss, batch_num)
        # Stop if the loss is exploding
        if batch_num > 1 and smoothed_loss > exploding_scale * best_loss:
            print('exited with best_loss at {}'.format(best_loss))
            plt.plot(log_lrs[10:-5], losses[10:-5])
            return log_lrs, losses
        # Record the best loss
        if smoothed_loss < best_loss or batch_num == 1:
            best_loss = smoothed_loss
        # Store the values
        losses.append(smoothed_loss)
        log_lrs.append(math.log10(lr))
        self.writer.add_scalar('log_lr', math.log10(lr), batch_num)
        # Do the SGD step
        loss.backward()
        self.optimizer.step()
        # Update the lr for the next step
        lr *= mult
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
        if batch_num > num:
            plt.plot(log_lrs[10:-5], losses[10:-5])
            return log_lrs, losses
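# A common way to consume the range-test output above: skip the noisy ends of
# the curve and pick a learning rate about one decade below the loss minimum.
# This heuristic (and the helper name) is illustrative, not part of the repo.
def suggest_lr(log_lrs, losses, skip_start=10, skip_end=5):
    """Return a suggested lr from a find_lr() sweep."""
    trimmed_lrs = log_lrs[skip_start:-skip_end]
    trimmed_losses = losses[skip_start:-skip_end]
    best_idx = min(range(len(trimmed_losses)), key=lambda j: trimmed_losses[j])
    # One order of magnitude below the minimum of the smoothed-loss curve.
    return 10**trimmed_lrs[best_idx] / 10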