def forward(self, feats, last_rnn_state, pred_poly):
    pred_poly = pred_poly.detach().cpu().numpy()  # [bs, time]

    # We will use numpy functions to get pred_mask and pred_vertex_mask
    pred_mask = np.zeros(
        (pred_poly.shape[0], 1, self.grid_size, self.grid_size),
        dtype=np.uint8)
    pred_vertex_mask = np.zeros(
        (pred_poly.shape[0], 1, self.grid_size, self.grid_size),
        dtype=np.uint8)

    # Draw vertex mask and full polygon mask
    for b in range(pred_poly.shape[0]):
        masked_poly = utils.get_masked_poly(pred_poly[b], self.grid_size)
        xy_poly = utils.class_to_xy(masked_poly, self.grid_size)
        utils.get_vertices_mask(xy_poly, pred_vertex_mask[b, 0])
        utils.draw_poly(pred_mask[b, 0], xy_poly)

    pred_mask = torch.from_numpy(pred_mask).to(device).to(torch.float32)
    pred_vertex_mask = torch.from_numpy(pred_vertex_mask).to(device).to(
        torch.float32)

    inp = torch.cat([
        feats, last_rnn_state[0][0], last_rnn_state[1][0], pred_mask,
        pred_vertex_mask
    ], dim=1)

    conv1 = self.conv1(inp)
    conv2 = self.conv2(conv1)
    conv2 = conv2.view(conv2.size(0), -1)
    pred_iou = self.fc(conv2)

    return pred_iou.view(-1)
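# A minimal sketch of the layers the forward pass above assumes. The channel
# counts here are hypothetical (they depend on the feature extractor and the
# RNN state size); only the overall shape of the head mirrors forward(): the
# concatenated input is [feats, hidden state, cell state, pred_mask,
# pred_vertex_mask], so in_channels = feat + 2 * state + 2.
class IoUHeadSketch(nn.Module):
    def __init__(self, feat_channels=128, state_channels=64, grid_size=28):
        super(IoUHeadSketch, self).__init__()
        self.grid_size = grid_size
        in_channels = feat_channels + 2 * state_channels + 2
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True))
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True))
        # Flattened conv2 output -> one scalar IoU prediction per instance
        self.fc = nn.Linear(32 * grid_size * grid_size, 1)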
def process_output(self, polys, instance, grid_size):
    poly = polys[0]
    poly = utils.get_masked_poly(poly, grid_size)
    poly = utils.class_to_xy(poly, grid_size)
    poly = utils.poly0g_to_poly01(poly, grid_size)
    poly = poly * instance['patch_w']
    poly = poly + instance['starting_point']

    # np.int is deprecated in modern NumPy; the builtin int dtype is equivalent
    return [poly.astype(int).tolist()]
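# Hypothetical usage of process_output. The 'trainer' object and the instance
# values are assumptions for illustration: 'patch_w' is the side length of the
# image crop and 'starting_point' is its top-left corner, so the grid polygon
# is scaled to [0, 1], rescaled to patch size, and shifted into image space.
instance = {'patch_w': 224, 'starting_point': np.array([50, 80])}
image_space_poly = trainer.process_output(pred_polys, instance, grid_size=28)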
def validate(self):
    print('Validating')
    ggnn_grid_size = self.opts['ggnn_grid_size']
    self.model.ggnn.encoder.eval()
    self.model.temperature = 0
    self.model.mode = "test"
    # Leave LSTM in train mode

    with torch.no_grad():
        ious = []
        for step, data in enumerate(tqdm(self.val_loader)):
            output = self.model(data['img'].to(device),
                                data['fwd_poly'].to(device))
            pred_polys = output['pred_polys'].detach().cpu().numpy()

            # Get IoU
            iou = 0
            orig_poly = data['orig_poly']

            for i in range(pred_polys.shape[0]):
                p = pred_polys[i]
                mask_poly = utils.get_masked_poly(
                    p, self.model.ggnn.ggnn_grid_size)
                mask_poly = utils.class_to_xy(
                    mask_poly, self.model.ggnn.ggnn_grid_size)
                curr_gt_poly_112 = utils.poly01_to_poly0g(
                    orig_poly[i], ggnn_grid_size)
                # Don't shadow the loop variable i with the returned IoU
                cur_iou, masks = metrics.iou_from_poly(
                    np.array(mask_poly, dtype=np.int32),
                    np.array(curr_gt_poly_112, dtype=np.int32),
                    ggnn_grid_size, ggnn_grid_size)
                iou += cur_iou

            iou = iou / pred_polys.shape[0]
            ious.append(iou)
            del output
            del pred_polys

        iou = np.mean(ious)
        self.val_writer.add_scalar('iou', float(iou), self.global_step)
        print('[VAL] IoU: %f' % iou)

    self.model.temperature = self.opts['temperature']
    self.model.mode = "train_ggnn"
    self.model.ggnn.encoder.train()
def validate(self):
    print('Validating')
    self.model.encoder.eval()
    self.model.first_v.eval()
    # Leave LSTM in train mode

    ious = []
    accuracies = []

    with torch.no_grad():
        for step, data in enumerate(tqdm(self.val_loader)):
            output = self.model(data['img'].to(device),
                                data['fwd_poly'].to(device))

            # Get accuracy
            accuracy = metrics.train_accuracy(
                output['poly_class'].cpu().numpy(),
                data['mask'].cpu().numpy(),
                output['pred_polys'].cpu().numpy(),
                self.grid_size)

            # Get IoU
            iou = 0
            pred_polys = output['pred_polys'].cpu().numpy()
            gt_polys = data['full_poly']

            for i in range(pred_polys.shape[0]):
                p = pred_polys[i]
                p = utils.get_masked_poly(p, self.grid_size)
                p = utils.class_to_xy(p, self.grid_size)
                # Don't shadow the loop variable i with the returned IoU
                cur_iou, masks = metrics.iou_from_poly(
                    p, gt_polys[i], self.grid_size, self.grid_size)
                iou += cur_iou

            iou = iou / pred_polys.shape[0]
            ious.append(iou)
            accuracies.append(accuracy)
            del output

        iou = np.mean(ious)
        accuracy = np.mean(accuracies)

        self.val_writer.add_scalar('iou', float(iou), self.global_step)
        self.val_writer.add_scalar('accuracy', float(accuracy),
                                   self.global_step)

        print('[VAL] IoU: %f, Accuracy: %f' % (iou, accuracy))

    # Reset
    self.model.train()
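# A rough sketch of what metrics.iou_from_poly computes in both validate()
# loops above (the real helper may differ): both polygons are rasterized onto
# a grid of the given size and IoU is taken over the filled masks. This sketch
# uses OpenCV's cv2.fillPoly for rasterization as an assumption; it also shows
# why 'masks' stacks to [2, H, W], which the train loops tile to [2, H, W, 3].
import cv2  # extra dependency, only needed for this sketch

def iou_from_poly_sketch(pred_poly, gt_poly, height, width):
    pred_mask = np.zeros((height, width), dtype=np.uint8)
    gt_mask = np.zeros((height, width), dtype=np.uint8)
    cv2.fillPoly(pred_mask, [np.array(pred_poly, dtype=np.int32)], 1)
    cv2.fillPoly(gt_mask, [np.array(gt_poly, dtype=np.int32)], 1)
    masks = np.stack([pred_mask, gt_mask])  # [2, H, W]: prediction, then GT
    intersection = np.logical_and(pred_mask, gt_mask).sum()
    union = np.logical_or(pred_mask, gt_mask).sum()
    iou = intersection / union if union > 0 else 0.0
    return iou, masks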
def train(self, epoch):
    print('Starting training')
    self.model.train()

    accum = defaultdict(float)  # To accumulate stats for printing

    for step, data in enumerate(self.train_loader):
        if self.global_step % self.opts['val_freq'] == 0:
            self.validate()
            self.save_checkpoint(epoch)

        # Forward pass
        output = self.model(data['img'].to(device),
                            data['fwd_poly'].to(device))

        # Smoothed targets
        dt_targets = utils.dt_targets_from_class(
            output['poly_class'].cpu().numpy(),
            self.grid_size,
            self.opts['dt_threshold'])

        # Get losses
        loss = losses.poly_vertex_loss_mle(
            torch.from_numpy(dt_targets).to(device),
            data['mask'].to(device),
            output['logits'])
        fp_edge_loss = self.opts['fp_weight'] * losses.fp_edge_loss(
            data['edge_mask'].to(device), output['edge_logits'])
        fp_vertex_loss = self.opts['fp_weight'] * losses.fp_vertex_loss(
            data['vertex_mask'].to(device), output['vertex_logits'])

        total_loss = loss + fp_edge_loss + fp_vertex_loss

        # Backward pass
        self.optimizer.zero_grad()
        total_loss.backward()

        if 'grad_clip' in self.opts.keys():
            nn.utils.clip_grad_norm_(self.model.parameters(),
                                     self.opts['grad_clip'])

        self.optimizer.step()

        # Get accuracy
        accuracy = metrics.train_accuracy(
            output['poly_class'].cpu().numpy(),
            data['mask'].cpu().numpy(),
            output['pred_polys'].cpu().numpy(),
            self.grid_size)

        # Get IoU
        iou = 0
        pred_polys = output['pred_polys'].cpu().numpy()
        gt_polys = data['full_poly']

        for i in range(pred_polys.shape[0]):
            p = pred_polys[i]
            p = utils.get_masked_poly(p, self.grid_size)
            p = utils.class_to_xy(p, self.grid_size)
            # Don't shadow the loop variable i with the returned IoU
            cur_iou, masks = metrics.iou_from_poly(
                p, gt_polys[i], self.grid_size, self.grid_size)
            iou += cur_iou

        iou = iou / pred_polys.shape[0]

        accum['loss'] += float(loss)
        accum['fp_edge_loss'] += float(fp_edge_loss)
        accum['fp_vertex_loss'] += float(fp_vertex_loss)
        accum['accuracy'] += accuracy
        accum['iou'] += iou
        accum['length'] += 1

        if step % self.opts['print_freq'] == 0:
            # Mean of accumulated values
            for k in accum.keys():
                if k == 'length':
                    continue
                accum[k] /= accum['length']

            # Add summaries
            masks = np.expand_dims(masks, -1).astype(np.uint8)  # Add a channel dimension
            masks = np.tile(masks, [1, 1, 1, 3])  # Make [2, H, W, 3]
            img = (data['img'].cpu().numpy()[-1, ...] * 255).astype(np.uint8)
            img = np.transpose(img, [1, 2, 0])  # Make [H, W, 3]

            vert_logits = np.reshape(
                output['vertex_logits'][-1, ...].detach().cpu().numpy(),
                (self.grid_size, self.grid_size, 1))
            edge_logits = np.reshape(
                output['edge_logits'][-1, ...].detach().cpu().numpy(),
                (self.grid_size, self.grid_size, 1))
            vert_logits = (1 / (1 + np.exp(-vert_logits)) * 255).astype(np.uint8)
            edge_logits = (1 / (1 + np.exp(-edge_logits)) * 255).astype(np.uint8)
            vert_logits = np.tile(vert_logits, [1, 1, 3])  # Make [H, W, 3]
            edge_logits = np.tile(edge_logits, [1, 1, 3])  # Make [H, W, 3]

            vertex_mask = np.tile(
                np.expand_dims(
                    data['vertex_mask'][-1, ...].cpu().numpy().astype(np.uint8) * 255,
                    -1),
                (1, 1, 3))
            edge_mask = np.tile(
                np.expand_dims(
                    data['edge_mask'][-1, ...].cpu().numpy().astype(np.uint8) * 255,
                    -1),
                (1, 1, 3))

            self.writer.add_image('pred_mask', masks[0], self.global_step)
            self.writer.add_image('gt_mask', masks[1], self.global_step)
            self.writer.add_image('image', img, self.global_step)
            self.writer.add_image('vertex_logits', vert_logits, self.global_step)
            self.writer.add_image('edge_logits', edge_logits, self.global_step)
            self.writer.add_image('edge_mask', edge_mask, self.global_step)
            self.writer.add_image('vertex_mask', vertex_mask, self.global_step)

            if self.opts['return_attention'] is True:
                att = output['attention'][-1, 1:4, ...].detach().cpu().numpy()
                att = np.transpose(att, [0, 2, 3, 1])  # Make [T, H, W, 1]
                att = np.tile(att, [1, 1, 1, 3])  # Make [T, H, W, 3]

                def _scale(att):
                    att = att / np.max(att)
                    return (att * 255).astype(np.int32)

                self.writer.add_image(
                    'attention_1',
                    pyramid_expand(_scale(att[0]), upscale=8, sigma=10),
                    self.global_step)
                self.writer.add_image(
                    'attention_2',
                    pyramid_expand(_scale(att[1]), upscale=8, sigma=10),
                    self.global_step)
                self.writer.add_image(
                    'attention_3',
                    pyramid_expand(_scale(att[2]), upscale=8, sigma=10),
                    self.global_step)

            for k in accum.keys():
                if k == 'length':
                    continue
                self.writer.add_scalar(k, accum[k], self.global_step)

            print("[%s] Epoch: %d, Step: %d, Polygon Loss: %f, Edge Loss: %f, "
                  "Vertex Loss: %f, Accuracy: %f, IOU: %f"
                  % (str(datetime.now()), epoch, self.global_step,
                     accum['loss'], accum['fp_edge_loss'],
                     accum['fp_vertex_loss'], accum['accuracy'],
                     accum['iou']))

            accum = defaultdict(float)

        del output
        self.global_step += 1
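# A rough sketch of the "smoothed targets" idea behind
# utils.dt_targets_from_class used above (the real helper may differ): instead
# of a one-hot target per time step, grid cells near the ground-truth vertex
# also receive probability mass, decaying with distance up to dt_threshold.
# The decay function and Manhattan metric here are assumptions.
def dt_targets_sketch(poly_class, grid_size, dt_threshold):
    # poly_class: [bs, time] integer classes; class grid_size**2 is EOS
    bs, time = poly_class.shape
    n_classes = grid_size * grid_size + 1
    targets = np.zeros((bs, time, n_classes), dtype=np.float32)
    for b in range(bs):
        for t in range(time):
            c = poly_class[b, t]
            targets[b, t, c] = 1.0
            if c == grid_size * grid_size:  # EOS token stays one-hot
                continue
            cy, cx = divmod(c, grid_size)
            for y in range(grid_size):
                for x in range(grid_size):
                    d = abs(y - cy) + abs(x - cx)  # Manhattan distance
                    if 0 < d <= dt_threshold:
                        targets[b, t, y * grid_size + x] = 1.0 / (d + 1)
            # Normalize each time step into a distribution
            targets[b, t] /= targets[b, t].sum()
    return targets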
def train(self, epoch):
    print('Starting training')
    self.model.temperature = self.opts['temperature']
    self.model.ggnn.encoder.train()

    accum = defaultdict(float)  # To accumulate stats for printing
    ggnn_grid_size = self.opts['ggnn_grid_size']

    for step, data in enumerate(self.train_loader):
        self.optimizer.zero_grad()

        if self.global_step % self.opts['val_freq'] == 0:
            self.validate()
            self.save_checkpoint(epoch)

        output = self.model(data['img'].to(device),
                            data['fwd_poly'].to(device),
                            orig_poly=data['orig_poly'])

        ggnn_logits = output['ggnn_logits']
        local_prediction = output['ggnn_local_prediction'].to(device)
        poly_masks = output['ggnn_mask'].to(device)
        pred_polys = output['pred_polys'].detach().cpu().numpy()

        loss_sum = losses.poly_vertex_loss_mle_ggnn(
            local_prediction, poly_masks, ggnn_logits)

        loss_sum.backward()

        if 'grad_clip' in self.opts.keys():  # "grad_clip": 40
            nn.utils.clip_grad_norm_(self.model.ggnn.parameters(),
                                     self.opts['grad_clip'])

        self.optimizer.step()

        with torch.no_grad():
            # Get IoU
            iou = 0
            orig_poly = data['orig_poly']

            for i in range(pred_polys.shape[0]):
                p = pred_polys[i]
                mask_poly = utils.get_masked_poly(
                    p, self.model.ggnn.ggnn_grid_size)  # "ggnn_grid_size": 112
                mask_poly = utils.class_to_xy(
                    mask_poly, self.model.ggnn.ggnn_grid_size)
                curr_gt_poly_112 = utils.poly01_to_poly0g(
                    orig_poly[i], ggnn_grid_size)
                cur_iou, masks = metrics.iou_from_poly(
                    np.array(mask_poly, dtype=np.int32),
                    np.array(curr_gt_poly_112, dtype=np.int32),
                    ggnn_grid_size, ggnn_grid_size)
                iou += cur_iou

            iou = iou / pred_polys.shape[0]

            accum['loss'] += float(loss_sum.item())
            accum['iou'] += iou
            accum['length'] += 1

            if step % self.opts['print_freq'] == 0:  # "print_freq": 20
                # Mean of accumulated values
                for k in accum.keys():
                    if k == 'length':
                        continue
                    accum[k] /= accum['length']

                # Add summaries
                masks = np.expand_dims(masks, -1).astype(np.uint8)  # Add a channel dimension
                masks = np.tile(masks, [1, 1, 1, 3])  # Make [2, H, W, 3]
                img = (data['img'].cpu().numpy()[-1, ...] * 255).astype(np.uint8)
                img = np.transpose(img, [1, 2, 0])  # Make [H, W, 3], swap the dimensions

                self.writer.add_image('pred_mask', masks[0], self.global_step)
                self.writer.add_image('gt_mask', masks[1], self.global_step)
                self.writer.add_image('image', img, self.global_step)

                for k in accum.keys():
                    if k == 'length':
                        continue
                    self.writer.add_scalar(k, accum[k], self.global_step)

                print("[%s] Epoch: %d, Step: %d, Polygon Loss: %f, IOU: %f"
                      % (str(datetime.now()), epoch, self.global_step,
                         accum['loss'], accum['iou']))

                accum = defaultdict(float)

        del output, local_prediction, poly_masks, masks, ggnn_logits, pred_polys, loss_sum
        self.global_step += 1
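# Hypothetical driver tying the pieces together. The Trainer name, opts dict,
# and start_epoch attribute are assumptions, not part of the code above;
# validation and checkpointing happen inside train() every 'val_freq' steps,
# so the outer loop only needs to iterate over epochs.
if __name__ == '__main__':
    trainer = Trainer(opts)  # hypothetical constructor
    for epoch in range(trainer.start_epoch, opts['max_epochs']):
        trainer.train(epoch)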