def __init__(self, geometric_model='affine', output_size=(240, 240), crop_factor=1.0,
             padding_factor=0.5, use_cuda=True, normalize=None):
    assert isinstance(output_size, tuple)
    assert isinstance(crop_factor, float)
    assert isinstance(padding_factor, float)
    assert isinstance(use_cuda, bool)
    self.out_h, self.out_w = output_size
    self.crop_factor = crop_factor
    self.padding_factor = padding_factor
    self.use_cuda = use_cuda
    self.normalize = normalize
    # Initialize the geometric transformation (tps or affine) that warps the image
    # to form the training pair
    self.geometricTnf = GeometricTnf(geometric_model=geometric_model,
                                     out_h=self.out_h, out_w=self.out_w,
                                     use_cuda=self.use_cuda)
def __init__(self, crop_layer='img'):
    super().__init__()
    self.crop_layer = crop_layer
    if self.crop_layer == 'img':
        self.affineTnf = GeometricTnf(geometric_model='affine')
    elif self.crop_layer == 'pool4':
        # RoI pooling on pool4 features: 15x15 output, stride 16 w.r.t. the input image
        self.RoIPool = ROIPool((15, 15), 1.0 / 16.0)
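# For reference, a minimal sketch (not part of the original class) of how the
# RoIPool above maps image-space boxes onto pool4-scale features; torchvision's
# RoIPool is assumed, and the feature map / boxes are dummy values.
import torch
from torchvision.ops import RoIPool

roi_pool = RoIPool(output_size=(15, 15), spatial_scale=1.0 / 16.0)  # pool4 has stride 16
features = torch.randn(1, 512, 15, 15)                    # pool4 features of a 240x240 image
boxes = torch.tensor([[0.0, 32.0, 32.0, 208.0, 208.0]])   # (batch_idx, x1, y1, x2, y2) in image coords
crops = roi_pool(features, boxes)                         # -> (1, 512, 15, 15)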
def __init__(self, csv_file, dataset_path, output_size=(240, 240), normalize=None, random_crop=False):
    self.dataframe = pd.read_csv(csv_file)        # Read image pair data
    self.img_A_names = self.dataframe.iloc[:, 0]  # Source image names
    self.img_B_names = self.dataframe.iloc[:, 1]  # Target image names
    self.flips = self.dataframe.iloc[:, 3].values.astype('int')
    self.dataset_path = dataset_path              # Path for reading images
    self.out_h, self.out_w = output_size
    self.normalize = normalize
    self.random_crop = random_crop
    # Initialize an affine transformation to resize the image to (out_h, out_w), e.g. (240, 240)
    self.affineTnf = GeometricTnf(geometric_model='affine', out_h=self.out_h,
                                  out_w=self.out_w, use_cuda=False)
def __init__(self, csv_file, dataset_path, output_size=(240, 240), normalize=None):
    self.dataframe = pd.read_csv(csv_file)   # Read image pair data
    self.dataset_path = dataset_path         # Path for reading images
    self.out_h, self.out_w = output_size
    self.normalize = normalize
    # Initialize an affine transformation to resize the image to (out_h, out_w), e.g. (240, 240)
    self.affineTnf = GeometricTnf(geometric_model='affine', out_h=self.out_h,
                                  out_w=self.out_w, use_cuda=False)
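# The __getitem__ of these dataset classes is not shown here; below is a minimal
# sketch of the resize step they rely on, assuming GeometricTnf applies the
# identity affine transform when no theta is given (the helper name
# _load_and_resize is hypothetical).
import os
import numpy as np
import torch
from skimage import io

def _load_and_resize(self, img_names, idx):
    image = io.imread(os.path.join(self.dataset_path, img_names[idx]))
    image = torch.Tensor(image.astype(np.float32)).permute(2, 0, 1).unsqueeze(0)
    # The identity affine warp simply resamples the image to (self.out_h, self.out_w)
    return self.affineTnf(image_batch=image).squeeze(0)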
def __init__(self, geometric_model='affine', output_size=(240, 240), crop_factor=1.0,
             padding_factor=0.5, use_cuda=True, normalize=None):
    assert isinstance(output_size, tuple)
    assert isinstance(crop_factor, float)
    assert isinstance(padding_factor, float)
    assert isinstance(use_cuda, bool)
    self.out_h, self.out_w = output_size
    self.crop_factor = crop_factor
    self.padding_factor = padding_factor
    self.use_cuda = use_cuda
    self.normalize = normalize
    self.geometricTnf = GeometricTnf(geometric_model=geometric_model,
                                     out_h=self.out_h, out_w=self.out_w,
                                     use_cuda=self.use_cuda)
def __init__(self, csv_file, dataset_path, output_size=(240, 240), geometric_model='affine',
             dataset_size=0, normalize=None, random_sample=False, random_t=0.5, random_s=0.5,
             random_alpha=1 / 6, random_t_tps=0.4, random_crop=False):
    self.dataframe = pd.read_csv(csv_file)   # Read image pair data
    if dataset_size != 0:
        dataset_size = min(dataset_size, len(self.dataframe))
        self.dataframe = self.dataframe.iloc[0:dataset_size, :]
    self.img_A_names = self.dataframe.iloc[:, 0]  # Source image names
    self.img_B_names = self.dataframe.iloc[:, 1]  # Target image names
    self.categories = self.dataframe.iloc[:, 2].values
    self.flips = self.dataframe.iloc[:, 3].values.astype('int')
    self.random_sample = random_sample
    if not self.random_sample:
        # Ground-truth transformation parameters (e.g. 18 tps values) stored in the csv
        self.theta_array = self.dataframe.iloc[:, 4:].values.astype('float')
    self.dataset_path = dataset_path   # Path for reading images
    self.out_h, self.out_w = output_size
    self.geometric_model = geometric_model
    self.normalize = normalize
    self.random_t = random_t
    self.random_s = random_s
    self.random_alpha = random_alpha
    self.random_t_tps = random_t_tps
    self.random_crop = random_crop
    # Initialize an affine transformation to resize the image to (out_h, out_w), e.g. (240, 240)
    self.affineTnf = GeometricTnf(geometric_model='affine', out_h=self.out_h,
                                  out_w=self.out_w, use_cuda=False)
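# How the random_* ranges above are typically turned into a sampled theta: a
# sketch after Rocco et al.'s synthetic-pair training, not this repo's exact
# sampler (the method name _sample_theta_sketch is hypothetical).
import numpy as np

def _sample_theta_sketch(self):
    if self.geometric_model == 'affine':
        alpha = (np.random.rand(1) - 0.5) * 2 * np.pi * self.random_alpha  # rotation angle
        scale = 1 + (np.random.rand(2) - 0.5) * 2 * self.random_s          # anisotropic scale
        theta = np.zeros(6)
        theta[[2, 5]] = (np.random.rand(2) - 0.5) * 2 * self.random_t      # translation
        theta[0] = scale[0] * np.cos(alpha)
        theta[1] = -scale[0] * np.sin(alpha)
        theta[3] = scale[1] * np.sin(alpha)
        theta[4] = scale[1] * np.cos(alpha)
    else:
        # tps: jitter the 3x3 control-point grid (18 values, matching theta_array)
        base = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1,
                         -1, 0, 1, -1, 0, 1, -1, 0, 1], dtype=np.float64)
        theta = base + (np.random.rand(18) - 0.5) * 2 * self.random_t_tps
    return theta.astype(np.float32)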
def affTpsTnf(self, source_image, theta_aff, theta_aff_tps, use_cuda=True):
    # Compose the tps warp with the affine warp in a single resampling step:
    # obtain the tps sampling grid, map every grid point through the affine
    # transform, then sample the source image once.
    tpstnf = GeometricTnf(geometric_model='tps', out_h=240, out_w=240, use_cuda=use_cuda)
    sampling_grid_tps = tpstnf(image_batch=source_image, theta_batch=theta_aff_tps,
                               padding_factor=0.5, crop_factor=1.0,
                               return_sampling_grid=True)[1]
    X = sampling_grid_tps[:, :, :, 0].unsqueeze(3)
    Y = sampling_grid_tps[:, :, :, 1].unsqueeze(3)
    # Apply the 2x3 affine matrix [[a0, a1, a2], [a3, a4, a5]] to the grid points
    Xp = X * theta_aff[:, 0].unsqueeze(1).unsqueeze(2) + \
         Y * theta_aff[:, 1].unsqueeze(1).unsqueeze(2) + \
         theta_aff[:, 2].unsqueeze(1).unsqueeze(2)
    Yp = X * theta_aff[:, 3].unsqueeze(1).unsqueeze(2) + \
         Y * theta_aff[:, 4].unsqueeze(1).unsqueeze(2) + \
         theta_aff[:, 5].unsqueeze(1).unsqueeze(2)
    sampling_grid = torch.cat((Xp, Yp), 3)
    warped_image_batch = F.grid_sample(source_image, sampling_grid)
    return warped_image_batch
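# Usage sketch for affTpsTnf (tnf stands for the enclosing transformer object;
# the identity tps control points follow the 3x3-grid convention used elsewhere
# in this repo, and the values are otherwise dummies):
source = torch.randn(1, 3, 240, 240).cuda()
theta_aff = torch.Tensor([[1, 0, 0, 0, 1, 0]]).cuda()   # identity affine (2x3, row-major)
theta_tps = torch.Tensor([[-1, -1, -1, 0, 0, 0, 1, 1, 1,
                           -1, 0, 1, -1, 0, 1, -1, 0, 1]]).cuda()
# With the identity affine, the composed warp equals the tps-only warp, since
# the affine maps every sampling-grid point to itself.
warped = tnf.affTpsTnf(source, theta_aff, theta_tps)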
def train_fn_detect(epoch, model, faster_rcnn, aff_theta, loss_fn, optimizer, dataloader,
                    triple_generation, use_cuda=True, log_interval=100, vis=None, show=False):
    """
    Train the model with a synthetic training triple
    {source image, target image, refer image (warped source image), theta_GT} from PF-PASCAL:
    1. Train the transformation parameters theta_st from the source image to the target image;
    2. Train the transformation parameters theta_tr from the target image to the refer image;
    3. Combine theta_st and theta_tr to obtain the transformation theta from the source image
       to the refer image, and compute the loss between theta and theta_GT.
    """
    tpsTnf = GeometricTnf(geometric_model='tps', use_cuda=use_cuda)
    epoch_loss = 0
    if (epoch % 5 == 0 or epoch == 1) and vis is not None:
        stride_images = len(dataloader) / 3
        watch_images = torch.Tensor(24, 3, 240, 240)
        stride_loss = len(dataloader) / 35
        iter_loss = np.zeros(36)
    begin = time.time()
    for batch_idx, batch in enumerate(dataloader):
        ''' Move the input batch to the GPU '''
        # batch['source_image'].shape & batch['target_image'].shape: (batch_size, 3, 240, 240)
        # batch['theta'].shape for tps: (batch_size, 18) if random, (batch_size, 18, 1, 1) if pre-set from csv
        if use_cuda:
            batch = batch_cuda(batch)

        ''' Get the training triple {source image, target image, refer image, theta_GT} '''
        batch_triple = triple_generation(batch)

        ''' Train the model '''
        optimizer.zero_grad()
        # Detect objects in the source, target and refer images
        box_info_s = faster_rcnn(im_data=batch_triple['source_im'],
                                 im_info=batch_triple['source_im_info'][:, 3:],
                                 gt_boxes=batch_triple['source_gt_boxes'],
                                 num_boxes=batch_triple['source_num_boxes'])[0:3]
        box_info_t = faster_rcnn(im_data=batch_triple['target_im'],
                                 im_info=batch_triple['target_im_info'][:, 3:],
                                 gt_boxes=batch_triple['target_gt_boxes'],
                                 num_boxes=batch_triple['target_num_boxes'])[0:3]
        box_info_r = faster_rcnn(im_data=batch_triple['refer_im'],
                                 im_info=batch_triple['refer_im_info'][:, 3:],
                                 gt_boxes=batch_triple['refer_gt_boxes'],
                                 num_boxes=batch_triple['refer_num_boxes'])[0:3]
        all_box_s = select_boxes(rois=box_info_s[0], cls_prob=box_info_s[1],
                                 bbox_pred=box_info_s[2],
                                 im_infos=batch_triple['source_im_info'][:, 3:])
        all_box_t = select_boxes(rois=box_info_t[0], cls_prob=box_info_t[1],
                                 bbox_pred=box_info_t[2],
                                 im_infos=batch_triple['target_im_info'][:, 3:])
        all_box_r = select_boxes(rois=box_info_r[0], cls_prob=box_info_r[1],
                                 bbox_pred=box_info_r[2],
                                 im_infos=batch_triple['refer_im_info'][:, 3:])
        box_s, box_t, box_r = select_box(all_box_s, all_box_t, all_box_r)
        # Coarse affine parameters estimated from the detected boxes
        theta_st = aff_theta(boxes_s=box_s, boxes_t=box_t)
        theta_tr = aff_theta(boxes_s=box_t, boxes_t=box_r)

        # Predict tps parameters between images; theta.shape: (batch_size, 18) for tps
        batch_st = {'source_image': batch_triple['source_image'],
                    'target_image': batch_triple['target_image']}
        batch_tr = {'source_image': batch_triple['target_image'],
                    'target_image': batch_triple['refer_image']}
        theta_aff_tps_st, theta_aff_st = model(batch_st, theta_st)  # source -> target
        theta_aff_tps_tr, theta_aff_tr = model(batch_tr, theta_tr)  # target -> refer

        loss = loss_fn(theta_st=theta_aff_tps_st, theta_tr=theta_aff_tps_tr,
                       theta_GT=batch_triple['theta_GT'])
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

        if (batch_idx + 1) % log_interval == 0:
            end = time.time()
            print('Train epoch: {} [{}/{} ({:.0%})]\t\tCurrent batch loss: {:.6f}\t\tTime cost ({} batches): {:.4f} s'
                  .format(epoch, batch_idx + 1, len(dataloader), (batch_idx + 1) / len(dataloader),
                          loss.item(), batch_idx + 1, end - begin))

        if (epoch % 5 == 0 or epoch == 1) and vis is not None:
            if (batch_idx + 1) % stride_images == 0 or batch_idx == 0:
                watch_images = add_watch(watch_images, batch_st, batch_tr, tpsTnf,
                                         theta_aff_tps_st, theta_aff_tps_tr,
                                         int((batch_idx + 1) / stride_images) * 6)
            if (batch_idx + 1) % stride_loss == 0 or batch_idx == 0:
                iter_loss[int((batch_idx + 1) / stride_loss)] = epoch_loss / (batch_idx + 1)

        if show:
            warped_image_tps = tpsTnf(batch_st['source_image'], theta_aff_tps_st)
            warped_image_tps_2 = tpsTnf(batch_tr['source_image'], theta_aff_tps_tr)
            warped_image_tps_3 = tpsTnf(warped_image_tps, theta_aff_tps_tr)
            show_images(batch_triple, warped_image_tps.detach(), warped_image_tps_2.detach(),
                        warped_image_tps_3.detach(), box_s, box_t, box_r)

    end = time.time()

    # Visualize watch images & training loss
    if (epoch % 5 == 0 or epoch == 1) and vis is not None:
        opts = dict(jpgquality=100,
                    title='Epoch ' + str(epoch) + ' source warped_st target warped_tr refer warped_sr')
        watch_images = normalize_image(watch_images, forward=False) * 255.0
        vis.image(torchvision.utils.make_grid(watch_images, nrow=6, padding=3), opts=opts)
        opts_loss = dict(xlabel='Iterations (' + str(stride_loss) + ')', ylabel='Loss',
                         title='GM ResNet101 Detect&Affine&TPS Training Loss in Epoch ' + str(epoch),
                         legend=['Loss'], width=2000)
        vis.line(iter_loss, np.arange(36), opts=opts_loss)

    epoch_loss /= len(dataloader)
    print('Train set -- Average loss: {:.6f}\t\tTime cost: {:.4f}'.format(epoch_loss, end - begin))
    return epoch_loss, end - begin
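# Hypothetical driver loop for train_fn_detect (model, faster_rcnn, aff_theta,
# loss_fn, dataloader, triple_generation and vis are assumed to be constructed
# elsewhere; the learning rate is a placeholder):
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4)
for epoch in range(1, num_epochs + 1):
    epoch_loss, epoch_time = train_fn_detect(epoch, model, faster_rcnn, aff_theta,
                                             loss_fn, optimizer, dataloader,
                                             triple_generation, use_cuda=True,
                                             log_interval=100, vis=vis)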
def train_fn_dual(epoch, model, loss_fn, optimizer, dataloader, triple_generation,
                  use_cuda=True, log_interval=100, vis=None, show=False):
    """
    Train the model with a synthetic training triple
    {source image, target image, refer image (warped source image), theta_GT} from PF-PASCAL:
    1. Train the transformation parameters theta_st from the source image to the target image;
    2. Train the transformation parameters theta_tr from the target image to the refer image;
    3. Combine theta_st and theta_tr to obtain the transformation theta from the source image
       to the refer image, and compute the loss between theta and theta_GT.
    """
    geoTnf = ComposedGeometricTnf(use_cuda=use_cuda)
    affTnf = GeometricTnf(geometric_model='affine', use_cuda=use_cuda)
    tpsTnf = GeometricTnf(geometric_model='tps', use_cuda=use_cuda)
    epoch_loss = 0
    if (epoch % 5 == 0 or epoch == 1) and vis is not None:
        stride_images = len(dataloader) / 3
        group_size = 6
        watch_images = torch.ones(group_size * 4, 3, 260, 240).cuda()
        image_names = list()
        fnt = cv2.FONT_HERSHEY_COMPLEX
        stride_loss = len(dataloader) / 105
        iter_loss = np.zeros(106)
    begin = time.time()
    for batch_idx, batch in enumerate(dataloader):
        ''' Move the input batch to the GPU '''
        # batch['source_image'].shape & batch['target_image'].shape: (batch_size, 3, 240, 240)
        # batch['theta'].shape for tps: (batch_size, 18) if random, (batch_size, 18, 1, 1) if pre-set from csv
        if use_cuda:
            batch = batch_cuda(batch)

        ''' Get the training triple {source image, target image, refer image, theta_GT} '''
        batch_triple = triple_generation(batch)

        ''' Train the model '''
        optimizer.zero_grad()
        # Predict affine and tps parameters between images; tps theta.shape: (batch_size, 18)
        batch_st = {'source_image': batch_triple['source_image'],
                    'target_image': batch_triple['target_image']}
        batch_tr = {'source_image': batch_triple['target_image'],
                    'target_image': batch_triple['refer_image']}
        theta_aff_tps_st, theta_aff_st = model(batch_st)  # source -> target
        theta_aff_tps_tr, theta_aff_tr = model(batch_tr)  # target -> refer
        loss = loss_fn(theta_aff_tps_st=theta_aff_tps_st, theta_aff_st=theta_aff_st,
                       theta_aff_tps_tr=theta_aff_tps_tr, theta_aff_tr=theta_aff_tr,
                       theta_aff_GT=batch_triple['theta_aff_GT'],
                       theta_tps_GT=batch_triple['theta_tps_GT'])
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

        if (batch_idx + 1) % log_interval == 0:
            end = time.time()
            print('Train epoch: {} [{}/{} ({:.0%})]\t\tCurrent batch loss: {:.6f}\t\tTime cost ({} batches): {:.4f} s'
                  .format(epoch, batch_idx + 1, len(dataloader), (batch_idx + 1) / len(dataloader),
                          loss.item(), batch_idx + 1, end - begin))

        if (epoch % 5 == 0 or epoch == 1) and vis is not None:
            if (batch_idx + 1) % stride_images == 0 or batch_idx == 0:
                watch_images, image_names = add_watch(watch_images, image_names, batch_st, batch_tr,
                                                      affTnf, tpsTnf, geoTnf,
                                                      theta_aff_tps_st, theta_aff_st,
                                                      theta_aff_tps_tr, theta_aff_tr,
                                                      int((batch_idx + 1) / stride_images) * group_size)
            if (batch_idx + 1) % stride_loss == 0 or batch_idx == 0:
                iter_loss[int((batch_idx + 1) / stride_loss)] = epoch_loss / (batch_idx + 1)

    end = time.time()

    # Visualize watch images & training loss
    if (epoch % 5 == 0 or epoch == 1) and vis is not None:
        opts = dict(jpgquality=100,
                    title='Epoch ' + str(epoch) + ' source warped_st target warped_tr refer warped_sr')
        # Un-normalize the image area (rows 0:240) and print the image names
        watch_images[:, :, 0:240, :] = normalize_image(watch_images[:, :, 0:240, :], forward=False)
        watch_images *= 255.0
        watch_images = watch_images.permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8)
        for i in range(watch_images.shape[0]):
            cv2.putText(watch_images[i], image_names[i], (80, 255), fnt, 0.5, (0, 0, 0), 1)
        watch_images = torch.Tensor(watch_images.astype(np.float32))
        watch_images = watch_images.permute(0, 3, 1, 2)
        vis.image(torchvision.utils.make_grid(watch_images, nrow=group_size, padding=3), opts=opts)
        opts_loss = dict(xlabel='Iterations (' + str(stride_loss) + ')', ylabel='Loss',
                         title='GM ResNet101 AffTPS Training Loss in Epoch ' + str(epoch),
                         legend=['Loss'], width=2000)
        vis.line(iter_loss, np.arange(106), opts=opts_loss)

    epoch_loss /= len(dataloader)
    print('Train set -- Average loss: {:.6f}\t\tTime cost: {:.4f}'.format(epoch_loss, end - begin))
    return epoch_loss, end - begin
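# A minimal sketch of the kind of grid-distance loss loss_fn could implement
# (after Rocco et al.'s TransformedGridLoss; this is an illustration under that
# assumption, not the repo's actual loss):
import torch
import torch.nn as nn

class GridDistanceLossSketch(nn.Module):
    def __init__(self, grid_size=20, use_cuda=True):
        super().__init__()
        # Regular grid of points in [-1, 1]^2 on which the transforms are compared
        axis = torch.linspace(-1, 1, grid_size)
        Y, X = torch.meshgrid(axis, axis)
        P = torch.stack((X.reshape(-1), Y.reshape(-1))).unsqueeze(0)  # (1, 2, N)
        self.P = P.cuda() if use_cuda else P

    def forward(self, theta_pred, theta_gt, point_tnf):
        # point_tnf maps grid points through a parameterized transform, e.g.
        # PointTnf.affPointTnf / tpsPointTnf from this repo
        P = self.P.expand(theta_pred.size(0), 2, self.P.size(2))
        P_pred = point_tnf(theta=theta_pred, points=P)
        P_gt = point_tnf(theta=theta_gt, points=P)
        # Mean squared distance between the two transformed grids
        return torch.mean(torch.sum((P_pred - P_gt) ** 2, dim=1))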
def vis_pf(vis, dataloader, theta, theta_weak, theta_inver, theta_weak_inver, results,
           results_weak, dataset_name, use_cuda=True):
    # Visualize watch images
    affTnf = GeometricTnf(geometric_model='affine', use_cuda=use_cuda)
    tpsTnf = GeometricTnf(geometric_model='tps', use_cuda=use_cuda)
    pt = PointTnf(use_cuda=use_cuda)
    watch_images = torch.ones(len(dataloader) * 6, 3, 280, 240)
    watch_keypoints = -torch.ones(len(dataloader) * 6, 2, 20)
    if use_cuda:
        watch_images = watch_images.cuda()
        watch_keypoints = watch_keypoints.cuda()
    num_points = np.ones(len(dataloader) * 6).astype(np.int8)
    correct_index = list()
    image_names = list()
    metrics = list()
    # Colors for keypoints
    cmap = plt.get_cmap('tab20')
    colors = list()
    for c in range(20):
        r = cmap(c)[0] * 255
        g = cmap(c)[1] * 255
        b = cmap(c)[2] * 255
        colors.append((b, g, r))
    fnt = cv2.FONT_HERSHEY_COMPLEX
    for batch_idx, batch in enumerate(dataloader):
        if use_cuda:
            batch = batch_cuda(batch)

        # Theta and theta_inver
        theta_aff = theta['aff'][batch_idx].unsqueeze(0)
        theta_aff_tps = theta['aff_tps'][batch_idx].unsqueeze(0)
        theta_weak_aff = theta_weak['aff'][batch_idx].unsqueeze(0)
        theta_weak_aff_tps = theta_weak['aff_tps'][batch_idx].unsqueeze(0)
        theta_aff_inver = theta_inver['aff'][batch_idx].unsqueeze(0)
        theta_aff_tps_inver = theta_inver['aff_tps'][batch_idx].unsqueeze(0)
        theta_weak_aff_inver = theta_weak_inver['aff'][batch_idx].unsqueeze(0)
        theta_weak_aff_tps_inver = theta_weak_inver['aff_tps'][batch_idx].unsqueeze(0)

        # Warped images
        warped_aff = affTnf(batch['source_image'], theta_aff)
        warped_aff_tps = tpsTnf(warped_aff, theta_aff_tps)
        warped_weak_aff = affTnf(batch['source_image'], theta_weak_aff)
        warped_weak_aff_tps = tpsTnf(warped_weak_aff, theta_weak_aff_tps)
        watch_images[batch_idx * 6, :, 0:240, :] = batch['source_image']
        watch_images[batch_idx * 6 + 1, :, 0:240, :] = warped_aff
        watch_images[batch_idx * 6 + 2, :, 0:240, :] = warped_aff_tps
        watch_images[batch_idx * 6 + 3, :, 0:240, :] = batch['target_image']
        watch_images[batch_idx * 6 + 4, :, 0:240, :] = warped_weak_aff
        watch_images[batch_idx * 6 + 5, :, 0:240, :] = warped_weak_aff_tps

        # Warped keypoints
        source_im_size = batch['source_im_info'][:, 0:3]
        target_im_size = batch['target_im_info'][:, 0:3]
        source_points = batch['source_points']
        target_points = batch['target_points']
        source_points_norm = PointsToUnitCoords(P=source_points, im_size=source_im_size)
        target_points_norm = PointsToUnitCoords(P=target_points, im_size=target_im_size)

        warped_points_aff_norm = pt.affPointTnf(theta=theta_aff_inver, points=source_points_norm)
        warped_points_aff = PointsToPixelCoords(P=warped_points_aff_norm, im_size=target_im_size)
        pck_aff, index_aff, N_pts = pck(target_points, warped_points_aff, dataset_name)
        warped_points_aff = relocate(warped_points_aff, target_im_size)

        warped_points_aff_tps_norm = pt.tpsPointTnf(theta=theta_aff_tps_inver, points=source_points_norm)
        warped_points_aff_tps_norm = pt.affPointTnf(theta=theta_aff_inver, points=warped_points_aff_tps_norm)
        warped_points_aff_tps = PointsToPixelCoords(P=warped_points_aff_tps_norm, im_size=target_im_size)
        pck_aff_tps, index_aff_tps, _ = pck(target_points, warped_points_aff_tps, dataset_name)
        warped_points_aff_tps = relocate(warped_points_aff_tps, target_im_size)

        warped_points_weak_aff_norm = pt.affPointTnf(theta=theta_weak_aff_inver, points=source_points_norm)
        warped_points_weak_aff = PointsToPixelCoords(P=warped_points_weak_aff_norm, im_size=target_im_size)
        pck_weak_aff, index_weak_aff, _ = pck(target_points, warped_points_weak_aff, dataset_name)
        warped_points_weak_aff = relocate(warped_points_weak_aff, target_im_size)

        warped_points_weak_aff_tps_norm = pt.tpsPointTnf(theta=theta_weak_aff_tps_inver, points=source_points_norm)
        warped_points_weak_aff_tps_norm = pt.affPointTnf(theta=theta_weak_aff_inver, points=warped_points_weak_aff_tps_norm)
        warped_points_weak_aff_tps = PointsToPixelCoords(P=warped_points_weak_aff_tps_norm, im_size=target_im_size)
        pck_weak_aff_tps, index_weak_aff_tps, _ = pck(target_points, warped_points_weak_aff_tps, dataset_name)
        warped_points_weak_aff_tps = relocate(warped_points_weak_aff_tps, target_im_size)

        watch_keypoints[batch_idx * 6, :, :N_pts] = relocate(batch['source_points'], source_im_size)[:, :, :N_pts]
        watch_keypoints[batch_idx * 6 + 1, :, :N_pts] = warped_points_aff[:, :, :N_pts]
        watch_keypoints[batch_idx * 6 + 2, :, :N_pts] = warped_points_aff_tps[:, :, :N_pts]
        watch_keypoints[batch_idx * 6 + 3, :, :N_pts] = relocate(batch['target_points'], target_im_size)[:, :, :N_pts]
        watch_keypoints[batch_idx * 6 + 4, :, :N_pts] = warped_points_weak_aff[:, :, :N_pts]
        watch_keypoints[batch_idx * 6 + 5, :, :N_pts] = warped_points_weak_aff_tps[:, :, :N_pts]

        num_points[batch_idx * 6:batch_idx * 6 + 6] = N_pts

        correct_index.append(np.arange(N_pts))
        correct_index.append(index_aff)
        correct_index.append(index_aff_tps)
        correct_index.append(np.arange(N_pts))
        correct_index.append(index_weak_aff)
        correct_index.append(index_weak_aff_tps)

        image_names.extend(['Source', 'Aff', 'Aff_tps', 'Target', 'Rocco_aff', 'Rocco_aff_tps'])

        metrics.append('')
        metrics.append('PCK: {:.2%}'.format(pck_aff))
        metrics.append('PCK: {:.2%}'.format(pck_aff_tps))
        metrics.append('')
        metrics.append('PCK: {:.2%}'.format(pck_weak_aff))
        metrics.append('PCK: {:.2%}'.format(pck_weak_aff_tps))

    opts = dict(jpgquality=100, title=dataset_name)
    # Un-normalize the image area (rows 0:240), then draw names, metrics and keypoints
    watch_images[:, :, 0:240, :] = normalize_image(watch_images[:, :, 0:240, :], forward=False)
    watch_images *= 255.0
    watch_images = watch_images.permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8)
    watch_keypoints = watch_keypoints.cpu().numpy()
    for i in range(watch_images.shape[0]):
        pos_name = (80, 255)
        if (i + 1) % 6 == 1 or (i + 1) % 6 == 4:
            pos_pck = (0, 0)
        else:
            pos_pck = (70, 275)
        cv2.putText(watch_images[i], image_names[i], pos_name, fnt, 0.5, (0, 0, 0), 1)
        cv2.putText(watch_images[i], metrics[i], pos_pck, fnt, 0.5, (0, 0, 0), 1)
        if (i + 1) % 6 == 4:
            # Target image: draw all annotated keypoints
            for j in range(num_points[i]):
                cv2.drawMarker(watch_images[i], (watch_keypoints[i, 0, j], watch_keypoints[i, 1, j]),
                               colors[j], cv2.MARKER_DIAMOND, 12, 2, cv2.LINE_AA)
        else:
            # Other images: draw correctly transferred keypoints plus the target keypoints
            for j in correct_index[i]:
                cv2.drawMarker(watch_images[i], (watch_keypoints[i, 0, j], watch_keypoints[i, 1, j]),
                               colors[j], cv2.MARKER_CROSS, 12, 2, cv2.LINE_AA)
                cv2.drawMarker(watch_images[i],
                               (watch_keypoints[i + 3 - (i % 6), 0, j],
                                watch_keypoints[i + 3 - (i % 6), 1, j]),
                               colors[j], cv2.MARKER_DIAMOND, 12, 2, cv2.LINE_AA)
    watch_images = torch.Tensor(watch_images.astype(np.float32))
    watch_images = watch_images.permute(0, 3, 1, 2)
    vis.image(torchvision.utils.make_grid(watch_images, nrow=3, padding=3), opts=opts)
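# A minimal sketch of the PCK computation assumed by pck() above: a transferred
# keypoint is correct if it lands within alpha * L_pck of its ground-truth
# location (alpha = 0.1 with L_pck = max(h, w) of the target is the usual
# PF-PASCAL setting; the repo's exact reference length may differ):
def pck_sketch(target_points, warped_points, L_pck, alpha=0.1):
    # target_points, warped_points: (1, 2, N) pixel coordinates
    dist = torch.norm(target_points - warped_points, dim=1)        # (1, N)
    correct = dist <= alpha * L_pck
    index = torch.nonzero(correct.squeeze(0)).squeeze(1).cpu().numpy()
    return correct.float().mean().item(), index, target_points.size(2)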
def vis_tss(vis, dataloader, theta, theta_weak, csv_file, title, use_cuda=True):
    # Visualize watch images
    dataframe = pd.read_csv(csv_file)
    scores_aff = dataframe.iloc[:, 5]
    scores_aff_tps = dataframe.iloc[:, 6]
    scores_weak_aff = dataframe.iloc[:, 7]
    scores_weak_aff_tps = dataframe.iloc[:, 8]
    affTnf = GeometricTnf(geometric_model='affine', use_cuda=use_cuda)
    tpsTnf = GeometricTnf(geometric_model='tps', use_cuda=use_cuda)
    watch_images = torch.ones(int(len(dataloader) / 2 * 6), 3, 280, 240)
    watch_images_inver = torch.ones(int(len(dataloader) / 2 * 6), 3, 280, 240)
    if use_cuda:
        watch_images = watch_images.cuda()
        watch_images_inver = watch_images_inver.cuda()
    image_names = list()
    image_names_inver = list()
    flow = list()
    flow_inver = list()
    fnt = cv2.FONT_HERSHEY_COMPLEX
    for batch_idx, batch in enumerate(dataloader):
        if use_cuda:
            batch = batch_cuda(batch)

        theta_aff = theta['aff'][batch_idx].unsqueeze(0)
        theta_aff_tps = theta['aff_tps'][batch_idx].unsqueeze(0)
        theta_weak_aff = theta_weak['aff'][batch_idx].unsqueeze(0)
        theta_weak_aff_tps = theta_weak['aff_tps'][batch_idx].unsqueeze(0)

        # Warped images
        warped_aff = affTnf(batch['source_image'], theta_aff)
        warped_aff_tps = tpsTnf(warped_aff, theta_aff_tps)
        warped_weak_aff = affTnf(batch['source_image'], theta_weak_aff)
        warped_weak_aff_tps = tpsTnf(warped_weak_aff, theta_weak_aff_tps)

        # TSS pairs come in both directions: alternate batches go to the forward
        # and the inverse grids
        if (batch_idx + 1) % 2 != 0:
            base = int(batch_idx / 2 * 6)
            watch_images[base, :, 0:240, :] = batch['source_image']
            watch_images[base + 1, :, 0:240, :] = warped_aff
            watch_images[base + 2, :, 0:240, :] = warped_aff_tps
            watch_images[base + 3, :, 0:240, :] = batch['target_image']
            watch_images[base + 4, :, 0:240, :] = warped_weak_aff
            watch_images[base + 5, :, 0:240, :] = warped_weak_aff_tps
            image_names.extend(['Source', 'Aff', 'Aff_tps', 'Target', 'Rocco_aff', 'Rocco_aff_tps'])
            flow.extend(['',
                         'Flow: {:.3f}'.format(scores_aff[batch_idx]),
                         'Flow: {:.3f}'.format(scores_aff_tps[batch_idx]),
                         '',
                         'Flow: {:.3f}'.format(scores_weak_aff[batch_idx]),
                         'Flow: {:.3f}'.format(scores_weak_aff_tps[batch_idx])])
        else:
            base = int((batch_idx - 1) / 2 * 6)
            watch_images_inver[base, :, 0:240, :] = batch['source_image']
            watch_images_inver[base + 1, :, 0:240, :] = warped_aff
            watch_images_inver[base + 2, :, 0:240, :] = warped_aff_tps
            watch_images_inver[base + 3, :, 0:240, :] = batch['target_image']
            watch_images_inver[base + 4, :, 0:240, :] = warped_weak_aff
            watch_images_inver[base + 5, :, 0:240, :] = warped_weak_aff_tps
            image_names_inver.extend(['Source', 'Aff', 'Aff_tps', 'Target', 'Rocco_aff', 'Rocco_aff_tps'])
            flow_inver.extend(['',
                               'Flow: {:.3f}'.format(scores_aff[batch_idx]),
                               'Flow: {:.3f}'.format(scores_aff_tps[batch_idx]),
                               '',
                               'Flow: {:.3f}'.format(scores_weak_aff[batch_idx]),
                               'Flow: {:.3f}'.format(scores_weak_aff_tps[batch_idx])])

    opts = dict(jpgquality=100, title=title)

    def draw_image(images, names, flows):
        # Un-normalize the image area (rows 0:240) and print names and flow scores
        images[:, :, 0:240, :] = normalize_image(images[:, :, 0:240, :], forward=False)
        images *= 255.0
        images = images.permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8)
        for i in range(images.shape[0]):
            pos_name = (80, 255)
            if (i + 1) % 6 == 1 or (i + 1) % 6 == 4:
                pos_flow = (0, 0)
            else:
                pos_flow = (70, 275)
            cv2.putText(images[i], names[i], pos_name, fnt, 0.5, (0, 0, 0), 1)
            cv2.putText(images[i], flows[i], pos_flow, fnt, 0.5, (0, 0, 0), 1)
        images = torch.Tensor(images.astype(np.float32))
        images = images.permute(0, 3, 1, 2)
        return images

    watch_images = draw_image(images=watch_images, names=image_names, flows=flow)
    watch_images_inver = draw_image(images=watch_images_inver, names=image_names_inver, flows=flow_inver)

    vis.image(torchvision.utils.make_grid(watch_images, nrow=3, padding=5), opts=opts)
def vis_caltech(vis, dataloader, theta, theta_weak, results, results_weak, title, use_cuda=True):
    # Visualize watch images
    affTnf = GeometricTnf(geometric_model='affine', use_cuda=use_cuda)
    tpsTnf = GeometricTnf(geometric_model='tps', use_cuda=use_cuda)
    watch_images = torch.ones(len(dataloader) * 6, 3, 280, 240)
    if use_cuda:
        watch_images = watch_images.cuda()
    image_names = list()
    lt_acc = list()
    iou = list()
    fnt = cv2.FONT_HERSHEY_COMPLEX
    for batch_idx, batch in enumerate(dataloader):
        if use_cuda:
            batch = batch_cuda(batch)

        theta_aff = theta['aff'][batch_idx].unsqueeze(0)
        theta_aff_tps = theta['aff_tps'][batch_idx].unsqueeze(0)
        theta_weak_aff = theta_weak['aff'][batch_idx].unsqueeze(0)
        theta_weak_aff_tps = theta_weak['aff_tps'][batch_idx].unsqueeze(0)

        # Warped images
        warped_aff = affTnf(batch['source_image'], theta_aff)
        warped_aff_tps = tpsTnf(warped_aff, theta_aff_tps)
        warped_weak_aff = affTnf(batch['source_image'], theta_weak_aff)
        warped_weak_aff_tps = tpsTnf(warped_weak_aff, theta_weak_aff_tps)
        watch_images[batch_idx * 6, :, 0:240, :] = batch['source_image']
        watch_images[batch_idx * 6 + 1, :, 0:240, :] = warped_aff
        watch_images[batch_idx * 6 + 2, :, 0:240, :] = warped_aff_tps
        watch_images[batch_idx * 6 + 3, :, 0:240, :] = batch['target_image']
        watch_images[batch_idx * 6 + 4, :, 0:240, :] = warped_weak_aff
        watch_images[batch_idx * 6 + 5, :, 0:240, :] = warped_weak_aff_tps

        image_names.extend(['Source', 'Aff', 'Aff_tps', 'Target', 'Rocco_aff', 'Rocco_aff_tps'])

        lt_acc.append('')
        lt_acc.append('LT-ACC: {:.2f}'.format(float(results['aff']['label_transfer_accuracy'][batch_idx])))
        lt_acc.append('LT-ACC: {:.2f}'.format(float(results['aff_tps']['label_transfer_accuracy'][batch_idx])))
        lt_acc.append('')
        lt_acc.append('LT-ACC: {:.2f}'.format(float(results_weak['aff']['label_transfer_accuracy'][batch_idx])))
        lt_acc.append('LT-ACC: {:.2f}'.format(float(results_weak['aff_tps']['label_transfer_accuracy'][batch_idx])))

        iou.append('')
        iou.append('IoU: {:.2f}'.format(float(results['aff']['intersection_over_union'][batch_idx])))
        iou.append('IoU: {:.2f}'.format(float(results['aff_tps']['intersection_over_union'][batch_idx])))
        iou.append('')
        iou.append('IoU: {:.2f}'.format(float(results_weak['aff']['intersection_over_union'][batch_idx])))
        iou.append('IoU: {:.2f}'.format(float(results_weak['aff_tps']['intersection_over_union'][batch_idx])))

    opts = dict(jpgquality=100, title=title)
    # Un-normalize the image area (rows 0:240) and print names and metrics
    watch_images[:, :, 0:240, :] = normalize_image(watch_images[:, :, 0:240, :], forward=False)
    watch_images *= 255.0
    watch_images = watch_images.permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8)
    for i in range(watch_images.shape[0]):
        pos_name = (80, 255)
        if (i + 1) % 6 == 1 or (i + 1) % 6 == 4:
            pos_lt_ac = (0, 0)
            pos_iou = (0, 0)
        else:
            pos_lt_ac = (10, 275)
            pos_iou = (140, 275)
        cv2.putText(watch_images[i], image_names[i], pos_name, fnt, 0.5, (0, 0, 0), 1)
        cv2.putText(watch_images[i], lt_acc[i], pos_lt_ac, fnt, 0.5, (0, 0, 0), 1)
        cv2.putText(watch_images[i], iou[i], pos_iou, fnt, 0.5, (0, 0, 0), 1)
    watch_images = torch.Tensor(watch_images.astype(np.float32))
    watch_images = watch_images.permute(0, 3, 1, 2)
    vis.image(torchvision.utils.make_grid(watch_images, nrow=3, padding=5), opts=opts)
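# A minimal sketch of the two Caltech-101 measures annotated above, computed on
# binary 0/1 masks (an assumption about how the values in results[...] were
# produced, following the standard definitions):
def label_transfer_accuracy_sketch(warped_mask, target_mask):
    # Fraction of pixels whose foreground/background label agrees after warping
    return torch.mean((warped_mask == target_mask).float()).item()

def intersection_over_union_sketch(warped_mask, target_mask):
    inter = torch.sum((warped_mask * target_mask) > 0).float()
    union = torch.sum((warped_mask + target_mask) > 0).float()
    return (inter / union).item()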
def vis_fn_detect(vis, model, faster_rcnn, aff_theta, train_loss, val_pck, train_lr, epoch,
                  num_epochs, dataloader, use_cuda=True):
    # Visualize watch images
    affTnf = GeometricTnf(geometric_model='affine', use_cuda=use_cuda)
    tpsTnf = GeometricTnf(geometric_model='tps', use_cuda=use_cuda)
    watch_images = torch.Tensor(len(dataloader) * 5, 3, 240, 240)
    for batch_idx, batch in enumerate(dataloader):
        if use_cuda:
            batch = batch_cuda(batch)
        box_info_s = faster_rcnn(im_data=batch['source_im'],
                                 im_info=batch['source_im_info'][:, 3:],
                                 gt_boxes=batch['source_gt_boxes'],
                                 num_boxes=batch['source_num_boxes'])[0:3]
        box_info_t = faster_rcnn(im_data=batch['target_im'],
                                 im_info=batch['target_im_info'][:, 3:],
                                 gt_boxes=batch['target_gt_boxes'],
                                 num_boxes=batch['target_num_boxes'])[0:3]
        all_box_s = select_boxes(rois=box_info_s[0], cls_prob=box_info_s[1],
                                 bbox_pred=box_info_s[2], im_infos=batch['source_im_info'][:, 3:])
        all_box_t = select_boxes(rois=box_info_t[0], cls_prob=box_info_t[1],
                                 bbox_pred=box_info_t[2], im_infos=batch['target_im_info'][:, 3:])
        box_s, box_t = select_box_st(all_box_s, all_box_t)
        theta_det = aff_theta(boxes_s=box_s, boxes_t=box_t)
        theta_aff_tps, theta_aff = model(batch, theta_det)

        # Warp step by step: detection affine, regressed affine, then tps
        warped_image_1 = affTnf(batch['source_image'], theta_det)
        warped_image_2 = affTnf(warped_image_1, theta_aff)
        warped_image_3 = tpsTnf(warped_image_2, theta_aff_tps)
        watch_images[batch_idx * 5] = batch['source_image'][0]
        watch_images[batch_idx * 5 + 1] = warped_image_1[0]
        watch_images[batch_idx * 5 + 2] = warped_image_2[0]
        watch_images[batch_idx * 5 + 3] = warped_image_3[0]
        watch_images[batch_idx * 5 + 4] = batch['target_image'][0]

    opts = dict(jpgquality=100, title='Epoch ' + str(epoch) + ' source warped target')
    watch_images = normalize_image(watch_images, forward=False) * 255.0
    vis.image(torchvision.utils.make_grid(watch_images, nrow=5, padding=5), opts=opts)

    if epoch == num_epochs:
        epochs = np.arange(1, num_epochs + 1)
        # Visualize train loss
        opts_loss = dict(xlabel='Epoch', ylabel='Loss',
                         title='GM ResNet101 Detect&Affine&TPS Training Loss',
                         legend=['Loss'], width=2000)
        vis.line(train_loss, epochs, opts=opts_loss)
        # Visualize val PCK
        opts_pck = dict(xlabel='Epoch', ylabel='Val PCK',
                        title='GM ResNet101 Detect&Affine&TPS Val PCK',
                        legend=['PCK'], width=2000)
        vis.line(val_pck, epochs, opts=opts_pck)
        # Visualize train lr
        opts_lr = dict(xlabel='Epoch', ylabel='Learning Rate',
                       title='GM ResNet101 Detect&Affine&TPS Training Learning Rate',
                       legend=['LR'], width=2000)
        vis.line(train_lr, epochs, opts=opts_lr)
def vis_fn_dual(vis, train_loss, val_pck, train_lr, epoch, num_epochs, dataloader, theta,
                thetai, results, title, use_cuda=True):
    # Visualize watch images
    affTnf = GeometricTnf(geometric_model='affine', use_cuda=use_cuda)
    tpsTnf = GeometricTnf(geometric_model='tps', use_cuda=use_cuda)
    pt = PointTnf(use_cuda=use_cuda)
    group_size = 4
    watch_images = torch.ones(len(dataloader) * group_size, 3, 280, 240)
    watch_keypoints = -torch.ones(len(dataloader) * group_size, 2, 20)
    if use_cuda:
        watch_images = watch_images.cuda()
        watch_keypoints = watch_keypoints.cuda()
    num_points = np.ones(len(dataloader) * group_size).astype(np.int8)
    correct_index = list()
    image_names = list()
    metrics = list()
    # Colors for keypoints
    cmap = plt.get_cmap('tab20')
    colors = list()
    for c in range(20):
        r = cmap(c)[0] * 255
        g = cmap(c)[1] * 255
        b = cmap(c)[2] * 255
        colors.append((b, g, r))
    fnt = cv2.FONT_HERSHEY_COMPLEX
    theta, thetai = swap(theta, thetai)
    for batch_idx, batch in enumerate(dataloader):
        if use_cuda:
            batch = batch_cuda(batch)
        batch['source_image'], batch['target_image'] = swap(batch['source_image'], batch['target_image'])
        batch['source_im_info'], batch['target_im_info'] = swap(batch['source_im_info'], batch['target_im_info'])
        batch['source_points'], batch['target_points'] = swap(batch['source_points'], batch['target_points'])

        # Theta and thetai
        theta_aff = theta['aff'][batch_idx].unsqueeze(0)
        theta_aff_tps = theta['afftps'][batch_idx].unsqueeze(0)
        theta_aff_inver = thetai['aff'][batch_idx].unsqueeze(0)
        theta_aff_tps_inver = thetai['afftps'][batch_idx].unsqueeze(0)

        # Warped images
        warped_aff = affTnf(batch['source_image'], theta_aff)
        warped_aff_tps = tpsTnf(warped_aff, theta_aff_tps)
        watch_images[batch_idx * group_size, :, 0:240, :] = batch['source_image']
        watch_images[batch_idx * group_size + 1, :, 0:240, :] = warped_aff
        watch_images[batch_idx * group_size + 2, :, 0:240, :] = warped_aff_tps
        watch_images[batch_idx * group_size + 3, :, 0:240, :] = batch['target_image']

        # Warped keypoints
        source_im_size = batch['source_im_info'][:, 0:3]
        target_im_size = batch['target_im_info'][:, 0:3]
        source_points = batch['source_points']
        target_points = batch['target_points']
        source_points_norm = PointsToUnitCoords(P=source_points, im_size=source_im_size)
        target_points_norm = PointsToUnitCoords(P=target_points, im_size=target_im_size)

        warped_points_aff_norm = pt.affPointTnf(theta=theta_aff_inver, points=source_points_norm)
        warped_points_aff = PointsToPixelCoords(P=warped_points_aff_norm, im_size=target_im_size)
        _, index_aff, N_pts = pck(target_points, warped_points_aff)
        warped_points_aff = relocate(warped_points_aff, target_im_size)

        warped_points_aff_tps_norm = pt.tpsPointTnf(theta=theta_aff_tps_inver, points=source_points_norm)
        warped_points_aff_tps_norm = pt.affPointTnf(theta=theta_aff_inver, points=warped_points_aff_tps_norm)
        warped_points_aff_tps = PointsToPixelCoords(P=warped_points_aff_tps_norm, im_size=target_im_size)
        _, index_aff_tps, _ = pck(target_points, warped_points_aff_tps)
        warped_points_aff_tps = relocate(warped_points_aff_tps, target_im_size)

        watch_keypoints[batch_idx * group_size, :, :N_pts] = relocate(batch['source_points'], source_im_size)[:, :, :N_pts]
        watch_keypoints[batch_idx * group_size + 1, :, :N_pts] = warped_points_aff[:, :, :N_pts]
        watch_keypoints[batch_idx * group_size + 2, :, :N_pts] = warped_points_aff_tps[:, :, :N_pts]
        watch_keypoints[batch_idx * group_size + 3, :, :N_pts] = relocate(batch['target_points'], target_im_size)[:, :, :N_pts]

        num_points[batch_idx * group_size:batch_idx * group_size + group_size] = N_pts

        correct_index.append(np.arange(N_pts))
        correct_index.append(index_aff)
        correct_index.append(index_aff_tps)
        correct_index.append(np.arange(N_pts))

        image_names.extend(['Source', 'Aff', 'AffTPS', 'Target'])

        metrics.append('')
        metrics.append('PCK: {:.2%}'.format(float(results['aff']['pck'][batch_idx])))
        metrics.append('PCK: {:.2%}'.format(float(results['afftps']['pck'][batch_idx])))
        metrics.append('')

    opts = dict(jpgquality=100, title='Epoch ' + str(epoch) + ' source warped target')
    # Un-normalize the image area (rows 0:240), then draw names, metrics and keypoints
    watch_images[:, :, 0:240, :] = normalize_image(watch_images[:, :, 0:240, :], forward=False)
    watch_images *= 255.0
    watch_images = watch_images.permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8)
    watch_keypoints = watch_keypoints.cpu().numpy()
    for i in range(watch_images.shape[0]):
        pos_name = (80, 255)
        if (i + 1) % group_size == 1 or (i + 1) % group_size == 0:
            pos_pck = (0, 0)
        else:
            pos_pck = (70, 275)
        cv2.putText(watch_images[i], image_names[i], pos_name, fnt, 0.5, (0, 0, 0), 1)
        cv2.putText(watch_images[i], metrics[i], pos_pck, fnt, 0.5, (0, 0, 0), 1)
        if (i + 1) % group_size == 0:
            for j in range(num_points[i]):
                cv2.drawMarker(watch_images[i], (watch_keypoints[i, 0, j], watch_keypoints[i, 1, j]),
                               colors[j], cv2.MARKER_CROSS, 12, 2, cv2.LINE_AA)
        else:
            for j in correct_index[i]:
                cv2.drawMarker(watch_images[i], (watch_keypoints[i, 0, j], watch_keypoints[i, 1, j]),
                               colors[j], cv2.MARKER_DIAMOND, 12, 2, cv2.LINE_AA)
                cv2.drawMarker(watch_images[i],
                               (watch_keypoints[i + (group_size - 1) - (i % group_size), 0, j],
                                watch_keypoints[i + (group_size - 1) - (i % group_size), 1, j]),
                               colors[j], cv2.MARKER_CROSS, 12, 2, cv2.LINE_AA)
    watch_images = torch.Tensor(watch_images.astype(np.float32))
    watch_images = watch_images.permute(0, 3, 1, 2)
    vis.image(torchvision.utils.make_grid(watch_images, nrow=4, padding=3), opts=opts)

    if epoch == num_epochs:
        epochs = np.arange(1, num_epochs + 1)
        # Visualize train loss
        opts_loss = dict(xlabel='Epoch', ylabel='Loss',
                         title='GM ResNet101 ' + title + ' Training Loss',
                         legend=['Loss'], width=2000)
        vis.line(train_loss, epochs, opts=opts_loss)
        # Visualize val PCK
        opts_pck = dict(xlabel='Epoch', ylabel='Val PCK',
                        title='GM ResNet101 ' + title + ' Val PCK',
                        legend=['PCK'], width=2000)
        vis.line(val_pck, epochs, opts=opts_pck)
        # Visualize train lr
        opts_lr = dict(xlabel='Epoch', ylabel='Learning Rate',
                       title='GM ResNet101 ' + title + ' Training Learning Rate',
                       legend=['LR'], width=2000)
        vis.line(train_lr, epochs, opts=opts_lr)
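# Minimal usage sketch for the vis handle these functions expect: a visdom
# client (a visdom server must be running, e.g. `python -m visdom.server`);
# the env name is an assumption:
import visdom

vis = visdom.Visdom(env='geometric_matching')
# vis_fn_dual(vis, train_loss, val_pck, train_lr, epoch, num_epochs, dataloader,
#             theta, thetai, results, title='AffTPS', use_cuda=True)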
def compute_metric(metric, model, fasterRCNN, dataset, dataloader, args=None):
    # Initialize stats
    N = len(dataset)
    stats = {}
    # Results are computed for aff / tps / aff+tps
    stats['aff'] = {}
    stats['tps'] = {}
    stats['aff_tps'] = {}

    # Choose the metric function and the metrics to compute
    metric_fun = None
    if metric == 'pck':
        metrics = ['pck']
        metric_fun = pck_metric
    elif metric == 'dist':
        metrics = ['dist']
        # metric_fun = point_dist_metric
    elif metric == 'area':
        metrics = ['intersection_over_union', 'label_transfer_accuracy', 'localization_error']
        # metric_fun = area_metrics
    elif metric == 'pascal_parts':
        metrics = ['intersection_over_union', 'pck']
        # metric_fun = pascal_parts_metrics
    elif metric == 'flow':
        metrics = ['flow']
        # metric_fun = flow_metrics
    elif metric == 'inlier_count':
        metrics = ['inlier_count']
        # metric_fun = inlier_count

    # Initialize a vector for storing the results of each metric
    for key in stats.keys():
        for metric_name in metrics:
            stats[key][metric_name] = np.zeros((N, 1))

    affine_theta = AffineTheta(use_cuda=True, original=False, image_size=240)
    affTnf = GeometricTnf(geometric_model='affine', use_cuda=True)

    # Compute
    for i, batch in enumerate(dataloader):
        batch_start_idx = args.batch_size * i
        batch_end_idx = np.minimum(batch_start_idx + args.batch_size, N)
        for k, v in batch.items():
            batch[k] = batch[k].cuda()

        model.eval()
        fasterRCNN.eval()

        theta_aff = None
        theta_tps = None
        theta_aff_tps = None

        # First pass: tps-only parameters between the raw images
        tnf_batch = {'source_image': batch['source_image'], 'target_image': batch['target_image']}
        theta_tps = model(tnf_batch)

        # Detect objects and estimate a box-based affine pre-alignment
        thresh = 0.05
        max_per_image = 50
        rois_s, cls_prob_s, bbox_pred_s, _, _, _, _, _ = fasterRCNN(batch['source_im'],
                                                                    batch['source_im_info'],
                                                                    batch['source_gt_boxes'],
                                                                    batch['source_num_boxes'])
        all_boxes_s = select_boxes(rois_s, cls_prob_s, bbox_pred_s, batch['source_im_info'],
                                   thresh, max_per_image)
        rois_t, cls_prob_t, bbox_pred_t, _, _, _, _, _ = fasterRCNN(batch['target_im'],
                                                                    batch['target_im_info'],
                                                                    batch['target_gt_boxes'],
                                                                    batch['target_num_boxes'])
        all_boxes_t = select_boxes(rois_t, cls_prob_t, bbox_pred_t, batch['target_im_info'],
                                   thresh, max_per_image)
        boxes_s, boxes_t = select_box(all_boxes_s, all_boxes_t)
        boxes_s = boxes_s.cuda()
        boxes_t = boxes_t.cuda()
        theta_aff = affine_theta(boxes_s=boxes_s, boxes_t=boxes_t,
                                 source_im_size=None, target_im_size=None)

        # Second pass: tps parameters between the affine-aligned source and the target
        batch['source_image'] = affTnf(batch['source_image'], theta_aff)
        tnf_batch = {'source_image': batch['source_image'], 'target_image': batch['target_image']}
        theta_aff_tps = model(tnf_batch)

        if metric_fun is not None:
            stats = metric_fun(batch, batch_start_idx, theta_aff, theta_tps, theta_aff_tps, stats, args)

        print('Batch: [{}/{} ({:.0f}%)]'.format(i, len(dataloader), 100. * i / len(dataloader)))

    # Print results
    if metric == 'flow':
        print('Flow files have been saved to ' + args.flow_output_dir)
        return stats

    for key in stats.keys():
        print('=== Results ' + key + ' ===')
        for metric_name in metrics:
            # Print the per-class breakdown for PF-PASCAL
            if isinstance(dataset, PFPASCALDataset):
                N_cat = int(np.max(dataset.category))  # Number of categories in the dataset
                for c in range(N_cat):
                    cat_idx = np.nonzero(dataset.category == c + 1)[0]  # Indices of the current category
                    print(dataset.category_names[c].ljust(15) + ': ',
                          '{:.2%}'.format(np.mean(stats[key][metric_name][cat_idx])))

            # Print the mean value over valid samples (entries that are -1 or NaN are skipped)
            results = stats[key][metric_name]
            good_idx = np.flatnonzero((results != -1) * ~np.isnan(results))
            print('Total: ' + str(results.size))
            print('Valid: ' + str(good_idx.size))
            filtered_results = results[good_idx]
            print(metric_name + ':', '{:.2%}'.format(np.mean(filtered_results)))
        print('\n')

    return stats
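# Hypothetical evaluation call (the args fields are inferred from how they are
# used above; model, fasterRCNN, dataset and dataloader come from elsewhere):
from argparse import Namespace

args = Namespace(batch_size=16, flow_output_dir='results/flow')
stats = compute_metric('pck', model, fasterRCNN, dataset, dataloader, args)
print('Mean PCK (aff+tps): {:.2%}'.format(np.mean(stats['aff_tps']['pck'])))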
def vis_control(vis, dataloader, theta_1, theta_2, dataset_name, use_cuda=True):
    # Visualize images with their tps control points
    tpsTnf_1 = GeometricTnf(geometric_model='tps', use_cuda=use_cuda)
    tpsTnf_2 = GeometricTnf2(geometric_model='tps', use_cuda=use_cuda)
    group_size = 5
    watch_images = torch.ones(len(dataloader) * group_size, 3, 340, 340)
    if use_cuda:
        watch_images = watch_images.cuda()
    for batch_idx, batch in enumerate(dataloader):
        if use_cuda:
            batch = batch_cuda(batch)

        # Theta
        theta_tps_1 = theta_1['tps'][batch_idx].unsqueeze(0)
        theta_tps_2 = theta_2['tps'][batch_idx].unsqueeze(0)

        # Warped images; each image sits in the center of a 340x340 canvas so that
        # control points outside the image border can still be drawn
        warped_tps_1 = tpsTnf_1(batch['source_image'], theta_tps_1)
        warped_tps_2 = tpsTnf_2(batch['source_image'], theta_tps_2)
        watch_images[batch_idx * group_size, :, 50:290, 50:290] = batch['source_image']
        watch_images[batch_idx * group_size + 1, :, 50:290, 50:290] = warped_tps_1
        watch_images[batch_idx * group_size + 2, :, 50:290, 50:290] = batch['source_image']
        watch_images[batch_idx * group_size + 3, :, 50:290, 50:290] = warped_tps_2
        watch_images[batch_idx * group_size + 4, :, 50:290, 50:290] = batch['target_image']

    opts = dict(jpgquality=100, title=dataset_name)
    watch_images[:, :, 50:290, 50:290] = normalize_image(watch_images[:, :, 50:290, 50:290], forward=False)
    watch_images *= 255.0
    watch_images = watch_images.permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8)
    im_size = torch.Tensor([[240, 240]]).cuda()

    def draw_grid(image, cp):
        # Draw the 3x3 tps control points and the grid lines connecting them
        for j in range(9):
            cv2.drawMarker(image, (cp[0, j], cp[1, j]), (0, 0, 255),
                           cv2.MARKER_TILTED_CROSS, 12, 2, cv2.LINE_AA)
        for j in range(2):
            for k in range(3):
                # vertical grid lines
                cv2.line(image, (cp[0, j + k * 3], cp[1, j + k * 3]),
                         (cp[0, j + k * 3 + 1], cp[1, j + k * 3 + 1]),
                         (0, 0, 255), 2, cv2.LINE_AA)
                # horizontal grid lines
                cv2.line(image, (cp[0, j * 3 + k], cp[1, j * 3 + k]),
                         (cp[0, j * 3 + k + 3], cp[1, j * 3 + k + 3]),
                         (0, 0, 255), 2, cv2.LINE_AA)

    for i in range(watch_images.shape[0]):
        if i % group_size == 0:
            # Control points predicted by the first model
            cp_norm = theta_1['tps'][int(i / group_size)].view(1, 2, -1)
        elif i % group_size == 1:
            # Control points of the identity (regular) 3x3 grid
            cp_norm = torch.Tensor([-1, -1, -1, 0, 0, 0, 1, 1, 1,
                                    -1, 0, 1, -1, 0, 1, -1, 0, 1]).cuda().view(1, 2, -1)
        elif i % group_size == 2:
            # Source control points of the second model
            cp_norm = theta_2['tps'][int(i / group_size)][:18].view(1, 2, -1)
        elif i % group_size == 3:
            # Target control points of the second model
            cp_norm = theta_2['tps'][int(i / group_size)][18:].view(1, 2, -1)
        else:
            continue  # the target image carries no control points
        cp = PointsToPixelCoords(P=cp_norm, im_size=im_size)
        cp = cp.squeeze().cpu().numpy() + 50   # offset into the 340x340 canvas
        draw_grid(watch_images[i], cp)

    watch_images = torch.Tensor(watch_images.astype(np.float32))
    watch_images = watch_images.permute(0, 3, 1, 2)
    vis.image(torchvision.utils.make_grid(watch_images, nrow=5, padding=5), opts=opts)
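# A minimal sketch of the normalized-to-pixel conversion assumed by
# PointsToPixelCoords above: points in [-1, 1]^2 are mapped into the (h, w)
# image plane (the repo's exact half-pixel convention may differ):
def points_to_pixel_coords_sketch(P_norm, im_size):
    # P_norm: (B, 2, N) with x in row 0 and y in row 1; im_size: (B, 2) as (h, w)
    h = im_size[:, 0].view(-1, 1, 1)
    w = im_size[:, 1].view(-1, 1, 1)
    wh = torch.cat((w, h), dim=1)   # pair x with width and y with height
    return (P_norm + 1) * (wh - 1) / 2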
def vis_pf(vis, dataloader, theta_1, theta_2, theta_inver_1, theta_inver_2, results_1, results_2, dataset_name, use_cuda=True): # Visualize watch images tpsTnf_1 = GeometricTnf(geometric_model='tps', use_cuda=use_cuda) tpsTnf_2 = GeometricTnf2(geometric_model='tps', use_cuda=use_cuda) pt_1 = PointTnf(use_cuda=use_cuda) pt_2 = PointTPS(use_cuda=use_cuda) group_size = 4 watch_images = torch.ones(len(dataloader) * group_size, 3, 280, 240) watch_keypoints = -torch.ones(len(dataloader) * group_size, 2, 20) if use_cuda: watch_images = watch_images.cuda() watch_keypoints = watch_keypoints.cuda() num_points = np.ones(len(dataloader) * 6).astype(np.int8) correct_index = list() image_names = list() metrics = list() # Colors for keypoints cmap = plt.get_cmap('tab20') colors = list() for c in range(20): r = cmap(c)[0] * 255 g = cmap(c)[1] * 255 b = cmap(c)[2] * 255 colors.append((b, g, r)) fnt = cv2.FONT_HERSHEY_COMPLEX # means for normalize of caffe resnet and vgg # pixel_means = torch.Tensor(np.array([[[[102.9801, 115.9465, 122.7717]]]]).astype(np.float32)) for batch_idx, batch in enumerate(dataloader): if use_cuda: batch = batch_cuda(batch) # Theta and theta_inver theta_tps_1 = theta_1['tps'][batch_idx].unsqueeze(0) theta_tps_2 = theta_2['tps'][batch_idx].unsqueeze(0) thetai_tps_1 = theta_inver_1['tps'][batch_idx].unsqueeze(0) thetai_tps_2 = theta_inver_2['tps'][batch_idx].unsqueeze(0) # Warped image warped_tps_1 = tpsTnf_1(batch['source_image'], theta_tps_1) warped_tps_2 = tpsTnf_2(batch['source_image'], theta_tps_2) watch_images[batch_idx * group_size, :, 0:240, :] = batch['source_image'] watch_images[batch_idx * group_size + 1, :, 0:240, :] = warped_tps_1 watch_images[batch_idx * group_size + 2, :, 0:240, :] = warped_tps_2 watch_images[batch_idx * group_size + 3, :, 0:240, :] = batch['target_image'] # Warped keypoints source_im_size = batch['source_im_info'][:, 0:3] target_im_size = batch['target_im_info'][:, 0:3] source_points = batch['source_points'] target_points = batch['target_points'] source_points_norm = PointsToUnitCoords(P=source_points, im_size=source_im_size) target_points_norm = PointsToUnitCoords(P=target_points, im_size=target_im_size) warped_points_tps_norm_1 = pt_1.tpsPointTnf(theta=thetai_tps_1, points=source_points_norm) warped_points_tps_1 = PointsToPixelCoords(P=warped_points_tps_norm_1, im_size=target_im_size) pck_tps_1, index_tps_1, N_pts = pck(target_points, warped_points_tps_1, dataset_name) warped_points_tps_1 = relocate(warped_points_tps_1, target_im_size) warped_points_tps_norm_2 = pt_2.tpsPointTnf(theta=thetai_tps_2, points=source_points_norm) warped_points_tps_2 = PointsToPixelCoords(P=warped_points_tps_norm_2, im_size=target_im_size) pck_tps_2, index_tps_2, _ = pck(target_points, warped_points_tps_2, dataset_name) warped_points_tps_2 = relocate(warped_points_tps_2, target_im_size) watch_keypoints[batch_idx * group_size, :, :N_pts] = relocate( batch['source_points'], source_im_size)[:, :, :N_pts] watch_keypoints[batch_idx * group_size + 1, :, :N_pts] = warped_points_tps_1[:, :, :N_pts] watch_keypoints[batch_idx * group_size + 2, :, :N_pts] = warped_points_tps_2[:, :, :N_pts] watch_keypoints[batch_idx * group_size + 3, :, :N_pts] = relocate( batch['target_points'], target_im_size)[:, :, :N_pts] num_points[batch_idx * group_size:batch_idx * group_size + group_size] = N_pts correct_index.append(np.arange(N_pts)) correct_index.append(index_tps_1) correct_index.append(index_tps_2) correct_index.append(np.arange(N_pts)) image_names.append('Source') 
        image_names.append('TPS')
        image_names.append('TPS_Jitter')
        image_names.append('Target')

        metrics.append('')
        metrics.append('PCK: {:.2%}'.format(pck_tps_1))
        metrics.append('PCK: {:.2%}'.format(pck_tps_2))
        metrics.append('')

    opts = dict(jpgquality=100, title=dataset_name)
    # Un-normalize for the Caffe ResNet and VGG models
    # watch_images = watch_images.permute(0, 2, 3, 1) + pixel_means
    # watch_images = watch_images[:, :, :, [2, 1, 0]].permute(0, 3, 1, 2)
    # watch_images = normalize_image(watch_images, forward=False) * 255.0
    watch_images[:, :, 0:240, :] = normalize_image(watch_images[:, :, 0:240, :], forward=False)
    watch_images *= 255.0
    watch_images = watch_images.permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8)
    # Round to integer pixel coordinates, as the OpenCV drawing calls below expect them
    watch_keypoints = np.rint(watch_keypoints.cpu().numpy()).astype(int)

    for i in range(watch_images.shape[0]):
        pos_name = (80, 255)
        if (i + 1) % group_size == 1 or (i + 1) % group_size == 0:
            pos_pck = (0, 0)  # source and target panels carry no PCK text
        else:
            pos_pck = (70, 275)
        cv2.putText(watch_images[i], image_names[i], pos_name, fnt, 0.5, (0, 0, 0), 1)
        cv2.putText(watch_images[i], metrics[i], pos_pck, fnt, 0.5, (0, 0, 0), 1)
        if (i + 1) % group_size == 0:
            # Target panel: draw all ground-truth keypoints
            for j in range(num_points[i]):
                cv2.drawMarker(watch_images[i], (watch_keypoints[i, 0, j], watch_keypoints[i, 1, j]),
                               colors[j], cv2.MARKER_DIAMOND, 12, 2, cv2.LINE_AA)
        else:
            # Other panels: draw the correctly matched keypoints, plus the
            # corresponding target keypoints of the same pair for reference
            for j in correct_index[i]:
                cv2.drawMarker(watch_images[i], (watch_keypoints[i, 0, j], watch_keypoints[i, 1, j]),
                               colors[j], cv2.MARKER_CROSS, 12, 2, cv2.LINE_AA)
                cv2.drawMarker(watch_images[i],
                               (watch_keypoints[i + (group_size - 1) - (i % group_size), 0, j],
                                watch_keypoints[i + (group_size - 1) - (i % group_size), 1, j]),
                               colors[j], cv2.MARKER_DIAMOND, 12, 2, cv2.LINE_AA)

    watch_images = torch.Tensor(watch_images.astype(np.float32))
    watch_images = watch_images.permute(0, 3, 1, 2)
    vis.image(torchvision.utils.make_grid(watch_images, nrow=4, padding=5), opts=opts)
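# ---------------------------------------------------------------- example ---
# A sketch of the PCK ("percentage of correct keypoints") measure that the
# pck() calls above are expected to produce: a warped point counts as correct
# if it lands within alpha times a reference size of its ground-truth target
# point. `pck_example` and its alpha/ref_size convention are illustrative
# assumptions; the repository's pck() derives the reference size from
# dataset_name and likewise returns the correct indices and the point count.
import torch

def pck_example(target_points, warped_points, ref_size, alpha=0.1):
    # target_points, warped_points: (1, 2, N) tensors of pixel coordinates
    # (padded entries, marked with -1 above, should be excluded beforehand)
    dist = torch.norm(target_points - warped_points, dim=1)  # (1, N) distances
    correct = dist <= alpha * ref_size
    index = torch.nonzero(correct[0]).flatten().cpu().numpy()
    n_pts = target_points.shape[2]
    return correct.float().mean().item(), index, n_pts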
def __init__(self, aff_output_dim=6, tps_output_dim=18, use_cuda=True,
             feature_extraction_cnn='resnet101', feature_extraction_last_layer='',
             fr_feature_size=15, fr_kernel_sizes=[7, 5], fr_channels=[128, 64],
             fixed_blocks=3, normalize_features=True, normalize_matches=True,
             batch_normalization=True, crop_layer=None, pytorch=False, caffe=False,
             return_correlation=False):
    super(DualGeometricMatching, self).__init__()
    self.normalize_features = normalize_features
    self.normalize_matches = normalize_matches
    self.return_correlation = return_correlation
    # self.return_correlation = True
    self.AffTnf = GeometricTnf(geometric_model='affine', use_cuda=use_cuda)
    self.crop_layer = crop_layer

    # Feature extraction network shared by the two images
    self.FeatureExtraction = FeatureExtraction(
        feature_extraction_cnn=feature_extraction_cnn,
        last_layer=feature_extraction_last_layer,
        normalization=normalize_features,
        fixed_blocks=fixed_blocks,
        pytorch=pytorch, caffe=caffe)

    # Matching layer based on the normalized feature maps of the two images
    self.FeatureCorrelation = FeatureCorrelation(shape='3D', normalization=normalize_matches)

    # Freeze everything created so far (feature extraction and correlation);
    # the regression heads below are created afterwards and stay trainable
    for param in self.parameters():
        param.requires_grad = False

    # Regression layers based on the correlated feature map, predicting the
    # parameters of the geometric transformations (affine and TPS)
    self.ThetaRegression = ThetaRegression(
        output_dim=aff_output_dim, feature_size=fr_feature_size,
        kernel_sizes=fr_kernel_sizes, channels=fr_channels,
        batch_normalization=batch_normalization)

    # Feature cropping on a specific layer
    # self.FeatureCrop = FeatureCrop(crop_layer=crop_layer)
    # if self.crop_layer == 'conv1':
    #     self.FeatureExtraction_2 = FeatureExtraction_2(feature_extraction_cnn=feature_extraction_cnn,
    #                                                    first_layer='pool1', last_layer='pool4',
    #                                                    pretrained=pretrained)
    self.ThetaRegression2 = ThetaRegression(
        output_dim=tps_output_dim, feature_size=fr_feature_size,
        kernel_sizes=fr_kernel_sizes, channels=fr_channels,
        batch_normalization=batch_normalization)
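# ---------------------------------------------------------------- example ---
# Hypothetical construction of the model defined above (kept commented out so
# it does not run on import). Because the requires_grad loop runs before the
# two ThetaRegression heads are created, only those heads should remain
# trainable; the check below makes that explicit. Argument values are just
# the defaults.
#
# model = DualGeometricMatching(aff_output_dim=6, tps_output_dim=18,
#                               feature_extraction_cnn='resnet101',
#                               fixed_blocks=3, use_cuda=True)
# trainable = [name for name, p in model.named_parameters() if p.requires_grad]
# assert all(name.startswith('ThetaRegression') for name in trainable)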
def train_fn(epoch, model, loss_fn, loss_cycle_fn, lambda_c, optimizer, dataloader, triple_generation,
             geometric_model='tps', use_cuda=True, log_interval=100, vis=None):
    """
    Train the model with synthetic training triples {source image, target image,
    refer image (warped source image), theta_GT} from PF-PASCAL:
    1. Train the transformation parameters theta_st from the source image to the target image;
    2. Train the transformation parameters theta_tr from the target image to the refer image;
    3. Combine theta_st and theta_tr to obtain theta from the source image to the refer image,
       and compute the loss between theta and theta_GT.
    """
    geoTnf = GeometricTnf(geometric_model=geometric_model, use_cuda=use_cuda)
    epoch_loss = 0
    if (epoch % 5 == 0 or epoch == 1) and vis is not None:
        # Integer strides (at least 1), so the modulo checks below can match
        stride_images = max(len(dataloader) // 3, 1)
        group_size = 6
        watch_images = torch.ones(group_size * 4, 3, 260, 240)
        if use_cuda:
            watch_images = watch_images.cuda()
        image_names = list()
        fnt = cv2.FONT_HERSHEY_COMPLEX
        stride_loss = max(len(dataloader) // 105, 1)
        iter_loss = np.zeros(106)
    begin = time.time()
    for batch_idx, batch in enumerate(dataloader):
        ''' Move the input batch to the GPU '''
        # batch['source_image'].shape & batch['target_image'].shape: (batch_size, 3, 240, 240)
        # batch['theta'].shape for tps: (batch_size, 18) if random, (batch_size, 18, 1, 1) if pre-set from csv
        if use_cuda:
            batch = batch_cuda(batch)

        ''' Get the training triple {source image, target image, refer image (warped source image), theta_GT} '''
        batch_triple = triple_generation(batch)

        ''' Train the model '''
        optimizer.zero_grad()
        # Predict transformation parameters between images
        # theta.shape: (batch_size, 18) for tps, (batch_size, 6) for affine
        theta_st, theta_ts, theta_tr, theta_rt = model(batch_triple)
        loss_match = loss_fn(theta_st=theta_st, theta_tr=theta_tr, theta_GT=batch_triple['theta_GT'])
        loss_cycle_st = loss_cycle_fn(theta_AB=theta_st, theta_BA=theta_ts)
        loss_cycle_ts = loss_cycle_fn(theta_AB=theta_ts, theta_BA=theta_st)
        loss_cycle_tr = loss_cycle_fn(theta_AB=theta_tr, theta_BA=theta_rt)
        loss_cycle_rt = loss_cycle_fn(theta_AB=theta_rt, theta_BA=theta_tr)
        loss = loss_match + lambda_c * (loss_cycle_st + loss_cycle_ts + loss_cycle_tr + loss_cycle_rt) / 4
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

        if (batch_idx + 1) % log_interval == 0:
            end = time.time()
            print('Train epoch: {} [{}/{} ({:.0%})]\t\tCurrent batch loss: {:.6f}\t\tTime cost ({} batches): {:.4f} s'
                  .format(epoch, batch_idx + 1, len(dataloader), (batch_idx + 1) / len(dataloader),
                          loss.item(), batch_idx + 1, end - begin))

        if (epoch % 5 == 0 or epoch == 1) and vis is not None:
            if (batch_idx + 1) % stride_images == 0 or batch_idx == 0:
                watch_images, image_names = add_watch(
                    watch_images, image_names, batch_triple, geoTnf, theta_st, theta_tr,
                    int((batch_idx + 1) / stride_images) * group_size)
            # if batch_idx <= 3:
            #     watch_images, image_names = add_watch(watch_images, image_names, batch_triple, geoTnf,
            #                                           theta_st, theta_tr, batch_idx * group_size)
            # if batch_idx == 3:
            #     opts = dict(jpgquality=100,
            #                 title='Epoch ' + str(epoch) + ' source warped_sr target warped_tr refer warped_sr')
            #     watch_images[:, :, 0:240, :] = normalize_image(watch_images[:, :, 0:240, :], forward=False)
            #     watch_images *= 255.0
            #     watch_images = watch_images.permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8)
            #     for i in range(watch_images.shape[0]):
            #         cv2.putText(watch_images[i], image_names[i], (80, 255), fnt, 0.5, (0, 0, 0), 1)
            #     watch_images = torch.Tensor(watch_images.astype(np.float32))
            #     watch_images = watch_images.permute(0, 3, 1, 2)
            #     vis.image(torchvision.utils.make_grid(watch_images,
            #                                           nrow=group_size, padding=3), opts=opts)

            if (batch_idx + 1) % stride_loss == 0 or batch_idx == 0:
                iter_loss[int((batch_idx + 1) / stride_loss)] = epoch_loss / (batch_idx + 1)

        end = time.time()

    # Visualize watch images & training loss
    if (epoch % 5 == 0 or epoch == 1) and vis is not None:
        opts = dict(jpgquality=100,
                    title='Epoch ' + str(epoch) + ' source warped_sr target warped_tr refer warped_sr')
        watch_images[:, :, 0:240, :] = normalize_image(watch_images[:, :, 0:240, :], forward=False)
        watch_images *= 255.0
        watch_images = watch_images.permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8)
        for i in range(watch_images.shape[0]):
            cv2.putText(watch_images[i], image_names[i], (80, 255), fnt, 0.5, (0, 0, 0), 1)
        watch_images = torch.Tensor(watch_images.astype(np.float32))
        watch_images = watch_images.permute(0, 3, 1, 2)
        vis.image(torchvision.utils.make_grid(watch_images, nrow=group_size, padding=3), opts=opts)

        opts_loss = dict(xlabel='Iterations (' + str(stride_loss) + ')',
                         ylabel='Loss',
                         title='GM ResNet101 ' + geometric_model + ' Training Loss in Epoch ' + str(epoch),
                         legend=['Loss'],
                         width=2000)
        vis.line(iter_loss, np.arange(106), opts=opts_loss)

    epoch_loss /= len(dataloader)
    print('Train set -- Average loss: {:.6f}\t\tTime cost: {:.4f}'.format(epoch_loss, end - begin))
    return epoch_loss, end - begin
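# ---------------------------------------------------------------- example ---
# Hypothetical wiring of train_fn into an outer epoch loop (kept commented out
# so it does not run on import). The model, loss functions, optimizer,
# dataloader, triple generator, and visdom instance are assumed to be
# constructed elsewhere in the project; num_epochs, lambda_c=1.0, and the
# checkpoint path are placeholders.
#
# best_loss = float('inf')
# for epoch in range(1, num_epochs + 1):
#     train_loss, train_time = train_fn(epoch, model, loss_fn, loss_cycle_fn,
#                                       lambda_c=1.0, optimizer=optimizer,
#                                       dataloader=dataloader,
#                                       triple_generation=triple_generation,
#                                       geometric_model='tps', use_cuda=True,
#                                       log_interval=100, vis=vis)
#     if train_loss < best_loss:
#         best_loss = train_loss
#         torch.save(model.state_dict(), 'checkpoint_best.pth')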