def get_corners(org_img, sol, s, part_path='', draw=False):
    """Run the network ``sol`` on one image and return its confident predictions.

    Args:
        org_img: Image as a numpy array with three axes; it is transposed with
            axes ``[2, 1, 0]`` and given a leading batch axis before inference.
        sol: Callable (the network) that takes the normalised float tensor and
            returns a 3-axis tensor whose last axis starts with the confidence.
        s: Unused here; kept so existing callers keep working.
        part_path: Name inserted into the visualisation file name.
        draw: When true, the predictions are drawn on the image and written to
            ``visuals/<part_path>_predictions.png``.

    Returns:
        Tuple ``(predictions, confidence)`` of numpy arrays restricted to the
        rows whose confidence is greater than 0.1: ``predictions`` holds
        columns 1:3 of those rows and ``confidence`` holds column 0.
    """
    img = org_img.transpose([2, 1, 0])[None, ...]
    img = torch.from_numpy(img.astype(np.float32))
    # Map pixel values from [0, 255] to roughly [-1, 1).
    img = img / 128.0 - 1.0
    # Only move to the GPU when one exists, so CPU-only hosts do not crash.
    if torch.cuda.is_available():
        img = img.cuda()

    # ``volatile=True`` is ignored by PyTorch >= 0.4; ``no_grad`` is what
    # actually prevents an autograd graph from being built during inference.
    with torch.no_grad():
        predictions = sol(img)

    if draw:
        # Undo the normalisation to get a drawable uint8 image back.
        org_img = img[0].data.cpu().numpy().transpose([2, 1, 0])
        org_img = ((org_img + 1) * 128).astype(np.uint8)
        c_img = org_img.copy()
        c_img = drawing.draw_sol_torch(predictions, c_img)
        cv2.imwrite("visuals/{}_predictions.png".format(part_path), c_img)

    predictions = predictions.data.cpu().numpy()
    # Remove the ones with confidence less than 0.1; the 2-D boolean mask
    # also flattens the batch axis away, leaving a 2-D array of rows.
    predictions = predictions[predictions[:, :, 0] > 0.1]

    # predictions is a matrix of 2000ish x 5 where the first column is the
    # confidence and then there are two pairs of x and y coordinates.
    # Only the first pair is extracted since the second is probably a repeat.
    confidence = predictions[:, 0]
    predictions = predictions[:, 1:3]
    return predictions, confidence
def get_predictions(self, org_img, s, write_path=''):
    """Run ``self.network`` on one image and return every prediction.

    Args:
        org_img: Image as a numpy array with three axes; it is transposed with
            axes ``[2, 1, 0]`` and given a leading batch axis before inference.
        s: Unused here; kept so existing callers keep working.
        write_path: When non-empty, the predictions are drawn on the image and
            the result is written to this path.

    Returns:
        Tuple ``(predictions, confidence)`` of numpy arrays: ``predictions``
        holds columns 1:3 of the network output and ``confidence`` holds
        column 0. No confidence threshold is applied.
    """
    img = org_img.transpose([2, 1, 0])[None, ...]
    img = torch.from_numpy(img.astype(np.float32))
    # Map pixel values from [0, 255] to roughly [-1, 1).
    img = img / 128.0 - 1.0
    if torch.cuda.is_available():
        img = img.cuda()

    # Inference only: avoid building an autograd graph.
    with torch.no_grad():
        predictions = self.network(img)

    # Truthiness instead of ``is not ''``: identity comparison against a
    # string literal is implementation-dependent and warns on Python 3.8+.
    if write_path:
        # Undo the normalisation to get a drawable uint8 image back.
        org_img = img[0].data.cpu().numpy().transpose([2, 1, 0])
        org_img = ((org_img + 1) * 128).astype(np.uint8)
        c_img = org_img.copy()
        c_img = drawing.draw_sol_torch(predictions, c_img)
        cv2.imwrite(write_path, c_img)

    predictions = predictions.squeeze(0).data.cpu().numpy()

    # predictions is a matrix of 2000ish x 5 where the first column is the
    # confidence and then there are two pairs of x and y coordinates.
    # Only the first pair is extracted since the second is probably a repeat.
    confidence = predictions[:, 0]
    predictions = predictions[:, 1:3]
    return predictions, confidence
def train(config):
    """Make one pass over the wrapped training loader and return the mean loss.

    Reads its settings from ``pretrain_config`` and uses ``solf``,
    ``optimizer``, ``dtype``, ``alpha_alignment`` and ``alpha_backprop``,
    none of which are defined in this function. The ``config`` argument is
    not read here. After every step the predictions are drawn on the first
    image of the batch and saved to ``snapshots/sol_train/<step>.png``.

    Returns:
        The accumulated loss divided by the number of steps taken.
    """
    sol_cfg = pretrain_config['sol']

    file_list = load_file_list(pretrain_config['training_set'])
    dataset = SolDataset(
        file_list,
        rescale_range=sol_cfg['training_rescale_range'],
        transform=CropTransform(sol_cfg['crop_params']),
    )
    loader = DataLoader(
        dataset,
        batch_size=sol_cfg['batch_size'],
        shuffle=True,
        num_workers=0,
        collate_fn=sol.sol_dataset.collate,
    )
    # Cap the number of batches drawn so a pass covers `images_per_epoch`.
    n_batches = int(sol_cfg['images_per_epoch'] / sol_cfg['batch_size'])
    loader = DatasetWrapper(loader, n_batches)

    snapshot_dir = "snapshots/sol_train"
    if not os.path.exists(snapshot_dir):
        os.makedirs(snapshot_dir)

    solf.train()
    total_loss = 0.0
    n_steps = 0.0
    for idx, batch in enumerate(loader):
        inputs = Variable(batch['img'].type(dtype), requires_grad=False)

        # A batch without ground-truth positions carries None; the alignment
        # loss is given None in that case.
        raw_gt = batch['sol_gt']
        targets = (
            Variable(raw_gt.type(dtype), requires_grad=False)
            if raw_gt is not None
            else None
        )

        predictions = solf(inputs)
        loss = alignment_loss(
            predictions, targets, batch['label_sizes'],
            alpha_alignment, alpha_backprop)

        # Undo the input normalisation and save a visualisation of this step.
        canvas = inputs[0].data.cpu().numpy().transpose([2, 1, 0])
        canvas = ((canvas + 1) * 128).astype(np.uint8).copy()
        canvas = drawing.draw_sol_torch(predictions, canvas)
        cv2.imwrite("snapshots/sol_train/{}.png".format(idx), canvas)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.data.cpu().numpy()
        n_steps += 1

        # Drop the references before collecting so the memory can be freed.
        del predictions, loss
        gc.collect()

    return total_loss / n_steps
def val():
    """Evaluate ``solf`` on the validation set and return the mean alignment loss.

    Uses names that are not defined in this function: ``pretrain_config``,
    ``config``, ``solf``, ``dtype``, ``alpha_alignment`` and
    ``alpha_backprop``. For every sample the predictions are drawn on the
    input image and written to ``snapshots/<prefix>_val/<step>.png``, where
    ``<prefix>`` is the part of ``config['evaluation']['output_path']``
    before the first underscore.

    Returns:
        The accumulated loss divided by the number of samples evaluated.
    """
    test_set_list = load_file_list(pretrain_config['validation_set'])
    test_dataset = SolDataset(
        test_set_list,
        rescale_range=pretrain_config['sol']['validation_rescale_range'],
        transform=None)
    test_dataloader = DataLoader(
        test_dataset, batch_size=1, shuffle=False, num_workers=0,
        collate_fn=sol.sol_dataset.collate, pin_memory=True)

    writep = config['evaluation']['output_path'].split('_')[0]
    writep = 'snapshots/{}_val'.format(writep)
    if not os.path.exists(writep):
        os.makedirs(writep)

    solf.eval()
    sum_loss = 0.0
    steps = 0.0
    for step_i, x in enumerate(test_dataloader):
        img = x['img'].type(dtype)

        # Reset on every iteration. Without this, a sample with no ground
        # truth either raised NameError (first iteration) or silently reused
        # the previous sample's ground truth.
        sol_gt = None
        if x['sol_gt'] is not None:
            # This is needed because if sol_gt is None it means that there
            # are no GT positions in the image. The alignment loss will
            # handle it correctly as None.
            sol_gt = x['sol_gt'].type(dtype)

        predictions = solf(img)
        loss = alignment_loss(predictions, sol_gt, x['label_sizes'],
                              alpha_alignment, alpha_backprop)

        # Write images to file for visualization; the arithmetic undoes the
        # [-1, 1) input normalisation.
        org_img = img[0].data.cpu().numpy().transpose([2, 1, 0])
        org_img = ((org_img + 1) * 128).astype(np.uint8)
        org_img = org_img.copy()
        org_img = drawing.draw_sol_torch(predictions, org_img)
        cv2.imwrite(os.path.join(writep, "{}.png".format(step_i)), org_img)

        sum_loss += loss.data.cpu().numpy()
        steps += 1

        # Drop the references before collecting so the memory can be freed.
        predictions = None
        loss = None
        gc.collect()

    return sum_loss / steps
def main():
    """Draw network predictions on every readable image in the validation folder.

    The YAML config path is taken from ``sys.argv[1]``. Each image found in
    ``config['pretraining']['validation_set']['img_folder']`` is resized,
    passed through the model returned by ``continuous_state.init_model``, and
    saved with the predictions drawn on it to ``data/<prefix>_val/<name>``,
    where ``<prefix>`` is the part of ``config['evaluation']['output_path']``
    before the first underscore. Files that OpenCV cannot read are skipped.
    """
    with open(sys.argv[1]) as f:
        # NOTE: ``yaml.load(f)`` without a Loader is deprecated and raises
        # TypeError on PyYAML >= 6; ``safe_load`` works on all versions and
        # does not construct arbitrary Python objects from the file.
        config = yaml.safe_load(f)

    pretrain_config = config['pretraining']
    eval_folder = pretrain_config['validation_set']['img_folder']
    # Loop-invariant: the target size along image axis 1.
    target_dim1 = pretrain_config['sol']['validation_rescale_range'][0]

    solf = continuous_state.init_model(config)
    if torch.cuda.is_available():
        print("Using GPU")
        solf.cuda()
        dtype = torch.cuda.FloatTensor
    else:
        print("Warning: Not using a GPU, untested")
        dtype = torch.FloatTensor

    writep = config['evaluation']['output_path'].split('_')[0]
    writep = 'data/{}_val'.format(writep)
    if not os.path.exists(writep):
        os.makedirs(writep)

    for fil in os.listdir(eval_folder):
        org_img = cv2.imread(os.path.join(eval_folder, fil), cv2.IMREAD_COLOR)
        if org_img is None:
            # Not an image (or unreadable); skip it.
            continue

        # Scale axis 0 by the same factor as axis 1 to keep the aspect ratio.
        target_dim0 = int(org_img.shape[0] / float(org_img.shape[1]) * target_dim1)
        org_img = cv2.resize(org_img, (target_dim1, target_dim0),
                             interpolation=cv2.INTER_CUBIC)

        img = org_img.transpose([2, 1, 0])[None, ...]
        img = torch.from_numpy(img.astype(np.float32))
        # Map pixel values from [0, 255] to roughly [-1, 1).
        img = img / 128.0 - 1.0
        img = Variable(img.type(dtype), requires_grad=False)

        predictions = solf(img)

        # Undo the normalisation to get a drawable uint8 image back.
        org_img = img[0].data.cpu().numpy().transpose([2, 1, 0])
        org_img = ((org_img + 1) * 128).astype(np.uint8)
        org_img = org_img.copy()
        org_img = drawing.draw_sol_torch(predictions, org_img)
        predictions = None

        cv2.imwrite(os.path.join(writep, fil), org_img)
def val(config):
    """Save a prediction visualisation for every image in the validation list.

    Uses names that are not defined in this function: ``pretrain_config``,
    ``eval_folder``, ``solf`` and ``dtype``. The file list is a JSON file
    whose entries are indexable; element 1 of each entry is joined onto
    ``eval_folder`` to locate the image and reused as the output file name
    under ``snapshots/<prefix>_val``. Images OpenCV cannot read are skipped.
    Nothing is returned.
    """
    prefix = config['evaluation']['output_path'].split('_')[0]
    out_dir = 'snapshots/{}_val'.format(prefix)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    with open(pretrain_config['validation_set']['file_list'], 'r') as handle:
        entries = json.load(handle)

    solf.eval()
    for entry in entries:
        rel_name = entry[1]
        image = cv2.imread(os.path.join(eval_folder, rel_name), cv2.IMREAD_COLOR)
        if image is None:
            continue

        # Resize so axis 1 matches the configured size, keeping aspect ratio.
        new_dim1 = pretrain_config['sol']['validation_rescale_range'][0]
        new_dim0 = int(image.shape[0] / float(image.shape[1]) * new_dim1)
        image = cv2.resize(image, (new_dim1, new_dim0),
                           interpolation=cv2.INTER_CUBIC)

        # Transpose axes, add a batch axis, and normalise to roughly [-1, 1).
        batch = torch.from_numpy(
            image.transpose([2, 1, 0])[None, ...].astype(np.float32))
        batch = Variable((batch / 128.0 - 1.0).type(dtype), requires_grad=False)

        predictions = solf(batch)

        # Undo the normalisation and draw the predictions on the image.
        canvas = batch[0].data.cpu().numpy().transpose([2, 1, 0])
        canvas = ((canvas + 1) * 128).astype(np.uint8).copy()
        canvas = drawing.draw_sol_torch(predictions, canvas)
        cv2.imwrite(os.path.join(out_dir, rel_name), canvas)

        predictions = None