def __getitem__(self, idx):
    image_name = self._info.iloc[idx, -1]
    if len(self._images) == 0:
        # no in-memory cache: read from disk
        image = imread(os.path.join(self._images_dir, image_name))
    else:
        image = self._images[image_name]
    image = self._ensure_standard_shape(image)

    # each sample stores image and mask side by side: left half image, right half mask
    center = image.shape[1] // 2
    mask = image[:, center:]
    image = image[:, :center]

    # binarize the mask, skeletonize it, then thicken the skeleton
    mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
    mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)[1]
    mask = get_skeletion(mask)
    mask = cv2.dilate(mask, np.ones((5, 5), np.uint8))
    mask[mask == 255] = 1

    if self._stage == "train":
        try:
            image, mask = self._aug(image, mask)
            mask *= 255
        except Exception as e:
            print("Exception", e)

    image, mask = self._random_crop(image, mask)
    image = self._transform(image)
    mask = torch.LongTensor(mask)
    mask = torch.clamp(mask, 0, 1)
    return image, mask
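
# Usage sketch (assumption): how a dataset exposing the __getitem__ above is
# typically consumed. The class name `SkeletonMaskDataset` and the config
# values below are hypothetical stand-ins, not names from this repo.
#
# from torch.utils.data import DataLoader
#
# configs = {"data_path": "./data", "cached_npy": 0}
# train_ds = SkeletonMaskDataset(stage="train", configs=configs)
# loader = DataLoader(train_ds, batch_size=8, shuffle=True, num_workers=4)
# for image, mask in loader:
#     # image: float tensor (B, C, H, W) in [0, 1]; mask: LongTensor of 0/1 labels
#     pass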
def test(self, source):
    """Test the model.

    Args:
        source: Input to the model, either a single image or a directory
            containing images.

    Returns:
        The generated image conditioned on the input image.
    """
    split_len = 600 if self.opts.dataset == 'maps' else 256
    img = utils.normalize_images(utils.imread(source))
    img_A = img[:, :split_len, :]
    img_B = img[:, split_len:, :]
    img_A = np.expand_dims(img_A, 0)
    img_B = np.expand_dims(img_B, 0)
    if self.opts.direction == 'b2a':
        input_images = img_B
        target_images = img_A
    else:
        input_images = img_A
        target_images = img_B

    self.saver.restore(self.sess, self.opts.ckpt)
    utils.imwrite(os.path.join(self.opts.target_dir, 'target_image'),
                  target_images[0], inv_normalize=True)
    utils.imwrite(os.path.join(self.opts.target_dir, 'conditional_image'),
                  input_images[0], inv_normalize=True)

    print(' - Sampling generator images for different random initial noise')
    for idx in range(self.opts.sample_num):
        print('Sampling #', idx)
        if self.opts.noise_type == "gauss":
            code = gaussian_noise([1, self.opts.code_len])
        else:
            code = uniform_noise([1, self.opts.code_len])
        feed_dict = {
            self.is_training: False,
            self.images_A: img_A,
            self.images_B: img_B,
            self.code: code,
        }
        if self.opts.model == 'bicycle':
            images = self.G_cvae.eval(session=self.sess, feed_dict=feed_dict)
            utils.imwrite(os.path.join(self.opts.target_dir, 'test_cvae{}'.format(idx)),
                          images, inv_normalize=True)
            images = self.G_clr.eval(session=self.sess, feed_dict=feed_dict)
            utils.imwrite(os.path.join(self.opts.target_dir, 'test_clr{}'.format(idx)),
                          images, inv_normalize=True)
        else:
            raise ValueError("Testing only possible for bicycleGAN")
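
# A minimal sketch (assumption) of the `gaussian_noise` / `uniform_noise`
# helpers used above; the actual utils may differ, but BicycleGAN-style latent
# codes are conventionally drawn from N(0, 1) or U(-1, 1).
import numpy as np

def gaussian_noise(shape):
    # standard-normal latent code, e.g. shape = [1, code_len]
    return np.random.normal(loc=0.0, scale=1.0, size=shape).astype(np.float32)

def uniform_noise(shape):
    # uniform latent code in [-1, 1)
    return np.random.uniform(low=-1.0, high=1.0, size=shape).astype(np.float32)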
def infer(self, image, save_path, bright_diff=0, is_grayscale=True):
    # read the input image (accepts a path or an already-loaded array)
    if isinstance(image, str):
        img = imread(image, is_grayscale=is_grayscale)
    else:
        img = image
    img = cv2.resize(img, dsize=(256, 256))
    img = np.reshape(img, newshape=(img.shape[0], img.shape[1], 1))

    gen_avatar = self.sess.run(self.testB, feed_dict={self.test_A: [img]})
    if save_path is not None:
        save_images(gen_avatar + bright_diff, size=[1, 1], image_path=save_path)

    # drop the batch and channel axes, then map back to image range
    gen_avatar = np.reshape(gen_avatar, newshape=list(gen_avatar.shape[1:-1]))
    gen_avatar = inverse_transform(gen_avatar)
    return gen_avatar
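
# Usage sketch (assumption): single-image inference with the method above;
# `model` stands for an already-built instance of the owning class, and the
# paths are hypothetical.
#
# avatar = model.infer(image='./samples/sketch.png', save_path='./out/avatar.png')
# print(avatar.shape)  # (256, 256) after the reshape + inverse_transform above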
def __init__(self, stage, configs=None):
    root_dir = configs["data_path"]
    self._root_dir = root_dir
    self._images_dir = os.path.join(root_dir, "images")
    self._stage = stage
    self._info_path = os.path.join(root_dir, "{}.txt".format(stage))
    self._info = pd.read_csv(self._info_path, header=None, error_bad_lines=False)
    self._is_cached = configs["cached_npy"] == 1
    self._transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
    ])

    # load the image cache from disk if present
    self._images = {}
    if self._is_cached and os.path.exists(os.path.join(root_dir, "{}.npy".format(stage))):
        self._images = np.load(os.path.join(root_dir, "{}.npy".format(stage)),
                               allow_pickle=True).tolist()

    # otherwise read every image once and (optionally) persist the dict as .npy
    if len(self._images) == 0 and self._stage != "little":
        for idx in range(len(self._info)):
            image_name = self._info.iloc[idx, 0]
            if image_name in self._images:
                continue
            print("Read {}".format(image_name))
            self._images[image_name] = imread(os.path.join(self._images_dir, image_name))
        if self._is_cached:
            np.save(
                os.path.join(self._root_dir, "{}.npy".format(self._stage)),
                self._images,
            )
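
# Cache round-trip sketch: np.save wraps a dict in a 0-d object array, so the
# .tolist() call on load (as above) recovers the original {name: image} dict.
# A self-contained illustration, assuming nothing beyond numpy:
#
# import numpy as np
# cache = {"a.png": np.zeros((2, 2), np.uint8)}
# np.save("train.npy", cache)
# restored = np.load("train.npy", allow_pickle=True).tolist()
# assert restored.keys() == cache.keys()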
def test(self): """ test model on the Test-Set """ self.model.eval() self.model.requires_grad(False) val_f1s = {'f1_iou': [], 'f1_center': [], 'f1_width': [], 'f1_height': []} val_losses = {'all': [], 'center': [], 'width': [], 'height': []} t = time() for step, sample in enumerate(self.val_loader): hmap_true, y_true, file_name, aug_info = sample hmap_true = hmap_true.to(self.cnf.device) y_true = json.loads(y_true[0]) hmap_pred = self.model.forward(hmap_true) x_true_center, x_true_width, x_true_height = hmap_true[0, 0], hmap_true[0, 1], hmap_true[0, 2] x_pred_center, x_pred_width, x_pred_height = hmap_pred[0, 0], hmap_pred[0, 1], hmap_pred[0, 2] # log center, width, height losses mask = torch.tensor(torch.where(x_true_height != 0, 1, 0), dtype=torch.float32) loss_center = self.cnf.masked_loss_c * nn.MSELoss()(x_pred_center, x_true_center) loss_width = self.cnf.masked_loss_w * MaskedMSELoss()(x_pred_width, x_true_width, mask=mask) loss_height = self.cnf.masked_loss_h * MaskedMSELoss()(x_pred_height, x_true_height, mask=mask) loss = loss_center + loss_width + loss_height val_losses['all'].append(loss.item()) val_losses['center'].append(loss_center.item()) val_losses['width'].append(loss_width.item()) val_losses['height'].append(loss_height.item()) y_center = [(coord[0], coord[1], coord[2]) for coord in y_true] y_width = [(coord[0], coord[1], coord[2], coord[3]) for coord in y_true] y_height = [(coord[0], coord[1], coord[2], coord[4]) for coord in y_true] y_center_pred = utils.local_maxima_3d(heatmap=x_pred_center, threshold=0.1, device=self.cnf.device) y_width_pred = [] y_height_pred = [] bboxes_info_pred = [] for center_coord in y_center_pred: # y_center_pred cam_dist, y2d, x2d = center_coord width = float(x_pred_width[cam_dist, y2d, x2d]) height = float(x_pred_height[cam_dist, y2d, x2d]) # denormalize width and height width = int(round(width * STD_DEV_WIDTH + MEAN_WIDTH)) height = int(round(height * STD_DEV_HEIGHT + MEAN_HEIGHT)) # width = int(round(width * MAX_WIDTH)) # height = int(round(height * MAX_HEIGHT)) y_width_pred.append((*center_coord, width)) y_height_pred.append((*center_coord, height)) x2d, y2d, cam_dist = utils.rescale_to_real(x2d=x2d, y2d=y2d, cam_dist=cam_dist, q=self.cnf.q) bboxes_info_pred.append((x2d - width / 2, y2d - height / 2, width, height, cam_dist)) y_center_true = utils.local_maxima_3d(heatmap=x_true_center, threshold=0.1, device=self.cnf.device) bboxes_info_true = [] for center_coord in y_center_true: cam_dist, y2d, x2d = center_coord width = float(x_true_width[cam_dist, y2d, x2d]) height = float(x_true_height[cam_dist, y2d, x2d]) # denormalize width and height width = int(round(width * STD_DEV_WIDTH + MEAN_WIDTH)) height = int(round(height * STD_DEV_HEIGHT + MEAN_HEIGHT)) # width = int(round(width * MAX_WIDTH)) # height = int(round(height * MAX_HEIGHT)) y_width_pred.append((*center_coord, width)) y_height_pred.append((*center_coord, height)) x2d, y2d, cam_dist = utils.rescale_to_real(x2d=x2d, y2d=y2d, cam_dist=cam_dist, q=self.cnf.q) bboxes_info_true.append((x2d - width / 2, y2d - height / 2, width, height, cam_dist)) metrics_iou = compute_det_metrics_iou(bboxes_a=bboxes_info_pred, bboxes_b=bboxes_info_true) metrics_center = joint_det_metrics(points_pred=y_center_pred, points_true=y_center, th=1) metrics_width = joint_det_metrics(points_pred=y_width_pred, points_true=y_width, th=1) metrics_height = joint_det_metrics(points_pred=y_height_pred, points_true=y_height, th=1) f1_iou = metrics_iou['f1'] f1_center = metrics_center['f1'] f1_width = metrics_width['f1'] 
f1_height = metrics_height['f1'] val_f1s['f1_iou'].append(f1_iou) val_f1s['f1_center'].append(f1_center) val_f1s['f1_width'].append(f1_width) val_f1s['f1_height'].append(f1_height) if step < 3: img_original = np.array(utils.imread(self.cnf.mot_synth_path / file_name[0]).convert("RGB")) hmap_pred = hmap_pred.squeeze() out_path = self.cnf.exp_log_path / f'{step}_center_pred.mp4' utils.save_3d_hmap(hmap=hmap_pred[0, ...], path=out_path) out_path = self.cnf.exp_log_path / f'{step}_width_pred.mp4' utils.save_3d_hmap(hmap=hmap_pred[1, ...], path=out_path, shift_values=True) out_path = self.cnf.exp_log_path / f'{step}_height_pred.mp4' utils.save_3d_hmap(hmap=hmap_pred[2, ...], path=out_path, shift_values=True) out_path = self.cnf.exp_log_path / f'{step}_bboxes_pred.jpg' utils.save_bboxes(img_original, bboxes_info_pred, path=out_path, use_z=True, half_images=True) hmap_true = hmap_true.squeeze() out_path = self.cnf.exp_log_path / f'{step}_center_true.mp4' utils.save_3d_hmap(hmap=hmap_true[0, ...], path=out_path) out_path = self.cnf.exp_log_path / f'{step}_width_true.mp4' utils.save_3d_hmap(hmap=hmap_true[1, ...], path=out_path, shift_values=True) out_path = self.cnf.exp_log_path / f'{step}_height_true.mp4' utils.save_3d_hmap(hmap=hmap_true[2, ...], path=out_path, shift_values=True) out_path = self.cnf.exp_log_path / f'{step}_bboxes_true.jpg' utils.save_bboxes(img_original, bboxes_info_true, path=out_path, use_z=True, half_images=True) if step >= self.cnf.test_len - 1: break # log average f1 on test set mean_val_loss = np.mean(val_losses['all']) mean_val_f1_iou = np.mean(val_f1s['f1_iou']) mean_val_f1_center = np.mean(val_f1s['f1_center']) mean_val_f1_width = np.mean(val_f1s['f1_width']) mean_val_f1_height = np.mean(val_f1s['f1_height']) mean_val_loss_center = np.mean(val_losses['center']) mean_val_loss_width = np.mean(val_losses['width']) mean_val_loss_height = np.mean(val_losses['height']) print(f'[TEST] AVG-Loss: {mean_val_loss:.6f}, ' f'AVG-F1_iou: {mean_val_f1_iou:.6f}, ' f'AVG-F1_center: {mean_val_f1_center:.6f}, ' f'AVG-F1_width: {mean_val_f1_width:.6f}, ' f'AVG-F1_height: {mean_val_f1_height:.6f}' f' │ Test time: {time() - t:.2f} s') self.sw.add_scalar(tag='val_F1/iou', scalar_value=mean_val_f1_iou, global_step=self.current_epoch) self.sw.add_scalar(tag='val_F1/center', scalar_value=mean_val_f1_center, global_step=self.current_epoch) self.sw.add_scalar(tag='val_F1/width', scalar_value=mean_val_f1_width, global_step=self.current_epoch) self.sw.add_scalar(tag='val_F1/height', scalar_value=mean_val_f1_height, global_step=self.current_epoch) self.sw.add_scalar(tag='val_loss', scalar_value=mean_val_loss, global_step=self.current_epoch) self.sw.add_scalar(tag='val_loss/center', scalar_value=mean_val_loss_center, global_step=self.current_epoch) self.sw.add_scalar(tag='val_loss/width', scalar_value=mean_val_loss_width, global_step=self.current_epoch) self.sw.add_scalar(tag='val_loss/height', scalar_value=mean_val_loss_height, global_step=self.current_epoch) # save best model if self.best_val_f1 is None or mean_val_f1_iou < self.best_val_f1: self.best_val_f1 = mean_val_f1_iou torch.save(self.model.state_dict(), self.log_path / 'best.pth')
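
# A minimal sketch (assumption) of the MaskedMSELoss used above: plain MSE
# evaluated only where the binary mask is 1 (here, voxels with a non-zero
# ground-truth height), normalized by the number of unmasked elements.
import torch
import torch.nn as nn

class MaskedMSELoss(nn.Module):
    def forward(self, y_pred, y_true, mask):
        # squared error, zeroed outside the mask; clamp avoids division by zero
        sq_err = (y_pred - y_true) ** 2 * mask
        return sq_err.sum() / mask.sum().clamp(min=1.0)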
args = parser.parse_args()

if not args.image_a_path:
    parser.error('-image_a_path is required.')
if not args.image_b_path:
    parser.error('-image_b_path is required.')
if not args.ref_lines_path:
    parser.error('-ref_lines_path is required.')
if not args.N:
    parser.error('-N is required.')

image_a_path = args.image_a_path
image_b_path = args.image_b_path
ref_lines_path = args.ref_lines_path

# Reading the images
image_a = imread(filename=image_a_path)
image_b = imread(filename=image_b_path)

# Drop alpha channels, if any
if image_a.shape[2] > 3:
    image_a = image_a[:, :, 0:3]
if image_b.shape[2] > 3:
    image_b = image_b[:, :, 0:3]

shape_a = image_a.shape
shape_b = image_b.shape

n_images = args.N
# n_images = 11  # debug override

# Common buffer shape: elementwise max of the two image shapes
buf_shape = []
for i in range(3):
    buf_shape.append(max(shape_a[i], shape_b[i]))
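
# Sketch (assumption): the common buffer shape computed above is presumably
# used to zero-pad both images to the same size before morphing, e.g.:
#
# buf_a = np.zeros(buf_shape, dtype=image_a.dtype)
# buf_a[:shape_a[0], :shape_a[1], :shape_a[2]] = image_a
# buf_b = np.zeros(buf_shape, dtype=image_b.dtype)
# buf_b[:shape_b[0], :shape_b[1], :shape_b[2]] = image_b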
def main():
    MAX_WIDTH = 1919
    MAX_HEIGHT = 1079

    from test_metrics import joint_det_metrics, compute_det_metrics_iou
    import json
    import numpy as np
    from torch.utils.data import DataLoader
    from conf import Conf
    from dataset.mot_synth_det_ds import MOTSynthDetDS
    from utils import utils
    import torch

    cnf = Conf(exp_name='vha_d_debug', preload_checkpoint=False)

    # load dataset
    mode = 'test'
    ds = MOTSynthDetDS(mode=mode, cnf=cnf)
    loader = DataLoader(dataset=ds, batch_size=1, num_workers=0, shuffle=False)

    # load model
    from models.vha_det_variable_versions import Autoencoder as AutoencoderVariableVersions
    model = AutoencoderVariableVersions(vha_version=1).to(cnf.device)
    model.eval()
    model.requires_grad(False)
    if cnf.model_weights is not None:
        model.load_state_dict(torch.load(cnf.exp_log_path / 'best.pth', map_location=torch.device('cpu')),
                              strict=False)

    # ======== MAIN LOOP ========
    for i, sample in enumerate(loader):
        x, y, file_name, aug_info = None, None, None, None
        if mode == 'test':
            x, y, file_name, aug_info = sample
            y_true = json.loads(y[0])
        if mode == 'train':
            x, file_name, aug_info = sample

        x = x.to(cnf.device)
        x_center, x_width, x_height = x[0, 0], x[0, 1], x[0, 2]

        y_pred = model.forward(x)
        x_pred_center, x_pred_width, x_pred_height = y_pred[0, 0], y_pred[0, 1], y_pred[0, 2]

        if mode == 'test':
            y = json.loads(y[0])
            y_center = [(coord[0], coord[1], coord[2]) for coord in y]
            y_width = [(coord[0], coord[1], coord[2], coord[3]) for coord in y]
            y_height = [(coord[0], coord[1], coord[2], coord[4]) for coord in y]

        # utils.visualize_3d_hmap(x[0, 2])
        y_center_pred = utils.local_maxima_3d(heatmap=x_pred_center, threshold=0.1, device=cnf.device)
        y_width_pred = []
        y_height_pred = []
        bboxes_info_pred = []
        # w_min = min([float(x_width[cam_dist, y2d, x2d]) for cam_dist, y2d, x2d in y_center_pred])
        # w_max = max([float(x_width[cam_dist, y2d, x2d]) for cam_dist, y2d, x2d in y_center_pred])
        # h_min = min([float(x_height[cam_dist, y2d, x2d]) for cam_dist, y2d, x2d in y_center_pred])
        # h_max = max([float(x_height[cam_dist, y2d, x2d]) for cam_dist, y2d, x2d in y_center_pred])
        for cam_dist, y2d, x2d in y_center:
            width = float(x_pred_width[cam_dist, y2d, x2d])
            height = float(x_pred_height[cam_dist, y2d, x2d])

            # denormalize width and height
            width = int(round(width * MAX_WIDTH))
            height = int(round(height * MAX_HEIGHT))

            y_width_pred.append((cam_dist, y2d, x2d, width))
            y_height_pred.append((cam_dist, y2d, x2d, height))

            x2d, y2d, cam_dist = utils.rescale_to_real(x2d=x2d, y2d=y2d, cam_dist=cam_dist, q=cnf.q)
            bboxes_info_pred.append((x2d - width / 2, y2d - height / 2, width, height, cam_dist))

        img_original = np.array(utils.imread(cnf.mot_synth_path / file_name[0]).convert("RGB"))

        if mode == 'test':
            bboxes_info_true = []
            for cam_dist, y2d, x2d, width, height in y:
                x2d, y2d, cam_dist = utils.rescale_to_real(x2d=x2d, y2d=y2d, cam_dist=cam_dist, q=cnf.q)
                bboxes_info_true.append((x2d - width / 2, y2d - height / 2, width, height, cam_dist))

            metrics_iou = compute_det_metrics_iou(bboxes_info_pred, bboxes_info_true)
            metrics_center = joint_det_metrics(points_pred=y_center_pred, points_true=y_center, th=1)
            metrics_width = joint_det_metrics(points_pred=y_width_pred, points_true=y_width, th=1)
            metrics_height = joint_det_metrics(points_pred=y_height_pred, points_true=y_height, th=1)
            f1_iou = metrics_iou['f1']
            f1_center = metrics_center['f1']
            f1_width = metrics_width['f1']
            f1_height = metrics_height['f1']
            print(f'f1_iou={f1_iou}, f1_center={f1_center}, f1_width={f1_width}, f1_height={f1_height}')

        # for cam_dist, y2d, x2d, width, height in y_true:
        #     x2d, y2d, cam_dist = utils.rescale_to_real(x2d=x2d, y2d=y2d, cam_dist=cam_dist, q=cnf.q)
        #     bboxes_info_true.append((x2d - width / 2, y2d - height / 2, width, height, cam_dist))
        # utils.visualize_bboxes(img_original, bboxes_info_true, use_z=True, half_images=False,
        #                        aug_info=aug_info, normalize_z=False)

        # print(f'({i}) Dataset example: x.shape={tuple(x.shape)}, y={y}')
        utils.visualize_bboxes(img_original, bboxes_info_pred, use_z=True, half_images=True,
                               aug_info=aug_info, normalize_z=False)
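
# Worked example of the max-based denormalization above: with MAX_WIDTH = 1919,
# a predicted normalized width of 0.05 maps back to int(round(0.05 * 1919)) = 96 px.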
def main():
    from test_metrics import joint_det_metrics, compute_det_metrics_iou

    cnf = Conf(exp_name='debug')

    # load dataset
    mode = 'val'
    ds = MOTSynthDetDS(mode=mode, cnf=cnf)
    loader = DataLoader(dataset=ds, batch_size=1, num_workers=1, shuffle=False,
                        worker_init_fn=MOTSynthDetDS.wif_test)

    # load model
    # from models.vha_det_c3d_pretrained import Autoencoder as AutoencoderC3dPretrained
    # model = AutoencoderC3dPretrained(hmap_d=cnf.hmap_d, legacy_pretrained=cnf.saved_epoch == 0).to(cnf.device)
    # model.eval()
    # model.requires_grad(False)
    # if cnf.model_weights is not None:
    #     model.load_state_dict(cnf.model_weights, strict=False)

    # ======== MAIN LOOP ========
    for i, sample in enumerate(loader):
        x, y, file_name, aug_info = None, None, None, None
        if mode == 'val' or mode == 'test':
            x, y, file_name, aug_info = sample
            y_true = json.loads(y[0])
        if mode == 'train':
            x, file_name, aug_info = sample

        x = x.to(cnf.device)
        x_center, x_width, x_height = x[0, 0], x[0, 1], x[0, 2]

        # y_pred = model.forward(x)
        # x_pred_center, x_pred_width, x_pred_height = y_pred[0, 0], y_pred[0, 1], y_pred[0, 2]

        if mode == 'test':
            y = json.loads(y[0])
            y_center = [(coord[0], coord[1], coord[2]) for coord in y]
            y_width = [(coord[0], coord[1], coord[2], coord[3]) for coord in y]
            y_height = [(coord[0], coord[1], coord[2], coord[4]) for coord in y]

        # utils.visualize_3d_hmap(x[0, 2])
        y_center_pred = utils.local_maxima_3d(heatmap=x_center, threshold=0.1, device=cnf.device)
        y_width_pred = []
        y_height_pred = []
        bboxes_info_pred = []
        # w_min = min([float(x_width[cam_dist, y2d, x2d]) for cam_dist, y2d, x2d in y_center_pred])
        # w_max = max([float(x_width[cam_dist, y2d, x2d]) for cam_dist, y2d, x2d in y_center_pred])
        # h_min = min([float(x_height[cam_dist, y2d, x2d]) for cam_dist, y2d, x2d in y_center_pred])
        # h_max = max([float(x_height[cam_dist, y2d, x2d]) for cam_dist, y2d, x2d in y_center_pred])
        for cam_dist, y2d, x2d in y_center_pred:
            width = float(x_width[cam_dist, y2d, x2d])
            height = float(x_height[cam_dist, y2d, x2d])

            # denormalize width and height
            # width = int(round(width * MAX_WIDTH))
            # height = int(round(height * MAX_HEIGHT))
            width = int(round(width * STD_DEV_WIDTH + MEAN_WIDTH))
            height = int(round(height * STD_DEV_HEIGHT + MEAN_HEIGHT))

            y_width_pred.append((cam_dist, y2d, x2d, width))
            y_height_pred.append((cam_dist, y2d, x2d, height))

            x2d, y2d, cam_dist = utils.rescale_to_real(x2d=x2d, y2d=y2d, cam_dist=cam_dist, q=cnf.q)
            bboxes_info_pred.append((x2d - width / 2, y2d - height / 2, width, height, cam_dist))

        img_original = np.array(utils.imread(cnf.mot_synth_path / file_name[0]).convert("RGB"))

        if mode == 'test':
            bboxes_info_true = []
            for cam_dist, y2d, x2d, width, height in y:
                x2d, y2d, cam_dist = utils.rescale_to_real(x2d=x2d, y2d=y2d, cam_dist=cam_dist, q=cnf.q)
                bboxes_info_true.append((x2d - width / 2, y2d - height / 2, width, height, cam_dist))

            metrics_iou = compute_det_metrics_iou(bboxes_info_pred, bboxes_info_true)
            metrics_center = joint_det_metrics(points_pred=y_center_pred, points_true=y_center, th=1)
            metrics_width = joint_det_metrics(points_pred=y_width_pred, points_true=y_width, th=1)
            metrics_height = joint_det_metrics(points_pred=y_height_pred, points_true=y_height, th=1)
            f1_iou = metrics_iou['f1']
            f1_center = metrics_center['f1']
            f1_width = metrics_width['f1']
            f1_height = metrics_height['f1']
            print(f'f1_iou={f1_iou}, f1_center={f1_center}, f1_width={f1_width}, f1_height={f1_height}')

        # for cam_dist, y2d, x2d, width, height in y_true:
        #     x2d, y2d, cam_dist = utils.rescale_to_real(x2d=x2d, y2d=y2d, cam_dist=cam_dist, q=cnf.q)
        #     bboxes_info_true.append((x2d - width / 2, y2d - height / 2, width, height, cam_dist))
        # utils.visualize_bboxes(img_original, bboxes_info_true, use_z=True, half_images=False,
        #                        aug_info=aug_info, normalize_z=False)

        # print(f'({i}) Dataset example: x.shape={tuple(x.shape)}, y={y}')
        utils.visualize_bboxes(img_original, bboxes_info_pred, use_z=True, half_images=True,
                               aug_info=aug_info, normalize_z=False)
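
# Note: unlike the script above, this one de-standardizes width/height
# (x * std + mean) instead of max-scaling. For example, with hypothetical
# MEAN_WIDTH = 60 and STD_DEV_WIDTH = 40, a network output of 0.5 maps to
# int(round(0.5 * 40 + 60)) = 80 px.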
def get_frame(self, file_path):
    # read input frame
    frame_path = self.cnf.mot_synth_path / file_path
    frame = utils.imread(frame_path).convert('RGB')
    # frame = transforms.ToTensor()(frame)
    return frame
parser.add_argument("-ro", type=int, default=5) parser.add_argument("-closing", action="store_true") parser.add_argument("-rc", type=int, default=5) parser.add_argument("-rm_ts", type=int, default=100) parser.add_argument("-r_error_th", type=int, default=3) parser.add_argument("-output_file", type=str, default='output.txt') args = parser.parse_args() if not args.image_path: parser.error('-image_path is required.') img_path = args.image_path # Opening the image image = utils.imread(filename=img_path, as_gray=False) # Separating layers red_image = image[:, :, 0] green_image = image[:, :, 1] blue_image = image[:, :, 2] # Yellow layer yellow_image = red_image.astype(np.float64) + green_image.astype( np.float64) - blue_image.astype(np.float64) yellow_image = utils.to_uint8(yellow_image / (255.0 + 255.0)) # Denoising d_lambda = args.d_lambda if d_lambda > 0: den_image = (restoration.denoise_tv_chambolle( image=yellow_image, weight=d_lambda) * 255).astype(np.uint8)