def test_single():
    with open('../test/depth_gt.npy', 'rb') as f:
        depth_gt = np.load(f)
    with open('../test/depth_res.npy', 'rb') as f:
        depth_res = np.load(f)

    vis = False
    params = sun3d.set_params()
    if not np.all(depth_gt.shape == depth_res.shape):
        depth_gt = cv2.resize(depth_gt,
                              (depth_res.shape[1], depth_res.shape[0]),
                              interpolation=cv2.INTER_NEAREST)

    sample_rate = [0.01, 0.05, 0.1, 0.2, 0.4, 0.8]
    acc = np.zeros(len(sample_rate), dtype=np.float32)
    uts.plot_images({'image': depth_gt})
    acc_o = eval_depth([depth_res], [depth_gt])

    for i, rate in enumerate(sample_rate):
        depth_gt_down = uts_3d.down_sample_depth(depth_gt,
                                                 method='uniform',
                                                 percent=rate,
                                                 K=params['intrinsic'])
        depth = uts_3d.xyz2depth(depth_gt_down, params['intrinsic'],
                                 depth_gt.shape)
        depth_up = upsampler.LaplacianDeform(depth_res, depth_gt_down,
                                             params['intrinsic'], False)
        acc[i] = eval_depth([depth_up], [depth_gt])

    if vis:
        plot_figure(np.append(0, sample_rate), np.append(acc_o, acc),
                    'depth_acc', 'sample rate', 'relative l1 error')
    else:
        print "rates: {}, thresholds {}".format(sample_rate, acc)
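# `eval_depth` is not defined in this file. The plot labels above suggest it
# computes a mean relative L1 error; a minimal sketch under that assumption
# (the project's actual implementation may differ):
def eval_depth_sketch(preds, gts, eps=1e-6):
    """Mean of |d_pred - d_gt| / d_gt over valid (d_gt > 0) pixels."""
    errors = []
    for pred, gt in zip(preds, gts):
        valid = gt > eps
        errors.append(np.mean(np.abs(pred[valid] - gt[valid]) / gt[valid]))
    return np.mean(errors)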
def show_pose(self, in_case=None):
    """Show an image pose by rendering the point cloud into the image."""
    self._data_config = self.dataset.get_self_local_config(
        in_case.Road_id, in_case.split)
    cloud_name = '%s/%s/pc_sub.pcd' % (self._data_config['cloud_dir'],
                                       in_case.time_id)
    proj = pj.pyRenderPCD(cloud_name,
                          self.shader['vertex'],
                          self.shader['geometry'],
                          self.shader['fragment'],
                          self.image_size[0],
                          self.image_size[1],
                          in_case.with_label)
    intr = self._to_proj_intr(
        self._data_config['intrinsic'][in_case.camera_name],
        self.image_size[0], self.image_size[1])
    ext = self._to_proj_mat(in_case.pose[:3], in_case.pose[3:])
    label, depth = proj.pyRenderToRGBDepth(intr, ext)

    image_path = '%s/%s/%s/Camera %s/%s.jpg' % (
        self._data_config['image_dir'], in_case.time_id, in_case.record_id,
        in_case.camera_name[-1], in_case.image_name)
    image = cv2.imread(image_path)
    assert image is not None, 'failed to read %s' % image_path
    image = cv2.resize(image, (self.image_size[1], self.image_size[0]))

    uts.plot_images({'image': np.uint8(image),
                     'depth': depth,
                     'mask': label},
                    layout=[1, 3])
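# `_to_proj_intr` is defined elsewhere in this class. A plausible sketch of the
# conversion it appears to perform -- turning the normalized [fx, fy, cx, cy]
# intrinsics used throughout these snippets into a pixel-space 3x3 camera
# matrix. The exact convention is an assumption:
def to_proj_intr_sketch(intr, height, width):
    fx, fy, cx, cy = intr  # normalized by image width/height
    return np.array([[fx * width, 0.0, cx * width],
                     [0.0, fy * height, cy * height],
                     [0.0, 0.0, 1.0]], dtype=np.float32)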
def gen_img_pair_data(scene, pair_num, id_img2depth):
    # For each scene and each image, generate pairs of images.
    K = np.loadtxt(DATA_PATH + scene + '/intrinsics.txt')
    extrinsic_file = pd_util.preprocess_util.list_files(
        DATA_PATH + scene + '/extrinsics/')
    extrinsic_file.sort()
    extrinsic = np.reshape(np.loadtxt(extrinsic_file[-1]), (-1, 3, 4))

    # keep the original order
    id_img2depth = OrderedDict(
        sorted(id_img2depth.items(), key=lambda t: t[0]))
    image_names = id_img2depth.keys()

    for i in range(0, len(image_names) - 30, 10):
        pair_id = np.random.choice(range(10, 30), 10, replace=False)
        for j in pair_id:
            image_path1 = DATA_PATH + scene + '/image/' + \
                image_names[i] + '.jpg'
            image_path2 = DATA_PATH + scene + '/image/' + \
                image_names[i + j] + '.jpg'
            depth_path1 = DATA_PATH + scene + '/depth/' + \
                id_img2depth[image_names[i]] + '.png'
            depth_path2 = DATA_PATH + scene + '/depth/' + \
                id_img2depth[image_names[i + j]] + '.png'

            # try:
            image1 = np.array(uts.load_image(image_path1))
            image2 = np.array(uts.load_image(image_path2))
            depth1 = uts.read_depth(depth_path1)
            depth2 = uts.read_depth(depth_path2)
            # except:
            #     continue

            print "image1 name: {}, image2 name: {}, " \
                  "depth1 name: {}, depth2 name: {}".format(
                      image_names[i], image_names[i + j],
                      id_img2depth[image_names[i]],
                      id_img2depth[image_names[i + j]])

            flow, is_valid = get_opt_flow(depth1, depth2, K,
                                          extrinsic[i, :, :],
                                          extrinsic[i + j, :, :],
                                          True, image1, image2)
            is_valid = False  # debug override: saving below is skipped
            uts.plot_images(
                OrderedDict([('image1', image1),
                             ('image2', image2),
                             ('flowu', flow[:, :, 0]),
                             ('flowv', flow[:, :, 1])]))
            # print is_valid

            if is_valid:
                flow_file = FLOW_PATH + scene + '/flow/' + \
                    image_names[i] + '_' + image_names[i + j] + '.pkl'
                print 'saving ' + flow_file
                with open(flow_file, 'wb') as f:
                    pkl.dump(flow, f, -1)
def showAnn(self, image_name):
    """Show the annotation of a pose file in an image
    Input:
        image_name: the name of the image
    Output:
        depth: a rendered depth map of each car
        masks: an instance mask of the label
        image_vis: an image showing the overlap of car models and image
    """
    car_pose_file = '%s/%s.json' % (self._data_config['pose_dir'], image_name)
    with open(car_pose_file) as f:
        car_poses = json.load(f)
    image_file = '%s/%s.jpg' % (self._data_config['image_dir'], image_name)
    image = cv2.imread(image_file, cv2.IMREAD_UNCHANGED)[:, :, ::-1]

    # the intrinsics used here are all from Camera_5
    intrinsic = self.dataset.get_intrinsic(image_name, 'Camera_5')
    image, self.intrinsic = self.rescale(image, intrinsic)
    self.depth = self.MAX_DEPTH * np.ones(self.image_size)
    self.mask = np.zeros(self.depth.shape)

    for i, car_pose in enumerate(car_poses):
        car_name = car_models.car_id2name[car_pose['car_id']].name
        depth, mask = self.render_car(car_pose['pose'], car_name)
        self.mask, self.depth = self.merge_inst(depth, i + 1, self.mask,
                                                self.depth)

    self.depth[self.depth == self.MAX_DEPTH] = -1.0
    image = 0.5 * image
    for i in range(len(car_poses)):
        frame = np.float32(self.mask == i + 1)
        frame = np.tile(frame[:, :, None], (1, 1, 3))
        image = image + frame * 0.5 * self.colors[i, :]

    uts.plot_images({'image_vis': np.uint8(image),
                     'depth': self.depth,
                     'mask': self.mask},
                    layout=[1, 3])
    return image, self.mask, self.depth
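# `merge_inst` is defined elsewhere in this class. Its call pattern above
# suggests a z-buffer merge: keep the nearest depth per pixel and write the
# instance id into the mask. A sketch under that assumption (it also assumes
# the rendered depth equals MAX_DEPTH outside the car):
def merge_inst_sketch(depth_new, inst_id, mask, depth):
    closer = depth_new < depth           # pixels where the new car is in front
    mask[closer] = inst_id               # record the instance id
    depth[closer] = depth_new[closer]    # update the z-buffer
    return mask, depth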
def test_extend_building():
    image = np.zeros((10, 10), dtype=np.int32)
    building_id = 1
    sky_id = 2
    image[0, 0] = 1
    image[2, 0] = 2
    image[9, 1] = 1
    image_o = cv2.imread(
        '/home/peng/Data/zpark/Label/Record001/Camera_1/170427_222949577_Camera_1.png',
        cv2.IMREAD_UNCHANGED)
    # image_o = cv2.resize(image, (100, 100), interpolation=cv2.INTER_NEAREST)
    print image

    building_id = 25
    sky_id = 1
    image = cut.extend_building(np.int32(image_o), building_id, sky_id)
    print image
    uts.plot_images({'image_o': image_o, 'image_1': image})
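# `cut.extend_building` is external and its exact rule is not shown here. A
# loosely hedged sketch, assuming it grows building labels upward through sky
# pixels column by column (the ids match the call above; the fill rule itself
# is a guess):
def extend_building_sketch(label, building_id, sky_id):
    out = label.copy()
    for c in range(out.shape[1]):
        col = out[:, c]
        rows = np.where(col == building_id)[0]
        if rows.size:
            top = rows[0]
            col[:top][col[:top] == sky_id] = building_id  # fill sky above
    return out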
def test_img():
    # depth = np.load('/home/peng/Data/visualization.npy')
    depth = cv2.imread('/home/peng/Data/kitti/000000_10.png')
    print np.amax(depth)

    mask = depth[:, :, 0] == 0
    depth = np.float32(1.0 / depth[:, :, 0]) * 1000
    depth[mask] = 0.0
    # depth = np.float32(1. / depth[:, :, 0])
    height, width = depth.shape
    # depth = cv2.resize(depth, (width / 3, height / 3))

    # intrinsic = np.array([1, 1, width / 2, height / 2], dtype=np.float32)
    intrinsic = np.array([959.0 / width, 957.0 / height,
                          696.0 / width, 224.0 / height], dtype=np.float32)
    normal = cut.depth2normals_np(depth, intrinsic)
    normal = normal.transpose([1, 2, 0])
    normal[:, :, [1, 2]] *= -1

    # uts.plot_images(OrderedDict([('depth', depth),
    #                              ('normal', (normal + 1.0) / 2.)]),
    #                 layout=[2, 1])
    uts.plot_images(OrderedDict([('depth', depth), ('normal', normal)]),
                    layout=[2, 1])
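# `cut.depth2normals_np` is external; a minimal numpy sketch of the standard
# recipe it presumably follows: back-project depth to a point map with the
# normalized intrinsics, then cross the horizontal and vertical tangents.
# Output layout and sign conventions are assumptions (this one returns
# (H, W, 3) directly instead of (3, H, W)):
def depth2normals_sketch(depth, intrinsic):
    h, w = depth.shape
    fx, fy, cx, cy = intrinsic * np.array([w, h, w, h])
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    x = (u - cx) / fx * depth
    y = (v - cy) / fy * depth
    points = np.stack([x, y, depth], axis=-1)
    du = np.gradient(points, axis=1)   # tangent along image columns
    dv = np.gradient(points, axis=0)   # tangent along image rows
    normal = np.cross(du, dv)
    norm = np.linalg.norm(normal, axis=-1, keepdims=True) + 1e-12
    return normal / norm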
def test_geowarp():
    image_path1 = '/home/peng/Data/sun3d/brown_bm_1/' + \
                  'brown_bm_1/image/0001761-000059310235.jpg'
    image1 = cv2.imread(image_path1)
    with open('../test/depth_gt.npy', 'rb') as f:
        depth_gt = np.load(f)
    with open('../test/depth_res.npy', 'rb') as f:
        depth_res = np.load(f)

    if not np.all(depth_gt.shape == depth_res.shape):
        depth_gt = cv2.resize(depth_gt,
                              (depth_res.shape[1], depth_res.shape[0]),
                              interpolation=cv2.INTER_NEAREST)

    params = sun3d.set_params()
    rate = 0.05
    height, width = depth_gt.shape[0], depth_gt.shape[1]
    depth_gt_down = uts_3d.down_sample_depth(depth_gt,
                                             method='uniform',
                                             percent=rate,
                                             K=params['intrinsic'])
    depth = uts_3d.xyz2depth(depth_gt_down, params['intrinsic'],
                             depth_gt.shape)
    depth_up = LaplacianDeform(depth_res, depth_gt_down,
                               params['intrinsic'], True)

    inputs = d_net.get_demon_inputs(params)  # missing in the original snippet
    outputs, out_field = d_net.get_demon_outputs(inputs, params,
                                                 ext_inputs=None)
    parameters, topo = paddle.parameters.create(outputs[out_field])

    # `mask` was never defined in the original; assume it marks pixels that
    # received a sparse ground-truth sample
    mask = np.float32(depth > 0)
    uts.plot_images(OrderedDict([('image', image1),
                                 ('depth_gt', depth_gt),
                                 ('depth_down', depth),
                                 ('depth_res', depth_res),
                                 ('mask', mask),
                                 ('depth_up', depth_up)]),
                    layout=[4, 2])
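# `uts_3d.down_sample_depth` / `uts_3d.xyz2depth` are external; minimal
# sketches of the behaviour the callers rely on -- uniformly sample `percent`
# of the valid depth pixels, back-project them with the normalized intrinsics,
# and splat such a point set back into a sparse depth map. Details are assumed:
def down_sample_depth_sketch(depth, percent, K):
    h, w = depth.shape
    fx, fy, cx, cy = K * np.array([w, h, w, h])
    v, u = np.where(depth > 0)
    keep = np.random.choice(len(u), int(len(u) * percent), replace=False)
    u, v = u[keep], v[keep]
    d = depth[v, u]
    x = (u - cx) / fx * d
    y = (v - cy) / fy * d
    return np.stack([x, y, d], axis=1)  # [N, 3] xyz points

def xyz2depth_sketch(xyz, K, shape):
    h, w = shape
    fx, fy, cx, cy = K * np.array([w, h, w, h])
    depth = np.zeros(shape, dtype=np.float32)
    u = np.round(xyz[:, 0] / xyz[:, 2] * fx + cx).astype(int)
    v = np.round(xyz[:, 1] / xyz[:, 2] * fy + cy).astype(int)
    valid = (u >= 0) & (u < w) & (v >= 0) & (v < h)
    depth[v[valid], u[valid]] = xyz[valid, 2]
    return depth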
def test(cfg,
         data,
         weights=None,
         batch_size=16,
         img_size=608,
         iou_thres=0.5,
         conf_thres=0.001,
         nms_thres=0.5,
         save_json=True,
         hyp=None,
         model=None,
         single_cls=False):
    """Test the metrics of the trained model.

    :param str cfg: model cfg file
    :param str data: data dict
    :param str weights: weights path
    :param int batch_size: batch size
    :param int img_size: image size
    :param float iou_thres: iou threshold
    :param float conf_thres: confidence threshold
    :param float nms_thres: nms threshold
    :param bool save_json: whether to save detections as a COCO-format JSON file
    :param str hyp: hyperparameter
    :param str model: yolov4 model
    :param bool single_cls: only one class
    :return: results
    """
    if model is None:
        device = select_device(opt.device)
        verbose = False

        # Initialize model
        model = Model(cfg, img_size).to(device)

        # Load weights
        if weights.endswith('.pt'):
            checkpoint = torch.load(weights, map_location=device)
            state_dict = intersect_dicts(checkpoint['model'],
                                         model.state_dict())
            model.load_state_dict(state_dict, strict=False)
        elif len(weights) > 0:
            load_darknet_weights(model, weights)
        print(f'Loaded weights from {weights}!')

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device
        verbose = False

    test_path = data['valid']
    num_classes, names = (1, ['item']) if single_cls else (
        int(data['num_classes']), data['names'])

    # Dataloader
    dataset = LoadImagesAndLabels(test_path, img_size, batch_size, hyp=hyp)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=8,
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    output_format = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets',
                                             'Pre', 'Rec', 'mAP', 'F1')
    precision, recall, f_1, mean_pre, mean_rec, mean_ap, mf1 = \
        0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    json_dict, stats, aver_pre, ap_class = [], [], [], []

    for batch_i, (imgs, targets, paths, shapes) in enumerate(
            tqdm(dataloader, desc=output_format)):
        targets = targets.to(device)
        imgs = imgs.to(device) / 255.0
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Plot images with bounding boxes
        if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
            plot_images(imgs=imgs,
                        targets=targets,
                        paths=paths,
                        fname='test_batch0.jpg')

        with torch.no_grad():
            inference_output, train_output = model(imgs)

            if hasattr(model, 'hyp'):  # if model has loss hyperparameters
                loss += compute_loss(train_output, targets,
                                     model)[1][:3].cpu()  # GIoU, obj, cls

            output = non_max_suppression(inference_output,
                                         conf_thres=conf_thres,
                                         nms_thres=nms_thres)

        # Statistics per image
        for i, pred in enumerate(output):
            labels = targets[targets[:, 0] == i, 1:]
            num_labels = len(labels)
            target_class = labels[:, 0].tolist() if num_labels else []
            seen += 1

            if pred is None:
                if num_labels:
                    stats.append(
                        ([], torch.Tensor(), torch.Tensor(), target_class))
                continue

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[i]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(imgs[i].shape[1:], box,
                             shapes[i][0])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for det_i, det in enumerate(pred):
                    json_dict.append({
                        'image_id': image_id,
                        'category_id': coco91class[int(det[6])],
                        'bbox': [float(format(x, '.%gf' % 3))
                                 for x in box[det_i]],
                        'score': float(format(det[4], '.%gf' % 5))
                    })

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if num_labels:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for j, (*pbox, _, _, pcls) in enumerate(pred):
                    # Break if all targets already located in image
                    if len(detected) == num_labels:
                        break
                    # Continue if predicted class not among image classes
                    if pcls.item() not in target_class:
                        continue
                    # Best iou, index between pred and targets
                    mask = (pcls == tcls_tensor).nonzero(
                        as_tuple=False).view(-1)
                    iou, best_iou = bbox_iou(pbox, tbox[mask]).max(0)
                    # If iou > threshold and class is correct, mark as correct
                    if iou > iou_thres and mask[best_iou] not in detected:
                        correct[j] = 1
                        detected.append(mask[best_iou])

            # Append statistics (correct, conf, pcls, target_class)
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), target_class))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]
    if len(stats):
        precision, recall, aver_pre, f_1, ap_class = ap_per_class(*stats)
        mean_pre, mean_rec, mean_ap, mf1 = (precision.mean(), recall.mean(),
                                            aver_pre.mean(), f_1.mean())
        num_targets = np.bincount(
            stats[3].astype(np.int64),
            minlength=num_classes)  # number of targets per class
    else:
        num_targets = torch.zeros(1)

    # Print results
    print_format = '%20s' + '%10.3g' * 6
    print(print_format % ('all', seen, num_targets.sum(), mean_pre, mean_rec,
                          mean_ap, mf1))

    # Print results per class
    if verbose and num_classes > 1 and stats:
        for i, class_ in enumerate(ap_class):
            print(print_format % (names[class_], seen, num_targets[class_],
                                  precision[i], recall[i], aver_pre[i],
                                  f_1[i]))

    # Save JSON
    if save_json and mean_ap and json_dict:
        try:
            img_ids = [int(Path(x).stem.split('_')[-1])
                       for x in dataset.img_files]
            with open('results.json', 'w') as file:
                json.dump(json_dict, file)

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocogt = COCO('data/coco/annotations/instances_val2017.json'
                          )  # initialize COCO ground truth api
            cocodt = cocogt.loadRes('results.json')  # initialize COCO pred api

            cocoeval = COCOeval(cocogt, cocodt, 'bbox')
            cocoeval.params.imgIds = img_ids  # [:32]  # only evaluate these images
            cocoeval.evaluate()
            cocoeval.accumulate()
            cocoeval.summarize()
            mean_ap = cocoeval.stats[1]  # update mAP to pycocotools mAP
        except ImportError:
            print('WARNING: missing dependency pycocotools from '
                  'requirements.txt. Can not compute official COCO mAP.')

    # Return results
    maps = np.zeros(num_classes) + mean_ap
    for i, class_ in enumerate(ap_class):
        maps[class_] = aver_pre[i]
    return (mean_pre, mean_rec, mean_ap, mf1,
            *(loss / len(dataloader)).tolist()), maps
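# `intersect_dicts` comes from the repo's utils; a minimal sketch of the usual
# behaviour (assumed here): keep only checkpoint tensors whose name and shape
# match the model's state_dict, so partially compatible checkpoints load with
# strict=False.
def intersect_dicts_sketch(da, db):
    return {k: v for k, v in da.items() if k in db and v.shape == db[k].shape}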
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""

    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = './yolov3.onnx'
    engine_file_path = 'yolov3.trt'
    data_path = './data/unrel.data'

    data = parse_data_cfg(data_path)
    nc = int(data['classes'])  # number of classes
    path = data['valid']  # path to test images
    names = load_classes(data['names'])  # class names

    iouv = torch.linspace(0.5, 0.95, 1,
                          dtype=torch.float32)  # iou vector for mAP@0.5:0.95
    niou = 1

    conf_thres = 0.001
    iou_thres = 0.6
    verbose = True

    # Generate custom dataloader
    img_size = 448  # copied from pytorch src
    batch_size = 16

    dataset = LoadImagesAndLabels(path, img_size, batch_size, rect=True)
    batch_size = min(batch_size, len(dataset))
    dataloader = data_loader(dataset, batch_size, img_size)

    # Output shapes expected by the post-processor
    output_shapes = [(16, 126, 14, 14), (16, 126, 28, 28), (16, 126, 56, 56)]

    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                     'mAP@0.5', 'F1')
        p, r, f1, mp, mr, map, mf1, t0, t1 = \
            0., 0., 0., 0., 0., 0., 0., 0., 0.
        pbar = tqdm.tqdm(dataloader, desc=s)
        stats, ap, ap_class = [], [], []
        seen = 0

        for batch_i, (imgs, targets, paths, shapes) in enumerate(pbar):
            imgs = imgs.astype(np.float32) / 255.0
            nb, _, height, width = imgs.shape  # batch size, channels, height, width
            whwh = np.array([width, height, width, height])

            inputs[0].host = imgs

            postprocessor_args = {
                # A list of 3 three-dimensional tuples for the YOLO masks
                "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
                # A list of 9 two-dimensional tuples for the YOLO anchors
                "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61),
                                 (62, 45), (59, 119), (116, 90), (156, 198),
                                 (373, 326)],
                "num_classes": 37,
                "stride": [32, 16, 8]
            }
            postprocessor = PostprocessYOLO(**postprocessor_args)

            # Do layers before yolo
            t = time.time()
            trt_outputs = common.do_inference_v2(context,
                                                 bindings=bindings,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 stream=stream)
            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]
            trt_outputs = [
                np.ascontiguousarray(
                    otpt[:, :, :int(imgs.shape[2] * (2 ** i) / 32),
                         :int(imgs.shape[3] * (2 ** i) / 32)],
                    dtype=np.float32) for i, otpt in enumerate(trt_outputs)
            ]
            output_list = postprocessor.process(trt_outputs)
            t0 += time.time() - t

            inf_out = torch.cat(output_list, 1)
            t = time.time()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres)  # nms
            t1 += time.time() - t

            # Statistics per image
            for si, pred in enumerate(output):
                labels = targets[targets[:, 0] == si, 1:]
                nl = len(labels)
                tcls = labels[:, 0].tolist() if nl else []  # target class
                seen += 1

                if pred is None:
                    if nl:
                        stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                      torch.Tensor(), torch.Tensor(), tcls))
                    continue

                # Assign all predictions as incorrect
                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
                if nl:
                    detected = []  # detected target indices
                    tcls_tensor = labels[:, 0]

                    # target boxes
                    tbox = xywh2xyxy(labels[:, 1:5]) * whwh
                    tbox = tbox.type(torch.float32)

                    # Per target class
                    for cls in torch.unique(tcls_tensor):
                        ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                        pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices

                        # Search for detections
                        if pi.shape[0]:
                            # Prediction to target ious
                            ious, i = box_iou(pred[pi, :4],
                                              tbox[ti]).max(1)  # best ious, indices

                            # Append detections
                            for j in (ious > iouv[0]).nonzero():
                                d = ti[i[j]]  # detected target
                                if d not in detected:
                                    detected.append(d)
                                    correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                    if len(detected) == nl:
                                        # all targets already located in image
                                        break

                # Append statistics (correct, conf, pcls, tcls)
                stats.append((correct.cpu(), pred[:, 4].cpu(),
                              pred[:, 5].cpu(), tcls))

            # Plot images
            if batch_i < 1:
                f = 'test_batch%g_gt.jpg' % batch_i  # filename
                plot_images(imgs, targets, paths=paths, names=names,
                            fname=f)  # ground truth
                f = 'test_batch%g_pred.jpg' % batch_i
                plot_images(imgs, output_to_target(output, width, height),
                            paths=paths, names=names, fname=f)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        if niou > 1:
            p, r, ap, f1 = (p[:, 0], r[:, 0], ap.mean(1),
                            ap[:, 0])  # [P, R, mAP@0.5:0.95, mAP@0.5]
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Print speeds
    if verbose:
        t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (
            img_size, img_size, batch_size)  # tuple
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total '
              'per %gx%g image at batch-size %g' % t)
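# `box_iou` is imported from the repo's utils; a standard pairwise-IoU sketch
# for [N, 4] / [M, 4] boxes in xyxy format, matching how it is called above:
def box_iou_sketch(box1, box2):
    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
    lt = torch.max(box1[:, None, :2], box2[:, :2])  # [N, M, 2] top-left
    rb = torch.min(box1[:, None, 2:], box2[:, 2:])  # [N, M, 2] bottom-right
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, :, 0] * wh[:, :, 1]
    return inter / (area1[:, None] + area2 - inter)  # [N, M] IoU matrix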
def get_train_valid_loader(data_dir,
                           batch_size,
                           random_seed,
                           valid_size=0.1,
                           shuffle=True,
                           show_sample=False,
                           num_workers=4,
                           pin_memory=False):
    """
    Utility function for loading and returning train and valid
    multi-process iterators over the MNIST dataset. A 3x3 grid
    of 9 sample images can be optionally displayed.

    If using CUDA, num_workers should be set to 1 and pin_memory to True.

    Args
    ----
    - data_dir: path directory to the dataset.
    - batch_size: how many samples per batch to load.
    - random_seed: fix seed for reproducibility.
    - valid_size: percentage split of the training set used for
      the validation set. Should be a float in the range [0, 1].
      In the paper, this number is set to 0.1.
    - shuffle: whether to shuffle the train/validation indices.
    - show_sample: plot a 3x3 sample grid of the dataset.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.

    Returns
    -------
    - train_loader: training set iterator.
    - valid_loader: validation set iterator.
    """
    error_msg = "[!] valid_size should be in the range [0, 1]."
    assert ((valid_size >= 0) and (valid_size <= 1)), error_msg

    # define transforms
    normalize = transforms.Normalize((0.1307,), (0.3081,))
    trans = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    # load dataset
    dataset = datasets.MNIST(data_dir,
                             train=True,
                             download=True,
                             transform=trans)

    num_train = len(dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=train_sampler,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )
    valid_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=valid_sampler,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

    # visualize some images
    if show_sample:
        sample_loader = torch.utils.data.DataLoader(dataset,
                                                    batch_size=9,
                                                    shuffle=shuffle,
                                                    num_workers=num_workers,
                                                    pin_memory=pin_memory)
        data_iter = iter(sample_loader)
        images, labels = next(data_iter)
        X = images.numpy()
        X = np.transpose(X, [0, 2, 3, 1])
        plot_images(X, labels)

    return (train_loader, valid_loader)
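# Minimal usage sketch for the loader above. './data' is a hypothetical path;
# MNIST is downloaded there on first use.
if __name__ == '__main__':
    train_loader, valid_loader = get_train_valid_loader(
        data_dir='./data', batch_size=128, random_seed=1,
        valid_size=0.1, shuffle=True, num_workers=4,
        pin_memory=torch.cuda.is_available())
    images, labels = next(iter(train_loader))
    print(images.shape)  # torch.Size([128, 1, 28, 28])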
def check_diff():
    # PaddlePaddle init
    paddle.init(use_gpu=True, gpu_id=FLAGS.gpu_id)
    # paddle.init(use_gpu=False)

    # setting parameters
    params = sun3d.set_params('sun3d')
    params['stage'] = 5
    layout = [2, 3]
    cur_level = 0

    inputs = d_net.get_demon_inputs(params)
    # define several external inputs here to avoid implementation differences
    inputs.update(
        d_net.get_cnn_input("image2_down", params['size_stage'][1], 3))
    inputs.update(
        d_net.get_cnn_input("image_warp", params['size_stage'][1], 3))
    inputs.update(
        d_net.get_cnn_input("depth_trans", params['size_stage'][1], 1))
    inputs.update(d_net.get_cnn_input("flow", params['size_stage'][1], 2))

    # Add neural network config
    outputs, out_field = d_net.get_demon_outputs(inputs, params,
                                                 ext_inputs=inputs)

    print('load parameters')
    with gzip.open('./output/' + FLAGS.model, 'r') as f:
        parameters_init = paddle.parameters.Parameters.from_tar(f)
    # print parameters_init.names()
    parameters = paddle.parameters.create(outputs[out_field])
    for name in parameters.names():
        # print "setting parameter {}".format(name)
        parameters.set(name, parameters_init.get(name))

    # load the input from a saved example
    res_folder = 'output/example_output/'
    with open(res_folder + 'img_pair', 'rb') as f:
        tf_pair = np.load(f)
    tf_pair = tf_pair.squeeze()
    with open(res_folder + 'image2_down', 'rb') as f:
        image2_down = np.load(f)
    image2_down = image2_down.squeeze()
    intrinsic = np.array([0.89115971, 1.18821287, 0.5, 0.5])

    # load some extra inputs
    names = ['flow', 'depth', 'normal', 'rotation', 'translation']
    tf_names = ['predict_flow2', 'predict_depth2', 'predict_normal2',
                'predict_rotation', 'predict_translation']
    start_id = range(4, 4 + len(names))
    input_name_match = dict(zip(names, tf_names))
    results_names = dict(zip(names, start_id))
    boost_results = load_tf_boost_results(res_folder, input_name_match,
                                          params['stage'])

    test_data = [tf_pair[:3, :, :].flatten(),
                 tf_pair[3:, :, :].flatten(),
                 image2_down.flatten(),
                 intrinsic]
    test_data = [tuple(test_data + boost_results)]
    feeding = {'image1': 0, 'image2': 1, 'image2_down': 2, 'intrinsic': 3}
    feeding.update(results_names)

    # img_diff1 = tf_pair[:3, :, :] - image1_new.reshape(
    #     (3, params['size'][0], params['size'][1]))
    # img_diff1 = img_diff1.transpose((1, 2, 0))
    # uts.plot_images({'img_diff': img_diff1}, layout=[1, 2])
    # print np.sum(np.abs(tf_pair[:3, :, :].flatten() - image1_new))
    # print np.sum(np.abs(tf_pair[3:, :, :].flatten() - image2_new))
    # return

    outputs_list = [outputs[x] for x in outputs.keys()]
    # pdb.set_trace()
    print len(test_data)
    print feeding.keys()
    conv = paddle.infer(output_layer=outputs_list,
                        parameters=parameters,
                        input=test_data,
                        feeding=feeding)

    height_list = [cp.g_layer_map[outputs[x].name].height
                   for x in outputs.keys()]
    width_list = [cp.g_layer_map[outputs[x].name].width
                  for x in outputs.keys()]
    conv = vec2img(inputs=conv, height=height_list, width=width_list)

    blob_name_match = get_name_matching(params['stage'])
    folder = './output/example_output/'

    # for name in outputs.keys()[cur_level:]:
    ob_names = outputs.keys()[cur_level:]
    # ob_names = ['depth_trans', 'geo_out']
    # ob_names = ['depth_0']
    for name in ob_names:
        i = outputs.keys().index(name)
        print name, ' ', blob_name_match[name]
        tf_conv_file = folder + str(params['stage']) + '_' + \
            blob_name_match[name] + '.pkl'
        with open(tf_conv_file, 'rb') as f:
            tf_conv = np.load(f)
        print conv[i].shape, ' ', tf_conv.shape

        diff = conv[i] - tf_conv
        if len(diff.shape) <= 1:
            print '{} and {}, {}'.format(conv[i], tf_conv, diff)
        else:
            if len(diff.shape) == 2:
                diff = diff[:, :, np.newaxis]
            vis_dict = []
            for j in range(min(diff.shape[2], layout[0] * layout[1])):
                vis_dict.append(('diff_' + str(j), diff[:, :, j]))
            vis_dict = OrderedDict(vis_dict)
            uts.plot_images(OrderedDict(vis_dict), layout=layout)
        camera_matrix, distortion)
    # calculate the new targets
    new_targets = get_new_targets(new_bbox_coords_matrix, width, height)
    return img, new_targets, new_bbox_coords_matrix


if __name__ == "__main__":
    dataset = LoadImagesAndLabels(
        "data/3 class ground and light/train/train_paths.txt", 640,
        augment=True)
    dataloader = torch.utils.data.DataLoader(dataset, 1,
                                             collate_fn=dataset.collate_fn)
    for imgs, targets, img_path, res in dataloader:
        imgs = imgs.to(device)
        targets = targets.to(device)
        plot_images(imgs=imgs, targets=targets)

    # for i in range(10):
    #     im = Image.open("data/inside/train/images/image{}.png".format(i))
    #     with open('data/inside/train/labels/image{}.txt'.format(i), 'r') as f:
    #         targets = [[float(num) for num in line.split(' ')] for line in f]
    #     im, targets, new_bbox_coords_matrix = fisheye_augmentation(im, targets)
    #     im = Image.fromarray(im)
    #     plot_image(im, targets, new_bbox_coords_matrix)
def train():
    # 0. Initialize parameters (set random seed, get cfg info, ...)
    cfg = opt.cfg
    weights = opt.weights
    img_size = opt.img_size
    batch_size = opt.batch_size
    total_epochs = opt.epochs
    init_seeds()
    data = parse_data_cfg(opt.data)
    train_txt_path = data['train']
    valid_txt_path = data['valid']
    nc = int(data['classes'])

    # 0. Print config info, write logs, etc.
    print('config file:', cfg)
    print('pretrained weights:', weights)

    # 1. Load the model
    model = Darknet(cfg).to(device)

    if weights.endswith('.pt'):
        ### model.load_state_dict(torch.load(weights)['model'])
        # Why the line above fails: it does not account for the layer whose
        # class count differs (the layer right before the yolo_layer). It
        # raises e.g. "size mismatch for module_list.81.Conv2d.weight: copying
        # a param with shape torch.Size([255, 1024, 1, 1]) from checkpoint,
        # the shape in current model is torch.Size([75, 1024, 1, 1])".
        # TODO: map_location=device ?
        chkpt = torch.load(weights, map_location=device)
        try:
            chkpt['model'] = {
                k: v for k, v in chkpt['model'].items()
                if model.state_dict()[k].numel() == v.numel()
            }
            model.load_state_dict(chkpt['model'], strict=False)
            # model.load_state_dict(chkpt['model'])
        except KeyError as e:
            s = "%s is not compatible with %s" % (opt.weights, opt.cfg)
            raise KeyError(s) from e

        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file(
            'anchors:\n' +
            repr(model.module_defs[model.yolo_layers[0]]['anchors']),
            log_file_path)
    elif weights.endswith('.pth'):
        # e.g. 'https://download.pytorch.org/models/resnet50-19c8e357.pth'
        model_state_dict = model.state_dict()
        chkpt = torch.load(weights, map_location=device)
        # try:
        state_dict = {}
        block_cnt = 0
        fc_item_num = 2
        chkpt_keys = list(chkpt.keys())
        model_keys = list(model.state_dict().keys())
        model_values = list(model.state_dict().values())
        for i in range(len(chkpt_keys) - fc_item_num):  # 102 - 2
            if i % 5 == 0:
                state_dict[model_keys[i + block_cnt]] = chkpt[chkpt_keys[i]]
            elif i % 5 == 1 or i % 5 == 2:
                state_dict[model_keys[i + block_cnt + 2]] = \
                    chkpt[chkpt_keys[i]]
            elif i % 5 == 3 or i % 5 == 4:
                state_dict[model_keys[i + block_cnt - 2]] = \
                    chkpt[chkpt_keys[i]]
                if i % 5 == 4:
                    block_cnt += 1
                    state_dict[model_keys[i + block_cnt]] = \
                        model_values[i + block_cnt]
        # chkpt['model'] = {k: v for k, v in chkpt['model'].items()
        #                   if model.state_dict()[k].numel() == v.numel()}
        model.load_state_dict(state_dict, strict=False)
        # model.load_state_dict(chkpt['model'])
        # except KeyError as e:
        #     s = "%s is not compatible with %s" % (opt.weights, opt.cfg)
        #     raise KeyError(s) from e

        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file(
            'anchors:\n' +
            repr(model.module_defs[model.yolo_layers[0]]['anchors']),
            log_file_path)
    elif len(weights) > 0:
        # darknet format
        # possible weights are '*.weights', 'yolov3-tiny.conv.15',
        # 'darknet53.conv.74' etc.
        load_darknet_weights(model, weights)
        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file(
            'anchors:\n' +
            repr(model.module_defs[model.yolo_layers[0]]['anchors']),
            log_file_path)
    # else:
    #     raise Exception("pretrained model's path can't be NULL!")

    # 2. Set up the optimizer and learning rate
    start_epoch = 0
    # optimizer = torch.optim.SGD(model.parameters(), lr=lr0,
    #                             momentum=momentum,
    #                             weight_decay=weight_decay, nesterov=True)
    # TODO: nesterov ? weight_decay=0.0005 ?

    # Optimizer
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in dict(model.named_parameters()).items():
        if '.bias' in k:
            pg2 += [v]  # biases
        elif 'Conv2d.weight' in k:
            pg1 += [v]  # apply weight_decay
        else:
            pg0 += [v]  # parameter group 0

    optimizer = torch.optim.SGD(pg0, lr=lr0, momentum=momentum, nesterov=True)
    optimizer.add_param_group(
        {'params': pg1, 'weight_decay': weight_decay})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    del pg0, pg1, pg2

    ###### apex need ######
    if mixed_precision:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1',
                                          verbosity=0)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(
            backend='nccl',  # distributed backend
            init_method='tcp://127.0.0.1:9999',  # distributed training init method
            world_size=1,  # number of nodes for distributed training
            rank=0)  # distributed training node rank
        model = torch.nn.parallel.DistributedDataParallel(
            model, find_unused_parameters=True)
        # clw note: for multi-GPU, wrap with DistributedDataParallel only
        # after amp.initialize(), otherwise it errors out.
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level
    ######

    model.nc = nc

    # step (multi-step) learning-rate schedule
    scheduler = lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[round(total_epochs * x) for x in [0.8, 0.9]],
        gamma=0.1)
    # cosine learning-rate schedule
    # lf = lambda x: (1 + math.cos(x * math.pi / total_epochs)) / 2
    # scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    # 3. Load the dataset
    train_dataset = VocDataset(train_txt_path, img_size, with_label=True)
    dataloader = DataLoader(train_dataset,
                            batch_size=batch_size,
                            shuffle=True,  # TODO: True
                            num_workers=8,  # TODO
                            collate_fn=train_dataset.train_collate_fn,
                            pin_memory=True)

    # 4. Train
    print('')  # blank line
    print('Starting training for %g epochs...' % total_epochs)
    nb = len(dataloader)
    mloss = torch.zeros(4).to(device)  # mean losses
    writer = SummaryWriter()  # tensorboard --logdir=runs, view at http://localhost:6006/
    prebias = start_epoch == 0

    for epoch in range(start_epoch, total_epochs):  # epoch ------------------
        model.train()
        # model.train() is placed here because test.test() at the end of each
        # epoch calls model.eval()

        # # Prebias
        # if prebias:
        #     if epoch < 3:  # prebias
        #         ps = 0.1, 0.9  # prebias settings (lr=0.1, momentum=0.9)
        #     else:  # normal training
        #         ps = lr0, momentum  # normal training settings
        #         print_model_biases(model)
        #         prebias = False
        #
        #     # Bias optimizer settings
        #     optimizer.param_groups[2]['lr'] = ps[0]
        #     if optimizer.param_groups[2].get('momentum') is not None:
        #         # for SGD but not Adam
        #         optimizer.param_groups[2]['momentum'] = ps[1]

        start = time.time()
        title = ('\n' + '%10s' * 11) % (
            'Epoch', 'Batch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total',
            'targets', 'img_size', 'lr', 'time_use')
        print(title)
        # pbar = tqdm(dataloader, ncols=20)
        # ncols can be tuned: as large as possible without causing the bar to
        # wrap, while still looking tidy.
        # for i, (img_tensor, target_tensor, img_path, _) in enumerate(pbar):

        # # Freeze darknet53.conv.74 for first epoch
        # freeze_backbone = False
        # if freeze_backbone and (epoch < 3):
        #     for i, (name, p) in enumerate(model.named_parameters()):
        #         if int(name.split('.')[2]) < 75:  # if layer < 75
        #             # index [2] for multi-GPU, [1] for single GPU
        #             p.requires_grad = False if (epoch < 3) else True

        for i, (img_tensor, target_tensor, img_path, _) in enumerate(dataloader):
            # # SGD burn-in
            # ni = epoch * nb + i
            # if ni <= 1000:  # n_burnin = 1000
            #     lr = lr0 * (ni / 1000) ** 2
            #     for g in optimizer.param_groups:
            #         g['lr'] = lr

            batch_start = time.time()
            # print(img_path)
            img_tensor = img_tensor.to(device)
            target_tensor = target_tensor.to(device)

            # The training step consists of the following sub-steps:
            # (1) forward pass
            # print('img_tensor:', img_tensor[0][1][208][208])
            pred = model(img_tensor)

            # (2) compute the loss
            loss, loss_items = compute_loss(pred, target_tensor, model)
            if not torch.isfinite(loss):
                raise Exception('WARNING: non-finite loss, ending training ',
                                loss_items)

            # (3) backward pass: compute the gradients
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # (4) optimizer: update parameters, then zero the gradients
            # ni = i + nb * epoch  # number integrated batches (since train start)
            # if ni % accumulate == 0:  # Accumulate gradient for x batches before optimizing
            optimizer.step()
            optimizer.zero_grad()

            # Print batch results
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = torch.cuda.memory_cached() / 1E9 \
                if torch.cuda.is_available() else 0  # (GB)
            # s = ('%10s' * 2 + '%10.3g' * 7 + '%10.3gs') % (
            #     '%g/%g' % (epoch, total_epochs - 1), '%.3gG' % mem, *mloss,
            #     len(target_tensor), img_size, scheduler.get_lr()[0],
            #     time.time() - batch_start)
            # s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % (
            #     '%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1),
            #     '%.3gG' % mem, *mloss, len(target_tensor), img_size,
            #     optimizer.state_dict()['param_groups'][0]['lr'],
            #     time.time() - batch_start)
            s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % (
                '%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1),
                '%.3gG' % mem, *mloss, len(target_tensor), img_size,
                scheduler.get_lr()[0], time.time() - batch_start)
            if i % 10 == 0:
                print(s)

            # Plot
            if epoch == start_epoch and i == 0:
                fname = 'train_batch.jpg'  # filename
                cur_path = os.getcwd()
                res = plot_images(images=img_tensor,
                                  targets=target_tensor,
                                  paths=img_path,
                                  fname=os.path.join(cur_path, fname))
                writer.add_image(fname, res, dataformats='HWC',
                                 global_step=epoch)
                # tb_writer.add_graph(model, imgs)  # add model to tensorboard
            # end batch ------------------------------------------------------

        print('time use per epoch: %.3fs' % (time.time() - start))
        write_to_file(title, log_file_path)
        write_to_file(s, log_file_path)

        # Update scheduler
        scheduler.step()

        # compute mAP
        results, maps = test.test(cfg,
                                  'cfg/voc.data',
                                  batch_size=batch_size,
                                  img_size=img_size,
                                  conf_thres=0.05,
                                  iou_thres=0.5,
                                  nms_thres=0.5,
                                  src_txt_path=valid_txt_path,
                                  dst_path='./output',
                                  weights=None,
                                  model=model,
                                  log_file_path=log_file_path)

        # Tensorboard
        tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5',
                'metrics/F1']
        for x, tag in zip(list(mloss[:-1]) + list(results), tags):
            writer.add_scalar(tag, x, epoch)

        # save the model
        chkpt = {
            'epoch': epoch,
            'model': model.module.state_dict()
            if type(model) is nn.parallel.DistributedDataParallel
            else model.state_dict(),  # clw note: multi-GPU
            'optimizer': optimizer.state_dict()
        }
        torch.save(chkpt, last_model_path)

    print('end')
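# The commented-out cosine schedule above uses a LambdaLR; a self-contained
# demo of the same decay curve (total_epochs=100 and lr0=0.01 are made-up
# values for illustration only):
import math
demo_total_epochs, demo_lr0 = 100, 0.01
lf = lambda x: (1 + math.cos(x * math.pi / demo_total_epochs)) / 2
for demo_epoch in (0, 50, 99):
    print(demo_epoch, demo_lr0 * lf(demo_epoch))  # 0.01 -> 0.005 -> ~2.5e-6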
def test_demo():
    # PaddlePaddle init
    paddle.init(use_gpu=True, gpu_id=FLAGS.gpu_id)
    params = sun3d.set_params()
    inputs = d_net.get_demon_inputs(params)

    params['stage'] = 5
    # Add neural network config
    outputs, out_field = d_net.get_demon_outputs(inputs, params,
                                                 ext_inputs=None)
    parameters, topo = paddle.parameters.create(outputs[out_field])

    # Read image pair 1, 2 and flow
    for scene_name in params['train_scene'][1:]:
        image_list = preprocess_util.list_files(
            params['flow_path'] + scene_name + '/flow/')
        image2depth = sun3d.get_image_depth_matching(scene_name)

        for pair_name in image_list[0:2]:
            image1, image2, flow_gt, depth1_gt, normal1_gt = \
                sun3d.load_image_pair(scene_name, pair_name, image2depth)
            image1_new = uts.transform(image1.copy(),
                                       height=params['size'][0],
                                       width=params['size'][1])
            image2_new = uts.transform(image2.copy(),
                                       height=params['size'][0],
                                       width=params['size'][1])
            intrinsic = np.array([0.89115971, 1.18821287, 0.5, 0.5])

            test_data = [(image1_new, image2_new, intrinsic)]
            depth_name = 'depth' if params['stage'] < 5 else 'depth_0'
            out_fields = ['flow', depth_name, 'normal', 'rotation',
                          'translation']
            # out_fields = ['flow']
            # height_list = [cp.g_layer_map[outputs[x].name].height
            #                for x in ['flow']]
            # width_list = [cp.g_layer_map[outputs[x].name].width
            #               for x in ['flow']]
            output_list = [outputs[x] for x in out_fields]
            flow, depth, normal, rotation, translation = paddle.infer(
                output=topo,
                parameters=parameters,
                input=test_data,
                feeding={'image1': 0, 'image2': 1, 'intrinsic': 2})

            height_list = [cp.g_layer_map[outputs[x].name].height
                           for x in ['flow', depth_name, 'normal']]
            width_list = [cp.g_layer_map[outputs[x].name].width
                          for x in ['flow', depth_name, 'normal']]
            # flow = paddle.infer(output=output_list,
            #                     parameters=parameters,
            #                     input=test_data,
            #                     feeding={'image1': 0,
            #                              'image2': 1,
            #                              'intrinsic': 2})
            # flow = vec2img(inputs=[flow],
            #                height=height_list,
            #                width=width_list)
            # uts.plot_images(OrderedDict([('image1', image1),
            #                              ('image2', image2),
            #                              ('flow', flow),
            #                              ('flow_gt', flow_gt)]),
            #                 layout=[4, 2])
            flow, depth, normal = vec2img(inputs=[flow, depth, normal],
                                          height=height_list,
                                          width=width_list)

            # visualize depth in 3D
            # image1_down = cv2.resize(image1,
            #                          (depth.shape[1], depth.shape[0]))
            # visualize_prediction(
            #     depth=depth,
            #     image=np.uint8(image1_down.transpose([2, 0, 1])),
            #     rotation=rotation,
            #     translation=translation)
            uts.plot_images(OrderedDict([('image1', image1),
                                         ('image2', image2),
                                         ('flow', flow),
                                         ('flow_gt', flow_gt),
                                         ('depth', depth),
                                         ('depth_gt', depth1_gt)]),
                            # ('normal', (normal + 1.0) / 2.),
                            # ('normal_gt', (normal1_gt + 1.0) / 2)]),
                            layout=[4, 2])
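# `vec2img` is defined elsewhere in this project; a minimal sketch of what the
# callers assume -- each flat prediction vector is reshaped to (C, H, W) using
# the layer's height/width, then transposed to (H, W, C) for plotting. Channel
# handling is an assumption:
def vec2img_sketch(inputs, height, width):
    outputs = []
    for vec, h, w in zip(inputs, height, width):
        arr = np.asarray(vec).reshape((-1, h, w)).transpose((1, 2, 0))
        outputs.append(arr.squeeze())
    return outputs if len(outputs) > 1 else outputs[0]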
box_visualizer.draw_normalized_box(decoded_positive_boxes, selected_key)

# drawing generator output
train_keys, validation_keys = split_data(ground_truth_data,
                                         training_ratio=.8)
image_generator = ImageGenerator(ground_truth_data,
                                 prior_box_manager,
                                 1,
                                 image_shape,
                                 train_keys,
                                 validation_keys,
                                 image_prefix,
                                 vertical_flip_probability=0,
                                 horizontal_flip_probability=0.5)
generated_data = next(image_generator.flow(mode='demo'))
generated_input = generated_data[0]['input_1']
generated_output = generated_data[1]['predictions']
transformed_image = np.squeeze(generated_input[0]).astype('uint8')
validation_image_name = image_prefix + validation_keys[0]
original_image = read_image(validation_image_name)
original_image = resize_image(original_image, image_shape)
plot_images(original_image, transformed_image)

# finally draw the assigned boxes given by the generator
generated_encoded_boxes = np.squeeze(generated_output)
generated_boxes = prior_box_manager.decode_boxes(generated_encoded_boxes)
positive_mask = generated_boxes[:, 4] != 1
generated_positive_boxes = generated_boxes[positive_mask]
box_visualizer.draw_normalized_box(generated_positive_boxes,
                                   validation_keys[0])
            # s = ('%10s' * 2 + '%10.3g' * 7 + '%10.3gs') % (
            #     '%g/%g' % (epoch, total_epochs - 1), '%.3gG' % mem, *mloss,
            #     len(target_tensor), img_size, scheduler.get_lr()[0],
            #     time.time() - batch_start)
            # s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % (
            #     '%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1),
            #     '%.3gG' % mem, *mloss, len(target_tensor), img_size,
            #     optimizer.state_dict()['param_groups'][0]['lr'],
            #     time.time() - batch_start)
            s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % (
                '%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1),
                '%.3gG' % mem, *mloss, len(target_tensor), img_size,
                optimizer.param_groups[0]['lr'], time.time() - batch_start)
            # s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % (
            #     '%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1),
            #     '%.3gG' % mem, *mloss, len(target_tensor), img_size,
            #     scheduler.get_lr()[0], time.time() - batch_start)
            # pbar.set_description(s)

            ### for debug ###
            if i % 10 == 0:
                print(s)

            # Plot
            if epoch == start_epoch and i == 0:
                fname = 'train_batch.jpg'  # filename
                cur_path = os.getcwd()
                res = plot_images(images=img_tensor,
                                  targets=target_tensor,
                                  paths=img_path,
                                  fname=os.path.join(cur_path, fname))
                writer.add_image(fname, res, dataformats='HWC',
                                 global_step=epoch)
                # tb_writer.add_graph(model, imgs)  # add model to tensorboard
            # end batch ------------------------------------------------------

        print('clw: time use per epoch: %.3fs' % (time.time() - start))
        write_to_file(title, log_file_path)
        write_to_file(s, log_file_path)

        ### Update scheduler per epoch
        # scheduler.step()

        # compute mAP
        results, maps = test.test(cfg,
def sequential_upsampling(dataset='sun3d',
                          split='train',
                          max_num=None,
                          vis=False):
    # Read image pairs 1, 2 and generate depth
    if dataset == 'sun3d':
        params = sun3d.set_params()
        params['demon_model'] = '../output/tf_model_full_5.tar.gz'
    else:
        print "dataset {} is not supported".format(dataset)
        return  # params would be undefined below

    deep_upsampler = DeepUpSampler(params)
    part, part_id = [int(x) for x in FLAGS.part.split(',')]
    test_ids = partition(len(params[split + '_scene']), part, part_id)
    rate = 0.05
    process_scene_names = [params[split + '_scene'][x] for x in test_ids]

    all_time = 0.
    all_count = 0.
    for scene_name in process_scene_names:
        image_list = preprocess_util.list_files(
            params['flow_path'] + scene_name + '/flow/')
        image2depth = sun3d.get_image_depth_matching(scene_name)
        image_num = len(image_list) if max_num is None \
            else min(len(image_list), max_num)
        image_id = range(0, len(image_list), len(image_list) / image_num)
        upsample_output_path = params['flow_path'] + scene_name + \
            '/pair_depth/' + str(rate) + '/'
        uts.mkdir_if_need(upsample_output_path)
        print "processing {} with images: {}".format(scene_name,
                                                     len(image_id))

        image_name_list = [image_list[x] for x in image_id]
        for pair_name in image_name_list:
            pair_image_name = pair_name.split('/')[-1]
            outfile = upsample_output_path + pair_image_name[:-4] + '.npy'
            # if uts.exists(outfile):
            #     print "\t {} exists".format(pair_name)
            #     continue
            image1, image2, flow_gt, depth_gt = \
                sun3d.load_image_pair(scene_name, pair_name,
                                      image2depth, False)
            print pair_name
            uts.plot_images(OrderedDict([('image', image1),
                                         ('depth_gt', depth_gt)]),
                            layout=[4, 2])
            continue  # debug short-circuit: the upsampling below is skipped

            depth_gt_down = uts_3d.down_sample_depth(depth_gt,
                                                     method='uniform',
                                                     percent=rate,
                                                     K=params['intrinsic'])
            try:
                start_time = time.time()
                print "\t upsampling {}".format(pair_name)
                depth_up = deep_upsampler.UpSample(depth_gt_down,
                                                   [image1, image2])
                np.save(outfile, depth_up)
                print "\t time: {}".format(time.time() - start_time)
                all_time += time.time() - start_time
                all_count += 1
            except:
                print "{} failed".format(pair_name)

            if vis:
                uts.plot_images(OrderedDict([('image', image1),
                                             ('depth_gt', depth_gt),
                                             ('depth_up', depth_up)]),
                                layout=[4, 2])

    print "average run time {}\n".format(all_time / all_count)
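# `partition` is imported elsewhere in this repo; a minimal sketch of the
# contract the callers rely on (split range(n) into `part` contiguous chunks
# and return the indices of chunk `part_id`). The chunking rule is assumed:
def partition_sketch(n, part, part_id):
    size = (n + part - 1) // part  # ceil(n / part)
    return range(part_id * size, min((part_id + 1) * size, n))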
def test_refine_net(dataset='sun3d', split='train', vis=False):
    paddle.init(use_gpu=True, gpu_id=FLAGS.gpu_id)
    params = sun3d.set_params()
    part, part_id = [int(x) for x in FLAGS.part.split(',')]
    test_ids = partition(len(params[split + '_scene']), part, part_id)
    rate = 0.05
    is_inverse = False
    depth_name = 'depth_inv' if is_inverse else 'depth'
    process_scene_names = [params[split + '_scene'][x] for x in test_ids]

    inputs = u_net.get_inputs(params)
    outputs = u_net.refine_net(inputs, params)
    parameters, topo = paddle.parameters.create(outputs[depth_name])
    print('load parameters {}'.format(FLAGS.model))
    with gzip.open(FLAGS.model, 'r') as f:
        parameters = paddle.parameters.Parameters.from_tar(f)
    feeding = {'image1': 0, 'depth': 1}

    for scene_name in process_scene_names:
        id_img2depth = sun3d.get_image_depth_matching(scene_name)
        upsample_output_path = params['flow_path'] + scene_name + \
            '/pair_depth/' + str(rate) + '/'
        prefix_len = len(upsample_output_path)
        image_list = preprocess_util.list_files(upsample_output_path)

        for pair_name in image_list:
            print pair_name
            pair_image_name = pair_name.split('/')[-1]
            outfile = upsample_output_path + pair_image_name[:-4] + '.npy'
            depth_net = np.load(outfile)
            depth_net_in = depth_net.flatten()
            if is_inverse:
                depth_net_in = uts_3d.inverse_depth(depth_net)

            image_name1, _ = pair_image_name.split('_')
            image_path1 = params['data_path'] + scene_name + \
                '/image/' + image_name1 + '.jpg'
            depth_path1 = params['data_path'] + scene_name + '/depth/' + \
                id_img2depth[image_name1] + '.png'
            image1 = cv2.imread(image_path1)
            depth1 = uts.read_depth(depth_path1)

            image1_new = uts.transform(image1.copy(),
                                       height=params['size'][0],
                                       width=params['size'][1])
            test_data = [(image1_new, depth_net_in,)]

            print 'forward'
            depth_out = paddle.infer(output=topo,
                                     parameters=parameters,
                                     input=test_data,
                                     feeding=feeding)
            if is_inverse:
                depth_out = uts_3d.inverse_depth(depth_out)
            depth = uts.vec2img(inputs=depth_out,
                                height=params['size'][0],
                                width=params['size'][1])

            if vis:
                uts.plot_images(OrderedDict([('image', image1),
                                             ('depth1', depth1),
                                             ('depth_net', depth_net),
                                             ('depth', depth)]),
                                layout=[4, 2])
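# `uts_3d.inverse_depth` is external; presumably the usual invertible mapping
# between depth and inverse depth that keeps zeros as invalid. A sketch under
# that assumption:
def inverse_depth_sketch(depth):
    out = np.zeros_like(depth)
    valid = depth > 0
    out[valid] = 1.0 / depth[valid]  # applying it twice recovers the input
    return out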
def showAnn(self,
            image_name,
            if_result=False,
            if_visualize=False,
            if_save=False,
            plot_path='tmp',
            is_training=False):
    """Show the annotation of a pose file in an image
    Input:
        image_name: the name of the image
    Output:
        depth: a rendered depth map of each car
        masks: an instance mask of the label
        image_vis: an image showing the overlap of car models and image
    """
    image_file = '%s/%s.jpg' % (self._data_config['image_dir'], image_name)
    image = cv2.imread(image_file, cv2.IMREAD_UNCHANGED)[:, :, ::-1]
    # print 'Original and rescaled image size: ', image.shape, self.image_size
    intrinsic = self.dataset.get_intrinsic(image_name, 'Camera_5')
    image_rescaled, self.intrinsic = self.rescale(image, intrinsic)

    if is_training:
        car_pose_file = '%s/%s.json' % (
            self._data_config['pose_dir'] if not if_result
            else self._data_config['pose_dir_result'], image_name)
        with open(car_pose_file) as f:
            car_poses = json.load(f)

        self.depth = self.MAX_DEPTH * np.ones(self.image_size)
        self.mask = np.zeros(self.depth.shape)
        self.shape_id_map = np.zeros(self.depth.shape)
        self.pose_map = np.zeros(
            (self.depth.shape[0], self.depth.shape[1], 6)) + np.inf
        self.shape_map = np.zeros(
            (self.depth.shape[0], self.depth.shape[1], 10)) + np.inf
        self.pose_list = []
        self.rot_uvd_list = []
        self.bbox_list = []
        self.shape_id_list = []

        plt.figure(figsize=(20, 10))
        plt.imshow(image_rescaled)
        for i, car_pose in enumerate(car_poses):
            car_name = car_models.car_id2name[car_pose['car_id']].name
            # if if_result:
            #     car_pose['pose'][-1] = 1. / car_pose['pose'][-1]
            depth, mask, vert, K = self.render_car(car_pose['pose'],
                                                   car_name)
            self.mask, self.shape_id_map, self.depth, self.pose_map = \
                self.merge_inst(depth, i + 1, car_pose['car_id'] + 1,
                                self.mask, self.shape_id_map, self.depth,
                                self.pose_map, car_pose['pose'])
            self.pose_list.append(car_pose['pose'])
            self.shape_id_list.append(car_pose['car_id'])

            scale = np.ones((3,))
            car = self.car_models[car_name]
            pose = np.array(car_pose['pose'])
            print 'GT pose: ', pose[3:]
            vert = car['vertices']
            vert = np.zeros((1, 3))
            vert_transformed = uts.project(pose, scale, vert)  # [*, 3]
            print 'Center transformed: ', vert_transformed

            vert_hom = np.hstack(
                (vert_transformed, np.ones((vert.shape[0], 1))))
            K_hom = np.hstack((K, np.zeros((3, 1))))
            proj_uv_hom = np.matmul(K_hom, vert_hom.T)
            proj_uv = np.vstack((proj_uv_hom[0, :] / proj_uv_hom[2, :],
                                 proj_uv_hom[1, :] / proj_uv_hom[2, :]))
            u = proj_uv[0:1, :]  # [1, 1]
            v = proj_uv[1:2, :]
            d = proj_uv_hom[2:3, :]
            rot_uvd = [car_pose['pose'][0], car_pose['pose'][1],
                       car_pose['pose'][2], u[0, 0], v[0, 0],
                       car_pose['pose'][5]]
            self.rot_uvd_list.append(rot_uvd)
            plt.scatter(u, v, linewidths=20)

            F1 = K_hom[0, 0]
            W = K_hom[0, 2]
            F2 = K_hom[1, 1]
            H = K_hom[1, 2]
            K_T = np.array([[1. / F1, 0., -W / F1],
                            [0., 1. / F2, -H / F2],
                            [0., 0., 1.]])
            # print K_T
            # print self.intrinsic
            # print F1, W, F2, H
            uvd = np.vstack((u * d, v * d, d))
            xyz = np.matmul(K_T, uvd)
            print 'xyz / pose recovered: ', xyz
            # print 'uvd:', rot_uvd
            # print car_pose['pose'].shape, vert_transformed.shape

            ## Get bbox from mask
            arr = np.expand_dims(np.int32(mask), -1)
            # number of highest label:
            labmax = 1
            # maximum and minimum positions along each axis (initialized to
            # very low and high values)
            b_first = np.iinfo('int32').max * np.ones((3, labmax + 1),
                                                      dtype='int32')
            b_last = np.iinfo('int32').max * np.ones((3, labmax + 1),
                                                     dtype='int32')
            # run through all dimensions making 2D slices and marking all
            # existing labels to b
            for dim in range(2):
                # create a generic slice object to make the slices
                sl = [slice(None), slice(None), slice(None)]
                bf = b_first[dim]
                bl = b_last[dim]
                # go through all slices in this dimension
                for k in range(arr.shape[dim]):
                    # create the slice object
                    sl[dim] = k
                    # update the last "seen" vector
                    bl[arr[sl].flatten()] = k
                # if we have smaller values in "last" than in "first", update
                bf[:] = np.clip(bf, None, bl)

            bbox = [b_first[1, 1], b_last[1, 1],
                    b_first[0, 1], b_last[0, 1]]  # [x_min, x_max, y_min, y_max]
            self.bbox_list.append(bbox)
            plt.imshow(mask)
            print mask.shape
            currentAxis = plt.gca()
            # print (bbox[0], bbox[2]), bbox[1] - bbox[0], bbox[3] - bbox[2]
            currentAxis.add_patch(
                Rectangle((bbox[0], bbox[2]),
                          bbox[1] - bbox[0],
                          bbox[3] - bbox[2],
                          alpha=1,
                          edgecolor='r',
                          facecolor='none'))
            # plt.show()
            # break
        plt.show()

        self.depth[self.depth == self.MAX_DEPTH] = -1.0
        image = 0.5 * image_rescaled
        for i in range(len(car_poses)):
            frame = np.float32(self.mask == i + 1)
            frame = np.tile(frame[:, :, None], (1, 1, 3))
            image = image + frame * 0.5 * self.colors[i, :]

        if if_visualize:
            uts.plot_images({'image_vis': np.uint8(image),
                             'shape_id': self.shape_id_map,
                             'mask': self.mask,
                             'depth': self.depth},
                            np.asarray(self.rot_uvd_list),
                            self.bbox_list,
                            layout=[1, 4],
                            fig_size=10,
                            save_fig=if_save,
                            fig_name=plot_path)
        return (image, self.mask, self.shape_id_map, self.depth,
                self.pose_map, image_rescaled, self.pose_list,
                self.shape_id_list, self.rot_uvd_list, self.bbox_list)
    else:
        return (None, None, None, None, None, image_rescaled, None, None,
                None, None)
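# Design note: for a single binary mask, the slice-scanning bbox extraction
# above reduces to a couple of numpy calls; an equivalent sketch:
def mask_to_bbox_sketch(mask):
    ys, xs = np.where(mask)
    return [xs.min(), xs.max(), ys.min(), ys.max()]  # [x_min, x_max, y_min, y_max]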
def test_demo():
    # PaddlePaddle init
    paddle.init(use_gpu=True, gpu_id=FLAGS.gpu_id)
    params = sun3d.set_params()
    inputs = d_net.get_demon_inputs(params)

    # Add neural network config
    outputs_bs = d_net.bootstrap_net(inputs, params)
    outputs_it = d_net.iterative_net(inputs, params)
    outputs_re = d_net.refine_net(inputs, params)
    out_fields = ['flow', 'depth_inv', 'normal', 'rotation', 'translation']

    my_g_layer_map = {}
    parameters_bs, topo_bs = paddle.parameters.create(
        [outputs_bs[x] for x in out_fields])
    my_g_layer_map.update(cp.g_layer_map)
    parameters_it, topo_it = paddle.parameters.create(
        [outputs_it[x] for x in out_fields])
    my_g_layer_map.update(cp.g_layer_map)
    parameters_re, topo_re = paddle.parameters.create(outputs_re['depth_0'])
    my_g_layer_map.update(cp.g_layer_map)

    print('load parameters')
    with gzip.open(FLAGS.model, 'r') as f:
        parameters_init = paddle.parameters.Parameters.from_tar(f)
    for name in parameters_bs.names():
        parameters_bs.set(name, parameters_init.get(name))
    for name in parameters_it.names():
        parameters_it.set(name, parameters_init.get(name))
    for name in parameters_re.names():
        parameters_re.set(name, parameters_init.get(name))

    # Read image pair 1, 2 and flow
    for scene_name in params['train_scene'][1:]:
        image_list = preprocess_util.list_files(
            params['flow_path'] + scene_name + '/flow/')
        image2depth = sun3d.get_image_depth_matching(scene_name)

        for pair_name in image_list[0:2]:
            image1, image2, flow_gt, depth1_gt, normal1_gt = \
                sun3d.load_image_pair(scene_name, pair_name, image2depth)

            # transform and yield
            image1_new = uts.transform(image1.copy(),
                                       height=params['size'][0],
                                       width=params['size'][1])
            image2_new = uts.transform(image2.copy(),
                                       height=params['size'][0],
                                       width=params['size'][1])
            intrinsic = np.array([0.89115971, 1.18821287, 0.5, 0.5])

            test_data_bs = [(image1_new, image2_new)]
            feeding_bs = {'image1': 0, 'image2': 1}
            flow, depth_inv, normal, rotation, translation = paddle.infer(
                output=topo_bs,
                parameters=parameters_bs,
                input=test_data_bs,
                feeding=feeding_bs)

            for i in range(3):
                test_data_it = [(image1_new, image2_new, intrinsic, rotation,
                                 translation, depth_inv, normal)]
                feeding_it = {'image1': 0,
                              'image2': 1,
                              'intrinsic': 2,
                              'rotation': 3,
                              'translation': 4,
                              'depth_inv': 5,
                              'normal': 6}
                flow, depth_inv, normal, rotation, translation = paddle.infer(
                    output=topo_it,
                    parameters=parameters_it,
                    input=test_data_it,
                    feeding=feeding_it)

            test_data_re = [(image1_new, image2_new, depth_inv)]
            feeding_re = {'image1': 0, 'image2': 1, 'depth_inv': 2}
            depth = paddle.infer(output=topo_re,
                                 parameters=parameters_re,
                                 input=test_data_re,
                                 feeding=feeding_re)

            layer_names = [outputs_it['flow'].name,
                           outputs_it['normal'].name,
                           outputs_re['depth_0'].name]
            height_list = [my_g_layer_map[x].height for x in layer_names]
            width_list = [my_g_layer_map[x].width for x in layer_names]
            flow, normal, depth = vec2img(inputs=[flow, normal, depth],
                                          height=height_list,
                                          width=width_list)

            # visualize depth in 3D
            # image1_down = cv2.resize(image1,
            #                          (depth.shape[1], depth.shape[0]))
            # visualize_prediction(
            #     depth=depth,
            #     image=np.uint8(image1_down.transpose([2, 0, 1])),
            #     rotation=rotation,
            #     translation=translation)
            with open('./test/depth_gt.npy', 'wb') as f:
                np.save(f, depth1_gt)
            with open('./test/depth_res.npy', 'wb') as f:
                np.save(f, depth)
            uts.plot_images(OrderedDict([
                ('image1', image1),
                ('image2', image2),
                ('flow', flow),
                ('flow_gt', flow_gt),
                ('depth', depth),
                ('depth_gt', depth1_gt),
                ('normal', (normal + 1.0) / 2.),
                ('normal_gt', (normal1_gt + 1.0) / 2)
            ]), layout=[4, 2])