def valid(datacfg, cfgfile, weightfile):
    """Evaluate a multi-object pose network with the 2D reprojection metric.

    For every ground-truth object in the validation set, the prediction of
    the same class with the highest confidence is selected, a pose is
    recovered with PnP for both the ground-truth and the predicted 2D
    keypoints, the mesh vertices are projected with both poses, and the mean
    pixel distance between the two projections is recorded.  The percentage
    of objects whose error is within 5..50 px is then reported through
    ``logging``.

    Args:
        datacfg: Path of the data configuration read by ``read_data_cfg``.
            The keys ``valid``, ``mesh``, ``name``, ``im_width``,
            ``im_height``, ``fx``, ``fy``, ``u0`` and ``v0`` are used.
        cfgfile: Path of the network configuration; it is parsed with
            ``parse_cfg`` and also used to build the ``Darknet`` model.
        weightfile: Path of the weights passed to ``model.load_weights``.

    Returns:
        None.  Results are only emitted through ``logging``.

    Notes:
        A ground-truth object for which no prediction of the same class
        exists is counted as a miss (infinite pixel error).  Previously this
        case either raised ``NameError`` or silently reused the prediction
        chosen for the previous object.
    """

    def truths_length(truths, max_num_gt=50):
        """Count the label rows that precede the first padding row.

        A padding row is one whose second column is 0.  When no padding row
        is found the number of inspected rows is returned (the original
        fell off the end and returned ``None``, which broke ``range()``).
        """
        limit = min(max_num_gt, len(truths))
        for i in range(limit):
            if truths[i][1] == 0:
                return i
        return limit

    def select_best_box(boxes, conf_idx, cls_idx, cls_id, floor):
        """Return the box of class ``cls_id`` with the highest confidence.

        Only boxes whose confidence is strictly greater than ``floor`` are
        considered; ``None`` is returned when nothing qualifies.
        """
        best_box = None
        best_conf = floor
        for box in boxes:
            if box[conf_idx] > best_conf and box[cls_idx] == cls_id:
                best_conf = box[conf_idx]
                best_box = box
        return best_box

    # Parse data configuration file
    data_options = read_data_cfg(datacfg)
    valid_images = data_options['valid']
    meshname = data_options['mesh']
    name = data_options['name']
    im_width = int(data_options['im_width'])
    im_height = int(data_options['im_height'])
    fx = float(data_options['fx'])
    fy = float(data_options['fy'])
    u0 = float(data_options['u0'])
    v0 = float(data_options['v0'])

    # Parse net configuration file once; the first and the last parsed
    # blocks are read as the net options and the loss options.
    cfg_blocks = parse_cfg(cfgfile)
    net_options = cfg_blocks[0]
    loss_options = cfg_blocks[-1]
    conf_thresh = float(net_options['conf_thresh'])
    num_keypoints = int(net_options['num_keypoints'])
    num_classes = int(loss_options['classes'])
    num_anchors = int(loss_options['num'])
    anchors = [float(anchor) for anchor in loss_options['anchors'].split(',')]

    # Read object model information, get 3D bounding box corners, get intrinsics.
    # A column of ones is appended to the vertices before transposing, so
    # every column of `vertices` is one homogeneous point.
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    intrinsic_calibration = get_camera_intrinsic(u0, v0, fx, fy)

    # Layout of one label row: class, 2 * num_keypoints coordinates, width, height
    num_labels = 2 * num_keypoints + 3
    # Layout of one predicted box: 2 * num_keypoints coordinates, then the
    # confidence at conf_idx and the class id at cls_idx.
    conf_idx = 2 * num_keypoints
    cls_idx = conf_idx + 2

    # Loop-invariant PnP inputs: a zero point followed by the 3D corners
    # (one point per row), and the camera matrix.
    objpoints3D = np.array(
        np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)),
        dtype='float32')
    K = np.array(intrinsic_calibration, dtype='float32')

    # Compute-related parameters
    use_cuda = True
    kwargs = {'num_workers': 4, 'pin_memory': True}

    # Specify model, load pretrained weights, pass to GPU and set the module
    # in evaluation mode
    model = Darknet(cfgfile)
    model.load_weights(weightfile)
    if use_cuda:
        model.cuda()
    model.eval()

    # Get the dataloader for the test dataset
    valid_dataset = dataset_multi.listDataset(
        valid_images,
        shape=(model.width, model.height),
        shuffle=False,
        objclass=name,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]))
    test_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=1, shuffle=False, **kwargs)

    errs_2d = []  # one mean pixel error per ground-truth object

    # Iterate through test batches (batch size for test data is 1)
    logging('Testing {}...'.format(name))
    for data, target in test_loader:
        if use_cuda:
            data = data.cuda()
        # Wrap tensors in Variable class, set volatile=True for inference mode
        data = Variable(data, volatile=True)

        # Forward pass
        output = model(data).data

        # Using confidence threshold, eliminate low-confidence predictions.
        # The class of the first label row of the first image is passed on.
        trgt = target[0].view(-1, num_labels)
        all_boxes = get_multi_region_boxes(
            output, conf_thresh, num_classes, num_keypoints, anchors,
            num_anchors, int(trgt[0][0]), only_objectness=0)

        # Iterate through all images in the batch
        for i in range(output.size(0)):
            boxes = all_boxes[i]
            # There might be more than one target per image
            truths = target[i].view(-1, num_labels)

            # Iterate through each ground-truth object
            for k in range(truths_length(truths)):
                truth = truths[k]
                gt_class = int(truth[0])
                # Columns 1 .. 2 * num_keypoints hold the normalized 2D keypoints
                gt_coords = [truth[j] for j in range(1, conf_idx + 1)]

                # Choose the same-class prediction with the highest confidence
                box_pr = select_best_box(
                    boxes, conf_idx, cls_idx, gt_class, -sys.maxsize)
                if box_pr is None:
                    # Nothing detected for this object: it cannot satisfy any
                    # pixel threshold, so record it as a miss.
                    errs_2d.append(float('inf'))
                    continue

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(gt_coords, [-1, 2]), dtype='float32')
                corners2D_pr = np.array(
                    np.reshape(box_pr[:conf_idx], [-1, 2]), dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height
                corners2D_gt_corrected = fix_corner_order(corners2D_gt)

                # Compute [R|t] by pnp
                R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
                R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)

                # Compute pixel error between the two projections of the mesh
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt, intrinsic_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr, intrinsic_calibration)
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                errs_2d.append(np.mean(norm))

    # Compute 2D projection score; eps keeps the division defined when no
    # object was evaluated.
    eps = 1e-5
    errors = np.array(errs_2d)
    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
        acc = len(np.where(errors <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
        # Print test statistics
        logging(' Acc using {} px 2D Projection = {:.2f}%'.format(
            px_threshold, acc))
#img = cv2.rectangle(img, (x1,y1), (x2,y2), rgb, 1) if savename: print("save plot results to %s" % savename) cv2.imwrite(savename, img) return img if __name__ == '__main__': #datacfg = 'cfg/ape.data' modelcfg = 'multi_obj_pose_estimation/cfg/yolo-pose-multi.cfg' weightfile = '../Assets/trained/multi.weights' #模型初始化 model = Darknet(modelcfg) model.load_weights(weightfile) model = model.cuda() model.eval() #加载模型用 net_options = parse_cfg(modelcfg)[0] loss_options = parse_cfg(modelcfg)[-1] conf_thresh = float(net_options['conf_thresh']) num_keypoints = int(net_options['num_keypoints']) num_classes = int(loss_options['classes']) num_anchors = int(loss_options['num']) anchors = [float(anchor) for anchor in loss_options['anchors'].split(',')] test_width = 416 test_height = 416 datasetPath = '../Assets/DataSets/LINEMOD/'
def valid(datacfg, cfgfile, weightfile, conf_th):
    """Evaluate a multi-object pose network with the 2D reprojection metric.

    For every ground-truth object in the validation set, the prediction of
    the same class with the highest confidence is selected, a pose is
    recovered with PnP for both the ground-truth and the predicted 2D
    keypoints, the mesh vertices are projected with both poses, and the mean
    pixel distance between the two projections is recorded.  The percentage
    of objects whose error is within 5..50 px is then reported through
    ``logging``.

    Args:
        datacfg: Path of the data configuration read by ``read_data_cfg``.
            The keys ``valid``, ``mesh`` and ``name`` are used.
        cfgfile: Path of the network configuration used to build ``Darknet``.
        weightfile: Path of the weights passed to ``model.load_weights``.
        conf_th: Confidence threshold handed to
            ``get_corresponding_region_boxes``.

    Returns:
        None.  Results are only emitted through ``logging``.

    Notes:
        A ground-truth object for which no prediction of the same class
        exists is counted as a miss (infinite pixel error).  Previously this
        case either raised ``NameError`` or silently reused the prediction
        chosen for the previous object.
    """

    def truths_length(truths, max_num_gt=50):
        """Count the label rows that precede the first padding row.

        A padding row is one whose second column is 0.  When no padding row
        is found the number of inspected rows is returned (the original
        fell off the end and returned ``None``, which broke ``range()``).
        """
        limit = min(max_num_gt, len(truths))
        for i in range(limit):
            if truths[i][1] == 0:
                return i
        return limit

    def select_best_box(boxes, conf_idx, cls_idx, cls_id, floor):
        """Return the box of class ``cls_id`` with the highest confidence.

        Only boxes whose confidence is strictly greater than ``floor`` are
        considered; ``None`` is returned when nothing qualifies.
        """
        best_box = None
        best_conf = floor
        for box in boxes:
            if box[conf_idx] > best_conf and box[cls_idx] == cls_id:
                best_conf = box[conf_idx]
                best_box = box
        return best_box

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname = options['mesh']
    name = options['name']

    # Read object model information, get 3D bounding box corners.
    # A column of ones is appended to the vertices before transposing, so
    # every column of `vertices` is one homogeneous point.
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)

    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()
    K = np.array(internal_calibration, dtype='float32')

    # Specify model, load pretrained weights, pass to GPU and set the module
    # in evaluation mode
    use_cuda = True
    model = Darknet(cfgfile)
    model.load_weights(weightfile)
    if use_cuda:
        model.cuda()
    model.eval()
    test_width = 544
    test_height = 544

    # Get the dataloader for the test dataset
    valid_dataset = dataset_multi.listDataset(
        valid_images,
        shape=(test_width, test_height),
        shuffle=False,
        objclass=name,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]))
    valid_batchsize = 1
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_batchsize, shuffle=False, **kwargs)

    # Detection parameters
    num_classes = 2
    anchors = [
        1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910,
        3.4879, 5.8851
    ]
    num_anchors = 5
    eps = 1e-5
    conf_thresh = conf_th

    # Fixed layouts used by this script: a label row has 21 columns (class,
    # 18 keypoint coordinates, 2 more); a predicted box stores 18 keypoint
    # coordinates, the confidence at index 18 and the class id at index 20.
    num_labels = 21
    conf_idx = 18
    cls_idx = 20
    # Pixel size used to denormalize the 2D keypoints
    im_width = 1280
    im_height = 720

    # Default PnP object points: a zero point followed by the mesh's 3D
    # corners (one point per row).
    mesh_points3D = np.array(
        np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)),
        dtype='float32')

    # Object points used for class 2 & 3 (i.e. upperPortRed and
    # upperPortBlue): centroid plus the 8 corners of a fixed box.
    x_min_3d, x_max_3d = 0, 1.2192
    y_min_3d, y_max_3d = 0, 1.1176
    z_min_3d, z_max_3d = 0, 0.003302
    centroid = [(x_min_3d + x_max_3d) / 2,
                (y_min_3d + y_max_3d) / 2,
                (z_min_3d + z_max_3d) / 2]
    port_points3D = np.array([
        centroid,
        [x_min_3d, y_min_3d, z_min_3d],
        [x_min_3d, y_min_3d, z_max_3d],
        [x_min_3d, y_max_3d, z_min_3d],
        [x_min_3d, y_max_3d, z_max_3d],
        [x_max_3d, y_min_3d, z_min_3d],
        [x_max_3d, y_min_3d, z_max_3d],
        [x_max_3d, y_max_3d, z_min_3d],
        [x_max_3d, y_max_3d, z_max_3d],
    ])

    errs_2d = []  # one mean pixel error per ground-truth object

    # Iterate through test batches (batch size for test data is 1)
    logging('Testing {}...'.format(name))
    for data, target in test_loader:
        if use_cuda:
            data = data.cuda()
        # Wrap tensors in Variable class, set volatile=True for inference mode
        data = Variable(data, volatile=True)

        # Forward pass
        output = model(data).data

        # Using confidence threshold, eliminate low-confidence predictions.
        # The class of the first label row of the first image is passed on.
        trgt = target[0].view(-1, num_labels)
        all_boxes = get_corresponding_region_boxes(
            output, conf_thresh, num_classes, anchors, num_anchors,
            int(trgt[0][0]), only_objectness=0)

        # Iterate through all images in the batch
        for i in range(output.size(0)):
            boxes = all_boxes[i]
            # There might be more than one target per image
            truths = target[i].view(-1, num_labels)
            if debug_multi:
                # was `type(truth)`: an undefined name
                print(type(truths))

            num_gts = truths_length(truths)
            if debug_multi:
                print('numbers of ground truth: ' + str(num_gts))

            # Iterate through each ground-truth object
            for k in range(num_gts):
                truth = truths[k]
                gt_class = int(truth[0])
                if debug_multi:
                    print('object class in label is: ' + str(truth[0]))
                # Columns 1..18 hold the normalized 2D keypoints
                gt_coords = [truth[j] for j in range(1, conf_idx + 1)]

                # Choose the same-class prediction with the highest confidence
                box_pr = select_best_box(boxes, conf_idx, cls_idx, gt_class, -1)
                if box_pr is None:
                    # Nothing detected for this object: it cannot satisfy any
                    # pixel threshold, so record it as a miss.
                    errs_2d.append(float('inf'))
                    continue

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(gt_coords, [9, 2]), dtype='float32')
                corners2D_pr = np.array(
                    np.reshape(box_pr[:conf_idx], [9, 2]), dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height
                # don't fix corner order since the order is already correct
                corners2D_gt_corrected = corners2D_gt
                if debug_multi:
                    print('2d corners ground truth: ')
                    print(type(corners2D_gt_corrected))
                    print(corners2D_gt_corrected)

                # The selected box has the ground-truth class by construction,
                # so the class is taken from the label instead of the leaked
                # loop index (`boxes[j][20]` pointed at the last box, not the
                # chosen one).
                if gt_class == 2 or gt_class == 3:
                    objpoints3D = port_points3D
                else:
                    objpoints3D = mesh_points3D

                # Compute [R|t] by pnp
                _, R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
                _, R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)

                # Compute pixel error between the two projections of the mesh.
                # NOTE(review): the mesh vertices are projected even when the
                # fixed box above was used for PnP - confirm this is intended.
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt, internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration)
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                errs_2d.append(np.mean(norm))

    # Compute 2D projection score; eps keeps the division defined when no
    # object was evaluated.
    errors = np.array(errs_2d)
    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
        acc = len(np.where(errors <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
        # Print test statistics
        logging(' Acc using {} px 2D Projection = {:.2f}%'.format(
            px_threshold, acc))
# Build one parameter group per named parameter: batch-norm and bias
# parameters get no weight decay, everything else gets decay * batch_size.
# NOTE(review): `params` is not handed to the optimizer created below (it is
# built from model.parameters()), so these per-group decays are not applied
# by anything visible here - confirm whether that is intended.
params_dict = dict(model.named_parameters())
params = []
for key, value in params_dict.items():
    if '.bn' in key or '.bias' in key:
        params.append({'params': [value], 'weight_decay': 0.0})
    else:
        params.append({'params': [value], 'weight_decay': decay * batch_size})

# Get the optimizer (Adam over all model parameters)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Pass the model to GPU (single device; no DataParallel wrapping)
if use_cuda:
    model = model.cuda()

# Evaluation-only switch: when set, run a single test pass and skip training.
evaluate = True
if evaluate:
    logging('evaluating ...')
    test(0)
else:
    # NOTE(review): `init_epoch` is not assigned in the visible code (its
    # assignment is commented out upstream of this loop) - confirm it is
    # defined before this branch can run.
    for epoch in range(init_epoch, max_epochs):
        # TRAIN
        niter, loss = train(epoch)
        # TEST and SAVE: only on save intervals, when not pre-training, and
        # once the loss has dropped below 10
        reached_interval = (epoch + 1) % save_interval == 0
        if reached_interval and (not pre) and (loss < 10):
            acc = test(epoch)