def initialize_model():
    """Load the 3D regression network, the YOLO detector and helper tables.

    The checkpoint that sorts last among the ``*.pkl`` files in the
    ``weights`` directory next to this file is loaded onto the CPU and the
    network is put into evaluation mode.

    Returns:
        tuple: ``(yolo, model, averages, angle_bins)`` -- the ``cv_Yolo``
        detector, the regression ``Model.Model``, a ``ClassAverages``
        instance and the result of ``generate_bins(2)``.

    Raises:
        SystemExit: if ``weights`` contains no ``.pkl`` checkpoint.
        OSError: if the ``weights`` directory cannot be listed.
    """
    weights_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    model_lst = [
        x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
    ]
    if not model_lst:
        # Previously this was a bare exit(): the process ended with status 0
        # and no explanation.  Report the reason and exit non-zero instead.
        raise SystemExit('No previous model found, please train first!')

    my_vgg = vgg.vgg19_bn(pretrained=True)
    # TODO: load bins from file or something
    model = Model.Model(features=my_vgg.features, bins=2)
    # map_location='cpu' lets a checkpoint saved on a GPU load on any machine.
    checkpoint = torch.load(weights_path + '/%s' % model_lst[-1],
                            map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # load yolo (its files live in the same weights directory)
    yolo = cv_Yolo(weights_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    return (yolo, model, averages, angle_bins)
def __init__(self):
    """Set up the node: frame buffers, networks, calibration path, ROS I/O.

    Loads the checkpoint that sorts last among ``weights/*.pkl`` onto the GPU,
    creates the YOLO detector, and registers the image subscriber and the two
    publishers.  Prints a message and exits when no checkpoint is present.
    """
    rospy.loginfo("pointcloud object detection is running...")

    # frame size
    self.frame_x = 640
    self.frame_y = 480
    self.bridge = CvBridge()

    # Placeholders for the most recent image and point cloud.
    # NOTE(review): the buffer is allocated as (frame_x, frame_y), i.e.
    # 640 rows by 480 columns -- confirm this is the intended orientation.
    self.cv_image = np.zeros([self.frame_x, self.frame_y])
    self.pcl = None

    # (the tf.TransformBroadcaster publisher is currently disabled)

    # load torch
    base_dir = os.path.abspath(os.path.dirname(__file__))
    weights_dir = base_dir + '/weights'
    checkpoints = [
        name for name in sorted(os.listdir(weights_dir))
        if name.endswith('.pkl')
    ]
    if len(checkpoints) == 0:
        print('No previous model found, please train first!')
        exit()
    latest = checkpoints[-1]
    print('Using previous model %s' % latest)

    backbone = vgg.vgg19_bn(pretrained=True)
    self.model = Model.Model(features=backbone.features, bins=2).cuda()
    state = torch.load(weights_dir + '/%s' % latest)
    self.model.load_state_dict(state['model_state_dict'])
    self.model.eval()

    # load yolo (its files sit in the same weights directory)
    self.yolo = cv_Yolo(base_dir + '/weights')
    self.averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    self.angle_bins = generate_bins(2)

    calib_dir = base_dir + "/" + "camera_cal/"
    self.calib_file = calib_dir + "calib_cam_to_cam.txt"

    # subscribers
    # (the PointCloud2 subscriber on /camera/depth_registered/points is
    # currently disabled)
    self.img_sub = rospy.Subscriber("/kitti/camera_color_right/image_raw",
                                    Image, self.rgb_callback)

    # publishers
    self.img_detected_pub = rospy.Publisher("ROS_3D_BBox/img_detected_frame",
                                            Image,
                                            queue_size=100)
    self.location_pub = rospy.Publisher("ROS_3D_BBox/location_array",
                                        LocationArray,
                                        queue_size=100)
    self.rate = rospy.Rate(1)
def main():
    """Run live 3D bounding-box estimation on frames from the default webcam.

    Each frame is passed to YOLO.  Every detection whose class is known to
    ``ClassAverages`` is wrapped in a ``DetectedObject``, fed to the
    regression network and drawn with ``plot_regressed_3d_bbox``.  The loop
    ends when ESC (key code 27) is pressed in the OpenCV window.

    Raises:
        AssertionError: if ``weights`` contains no ``.pkl`` checkpoint.
    """
    # Parse flags first so bad arguments (or --help) return before the camera
    # is opened or any network is loaded.
    FLAGS = parser.parse_args()

    root = os.path.dirname(os.path.abspath(__file__))
    weights_path = root + '/weights'

    model_lst = [
        x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
    ]
    if not model_lst:
        # Raised explicitly (not via `assert`) so the check survives `python -O`.
        raise AssertionError('No previous model found, please train first!')
    print('Using previous model %s' % model_lst[-1])

    my_vgg = vgg.vgg19_bn(pretrained=False)
    # TODO: load bins from file or something
    model = Model.Model(features=my_vgg.features, bins=2).cuda()
    checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # load yolo
    yolo = cv_Yolo(root + '/weights')

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    # using P_rect from global calibration file
    calib_file = root + '/' + FLAGS.cal_dir + 'calib_cam_to_cam.txt'

    cam = cv2.VideoCapture(0)
    try:
        while cv2.waitKey(5) != 27:  # 27 == ESC
            ret, truth_img = cam.read()
            if not ret:
                # No frame delivered this time; poll again.
                continue

            start_time = time.time()
            img = truth_img.copy()
            yolo_img = truth_img.copy()

            detections = yolo.detect(yolo_img)

            for detection in detections:
                if not averages.recognized_class(detection.detected_class):
                    continue

                # This is throwing when the 2d bbox is invalid
                # TODO: better check
                try:
                    detectedObject = DetectedObject(img,
                                                    detection.detected_class,
                                                    detection.box_2d,
                                                    calib_file)
                except Exception:
                    # Was a bare `except:`, which also swallowed Ctrl-C.
                    continue

                theta_ray = detectedObject.theta_ray
                input_img = detectedObject.img
                proj_matrix = detectedObject.proj_matrix
                box_2d = detection.box_2d
                detected_class = detection.detected_class

                input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
                input_tensor[0, :, :, :] = input_img

                [orient, conf, dim] = model(input_tensor)
                orient = orient.cpu().data.numpy()[0, :, :]
                conf = conf.cpu().data.numpy()[0, :]
                dim = dim.cpu().data.numpy()[0, :]

                # The network output is added to the per-class average.
                dim += averages.get_item(detected_class)

                # Decode alpha from the most confident bin.
                argmax = np.argmax(conf)
                cos, sin = orient[argmax, :2]
                alpha = np.arctan2(sin, cos)
                alpha += angle_bins[argmax] - np.pi

                if FLAGS.show_yolo:
                    location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                      dim, alpha, theta_ray,
                                                      truth_img)
                else:
                    location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                      dim, alpha, theta_ray)

                if not FLAGS.hide_debug:
                    print('Estimated location: %s' % location)  # x,y,z

            if not FLAGS.hide_debug:
                print('Got %s poses in %.3f seconds\n' %
                      (len(detections), time.time() - start_time))

            if FLAGS.show_yolo:
                img = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('3D-DeepBox', img)
    finally:
        # Give the capture device and the window back even when the loop ends
        # through an exception.
        cam.release()
        cv2.destroyAllWindows()
def main():
    """Evaluate the regression network on the ground-truth boxes in ``eval``.

    For every image returned by ``Dataset.all_objects()`` each labelled object
    is passed through the network, the regressed 3D box is drawn, and the
    estimated location is printed next to the label's location.  Results are
    shown object by object or image by image depending on ``single_car``.
    ESC in the image-by-image view ends the run.

    Raises:
        AssertionError: if ``weights`` contains no ``.pkl`` checkpoint.
    """
    root = os.path.dirname(os.path.abspath(__file__))
    weights_path = root + '/weights'

    model_lst = [x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')]
    if not model_lst:
        # Raised explicitly (not via `assert`) so the check survives `python -O`.
        raise AssertionError('No previous model found, please train first!')
    print('Using previous model %s' % model_lst[-1])

    my_vgg = vgg.vgg19_bn(pretrained=False)
    model = Model.Model(features=my_vgg.features, bins=2).cuda()
    checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # defaults to /eval
    dataset = Dataset(root + '/eval')
    averages = ClassAverages.ClassAverages()

    all_images = dataset.all_objects()
    for key in sorted(all_images):
        start_time = time.time()

        data = all_images[key]
        truth_img = data['Image']
        img = np.copy(truth_img)
        objects = data['Objects']
        cam_to_img = data['Calib']

        for detectedObject in objects:
            label = detectedObject.label
            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img

            # The tensor is created on the GPU; the extra `.cuda()` call whose
            # result was discarded has been dropped.
            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]

            # The network output is added to the per-class average.
            dim += averages.get_item(label['Class'])

            # Decode alpha from the most confident bin.
            # NOTE(review): `angle_bins` (and `single_car` below) are not
            # defined in this function -- presumably module-level globals;
            # confirm they exist.
            argmax = np.argmax(conf)
            cos, sin = orient[argmax, :2]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax] - np.pi

            location = plot_regressed_3d_bbox(img, truth_img, cam_to_img,
                                              label['Box_2D'], dim, alpha,
                                              theta_ray)

            print('Truth pose: %s\nEstimated location: %s' %
                  (label['Location'], location))  # x,y,z

            # plot car by car
            if single_car:
                numpy_vertical = np.concatenate((truth_img, img), axis=0)
                cv2.imshow('3D-DeepBox', numpy_vertical)
                cv2.waitKey(0)

        print('Got %s poses in %.3f seconds\n' %
              (len(objects), time.time() - start_time))

        # plot image by image
        if not single_car:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('3D-DeepBox', numpy_vertical)
            if cv2.waitKey(0) == 27:  # ESC
                return
def main():
    """Run YOLO + 3D box regression on a folder of PNG images, on the CPU.

    Flag defaults, per the original author's note: cal_dir='camera_cal/',
    hide_debug=False, image_dir='eval/image_2/', show_yolo=False, video=False.

    Each image in ``FLAGS.image_dir`` is paired with the calibration file
    ``eval/calib/<id>.txt``.  In still-image mode SPACE advances to the next
    image and any other key exits; in video mode frames advance automatically.
    """
    FLAGS = parser.parse_args()

    # Note (from the original author): there are two weight files -- the
    # yolov3.weights file for the 2D YOLO detector, and the self-trained
    # weights that regress dimensions and alpha, named epoch_10.pkl.
    weights_path = os.path.abspath(
        os.path.dirname(__file__)) + os.path.sep + 'weights' + os.path.sep
    model_lst = [
        x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
    ]
    if len(model_lst) == 0:
        print('No previous model found, please train first!')
        exit()
    else:
        print('Using previous model %s' % model_lst[-1])
        # vgg19_bn extracts the image features that feed the three branches
        # that follow.
        # TODO: consider switching to VGG16_bn?
        my_vgg = vgg.vgg19_bn(pretrained=True)
        # TODO: load bins from file or something
        model = Model.Model(features=my_vgg.features, bins=2)
        # Inference runs on the CPU.
        checkpoint = torch.load(weights_path + '/%s' % model_lst[-1],
                                map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()

    # load yolo
    yolo_path = os.path.abspath(
        os.path.dirname(__file__)) + os.path.sep + 'weights' + os.path.sep
    yolo = cv_Yolo(yolo_path)

    # Per-class dimension statistics collected from the training set.
    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    # Directory holding the images to run detection on.
    image_dir = FLAGS.image_dir
    # When every image shares one projection matrix, it belongs in this
    # directory.  Only the disabled global-calibration alternative reads it.
    cal_dir = FLAGS.cal_dir
    # FLAGS.video defaults to False.
    if FLAGS.video:
        if FLAGS.image_dir == "eval/image_2/" and FLAGS.cal_dir == "camera_cal/":
            image_dir = "eval/video/2011_09_26/image_2/"
            cal_dir = "eval/video/2011_09_26/"

    img_path = os.path.abspath(
        os.path.dirname(__file__)) + os.path.sep + image_dir

    # using P from each frame
    calib_path = os.path.abspath(os.path.dirname(
        __file__)) + os.path.sep + 'eval' + os.path.sep + 'calib' + os.path.sep

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except OSError:
        # Was a bare `except:`; only a failed directory listing is expected.
        print("\nError: no images in %s" % img_path)
        exit()

    for img_id in ids:
        start_time = time.time()

        img_file = img_path + img_id + ".png"

        # P for each frame
        calib_file = calib_path + img_id + ".txt"

        truth_img = cv2.imread(img_file)
        if truth_img is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or not decodable; skip it rather than crash later.
            print('Could not read image %s, skipping' % img_file)
            continue
        img = np.copy(truth_img)
        yolo_img = np.copy(truth_img)

        # YOLO returns 2D pixel boxes together with class names.
        detections = yolo.detect(yolo_img)

        for detection in detections:
            # A detected class must be one of the KITTI classes.  YOLO knows
            # more classes than KITTI lists, so anything else is ignored.
            if not averages.recognized_class(detection.detected_class):
                print('class ' + detection.detected_class +
                      ' is not in KITTI class, so ignore this class')
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            # The image, detected class, 2D box and this image's calibration
            # file are handed to DetectedObject's constructor.
            try:
                detectedObject = DetectedObject(img, detection.detected_class,
                                                detection.box_2d, calib_file)
            except Exception:
                # Was a bare `except:`, which also swallowed Ctrl-C.
                print("yolo检测错误,2D框无效!")
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224])
            input_tensor[0, :, :, :] = input_img

            # Predicted orient, conf and dim.
            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]

            dim += averages.get_item(detected_class)

            # Take the bin with the highest confidence and use its
            # orientation entry as the final orientation.
            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            # Predicted cos and sin.  Per the original author's note, in
            # training these are cos(angle_diff) and sin(angle_diff), where
            # angle_diff is the angle between the true alpha (mapped into
            # 0..2*pi) and the corresponding bin.
            cos = orient[0]
            sin = orient[1]
            # np.arctan2 takes sin as the y coordinate and cos as the x
            # coordinate and returns an angle in radians in [-pi, +pi].
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi
            # alpha now holds its final value.

            # Show the 2D detections too; off by default.
            if FLAGS.show_yolo:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray,
                                                  truth_img)
            else:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray)

            if not FLAGS.hide_debug:  # FLAGS.hide_debug defaults to False
                # Print the location of every detected object.  The original
                # author's note says this shift keeps the output consistent
                # with the KITTI dataset.
                # NOTE(review): that note mentions `location[1] += dim[0]`
                # while the code adds dim[0] / 2 -- confirm which is intended.
                location[1] += dim[0] / 2
                print('Estimated pose: %s' % location)

        if FLAGS.show_yolo:  # FLAGS.show_yolo defaults to False
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('SPACE for next image, any other key to exit',
                       numpy_vertical)
        else:
            cv2.imshow('3D detections', img)

        if not FLAGS.hide_debug:
            print('Got %s detect class in %.3f seconds' %
                  (len(detections), time.time() - start_time))
            print('-------------')

        if FLAGS.video:
            cv2.waitKey(1)
        else:
            if cv2.waitKey(0) != 32:  # space bar
                exit()
def main():
    """Run YOLO + 3D box regression on a folder of PNGs and save the results.

    Each image ``<id>.png`` in ``FLAGS.image_dir`` is paired with the
    calibration file ``<id>.txt`` in ``FLAGS.cal_dir``.  The annotated frame
    is written to ``output/<id>_3d.png``, or ``output/<id>_yolo.png`` when
    ``--show_yolo`` is set.  The network runs on the GPU.
    """
    FLAGS = parser.parse_args()

    base_dir = os.path.abspath(os.path.dirname(__file__))

    # load torch
    weights_path = base_dir + '/weights'
    model_lst = [
        x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
    ]
    if len(model_lst) == 0:
        print('No previous model found, please train first!')
        exit()
    else:
        print('Using previous model %s' % model_lst[-1])
        my_vgg = vgg.vgg19_bn(pretrained=True)
        # TODO: load bins from file or something
        model = Model.Model(features=my_vgg.features, bins=2).cuda()
        checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()

    # load yolo
    yolo = cv_Yolo(base_dir + '/weights')

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    image_dir = FLAGS.image_dir
    cal_dir = FLAGS.cal_dir
    if FLAGS.video:
        if FLAGS.image_dir == "eval/image_2/" and FLAGS.cal_dir == "camera_cal/":
            image_dir = "eval/video/2011_09_26/image_2/"
            cal_dir = "eval/video/2011_09_26/"

    img_path = base_dir + "/" + image_dir

    # Directory of the per-frame calibration files.
    calib_path = base_dir + "/" + cal_dir

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except OSError:
        # Was a bare `except:`; only a failed directory listing is expected.
        print("\nError: no images in %s" % img_path)
        exit()

    # cv2.imwrite does not create missing directories; it just reports
    # failure through its return value, so make sure the target exists.
    os.makedirs('output', exist_ok=True)

    for img_id in ids:  # renamed from `id`, which shadowed the builtin
        start_time = time.time()

        img_file = img_path + img_id + ".png"

        # P for each frame
        calib_file = calib_path + img_id + ".txt"

        truth_img = cv2.imread(img_file)
        if truth_img is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or not decodable; skip it rather than crash later.
            print('Could not read image %s, skipping' % img_file)
            continue
        img = np.copy(truth_img)

        # NOTE(review): this variant hands YOLO the file path, while sibling
        # entry points pass an image array -- confirm which one this
        # cv_Yolo.detect expects.
        detections = yolo.detect(img_file)

        for detection in detections:
            if not averages.recognized_class(detection.detected_class):
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            # (the try/except around this call is disabled in this variant,
            # so an invalid box propagates as an exception)
            detected_object = DetectedObject(img, detection.detected_class,
                                             detection.box_2d, calib_file)

            theta_ray = detected_object.theta_ray
            input_img = detected_object.img
            proj_matrix = detected_object.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]

            # The network output is added to the per-class average.
            dim += averages.get_item(detected_class)

            # Decode alpha from the most confident bin.
            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi

            if FLAGS.show_yolo:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray,
                                                  truth_img)
            else:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray)

            if not FLAGS.hide_debug:
                print('Estimated pose: %s' % location)

        if FLAGS.show_yolo:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imwrite(os.path.join('output', img_id + '_yolo.png'),
                        numpy_vertical)
        else:
            cv2.imwrite(os.path.join('output', img_id + '_3d.png'), img)

        if not FLAGS.hide_debug:
            print("\n")
            print('Got %s poses in %.3f seconds' %
                  (len(detections), time.time() - start_time))
            print('-------------')
def main():
    """Run YOLO + 3D box regression on a folder of PNGs with one shared calibration.

    All images in ``FLAGS.image_dir`` use ``calib_cam_to_cam_custom.txt`` from
    ``FLAGS.cal_dir``.  Orientation diagnostics are printed for every
    detection.  In still-image mode SPACE advances to the next image and any
    other key exits; in video mode frames advance automatically.
    """
    FLAGS = parser.parse_args()

    base_dir = os.path.abspath(os.path.dirname(__file__))

    # load torch
    weights_path = base_dir + '/weights'
    model_lst = [
        x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
    ]
    if len(model_lst) == 0:
        print('No previous model found, please train first!')
        exit()
    else:
        print('Using previous model %s' % model_lst[-1])
        my_vgg = vgg.vgg19_bn(pretrained=True)
        # TODO: load bins from file or something
        model = Model.Model(features=my_vgg.features, bins=2).cuda()
        checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()

    # load yolo
    yolo = cv_Yolo(base_dir + '/weights')

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    image_dir = FLAGS.image_dir
    cal_dir = FLAGS.cal_dir
    if FLAGS.video:
        if FLAGS.image_dir == "eval/image_2/" and FLAGS.cal_dir == "camera_cal/":
            image_dir = "eval/video/2011_09_26/image_2/"
            cal_dir = "eval/video/2011_09_26/"

    img_path = base_dir + "/" + image_dir

    # using P_rect from global calibration file
    calib_path = base_dir + "/" + cal_dir
    calib_file = calib_path + "calib_cam_to_cam_custom.txt"

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except OSError:
        # Was a bare `except:`; only a failed directory listing is expected.
        print("\nError: no images in %s" % img_path)
        exit()

    # Developer toggle: letterbox a square-resized frame into a 375 x 1242
    # canvas before detection.
    pad_image = False

    for img_id in ids:
        start_time = time.time()

        img_file = img_path + img_id + ".png"

        truth_img = cv2.imread(img_file)
        if truth_img is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or not decodable; skip it rather than crash later.
            print('Could not read image %s, skipping' % img_file)
            continue

        if pad_image:
            truth_img = cv2.resize(truth_img, (374, 374))
            height, width, channels = truth_img.shape
            width_pad = 1242
            height_pad = 375
            center_height = height_pad // 2
            center_width = width_pad // 2
            img_pad = np.zeros([height_pad, width_pad, 3], dtype=np.uint8)
            start_height = center_height - height // 2
            stop_height = center_height + height // 2
            start_width = center_width - width // 2
            stop_width = center_width + width // 2
            # Paste the resized frame into the middle of the black canvas.
            img_pad[start_height:stop_height,
                    start_width:stop_width, :] = truth_img
            truth_img = img_pad

        # Both paths continue identically from here, so the copies and the
        # detector call are written once.
        img = np.copy(truth_img)
        yolo_img = np.copy(truth_img)
        detections = yolo.detect(yolo_img)

        for detection in detections:
            if not averages.recognized_class(detection.detected_class):
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            try:
                detectedObject = DetectedObject(img, detection.detected_class,
                                                detection.box_2d, calib_file)
            except Exception:
                # Was a bare `except:`, which also swallowed Ctrl-C.
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]

            # The network output is added to the per-class average.
            dim += averages.get_item(detected_class)

            # Decode alpha from the most confident bin.
            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            print('cos:', cos)
            print('sin:', sin)
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi

            # Diagnostics only: alpha shifted by -pi/2, also reported in
            # degrees within [0, 360).
            my_alpha = alpha - np.pi / 2
            print('new cos', np.cos(my_alpha))
            print('new sin', np.sin(my_alpha))
            print('adding', angle_bins[argmax])
            print('confidence', conf)
            print('ANGLE', (my_alpha % (2 * np.pi)) / (2 * np.pi) * 360)
            print(theta_ray)

            if FLAGS.show_yolo:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray,
                                                  truth_img)
            else:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray)

            if not FLAGS.hide_debug:
                print('Estimated pose: %s' % location)

        if FLAGS.show_yolo:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('SPACE for next image, any other key to exit',
                       numpy_vertical)
        else:
            cv2.imshow('3D detections', img)

        if not FLAGS.hide_debug:
            print("\n")
            print('Got %s poses in %.3f seconds' %
                  (len(detections), time.time() - start_time))
            print('-------------')

        if FLAGS.video:
            cv2.waitKey(1)
        else:
            if cv2.waitKey(0) != 32:  # space bar
                exit()
def main():
    """Score the regression network on the ``nusc_kitti/mini_val`` ground truth.

    For every labelled object the network's alpha and dimensions are compared
    with the label.  Running totals of the orientation score
    ``(1 + cos(delta)) / 2`` (objects of class ``traffic_cone`` are excluded)
    and of the squared dimension error are printed after each object.  The
    annotated frames are written to ``output/<key>_yolo.png``.  The network
    runs on the CPU.
    """
    base_dir = os.path.abspath(os.path.dirname(__file__))
    weights_path = base_dir + '/weights'
    model_lst = [
        x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
    ]
    if len(model_lst) == 0:
        print('No previous model found, please train first!')
        exit()
    else:
        print('Using previous model %s' % model_lst[-1])
        my_vgg = vgg.vgg19_bn(pretrained=True)
        # TODO model in Cuda throws an error
        model = Model.Model(features=my_vgg.features, bins=2)
        # The model stays on the CPU, so map the checkpoint there as well;
        # without this a GPU-saved checkpoint cannot be loaded on a machine
        # that has no CUDA device.
        checkpoint = torch.load(weights_path + '/%s' % model_lst[-1],
                                map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()

    dataset = Dataset(base_dir + '/../nusc_kitti/mini_val')
    averages = ClassAverages.ClassAverages()

    all_images = dataset.all_objects()

    # cv2.imwrite does not create missing directories; it just reports
    # failure through its return value, so make sure the target exists.
    os.makedirs('output', exist_ok=True)

    orient_score = 0
    l2 = 0
    tot = 0
    os_tot = 0
    for key in sorted(all_images):
        data = all_images[key]
        truth_img = data['Image']
        img = np.copy(truth_img)
        objects = data['Objects']
        cam_to_img = data['Calib']

        for detected_object in objects:  # renamed from `object` (a builtin)
            label = detected_object.label
            theta_ray = detected_object.theta_ray
            input_img = detected_object.img

            # Model and tensor both stay on the CPU; the `.cuda()` call whose
            # result was discarded has been dropped.
            input_tensor = torch.zeros([1, 3, 224, 224])
            input_tensor[0, :, :, :] = input_img

            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]

            # The network output is added to the per-class average.
            dim += averages.get_item(label['Class'])

            # Decode alpha from the most confident bin.
            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            alpha = np.arctan2(sin, cos)
            alpha += dataset.angle_bins[argmax]
            alpha -= np.pi

            delta_theta = label['Alpha'] - alpha
            tot += 1
            if label['Class'] != 'traffic_cone':
                orient_score += (1 + np.cos(delta_theta)) / 2
                os_tot += 1

            label_dim = label['Dimensions']
            l2 += ((dim[0] - label_dim[0])**2 + (dim[1] - label_dim[1])**2 +
                   (dim[2] - label_dim[2])**2)

            # Until the first non-cone object is seen os_tot is 0; report NaN
            # instead of raising ZeroDivisionError.
            average_os = orient_score / os_tot if os_tot else float('nan')
            print('Average Orientation Score', average_os)
            print('L2 Loss', l2 / tot)
            print('Total Orientation Examples', os_tot)
            print('Total Examples', tot)

            location = plot_regressed_3d_bbox(img, truth_img, cam_to_img,
                                              label['Box_2D'], dim, alpha,
                                              theta_ray)

            print('Estimated pose: %s' % location)
            print('Truth pose: %s' % label['Location'])
            print('-------------')

            # plot car by car
            # NOTE(review): `single_car` is not defined in this function --
            # presumably a module-level flag; confirm it exists.
            if single_car:
                numpy_vertical = np.concatenate((truth_img, img), axis=0)
                cv2.imwrite(os.path.join('output', key + '_yolo.png'),
                            numpy_vertical)

        # plot image by image
        if not single_car:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imwrite(os.path.join('output', key + '_yolo.png'),
                        numpy_vertical)
def main():
    """Run YOLO + 3D box regression on the images in a directory, on the CPU.

    ``FLAGS.image_dir`` and ``FLAGS.cal_dir`` are used as given (relative to
    the working directory).  Each image ``<name>.<ext>`` is paired with the
    calibration file ``<name>.txt``.  After each image SPACE advances to the
    next one and any other key exits.

    Raises:
        SystemExit: if ``weights`` contains no ``.pkl`` checkpoint, or when a
            key other than SPACE is pressed.
    """
    FLAGS = parser.parse_args()

    # load torch
    weights_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    model_lst = [x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')]
    if not model_lst:
        # Previously a bare exit(): status 0 and no explanation.
        raise SystemExit('No previous model found, please train first!')

    my_vgg = vgg.vgg19_bn(pretrained=True)
    # TODO: load bins from file or something
    model = Model.Model(features=my_vgg.features, bins=2)
    checkpoint = torch.load(weights_path + '/%s' % model_lst[-1],
                            map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # load yolo
    yolo = cv_Yolo(weights_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    image_dir = FLAGS.image_dir
    cal_dir = FLAGS.cal_dir
    if FLAGS.video:
        if FLAGS.image_dir == "eval/image_2/" and FLAGS.cal_dir == "camera_cal/":
            image_dir = "eval/video/2011_09_26/image_2/"
            cal_dir = "eval/video/2011_09_26/"

    img_path = image_dir
    calib_path = cal_dir

    for img_id in os.listdir(img_path):
        name_parts = img_id.split(".")
        # Skip notebook checkpoints and calibration .txt files.  Entries with
        # no "." at all used to raise IndexError here; they are skipped too.
        if (img_id == ".ipynb_checkpoints" or len(name_parts) < 2
                or name_parts[1] == "txt"):
            continue
        print(img_id)
        start_time = time.time()

        img_file = img_path + img_id

        # P for each frame
        calib_file = calib_path + name_parts[0] + ".txt"

        truth_img = cv2.imread(img_file)
        if truth_img is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or not decodable; skip it rather than crash later.
            print('Could not read image %s, skipping' % img_file)
            continue
        img = np.copy(truth_img)
        yolo_img = np.copy(truth_img)

        detections = yolo.detect(yolo_img)

        for detection in detections:
            print(detection.detected_class)
            if not averages.recognized_class(detection.detected_class):
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            try:
                detectedObject = DetectedObject(img, detection.detected_class,
                                                detection.box_2d, calib_file)
            except Exception:
                # The old handler set detectedObject = None and fell through,
                # so the attribute access right below raised AttributeError.
                # Skip the detection instead.
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224])
            input_tensor[0, :, :, :] = input_img

            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]

            # The network output is added to the per-class average.
            dim += averages.get_item(detected_class)

            # Decode alpha from the most confident bin.
            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi

            if FLAGS.show_yolo:
                location, corners = plot_regressed_3d_bbox(
                    img, proj_matrix, box_2d, dim, alpha, theta_ray, truth_img)
            else:
                location, corners = plot_regressed_3d_bbox(
                    img, proj_matrix, box_2d, dim, alpha, theta_ray)
            object_info(corners)

        if FLAGS.show_yolo:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('SPACE for next image, any other key to exit',
                       numpy_vertical)
        else:
            img = ResizeWithAspectRatio(img, height=950)
            cv2.imshow('3D detections', img)

        if cv2.waitKey(0) != 32:  # space bar
            exit()
# Build the 2D detector (Darknet); it goes to the GPU only when `cuda` is set.
model = Darknet(args.config_path, img_size=args.img_size)
model.load_weights(args.weights_path)
if cuda:
    model.cuda()
model.eval()

# One image per batch, in folder order.
dataloader = DataLoader(ImageFolder(args.image_folder,
                                    img_size=args.img_size),
                        batch_size=1,
                        shuffle=False)
classes = load_classes(args.class_path)  # Extracts class labels from file
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

# Build the 3D regression network.
# NOTE(review): this network is moved to the GPU unconditionally, unlike the
# detector above -- confirm that the rest of the script requires CUDA.
my_vgg = vgg.vgg19_bn(pretrained=True)
model_3d = Model.Model(features=my_vgg.features, bins=2).cuda()
checkpoint = torch.load("./3d_info_weights/best.pkl")
model_3d.load_state_dict(checkpoint['model_state_dict'])
model_3d.eval()

calib_file = "./camera_cal/" + "calib_cam_to_cam.txt"
angle_bins = generate_bins(2)

imgs = []
img_detections = []
total = len(dataloader)
for batch_i, (img_paths, input_imgs) in enumerate(dataloader):
    # Follow the detector's device: it is on the GPU only when `cuda` is set,
    # so moving the batch unconditionally caused a device mismatch otherwise.
    if cuda:
        input_imgs = input_imgs.cuda()
    with torch.no_grad():
        detections = model(input_imgs)
# Reference points used to score each detection, keyed by image id and then
# by the 1-based index of the detection in the detector's output order.
# NOTE(review): presumably hand-measured ground-truth pixel positions for the
# evaluation frames -- confirm with the author.
_REFERENCE_POINTS = {
    '0000000043': {1: (1313, 546), 2: (716, 391), 3: (781, 341),
                   4: (1093, 423), 5: (982, 348)},
    '0000000098': {1: (1303, 496), 2: (1180, 406), 3: (761, 399),
                   4: (657, 461), 5: (898, 312)},
    '0000000191': {1: (1212, 449), 2: (757, 398), 3: (838, 335),
                   4: (1129, 395), 5: (980, 325)},
    '0000000559': {1: (753, 400), 2: (815, 366)},
    '0000000722': {1: (1395, 508), 2: (849, 377), 3: (1035, 360)},
    '0000001038': {1: (371, 418), 2: (812, 366), 3: (900, 316),
                   4: (1141, 489)},
    '1000000529': {1: (563, 462), 2: (685, 405)},
    '1000000594': {1: (648, 470), 2: (796, 359), 3: (1268, 458),
                   4: (1071, 352)},
    '1000000616': {1: (1319, 489), 2: (1021, 313)},
    '1000000776': {1: (1458, 446), 2: (1160, 373), 3: (656, 359),
                   4: (1103, 349), 5: (930, 335), 6: (1036, 340),
                   7: (584, 386), 8: (435, 426), 9: (754, 328)},
    '1000000864': {1: (1651, 463), 2: (1031, 302), 3: (962, 348),
                   4: (655, 355), 5: (802, 310)},
}

# This image has a single reference point that is applied to every detection,
# whatever its index.
_REFERENCE_POINT_ANY_INDEX = {'0000000272': (1251, 467)}

# Factor the reference coordinates are multiplied by before they are compared
# with the point returned by plot_regressed_3d_bbox.
# NOTE(review): looks like an image-resize ratio -- confirm.
_REFERENCE_SCALE = 0.63


def _padded_crop(image, box_2d):
    """Return the box_2d region of image, grown by a fifth of its size per side.

    box_2d is indexed as box_2d[corner][axis]: corner 0 is the low corner and
    corner 1 the high corner, axis 0 is the column (x) and axis 1 the row (y).
    The grown box is clamped to [0, size - 1] on both axes.
    """
    row_margin = (box_2d[1][1] - box_2d[0][1]) // 5
    col_margin = (box_2d[1][0] - box_2d[0][0]) // 5
    row_lo = max(0, box_2d[0][1] - row_margin)
    row_hi = min(image.shape[0] - 1, box_2d[1][1] + row_margin)
    col_lo = max(0, box_2d[0][0] - col_margin)
    col_hi = min(image.shape[1] - 1, box_2d[1][0] + col_margin)
    return image[row_lo:row_hi, col_lo:col_hi, :]


def _print_distance(view_angle, h):
    """Print the distance estimate derived from view_angle, h and the global H.

    Evaluates s = (h/H +/- sqrt(h^2/H^2 - 4*tan(a)^2*(H - h)/H)) * H / (2*tan(a))
    and prints the larger of the two roots.  When 4*H*(H - h) is not positive
    no estimate is attempted.  An angle above atan(h / sqrt(4*H*(H - h))) is
    pulled back to 0.05 rad below that bound, which keeps the square root's
    argument non-negative.
    """
    bound_sq = 4 * H * (H - h)
    if bound_sq <= 0:
        print("Сan't estimate the distance")
        return
    max_angle = math.atan(h / math.sqrt(bound_sq))
    if view_angle > max_angle:
        view_angle = max_angle - 0.05
    root = math.sqrt((h * h) / (H * H)
                     - 4 * math.tan(view_angle) * math.tan(view_angle)
                     * (H - h) / H)
    s1 = (h / H + root) * H / (2 * math.tan(view_angle))
    s2 = (h / H - root) * H / (2 * math.tan(view_angle))
    print('Distance to car:', max(s1, s2), 'mm')


def _print_reference_score(img_id, detection_index, z1, c):
    """Print how close z1 lies to the stored reference point, as a percentage.

    The score is (1 - distance / c) * 100, where distance is the Euclidean
    distance between z1 and the scaled reference point; negative scores are
    printed as 0.  Nothing is printed when no reference point is stored for
    this image / detection index.
    """
    if img_id in _REFERENCE_POINT_ANY_INDEX:
        ref = _REFERENCE_POINT_ANY_INDEX[img_id]
    else:
        ref = _REFERENCE_POINTS.get(img_id, {}).get(detection_index)
    if ref is None:
        return
    dx = ref[0] * _REFERENCE_SCALE - z1[0]
    dy = ref[1] * _REFERENCE_SCALE - z1[1]
    k = (1 - sqrt(dx * dx + dy * dy) / c) * 100
    print(k if k > 0 else 0, '%')


def main():
    """Detect objects in every image, regress 3D boxes and report car metrics.

    For each image in the configured directory the function:
      * runs the YOLO detector and, for every recognised class, the 3D
        regressor to obtain dimensions and orientation, then draws the box;
      * writes a padded crop of the detection to 'out2/temp.png' and passes
        it to batch_predict to obtain a car-model class;
      * prints the predicted model, a distance estimate based on the row for
        that model in '1.csv', and a score against stored reference points;
      * writes the annotated frame to 'out/<img_id>.png'.

    Exits the process when no '.pkl' checkpoint is found or the image
    directory cannot be listed.
    """
    df = pd.read_csv('1.csv')
    FLAGS = parser.parse_args()
    classes = load_classes(META_DIR)
    base_dir = os.path.abspath(os.path.dirname(__file__))

    # load torch
    weights_path = base_dir + '/weights'
    model_lst = [
        x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
    ]
    if not model_lst:
        print('No previous model found, please train first!')
        exit()
    # sorted() puts the checkpoint with the greatest file name last.
    print('Using previous model %s' % model_lst[-1])
    my_vgg = vgg.vgg19_bn(pretrained=True)
    # TODO: load bins from file or something
    model = Model.Model(features=my_vgg.features, bins=2).cuda()
    checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    print(os.listdir(weights_path))

    # load yolo
    yolo = cv_Yolo(base_dir + '/weights')
    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    image_dir = FLAGS.image_dir
    cal_dir = FLAGS.cal_dir
    # In video mode the default directories are swapped for the video set.
    if FLAGS.video:
        if FLAGS.image_dir == "eval/image_2/" and FLAGS.cal_dir == "camera_cal/":
            image_dir = "eval/video/2011_09_26/image_2/"
            cal_dir = "eval/video/2011_09_26/"

    img_path = base_dir + "/" + image_dir
    # using P_rect from global calibration file
    calib_file = base_dir + "/" + cal_dir + "calib_cam_to_cam.txt"

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except OSError:
        print("\nError: no images in %s" % img_path)
        exit()

    for img_id in ids:
        start_time = time.time()
        img_file = img_path + img_id + ".png"
        print(img_file)

        truth_img = cv2.imread(img_file)
        if truth_img is None:
            # cv2.imread returns None for a missing or undecodable file
            # (e.g. a non-PNG entry in the directory); skip it rather than
            # fail further down.
            print('Skipping unreadable image %s' % img_file)
            continue
        img = np.copy(truth_img)
        yolo_img = np.copy(truth_img)

        detections = yolo.detect(yolo_img)

        # The index counts every detection, including skipped ones, because
        # the reference table is keyed by position in the detector output.
        for detection_index, detection in enumerate(detections, start=1):
            print('\n')
            if not averages.recognized_class(detection.detected_class):
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            try:
                detectedObject = DetectedObject(img, detection.detected_class,
                                                detection.box_2d, calib_file)
            except Exception:
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            # Crop taken from the untouched frame for the car-model classifier.
            srez = _padded_crop(truth_img, box_2d)

            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img
            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                orient, conf, dim = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]

            # The network output is added to the per-class average dimensions.
            dim += averages.get_item(detected_class)

            # Orientation: take the most confident bin, decode (cos, sin) and
            # shift by that bin's angle.
            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi

            if FLAGS.show_yolo:
                location, r, z1, c = plot_regressed_3d_bbox(
                    img, proj_matrix, box_2d, dim, alpha, theta_ray, truth_img)
            else:
                location, r, z1, c = plot_regressed_3d_bbox(
                    img, proj_matrix, box_2d, dim, alpha, theta_ray)

            # The classifier reads the crop back from disk.
            # NOTE(review): assumes SAMPLE_DIR resolves to 'out2/' -- confirm.
            cv2.imwrite('out2/temp.png', srez)
            pred_class = batch_predict(SAMPLE_DIR, ROOT_DIR,
                                       'stage-2-152-c.pkl', 'temp.png')
            model_name = classes[int(str(pred_class))]
            print('Possible car model:', model_name)

            # Kept separate from the regressed orientation `alpha` above.
            view_angle = angleofcam / img.shape[0] * r / 30
            h = df[df['model'] == model_name].values[0][3]
            _print_distance(view_angle, h)

            _print_reference_score(img_id, detection_index, z1, c)

        if FLAGS.show_yolo:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imwrite('out/' + img_id + ".png", numpy_vertical)
        else:
            cv2.imwrite('out/' + img_id + ".png", img)

        if not FLAGS.hide_debug:
            print("\n")
            print('Got %s poses in %.3f seconds' %
                  (len(detections), time.time() - start_time))
            print('-------------')
def main():
    """Detect objects in every image, regress 3D boxes and display the result.

    Loads the alphabetically last '.pkl' checkpoint from the 'weights'
    directory next to this file, runs the YOLO detector on each image of
    FLAGS.image_dir and, for every recognised class, the 3D regressor.  Boxes
    are drawn onto the frame, which is then shown in an OpenCV window.  In
    video mode frames advance automatically; otherwise SPACE advances and any
    other key exits.

    Exits the process when no checkpoint is found or the image directory
    cannot be listed.
    """
    FLAGS = parser.parse_args()
    base_dir = os.path.abspath(os.path.dirname(__file__))

    # load torch
    weights_path = base_dir + '/weights/'
    model_lst = [
        x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
    ]
    if not model_lst:
        print('No previous model found, please train first!')
        exit()
    print('Using previous model %s' % model_lst[-1])
    my_vgg = vgg.vgg19_bn(pretrained=True)
    # TODO: load bins from file or something
    model = Model.Model(features=my_vgg.features, bins=2).cuda()
    # weights_path already ends with a separator.
    checkpoint = torch.load(weights_path + model_lst[-1])
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # load yolo
    yolo = cv_Yolo(base_dir + '/weights')
    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)
    print(angle_bins)

    # The former FLAGS.video override assigned the very values it tested
    # for, so the flags are used as given.
    image_dir = FLAGS.image_dir
    cal_dir = FLAGS.cal_dir

    img_path = base_dir + "/" + image_dir
    # using P_rect from global calibration file
    calib_file = base_dir + "/" + cal_dir + "calib_cam_to_cam.txt"

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except OSError:
        print("\nError: no images in %s" % img_path)
        exit()

    for img_id in ids:
        start_time = time.time()
        img_file = img_path + img_id + ".png"

        truth_img = cv2.imread(img_file)
        if truth_img is None:
            # cv2.imread returns None for a missing or undecodable file
            # (e.g. a non-PNG entry in the directory); skip it rather than
            # fail further down.
            print('Skipping unreadable image %s' % img_file)
            continue
        img = np.copy(truth_img)
        yolo_img = np.copy(truth_img)

        detections = yolo.detect(yolo_img)

        for detection in detections:
            if not averages.recognized_class(detection.detected_class):
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            try:
                detectedObject = DetectedObject(img, detection.detected_class,
                                                detection.box_2d, calib_file)
            except Exception:
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img
            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                orient, conf, dim = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]

            # The network output is added to the per-class average dimensions.
            dim += averages.get_item(detected_class)

            # Orientation: take the most confident bin, decode (cos, sin) and
            # shift by that bin's angle.
            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi

            # A NaN dimension cannot be drawn; leave the frame untouched.
            if math.isnan(dim[0]):
                continue

            # Called for its drawing side effect on `img`.
            # NOTE: a disabled per-image label writer used to follow here; it
            # wrote class, alpha, the 2D box, dim, the first element of this
            # call's return value (the location) and theta_ray to
            # './Kitti/testing/testing_result/<img_id>.txt'.
            if FLAGS.show_yolo:
                plot_regressed_3d_bbox(img, proj_matrix, box_2d, dim, alpha,
                                       theta_ray, truth_img)
            else:
                plot_regressed_3d_bbox(img, proj_matrix, box_2d, dim, alpha,
                                       theta_ray)

        if FLAGS.show_yolo:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('SPACE for next image, any other key to exit',
                       numpy_vertical)
        else:
            cv2.imshow('3D detections', img)

        if not FLAGS.hide_debug:
            print("\n")
            print('Got %s poses in %.3f seconds' %
                  (len(detections), time.time() - start_time))
            print('-------------')

        if FLAGS.video:
            cv2.waitKey(1)
        elif cv2.waitKey(0) != 32:  # space bar
            exit()