예제 #1
0
def initialize_model():
    """Load the 3D regression network, the YOLO detector and helper tables.

    The last ``*.pkl`` checkpoint (in sorted filename order) found in the
    ``weights`` directory next to this file is loaded onto the CPU and the
    model is switched to evaluation mode.

    Returns:
        tuple: ``(yolo, model, averages, angle_bins)``.

    Raises:
        SystemExit: if the weights directory contains no ``.pkl`` checkpoint.
    """
    weights_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    model_lst = sorted(
        x for x in os.listdir(weights_path) if x.endswith('.pkl'))
    if not model_lst:
        # Raise SystemExit directly instead of calling the interactive
        # ``exit()`` helper: the helper is injected by ``site`` and is not
        # guaranteed to exist, and it terminated silently with status 0.
        raise SystemExit('No previous model found, please train first!')

    my_vgg = vgg.vgg19_bn(pretrained=True)
    # TODO: load bins from file or something
    model = Model.Model(features=my_vgg.features, bins=2)
    checkpoint = torch.load(os.path.join(weights_path, model_lst[-1]),
                            map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # load yolo (it is handed the same weights directory)
    yolo = cv_Yolo(weights_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    return (yolo, model, averages, angle_bins)
예제 #2
0
    def __init__(self):
        """Load the networks and set up the ROS subscriber and publishers.

        The last ``*.pkl`` checkpoint (in sorted filename order) from the
        ``weights`` directory next to this file is loaded into a CUDA model
        that is switched to evaluation mode.

        Raises:
            SystemExit: if the weights directory holds no ``.pkl`` checkpoint.
        """
        rospy.loginfo("pointcloud object detection is running...")

        # Directory containing this file; weights and calibration live below.
        base_dir = os.path.abspath(os.path.dirname(__file__))

        # frame size
        self.frame_x = 640
        self.frame_y = 480

        self.bridge = CvBridge()

        # cv_image and pcl variables
        # NOTE(review): the placeholder is allocated as (frame_x, frame_y),
        # i.e. (640, 480) — confirm whether (rows, cols) = (480, 640) was
        # intended.
        self.cv_image = np.zeros([self.frame_x, self.frame_y])
        self.pcl = None

        # transform config
        # self.tf_pub = tf.TransformBroadcaster()

        # load torch
        weights_path = base_dir + '/weights'
        model_lst = sorted(
            x for x in os.listdir(weights_path) if x.endswith('.pkl'))
        if not model_lst:
            # SystemExit carries the message and a non-zero status; the
            # interactive ``exit()`` helper is not meant for programs.
            raise SystemExit('No previous model found, please train first!')

        print('Using previous model %s' % model_lst[-1])
        my_vgg = vgg.vgg19_bn(pretrained=True)
        self.model = Model.Model(features=my_vgg.features, bins=2).cuda()
        checkpoint = torch.load(os.path.join(weights_path, model_lst[-1]))
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.eval()

        # load yolo (it is handed the same weights directory)
        self.yolo = cv_Yolo(weights_path)

        self.averages = ClassAverages.ClassAverages()

        # TODO: clean up how this is done. flag?
        self.angle_bins = generate_bins(2)

        self.calib_file = base_dir + "/camera_cal/calib_cam_to_cam.txt"

        # subscribers
        self.img_sub = rospy.Subscriber("/kitti/camera_color_right/image_raw",
                                        Image, self.rgb_callback)
        #self.pcl_sub = rospy.Subscriber("/camera/depth_registered/points", PointCloud2, self.pcl_callback)
        # publishers
        self.img_detected_pub = rospy.Publisher(
            "ROS_3D_BBox/img_detected_frame", Image, queue_size=100)
        self.location_pub = rospy.Publisher("ROS_3D_BBox/location_array",
                                            LocationArray,
                                            queue_size=100)
        self.rate = rospy.Rate(1)
예제 #3
0
def main():
    """Run live 3D bounding-box estimation on frames from camera 0.

    Each frame is passed to the YOLO detector; every recognised detection is
    cropped, fed to the regression network and drawn with
    ``plot_regressed_3d_bbox``. The loop ends when ``cv2.waitKey`` returns 27
    (the ESC key code).

    Raises:
        AssertionError: if no ``.pkl`` checkpoint is found in ``weights``.
    """
    root = os.path.dirname(os.path.abspath(__file__))
    weights_path = root + '/weights'
    model_lst = sorted(
        x for x in os.listdir(weights_path) if x.endswith('.pkl'))
    # Explicit raise (same exception type as the former ``assert``) so the
    # check is not stripped when Python runs with ``-O``.
    if not model_lst:
        raise AssertionError('No previous model found, please train first!')

    # Parse the command line before the expensive model load.
    FLAGS = parser.parse_args()

    print('Using previous model %s' % model_lst[-1])
    my_vgg = vgg.vgg19_bn(pretrained=False)
    # TODO: load bins from file or something
    model = Model.Model(features=my_vgg.features, bins=2).cuda()
    checkpoint = torch.load(os.path.join(weights_path, model_lst[-1]))
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # load yolo (it is handed the same weights directory)
    yolo = cv_Yolo(weights_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    # using P_rect from global calibration file
    calib_file = os.path.join(root + '/' + FLAGS.cal_dir,
                              'calib_cam_to_cam.txt')

    # using P from each frame
    # calib_path = root + '/Kitti/testing/calib/'

    cam = cv2.VideoCapture(0)
    try:
        while cv2.waitKey(5) != 27:
            # P for each frame
            # calib_file = calib_path + id + '.txt'

            ret, truth_img = cam.read()
            if not ret:
                continue
            start_time = time.time()
            img = truth_img.copy()
            yolo_img = truth_img.copy()
            detections = yolo.detect(yolo_img)

            for detection in detections:
                if not averages.recognized_class(detection.detected_class):
                    continue

                # This is throwing when the 2d bbox is invalid
                # TODO: better check
                try:
                    detectedObject = DetectedObject(
                        img, detection.detected_class, detection.box_2d,
                        calib_file)
                except Exception:
                    # ``Exception`` rather than a bare ``except`` so that
                    # Ctrl-C / SystemExit still stop the program.
                    continue

                theta_ray = detectedObject.theta_ray
                input_img = detectedObject.img
                proj_matrix = detectedObject.proj_matrix
                box_2d = detection.box_2d
                detected_class = detection.detected_class

                input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
                input_tensor[0, :, :, :] = input_img

                # Inference only: skip autograd bookkeeping.
                with torch.no_grad():
                    orient, conf, dim = model(input_tensor)
                orient = orient.detach().cpu().numpy()[0, :, :]
                conf = conf.detach().cpu().numpy()[0, :]
                dim = dim.detach().cpu().numpy()[0, :]
                # The network output is added to the per-class average.
                dim += averages.get_item(detected_class)

                # Use the orientation of the most confident bin.
                argmax = np.argmax(conf)
                cos, sin = orient[argmax, :2]
                alpha = np.arctan2(sin, cos)
                alpha += angle_bins[argmax] - np.pi

                if FLAGS.show_yolo:
                    location = plot_regressed_3d_bbox(img, proj_matrix,
                                                      box_2d, dim, alpha,
                                                      theta_ray, truth_img)
                else:
                    location = plot_regressed_3d_bbox(img, proj_matrix,
                                                      box_2d, dim, alpha,
                                                      theta_ray)
                if not FLAGS.hide_debug:
                    print('Estimated location: %s' % location)  # x,y,z

            if not FLAGS.hide_debug:
                print('Got %s poses in %.3f seconds\n' %
                      (len(detections), time.time() - start_time))

            if FLAGS.show_yolo:
                img = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('3D-DeepBox', img)
    finally:
        # Always hand the capture device back, even on an exception.
        cam.release()
예제 #4
0
def main():
    """Run the 3D regression network on the labelled objects under ``eval``.

    For each image returned by ``Dataset.all_objects`` the predicted
    dimensions and orientation of every object are drawn with
    ``plot_regressed_3d_bbox`` and shown next to the untouched image.

    Raises:
        AssertionError: if no ``.pkl`` checkpoint is found in ``weights``.
    """
    root = os.path.dirname(os.path.abspath(__file__))
    weights_path = root + '/weights'
    model_lst = sorted(
        x for x in os.listdir(weights_path) if x.endswith('.pkl'))
    # Explicit raise (same exception type as the former ``assert``) so the
    # check is not stripped when Python runs with ``-O``.
    if not model_lst:
        raise AssertionError('No previous model found, please train first!')

    print('Using previous model %s' % model_lst[-1])
    my_vgg = vgg.vgg19_bn(pretrained=False)
    model = Model.Model(features=my_vgg.features, bins=2).cuda()
    checkpoint = torch.load(os.path.join(weights_path, model_lst[-1]))
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # defaults to /eval
    dataset = Dataset(root + '/eval')
    averages = ClassAverages.ClassAverages()

    # NOTE(review): ``angle_bins`` and ``single_car`` are not defined in this
    # function; they must be provided at module level — confirm.
    all_images = dataset.all_objects()
    for key in sorted(all_images.keys()):
        start_time = time.time()
        data = all_images[key]

        truth_img = data['Image']
        img = np.copy(truth_img)
        objects = data['Objects']
        cam_to_img = data['Calib']

        for detectedObject in objects:
            label = detectedObject.label
            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img

            # The tensor is created on the GPU, so no further ``.cuda()``
            # call is needed (the old one discarded its result).
            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            # Inference only: skip autograd bookkeeping.
            with torch.no_grad():
                orient, conf, dim = model(input_tensor)
            orient = orient.detach().cpu().numpy()[0, :, :]
            conf = conf.detach().cpu().numpy()[0, :]
            dim = dim.detach().cpu().numpy()[0, :]

            # The network output is added to the per-class average.
            dim += averages.get_item(label['Class'])

            # Use the orientation of the most confident bin.
            argmax = np.argmax(conf)
            cos, sin = orient[argmax, :2]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax] - np.pi

            location = plot_regressed_3d_bbox(img, truth_img, cam_to_img,
                                              label['Box_2D'], dim, alpha,
                                              theta_ray)
            print('Truth pose: %s\nEstimated location: %s' %
                  (label['Location'], location))  # x,y,z

            # plot car by car
            if single_car:
                numpy_vertical = np.concatenate((truth_img, img), axis=0)
                cv2.imshow('3D-DeepBox', numpy_vertical)
                cv2.waitKey(0)

        print('Got %s poses in %.3f seconds\n' %
              (len(objects), time.time() - start_time))

        # plot image by image
        if not single_car:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('3D-DeepBox', numpy_vertical)
            if cv2.waitKey(0) == 27:  # key code 27 (ESC) stops the run
                return
예제 #5
0
def main():
    """Run YOLO 2D detection plus 3D box regression over an image directory.

    Every ``<id>.png`` in the image directory is paired with the per-frame
    calibration file ``eval/calib/<id>.txt``. Inference runs on the CPU.

    Raises:
        SystemExit: if no ``.pkl`` checkpoint is found, if the image
            directory cannot be listed, or when the user presses a key other
            than SPACE in non-video mode.
    """
    # Defaults (per the original author): cal_dir='camera_cal/',
    # hide_debug=False, image_dir='eval/image_2/', show_yolo=False,
    # video=False
    FLAGS = parser.parse_args()

    base_dir = os.path.abspath(os.path.dirname(__file__))

    # Note from the original author: there are two weight files, the
    # yolov3.weights file for YOLO 2D detection and the self-trained weights
    # that regress dimensions and alpha, named e.g. epoch_10.pkl.
    weights_path = base_dir + os.path.sep + 'weights' + os.path.sep
    model_lst = sorted(
        x for x in os.listdir(weights_path) if x.endswith('.pkl'))
    if not model_lst:
        # SystemExit carries the message and a non-zero status; the
        # interactive ``exit()`` helper is not meant for programs.
        raise SystemExit('No previous model found, please train first!')

    print('Using previous model %s' % model_lst[-1])

    # vgg19_bn extracts the image features that feed the three output
    # branches.
    # TODO: switch to VGG16_bn?
    my_vgg = vgg.vgg19_bn(pretrained=True)

    # TODO: load bins from file or something
    model = Model.Model(features=my_vgg.features, bins=2)

    # Run inference on the CPU.
    checkpoint = torch.load(os.path.join(weights_path, model_lst[-1]),
                            map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # load yolo (it is handed the same weights directory string)
    yolo = cv_Yolo(weights_path)

    # Per-class dimension statistics gathered from the training set.
    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    # Directory of the images to run detection on.
    image_dir = FLAGS.image_dir

    # FLAGS.video defaults to False. With both directories left at their
    # defaults, video mode switches to the bundled video sequence.
    # NOTE(review): FLAGS.cal_dir (meant for a projection matrix shared by
    # all images) is not used below; calibration is always read per frame
    # from eval/calib — confirm this is intended in video mode.
    if FLAGS.video:
        if FLAGS.image_dir == "eval/image_2/" and FLAGS.cal_dir == "camera_cal/":
            image_dir = "eval/video/2011_09_26/image_2/"

    img_path = base_dir + os.path.sep + image_dir

    # using P from each frame
    calib_path = (base_dir + os.path.sep + 'eval' + os.path.sep + 'calib' +
                  os.path.sep)

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except OSError:
        raise SystemExit("\nError: no images in %s" % img_path)

    for img_id in ids:

        start_time = time.time()

        img_file = os.path.join(img_path, img_id + ".png")

        # P for each frame
        calib_file = os.path.join(calib_path, img_id + ".txt")

        truth_img = cv2.imread(img_file)
        if truth_img is None:
            # cv2.imread returns None for missing or unreadable files (for
            # example a non-PNG entry in the directory); skip it.
            print('Could not read image %s, skipping' % img_file)
            continue
        img = np.copy(truth_img)
        yolo_img = np.copy(truth_img)

        # YOLO returns 2D pixel boxes and class names.
        detections = yolo.detect(yolo_img)

        for detection in detections:

            # A detected class must be one of the KITTI classes. YOLO knows
            # more classes than KITTI, so a detection may have no KITTI
            # counterpart and is then ignored.
            if not averages.recognized_class(detection.detected_class):
                print('class ' + detection.detected_class +
                      ' is not in KITTI class, so ignore this class')
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            # Pass the image, detected class, 2D box and this image's
            # calibration file to DetectedObject.
            try:
                detectedObject = DetectedObject(img, detection.detected_class,
                                                detection.box_2d, calib_file)
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that
                # Ctrl-C / SystemExit still stop the program.
                print("yolo检测错误,2D框无效!")
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224])
            input_tensor[0, :, :, :] = input_img

            # Predict orient, conf and dim (inference only, no autograd).
            with torch.no_grad():
                orient, conf, dim = model(input_tensor)
            orient = orient.detach().cpu().numpy()[0, :, :]
            conf = conf.detach().cpu().numpy()[0, :]
            dim = dim.detach().cpu().numpy()[0, :]
            # The network output is added to the per-class average.
            dim += averages.get_item(detected_class)
            # Take the bin with the highest confidence and use its
            # orientation as the final one.
            argmax = np.argmax(conf)
            orient = orient[argmax, :]

            # Predicted cos and sin. Per the original author, during training
            # these are cos(angle_diff) and sin(angle_diff), where angle_diff
            # is the angle between the true alpha (extended to 0..2*pi) and
            # the corresponding bin.
            cos = orient[0]
            sin = orient[1]

            # np.arctan2 takes sin as the y coordinate and cos as the x
            # coordinate and returns an angle in radians in [-pi, +pi].
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi  # final alpha

            # Show the 2D detections alongside; off by default.
            if FLAGS.show_yolo:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray,
                                                  truth_img)
            else:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray)

            if not FLAGS.hide_debug:  # FLAGS.hide_debug defaults to False
                # Print the location of every detected object. The y value
                # is shifted by half of dim[0]; per the original author this
                # keeps the output consistent with the KITTI dataset.
                location[1] += dim[0] / 2
                print('Estimated pose: %s' % location)

        if FLAGS.show_yolo:  # FLAGS.show_yolo defaults to False
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('SPACE for next image, any other key to exit',
                       numpy_vertical)
        else:
            cv2.imshow('3D detections', img)

        if not FLAGS.hide_debug:
            print('Got %s detect class in %.3f seconds' %
                  (len(detections), time.time() - start_time))
            print('-------------')

        if FLAGS.video:
            cv2.waitKey(1)
        elif cv2.waitKey(0) != 32:  # space bar
            raise SystemExit
예제 #6
0
def main():
    """Run detection plus 3D box regression and save annotated images.

    Every ``<id>.png`` in the image directory is paired with the calibration
    file ``<id>.txt`` from the calibration directory. Results are written to
    ``output/<id>_yolo.png`` (with ``--show_yolo``) or ``output/<id>_3d.png``.

    Raises:
        SystemExit: if no ``.pkl`` checkpoint is found or the image directory
            cannot be listed.
    """
    FLAGS = parser.parse_args()

    base_dir = os.path.abspath(os.path.dirname(__file__))

    # load torch
    weights_path = base_dir + '/weights'
    model_lst = sorted(
        x for x in os.listdir(weights_path) if x.endswith('.pkl'))
    if not model_lst:
        # SystemExit carries the message and a non-zero status; the
        # interactive ``exit()`` helper is not meant for programs.
        raise SystemExit('No previous model found, please train first!')

    print('Using previous model %s' % model_lst[-1])
    my_vgg = vgg.vgg19_bn(pretrained=True)
    # TODO: load bins from file or something
    model = Model.Model(features=my_vgg.features, bins=2).cuda()
    checkpoint = torch.load(os.path.join(weights_path, model_lst[-1]))
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # load yolo (it is handed the same weights directory)
    yolo = cv_Yolo(weights_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    image_dir = FLAGS.image_dir
    cal_dir = FLAGS.cal_dir
    # With both directories left at their defaults, video mode switches to
    # the bundled video sequence.
    if FLAGS.video:
        if FLAGS.image_dir == "eval/image_2/" and FLAGS.cal_dir == "camera_cal/":
            image_dir = "eval/video/2011_09_26/image_2/"
            cal_dir = "eval/video/2011_09_26/"

    img_path = base_dir + "/" + image_dir
    # using P_rect from global calibration file
    calib_path = base_dir + "/" + cal_dir
    # calib_file = calib_path + "calib_cam_to_cam.txt"

    # using P from each frame
    # calib_path = os.path.abspath(os.path.dirname(__file__)) + '/Kitti/testing/calib/'

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except OSError:
        raise SystemExit("\nError: no images in %s" % img_path)

    # cv2.imwrite does not create missing directories (it just reports
    # failure through its return value), so make sure the target exists.
    os.makedirs('output', exist_ok=True)

    for img_id in ids:

        start_time = time.time()

        img_file = os.path.join(img_path, img_id + ".png")

        # P for each frame
        calib_file = os.path.join(calib_path, img_id + ".txt")

        #comp_img = np.array(Image.open(img_file).convert('RGB'))
        truth_img = cv2.imread(img_file)
        if truth_img is None:
            # cv2.imread returns None for missing or unreadable files (for
            # example a non-PNG entry in the directory); skip it.
            print('Could not read image %s, skipping' % img_file)
            continue
        img = np.copy(truth_img)

        # NOTE(review): the detector is given the file path here, not the
        # decoded image — confirm cv_Yolo.detect accepts a path.
        detections = yolo.detect(img_file)

        for detection in detections:

            if not averages.recognized_class(detection.detected_class):
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            detected_object = DetectedObject(img, detection.detected_class,
                                             detection.box_2d, calib_file)

            theta_ray = detected_object.theta_ray
            input_img = detected_object.img
            proj_matrix = detected_object.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            # Inference only: skip autograd bookkeeping.
            with torch.no_grad():
                orient, conf, dim = model(input_tensor)
            orient = orient.detach().cpu().numpy()[0, :, :]
            conf = conf.detach().cpu().numpy()[0, :]
            dim = dim.detach().cpu().numpy()[0, :]

            # The network output is added to the per-class average.
            dim += averages.get_item(detected_class)

            # Use the orientation of the most confident bin.
            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi

            if FLAGS.show_yolo:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray,
                                                  truth_img)
            else:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray)

            if not FLAGS.hide_debug:
                print('Estimated pose: %s' % location)

        if FLAGS.show_yolo:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imwrite(os.path.join('output', img_id + '_yolo.png'),
                        numpy_vertical)
            #cv2.imshow('SPACE for next image, any other key to exit', numpy_vertical)
        else:
            cv2.imwrite(os.path.join('output', img_id + '_3d.png'), img)
            #cv2.imshow('3D detections', img)

        if not FLAGS.hide_debug:
            print("\n")
            print('Got %s poses in %.3f seconds' %
                  (len(detections), time.time() - start_time))
            print('-------------')
예제 #7
0
def main():
    """Run detection plus 3D box regression using a custom calibration file.

    Every ``<id>.png`` in the image directory is processed with the single
    calibration file ``calib_cam_to_cam_custom.txt`` from the calibration
    directory, and the annotated image is shown in a window.

    Raises:
        SystemExit: if no ``.pkl`` checkpoint is found, if the image
            directory cannot be listed, or when the user presses a key other
            than SPACE in non-video mode.
    """

    def _pad_to_wide_canvas(image):
        """Resize *image* to 374x374 and centre it on a black 375x1242 canvas."""
        image = cv2.resize(image, (374, 374))
        height, width, _ = image.shape
        width_pad = 1242
        height_pad = 375
        center_height = height_pad // 2
        center_width = width_pad // 2
        canvas = np.zeros([height_pad, width_pad, 3], dtype=np.uint8)
        start_height = center_height - height // 2
        stop_height = center_height + height // 2
        start_width = center_width - width // 2
        stop_width = center_width + width // 2
        canvas[start_height:stop_height, start_width:stop_width, :] = image
        return canvas

    FLAGS = parser.parse_args()

    base_dir = os.path.abspath(os.path.dirname(__file__))

    # load torch
    weights_path = base_dir + '/weights'
    model_lst = sorted(
        x for x in os.listdir(weights_path) if x.endswith('.pkl'))
    if not model_lst:
        # SystemExit carries the message and a non-zero status; the
        # interactive ``exit()`` helper is not meant for programs.
        raise SystemExit('No previous model found, please train first!')

    print('Using previous model %s' % model_lst[-1])
    my_vgg = vgg.vgg19_bn(pretrained=True)
    # TODO: load bins from file or something
    model = Model.Model(features=my_vgg.features, bins=2).cuda()
    checkpoint = torch.load(os.path.join(weights_path, model_lst[-1]))
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # load yolo (it is handed the same weights directory)
    yolo = cv_Yolo(weights_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    image_dir = FLAGS.image_dir
    cal_dir = FLAGS.cal_dir
    # With both directories left at their defaults, video mode switches to
    # the bundled video sequence.
    if FLAGS.video:
        if FLAGS.image_dir == "eval/image_2/" and FLAGS.cal_dir == "camera_cal/":
            image_dir = "eval/video/2011_09_26/image_2/"
            cal_dir = "eval/video/2011_09_26/"

    img_path = base_dir + "/" + image_dir
    # using P_rect from global calibration file
    calib_path = base_dir + "/" + cal_dir
    calib_file = os.path.join(calib_path, "calib_cam_to_cam_custom.txt")

    # using P from each frame
    # calib_path = os.path.abspath(os.path.dirname(__file__)) + '/Kitti/testing/calib/'

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except OSError:
        raise SystemExit("\nError: no images in %s" % img_path)

    # Developer switch: set to True to letterbox every frame before
    # detection (see _pad_to_wide_canvas).
    pad_image = False

    for img_id in ids:

        start_time = time.time()

        img_file = os.path.join(img_path, img_id + ".png")

        # P for each frame
        # calib_file = calib_path + id + ".txt"
        truth_img = cv2.imread(img_file)
        if truth_img is None:
            # cv2.imread returns None for missing or unreadable files (for
            # example a non-PNG entry in the directory); skip it.
            print('Could not read image %s, skipping' % img_file)
            continue
        if pad_image:
            truth_img = _pad_to_wide_canvas(truth_img)
        img = np.copy(truth_img)
        yolo_img = np.copy(truth_img)

        detections = yolo.detect(yolo_img)

        for detection in detections:

            if not averages.recognized_class(detection.detected_class):
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            try:
                detectedObject = DetectedObject(img, detection.detected_class,
                                                detection.box_2d, calib_file)
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that
                # Ctrl-C / SystemExit still stop the program.
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            # Inference only: skip autograd bookkeeping.
            with torch.no_grad():
                orient, conf, dim = model(input_tensor)
            orient = orient.detach().cpu().numpy()[0, :, :]
            conf = conf.detach().cpu().numpy()[0, :]
            dim = dim.detach().cpu().numpy()[0, :]

            # The network output is added to the per-class average.
            dim += averages.get_item(detected_class)

            # Use the orientation of the most confident bin.
            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi

            # Diagnostic dump; now honours --hide_debug like the other
            # debug output in this function.
            if not FLAGS.hide_debug:
                my_alpha = alpha - np.pi / 2
                print('cos:', cos)
                print('sin:', sin)
                print('new cos', np.cos(my_alpha))
                print('new sin', np.sin(my_alpha))
                print('adding', angle_bins[argmax])
                print('confidence', conf)
                # my_alpha wrapped to [0, 2*pi) and expressed in degrees.
                print('ANGLE', (my_alpha % (2 * np.pi)) / (2 * np.pi) * 360)
                print(theta_ray)

            if FLAGS.show_yolo:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray,
                                                  truth_img)
            else:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray)

            if not FLAGS.hide_debug:
                print('Estimated pose: %s' % location)

        if FLAGS.show_yolo:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('SPACE for next image, any other key to exit',
                       numpy_vertical)
        else:
            cv2.imshow('3D detections', img)

        if not FLAGS.hide_debug:
            print("\n")
            print('Got %s poses in %.3f seconds' %
                  (len(detections), time.time() - start_time))
            print('-------------')

        if FLAGS.video:
            cv2.waitKey(1)
        elif cv2.waitKey(0) != 32:  # space bar
            raise SystemExit
예제 #8
0
def main():
    """Evaluate the 3D regression network on ``nusc_kitti/mini_val``.

    For every labelled object the predicted orientation and dimensions are
    compared with the label, running totals are printed, and the annotated
    image is written to ``output/<key>_yolo.png``. The model runs on the CPU.

    Raises:
        SystemExit: if no ``.pkl`` checkpoint is found in ``weights``.
    """
    base_dir = os.path.abspath(os.path.dirname(__file__))
    weights_path = base_dir + '/weights'
    model_lst = sorted(
        x for x in os.listdir(weights_path) if x.endswith('.pkl'))
    if not model_lst:
        # SystemExit carries the message and a non-zero status; the
        # interactive ``exit()`` helper is not meant for programs.
        raise SystemExit('No previous model found, please train first!')

    print('Using previous model %s' % model_lst[-1])
    my_vgg = vgg.vgg19_bn(pretrained=True)
    #TODO model in Cuda throws an error
    model = Model.Model(features=my_vgg.features, bins=2)
    # The model stays on the CPU, so map the checkpoint tensors there too;
    # without this a checkpoint saved from a GPU needs CUDA just to load.
    checkpoint = torch.load(os.path.join(weights_path, model_lst[-1]),
                            map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    dataset = Dataset(base_dir + '/../nusc_kitti/mini_val')
    averages = ClassAverages.ClassAverages()
    all_images = dataset.all_objects()

    # cv2.imwrite does not create missing directories (it just reports
    # failure through its return value), so make sure the target exists.
    os.makedirs('output', exist_ok=True)

    orient_score = 0  # sum of per-object orientation scores
    l2 = 0            # sum of squared dimension errors
    tot = 0           # objects seen
    os_tot = 0        # objects counted towards orient_score
    # NOTE(review): ``single_car`` is not defined in this function; it must
    # be provided at module level — confirm.
    for key in sorted(all_images.keys()):
        data = all_images[key]

        truth_img = data['Image']
        img = np.copy(truth_img)
        objects = data['Objects']
        cam_to_img = data['Calib']

        for detected_object in objects:
            label = detected_object.label
            theta_ray = detected_object.theta_ray
            input_img = detected_object.img

            # Model and input both live on the CPU. The former
            # ``input_tensor.cuda()`` discarded its result and failed on
            # machines without CUDA, so it is gone.
            input_tensor = torch.zeros([1, 3, 224, 224])
            input_tensor[0, :, :, :] = input_img

            # Inference only: skip autograd bookkeeping.
            with torch.no_grad():
                orient, conf, dim = model(input_tensor)
            orient = orient.detach().cpu().numpy()[0, :, :]
            conf = conf.detach().cpu().numpy()[0, :]
            dim = dim.detach().cpu().numpy()[0, :]

            # The network output is added to the per-class average.
            dim += averages.get_item(label['Class'])

            # Use the orientation of the most confident bin.
            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            alpha = np.arctan2(sin, cos)
            alpha += dataset.angle_bins[argmax]
            alpha -= np.pi
            delta_theta = label['Alpha'] - alpha
            tot += 1
            # Traffic cones are left out of the orientation score.
            if label['Class'] != 'traffic_cone':
                orient_score += (1 + np.cos(delta_theta)) / 2
                os_tot += 1
            label_dim = label['Dimensions']
            l2 += (dim[0] - label_dim[0])**2 + (dim[1] - label_dim[1])**2 + (
                dim[2] - label_dim[2])**2
            # os_tot is still 0 while only traffic cones have been seen;
            # report NaN instead of dividing by zero.
            average_score = orient_score / os_tot if os_tot else float('nan')
            print('Average Orientation Score', average_score)
            print('L2 Loss', l2 / tot)
            print('Total Orientation Examples', os_tot)
            print('Total Examples', tot)
            location = plot_regressed_3d_bbox(img, truth_img, cam_to_img,
                                              label['Box_2D'], dim, alpha,
                                              theta_ray)

            print('Estimated pose: %s' % location)
            print('Truth pose: %s' % label['Location'])
            print('-------------')

            # plot car by car
            if single_car:
                numpy_vertical = np.concatenate((truth_img, img), axis=0)
                #cv2.imshow('2D detection on top, 3D prediction on bottom', numpy_vertical)
                #cv2.waitKey(0)
                cv2.imwrite(os.path.join('output', key + '_yolo.png'),
                            numpy_vertical)

        # plot image by image
        if not single_car:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imwrite(os.path.join('output', key + '_yolo.png'),
                        numpy_vertical)
예제 #9
0
def main():
    """Run detection plus 3D box regression and report each box's corners.

    Images are read from ``image_dir`` and paired with ``<stem>.txt`` from
    ``cal_dir`` (both relative to the current working directory). Inference
    runs on the CPU; the corners of every regressed box are handed to
    ``object_info`` and the annotated image is shown in a window.

    Raises:
        SystemExit: if no ``.pkl`` checkpoint is found, or when the user
            presses a key other than SPACE.
    """
    FLAGS = parser.parse_args()

    # load torch
    weights_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    model_lst = sorted(
        x for x in os.listdir(weights_path) if x.endswith('.pkl'))
    if not model_lst:
        # Previously this exited silently with status 0; say why.
        raise SystemExit('No previous model found, please train first!')

    my_vgg = vgg.vgg19_bn(pretrained=True)
    # TODO: load bins from file or something
    model = Model.Model(features=my_vgg.features, bins=2)
    checkpoint = torch.load(os.path.join(weights_path, model_lst[-1]),
                            map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # load yolo (it is handed the same weights directory)
    yolo = cv_Yolo(weights_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    image_dir = FLAGS.image_dir
    cal_dir = FLAGS.cal_dir
    # With both directories left at their defaults, video mode switches to
    # the bundled video sequence.
    if FLAGS.video:
        if FLAGS.image_dir == "eval/image_2/" and FLAGS.cal_dir == "camera_cal/":
            image_dir = "eval/video/2011_09_26/image_2/"
            cal_dir = "eval/video/2011_09_26/"

    img_path = image_dir
    calib_path = cal_dir

    # Sorted so the images are visited in a reproducible order.
    for img_id in sorted(os.listdir(img_path)):
        # Skip notebook checkpoints and text files. The length check keeps
        # entries without a dot from raising IndexError.
        name_parts = img_id.split(".")
        if img_id == ".ipynb_checkpoints" or (len(name_parts) > 1
                                              and name_parts[1] == "txt"):
            continue

        print(img_id)
        start_time = time.time()

        img_file = os.path.join(img_path, img_id)

        # P for each frame
        calib_file = os.path.join(calib_path, name_parts[0] + ".txt")
        truth_img = cv2.imread(img_file)
        if truth_img is None:
            # cv2.imread returns None for anything it cannot decode (for
            # example a sub-directory); skip it.
            print('Could not read image %s, skipping' % img_file)
            continue
        # truth_img = cv2.resize(truth_img, (480,640), interpolation=cv2.INTER_AREA)
        img = np.copy(truth_img)
        yolo_img = np.copy(truth_img)

        detections = yolo.detect(yolo_img)

        for detection in detections:

            print(detection.detected_class)
            if not averages.recognized_class(detection.detected_class):
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            try:
                detectedObject = DetectedObject(img, detection.detected_class,
                                                detection.box_2d, calib_file)
            except Exception:
                # Skip this detection. The old code stored None here and
                # then crashed with AttributeError on the next line.
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224])
            input_tensor[0, :, :, :] = input_img

            # Inference only: skip autograd bookkeeping.
            with torch.no_grad():
                orient, conf, dim = model(input_tensor)
            orient = orient.detach().cpu().numpy()[0, :, :]
            conf = conf.detach().cpu().numpy()[0, :]
            dim = dim.detach().cpu().numpy()[0, :]

            # The network output is added to the per-class average.
            dim += averages.get_item(detected_class)
            # Use the orientation of the most confident bin.
            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi

            if FLAGS.show_yolo:
                location, corners = plot_regressed_3d_bbox(
                    img, proj_matrix, box_2d, dim, alpha, theta_ray,
                    truth_img)
            else:
                location, corners = plot_regressed_3d_bbox(
                    img, proj_matrix, box_2d, dim, alpha, theta_ray)

            object_info(corners)
            # if not FLAGS.hide_debug:
            #     print('Estimated pose: %s'%location)

        if FLAGS.show_yolo:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('SPACE for next image, any other key to exit',
                       numpy_vertical)
            # cv2.imwrite("out_"+img_id, numpy_vertical)
        else:
            # img = cv2.resize(img, (540,1160))
            img = ResizeWithAspectRatio(img, height=950)
            cv2.imshow('3D detections', img)
            # cv2.imwrite("out_"+img_id, img)

        if cv2.waitKey(0) != 32:  # space bar
            raise SystemExit
예제 #10
0
# 2D detector: Darknet model built from the config and weights named in
# ``args``; moved to the GPU only when ``cuda`` is set.
model = Darknet(args.config_path, img_size=args.img_size)
model.load_weights(args.weights_path)

if cuda:
    model.cuda()

model.eval()
dataloader = DataLoader(ImageFolder(args.image_folder, img_size=args.img_size),
                        batch_size=1,
                        shuffle=False)
classes = load_classes(args.class_path)  # Extracts class labels from file
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

# 3D regression network. It follows the same ``cuda`` switch as the 2D
# detector instead of being moved to the GPU unconditionally, so the script
# also starts on CPU-only hosts.
my_vgg = vgg.vgg19_bn(pretrained=True)
model_3d = Model.Model(features=my_vgg.features, bins=2)
checkpoint = torch.load("./3d_info_weights/best.pkl",
                        map_location=None if cuda else 'cpu')
model_3d.load_state_dict(checkpoint['model_state_dict'])
if cuda:
    model_3d.cuda()
model_3d.eval()
calib_file = "./camera_cal/" + "calib_cam_to_cam.txt"
angle_bins = generate_bins(2)

imgs = []
img_detections = []

total = len(dataloader)
for batch_i, (img_paths, input_imgs) in enumerate(dataloader):

    # Keep the batch on the same device as ``model``.
    if cuda:
        input_imgs = input_imgs.cuda()
    with torch.no_grad():
        detections = model(input_imgs)
예제 #11
0
# Hard-coded reference points used to score detections, keyed by image id.
# Entry i (0-based) is compared against the (i + 1)-th detection returned by
# the 2D detector for that image.
# NOTE(review): presumably hand-measured positions for a fixed evaluation
# set -- confirm with the author.
_REFERENCE_POINTS = {
    '0000000043': ((1313, 546), (716, 391), (781, 341), (1093, 423),
                   (982, 348)),
    '0000000098': ((1303, 496), (1180, 406), (761, 399), (657, 461),
                   (898, 312)),
    '0000000191': ((1212, 449), (757, 398), (838, 335), (1129, 395),
                   (980, 325)),
    '0000000559': ((753, 400), (815, 366)),
    '0000000722': ((1395, 508), (849, 377), (1035, 360)),
    '0000001038': ((371, 418), (812, 366), (900, 316), (1141, 489)),
    '1000000529': ((563, 462), (685, 405)),
    '1000000594': ((648, 470), (796, 359), (1268, 458), (1071, 352)),
    '1000000616': ((1319, 489), (1021, 313)),
    '1000000776': ((1458, 446), (1160, 373), (656, 359), (1103, 349),
                   (930, 335), (1036, 340), (584, 386), (435, 426),
                   (754, 328)),
    '1000000864': ((1651, 463), (1031, 302), (962, 348), (655, 355),
                   (802, 310)),
}

# Images whose single reference point is compared against every detection,
# regardless of the detection's position in the list.
_SHARED_REFERENCE_POINTS = {
    '0000000272': (1251, 467),
}

# Factor applied to the reference coordinates before they are compared.
_REFERENCE_SCALE = 0.63


def _reference_point(img_id, detection_index):
    """Return the (x, y) reference for a 1-based detection index, or None."""
    if img_id in _SHARED_REFERENCE_POINTS:
        return _SHARED_REFERENCE_POINTS[img_id]
    points = _REFERENCE_POINTS.get(img_id, ())
    if 1 <= detection_index <= len(points):
        return points[detection_index - 1]
    return None


def _print_match_score(ref_point, z1, c):
    """Print a percentage that falls linearly with distance from ref_point.

    The score is 100 when ``z1`` coincides with the scaled reference point,
    decreases by ``100 / c`` per unit of Euclidean distance, and is printed
    as 0 once it would become non-positive.
    """
    dx = ref_point[0] * _REFERENCE_SCALE - z1[0]
    dy = ref_point[1] * _REFERENCE_SCALE - z1[1]
    k = (1 - sqrt(dx * dx + dy * dy) / c) * 100
    print(k if k > 0 else 0, '%')


def _padded_crop_bounds(box_2d, img_shape):
    """Return (ymin, ymax, xmin, xmax) for box_2d padded and clamped.

    The box is grown by a fifth of its height above and below and by a fifth
    of its width left and right, then limited to ``[0, size - 1]`` per axis.
    """
    pad_y = (box_2d[1][1] - box_2d[0][1]) // 5
    pad_x = (box_2d[1][0] - box_2d[0][0]) // 5
    ymin = max(0, box_2d[0][1] - pad_y)
    ymax = min(img_shape[0] - 1, box_2d[1][1] + pad_y)
    xmin = max(0, box_2d[0][0] - pad_x)
    xmax = min(img_shape[1] - 1, box_2d[1][0] + pad_x)
    return ymin, ymax, xmin, xmax


def _print_distance_estimate(h, view_angle):
    """Print the distance derived from ``h``, the module-level ``H`` and an angle.

    Nothing can be computed when ``4 * H * (H - h)`` is not positive; in that
    case a message is printed instead.  Otherwise the angle is capped so the
    square root below stays real for the expected range of angles, both
    roots are evaluated and the larger one is printed.
    """
    span = 4 * H * (H - h)
    if span <= 0:
        print("Сan't estimate the distance")
        return

    max_angle = math.atan(h / math.sqrt(span))
    if view_angle > max_angle:
        view_angle = max_angle - 0.05

    tan_a = math.tan(view_angle)
    ratio = h / H
    root = math.sqrt((h * h) / (H * H) - 4 * tan_a * tan_a * (H - h) / H)
    s1 = (ratio + root) * H / (2 * tan_a)
    s2 = (ratio - root) * H / (2 * tan_a)
    print('Distance to car:', max(s1, s2), 'mm')


def main():
    """Detect cars in every image, annotate them and print per-car estimates.

    For each image in the configured directory the function runs the YOLO
    detector, regresses a 3D box for every recognised detection, classifies
    a padded crop of the detection to guess the car model, prints a distance
    estimate and (for the images listed in the reference tables) a match
    score.  The annotated image is written to ``out/<img_id>.png``.

    Relies on module-level names defined outside this function (``parser``,
    ``META_DIR``, ``SAMPLE_DIR``, ``ROOT_DIR``, ``H``, ``angleofcam`` ...).

    Raises:
        SystemExit: with status 1 when no ``.pkl`` checkpoint is found or the
            image directory cannot be listed.
    """
    df = pd.read_csv('1.csv')
    FLAGS = parser.parse_args()
    classes = load_classes(META_DIR)

    base_dir = os.path.abspath(os.path.dirname(__file__))

    # load torch: the alphabetically last .pkl checkpoint is used
    weights_path = base_dir + '/weights'
    model_lst = [
        x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
    ]
    if not model_lst:
        print('No previous model found, please train first!')
        raise SystemExit(1)

    print('Using previous model %s' % model_lst[-1])
    my_vgg = vgg.vgg19_bn(pretrained=True)
    # TODO: load bins from file or something
    model = Model.Model(features=my_vgg.features, bins=2).cuda()
    checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    print(os.listdir(weights_path))

    # load yolo (its files live in the same weights directory)
    yolo = cv_Yolo(weights_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    image_dir = FLAGS.image_dir
    cal_dir = FLAGS.cal_dir
    # In video mode, swap the default still-image paths for the video ones.
    if (FLAGS.video and FLAGS.image_dir == "eval/image_2/"
            and FLAGS.cal_dir == "camera_cal/"):
        image_dir = "eval/video/2011_09_26/image_2/"
        cal_dir = "eval/video/2011_09_26/"

    img_path = base_dir + "/" + image_dir
    # using P_rect from global calibration file
    calib_path = base_dir + "/" + cal_dir
    calib_file = calib_path + "calib_cam_to_cam.txt"

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except OSError:
        print("\nError: no images in %s" % img_path)
        raise SystemExit(1)

    for img_id in ids:
        start_time = time.time()

        img_file = img_path + img_id + ".png"
        print(img_file)
        truth_img = cv2.imread(img_file)
        if truth_img is None:
            # cv2.imread signals an unreadable/missing file by returning None;
            # the directory may contain entries that are not .png images.
            print("Warning: could not read %s, skipping" % img_file)
            continue

        img = np.copy(truth_img)
        yolo_img = np.copy(truth_img)
        detections = yolo.detect(yolo_img)

        # The index counts every detection, including the ones skipped
        # below, because the reference tables are keyed by that position.
        for detection_index, detection in enumerate(detections, start=1):
            print('\n')
            if not averages.recognized_class(detection.detected_class):
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            try:
                detectedObject = DetectedObject(img, detection.detected_class,
                                                detection.box_2d, calib_file)
            except Exception:
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            # Padded crop of the untouched frame, later fed to the classifier.
            ymin, ymax, xmin, xmax = _padded_crop_bounds(box_2d, img.shape)
            srez = truth_img[ymin:ymax, xmin:xmax, :]

            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]

            dim += averages.get_item(detected_class)

            # Use the most confident orientation bin.
            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi

            # NOTE(review): r, z1 and c are opaque here; they come straight
            # from plot_regressed_3d_bbox -- confirm their meaning there.
            if FLAGS.show_yolo:
                _, r, z1, c = plot_regressed_3d_bbox(
                    img, proj_matrix, box_2d, dim, alpha, theta_ray, truth_img)
            else:
                _, r, z1, c = plot_regressed_3d_bbox(
                    img, proj_matrix, box_2d, dim, alpha, theta_ray)

            # Classify the crop to guess the car model.
            cv2.imwrite('out2/temp.png', srez)
            pred_class = batch_predict(SAMPLE_DIR, ROOT_DIR,
                                       'stage-2-152-c.pkl', 'temp.png')
            car_model = classes[int(str(pred_class))]
            print('Possible car model:', car_model)

            view_angle = angleofcam / img.shape[0] * r / 30
            # NOTE(review): raises IndexError when '1.csv' has no row for
            # this model; column 3 is read as the value `h`.
            h = df[df['model'] == car_model].values[0][3]
            _print_distance_estimate(h, view_angle)

            ref_point = _reference_point(img_id, detection_index)
            if ref_point is not None:
                _print_match_score(ref_point, z1, c)

        if FLAGS.show_yolo:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imwrite('out/' + img_id + ".png", numpy_vertical)
        else:
            cv2.imwrite('out/' + img_id + ".png", img)

        if not FLAGS.hide_debug:
            print("\n")
            print('Got %s poses in %.3f seconds' %
                  (len(detections), time.time() - start_time))
            print('-------------')
예제 #12
0
def main():
    """Run 2D detection plus 3D box regression on every image and display it.

    Loads the alphabetically last ``.pkl`` checkpoint from ``weights/``,
    detects objects with YOLO, regresses orientation and dimensions for each
    recognised detection, draws the 3D box and shows the result in an OpenCV
    window.  In video mode frames advance automatically; otherwise SPACE
    advances and any other key quits.

    Relies on module-level names defined outside this function (``parser``,
    ``cv_Yolo``, ``generate_bins`` ...).

    Raises:
        SystemExit: with status 1 when no checkpoint is found or the image
            directory cannot be listed; with status 0 when the user quits.
    """
    FLAGS = parser.parse_args()

    base_dir = os.path.abspath(os.path.dirname(__file__))

    # load torch
    weights_path = base_dir + '/weights/'
    model_lst = [
        x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
    ]
    if not model_lst:
        print('No previous model found, please train first!')
        raise SystemExit(1)

    print('Using previous model %s' % model_lst[-1])
    my_vgg = vgg.vgg19_bn(pretrained=True)
    # TODO: load bins from file or something
    model = Model.Model(features=my_vgg.features, bins=2).cuda()
    # os.path.join avoids the doubled slash (weights_path already ends in '/').
    checkpoint = torch.load(os.path.join(weights_path, model_lst[-1]))
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # load yolo
    yolo_path = base_dir + '/weights'
    yolo = cv_Yolo(yolo_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)
    print(angle_bins)

    # The former FLAGS.video override only re-assigned the very values it
    # had just compared equal to, so the flags are used as given.
    image_dir = FLAGS.image_dir
    cal_dir = FLAGS.cal_dir

    img_path = base_dir + "/" + image_dir
    # using P_rect from global calibration file
    calib_path = base_dir + "/" + cal_dir
    calib_file = calib_path + "calib_cam_to_cam.txt"

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except OSError:
        print("\nError: no images in %s" % img_path)
        raise SystemExit(1)

    for img_id in ids:
        start_time = time.time()

        img_file = img_path + img_id + ".png"
        truth_img = cv2.imread(img_file)
        if truth_img is None:
            # cv2.imread signals an unreadable/missing file by returning None;
            # the directory may contain entries that are not .png images.
            print("Warning: could not read %s, skipping" % img_file)
            continue

        img = np.copy(truth_img)
        yolo_img = np.copy(truth_img)

        detections = yolo.detect(yolo_img)

        for detection in detections:
            if not averages.recognized_class(detection.detected_class):
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            try:
                detectedObject = DetectedObject(img, detection.detected_class,
                                                detection.box_2d, calib_file)
            except Exception:
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix

            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                [orient, conf, dim] = model(input_tensor)

            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]
            dim += averages.get_item(detected_class)

            # Use the most confident orientation bin.
            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]

            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi

            # A NaN dimension means the regression failed; draw nothing.
            if math.isnan(dim[0]):
                continue

            if FLAGS.show_yolo:
                plot_regressed_3d_bbox(img, proj_matrix, box_2d, dim, alpha,
                                       theta_ray, truth_img)
            else:
                plot_regressed_3d_bbox(img, proj_matrix, box_2d, dim, alpha,
                                       theta_ray)

        if FLAGS.show_yolo:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('SPACE for next image, any other key to exit',
                       numpy_vertical)
        else:
            cv2.imshow('3D detections', img)

        if not FLAGS.hide_debug:
            print("\n")
            print('Got %s poses in %.3f seconds' %
                  (len(detections), time.time() - start_time))
            print('-------------')

        if FLAGS.video:
            cv2.waitKey(1)
        elif cv2.waitKey(0) != 32:  # space bar
            raise SystemExit(0)