Example #1
def load_yolov2(image,output_file,dataset='coco',threshold=0.5,nms_thresh=0.4):
    batch_size = 1
    confidence = threshold
    start = 0
    global num_classes
    imlist = image
    output_file_names = [output_file]

    CUDA = torch.cuda.is_available()
    
    if dataset == "pascal":
        inp_dim = 416
        num_classes = 20
        classes = load_classes('data/voc.names')
        weightsfile = 'yolov2-voc.weights'
        cfgfile = "cfg/yolo-voc.cfg"

    
    elif dataset == "coco":
        inp_dim = 544
        num_classes = 80
        classes = load_classes('data/coco.names')
        weightsfile = 'yolov2.weights'
        cfgfile = "cfg/yolo.cfg" 
        
    else: 
        print("Invalid dataset")
        exit()

        
    stride = 32

    #Set up the neural network
    print("Loading network.....")
    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    print("Network successfully loaded")
    
    
    #If there's a GPU available, put the model on GPU
    if CUDA:
        model.cuda()
    
    #Dry-run forward pass on a dummy input
    model(get_test_input(inp_dim, CUDA))
    #Set the model in evaluation mode
    model.eval()
    
    read_dir = time.time()
    #Detection phase
    load_batch = time.time()
    batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))]))
    im_batches = [x[0] for x in batches]
    orig_ims = [x[1] for x in batches]
    im_dim_list = [x[2] for x in batches]
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
    
    if CUDA:
        im_dim_list = im_dim_list.cuda()
    
    leftover = 0
    
    if (len(im_dim_list) % batch_size):
        leftover = 1
        
    if batch_size != 1:
        num_batches = len(imlist) // batch_size + leftover            
        im_batches = [torch.cat((im_batches[i*batch_size : min((i +  1)*batch_size,
                            len(im_batches))]))  for i in range(num_batches)]        


    i = 0
    output = torch.FloatTensor(1, 8)
    write = False

    start_det_loop = time.time()
    for batch in im_batches:
        #load the image 
        start = time.time()
        if CUDA:
            batch = batch.cuda()
       
        prediction = model(Variable(batch, volatile = True))
        prediction = prediction.data

        #Apply offsets to the result predictions
        #Transform the predictions as described in the YOLO paper
        #flatten the prediction vector
        # B x (bbox coord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes)
        # Put every proposed box as a row.
        #get the boxes with object confidence > threshold
        #Convert the coordinates to absolute coordinates
        prediction = predict_transform(prediction, inp_dim, stride, model.anchors, num_classes, confidence, CUDA)

        if type(prediction) == int:
            i += 1
            continue

        #perform NMS on these boxes and save the results.
        #NMS and saving could be done separately for better abstraction,
        #but both operations require looping, so they are clubbed into one
        #loop instead of two; loops are slower than vectorised operations.
        prediction = write_results(prediction, num_classes, nms = True, nms_conf = nms_thresh)

        end = time.time()

        prediction[:,0] += i*batch_size

        if not write:
            output = prediction
            write = True
        else:
            output = torch.cat((output,prediction))

        for image in imlist[i*batch_size: min((i +  1)*batch_size, len(imlist))]:
            im_id = imlist.index(image)
            objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
            print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start)/batch_size))
            print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
            print("----------------------------------------------------------")
        i += 1

        if CUDA:
            torch.cuda.synchronize()

    output_recast = time.time()
    output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))
        
    im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())/inp_dim
    output[:,1:5] *= im_dim_list
    
    
    class_load = time.time()

    colors = pkl.load(open("pallete", "rb"))

    draw = time.time()

    def write(x, batches, results):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        img = results[int(x[0])]
        cls = int(x[-1])
        label = "{0}".format(classes[cls])
        color = random.choice(colors)
        cv2.rectangle(img, c1, c2,color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2,color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1);
        return img

    list(map(lambda x: write(x, im_batches, orig_ims), output))

    det_names = output_file_names

    list(map(cv2.imwrite, det_names, orig_ims))
    
    end = time.time()
    
    print()
    print("SUMMARY")
    print("----------------------------------------------------------")
    print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
    print()
    print("{:25s}: {:2.3f}".format("Reading addresses", load_batch - read_dir))
    print("{:25s}: {:2.3f}".format("Loading batch", start_det_loop - load_batch))
    print("{:25s}: {:2.3f}".format("Detection (" + str(len(imlist)) +  " images)", output_recast - start_det_loop))
    print("{:25s}: {:2.3f}".format("Output Processing", class_load - output_recast))
    print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
    print("{:25s}: {:2.3f}".format("Average time_per_img", (end - load_batch)/len(imlist)))
    print("----------------------------------------------------------")

    
    torch.cuda.empty_cache()
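
A minimal, hedged driver for the function above, assuming the repository's Darknet, load_classes, prep_image and get_test_input helpers are importable and the cfg/weights files named in the function sit in the working directory (the image path is a placeholder):

# Hypothetical usage of load_yolov2; 'imgs/dog.jpg' is a placeholder path.
images = ['imgs/dog.jpg']  # imlist is treated as a list of image paths
load_yolov2(images, 'det_dog.jpg', dataset='coco',
            threshold=0.5, nms_thresh=0.4)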
Example #2
    # Get the dataloader for test data
    test_loader = torch.utils.data.DataLoader(dataset.listDataset(
        testlist,
        shape=(test_width, test_height),
        shuffle=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]),
        train=False),
                                              batch_size=1,
                                              shuffle=False,
                                              **kwargs)

    # Pass the model to GPU
    if use_cuda:
        model = model.cuda()  # or: torch.nn.DataParallel(model, device_ids=[0]).cuda() for multi-GPU parallelism

    # Get the optimizer
    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        if key.find('.bn') >= 0 or key.find('.bias') >= 0:
            params += [{'params': [value], 'weight_decay': 0.0}]
        else:
            params += [{'params': [value], 'weight_decay': decay * batch_size}]
    optimizer = optim.SGD(model.parameters(),
                          lr=learning_rate / batch_size,
                          momentum=momentum,
                          dampening=0,
                          weight_decay=decay * batch_size)
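
Note that the params list assembled above is never handed to the optimizer: optim.SGD receives model.parameters(), so every parameter gets the same weight decay. If the intent is to exempt batch-norm and bias terms from decay, the per-parameter groups would be passed instead; a hedged sketch:

# Sketch: pass the parameter groups so the weight_decay override
# on the bn/bias entries actually takes effect.
optimizer = optim.SGD(params,
                      lr=learning_rate / batch_size,
                      momentum=momentum,
                      dampening=0,
                      weight_decay=decay * batch_size)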
Example #3
def valid(datacfg, cfgfile, weightfile, outfile):
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    name_list = options['names']
    prefix = 'results'
    names = load_class_names(name_list)

    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    m = Darknet(cfgfile)
    m.print_network()
    m.load_weights(weightfile)
    m.cuda()
    m.eval()

    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(m.width, m.height),
                                        shuffle=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 2
    assert (valid_batchsize > 1)

    kwargs = {'num_workers': 4, 'pin_memory': True}
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batchsize,
                                               shuffle=False,
                                               **kwargs)

    fps = [0] * m.num_classes
    if not os.path.exists('results'):
        os.mkdir('results')
    for i in range(m.num_classes):
        buf = '%s/%s%s.txt' % (prefix, outfile, names[i])
        fps[i] = open(buf, 'w')

    lineId = -1

    conf_thresh = 0.005
    nms_thresh = 0.45
    if m.net_name() == 'region':  # region_layer
        shape = (0, 0)
    else:
        shape = (m.width, m.height)
    for _, (data, target, org_w, org_h) in enumerate(valid_loader):
        data = data.cuda()
        output = m(data)
        batch_boxes = get_all_boxes(output,
                                    shape,
                                    conf_thresh,
                                    m.num_classes,
                                    only_objectness=0,
                                    validation=True)

        for i in range(len(batch_boxes)):
            lineId += 1
            fileId = os.path.basename(valid_files[lineId]).split('.')[0]
            #width, height = get_image_size(valid_files[lineId])
            width, height = float(org_w[i]), float(org_h[i])
            print(valid_files[lineId])
            boxes = batch_boxes[i]
            correct_yolo_boxes(boxes, width, height, m.width, m.height)
            boxes = nms(boxes, nms_thresh)
            for box in boxes:
                x1 = (box[0] - box[2] / 2.0) * width
                y1 = (box[1] - box[3] / 2.0) * height
                x2 = (box[0] + box[2] / 2.0) * width
                y2 = (box[1] + box[3] / 2.0) * height

                det_conf = box[4]
                for j in range((len(box) - 5) // 2):
                    cls_conf = box[5 + 2 * j]
                    cls_id = box[6 + 2 * j]
                    prob = det_conf * cls_conf
                    fps[cls_id].write('%s %f %f %f %f %f\n' %
                                      (fileId, prob, x1, y1, x2, y2))

    for i in range(m.num_classes):
        fps[i].close()
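
The boxes produced above hold normalized (center-x, center-y, width, height) coordinates that the inner loop converts to absolute corners before writing the per-class result files. The same conversion as a standalone sketch:

def box_cxcywh_to_xyxy(box, width, height):
    # box holds normalized (cx, cy, w, h); returns absolute corner coordinates.
    x1 = (box[0] - box[2] / 2.0) * width
    y1 = (box[1] - box[3] / 2.0) * height
    x2 = (box[0] + box[2] / 2.0) * width
    y2 = (box[1] + box[3] / 2.0) * height
    return x1, y1, x2, y2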
Example #4
def valid(datacfg, cfgfile, weightfile, outfile):
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname = options['mesh']
    backupdir = options['backup']
    name = options['name']
    if not os.path.exists(backupdir):
        makedirs(backupdir)

    # Parameters
    prefix = 'results'
    seed = int(time.time())
    gpus = '0'  # Specify which gpus to use
    test_width = 544
    test_height = 544
    torch.manual_seed(seed)
    use_cuda = True
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)
    save = False
    testtime = True
    num_classes = 1
    testing_samples = 0.0
    eps = 1e-5
    notpredicted = 0
    conf_thresh = 0.1
    nms_thresh = 0.4
    match_thresh = 0.5
    if save:
        makedirs(backupdir + '/test')
        makedirs(backupdir + '/test/gt')
        makedirs(backupdir + '/test/pr')

    # To save
    testing_error_trans = 0.0
    testing_error_angle = 0.0
    testing_error_pixel = 0.0
    errs_2d = []
    errs_3d = []
    errs_trans = []
    errs_angle = []
    errs_corner2D = []
    preds_trans = []
    preds_rot = []
    preds_corners2D = []
    gts_trans = []
    gts_rot = []
    gts_corners2D = []
    edges_corners = [[0, 1], [0, 2], [0, 4], [1, 3], [1, 5], [2, 3], [2, 6],
                     [3, 7], [4, 5], [4, 6], [5, 7], [6, 7]]

    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    # diam          = calc_pts_diameter(np.array(mesh.vertices))
    diam = float(options['diam'])

    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
    model = Darknet(cfgfile)
    model.print_network()
    model.load_weights(weightfile)
    model.cuda()
    model.eval()

    # Get the parser for the test dataset
    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(test_width, test_height),
                                        shuffle=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 1

    # Specify the number of workers for multiple processing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=valid_batchsize,
                                              shuffle=False,
                                              **kwargs)

    logging("   Testing {}...".format(name))
    logging("   Number of test samples: %d" % len(test_loader.dataset))
    # Iterate through test batches (Batch size for test data is 1)
    count = 0
    z = np.zeros((3, 1))
    for batch_idx, (data, target) in enumerate(test_loader):

        # Images
        img = data[0, :, :, :]
        img = img.numpy().squeeze()
        img = np.transpose(img, (1, 2, 0))

        t1 = time.time()
        # Pass data to GPU
        if use_cuda:
            data = data.cuda()
            target = target.cuda()

        # Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference
        data = Variable(data, volatile=True)
        t2 = time.time()

        # Forward pass
        output = model(data).data
        t3 = time.time()

        # Using confidence threshold, eliminate low-confidence predictions
        all_boxes = get_region_boxes(output, conf_thresh, num_classes)
        t4 = time.time()

        # Iterate through all images in the batch
        for i in range(output.size(0)):

            # For each image, get all the predictions
            boxes = all_boxes[i]

            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
            truths = target[i].view(-1, 21)

            # Get how many objects are present in the scene
            num_gts = truths_length(truths)

            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = [
                    truths[k][1], truths[k][2], truths[k][3], truths[k][4],
                    truths[k][5], truths[k][6], truths[k][7], truths[k][8],
                    truths[k][9], truths[k][10], truths[k][11], truths[k][12],
                    truths[k][13], truths[k][14], truths[k][15], truths[k][16],
                    truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]
                ]
                best_conf_est = -1

                # If the prediction has the highest confidence, choose it as our prediction for single object pose estimation
                for j in range(len(boxes)):
                    if (boxes[j][18] > best_conf_est):
                        match = corner_confidence9(
                            box_gt[:18], torch.FloatTensor(boxes[j][:18]))
                        box_pr = boxes[j]
                        best_conf_est = boxes[j][18]

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]),
                                        dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]),
                                        dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * 640
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * 480
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * 640
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * 480
                preds_corners2D.append(corners2D_pr)
                gts_corners2D.append(corners2D_gt)

                # Compute corner prediction error
                corner_norm = np.linalg.norm(corners2D_gt - corners2D_pr,
                                             axis=1)
                corner_dist = np.mean(corner_norm)
                errs_corner2D.append(corner_dist)

                # Compute [R|t] by pnp
                R_gt, t_gt = pnp(
                    np.array(np.transpose(
                        np.concatenate((np.zeros((3, 1)), corners3D[:3, :]),
                                       axis=1)),
                             dtype='float32'), corners2D_gt,
                    np.array(internal_calibration, dtype='float32'))
                R_pr, t_pr = pnp(
                    np.array(np.transpose(
                        np.concatenate((np.zeros((3, 1)), corners3D[:3, :]),
                                       axis=1)),
                             dtype='float32'), corners2D_pr,
                    np.array(internal_calibration, dtype='float32'))

                if save:
                    preds_trans.append(t_pr)
                    gts_trans.append(t_gt)
                    preds_rot.append(R_pr)
                    gts_rot.append(R_gt)

                    np.savetxt(
                        backupdir + '/test/gt/R_' + valid_files[count][-8:-3] +
                        'txt', np.array(R_gt, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/gt/t_' + valid_files[count][-8:-3] +
                        'txt', np.array(t_gt, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/pr/R_' + valid_files[count][-8:-3] +
                        'txt', np.array(R_pr, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/pr/t_' + valid_files[count][-8:-3] +
                        'txt', np.array(t_pr, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/gt/corners_' +
                        valid_files[count][-8:-3] + 'txt',
                        np.array(corners2D_gt, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/pr/corners_' +
                        valid_files[count][-8:-3] + 'txt',
                        np.array(corners2D_pr, dtype='float32'))

                # Compute translation error
                trans_dist = np.sqrt(np.sum(np.square(t_gt - t_pr)))
                errs_trans.append(trans_dist)

                # Compute angle error
                angle_dist = calcAngularDistance(R_gt, R_pr)
                errs_angle.append(angle_dist)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt,
                                                internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr,
                                                  internal_calibration)
                proj_corners_gt = np.transpose(
                    compute_projection(corners3D, Rt_gt, internal_calibration))
                proj_corners_pr = np.transpose(
                    compute_projection(corners3D, Rt_pr, internal_calibration))
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

                # Visualize
                fig = plt.figure()
                plt.xlim((0, 640))
                plt.ylim((0, 480))
                plt.imshow(scipy.misc.imresize(img, (480, 640)))
                # Projections
                for edge in edges_corners:
                    plt.plot(proj_corners_gt[edge, 0],
                             proj_corners_gt[edge, 1],
                             color='g',
                             linewidth=3.0)
                    plt.plot(proj_corners_pr[edge, 0],
                             proj_corners_pr[edge, 1],
                             color='b',
                             linewidth=3.0)
                plt.gca().invert_yaxis()
                # plt.show()

                plt.savefig(outfile + '/output_' + str(count) + '_.png',
                            bbox_inches='tight')
                fig.canvas.draw()

                # Compute 3D distances
                transform_3d_gt = compute_transformation(vertices, Rt_gt)
                transform_3d_pred = compute_transformation(vertices, Rt_pr)
                norm3d = np.linalg.norm(transform_3d_gt - transform_3d_pred,
                                        axis=0)
                vertex_dist = np.mean(norm3d)
                errs_3d.append(vertex_dist)

                # Sum errors
                testing_error_trans += trans_dist
                testing_error_angle += angle_dist
                testing_error_pixel += pixel_dist
                testing_samples += 1
                count = count + 1

        t5 = time.time()

    # Compute 2D projection error, 6D pose error, 5cm5degree error
    px_threshold = 5
    acc = len(np.where(
        np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
    acc5cm5deg = len(
        np.where((np.array(errs_trans) <= 0.05)
                 & (np.array(errs_angle) <= 5))[0]) * 100. / (len(errs_trans) +
                                                              eps)
    acc3d10 = len(np.where(
        np.array(errs_3d) <= diam * 0.1)[0]) * 100. / (len(errs_3d) + eps)
    corner_acc = len(np.where(np.array(errs_corner2D) <= px_threshold)
                     [0]) * 100. / (len(errs_corner2D) + eps)
    mean_err_2d = np.mean(errs_2d)
    mean_corner_err_2d = np.mean(errs_corner2D)
    nts = float(testing_samples)

    if testtime:
        print('-----------------------------------')
        print('  tensor to cuda : %f' % (t2 - t1))
        print('         predict : %f' % (t3 - t2))
        print('get_region_boxes : %f' % (t4 - t3))
        print('            eval : %f' % (t5 - t4))
        print('           total : %f' % (t5 - t1))
        print('-----------------------------------')

    # Print test statistics
    logging('Results of {}'.format(name))
    logging('   Acc using {} px 2D Projection = {:.2f}%'.format(
        px_threshold, acc))
    logging('   Acc using 10% threshold - {} vx 3D Transformation = {:.2f}%'.
            format(diam * 0.1, acc3d10))
    logging('   Acc using 5 cm 5 degree metric = {:.2f}%'.format(acc5cm5deg))
    logging(
        "   Mean 2D pixel error is %f, Mean vertex error is %f, mean corner error is %f"
        % (mean_err_2d, np.mean(errs_3d), mean_corner_err_2d))
    logging(
        '   Translation error: %f m, angle error: %f degree, pixel error: %f pix'
        % (testing_error_trans / nts, testing_error_angle / nts,
           testing_error_pixel / nts))

    if save:
        predfile = backupdir + '/predictions_linemod_' + name + '.mat'
        scipy.io.savemat(
            predfile, {
                'R_gts': gts_rot,
                't_gts': gts_trans,
                'corner_gts': gts_corners2D,
                'R_prs': preds_rot,
                't_prs': preds_trans,
                'corner_prs': preds_corners2D
            })
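
The three accuracy figures above are pure thresholding of the collected error lists: 2D projection accuracy (mean reprojection error within px_threshold pixels), 3D vertex accuracy within 10% of the object diameter, and the combined 5 cm / 5 degree pose criterion. Restated as a compact helper equivalent to the in-line computations:

import numpy as np

def pose_accuracies(errs_2d, errs_3d, errs_trans, errs_angle, diam,
                    px_threshold=5, eps=1e-5):
    # Percentage of samples falling under each error threshold.
    acc2d = 100. * np.sum(np.array(errs_2d) <= px_threshold) / (len(errs_2d) + eps)
    acc3d = 100. * np.sum(np.array(errs_3d) <= diam * 0.1) / (len(errs_3d) + eps)
    acc5cm5deg = 100. * np.sum((np.array(errs_trans) <= 0.05) &
                               (np.array(errs_angle) <= 5)) / (len(errs_trans) + eps)
    return acc2d, acc3d, acc5cm5deg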
Example #5
def main(camera_id):
    ip = str('192.168.0.2')
    name = str('admin')
    pw = str('a1234567')
    camera = HKCamera(ip, name, pw)

    threadPubMsg_shelfID_1 = pubmsg.MsgPublishClass(cameraID=camera_id,
                                                    shelfID=1)
    threadPubMsg_shelfID_1.setDaemon(True)
    threadPubMsg_shelfID_1.start()

    threadPubMsg_shelfID_2 = pubmsg.MsgPublishClass(cameraID=camera_id,
                                                    shelfID=2)
    threadPubMsg_shelfID_2.setDaemon(True)
    threadPubMsg_shelfID_2.start()

    threadPubMsg_dict = {
        'shelfID_1': threadPubMsg_shelfID_1,
        'shelfID_2': threadPubMsg_shelfID_2
    }

    model = loadDataset()

    cfg = Darknet('cfg/yolov3.cfg')
    cfg.load_weights('yolov3.weights')
    cfg.cuda()
    # global frame_number
    frame_number2 = [0]
    flag = [0]
    bridge = CvBridge()

    dic_change = {}
    dic_change_huojia2 = {}
    huojia1_id = 1
    huojia2_id = 2
    while not rospy.is_shutdown():
        frame_origin = camera.getFrame()
        frame_origin = np.array(frame_origin)
        frame_origin = cv2.resize(frame_origin,
                                  None,
                                  fx=0.75,
                                  fy=0.75,
                                  interpolation=cv2.INTER_AREA)
        frame_trans = copy.deepcopy(frame_origin)

        # # draw the shangping area
        # left_x, top_y, right_m, bottom_n = shangpin_area()
        # cv2.rectangle(frame_origin, (left_x, top_y), (right_m, bottom_n), (0, 255, 0), 2)
        #
        # left_x_2, top_y_2, right_m_2, bottom_n_2 = shangpin_area_huojia2()
        # cv2.rectangle(frame_origin, (left_x_2, top_y_2), (right_m_2, bottom_n_2), (255, 0, 0), 2)

        res, camera_id = callback((None, cfg, model, frame_number2, bridge,
                                   camera_id, flag, frame_origin))

        if res == []:
            threadPubMsg = threadPubMsg_dict['shelfID_' + str(huojia1_id)]
            threadPubMsg.set_commodity_recognition_trigger_with_image(
                camera_id=camera_id,
                person_id=-1,
                shelf_id=-1,
                flag=0,
                flag1=0,
                flag2=0,
                flag_list=[],
                frame=frame_trans)

            threadPubMsg = threadPubMsg_dict['shelfID_' + str(huojia2_id)]
            threadPubMsg.set_commodity_recognition_trigger_with_image(
                camera_id=camera_id,
                person_id=-1,
                shelf_id=-1,
                flag=0,
                flag1=0,
                flag2=0,
                flag_list=[],
                frame=frame_trans)
            continue
        dic, dic_huojia2 = xuanze_original(res, frame_origin, model, cfg,
                                           camera_id, dic_change,
                                           dic_change_huojia2, huojia1_id,
                                           huojia2_id)

        if compare_dic(dic, dic_change) == False and compare_dic(
                dic_huojia2, dic_change_huojia2) == False:
            pass
        else:
            dic, dic_huojia2 = xuanze(res, frame_origin, model, cfg,
                                      threadPubMsg_dict, camera_id, dic,
                                      dic_change, dic_huojia2,
                                      dic_change_huojia2, huojia1_id,
                                      huojia2_id, frame_trans)

        print("**********************")
        print("dic_change shelf1: {}".format(dic))
        print("dic_change_shelf2: {}".format(dic_huojia2))
        print("")
        dic_change = dic
        dic_change_huojia2 = dic_huojia2

    HKIPcamera.release()
Example #6
init_epoch = model.seen / nsamples

kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}
test_loader = torch.utils.data.DataLoader(
    dataset.listDataset(testlist, shape=(init_width, init_height),
                   shuffle=False,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                   ]), train=False),
    batch_size=batch_size, shuffle=False, **kwargs)

if use_cuda:
    if ngpus > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model = model.cuda()

params_dict = dict(model.named_parameters())
params = []
for key, value in params_dict.items():
    if key.find('.bn') >= 0 or key.find('.bias') >= 0:
        params += [{'params': [value], 'weight_decay': 0.0}]
    else:
        params += [{'params': [value], 'weight_decay': decay*batch_size}]
optimizer = optim.SGD(model.parameters(), lr=learning_rate/batch_size, momentum=momentum, dampening=0, weight_decay=decay*batch_size)

def adjust_learning_rate(optimizer, batch):
    """Scales the learning rate once the processed-batch count passes each configured step"""
    lr = learning_rate
    for i in range(len(steps)):
        scale = scales[i] if i < len(scales) else 1
        if batch >= steps[i]:
            lr = lr * scale
            if batch == steps[i]:
                break
        else:
            break
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr / batch_size
    return lr
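
As in Example #2, the params list here is built but never passed: optim.SGD receives model.parameters(), so the per-group weight-decay override has no effect unless params is supplied instead. The schedule helper is typically called once per batch with a running counter maintained by the training loop; a hedged call-site sketch:

lr = adjust_learning_rate(optimizer, processed_batches)  # processed_batches: assumed counter kept by the caller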
Example #7
def generate_det(seq_dir, npy_dir, cfg_file, weight_file):

    # load yolo model
    m = Darknet(cfg_file)
    m.print_network()
    m.load_weights(weight_file)
    print('Loading weights from %s... Done!' % (weight_file))

    use_cuda = 1
    if use_cuda:
        m.cuda()

    if not os.path.exists(npy_dir):
        os.makedirs(npy_dir)

    seq_list = glob.glob(os.path.join(seq_dir, "*-YOLO"))
    seq_list = sorted(seq_list)

    for seq in seq_list:
        seq_name = os.path.basename(seq)
        print("processing: %s" % seq_name)
        det_dir = os.path.join(seq, "det")
        if not os.path.exists(det_dir):
            os.makedirs(det_dir)
        txt_file = os.path.join(det_dir, "det.txt")
        fid = open(txt_file, 'w')

        npy_file = os.path.join(npy_dir, seq_name + ".npy")

        img_dir = os.path.join(seq, "img1")
        img_list = os.listdir(img_dir)
        img_list = sorted(img_list)

        # Run one detection on the first frame up front (result is discarded; serves as a warm-up)
        img = cv2.imread(os.path.join(img_dir, img_list[0]))
        sized = cv2.resize(img, (m.width, m.height))
        sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)
        boxes = do_detect(m, sized, 0.5, 0.4, use_cuda)

        total_time = 0.0

        npy_list = []
        for i in range(len(img_list)):
            print("processing: %d/%d" % (i + 1, len(img_list)))
            img_name = img_list[i][:-4]
            img_idx = int(img_name)
            img_path = os.path.join(img_dir, img_list[i])

            img = cv2.imread(img_path)
            sized = cv2.resize(img, (m.width, m.height))
            sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)

            #time_0 = time.time()
            boxes = do_detect(m, sized, 0.5, 0.4, use_cuda)
            #time_1 = time.time()
            #total_time += time_1 - time_0

            height, width = img.shape[:2]
            for j in range(len(boxes)):
                box = boxes[j]
                cls_id = box[6]
                if cls_id != 0:  # keep only class 0 (person)
                    continue

                x = (box[0] - box[2] / 2.0) * width
                y = (box[1] - box[3] / 2.0) * height
                w = box[2] * width
                h = box[3] * height
                cls_conf = box[5]

                txt_tmp = "%d,-1,%.1f,%.1f,%.1f,%.1f,%.3f\n" % (img_idx, x, y,
                                                                w, h, cls_conf)
                fid.write(txt_tmp)
                npy_list.append([
                    img_idx, -1.0, x, y, w, h, cls_conf, -1.0, -1.0, -1.0, 1.0
                ])

        fid.close()
        np.save(npy_file,
                np.asarray(npy_list, dtype=np.float32),
                allow_pickle=False)
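
Each det.txt row written above follows the MOTChallenge detection convention (frame, id=-1, bb_left, bb_top, bb_width, bb_height, conf), and the .npy file mirrors those rows with three -1 placeholders and a trailing 1.0 flag appended. A hedged loader sketch for the saved array (the path is a placeholder):

import numpy as np

dets = np.load('npy_dir/MOT16-02-YOLO.npy')  # placeholder path
frame_1_dets = dets[dets[:, 0] == 1]         # all person detections on frame 1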
Example #8
def valid(datacfg, cfgfile, weightfile, outfile):
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    name_list = options['names']
    prefix = 'results'
    names = load_class_names(name_list)

    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    m = Darknet(cfgfile)
    m.print_network()
    m.load_weights(weightfile)
    m.cuda()
    m.eval()

    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(m.width, m.height),
                                        shuffle=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 2
    assert (valid_batchsize > 1)

    kwargs = {'num_workers': 4, 'pin_memory': True}
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batchsize,
                                               shuffle=False,
                                               **kwargs)

    fps = [0] * m.num_classes
    if not os.path.exists('results'):
        os.mkdir('results')
    for i in range(m.num_classes):
        buf = '%s/%s%s.txt' % (prefix, outfile, names[i])
        fps[i] = open(buf, 'w')

    lineId = -1

    conf_thresh = 0.005
    nms_thresh = 0.45
    for batch_idx, (data, target) in enumerate(valid_loader):
        data = data.cuda()
        data = Variable(data, volatile=True)
        output = m(data).data
        batch_boxes = get_region_boxes(output, conf_thresh, m.num_classes,
                                       m.anchors, m.num_anchors, 0)
        for i in range(output.size(0)):
            lineId = lineId + 1
            fileId = os.path.basename(valid_files[lineId]).split('.')[0]
            width, height = get_image_size(valid_files[lineId])
            print(valid_files[lineId])
            boxes = batch_boxes[i]
            boxes = nms(boxes, nms_thresh)
            for box in boxes:
                x1 = (box[0] - box[2] / 2.0) * width
                y1 = (box[1] - box[3] / 2.0) * height
                x2 = (box[0] + box[2] / 2.0) * width
                y2 = (box[1] + box[3] / 2.0) * height

                det_conf = box[4]
                cls_conf = box[5]
                cls_id = box[6]
                prob = det_conf * cls_conf
                fps[cls_id].write('%s %f %f %f %f %f\n' %
                                  (fileId, prob, x1, y1, x2, y2))

    for i in range(m.num_classes):
        fps[i].close()
Example #9
def main(camera_id, shelf_id):
    rospy.init_node('MultiProcessingNode', anonymous=True)
    ip = '192.168.0.' + str(camera_id)
    name = str('admin')
    pw = str('a1234567')
    camera = HKCamera(ip, name, pw)

    threadPubMsg_shelfID_1 = pubmsg.MsgPublishClass(cameraID=camera_id,
                                                    shelfID=shelf_id[0])
    threadPubMsg_shelfID_1.setDaemon(True)
    threadPubMsg_shelfID_1.start()

    shelf1 = 'shelfID_' + str(shelf_id[0])
    threadPubMsg_dict = {shelf1: threadPubMsg_shelfID_1}

    model = loadDataset()

    cfg = Darknet('cfg/yolov3.cfg')
    cfg.load_weights('yolov3.weights')
    cfg.cuda()
    # global frame_number
    frame_number2 = [0]
    flag = [0]
    bridge = CvBridge()

    dic_change = {}
    pre_res = {}
    huojia1_id = shelf_id[0]
    print("huojia1_id: {}".format(huojia1_id))
    tmp = 0
    while not rospy.is_shutdown():
        frame_origin = camera.getFrame()

        frame_origin = np.array(frame_origin)
        frame_origin = cv2.resize(frame_origin,
                                  None,
                                  fx=0.75,
                                  fy=0.75,
                                  interpolation=cv2.INTER_AREA)
        frame_trans = copy.deepcopy(frame_origin)

        # draw the shangping area
        # left_x, top_y, right_m, bottom_n = shangpin_area(huojia1_id)
        # cv2.rectangle(frame_origin, (left_x, top_y), (right_m, bottom_n), (0, 255, 0), 2)

        res, camera_id, dict_res = callback(
            (None, cfg, model, frame_number2, bridge, camera_id, flag,
             frame_origin, huojia1_id, pre_res))

        if res == []:
            if tmp > 30:
                threadPubMsg = threadPubMsg_dict['shelfID_' + str(huojia1_id)]
                threadPubMsg.set_commodity_recognition_trigger_with_image(
                    camera_id=camera_id,
                    person_id=-1,
                    shelf_id=huojia1_id,
                    flag=0,
                    flag1=0,
                    flag2=0,
                    flag_list=[],
                    frame=None)

                tmp = 0

            else:
                tmp += 1
            continue
        else:
            tmp = 0

        dic = xuanze_original(res, frame_origin, model, cfg, camera_id,
                              dic_change, huojia1_id, pre_res)

        if compare_dic(dic, dic_change) == False:
            pass

        else:
            dic = xuanze(res, frame_origin, model, cfg, threadPubMsg_dict,
                         camera_id, dic, dic_change, huojia1_id, frame_trans,
                         pre_res)

        #print("**********************")
        #print("dic_change_shelf_{}: {}".format(shelf_id[0], dic))
        #print("")
        change_idnum = len(pre_res.keys()) == len(res)
        if change_idnum:
            pre_res = dict_res
        else:
            pre_res = {}
        dic_change = dic

    HKIPcamera.release()
Example #10
class Detector:
    def __init__(self, resolution=416):
        '''
        :param resolution: int, multiple of 32 greater than 32
        '''
        self.batch_size = 1
        self.scales = [1, 2, 3]
        self.resolution = resolution
        self.num_boxes = [
            self.resolution // 8, self.resolution // 16, self.resolution // 32
        ]
        self.num_boxes = sum([3 * (x**2) for x in self.num_boxes])
        self.scales_indices = []
        for scale in self.scales:
            li = list(
                range((scale - 1) * self.num_boxes // 3,
                      scale * self.num_boxes // 3))
            self.scales_indices.extend(li)
        self.confidence = 0.5
        self.nms_thresh = 0.4
        self.start = 0
        self.save_directory = '.'
        self.cfg_file = 'cfg/yolov3.cfg'
        self.weights_file = "yolov3.weights"
        self.colors = pkl.load(open("pallete", "rb"))

        self.CUDA = torch.cuda.is_available()

        self.num_classes = 80
        self.classes = load_classes('data/coco.names')

        # Set up the neural network
        print("Loading network.....")
        self.model = Darknet(self.cfg_file)
        self.model.load_weights(self.weights_file)
        print("Network successfully loaded")

        self.model.net_info["height"] = self.resolution
        self.inp_dim = self.model.net_info["height"]
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32

        # If there's a GPU available, put the model on GPU
        if self.CUDA:
            self.model.cuda()

        # Set the model in evaluation mode
        self.model.eval()

    def detect_objects(self, image_path):
        image_prep = prep_image(image_path, self.inp_dim)
        im_batches = [image_prep[0]]
        orig_ims = [image_prep[1]]
        im_dim_list = [image_prep[2]]
        im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

        img_path = image_path

        if self.CUDA:
            im_dim_list = im_dim_list.cuda()

        write = False
        # Dry-run forward pass before the detection loop
        self.model(get_test_input(self.inp_dim, self.CUDA), self.CUDA)

        objs = {}
        i = 0
        for batch in im_batches:
            if self.CUDA:
                batch = batch.cuda()

            with torch.no_grad():
                prediction = self.model(Variable(batch), self.CUDA)

            prediction = prediction[:, self.scales_indices]

            prediction = write_results(prediction,
                                       self.confidence,
                                       self.num_classes,
                                       nms=True,
                                       nms_conf=self.nms_thresh)
            prediction[:, 0] += i * self.batch_size

            if not write:
                output = prediction
                write = True
            else:
                output = torch.cat((output, prediction))

            for im_num, image in enumerate(img_path[i * self.batch_size:min(
                (i + 1) * self.batch_size, len(img_path))]):
                im_id = i * self.batch_size + im_num
                objs = [
                    self.classes[int(x[-1])] for x in output
                    if int(x[0]) == im_id
                ]
                print("{0:20s} {1:s}".format("Objects Detected:",
                                             " ".join(objs)))
                print(
                    "----------------------------------------------------------"
                )
            i += 1

            if self.CUDA:
                torch.cuda.synchronize()

        try:
            output
        except NameError:
            print("No detections were made")
            exit()

        im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long())

        scaling_factor = torch.min(self.inp_dim / im_dim_list,
                                   1)[0].view(-1, 1)

        output[:,
               [1, 3]] -= (self.inp_dim -
                           scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        output[:,
               [2, 4]] -= (self.inp_dim -
                           scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

        output[:, 1:5] /= scaling_factor

        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                            im_dim_list[i, 0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                            im_dim_list[i, 1])

        def write(x, batches, results):
            c1 = tuple(x[1:3].int())
            c2 = tuple(x[3:5].int())
            img = results[int(x[0])]
            cls = int(x[-1])
            label = "{0}".format(self.classes[cls])
            color = random.choice(self.colors)
            cv2.rectangle(img, c1, c2, color, 1)
            t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
            c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
            cv2.rectangle(img, c1, c2, color, -1)
            cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                        cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
            return img

        list(map(lambda x: write(x, im_batches, orig_ims), output))

        det_names = pd.Series(img_path).apply(
            lambda x: "{}/det_{}".format(self.save_directory,
                                         x.split("/")[-1]))

        cv2.imwrite(det_names[0], orig_ims[0])
        torch.cuda.empty_cache()
        ret_path = det_names[0]

        return ret_path, objs, orig_ims[0]
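
A hedged usage sketch for the class above, assuming the hard-coded cfg/yolov3.cfg, yolov3.weights, data/coco.names and pallete files are present, and that prep_image accepts a single image path (the input image is a placeholder):

detector = Detector(resolution=416)
det_path, objs, annotated = detector.detect_objects('imgs/dog.jpg')  # placeholder image
print(det_path, objs)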
Example #11
def evaluate_with_gt_pos(cfgfile,
                         weightfile,
                         listfile,
                         append,
                         bestCnt,
                         withZoom=True,
                         use_cuda=True,
                         zoom_type=None):
    import cv2

    all_channels = [
        32, 64, 32, 64, 128, 64, 128, 64, 128, 256, 128, 256, 128, 256, 128,
        256, 128, 256, 128, 256, 128, 256, 128, 256, 128, 256, 512, 256, 512,
        256, 512, 256, 512, 256, 512, 256, 512, 256, 512, 256, 512, 256, 512,
        1024, 512, 1024, 512, 1024, 512, 1024, 512, 1024, 512, 1024, 512, 1024,
        512, 256, 256, 512, 256, 512, 256, 128, 128, 256, 128, 256, 128, 256,
        2, 512, 1024, 512, 1024, 512, 256, 256, 512, 256, 512, 256, 128, 128,
        256, 128, 256, 128, 256, 24
    ]

    m = Darknet(cfgfile, all_channels)
    m.print_network()
    m.load_state_dict(torch.load(weightfile))
    print('Loading weights from %s... Done!' % (weightfile))
    if use_cuda:
        m.cuda()

    m.print_bn_weights()

    with open(listfile, 'r') as file:
        imglines = file.readlines()

    failed_pred = 0
    total_pred = 0
    for idx in range(len(imglines)):
        max_conf = 0
        imgfile = imglines[idx].rstrip()
        img = cv2.imread(imgfile)
        dirname, filename = os.path.split(imgfile)

        baseName, _ = os.path.splitext(filename)
        dirname = os.path.splitext(dirname[dirname.rfind('/') + 1:])[0]
        outFileName = dirname + '_' + baseName

        start = time.time()
        gtPoses = [None] * 3
        rawk = K_tango
        target_shape = (704, 704)

        best_pred = None
        best_border = None
        save = False

        print('imgfile', imgfile)
        print(str(failed_pred) + ' ' + str(total_pred))
        #predPose, conf, deviation, p2d = do_detect(m, img, rawk, gtPoses, bestCnt, 0, 0, use_cuda)
        predPose = None  # ensure defined even if detection throws
        try:
            total_pred = total_pred + 1
            predPose, conf, p2d = do_detect(m, img, rawk, gtPoses, bestCnt, 0,
                                            0, use_cuda)
        except Exception:
            failed_pred = failed_pred + 1

        finish = time.time()
        name = 'img/' + filename + '.png'
        if predPose is not None and (len(predPose) != 0):
            pose = predPose[0][1]
            print(str(pose))
            r = pose[:, 3]

            name = 'img/' + filename + '_' + str(conf) + '.png'
            #save_img_with_label(img, pose, rawk, name)
            print(name)
            rot = np.delete(pose, 3, axis=1).T  # 3x3 rotation matrix (translation column removed)
            q0, qvec = dcm2quat(rot)
            q = [q0, qvec[0], qvec[1], qvec[2]]
            print(conf)
        else:
            name = 'img/missing' + filename + '.png'
            print('problem', name, save)
    print(str(failed_pred) + ' ' + str(total_pred))
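
dcm2quat above turns the 3x3 rotation (direction-cosine) matrix into a scalar-first quaternion. A standard, hedged sketch of that conversion for the well-conditioned case (assumes trace(C) > -1; sign conventions vary between libraries):

import numpy as np

def dcm2quat_sketch(C):
    # Scalar-first quaternion [q0, q1, q2, q3] from a direction-cosine matrix.
    q0 = 0.5 * np.sqrt(1.0 + np.trace(C))
    qvec = np.array([C[1, 2] - C[2, 1],
                     C[2, 0] - C[0, 2],
                     C[0, 1] - C[1, 0]]) / (4.0 * q0)
    return q0, qvec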
Example #12
class Main(QtWidgets.QMainWindow, Ui_MainWindow):

    logQueue = multiprocessing.Queue()  # log queue used to pass data between processes
    receiveLogSignal = pyqtSignal(str)

    def __init__(self):
        QtWidgets.QMainWindow.__init__(self)
        self.setupUi(self)
        self.cap = None
        # video
        self.center()
        self.openFIleButton.clicked.connect(self.open_video)
        self.closeFileButton.clicked.connect(self.close_video)

        # 创建一个关闭事件并设为未触发
        self.stopEvent = threading.Event()
        self.stopEvent.clear()

        # 加载模型
        self.load_models.clicked.connect(self.load_model)

        # 加载日志
        self.receiveLogSignal.connect(lambda log: self.logOutput(log))
        self.logOutputThread = threading.Thread(target=self.receiveLog,
                                                daemon=True)
        self.logOutputThread.start()

        # 调节帧率
        self.changeFrameSlider.valueChanged.connect(self.frameChange)
        self.frameInterval = self.changeFrameSlider.value()

    def frameChange(self):
        self.label_14.setText(str(self.changeFrameSlider.value()))
        #print("frameInterval:" + str(self.frameInterval))

    def logOutput(self, log):
        # Get the current system time
        timestamp = datetime.now().strftime('[%Y/%m/%d %H:%M:%S]')
        log = timestamp + '\n' + log
        self.logFile.write(log)
        self.textEdit.moveCursor(QTextCursor.End)
        self.textEdit.insertPlainText(log)
        self.textEdit.ensureCursorVisible()  # auto-scroll

    def receiveLog(self):
        while True:
            data = self.logQueue.get()
            if data:
                self.receiveLogSignal.emit(data)
            else:
                continue

    def center(self, screenNum=0):
        screen = QDesktopWidget().screenGeometry()
        size = self.geometry()
        self.normalGeometry2 = QRect(
            (screen.width() - size.width()) // 2 + screen.left(),
            (screen.height() - size.height()) // 2, size.width(), size.height())
        self.setGeometry((screen.width() - size.width()) // 2 + screen.left(),
                         (screen.height() - size.height()) // 2, size.width(),
                         size.height())

    def open_video(self):
        fileName, _ = QFileDialog.getOpenFileName(self, "Load surveillance video", '../videos')
        self.cap = cv2.VideoCapture(fileName)
        self.frameRate = self.cap.get(cv2.CAP_PROP_FPS)
        video_thread = threading.Thread(target=self.display_video)
        video_thread.start()
        self.logFile = open('../log/log_info.txt', 'a')

    def close_video(self):
        self.stopEvent.set()

    def display_video(self):
        self.openFIleButton.setEnabled(False)
        self.closeFileButton.setEnabled(True)
        # RGB to BGR
        frames = 0
        while self.cap.isOpened():
            ret, frame = self.cap.read()
            if ret:
                if frames % self.changeFrameSlider.value() == 0:
                    #frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                    plate_frame = frame.copy()
                    img = frame.copy()
                    output = None
                    orign_img = None

                    # System log entries
                    count_info_log = ""
                    event_info_log = ""
                    break_info_log = ""

                    try:
                        if self.target_detect.isChecked():  # object detection
                            output, orign_img, img, pedestrians_num = target_detect(
                                self.model, frame)
                            if int(pedestrians_num) != 0:
                                break_info_log = str(
                                    pedestrians_num) + " pedestrian(s) ran the red light;\n"
                                self.break_traffic_warning.setVisible(True)
                                self.break_traffic_label.setVisible(True)
                                self.break_traffic_label.setText(
                                    break_info_log)
                            else:
                                self.break_traffic_label.setVisible(False)
                                self.break_traffic_warning.setVisible(False)
                        # Traffic-light detection
                        if self.traffic_light_detect.isChecked():
                            traffic_light_color = traffic_light_detect(
                                output, orign_img)
                            if traffic_light_color == "green":
                                self.red_light.setVisible(False)
                                self.green_light.setVisible(True)
                            elif traffic_light_color == "red":
                                self.green_light.setVisible(False)
                                self.red_light.setVisible(True)
                            else:
                                self.green_light.setVisible(False)
                                self.red_light.setVisible(False)
                        else:
                            self.green_light.setVisible(False)
                            self.red_light.setVisible(False)

                        # Vehicle and pedestrian counting
                        people_num = 0
                        cars_num = 0
                        motors_num = 0

                        if self.cars_detect.isChecked():
                            _, cars_num, motors_num = classNum_detect(output)
                            self.tableWidget.setItem(
                                0, 1, QTableWidgetItem(str(cars_num)))
                            self.tableWidget.setItem(
                                0, 2, QTableWidgetItem(str(motors_num)))
                        else:
                            self.tableWidget.setItem(0, 1,
                                                     QTableWidgetItem(str(0)))
                            self.tableWidget.setItem(0, 2,
                                                     QTableWidgetItem(str(0)))
                        if self.people_detect.isChecked():
                            people_num, _, _ = classNum_detect(output)
                            self.tableWidget.setItem(
                                0, 0, QTableWidgetItem(str(people_num)))
                        else:
                            self.tableWidget.setItem(0, 0,
                                                     QTableWidgetItem(str(0)))

                        count_info_log = "people:" + str(people_num) + ", cars:" + str(cars_num) + \
                             ", motors:" + str(motors_num) + ";\n"

                        # License-plate recognition
                        if self.license_plate_detect.isChecked():
                            plate_info_list = recognize_plate(plate_frame)
                            for plate_info in plate_info_list:
                                plate = plate_info[0]  # plate number
                                confidence = plate_info[1]  # confidence score
                                #print("plate: " + plate)
                                #self.license_result.clear()
                                self.license_result.setText(plate)
                                rect = plate_info[2]  # plate location
                                #print(rect[0], rect[2], rect[1], rect[3])
                                plate_img = plate_frame[int(rect[1]) : int(rect[3] + rect[1]), \
                                 int(rect[0]) : int(rect[2]+rect[0])]

                                plate_img = cv2.cvtColor(
                                    plate_img, cv2.COLOR_RGB2BGR)
                                plate_img = cv2.resize(plate_img, (140, 30))
                                plate_img = QImage(plate_img.data, plate_img.shape[1], \
                                    plate_img.shape[0], QImage.Format_RGB888)
                                self.license_graph.setPixmap(
                                    QPixmap.fromImage(plate_img))

                                img = drawRectBox(img, rect, plate)

                                plate_center = [
                                    int(rect[0] + rect[2]),
                                    int(rect[1] + rect[3])
                                ]  # plate center position
                                car_color = detect_car_color(
                                    output, plate_frame, plate_center)  # detect the car color
                                event_info_log = car_color + " car, plate: " + plate + \
                                    ", confidence: " + str(confidence)[:5] + '\n'
                                #print(event_info_log)
                        else:
                            self.license_graph.clear()
                            self.license_result.clear()
                    except Exception:
                        pass  # ignore per-frame detection errors

                    log_info = count_info_log + event_info_log + break_info_log
                    self.logQueue.put(log_info)

                    #self.sys_log(count_info_log, event_info_log, break_info_log)
                    #self.set_log_info(count_info_log, event_info_log, break_info_log)

                    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                    img = cv2.resize(img, (1080, 540))
                    img = QImage(img.data, img.shape[1], img.shape[0],
                                 QImage.Format_RGB888)
                    self.video_plate.setPixmap(QPixmap.fromImage(img))
                cv2.waitKey(1)
                frames += 1
                #print(frames)

                if self.stopEvent.is_set():
                    self.stopEvent.clear()
                    #self.textEdit.clear()
                    self.video_plate.clear()
                    self.tableWidget.setItem(0, 0, QTableWidgetItem(str(0)))
                    self.tableWidget.setItem(0, 1, QTableWidgetItem(str(0)))
                    self.tableWidget.setItem(0, 2, QTableWidgetItem(str(0)))
                    break
            else:
                self.video_plate.clear()
                break
        try:
            self.openFIleButton.setEnabled(True)
            self.cap.release()
            self.logFile.close()
            self.green_light.setVisible(False)
            self.red_light.setVisible(False)
            self.break_traffic_warning.setVisible(False)
            self.break_traffic_label.setVisible(False)
            self.license_graph.clear()
            self.license_result.clear()
        except Exception:
            print("Error releasing resources")

    def load_model(self):
        CUDA = torch.cuda.is_available()
        print("Loading network.....")
        self.model = Darknet("../yolov3/cfg/yolov3.cfg")
        self.model.load_weights("../yolov3/weights/yolov3.weights")

        print("Network successfully loaded")
        self.model.net_info["height"] = 416
        inp_dim = int(self.model.net_info["height"])
        assert inp_dim % 32 == 0
        assert inp_dim > 32

        if CUDA:
            self.model.cuda()  # move the model to the GPU
        self.model.eval()

        QMessageBox.information(self, 'message', 'Model loaded.',
                                QMessageBox.Ok)


class YOLO:
    def __init__(self,
                 cfg_file: pathlib.Path,
                 weights_file: pathlib.Path,
                 class_names_file: pathlib.Path,
                 resolution: int = 416,
                 class_filters: Optional[List[str]] = None) -> None:
        self.net: Any = None
        self.input_dim: Optional[int] = None
        self.load_net(cfg_file, weights_file, resolution)
        self.class_names = load_classes(class_names_file)
        self.num_classes = len(self.class_names)
        self.class_filters = class_filters

    def load_net(self, cfg_file: pathlib.Path, weights_file: pathlib.Path,
                 resolution: int) -> None:
        self.net = Darknet(str(cfg_file))
        self.net.load_weights(str(weights_file))
        self.net.net_info['height'] = resolution
        self.net.cuda()
        self.input_dim = self.net.net_info['height']
        if self.input_dim % 32 != 0 or self.input_dim <= 32:
            raise ValueError("Input resolution must be a multiple of 32 and greater than 32")
        # self.net(get_test_input(self.input_dim, True), True)
        self.net.eval()

    def prep_frame(
            self, frame: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray, Tuple[int, int]]:
        original_frame = frame
        dim = original_frame.shape[1], original_frame.shape[0]
        frame = (letterbox_image(original_frame,
                                 (self.input_dim, self.input_dim)))
        frame_ = frame[:, :, ::-1].transpose((2, 0, 1)).copy()
        frame_ = torch.from_numpy(frame_).float().div(255.0).unsqueeze(0)
        return frame_, original_frame, dim

    def format_output(self, output: Any, threshold: float,
                      frame_dimensions: torch.Tensor) -> Optional[Any]:
        output = write_results(output,
                               threshold,
                               self.num_classes,
                               nms=True,
                               nms_conf=threshold)
        if isinstance(output, int):
            # means no output
            return None
        frame_dimensions = frame_dimensions.repeat(output.size(0), 1)
        scaling_factor = torch.min(self.input_dim / frame_dimensions,
                                   1)[0].view(-1, 1)
        output[:, [1, 3]] -= (self.input_dim - scaling_factor *
                              frame_dimensions[:, 0].view(-1, 1)) / 2
        output[:, [2, 4]] -= (self.input_dim - scaling_factor *
                              frame_dimensions[:, 1].view(-1, 1)) / 2
        output[:, 1:5] /= scaling_factor
        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                            frame_dimensions[i, 0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                            frame_dimensions[i, 1])
        return output

    def get_detections(self,
                       frame: np.ndarray,
                       threshold: float = 0.7) -> FrameAnnotations:
        if frame is None:
            return FrameAnnotations(frame=frame,
                                    objects=list(),
                                    image_width=None,
                                    image_height=None)
        new_frame, frame, dimensions = self.prep_frame(frame)
        new_frame = new_frame.cuda()
        frame_dimensions = torch.FloatTensor(dimensions).repeat(1, 2).cuda()
        with torch.no_grad():
            output = self.net(Variable(new_frame), True)
        output = self.format_output(output, threshold, frame_dimensions)
        objects = list()
        if output is not None:
            for obj in output:
                if self.class_filters is not None:
                    if self.class_names[int(obj[-1])] not in \
                            self.class_filters:
                        continue
                objects.append(
                    Object(class_name=self.class_names[int(obj[-1])],
                           bbox=BBox(left=int(obj[1]),
                                     top=int(obj[2]),
                                     right=int(obj[3]),
                                     bottom=int(obj[4]))))
        return FrameAnnotations(frame=frame,
                                objects=objects,
                                image_width=frame.shape[1],
                                image_height=frame.shape[0])
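
A minimal usage sketch for the YOLO wrapper above (the cfg/weights/names paths and the video source are placeholders; FrameAnnotations, Object and BBox are assumed to come from the same module):

import pathlib

import cv2

detector = YOLO(cfg_file=pathlib.Path('cfg/yolov3.cfg'),          # placeholder path
                weights_file=pathlib.Path('yolov3.weights'),      # placeholder path
                class_names_file=pathlib.Path('data/coco.names'),
                class_filters=['person', 'car'])

cap = cv2.VideoCapture('sample.mp4')  # placeholder source
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    annotations = detector.get_detections(frame, threshold=0.7)
    for obj in annotations.objects:
        box = obj.bbox
        cv2.rectangle(frame, (box.left, box.top), (box.right, box.bottom),
                      (0, 255, 0), 2)
    cv2.imshow('detections', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
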
Example #14
def main():
    global args, best_prec1
    args = parser.parse_args()

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        #        if args.arch == 'resnet50':
        #            import resnet_model
        #            model = resnet_model.resnet50_new(pretrained=True)
        #            print('save resnet50 to resnet50.weights')
        #            model.saveas_darknet_weights('resnet50.weights')
        if args.arch == 'resnet50-darknet':
            from darknet import Darknet
            model = Darknet('cfg/resnet50.cfg')
            print('load weights from resnet50.weights')
            model.load_weights('resnet50.weights')
        elif args.arch == 'resnet50-kaiming':
            from caffenet import CaffeNet
            model = CaffeNet('ResNet-50-deploy.prototxt')
            print('load weights from ResNet-50-model.caffemodel')
            model.load_weights('ResNet-50-model.caffemodel')
        elif args.arch == 'resnet50-kaiming-dk':
            from darknet import Darknet
            model = Darknet('ResNet-50-model.cfg')
            print('load weights from ResNet-50-model.weights')
            model.load_weights('ResNet-50-model.weights')
        elif args.arch == 'resnet18-caffe':
            from caffenet import CaffeNet
            model = CaffeNet('cfg/resnet-18.prototxt')
            print('load weights from resnet-18.caffemodel')
            model.load_weights('resnet-18.caffemodel')
        elif args.arch == 'resnet18-darknet':
            from darknet import Darknet
            model = Darknet('resnet-18.cfg')
            print('load weights from resnet-18.weights')
            model.load_weights('resnet-18.weights')
        elif args.arch == 'resnet50-test':
            from darknet import Darknet
            model = Darknet('test/ResNet-50-model.cfg')
            print('load weights from test/ResNet-50-model.weights')
            model.load_weights('test/ResNet-50-model.weights')
        else:
            model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        if args.arch.startswith('mobilenet'):
            model = Net()
            print(model)
        else:
            model = models.__dict__[args.arch]()

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    if args.arch in ('resnet50-test', 'resnet50-kaiming', 'resnet50-kaiming-dk'):
        normalize = transforms.Normalize(mean=[0.0, 0.0, 0.0],
                                         std=[1.0, 1.0, 1.0])
    elif args.arch in ('resnet18-darknet', 'resnet18-caffe'):
        normalize = transforms.Normalize(
            mean=[104 / 255.0, 117 / 255.0, 123 / 255.0], std=[1.0, 1.0, 1.0])
    else:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

    train_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
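
main() calls adjust_learning_rate(), which is not shown in this snippet; a minimal sketch following the standard ImageNet-example schedule (assumed: decay args.lr by a factor of 10 every 30 epochs):

def adjust_learning_rate(optimizer, epoch):
    # Assumed schedule: step the initial learning rate down 10x every 30 epochs.
    lr = args.lr * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
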
Example #15
class Detector:
    def __init__(self, showTags=False, showCordenates=False):
        self.showTags = showTags
        self.showCordenates = showCordenates

        self.model = Darknet(cfgfile)
        self.model.load_weights(weightsfile)
        self.model.net_info["height"] = resolution
        self.inp_dim = int(self.model.net_info["height"])
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32

        # If there's a GPU availible, put the model on GPU
        if CUDA:
            self.model.cuda()

        self.model.eval()
        self.reconocimiento_facil = ReconocimientoFacial()

    def detect(self, frame, debugFrames=[], frame_counter=0):

        img = prep_image(frame, self.inp_dim)

        im_dim = frame.shape[1], frame.shape[0]
        im_dim = torch.FloatTensor(im_dim).repeat(1, 2)

        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        with torch.no_grad():
            # output = self.model(Variable(img, volatile=True), CUDA)
            output = self.model(Variable(img), CUDA)
        output = write_results(output,
                               confidence,
                               num_classes,
                               nms_conf=nms_thesh)

        if isinstance(output, int):
            return frame

        im_dim = im_dim.repeat(output.size(0), 1)
        scaling_factor = torch.min(self.inp_dim / im_dim, 1)[0].view(-1, 1)

        output[:, [1, 3]] -= (self.inp_dim -
                              scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
        output[:, [2, 4]] -= (self.inp_dim -
                              scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

        output[:, 1:5] /= scaling_factor

        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i,
                                                                           0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i,
                                                                           1])

        if self.showCordenates:
            frame = lineaGol.add_linea_gol(frame, frame_counter)

        list(
            map(lambda x: write(x, frame, self.showTags,
                                self.reconocimiento_facil), output))

        return frame
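
A possible driver loop for the Detector above; cfgfile, weightsfile, resolution, confidence, num_classes, nms_thesh and CUDA are module-level globals in this example, so only the video source below is a placeholder:

import cv2

detector = Detector(showTags=True, showCordenates=False)
cap = cv2.VideoCapture('match.mp4')  # placeholder source
frame_counter = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    frame = detector.detect(frame, frame_counter=frame_counter)
    frame_counter += 1
    cv2.imshow('detections', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
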
Example #16
    CUDA = torch.cuda.is_available()  # whether CUDA is available

    num_classes = 80  # 80 COCO classes
    bbox_attrs = 5 + num_classes  # bounding-box attributes per prediction

    model = Darknet(cfgfile)  # build the Darknet model
    model.load_weights(weightsfile)  # load the weights file into the model

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])

    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()  # move the model to the GPU when CUDA is available

    model.eval()  # set the model to evaluation mode

    # cap = cv2.VideoCapture(0)  # VideoCapture(0) would open the local camera
    cap = cv2.VideoCapture("http://192.168.0.54:8409/?action=snapshot")
    # VideoCapture("url") opens a network stream instead

    assert cap.isOpened(), 'Cannot capture source'
    # raises AssertionError when the source cannot be opened

    frames = 0
    # frame counter, starting at 0

    start = time.time()  # record the start time
    while cap.isOpened():  # loop while the capture keeps delivering frames
Example #17
                            stdoutToServer=True,
                            stderrToServer=True)

    datacfg = 'cfg/voc.data'
    cfgfile = 'cfg/yolov2-tiny-voc.cfg'
    weightfile = '../yolov2-tiny-bnn/weights/yolov2-tiny-voc.weights'
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    m = Darknet(cfgfile)
    m.print_network()
    m.load_weights(weightfile)
    m.cuda()
    m.eval()

    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(m.width, m.height),
                                        shuffle=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 2
    assert (valid_batchsize > 1)

    kwargs = {'num_workers': 0, 'pin_memory': True}
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batchsize,
                                               shuffle=False,
Example #18
def valid(datacfg, modelcfg, weightfile):
    def truths_length(truths, max_num_gt=50):
        for i in range(max_num_gt):
            if truths[i][1] == 0:
                return i

    # Parse configuration files
    data_options = read_data_cfg(datacfg)
    valid_images = data_options['valid']
    meshname = data_options['mesh']
    backupdir = data_options['backup']
    name = data_options['name']
    gpus = data_options['gpus']
    fx = float(data_options['fx'])
    fy = float(data_options['fy'])
    u0 = float(data_options['u0'])
    v0 = float(data_options['v0'])
    im_width = int(data_options['width'])
    im_height = int(data_options['height'])
    if not os.path.exists(backupdir):
        makedirs(backupdir)

    # Parameters
    seed = int(time.time())
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    torch.cuda.manual_seed(seed)
    save = False
    testtime = True
    num_classes = 1
    testing_samples = 0.0
    if save:
        makedirs(backupdir + '/test')
        makedirs(backupdir + '/test/gt')
        makedirs(backupdir + '/test/pr')
    # To save
    testing_error_trans = 0.0
    testing_error_angle = 0.0
    testing_error_pixel = 0.0
    errs_2d = []
    errs_3d = []
    errs_trans = []
    errs_angle = []
    errs_corner2D = []
    preds_trans = []
    preds_rot = []
    preds_corners2D = []
    gts_trans = []
    gts_rot = []
    gts_corners2D = []

    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    try:
        diam = float(data_options['diam'])
    except KeyError:
        diam = calc_pts_diameter(np.array(mesh.vertices))

    # Read intrinsic camera parameters
    intrinsic_calibration = get_camera_intrinsic(u0, v0, fx, fy)

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
    model = Darknet(modelcfg)
    model.print_network()
    model.load_weights(weightfile)
    model.cuda()
    model.eval()
    test_width = model.test_width
    test_height = model.test_height
    num_keypoints = model.num_keypoints
    num_labels = num_keypoints * 2 + 3  # +2 for width, height,  +1 for class label

    # Get the parser for the test dataset
    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(test_width, test_height),
                                        shuffle=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))

    # Specify the number of workers for multiple processing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              **kwargs)

    logging("   Testing {}...".format(name))
    logging("   Number of test samples: %d" % len(test_loader.dataset))
    # Iterate through test batches (Batch size for test data is 1)
    count = 0
    for batch_idx, (data, target) in enumerate(test_loader):
        t1 = time.time()
        # Pass data to GPU
        # import IPython; IPython.embed()
        data = data.cuda()
        # target = target.cuda()
        # Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference
        data = Variable(data, volatile=True)
        t2 = time.time()
        # Forward pass
        output = model(data).data
        t3 = time.time()
        # Using confidence threshold, eliminate low-confidence predictions
        all_boxes = get_region_boxes(output, num_classes, num_keypoints)
        all_boxes = [t.cpu() for t in all_boxes]
        t4 = time.time()
        # Evaluation
        # Iterate through all batch elements
        for box_pr, target in zip([all_boxes], [target[0]]):
            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
            truths = target.view(-1, num_labels)
            # Get how many objects are present in the scene
            num_gts = truths_length(truths)
            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = list()
                for j in range(1, 2 * num_keypoints + 1):
                    box_gt.append(truths[k][j])
                box_gt.extend([1.0, 1.0])
                box_gt.append(truths[k][0])

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:18], [-1, 2]),
                                        dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [-1, 2]),
                                        dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height
                preds_corners2D.append(corners2D_pr)
                gts_corners2D.append(corners2D_gt)

                # Compute corner prediction error
                corner_norm = np.linalg.norm(corners2D_gt - corners2D_pr,
                                             axis=1)
                corner_dist = np.mean(corner_norm)
                errs_corner2D.append(corner_dist)

                # Compute [R|t] by pnp
                R_gt, t_gt = pnp(
                    np.array(np.transpose(
                        np.concatenate((np.zeros((3, 1)), corners3D[:3, :]),
                                       axis=1)),
                             dtype='float32'), corners2D_gt,
                    np.array(intrinsic_calibration, dtype='float32'))
                R_pr, t_pr = pnp(
                    np.array(np.transpose(
                        np.concatenate((np.zeros((3, 1)), corners3D[:3, :]),
                                       axis=1)),
                             dtype='float32'), corners2D_pr,
                    np.array(intrinsic_calibration, dtype='float32'))

                # Compute translation error
                trans_dist = np.sqrt(np.sum(np.square(t_gt - t_pr)))
                errs_trans.append(trans_dist)

                # Compute angle error
                angle_dist = calcAngularDistance(R_gt, R_pr)
                errs_angle.append(angle_dist)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt,
                                                intrinsic_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr,
                                                  intrinsic_calibration)
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

                # Compute 3D distances
                transform_3d_gt = compute_transformation(vertices, Rt_gt)
                transform_3d_pred = compute_transformation(vertices, Rt_pr)
                norm3d = np.linalg.norm(transform_3d_gt - transform_3d_pred,
                                        axis=0)
                vertex_dist = np.mean(norm3d)
                errs_3d.append(vertex_dist)

                # Sum errors
                testing_error_trans += trans_dist
                testing_error_angle += angle_dist
                testing_error_pixel += pixel_dist
                testing_samples += 1
                count = count + 1

                if save:
                    preds_trans.append(t_pr)
                    gts_trans.append(t_gt)
                    preds_rot.append(R_pr)
                    gts_rot.append(R_gt)

                    np.savetxt(
                        backupdir + '/test/gt/R_' + valid_files[count][-8:-3] +
                        'txt', np.array(R_gt, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/gt/t_' + valid_files[count][-8:-3] +
                        'txt', np.array(t_gt, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/pr/R_' + valid_files[count][-8:-3] +
                        'txt', np.array(R_pr, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/pr/t_' + valid_files[count][-8:-3] +
                        'txt', np.array(t_pr, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/gt/corners_' +
                        valid_files[count][-8:-3] + 'txt',
                        np.array(corners2D_gt, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/pr/corners_' +
                        valid_files[count][-8:-3] + 'txt',
                        np.array(corners2D_pr, dtype='float32'))

        t5 = time.time()

    # Compute 2D projection error, 6D pose error, 5cm5degree error
    px_threshold = 5  # 5 pixel threshold for 2D reprojection error is standard in recent sota 6D object pose estimation works
    eps = 1e-5
    acc = len(np.where(
        np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
    acc5cm5deg = len(
        np.where((np.array(errs_trans) <= 0.05)
                 & (np.array(errs_angle) <= 5))[0]) * 100. / (len(errs_trans) +
                                                              eps)
    acc3d10 = len(np.where(
        np.array(errs_3d) <= diam * 0.1)[0]) * 100. / (len(errs_3d) + eps)
    corner_acc = len(np.where(np.array(errs_corner2D) <= px_threshold)
                     [0]) * 100. / (len(errs_corner2D) + eps)
    mean_err_2d = np.mean(errs_2d)
    mean_corner_err_2d = np.mean(errs_corner2D)
    nts = float(testing_samples)

    if testtime:
        print('-----------------------------------')
        print('  tensor to cuda : %f' % (t2 - t1))
        print('    forward pass : %f' % (t3 - t2))
        print('get_region_boxes : %f' % (t4 - t3))
        print(' prediction time : %f' % (t4 - t1))
        print('            eval : %f' % (t5 - t4))
        print('-----------------------------------')

    # Print test statistics
    logging('Results of {}'.format(name))
    logging('   Acc using {} px 2D Projection = {:.2f}%'.format(
        px_threshold, acc))
    logging('   Acc using 10% threshold - {} vx 3D Transformation = {:.2f}%'.
            format(diam * 0.1, acc3d10))
    logging('   Acc using 5 cm 5 degree metric = {:.2f}%'.format(acc5cm5deg))
    logging(
        "   Mean 2D pixel error is %f, Mean vertex error is %f, mean corner error is %f"
        % (mean_err_2d, np.mean(errs_3d), mean_corner_err_2d))
    logging(
        '   Translation error: %f m, angle error: %f degree, pixel error: %f pix'
        % (testing_error_trans / nts, testing_error_angle / nts,
           testing_error_pixel / nts))

    if save:
        predfile = backupdir + '/predictions_linemod_' + name + '.mat'
        scipy.io.savemat(
            predfile, {
                'R_gts': gts_rot,
                't_gts': gts_trans,
                'corner_gts': gts_corners2D,
                'R_prs': preds_rot,
                't_prs': preds_trans,
                'corner_prs': preds_corners2D
            })
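
valid() relies on a pnp() helper that is not defined in this snippet; a minimal sketch built on cv2.solvePnP, assuming it should return a rotation matrix and a translation vector for matched 3D/2D point sets:

import cv2
import numpy as np

def pnp(points_3D, points_2D, camera_matrix):
    # Assumed helper: solve the perspective-n-point problem and return (R, t).
    distortion = np.zeros((4, 1))  # assume no lens distortion
    _, rvec, t = cv2.solvePnP(
        points_3D,
        np.ascontiguousarray(points_2D[:, :2]).reshape((-1, 1, 2)),
        camera_matrix, distortion)
    R, _ = cv2.Rodrigues(rvec)  # rotation vector -> 3x3 rotation matrix
    return R, t
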
Example #19
    'cautery': 'cfg/my_config_webcam.yaml'
}  #

weightfile = {'hands': 'backup/hands/000500.weights'}

namesfile = {'hands': 'data/hands.names'}

#######################################################
# Setting up YOLO-hand
#######################################################
model_hand = Darknet(cfgfile['hands'])
model_hand.load_weights(weightfile['hands'])
print('Loading weights from %s... Done!' % (weightfile['hands']))

if use_cuda:
    model_hand.cuda()

class_names = uyolo.load_class_names(namesfile['hands'])

#######################################################
# Setting up DOPE
#######################################################
yaml_path = cfgfile['cautery']
with open(yaml_path, 'r') as stream:
    try:
        print("Loading DOPE parameters from '{}'...".format(yaml_path))
        params = yaml.safe_load(stream)
        print('    Parameters loaded.')
    except yaml.YAMLError as exc:
        print(exc)
Example #20
m = Darknet(cfgfile)
region_loss = m.loss
m.load_weights(weightfile)

print('--- bn weight ---')
print(m.models[0][1].weight)
print('--- bn bias ---')
print(m.models[0][1].bias)
print('--- bn running_mean ---')
print(m.models[0][1].running_mean)
print('--- bn running_var ---')
print(m.models[0][1].running_var)

m.train()
if torch.cuda.is_available():
    m = m.cuda()

optimizer = optim.SGD(m.parameters(), lr=1e-2, momentum=0.9, weight_decay=0.1)

img = Image.open(imgpath)
img = image2torch(img)
if torch.cuda.is_available():
    img = img.cuda()
img = Variable(img)

target = Variable(label)

print('----- img ---------------------')
print(img.data.storage()[0:100])
print('----- target  -----------------')
print(target.data.storage()[0:100])
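
The snippet above only inspects the initial batch-norm parameters and input tensors; one hypothetical optimization step on the debug image, assuming region_loss is called as region_loss(output, target) like the RegionLoss module in this codebase:

optimizer.zero_grad()
output = m(img)                     # forward pass through the Darknet model
loss = region_loss(output, target)  # region loss against the label tensor
loss.backward()
optimizer.step()
print('loss: %f' % loss.data[0])    # old-style scalar access, matching the Variable API above
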
Example #21
class ObjectDetection:
    def __init__(self, id): 
        # self.cap = cv2.VideoCapture(id)
        self.cap = WebcamVideoStream(src = id).start()
        self.cfgfile = "cfg/yolov3.cfg"
        # self.cfgfile = 'cfg/yolov3-tiny.cfg'
        self.weightsfile = "yolov3.weights"
        # self.weightsfile = 'yolov3-tiny.weights'
        self.confidence = float(0.5)
        self.nms_thesh = float(0.4)
        self.num_classes = 80
        self.classes = load_classes('data/coco.names')
        self.colors = pkl.load(open("pallete", "rb"))
        self.model = Darknet(self.cfgfile)
        self.CUDA = torch.cuda.is_available()
        self.model.load_weights(self.weightsfile)
        self.model.net_info["height"] = 160
        self.inp_dim = int(self.model.net_info["height"])
        self.width = 640 #640#
        self.height = 480 #360#
        print("Loading network.....")
        if self.CUDA:
            self.model.cuda()
        print("Network successfully loaded")
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32
        self.model.eval()

    def main(self):
        q = queue.Queue()
        def frame_render(queue_from_cam):
            frame = self.cap.read()
            frame = cv2.resize(frame,(self.width, self.height))
            queue_from_cam.put(frame)
        cam = threading.Thread(target=frame_render, args=(q,))
        cam.start()
        cam.join()
        frame = q.get()
        q.task_done()
        fps = FPS().start() 
        try:
            img, orig_im, dim = prep_image(frame, self.inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1,2)
            if self.CUDA:                            #### If you have a gpu properly installed then it will run on the gpu
                im_dim = im_dim.cuda()
                img = img.cuda()
            # with torch.no_grad():               #### Set the model in the evaluation mode
            output = self.model(Variable(img), self.CUDA)
            output = write_results(output, self.confidence, self.num_classes, nms = True, nms_conf = self.nms_thesh)  #### Localize the objects in a frame
            output = output.type(torch.half)
            if list(output.size()) == [1,86]:
                pass
            else:
                output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(self.inp_dim))/self.inp_dim

    #            im_dim = im_dim.repeat(output.size(0), 1)
                output[:,[1,3]] *= frame.shape[1]
                output[:,[2,4]] *= frame.shape[0]
                list(map(lambda x: write(x, frame, self.classes, self.colors),output))
                x,y,w,h = b_boxes["bbox"][0],b_boxes["bbox"][1], b_boxes["bbox"][2], b_boxes["bbox"][3]
                distance = (2 * 3.14 * 180) / (w + h * 360) * 1000 + 3  ### rough distance estimate in inches
                feedback = "{} is at {} inches".format(labels["Current Object"], round(distance))
                # speak.Speak(feedback)   # On a Linux-based OS use espeak instead; this speech library adds unnecessary latency on Windows
#                 print(feedback)
        except Exception:
            pass
        fps.update()
        fps.stop()
        print("[INFO] elasped time: {:.2f}".format(fps.elapsed()))
        print("[INFO] approx. FPS: {:.1f}".format(fps.fps()))
        frame = cv2.putText(frame, str("{:.2f} Inches".format(distance)), (x,y), cv2.FONT_HERSHEY_DUPLEX, 0.6, (0,0,255), 1, cv2.LINE_AA)
        ret, jpeg = cv2.imencode('.jpg', frame)
        return jpeg.tobytes()  # tostring() is deprecated in NumPy
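
main() returns a single JPEG-encoded frame, which suggests it feeds an HTTP stream; a hypothetical Flask wrapper serving it as MJPEG (the route and app object are not part of the original example):

from flask import Flask, Response

app = Flask(__name__)
detection = ObjectDetection(0)  # 0 = default webcam

def gen():
    # Yield one multipart chunk per processed frame.
    while True:
        frame = detection.main()
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')

@app.route('/video_feed')
def video_feed():
    return Response(gen(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')
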
Example #22
def main():
    args = arg_parse()
    confidence = args.confidence
    nms_thresh = args.nms_thresh
    start = 0
    CUDA = torch.cuda.is_available()

    classes = load_classes("data/coco.names")
    num_classes = len(classes)

    # Set up the neural network
    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # if there's a GPU available, put the model on GPU
    if CUDA:
        model.cuda()

    # set the model in evaluation mode
    model.eval()

    def write(x, img, color):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        cls = int(x[-1])
        label = "{0}".format(classes[cls])
        cv2.rectangle(img, c1, c2, color, 4)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img,
                    label, (c1[0], c1[1] + t_size[1] + 4),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (150, 150, 150),
                    thickness=1)

    # detection phase
    cap = cv2.VideoCapture(0)
    assert cap.isOpened(), "Cannot capture source"

    frames = 0
    start = time.time()
    hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(
        map(lambda x: (int(x[0] * 200), int(x[1] * 200), int(x[2] * 200)),
            colors))
    np.random.seed(10000)
    np.random.shuffle(colors)
    np.random.seed(None)  # reset seed to default.

    while cap.isOpened():
        ret, frame = cap.read()

        if ret:
            frame = cv2.resize(frame, dsize=(1280, 960))
            img = prep_image(frame, inp_dim)
            print(f"IMG_SHAPE: {img.shape}")
            im_dim = frame.shape[1], frame.shape[0]
            im_dim = torch.FloatTensor(im_dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                outputs = model(Variable(img), CUDA)
            outputs = write_results(outputs,
                                    confidence,
                                    num_classes,
                                    nms_conf=nms_thresh)

            if not isinstance(outputs, int):
                im_dim = im_dim.repeat(outputs.size(0), 1)
                scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

                outputs[:, [1, 3]] -= (
                    inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
                outputs[:, [2, 4]] -= (
                    inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

                outputs[:, 1:5] /= scaling_factor

                for i in range(outputs.shape[0]):
                    outputs[i, [1, 3]] = torch.clamp(outputs[i, [1, 3]], 0.0,
                                                     im_dim[i, 0])
                    outputs[i, [2, 4]] = torch.clamp(outputs[i, [2, 4]], 0.0,
                                                     im_dim[i, 1])

                for output in outputs:
                    color = colors[int(output[-1])]
                    write(output, frame, color)

            cv2.imshow("frame", frame)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print(time.time() - start)
            print("FPS of the video is {:5.2f}".format(frames /
                                                       (time.time() - start)))
        else:
            break
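
prep_image() is imported from the repo's util module and not shown here; a minimal sketch of the single-return variant used in this example (letterbox resize, BGR-to-RGB, CHW layout, normalization), assuming gray padding:

import cv2
import numpy as np
import torch

def letterbox_image(img, inp_dim):
    # Resize keeping the aspect ratio; pad the remainder with gray (128).
    img_w, img_h = img.shape[1], img.shape[0]
    w, h = inp_dim
    scale = min(w / img_w, h / img_h)
    new_w, new_h = int(img_w * scale), int(img_h * scale)
    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
    canvas = np.full((h, w, 3), 128, dtype=np.uint8)
    canvas[(h - new_h) // 2:(h - new_h) // 2 + new_h,
           (w - new_w) // 2:(w - new_w) // 2 + new_w, :] = resized
    return canvas

def prep_image(img, inp_dim):
    # BGR HxWxC uint8 frame -> normalized RGB 1xCxHxW float tensor.
    img = letterbox_image(img, (inp_dim, inp_dim))
    img = img[:, :, ::-1].transpose((2, 0, 1)).copy()
    return torch.from_numpy(img).float().div(255.0).unsqueeze(0)
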
Example #23
    CUDA = torch.cuda.is_available()
    num_classes = 80
    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda().half()

#     model(get_test_input(inp_dim, CUDA), CUDA)

#     model.eval()

    videofile = 'sample.mp4'

    cap = cv2.VideoCapture(videofile)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():
Example #24
class ObjectDetection:
    def __init__(self, id): 
        # self.cap = cv2.VideoCapture(id)
        self.cap = WebcamVideoStream(src = id).start()
        self.cfgfile = "cfg/yolov3.cfg"
        # self.cfgfile = 'cfg/yolov3-tiny.cfg'
        self.weightsfile = "yolov3.weights"
        # self.weightsfile = 'yolov3-tiny.weights'
        self.confidence = float(0.6)
        self.nms_thesh = float(0.8)
        self.num_classes = 80
        self.classes = load_classes('data/coco.names')
        self.colors = pkl.load(open("pallete", "rb"))
        self.model = Darknet(self.cfgfile)
        self.CUDA = torch.cuda.is_available()
        self.model.load_weights(self.weightsfile)
        self.model.net_info["height"] = 160
        self.inp_dim = int(self.model.net_info["height"])
        self.width = 1280 #640#1280
        self.height = 720 #360#720
        print("Loading network.....")
        if self.CUDA:
            self.model.cuda()
        print("Network successfully loaded")
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32
        self.model.eval()

    def main(self):
        q = queue.Queue()
        while True:
            def frame_render(queue_from_cam):
                frame = self.cap.read()  # if you capture the stream with cv2.VideoCapture(), use the following line instead
                # ret, frame = self.cap.read()
                frame = cv2.resize(frame,(self.width, self.height))
                queue_from_cam.put(frame)
            cam = threading.Thread(target=frame_render, args=(q,))
            cam.start()
            cam.join()
            frame = q.get()
            q.task_done()
            fps = FPS().start() 
            try:
                img, orig_im, dim = prep_image(frame, self.inp_dim)
                im_dim = torch.FloatTensor(dim).repeat(1,2)
                if self.CUDA:                            #### If you have a gpu properly installed then it will run on the gpu
                    im_dim = im_dim.cuda()
                    img = img.cuda()
                # with torch.no_grad():               #### Set the model in the evaluation mode
                output = self.model(Variable(img), self.CUDA)
                output = write_results(output, self.confidence, self.num_classes, nms = True, nms_conf = self.nms_thesh)  #### Localize the objects in a frame
                output = output.type(torch.half)
                
                if list(output.size()) == [1,86]:
                    print(output.size())
                    pass
                else:
                    output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(self.inp_dim))/self.inp_dim
                
        #            im_dim = im_dim.repeat(output.size(0), 1)
                    output[:,[1,3]] *= frame.shape[1]
                    output[:,[2,4]] *= frame.shape[0]
                    list(map(lambda boxes: write(boxes, frame, self.classes, self.colors),output))
                    
            except Exception:
                pass
            
            fps.update()
            fps.stop()
            ret, jpeg = cv2.imencode('.jpg', frame)
            print("[INFO] elasped time: {:.2f}".format(fps.elapsed()))
            print("[INFO] approx. FPS: {:.1f}".format(fps.fps()))

            return jpeg.tobytes()  # tostring() is deprecated in NumPy
Example #25
def predict():

    target = os.path.join(APP_ROOT, 'static/')
    print(target)
    if not os.path.isdir(target):
        os.mkdir(target)
    else:
        print("Upload directory already exists: {}".format(target))
    print(request.files.getlist("file"))
    for upload in request.files.getlist("file"):
        print(upload)
        print("{} is the file name".format(upload.filename))
        filename = upload.filename
        destination = "/".join([target, filename])
        print("Accept incoming file:", filename)
        print("Save it to:", destination)
        upload.save(destination)

    scales = "1,2,3"
    print(filename)

    images = "static/" + str(filename)
    batch_size = int(1)
    confidence = float(0.5)
    nms_thesh = float(0.4)
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = 80
    classes = load_classes('data/coco.names')
    print("Loading network.....")
    model = Darknet("cfg/yolov3.cfg")
    model.load_weights("yolov3.weights")
    print("Network successfully loaded")
    model.net_info["height"] = "416"
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32
    if CUDA:
        model.cuda()
    model.eval()
    read_dir = time.time()
    try:
        imlist = [
            osp.join(osp.realpath('.'), images, img)
            for img in os.listdir(images)
            if os.path.splitext(img)[1] in ('.png', '.jpeg', '.jpg')
        ]
    except NotADirectoryError:
        imlist = []
        imlist.append(osp.join(osp.realpath('.'), images))
    except FileNotFoundError:
        print("No file or directory with the name {}".format(images))
        exit()
    load_batch = time.time()
    batches = list(
        map(prep_image, imlist, [inp_dim for x in range(len(imlist))]))
    im_batches = [x[0] for x in batches]
    orig_ims = [x[1] for x in batches]
    im_dim_list = [x[2] for x in batches]
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
    if CUDA:
        im_dim_list = im_dim_list.cuda()
    leftover = 0
    if (len(im_dim_list) % batch_size):
        leftover = 1
    if batch_size != 1:
        num_batches = len(imlist) // batch_size + leftover
        im_batches = [
            torch.cat(
                (im_batches[i * batch_size:min((i + 1) *
                                               batch_size, len(im_batches))]))
            for i in range(num_batches)
        ]
    i = 0
    write = False
    model(get_test_input(inp_dim, CUDA), CUDA)
    start_det_loop = time.time()
    objs = {}
    f = open("result.txt", "w+")
    for batch in im_batches:
        start = time.time()
        if CUDA:
            batch = batch.cuda()
        with torch.no_grad():
            prediction = model(Variable(batch), CUDA)
        prediction = write_results(prediction,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thesh)
        if type(prediction) == int:
            i += 1
            continue
        end = time.time()
        prediction[:, 0] += i * batch_size
        if not write:
            output = prediction
            write = 1
        else:
            output = torch.cat((output, prediction))

        for im_num, image in enumerate(
                imlist[i * batch_size:min((i + 1) * batch_size, len(imlist))]):
            im_id = i * batch_size + im_num
            objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
            print("{0:20s} predicted in {1:6.3f} seconds".format(
                image.split("/")[-1], (end - start) / batch_size))
            print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
        #f.write(listToString(objs))
        obj6 = []
        for obj in objs:  # deduplicate without shadowing the batch counter i
            if obj not in obj6:
                obj6.append(obj)
        f.write(listToString(obj6))

        f.seek(0)  # rewind before reading back what was written
        print(f.read())
        print("----------------------------------------------------------")
        i += 1
        if CUDA:
            torch.cuda.synchronize()
    f.close()  # flush the detections before result.txt is re-read below

    try:
        output
    except NameError:
        print("No detections were made")
        exit()

    im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long())
    scaling_factor = torch.min(inp_dim / im_dim_list, 1)[0].view(-1, 1)
    output[:, [1, 3]] -= (inp_dim -
                          scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (inp_dim -
                          scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scaling_factor
    for i in range(output.shape[0]):
        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim_list[i,
                                                                            0])
        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim_list[i,
                                                                            1])
    output_recast = time.time()
    class_load = time.time()
    colors = pkl.load(open("pallete", "rb"))
    draw = time.time()

    def write(x, batches, results):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        img = results[int(x[0])]
        cls = int(x[-1])
        label = "{0}".format(classes[cls])
        color = random.choice(colors)
        cv2.rectangle(img, c1, c2, color, 2)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                    cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        return img

    list(map(lambda x: write(x, im_batches, orig_ims), output))
    det_names = pd.Series(imlist).apply(
        lambda x: "{}/{}".format("static",
                                 x.split("/")[-1]))
    list(map(cv2.imwrite, det_names, orig_ims))
    end = time.time()
    torch.cuda.empty_cache()

    main1()
    main2()
    with open("result.txt", "r") as read_file:
        with open("result2.txt", "w") as write_file:
            write_file.write(read_file.read().replace(" ", '\n'))
    with open("hashesfromcaption.txt", "r") as read_file:
        with open("hsh.txt", "w") as write_file:
            write_file.write(read_file.read().replace(" ", '_'))

    with open("hsh.txt", "r") as read_file:
        with open("h.txt", "w") as write_file:
            write_file.write(read_file.read().replace("#", '\n#'))
    text = open('output.txt', 'r+')
    content = text.read()
    text.close()
    objk = open('result.txt', 'r+')
    contentobjk = objk.read()
    objk.close()
    hashcode = open('h.txt', 'r+')
    h = hashcode.read()
    hashcode.close()

    return render_template("results.html",
                           image_name=filename,
                           text=content,
                           objects=contentobjk,
                           last=h)
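
predict() writes its detections through a listToString() helper that is defined elsewhere; a minimal sketch of what it presumably does (the trailing newline is an assumption, matching how result.txt is consumed later):

def listToString(items):
    # Assumed helper: join detected object names into one space-separated line.
    return " ".join(str(item) for item in items) + "\n"
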
Example #26
    CUDA = torch.cuda.is_available()
    
    bbox_attrs = 5 + num_classes
    
    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0 
    assert inp_dim > 32

    if CUDA:
        model.cuda()
        
    model(get_test_input(inp_dim, CUDA), CUDA)

    model.eval()
    
    videofile = args.video
    
    cap = cv2.VideoCapture(videofile)
    video_frame_cnt = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_fps = int(cap.get(cv2.CAP_PROP_FPS))
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    videoWriter = cv2.VideoWriter('video_result.mp4', fourcc, video_fps, (video_width, video_height))
    
Example #27
class Detector(object):
    def __init__(self, model_def_file='', weights_file=''):
        self.model_def_file = model_def_file
        self.weights_file = weights_file
        self.model = Darknet(self.model_def_file)
        self.model.load_weights(self.weights_file)

        self.CUDA = True
        if self.CUDA:
            self.model.cuda()
        print('load network finish')

        self.confidence = 0.5
        self.nms_thresh = 0.4
        self.num_classes = 80

        self.yolo_dir = '/home/yfji/SourceCode/pytorch-yolo-v3'
        self.classes = util.load_classes(op.join('data/coco.names'))
        self.colors = pickle.load(
            open(op.join(self.yolo_dir, 'pallete'), 'rb'))

    def detect(self, image):
        prediction = self.model(Variable(image), self.CUDA)
        output = self.filter_results(prediction)  #list of [score, x1,y1,x2,y2]

        return output

    def filter_results(self, prediction, nms=True):
        conf_mask = (prediction[:, :, 4] >
                     self.confidence).float().unsqueeze(2)
        prediction = prediction * conf_mask

        try:
            torch.nonzero(prediction[:, :, 4]).transpose(0, 1).contiguous()
        except:
            return 0

        box_a = prediction.new(prediction.shape)
        box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
        box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
        box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
        box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
        prediction[:, :, :4] = box_a[:, :, :4]

        batch_size = prediction.size(0)

        output = prediction.new(1, prediction.size(2) + 1)
        write = False

        for ind in range(batch_size):
            image_pred = prediction[ind]

            max_conf, max_conf_score = torch.max(
                image_pred[:, 5:5 + self.num_classes], 1)
            max_conf = max_conf.float().unsqueeze(1)
            max_conf_score = max_conf_score.float().unsqueeze(1)
            seq = (image_pred[:, :5], max_conf, max_conf_score)
            image_pred = torch.cat(seq, 1)

            non_zero_ind = (torch.nonzero(image_pred[:, 4]))

            try:
                image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)
            except:
                continue
            img_classes = util.unique(image_pred_[:, -1])

            for cls in img_classes:
                #get the detections with one particular class
                cls_mask = image_pred_ * (image_pred_[:, -1]
                                          == cls).float().unsqueeze(1)
                class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()

                image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

                conf_sort_index = torch.sort(image_pred_class[:, 4],
                                             descending=True)[1]
                image_pred_class = image_pred_class[conf_sort_index]
                idx = image_pred_class.size(0)

                if nms:
                    for i in range(idx):
                        try:
                            ious = util.bbox_iou(
                                image_pred_class[i].unsqueeze(0),
                                image_pred_class[i + 1:])
                        except ValueError:
                            break

                        except IndexError:
                            break

                        iou_mask = (ious <
                                    self.nms_thresh).float().unsqueeze(1)
                        image_pred_class[i + 1:] *= iou_mask

                        non_zero_ind = torch.nonzero(
                            image_pred_class[:, 4]).squeeze()
                        image_pred_class = image_pred_class[non_zero_ind].view(
                            -1, 7)

                batch_ind = image_pred_class.new(image_pred_class.size(0),
                                                 1).fill_(ind)
                seq = batch_ind, image_pred_class
                if not write:
                    output = torch.cat(seq, 1)
                    write = True
                else:
                    out = torch.cat(seq, 1)
                    output = torch.cat((output, out))

        return output
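
filter_results() leans on util.bbox_iou for the NMS step; a minimal sketch of the corner-format IoU it is assumed to compute (one 1x7 detection row against the remaining Nx7 rows, with boxes in columns 0..3):

import torch

def bbox_iou(box1, box2):
    # Intersection-over-union of box1 with each row of box2 (x1, y1, x2, y2).
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    inter_x1 = torch.max(b1_x1, b2_x1)
    inter_y1 = torch.max(b1_y1, b2_y1)
    inter_x2 = torch.min(b1_x2, b2_x2)
    inter_y2 = torch.min(b1_y2, b2_y2)

    inter_area = (torch.clamp(inter_x2 - inter_x1 + 1, min=0) *
                  torch.clamp(inter_y2 - inter_y1 + 1, min=0))
    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
    return inter_area / (b1_area + b2_area - inter_area)
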
Example #28
    def start(self):

        # Initialize global variables
        global classes, BBox, colors, phase, frame, initBBox, true_class_filter

        # DETECTION PHASE SETUP

        CUDA = torch.cuda.is_available()

        text = 'No class filter selected'

        classes = load_classes('model/{}/model.names'.format(
            self.model_folder))
        colors = pkl.load(open('pallete', 'rb'))

        num_classes = len(classes)
        if [i for i in self.class_filter if not (i in classes)]:
            if self.label_info:
                text = 'WARNING: {} class/classes are not included in the selected model. Updating the search list...'.format(
                    [i for i in self.class_filter if not (i in classes)])
                self.label_info.setText(text)
            else:
                print(
                    'WARNING: {} class/classes are not included in the selected model. Updating the search list...'
                    .format(
                        [i for i in self.class_filter if not (i in classes)]))
        true_class_filter = [i for i in self.class_filter if (i in classes)]

        # Network setup
        if self.label_info:
            text += '\nLoading network...'
            self.label_info.setText(text)
        else:
            print('Loading network.....')

        model = Darknet('model/{}/model.cfg'.format(self.model_folder))
        model.load_weights('model/{}/model.weights'.format(self.model_folder))

        if self.label_info:
            text += '\nNetwork successfully loaded'
            self.label_info.setText(text)
        else:
            print('Network successfully loaded')

        model.net_info['height'] = self.reso_det
        inp_dim_det = int(model.net_info['height'])
        assert inp_dim_det % 32 == 0
        assert inp_dim_det > 32

        # If a CUDA device is available, load the model onto it
        if CUDA:
            model.cuda()

        # Put the model in evaluation mode
        model.eval()

        # TRACKING PHASE SETUP

        inp_dim_track = int(self.reso_track)

        OPENCV_OBJECT_TRACKERS = {
            'csrt': cv2.TrackerCSRT_create,
            'kcf': cv2.TrackerKCF_create,
            'boosting': cv2.TrackerBoosting_create,
            'mil': cv2.TrackerMIL_create,
            'tld': cv2.TrackerTLD_create,
            'medianflow': cv2.TrackerMedianFlow_create,
            'mosse': cv2.TrackerMOSSE_create
        }

        # SOURCE INITIALIZATION

        if self.source == '0' or self.source == '1':
            self.cap = cv2.VideoCapture(int(self.source))
            mode = 'cam'
            self.window_name = 'Camera ' + self.source
        else:
            if self.label_info:  # via the GUI we receive the full path
                self.cap = cv2.VideoCapture(self.source)
            else:  # via the terminal we only give the file name
                self.cap = cv2.VideoCapture('videos/{}'.format(self.source))
            mode = 'file'
            self.window_name = self.source
        assert self.cap.isOpened(), 'Cannot capture source'

        phase = 'det'
        initBBox = []
        cont = 0
        frames = 0

        cv2.namedWindow(self.window_name)
        cv2.setMouseCallback(self.window_name, click_det2track)

        while self.cap.isOpened():
            grab, frame = self.cap.read()

            start = time.time()

            if grab:
                # Detection phase
                if phase == 'det':

                    if mode == 'cam':
                        img = prep_image_c(frame, inp_dim_det)
                    elif mode == 'file':
                        img = prep_image_f(frame, inp_dim_det)

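                    # (width, height) of the original frame, tiled so both
                    # corners of each box can be rescaled at once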
                    im_dim = frame.shape[1], frame.shape[0]
                    im_dim = torch.FloatTensor(im_dim).repeat(1, 2)

                    if CUDA:
                        im_dim = im_dim.cuda()
                        img = img.cuda()

                    # Initialize the list of detected BBoxes
                    BBox = []

                    output = model.forward(Variable(img), CUDA)
                    output = write_results(output,
                                           self.confidence,
                                           num_classes,
                                           nms_conf=self.nms_thresh)

                    if type(output) == int:
                        frames += 1
                        cv2.imshow(self.window_name, frame)
                        key = cv2.waitKey(1)
                        if key & 0xFF == ord('q'):
                            break
                        continue

                    if mode == 'cam':
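
                        # camera frames are resized without letterboxing, so
                        # boxes map back with a plain original/inp_dim ratio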

                        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0,
                                                     float(inp_dim_det))

                        im_dim = im_dim.repeat(output.size(0), 1) / inp_dim_det
                        output[:, 1:5] *= im_dim

                    elif mode == 'file':
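
                        # undo the letterbox transform: subtract the padding
                        # offsets, rescale, then clamp boxes to the image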

                        im_dim = im_dim.repeat(output.size(0), 1)
                        scaling_factor = torch.min(inp_dim_det / im_dim,
                                                   1)[0].view(-1, 1)

                        output[:, [1, 3]] -= (inp_dim_det - scaling_factor *
                                              im_dim[:, 0].view(-1, 1)) / 2
                        output[:, [2, 4]] -= (inp_dim_det - scaling_factor *
                                              im_dim[:, 1].view(-1, 1)) / 2

                        output[:, 1:5] /= scaling_factor

                        for i in range(output.shape[0]):
                            output[i, [1, 3]] = torch.clamp(
                                output[i, [1, 3]], 0.0, im_dim[i, 0])
                            output[i, [2, 4]] = torch.clamp(
                                output[i, [2, 4]], 0.0, im_dim[i, 1])

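                    # draw every surviving detection on the displayed frame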
                    list(map(lambda x: write(x, frame), output))

                    cv2.imshow(self.window_name, frame)
                    key = cv2.waitKey(1)
                    if key & 0xFF == ord('q'):
                        break
                    frames += 1

                    if self.label_info:
                        self.label_info.setText(
                            text + '\nDETECTION PHASE:' +
                            '\n   {0: .2f} fps'.format(
                                float(1 / (time.time() - start))))

                # Tracking phase
                elif phase == 'track':

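                    # ratio maps coordinates from the tracking resolution back
                    # to the full-size frame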
                    ratio = frame.shape[0] / inp_dim_track

                    img = imutils.resize(frame, height=inp_dim_track)

                    if initBBox:
                        (success, box) = tracker.update(img)

                        if success:
                            cont = 0
                            (x, y, w, h) = [int(v) for v in box]
                            x, y, w, h = prep_rect(x, y, w, h, ratio)
                            cv2.rectangle(frame, (x, y), (x + w, y + h),
                                          (0, 255, 0), 2)

                        else:
                            cont += 1
                            if self.label_info:
                                self.label_info.setText(
                                    text + '\nTRACKING PHASE' +
                                    '\nObject lost ({})'.format(cont))
                            else:
                                print('Object lost ', cont)

                    else:
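                        # first tracking frame: seed the tracker with the box
                        # chosen via the mouse callback (track_rect)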
                        (x, y, w, h) = [int(v) for v in track_rect]
                        initBBox = (prep_rect(x, y, w, h, float(1 / ratio)))
                        tracker = OPENCV_OBJECT_TRACKERS[self.tracker_alg]()
                        tracker.init(img, initBBox)

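                    # fall back to detection after 100 consecutive lost frames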
                    if cont > 100:
                        phase = 'det'
                        cont = 0
                        initBBox = []

                    cv2.imshow(self.window_name, frame)
                    key = cv2.waitKey(1)
                    if key & 0xFF == ord('q'):
                        break
                    frames += 1

                    if self.label_info:
                        self.label_info.setText(
                            text + '\nTRACKING PHASE:' +
                            '\n   {0: .2f} fps'.format(
                                float(1 / (time.time() - start))))

                else:
                    break

            else:
                break

        if not self.label_info:
            cv2.destroyWindow(self.window_name)

        self.cap.release()

        torch.cuda.empty_cache()
Example #29
class ssp_rosbag:
    def __init__(self):
        rospy.init_node('eval_baseline', anonymous=True)

        ##############################################################################
        ##############################################################################
        ##############################################################################
        self.b_first_rb_loop = True
        self.first_time = None

        self.ns = rospy.get_param('~ns')  # robot namespace
        modelcfg = rospy.get_param('~modelcfg')
        weightfile = rospy.get_param('~weightfile')
        datacfg = rospy.get_param('~datacfg')
        rb_name = rospy.get_param('~rb_name')
        self.ado_names = [rospy.get_param('~tracked_name')]

        # Parse configuration files
        data_options = read_data_cfg(datacfg)
        valid_images = data_options['valid']

        if 'mesh' in data_options:
            meshname = data_options['mesh']
        else:
            meshname = None
            assert ('box_length' in data_options)
            box_length = float(data_options['box_length'])
            box_width = float(data_options['box_width'])
            box_height = float(data_options['box_height'])

        self.ego_name = data_options['name']
        gpus = data_options['gpus']
        self.im_width = int(data_options['width'])
        self.im_height = int(data_options['height'])

        # Parameters
        seed = int(time.time())
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)
        self.num_classes = 1

        # Read object model information, get 3D bounding box corners
        if meshname is None:
            # vertices must be 4 x N for compute_projections to work later
            vertices = np.array(
                [[box_length / 2, box_width / 2, box_height / 2, 1.],
                 [box_length / 2, box_width / 2, -box_height / 2, 1.],
                 [box_length / 2, -box_width / 2, -box_height / 2, 1.],
                 [box_length / 2, -box_width / 2, box_height / 2, 1.],
                 [-box_length / 2, -box_width / 2, box_height / 2, 1.],
                 [-box_length / 2, -box_width / 2, -box_height / 2, 1.],
                 [-box_length / 2, box_width / 2, -box_height / 2, 1.],
                 [-box_length / 2, box_width / 2, box_height / 2, 1.]]).T
            self.diam = float(data_options['diam'])
        else:
            mesh = MeshPly(meshname)
            vertices = np.c_[np.array(mesh.vertices),
                             np.ones((len(mesh.vertices), 1))].transpose()
            try:
                self.diam = float(data_options['diam'])
            except (KeyError, ValueError):
                self.diam = calc_pts_diameter(np.array(mesh.vertices))
        self.vertices = vertices
        self.corners3D = get_3D_corners(vertices)

        # Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
        torch.set_grad_enabled(False)  # since we are just doing forward passes
        self.model = Darknet(modelcfg)
        self.model.load_weights(weightfile)
        self.model.cuda()
        self.model.eval()
        self.shape = (self.model.test_width, self.model.test_height)
        num_keypoints = self.model.num_keypoints
        num_labels = num_keypoints * 2 + 3  # 2 per keypoint, +2 for width/height, +1 for class label
        ##############################################################################
        ##############################################################################
        ##############################################################################

        self.result_list = []  # save the results as they are processed
        self.itr = 0
        self.time_prev = -1
        self.bridge = CvBridge()
        self.pose_buffer_len = 20
        self.ado_pose_msg_buf = []
        self.ego_pose_msg_buf = []
        self.ego_pose_est_msg_buf = []
        self.ego_pose_est_time_msg_buf = []
        self.ado_pose_time_msg_buf = []
        self.ego_pose_time_msg_buf = []

        # Create camera (camera extrinsics from quad7.param in the msl_raptor project):
        self.tf_cam_ego = np.eye(4)
        self.tf_cam_ego[0:3,
                        3] = np.asarray([0.01504337, -0.06380886, -0.13854437])
        self.tf_cam_ego[0:3, 0:3] = np.reshape([
            -6.82621737e-04, -9.99890488e-01, -1.47832690e-02, 3.50423970e-02,
            1.47502748e-02, -9.99276969e-01, 9.99385593e-01, -1.20016936e-03,
            3.50284906e-02
        ], (3, 3))

        # Correct Rotation w/ Manual Calibration
        Angle_x = 8. / 180.
        Angle_y = 8. / 180.
        Angle_z = 0. / 180.
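        # NOTE: these values are in radians (8/180 rad is roughly 2.5 deg); a
        # true 8 deg correction would be 8. * np.pi / 180.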
        R_deltax = np.array([[1., 0., 0.],
                             [0., np.cos(Angle_x), -np.sin(Angle_x)],
                             [0., np.sin(Angle_x),
                              np.cos(Angle_x)]])
        R_deltay = np.array([[np.cos(Angle_y), 0.,
                              np.sin(Angle_y)], [0., 1., 0],
                             [-np.sin(Angle_y), 0.,
                              np.cos(Angle_y)]])
        R_deltaz = np.array([[np.cos(Angle_z), -np.sin(Angle_z), 0.],
                             [np.sin(Angle_z),
                              np.cos(Angle_z), 0.], [0., 0., 1.]])
        R_delta = np.dot(R_deltax, np.dot(R_deltay, R_deltaz))
        self.tf_cam_ego[0:3, 0:3] = np.matmul(R_delta, self.tf_cam_ego[0:3,
                                                                       0:3])
        #########################################################################################

        camera_info = rospy.wait_for_message(self.ns + '/camera/camera_info',
                                             CameraInfo, 30)
        self.K = np.reshape(camera_info.K, (3, 3))
        self.dist_coefs = np.reshape(camera_info.D, (5, ))
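        # alpha=0 keeps only valid pixels after undistortion (no black border)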
        self.new_camera_matrix, _ = cv2.getOptimalNewCameraMatrix(
            self.K, self.dist_coefs, (camera_info.width, camera_info.height),
            0, (camera_info.width, camera_info.height))

        self.log_out_dir = '/mounted_folder/ssp_logs'
        # ssp_log_name    = self.log_out_dir + "/log_" + rb_name.split("_")[-1] + "_SSP.log"
        # param_log_name = self.log_out_dir + "/log_" + rb_name.split("_")[-1] + "_PARAM.log"
        # self.logger = raptor_logger(source="SSP", mode="write", ssp_fn=ssp_log_name, param_fn=param_log_name)
        base_path = self.log_out_dir + "/log_" + rb_name.split("_")[-1]
        self.rb_name = rb_name
        self.bb_3d_dict_all = {
            self.ado_names[0]: [box_length, box_width, box_height, self.diam]
        }
        self.logger = RaptorLogger(mode="write",
                                   names=self.ado_names,
                                   base_path=base_path,
                                   b_ssp=True)

        # Write params to log file ########################################################################################################
        param_data = {}
        if self.new_camera_matrix is not None:
            param_data['K'] = np.array([
                self.new_camera_matrix[0, 0], self.new_camera_matrix[1, 1],
                self.new_camera_matrix[0, 2], self.new_camera_matrix[1, 2]
            ])
        else:
            param_data['K'] = np.array(
                [self.K[0, 0], self.K[1, 1], self.K[0, 2], self.K[1, 2]])
        param_data['3d_bb_dims'] = np.array(
            [box_length, box_width, box_height, self.diam])
        param_data['tf_cam_ego'] = np.reshape(copy(self.tf_cam_ego), (16, ))
        # self.logger.write_data_to_log(log_data, mode='prms')
        self.logger.write_params(param_data)
        ###################################################################################################################################
        self.t0 = None
        self.time_arr = []
        # self.raptor_metrics = pose_metric_tracker(px_thresh=5, prct_thresh=10, trans_thresh=0.05, ang_thresh=5, name=self.name, diam=self.diam)
        self.raptor_metrics = PoseMetricTracker(px_thresh=5,
                                                prct_thresh=10,
                                                trans_thresh=0.05,
                                                ang_thresh=5,
                                                names=self.ado_names,
                                                bb_3d_dict=self.bb_3d_dict_all)

        rospy.Subscriber(self.ns + '/mavros/vision_pose/pose',
                         PoseStamped,
                         self.ego_pose_gt_cb,
                         queue_size=10)  # optitrack pose
        rospy.Subscriber(self.ns + '/mavros/local_position/pose',
                         PoseStamped,
                         self.ego_pose_est_cb,
                         queue_size=10)  # onboard ekf pose est
        rospy.Subscriber('/quad4' + '/mavros/vision_pose/pose',
                         PoseStamped,
                         self.ado_pose_gt_cb,
                         queue_size=10)  # optitrack pose
        rospy.Subscriber(self.ns + '/camera/image_raw',
                         ROS_IMAGE,
                         self.image_cb,
                         queue_size=1,
                         buff_size=2**21)

    def ado_pose_gt_cb(self, msg):
        # if self.first_time is not None and self.first_time >= msg.header.stamp.to_sec():
        #     return
        self.ado_pose_gt_rosmsg = msg.pose
        pose_tm = msg.header.stamp.to_sec()
        self.ado_pose_msg_buf.append(msg)
        self.ado_pose_time_msg_buf.append(pose_tm)

    def ego_pose_gt_cb(self, msg):
        # if self.first_time is not None and self.first_time >= msg.header.stamp.to_sec():
        #     return
        self.ego_pose_gt_rosmsg = msg.pose
        pose_tm = msg.header.stamp.to_sec()
        self.ego_pose_msg_buf.append(msg)
        self.ego_pose_time_msg_buf.append(pose_tm)

    def ego_pose_est_cb(self, msg):
        # if self.first_time is not None and self.first_time >= msg.header.stamp.to_sec():
        #     return
        self.ego_pose_est_rosmsg = msg.pose
        pose_tm = msg.header.stamp.to_sec()
        self.ego_pose_est_msg_buf.append(msg)
        self.ego_pose_est_time_msg_buf.append(pose_tm)

    def image_cb(self, msg):
        """
        Maintains a buffer of images & times. The first element is the earliest. 
        Stored in a way to interface with a quick method for finding closest match by time.
        """
        tic = time.time()
        img_tm = msg.header.stamp.to_sec()
        if len(self.result_list) > 0 and img_tm <= self.result_list[-1][5]:
            return

        if self.t0 is None:
            self.t0 = img_tm

        img_cv2 = self.bridge.imgmsg_to_cv2(msg,
                                            desired_encoding="passthrough")
        img_cv2 = cv2.undistort(img_cv2, self.K, self.dist_coefs, None,
                                self.new_camera_matrix)
        img_pil = PIL.Image.fromarray(img_cv2).resize(self.shape)

        # grad is disabled globally (torch.set_grad_enabled(False) above), so
        # the long-removed `volatile` flag is unnecessary
        img = transforms.ToTensor()(img_pil).resize(
            1, 3, img_pil.size[0], img_pil.size[1]).cuda()

        with torch.no_grad():
            output = self.model(img).data  # Forward pass

        # Using confidence threshold, eliminate low-confidence predictions
        box_pr = get_region_boxes(output, self.num_classes,
                                  self.model.num_keypoints)

        # Denormalize the corner predictions
        corners2D_pr = np.array(np.reshape(box_pr[:18], [-1, 2]),
                                dtype='float32')
        corners2D_pr[:, 0] = corners2D_pr[:, 0] * self.im_width
        corners2D_pr[:, 1] = corners2D_pr[:, 1] * self.im_height

        # Compute [R|t] by pnp
        R_pr, t_pr = pnp(
            np.array(np.transpose(
                np.concatenate((np.zeros((3, 1)), self.corners3D[:3, :]),
                               axis=1)),
                     dtype='float32'), corners2D_pr,
            np.array(self.K, dtype='float32'))

        tf_cam_ado_est = rotm_and_t_to_tf(R_pr, t_pr)

        if len(self.ado_pose_time_msg_buf) == 0 or len(
                self.ego_pose_time_msg_buf) == 0 or len(
                    self.ego_pose_est_time_msg_buf) == 0:
            print("still waiting for other rosbag messages")
            return

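        # match this image against the nearest-in-time message in each buffer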
        ado_msg, _ = find_closest_by_time(img_tm,
                                          self.ado_pose_time_msg_buf,
                                          message_list=self.ado_pose_msg_buf)
        ego_gt_msg, _ = find_closest_by_time(
            img_tm,
            self.ego_pose_time_msg_buf,
            message_list=self.ego_pose_msg_buf)
        ego_est_msg, _ = find_closest_by_time(
            img_tm,
            self.ego_pose_est_time_msg_buf,
            message_list=self.ego_pose_est_msg_buf)

        tf_w_ado_gt = pose_to_tf(ado_msg.pose)
        tf_w_ego_gt = pose_to_tf(ego_gt_msg.pose)
        tf_w_ego_est = pose_to_tf(ego_est_msg.pose)

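        # chain the transforms: world->cam from the ego ground truth, then
        # world->ado by composing with the camera-frame PnP estimate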
        tf_w_cam_gt = tf_w_ego_gt @ invert_tf(self.tf_cam_ego)

        tf_w_ado_est = tf_w_cam_gt @ tf_cam_ado_est

        quat_pr = rotm_to_quat(tf_w_ado_est[0:3, 0:3])
        state_pr = np.concatenate((tf_w_ado_est[0:3,
                                                3], quat_pr))  # shape = (7,)

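        # remove yaw from both orientations so heading error is excluded from
        # the comparison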
        b_remove_yaw = True
        if b_remove_yaw:
            quat_pr_with_yaw = quat_pr  # quat with yaw
            quat_gt = rotm_to_quat(tf_w_ado_gt[0:3, 0:3])  # quat with yaw

            quat_pr = remove_yaw(quat_pr)  # remove yaw
            quat_gt = remove_yaw(quat_gt)  # remove yaw

            tf_w_ado_est[0:3, 0:3] = quat_to_rotm(quat_pr)  # update tf
            tf_w_ado_gt[0:3, 0:3] = quat_to_rotm(quat_gt)  # update tf

        img_to_save = copy(np.array(img.cpu()))

        self.result_list.append(
            (state_pr, copy(tf_w_ado_est), copy(tf_w_ado_gt),
             copy(corners2D_pr), img_to_save, img_tm, time.time(), copy(R_pr),
             copy(t_pr), invert_tf(tf_w_cam_gt), copy(tf_w_ego_gt),
             copy(tf_w_ego_est)))

        del img
        self.itr += 1
        self.time_arr.append(time.time() - tic)
        if self.itr > 0 and self.itr % 50 == 0:
            print("Finished processing image #{}, mean time: {}".format(
                self.itr, np.mean(self.time_arr)))
            torch.cuda.empty_cache()

    def post_process_data(self):
        print("Post-processing data now ({} itrs)".format(len(
            self.result_list)))
        b_save_bb_imgs = True
        name = self.ado_names[0]
        bb_im_path = os.path.dirname(os.path.relpath(
            __file__)) + '/output_imgs'  # PATH MUST BE RELATIVE
        create_dir_if_missing(bb_im_path)
        N = len(self.result_list)

        # To save
        trans_dist = 0.0
        angle_dist = 0.0
        pixel_dist = 0.0
        testing_samples = 0.0
        testing_error_trans = 0.0
        testing_error_angle = 0.0
        testing_error_pixel = 0.0
        errs_2d = []
        errs_3d = []
        errs_trans = []
        errs_angle = []
        errs_corner2D = []
        preds_trans = []
        preds_rot = []
        preds_corners2D = []
        gts_trans = []
        gts_rot = []
        gts_corners2D = []
        corners2D_gt = None
        log_data = {}
        for i, res in enumerate(self.result_list):

            # extract /  compute values for comparison
            state_pr, tf_w_ado_est, tf_w_ado_gt, corners2D_pr, img, img_tm, sys_time, R_cam_ado_pr, t_cam_ado_pr, tf_cam_w_gt, tf_w_ego_gt, tf_w_ego_est = res
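            # express the ground-truth ado pose in the camera frame so it can
            # be compared against the PnP estimate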
            tf_cam_ado_gt = tf_cam_w_gt @ tf_w_ado_gt
            R_cam_ado_gt = tf_cam_ado_gt[0:3, 0:3]
            t_cam_ado_gt = tf_cam_ado_gt[0:3, 3].reshape(t_cam_ado_pr.shape)

            if img_tm - self.t0 > 34 and self.rb_name == "rosbag_for_post_process_2019-12-18-02-10-28":
                print("STOPPING EARLY")
                break  # quad crashes

            Rt_cam_ado_gt = np.concatenate((R_cam_ado_gt, t_cam_ado_gt),
                                           axis=1)
            Rt_cam_ado_pr = np.concatenate((R_cam_ado_pr, t_cam_ado_pr),
                                           axis=1)
            corners2D_gt = compute_projection(
                np.hstack((np.reshape([0, 0, 0, 1], (4, 1)), self.vertices)),
                Rt_cam_ado_gt, self.new_camera_matrix).T

            if b_save_bb_imgs:
                draw_2d_proj_of_3D_bounding_box(img,
                                                corners2D_pr,
                                                corners2D_gt=corners2D_gt,
                                                epoch=None,
                                                batch_idx=None,
                                                detect_num=i,
                                                im_save_dir=bb_im_path)

            if self.raptor_metrics is not None:
                # self.raptor_metrics.update_all_metrics(vertices=self.vertices, R_gt=R_gt, t_gt=t_gt, R_pr=R_pr, t_pr=t_pr, K=self.new_camera_matrix)
                self.raptor_metrics.update_all_metrics(
                    name=name,
                    vertices=self.vertices,
                    tf_w_cam=invert_tf(tf_cam_w_gt),
                    R_cam_ado_gt=R_cam_ado_gt,
                    t_cam_ado_gt=t_cam_ado_gt,
                    R_cam_ado_pr=R_cam_ado_pr,
                    t_cam_ado_pr=t_cam_ado_pr,
                    K=self.new_camera_matrix)

            # Write data to log file #############################
            log_data['time'] = img_tm - self.t0
            log_data['state_est'] = tf_to_state_vec(tf_w_ado_est)
            log_data['state_gt'] = tf_to_state_vec(tf_w_ado_gt)
            log_data['ego_state_est'] = tf_to_state_vec(tf_w_ego_est)
            log_data['ego_state_gt'] = tf_to_state_vec(tf_w_ego_gt)
            corners3D_pr = (tf_w_ado_est @ self.vertices)[0:3, :]
            corners3D_gt = (tf_w_ado_gt @ self.vertices)[0:3, :]
            log_data['corners_3d_est'] = np.reshape(corners3D_pr,
                                                    (corners3D_pr.size, ))
            log_data['corners_3d_gt'] = np.reshape(corners3D_gt,
                                                   (corners3D_gt.size, ))
            log_data['proj_corners_est'] = np.reshape(
                self.raptor_metrics.proj_2d_pr[name].T,
                (self.raptor_metrics.proj_2d_pr[name].size, ))
            log_data['proj_corners_gt'] = np.reshape(
                self.raptor_metrics.proj_2d_gt[name].T,
                (self.raptor_metrics.proj_2d_gt[name].size, ))

            log_data['x_err'] = tf_w_ado_est[0, 3] - tf_w_ado_gt[0, 3]
            log_data['y_err'] = tf_w_ado_est[1, 3] - tf_w_ado_gt[1, 3]
            log_data['z_err'] = tf_w_ado_est[2, 3] - tf_w_ado_gt[2, 3]
            log_data['ang_err'] = calcAngularDistance(tf_w_ado_est[0:3, 0:3],
                                                      tf_w_ado_gt[0:3, 0:3])
            log_data['pix_err'] = np.mean(
                la.norm(self.raptor_metrics.proj_2d_pr[name] -
                        self.raptor_metrics.proj_2d_gt[name],
                        axis=0))
            log_data['add_err'] = np.mean(
                la.norm(corners3D_pr - corners3D_gt, axis=0))
            log_data['measurement_dist'] = la.norm(tf_w_ego_gt[0:3, 3] -
                                                   tf_w_ado_gt[0:3, 3])

            self.logger.write_data_to_log(log_data, name, mode='ssp')
            self.logger.write_data_to_log(log_data, name, mode='ssperr')

            if np.any(np.isnan(corners3D_pr)) or np.any(
                    np.isnan(corners3D_gt)) or np.any(
                        np.isnan(self.raptor_metrics.proj_2d_pr[name])
                    ):  #or la.norm(tf_cam_ado_gt[0:3, 3] - t_cam_ado_pr) > 10:
                print("ISSUE DETECTED!!")
                pdb.set_trace()
            ######################################################
        if self.raptor_metrics is not None:
            self.raptor_metrics.calc_final_metrics()
            self.raptor_metrics.print_final_metrics()

        self.logger.close_files()
        print("done with post process!")

    def truths_length(self, truths, max_num_gt=50):
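        # count ground-truth rows up to the first zero-padded entry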
        for i in range(max_num_gt):
            if truths[i][1] == 0:
                return i

    def run(self):
        rate = rospy.Rate(100)
        b_flag = True
        while not rospy.is_shutdown():
            try:
                rate.sleep()
            except rospy.ROSInterruptException:  # raised when the clock goes backwards (i.e. the rosbag loops) or on shutdown
                self.post_process_data()
                return
Example #30
    test_loader = torch.utils.data.DataLoader(InriaDataset(img_dir,
                                                           lab_dir,
                                                           shuffle=True),
                                              batch_size=3,
                                              shuffle=True)

    cfgfile = "cfg/yolov2.cfg"
    weightfile = "weights/yolov2.weights"
    printfile = "non_printability/30values.txt"

    patch_size = 400

    darknet_model = Darknet(cfgfile)
    darknet_model.load_weights(weightfile)
    darknet_model = darknet_model.cuda()
    patch_applier = PatchApplier().cuda()
    patch_transformer = PatchTransformer().cuda()
    prob_extractor = MaxProbExtractor(0, 80).cuda()
    nms_calculator = NMSCalculator(printfile, patch_size)
    total_variation = TotalVariation()
    '''
    img = Image.open('data/horse.jpg').convert('RGB')
    img = img.resize((darknet_model.width, darknet_model.height))
    width = img.width
    height = img.height
    img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes()))
    img = img.view(height, width, 3).transpose(0, 1).transpose(0, 2).contiguous()
    img = img.view(1, 3, height, width)
    img = img.float().div(255.0)
    img = torch.autograd.Variable(img)
    '''

#Set up the neural network
print("Loading network.....")
model = Darknet(args.cfgfile)
model.load_weights(args.weightsfile)
print("Network successfully loaded")

model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
assert inp_dim % 32 == 0 
assert inp_dim > 32

#If there's a GPU availible, put the model on GPU
if CUDA:
    model.cuda()


#Set the model in evaluation mode
model.eval()

read_dir = time.time()
#Detection phase
try:
    imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images)]
except NotADirectoryError:
    imlist = []
    imlist.append(osp.join(osp.realpath('.'), images))
except FileNotFoundError:
    print("No file or directory with the name {}".format(images))
    exit()