예제 #1
0
def car():
    def prepare_input(img, inp_dim):
        """Turn a frame into a batched float tensor for the network.

        The frame is resized by ``custom_resize`` to ``inp_dim`` x ``inp_dim``,
        its last axis is reversed (presumably BGR -> RGB for OpenCV frames;
        confirm against the caller), the last axis is moved to the front, the
        values are divided by 255 and a leading batch axis is added.

        Args:
            img: Array-like frame exposing ``shape``; assumed to be laid out
                as height x width x channels (TODO confirm).
            inp_dim: Side length of the square network input.

        Returns:
            Tuple ``(tensor, original_frame, (shape[1], shape[0]))`` where the
            last item is taken from the untouched input frame.
        """
        rows, cols = img.shape[0], img.shape[1]

        resized = custom_resize(img, (inp_dim, inp_dim))

        # Reverse the last axis and reorder axes to (2, 0, 1); copy() makes the
        # result contiguous so torch.from_numpy can wrap it.
        channel_first = resized[:, :, ::-1].transpose((2, 0, 1)).copy()

        tensor = torch.from_numpy(channel_first).float()
        tensor = tensor.div(255.0).unsqueeze(0)

        return tensor, img, (cols, rows)


    def write(x, img):
        """Draw one detection on ``img`` and record car/truck labels.

        Args:
            x: One detection row (a tensor). Elements 1-2 and 3-4 are used as
                the two corners of the box and the last element as the index
                into the enclosing scope's ``classes`` list.
            img: Image to draw on; it is modified in place by the OpenCV
                drawing calls.

        Returns:
            The same ``img`` object that was passed in.

        Side effects:
            Appends the class label to the enclosing scope's ``labels`` list
            when the label is "car" or "truck".
        """
        # OpenCV drawing functions expect plain integer coordinates; convert
        # the truncated tensor values to Python ints instead of passing
        # 0-dim tensors.
        c1 = tuple(x[1:3].int().tolist())
        c2 = tuple(x[3:5].int().tolist())

        cls = int(x[-1])
        label = "{0}".format(classes[cls])
        color = random.choice(colors)

        # Outline of the detection box.
        cv2.rectangle(img, c1, c2, color, 1)

        # Filled rectangle sized to the label text, anchored at the first
        # corner, with the label drawn on top of it.
        text_w, text_h = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        label_corner = (c1[0] + text_w + 3, c1[1] + text_h + 4)
        cv2.rectangle(img, c1, label_corner, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + text_h + 4),
                    cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)

        # Only vehicles of these two classes are counted by the caller.
        if label in ("car", "truck"):
            labels.append(label)
        return img


    def arg_parse():
        """Parse the command-line arguments of the detection module.

        Returns:
            argparse.Namespace with the attributes ``video``, ``dataset``,
            ``confidence``, ``nms_thresh``, ``cfgfile``, ``weightsfile`` and
            ``reso``. ``confidence`` and ``nms_thresh`` are floats; ``reso``
            is kept as a string, exactly as before.
        """
        parser = argparse.ArgumentParser(description='YOLO v3 Video Detection Module')

        parser.add_argument("--video", dest='video',
                            help="Video to run detection upon",
                            default="video5.avi", type=str)
        parser.add_argument("--dataset", dest="dataset",
                            help="Dataset on which the network has been trained",
                            default="pascal")
        # type=float keeps a value given on the command line consistent with
        # the float default and lets argparse reject malformed numbers.
        parser.add_argument("--confidence", dest="confidence",
                            help="Object Confidence to filter predictions",
                            default=0.5, type=float)
        parser.add_argument("--nms_thresh", dest="nms_thresh",
                            help="NMS Threshhold",
                            default=0.4, type=float)
        parser.add_argument("--cfg", dest='cfgfile',
                            help="Config file",
                            default="cfg/yolov3.cfg", type=str)
        parser.add_argument("--weights", dest='weightsfile',
                            help="weightsfile",
                            default="yolov3.weights", type=str)
        parser.add_argument("--reso", dest='reso',
                            help="Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
                            default="128", type=str)
        return parser.parse_args()



    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = 80

    bbox_attrs = 5 + num_classes

    print("Loading network")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network loaded")
    classes = load_classes('data/coco.names')
    colors = pkl.load(open("pallete", "rb"))
    model.DNInfo["height"] = args.reso
    inp_dim = int(model.DNInfo["height"])

    if CUDA:
        model.cuda()

    model.eval()

    videofile = args.video

    cap = cv2.VideoCapture(videofile)

    assert cap.isOpened(), 'Cannot capture source'

    while cap.isOpened():

        ret, frame = cap.read()
        if ret:

            img, orig_im, dim = prepare_input(frame, inp_dim)

            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes, nms=True, nms_conf=nms_thesh)

            if type(output) == int:
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('x'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

            output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

            labels = []
            list(map(lambda x: write(x, orig_im), output))
            #print("count:", len(labels))
            count = len(labels)
                # print(count)

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('x'):
                break
        else:
            break
    return count

if __name__ == '__main__':
    # Script entry point: parse the options, load the network and the lookup
    # tables, and select the video file to process.
    # NOTE(review): in the visible source arg_parse() is defined inside car(),
    # so this call needs a module-level arg_parse defined elsewhere -- confirm.
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = 80

    bbox_attrs = 5 + num_classes

    print("Loading network")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network loaded")
    classes = load_classes('data/coco.names')
    # Close the palette file deterministically instead of leaking the handle.
    # NOTE(review): unpickling executes arbitrary code if the file is not
    # trusted; only load a palette file that ships with the project.
    with open("pallete", "rb") as palette_file:
        colors = pkl.load(palette_file)
    model.DNInfo["height"] = args.reso
    inp_dim = int(model.DNInfo["height"])


    if CUDA:
        model.cuda()

    model.eval()

    # NOTE(review): the path is hard-coded here; args.video is not used.
    videofile = 'video.avi'
    
예제 #3
0
    # Input resolution of the network. Increase to increase accuracy. Decrease to increase speed
    reso = 128

    start = 0

    # CUDA tensor types implement the same functions as CPU tensors but use the GPU for computation.
    # My laptop does not have a GPU, so I can't test this functionality.
    CUDA = torch.cuda.is_available()

    # The model has been trained on 80 categories;
    # see coco.names.
    num_classes = 80

    print("Loading network")
    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    print("Network loaded")
    classes = load_classes('data/coco.names')

    model.DNInfo["height"] = reso
    inp_dim = int(model.DNInfo["height"])

    model.eval()

    videofile = video

    cap = cv2.VideoCapture(videofile)

    assert cap.isOpened(), 'Cannot capture source: was the correct movie loaded?'
    print("YOLO test program running press escape to exit")