Example #1
import time

import cv2
import numpy as np

import yolov2tiny


def image_object_detection(in_image, out_image):
    frame = cv2.imread(in_image)

    # Build the network for a batched 416x416x3 input from the pickled weights.
    y2t = yolov2tiny.YOLO_V2_TINY([1, 416, 416, 3], "./y2t_weights.pickle")

    t_end2end = time.time()

    # Resize to the network input size and add a batch dimension.
    _frame = resize_input(frame)
    _frame = np.expand_dims(_frame, axis=0)

    t_inference = time.time()
    tout = y2t.inference(_frame)
    t_inference = time.time() - t_inference

    # Drop the batch dimension, decode the predictions into boxes, and
    # resize the frame to the same 416x416 space the boxes are defined in.
    tout = np.squeeze(tout)
    boxes = yolov2tiny.postprocessing(tout)
    frame = cv2.resize(frame, (416, 416), interpolation=cv2.INTER_CUBIC)

    # Each box is a (label, top_left, bottom_right, color) tuple.
    for b in boxes:
        frame = cv2.rectangle(frame, b[1], b[2], b[3])
        cv2.putText(
            frame, b[0],
            (int(min(b[1][0], b[2][0])) - 1, int(min(b[1][1], b[2][1])) - 5),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, b[3], 1)

    t_end2end = time.time() - t_end2end

    cv2.imwrite(out_image, frame)

    print('DNN inference elapsed time: %.3f' % t_inference)
    print('End-to-end elapsed time   : %.3f' % t_end2end)
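These snippets lean on a resize_input helper defined elsewhere in the project. A minimal sketch, assuming the usual YOLOv2-tiny preprocessing (resize to 416x416, BGR-to-RGB conversion, scaling pixel values to [0, 1]); the exact normalization is an assumption, not confirmed by the source:

import cv2
import numpy as np

def resize_input(frame):
    # Hypothetical helper: shrink an OpenCV BGR frame to the 416x416
    # network input, convert to RGB, and scale pixels to [0, 1].
    resized = cv2.resize(frame, (416, 416), interpolation=cv2.INTER_CUBIC)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    return rgb.astype(np.float32) / 255.0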
Example #2
import cv2
import numpy as np

import yolov2tiny


def photo_write(in_video_path,
                out_photo_path,
                tensor_path='./intermediate/layer_39.npy'):
    # Draw detections on the first frame of a video using a previously
    # saved final-layer tensor instead of running inference.
    in_video = cv2.VideoCapture(in_video_path)
    ret, frame = in_video.read()
    prediction = np.load(tensor_path)
    label_boxes = yolov2tiny.postprocessing(prediction)
    frame = draw_output_frame(frame, label_boxes)
    cv2.imwrite(out_photo_path, frame)
    in_video.release()
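draw_output_frame is another project-local helper. Judging from the box tuples drawn in Example #1 (label, top-left, bottom-right, color), a plausible sketch looks like this; the exact signature is an assumption:

import cv2

def draw_output_frame(frame, label_boxes):
    # Hypothetical helper: draw each (label, pt1, pt2, color) tuple
    # returned by yolov2tiny.postprocessing onto the frame.
    for label, pt1, pt2, color in label_boxes:
        frame = cv2.rectangle(frame, pt1, pt2, color, 2)
        cv2.putText(frame, label, (int(pt1[0]), int(pt1[1]) - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
    return frame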
Example #3
from datetime import datetime
from functools import reduce

import cv2

import yolov2tiny


def video_object_detection(in_video_path: str,
                           out_video_path: str,
                           proc="cpu"):
    """
    Read a video file, run the pretrained yolo_v2_tiny model on each frame,
    and draw the detected objects. Finally, store the drawn frames to
    'out_video_path'.
    """
    reader, writer = open_video_with_opencv(in_video_path, out_video_path)
    yolo = yolov2tiny.YOLO_V2_TINY((416, 416, 3), "./y2t_weights.pickle", proc)

    width = int(reader.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(reader.get(cv2.CAP_PROP_FRAME_HEIGHT))

    acc, first_time = [], True  # per-frame (inference_time, end_to_end_time)
    while reader.isOpened():
        okay, original_image = reader.read()
        if not okay:
            break
        beg_start = datetime.now()
        image = resize_input(original_image)
        beg_infer = datetime.now()
        batched_tensors_list = yolo.inference(image)
        inference_time = (datetime.now() - beg_infer).total_seconds()

        # Final layer's output, with the batch dimension stripped.
        tensor = batched_tensors_list[-1][0]

        proposals = yolov2tiny.postprocessing(tensor)
        proposals = restore_shape(proposals, width, height)
        out_image = draw(original_image, proposals)
        writer.write(out_image)

        end_to_end_time = (datetime.now() - beg_start).total_seconds()
        acc.append((inference_time, end_to_end_time))
        print("#{} inference: {:.3f}\tend-to-end: {:.3f}".format(
            len(acc), inference_time, end_to_end_time))

        if first_time:
            # Strip the batch dimension before saving each layer's output.
            store_tensors(map(lambda x: x[0], batched_tensors_list))
            first_time = False

    reader.release()
    writer.release()
    inference_sum, end_to_end_sum = reduce(
        lambda x, y: (x[0] + y[0], x[1] + y[1]), acc)
    size = len(acc)
    print("Total inference: {:.3f}s\ttotal end-to-end: {:.3f}s".format(
        inference_sum, end_to_end_sum))
    print("Average inference: {:.3f}s\taverage end-to-end: {:.3f}s".format(
        inference_sum / size, end_to_end_sum / size))
    print("Throughput: {:.3f}fps".format(size / end_to_end_sum))
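open_video_with_opencv is assumed in several of these examples. A minimal sketch that pairs a reader with a writer of the same frame size and rate; the fourcc choice is illustrative, and Example #6 expects a third return value carrying the original frame dimensions:

import cv2

def open_video_with_opencv(in_video_path, out_video_path):
    # Hypothetical helper: open the input video and create a writer
    # with matching frame size and frame rate.
    reader = cv2.VideoCapture(in_video_path)
    width = int(reader.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = reader.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(out_video_path, fourcc, fps, (width, height))
    return reader, writer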
Example #4
import os

import cv2
import numpy as np

import yolov2tiny


def photo_object_detection(in_photo_path, out_photo_path, proc="cpu"):
    frame = cv2.imread(in_photo_path)

    # k_input_height and k_input_width are module-level constants defined
    # elsewhere in the project (YOLOv2-tiny uses a 416x416 input).
    weight_pickle_path = os.path.join(os.getcwd(),
                                      '../test-proj3/y2t_weights.pickle')
    model = yolov2tiny.YOLO_V2_TINY([1, k_input_height, k_input_width, 3],
                                    weight_pickle_path, proc)

    input_img = resize_input(frame)
    input_img = np.expand_dims(input_img, 0)

    predictions = model.inference(input_img)
    save_tensors(predictions)

    # The last tensor in the list is the network's final output.
    label_boxes = yolov2tiny.postprocessing(predictions[-1])

    frame = draw_output_frame(frame, label_boxes)
    cv2.imwrite(out_photo_path, frame)
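save_tensors (store_tensors in Example #3) dumps each layer's output for later inspection; Example #6 inlines the same idea with np.save, and Example #2 reloads one of the resulting files. A sketch consistent with those two usages:

import os
import numpy as np

def save_tensors(tensors, out_dir='./intermediate'):
    # Hypothetical helper: persist each layer's output tensor so a single
    # layer can be reloaded later (e.g. ./intermediate/layer_39.npy).
    os.makedirs(out_dir, exist_ok=True)
    for i, tensor in enumerate(tensors):
        np.save(os.path.join(out_dir, 'layer_{}.npy'.format(i + 1)), tensor)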
Example #5
import os
import time

import numpy as np

import yolov2tiny


def video_object_detection(in_video_path, out_video_path, proc="cpu"):
    #
    # This function runs the inference for each frame and creates the output video.
    #

    in_video, out_video = open_video_with_opencv(in_video_path, out_video_path)

    # Create an instance of the YOLO_V2_TINY class. Pass the dimensions of
    # the input, a path to the weight file, and the device to use as arguments.

    weight_pickle_path = os.path.join(os.getcwd(),
                                      '../test-proj3/y2t_weights.pickle')
    model = yolov2tiny.YOLO_V2_TINY([1, k_input_height, k_input_width, 3],
                                    weight_pickle_path, proc)

    # Start the main loop. For each frame of the video, the loop must do the
    # following:
    # 1. Run the inference.
    # 2. Run postprocessing on the inference result and write the drawn frame
    #    through the video writer object. The coordinates from postprocessing
    #    are relative to the resized input; they must be adjusted to fit the
    #    original video.
    # 3. Measure the end-to-end time and the time spent only on inference.
    # 4. Save the intermediate values for the first frame.
    # Note that the input must be adjusted to fit the algorithm, including
    # resizing the frame and adding the batch dimension.

    e2e_time = 0
    inference_time = 0
    frame_count = 0

    while True:
        e2e_time_start = time.time()

        ret, frame = in_video.read()
        if not ret:
            break

        frame_count += 1

        input_img = resize_input(frame)
        input_img = np.expand_dims(input_img, 0)

        inference_time_start = time.time()
        predictions = model.inference(input_img)
        inference_time += time.time() - inference_time_start

        label_boxes = yolov2tiny.postprocessing(predictions[-1])

        frame = draw_output_frame(frame, label_boxes)
        out_video.write(frame)

        e2e_time += time.time() - e2e_time_start

        # Exclude the time spent in save_tensors from the end-to-end time.
        if frame_count == 1:
            save_tensors(predictions)

    # Check the inference performance: end-to-end elapsed time and inference time.
    # Check how many frames are processed per second, respectively.
    inference_fps = frame_count / inference_time
    e2e_fps = frame_count / e2e_time
    print("Inference time: {:.3f}s".format(inference_time))
    print("End-to-end time: {:.3f}s".format(e2e_time))
    print("Inference fps: {:.3f}".format(inference_fps))
    print("End-to-end fps: {:.3f}".format(e2e_fps))

    # Release the opened videos.
    in_video.release()
    out_video.release()
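A possible command-line driver for this function; the argument layout here is illustrative and not part of the original project:

import argparse

if __name__ == '__main__':
    # Hypothetical entry point for video_object_detection.
    parser = argparse.ArgumentParser()
    parser.add_argument('in_video')
    parser.add_argument('out_video')
    parser.add_argument('--proc', default='cpu', choices=['cpu', 'gpu'])
    args = parser.parse_args()
    video_object_detection(args.in_video, args.out_video, args.proc)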
Example #6
import sys
import time

import cv2
import numpy as np

from yolov2tiny import YOLO_V2_TINY, postprocessing


def video_object_detection(in_video_path, out_video_path, proc="cpu"):
    #
    # This function runs the inference for each frame and creates the output video.
    #

    # Open the video using open_video_with_opencv. Here it also returns the
    # original frame dimensions, used to restore the output size.
    input_video, output_video, dim = open_video_with_opencv(
        in_video_path, out_video_path)
    in_shape = (1, 416, 416, 3)
    pickle_path = "./y2t_weights.pickle"
    total_elapsed_time = 0
    frame_count = 0

    # Check if video is opened. Otherwise, exit.
    if not input_video.isOpened():
        print('video is not opened')
        sys.exit()
    # Create an instance of the YOLO_V2_TINY class. Pass the dimensions of
    # the input, a path to the weight file, and the device to use as arguments.
    model = YOLO_V2_TINY(in_shape, pickle_path, proc)
    first = True

    # Start the main loop. For each frame of the video, the loop must do the
    # following:
    # 1. Run the inference.
    # 2. Run postprocessing on the inference result and write the drawn frame
    #    through the video writer object. The coordinates from postprocessing
    #    are relative to the resized input; they must be adjusted to fit the
    #    original video.
    # 3. Measure the end-to-end time and the time spent only on inference.
    # 4. Save the intermediate values for the first frame.
    # Note that the input must be adjusted to fit the algorithm, including
    # resizing the frame and changing the dimension.
    while True:
        ret, img = input_video.read()
        if not ret:
            break
        frame_count += 1
        img = resize_input(img)

        # Time only the inference call; saving the intermediate tensors
        # happens outside the timed region so it does not inflate the
        # measurement for the first frame.
        start = time.time()
        output_tensors = model.inference(img)
        elapsed_time = time.time() - start
        total_elapsed_time += elapsed_time

        if first:
            first = False
            for i, tensor in enumerate(output_tensors):
                np.save("./intermediate/layer_{}.npy".format(i + 1), tensor)
        output_tensor = output_tensors[-1]

        label_boxes = postprocessing(output_tensor)
        img = recover_input(img, dim)
        # Boxes are in the 416x416 coordinate space; the frame is drawn at
        # that size and resized back to the original dimensions below.
        for cl, (x1, y1), (x2, y2), (b, g, r) in label_boxes:
            cv2.rectangle(img, (x1, y1), (x2, y2), (r, g, b), 3)
            cv2.putText(img, cl, (x1, y1), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                        (0, 0, 0), 1)
        img = cv2.resize(img, dim)
        output_video.write(img)

    # Check the inference performance: total inference time and how many
    # frames are processed per second.
    throughput = frame_count / total_elapsed_time
    print("Total elapsed time for running inference: {:.3f}s".format(
        total_elapsed_time))
    print("Inference throughput: {:.3f} fps".format(throughput))

    # Release the opened videos.
    input_video.release()
    output_video.release()
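Example #6 also depends on a recover_input helper. Since the caller draws boxes in the 416x416 space and resizes back to the original dimensions itself, the helper presumably just undoes the preprocessing; a sketch under that assumption:

import numpy as np

def recover_input(img, dim):
    # Hypothetical helper: drop the batch axis and undo the [0, 1] scaling
    # from resize_input, keeping the 416x416 size so the box coordinates
    # from postprocessing still line up. The resize back to the original
    # dimensions happens in the caller; dim is accepted only to mirror
    # the call site.
    img = np.squeeze(img)
    return (img * 255.0).astype(np.uint8)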