Example #1
import argparse

import cv2
import numpy as np
import torch
from PIL import Image

import detect_utils

# construct the argument parser
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input', help='path to the input image/video')
parser.add_argument('-m',
                    '--min-size',
                    dest='min_size',
                    default=800,
                    type=int,
                    help='minimum input size for the Faster R-CNN network')
args = vars(parser.parse_args())

# load the fine-tuned checkpoint instead of the pretrained torchvision model
# model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
#     pretrained=True, min_size=args['min_size'])
model = torch.load("model_epoch18.pth", map_location=torch.device('cpu'))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.eval().to(device)

image = Image.open(args['input']).convert('RGB')
boxes, classes, labels = detect_utils.predict(image, model, device, 0.8)
# draw the detections on a BGR copy, which the OpenCV calls below expect
image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
image = detect_utils.draw_boxes(boxes, classes, labels, image)
cv2.imshow('Image', image)
save_name = "Pred_img"
cv2.imwrite(f"outputs/{save_name}.jpg", image)
cv2.waitKey(0)
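All of these snippets depend on a local detect_utils module that is never shown (in Example #2 its predict also returns scores). A minimal sketch of what predict and draw_boxes might look like, inferred from the call sites; the COCO-style class list and the drawing details are assumptions, not the project's actual code:

import cv2
import numpy as np
import torchvision.transforms as transforms

# assumed COCO-style class list, truncated here for brevity
COCO_NAMES = ['__background__', 'person', 'bicycle', 'car', 'motorcycle']

def predict(image, model, device, detection_threshold):
    """Run the detection model on one image and keep boxes above the threshold."""
    tensor = transforms.ToTensor()(image).to(device)
    outputs = model([tensor])  # torchvision detection models take a list of tensors
    scores = outputs[0]['scores'].detach().cpu()
    keep = scores >= detection_threshold
    boxes = outputs[0]['boxes'].detach().cpu()[keep]
    labels = outputs[0]['labels'].detach().cpu()[keep]
    classes = [COCO_NAMES[int(i)] for i in labels]
    return boxes, classes, labels

def draw_boxes(boxes, classes, labels, image):
    """Draw one rectangle and class name per detection on a BGR image."""
    image = np.array(image)
    for box, name in zip(boxes, classes):
        x1, y1, x2, y2 = (int(v) for v in box)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(image, name, (x1, max(15, y1 - 5)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    return image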
Example #2
"""Expected output format for the per-frame detections (COCO results style):

[{
    "image_id": int, "category_id": int, "bbox": [x, y, width, height], "score": float,
}]
"""
annotations = []

# read until the end of the video
while cap.isOpened():
    # capture each frame of the video
    ret, frame = cap.read()
    if ret:
        # get the start time
        start_time = time.time()
        with torch.no_grad():
            # get predictions for the current frame (threshold 0.0 keeps every detection)
            boxes, classes, labels, scores = detect_utils.predict(
                frame, model, device, 0.0)

            # build a JSON object in the expected format for each detection
            for i in range(len(boxes)):
                data = {}
                data['image_id'] = int(frame_count)
                data['category_id'] = int(labels[i].item())
                data['bbox'] = boxes[i].tolist()
                data['score'] = float(scores[i])
                annotations.append(data)
Example #3
frame_height = int(cap.get(4))  # get the frame height
save_name = f"{args['input'].split('/')[-1].split('.')[0]}_{args['min_size']}"  # name for the saved output
# define the codec and create the VideoWriter object
out = cv2.VideoWriter(f"outputs/{save_name}.mp4",
                      cv2.VideoWriter_fourcc(*'mp4v'), 30,
                      (frame_width, frame_height))

frame_count = 0  # to count the total frames
total_fps = 0  # to compute the final FPS

while cap.isOpened():  # read until the end of the video
    ret, frame = cap.read()  # capture each frame of the video
    if ret:
        start_time = time.time()  # get the start time
        with torch.no_grad():
            # get predictions for the current frame
            boxes, classes, labels = detect_utils.predict(
                frame, model, device, 0.8)
            # draw the boxes on the current frame
            image = detect_utils.draw_boxes(boxes, classes, labels, frame)
            # count detections of the chosen class
            counter = detect_utils.Counter(classes, nama_kelas='car')

        end_time = time.time()  # get the end time
        fps = 1 / (end_time - start_time)  # FPS for this frame
        total_fps += fps  # accumulate the total FPS
        frame_count += 1  # increment the frame count
        wait_time = max(1, int(fps / 4))
        cv2.imshow('image', image)
        out.write(image)
        if cv2.waitKey(wait_time) & 0xFF == ord('q'):
            break
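detect_utils.Counter is not shown either; from the call site it appears to count the detections that belong to one class. A hypothetical sketch (the real helper may do more, e.g. draw the count on the frame):

def Counter(classes, nama_kelas):
    """Count how many detected objects match the target class name."""
    return sum(1 for c in classes if c == nama_kelas)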
Example #4
parser.add_argument('-t', '--threshold', default=0.6, type=float,
                    help='minimum confidence score for detection')
args = vars(parser.parse_args())

print('USING: ')
print(f"Minimum image size: {args['min_size']}")
print(f"Confidence threshold: {args['threshold']}")

# download or load the model from disk
model = torchvision.models.detection.retinanet_resnet50_fpn(
    pretrained=True, min_size=args['min_size'])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# load the model onto the computation device
model.eval().to(device)

image = Image.open(args['input']).convert('RGB')
# a NumPy copy for the OpenCV functions
image_array = np.array(image)
# convert RGB to the OpenCV BGR color format
image_array = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)

# get the bounding boxes and class labels
boxes, classes = detect_utils.predict(image, model, device, args['threshold'])
# get the final image
result = detect_utils.draw_boxes(boxes, classes, image_array)

cv2.imshow('Image', result)
cv2.waitKey(0)
save_name = f"{args['input'].split('/')[-1].split('.')[0]}_{args['min_size']}_t{int(args['threshold']*100)}"
cv2.imwrite(f"outputs/{save_name}.jpg", result)
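For reference, the script would be invoked like this; the script name and input path are assumptions, not from the original:

python detect_image.py --input input/image1.jpg --min-size 800 --threshold 0.6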
Example #5
frame_count = 0  # to count total frames
total_fps = 0  # to get the final frames per second
# load the model onto the computation device
model = model.eval().to(device)

# read until end of video
while cap.isOpened():
    # capture each frame of the video
    ret, frame = cap.read()
    if ret:
        # get the start time
        start_time = time.time()
        with torch.no_grad():
            # get predictions for the current frame
            boxes, classes, labels = predict(frame, model, device, 0.8)

        # draw boxes and show current frame on screen
        image = draw_boxes(boxes, classes, labels, frame)
        # get the end time
        end_time = time.time()
        # get the fps
        fps = 1 / (end_time - start_time)
        # add fps to total fps
        total_fps += fps
        # increment frame count
        frame_count += 1
        wait_time = max(1, int(fps / 4))
        cv2.imshow('image', image)
        out.write(image)
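The loop is cut off here. A likely continuation, mirroring the pattern in Example #3; the else branch, the cleanup, and the average-FPS printout are assumptions based on that pattern:

        # press `q` to exit
        if cv2.waitKey(wait_time) & 0xFF == ord('q'):
            break
    else:
        break

# release the VideoCapture and destroy all OpenCV windows
cap.release()
cv2.destroyAllWindows()

# compute and print the average FPS over the whole run
avg_fps = total_fps / frame_count
print(f"Average FPS: {avg_fps:.3f}")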