import argparse

import cv2
import torch
import detect_utils
from PIL import Image

# Construct the argument parser
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input', help='path to input image/video')
parser.add_argument('-m', '--min-size', dest='min_size', default=800, type=int,
                    help='minimum input size for the Faster R-CNN network')
args = vars(parser.parse_args())

# Load the trained model from disk (the torchvision baseline is kept for reference)
# model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True,
#                                                              min_size=args['min_size'])
model = torch.load("model_epoch18.pth", map_location=torch.device('cpu'))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# load the model onto the computation device
model.eval().to(device)

# read the input image and run detection with a 0.8 confidence threshold
image = Image.open(args['input'])
boxes, classes, labels = detect_utils.predict(image, model, device, 0.8)

# draw the bounding boxes, then show and save the result
image = detect_utils.draw_boxes(boxes, classes, labels, image)
cv2.imshow('Image', image)
save_name = "Pred_img"
cv2.imwrite(f"outputs/{save_name}.jpg", image)
cv2.waitKey(0)
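# The detect_utils helpers are not shown in these snippets. Below is a minimal
# sketch of what predict() and draw_boxes() might look like for the scripts
# that expect (boxes, classes, labels); the class-name list and the exact
# post-processing are assumptions, not the project's actual code.
import cv2
import numpy as np
import torchvision.transforms as transforms

# hypothetical label list; replace with the classes your model was trained on
coco_names = ['__background__', 'person', 'bicycle', 'car']

def predict(image, model, device, detection_threshold):
    # convert the PIL image (or array) to a tensor and add a batch dimension
    transform = transforms.Compose([transforms.ToTensor()])
    tensor = transform(image).to(device).unsqueeze(0)
    outputs = model(tensor)  # forward pass
    # keep only detections above the confidence threshold
    scores = outputs[0]['scores'].detach().cpu().numpy()
    boxes = outputs[0]['boxes'].detach().cpu().numpy()
    labels = outputs[0]['labels'].cpu().numpy()
    keep = scores >= detection_threshold
    boxes = boxes[keep].astype(np.int32)
    labels = labels[keep]
    classes = [coco_names[i] for i in labels]
    return boxes, classes, labels

def draw_boxes(boxes, classes, labels, image):
    # assumes an RGB PIL image; convert to an OpenCV BGR array for drawing
    image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
    for box, cls in zip(boxes, classes):
        cv2.rectangle(image, (int(box[0]), int(box[1])),
                      (int(box[2]), int(box[3])), (0, 255, 0), 2)
        cv2.putText(image, cls, (int(box[0]), int(box[1]) - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    return image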
import argparse

import cv2
import numpy as np
import torch
import torchvision
import detect_utils
from PIL import Image

# Construct the argument parser
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input', help='path to input image/video')
parser.add_argument('-m', '--min-size', dest='min_size', default=800, type=int,
                    help='minimum input size for the RetinaNet network')
parser.add_argument('-t', '--threshold', default=0.6, type=float,
                    help='minimum confidence score for detection')
args = vars(parser.parse_args())

print('USING:')
print(f"Minimum image size: {args['min_size']}")
print(f"Confidence threshold: {args['threshold']}")

# download or load the model from disk
model = torchvision.models.detection.retinanet_resnet50_fpn(pretrained=True,
                                                            min_size=args['min_size'])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# load the model onto the computation device
model.eval().to(device)

# read the image and keep a NumPy copy for the OpenCV functions
image = Image.open(args['input']).convert('RGB')
image_array = np.array(image)
# convert to the OpenCV BGR color format
image_array = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)

# get the bounding boxes and class labels
boxes, classes = detect_utils.predict(image, model, device, args['threshold'])
# get the final image
result = detect_utils.draw_boxes(boxes, classes, image_array)
cv2.imshow('Image', result)
cv2.waitKey(0)
save_name = f"{args['input'].split('/')[-1].split('.')[0]}_{args['min_size']}_t{int(args['threshold']*100)}"
cv2.imwrite(f"outputs/{save_name}.jpg", result)
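# Assuming the script above is saved as detect.py (the file name and the input
# path below are assumptions for illustration), it would be run like this:
#
#   python detect.py --input input/image1.jpg --min-size 800 --threshold 0.7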
f"outputs/{save_name}.mp4", cv2.VideoWriter_fourcc(*'mp4v'), 30, (frame_width, frame_height)) # define codec and create VideoWriter object frame_count = 0 # menghitung jumlah frame total_fps = 0 # mendapatkan final FPS while (cap.isOpened()): # membaca vidoe sampai berakhir ret, frame = cap.read() # capture tiap frame pada video if ret == True: start_time = time.time() #start time with torch.no_grad(): boxes, classes, labels = detect_utils.predict( frame, model, device, 0.8) # get predictions for the current frame image = detect_utils.draw_boxes( boxes, classes, labels, frame) # draw boxes and show current frame on screen counter = detect_utils.Counter(classes, nama_kelas='car') # counter class end_time = time.time() # get the end time fps = 1 / (end_time - start_time) # get the end time total_fps += fps # get the end time frame_count += 1 # increment frame count wait_time = max(1, int(fps / 4)) cv2.imshow('image', image) out.write(image) if cv2.waitKey(wait_time) & 0xFF == ord('q'): break else: break
frame_count = 0  # to count the total number of frames
total_fps = 0    # to get the final frames per second

# read until the end of the video
while cap.isOpened():
    # capture each frame of the video
    ret, frame = cap.read()
    if ret == True:
        # convert the frame into PIL Image format
        pil_image = Image.fromarray(frame).convert('RGB')
        # get the start time
        start_time = time.time()
        # get predictions for the current frame
        boxes, classes = detect_utils.predict(pil_image, model, device, args['threshold'])
        # draw boxes and show the current frame on screen
        result = detect_utils.draw_boxes(boxes, classes, frame)
        # get the end time
        end_time = time.time()
        # get the fps
        fps = 1 / (end_time - start_time)
        # add fps to total fps
        total_fps += fps
        # increment frame count
        frame_count += 1
        # press 'q' to exit
        wait_time = max(1, int(fps / 4))
        cv2.imshow('image', result)
        out.write(result)
        if cv2.waitKey(wait_time) & 0xFF == ord('q'):
            break
    else:
        break
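# The video loops assume a `cap` capture object, the frame dimensions, and
# (where not shown) an `out` writer created beforehand. A minimal sketch of
# that setup, assuming the input path comes from the argument parser and the
# output video is written at a fixed 30 FPS:
import time

import cv2

cap = cv2.VideoCapture(args['input'])
if not cap.isOpened():
    print('Error while trying to read the video. Please check the path again.')

# get the frame width and height of the source video
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

save_name = args['input'].split('/')[-1].split('.')[0]
# define the codec and create the VideoWriter object
out = cv2.VideoWriter(f"outputs/{save_name}.mp4",
                      cv2.VideoWriter_fourcc(*'mp4v'), 30,
                      (frame_width, frame_height))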
# load the model onto the computation device
model = model.eval().to(device)

# read until the end of the video
while cap.isOpened():
    # capture each frame of the video
    ret, frame = cap.read()
    if ret == True:
        # get the start time
        start_time = time.time()
        with torch.no_grad():
            # get predictions for the current frame
            boxes, classes, labels = predict(frame, model, device, 0.8)
        # draw boxes and show the current frame on screen
        image = draw_boxes(boxes, classes, labels, frame)
        # get the end time
        end_time = time.time()
        # get the fps
        fps = 1 / (end_time - start_time)
        # add fps to total fps
        total_fps += fps
        # increment frame count
        frame_count += 1
        # press `q` to exit
        wait_time = max(1, int(fps / 4))
        cv2.imshow('image', image)
        out.write(image)
        if cv2.waitKey(wait_time) & 0xFF == ord('q'):
            break
    else:
        break
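# After the loop, the usual teardown releases the capture and writer and
# reports the average throughput accumulated above; a short sketch, assuming
# at least one frame was processed:
cap.release()
out.release()
cv2.destroyAllWindows()

# calculate and print the average FPS over the whole video
avg_fps = total_fps / frame_count
print(f"Average FPS: {avg_fps:.3f}")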