import time

import cv2


def main(args):
    """Show detected objects with boxes, labels and prediction scores in a video stream."""
    # Load the YOLOv3 model with pretrained weights.
    print("Create YoloV3 model")
    config_parser = ConfigParser(args.config)
    model = config_parser.create_model(skip_detect_layer=False)
    detector = config_parser.create_detector(model)

    # Open the video stream.
    cap = cv2.VideoCapture(args.camera)
    if not cap.isOpened():
        print("(Error) Could not open video stream")
        exit()

    # Detect objects in the stream.
    times = []
    detect = 0
    while True:
        # Run detection only on every 50th frame because inference is too
        # slow to keep up with the capture rate; the most recent boxes are
        # redrawn on the frames in between.
        ret, image = cap.read()
        # image, _ = resize_image(image, None, config_parser.get_net_size(), keep_ratio=True)
        if not ret:
            print("(Error) Lost connection to video stream")
            break

        # Detect objects and measure the inference time.
        if detect <= 0:
            t1 = time.time()
            min_prob = 0.90
            boxes, labels, probs = detector.detect(image, min_prob)
            t2 = time.time()
            times.append(t2 - t1)
            times = times[-20:]  # average over the last 20 detections
            detect = 50
        detect -= 1

        # Display the detected objects.
        visualize_boxes(image, boxes, labels, probs, config_parser.get_labels())
        image = cv2.putText(
            image, "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
            (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('Frame', image)

        # Exit with 'q'.
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
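# A hypothetical entry point for the webcam demo above; the flag names and the
# default config path are assumptions, not taken from the project's argparser.
import argparse

if __name__ == '__main__':
    argparser = argparse.ArgumentParser(description='Live YOLOv3 detection demo')
    argparser.add_argument('-c', '--config', default='configs/predict_coco.json',
                           help='path to the model config file')
    argparser.add_argument('--camera', type=int, default=0,
                           help='index of the camera device to open')
    main(argparser.parse_args())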
import tensorflow as tf
from tqdm import tqdm


def _loop_train(config_parser, model, optimizer, generator, epoch, writer):
    # One training epoch.
    n_steps = generator.steps_per_epoch
    loss_value = 0
    for i in tqdm(range(n_steps)):
        # true_boxes = [[x1, y1, x2, y2], [x1, ...], ...]
        xs, yolo_1, yolo_2, yolo_3, true_boxes = generator.next_batch()
        ys = [yolo_1, yolo_2, yolo_3]
        grads, loss = _grad_fn(model, xs, ys)
        loss_value += loss
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if i % 100 == 0:
            step = epoch * n_steps + i
            tf.summary.scalar("training_loss", loss, step=step)

            # Training image with its ground-truth boxes.
            training_image = xs[0] * 255
            for training_boxes in true_boxes[0]:
                x1, y1, x2, y2 = training_boxes[:4]
                draw_bounding_box_on_image_array(
                    training_image, y1, x1, y2, x2,
                    use_normalized_coordinates=False)
            tf.summary.image("training_image", [training_image / 255.], step=step)

            # Training image with the network's current predictions.
            image = xs[0] * 255
            detector = config_parser.create_detector(model)
            boxes, labels, probs = detector.detect(image, 0.8)
            visualize_boxes(image, boxes, labels, probs, config_parser.get_labels())
            tf.summary.image("training_prediction", [image / 255.], step=step)
            writer.flush()

    loss_value /= n_steps
    return loss_value
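# _loop_train above relies on a _grad_fn helper that is not shown in this
# section. A minimal sketch, assuming it wraps the shared YOLO loss_fn (the
# same one used in _loop_validation below) in a tf.GradientTape; the real
# helper in the project may differ.
import tensorflow as tf

def _grad_fn(model, xs, ys):
    """Return the gradients and scalar loss for one batch (hypothetical)."""
    with tf.GradientTape() as tape:
        logits = model(xs)           # one output tensor per YOLO scale
        loss = loss_fn(ys, logits)   # scalar loss over the three scales
    grads = tape.gradient(loss, model.trainable_variables)
    return grads, loss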
def _loop_validation(config_parser, model, generator, epoch, writer):
    # One validation epoch.
    n_steps = generator.steps_per_epoch
    loss_value = 0
    for i in range(n_steps):
        xs, yolo_1, yolo_2, yolo_3, true_boxes = generator.next_batch()
        ys = [yolo_1, yolo_2, yolo_3]
        logits = model(xs)
        loss_value += loss_fn(ys, logits)
    loss_value /= n_steps

    # Log the validation loss.
    step = (epoch + 1) * n_steps
    tf.summary.scalar("validation_loss", loss_value, step=step)

    # Log the last validation image with its ground-truth boxes.
    validation_image = xs[0] * 255
    for validation_boxes in true_boxes[0]:
        x1, y1, x2, y2 = validation_boxes[:4]
        draw_bounding_box_on_image_array(validation_image, y1, x1, y2, x2,
                                         use_normalized_coordinates=False)
    tf.summary.image("validation_image", [validation_image / 255.], step=step)

    # Log the corresponding prediction with bounding boxes.
    image = xs[0] * 255
    detector = config_parser.create_detector(model)
    boxes, labels, probs = detector.detect(image, 0.8)
    visualize_boxes(image, boxes, labels, probs, config_parser.get_labels())
    tf.summary.image("validation_prediction", [image / 255.], step=step)
    writer.flush()

    return loss_value
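# A minimal sketch of how the two loops above might be wired together; the
# log directory, weights file name, and best-checkpoint policy are all
# assumptions, not taken from the project.
import tensorflow as tf

def train(config_parser, model, optimizer, train_gen, valid_gen, n_epochs):
    writer = tf.summary.create_file_writer("logs")
    best_loss = float("inf")
    with writer.as_default():
        for epoch in range(n_epochs):
            train_loss = _loop_train(config_parser, model, optimizer,
                                     train_gen, epoch, writer)
            valid_loss = _loop_validation(config_parser, model,
                                          valid_gen, epoch, writer)
            print("epoch {}: train {:.4f}, valid {:.4f}".format(
                epoch, train_loss, valid_loss))
            if valid_loss < best_loss:  # keep only the best weights
                best_loss = valid_loss
                model.save_weights("weights.h5")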
import matplotlib.pyplot as plt
import numpy as np


def show_frame_with_labels(frame, bboxes, labels, probs):
    """Visualize the video frame with bounding boxes, labels and probabilities.

    Args:
        frame: Current video frame.
        bboxes: Bounding box data for objects in the current frame.
        labels: Labels for the bounding boxes.
        probs: Probabilities for the bounding boxes.
    """
    text_labels = [
        "Cyclist", "Misc", "Person_sitting", "Tram", "Truck", "Van", "Car",
        "Person"
    ]
    visualize_boxes(frame, bboxes, labels, probs, text_labels)

    # Show the frame with the bounding boxes, labels and probabilities.
    plt.imshow(frame.astype(np.uint8))
    plt.axis('off')
    plt.show(block=False)
    plt.pause(0.1)
    plt.close()
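# Example usage inside a capture loop; the video file name is a placeholder
# and detector is assumed to expose the detect() API used elsewhere here.
import cv2

cap = cv2.VideoCapture('kitti_clip.mp4')
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    rgb = frame[..., ::-1]  # OpenCV reads BGR; convert to RGB
    bboxes, labels, probs = detector.detect(rgb, 0.5)
    show_frame_with_labels(rgb, bboxes, labels, probs)
cap.release()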
import os
from glob import glob

import cv2


def main():
    # imagesPattern = '/hdd/Datasets/counters/1_from_phone/1_all_downsized/*.jpg'
    # imagesPattern = '/hdd/Datasets/counters/2_from_phone/val/*.jpg'
    # imagesPattern = '/hdd/Datasets/counters/1_from_phone/val/*.jpg'
    imagesPattern = '/hdd/Datasets/counters/3_from_phone/*.jpg'

    detector = CounterScreenModel('weights/2_from_scratch/weights.h5')
    for image_path in sorted(glob(imagesPattern)):
        if os.path.splitext(image_path)[1] == '.xml':
            continue
        image = cv2.imread(image_path)[..., ::-1]  # BGR to RGB
        image = fit_image_to_shape(image, (1000, 1800))
        boxes, labels, probs = detector.detect(image, 0.5)

        labelNames = detector.labelNames
        labelIndex = {_id: name for _id, name in enumerate(labelNames)}
        print(labels, [labelIndex[_id] for _id in labels])

        visualize_boxes(image, boxes, labels, probs, labelNames)
        if imshowWait(img=(image[..., ::-1], image_path)) == 27:  # Esc to quit
            break
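# Hypothetical sketches of the two helpers main() assumes; the project's real
# versions may behave differently. fit_image_to_shape shrinks an image so it
# fits inside (max_h, max_w) without changing the aspect ratio, and imshowWait
# displays an image and returns the pressed key code (27 is Esc).
import cv2

def fit_image_to_shape(image, max_shape):
    max_h, max_w = max_shape
    h, w = image.shape[:2]
    scale = min(max_h / h, max_w / w, 1.0)  # only ever downscale
    if scale < 1.0:
        image = cv2.resize(image, (int(w * scale), int(h * scale)))
    return image

def imshowWait(img):
    image, title = img  # (image, window title) tuple, as passed above
    cv2.imshow(str(title), image)
    return cv2.waitKey() & 0xFF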
import argparse

import cv2
import matplotlib.pyplot as plt

argparser = argparse.ArgumentParser(description='Run YOLOv3 detection on a single image')
argparser.add_argument('-c', '--config', help='path to config file')
argparser.add_argument('-i', '--image', help='path to image file')

if __name__ == '__main__':
    args = argparser.parse_args()
    image_path = args.image

    # 1. Create the YOLO model & load weights.
    config_parser = ConfigParser(args.config)
    model = config_parser.create_model(skip_detect_layer=False)
    detector = config_parser.create_detector(model)

    # 2. Load the image (BGR to RGB).
    image = cv2.imread(image_path)
    image = image[:, :, ::-1]

    # 3. Run detection.
    boxes, labels, probs = detector.detect(image, 0.5)
    print(probs)

    # 4. Draw the detected boxes.
    visualize_boxes(image, boxes, labels, probs, config_parser.get_labels())

    # 5. Plot the result.
    plt.imshow(image)
    plt.show()
counting += 1
if log_progress:
    print("Processing Frame : ", str(counting))

check_frame_interval = counting % frame_detection_interval
if counting == 1 or check_frame_interval == 0:
    try:
        # detected_frame, output_objects_array = self.__detector.detectObjectsFromImage(
        #     input_image=frame, input_type="array", output_type="array",
        #     minimum_percentage_probability=minimum_percentage_probability,
        #     display_percentage_probability=display_percentage_probability,
        #     display_object_name=display_object_name)
        # detector.detect() returns (boxes, labels, probs).
        detected_frame_pre, output_objects_array, pred = detector.detect(frame, 0.2)
        visualize_boxes(frame, detected_frame_pre, output_objects_array, pred,
                        config_parser.get_labels())
        detected_frame = frame
        # plt.imshow(detected_frame)
        # plt.show()
    except Exception:
        print('none')

output_frames_dict[counting] = output_objects_array

output_objects_count = {}
for eachItem in output_objects_array:
    eachItemName = eachItem
    try:
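# A hedged guess at how the truncated counting block above continues: tally
# how many detections of each label appear in the current frame. The real
# loop body may differ.
output_objects_count = {}
for eachItem in output_objects_array:
    eachItemName = eachItem
    try:
        output_objects_count[eachItemName] += 1
    except KeyError:
        output_objects_count[eachItemName] = 1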