def detect_objects_from_image(self, img_raw, save_detection=False):
    """Run the YOLO model on one image and return the annotated result.

    Args:
        img_raw: Image array. It is batched and resized for the model and is
            also converted with ``cv2.COLOR_BGR2RGB`` before drawing.
        save_detection: When true, the annotated image is also written to
            ``FLAGS.output``.

    Returns:
        ``(img_detection, objects_detected_list)``: the image returned by
        ``draw_outputs`` and the class name of every detection, in model
        output order.
    """
    img = tf.expand_dims(img_raw, 0)
    img = transform_images(img, FLAGS.size)

    t1 = time.time()
    boxes, scores, classes, nums = self.yolo(img)
    t2 = time.time()
    logging.info('time: {}'.format(t2 - t1))

    logging.info('detections:')
    objects_detected_list = []
    for i in range(nums[0]):
        # Resolve the class name once and reuse it for logging and the result.
        detected_name = self.class_names[int(classes[0][i])]
        logging.info('\t{}, {}, {}'.format(
            detected_name, np.array(scores[0][i]), np.array(boxes[0][i])))
        objects_detected_list.append(detected_name)

    rospy.logdebug("Result-Detection=" + str(objects_detected_list))

    img = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
    img_detection = draw_outputs(
        img, (boxes, scores, classes, nums), self.class_names)

    if save_detection:
        # Write the image that draw_outputs returned, so the saved file is
        # annotated even if draw_outputs does not draw in place.
        # NOTE(review): the image was converted to RGB above while imwrite
        # expects BGR -- confirm the intended channel order of img_raw.
        cv2.imwrite(FLAGS.output, img_detection)
        logging.info('output saved to: {}'.format(FLAGS.output))

    return img_detection, objects_detected_list
def main(_argv):
    """Detect objects in ``FLAGS.image`` and save the annotated image.

    Builds the YOLOv3 Keras model, loads ``FLAGS.weights``, runs one
    prediction, filters the boxes (score threshold 0.3, NMS IoU 0.45) and
    writes the drawn result to ``FLAGS.output``.

    Args:
        _argv: Unused positional arguments.
    """
    input_layer = tf.keras.layers.Input([FLAGS.size, FLAGS.size, 3])
    feature_maps = YOLOv3(input_layer)
    bbox_tensors = [decode(fm, i) for i, fm in enumerate(feature_maps)]
    model = tf.keras.Model(input_layer, bbox_tensors)
    utils.load_weights(model, FLAGS.weights)

    # Close the file as soon as its bytes are read instead of leaking it.
    with open(FLAGS.image, 'rb') as image_file:
        test_img = tf.image.decode_image(image_file.read(), channels=3)
    img_size = test_img.shape[:2]
    test_img = tf.expand_dims(test_img, 0)
    test_img = utils.transform_images(test_img, FLAGS.size)

    pred_bbox = model.predict(test_img)
    # Flatten every output scale to (N, last_dim) and stack them together.
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)
    boxes = utils.postprocess_boxes(pred_bbox, img_size, FLAGS.size, 0.3)
    boxes = utils.nms(boxes, 0.45, method='nms')

    original_image = cv2.imread(FLAGS.image)
    img = utils.draw_outputs(original_image, boxes)
    cv2.imwrite(FLAGS.output, img)
def main(_argv):
    """Draw the ground-truth boxes of one random dataset sample.

    Loads the TFRecord dataset named by ``FLAGS.dataset``, takes one shuffled
    sample, draws its labels with a score of 1 and writes the image to
    ``FLAGS.output``.

    Args:
        _argv: Unused positional arguments.
    """
    # Use a context manager so the class file is closed after reading.
    with open(FLAGS.classes) as classes_file:
        class_names = [c.strip() for c in classes_file.readlines()]
    logging.info('classes loaded')

    dataset = load_tfrecord_dataset(FLAGS.dataset, FLAGS.classes, FLAGS.size)
    dataset = dataset.shuffle(512)

    for image, labels in dataset.take(1):
        boxes = []
        scores = []
        classes = []
        for x1, y1, x2, y2, label in labels:
            # Rows with x1 == x2 == 0 are skipped (treated as empty labels).
            if x1 == 0 and x2 == 0:
                continue
            boxes.append((x1, y1, x2, y2))
            scores.append(1)
            classes.append(label)

        # Wrap everything in a batch dimension of one for draw_outputs.
        nums = [len(boxes)]
        boxes = [boxes]
        scores = [scores]
        classes = [classes]

        logging.info('labels:')
        for i in range(nums[0]):
            logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                               np.array(scores[0][i]),
                                               np.array(boxes[0][i])))

        img = cv2.cvtColor(image.numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        cv2.imwrite(FLAGS.output, img)
        logging.info('output saved to: {}'.format(FLAGS.output))
def main(img_path, image_name):
    """Detect objects in ``<img_path>/<image_name>.jpg`` and save the result.

    The annotated image is written next to the input as
    ``<image_name>_yolo.jpg``.

    Args:
        img_path: Directory containing the input image.
        image_name: File name of the image without the ``.jpg`` extension.

    Raises:
        FileNotFoundError: If OpenCV cannot read the input image.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    class_names = load_class_names(class_name)

    source_path = os.path.join(img_path, "{}.jpg".format(image_name))
    image = cv2.imread(source_path)
    if image is None:
        # cv2.imread signals failure by returning None; fail with a clear
        # message instead of an obscure tensor conversion error later on.
        raise FileNotFoundError(
            "Could not read image: {}".format(source_path))

    image = np.array(image)
    image = tf.expand_dims(image, 0)
    resized_frame = resize_image(image, (model_size[0], model_size[1]))

    pred = model.predict(resized_frame)
    boxes, scores, classes, nums = output_boxes(
        pred, model_size,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold)

    # Drop the batch dimension again before drawing.
    image = np.squeeze(image)
    img = draw_outputs(image, boxes, scores, classes, nums, class_names)

    cv2.imwrite(os.path.join(img_path, "{}_yolo.jpg".format(image_name)), img)
def get_prediction(inputimage):
    """Detect objects in an image file, display and save the result.

    The annotated image is shown in a window, written to
    ``outputimgage.jpg`` and returned. The call blocks on ``cv2.waitKey()``
    until a key is pressed.

    Args:
        inputimage: Path of the image to read.

    Returns:
        The image returned by ``draw_outputs``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``inputimage``.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    class_names = load_class_names(class_name)

    # Validate the input before opening any window so a bad path does not
    # leave an orphaned window behind.
    frame = cv2.imread(inputimage, 1)
    if frame is None:
        raise FileNotFoundError(
            "Could not read image: {}".format(inputimage))

    win_name = 'Yolov3 detection'
    cv2.namedWindow(win_name)

    try:
        resized_frame = tf.expand_dims(frame, 0)
        resized_frame = resize_image(
            resized_frame, (model_size[0], model_size[1]))

        pred = model.predict(resized_frame)
        boxes, scores, classes, nums = output_boxes(
            pred, model_size,
            max_output_size=max_output_size,
            max_output_size_per_class=max_output_size_per_class,
            iou_threshold=iou_threshold,
            confidence_threshold=confidence_threshold)

        img = draw_outputs(frame, boxes, scores, classes, nums, class_names)
        cv2.imshow(win_name, img)
        cv2.imwrite('outputimgage.jpg', img)
    finally:
        cv2.waitKey()
        cv2.destroyAllWindows()

    # Only reached when detection did not raise.
    print('Detections have been performed successfully.')
    return img
def main(img, model):
    """Run an already-loaded model on one image and return the detections.

    ``class_names`` and the detection thresholds are read from module scope.

    Args:
        img: Input image; converted with ``np.array`` before batching.
        model: Model object exposing ``predict``.

    Returns:
        ``(img, person_num, boxes, scores, classes, nums, class_names)``
        where ``img`` and ``person_num`` are the two values returned by
        ``draw_outputs``.
    """
    image = np.array(img)
    image = tf.expand_dims(image, 0)
    resized_frame = resize_image(image, (model_size[0], model_size[1]))

    pred = model.predict(resized_frame)
    boxes, scores, classes, nums = output_boxes(
        pred, model_size,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold)

    # Drop the batch dimension again before drawing.
    image = np.squeeze(image)
    img, person_num = draw_outputs(
        image, boxes, scores, classes, nums, class_names)

    return img, person_num, boxes, scores, classes, nums, class_names
def main():
    """Detect objects in the image at ``img_path`` and display the result.

    Blocks on ``cv2.waitKey(0)`` until a key is pressed, then closes the
    window.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    class_names = load_class_names(class_name)

    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread returns None instead of raising on a bad path.
        raise FileNotFoundError("Could not read image: {}".format(img_path))

    image = np.array(image)
    image = tf.expand_dims(image, 0)
    resized_frame = resize_image(image, (model_size[0], model_size[1]))

    pred = model.predict(resized_frame)
    boxes, scores, classes, nums = output_boxes(
        pred, model_size,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold)

    # Drop the batch dimension again before drawing.
    image = np.squeeze(image)
    img = draw_outputs(image, boxes, scores, classes, nums, class_names)

    win_name = 'Image detection'
    cv2.imshow(win_name, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def detectAndLocate(image_path, output_path, classes_file=Config.CLASSES_TXT):
    """Run YoloV3Tiny on an image file and write the annotated image.

    Args:
        image_path: Path of the image to analyse.
        output_path: Path the annotated image is written to.
        classes_file: Text file with one class name per line.
    """
    # Enable memory growth on every visible GPU.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    yolo = YoloV3Tiny(classes=Config.NUM_CLASSES)
    yolo.load_weights(Config.WEIGHTS).expect_partial()

    # Context managers close both files right after they are read.
    with open(classes_file) as names_file:
        class_names = [c.strip() for c in names_file.readlines()]
    with open(image_path, 'rb') as image_file:
        img_raw = tf.image.decode_image(image_file.read(), channels=3)

    # Keep a BGR copy for drawing before any grayscale conversion.
    img_to_draw = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)

    if Config.CHANNELS == 1:
        img_raw = tf.image.rgb_to_grayscale(img_raw)

    img = tf.expand_dims(img_raw, 0)
    img = transform_images(img, Config.SIZE)

    boxes, scores, classes, nums = yolo(img)

    img = draw_outputs(img_to_draw, (boxes, scores, classes, nums), class_names)
    cv2.imwrite(output_path, img)
def main():
    """Run live YOLOv3 detection on camera 0 until ``q`` is pressed.

    Each frame is annotated and shown in a window, and an estimated frame
    rate is printed. The loop also ends when a frame cannot be read.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    class_names = load_class_names(class_name)

    win_name = 'Yolov3 detection'
    cv2.namedWindow(win_name)

    # 0 selects camera 0; pass a video path instead to read from a file.
    cap = cv2.VideoCapture(0)

    try:
        while True:
            start = time.time()
            ret, frame = cap.read()
            if not ret:
                break

            resized_frame = tf.expand_dims(frame, 0)
            resized_frame = resize_image(
                resized_frame, (model_size[0], model_size[1]))

            pred = model.predict(resized_frame)
            boxes, scores, classes, nums = output_boxes(
                pred, model_size,
                max_output_size=max_output_size,
                max_output_size_per_class=max_output_size_per_class,
                iou_threshold=iou_threshold,
                confidence_threshold=confidence_threshold)

            img = draw_outputs(
                frame, boxes, scores, classes, nums, class_names)
            cv2.imshow(win_name, img)

            seconds = time.time() - start
            # Guard against a zero interval on coarse clocks.
            fps = 1 / seconds if seconds > 0 else float('inf')
            print("Estimated frames per second : {0}".format(fps))

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
    finally:
        cv2.destroyAllWindows()
        cap.release()

    # Printed only when the loop ended without an exception.
    print('Detections have been performed successfully.')
def main():
    """Detect objects in the image given by ``--file`` and plot the result.

    Prints a hint and returns when no ``--file`` argument is supplied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--file', default=None, type=str)
    # The parsed values live on the namespace returned by parse_args();
    # the parser object itself has no ``file`` attribute.
    args = parser.parse_args()

    if args.file is None:
        print("Add image file path!")
        return

    yolo = YoloV3()
    yolo.load_weights(WEIGHTS_PATH)
    with open(CLASSES_PATH) as classes_file:
        class_names = [c.strip() for c in classes_file.readlines()]

    with open(args.file, 'rb') as image_file:
        img = tf.image.decode_image(image_file.read(), channels=3)
    plt.imshow(img)
    plt.show()

    input_img = tf.expand_dims(img, 0)
    input_img = transform_images(input_img, IMAGE_SIZE)

    boxes, scores, classes, nums = yolo(input_img)

    logging.info('detections:')
    for i in range(nums[0]):
        print('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                    np.array(scores[0][i]),
                                    np.array(boxes[0][i])))

    prediction_img = draw_outputs(
        img.numpy(), (boxes, scores, classes, nums), class_names)
    plt.figure(figsize=(20, 20))
    plt.imshow(prediction_img)
    plt.show()
def main(_argv):
    """Run YOLOv3 on a video source, display it and optionally record it.

    ``FLAGS.video`` is tried as an integer camera index first and as a
    path/URL otherwise. Annotated frames are shown in a window and, when
    ``FLAGS.output`` is set, also written to that file. Press ``q`` to stop.

    Args:
        _argv: Unused positional arguments.
    """
    input_layer = tf.keras.layers.Input([FLAGS.size, FLAGS.size, 3])
    feature_maps = YOLOv3(input_layer)
    bbox_tensors = [decode(fm, i) for i, fm in enumerate(feature_maps)]
    model = tf.keras.Model(input_layer, bbox_tensors)
    utils.load_weights(model, FLAGS.weights)

    times = []

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except (TypeError, ValueError):
        # Not an integer camera index: treat it as a file path or URL.
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        # Only create the writer when an output path was requested.
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    try:
        while True:
            _, img = vid.read()

            if img is None:
                logging.warning("Empty Frame")
                time.sleep(0.1)
                continue

            img_size = img.shape[:2]
            img_in = tf.expand_dims(img, 0)
            img_in = utils.transform_images(img_in, FLAGS.size)

            t1 = time.time()
            pred_bbox = model.predict(img_in)
            t2 = time.time()
            # Keep only the 20 most recent timings for the running average.
            times.append(t2 - t1)
            times = times[-20:]

            pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1]))
                         for x in pred_bbox]
            pred_bbox = tf.concat(pred_bbox, axis=0)
            boxes = utils.postprocess_boxes(
                pred_bbox, img_size, FLAGS.size, 0.3)
            boxes = utils.nms(boxes, 0.45, method='nms')

            img = utils.draw_outputs(img, boxes)
            img = cv2.putText(
                img,
                "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
                (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
            if out is not None:
                out.write(img)
            cv2.imshow('output', img)
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Release the capture and flush the writer even if a frame fails.
        vid.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Detect objects in one image and write the annotated result.

    The image comes from a random sample of ``FLAGS.tfrecord`` when that flag
    is set, otherwise from the file ``FLAGS.image``. The result is written to
    ``FLAGS.output``.

    Args:
        _argv: Unused positional arguments.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    # Context managers close the files right after they are read.
    with open(FLAGS.classes) as classes_file:
        class_names = [c.strip() for c in classes_file.readlines()]
    logging.info('classes loaded')

    if FLAGS.tfrecord:
        dataset = load_tfrecord_dataset(
            FLAGS.tfrecord, FLAGS.classes, FLAGS.size)
        dataset = dataset.shuffle(512)
        img_raw, _label = next(iter(dataset.take(1)))
    else:
        with open(FLAGS.image, 'rb') as image_file:
            img_raw = tf.image.decode_image(image_file.read(), channels=3)

    img = tf.expand_dims(img_raw, 0)
    img = transform_images(img, FLAGS.size)

    t1 = time.time()
    boxes, scores, classes, nums = yolo(img)
    t2 = time.time()
    logging.info('time: {}'.format(t2 - t1))

    logging.info('detections:')
    for i in range(nums[0]):
        logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                           np.array(scores[0][i]),
                                           np.array(boxes[0][i])))

    img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
    img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
    cv2.imwrite(FLAGS.output, img)
    logging.info('output saved to: {}'.format(FLAGS.output))
def main():
    """Detect objects in the image at ``img_path`` and save ``result1.jpg``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    class_names = load_class_names(class_name)
    print("class_names", class_names)

    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread returns None instead of raising on a bad path.
        raise FileNotFoundError("Could not read image: {}".format(img_path))

    image = np.array(image)
    image = tf.expand_dims(image, 0)
    resized_frame = resize_image(image, (model_size[0], model_size[1]))

    pred = model.predict(resized_frame)
    boxes, scores, classes, nums = output_boxes(
        pred, model_size,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold)

    # Drop the batch dimension again before drawing.
    image = np.squeeze(image)
    img = draw_outputs(image, boxes, scores, classes, nums, class_names)

    cv2.imwrite('result1.jpg', img)
def Run(image__):
    """Detect objects in a frame and build the Qt image plus info lines.

    Uses the module-level ``yolo`` model, ``class_names``, ``class_num`` and
    ``student_list_``.

    Args:
        image__: Frame to analyse; it is also the image that gets annotated.

    Returns:
        ``(qt_img, values_)``: a ``QPixmap`` scaled to fit 1280x720 with the
        aspect ratio kept, and a list of text lines describing the matches.
    """
    model_input = utils.img_process(image__)
    boxes, scores, classes, nums = yolo(model_input)
    image__ = utils.draw_outputs(
        image__, (boxes, scores, classes, nums), class_names)

    # Count how many detections fall into each class index.
    obj_ = [0] * class_num
    for det in range(nums[0]):
        obj_[int(classes[0][det])] += 1

    count = 0
    values_ = []
    for class_idx, hits in enumerate(obj_):
        if hits == 0:
            continue
        count += 1
        print(class_names[class_idx])
        # Append the record of every student whose entry names this class.
        for student in student_list_:
            if student[1] == class_names[class_idx]:
                print(class_names[class_idx])
                values_ += [
                    'Object : ' + student[1],
                    'Student : ' + student[2],
                    'Age : ' + str(student[3]),
                    'Gmail: ' + student[4],
                    'Description : ' + student[5],
                    '---------------'
                ]

    if count == 0:
        values_ = ['There is no object detected !!!']
    else:
        values_ += ['TOTAL STUDENTS : ' + str(count)]

    target_w, target_h = 1280, 720
    rgb_frame = cv2.cvtColor(image__, cv2.COLOR_BGR2RGB)
    rows, cols, channels = rgb_frame.shape
    stride = channels * cols
    qt_img = QImage(rgb_frame.data, cols, rows, stride, QImage.Format_RGB888)
    qt_img = QPixmap.fromImage(
        qt_img.scaled(target_w, target_h, Qt.KeepAspectRatio))
    return qt_img, values_
def detect_image(img_path):
    """Detect objects in an image file and display the annotated result.

    Blocks on ``cv2.waitKey(0)`` until a key is pressed, then closes the
    window.

    Args:
        img_path: Path of the image to analyse.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    model = YOLOv3Net(cfg.CFGFILE, cfg.MODEL_SIZE, cfg.NUM_CLASSES)
    model.load_weights(cfg.WEIGHTFILE)
    class_names = load_class_names(cfg.CLASS_NAME)

    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread returns None instead of raising on a bad path.
        raise FileNotFoundError("Could not read image: {}".format(img_path))

    image = np.array(image)
    image = tf.expand_dims(image, 0)
    resized_frame = resize_image(
        image, (cfg.MODEL_SIZE[0], cfg.MODEL_SIZE[1]))

    pred = model.predict(resized_frame)
    boxes, scores, classes, nums = output_boxes(
        pred, cfg.MODEL_SIZE,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=cfg.IOU_THRESHOLD,
        confidence_threshold=cfg.CONFIDENCE_THRESHOLD)

    # Drop the batch dimension again before drawing.
    image = np.squeeze(image)
    img = draw_outputs(image, boxes, scores, classes, nums, class_names)

    win_name = 'Detection'
    cv2.imshow(win_name, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def main(Args):
    """Detect objects in ``Args.img_path``, save and display the result.

    The annotated image is written to ``./Result/<file name>`` and shown in a
    window until a key is pressed.

    Args:
        Args: Namespace providing ``model_size``, ``cfgfile``,
            ``num_classes``, ``max_total_size``,
            ``max_output_size_per_class``, ``iou_threshold``,
            ``score_threshold``, ``weightfile``, ``class_name`` and
            ``img_path``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``Args.img_path``.
    """
    model_size = (Args.model_size, Args.model_size, 3)

    model = YOLOv3Net(Args.cfgfile, Args.num_classes, model_size,
                      Args.max_total_size, Args.max_output_size_per_class,
                      Args.iou_threshold, Args.score_threshold)
    model.load_weights(Args.weightfile)

    class_names = load_class_names(Args.class_name)

    image = cv2.imread(Args.img_path)
    if image is None:
        # cv2.imread returns None instead of raising on a bad path.
        raise FileNotFoundError(
            "Could not read image: {}".format(Args.img_path))

    # Keep the untouched BGR frame for drawing; the model gets an RGB copy.
    org_img = image
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = tf.expand_dims(image, 0)
    resized_frame = resize_image(image, (model_size[0], model_size[1]))

    boxes, scores, classes, num_detections = model.predict(resized_frame)
    print('Number of detections = ', num_detections)

    img_final = draw_outputs(
        org_img, boxes, scores, classes, num_detections, class_names)

    cv2.imwrite('./Result/' + Args.img_path.split('/')[-1], img_final)
    cv2.imshow('Detections', img_final)
    print('Press ESC on image display wondow to exit')
    cv2.waitKey(0)
def detect_video(video_path):
    """Run YOLOv3 detection on a video source until ``q`` is pressed.

    Each frame is annotated and shown in a window, and the frame rate is
    printed. The loop also ends when a frame cannot be read.

    Args:
        video_path: Source specifier; it is passed through
            ``returnCameraOrFile`` before opening the capture.
    """
    model = YOLOv3Net(cfg.CFGFILE, cfg.MODEL_SIZE, cfg.NUM_CLASSES)
    model.load_weights(cfg.WEIGHTFILE)
    class_names = load_class_names(cfg.CLASS_NAME)

    win_name = 'Detection'
    cv2.namedWindow(win_name)

    cap = cv2.VideoCapture(returnCameraOrFile(video_path))

    try:
        while True:
            start = time.time()
            ret, frame = cap.read()
            if not ret:
                break

            resized_frame = tf.expand_dims(frame, 0)
            resized_frame = resize_image(
                resized_frame, (cfg.MODEL_SIZE[0], cfg.MODEL_SIZE[1]))

            pred = model.predict(resized_frame)
            boxes, scores, classes, nums = output_boxes(
                pred, cfg.MODEL_SIZE,
                max_output_size=max_output_size,
                max_output_size_per_class=max_output_size_per_class,
                iou_threshold=cfg.IOU_THRESHOLD,
                confidence_threshold=cfg.CONFIDENCE_THRESHOLD)

            img = draw_outputs(
                frame, boxes, scores, classes, nums, class_names)
            cv2.imshow(win_name, img)

            seconds = time.time() - start
            # Guard against a zero interval on coarse clocks.
            fps = 1 / seconds if seconds > 0 else float('inf')
            print("Frames per second : {0}".format(fps))

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
    finally:
        cv2.destroyAllWindows()
        cap.release()

    # Printed only when the loop ended without an exception.
    print('Detections performed successfully.')
def write_predict(raw_image, graph, fps):
    """Run the graph on one image and write the annotated ``result.jpg``.

    Uses the module-level tensors ``x`` (input) and ``y`` (output).

    Args:
        raw_image: Image handed to ``encode_img`` together with
            ``MODEL_SIZE``.
        graph: Graph the session is opened on.
        fps: Frame rate to stamp on the image; nothing is stamped when None.
    """
    with tf.Session(graph=graph) as sess:
        # Prepare the model input from the raw image.
        raw_img, test_input = encode_img(raw_image, MODEL_SIZE)
        print('test_input shape', test_input.shape)

        # Forward pass.
        pred = sess.run(y, feed_dict={x: test_input})

        # Turn the raw prediction into boxes, scores and classes.
        boxes, scores, classes, nums = output_boxes(
            pred, MODEL_SIZE,
            max_output_size=MAX_OUTPUT_SIZE,
            max_output_size_per_class=MAX_OUTPUT_SIZE_PER_CLASS,
            iou_threshold=IOU_THRESHOLD,
            confidence_threshold=CONFIDENCE_THRESHOLD)
        annotated = draw_outputs(
            raw_img, boxes, scores, classes, nums, class_names)

        # Optional FPS overlay, drawn in red (BGR order).
        red_bgr = (0, 0, 255)
        if fps is not None:
            fps_label = "FPS: {:.2f}".format(fps)
            cv2.putText(annotated, fps_label, (20, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, red_bgr, 1)

        # Persist the final picture.
        cv2.imwrite('result.jpg', annotated)

        print('scores', scores.eval())
def main():
    """Run YOLOv3 on ``img_filename`` and print the detection tensors.

    Prints the boxes, the scores at or above ``confidence_threshold``, the
    non-zero class values and the detection count. No annotated image is
    drawn or saved.

    Returns:
        Always ``0``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_filename``.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)

    image = cv2.imread(img_filename)
    if image is None:
        # cv2.imread returns None instead of raising on a bad path.
        raise FileNotFoundError(
            "Could not read image: {}".format(img_filename))

    image = np.array(image)
    image = tf.expand_dims(image, 0)
    resized_frame = resize_image(image, (model_size[0], model_size[1]))

    pred = model.predict(resized_frame)
    boxes, scores, classes, nums = output_boxes(
        pred, model_size,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold)

    print('boxes', boxes)
    print('scores', scores[scores >= confidence_threshold])
    print('classes', classes[classes != 0])
    print('nums', nums)

    return 0
from utils import draw_outputs, transform_images
import matplotlib.pyplot as plt

# Example script: run YoloV3 on ./dog_example.jpg and plot the detections.
CLASSES_PATH = './coco.names'
WEIGHTS_PATH = './weights/yolov3.tf'
IMAGE_SIZE = 416

yolo = YoloV3()
yolo.load_weights(WEIGHTS_PATH)

# Context managers close both files right after they are read.
with open(CLASSES_PATH) as classes_file:
    class_names = [c.strip() for c in classes_file.readlines()]

with open('./dog_example.jpg', 'rb') as image_file:
    img = tf.image.decode_image(image_file.read(), channels=3)
plt.imshow(img)
plt.show()

input_img = tf.expand_dims(img, 0)
input_img = transform_images(input_img, IMAGE_SIZE)

boxes, scores, classes, nums = yolo(input_img)
print('boxes, scores, classes, nums', boxes, scores, classes, nums)

logging.info('detections:')
for i in range(nums[0]):
    print('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                np.array(scores[0][i]),
                                np.array(boxes[0][i])))

prediction_img = draw_outputs(
    img.numpy(), (boxes, scores, classes, nums), class_names)
plt.figure(figsize=(10, 20))
plt.imshow(prediction_img)
plt.show()
def main():
    """Run live YOLOv3 detection on an RTSP camera stream.

    Each frame is annotated and shown in a window, and an estimated frame
    rate is printed. The loop ends on ``q``, on ESC, or when a frame cannot
    be read.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    class_names = load_class_names(class_name)

    win_name = 'Yolov3 detection'
    cv2.namedWindow(win_name)

    # Change the URL to match your IP camera's RTSP or MPEG stream.
    cap = cv2.VideoCapture(
        "rtsp://*****:*****@172.168.50.208:554/cam/realmonitor?channel=1&subtype=1"
    )

    try:
        while True:
            start = time.time()
            # The extra grab() advances the stream by one frame before
            # read(), so processing stays closer to the live picture.
            cap.grab()
            ret, frame = cap.read()
            if not ret:
                break

            resized_frame = tf.expand_dims(frame, 0)
            resized_frame = resize_image(
                resized_frame, (model_size[0], model_size[1]))

            pred = model.predict(resized_frame)
            boxes, scores, classes, nums = output_boxes(
                pred, model_size,
                max_output_size=max_output_size,
                max_output_size_per_class=max_output_size_per_class,
                iou_threshold=iou_threshold,
                confidence_threshold=confidence_threshold)

            img = draw_outputs(
                frame, boxes, scores, classes, nums, class_names)
            cv2.imshow(win_name, img)

            seconds = time.time() - start
            # Guard against a zero interval on coarse clocks.
            fps = 1 / seconds if seconds > 0 else float('inf')
            print("Estimated frames per second : {0}".format(fps))

            key = cv2.waitKey(1) & 0xFF
            # 27 is the ESC key code.
            if key in (ord('q'), 27):
                break
    finally:
        cv2.destroyAllWindows()
        cap.release()

    # Printed only when the loop ended without an exception.
    print('Detections have been performed successfully.')
def main():
    """Run YOLOv3 on stereo image pairs and show distance-annotated output.

    For every left-camera file the matching left/right resized images are
    predicted together, ``objectDistance`` is evaluated on the original pair,
    and the annotated left image is saved to ``./out/Izlazna slika.png`` and
    displayed. Pressing ``q`` closes the windows and stops the loop.
    """
    # Create the model and load the trained coefficients into it.
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)

    # Load the class names.
    class_names = load_class_names(class_name)

    # Load the input photos of both cameras, pre-processed into the format
    # the model expects.
    images_left, resized_images_left, filenames_left = loadAndResize(
        img_path_left_cam)
    images_right, resized_images_right, filenames_right = loadAndResize(
        img_path_right_cam)

    # Inference over the input images. The predictions form a set of vectors
    # (10647), each corresponding to one candidate object-location box.
    for idx in range(len(filenames_left)):
        image = images_left[idx]

        stereo_pair = [resized_images_left[idx], resized_images_right[idx]]
        stereo_pair = np.squeeze(tf.expand_dims(stereo_pair, 0))

        pred = model.predict(stereo_pair)

        # Determine the boxes around detected objects for the given
        # thresholds.
        boxes, scores, classes, nums = output_boxes(
            pred, model_size,
            max_output_size=max_output_size,
            max_output_size_per_class=max_output_size_per_class,
            iou_threshold=iou_threshold,
            confidence_threshold=confidence_threshold)

        # Object distance paired with the bounding-box index.
        distanceIndexPair = objectDistance(
            images_left[idx], images_right[idx], boxes, nums, classes)

        out_img = draw_outputs(image, boxes, scores, classes, nums,
                               class_names, cLeftCamId, distanceIndexPair)

        # Save the result to a file.
        out_file_name = './out/Izlazna slika.png'
        cv2.imwrite(out_file_name, out_img)

        # Show the result on screen.
        cv2.imshow(out_file_name, out_img)
        pressed = cv2.waitKey(20) & 0xFF
        if pressed == ord('q'):
            cv2.destroyAllWindows()
            break
def main(_argv):
    """Run the MNET detector on a video source and optionally record it.

    ``FLAGS.video`` is tried as an integer camera index first and as a
    path/URL otherwise. Annotated frames are shown in a window and, when
    ``FLAGS.output`` is set, also written to that file. Press ``q`` to stop.

    Args:
        _argv: Unused positional arguments.
    """
    # Let TensorFlow grow GPU memory on demand. The session is kept
    # referenced so this configuration stays alive for the whole run.
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.compat.v1.Session(config=config)

    yolo = MNET_complete()
    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    with open(FLAGS.classes) as classes_file:
        class_names = [c.strip() for c in classes_file.readlines()]
    logging.info('classes loaded')

    times = []

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except (TypeError, ValueError):
        # Not an integer camera index: treat it as a file path or URL.
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        # VideoCapture.get returns floats; the writer needs ints.
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    try:
        while True:
            _, img = vid.read()

            if img is None:
                logging.warning("Empty Frame")
                time.sleep(0.1)
                continue

            img_in = tf.expand_dims(img, 0)
            img_in = transform_images(img_in, FLAGS.size)

            t1 = time.time()
            boxes, scores, classes, nums = yolo.predict(img_in)
            t2 = time.time()
            # Keep only the 20 most recent timings for the running average.
            times.append(t2 - t1)
            times = times[-20:]

            img = draw_outputs(
                img, (boxes, scores, classes, nums), class_names)
            img = cv2.putText(
                img,
                "Time: {:.2f}fps".format(1 / (sum(times) / len(times))),
                (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
            if out is not None:
                out.write(img)
            cv2.imshow('output', img)
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Release the capture and flush the writer even if a frame fails.
        vid.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()