def main(_argv):
    """Export YOLO weights as a SavedModel, reload it, and run one detection."""
    # Build the requested variant and restore checkpoint weights.
    net = YoloV3Tiny(classes=FLAGS.num_classes) if FLAGS.tiny else \
        YoloV3(classes=FLAGS.num_classes)
    net.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    # Round-trip through the SavedModel format to validate the export.
    tf.saved_model.save(net, FLAGS.output)
    logging.info("model saved to: {}".format(FLAGS.output))
    reloaded = tf.saved_model.load(FLAGS.output)
    serving_fn = reloaded.signatures[
        tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
    logging.info(serving_fn.structured_outputs)

    class_names = [line.strip() for line in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # Decode the test image and resize to the fixed 416x416 network input.
    batch = tf.image.decode_image(open(FLAGS.image, 'rb').read(), channels=3)
    batch = tf.expand_dims(batch, 0)
    batch = transform_images(batch, 416)

    start = time.time()
    result = serving_fn(batch)
    boxes = result["yolo_nms"]
    scores = result["yolo_nms_1"]
    classes = result["yolo_nms_2"]
    nums = result["yolo_nms_3"]
    logging.info('time: {}'.format(time.time() - start))

    logging.info('detections:')
    for i in range(nums[0]):
        logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                           scores[0][i].numpy(),
                                           boxes[0][i].numpy()))
def main(_argv):
    """Run YoloV3 on a webcam or video file, display results, optionally record."""
    # Build the model and restore weights.
    yolo = YoloV3(classes=FLAGS.num_classes)
    yolo.load_weights(FLAGS.weights)
    logging.info("weights loaded")

    # Load the class-name list.
    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info("classes loaded")

    # Rolling window of per-frame inference times (last 20 samples).
    times = []

    # FLAGS.video may be a webcam index or a file path.
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:  # non-numeric value means it is a path
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    while True:
        _, img = vid.read()
        if img is None:
            logging.info("Empty Frame")
            time.sleep(0.1)
            # Bug fix: previously fell through and crashed in cvtColor(None).
            continue

        # The model expects RGB; OpenCV delivers BGR.
        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]

        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        # Bug fix: colour was (1, 0, 255); red in BGR is (0, 0, 255),
        # matching the other detection scripts.
        img = cv2.putText(
            img, "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
            (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        if FLAGS.output:
            out.write(img)
        cv2.imshow('Output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    if out is not None:
        out.release()  # robustness: flush the recording before exit
    cv2.destroyAllWindows()
def main(_argv):
    """Detect objects in one image using COCO labels and save the overlay."""
    net = YoloV3Tiny() if FLAGS.tiny else YoloV3()
    net.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    # Labels come from the built-in COCO label map, not a file.
    class_names = coco_label_map_list

    # Decode + resize into a single-image batch.
    batch = tf.image.decode_image(open(FLAGS.image, 'rb').read(), channels=3)
    batch = tf.expand_dims(batch, 0)
    batch = transform_images(batch, FLAGS.size)

    start = time.time()
    boxes, scores, classes, nums = net(batch)
    logging.info('time: {}'.format(time.time() - start))

    logging.info('detections:')
    for i in range(nums[0]):
        logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                           np.array(scores[0][i]),
                                           np.array(boxes[0][i])))

    # Re-read the original image with OpenCV (BGR) for drawing.
    annotated = cv2.imread(FLAGS.image)
    annotated = draw_outputs(annotated, (boxes, scores, classes, nums),
                             class_names)
    cv2.imwrite(FLAGS.output, annotated)
    logging.info('output saved to: {}'.format(FLAGS.output))
def main(_argv):
    """Detect objects in an image file, or in every .jpg of a directory, and show them.

    Returns 1 when no image was specified, otherwise None.
    """
    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=len(class_names))
    else:
        yolo = YoloV3(classes=len(class_names))
    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    if not FLAGS.image:
        # Bug fix: was `login.error(...)` — a NameError at runtime.
        logging.error('Detect image must be specified')
        return 1
    elif os.path.isdir(FLAGS.image):
        # Collect every file whose name ends in 'jpg' (same filter as before).
        detect_images = [os.path.join(FLAGS.image, x)
                         for x in os.listdir(FLAGS.image)
                         if x.endswith('jpg')]
    else:
        detect_images = [FLAGS.image]

    for image in detect_images:
        img_raw = tf.image.decode_image(
            open(image, 'rb').read(), channels=3)
        img = tf.expand_dims(img_raw, 0)
        img = transform_images(img, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo(img)
        t2 = time.time()
        logging.info('time: {}'.format(t2 - t1))

        logging.info('detections:')
        if nums[0].numpy() == 0:
            continue  # nothing detected in this image; skip display
        for i in range(nums[0]):
            logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                               np.array(scores[0][i]),
                                               np.array(boxes[0][i])))

        # RGB -> BGR for OpenCV display; one window per image, wait for a key.
        img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        cv2.imshow(image, img)
        cv2.waitKey(0)
def main(_argv):
    """Stream a video source through YoloV3 and display annotated frames."""
    if FLAGS.tiny:
        yolo = YoloV3Tiny()
    else:
        yolo = YoloV3()
    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # Rolling window of per-frame inference times (last 20 samples).
    times = []

    # FLAGS.video may be a webcam index or a file path.
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    while True:
        _, img = vid.read()
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            continue

        # Bug fix: convert BGR -> RGB before inference. OpenCV frames are
        # BGR, and the sibling detection scripts in this project convert
        # before calling the model.
        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]

        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        img = cv2.putText(
            img, "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
            (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()  # robustness: release the capture device on exit
    cv2.destroyAllWindows()
def main(_argv):
    """Detect objects in one image (or one tfrecord sample) and save the result."""
    # Enable GPU memory growth so TF does not grab all device memory up front.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    # Build the network variant and restore its weights.
    yolo = YoloV3Tiny(classes=FLAGS.num_classes) if FLAGS.tiny else \
        YoloV3(classes=FLAGS.num_classes)
    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    # Load the class-name list.
    class_names = [line.strip() for line in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # Pick an input: one random sample from a tfrecord, or a raw image file.
    if FLAGS.tfrecord:
        dataset = load_tfrecord_dataset(FLAGS.tfrecord, FLAGS.classes,
                                        FLAGS.size)
        dataset = dataset.shuffle(512)
        img_raw, _label = next(iter(dataset.take(1)))
    else:
        img_raw = tf.image.decode_image(open(FLAGS.image, 'rb').read(),
                                        channels=3)

    # Resize into a single-image batch.
    img = tf.expand_dims(img_raw, 0)
    img = transform_images(img, FLAGS.size)

    # Run detection and report the wall-clock time.
    start = time.time()
    boxes, scores, classes, nums = yolo(img)
    logging.info('time: {}'.format(time.time() - start))

    logging.info('detections:')
    for i in range(nums[0]):
        logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                           np.array(scores[0][i]),
                                           np.array(boxes[0][i])))

    # RGB -> BGR for OpenCV, draw detections, write the output file.
    output_img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
    output_img = draw_outputs(output_img, (boxes, scores, classes, nums),
                              class_names)
    cv2.imwrite(FLAGS.output, output_img)
    logging.info('output saved to: {}'.format(FLAGS.output))
def main(_argv):
    """Run detection over every .jpg under FLAGS.path and dump one txt per image.

    Each output line is: "<class> <score> <left> <top> <right> <bottom>".
    """
    # Enable GPU memory growth so TF does not grab all device memory up front.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    # Build the network variant and restore its weights.
    yolo = YoloV3Tiny(classes=FLAGS.num_classes) if FLAGS.tiny else \
        YoloV3(classes=FLAGS.num_classes)
    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    # Load the class-name list.
    class_names = [line.strip() for line in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # Remember the original cwd so result paths stay rooted at the project
    # directory, then scan the image directory for jpgs.
    root_path = os.getcwd()
    os.chdir(FLAGS.path)
    image_list = glob.glob('*.jpg')

    for image in image_list:
        # Decode and preprocess one image into a single-image batch.
        img_raw = tf.image.decode_image(open(image, 'rb').read(), channels=3)
        img = tf.expand_dims(img_raw, 0)
        img = transform_images(img, FLAGS.size)

        boxes, scores, classes, nums = yolo(img)

        # NOTE(review): corners are scaled by the network input size, not the
        # original image dimensions, and results append ('a') across runs —
        # confirm both are what the downstream evaluation expects.
        save_file = os.path.join(root_path, 'data', 'detection_result',
                                 image.replace('.jpg', '.txt'))
        with open(save_file, 'a') as f:
            for i in range(nums[0]):
                obj_name = class_names[int(classes[0][i])]
                score = float('%.6f' % scores[0][i])
                left = int(boxes[0][i][0] * FLAGS.size)
                top = int(boxes[0][i][1] * FLAGS.size)
                right = int(boxes[0][i][2] * FLAGS.size)
                bottom = int(boxes[0][i][3] * FLAGS.size)
                f.write("%s %s %s %s %s %s\n"
                        % (obj_name, score, left, top, right, bottom))

    logging.info('output saved complete')
def main(_argv):
    """Save YOLO as a versioned SavedModel for serving, then smoke-test it."""
    class_names = [line.strip() for line in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    net = YoloV3Tiny(classes=len(class_names)) if FLAGS.tiny else \
        YoloV3(classes=len(class_names))
    net.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    # Saved path will be 'output_dir/model_name/version'
    saved_path = os.path.join(FLAGS.output_dir, 'yolov3', str(FLAGS.version))
    tf.saved_model.save(net, saved_path)
    logging.info("model saved to: {}".format(saved_path))

    # Reload via the default serving signature to verify the export.
    reloaded = tf.saved_model.load(saved_path)
    serving_fn = reloaded.signatures[
        tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
    logging.info(serving_fn.structured_outputs)

    # Without a test image there is nothing more to do.
    if not FLAGS.image:
        return

    batch = tf.image.decode_image(open(FLAGS.image, 'rb').read(), channels=3)
    batch = tf.expand_dims(batch, 0)
    batch = transform_images(batch, FLAGS.size)

    start = time.time()
    result = serving_fn(batch)
    boxes = result["yolo_nms"]
    scores = result["yolo_nms_1"]
    classes = result["yolo_nms_2"]
    nums = result["yolo_nms_3"]
    logging.info('time: {}'.format(time.time() - start))

    logging.info('detections:')
    for i in range(nums[0]):
        logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                           scores[0][i].numpy(),
                                           boxes[0][i].numpy()))
def main(_argv):
    """Convert the YOLO Keras model to a TFLite flatbuffer and smoke-test it."""
    if FLAGS.tiny:
        yolo = YoloV3Tiny(size=FLAGS.size, classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(size=FLAGS.size, classes=FLAGS.num_classes)
    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    # Convert and write the flatbuffer (context manager closes the file).
    converter = tf.lite.TFLiteConverter.from_keras_model(yolo)
    tflite_model = converter.convert()
    with open(FLAGS.output, 'wb') as f:
        f.write(tflite_model)
    logging.info("model saved to: {}".format(FLAGS.output))

    # Reload through the TFLite interpreter to verify the conversion.
    interpreter = tf.lite.Interpreter(model_path=FLAGS.output)
    interpreter.allocate_tensors()
    logging.info('tflite model loaded')
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    img = tf.image.decode_image(open(FLAGS.image, 'rb').read(), channels=3)
    img = tf.expand_dims(img, 0)
    # Bug fix: resize to the size the model was built with; a hard-coded 416
    # disagrees with the input tensor whenever FLAGS.size != 416.
    img = transform_images(img, FLAGS.size)

    t1 = time.time()
    # set_tensor() returns None, so its result is deliberately not bound.
    interpreter.set_tensor(input_details[0]['index'], img)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    t2 = time.time()
    logging.info('time: {}'.format(t2 - t1))  # t1 was previously unused
    print(output_data)
def main(_argv):
    """Train YoloV3 (or the tiny variant) on tfrecord datasets.

    Supports several transfer-learning modes (FLAGS.transfer) and either a
    hand-written eager training loop (FLAGS.mode == 'eager_tf') or compiled
    Keras model.fit.
    """
    # Let TF grow GPU memory on demand instead of grabbing it all up front.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    # Select the architecture plus its matching anchor set / mask layout.
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Training pipeline: fake data unless a tfrecord dataset is given, then
    # shuffle -> batch -> (image, per-scale target) transform -> prefetch.
    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # Validation pipeline (no shuffle, no prefetch).
    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes,
                                                    FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes
        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'darknet':
            # Copy only the backbone weights, then freeze the backbone.
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'no_output':
            # Copy and freeze every layer except the output heads.
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)
    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    # NOTE(review): `lr` is a deprecated alias for `learning_rate` in newer
    # tf.keras — confirm against the pinned TF version.
    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    # One loss term per output scale (one per anchor mask).
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)
        for epoch in range(1, FLAGS.epochs + 1):
            # Training step: forward pass, sum per-scale losses plus
            # regularization, backprop, apply gradients.
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))
                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            # Validation pass: same loss computation, no gradient updates.
            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            # Epoch summary, reset running means, checkpoint the weights.
            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))
            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        # Compiled Keras training path with standard callbacks.
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))
        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
def main(_argv):
    """Train YoloV3 (or the tiny variant) with optional transfer learning."""
    # Enable GPU memory growth so TF allocates device memory on demand.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    # Build the network model and pick the matching anchors / masks.
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Load the training set (falls back to a fake dataset).
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    else:
        train_dataset = dataset.load_fake_dataset()

    # Shuffle, batch, map to (image, targets), and prefetch the training set.
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # Load the validation set.
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes,
                                                    FLAGS.size)
    else:
        val_dataset = dataset.load_fake_dataset()

    # Batch and map the validation set (no shuffle, no prefetch).
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure transfer learning.
    if FLAGS.transfer == 'none':
        pass
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer works even with an incompatible number of classes:
        # rebuild the pretrained model and reset the top layers.
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)
        # Copy and freeze the darknet backbone.
        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        # Copy and freeze every layer except the output heads.
        elif FLAGS.transfer == 'no_output':
            for layer in model.layers:
                if not layer.name.startswith('yolo_output'):
                    layer.set_weights(
                        model_pretrained.get_layer(layer.name).get_weights())
                    freeze_all(layer)
    else:
        # Other transfer modes require a matching number of classes.
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # Freeze darknet and fine-tune the remaining layers.
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # Freeze everything.
            freeze_all(model)

    # NOTE(review): `lr` is a deprecated alias for `learning_rate` in newer
    # tf.keras — confirm against the pinned TF version.
    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    # One loss term per output scale.
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    # Whether to run with eager execution.
    if FLAGS.mode == 'eager_tf':
        # Eager mode lets values be inspected while running; handy for debugging.
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)
        for epoch in range(1, FLAGS.epochs + 1):
            # Minimize the training loss via autodiff + gradient descent.
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))
                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            # Compute the loss on the validation set (no gradient updates).
            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            # Epoch summary, reset running means, checkpoint the weights.
            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))
            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        # Compiled Keras training path with standard callbacks.
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))
        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
        print(history.history)
def main(_argv):
    """Run PPE + helmet detection, face recognition, and DeepSORT-style
    tracking on a video stream, displaying (and optionally recording) results.
    """
    # Let TF grow GPU memory on demand instead of grabbing it all up front.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    ppe_input_size = FLAGS.ppe_size
    helmet_input_size = FLAGS.helmet_size

    # FLAGS.video may be a webcam index or a file path.
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    # Rolling window of per-frame processing times (last 20 samples).
    times = []

    if FLAGS.output:
        # VideoCapture returns floats; the writer wants ints.
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    # Two detectors: full-body PPE and helmet.
    ppe_detector = create_ppe_detector(ppe_input_size)
    helmet_detector = create_helmet_detector(helmet_input_size)

    # Known-face gallery: three reference photos of the same person.
    nacho_image1 = face_recognition.load_image_file("./data/faces/nacho1.jpg")
    nacho_image2 = face_recognition.load_image_file("./data/faces/nacho2.jpg")
    nacho_image3 = face_recognition.load_image_file("./data/faces/nacho3.jpg")
    nacho_face_encoding1 = face_recognition.face_encodings(nacho_image1)[0]
    nacho_face_encoding2 = face_recognition.face_encodings(nacho_image2)[0]
    nacho_face_encoding3 = face_recognition.face_encodings(nacho_image3)[0]
    known_face_encodings = [
        nacho_face_encoding1, nacho_face_encoding2, nacho_face_encoding3
    ]
    known_face_names = ["Nacho", "Nacho", "Nacho"]
    face_locations = []
    face_encodings = []
    face_names = []

    # Tracker setup: appearance-embedding encoder + cosine NN metric.
    max_cosine_distance = 0.7  # 0.5 / 0.7
    nn_budget = None
    model_filename = './weights/tracker/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    Track_only = []  # unused here — presumably a class filter; TODO confirm

    logging.info("Models loaded!")

    while True:
        return_value, frame = vid.read()
        if not return_value:
            # End of stream (or camera failure) terminates the loop.
            logging.warning("Empty Frame")
            break

        frame_size = frame.shape[:2]
        # Work in RGB from here on; OpenCV read() delivers BGR.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

        # Helmet-detector input: batched + resized tensor.
        img_in = tf.expand_dims(frame, 0)
        img_in = transform_images(img_in, helmet_input_size)

        # PPE-detector input: separate preprocessing pipeline.
        image_data = utils.image_preprocess(np.copy(frame),
                                            [ppe_input_size, ppe_input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        # PPE inference, either native TF or a TensorRT-converted model.
        if FLAGS.framework == 'tf':
            ppe_pred_bbox = ppe_detector.predict(image_data)
        elif FLAGS.framework == 'trt':
            batched_input = tf.constant(image_data)
            ppe_pred_bbox = []
            result = ppe_detector(batched_input)
            for _, value in result.items():
                value = value.numpy()
                ppe_pred_bbox.append(value)

        helmet_pred_bbox = helmet_detector.predict(img_in)

        # Face recognition on the full-resolution frame.
        # face_locations = face_recognition.face_locations(small_frame)
        face_locations = face_recognition.face_locations(frame)
        face_encodings = face_recognition.face_encodings(frame, face_locations)
        face_names = []
        for face_encoding in face_encodings:
            matches = face_recognition.compare_faces(known_face_encodings,
                                                     face_encoding)
            name = "Unknown"
            # if True in matches:
            #     first_match_index = matches.index(True)
            #     name = known_face_names[first_match_index]
            # Take the closest gallery face; keep it only if it also passed
            # compare_faces' threshold.
            face_distances = face_recognition.face_distance(
                known_face_encodings, face_encoding)
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = known_face_names[best_match_index]
            face_names.append(name)
        t2 = time.time()

        times.append(t2 - t1)
        times = times[-20:]
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms  # NOTE: rebinds the writer's fps local after setup

        # Map raw network outputs back to frame coordinates.
        ppe_bboxes = post_process_boxes(ppe_pred_bbox, 'yolov4', frame_size,
                                        ppe_input_size)
        helmet_bboxes = post_process_boxes(helmet_pred_bbox, 'yolov3',
                                           frame_size, helmet_input_size)

        # face_recognition returns (top, right, bottom, left); convert to
        # [left, top, right, bottom, name].
        face_bboxes = []
        for (top, right, bottom, left), name in zip(face_locations,
                                                    face_names):
            # top *= 4
            # left *= 4
            # right *= 4
            # bottom *= 4
            face_bboxes.append([left, top, right, bottom, name])

        # Fuse PPE + helmet detections into per-person safety statuses.
        bboxes = utils.calculate_status(ppe_bboxes, helmet_bboxes, [])

        # Build tracker detections as [x, y, w, h] plus per-box metadata.
        boxes, safety_scores, site_roles, face_names = [], [], [], []
        for bbox in bboxes:
            boxes.append([
                bbox[0].astype(int),
                bbox[1].astype(int),
                bbox[2].astype(int) - bbox[0].astype(int),
                bbox[3].astype(int) - bbox[1].astype(int)
            ])
            safety_scores.append(bbox[4])
            site_roles.append(bbox[5])
            face_names.append("None")
        for bbox in face_bboxes:
            boxes.append(
                [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]])
            safety_scores.append(0)
            site_roles.append(-1)  # -1 marks a face-only detection
            face_names.append(bbox[4])

        boxes = np.array(boxes)
        safety_scores = np.array(safety_scores)
        site_roles = np.array(site_roles)
        face_names = np.array(face_names)
        # Appearance features for association, then one Detection per box.
        features = np.array(encoder(frame, boxes))
        detections = [
            Detection(bbox, 0.9, 0, feature, safety_score, site_role,
                      face_name)
            for bbox, feature, safety_score, site_role, face_name in zip(
                boxes, features, safety_scores, site_roles, face_names)
        ]

        # Advance the tracker and associate the new detections.
        tracker.predict()
        tracker.update(detections)

        # Collect confirmed, recently-updated tracks for drawing.
        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed(
            ) or track.time_since_update > 1:  # 1 / 5
                continue
            bbox = track.to_tlbr()
            tracking_id = track.track_id
            safety_score = track.get_safety_score()
            site_role = track.get_site_role()
            face_name = track.get_face_name()
            # Face-only tracks carry the name; others carry the safety score.
            if site_role == -1:
                to_add = [face_name, site_role, tracking_id]
            else:
                to_add = [safety_score, site_role, tracking_id]
            tracked_bboxes.append(bbox.tolist() + to_add)

        # Draw overlays, stamp the FPS, convert back for display/recording.
        image = utils.draw_demo(frame, tracked_bboxes)
        image = cv2.putText(image, "Time: {:.2f} FPS".format(fps), (0, 24),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
        cv2.imshow("Detections", image)
        if FLAGS.output:
            out.write(image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()
def main(_argv):
    """Detect objects in a webcam/video stream; show and optionally record output."""
    # Do not let TF pre-allocate all GPU memory.
    for device in tf.config.experimental.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(device, True)

    detector = YoloV3Tiny(classes=FLAGS.num_classes) if FLAGS.tiny \
        else YoloV3(classes=FLAGS.num_classes)
    detector.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [line.strip() for line in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    frame_times = []

    # A numeric FLAGS.video selects a webcam; anything else is a file path.
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    while True:
        _, frame = vid.read()
        if frame is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            continue

        # BGR -> RGB, add a batch dimension, resize for the network.
        net_input = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        net_input = tf.expand_dims(net_input, 0)
        net_input = transform_images(net_input, FLAGS.size)

        start = time.time()
        boxes, scores, classes, nums = detector.predict(net_input)
        frame_times.append(time.time() - start)
        frame_times = frame_times[-20:]  # sliding-average window

        frame = draw_outputs(frame, (boxes, scores, classes, nums),
                             class_names)
        avg_ms = sum(frame_times) / len(frame_times) * 1000
        frame = cv2.putText(frame, "Time: {:.2f}ms".format(avg_ms), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                            (0, 0, 255), 2)
        if FLAGS.output:
            out.write(frame)
        cv2.imshow('output', frame)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()
def main(_argv):
    """Train YoloV3 on image/label lists with fixed step counts per epoch.

    Bug fix: the validation pipeline's prefetch stage previously rebound
    `val_dataset` to `train_dataset.prefetch(...)`, silently validating on
    the training data; it now prefetches the validation dataset itself.
    """
    num_classes = len([c.strip() for c in open(FLAGS.classes).readlines()])
    logging.info('classes loaded, number of classes = %d' % num_classes)

    # Select the architecture plus its matching anchor set / mask layout.
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True, classes=num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # TensorFlow 2.0/2.1 has a bug when putting finite dateset to model.fit().
    # Walk around by using dataset.repeat() and give the number of steps to model fit.
    # Refer to https://github.com/tensorflow/tensorflow/issues/31509
    training_set_size = dataset.get_dataset_size(FLAGS.train_labels)
    steps_for_train = training_set_size // FLAGS.batch_size
    steps_for_val = dataset.get_dataset_size(
        FLAGS.val_labels) // FLAGS.batch_size
    logging.info('Training in %d steps, validation in %d steps' %
                 (steps_for_train, steps_for_val))

    if not (FLAGS.train_images and FLAGS.train_labels):
        logging.error('Training images and labels must be specified.')
        return
    # Training pipeline: shuffled source -> repeat -> batch -> transform -> prefetch.
    train_dataset = dataset.load_yolo_dataset(
        FLAGS.train_images,
        FLAGS.train_labels,
        FLAGS.size,
        shuffle=True,
        shuffle_buffer_size=training_set_size)
    train_dataset = train_dataset.repeat()
    train_dataset = train_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    if not (FLAGS.val_images and FLAGS.val_labels):
        logging.error('Validation images and labels must be specified.')
        return
    # Validation pipeline mirrors training, without shuffling.
    val_dataset = dataset.load_yolo_dataset(FLAGS.val_images,
                                            FLAGS.val_labels, FLAGS.size)
    val_dataset = val_dataset.repeat()
    val_dataset = val_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    # Bug fix: was `train_dataset.prefetch(...)`, which replaced the
    # validation data with the training pipeline.
    val_dataset = val_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # Configure the model
    # Weights configuration
    if FLAGS.weights_num_classes is None:
        if FLAGS.weights:
            model.load_weights(FLAGS.weights)
    else:
        # Transfer learning with incompatible number of classes
        assert FLAGS.weights
        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or num_classes)
        model_pretrained.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())

    # Freeze layers
    if FLAGS.freeze == 'none':
        pass
    elif FLAGS.freeze == 'darknet':
        freeze_all(model.get_layer('yolo_darknet'))
    elif FLAGS.freeze == 'no_output':
        # freeze all but output layers
        for l in model.layers:
            if not l.name.startswith('yolo_output'):
                freeze_all(l)

    model.summary()

    # Configure training process
    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=num_classes, ignore_thresh=0.5)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)
        for epoch in range(1, FLAGS.epochs + 1):
            batch = 0
            for images, labels in train_dataset:
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))
                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)
                batch += 1
                # The dataset repeats forever; stop after one epoch of steps.
                if batch == steps_for_train:
                    break

            batch = 0
            for images, labels in val_dataset:
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)
                batch += 1
                if batch == steps_for_val:
                    break

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))
            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer, loss=loss)
        callbacks = [
            ReduceLROnPlateau(verbose=1),
            # EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True,
                            save_best_only=True),
        ]
        model.fit(train_dataset,
                  epochs=FLAGS.epochs,
                  steps_per_epoch=steps_for_train,
                  callbacks=callbacks,
                  validation_data=val_dataset,
                  validation_steps=steps_for_val)
def main(_argv):
    """Run the combined PPE / helmet / face-recognition demo on a video stream.

    Reads frames from a webcam index or video file (``FLAGS.video``), runs a
    PPE detector, a helmet detector and face recognition on each frame, draws
    the merged results, shows them in a window and optionally writes them to
    ``FLAGS.output``.  Press ``q`` to quit.
    """
    # Let TF grow GPU memory on demand instead of claiming it all up front.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    ppe_input_size = FLAGS.ppe_size
    helmet_input_size = FLAGS.helmet_size

    # FLAGS.video may be a webcam index ("0") or a file path.
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:  # not an integer -> treat as a path
        vid = cv2.VideoCapture(FLAGS.video)

    # Rolling per-frame processing times (last 20 frames).
    times = []

    out = None
    if FLAGS.output:
        # VideoCapture reports float properties; VideoWriter wants ints.
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    # TODO: switch here
    ppe_detector = create_ppe_detector(ppe_input_size)
    helmet_detector = create_helmet_detector(helmet_input_size)

    # Build the known-faces gallery: three reference photos of the same person.
    known_face_encodings = []
    for face_file in ("nacho1.jpg", "nacho2.jpg", "nacho3.jpg"):
        reference = face_recognition.load_image_file("./data/faces/" + face_file)
        known_face_encodings.append(face_recognition.face_encodings(reference)[0])
    known_face_names = ["Cosmin", "Cosmin", "Cosmin"]

    face_locations = []
    face_encodings = []
    face_names = []
    logging.info("Models loaded!")

    while True:
        return_value, frame = vid.read()
        # frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)  # TODO: here
        if not return_value:
            logging.warning("Empty Frame")
            break

        frame_size = frame.shape[:2]
        # Detectors and face_recognition expect RGB; OpenCV delivers BGR.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Helmet detector consumes a TF-preprocessed batch ...
        img_in = tf.expand_dims(frame, 0)
        img_in = transform_images(img_in, helmet_input_size)
        # ... while the PPE detector uses the project's own preprocessing.
        image_data = utils.image_preprocess(np.copy(frame),
                                            [ppe_input_size, ppe_input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        prev_time = time.time()
        if FLAGS.framework == 'tf':
            ppe_pred_bbox = ppe_detector.predict(image_data)
        elif FLAGS.framework == 'trt':
            # TensorRT saved-model signature: dict of output tensors.
            batched_input = tf.constant(image_data)
            ppe_pred_bbox = []
            result = ppe_detector(batched_input)
            for _, value in result.items():
                ppe_pred_bbox.append(value.numpy())

        helmet_pred_bbox = helmet_detector.predict(img_in)

        face_locations = face_recognition.face_locations(frame)
        face_encodings = face_recognition.face_encodings(frame, face_locations)
        face_names = []
        for face_encoding in face_encodings:
            matches = face_recognition.compare_faces(known_face_encodings,
                                                     face_encoding)
            # BUG FIX: fallback label was misspelled "Unkwown".
            name = "Unknown"
            # Pick the closest gallery face; accept it only if compare_faces
            # also considered it a match.
            face_distances = face_recognition.face_distance(
                known_face_encodings, face_encoding)
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = known_face_names[best_match_index]
            face_names.append(name)

        curr_time = time.time()
        times.append(curr_time - prev_time)
        times = times[-20:]  # rolling average window
        ms = sum(times) / len(times) * 1000

        ppe_bboxes = post_process_boxes(ppe_pred_bbox, 'yolov4',
                                        frame_size, ppe_input_size)
        helmet_bboxes = post_process_boxes(helmet_pred_bbox, 'yolov3',
                                           frame_size, helmet_input_size)

        # face_recognition returns (top, right, bottom, left); the drawing
        # helpers expect [left, top, right, bottom, label, class].
        face_bboxes = []
        for (top, right, bottom, left), name in zip(face_locations, face_names):
            face_bboxes.append([left, top, right, bottom, name, -1])

        bboxes = utils.calculate_status(ppe_bboxes, helmet_bboxes, [])
        bboxes.extend(face_bboxes)

        image = utils.draw_demo(frame, bboxes)
        image = cv2.putText(
            image,
            "Time: {:.2f}ms".format(ms),
            (0, 36),  # 24
            cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)
        # BUG FIX: convert the annotated image back to BGR for display/writing,
        # not the raw `frame` — converting `frame` discards all drawn
        # annotations whenever draw_demo returns a copy.
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
        cv2.imshow("Detections", image)
        if FLAGS.output:
            out.write(image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()
def main(_argv):
    """Track objects in a video using YOLOv3 detections fed to Deep SORT.

    Reads frames from ``FLAGS.video`` (webcam index or path), detects objects
    with YOLOv3 (or YOLOv3-tiny), associates them across frames with Deep
    SORT, draws per-track boxes/IDs, and optionally writes the annotated video
    to ``FLAGS.output`` plus per-frame box coordinates to ``detection.txt``.
    Press ``q`` to quit.
    """
    # Deep SORT association parameters.
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # Appearance-feature encoder + tracker.
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # Let TF grow GPU memory on demand instead of claiming it all up front.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)
    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # FLAGS.video may be a webcam index ("0") or a file path.
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:  # not an integer -> treat as a path
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    list_file = open('detection.txt', 'w')
    frame_index = -1
    fps = 0.0
    count = 0

    # Per-track color map — loop-invariant, so build it once up front
    # (the original recomputed it for every frame).
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

    while True:
        _, img = vid.read()

        if img is None:
            # Tolerate a few empty reads (e.g. slow camera start-up) before
            # giving up.
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = np.array([class_names[int(c)] for c in classes])

        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Advance the tracker with this frame's surviving detections.
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            # Skip tentative tracks and tracks that missed this frame.
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            # Filled label background sized roughly to the label text.
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

        # Exponential moving average of the frame rate, printed on screen.
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)

        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            for box in converted_boxes:
                list_file.write(str(box[0]) + ' ' + str(box[1]) + ' ' +
                                str(box[2]) + ' ' + str(box[3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    # BUG FIX: was `if FLAGS.ouput:` (typo) — raised AttributeError on
    # shutdown whenever an output file had been requested.
    if FLAGS.output:
        out.release()
    list_file.close()
    cv2.destroyAllWindows()