def main(_argv):
    """Detect objects in the image at FLAGS.image and display the result.

    Builds YoloV3Tiny or YoloV3 depending on FLAGS.tiny, loads FLAGS.weights,
    logs the inference time and every detection, then shows the annotated
    image in an OpenCV window until a key is pressed.
    """
    yolo = YoloV3Tiny() if FLAGS.tiny else YoloV3()
    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded from {}'.format(FLAGS.weights))

    class_names = voc_label_map_list

    # Decode the file, add a leading axis and pass it through transform_images.
    encoded = open(FLAGS.image, 'rb').read()
    batch = tf.image.decode_image(encoded, channels=3)
    batch = tf.expand_dims(batch, 0)
    batch = transform_images(batch, FLAGS.size)

    started = time.time()
    boxes, scores, classes, nums = yolo(batch)
    finished = time.time()
    logging.info('time: {}'.format(finished - started))

    logging.info('detections:')
    for idx in range(nums[0]):
        label = class_names[int(classes[0][idx])]
        logging.info('\t{}, {}, {}'.format(label,
                                           np.array(scores[0][idx]),
                                           np.array(boxes[0][idx])))

    # The drawing step works on a copy of the image read through OpenCV.
    canvas = cv2.imread(FLAGS.image)
    canvas = draw_outputs(canvas, (boxes, scores, classes, nums), class_names)
    cv2.imshow('rr', canvas)
    cv2.waitKey(0)
def main(_argv):
    """Run YoloV3 on a webcam or video file, optionally recording the output.

    FLAGS.video is first tried as an integer device index and otherwise used
    as a path. When FLAGS.output is set, annotated frames are written with a
    cv2.VideoWriter using the FLAGS.output_format fourcc. The loop ends when
    'q' is pressed in the preview window.
    """
    # import weights
    yolo = YoloV3(classes=FLAGS.num_classes)
    yolo.load_weights(FLAGS.weights)
    logging.info("weights loaded")

    # Import classes
    with open(FLAGS.classes) as names_file:
        class_names = [c.strip() for c in names_file]
    logging.info("classes loaded")

    # inference times of the most recent frames (trimmed to 20 below)
    times = []

    # try to load webcam or a video file
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        # Not a numeric device index: treat it as a file name / URL.
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    try:
        while True:
            _, img = vid.read()

            if img is None:
                logging.info("Empty Frame")
                time.sleep(0.1)
                # Without this the None frame would reach cv2.cvtColor
                # and raise.
                continue

            img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_in = tf.expand_dims(img_in, 0)
            img_in = transform_images(img_in, FLAGS.size)

            t1 = time.time()
            boxes, scores, classes, nums = yolo.predict(img_in)
            for i in nums:
                print(i)
            t2 = time.time()
            times.append(t2 - t1)
            times = times[-20:]

            img = draw_outputs(img, (boxes, scores, classes, nums),
                               class_names)
            img = cv2.putText(
                img,
                "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
                (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (1, 0, 255), 2)
            if out is not None:
                out.write(img)
            cv2.imshow('Output', img)
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Release capture and writer so the output file is finalised even
        # when the loop exits through an exception.
        vid.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Detect objects in FLAGS.image and write the annotated image to FLAGS.output.

    The model (tiny or full, per FLAGS.tiny) is loaded from FLAGS.weights and
    class names come from get_coco_names(FLAGS.classes).
    """
    yolo = YoloV3Tiny() if FLAGS.tiny else YoloV3()
    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = get_coco_names(FLAGS.classes)
    logging.info('classes loaded')

    # Decode, add a leading axis, then hand over to transform_images.
    encoded = open(FLAGS.image, 'rb').read()
    batch = tf.expand_dims(tf.image.decode_image(encoded, channels=3), 0)
    batch = transform_images(batch, FLAGS.size)

    started = time.time()
    boxes, scores, classes, nums = yolo(batch)
    finished = time.time()
    logging.info('time: {}'.format(finished - started))

    logging.info('detections:')
    for idx in range(nums[0]):
        label = class_names[int(classes[0][idx])]
        logging.info('\t{}, {}, {}'.format(label,
                                           np.array(scores[0][idx]),
                                           np.array(boxes[0][idx])))

    annotated = draw_outputs(cv2.imread(FLAGS.image),
                             (boxes, scores, classes, nums), class_names)
    cv2.imwrite(FLAGS.output, annotated)
    logging.info('output saved to: {}'.format(FLAGS.output))
def main(_argv):
    """Export the model as a SavedModel at FLAGS.output and run it on FLAGS.image.

    After saving, the SavedModel is loaded back, its default serving signature
    is called on the image, and the detections are logged.
    """
    model_cls = YoloV3Tiny if FLAGS.tiny else YoloV3
    yolo = model_cls(classes=FLAGS.num_classes)
    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    tf.saved_model.save(yolo, FLAGS.output)
    logging.info("model saved to: {}".format(FLAGS.output))

    # Load the export back and grab the default serving signature.
    reloaded = tf.saved_model.load(FLAGS.output)
    infer = reloaded.signatures[
        tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
    logging.info(infer.structured_outputs)

    class_names = [line.strip() for line in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    encoded = open(FLAGS.image, 'rb').read()
    batch = tf.expand_dims(tf.image.decode_image(encoded, channels=3), 0)
    batch = transform_images(batch, 416)

    started = time.time()
    outputs = infer(batch)
    boxes = outputs["yolo_nms"]
    scores = outputs["yolo_nms_1"]
    classes = outputs["yolo_nms_2"]
    nums = outputs["yolo_nms_3"]
    finished = time.time()
    logging.info('time: {}'.format(finished - started))

    logging.info('detections:')
    for idx in range(nums[0]):
        label = class_names[int(classes[0][idx])]
        logging.info('\t{}, {}, {}'.format(label,
                                           scores[0][idx].numpy(),
                                           boxes[0][idx].numpy()))
def main(_argv):
    """Load darknet weights into the Keras model and save them to FLAGS.output.

    A random (1, 320, 320, 3) float32 input is pushed through the model as a
    sanity check before the weights are saved.
    """
    model_cls = YoloV3Tiny if FLAGS.tiny else YoloV3
    yolo = model_cls(classes=FLAGS.num_classes)
    yolo.summary()
    logging.info('model created')

    load_darknet_weights(yolo, FLAGS.weights, FLAGS.tiny)
    logging.info('weights loaded')

    # One forward pass on random data; the result itself is not inspected.
    sample = np.random.random((1, 320, 320, 3)).astype(np.float32)
    sanity_output = yolo(sample)
    logging.info('sanity check passed')

    yolo.save_weights(FLAGS.output)
    logging.info('weights saved')
def main(_argv):
    """Detect objects in one image, or in every '*jpg' file of a directory.

    The number of classes is taken from the FLAGS.classes file. FLAGS.image
    may name a single file or a directory. Each image that has at least one
    detection is shown in its own OpenCV window.

    Returns:
        1 when FLAGS.image is not set, otherwise None.
    """
    with open(FLAGS.classes) as names_file:
        class_names = [c.strip() for c in names_file]
    logging.info('classes loaded')

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=len(class_names))
    else:
        yolo = YoloV3(classes=len(class_names))

    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    if not FLAGS.image:
        # The original called the undefined name `login`, raising NameError
        # instead of reporting the problem.
        logging.error('Detect image must be specified')
        return 1
    elif os.path.isdir(FLAGS.image):
        detect_images = [os.path.join(FLAGS.image, x)
                         for x in os.listdir(FLAGS.image)
                         if x.endswith('jpg')]
    else:
        detect_images = [FLAGS.image]

    for image in detect_images:
        with open(image, 'rb') as image_file:
            img_raw = tf.image.decode_image(image_file.read(), channels=3)
        img = tf.expand_dims(img_raw, 0)
        img = transform_images(img, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo(img)
        t2 = time.time()
        logging.info('time: {}'.format(t2 - t1))

        logging.info('detections:')
        # Nothing to log or draw for this image.
        if nums[0].numpy() == 0:
            continue
        for i in range(nums[0]):
            logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                               np.array(scores[0][i]),
                                               np.array(boxes[0][i])))

        # decode_image yields RGB; OpenCV drawing/display expects BGR.
        img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        cv2.imshow(image, img)
        cv2.waitKey(0)
def main(_argv):
    """Detect objects in one image and write the annotated copy to FLAGS.output.

    The image comes from a shuffled FLAGS.tfrecord dataset when that flag is
    set, otherwise from the file FLAGS.image.
    """
    # Turn on memory growth for every GPU.
    for gpu in tf.config.experimental.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(gpu, True)

    # Build the network.
    model_cls = YoloV3Tiny if FLAGS.tiny else YoloV3
    yolo = model_cls(classes=FLAGS.num_classes)

    # Load the weights.
    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    # Load the class list.
    class_names = [line.strip() for line in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # Read the image data.
    if FLAGS.tfrecord:
        records = load_tfrecord_dataset(FLAGS.tfrecord, FLAGS.classes,
                                        FLAGS.size)
        records = records.shuffle(512)
        img_raw, _label = next(iter(records.take(1)))
    else:
        encoded = open(FLAGS.image, 'rb').read()
        img_raw = tf.image.decode_image(encoded, channels=3)

    # Prepare the model input.
    batch = transform_images(tf.expand_dims(img_raw, 0), FLAGS.size)

    # Run detection.
    started = time.time()
    boxes, scores, classes, nums = yolo(batch)
    finished = time.time()
    logging.info('time: {}'.format(finished - started))

    # Report and visualise the detections.
    logging.info('detections:')
    for idx in range(nums[0]):
        label = class_names[int(classes[0][idx])]
        logging.info('\t{}, {}, {}'.format(label,
                                           np.array(scores[0][idx]),
                                           np.array(boxes[0][idx])))

    canvas = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
    canvas = draw_outputs(canvas, (boxes, scores, classes, nums), class_names)
    cv2.imwrite(FLAGS.output, canvas)
    logging.info('output saved to: {}'.format(FLAGS.output))
def main(_argv):
    """Run YOLOv3 on a webcam or video file and preview the detections.

    FLAGS.video is first tried as an integer device index and otherwise used
    as a path. Each frame is annotated with the detections and the mean
    inference time of the last 20 frames. Press 'q' to quit.
    """
    if FLAGS.tiny:
        yolo = YoloV3Tiny()
    else:
        yolo = YoloV3()

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    with open(FLAGS.classes) as names_file:
        class_names = [c.strip() for c in names_file]
    logging.info('classes loaded')

    times = []

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        # Not a numeric device index: treat it as a file name / URL.
        vid = cv2.VideoCapture(FLAGS.video)

    try:
        while True:
            _, img = vid.read()

            if img is None:
                logging.warning("Empty Frame")
                time.sleep(0.1)
                continue

            # OpenCV delivers BGR frames; convert to RGB for the model input.
            # The original BGR frame is kept for drawing and display.
            img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_in = tf.expand_dims(img_in, 0)
            img_in = transform_images(img_in, FLAGS.size)

            t1 = time.time()
            boxes, scores, classes, nums = yolo.predict(img_in)
            t2 = time.time()
            times.append(t2 - t1)
            times = times[-20:]

            img = draw_outputs(img, (boxes, scores, classes, nums),
                               class_names)
            img = cv2.putText(
                img,
                "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
                (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
            cv2.imshow('output', img)
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Free the capture device even if the loop raised.
        vid.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Convert darknet weights for the full YoloV3 model and save them.

    Memory growth is enabled on the first GPU when one is present. A random
    (1, 320, 320, 3) float32 input is run through the model before saving.
    """
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if len(gpus) > 0:
        tf.config.experimental.set_memory_growth(gpus[0], True)

    yolo = YoloV3(classes=FLAGS.num_classes)
    yolo.summary()
    logging.info("model created")

    # The third argument is False because this script never builds the tiny
    # model.
    load_darknet_weights(yolo, FLAGS.weights, False)
    logging.info("weights loaded")

    sample = np.random.random((1, 320, 320, 3)).astype(np.float32)
    sanity_output = yolo(sample)
    logging.info("sanity check passed")

    yolo.save_weights(FLAGS.output)
    logging.info("weights saved")
def main(_argv):
    """Run detection on every '*.jpg' in FLAGS.path and write one txt per image.

    The working directory is changed to FLAGS.path. Results are appended to
    <start cwd>/data/detection_result/<image name>.txt, one line per detection:
    class name, score, left, top, right, bottom (box values multiplied by
    FLAGS.size and truncated to int).
    """
    # Turn on memory growth for every GPU.
    for gpu in tf.config.experimental.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(gpu, True)

    # Build the network.
    model_cls = YoloV3Tiny if FLAGS.tiny else YoloV3
    yolo = model_cls(classes=FLAGS.num_classes)

    # Load the weights.
    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    # Load the class list.
    class_names = [line.strip() for line in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # Remember the starting directory, then move into the dataset directory.
    root_path = os.getcwd()
    os.chdir(FLAGS.path)
    result_dir = os.path.join(root_path, 'data', 'detection_result')

    for image in glob.glob('*.jpg'):
        # Prepare the image.
        img_raw = tf.image.decode_image(open(image, 'rb').read(), channels=3)
        batch = transform_images(tf.expand_dims(img_raw, 0), FLAGS.size)

        # Run detection.
        boxes, scores, classes, nums = yolo(batch)

        # Save the results.
        save_file = os.path.join(result_dir, image.replace('.jpg', '.txt'))
        with open(save_file, 'a') as f:
            for idx in range(nums[0]):
                obj_name = class_names[int(classes[0][idx])]
                score = float('%.6f' % scores[0][idx])
                box = boxes[0][idx]
                left = int(box[0] * FLAGS.size)
                top = int(box[1] * FLAGS.size)
                right = int(box[2] * FLAGS.size)
                bottom = int(box[3] * FLAGS.size)
                f.write("%s %s %s %s %s %s\n" %
                        (obj_name, score, left, top, right, bottom))

    logging.info('output saved complete')
def main(_argv):
    """Initialise the shared detector state and start the web server.

    Sets the module-level globals yolo, class_names, vid, W and H, then
    serves `appweb` through eventlet on port 8080 (all interfaces).
    """
    global yolo, class_names, vid, W, H

    # Build YoloV3 and load the weights file given in args.
    yolo = YoloV3(classes=args["num_classes"])
    yolo.load_weights(args["weights"])
    logging.info('Weights loaded')

    # Class labels, one per line of the classes file.
    class_names = [line.strip() for line in open(args["classes"]).readlines()]
    logging.info('Classes loaded')

    # Start reading from video source 0.
    vid = VideoStream(src=0).start()
    logging.info('video stream started')

    # Image dimensions stay unset here and are filled in elsewhere.
    W = None
    H = W

    # Start the server.
    listener = eventlet.listen(('', 8080))
    eventlet.wsgi.server(listener, appweb)
def main(_argv):
    """Export the model under FLAGS.output_dir/yolov3/<version> as a SavedModel.

    The class count is taken from the FLAGS.classes file. When FLAGS.image is
    set, the reloaded serving signature is run on it and detections are logged.
    """
    class_names = [line.strip() for line in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    model_cls = YoloV3Tiny if FLAGS.tiny else YoloV3
    yolo = model_cls(classes=len(class_names))
    yolo.load_weights(FLAGS.weights).expect_partial()
    logging.info('weights loaded')

    # Saved path will be 'output_dir/model_name/version'
    version_dir = str(FLAGS.version)
    saved_path = os.path.join(FLAGS.output_dir, 'yolov3', version_dir)
    tf.saved_model.save(yolo, saved_path)
    logging.info("model saved to: {}".format(saved_path))

    reloaded = tf.saved_model.load(saved_path)
    infer = reloaded.signatures[
        tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
    logging.info(infer.structured_outputs)

    # The test inference is optional.
    if not FLAGS.image:
        return

    encoded = open(FLAGS.image, 'rb').read()
    batch = tf.expand_dims(tf.image.decode_image(encoded, channels=3), 0)
    batch = transform_images(batch, FLAGS.size)

    started = time.time()
    outputs = infer(batch)
    boxes = outputs["yolo_nms"]
    scores = outputs["yolo_nms_1"]
    classes = outputs["yolo_nms_2"]
    nums = outputs["yolo_nms_3"]
    finished = time.time()
    logging.info('time: {}'.format(finished - started))

    logging.info('detections:')
    for idx in range(nums[0]):
        label = class_names[int(classes[0][idx])]
        logging.info('\t{}, {}, {}'.format(label,
                                           scores[0][idx].numpy(),
                                           boxes[0][idx].numpy()))
def main(_argv):
    """Convert darknet weights (tiny or full model) and save them to FLAGS.output.

    Memory growth is enabled on the first GPU when one is present. A random
    (1, 320, 320, 3) float32 input is run through `predict` before saving.
    """
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if len(gpus) > 0:
        tf.config.experimental.set_memory_growth(gpus[0], True)

    model_cls = YoloV3Tiny if FLAGS.tiny else YoloV3
    yolo = model_cls(classes=FLAGS.num_classes)
    yolo.summary()
    logging.info('model created')

    load_darknet_weights(yolo, FLAGS.weights, FLAGS.tiny)
    logging.info('weights loaded')

    # One prediction on random data; the result itself is not inspected.
    sample = np.random.random((1, 320, 320, 3)).astype(np.float32)
    sanity_output = yolo.predict(sample)
    logging.info('sanity check passed')

    yolo.save_weights(FLAGS.output)
    logging.info('weights saved')
def main(_argv):
    """Convert the Keras YOLOv3 model to TFLite and run it once on FLAGS.image.

    The converted model is written to FLAGS.output, loaded back into a
    tf.lite.Interpreter and invoked on the image. The inference time is
    logged and the first output tensor is printed.
    """
    if FLAGS.tiny:
        yolo = YoloV3Tiny(size=FLAGS.size, classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(size=FLAGS.size, classes=FLAGS.num_classes)
    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    converter = tf.lite.TFLiteConverter.from_keras_model(yolo)
    tflite_model = converter.convert()
    # Close (and therefore flush) the file before the interpreter re-opens it.
    with open(FLAGS.output, 'wb') as model_file:
        model_file.write(tflite_model)
    logging.info("model saved to: {}".format(FLAGS.output))

    interpreter = tf.lite.Interpreter(model_path=FLAGS.output)
    interpreter.allocate_tensors()
    logging.info('tflite model loaded')

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Loaded for parity with the other scripts; not used below.
    with open(FLAGS.classes) as names_file:
        class_names = [c.strip() for c in names_file]
    logging.info('classes loaded')

    with open(FLAGS.image, 'rb') as image_file:
        img = tf.image.decode_image(image_file.read(), channels=3)
    img = tf.expand_dims(img, 0)
    # The model was built with size=FLAGS.size, so resize to the same value
    # instead of a hard-coded 416.
    img = transform_images(img, FLAGS.size)

    t1 = time.time()
    # set_tensor returns nothing, so its result is not kept.
    interpreter.set_tensor(input_details[0]['index'], img)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    t2 = time.time()
    logging.info('time: {}'.format(t2 - t1))

    print(output_data)
def create_model():
    """Build the training model and apply the transfer / freeze flags.

    FLAGS.transfer:
        'none'             - no weights are loaded.
        'all'              - load every weight from FLAGS.prefix + FLAGS.weights.
        anything else      - load a second, pretrained model and copy the
                             'yolo_darknet' weights; additionally copy the
                             'yolo_conv*' layers for 'yolo_conv' and
                             'yolo_output_conv', and the first sub-layer of
                             each output block for 'yolo_output_conv'.
    FLAGS.freeze:
        'none', 'all', 'yolo_darknet', 'yolo_conv' or 'yolo_output_conv';
        each later option freezes everything the earlier ones do plus more.

    Returns:
        The configured model.
    """
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)

    # Configure the model for transfer learning
    if FLAGS.transfer != 'none':
        # if we need all weights, no need to create another model
        if FLAGS.transfer == 'all':
            model.load_weights(FLAGS.prefix + FLAGS.weights)

        # else, we need only some of the weights
        # create appropriate model_pretrained, load all weights and copy the
        # ones we need
        else:
            if FLAGS.tiny:
                model_pretrained = YoloV3Tiny(
                    FLAGS.size, training=True,
                    classes=FLAGS.weights_num_classes or FLAGS.num_classes)
            else:
                model_pretrained = YoloV3(
                    FLAGS.size, training=True,
                    classes=FLAGS.weights_num_classes or FLAGS.num_classes)
            # load pretrained weights
            model_pretrained.load_weights(FLAGS.prefix + FLAGS.weights)

            # transfer darknet
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())

            # transfer 'yolo_conv_i' layer weights
            if FLAGS.transfer in ['yolo_conv', 'yolo_output_conv']:
                for l in model.layers:
                    if l.name.startswith('yolo_conv'):
                        model.get_layer(l.name).set_weights(
                            model_pretrained.get_layer(l.name).get_weights())

            # transfer 'yolo_output_i' first conv2d layer
            if FLAGS.transfer == 'yolo_output_conv':
                # The output blocks are addressed by position in model.layers.
                output_indices = (4, 5) if FLAGS.tiny else (5, 6, 7)
                for index in output_indices:
                    model.layers[index].layers[1].set_weights(
                        model_pretrained.layers[index].layers[1].get_weights())
                # should I freeze batch_norm as well?

    # freeze layers, if requested
    if FLAGS.freeze != 'none':
        if FLAGS.freeze == 'all':
            freeze_all(model)
        # The original list lacked a comma after 'yolo_darknet', which merged
        # it with 'yolo_conv' into one string and left the backbone unfrozen
        # for both of those options.
        if FLAGS.freeze in ['yolo_darknet', 'yolo_conv', 'yolo_output_conv']:
            freeze_all(model.get_layer('yolo_darknet'))
        if FLAGS.freeze in ['yolo_conv', 'yolo_output_conv']:
            for l in model.layers:
                if l.name.startswith('yolo_conv'):
                    freeze_all(l)
        if FLAGS.freeze == 'yolo_output_conv':
            # freeze the same sub-layers that the transfer step copies
            output_indices = (4, 5) if FLAGS.tiny else (5, 6, 7)
            for index in output_indices:
                freeze_all(model.layers[index].layers[1])

    return model
def main(_argv):
    """Train YOLOv3 (tiny or full) according to the command-line flags.

    Datasets come from FLAGS.dataset / FLAGS.val_dataset tfrecords, with the
    fake dataset used when a flag is empty. FLAGS.transfer selects how the
    pretrained FLAGS.weights are used and what gets frozen. FLAGS.mode
    'eager_tf' runs a hand-written GradientTape loop; any other value uses
    model.fit (run eagerly when the mode is 'eager_fit').
    """
    for gpu in tf.config.experimental.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(gpu, True)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)
        anchors, anchor_masks = yolo_tiny_anchors, yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors, anchor_masks = yolo_anchors, yolo_anchor_masks

    def to_model_inputs(x, y):
        # Shared mapping applied to both training and validation batches.
        return (dataset.transform_images(x, FLAGS.size),
                dataset.transform_targets(y, anchors, anchor_masks,
                                          FLAGS.size))

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            FLAGS.dataset, FLAGS.classes, FLAGS.size)
    train_dataset = (train_dataset
                     .shuffle(buffer_size=512)
                     .batch(FLAGS.batch_size)
                     .map(to_model_inputs)
                     .prefetch(buffer_size=tf.data.experimental.AUTOTUNE))

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size).map(to_model_inputs)

    # Configure the model for transfer learning
    if FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        pretrained_cls = YoloV3Tiny if FLAGS.tiny else YoloV3
        model_pretrained = pretrained_cls(
            FLAGS.size, training=True,
            classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            backbone = model.get_layer('yolo_darknet')
            backbone.set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'no_output':
            for layer in model.layers:
                if layer.name.startswith('yolo_output'):
                    continue
                layer.set_weights(
                    model_pretrained.get_layer(layer.name).get_weights())
                freeze_all(layer)
    elif FLAGS.transfer != 'none':
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    if FLAGS.mode != 'eager_tf':
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        model.fit(train_dataset,
                  epochs=FLAGS.epochs,
                  callbacks=callbacks,
                  validation_data=val_dataset)
        return

    # Eager mode is great for debugging
    # Non eager graph mode is recommended for real training
    avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
    avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

    for epoch in range(1, FLAGS.epochs + 1):
        for batch, (images, labels) in enumerate(train_dataset):
            with tf.GradientTape() as tape:
                outputs = model(images, training=True)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = [loss_fn(label, output)
                             for output, label, loss_fn
                             in zip(outputs, labels, loss)]
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

            grads = tape.gradient(total_loss, model.trainable_variables)
            optimizer.apply_gradients(
                zip(grads, model.trainable_variables))

            logging.info("{}_train_{}, {}, {}".format(
                epoch, batch, total_loss.numpy(),
                [np.sum(x.numpy()) for x in pred_loss]))
            avg_loss.update_state(total_loss)

        for batch, (images, labels) in enumerate(val_dataset):
            outputs = model(images)
            regularization_loss = tf.reduce_sum(model.losses)
            pred_loss = [loss_fn(label, output)
                         for output, label, loss_fn
                         in zip(outputs, labels, loss)]
            total_loss = tf.reduce_sum(pred_loss) + regularization_loss

            logging.info("{}_val_{}, {}, {}".format(
                epoch, batch, total_loss.numpy(),
                [np.sum(x.numpy()) for x in pred_loss]))
            avg_val_loss.update_state(total_loss)

        logging.info("{}, train: {}, val: {}".format(
            epoch,
            avg_loss.result().numpy(),
            avg_val_loss.result().numpy()))

        avg_loss.reset_states()
        avg_val_loss.reset_states()
        model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
def main(_argv):
    """Train YOLOv3 (tiny or full), resuming from the latest checkpoint if any.

    FLAGS.transfer controls how FLAGS.weights is used:
        'none'      - weights are not loaded.
        'fine_tune' - load weights and freeze the 'yolo_darknet' layer.
        'frozen'    - load weights and freeze the whole model.
        'darknet'   - load weights, re-initialise every 'yolo_*' layer except
                      'yolo_darknet', freeze the remaining layers.
        'no_output' - load weights, re-initialise the 'yolo_output*' layers,
                      freeze the remaining layers.
    The latest checkpoint in the directory of FLAGS.weights, when present, is
    loaded afterwards and determines the starting epoch. FLAGS.mode
    'eager_tf' runs a hand-written GradientTape loop; any other value uses
    model.fit.
    """
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # NOTE(review): 80 is passed as the last transform_targets argument in
    # both pipelines below - confirm what it stands for.
    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))

    if FLAGS.transfer != 'none':
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            # (the original tested FLAGS.mode here, so this branch was never
            # taken for --transfer frozen)
            freeze_all(model)
        else:
            # reset top layers
            if FLAGS.tiny:  # get initial weights
                init_model = YoloV3Tiny(FLAGS.size, training=True)
            else:
                init_model = YoloV3(FLAGS.size, training=True)

            if FLAGS.transfer == 'darknet':
                for l in model.layers:
                    if l.name != 'yolo_darknet' and l.name.startswith('yolo_'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)
            elif FLAGS.transfer == 'no_output':
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)

    # Resume from the newest checkpoint next to FLAGS.weights, if there is one.
    latest_cp = tf.train.latest_checkpoint(os.path.dirname(FLAGS.weights))
    start_epoch = 0
    if latest_cp:
        # The epoch is parsed from the text between the first '-' and the
        # following '.' of the checkpoint path.
        start_epoch = int(latest_cp.split('-')[1].split('.')[0])
        model.load_weights(latest_cp)
        logging.info('model resumed from: {}, start at epoch: {}'.format(
            latest_cp, start_epoch))
    else:
        logging.info(
            'passing resume since weights not there. training from scratch')

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [YoloLoss(anchors[mask]) for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(start_epoch, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights(FLAGS.weights.format(epoch))
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_voc-{epoch}',
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            initial_epoch=start_epoch,
                            callbacks=callbacks,
                            validation_data=val_dataset)
def main(_argv):
    """Train YOLOv3 (tiny or full) on a yolo-format image/label dataset.

    The class count is read from FLAGS.classes. Both datasets are repeated
    and consumed for a fixed number of steps (dataset size // batch size).
    FLAGS.weights_num_classes selects between loading FLAGS.weights directly
    (None) and copying selected layers from a pretrained model with a
    different class count (see FLAGS.transfer). FLAGS.freeze selects which
    layers are frozen. FLAGS.mode 'eager' runs a hand-written GradientTape
    loop; any other value uses model.fit.

    Returns early (after logging an error) when the training or validation
    image/label flags are missing.
    """
    # Validate the dataset flags before anything reads them.
    if not (FLAGS.train_images and FLAGS.train_labels):
        logging.error('Training images and labels must be specified.')
        return
    if not (FLAGS.val_images and FLAGS.val_labels):
        logging.error('Validation images and labels must be specified.')
        return

    with open(FLAGS.classes) as names_file:
        num_classes = len(names_file.readlines())
    logging.info('classes loaded, number of classes = %d' % num_classes)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True, classes=num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # TensorFlow 2.0/2.1 has a bug when passing a finite dataset to
    # model.fit(). Work around it by using dataset.repeat() and giving the
    # number of steps to model.fit().
    # Refer to https://github.com/tensorflow/tensorflow/issues/31509
    training_set_size = dataset.get_dataset_size(FLAGS.train_labels)
    steps_for_train = training_set_size // FLAGS.batch_size
    steps_for_val = dataset.get_dataset_size(
        FLAGS.val_labels) // FLAGS.batch_size
    logging.info('Training in %d steps, validation in %d steps' %
                 (steps_for_train, steps_for_val))

    train_dataset = dataset.load_yolo_dataset(
        FLAGS.train_images, FLAGS.train_labels, FLAGS.size,
        shuffle=True, shuffle_buffer_size=training_set_size)
    train_dataset = train_dataset.repeat()
    train_dataset = train_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_yolo_dataset(
        FLAGS.val_images, FLAGS.val_labels, FLAGS.size)
    val_dataset = val_dataset.repeat()
    val_dataset = val_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    # Prefetch the validation pipeline itself; the original prefetched
    # train_dataset here, which replaced the validation data with training
    # data.
    val_dataset = val_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # Configure the model
    # Weights configuration
    if FLAGS.weights_num_classes is None:
        if FLAGS.weights:
            model.load_weights(FLAGS.weights)
    else:
        # Transfer learning with incompatible number of classes
        assert FLAGS.weights

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                FLAGS.size, training=True,
                classes=FLAGS.weights_num_classes or num_classes)
        else:
            model_pretrained = YoloV3(
                FLAGS.size, training=True,
                classes=FLAGS.weights_num_classes or num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())

    # Freeze layers
    if FLAGS.freeze == 'darknet':
        freeze_all(model.get_layer('yolo_darknet'))
    elif FLAGS.freeze == 'no_output':
        # freeze all but output layers
        for l in model.layers:
            if not l.name.startswith('yolo_output'):
                freeze_all(l)

    model.summary()

    # Configure training process
    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=num_classes, ignore_thresh=0.5)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            # The datasets repeat forever, so each pass is cut off manually
            # after the computed number of steps.
            batch = 0
            for images, labels in train_dataset:
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

                batch += 1
                if batch == steps_for_train:
                    break

            batch = 0
            for images, labels in val_dataset:
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

                batch += 1
                if batch == steps_for_val:
                    break

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights(
                'checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer, loss=loss)

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            # EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True,
                            save_best_only=True),
        ]

        model.fit(train_dataset,
                  epochs=FLAGS.epochs,
                  steps_per_epoch=steps_for_train,
                  callbacks=callbacks,
                  validation_data=val_dataset,
                  validation_steps=steps_for_val)
def main(_argv):
    """Train a YoloV3 / YoloV3Tiny model as configured by the absl FLAGS.

    Flags read here: tiny, size, num_classes, dataset, val_dataset, classes,
    batch_size, transfer, weights, weights_num_classes, learning_rate, mode
    and epochs.

    With ``FLAGS.mode == 'eager_tf'`` a hand-written GradientTape loop is
    used and weights are saved after every epoch; for any other mode the
    model is compiled and trained through ``model.fit`` (run eagerly when the
    mode is ``'eager_fit'``).

    Args:
        _argv: Positional command-line arguments; unused.
    """
    # Enable memory growth on every visible GPU.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    # Build the network model together with its matching anchors.
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Load the training set (a fake dataset is used when none is given).
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            FLAGS.dataset, FLAGS.classes, FLAGS.size)
    else:
        train_dataset = dataset.load_fake_dataset()
    # Training pipeline: shuffle, batch, transform, prefetch.
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # Load the validation set (a fake dataset is used when none is given).
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    else:
        val_dataset = dataset.load_fake_dataset()
    # Validation pipeline: batch and transform only.
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure transfer learning.
    if FLAGS.transfer == 'none':
        pass
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer learning works even when the number of classes
        # is incompatible: the weights are loaded into a second model built
        # with the checkpoint's class count and copied layer by layer.
        pretrained_classes = FLAGS.weights_num_classes or FLAGS.num_classes
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                FLAGS.size, training=True, classes=pretrained_classes)
        else:
            model_pretrained = YoloV3(
                FLAGS.size, training=True, classes=pretrained_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            # Copy and freeze the darknet backbone.
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'no_output':
            # Copy and freeze every layer except the output layers.
            for layer in model.layers:
                if not layer.name.startswith('yolo_output'):
                    layer.set_weights(
                        model_pretrained.get_layer(layer.name).get_weights())
                    freeze_all(layer)
    else:
        # Every other transfer type requires a matching number of classes.
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # Freeze darknet and fine-tune the remaining layers.
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # Freeze everything.
            freeze_all(model)

    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    # Choose between the manual eager loop and Keras fit().
    if FLAGS.mode == 'eager_tf':
        # Eager execution lets values be inspected while running, which is
        # convenient for debugging.
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            # Optimise the training loss by gradient descent using
            # automatic differentiation.
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = [
                        loss_fn(label, output)
                        for output, label, loss_fn
                        in zip(outputs, labels, loss)
                    ]
                    total_loss = (
                        tf.reduce_sum(pred_loss) + regularization_loss)

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            # Compute the loss on the validation set.
            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = [
                    loss_fn(label, output)
                    for output, label, loss_fn in zip(outputs, labels, loss)
                ]
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights(
                'checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
        print(history.history)
def main(_argv):
    """Run YoloV3 detection on a webcam or video file and display each frame.

    Flags read here: tiny, num_classes, weights, classes, video, output,
    output_format and size. ``FLAGS.video`` is used as a device index when it
    parses as an integer, otherwise it is handed to OpenCV unchanged. When
    ``FLAGS.output`` is set the annotated frames are also written to that
    file. The loop ends when 'q' is pressed or after several consecutive
    empty frames (e.g. the end of a video file).

    Args:
        _argv: Positional command-line arguments; unused.
    """
    # Number of consecutive empty reads tolerated before giving up; without
    # a bound the loop would spin forever once a video file is exhausted.
    max_empty_frames = 3

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    with open(FLAGS.classes) as class_file:
        class_names = [c.strip() for c in class_file]
    logging.info('classes loaded')

    # Inference durations of the most recent frames (at most 20 are kept).
    times = []

    # A numeric value selects a capture device; anything else is passed
    # through to OpenCV as given.
    try:
        source = int(FLAGS.video)
    except (TypeError, ValueError):
        source = FLAGS.video
    vid = cv2.VideoCapture(source)

    out = None
    try:
        if FLAGS.output:
            # by default VideoCapture returns float instead of int
            width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = int(vid.get(cv2.CAP_PROP_FPS))
            codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
            out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

        empty_frames = 0
        while True:
            _, img = vid.read()

            if img is None:
                logging.warning("Empty Frame")
                time.sleep(0.1)
                empty_frames += 1
                if empty_frames < max_empty_frames:
                    continue
                break
            empty_frames = 0

            img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_in = tf.expand_dims(img_in, 0)
            img_in = transform_images(img_in, FLAGS.size)

            t1 = time.time()
            boxes, scores, classes, nums = yolo.predict(img_in)
            t2 = time.time()
            times.append(t2 - t1)
            times = times[-20:]

            img = draw_outputs(img, (boxes, scores, classes, nums),
                               class_names)
            img = cv2.putText(
                img,
                "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
                (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
            if out is not None:
                out.write(img)
            cv2.imshow('output', img)
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Release the capture and the writer even on errors or Ctrl-C so
        # the device is freed and the writer is closed.
        vid.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
def main(_argv):
    """Detect objects with YoloV3 and track them across video frames.

    Each frame is run through the YOLO model; the detections are encoded
    with the box encoder, filtered by non-max suppression and fed to the
    tracker. Confirmed tracks are drawn on the frame together with an FPS
    overlay, and the frame is shown in a window. When ``FLAGS.output`` is
    set, the annotated frames are written to that file and the converted
    boxes of every frame are appended to ``detection.txt``.

    Flags read here: tiny, num_classes, weights, classes, video, output,
    output_format and size. The loop ends when 'q' is pressed or once three
    empty frames have been read.

    Args:
        _argv: Positional command-line arguments; unused.
    """
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    with open(FLAGS.classes) as class_file:
        class_names = [c.strip() for c in class_file]
    logging.info('classes loaded')

    # The colour map never changes, so build it once instead of per frame.
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

    # A numeric value selects a capture device; anything else is passed
    # through to OpenCV as given.
    try:
        source = int(FLAGS.video)
    except (TypeError, ValueError):
        source = FLAGS.video
    vid = cv2.VideoCapture(source)

    out = None
    list_file = None
    frame_index = -1
    try:
        if FLAGS.output:
            # by default VideoCapture returns float instead of int
            width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            video_fps = int(vid.get(cv2.CAP_PROP_FPS))
            codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
            out = cv2.VideoWriter(FLAGS.output, codec, video_fps,
                                  (width, height))
            list_file = open('detection.txt', 'w')

        fps = 0.0  # running average of the processing rate
        count = 0  # number of empty frames seen so far (never reset)
        while True:
            _, img = vid.read()

            if img is None:
                logging.warning("Empty Frame")
                time.sleep(0.1)
                count += 1
                if count < 3:
                    continue
                break

            img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_in = tf.expand_dims(img_in, 0)
            img_in = transform_images(img_in, FLAGS.size)

            t1 = time.time()
            boxes, scores, classes, nums = yolo.predict(img_in)
            classes = classes[0]
            names = np.array([class_names[int(c)] for c in classes])
            converted_boxes = convert_boxes(img, boxes[0])
            features = encoder(img, converted_boxes)
            detections = [
                Detection(bbox, score, class_name, feature)
                for bbox, score, class_name, feature in zip(
                    converted_boxes, scores[0], names, features)
            ]

            # run non-maxima suppression
            boxs = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.class_name for d in detections])
            indices = preprocessing.non_max_suppression(
                boxs, classes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                class_name = track.get_class()
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]
                cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)
                # Filled label background sized from the label length.
                cv2.rectangle(
                    img, (int(bbox[0]), int(bbox[1] - 30)),
                    (int(bbox[0])
                     + (len(class_name) + len(str(track.track_id))) * 17,
                     int(bbox[1])),
                    color, -1)
                cv2.putText(img, class_name + "-" + str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                            (255, 255, 255), 2)

            ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
            #for det in detections:
            #    bbox = det.to_tlbr()
            #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)

            # print fps on screen
            fps = (fps + (1. / (time.time() - t1))) / 2
            cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
            cv2.imshow('output', img)
            if out is not None:
                out.write(img)
                frame_index = frame_index + 1
                # One line per frame: the frame index followed by the four
                # values of every converted box, each followed by a space.
                fields = [str(frame_index)]
                for box in converted_boxes:
                    fields.extend(str(box[k]) for k in range(4))
                list_file.write(' '.join(fields) + ' \n')

            # press q to quit
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # The original shutdown code read the misspelled flag `FLAGS.ouput`,
        # which fails at exit and skips the cleanup; test the writer
        # object itself and release everything even on errors.
        vid.release()
        if out is not None:
            out.release()
        if list_file is not None:
            list_file.close()
        cv2.destroyAllWindows()