Example #1
def yolov4_model_create(input_size,
                        num_class=len(utils.read_class_names(
                            cfg.YOLO.CLASSES))):
    """
    # input layer 
    # YOLOv4 -> return [conv_sbbox, conv_mbbox, conv_lbbox]
    # return tensor of shape [batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes] contains (x, y, w, h, score, probability)
    # create model
    """

    input_layer = tf.keras.layers.Input([input_size, input_size, 3])
    sml_feature_maps = YOLOv4(
        input_layer,
        num_class)  # Return [conv_sbbox, conv_mbbox, conv_lbbox], len = 3

    output_layers = []
    # decode -> output_layers
    for feature_map in sml_feature_maps:
        # print("feature_map: ", feature_map)
        output_layer = decode(feature_map, num_class)
        # print("decode -> ",output_layer.shape)
        output_layers.append(output_layer)

    model = tf.keras.Model(input_layer, output_layers,
                           name="YOLOv4_jam")  # create model
    # model.summary()
    logging.info("YOLOv4 Model built.")

    return model
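
A minimal usage sketch (not from the source): it assumes cfg.YOLO.WEIGHTS points at a Darknet .weights file and that api.load_darknet_weights is available, as in Example #6 and the evaluation main() further down.

model = yolov4_model_create(input_size=608)
api.load_darknet_weights(model, cfg.YOLO.WEIGHTS)
dummy = np.zeros((1, 608, 608, 3), dtype=np.float32)
sbbox, mbbox, lbbox = model.predict(dummy)  # one decoded tensor per scale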
Example #2
def draw_bbox(image,
              bboxes,
              classes=utils.read_class_names(cfg.YOLO.CLASSES),
              show_label=True):
    """
    bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
    """

    num_classes = len(classes)
    image_h, image_w, _ = image.shape

    # generate one distinct color per class: evenly spaced hues in HSV,
    # converted to RGB in [0, 255]
    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(
        map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
            colors))

    # shuffle with a fixed seed so class colors stay stable across runs
    random.seed(0)
    random.shuffle(colors)
    random.seed(None)

    for i, bbox in enumerate(bboxes):
        coor = np.array(bbox[:4], dtype=np.int32)
        fontScale = 0.6
        score = bbox[4]
        class_ind = int(bbox[5])
        bbox_color = colors[class_ind]
        bbox_thick = int(0.6 * (image_h + image_w) / 600)
        # cast to Python ints: newer OpenCV versions reject numpy scalars here
        c1, c2 = (int(coor[0]), int(coor[1])), (int(coor[2]), int(coor[3]))
        cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)

        if show_label:
            bbox_mess = '%s: %.2f' % (classes[class_ind], score)
            t_size = cv2.getTextSize(bbox_mess,
                                     0,
                                     fontScale,
                                     thickness=bbox_thick // 2)[0]
            cv2.rectangle(image, c1,
                          (c1[0] + t_size[0], c1[1] - t_size[1] - 3),
                          bbox_color, -1)  # filled

            cv2.putText(image,
                        bbox_mess, (c1[0], c1[1] - 2),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale, (0, 0, 0),
                        bbox_thick // 2,
                        lineType=cv2.LINE_AA)

    return image
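
A short usage sketch for draw_bbox ("demo.jpg" is a hypothetical file; the function expects an RGB ndarray and bboxes in the documented format):

image = cv2.imread("demo.jpg")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
bboxes = [[50, 60, 200, 220, 0.93, 2]]  # [x_min, y_min, x_max, y_max, score, cls_id]
annotated = draw_bbox(image, bboxes)
Image.fromarray(annotated).show()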
Example #3
def detecting_info(bboxes):
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)

    for bbox in bboxes:
        coordinate = np.array(bbox[:4], dtype=np.int32)
        score = bbox[4]
        class_index = int(bbox[5])
        class_name = CLASSES[class_index]
        score = '%.4f' % score
        xmin, ymin, xmax, ymax = list(map(str, coordinate))

        print(
            f"{class_name:8}: {score:6}, coordinate: ({xmin}, {ymin}, {xmax}, {ymax})"
        )
    logging.info("Detecting done...")
Example #4
    def __init__(self, dataset_type):
        self.annot_path = cfg.TRAIN.ANNOT_PATH if dataset_type == 'train' else cfg.TEST.ANNOT_PATH
        self.input_sizes = cfg.TRAIN.INPUT_SIZE if dataset_type == 'train' else cfg.TEST.INPUT_SIZE
        self.batch_size = cfg.TRAIN.BATCH_SIZE if dataset_type == 'train' else cfg.TEST.BATCH_SIZE
        self.data_aug = cfg.TRAIN.DATA_AUG if dataset_type == 'train' else cfg.TEST.DATA_AUG

        self.train_input_sizes = cfg.TRAIN.INPUT_SIZE
        self.strides = np.array(cfg.YOLO.STRIDES)
        self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
        self.num_classes = len(self.classes)
        self.anchors = np.array(utils.get_anchors(cfg.YOLO.ANCHORS))
        self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE  # 3
        self.max_bbox_per_scale = 150

        self.annotations = self.load_annotations(dataset_type)
        self.num_samples = len(self.annotations)  # number of annotations
        self.num_batchs = int(np.ceil(self.num_samples /
                                      self.batch_size))  # batches per epoch
        self.batch_count = 0
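
A sketch of how this Dataset is consumed (it assumes the full class also defines __len__, __iter__ and __next__, as the training loop in Example #8 implies):

train_set = Dataset(dataset_type="train")
print(train_set.num_batchs)           # ceil(num_samples / batch_size)
for image_data, target in train_set:  # one batch per step
    break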
Example #5
def yolov4_model_create_4_train(input_size=cfg.TRAIN.INPUT_SIZE,
                                num_class=len(
                                    utils.read_class_names(cfg.YOLO.CLASSES)),
                                anchors=utils.get_anchors(cfg.YOLO.ANCHORS),
                                strides=np.array(cfg.YOLO.STRIDES),
                                xyscale=cfg.YOLO.XYSCALE):

    """Create the YOLOv4 training model; its outputs interleave the raw conv map
    and the decoded prediction for each scale (see the sketch after this example)."""
    input_layer = tf.keras.layers.Input([input_size, input_size, 3])
    feature_maps = YOLOv4(input_layer, num_class)

    bbox_tensors = []
    for i, fm in enumerate(feature_maps):
        bbox_tensor = decode_train(fm, num_class, strides, anchors, i, xyscale)
        bbox_tensors.append(fm)           # raw conv map, consumed by compute_loss
        bbox_tensors.append(bbox_tensor)  # decoded prediction

    model = tf.keras.Model(input_layer, bbox_tensors, name="YOLOv4_4_training")
    logging.info("YOLOv4 Model built.")

    return model
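
A sketch of the interleaved output layout that the training step in Example #8 relies on (indices follow this example; model.outputs is the standard Keras attribute):

train_model = yolov4_model_create_4_train()
# outputs alternate raw conv maps and decoded predictions, one pair per scale:
# [conv_s, pred_s, conv_m, pred_m, conv_l, pred_l]
for i in range(3):
    conv, pred = train_model.outputs[i * 2], train_model.outputs[i * 2 + 1]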
Example #6
class YOLO(object):

    # basic settings
    _default_settings = {
        "STRIDES": np.array(cfg.YOLO.STRIDES),
        "ANCHORS": utils.get_anchors(cfg.YOLO.ANCHORS),
        "SCORE_THRESHOLD": cfg.YOLO.SCORE_THRESHOLD,
        "IOU_THRESHOLD": cfg.YOLO.IOU_THRESHOLD,
        "NUM_CLASS": len(utils.read_class_names(cfg.YOLO.CLASSES)),
        "CLASSES": utils.read_class_names(cfg.YOLO.CLASSES),
        "XYSCALE": cfg.YOLO.XYSCALE,
        "YOLOv4_WEIGHTS": cfg.YOLO.WEIGHTS,
        "IMAGE_OUTPUT_PATH": cfg.YOLO.IMAGE_OUTPUT_PATH,
        "VIDEO_OUTPUT_PATH": cfg.YOLO.VIDEO_OUTPUT_PATH,
        "SAVE_OR_NOT": cfg.SAVE_OR_NOT
    }

    # init
    def __init__(self, input_size, **params):
        self.INPUT_SIZE = input_size
        self.__dict__.update(self._default_settings)
        self.__dict__.update(params)  # apply per-instance overrides

    # create model and load weights
    def create_model(self,
                     input_size=cfg.YOLO.INPUT_SIZE,
                     darknet_weights=None):

        # create model
        model = api.yolov4_model_create(input_size)

        # load weights
        if darknet_weights is not None:
            api.load_darknet_weights(model, darknet_weights)

        return model

    # image detect
    def image_detecting(self, image, show_info=False):

        # pre-process image (must be an image read with OpenCV)
        image_original, image_processed, image_original_size = api.image_preprocess_before_predicting(
            image=image, fixed_size=self.INPUT_SIZE)

        # create model & load weights
        model = self.create_model(self.INPUT_SIZE, self.YOLOv4_WEIGHTS)

        time_begin = time.time()

        # predict
        bboxes_predicted = model.predict(image_processed)
        # bbox process
        bboxes_processed = api.bboxes_optimizing(
            bboxes_pred=bboxes_predicted,
            image_original_size=image_original_size,
            input_size=self.INPUT_SIZE,
            anchors=self.ANCHORS,
            strides=self.STRIDES,
            xyscale=self.XYSCALE,
            score_thr=self.SCORE_THRESHOLD,
            iou_thr=self.IOU_THRESHOLD,
            nms_method="nms",
            show_info=show_info)

        # draw bbox
        image_with_bboxes = api.draw_bbox(image_original, bboxes_processed)

        time_end = time.time()
        logging.info(f"Time consumed: {time_end - time_begin}")

        # show
        image = Image.fromarray(image_with_bboxes)
        image.show()

        if self.SAVE_OR_NOT:
            image.save(self.IMAGE_OUTPUT_PATH)

    # video detect
    def video_detecting(self, video, video_saver, show_info=False):

        # create model & load weights
        model = self.create_model(self.INPUT_SIZE, self.YOLOv4_WEIGHTS)

        # time span list
        times = []
        time_begin = time.time()

        while True:
            """
                returns: ret,frame。
                - ret: bool. read correctly->true, end of the video -> false
                - frame: image, 3-dim
            """
            return_value, frame = video.read()

            if return_value:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(frame)

            else:
                time_end = time.time()
                logging.info("Video detecting done.")
                logging.info(f"Time consumed: {time_end - time_begin} s")

                break

            frame, image_processed, frame_size = api.image_preprocess_before_predicting(
                image=frame, fixed_size=self.INPUT_SIZE)


            # timestamp before detecting this frame
            prev_time = time.time()
            # logging.info("Video detecting...")

            # predict
            bboxes_predicted = model.predict(image_processed)
            # bbox process
            bboxes_processed = api.bboxes_optimizing(
                bboxes_pred=bboxes_predicted,
                image_original_size=frame_size,
                input_size=self.INPUT_SIZE,
                anchors=self.ANCHORS,
                strides=self.STRIDES,
                xyscale=self.XYSCALE,
                score_thr=0.25,
                iou_thr=0.213,
                nms_method="nms",
                show_info=show_info)

            curr_time = time.time()  # timestamp after detecting this frame
            exec_time = curr_time - prev_time  # per-frame execution time
            times.append(exec_time)  # keep per-frame times for the running average

            image = api.draw_bbox(frame, bboxes_processed)
            image = cv2.putText(
                image, "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
                (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            if self.SAVE_OR_NOT:
                video_saver.write(result)  # write the BGR frame expected by cv2.VideoWriter
            cv2.imshow("result", result)

            if cv2.waitKey(1) & 0xFF == ord('q'): break
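
A minimal driver sketch for the class above ("demo.jpg" and "demo.mp4" are hypothetical paths; the VideoWriter settings are indicative only):

yolo = YOLO(input_size=608)

# image detection
image = cv2.imread("demo.jpg")
yolo.image_detecting(image)

# video detection
video = cv2.VideoCapture("demo.mp4")
fourcc = cv2.VideoWriter_fourcc(*"XVID")
fps = video.get(cv2.CAP_PROP_FPS)
size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))
video_saver = cv2.VideoWriter(yolo.VIDEO_OUTPUT_PATH, fourcc, fps, size)
yolo.video_detecting(video, video_saver)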
Example #7
def main(_argv):

    # file ops
    predicted_dir_path = './mAP/predicted'
    ground_truth_dir_path = './mAP/ground-truth'
    if os.path.exists(predicted_dir_path): shutil.rmtree(predicted_dir_path)
    if os.path.exists(ground_truth_dir_path):
        shutil.rmtree(ground_truth_dir_path)
    if os.path.exists(cfg.TEST.DECTECTED_IMAGE_PATH):
        shutil.rmtree(cfg.TEST.DECTECTED_IMAGE_PATH)

    os.mkdir(predicted_dir_path)
    os.mkdir(ground_truth_dir_path)
    os.mkdir(cfg.TEST.DECTECTED_IMAGE_PATH)

    # GPU or CPU
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
        print("GPU!!!!!!!!!!!")
    else:
        print("NO GPU!!!!!!!!!!!")

    # setting prep
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)

    # create model
    model = api.yolov4_model_create(FLAGS.input_size)
    # load weights
    api.load_darknet_weights(model, cfg.YOLO.WEIGHTS)

    # count the lines of the annotation file
    with open(cfg.TEST.ANNOT_PATH) as f:
        num_lines = sum(1 for _ in f)
    with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
        # example annotation line:
        # ./data/coco/images/val2017/000000289343.jpg 473,395,511,423,16 204,235,264,412,0 0,499,339,605,13 204,304,256,456,1
        for num, line in enumerate(annotation_file):
            # strip() removes leading/trailing whitespace characters (\t, \r, \n);
            # split() returns a list of the whitespace-separated chunks.
            # For the example above this yields:
            # ['./data/coco/images/val2017/000000289343.jpg',
            #  '473,395,511,423,16',
            #  '204,235,264,412,0',
            #  '0,499,339,605,13',
            #  '204,304,256,456,1']

            annotation = line.strip().split()
            image_path = annotation[0]
            image_name = image_path.split('/')[-1]

            image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # get ground truth bbox data -> [x1, y1, x2, y2, class]
            bbox_data_gt = np.array(
                [list(map(int, box.split(','))) for box in annotation[1:]])

            if len(bbox_data_gt) == 0:
                bboxes_gt = []
                classes_gt = []
            else:
                bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
            ground_truth_path = os.path.join(ground_truth_dir_path,
                                             str(num) + '.txt')

            print('=> ground truth of %s:' % image_name)
            num_bbox_gt = len(bboxes_gt)
            """
            ground-truth:指的是真实框的txt
            对应绘制mAP的 get_gt_txt.py 文件
            结果为:
            3 194 400 208 414
            0 43 372 57 386
            0 277 201 291 215
            1 143 134 199 190
            1 299 49 341 91
            5 150 218 192 260
            5 303 170 331 198
            7 101 92 129 120
            1 150 293 206 349
            5 0 102 112 214
            8 200 89 312 201
            """
            with open(ground_truth_path, 'w') as f:
                for i in range(num_bbox_gt):
                    class_name = CLASSES[classes_gt[i]]
                    xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
                    bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax
                                          ]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())
            print('=> predict result of %s:' % image_name)
            predict_result_path = os.path.join(predicted_dir_path,
                                               str(num) + '.txt')

            image_original, image_processed, image_original_size = api.image_preprocess_before_predicting(
                image=image, fixed_size=FLAGS.input_size)
            # predict
            # image shape -> (1, 608, 608, 3)
            bboxes_predicted = model.predict(image_processed)
            bboxes_processed = api.bboxes_optimizing(
                bboxes_pred=bboxes_predicted,
                image_original_size=image_original_size,
                input_size=FLAGS.input_size,
                show_info=False,
                show_detected_objects_numbers=False)

            # save predicted test images
            if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
                image = api.draw_bbox(image, bboxes_processed)
                cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH + image_name, image)
            """
            detection-results:指的是预测结果的txt
            对应绘制mAP的 get_dr_txt.py文件
            结果为:
            0 0.9426 277 201 291 214
            0 0.9347 43 372 57 386
            1 0.9877 143 133 199 189
            1 0.9842 150 293 205 348
            1 0.9663 299 49 341 90
            5 0.9919 302 169 330 198
            5 0.9823 0 102 112 213
            5 0.9684 150 218 190 259
            7 0.9927 101 92 129 119
            8 0.9695 199 88 314 202

            """
            with open(predict_result_path, 'w') as f:
                for bbox in bboxes_processed:
                    coor = np.array(bbox[:4], dtype=np.int32)
                    score = bbox[4]
                    class_ind = int(bbox[5])
                    class_name = CLASSES[class_ind]
                    score = '%.4f' % score
                    xmin, ymin, xmax, ymax = list(map(str, coor))
                    bbox_mess = ' '.join(
                        [class_name, score, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())
            print(num, num_lines)  # progress: current index / total annotations
Example #8
def main(_argv):

    # train data
    dataset_train = Dataset(dataset_type="train")

    # log_dir
    logdir_train = "logs/gradient_tape/" + datetime.now().strftime(
        "%Y%m%d-%H%M%S") + "/train"

    steps_per_epoch = len(dataset_train)

    first_stage_epochs = cfg.TRAIN.FISRT_STAGE_EPOCHS  # 20 (key spelled this way in cfg)
    second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS  # 30
    global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
    warmup_steps = cfg.TRAIN.WARMUP_EPOCHS * steps_per_epoch  #  5 * steps_per_epoch
    total_steps = (first_stage_epochs + second_stage_epochs) * steps_per_epoch

    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    STRIDES = np.array(cfg.YOLO.STRIDES)
    IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH
    XYSCALE = cfg.YOLO.XYSCALE
    ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)

    LOAD_WEIGHTS = None

    model = api.yolov4_model_create_4_train(input_size=cfg.TRAIN.INPUT_SIZE)

    if LOAD_WEIGHTS is None:
        print("Training from scratch")
    else:
        utils.load_weights_tiny(model, FLAGS.weights)

    optimizer = tf.keras.optimizers.Adam()
    train_summary_writer = tf.summary.create_file_writer(logdir_train)

    def train_step(image_data, target):
        with tf.GradientTape() as tape:
            pred_result = model(image_data, training=True)
            ciou_loss = conf_loss = prob_loss = 0

            # optimizing process: model outputs are interleaved per scale as
            # (conv, pred) pairs -> indices (0, 1), (2, 3), (4, 5)
            for i in range(3):
                conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
                loss_items = compute_loss(pred,
                                          conv,
                                          target[i][0],
                                          target[i][1],
                                          STRIDES=STRIDES,
                                          NUM_CLASS=NUM_CLASS,
                                          IOU_LOSS_THRESH=IOU_LOSS_THRESH,
                                          i=i)
                ciou_loss += loss_items[0]
                conf_loss += loss_items[1]
                prob_loss += loss_items[2]

            total_loss = ciou_loss + conf_loss + prob_loss

        # compute and apply gradients outside the tape context
        gradients = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        tf.print(
            "=> STEP %4d   lr: %.6f   ciou_loss: %4.2f   conf_loss: %4.2f   "
            "prob_loss: %4.2f   total_loss: %4.2f" %
            (global_steps, optimizer.lr.numpy(), ciou_loss, conf_loss,
             prob_loss, total_loss))

        # update learning rate: linear warmup, then cosine decay
        # (cfg.TRAIN.LR_INIT = 1e-3, cfg.TRAIN.LR_END = 1e-6)
        global_steps.assign_add(1)
        if global_steps < warmup_steps:
            lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT
        else:
            lr = cfg.TRAIN.LR_END + 0.5 * (
                cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END) * ((1 + tf.cos(
                    (global_steps - warmup_steps) /
                    (total_steps - warmup_steps) * np.pi)))
        optimizer.lr.assign(lr.numpy())

        # write summary data
        with train_summary_writer.as_default():
            tf.summary.scalar("lr", optimizer.lr, step=global_steps)
            tf.summary.scalar("loss/total_loss", total_loss, step=global_steps)
            tf.summary.scalar("loss/ciou_loss", ciou_loss, step=global_steps)
            tf.summary.scalar("loss/conf_loss", conf_loss, step=global_steps)
            tf.summary.scalar("loss/prob_loss", prob_loss, step=global_steps)
        train_summary_writer.flush()

    for epoch in range(first_stage_epochs + second_stage_epochs):

        for image_data, target in dataset_train:
            train_step(image_data, target)

        # checkpoint after every epoch
        model.save_weights("./checkpoints/yolov4")
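
To reuse the checkpoint written above (standard Keras API; the path is the prefix passed to save_weights in the loop):

model = api.yolov4_model_create_4_train(input_size=cfg.TRAIN.INPUT_SIZE)
model.load_weights("./checkpoints/yolov4")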