コード例 #1
0
    def index_mapping_file(self, larger_one, smaller_one):
        """Map every class of the smaller class file to its position in the larger one.

        Both arguments are handed to ``read_class_names`` with
        ``dot_name_file=False``; the results are treated as sequences that
        support ``.index`` (presumably lists of class names -- verify against
        ``read_class_names``).

        Returns:
            A list with one entry per item of the smaller collection, in the
            same order, giving that item's index inside the larger collection.

        Raises:
            Whatever ``.index`` raises when an item of the smaller collection
            is missing from the larger one (``ValueError`` for a list).
        """
        superset = read_class_names(larger_one, dot_name_file=False)
        subset = read_class_names(smaller_one, dot_name_file=False)
        return [superset.index(name) for name in subset]
コード例 #2
0
    def __init__(self, dataset_type, NEW_CLASSES_TO_LEARN,
                 TOTAL_CLASSES_WILL_KNOW_AFTER_THIS):
        """Set up the dataset for one split.

        Args:
            dataset_type: ``'train'`` selects the ``TRAIN_*`` settings; any
                other value selects the ``TEST_*`` settings. It is also
                forwarded to ``load_annotations``.
            NEW_CLASSES_TO_LEARN: class source passed to
                ``index_mapping_file`` as the smaller collection.
            TOTAL_CLASSES_WILL_KNOW_AFTER_THIS: class source read with
                ``read_class_names`` to build ``self.classes``.
        """
        # Split-dependent settings are chosen in a single branch.
        if dataset_type == 'train':
            self.annot_path = TRAIN_ANNOT_PATH
            self.input_sizes = TRAIN_INPUT_SIZE
            self.batch_size = TRAIN_BATCH_SIZE
            self.data_aug = TRAIN_DATA_AUG
        else:
            self.annot_path = TEST_ANNOT_PATH
            self.input_sizes = TEST_INPUT_SIZE
            self.batch_size = TEST_BATCH_SIZE
            self.data_aug = TEST_DATA_AUG

        self.train_input_sizes = TRAIN_INPUT_SIZE
        self.strides = np.array(YOLO_STRIDES)

        self.classes = read_class_names(TOTAL_CLASSES_WILL_KNOW_AFTER_THIS,
                                        dot_name_file=False)
        self.num_classes = len(self.classes)

        # Anchors are divided by the strides (transposes line the axes up
        # for broadcasting and then restore the original layout).
        raw_anchors = np.array(YOLO_ANCHORS)
        self.anchors = (raw_anchors.T / self.strides).T
        self.anchor_per_scale = YOLO_ANCHOR_PER_SCALE
        self.max_bbox_per_scale = YOLO_MAX_BBOX_PER_SCALE

        # load_annotations runs only after the settings above are in place.
        self.annotations = self.load_annotations(dataset_type)
        self.num_samples = len(self.annotations)
        batches = np.ceil(self.num_samples / self.batch_size)
        self.num_batchs = int(batches)
        self.TOTAL_CLASSES_WILL_KNOW_AFTER_THIS = TOTAL_CLASSES_WILL_KNOW_AFTER_THIS

        # Positions of the new classes inside the full PASCAL VOC list,
        # e.g. [3, 8].
        self.new_classes = self.index_mapping_file(PASCAL_VOC_ALL_CLASSES,
                                                   NEW_CLASSES_TO_LEARN)

        self.annotation_goes = 0
        self.prev_aug_image = 0
コード例 #3
0
ファイル: dataset.py プロジェクト: djibril6/deepsort-yolov4
    def __init__(self,
                 FLAGS,
                 is_training: bool,
                 dataset_type: str = "converted_coco"):
        """Set up the dataset from ``FLAGS`` and the global ``cfg``.

        Args:
            FLAGS: object exposing ``tiny``; also passed to
                ``utils.load_config``.
            is_training: ``True`` selects ``cfg.TRAIN`` settings, otherwise
                ``cfg.TEST`` settings are used.
            dataset_type: stored unchanged on the instance.
        """
        self.tiny = FLAGS.tiny
        # load_config returns four values; only the first two are kept.
        self.strides, self.anchors, _num_class, _xyscale = utils.load_config(
            FLAGS)
        self.dataset_type = dataset_type

        # Split-dependent settings are chosen in a single branch.
        if is_training:
            self.annot_path = cfg.TRAIN.ANNOT_PATH
            self.input_sizes = cfg.TRAIN.INPUT_SIZE
            self.batch_size = cfg.TRAIN.BATCH_SIZE
            self.data_aug = cfg.TRAIN.DATA_AUG
        else:
            self.annot_path = cfg.TEST.ANNOT_PATH
            self.input_sizes = cfg.TEST.INPUT_SIZE
            self.batch_size = cfg.TEST.BATCH_SIZE
            self.data_aug = cfg.TEST.DATA_AUG

        self.train_input_sizes = cfg.TRAIN.INPUT_SIZE
        self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
        self.num_classes = len(self.classes)
        self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
        self.max_bbox_per_scale = 150

        # load_annotations runs only after the settings above are in place.
        self.annotations = self.load_annotations()
        self.num_samples = len(self.annotations)
        batches = np.ceil(self.num_samples / self.batch_size)
        self.num_batchs = int(batches)
        self.batch_count = 0
コード例 #4
0
def compute_loss(pred, conv, label, bboxes, i=0, CLASSES=''):
    """Compute the GIoU, confidence and class-probability losses for one scale.

    Args:
        pred: decoded predictions; channels ``0:4`` of the last axis are used
            as boxes and ``4:5`` as confidence.
        conv: raw head output; reshaped here to
            ``(batch, size, size, 3, 5 + NUM_CLASS)``.
        label: targets indexed like ``pred`` (``0:4`` box, ``4:5`` objectness,
            ``5:`` class probabilities).
        bboxes: ground-truth boxes, indexed as ``[:, None, None, None, :, :]``
            (presumably ``(batch, max_boxes, 4)`` -- confirm with the caller).
        i: index into ``STRIDES`` for this scale.
        CLASSES: class source passed to ``read_class_names``.

    Returns:
        Tuple ``(giou_loss, conf_loss, prob_loss)``; each term is summed over
        axes 1-4 and averaged over the batch axis.
    """
    num_class = len(read_class_names(CLASSES, dot_name_file=False))

    shape = tf.shape(conv)
    batch_size, output_size = shape[0], shape[1]
    input_size = tf.cast(STRIDES[i] * output_size, tf.float32)
    conv = tf.reshape(
        conv, (batch_size, output_size, output_size, 3, 5 + num_class))

    conv_raw_conf = conv[:, :, :, :, 4:5]
    conv_raw_prob = conv[:, :, :, :, 5:]

    pred_xywh = pred[:, :, :, :, 0:4]
    pred_conf = pred[:, :, :, :, 4:5]

    label_xywh = label[:, :, :, :, 0:4]
    respond_bbox = label[:, :, :, :, 4:5]  # objectness
    label_prob = label[:, :, :, :, 5:]

    def _batch_mean(term):
        # Sum a per-cell loss over everything but the batch axis, then average.
        return tf.reduce_mean(tf.reduce_sum(term, axis=[1, 2, 3, 4]))

    # --- box regression term -------------------------------------------
    giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
    box_w = label_xywh[:, :, :, :, 2:3]
    box_h = label_xywh[:, :, :, :, 3:4]
    # Weight shrinks as the labelled box area grows relative to the input.
    bbox_loss_scale = 2.0 - 1.0 * box_w * box_h / (input_size ** 2)
    giou_loss = respond_bbox * bbox_loss_scale * (1 - giou)

    # --- confidence term -----------------------------------------------
    iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :],
                   bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
    # Best IoU of each predicted box against the ground-truth boxes.
    max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)

    # Cells without an object whose best IoU is below the threshold count as
    # background.
    below_thresh = tf.cast(max_iou < YOLO_IOU_LOSS_THRESH, tf.float32)
    respond_bgd = (1.0 - respond_bbox) * below_thresh

    conf_focal = tf.pow(respond_bbox - pred_conf, 2)

    # Target confidence is 1 where the cell holds an object and 0 for
    # background; the same cross-entropy feeds both masks.
    conf_ce = tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox,
                                                      logits=conv_raw_conf)
    conf_loss = conf_focal * (respond_bbox * conf_ce + respond_bgd * conf_ce)

    # --- classification term -------------------------------------------
    prob_ce = tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob,
                                                      logits=conv_raw_prob)
    prob_loss = respond_bbox * prob_ce

    return (_batch_mean(giou_loss), _batch_mean(conf_loss),
            _batch_mean(prob_loss))
コード例 #5
0
    def __init__(self, dataset_type, TRAIN_CLASSES):
        """Set up the dataset for one split.

        Args:
            dataset_type: ``'train'`` selects the ``TRAIN_*`` settings; any
                other value selects the ``TEST_*`` settings. It is also
                forwarded to ``load_annotations``.
            TRAIN_CLASSES: class source read with ``read_class_names`` to
                build ``self.classes``.
        """
        # Split-dependent settings are chosen in a single branch.
        if dataset_type == 'train':
            self.annot_path = TRAIN_ANNOT_PATH
            self.input_sizes = TRAIN_INPUT_SIZE
            self.batch_size = TRAIN_BATCH_SIZE
            self.data_aug = TRAIN_DATA_AUG
        else:
            self.annot_path = TEST_ANNOT_PATH
            self.input_sizes = TEST_INPUT_SIZE
            self.batch_size = TEST_BATCH_SIZE
            self.data_aug = TEST_DATA_AUG

        self.train_input_sizes = TRAIN_INPUT_SIZE
        self.strides = np.array(YOLO_STRIDES)
        self.classes = read_class_names(TRAIN_CLASSES)
        self.num_classes = len(self.classes)
        # Anchors are divided by the strides (transposes line the axes up
        # for broadcasting and then restore the original layout).
        raw_anchors = np.array(YOLO_ANCHORS)
        self.anchors = (raw_anchors.T / self.strides).T
        self.anchor_per_scale = YOLO_ANCHOR_PER_SCALE
        self.max_bbox_per_scale = YOLO_MAX_BBOX_PER_SCALE

        # load_annotations runs only after the settings above are in place.
        self.annotations = self.load_annotations(dataset_type)
        self.num_samples = len(self.annotations)
        batches = np.ceil(self.num_samples / self.batch_size)
        self.num_batchs = int(batches)
        self.batch_count = 0
コード例 #6
0
def Create_Yolov3(input_size=416,
                  channels=3,
                  training=False,
                  CLASSES='',
                  dot_name_file=False):
    """Build a Keras model wrapping the YOLOv3 backbone and its decoders.

    Args:
        input_size: height and width of the square input.
        channels: number of input channels.
        training: when true, each raw head tensor is emitted immediately
            before its decoded counterpart; otherwise only decoded tensors
            are emitted.
        CLASSES: class source passed to ``read_class_names``; its length
            fixes the number of classes.
        dot_name_file: forwarded to ``read_class_names``.

    Returns:
        A ``tf.keras.Model`` from the input layer to the collected outputs.
    """
    class_names = read_class_names(CLASSES, dot_name_file=dot_name_file)
    num_class = len(class_names)

    inputs = Input([input_size, input_size, channels])
    outputs = []

    for scale_idx, raw_head in enumerate(YOLOv3(inputs, num_class)):
        decoded = decode(raw_head, num_class, scale_idx)
        if training:
            outputs.append(raw_head)
        outputs.append(decoded)

    return tf.keras.Model(inputs, outputs)
コード例 #7
0
def main(_argv):
    """Run YOLO detection and Deep SORT tracking over a video source.

    Reads frames from ``FLAGS.video`` (a camera index if it parses as an
    integer, otherwise a path/URL), runs the saved model at ``FLAGS.weights``
    on each frame, keeps only detections of the allowed classes, feeds them
    to the tracker and draws the confirmed tracks. Frames are optionally
    shown (``FLAGS.show``) and/or written to ``FLAGS.output``.

    Args:
        _argv: unused.
    """
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1

    # initialize deep sort
    model_filename = 'deep_sort/feature_extractor/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # configuration of object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    # Kept referenced for the lifetime of main even though it is not used
    # directly below.
    session = InteractiveSession(config=config)
    # The returned values are not used below; the call is kept in case
    # load_config validates FLAGS -- TODO confirm and drop if it is pure.
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                             tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']

    # Capture video: a value that parses as an int is a camera index,
    # anything else is passed through unchanged. Only the conversion is
    # guarded, so unrelated errors are no longer swallowed.
    try:
        video_source = int(video_path)
    except (TypeError, ValueError):
        video_source = video_path
    vid = cv2.VideoCapture(video_source)

    out = None

    if FLAGS.output:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        writer_fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, writer_fps,
                              (width, height))

    # Loop invariants: class names, the tracked-class filter and the colour
    # palette do not change between frames.
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)
    # allowed_classes = list(class_names.values())
    allowed_classes = ['person', 'car']
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

    frame_num = 0
    try:
        # looping over each frame of the video
        while True:
            return_value, frame = vid.read()
            if not return_value:
                print('End!')
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_num += 1
            print('Frame #: ', frame_num)
            image_data = cv2.resize(frame, (input_size, input_size))
            image_data = image_data / 255.
            image_data = image_data[np.newaxis, ...].astype(np.float32)
            start_time = time.time()

            # run the detections
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            # If the signature returns several outputs, the last one wins
            # (same as before).
            for value in pred_bbox.values():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf,
                    (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=FLAGS.iou,
                score_threshold=FLAGS.score)

            # convert data to numpy arrays and slice out unused elements
            num_objects = int(valid_detections.numpy()[0])
            bboxes = boxes.numpy()[0][:num_objects]
            scores = scores.numpy()[0][:num_objects]
            classes = classes.numpy()[0][:num_objects]

            # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
            original_h, original_w, _ = frame.shape
            bboxes = utils.format_boxes(bboxes, original_h, original_w)

            # Split detections into kept class names and indices to drop.
            names = []
            deleted_indx = []
            for i in range(num_objects):
                class_name = class_names[int(classes[i])]
                if class_name in allowed_classes:
                    names.append(class_name)
                else:
                    deleted_indx.append(i)
            names = np.array(names)

            # remove detections that are not in allowed_classes
            bboxes = np.delete(bboxes, deleted_indx, axis=0)
            scores = np.delete(scores, deleted_indx, axis=0)

            # encode yolo detections and feed to tracker
            features = encoder(frame, bboxes)
            detections = [
                Detection(bbox, score, class_name, feature)
                for bbox, score, class_name, feature in zip(
                    bboxes, scores, names, features)
            ]

            # run non-maxima supression
            boxs = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.class_name for d in detections])
            indices = preprocessing.non_max_suppression(
                boxs, classes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # The tracker
            tracker.predict()
            tracker.update(detections)

            # update tracks
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                class_name = track.get_class()

                # draw bbox on screen
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.rectangle(
                    frame, (int(bbox[0]), int(bbox[1] - 30)),
                    (int(bbox[0]) +
                     (len(class_name) + len(str(track.track_id))) * 17,
                     int(bbox[1])), color, -1)
                cv2.putText(frame, class_name + "-" + str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                            (255, 255, 255), 2)

            # calculate frames per second of running detections
            fps = 1.0 / (time.time() - start_time)
            print("FPS: %.2f" % fps)
            result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            if FLAGS.show:
                cv2.imshow("Output Video", result)

            # if output flag is set, save video file
            if out is not None:
                out.write(result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and writer even on error so the device is
        # freed and the output file is finalized.
        vid.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
コード例 #8
0
#trainset = Dataset('train', NEW_CLASSES_TO_LEARN, CLASSES_TO_EVALUATE)
testset = Dataset('test', TEST_CLASSES, TEST_CLASSES)

# Count the batches by iterating the test set once.
steps_per_epoch = sum(1 for _ in testset)
print(steps_per_epoch)

# Inference-mode model for the classes under evaluation.
yolo = Create_Yolov3(
    input_size=YOLO_INPUT_SIZE,
    training=False,
    CLASSES=CLASSES_TO_EVALUATE,
    dot_name_file=False,
)
yolo.load_weights('./checkpoints/yolov3_custom_val_loss_  11.08')

# Class names are read once; the class count is derived from them.
CLASSES_NAME = read_class_names(CLASSES_TO_EVALUATE)
num_classes = len(CLASSES_NAME)





def evaluate(y_pred, y_true_temp, num_classes, score_thresh=0.4, iou_thresh=0.5):
	y_true = [y_true_temp[0][0], y_true_temp[1][0], y_true_temp[2][0]]

	num_images = y_true[0].shape[0]
	true_labels_dict   = {i:0 for i in range(num_classes)} # {class: count}
	pred_labels_dict   = {i:0 for i in range(num_classes)}
	true_positive_dict = {i:0 for i in range(num_classes)}

	for i in range(num_images):
コード例 #9
0
    def preprocess_true_boxes(self, bboxes):
        """Turn ground-truth boxes into per-scale training targets.

        Only boxes whose class index is in ``self.new_classes`` are used; all
        other boxes are ignored. For each kept box the class index is
        translated from the PASCAL VOC list to the expanded class list, a
        smoothed one-hot vector is built, and the box is written into the
        label grid of every scale where it overlaps an anchor with
        IoU > 0.3. If no anchor on any scale passes that threshold, the
        single best-matching anchor is used instead.

        Args:
            bboxes: iterable of rows whose first four entries are the two box
                corners (min corner, then max corner) and whose fifth entry
                is a class index into the PASCAL VOC class list.

        Returns:
            ``(label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes,
            lbboxes)``: three label grids of shape
            ``(size, size, anchor_per_scale, 5 + num_classes)`` followed by
            three ``(max_bbox_per_scale, 4)`` arrays of box ``xywh`` values,
            one per scale.
        """
        EXPANDED_CLASSES_NAME = read_class_names(
            self.TOTAL_CLASSES_WILL_KNOW_AFTER_THIS, dot_name_file=False)
        VOC_CLASSES_NAME = list(
            read_class_names(PASCAL_VOC_ALL_CLASSES, dot_name_file=False))
        label = [
            np.zeros((self.train_output_sizes[i], self.train_output_sizes[i],
                      self.anchor_per_scale, 5 + self.num_classes))
            for i in range(3)
        ]
        bboxes_xywh = [
            np.zeros((self.max_bbox_per_scale, 4)) for _ in range(3)
        ]
        bbox_count = np.zeros((3, ))

        for bbox in bboxes:
            bbox_coor = bbox[:4]
            bbox_class_ind = bbox[4]
            # Boxes of classes that are not being learned contribute nothing.
            if bbox_class_ind not in self.new_classes:
                continue

            # np.float was removed in NumPy 1.24; np.float64 is the dtype it
            # aliased.
            onehot = np.zeros(self.num_classes, dtype=np.float64)
            # int() keeps the list lookup working when the index arrives as
            # a float-valued array element.
            bbox_class_ind_new = EXPANDED_CLASSES_NAME.index(
                VOC_CLASSES_NAME[int(bbox_class_ind)])

            onehot[bbox_class_ind_new] = 1.0
            uniform_distribution = np.full(self.num_classes,
                                           1.0 / self.num_classes)
            deta = 0.01
            # Label smoothing: blend the one-hot with a uniform distribution.
            smooth_onehot = onehot * (1 - deta) + deta * uniform_distribution

            # Corner format -> (centre x, centre y, width, height).
            bbox_xywh = np.concatenate(
                [(bbox_coor[2:] + bbox_coor[:2]) * 0.5,
                 bbox_coor[2:] - bbox_coor[:2]],
                axis=-1)
            # One row per scale, expressed in grid-cell units.
            bbox_xywh_scaled = 1.0 * bbox_xywh[
                np.newaxis, :] / self.strides[:, np.newaxis]

            iou = []
            exist_positive = False
            for i in range(3):
                # Anchors are centred on the cell that contains the box
                # centre.
                anchors_xywh = np.zeros((self.anchor_per_scale, 4))
                anchors_xywh[:, 0:2] = np.floor(
                    bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5
                anchors_xywh[:, 2:4] = self.anchors[i]

                iou_scale = bbox_iou(bbox_xywh_scaled[i][np.newaxis, :],
                                     anchors_xywh)
                iou.append(iou_scale)
                iou_mask = iou_scale > 0.3

                if np.any(iou_mask):
                    xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(
                        np.int32)

                    label[i][yind, xind, iou_mask, :] = 0
                    label[i][yind, xind, iou_mask, 0:4] = bbox_xywh
                    label[i][yind, xind, iou_mask, 4:5] = 1.0
                    label[i][yind, xind, iou_mask, 5:] = smooth_onehot

                    # The modulo wraps around once a scale's box buffer is
                    # full.
                    bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale)
                    bboxes_xywh[i][bbox_ind, :4] = bbox_xywh
                    bbox_count[i] += 1

                    exist_positive = True

            if not exist_positive:
                # Fall back to the best anchor over all scales; the flat
                # index decomposes into (scale, anchor-within-scale).
                best_anchor_ind = np.argmax(np.array(iou).reshape(-1),
                                            axis=-1)
                best_detect = int(best_anchor_ind / self.anchor_per_scale)
                best_anchor = int(best_anchor_ind % self.anchor_per_scale)
                xind, yind = np.floor(
                    bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32)

                label[best_detect][yind, xind, best_anchor, :] = 0
                label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh
                label[best_detect][yind, xind, best_anchor, 4:5] = 1.0
                label[best_detect][yind, xind, best_anchor,
                                   5:] = smooth_onehot

                bbox_ind = int(bbox_count[best_detect] %
                               self.max_bbox_per_scale)
                bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
                bbox_count[best_detect] += 1

        label_sbbox, label_mbbox, label_lbbox = label
        sbboxes, mbboxes, lbboxes = bboxes_xywh
        return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes
import numpy as np
import tensorflow as tf
from yolo.yolov3 import Create_Yolov3
from yolo.utils import image_preprocess, postprocess_boxes, nms, read_class_names
from yolo.configs import *

# --- input / output locations ---------------------------------------------
annot_path = './model_data/pascal_voc07_test.txt'
EVALUATION_CLASSES = './model_data/classes_to_evaluate.txt'
# Same path under a second name.
CURRENT_KNOWN_OBJECTS = EVALUATION_CLASSES
PASCAL_VOC_ALL_CLASSES = './model_data/pascal_voc07_cls_names.txt'
dt = './dt/'

# --- thresholds and input size ----------------------------------------------
iou_threshold, score_threshold = 0.5, 0.3
input_size = YOLO_INPUT_SIZE

# --- class-name lookups -----------------------------------------------------
CURRENT_KNOWN_OBJECTS_NAME = read_class_names(
    EVALUATION_CLASSES,
    dot_name_file=False,
)
PASCAL_VOC_ALL_CLASSES_NAME = read_class_names(PASCAL_VOC_ALL_CLASSES)

# --- model --------------------------------------------------------------------
yolo = Create_Yolov3(
    input_size=input_size,
    CLASSES=CURRENT_KNOWN_OBJECTS,
)
yolo.load_weights('./checkpoints/yolov3_custom_val_loss_ 808.03')


def load_annotations(annot_path):
    """Read an annotation file and shuffle its usable lines.

    NOTE(review): the visible definition ends right after the shuffle, with
    no ``return``, and ``final_annotations`` is initialised but never used in
    the visible part. The remainder of the function appears to be cut off.

    Args:
        annot_path: path of the text file to read, one annotation per line.
    """
    final_annotations = []
    with open(annot_path, 'r') as f:
        txt = f.readlines()
        # Keep only lines that have at least one whitespace-separated field
        # after the first one; lines with a single field or none are dropped.
        annotations = [
            line.strip() for line in txt if len(line.strip().split()[1:]) != 0
        ]
    # In-place shuffle using NumPy's global random state.
    np.random.shuffle(annotations)
コード例 #11
0
def compute_loss(pred,
                 conv,
                 label,
                 bboxes,
                 i=0,
                 CLASSES='',
                 PRED_PREV='',
                 CLASSES_PREV=''):
    """Compute detection losses plus a distillation loss against a previous model.

    Args:
        pred: decoded predictions; channels ``0:4`` of the last axis are used
            as boxes and ``4:5`` as confidence.
        conv: raw head output; reshaped here to
            ``(batch, size, size, 3, 5 + NUM_CLASS)``.
        label: targets indexed like ``pred`` (``0:4`` box, ``4:5`` objectness,
            ``5:`` class probabilities).
        bboxes: ground-truth boxes, indexed as ``[:, None, None, None, :, :]``
            (presumably ``(batch, max_boxes, 4)`` -- confirm with the caller).
        i: index into ``STRIDES`` for this scale.
        CLASSES: class source for the current model.
        PRED_PREV: predictions of the previous model; padded along the last
            axis with ``NUM_CLASS - NUM_CLASS_PREV`` channels of 0.001 so its
            width matches the current head.
        CLASSES_PREV: class source for the previous model.

    Returns:
        Tuple ``(giou_loss, conf_loss, prob_loss, distilation_loss)``; each
        term is summed over axes 1-4 and averaged over the batch axis.
    """
    num_class = len(read_class_names(CLASSES, dot_name_file=False))
    num_class_prev = len(read_class_names(CLASSES_PREV, dot_name_file=False))

    shape = tf.shape(conv)
    batch_size, output_size = shape[0], shape[1]
    input_size = tf.cast(STRIDES[i] * output_size, tf.float32)

    conv = tf.reshape(
        conv, (batch_size, output_size, output_size, 3, 5 + num_class))

    # Pad the previous predictions with near-zero channels for the classes
    # the previous model did not have.
    pad_shape = (batch_size, output_size, output_size, 3,
                 (num_class - num_class_prev))
    extender = tf.zeros(pad_shape, tf.float32) + 0.001
    prev_pred = tf.concat([PRED_PREV, extender], axis=-1)

    prev_xywh = prev_pred[:, :, :, :, 0:4]
    prev_confidence = prev_pred[:, :, :, :, 4:5]
    prev_prob = prev_pred[:, :, :, :, 5:]

    conv_raw_conf = conv[:, :, :, :, 4:5]
    conv_raw_prob = conv[:, :, :, :, 5:]

    # Not used by any loss term below; retained so the same ops are
    # evaluated as before.
    _present_confidence = tf.sigmoid(conv_raw_conf)

    # 1.0 where the previous model's confidence exceeds 0.007.
    prev_respond_bgd = tf.cast(prev_confidence > 0.007, tf.float32)

    pred_xywh = pred[:, :, :, :, 0:4]
    pred_conf = pred[:, :, :, :, 4:5]

    label_xywh = label[:, :, :, :, 0:4]
    respond_bbox = label[:, :, :, :, 4:5]  # objectness
    label_prob = label[:, :, :, :, 5:]

    def _batch_mean(term):
        # Sum a per-cell loss over everything but the batch axis, then average.
        return tf.reduce_mean(tf.reduce_sum(term, axis=[1, 2, 3, 4]))

    # --- box regression term -------------------------------------------
    giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
    box_w = label_xywh[:, :, :, :, 2:3]
    box_h = label_xywh[:, :, :, :, 3:4]
    # Weight shrinks as the labelled box area grows relative to the input.
    bbox_loss_scale = 2.0 - 1.0 * box_w * box_h / (input_size ** 2)
    giou_loss = respond_bbox * bbox_loss_scale * (1 - giou)

    # --- confidence term -----------------------------------------------
    iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :],
                   bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
    # Best IoU of each predicted box against the ground-truth boxes.
    max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)

    # Cells without an object whose best IoU is below the threshold count as
    # background.
    below_thresh = tf.cast(max_iou < YOLO_IOU_LOSS_THRESH, tf.float32)
    respond_bgd = (1.0 - respond_bbox) * below_thresh

    conf_focal = tf.pow(respond_bbox - pred_conf, 2)

    # Background cells flagged by the previous model are excluded from the
    # ordinary confidence loss; the distillation terms below cover them.
    conf_ce = tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox,
                                                      logits=conv_raw_conf)
    conf_loss = conf_focal * (
        respond_bbox * conf_ce +
        respond_bgd * (1 - prev_respond_bgd) * conf_ce)

    # --- distillation terms --------------------------------------------
    distilation_loss_reg = tf.math.abs(
        respond_bgd * prev_respond_bgd * (tf.subtract(prev_xywh, pred_xywh)))
    distill_conf_ce = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=prev_confidence, logits=conv_raw_conf)
    distilation_loss_conf = respond_bgd * (prev_respond_bgd) * distill_conf_ce
    distill_prob_ce = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=prev_prob, logits=conv_raw_prob)
    distilation_loss_prob = respond_bgd * (prev_respond_bgd) * distill_prob_ce

    # --- classification term -------------------------------------------
    prob_ce = tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob,
                                                      logits=conv_raw_prob)
    prob_loss = respond_bbox * prob_ce

    giou_loss = _batch_mean(giou_loss)
    conf_loss = _batch_mean(conf_loss)
    prob_loss = _batch_mean(prob_loss)

    distilation_loss_conf = _batch_mean(distilation_loss_conf)
    distilation_loss_prob = _batch_mean(distilation_loss_prob)
    distilation_loss_reg = _batch_mean(distilation_loss_reg)
    # The regression part is down-weighted by 0.02.
    distilation_loss = distilation_loss_prob + distilation_loss_reg * 0.02 + distilation_loss_conf

    return giou_loss, conf_loss, prob_loss, distilation_loss