Example no. 1
def main():
    webapi = WebAPI(IP_ADDR, PORT, ACCOUNT, PASSWORD)

    cameras = webapi.list_cameras()
    camera_ids = [camera['id'] for camera in cameras]

    # the last added camera in the Surveillance Station
    camera_id = camera_ids[-1]
    rtsp = webapi.get_liveview_rtsp(camera_id)

    fall_label = webapi.create_recording_label('fall_event')

    # Initialize fall detection model
    yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE)
    load_yolo_weights(yolo, YOLO_V3_WEIGHTS)  # use Darknet weights
    classes = read_class_names(YOLO_COCO_CLASSES)

    q = queue.Queue()

    p1 = threading.Thread(target=read_frame, args=(q, rtsp))
    p2 = threading.Thread(
        target=process_frame,
        args=(q, webapi, camera_id, yolo, classes, fall_label))
    p1.start()
    p2.start()

    p1.join()
    p2.join()

    webapi.logout()
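
The read_frame and process_frame workers that main() hands to the two threads are not shown in this example. A minimal sketch of the producer/consumer pair it assumes might look like the following; handle_detections is a hypothetical helper standing in for the YOLO forward pass and whatever is done with fall_label when a fall is detected.

import cv2

def read_frame(q, rtsp):
    # Producer: grab frames from the RTSP live view and push them onto the queue.
    cap = cv2.VideoCapture(rtsp)
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        q.put(frame)
    q.put(None)  # sentinel so the consumer can stop

def process_frame(q, webapi, camera_id, yolo, classes, fall_label):
    # Consumer: pull frames off the queue and run detection on each one.
    while True:
        frame = q.get()
        if frame is None:
            break
        # handle_detections is a hypothetical helper wrapping the YOLO inference
        # and any Surveillance Station calls made when a fall is detected.
        handle_detections(yolo, classes, frame, webapi, camera_id, fall_label)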
Example no. 2
def Create_Yolo(input_size=416,
                channels=3,
                training=False,
                CLASSES=YOLO_COCO_CLASSES):
    NUM_CLASS = len(read_class_names(CLASSES))
    input_layer = Input([input_size, input_size, channels])

    if TRAIN_YOLO_TINY:
        if YOLO_TYPE == "yolov4":
            conv_tensors = YOLOv4_tiny(input_layer, NUM_CLASS)
        elif YOLO_TYPE == "yolov3":
            conv_tensors = YOLOv3_tiny(input_layer, NUM_CLASS)
    else:
        if YOLO_TYPE == "yolov4":
            conv_tensors = YOLOv4(input_layer, NUM_CLASS)
        elif YOLO_TYPE == "yolov3":
            conv_tensors = YOLOv3(input_layer, NUM_CLASS)

    output_tensors = []
    for i, conv_tensor in enumerate(conv_tensors):
        pred_tensor = decode(conv_tensor, NUM_CLASS, i)
        if training: output_tensors.append(conv_tensor)
        output_tensors.append(pred_tensor)

    Yolo = tf.keras.Model(input_layer, output_tensors)
    return Yolo
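
A hedged usage sketch for this factory (not part of the original example); the config names are taken from Example no. 1 and may differ in other setups.

# Build an inference-mode model and restore the pretrained Darknet weights.
yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=YOLO_COCO_CLASSES)
load_yolo_weights(yolo, YOLO_V3_WEIGHTS)  # weight path as in Example no. 1
yolo.summary()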
Example no. 3
def compute_loss(pred, conv, label, bboxes, i=0, CLASSES=YOLO_COCO_CLASSES):
    NUM_CLASS = len(read_class_names(CLASSES))
    conv_shape = tf.shape(conv)
    batch_size = conv_shape[0]
    output_size = conv_shape[1]
    input_size = STRIDES[i] * output_size
    conv = tf.reshape(conv,
                      (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))

    conv_raw_conf = conv[:, :, :, :, 4:5]
    conv_raw_prob = conv[:, :, :, :, 5:]

    pred_xywh = pred[:, :, :, :, 0:4]
    pred_conf = pred[:, :, :, :, 4:5]

    label_xywh = label[:, :, :, :, 0:4]
    respond_bbox = label[:, :, :, :, 4:5]
    label_prob = label[:, :, :, :, 5:]

    giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
    input_size = tf.cast(input_size, tf.float32)

    bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * \
                      label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
    giou_loss = respond_bbox * bbox_loss_scale * (1 - giou)

    iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :],
                   bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
    # Find the prediction box with the largest IoU against the ground-truth boxes
    max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)

    # If the largest IoU is below the threshold, the prediction box is considered to contain no object, i.e. it is a background box
    respond_bgd = (1.0 - respond_bbox) * tf.cast(
        max_iou < YOLO_IOU_LOSS_THRESH, tf.float32)

    conf_focal = tf.pow(respond_bbox - pred_conf, 2)

    # Calculate the confidence loss:
    # the predicted confidence should be 1 when the grid cell contains an object and 0 when it does not.
    conf_loss = conf_focal * (
        respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(
            labels=respond_bbox, logits=conv_raw_conf) +
        respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(
            labels=respond_bbox, logits=conv_raw_conf))

    prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(
        labels=label_prob, logits=conv_raw_prob)

    giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1, 2, 3, 4]))
    conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1, 2, 3, 4]))
    prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1, 2, 3, 4]))

    return giou_loss, conf_loss, prob_loss
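
compute_loss returns one (giou, conf, prob) triple per output scale. Below is a hedged sketch of how a training step typically sums the three terms across scales, assuming the model was built with training=True so it emits (conv, pred) pairs per scale as in Example no. 2; the optimizer and targets structure are assumptions.

import tensorflow as tf

def train_step(model, image_data, targets, optimizer):
    # targets is a list with one (label, bboxes) pair per output scale.
    with tf.GradientTape() as tape:
        pred_result = model(image_data, training=True)
        giou_loss = conf_loss = prob_loss = 0.0
        for i in range(len(targets)):
            # In training mode the model outputs [conv_0, pred_0, conv_1, pred_1, ...]
            conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
            loss_items = compute_loss(pred, conv, *targets[i], i)
            giou_loss += loss_items[0]
            conf_loss += loss_items[1]
            prob_loss += loss_items[2]
        total_loss = giou_loss + conf_loss + prob_loss
    gradients = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return giou_loss, conf_loss, prob_loss, total_loss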
Example no. 4
def compute_loss(pred, conv, label, bboxes, i=0, CLASSES=YOLO_COCO_CLASSES):
    NUM_CLASS = len(read_class_names(CLASSES))
    conv_shape = tf.shape(conv)
    batch_size = conv_shape[0]
    output_size = conv_shape[1]
    input_size = STRIDES[i] * output_size
    conv = tf.reshape(conv,
                      (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))

    conv_raw_conf = conv[:, :, :, :, 4:5]
    conv_raw_prob = conv[:, :, :, :, 5:]

    pred_xywh = pred[:, :, :, :, 0:4]
    pred_conf = pred[:, :, :, :, 4:5]

    label_xywh = label[:, :, :, :, 0:4]
    respond_bbox = label[:, :, :, :, 4:5]
    label_prob = label[:, :, :, :, 5:]

    giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
    input_size = tf.cast(input_size, tf.float32)

    bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * \
                      label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
    giou_loss = respond_bbox * bbox_loss_scale * (1 - giou)

    iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :],
                   bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
    max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)

    respond_bgd = (1.0 - respond_bbox) * tf.cast(max_iou < IOU_LOSS_THRESH,
                                                 tf.float32)

    conf_focal = tf.pow(respond_bbox - pred_conf, 2)

    conf_loss = conf_focal * (
        respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(
            labels=respond_bbox, logits=conv_raw_conf) +
        respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(
            labels=respond_bbox, logits=conv_raw_conf))

    prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(
        labels=label_prob, logits=conv_raw_prob)

    giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1, 2, 3, 4]))
    conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1, 2, 3, 4]))
    prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1, 2, 3, 4]))

    return giou_loss, conf_loss, prob_loss
Example no. 5
    def __init__(self):
        # Initialize first object number
        self.nextObjectID = 0

        # Find class names and number of classes from model
        self.classes = read_class_names(YOLO_COCO_CLASSES)
        self.num_classes = len(self.classes)

        # Ordered dicts keep track of the insertion order of the inputs to each dict
        self.objects = OrderedDict()
        self.disappeared = OrderedDict()

        self.MaxMissedFrames = 50
def Create_Yolov3(input_size=416, channels=3, training=False, CLASSES=YOLO_COCO_CLASSES):
    NUM_CLASS = len(read_class_names(CLASSES))
    input_layer  = Input([input_size, input_size, channels])

    conv_tensors = YOLOv3(input_layer, NUM_CLASS)

    output_tensors = []
    for i, conv_tensor in enumerate(conv_tensors):
        pred_tensor = decode(conv_tensor, NUM_CLASS, i)
        if training: output_tensors.append(conv_tensor)
        output_tensors.append(pred_tensor)

    YoloV3 = tf.keras.Model(input_layer, output_tensors)
    return YoloV3
Example no. 7
    def __init__(self, dataset_type, TEST_INPUT_SIZE=TEST_INPUT_SIZE):
        self.annot_path = TRAIN_ANNOT_PATH if dataset_type == 'train' else TEST_ANNOT_PATH
        self.input_sizes = TRAIN_INPUT_SIZE if dataset_type == 'train' else TEST_INPUT_SIZE
        self.batch_size = TRAIN_BATCH_SIZE if dataset_type == 'train' else TEST_BATCH_SIZE
        self.data_aug = TRAIN_DATA_AUG if dataset_type == 'train' else TEST_DATA_AUG

        self.train_input_sizes = TRAIN_INPUT_SIZE
        self.strides = np.array(YOLO_STRIDES)
        self.classes = read_class_names(TRAIN_CLASSES)
        self.num_classes = len(self.classes)
        self.anchors = (np.array(YOLO_ANCHORS).T / self.strides).T
        self.anchor_per_scale = YOLO_ANCHOR_PER_SCALE
        self.max_bbox_per_scale = YOLO_MAX_BBOX_PER_SCALE

        self.annotations = self.load_annotations(dataset_type)
        self.num_samples = len(self.annotations)
        self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))
        self.batch_count = 0
def Create_Yolov3(input_size=416,
                  channels=3,
                  training=False,
                  CLASSES=YOLO_COCO_CLASSES):
    NUM_CLASS = len(read_class_names(CLASSES))
    input_layer = Input([input_size, input_size, channels])

    if TRAIN_YOLO_TINY:
        conv_tensors = YOLOv3_tiny(input_layer,
                                   NUM_CLASS)  # tiny YOLOv3 variant
    else:
        conv_tensors = YOLOv3(input_layer,
                              NUM_CLASS)  # full YOLOv3

    output_tensors = []
    for i, conv_tensor in enumerate(conv_tensors):
        pred_tensor = decode(conv_tensor, NUM_CLASS, i)
        if training: output_tensors.append(conv_tensor)
        output_tensors.append(pred_tensor)

    YoloV3 = tf.keras.Model(input_layer, output_tensors)
    return YoloV3
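
A hedged usage sketch, assuming the Dataset class above also implements the usual __iter__/__next__ batch interface found in this family of YOLOv3 training scripts (that interface is not shown on this page).

trainset = Dataset('train')                 # batches built from TRAIN_ANNOT_PATH
print("steps per epoch:", trainset.num_batchs)

yolo = Create_Yolov3(input_size=TRAIN_INPUT_SIZE,
                     training=True,
                     CLASSES=TRAIN_CLASSES)

# Assumes iterating the Dataset yields (image_data, target) batches.
for image_data, target in trainset:
    pred = yolo(image_data, training=True)
    break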
Example no. 9
def Object_tracking(YoloV3,
                    webapi,
                    recording_id,
                    video_path,
                    model,
                    cate_predictor,
                    landmark_tensor,
                    input_size=416,
                    CLASSES=YOLO_COCO_CLASSES,
                    score_threshold=0.3,
                    iou_threshold=0.45,
                    rectangle_colors='',
                    Track_only=[]):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    # initialize deep sort object
    encoder = gdet.create_box_encoder(DEEP_SORT_MODEL_FILE, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    times = []

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())

    bookmarks = {}

    while True:
        _, img = vid.read()
        print(vid.get(cv2.CAP_PROP_POS_MSEC))

        try:
            original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        except:
            break

        image_data = image_preprocess(np.copy(original_image),
                                      [input_size, input_size])
        image_data = tf.expand_dims(image_data, 0)

        t1 = time.time()
        pred_bbox = YoloV3.predict(image_data)
        t2 = time.time()

        times.append(t2 - t1)
        times = times[-20:]

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(Track_only) != 0 and NUM_CLASS[int(
                    bbox[5])] in Track_only or len(Track_only) == 0:
                boxes.append([
                    bbox[0].astype(int), bbox[1].astype(int),
                    bbox[2].astype(int) - bbox[0].astype(int),
                    bbox[3].astype(int) - bbox[1].astype(int)
                ])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_image, boxes))
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                boxes, scores, names, features)
        ]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        deleted_tracks = tracker.update(detections,
                                        vid.get(cv2.CAP_PROP_POS_MSEC),
                                        original_image)

        # Throw frames into classifier once a person is deleted from the tracker
        marks = predict_tracks_cate(model, cate_predictor, deleted_tracks,
                                    landmark_tensor, video_path)
        add_text_to_bookmarks(bookmarks, marks)

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps))

    marks = predict_tracks_cate(model, cate_predictor, tracker.tracks,
                                landmark_tensor, video_path)
    add_text_to_bookmarks(bookmarks, marks)

    timestamp = int(os.path.splitext(video_path)[0].split('-')[-1])
    for sec_since_start, texts in bookmarks.items():
        webapi.add_bookmark(recording_id, ' | '.join(texts), '',
                            timestamp + sec_since_start)
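
add_text_to_bookmarks is not defined in this example; given how bookmarks is consumed at the end (a dict mapping seconds-since-start to a list of texts joined with ' | '), a hypothetical reconstruction could be:

def add_text_to_bookmarks(bookmarks, marks):
    # Hypothetical helper: merge classifier labels into the bookmarks dict,
    # keyed by seconds since the start of the recording.
    for sec_since_start, text in marks:
        bookmarks.setdefault(sec_since_start, []).append(text)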
Example no. 10
def Object_tracking(Yolo, video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', Track_only = []):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None
    
    #initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    times, times_2 = [], []

    if video_path:
        vid = cv2.VideoCapture(video_path) # detect on video
    else:
        vid = cv2.VideoCapture(0) # detect from webcam

    # by default VideoCapture returns float instead of int
    length = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    print("VIDEO PROPERTIES:FrameCount:{}\tWidth:{}\tHeight:{}\tFps:{}\t".format(length,width,height,fps))
    
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys()) 
    val_list = list(NUM_CLASS.values())
    
    
    #1.BACKGROUND DETECTION
    backSub = cv2.createBackgroundSubtractorMOG2(history = 400, varThreshold = 16, detectShadows = False)
    bgMask=None
    frame_no=0
    while True:
        _, frame = vid.read()
        frame_no=frame_no+1
        try:
            original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB)
        except:
            break
                
        
        #1.1 BACKGROUND Update
        fgMask = backSub.apply(original_frame)
        bgMask = backSub.getBackgroundImage()
        
        
        if frame_no % 100==0:
            print(frame_no)
        
        
        image_data = image_preprocess(np.copy(original_frame), [input_size, input_size])
        #image_data = tf.expand_dims(image_data, 0)
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)
        
        #t1 = time.time()
        #pred_bbox = Yolo.predict(image_data)
        t2 = time.time()
        
        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(Track_only) !=0 and NUM_CLASS[int(bbox[5])] in Track_only or len(Track_only) == 0:
                boxes.append([bbox[0].astype(int), bbox[1].astype(int), bbox[2].astype(int)-bbox[0].astype(int), bbox[3].astype(int)-bbox[1].astype(int)])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes) 
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_frame, boxes))
        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(boxes, scores, names, features)]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue 
            bbox = track.to_tlbr() # Get the corrected/predicted bounding box
            class_name = track.get_class() #Get the class name of particular object
            tracking_id = track.track_id # Get the ID for the particular track
            index = key_list[val_list.index(class_name)] # Get predicted object index by object name
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index]) # Structure data, that we could use it with our draw_bbox function
            
            # Save to file
            box_item=bbox.tolist() + [tracking_id, index,frame_no]
            ts.save(box_item)
            
        # draw detection on frame
        image = draw_bbox(original_frame, tracked_bboxes, CLASSES=CLASSES, tracking=True)

        t3 = time.time()
        times.append(t2-t1)
        times_2.append(t3-t1)
        
        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times)/len(times)*1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2)/len(times_2)*1000)
        
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # draw original yolo detection
        #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)

        print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2))
        if output_path != '': out.write(image)
        if show:
            cv2.imshow('output', image)
            
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break
     
    cv2.imwrite(ts.out_bg_img,bgMask)
    
    cv2.destroyAllWindows()
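
The module-level ts object used above (ts.save(...) and ts.out_bg_img) is not defined in this snippet. A minimal stand-in consistent with how it is used might be:

import csv

class TrackSaver:
    """Hypothetical stand-in for the ts helper used in the function above."""

    def __init__(self, out_csv="tracks.csv", out_bg_img="background.png"):
        self.out_bg_img = out_bg_img        # path the background mask is written to
        self._file = open(out_csv, "w", newline="")
        self._writer = csv.writer(self._file)

    def save(self, box_item):
        # box_item = [x1, y1, x2, y2, tracking_id, class_index, frame_no]
        self._writer.writerow(box_item)

ts = TrackSaver()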
def Object_tracking(Yolo, video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', Track_only = [], custom_yolo=None, custom_classes=YOLO_CUSTOM_CLASSES, Custom_track_only=[]):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None


    #initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    times, times_2 = [], []

    if video_path:
        vid = cv2.VideoCapture(video_path) # detect on video
    else:
        vid = cv2.VideoCapture(0) # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())

    # set a bunch of flags and variables for made baskets and possessions
    possession = None
    possession_list = []
    combined_possession_avg = 0.5

    total_basket_count=0
    basket_frame_list = []

    baskets_dict = {"Dark": 0, "Light": 0}

    made_basket_first_frame = 0
    made_basket_frames = 0
    basket_marked = False
    basketball_center = None  # initialized here so the tracking block below works when no custom model is passed

    if custom_yolo:
      NUM_CUSTOM_CLASS = read_class_names(custom_classes)
      custom_key_list = list(NUM_CUSTOM_CLASS.keys())
      custom_val_list = list(NUM_CUSTOM_CLASS.values())

    frame_counter = 0
    # loop through each frame in video
    while True:
        _, frame = vid.read()

        try:
            first_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            original_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB)
            frame_counter += 1
        except:
            break

        image_data = image_preprocess(np.copy(first_frame), [input_size, input_size])
        #image_data = tf.expand_dims(image_data, 0)
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        # CUSTOM BLOCK FOR BASKETBALL
        if custom_yolo:

          if YOLO_FRAMEWORK == "tf":
            # use yolo model to make prediction on the image data
            custom_pred_bbox = custom_yolo.predict(image_data)

          # reshape our data to be in correct form for processing
          custom_pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in custom_pred_bbox]
          custom_pred_bbox = tf.concat(custom_pred_bbox, axis=0)

          # get boxes based on threshold
          custom_bboxes = postprocess_boxes(custom_pred_bbox, original_frame, input_size, 0.3)
          # custom_bboxes = nms(custom_bboxes, iou_threshold, method='nms')

          # extract bboxes to boxes (x, y, width, height), scores and names
          custom_boxes, custom_scores, custom_names = [], [], []
          for bbox in custom_bboxes:
              if len(Custom_track_only) !=0 and NUM_CUSTOM_CLASS[int(bbox[5])] in Custom_track_only or len(Custom_track_only) == 0:
                  custom_boxes.append([bbox[0].astype(int), bbox[1].astype(int), bbox[2].astype(int)-bbox[0].astype(int), bbox[3].astype(int)-bbox[1].astype(int)])
                  custom_scores.append(bbox[4])
                  custom_names.append(NUM_CUSTOM_CLASS[int(bbox[5])])

          # Obtain all the detections for the given frame.
          custom_boxes = np.array(custom_boxes)
          custom_names = np.array(custom_names)
          custom_scores = np.array(custom_scores)

          # take note of the highest "scoring" made basket and basketball obj in each frame
          highest_scoring_basketball = 0
          basketball_box = None
          basketball_center = None
          highest_scoring_made_basket = 0
          made_basket_box = None
          for i, bbox in enumerate(custom_bboxes):
            # loop through each bounding box to get the "best one" of the frame
            # we do this because sometimes our model will detect two, and we know there can only be one
            name = custom_names[i]
            score = round(custom_scores[i], 3)
            if name == 'basketball':
              if score > highest_scoring_basketball:
                highest_scoring_basketball = score
                basketball_box = bbox
            if name == 'made-basket':
              if score > .85 and score > highest_scoring_made_basket:
                highest_scoring_made_basket = score
                made_basket_box = bbox

          # if it sees a basketball, put a box on it and note the center (for possession)
          if basketball_box is not None:
            cv2.rectangle(original_frame, (int(basketball_box[0]), int(basketball_box[1])), (int(basketball_box[2]), int(basketball_box[3])), (0,0,255), 1)
            cv2.rectangle(original_frame, (int(basketball_box[0]), int(basketball_box[1]-30)), (int(basketball_box[0])+(10)*17, int(basketball_box[1])), (0,0,255), -1)
            cv2.putText(original_frame, "basketball" + "-" + str(highest_scoring_basketball),(int(basketball_box[0]), int(basketball_box[1]-10)),0, 0.5, (255,255,255),1)
            basketball_center = ( (basketball_box[2]+basketball_box[0])/2, (basketball_box[3]+basketball_box[1])/2 )


          if made_basket_box is not None:
            # if theres a made basket put the box on it
            cv2.rectangle(original_frame, (int(made_basket_box[0]), int(made_basket_box[1])), (int(made_basket_box[2]), int(made_basket_box[3])), (0,255,0), 1)
            cv2.rectangle(original_frame, (int(made_basket_box[0]), int(made_basket_box[1]-30)), (int(made_basket_box[0])+(15)*17, int(made_basket_box[1])), (0,255,0), -1)
            cv2.putText(original_frame, "made-basket" + " " + str(highest_scoring_made_basket),(int(made_basket_box[0]), int(made_basket_box[1]-10)),0, 0.6, (0,0,0),1)

            if made_basket_frames == 0:
              # if this is the first frame in the sequence
              made_basket_first_frame = frame_counter

            # increment a counter for made basket frames
            made_basket_frames += 1

            # if there were 3 consecutive frames AND we haven't marked the basket yet, count it!
            if made_basket_frames >= 3 and not basket_marked:
              basket_marked = True
              basket_frame_list.append(made_basket_first_frame)
              if possession:
                # record which "team" scored the basket
                baskets_dict[possession] += 1

          # if no made basket make sure the made basket counter is at zero
          else:
            # no made basket
            made_basket_frames = 0

          # 60 frames after a made basket we can reset the "marked basket" flag to False
          # in essence this means we start looking for made baskets again
          if basket_marked and frame_counter > basket_frame_list[-1] + 60:
            basket_marked = False

        # END CUSTOM BLOCK

        # PERSON PREDICTION AND TRACKING BLOCK

        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)

        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')



        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(Track_only) !=0 and NUM_CLASS[int(bbox[5])] in Track_only or len(Track_only) == 0:
                w = bbox[2].astype(int)-bbox[0].astype(int)
                h = bbox[3].astype(int)-bbox[1].astype(int)
                if h < height/3 and w < width/4:
                  if h > 120:
                    boxes.append([bbox[0].astype(int), bbox[1].astype(int), w, h])
                    scores.append(bbox[4])
                    names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)

        # detect jersey color using the tracked person's bounding box
        patches = [gdet.extract_image_patch(frame, box, [box[3], box[2]]) for box in boxes]
        color_ratios = [find_color(patch) for patch in patches]

        features = np.array(encoder(original_frame, boxes))

        # mark the detection
        detections = [Detection(bbox, score, class_name, feature, color_ratio) for bbox, score, class_name, feature, color_ratio in zip(boxes, scores, names, features, color_ratios)]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        color_ratio_list = []
        check_possession = False
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue

            color_ratio = track.get_color_ratio()
            color_ratio_list.append(color_ratio)

            bbox = track.to_tlbr() # Get the corrected/predicted bounding box
            class_name = track.get_class() #Get the class name of particular object
            tracking_id = track.track_id # Get the ID for the particular track
            index = key_list[val_list.index(class_name)] # Get predicted object index by object name

            tracked_bboxes.append(bbox.tolist() + [tracking_id, index]) # Structure data, that we could use it with our draw_bbox function

            # if there is a basketball in the frame and it is inside a person's bounding box, note which box it is in for possession
            if basketball_center:
              if basketball_center[0] >= bbox[0] and basketball_center[0] <= bbox[2]:
                if basketball_center[1] >= bbox[1] and basketball_center[1] <= bbox[3]:
                  check_possession = True
                  if color_ratio <= .2:
                    # light team
                    possession_list.append(0)
                  else:
                    # dark team
                    possession_list.append(1)

            else:
              # no basketball in frame
              # possession_list.append(-1)
              # test_list.pop(0)
              pass

        # if the ball is in a bounding box, update our possession tracker
        if check_possession:
          if len(possession_list) > 60:
            # take an average of the last 60 possession samples to determine the current possession,
            # weighting the most recent detections more heavily
            # this algo is a WIP
            possession_list = possession_list[-60:]
            # full_avg = sum(possession_list)/len(possession)
            last_60_avg = sum(possession_list[-60:])/60
            last_30_avg = sum(possession_list[-30:])/30
            last_15_avg = sum(possession_list[-15:])/15
            last_5_avg = sum(possession_list[-5:])/5

            combined_possession_avg = round((last_60_avg + last_30_avg + last_15_avg + last_5_avg)/4,3)

            #most_common_possession = stats.mode(possession_list)[0]

          else:
            combined_possession_avg = round(sum(possession_list)/len(possession_list),3)

          # use our possession average to determine who has the ball right now
          if combined_possession_avg < 0.5:
            possession = "Light"
          elif combined_possession_avg > 0.5:
            possession = "Dark"


        # draw detection on frame
        image = draw_bbox(original_frame, tracked_bboxes, color_ratios=color_ratio_list, CLASSES=CLASSES, tracking=True)

        t3 = time.time()
        times.append(t2-t1)
        times_2.append(t3-t1)

        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times)/len(times)*1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2)/len(times_2)*1000)

        if possession == "Light":
          image = cv2.putText(image, "Possession: {}".format(possession), (width-400, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (50, 255, 255), 2)
        else:
          image = cv2.putText(image, "Possession: {}".format(possession), (width-400, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # image = cv2.putText(image, "Light: {} Dark: {} None: {}".format(possession_list.count(0), possession_list.count(1), possession_list.count(-1)), (400, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        image = cv2.putText(image, "Possession Avg: {}".format(combined_possession_avg), (400, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # draw original yolo detection
        #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)

        print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2))
        if output_path != '': out.write(image)
        if show:
            cv2.imshow('output', image)

            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
    return_data = {"baskets_dict": baskets_dict, "basket_frame_list": basket_frame_list}
    print("video saved to {}".format(output_path))
    return(return_data)
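
A hedged example of invoking this basketball-tracking variant, assuming the two detectors were built with Create_Yolo as in Example no. 2 and that a custom class file with the basketball and made-basket labels is configured; file paths are placeholders.

yolo = Create_Yolo(input_size=416, CLASSES=YOLO_COCO_CLASSES)
custom_yolo = Create_Yolo(input_size=416, CLASSES=YOLO_CUSTOM_CLASSES)

stats = Object_tracking(yolo,
                        "game.mp4",              # hypothetical input clip
                        "game_tracked.mp4",
                        show=False,
                        Track_only=["person"],
                        custom_yolo=custom_yolo,
                        Custom_track_only=["basketball", "made-basket"])
print(stats["baskets_dict"], stats["basket_frame_list"])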
def Object_tracking(Yolo,
                    video_path,
                    input_size=416,
                    CLASSES=YOLO_COCO_CLASSES,
                    score_threshold=0.3,
                    iou_threshold=0.45,
                    Track_only=["person"]):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    #initialize deep sort object
    model_filename = 'data/model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    times, times_2 = [], []

    vid = cv2.VideoCapture(video_path)  # detect on video

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())
    out_tracked_bboxes = []  # per-frame tracked boxes collected inside the loop below
    for x in range(120):
        _, frame = vid.read()

        try:
            original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB)
        except:
            break

        image_data = image_preprocess(np.copy(original_frame),
                                      [input_size, input_size])
        #image_data = tf.expand_dims(image_data, 0)
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)

        #t1 = time.time()
        #pred_bbox = Yolo.predict(image_data)
        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_frame, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(Track_only) != 0 and NUM_CLASS[int(
                    bbox[5])] in Track_only or len(Track_only) == 0:
                boxes.append([
                    bbox[0].astype(int), bbox[1].astype(int),
                    bbox[2].astype(int) - bbox[0].astype(int),
                    bbox[3].astype(int) - bbox[1].astype(int)
                ])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_frame, boxes))
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                boxes, scores, names, features)
        ]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            bbox = track.to_tlbr()  # Get the corrected/predicted bounding box
            class_name = track.get_class(
            )  #Get the class name of particular object
            tracking_id = track.track_id  # Get the ID for the particular track
            index = key_list[val_list.index(
                class_name)]  # Get predicted object index by object name
            tracked_bboxes.append(
                bbox.tolist() + [tracking_id, index]
            )  # Structure data, that we could use it with our draw_bbox function

        # draw detection on frame
        image = draw_bbox(original_frame,
                          tracked_bboxes,
                          CLASSES=CLASSES,
                          tracking=True)

        t3 = time.time()
        times.append(t2 - t1)
        times_2.append(t3 - t1)

        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # draw original yolo detection
        #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)

        out_tracked_bboxes.append(tracked_bboxes)

        print(
            "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
                ms, fps, fps2))
Example no. 13
def get_mAP(model,
            dataset,
            score_threshold=0.25,
            iou_threshold=0.50,
            TEST_INPUT_SIZE=TEST_INPUT_SIZE):
    MINOVERLAP = 0.5  # default value (defined in the PASCAL VOC2012 challenge)
    NUM_CLASS = read_class_names(TRAIN_CLASSES)

    ground_truth_dir_path = 'mAP/ground-truth'
    if os.path.exists(ground_truth_dir_path):
        shutil.rmtree(ground_truth_dir_path)

    if not os.path.exists('mAP'): os.mkdir('mAP')
    os.mkdir(ground_truth_dir_path)

    print(f'\ncalculating mAP{int(iou_threshold*100)}...\n')

    gt_counter_per_class = {}
    for index in range(dataset.num_samples):
        ann_dataset = dataset.annotations[index]

        original_image, bbox_data_gt = dataset.parse_annotation(
            ann_dataset, True)

        if len(bbox_data_gt) == 0:
            bboxes_gt = []
            classes_gt = []
        else:
            bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
        ground_truth_path = os.path.join(ground_truth_dir_path,
                                         str(index) + '.txt')
        num_bbox_gt = len(bboxes_gt)

        bounding_boxes = []
        for i in range(num_bbox_gt):
            class_name = NUM_CLASS[classes_gt[i]]
            xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
            bbox = xmin + " " + ymin + " " + xmax + " " + ymax
            bounding_boxes.append({
                "class_name": class_name,
                "bbox": bbox,
                "used": False
            })

            # count that object
            if class_name in gt_counter_per_class:
                gt_counter_per_class[class_name] += 1
            else:
                # if class didn't exist yet
                gt_counter_per_class[class_name] = 1
            bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax]) + '\n'
        with open(f'{ground_truth_dir_path}/{str(index)}_ground_truth.json',
                  'w') as outfile:
            json.dump(bounding_boxes, outfile)

    gt_classes = list(gt_counter_per_class.keys())
    # sort the classes alphabetically
    gt_classes = sorted(gt_classes)
    n_classes = len(gt_classes)

    times = []
    json_pred = [[] for i in range(n_classes)]
    for index in range(dataset.num_samples):
        ann_dataset = dataset.annotations[index]

        image_name = ann_dataset[0].split('/')[-1]
        original_image, bbox_data_gt = dataset.parse_annotation(
            ann_dataset, True)

        image = image_preprocess(np.copy(original_image),
                                 [TEST_INPUT_SIZE, TEST_INPUT_SIZE])
        image_data = tf.expand_dims(image, 0)

        t1 = time.time()
        pred_bbox = model.predict(image_data)
        t2 = time.time()
        times.append(t2 - t1)

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image, TEST_INPUT_SIZE,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        for bbox in bboxes:
            coor = np.array(bbox[:4], dtype=np.int32)
            score = bbox[4]
            class_ind = int(bbox[5])
            class_name = NUM_CLASS[class_ind]
            score = '%.4f' % score
            xmin, ymin, xmax, ymax = list(map(str, coor))
            bbox = xmin + " " + ymin + " " + xmax + " " + ymax
            json_pred[gt_classes.index(class_name)].append({
                "confidence": str(score),
                "file_id": str(index),
                "bbox": str(bbox)
            })

    ms = sum(times) / len(times) * 1000
    fps = 1000 / ms

    for class_name in gt_classes:
        json_pred[gt_classes.index(class_name)].sort(
            key=lambda x: float(x['confidence']), reverse=True)
        with open(f'{ground_truth_dir_path}/{class_name}_predictions.json',
                  'w') as outfile:
            json.dump(json_pred[gt_classes.index(class_name)], outfile)

    # Calculate the AP for each class
    sum_AP = 0.0
    ap_dictionary = {}
    # open file to store the results
    with open("mAP/results.txt", 'w') as results_file:
        results_file.write("# AP and precision/recall per class\n")
        count_true_positives = {}
        for class_index, class_name in enumerate(gt_classes):
            count_true_positives[class_name] = 0
            # Load predictions of that class
            predictions_file = f'{ground_truth_dir_path}/{class_name}_predictions.json'
            predictions_data = json.load(open(predictions_file))

            # Assign predictions to ground truth objects
            nd = len(predictions_data)
            tp = [0] * nd  # creates an array of zeros of size nd
            fp = [0] * nd
            for idx, prediction in enumerate(predictions_data):
                file_id = prediction["file_id"]
                # assign prediction to ground truth object if any
                #   open ground-truth with that file_id
                gt_file = f'{ground_truth_dir_path}/{str(file_id)}_ground_truth.json'
                ground_truth_data = json.load(open(gt_file))
                ovmax = -1
                gt_match = -1
                # load prediction bounding-box
                # bounding box of prediction
                bb = [float(x) for x in prediction["bbox"].split()]
                for obj in ground_truth_data:
                    # look for a class_name match
                    if obj["class_name"] == class_name:
                        # bounding box of ground truth
                        bbgt = [float(x) for x in obj["bbox"].split()]
                        bi = [
                            max(bb[0], bbgt[0]),
                            max(bb[1], bbgt[1]),
                            min(bb[2], bbgt[2]),
                            min(bb[3], bbgt[3])
                        ]
                        iw = bi[2] - bi[0] + 1
                        ih = bi[3] - bi[1] + 1
                        if iw > 0 and ih > 0:
                            # compute overlap (IoU) = area of intersection / area of union
                            ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (
                                bbgt[2] - bbgt[0] + 1) * (bbgt[3] - bbgt[1] +
                                                          1) - iw * ih
                            ov = iw * ih / ua
                            if ov > ovmax:
                                ovmax = ov
                                gt_match = obj

                # assign prediction as true positive/don't care/false positive
                if ovmax >= MINOVERLAP:  # if ovmax >= minimum overlap
                    if not bool(gt_match["used"]):
                        # true positive
                        tp[idx] = 1
                        gt_match["used"] = True
                        count_true_positives[class_name] += 1
                        # update the ".json" file
                        with open(gt_file, 'w') as f:
                            f.write(json.dumps(ground_truth_data))
                    else:
                        # false positive (multiple detection)
                        fp[idx] = 1
                else:
                    # false positive
                    fp[idx] = 1

            # compute precision/recall
            cumsum = 0
            for idx, val in enumerate(fp):
                fp[idx] += cumsum
                cumsum += val
            cumsum = 0
            for idx, val in enumerate(tp):
                tp[idx] += cumsum
                cumsum += val
            #print(tp)
            rec = tp[:]
            for idx, val in enumerate(tp):
                rec[idx] = float(tp[idx]) / gt_counter_per_class[class_name]
            #print(rec)
            prec = tp[:]
            for idx, val in enumerate(tp):
                prec[idx] = float(tp[idx]) / (fp[idx] + tp[idx])
            #print(prec)

            ap, mrec, mprec = voc_ap(rec, prec)
            sum_AP += ap
            text = "{0:.3f}%".format(
                ap * 100
            ) + " = " + class_name + " AP  "  #class_name + " AP = {0:.2f}%".format(ap*100)

            rounded_prec = ['%.3f' % elem for elem in prec]
            rounded_rec = ['%.3f' % elem for elem in rec]
            # Write to results.txt
            results_file.write(text + "\n Precision: " + str(rounded_prec) +
                               "\n Recall   :" + str(rounded_rec) + "\n\n")

            print(text)
            ap_dictionary[class_name] = ap

        results_file.write("\n# mAP of all classes\n")
        mAP = sum_AP / n_classes

        text = "mAP = {:.3f}%, {:.2f} FPS".format(mAP * 100, fps)
        results_file.write(text + "\n")
        print(text)

        return mAP * 100
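
A hedged usage sketch for this evaluation routine, assuming a test Dataset and a model restored from a checkpoint as in the other examples; the checkpoint path is a placeholder.

testset = Dataset('test')
yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=TRAIN_CLASSES)
yolo.load_weights("./checkpoints/yolov3_custom")  # hypothetical checkpoint path

mAP50 = get_mAP(yolo, testset, score_threshold=0.05, iou_threshold=0.50)
print("mAP@50 = {:.2f}%".format(mAP50))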
Example no. 14
def Object_tracking(YoloV3,
                    video_path,
                    output_path,
                    input_size=416,
                    show=False,
                    CLASSES=YOLO_COCO_CLASSES,
                    score_threshold=0.3,
                    iou_threshold=0.45,
                    rectangle_colors='',
                    Track_only=[]):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    #initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    times = []

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps,
                          (width, height))  # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())
    while True:
        _, img = vid.read()

        try:
            original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        except:
            break
        image_data = image_preprocess(np.copy(original_image),
                                      [input_size, input_size])
        image_data = tf.expand_dims(image_data, 0)

        t1 = time.time()
        pred_bbox = YoloV3.predict(image_data)
        t2 = time.time()

        times.append(t2 - t1)
        times = times[-20:]

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(Track_only) != 0 and NUM_CLASS[int(
                    bbox[5])] in Track_only or len(Track_only) == 0:
                boxes.append([
                    bbox[0].astype(int), bbox[1].astype(int),
                    bbox[2].astype(int) - bbox[0].astype(int),
                    bbox[3].astype(int) - bbox[1].astype(int)
                ])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_image, boxes))
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                boxes, scores, names, features)
        ]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            bbox = track.to_tlbr()  # Get the corrected/predicted bounding box
            class_name = track.get_class(
            )  #Get the class name of particular object
            tracking_id = track.track_id  # Get the ID for the particular track
            index = key_list[val_list.index(
                class_name)]  # Get predicted object index by object name
            tracked_bboxes.append(
                bbox.tolist() + [tracking_id, index]
            )  # Structure data, that we could use it with our draw_bbox function

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        # draw detection on frame
        image = draw_bbox(original_image,
                          tracked_bboxes,
                          CLASSES=CLASSES,
                          tracking=True)
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # draw original yolo detection
        #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)

        #print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps))
        if output_path != '': out.write(image)
        if show:
            cv2.imshow('output', image)

            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
Example no. 15
print("Output - ", output_video_filepath)

input_video = cv2.VideoCapture(input_video_filepath)
fps = input_video.get(cv2.CAP_PROP_FPS)

Width = int(input_video.get(cv2.CAP_PROP_FRAME_WIDTH))  # float
Height = int(input_video.get(cv2.CAP_PROP_FRAME_HEIGHT))  # float
total_frames = int(input_video.get(cv2.CAP_PROP_FRAME_COUNT))
fourcc = cv2.VideoWriter_fourcc(*'XVID')
output_video = cv2.VideoWriter(output_video_filepath, fourcc, fps,
                               (Width, Height))

times = []

CLASSES = "model_data/coco.names"
NUM_CLASS = read_class_names(CLASSES)
key_list = list(NUM_CLASS.keys())
val_list = list(NUM_CLASS.values())

classes = None
with open('coco.names.txt', 'r') as f:
    classes = [line.strip() for line in f.readlines()]

# read pre-trained model and config file
net = cv2.dnn.readNet(
    './model_data/yolov3.weights',
    'yolov3.cfg')  # or ./model_data/yolov4.weights and yolov4.cfg
max_cosine_distance = 1
nn_budget = None

#initialize deep sort object
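
The snippet ends just before the deep sort setup it announces; following the pattern used in the other tracking examples on this page, that initialization typically looks like the following (not part of the original snippet):

model_filename = 'model_data/mars-small128.pb'
encoder = gdet.create_box_encoder(model_filename, batch_size=1)
metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                   max_cosine_distance,
                                                   nn_budget)
tracker = Tracker(metric)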
Example no. 16
def track_object(Yolo,
                 video_path,
                 vid_output_path,
                 text_output_path,
                 input_size=416,
                 show=False,
                 CLASSES=YOLO_COCO_CLASSES,
                 score_threshold=0.3,
                 iou_threshold=0.45,
                 rectangle_colors='',
                 tracking=True,
                 track_only=[],
                 tracker_max_age=30,
                 passenger_det=False,
                 face_score_threshold=0.3,
                 color="bincount"):
    """
    Do detection on video
    :param Yolo: <model_obj> YOLO model for vehicle detection
    :param video_path: <str> Path to video file. Leave empty to use camera
    :param vid_output_path: <str> Path to save processed video. Leave empty to not save
    :param input_size: <int> YOLO model input size
    :param show: <bool> True if you want to see processing live
    :param CLASSES: <obj> YOLO model classed. By default they are taken from the config file
    :param score_threshold: <float> minimum confidence for vehicle detection
    :param iou_threshold: <float> minimum bounding box overlap for them to be counted as same object
    :param rectangle_colors: bounding box colors. Currently does nothing
    :param tracking: whether to use vehicle tracking
    :param track_only: <list> List of objects to track if detector detects more
    :param tracker_max_age: <int> number of missed before track is deleted
    :param face_det: <bool> whether to initialize face detection
    :param face_score_threshold: <float> minimum confidence for face detection
    :param color: <str> Color detection method to use. None if neither one
    :return:
    """
    if not Yolo:
        Yolo = load_yolo_model()

    if passenger_det:
        passenger_det = FaceDetector()
    else:
        passenger_det = None

    if text_output_path:
        write_csv([[
            "x1", "y1", "x2", "y2", "id", "class", "probability",
            "color" if color else None, "passengers" if passenger_det else None
        ]], text_output_path)

    # Definition of the deep sort parameters
    max_cosine_distance = 0.7
    nn_budget = None
    # initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric, max_age=tracker_max_age)

    times, times_2 = [], []

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(vid_output_path, codec, fps,
                          (width, height))  # vid_output_path should use a container compatible with XVID (typically .avi)

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())
    while True:
        ret, frame = vid.read()
        if not ret:
            break  # end of stream or failed read

        # note: applying COLOR_BGR2RGB twice just swaps the channels back to BGR (kept as in the source)
        original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB)

        image_data = image_preprocess(np.copy(original_frame),
                                      [input_size, input_size])
        # image_data = tf.expand_dims(image_data, 0)
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)

        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_frame, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(track_only) == 0 or NUM_CLASS[int(bbox[5])] in track_only:
                boxes.append([
                    bbox[0].astype(int), bbox[1].astype(int),
                    bbox[2].astype(int) - bbox[0].astype(int),
                    bbox[3].astype(int) - bbox[1].astype(int)
                ])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_frame, boxes))
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                boxes, scores, names, features)
        ]  # if score >= confidence_threshold]

        # Pass detections to the deep sort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            bbox = track.to_tlbr()  # Get the corrected/predicted bounding box
            class_name = track.get_class()  # Get the class name of this particular object
            tracking_id = track.track_id  # Get the ID for this particular track
            index = key_list[val_list.index(class_name)]  # Get the predicted object index by object name
            # Structure the data so it can be used with our draw_bbox function
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index, track.class_confidence])

        # draw detection on frame
        image = draw_bbox(original_frame,
                          tracked_bboxes,
                          CLASSES=CLASSES,
                          tracking=True,
                          color=color,
                          text_output_path=text_output_path,
                          passenger_detector=passenger_det,
                          passenger_threshold=face_score_threshold)

        t3 = time.time()
        times.append(t2 - t1)
        times_2.append(t3 - t1)

        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2))
        if vid_output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)

            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
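# A minimal usage sketch for track_object(), assuming load_yolo_model() from the
# same utils module; the file paths and class filter below are illustrative only:
if __name__ == "__main__":
    yolo = load_yolo_model()
    track_object(yolo,
                 video_path="traffic.mp4",        # hypothetical input video
                 vid_output_path="tracked.avi",   # '' to skip saving the video
                 text_output_path="tracked.csv",  # '' to skip the CSV log
                 show=False,
                 track_only=["car", "bus", "truck"],
                 color="bincount")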
Esempio n. 17
def Object_tracking(Yolo,
                    video_path,
                    output_path,
                    input_size=416,
                    show=True,
                    CLASSES=YOLO_COCO_CLASSES,
                    score_threshold=0.3,
                    iou_threshold=0.45,
                    rectangle_colors='',
                    Track_only=[]):
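    """
    Run detection and Deep SORT tracking on a video, mirroring track_object() above,
    and log per-box rows (Frame_index, Score, Confidence, Pixel_Area, X1, Y1, X2, Y2,
    ClassID) to the hard-coded simV5_anotation.CSV file through the modified draw_bbox.
    output_path is written with the mp4v codec, so it should point to an .mp4 file.
    """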
    output_file = "D:/PELUSO/ITSligo/lectures_MENG/4-Symulation and Testing/assignments/assignment 2 - group/simV5_anotation.CSV"
    csv_file = open(output_file, mode='a', newline='')  # newline='' prevents blank rows when writing CSV on Windows
    results_csv = csv.writer(csv_file,
                             delimiter=',',
                             quotechar='"',
                             quoting=csv.QUOTE_MINIMAL)
    results_csv.writerow([
        'Frame_index', 'Score', 'Confidence', 'Pixel_Area', 'X1', 'Y1', 'X2',
        'Y2', 'ClassID'
    ])

    # Definition of the parameters
    max_cosine_distance = 0.9
    nn_budget = None

    #initialize deep sort object
    model_filename = "D:/PELUSO/ITSligo/lectures_MENG/4-Symulation and Testing/assignments/assignment 2 - group/TensorFlow-2.x-YOLOv3-master/model_data/mars-small128.pb"  ##'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    times, times_2 = [], []

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'mp4v')  ##(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps,
                          (width, height))  # output_path must be .mp4
    print("FPS:::::" + str(fps))

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())
    frame_idx = 0
    while True:
        ret, frame = vid.read()
        if not ret:
            break  # end of stream or failed read

        # note: applying COLOR_BGR2RGB twice just swaps the channels back to BGR (kept as in the source)
        original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB)

        image_data = image_preprocess(np.copy(original_frame),
                                      [input_size, input_size])
        #image_data = tf.expand_dims(image_data, 0)
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)

        #t1 = time.time()
        #pred_bbox = Yolo.predict(image_data)
        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_frame, input_size,
                                   score_threshold)

        bboxes = nms(bboxes, iou_threshold, method='nms', sigma=0.4)
        print(np.argmax(pred_bbox[:, 5:], axis=-1))

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(Track_only) == 0 or NUM_CLASS[int(bbox[5])] in Track_only:
                boxes.append([
                    bbox[0].astype(int), bbox[1].astype(int),
                    bbox[2].astype(int) - bbox[0].astype(int),
                    bbox[3].astype(int) - bbox[1].astype(int)
                ])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_frame, boxes))
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                boxes, scores, names, features)
        ]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        tracked_scores = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            bbox = track.to_tlbr()  # Get the corrected/predicted bounding box
            class_name = track.get_class()  # Get the class name of this particular object
            tracking_id = track.track_id  # Get the ID for this particular track
            index = key_list[val_list.index(class_name)]  # Get the predicted object index by object name
            # Structure the data so it can be used with our draw_bbox function
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index])
            try:
                tracked_scores.append(track.Confidence)  # Confidence is presumably a custom attribute added to the Track class
            except AttributeError:
                print("skip")  # this track carries no stored confidence

        # draw detections on the frame (this project's draw_bbox was modified to also take results_csv, frame_idx and tracked_scores)
        frame_idx += 1
        print("------frame_idx-------" + str(frame_idx))
        image = draw_bbox(original_frame,
                          results_csv,
                          tracked_bboxes,
                          frame_idx,
                          tracked_scores,
                          show_label=True,
                          show_confidence=True,
                          CLASSES=CLASSES,
                          tracking=True)  ##new

        t3 = time.time()
        times.append(t2 - t1)
        times_2.append(t3 - t1)

        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

        ##image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # draw original yolo detection
        #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)

        print(
            "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
                ms, fps, fps2))
        if output_path != '': out.write(image)
        if show:
            cv2.imshow('output', image)

            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    csv_file.close()  # close the underlying file; csv.writer objects have no close() method
    cv2.destroyAllWindows()
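# A minimal usage sketch for Object_tracking(), assuming the Create_Yolo() /
# load_yolo_weights() helpers and config constants used in the other examples;
# the file paths below are illustrative only:
yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=YOLO_COCO_CLASSES)
load_yolo_weights(yolo, YOLO_V3_WEIGHTS)
Object_tracking(yolo,
                video_path="simulation_run.mp4",
                output_path="simulation_tracked.mp4",
                show=False,
                Track_only=["car"])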
Esempio n. 18
times, times_2 = [], []

vid = cv2.VideoCapture(video_path)
prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() else cv2.CAP_PROP_FRAME_COUNT
totalFrames = int(vid.get(prop))
print(f"info - Total frames are {totalFrames}")

width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(vid.get(cv2.CAP_PROP_FPS))
codec = cv2.VideoWriter_fourcc(*'MJPG')
out = cv2.VideoWriter(output_path, codec, fps, (width, height))

NUM_CLASS = read_class_names(YOLO_COCO_CLASSES)
key_list = list(NUM_CLASS.keys())
val_list = list(NUM_CLASS.values())

while True:
    ret, frame = vid.read()
    if not ret:
        break  # end of stream or failed read

    # note: applying COLOR_BGR2RGB twice just swaps the channels back to BGR (kept as in the source)
    original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB)
    image_data = image_preprocess(np.copy(original_frame),
                                  [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)