Example #1
def calibration_fn():
    """function to pass as argument to the builder function
    it will be used to optimize the network based on given examples """
    print("[CALIBRATION] Starting calibration process...")
    
    images_found = sorted(os.listdir(IMAGES_PATH))
    print("[CALIBRATION] Obtaining calibration images from {}"
        .format(IMAGES_PATH))
    print("[CALIBRATION] Done! Found {} images for calibration"
        .format(len(images_found)))

    print("[CALIBRATION] Starting image yielding...")
    start_yielding = time.time()
    for image_path in images_found:
        input_image = imgutils.read_image_from_cv2(IMAGES_PATH + image_path)
        resized_image = imgutils.resize_image(input_image, (INPUT_SIZE, INPUT_SIZE))

        final_image = resized_image[np.newaxis, ...].astype("uint8")

        print("[CALIBRATION] Yielding image from {}".format(image_path))
        start_yielding = time.time()
        yield (final_image,)
        print("[CALIBRATION] Image yielding Done, it took {} ms"
            .format((time.time()-start_yielding)*1000))

    CALIBRATION_TIME = (time.time()-start_calibration)*1000
    print("[CALIBRATION] Calibration process finished, it took {} ms"
        .format(CALIBRATION_TIME))
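
For context, a generator like this is what TF-TRT's INT8 conversion consumes. A minimal usage sketch, assuming TF 2.4+ with TensorRT support; the SavedModel paths are placeholders:

from tensorflow.python.compiler.tensorrt import trt_convert as trt

params = trt.TrtConversionParams(precision_mode=trt.TrtPrecisionMode.INT8)
converter = trt.TrtGraphConverterV2(input_saved_model_dir="saved_model_dir",
                                    conversion_params=params)

# the generator is consumed here to collect INT8 calibration statistics
converter.convert(calibration_input_fn=calibration_fn)
converter.save("int8_saved_model_dir")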
Example #2
def visualize_detections(output_path, detections_path, frames_path, save_results=False):
    labels = ['pig', 'person']
    warmup(20)
    
    # load detections data
    detections = pd.read_csv(detections_path)

    # get frame paths
    frame_paths = sorted(glob.glob(frames_path + "*.png"), key=numericalSort)

    frame_count = 1
    for frame_path in frame_paths:
        # load the frame
        frame = imgutils.read_image_from_cv2(frame_path)
        # get frame dimensions
        height, width, _ = frame.shape

        # get detections
        frame_detections = detections.loc[detections["frame"] == frame_count]

        # copy the image to draw on it
        drawed_frame = frame.copy()

        for _, row in frame_detections.iterrows():
            # prepare bbox detections 
            bbox = [row.ymin, row.xmin, row.ymax, row.xmax]

            # check if there is a valid detection
            if valid_detection(bbox):
                print("[VIS-DETECTIONS] Detection is valid!")

                # get label params
                label = row.detection_class
                score = float(row.detection_score)

                print("[VIS-DETECTIONS] Label: {}".format(label))
                print("[VIS-DETECTIONS] Score: {}".format(score))

                det_width = frame_detections["width"].unique()[0]
                det_height = frame_detections["height"].unique()[0]

                # if the frame dimensions differ from the detection dims,
                # inference was performed at the network input size,
                # so we resize the bounding boxes back to the original frame size
                if (det_width != width) or (det_height != height):
                    bbox = imgutils.resize_bounding_box(frame.shape, (det_height, det_width, 3), bbox)

                drawed_frame = imgutils.draw_bounding_box(drawed_frame, bbox, label, score)
            else:
                # invalid row (e.g. the "N/D" placeholder): no more valid
                # detections for this frame
                break

        imgutils.display_frame(drawed_frame, scaled=True)
        if save_results:
            imgutils.save_frame(drawed_frame, output_path, detections_path, frame_count)

        frame_count+=1
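
numericalSort and valid_detection (and the warmup routine) are helpers assumed by this example. numericalSort matches the definition shown in Example #4; valid_detection below is only a plausible sketch, not the project's actual check:

import re

numbers = re.compile(r'(\d+)')

def numericalSort(value):
    # split "frame12.png" into ['frame', 12, '.png'] so sorted() orders
    # frames by their numeric index instead of lexicographically
    parts = numbers.split(value)
    parts[1::2] = map(int, parts[1::2])
    return parts

def valid_detection(bbox):
    # assumption: frames without detections carry a non-numeric placeholder
    # (e.g. "N/D"), so a detection is valid when every coordinate is numeric
    try:
        return all(float(coord) >= 0 for coord in bbox)
    except (TypeError, ValueError):
        return False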
Example #3
def batched_calibration_fn():
    """function to pass as argument to the builder function
    it will be used to optimize the network based on given examples """
    print("[CALIBRATION] Starting calibration process...")
    
    images_found = sorted(os.listdir(IMAGES_PATH))
    print("[CALIBRATION] Obtaining calibration images from {}"
        .format(IMAGES_PATH))
    print("[CALIBRATION] Done! Found {} images for calibration"
        .format(len(images_found)))

    print("[CALIBRATION] Starting image yielding...")
    start_calibration = time.time()
    batched_input = np.zeros((len(images_found), INPUT_SIZE, INPUT_SIZE, 3), dtype=np.uint8)
    for input_value in range(len(images_found)):
        print("[CALIBRATION] Adding image {} from {}".format(input_value+1, images_found[input_value]))
        start_adding = time.time()

        # read and resize the image
        input_image = imgutils.read_image_from_cv2(IMAGES_PATH + images_found[input_value])
        image_data = imgutils.preprocess_image(input_image, (INPUT_SIZE, INPUT_SIZE))

        # cast the image and add it to the batched input
        batched_input[input_value] = image_data.astype("uint8")

        print("[CALIBRATION] Image adding step done, it took {} ms"
            .format((time.time()-start_adding)*1000))

    print("[CALIBRATION] Yielding batched input")
    start_yielding = time.time()
    yield (final_image,)
    print("[CALIBRATION] Image yielding Done, it took {} ms"
        .format((time.time()-start_yielding)*1000))
    
    CALIBRATION_TIME = (time.time()-start_calibration)*1000
    print("[CALIBRATION] Calibration procces finished, it took {} ms"
        .format(CALIBRATION_TIME))
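
A single pre-batched yield like this is also what TF-TRT's engine pre-build step expects. Continuing the converter sketch from Example #1 (an assumption, not verified project code):

# build() calls the input function once more to pre-build TensorRT engines
# for the calibrated batch shape before saving
converter.build(input_fn=batched_calibration_fn)
converter.save("int8_saved_model_dir")  # placeholder path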
Example #4
    def run_inference_on_images(self,
                                images_path,
                                warmup_iters=0,
                                model_dir=None,
                                labels=None,
                                threshold=0.3,
                                iou=0.45):
        input_size = int(self.attributes["INPUT_SIZE"])
        if labels is None:
            labels = self.attributes["LABELS"]

        # get a copy of the graph func
        #graph_func = self.attributes["graph_func"]
        if model_dir is not None:
            saved_model_loaded = self.get_func_from_saved_model(model_dir)
        else:
            saved_model_loaded = self.attributes["saved_model_loaded"]

        #warmup
        if warmup_iters > 0:
            print("[INFERENCE] Starting warmup on {} iterations...".format(
                warmup_iters))
            warmup_start = time.time()
            # get input size from model attributes
            input_size = int(self.attributes["INPUT_SIZE"])

            # create a set of random images and perform inference
            for i in range(warmup_iters):
                print("[INFERENCE] Generating image {} with dims {}x{}".format(
                    i + 1, input_size, input_size))

                # create random image
                image = np.random.normal(size=(input_size, input_size,
                                               3)).astype(np.float32) / 255.
                # resize image to network input dimensions
                resized_image = imgutils.resize_image(image,
                                                      (input_size, input_size))

                images_data = np.asarray([resized_image]).astype(np.float32)
                input_tensor = tf.constant(images_data)

                # get the detections
                print("[WARMUP] Now performing warmup inference...")
                inference_start_time = time.time()
                detections = saved_model_loaded(input_tensor)
                print("[WARMUP] Warmup inference took {} ms".format(
                    (time.time() - inference_start_time) * 1000))

                print("[INFERENCE] Preprocessing network outputs...")
                start_output = time.time()
                # extract output tensors metadata: boxes, confidence scores
                print("[INFERENCE] Extracting output tensors metadata...")
                keyval_start = time.time()
                for key, value in detections.items():
                    #print("[INFERENCE-DEBUG] key {}: value {}".format(key, value))
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]
                    #print("[INFERENCE-DEBUG] boxes ", boxes, type(boxes))
                    #print("[INFERENCE-DEBUG] confidence ", pred_conf, type(pred_conf))

                print(
                    "[INFERENCE] Done extracting metadata, it took {} ms".format(
                        (time.time() - keyval_start) * 1000))

                print("[INFERENCE] Performing NMS to output...")
                nms_start = time.time()
                # perform non-max supression to retrieve valid detections only
                boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                    boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                    scores=tf.reshape(
                        pred_conf,
                        (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                    max_output_size_per_class=50,
                    max_total_size=50,
                    iou_threshold=iou,
                    score_threshold=threshold)

                results = [
                    boxes.numpy(),
                    scores.numpy(),
                    classes.numpy(),
                    valid_detections.numpy()
                ]
                print("[INFERENCE] NMS done, it took {} ms".format(
                    (time.time() - nms_start) * 1000))
                total_output = (time.time() - start_output) * 1000
                print(
                    "[INFERENCE] Done processing output!, it took {} ms".format(
                        total_output))

                _ = imgutils.draw_yolo_bounding_boxes(resized_image, results,
                                                      labels)

            # display warmup time in ms
            print("[WARMUP] Warmup finished, it took {} ms".format(
                (time.time() - warmup_start) * 1000))

        # define frame count and a helper function to read
        # the images sorted by numerical index
        frame_count = 1
        numbers = re.compile(r'(\d+)')

        def numericalSort(value):
            parts = numbers.split(value)
            parts[1::2] = map(int, parts[1::2])
            return parts

        # read image sorted by numerical order
        image_paths = sorted(glob.glob(images_path + "*.png"),
                             key=numericalSort)
        print("[INFERENCE] Found {} images in {} ...".format(
            len(image_paths), images_path))

        #create a class to store results
        bbox_results = BboxResults()
        performance_results = YoloPerformanceResults()

        # Iterate over all images, perform inference and update
        # results dataframe
        for image_path in image_paths:
            print("[INFERENCE] Processing frame/image {} from {}".format(
                frame_count, image_path))
            image_filename = image_path.split('/')[-1]

            # init metadata
            bbox_results.init_frame_metadata(image_filename, frame_count)
            performance_results.init_frame_metadata(image_filename,
                                                    frame_count)

            # per-image inference
            print("[INFERENCE] Loading image with opencv backend...")
            # opencv option
            image_loading_start = time.time()
            image = imgutils.read_image_from_cv2(image_path)
            total_image_loading = (time.time() - image_loading_start) * 1000

            image = image.astype(np.float32)

            # preprocess image to work on tf
            print("[INFERENCE] Preprocessing image...")
            start_preprocessing = time.time()

            # resize image to network input dimensions
            resized_image = imgutils.resize_image(image, (input_size, input_size))
            resized_image = resized_image / 255.

            images_data = np.asarray([resized_image]).astype(np.float32)
            input_tensor = tf.constant(images_data)

            total_preprocessing = (time.time() - start_preprocessing) * 1000
            print("[INFERENCE] Preprocessing done!, it took {}ms".format(
                total_preprocessing))

            print("[INFERENCE] Images data: {} - shape: {} - dtype {}".format(
                images_data, images_data.shape, images_data.dtype))
            print("[INFERENCE] Input tensor: {} - shape: {} - dtype {}".format(
                input_tensor, input_tensor.shape, input_tensor.dtype))

            print("[INFERENCE] Now performing inference...")
            inference_start_time = time.time()
            # get the detections
            detections = saved_model_loaded(input_tensor)

            total_inference = (time.time() - inference_start_time) * 1000
            print("[INFERENCE] Inference took {} ms".format(total_inference))

            print("[INFERENCE] Postprocessing network outputs...")
            start_output = time.time()
            # extract output tensors metadata: boxes, confidence scores
            print("[INFERENCE] Extracting output tensors metadata...")
            keyval_start = time.time()
            for key, value in detections.items():
                #print("[INFERENCE-DEBUG] key {}: value {}".format(key, value))
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]
                #print("[INFERENCE-DEBUG] boxes ", boxes, type(boxes))
                #print("[INFERENCE-DEBUG] confidence ", pred_conf, type(pred_conf))

            print("[INFERENCE] Done extracting metadata, it took {} ms".format(
                (time.time() - keyval_start) * 1000))

            print("[INFERENCE] Performing NMS on the output...")
            nms_start = time.time()
            # perform non-max suppression to retrieve valid detections only
            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf,
                    (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=iou,
                score_threshold=threshold)

            results = [
                boxes.numpy(),
                scores.numpy(),
                classes.numpy(),
                valid_detections.numpy()
            ]
            print("[INFERENCE] NMS done, it took {} ms".format(
                (time.time() - nms_start) * 1000))
            total_output = (time.time() - start_output) * 1000
            print("[INFERENCE] Done processing output!, it took {} ms".format(
                total_output))

            # draw results on image
            if (bbox_results is not None) and (performance_results is not None):
                drawing_time = imgutils.draw_yolo_bounding_boxes(
                    resized_image, results, labels, bbox_results=bbox_results)

                height, width, _ = image.shape
                # add new performance results to object
                performance_results.add_new_result(width, height,
                                                   total_image_loading,
                                                   total_preprocessing,
                                                   total_inference, total_output,
                                                   drawing_time)
            else:
                _ = imgutils.draw_yolo_bounding_boxes(resized_image,
                                                      results,
                                                      labels,
                                                      save=True)

            print("[INFERENCE] Image/frame {} processed".format(frame_count))
            frame_count += 1

        print("[INFERENCE] All frames procesed!")

        output_path = "{}results/{}".format(images_path,
                                            self.attributes["MODEL_NAME"])

        output_path += "/opencv-backend"

        model_name = self.attributes["MODEL_NAME"]
        precision = self.attributes["precision"]

        # save results obtained from performance and detections to output
        bbox_results.save_results(output_path,
                                  model_name,
                                  precision,
                                  threshold,
                                  resize=True,
                                  opencv=True)
        performance_results.save_results(output_path,
                                         model_name,
                                         precision,
                                         threshold,
                                         resize=True,
                                         opencv=True)
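
get_func_from_saved_model is not shown in this example; a plausible sketch of the method, mirroring how Example #8 loads its model (the class's real implementation may differ):

import tensorflow as tf
from tensorflow.python.saved_model import tag_constants

def get_func_from_saved_model(self, model_dir):
    # load the SavedModel and return its default serving signature,
    # i.e. the callable bound to saved_model_loaded above
    model_loaded = tf.saved_model.load(model_dir, tags=[tag_constants.SERVING])
    return model_loaded.signatures['serving_default']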
Example #5
    def run_inference(self,
                      image_path=None,
                      warmup_iters=0,
                      threshold=0.5,
                      iou=0.45,
                      model_dir=None):
        input_size = int(self.attributes["INPUT_SIZE"])
        labels = self.attributes["LABELS"]
        # get a copy of the graph func
        #graph_func = self.attributes["graph_func"]
        if model_dir is not None:
            saved_model_loaded = self.get_func_from_saved_model(model_dir)
        else:
            saved_model_loaded = self.attributes["saved_model_loaded"]

        #warmup
        if warmup_iters > 0:
            print("[INFERENCE] Starting warmup on {} iterations...".format(
                warmup_iters))
            warmup_start = time.time()
            # get input size from model attributes
            input_size = int(self.attributes["INPUT_SIZE"])

            # create a set of random images and perform inference
            for i in range(warmup_iters):
                print("[INFERENCE] Generating image {} with dims {}x{}".format(
                    i + 1, input_size, input_size))

                # working with numpy/cv backend
                # create random image
                resized_image = np.random.normal(
                    size=(input_size, input_size, 3)).astype(np.float32) / 255.

                images_data = np.asarray([resized_image]).astype(np.float32)
                input_tensor = tf.constant(images_data)

                # get the detections
                print("[WARMUP] Now performing warmup inference...")
                inference_start_time = time.time()
                detections = saved_model_loaded(input_tensor)
                print("[WARMUP] Warmup inference took {} ms".format(
                    (time.time() - inference_start_time) * 1000))

                print("[INFERENCE] Preprocessing network outputs...")
                start_output = time.time()
                # extract output tensors metadata: boxes, confidence scores
                print("[INFERENCE] Extracting output tensors metadata...")
                keyval_start = time.time()
                for key, value in detections.items():
                    #print("[INFERENCE-DEBUG] key {}: value {}".format(key, value))
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]
                    #print("[INFERENCE-DEBUG] boxes ", boxes, type(boxes))
                    #print("[INFERENCE-DEBUG] confidence ", pred_conf, type(pred_conf))

                print(
                    "[INFERENCE] Done extracting metadata, it took {} ms".format(
                        (time.time() - keyval_start) * 1000))

                print("[INFERENCE] Performing NMS to output...")
                nms_start = time.time()
                # perform non-max supression to retrieve valid detections only
                boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                    boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                    scores=tf.reshape(
                        pred_conf,
                        (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                    max_output_size_per_class=50,
                    max_total_size=50,
                    iou_threshold=iou,
                    score_threshold=threshold)

                results = [
                    boxes.numpy(),
                    scores.numpy(),
                    classes.numpy(),
                    valid_detections.numpy()
                ]
                print("[INFERENCE] NMS done, it took {} ms".format(
                    (time.time() - nms_start) * 1000))
                total_output = (time.time() - start_output) * 1000
                print(
                    "[INFERENCE] Done processing output!, it took {} ms".format(
                        total_output))

                # draw dummy results
                _ = imgutils.draw_yolo_bounding_boxes(resized_image, results,
                                                      labels)

            # display results in ms
            print("[WARMUP] Warmup finished, it took {} ms".format(
                (time.time() - warmup_start) * 1000))

        # case inference
        print("[INFERENCE] Loading image with opencv backend...")
        # opencv option
        image_loading_start = time.time()
        image = imgutils.read_image_from_cv2(image_path)
        total_image_loading = (time.time() - image_loading_start) * 1000

        image = image.astype(np.float32)

        # preprocess image to work on tf
        print("[INFERENCE] Preprocessing image...")
        start_preprocessing = time.time()

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # resize image to network input dimensions
        resized_image = imgutils.resize_image(image, (input_size, input_size))
        resized_image = resized_image / 255.

        images_data = np.asarray([resized_image]).astype(np.float32)
        input_tensor = tf.constant(images_data)

        total_preprocessing = (time.time() - start_preprocessing) * 1000
        print("[INFERENCE] Preprocessing done!, it took {}ms".format(
            total_preprocessing))

        print("[INFERENCE] Images data: {} - shape: {} - dtype {}".format(
            images_data, images_data.shape, images_data.dtype))
        print("[INFERENCE] Input tensor: {} - shape: {} - dtype {}".format(
            input_tensor, input_tensor.shape, input_tensor.dtype))

        print("[INFERENCE] Now performing  inference...")
        inference_start_time = time.time()
        # get the detections
        detections = saved_model_loaded(input_tensor)

        total_inference = (time.time() - inference_start_time) * 1000
        print("[INFERENCE] Inference took {} ms".format(total_inference))

        print("[INFERENCE] Preprocessing network outputs...")
        start_output = time.time()
        # extract output tensors metadata: boxes, confidence scores
        print("[INFERENCE] Extracting output tensors metadata...")
        keyval_start = time.time()
        for key, value in detections.items():
            #print("[INFERENCE-DEBUG] key {}: value {}".format(key, value))
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]
            #print("[INFERENCE-DEBUG] boxes ", boxes, type(boxes))
            #print("[INFERENCE-DEBUG] confidence ", pred_conf, type(pred_conf))

        print("[INFERENCE] Done extracting metadata, it took {}".format(
            (time.time() - keyval_start) * 1000))

        print("[INFERENCE] Performing NMS to output...")
        nms_start = time.time()
        # perform non-max supression to retrieve valid detections only
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=iou,
            score_threshold=threshold)

        results = [
            boxes.numpy(),
            scores.numpy(),
            classes.numpy(),
            valid_detections.numpy()
        ]
        print("[INFERENCE] NMS done, it took {} ms".format(
            (time.time() - nms_start) * 1000))
        total_output = (time.time() - start_output) * 1000
        print("[INFERENCE] Done procesing output!, it took {}ms".format(
            total_output))

        # draw results and save
        _ = imgutils.draw_yolo_bounding_boxes(resized_image,
                                              results,
                                              labels,
                                              save=True)

Example #6
def convert_to_xml(detections_path, images_path):  
    # get output path to save files into
    output_path = get_output_path(detections_path, images_path)

    # get detections dataframe
    detections = get_detections_csv(detections_path)

    # get image paths
    image_paths = sorted(glob.glob(images_path + "*.png"), key=numericalSort)

    # iterate over all images
    for image_path in image_paths:
        splitted_path = image_path.split('/')
        filename = splitted_path[-1]
        folder = splitted_path[-2]

        image = imgutils.read_image_from_cv2(image_path)

        # get the shape of the image to compare to
        # image dimensions from detections
        height, width, depth = image.shape

        print("[CONVERT-TO-XML] Processing file {} from folder {}".format(filename, folder))
        image_detections = detections.loc[detections["filename"] == filename]

        if "N/D" in image_detections.values:
            continue

        # set file annotations
        root = ET.Element('annotation')
        folder_anno = ET.SubElement(root, "folder")
        folder_anno.text = folder
        filename_anno = ET.SubElement(root, "filename")
        filename_anno.text = filename
        path_anno = ET.SubElement(root, "path")
        path_anno.text = image_path

        # set source annotations
        source_anno = ET.SubElement(root, "source")
        db_anno = ET.SubElement(source_anno, "database")
        db_anno.text = "Unknown"

        # set size annotations
        size_anno = ET.SubElement(root, "size")
        width_anno = ET.SubElement(size_anno, "width")
        width_anno.text = str(width)
        height_anno = ET.SubElement(size_anno, 'height')
        height_anno.text = str(height)
        depth_anno = ET.SubElement(size_anno, "depth")
        depth_anno.text = str(depth)
         
        # set segmented
        segmented_anno = ET.SubElement(root, "segmented")
        segmented_anno.text = '0'

        for _, row in image_detections.iterrows():
            detection_class = row.detection_class
            det_width, det_height = row.width, row.height

            # format bbox to resize
            bbox = np.array([row.ymin, row.xmin, 
                            row.ymax, row.xmax]).astype(int)

            # resize the bounding box if image dims differ from detection dims
            if (width != det_width) or (height != det_height):
                bbox = imgutils.resize_bounding_box((height, width, 3), 
                                                    (det_height, det_width, 3),
                                                    bbox)

            ymin = bbox[0]
            xmin = bbox[1]
            ymax = bbox[2]
            xmax = bbox[3]

            object_anno = ET.SubElement(root, "object")

            name_anno = ET.SubElement(object_anno, "name")
            name_anno.text = detection_class
            pose_anno = ET.SubElement(object_anno, "pose")
            pose_anno.text = "Unspecified"
            truncated_anno = ET.SubElement(object_anno, "truncated")
            truncated_anno.text = '0'
            difficult_anno = ET.SubElement(object_anno, "difficult")
            difficult_anno.text = '0'

            bbox_anno = ET.SubElement(object_anno, "bndbox")
            xmin_anno = ET.SubElement(bbox_anno, "xmin")
            xmin_anno.text = str(xmin)
            ymin_anno = ET.SubElement(bbox_anno, "ymin")
            ymin_anno.text = str(ymin)
            xmax_anno = ET.SubElement(bbox_anno, "xmax")
            xmax_anno.text = str(xmax)
            ymax_anno = ET.SubElement(bbox_anno, "ymax")
            ymax_anno.text = str(ymax)

        tree = ET.ElementTree(root)
            
        filename_only = get_filename_only(filename) + ".xml"
        print("[CSV-TO-XML] Saving file to {}{}".format(output_path, filename_only))
        tree.write(output_path + filename_only)
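
get_filename_only, get_detections_csv, and get_output_path are assumed helpers; minimal plausible versions of the first two (sketches, not the project's own code):

import os
import pandas as pd

def get_filename_only(filename):
    # "frame12.png" -> "frame12"
    return os.path.splitext(filename)[0]

def get_detections_csv(detections_path):
    # assumption: same CSV layout as in Example #7, with a leading index column
    return pd.read_csv(detections_path).iloc[:, 1:]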
Example #7
def track_objects(detections_path, frames_path, output_path, display_results=False, save_results=False):    
    # perform warmup to visualize results in "real time"
    warmup(20, display_results=display_results)
    
    # instantiate a tracker results object
    sort_results = SortResults()
    sort_performance = SortPerformanceResults()

    # get the detections file (dropping the leading index column)
    detections = pd.read_csv(detections_path).iloc[:, 1:]

    # get image dims to set boundaries
    width = detections["width"].unique()[0]
    height = detections["height"].unique()[0]

    # instantiate the tracker with given boundaries
    tracker = Sort(dims=(width, height))

    # read frames sorted by numerical order
    frame_paths = sorted(glob.glob(frames_path + "*.png"), key=numericalSort)

    frame_count = 1

    for frame_path in frame_paths:
        frame_filename = frame_path.split('/')[-1]
        # init sort results metadata (image and frame count)
        sort_results.init_frame_metadata(frame_filename, frame_count)
        sort_performance.init_frame_metadata(frame_filename, frame_count)

        # init a bbox array to store bboxes
        current_bboxes = np.array([[]]).astype(int)
        
        # get detections of current frame
        pig_detections_data = detections.loc[(detections["frame"] == frame_count) & (detections["detection_class"] == 'pig')]
        if pig_detections_data.shape[0] == 0:
            print("[SORT] No detections for pigs here, skipping frame {}"
                    .format(frame_count))
            frame_count += 1
            continue            
        print(pig_detections_data.head())

        # get frame and its dimensions
        print("[SORT] Loading image with opencv...")
        start_loading = time.time()
        frame = imgutils.read_image_from_cv2(frame_path)
        total_loading = (time.time()-start_loading)*1000
        print("[SORT] Done!, it took {}ms".format(total_loading))
        frame_height, frame_width, _ = frame.shape

        # get detections results image dims
        width = pig_detections_data["width"].unique()[0]
        height = pig_detections_data["height"].unique()[0]

        print("[SORT] Starting image preprocessing...")
        start_preprocessing = time.time()
        # resize image to match detections results
        if (frame_height != height) or (frame_width != width):
            print("[SORT] Frame dims are not the same from the detections")
            print("[SORT] Resizing...")
            resized_frame = imgutils.resize_image(frame, (width, height))
            print("[SORT] Done!")

        else:
            print("[SORT] Frame dims are the same from the detections")
            resized_frame = frame
        total_preprocessing = (time.time()-start_preprocessing)*1000
        print("[SORT] Done!, it took {}ms".format(total_preprocessing))
        
        print("[SORT] Preparing detections for tracker...")
        start_preparing = time.time()
        for _, row in pig_detections_data.iterrows():
            # prepare bbox detections to format required by sort tracker
            x1 = row.xmin
            x2 = row.xmax
            y1 = row.ymin
            y2 = row.ymax

            # check if there is a valid detection
            if valid_detection([x1, x2, y1, y2]):
                print("[SORT] Detection is valid!")
                # fomat detection for sort input
                bbox = np.array([[x1, y1, x2, y2]]).astype(int)

                print("[SORT] Bbox: ", bbox)
                if current_bboxes.size == 0:
                    current_bboxes = bbox
                else:
                    current_bboxes = np.append(current_bboxes, bbox, axis=0).astype(int)
            else:
                print("[SORT] Detection is invalid!, skipping...")

            
        total_preparing = (time.time()-start_preparing)*1000
        print("[SORT] Done!, it took {}ms".format(total_preparing))

        # send bboxes to the tracker:
        # unmatched detections initialize new trackers;
        # matched detections make their tracker predict and update
        print("[SORT] Updating trackers...")
        start_update = time.time()
        objects_tracked = tracker.update(current_bboxes)
        total_update = (time.time()-start_update)*1000
        print("[SORT] Done!, it took {}ms".format(total_update))

        print("[SORT] Actual trackers: ", tracker.total_trackers)
        print("[SORT] Objects tracked: ", objects_tracked)
        # get tracking results
        print("[SORT] Getting tracker results...")
        start_results = time.time()

        # set a copy of the current frame to display results
        drawed_frame = np.copy(resized_frame)
        for object_tracked in objects_tracked:
            # get trackers info like state and time since update
            tracker_id = object_tracked.id
            tracker_state = "active" if object_tracked.active else "inactive"
            time_since_update = object_tracked.time_since_update
            initialized_in_roi = object_tracked.initialized_in_ROI

            # get the bbox returned as [x1, y1, x2, y2]
            bbox = object_tracked.get_state().astype(int)
            first_centroid = (object_tracked.first_centroid[0], object_tracked.first_centroid[1])
            last_centroid = (object_tracked.last_centroid[0], object_tracked.last_centroid[1])

            # get x-y coords
            xmin = bbox[0]
            ymin = bbox[1]
            xmax = bbox[2]
            ymax = bbox[3]

            print("[SORT] Bbox from tracker: ", bbox)
            print("[SORT] Active?: ", tracker_state)
            print("[SORT] Time since update: ", time_since_update)
            print("[SORT] Initialized in roi?: " , initialized_in_roi)

            # draw bboxes and label on frame
            drawed_frame = imgutils.draw_trackers_bounding_box(drawed_frame, "PIG", object_tracked)
         
            # add a new result to trackers results
            sort_results.add_new_result(width, height,
                                    tracker_id, tracker_state, 
                                    time_since_update,
                                    initialized_in_roi,
                                    first_centroid, last_centroid,
                                    [xmin, xmax, ymin, ymax])

        total_results = (time.time()-start_results)*1000
        print("[SORT] Tracker results done!, it took {}ms".format(total_results))

        # update trackers state to active/inactive depending on position
        print("[SORT] Updating state to  trackers")
        start_update_state = time.time()
        tracker.update_trackers_state()
        total_update_state = (time.time()-start_update_state)*1000
        print("[SORT] States update!, it took {}ms".format(total_update_state))

        # draw trackers info on the image
        drawed_frame = imgutils.draw_tracker_info(drawed_frame, "PIG", tracker)
        # display results on screen, if scaled will adapt frame to screen dimensions
        if display_results:
            imgutils.display_frame(drawed_frame, scaled=True, message="Tracker")
        # save results to a given path
        if save_results:
            imgutils.save_frame(drawed_frame, output_path, detections_path, frame_count, "tracker_frames")
            
        total_trackers = tracker.total_trackers
        sort_performance.add_new_result(width, height, total_loading, total_preprocessing,
                                        total_preparing, total_update, total_results,
                                        total_update_state, total_trackers)

        frame_count+=1

    # save tracker results on given path
    sort_results.save_results(output_path, detections_path)
    # save performance results on given path
    sort_performance.save_results(output_path, detections_path)

    return tracker.total_trackers
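
A hypothetical invocation of track_objects; all paths are placeholders:

total_trackers = track_objects(detections_path="results/detections.csv",
                               frames_path="frames/",
                               output_path="results/tracking/",
                               display_results=True,
                               save_results=True)
print("[SORT] Total objects tracked: {}".format(total_trackers))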
Example #8
    def run_inference(self,
                      image_path=None,
                      warmup_iters=0,
                      threshold=0.5,
                      iou=0.45,
                      model_dir=None,
                      bbox_results=None,
                      performance_results=None,
                      opencv=False,
                      resize=False):
        input_size = int(self.attributes["INPUT_SIZE"])
        labels = self.attributes["LABELS"]
        # get a copy of the graph func
        #graph_func = self.attributes["graph_func"]
        # Load the saved model
        model_loaded = tf.saved_model.load(model_dir,
                                           tags=[tag_constants.SERVING])
        saved_model_loaded = model_loaded.signatures['serving_default']

        #warmup
        if warmup_iters > 0:
            print("[INFERENCE] Starting warmup on {} iterations...".format(
                warmup_iters))
            warmup_start = time.time()
            # get input size from model attributes
            input_size = int(self.attributes["INPUT_SIZE"])

            # create a set of random images and perform inference
            for i in range(warmup_iters):
                print("[INFERENCE] Generating image {} with dims {}x{}".format(
                    i + 1, input_size, input_size))

                # working with numpy/cv backend
                if opencv:
                    # create random image
                    resized_image = np.random.normal(size=(
                        input_size, input_size, 3)).astype(np.float32) / 255.

                    # conventional conversion (use with opencv option)
                    # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
                    #input_tensor = tf.convert_to_tensor(resized_image)
                    #input_tensor = tf.convert_to_tensor(image)
                    # The model expects a batch of images, so add an axis with `tf.newaxis`.
                    #input_tensor = input_tensor[tf.newaxis, ...]

                    images_data = np.asarray([resized_image]).astype(np.float32)
                    input_tensor = tf.constant(images_data)

                # working with tf backend for image
                else:
                    dataset, _, _ = imgutils.get_dataset_tf(
                        tensor_type='float', input_size=input_size)
                    """print("[WARMUP] Creating features...")
                    features = np.random.normal(loc=112, scale=70,
                            size=(1, input_size, input_size, 3)).astype(np.float32)
                    features = np.clip(features, 0.0, 255.0).astype(np.float32)
                    features = tf.convert_to_tensor(value=tf.compat.v1.get_variable(
                                        "features", initializer=tf.constant(features)))
                    print("[WARMUP] Creating dataset from features...")
                    dataset = tf.data.Dataset.from_tensor_slices([features])
                    dataset = dataset.repeat(count=1)"""

                    print("[WARMUP] Retrieving image and input tensor...")
                    dataset_enum = enumerate(dataset)
                    # get input tensor and cast to image (np)
                    input_tensor = list(dataset_enum)[0][1]
                    resized_image = input_tensor.numpy()[0]

                    images_data = np.asarray([resized_image]).astype(np.float32)
                    input_tensor = tf.constant(images_data)

                # get the detections
                print("[WARMUP] Now performing warmup inference...")
                inference_start_time = time.time()
                detections = saved_model_loaded(input_tensor)
                print("[WARMUP] Warmup inference took {} ms".format(
                    (time.time() - inference_start_time) * 1000))

                print("[INFERENCE] Preprocessing network outputs...")
                start_output = time.time()
                # extract output tensors metadata: boxes, confidence scores
                print("[INFERENCE] Extracting output tensors metadata...")
                keyval_start = time.time()
                for key, value in detections.items():
                    #print("[INFERENCE-DEBUG] key {}: value {}".format(key, value))
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]
                    #print("[INFERENCE-DEBUG] boxes ", boxes, type(boxes))
                    #print("[INFERENCE-DEBUG] confidence ", pred_conf, type(pred_conf))

                print(
                    "[INFERENCE] Done extracting metadata, it took {} ms".format(
                        (time.time() - keyval_start) * 1000))

                print("[INFERENCE] Performing NMS to output...")
                nms_start = time.time()
                # perform non-max supression to retrieve valid detections only
                boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                    boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                    scores=tf.reshape(
                        pred_conf,
                        (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                    max_output_size_per_class=50,
                    max_total_size=50,
                    iou_threshold=iou,
                    score_threshold=threshold)

                results = [
                    boxes.numpy(),
                    scores.numpy(),
                    classes.numpy(),
                    valid_detections.numpy()
                ]
                print("[INFERENCE] NMS done, it took {} ms".format(
                    (time.time() - nms_start) * 1000))
                total_output = (time.time() - start_output) * 1000
                print(
                    "[INFERENCE] Done processing output!, it took {} ms".format(
                        total_output))

                _ = imgutils.draw_yolo_bounding_boxes(resized_image, results,
                                                      labels)

            # display warmup time in ms
            print("[WARMUP] Warmup finished, it took {} ms".format(
                (time.time() - warmup_start) * 1000))

        # case inference
        if opencv:
            print("[INFERENCE] Loading image with opencv backend...")
            # opencv option
            image_loading_start = time.time()
            image = imgutils.read_image_from_cv2(image_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            total_image_loading = (time.time() - image_loading_start) * 1000

            # preprocess image to work on tf
            print("[INFERENCE] Preprocessing image...")
            start_preprocessing = time.time()

            # resize image to network input dimensions
            resized_image = imgutils.resize_image(image,
                                                  (input_size, input_size))
            resized_image = resized_image / 255.

            # conventional conversion (use with opencv option)
            # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
            #input_tensor = tf.convert_to_tensor(resized_image)
            #input_tensor = tf.convert_to_tensor(image)
            # The model expects a batch of images, so add an axis with `tf.newaxis`.
            #input_tensor = input_tensor[tf.newaxis, ...]

            images_data = np.asarray([resized_image]).astype(np.float32)
            input_tensor = tf.constant(images_data)

            total_preprocessing = (time.time() - start_preprocessing) * 1000
            print("[INFERENCE] Preprocessing done!, it took {}ms".format(
                total_preprocessing))

        # case tf backend to manipulate images
        else:
            print("[INFERENCE] Loading image with tf backend...")
            # dataset option, yolo models require resizing to input size
            dataset, total_image_loading, total_preprocessing = imgutils.get_dataset_tf(
                image_path=image_path,
                input_size=input_size,
                tensor_type='float')
            #print("[INFERENCE] dataset {}".format(dataset))
            # take the batched image
            dataset_enum = enumerate(dataset)
            input_tensor = list(dataset_enum)[0][1]

            # take image as np and convert to rgb
            image_bgr = input_tensor.numpy()[0]
            resized_image = image_bgr[..., ::-1].copy() / 255.

            images_data = np.asarray([resized_image]).astype(np.float32)
            input_tensor = tf.constant(images_data)

            print(
                "[INFERENCE] Images data: {} - shape: {} - dtype {} - type {}".
                format(images_data, images_data.shape, images_data.dtype,
                       type(images_data)))

            print(
                "[INFERENCE] Input tensor: {} - shape: {} - dtype {} - type {}"
                .format(input_tensor, input_tensor.shape, input_tensor.dtype,
                        type(input_tensor)))

        print("[INFERENCE] Now performing inference...")
        inference_start_time = time.time()

        #print("[INFERENCE] Input tensor: {} - dtype {}"
        #    .format(input_tensor, input_tensor.dtype))

        # get the detections
        detections = saved_model_loaded(input_tensor)

        total_inference = (time.time() - inference_start_time) * 1000
        print("[INFERENCE] Inference took {} ms".format(total_inference))

        print("[INFERENCE] Preprocessing network outputs...")
        start_output = time.time()
        # extract output tensors metadata: boxes, confidence scores
        print("[INFERENCE] Extracting output tensors metadata...")
        keyval_start = time.time()
        for key, value in detections.items():
            #print("[INFERENCE-DEBUG] key {}: value {}".format(key, value))
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]
            #print("[INFERENCE-DEBUG] boxes ", boxes, type(boxes))
            #print("[INFERENCE-DEBUG] confidence ", pred_conf, type(pred_conf))

        print("[INFERENCE] Done extracting metadata, it took {}".format(
            (time.time() - keyval_start) * 1000))

        print("[INFERENCE] Performing NMS to output...")
        nms_start = time.time()
        # perform non-max supression to retrieve valid detections only
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=iou,
            score_threshold=threshold)

        results = [
            boxes.numpy(),
            scores.numpy(),
            classes.numpy(),
            valid_detections.numpy()
        ]
        print("[INFERENCE] NMS done, it took {} ms".format(
            (time.time() - nms_start) * 1000))
        total_output = (time.time() - start_output) * 1000
        print("[INFERENCE] Done procesing output!, it took {}ms".format(
            total_output))

        # draw results on image
        if (bbox_results is not None) and (performance_results is not None):
            drawing_time = imgutils.draw_yolo_bounding_boxes(
                resized_image, results, labels, bbox_results=bbox_results)

            # original image dims (opencv backend) or input-size dims (tf backend)
            height, width, _ = image.shape if opencv else image_bgr.shape
            # add new performance results to object
            performance_results.add_new_result(width, height,
                                               total_image_loading,
                                               total_preprocessing,
                                               total_inference, total_output,
                                               drawing_time)
        else:
            _ = imgutils.draw_yolo_bounding_boxes(resized_image,
                                                  results,
                                                  labels,
                                                  save=True)
