コード例 #1
0
ファイル: custom_train.py プロジェクト: syleo22/Xtreme-Vision
class instance_custom_training:
    def __init__(self):
        self.model_dir = os.getcwd()

        
    def modelConfig(self,network_backbone = "resnet101",  num_classes =  1,  class_names = ["BG"], batch_size = 1, image_max_dim = 512, image_min_dim = 512, image_resize_mode ="square", gpu_count = 1):
        """Build ``self.config`` for training and report the chosen backbone.

        ``num_classes`` is increased by one before it is handed to ``Config``
        as ``NUM_CLASSES``; ``batch_size`` becomes ``IMAGES_PER_GPU``. All
        other arguments are forwarded under their upper-case config names.
        """
        total_classes = 1 + num_classes
        self.config = Config(
            BACKBONE=network_backbone,
            NUM_CLASSES=total_classes,
            class_names=class_names,
            IMAGES_PER_GPU=batch_size,
            IMAGE_MAX_DIM=image_max_dim,
            IMAGE_MIN_DIM=image_min_dim,
            IMAGE_RESIZE_MODE=image_resize_mode,
            GPU_COUNT=gpu_count,
        )

        # Every backbone value other than "resnet101" is reported as resnet50.
        reported = "resnet101" if network_backbone == "resnet101" else "resnet50"
        print("Using " + reported + " as network backbone For Mask R-CNN model")

    def load_pretrained_model(self, model_path):
        """Create a training-mode Mask R-CNN and load weights from ``model_path``.

        Weights are matched by layer name; the four head layers listed in
        ``skipped_layers`` are excluded from loading.
        """
        skipped_layers = ["mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"]
        network = modellib.MaskRCNN(mode="training", model_dir=self.model_dir, config=self.config)
        self.model = network
        network.load_weights(model_path, by_name=True, exclude=skipped_layers)
    
    
    def load_dataset(self, dataset):
        """Convert and load the ``train`` and ``test`` splits under ``dataset``.

        For each split, the labelme json files in ``<dataset>/<split>`` are
        converted into the single file ``<dataset>/<split>.json``, which is
        then loaded into a ``Data`` object and prepared. The results are
        stored as ``self.dataset_train`` and ``self.dataset_test``.
        """
        for split in ("train", "test"):
            # Folder with the individual labelme json files of this split.
            labelme_folder = os.path.abspath(os.path.join(dataset, split))
            # Destination of the single converted json file.
            merged_json = os.path.abspath(os.path.join(dataset, split + ".json"))

            labelme2coco.convert(labelme_folder, merged_json)

            split_data = Data()
            # Attach before loading so the attribute exists even if loading fails.
            setattr(self, "dataset_" + split, split_data)
            split_data.load_data(merged_json, labelme_folder)
            split_data.prepare()
    

    def visualize_sample(self):
        """Display one randomly chosen training image with its boxes and masks."""
        training_set = self.dataset_train
        chosen_id = np.random.choice(training_set.image_ids)

        picture = training_set.load_image(chosen_id)
        instance_masks, labels = training_set.load_mask(chosen_id)
        boxes = extract_bboxes(instance_masks)

        # Render the annotated sample and show it without axes.
        rendered = display_box_instances(picture, boxes, instance_masks, labels, training_set.class_names)
        plt.imshow(rendered)
        plt.axis("off")
        plt.show()
            

        

    def train_model(self,num_epochs,path_trained_models,  layers = "all", augmentation = False):
        """Train ``self.model`` on the datasets loaded by ``load_dataset``.

        Args:
            num_epochs: forwarded to the model's ``train`` as ``epochs``.
            path_trained_models: forwarded to ``train`` as ``models``.
            layers: forwarded to ``train`` as ``layers``.
            augmentation: ``False`` trains without augmentation; any other
                value enables the built-in flip/blur pipeline defined below
                (the passed value itself is not used as the pipeline).
        """
        print('Train %d' % len(self.dataset_train.image_ids), "images")
        print('Validate %d' % len(self.dataset_test.image_ids), "images")

        # Keep the equality test (not truthiness) so values such as None
        # still select the augmentation branch, as before.
        if augmentation == False:
            print("No augmentation")
            self.model.train(self.dataset_train, self.dataset_test, models = path_trained_models,
                             epochs=num_epochs, layers=layers)
            return

        # All three augmenters come from imgaug.augmenters; the previous code
        # mixed in an ``iaa`` alias that is not used anywhere else here.
        augmenters = imgaug.augmenters
        pipeline = augmenters.Sometimes(0.5, [
            augmenters.Fliplr(0.5),
            augmenters.Flipud(0.5),
            augmenters.GaussianBlur(sigma=(0.0, 5.0)),
        ])

        print("Applying augmentation on dataset")
        self.model.train(self.dataset_train, self.dataset_test, models = path_trained_models,
                         augmentation = pipeline, epochs=num_epochs, layers=layers)


    def evaluate_model(self, model_path, iou_threshold = 0.5):
        self.model = MaskRCNN(mode = "inference", model_dir = os.getcwd(), config = self.config)  
        if os.path.isfile(model_path):
            model_files = [model_path]
             
        if os.path.isdir(model_path):
            model_files = sorted([os.path.join(model_path, file_name) for file_name in os.listdir(model_path)])
        for modelfile in model_files:
            if str(modelfile).endswith(".h5"):
                self.model.load_weights(modelfile, by_name=True)
            APs = []
            #outputs = list()
            for image_id in self.dataset_test.image_ids:                                                                                                                                                                                                                                                                                                                                                                             
                # load image, bounding boxes and masks for the image id
                image, image_meta, gt_class_id, gt_bbox, gt_mask = load_image_gt(self.dataset_test, self.config, image_id)
                # convert pixel values (e.g. center)
                scaled_image = mold_image(image, self.config)
                # convert image into one sample
                sample = np.expand_dims(scaled_image, 0)
		        # make prediction
                yhat = self.model.detect(sample, verbose=0)
		        # extract results for first sample
                r = yhat[0]
		        # calculate statistics, including AP
                AP, _, _, _ = compute_ap(gt_bbox, gt_class_id, gt_mask, r["rois"], r["class_ids"], r["scores"], r['masks'])
		        # store
                APs.append(AP)
	        # calculate the mean AP across all images
            mAP = np.mean(APs)
            print(modelfile, "evaluation using iou_threshold", iou_threshold, "is", f"{mAP:01f}", '\n')
コード例 #2
0
class custom_segmentation:
    def __init__(self):
        self.model_dir = os.getcwd()

    def inferConfig(self, name=None, network_backbone="resnet101", num_classes=1,
                    class_names=["BG"], batch_size=1, detection_threshold=0.7,
                    image_max_dim=512, image_min_dim=512,
                    image_resize_mode="square", gpu_count=1):
        """Create the inference configuration and store it on ``self.config``.

        ``num_classes`` is increased by one before being passed as
        ``NUM_CLASSES``; ``batch_size`` becomes ``IMAGES_PER_GPU`` and
        ``detection_threshold`` becomes ``DETECTION_MIN_CONFIDENCE``.
        ``name`` is accepted but not used by this method.
        """
        options = {
            "BACKBONE": network_backbone,
            "NUM_CLASSES": 1 + num_classes,
            "class_names": class_names,
            "IMAGES_PER_GPU": batch_size,
            "IMAGE_MAX_DIM": image_max_dim,
            "IMAGE_MIN_DIM": image_min_dim,
            "DETECTION_MIN_CONFIDENCE": detection_threshold,
            "IMAGE_RESIZE_MODE": image_resize_mode,
            "GPU_COUNT": gpu_count,
        }
        self.config = Config(**options)

    def load_model(self, model_path):
        """Create an inference-mode ``MaskRCNN`` and load weights by layer name."""
        network = MaskRCNN(mode="inference", model_dir=self.model_dir, config=self.config)
        self.model = network
        network.load_weights(model_path, by_name=True)

    def segmentImage(self,
                     image_path,
                     show_bboxes=False,
                     output_image_name=None,
                     verbose=None):
        """Segment the image stored at ``image_path``.

        Args:
            image_path: path handed to ``cv2.imread``.
            show_bboxes: ``False`` renders with ``display_instances``; any
                other value renders with ``display_box_instances``, which
                additionally receives the detection scores.
            output_image_name: if not ``None``, the rendered image is written
                to this path with ``cv2.imwrite``.
            verbose: any non-``None`` value prints a progress message.

        Returns:
            ``(r, output)``: the first detection result and the rendered image.

        Raises:
            ValueError: if ``cv2.imread`` could not load an image.
        """
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise ValueError(f"Could not read an image from {image_path!r}")

        # Swap the first and last colour channels for the detector input.
        new_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if verbose is not None:
            print("Processing image...")

        r = self.model.detect([new_img])[0]

        if show_bboxes == False:
            # apply segmentation mask
            output = display_instances(image, r['rois'], r['masks'],
                                       r['class_ids'], self.config.class_names)
            saved_message = "Processed image saved successfully in your current working directory."
        else:
            # apply segmentation mask with bounding boxes
            output = display_box_instances(image, r['rois'], r['masks'],
                                           r['class_ids'],
                                           self.config.class_names,
                                           r['scores'])
            saved_message = "Processed Image saved successfully in your current working directory."

        if output_image_name is not None:
            cv2.imwrite(output_image_name, output)
            print(saved_message)

        return r, output

    def segmentFrame(self, frame, show_bboxes=False, output_image_name=None,
                     verbose=None):
        """Segment an already loaded frame.

        Returns ``(r, output)``: the first detection result and the rendered
        frame. The rendered frame is also written to ``output_image_name``
        when that argument is not ``None``.
        """
        converted = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if verbose is not None:
            print("Processing frame...")

        r = self.model.detect([converted])[0]

        masks_only = show_bboxes == False
        if masks_only:
            # Masks only.
            output = display_instances(frame, r['rois'], r['masks'],
                                       r['class_ids'], self.config.class_names)
            saved_message = "Processed image saved successfully in your current working directory."
        else:
            # Masks plus bounding boxes and scores.
            output = display_box_instances(frame, r['rois'], r['masks'],
                                           r['class_ids'],
                                           self.config.class_names,
                                           r['scores'])
            saved_message = "Processed Image saved successfully in your current working directory."

        if output_image_name is not None:
            cv2.imwrite(output_image_name, output)
            print(saved_message)

        return r, output

    def process_video(self,
                      video_path,
                      show_bboxes=False,
                      output_video_name=None,
                      frames_per_second=None):
        """Segment every frame of a video file and optionally save the result.

        Args:
            video_path: source handed to ``cv2.VideoCapture``.
            show_bboxes: ``False`` renders with ``display_instances``; any
                other value renders with ``display_box_instances``.
            output_video_name: if not ``None``, rendered frames are written
                to this file.
            frames_per_second: frame rate of the written video. When it is
                ``None`` and an output name is given, the rate reported by
                the capture is used.

        Returns:
            ``(r, output)`` for the last processed frame, or ``(None, None)``
            when no frame could be read.
        """
        capture = cv2.VideoCapture(video_path)
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Create the writer on the same condition that later guards writing;
        # previously it was created on ``frames_per_second`` but used on
        # ``output_video_name``.
        save_video = None
        if output_video_name is not None:
            if frames_per_second is None:
                # NOTE(review): some sources may report 0 here - confirm.
                frames_per_second = capture.get(cv2.CAP_PROP_FPS)
            codec = cv2.VideoWriter_fourcc(*'DIVX')
            save_video = cv2.VideoWriter(output_video_name, codec,
                                         frames_per_second, (width, height))

        counter = 0
        r = output = None
        start = time.time()

        try:
            while True:
                ret, frame = capture.read()
                if not ret:
                    break
                # Count only frames that were actually read.
                counter += 1

                # Run detection
                results = self.model.detect([frame], verbose=0)
                print("No. of frames:", counter)
                r = results[0]

                if show_bboxes == False:
                    # apply segmentation mask
                    output = display_instances(frame, r['rois'], r['masks'],
                                               r['class_ids'],
                                               self.config.class_names)
                else:
                    # apply segmentation mask with bounding boxes
                    output = display_box_instances(frame, r['rois'],
                                                   r['masks'], r['class_ids'],
                                                   self.config.class_names,
                                                   r['scores'])
                output = cv2.resize(output, (width, height),
                                    interpolation=cv2.INTER_AREA)

                if save_video is not None:
                    save_video.write(output)
        finally:
            # Release capture and writer even if detection raises.
            capture.release()
            if save_video is not None:
                save_video.release()

        end = time.time()
        print(f"Processed {counter} frames in {end-start:.1f} seconds")

        return r, output

    def process_camera(self,
                       cam,
                       show_bboxes=False,
                       output_video_name=None,
                       frames_per_second=None,
                       show_frames=None,
                       frame_name=None,
                       verbose=None,
                       check_fps=False):
        """Segment frames read from a capture object until the loop is stopped.

        Args:
            cam: capture object; its ``read``, ``get`` and ``release``
                methods are used.
            show_bboxes: ``False`` renders with ``display_instances``; any
                other value renders with ``display_box_instances``.
            output_video_name: if not ``None``, rendered frames are resized
                to the capture size and written to this file.
            frames_per_second: frame rate handed to the video writer.
            show_frames: when ``True`` and ``frame_name`` is given, each
                rendered frame is shown in a window named ``frame_name``;
                pressing ``q`` there stops the loop.
            frame_name: window title used when showing frames.
            verbose: any non-``None`` value prints a processing summary.
            check_fps: when ``True``, prints the measured frames per second.

        Returns:
            ``(r, output)`` for the last processed frame.
        """
        capture = cam

        save_video = None
        if output_video_name is not None:
            width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
            codec = cv2.VideoWriter_fourcc(*'DIVX')
            save_video = cv2.VideoWriter(output_video_name, codec,
                                         frames_per_second, (width, height))

        counter = 0
        start = datetime.now()

        while True:
            ret, frame = capture.read()
            if ret:
                # Run detection
                results = self.model.detect([frame])
                r = results[0]

                if show_bboxes == False:
                    # apply segmentation mask
                    output = display_instances(frame, r['rois'], r['masks'],
                                               r['class_ids'],
                                               self.config.class_names)
                else:
                    # apply segmentation mask with bounding boxes
                    output = display_box_instances(frame, r['rois'],
                                                   r['masks'], r['class_ids'],
                                                   self.config.class_names,
                                                   r['scores'])
                counter += 1

                if show_frames == True:
                    if frame_name is not None:
                        cv2.imshow(frame_name, output)

                        if cv2.waitKey(25) & 0xFF == ord('q'):
                            break

                if save_video is not None:
                    output = cv2.resize(output, (width, height),
                                        interpolation=cv2.INTER_AREA)
                    save_video.write(output)

            # NOTE(review): a failed read only ends the loop when exactly 30
            # frames have been processed; otherwise reading is retried.
            elif counter == 30:
                break

        end = datetime.now()
        # Always compute the elapsed time: the verbose summary needs it even
        # when ``check_fps`` is off.
        timetaken = (end - start).total_seconds()

        if check_fps == True:
            fps = counter / timetaken
            print(f"{fps} frames per seconds")

        if verbose is not None:
            print(f"Processed {counter} frames in {timetaken:.1f} seconds")

        capture.release()

        if save_video is not None:
            save_video.release()

        return r, output
コード例 #3
0
ファイル: instance.py プロジェクト: yogeshkumarpilli/PixelLib
class custom_segmentation:
    def __init__(self):
        self.model_dir = os.getcwd()

    def inferConfig(self,
                    name=None,
                    network_backbone="resnet101",
                    num_classes=1,
                    class_names=["BG"],
                    batch_size=1,
                    detection_threshold=0.7,
                    image_max_dim=512,
                    image_min_dim=512,
                    image_resize_mode="square",
                    gpu_count=1):
        """Populate ``self.config`` with the inference settings.

        One is added to ``num_classes`` to form ``NUM_CLASSES``;
        ``detection_threshold`` is stored as ``DETECTION_MIN_CONFIDENCE`` and
        ``batch_size`` as ``IMAGES_PER_GPU``. The ``name`` argument is not
        read by this method.
        """
        class_total = 1 + num_classes
        inference_config = Config(
            BACKBONE=network_backbone,
            NUM_CLASSES=class_total,
            class_names=class_names,
            IMAGES_PER_GPU=batch_size,
            IMAGE_MAX_DIM=image_max_dim,
            IMAGE_MIN_DIM=image_min_dim,
            DETECTION_MIN_CONFIDENCE=detection_threshold,
            IMAGE_RESIZE_MODE=image_resize_mode,
            GPU_COUNT=gpu_count,
        )
        self.config = inference_config

    def load_model(self, model_path):
        """Build the inference model and load the weights at ``model_path`` by name."""
        build_args = {"mode": "inference", "model_dir": self.model_dir, "config": self.config}
        self.model = MaskRCNN(**build_args)
        self.model.load_weights(model_path, by_name=True)

    def segmentImage(self,
                     image_path,
                     show_bboxes=False,
                     extract_segmented_objects=False,
                     save_extracted_objects=False,
                     mask_points_values=False,
                     process_frame=False,
                     output_image_name=None,
                     verbose=None):
        """Segment one image and optionally crop out the detected objects.

        Args:
            image_path: path handed to ``cv2.imread``, or the image array
                itself when ``process_frame`` is set.
            show_bboxes: ``False`` renders with ``display_instances``; any
                other value renders with ``display_box_instances``, which
                additionally receives the detection scores.
            extract_segmented_objects: crop each detected object out of the
                input and store the result under ``r["extracted_objects"]``.
            save_extracted_objects: also write every crop to
                ``segmented_object_<n>.jpg`` (``n`` starts at 1).
            mask_points_values: replace ``r["masks"]`` (and the extracted
                crop, if any) by lists of polygon points.
            process_frame: treat ``image_path`` as an already loaded image.
            output_image_name: if not ``None``, the rendered image is written
                to this path.
            verbose: any non-``None`` value prints a progress message.

        Returns:
            ``(r, output)``: the first detection result and the rendered image.

        Raises:
            ValueError: if ``cv2.imread`` could not load an image.
        """
        if process_frame == False:
            image = cv2.imread(image_path)
            # cv2.imread signals failure by returning None instead of raising.
            if image is None:
                raise ValueError(f"Could not read an image from {image_path!r}")
        else:
            image = image_path

        extract = bool(extract_segmented_objects)
        # Crops are taken from a private copy made before rendering. The old
        # code masked the caller's frame in place, so every object after the
        # first was cut from already-zeroed pixels.
        # NOTE(review): the display helpers presumably draw on ``image`` too,
        # which is another reason to copy first - confirm.
        source = image.copy() if extract else None

        new_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # Run detection
        if verbose is not None:
            print("Processing image...")
        r = self.model.detect([new_img])[0]

        if show_bboxes == False:
            output = display_instances(image, r['rois'], r['masks'],
                                       r['class_ids'], self.config.class_names)
        else:
            output = display_box_instances(image, r['rois'], r['masks'],
                                           r['class_ids'],
                                           self.config.class_names,
                                           r['scores'])

        if output_image_name is not None:
            cv2.imwrite(output_image_name, output)
            print(
                "Processed image saved successfully in your current working directory."
            )

        extracted_objects = None
        if extract:
            extracted_objects = self._crop_segmented_objects(
                source, r['masks'], save_extracted_objects)

        if mask_points_values == True:
            r['masks'] = self._layer_polygons(r['masks'])
            # As before, the crop is traversed along its last axis; skipped
            # when nothing was detected and there is no crop.
            if extracted_objects is not None:
                extracted_objects = self._layer_polygons(extracted_objects)

        if extract:
            r.update({"extracted_objects": extracted_objects})

        return r, output

    def _crop_segmented_objects(self, source, masks, save_extracted_objects):
        """Return the masked crop of the last layer in ``masks`` (``None`` if empty).

        NOTE(review): only the crop of the final mask is returned, matching
        the previous behaviour; every crop is still written to disk when
        ``save_extracted_objects`` is ``True``.
        """
        extracted = None
        for index in range(masks.shape[2]):
            layer = masks[:, :, index]
            # Rows and columns that contain at least one mask pixel.
            window = np.ix_(layer.any(1), layer.any(0))
            # Keep pixels inside the mask and zero the rest of the window.
            keep = layer[window][:, :, np.newaxis].astype(source.dtype)
            extracted = source[window] * keep

            if save_extracted_objects == True:
                save_path = "segmented_object" + "_" + str(index + 1) + ".jpg"
                cv2.imwrite(save_path, extracted)
        return extracted

    def _layer_polygons(self, stack):
        """Return ``Mask(layer).polygons().points`` for each layer along axis 2."""
        return [
            Mask(stack[:, :, index]).polygons().points
            for index in range(stack.shape[2])
        ]

    def segmentFrame(self, frame, show_bboxes=False, mask_points_values=False,
                     output_image_name=None, verbose=None):
        """Segment an in-memory frame by delegating to ``segmentImage``.

        The frame is passed with ``process_frame=True``. When
        ``output_image_name`` is given, the rendered frame is written there.
        ``verbose`` is accepted but not forwarded.
        """
        forwarded = {
            "show_bboxes": show_bboxes,
            "process_frame": True,
            "mask_points_values": mask_points_values,
        }
        segmask, output = self.segmentImage(frame, **forwarded)

        wants_file = output_image_name is not None
        if wants_file:
            cv2.imwrite(output_image_name, output)
            print("Processed image saved successfully in your current working directory.")

        return segmask, output

    def process_video(self,
                      video_path,
                      show_bboxes=False,
                      mask_points_values=False,
                      output_video_name=None,
                      frames_per_second=None):
        """Segment every frame of a video file and optionally save the result.

        Args:
            video_path: source handed to ``cv2.VideoCapture``.
            show_bboxes: forwarded to ``segmentImage``.
            mask_points_values: forwarded to ``segmentImage``.
            output_video_name: if not ``None``, rendered frames are written
                to this file.
            frames_per_second: frame rate of the written video. When it is
                ``None`` and an output name is given, the rate reported by
                the capture is used.

        Returns:
            ``(segmask, output)`` for the last processed frame, or
            ``(None, None)`` when no frame could be read.
        """
        capture = cv2.VideoCapture(video_path)
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Create the writer on the same condition that later guards writing;
        # previously it was created on ``frames_per_second`` but used on
        # ``output_video_name``.
        save_video = None
        if output_video_name is not None:
            if frames_per_second is None:
                # NOTE(review): some sources may report 0 here - confirm.
                frames_per_second = capture.get(cv2.CAP_PROP_FPS)
            codec = cv2.VideoWriter_fourcc(*'DIVX')
            save_video = cv2.VideoWriter(output_video_name, codec,
                                         frames_per_second, (width, height))

        counter = 0
        segmask = output = None
        start = time.time()

        try:
            while True:
                ret, frame = capture.read()
                if not ret:
                    break
                # Count only frames that were actually read.
                counter += 1

                segmask, output = self.segmentImage(
                    frame,
                    show_bboxes=show_bboxes,
                    process_frame=True,
                    mask_points_values=mask_points_values)
                print("No. of frames:", counter)

                output = cv2.resize(output, (width, height),
                                    interpolation=cv2.INTER_AREA)

                if save_video is not None:
                    save_video.write(output)
        finally:
            # Release capture and writer even if segmentation raises.
            capture.release()
            if save_video is not None:
                save_video.release()

        end = time.time()
        print(f"Processed {counter} frames in {end-start:.1f} seconds")

        return segmask, output

    def process_camera(self,
                       cam,
                       show_bboxes=False,
                       mask_points_values=False,
                       output_video_name=None,
                       frames_per_second=None,
                       show_frames=None,
                       frame_name=None,
                       verbose=None,
                       check_fps=False):
        """Segment frames read from a capture object until the loop is stopped.

        Args:
            cam: capture object; its ``read``, ``get`` and ``release``
                methods are used.
            show_bboxes: forwarded to ``segmentImage``.
            mask_points_values: forwarded to ``segmentImage``.
            output_video_name: if not ``None``, rendered frames are resized
                to the capture size and written to this file.
            frames_per_second: frame rate handed to the video writer.
            show_frames: when ``True`` and ``frame_name`` is given, each
                rendered frame is shown in a window named ``frame_name``;
                pressing ``q`` there stops the loop.
            frame_name: window title used when showing frames.
            verbose: any non-``None`` value prints a processing summary.
            check_fps: when ``True``, prints the measured frames per second.

        Returns:
            ``(segmask, output)`` for the last processed frame.
        """
        capture = cam

        save_video = None
        if output_video_name is not None:
            width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
            codec = cv2.VideoWriter_fourcc(*'DIVX')
            save_video = cv2.VideoWriter(output_video_name, codec,
                                         frames_per_second, (width, height))

        counter = 0
        start = datetime.now()

        while True:
            ret, frame = capture.read()
            if ret:
                # Honour the caller's ``show_bboxes`` (it used to be forced
                # to False here).
                segmask, output = self.segmentImage(
                    frame,
                    show_bboxes=show_bboxes,
                    process_frame=True,
                    mask_points_values=mask_points_values)
                # Count processed frames so the fps report and the
                # ``counter == 30`` exit below can work.
                counter += 1

                # The target size is only known when a writer was requested;
                # resizing unconditionally raised NameError without one.
                if save_video is not None:
                    output = cv2.resize(output, (width, height),
                                        interpolation=cv2.INTER_AREA)

                if show_frames == True:
                    if frame_name is not None:
                        cv2.imshow(frame_name, output)

                        if cv2.waitKey(25) & 0xFF == ord('q'):
                            break

                if save_video is not None:
                    save_video.write(output)

            # NOTE(review): a failed read only ends the loop when exactly 30
            # frames have been processed; otherwise reading is retried.
            elif counter == 30:
                break

        end = datetime.now()
        # Always compute the elapsed time: the verbose summary needs it even
        # when ``check_fps`` is off.
        timetaken = (end - start).total_seconds()

        if check_fps == True:
            fps = counter / timetaken
            print(f"{fps} frames per seconds")

        if verbose is not None:
            print(f"Processed {counter} frames in {timetaken:.1f} seconds")

        capture.release()

        if save_video is not None:
            save_video.release()

        return segmask, output
コード例 #4
0
class instance_segmentation():
    """Mask R-CNN inference over images, frames, video files and cameras.

    Every public entry point returns ``(r, output)``: ``r`` is the first
    entry of ``self.model.detect(...)`` and ``output`` is the image
    produced by ``display_instances`` / ``display_box_instances``.
    ``load_model`` must be called before any segmentation method.
    """

    # Class labels published on ``coco_config.class_names`` and handed to
    # the drawing helpers together with the predicted ``class_ids``.
    _CLASS_NAMES = (
        'BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
        'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
        'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
        'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
        'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
        'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
        'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
        'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
        'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
        'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
        'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
        'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
        'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush',
    )

    def __init__(self, infer_speed=None):
        """Apply an optional speed preset to the shared ``coco_config``.

        Args:
            infer_speed: ``"average"``, ``"fast"`` or ``"rapid"`` select
                progressively smaller image dimensions and minimum
                detection confidence; any other value leaves
                ``coco_config`` untouched.
        """
        if infer_speed == "average":
            coco_config.IMAGE_MAX_DIM = 512
            coco_config.IMAGE_MIN_DIM = 512
            coco_config.DETECTION_MIN_CONFIDENCE = 0.45

        elif infer_speed == "fast":
            coco_config.IMAGE_MAX_DIM = 384
            coco_config.IMAGE_MIN_DIM = 384
            coco_config.DETECTION_MIN_CONFIDENCE = 0.25

        elif infer_speed == "rapid":
            coco_config.IMAGE_MAX_DIM = 256
            coco_config.IMAGE_MIN_DIM = 256
            coco_config.DETECTION_MIN_CONFIDENCE = 0.20

        self.model_dir = os.getcwd()

    def load_model(self, model_path):
        """Build the inference-mode ``MaskRCNN`` and load weights by name."""
        self.model = MaskRCNN(mode="inference",
                              model_dir=self.model_dir,
                              config=coco_config)
        self.model.load_weights(model_path, by_name=True)

    def _publish_class_names(self):
        """Store a fresh list of the class labels on ``coco_config``."""
        coco_config.class_names = list(self._CLASS_NAMES)

    def _draw(self, image, r, show_bboxes):
        """Render the detections in ``r`` onto ``image``.

        ``show_bboxes`` is compared with ``== False`` on purpose: that is
        the historical contract, so e.g. ``None`` selects the variant
        with bounding boxes.
        """
        if show_bboxes == False:  # noqa: E712 - see docstring
            return display_instances(image, r['rois'], r['masks'],
                                     r['class_ids'], coco_config.class_names)
        return display_box_instances(image, r['rois'], r['masks'],
                                     r['class_ids'], coco_config.class_names,
                                     r['scores'])

    def _segment(self, image, show_bboxes, output_image_name, verbose,
                 progress_message):
        """Shared body of ``segmentImage`` and ``segmentFrame``."""
        # Detection runs on a channel-swapped copy; drawing uses the
        # image exactly as it was supplied.
        new_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if verbose is not None:
            print(progress_message)
        results = self.model.detect([new_img])

        self._publish_class_names()
        r = results[0]
        output = self._draw(image, r, show_bboxes)

        if output_image_name is not None:
            cv2.imwrite(output_image_name, output)
            # The two messages differ only in capitalisation; both are
            # kept verbatim for callers that match on the text.
            if show_bboxes == False:  # noqa: E712
                print(
                    "Processed image saved successfully in your current working directory."
                )
            else:
                print(
                    "Processed Image saved successfully in your current working directory."
                )
        return r, output

    def segmentImage(self,
                     image_path,
                     show_bboxes=False,
                     output_image_name=None,
                     verbose=None):
        """Segment the image stored at ``image_path``.

        Args:
            image_path: Path handed to ``cv2.imread``.
            show_bboxes: ``False`` draws masks only; otherwise boxes and
                scores are drawn as well.
            output_image_name: If not ``None``, the rendered image is
                written to this path.
            verbose: If not ``None``, a progress message is printed.

        Returns:
            ``(r, output)`` as described on the class.
        """
        image = cv2.imread(image_path)
        return self._segment(image, show_bboxes, output_image_name, verbose,
                             "Processing image...")

    def segmentFrame(self,
                     frame,
                     show_bboxes=False,
                     output_image_name=None,
                     verbose=None):
        """Segment an image that is already in memory.

        Takes the same options as ``segmentImage`` but receives the image
        array directly instead of a path.
        """
        return self._segment(frame, show_bboxes, output_image_name, verbose,
                             "Processing frame...")

    def process_video(self,
                      video_path,
                      show_bboxes=False,
                      output_video_name=None,
                      frames_per_second=None):
        """Segment every frame of a video file.

        Args:
            video_path: Source handed to ``cv2.VideoCapture``.
            show_bboxes: ``False`` draws masks only; otherwise boxes and
                scores are drawn as well.
            output_video_name: If not ``None``, rendered frames are
                written to this file.
            frames_per_second: Frame rate of the written video; required
                whenever ``output_video_name`` is given.

        Returns:
            ``(r, output)`` for the last processed frame, or
            ``(None, None)`` when no frame could be read.

        Raises:
            ValueError: ``output_video_name`` was given without
                ``frames_per_second``.
        """
        # Previously the writer was created based on frames_per_second
        # but used based on output_video_name, so supplying only one of
        # them failed with a NameError or a writer opened on ``None``.
        if output_video_name is not None and frames_per_second is None:
            raise ValueError(
                "frames_per_second is required when output_video_name is given")

        capture = cv2.VideoCapture(video_path)
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self._publish_class_names()

        save_video = None
        if output_video_name is not None:
            codec = cv2.VideoWriter_fourcc(*'DIVX')
            save_video = cv2.VideoWriter(output_video_name, codec,
                                         frames_per_second, (width, height))

        counter = 0
        r = output = None
        start = time.time()
        try:
            while True:
                ret, frame = capture.read()
                if not ret:
                    break
                # Count only frames that were actually read, so the
                # summary below is not off by one.
                counter += 1
                results = self.model.detect([frame])
                print("No. of frames:", counter)
                r = results[0]
                output = self._draw(frame, r, show_bboxes)
                output = cv2.resize(output, (width, height),
                                    interpolation=cv2.INTER_AREA)
                if save_video is not None:
                    save_video.write(output)
            end = time.time()
        finally:
            # Release the handles even when detection raises.
            capture.release()
            if save_video is not None:
                save_video.release()

        print(f"Processed {counter} frames in {end-start:.1f} seconds")
        return r, output

    def process_camera(self,
                       cam,
                       show_bboxes=False,
                       output_video_name=None,
                       frames_per_second=None,
                       show_frames=None,
                       frame_name=None,
                       verbose=None,
                       check_fps=False):
        """Segment frames read from an already opened capture object.

        Args:
            cam: Capture object providing ``read``, ``get`` and
                ``release``; it is released before returning.
            show_bboxes: ``False`` draws masks only; otherwise boxes and
                scores are drawn as well.
            output_video_name: If not ``None``, rendered frames are
                written to this file at ``frames_per_second``.
            frames_per_second: Frame rate passed to the video writer.
            show_frames: When ``True`` and ``frame_name`` is given, each
                frame is shown in a window; pressing ``q`` stops.
            frame_name: Title of the preview window.
            verbose: If not ``None``, a frame-count summary is printed.
            check_fps: When ``True``, the achieved frame rate is printed.

        Returns:
            ``(r, output)`` for the last processed frame.
        """
        capture = cam
        save_video = None
        if output_video_name is not None:
            width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
            save_video = cv2.VideoWriter(output_video_name,
                                         cv2.VideoWriter_fourcc(*'DIVX'),
                                         frames_per_second, (width, height))

        counter = 0
        start = datetime.now()
        self._publish_class_names()

        try:
            while True:
                ret, frame = capture.read()
                if ret:
                    results = self.model.detect([frame])
                    r = results[0]
                    output = self._draw(frame, r, show_bboxes)
                    counter += 1

                    if show_frames == True:  # noqa: E712 - historical contract
                        if frame_name is not None:
                            cv2.imshow(frame_name, output)

                            if cv2.waitKey(25) & 0xFF == ord('q'):
                                break

                    if save_video is not None:
                        output = cv2.resize(output, (width, height),
                                            interpolation=cv2.INTER_AREA)
                        save_video.write(output)

                # NOTE(review): a failed read only ends the loop when
                # exactly 30 frames have been processed; otherwise the
                # read is retried indefinitely. Kept as-is because the
                # intended stop condition is unclear - confirm.
                elif counter == 30:
                    break
            end = datetime.now()
        finally:
            # Release the handles even when detection raises.
            capture.release()
            if save_video is not None:
                save_video.release()

        # Computed unconditionally: the verbose summary used to read
        # ``timetaken`` even when check_fps had not assigned it.
        timetaken = (end - start).total_seconds()
        if check_fps == True:  # noqa: E712 - historical contract
            fps = counter / timetaken
            print(f"{fps:.3f} frames per second")

        if verbose is not None:
            print(f"Processed {counter} frames in {timetaken:.1f} seconds")

        return r, output
コード例 #5
0
ファイル: instance.py プロジェクト: yogeshkumarpilli/PixelLib
class instance_segmentation():
    def __init__(self, infer_speed=None):
        if infer_speed == "average":
            coco_config.IMAGE_MAX_DIM = 512
            coco_config.IMAGE_MIN_DIM = 512
            coco_config.DETECTION_MIN_CONFIDENCE = 0.45

        elif infer_speed == "fast":
            coco_config.IMAGE_MAX_DIM = 384
            coco_config.IMAGE_MIN_DIM = 384
            coco_config.DETECTION_MIN_CONFIDENCE = 0.25

        elif infer_speed == "rapid":
            coco_config.IMAGE_MAX_DIM = 256
            coco_config.IMAGE_MIN_DIM = 256
            coco_config.DETECTION_MIN_CONFIDENCE = 0.20

        self.model_dir = os.getcwd()

    def load_model(self, model_path):
        """Create the inference network and load weights from ``model_path``.

        The network is built from the shared ``coco_config`` and stored on
        ``self.model``; weights are matched to layers by name.
        """
        network_options = {
            "mode": "inference",
            "model_dir": self.model_dir,
            "config": coco_config,
        }
        self.model = MaskRCNN(**network_options)
        self.model.load_weights(model_path, by_name=True)

    def select_target_classes(self,
                              BG=False,
                              person=False,
                              bicycle=False,
                              car=False,
                              motorcycle=False,
                              airplane=False,
                              bus=False,
                              train=False,
                              truck=False,
                              boat=False,
                              traffic_light=False,
                              fire_hydrant=False,
                              stop_sign=False,
                              parking_meter=False,
                              bench=False,
                              bird=False,
                              cat=False,
                              dog=False,
                              horse=False,
                              sheep=False,
                              cow=False,
                              elephant=False,
                              bear=False,
                              zebra=False,
                              giraffe=False,
                              backpack=False,
                              umbrella=False,
                              handbag=False,
                              tie=False,
                              suitcase=False,
                              frisbee=False,
                              skis=False,
                              snowboard=False,
                              sports_ball=False,
                              kite=False,
                              baseball_bat=False,
                              baseball_glove=False,
                              skateboard=False,
                              surfboard=False,
                              tennis_racket=False,
                              bottle=False,
                              wine_glass=False,
                              cup=False,
                              fork=False,
                              knife=False,
                              spoon=False,
                              bowl=False,
                              banana=False,
                              apple=False,
                              sandwich=False,
                              orange=False,
                              broccoli=False,
                              carrot=False,
                              hot_dog=False,
                              pizza=False,
                              donut=False,
                              cake=False,
                              chair=False,
                              couch=False,
                              potted_plant=False,
                              bed=False,
                              dining_table=False,
                              toilet=False,
                              tv=False,
                              laptop=False,
                              mouse=False,
                              remote=False,
                              keyboard=False,
                              cell_phone=False,
                              microwave=False,
                              oven=False,
                              toaster=False,
                              sink=False,
                              refrigerator=False,
                              book=False,
                              clock=False,
                              vase=False,
                              scissors=False,
                              teddy_bear=False,
                              hair_dryer=False,
                              toothbrush=False):
        """Build the class filter consumed by ``segment_target_classes``.

        Each keyword is a flag for one class. A flag that compares equal
        to ``True`` marks its class ``"valid"``; everything else is
        marked ``"invalid"``.

        Returns:
            dict mapping every class name (spaces instead of
            underscores, e.g. ``"traffic light"``) to ``"valid"`` or
            ``"invalid"``. The ``hair_dryer`` flag is stored under both
            ``"hair dryer"`` and ``"hair drier"``.
        """
        flags = (
            BG, person, bicycle, car, motorcycle, airplane, bus, train, truck,
            boat, traffic_light, fire_hydrant, stop_sign, parking_meter, bench,
            bird, cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe,
            backpack, umbrella, handbag, tie, suitcase, frisbee, skis,
            snowboard, sports_ball, kite, baseball_bat, baseball_glove,
            skateboard, surfboard, tennis_racket, bottle, wine_glass, cup,
            fork, knife, spoon, bowl, banana, apple, sandwich, orange,
            broccoli, carrot, hot_dog, pizza, donut, cake, chair, couch,
            potted_plant, bed, dining_table, toilet, tv, laptop, mouse, remote,
            keyboard, cell_phone, microwave, oven, toaster, sink, refrigerator,
            book, clock, vase, scissors, teddy_bear, hair_dryer, toothbrush,
        )
        class_names = (
            "BG", "person", "bicycle", "car", "motorcycle", "airplane", "bus",
            "train", "truck", "boat", "traffic light", "fire hydrant",
            "stop sign", "parking meter", "bench", "bird", "cat", "dog",
            "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
            "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
            "skis", "snowboard", "sports ball", "kite", "baseball bat",
            "baseball glove", "skateboard", "surfboard", "tennis racket",
            "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
            "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
            "hot dog", "pizza", "donut", "cake", "chair", "couch",
            "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
            "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
            "toaster", "sink", "refrigerator", "book", "clock", "vase",
            "scissors", "teddy bear", "hair dryer", "toothbrush",
        )

        detected_classes = {}
        for flag, class_name in zip(flags, class_names):
            # ``== True`` (not truthiness) is the historical contract.
            if flag == True:  # noqa: E712
                detected_classes[class_name] = "valid"
            else:
                detected_classes[class_name] = "invalid"

        # segmentImage looks detections up by the label list it assigns
        # to coco_config.class_names, which spells this class
        # "hair drier". Without the alias that lookup raised KeyError as
        # soon as a hair drier was detected.
        detected_classes["hair drier"] = detected_classes["hair dryer"]

        return detected_classes

    def segmentImage(self,
                     image_path,
                     show_bboxes=False,
                     process_frame=False,
                     segment_target_classes=None,
                     extract_segmented_objects=False,
                     save_extracted_objects=False,
                     mask_points_values=False,
                     output_image_name=None,
                     verbose=None):
        """Detect objects in an image or frame and draw their masks.

        Args:
            image_path: Path handed to ``cv2.imread``; when
                ``process_frame`` is set, the image array itself.
            show_bboxes: ``False`` draws with ``display_instances``; any
                other value draws with ``display_box_instances``.
            process_frame: ``False`` reads ``image_path`` from disk,
                otherwise ``image_path`` is used as the image directly.
            segment_target_classes: Optional mapping of class name to
                ``"valid"`` / ``"invalid"`` (see
                ``select_target_classes``); detections whose class is
                ``"invalid"`` are dropped before drawing.
            extract_segmented_objects: When ``True``, every detected
                object is masked out of the undrawn image and cropped to
                its mask extent.
            save_extracted_objects: When ``True``, each crop is written
                to ``segmented_object_<n>.jpg``.
            mask_points_values: When ``True``, ``r['masks']`` is replaced
                by a list holding ``Mask(...).polygons().points`` for
                every mask.
            output_image_name: If not ``None``, the drawn image is
                written to this path.
            verbose: If not ``None``, a progress message is printed.

        Returns:
            ``(r, output)``. ``r`` holds ``rois``, ``class_ids``,
            ``scores`` and ``masks``; with extraction enabled it also
            holds ``extracted_objects``.

            NOTE(review): ``extracted_objects`` is the crop of the last
            object only (an empty list when nothing was detected), and
            with ``mask_points_values`` it is the polygon conversion of
            that crop's channels. This matches the previous behaviour -
            confirm whether all crops were meant to be returned.
        """
        if process_frame == False:  # noqa: E712 - historical contract
            image = cv2.imread(image_path)
        else:
            image = image_path

        # Detection runs on a channel-swapped copy; drawing uses
        # ``image`` as supplied.
        new_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if verbose is not None:
            print("Processing image...")
        results = self.model.detect([new_img])

        coco_config.class_names = [
            'BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
            'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
            'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
            'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
            'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
            'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
            'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
            'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
            'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
            'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
            'scissors', 'teddy bear', 'hair drier', 'toothbrush'
        ]

        r = results[0]
        if segment_target_classes is not None:
            r = self._filter_target_classes(r, segment_target_classes,
                                            coco_config.class_names)

        extract = extract_segmented_objects == True  # noqa: E712
        # Crops must come from undrawn pixels. The drawing helpers may
        # paint onto ``image`` in place, so snapshot it first. The old
        # code re-read ``image_path`` from disk instead, which failed
        # when ``process_frame`` made it an array rather than a path.
        pristine = image.copy() if extract else None

        if show_bboxes == False:  # noqa: E712 - historical contract
            output = display_instances(image, r['rois'], r['masks'],
                                       r['class_ids'], coco_config.class_names)
        else:
            output = display_box_instances(image, r['rois'], r['masks'],
                                           r['class_ids'],
                                           coco_config.class_names,
                                           r['scores'])

        if output_image_name is not None:
            cv2.imwrite(output_image_name, output)
            print(
                "Processed image saved successfully in your current working directory."
            )

        last_crop = None
        if extract:
            # Must run before the masks are replaced by polygon points.
            last_crop = self._crop_objects(pristine, r['masks'],
                                           save_extracted_objects)

        if mask_points_values == True:  # noqa: E712
            r['masks'] = self._polygon_points(r['masks'])

        if extract:
            if last_crop is None:
                # Nothing detected: previously an UnboundLocalError.
                extracted_objects = []
            elif mask_points_values == True:  # noqa: E712
                extracted_objects = self._polygon_points(last_crop)
            else:
                extracted_objects = last_crop
            r.update({"extracted_objects": extracted_objects})

        return r, output

    @staticmethod
    def _filter_target_classes(r, segment_target_classes, class_names):
        """Return a result dict restricted to classes not marked "invalid"."""
        # ``class_names`` spells this label differently from the keys
        # produced by select_target_classes; fall back to that spelling.
        aliases = {"hair drier": "hair dryer"}

        keep = []
        for position, class_id in enumerate(r['class_ids']):
            name = class_names[class_id]
            if name not in segment_target_classes and name in aliases:
                name = aliases[name]
            if segment_target_classes[name] == "invalid":
                continue
            keep.append(position)

        # Index arrays keep every field's rank even when nothing is
        # kept (masks stay 3-D with zero instances), so later code
        # reading ``masks.shape[2]`` no longer raises IndexError.
        keep = np.asarray(keep, dtype=int)
        return {
            "rois": np.asarray(r['rois'])[keep],
            "class_ids": np.asarray(r['class_ids'])[keep],
            "scores": np.asarray(r['scores'])[keep],
            "masks": np.asarray(r['masks'])[:, :, keep],
        }

    @staticmethod
    def _crop_objects(pristine, masks, save_extracted_objects):
        """Mask out and crop each object; return the last crop or None."""
        crop = None
        for index in range(masks.shape[2]):
            object_mask = masks[:, :, index]
            img = pristine.copy()
            for channel in range(img.shape[2]):
                img[:, :, channel] = img[:, :, channel] * object_mask
            # Keep only the rows and columns the mask touches.
            crop = img[np.ix_(object_mask.any(1), object_mask.any(0))]

            if save_extracted_objects == True:  # noqa: E712
                save_path = os.path.join("segmented_object" + "_" +
                                         str(index + 1) + ".jpg")
                cv2.imwrite(save_path, crop)
        return crop

    @staticmethod
    def _polygon_points(stack):
        """Convert each slice along the last axis to its polygon points."""
        return [
            Mask(stack[:, :, index]).polygons().points
            for index in range(stack.shape[2])
        ]

    def segmentFrame(self,
                     frame,
                     show_bboxes=False,
                     segment_target_classes=None,
                     mask_points_values=False,
                     output_image_name=None):
        """Segment a single in-memory frame.

        Thin wrapper around ``segmentImage`` that always passes
        ``process_frame=True`` and forwards every other option unchanged.

        Args:
            frame: The frame handed to ``segmentImage`` as its first argument.
            show_bboxes: Forwarded to ``segmentImage``.
            segment_target_classes: Forwarded to ``segmentImage``.
            mask_points_values: Forwarded to ``segmentImage``.
            output_image_name: Forwarded to ``segmentImage``; when not
                ``None`` the returned output is also written to this path.

        Returns:
            The ``(segmask, output)`` pair produced by ``segmentImage``.
        """
        forwarded = dict(
            show_bboxes=show_bboxes,
            process_frame=True,
            segment_target_classes=segment_target_classes,
            mask_points_values=mask_points_values,
            output_image_name=output_image_name,
        )
        segmask, output = self.segmentImage(frame, **forwarded)

        # Nothing to persist: hand the results straight back.
        if output_image_name is None:
            return segmask, output

        # NOTE(review): segmentImage receives output_image_name as well and may
        # already have written the file - confirm whether this write is needed.
        cv2.imwrite(output_image_name, output)
        print(
            "Processed image saved successfully in your current working directory."
        )
        return segmask, output

    def process_video(self,
                      video_path,
                      show_bboxes=False,
                      segment_target_classes=None,
                      mask_points_values=False,
                      output_video_name=None,
                      frames_per_second=None):
        """Run ``segmentImage`` on every frame of a video file.

        Args:
            video_path: Source handed to ``cv2.VideoCapture``.
            show_bboxes: Forwarded to ``segmentImage``.
            segment_target_classes: Forwarded to ``segmentImage``.
            mask_points_values: Forwarded to ``segmentImage``.
            output_video_name: Path of the video to write. When ``None`` the
                frames are processed but nothing is saved.
            frames_per_second: Frame rate of the written video; required
                whenever ``output_video_name`` is given.

        Returns:
            ``(segmask, output)`` for the last frame processed, or
            ``(None, None)`` when no frame could be read.

        Raises:
            ValueError: If ``output_video_name`` is given without
                ``frames_per_second``.
        """
        # The writer cannot be opened without a frame rate; fail early with a
        # clear message instead of a NameError on the first write.
        if output_video_name is not None and frames_per_second is None:
            raise ValueError(
                "frames_per_second is required when output_video_name is given")

        capture = cv2.VideoCapture(video_path)
        save_video = None
        segmask, output = None, None
        counter = 0

        try:
            width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

            # Create the writer under the same condition that later guards
            # every write, so the two can never disagree.
            if output_video_name is not None:
                codec = cv2.VideoWriter_fourcc(*'DIVX')
                save_video = cv2.VideoWriter(output_video_name, codec,
                                             frames_per_second,
                                             (width, height))

            start = time.time()

            while True:
                ret, frame = capture.read()
                if not ret:
                    break

                # Count only frames that were actually read.
                counter += 1

                # apply segmentation mask
                segmask, output = self.segmentImage(
                    frame,
                    show_bboxes=show_bboxes,
                    segment_target_classes=segment_target_classes,
                    process_frame=True,
                    mask_points_values=mask_points_values)
                print("No. of frames:", counter)

                output = cv2.resize(output, (width, height),
                                    interpolation=cv2.INTER_AREA)

                # Each frame is written exactly once.
                if save_video is not None:
                    save_video.write(output)

            end = time.time()
            print(f"Processed {counter} frames in {end-start:.1f} seconds")
        finally:
            # Release the handles even if segmentation raises midway.
            capture.release()
            if save_video is not None:
                save_video.release()

        return segmask, output

    def process_camera(self,
                       cam,
                       show_bboxes=False,
                       segment_target_classes=None,
                       mask_points_values=False,
                       output_video_name=None,
                       frames_per_second=None,
                       show_frames=None,
                       frame_name=None,
                       verbose=None,
                       check_fps=False):
        """Run ``segmentImage`` on frames read from a capture object.

        Args:
            cam: Capture object providing ``get``, ``read`` and ``release``
                (used the way a ``cv2.VideoCapture`` is used).
            show_bboxes: Forwarded to ``segmentImage``.
            segment_target_classes: Forwarded to ``segmentImage``.
            mask_points_values: Forwarded to ``segmentImage``.
            output_video_name: Path of the video to write; ``None`` disables
                saving.
            frames_per_second: Frame rate passed to the video writer.
            show_frames: When truthy and ``frame_name`` is set, each processed
                frame is displayed; pressing ``q`` stops the loop.
            frame_name: Window title used for display.
            verbose: When not ``None``, print the frame count and elapsed time.
            check_fps: When truthy, print the achieved frames per second.

        Returns:
            ``(segmask, output)`` for the last frame processed.
        """
        capture = cam

        # The frame size is needed for the resize below whether or not the
        # result is saved, so read it unconditionally.
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

        save_video = None
        if output_video_name is not None:
            save_video = cv2.VideoWriter(output_video_name,
                                         cv2.VideoWriter_fourcc(*'DIVX'),
                                         frames_per_second, (width, height))

        counter = 0
        start = datetime.now()

        try:
            while True:
                ret, frame = capture.read()
                if ret:
                    segmask, output = self.segmentImage(
                        frame,
                        show_bboxes=show_bboxes,
                        segment_target_classes=segment_target_classes,
                        process_frame=True,
                        mask_points_values=mask_points_values)
                    counter += 1

                    output = cv2.resize(output, (width, height),
                                        interpolation=cv2.INTER_AREA)

                    if show_frames and frame_name is not None:
                        cv2.imshow(frame_name, output)

                        if cv2.waitKey(25) & 0xFF == ord('q'):
                            break

                    if save_video is not None:
                        save_video.write(output)

                # NOTE(review): a failed read ends the loop only when exactly
                # 30 frames have been processed; otherwise the read is simply
                # retried. Kept as-is - confirm this is the intended exit rule.
                elif counter == 30:
                    break

            end = datetime.now()
            # Elapsed time feeds both the FPS report and the verbose summary,
            # so compute it regardless of which one is enabled.
            timetaken = (end - start).total_seconds()

            if check_fps:
                out = counter / timetaken
                print(f"{out:.3f} frames per second")

            if verbose is not None:
                print(f"Processed {counter} frames in {timetaken:.1f} seconds")
        finally:
            # Release the handles even if segmentation raises midway.
            capture.release()
            if save_video is not None:
                save_video.release()

        return segmask, output