Python get_checkpoint_url 예제들, detectron2.model_zoo.model_zoo.get_checkpoint_url Python 예제들

예제 #1

0

파일 보기

def get_default_instance_segmentation_config(use_direction_classes,
                                             training_set_name="train",
                                             test_set_name="val",
                                             max_iter=1000,
                                             threshold=0.7):
    """Build the default Mask R-CNN (R50-FPN, 3x schedule) config.

    Args:
        use_direction_classes: forwarded to ``get_name_with_prefix`` when
            resolving dataset names and stored on the config; when truthy
            the ROI heads predict ``len(DIRECTION_CLASSES)`` classes,
            otherwise a single class.
        training_set_name: base name of the training split.
        test_set_name: base name of the evaluation split.
        max_iter: stored as ``SOLVER.MAX_ITER``; half of it is warm-up.
        threshold: stored as ``MODEL.ROI_HEADS.SCORE_THRESH_TEST``.

    Returns:
        The populated config node, after ``update_output_dir_path(cfg)``
        has been applied to it.
    """
    cfg = get_cfg()

    # Project-specific augmentation switches live in their own sub-node.
    augmentation = CN()
    augmentation.ROTATION = True
    augmentation.FLIP = True
    augmentation.RESIZE_FACTOR = 1
    cfg.DATA_TRANSFORMATIONS = augmentation

    # Architecture and pre-trained weights both come from the same model-zoo
    # entry (segmentation masks and bounding boxes only).
    zoo_config_file = model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    cfg.merge_from_file(zoo_config_file)
    zoo_weights = model_zoo.get_checkpoint_url(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    cfg.MODEL.WEIGHTS = zoo_weights
    cfg.TYPE = "mask_rcnn_R_50_FPN_3x"
    assert cfg.TYPE in cfg.MODEL.WEIGHTS, "miss match between type and weights"

    # General
    cfg.SEED = 42

    # Datasets
    train_name = get_name_with_prefix(training_set_name,
                                      use_direction_classes)
    eval_name = get_name_with_prefix(test_set_name, use_direction_classes)
    cfg.DATASETS.TRAIN = (train_name, )
    # TODO: check whether the test set can be added here as well.
    cfg.DATASETS.TEST = (eval_name, )
    cfg.DATASETS.USE_DIRECTION_CLASSES = use_direction_classes
    cfg.DATALOADER.NUM_WORKERS = 0

    # Validation
    cfg.TEST.EVAL_PERIOD = 60

    # Solver: warm up over the first half of training.
    warmup_iters = int(0.5 * max_iter)
    cfg.SOLVER.MAX_ITER = max_iter
    cfg.SOLVER.IMS_PER_BATCH = 5
    cfg.SOLVER.BASE_LR = 0.001
    cfg.SOLVER.WARMUP_ITERS = warmup_iters
    cfg.SOLVER.WARMUP_FACTOR = 1.0 / (warmup_iters + 1)
    cfg.SOLVER.WEIGHT_DECAY_NORM = 0.0

    # Model
    if use_direction_classes:
        num_classes = len(DIRECTION_CLASSES)
    else:
        num_classes = 1
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = num_classes
    cfg.MODEL.BACKBONE.FREEZE_AT = 2
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold

    update_output_dir_path(cfg)

    return cfg

예제 #2

0

파일 보기

def main(image_dir,
         project,
         crop_size,
         batch_size,
         iterations,
         validation_split,
         backbone,
         output_dir,
         learning_rate,
         device,
         tb,
         tb_port,
         resume=None):
    """Train a single-class ("balloon") Mask R-CNN model.

    Args:
        image_dir: passed to ``get_balloon_dicts`` to load annotations.
        project: passed to ``get_balloon_dicts`` alongside ``image_dir``.
        crop_size: stored as ``INPUT.CROP.SIZE``.
        batch_size: stored as ``SOLVER.IMS_PER_BATCH``.
        iterations: stored as ``SOLVER.MAX_ITER``.
        validation_split: currently unused by this function.
        backbone: model-zoo suffix, e.g. ``"R_50_FPN_3x"``; selects
            ``COCO-InstanceSegmentation/mask_rcnn_<backbone>.yaml``.
        output_dir: training output directory (created if missing).
        learning_rate: stored as ``SOLVER.BASE_LR``.
        device: stored as ``MODEL.DEVICE``.
        tb: when truthy, launch TensorBoard and print its URL.
        tb_port: port handed to ``launch_tb``.
        resume: optional weights path; when falsy the model-zoo checkpoint
            for ``backbone`` is used instead.
    """
    if tb:
        print(f'Tensorboard URL: {launch_tb(output_dir, tb_port)}')

    # Both splits are backed by the same loader call.
    for split in ("train", "val"):
        DatasetCatalog.register(
            split, lambda: get_balloon_dicts(image_dir, project))
        MetadataCatalog.get(split).set(thing_classes=["balloon"])

    # Use ONE zoo entry for architecture and weights. Previously the
    # architecture was hard-coded to R_50_FPN_3x (and built twice) while the
    # weights followed `backbone`, so any other backbone got mismatched
    # weights.
    zoo_config = f"COCO-InstanceSegmentation/mask_rcnn_{backbone}.yaml"
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(zoo_config))

    cfg.DATASETS.TRAIN = ("train", )
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS = False

    if resume:
        cfg.MODEL.WEIGHTS = resume
    else:
        # Let training initialize from the model zoo.
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_config)

    cfg.SOLVER.IMS_PER_BATCH = batch_size
    cfg.SOLVER.BASE_LR = learning_rate
    cfg.SOLVER.MAX_ITER = iterations
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    cfg.MODEL.DEVICE = device
    cfg.OUTPUT_DIR = output_dir
    # NOTE(review): INPUT.CROP.ENABLED is not set here; confirm that Trainer
    # consumes CROP.SIZE on its own.
    cfg.INPUT.CROP.SIZE = crop_size

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = Trainer(cfg)
    # resume=False: start from cfg.MODEL.WEIGHTS rather than from a previous
    # checkpoint found in OUTPUT_DIR.
    trainer.resume_or_load(resume=False)
    trainer.train()

예제 #3

0

파일 보기

파일: config.py 프로젝트: aldopareja/easy_object_detector

def get_cfg(model_weights_path: Path = None,
            output_path: Path = None,
            debug: bool = True,
            num_input_channels: int = 1):
    """Build the detector config on top of Mask R-CNN R50-FPN (3x).

    Args:
        model_weights_path: weights to load; when ``None`` the model-zoo
            checkpoint of the base config is used.
        output_path: output directory; defaults to ``'./output'``. The
            directory (including missing parents) is created.
        debug: selects the small/fast settings (fewer workers, smaller
            batch, frequent checkpoints/evaluation, training on ``"val"``).
        num_input_channels: number of input channels; drives
            ``INPUT.FORMAT``, ``MODEL.PIXEL_MEAN`` and ``MODEL.PIXEL_STD``.

    Returns:
        The populated config node.
    """
    # To switch backbone (e.g. mask_rcnn_X_101_32x8d_FPN_3x), change this one
    # name: it drives both the architecture file and the default weights.
    zoo_config = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

    cfg = detectron_get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(zoo_config))
    if model_weights_path is None:
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_config)
    else:
        cfg.MODEL.WEIGHTS = str(model_weights_path)

    cfg.MODEL.RESNETS.NORM = "BN"
    cfg.MODEL.BACKBONE.FREEZE_AT = 0

    cfg.OUTPUT_DIR = str(
        output_path) if output_path is not None else './output'
    # parents=True: a nested output path must not fail just because an
    # intermediate directory does not exist yet.
    Path(cfg.OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

    cfg.DATALOADER.NUM_WORKERS = 0 if debug else 6

    # A warm-up is necessary to avoid diverging training while keeping the
    # target learning rate as high as possible.
    cfg.SOLVER.WARMUP_FACTOR = 1.0 / 500
    cfg.SOLVER.WARMUP_ITERS = 500
    cfg.SOLVER.IMS_PER_BATCH = 16 if not debug else 8
    cfg.SOLVER.BASE_LR = 0.0005
    cfg.SOLVER.MAX_ITER = 80000
    # After each milestone in SOLVER.STEPS the learning rate is scaled by
    # SOLVER.GAMMA.
    cfg.SOLVER.STEPS = (40000, 60000, 70000)
    cfg.SOLVER.GAMMA = 0.5

    cfg.SOLVER.CHECKPOINT_PERIOD = 50 if debug else 3000

    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

    cfg.TEST.EVAL_PERIOD = 30 if debug else 3000

    cfg.INPUT.MASK_FORMAT = "bitmask"
    # One "D" per input channel, with matching per-channel mean/std lists.
    cfg.INPUT.FORMAT = "D" * num_input_channels
    cfg.MODEL.PIXEL_MEAN = [0.5] * num_input_channels
    cfg.MODEL.PIXEL_STD = [1.0] * num_input_channels
    cfg.MIN_AREA = 100

    # In debug mode train on the validation split.
    cfg.DATASETS.TRAIN = ("val", ) if debug else ("train", )
    cfg.DATASETS.TEST = ("val", )

    cfg.DEBUG = debug
    return cfg

예제 #4

0

파일 보기

def main():
    """Register a COCO-format dataset and fine-tune a model-zoo model on it.

    All inputs come from the module-level ``parser``: ``dataset`` (catalog
    name), ``label`` and ``file`` (forwarded to ``register_coco_instances``),
    ``model`` (model-zoo config path), ``iter`` (training iterations) and
    ``num_class`` (number of ROI-head classes).
    """
    args = parser.parse_args()
    dataset_name = args.dataset

    # Register the dataset under the requested name.
    register_coco_instances(dataset_name, {}, args.label, args.file)

    # For keypoint training, the dataset metadata would additionally need
    # keypoint_names, keypoint_flip_map and keypoint_connection_rules.

    # Look up metadata and dataset records. The results are not used below;
    # the lookups are kept as-is (presumably so loading problems show up
    # before training starts -- TODO confirm).
    MetadataCatalog.get(dataset_name)
    DatasetCatalog.get(dataset_name)

    # Build the training configuration from the chosen model-zoo entry.
    zoo_entry = args.model
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(zoo_entry))

    cfg.DATASETS.TRAIN = (dataset_name, )
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 2
    # Let training initialize from the model zoo.
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_entry)

    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.02
    cfg.SOLVER.MAX_ITER = args.iter
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = args.num_class

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()

예제 #5

0

파일 보기

def new_model_cfg():
    """Return a box-only training config based on Mask R-CNN R101-FPN (3x).

    The architecture file and the initial weights are both taken from the
    same model-zoo entry; the mask head is then switched off.
    """
    zoo_entry = "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(zoo_entry))

    # Model
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_entry)
    cfg.MODEL.MASK_ON = False
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

    # Input / data loading
    cfg.INPUT.RANDOM_FLIP = "none"
    cfg.DATALOADER.NUM_WORKERS = 4
    cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True

    # Optimisation schedule
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.001
    cfg.SOLVER.WEIGHT_DECAY = 0.001
    cfg.SOLVER.MAX_ITER = 150000
    cfg.SOLVER.STEPS = (10000, 21000, 35000, 55000, 75000)
    cfg.SOLVER.CHECKPOINT_PERIOD = 2000

    # Evaluation and output
    cfg.TEST.EVAL_PERIOD = 2000
    cfg.OUTPUT_DIR = "output"

    return cfg

예제 #6

0

파일 보기

 def create_cfg(self):
     """
     Build the Faster R-CNN (R101-FPN, 3x) configuration for this model.

     Reads ``self.base_path``, ``self.input_format`` and
     ``self.unique_labels``; creates the output directory if needed.

     :return: the populated config node
     """
     zoo_entry = "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"
     output_dir = self.base_path + '/trained_models'

     cfg = get_cfg()
     cfg.merge_from_file(model_zoo.get_config_file(zoo_entry))

     # Train and validation sets.
     cfg.DATASETS.TRAIN = ("smilesdetect_train", )
     cfg.DATASETS.TEST = ("smilesdetect_val", )

     cfg.OUTPUT_DIR = output_dir
     cfg.INPUT.FORMAT = self.input_format

     # Start from the model-zoo checkpoint; one class per unique label.
     cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_entry)
     cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(self.unique_labels)

     os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
     return cfg

예제 #7

0

파일 보기

def define_model(train_dts, val_dts, device, model, lr, iterations,
                 batch_size):
    """Assemble a training config for the given model-zoo entry.

    Args:
        train_dts: name of the registered training dataset.
        val_dts: name of the registered validation dataset.
        device: stored as ``MODEL.DEVICE``.
        model: model-zoo config path, used for architecture and weights.
        lr: stored as ``SOLVER.BASE_LR``.
        iterations: stored as ``SOLVER.MAX_ITER``.
        batch_size: stored as ``MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE``
            (``SOLVER.IMS_PER_BATCH`` is fixed at 4).

    Returns:
        The populated config node; its output directory name carries the
        current timestamp.
    """
    cfg = get_cfg()

    # Timestamped output folder so successive runs do not overwrite each
    # other.
    timestamp = datetime.datetime.now().strftime("%d-%m-%Y-(%H:%M:%S)")
    cfg.OUTPUT_DIR = f"output-training-{timestamp}"

    cfg.merge_from_file(model_zoo.get_config_file(model))

    # Model
    cfg.MODEL.DEVICE = device
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model)

    # Data
    cfg.DATASETS.TRAIN = (train_dts, )
    cfg.DATASETS.TEST = (val_dts, )
    cfg.DATALOADER.NUM_WORKERS = 4

    # Solver: constant learning rate (no decay steps).
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = lr
    cfg.SOLVER.MAX_ITER = iterations
    cfg.SOLVER.STEPS = []

    cfg.TEST.EVAL_PERIOD = 50

    # ROI heads: one class per entry of the module-level class grouping.
    class_names = grouped_classes_dict.keys()
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = batch_size
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(class_names)

    return cfg

예제 #8

0

파일 보기

def predict_surface(img):
    """Predict a surface mask for every frame in ``img``.

    For each frame the trained model (``model_final.pth`` under
    ``./drive/MyDrive/surface``) is run, one detection is chosen, its mask is
    dilated vertically, combined with the background-subtraction image and
    turned into a 0/1 ``uint8`` image.

    Args:
        img: iterable of frames; each frame is passed to the predictor and
            must expose ``.shape`` (height first, width second). The whole
            collection is also handed to ``backgroundsubtraction``.

    Returns:
        A list with one ``uint8`` array per frame (empty for empty input).

    Side effects:
        Clears ``DatasetCatalog`` on entry and on exit, registers the
        ``surface_train`` / ``surface_val`` datasets in between, and writes
        ``background.jpg`` to the working directory.
    """
    # Clear once, then register both splits. Clearing inside the loop (as
    # before) removed the train split again while registering the val split.
    DatasetCatalog.clear()
    for split in ["train", "val"]:
        DatasetCatalog.register(
            "surface_" + split,
            lambda split=split: get_surface_dicts("surface/" + split))
        MetadataCatalog.get("surface_" + split).set(thing_classes=["surface"])

    cfg = get_cfg()
    cfg.merge_from_file(
        model_zoo.get_config_file(
            "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ("surface_train", )
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 1
    cfg.SOLVER.BASE_LR = 0.0025
    cfg.SOLVER.MAX_ITER = 1000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    cfg.OUTPUT_DIR = "./drive/MyDrive/surface"

    # Inference uses the weights produced by training, not the zoo checkpoint.
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3  # custom testing threshold

    # Build the predictor ONCE; it does not depend on the frame, so
    # re-creating it per frame only repeated the model construction.
    predictor = DefaultPredictor(cfg)

    background = backgroundsubtraction(img)
    cv2.imwrite("background.jpg", background)

    dilation_kernel = np.ones((9, 1), np.uint8)  # 9x1: vertical dilation
    data = []

    for frame in img:
        instances = predictor(frame)['instances']
        frame_height, frame_width = frame.shape[0], frame.shape[1]

        # NOTE(review): the comments of the original implementation describe
        # these as "previous frame" bounds, but they were reset for every
        # frame, so nothing is carried over between frames. That behaviour is
        # preserved here; confirm whether cross-frame tracking was intended.
        x_min = 0
        x_max = frame_width

        selected = []  # indices of detections whose mask is kept
        extents = []   # x2 - x0 of every candidate box

        if len(instances.pred_boxes) > 0:
            for index, box in enumerate(instances.pred_boxes.to("cpu")):
                coordinates = box.numpy()
                # NOTE(review): x2 - x0 is the box WIDTH; the original
                # comment called it the centre. Kept unchanged -- confirm.
                extent = coordinates[2] - coordinates[0]
                if x_min <= extent <= x_max:
                    extents.append(extent)
                    selected.append(index)

            if len(selected) > 1:
                # Several candidates: among neighbouring pairs whose extents
                # differ by more than half the span, keep the one closest to
                # half the span; fall back to the first candidate.
                half_span = (x_max - x_min) / 2
                best_pos = 0
                best_dist = None
                for pos in range(len(selected) - 1):
                    if abs(extents[pos] - extents[pos + 1]) > half_span:
                        dist = abs(extents[pos] - half_span)
                        if best_dist is None or dist < best_dist:
                            best_dist = dist
                            best_pos = pos
                # NOTE(review): a special case for the last candidate existed
                # before but could never trigger (the pair loop stops one
                # short of it), so it is omitted here.
                selected = [selected[best_pos]]

        # Accumulate the mask(s) of the selected detection(s); stays all-zero
        # when nothing was detected.
        surface_mask = np.zeros((frame_height, frame_width), np.uint8)
        for index, instance_mask in enumerate(
                instances.pred_masks.to("cpu").numpy()):
            if index in selected:
                surface_mask += instance_mask

        surface_mask = (surface_mask * 255).astype(np.uint8)
        surface_mask = cv2.dilate(surface_mask, dilation_kernel, iterations=4)
        surface_mask += background
        surface_mask = surface_mask.astype(np.uint8)

        # 1 wherever the combined mask is non-zero, 0 elsewhere.
        ones = np.ones((frame_height, frame_width), dtype=np.uint8)
        data.append(cv2.bitwise_and(ones, ones, mask=surface_mask))

    DatasetCatalog.clear()
    return data

예제 #9

0

파일 보기

    DatasetCatalog.register("renner_" + d,
                            lambda d=d: get_balloon_dicts("surface_img/" + d))
    MetadataCatalog.get("renner_" + d).set(thing_classes=["renner"])
    balloon_metadata = MetadataCatalog.get("renner_train")
    #dataset_dicts = get_balloon_dicts("surface_img/train")
# Build the config for the single-class "renner" model and create a predictor
# that loads the trained weights from OUTPUT_DIR.
cfg = get_cfg()
cfg.MODEL.DEVICE = 'cpu'

cfg.merge_from_file(
    model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("renner_train", )
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
)  # model-zoo initialisation; overwritten below before the predictor is built
cfg.SOLVER.IMS_PER_BATCH = 1
cfg.SOLVER.BASE_LR = 0.0025  # learning rate (0.00025 was tried previously)
cfg.SOLVER.MAX_ITER = 1000  # number of training iterations
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512  # ROI samples per image
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # single class ("renner")
cfg.OUTPUT_DIR = "./drive/MyDrive/renner"

# cfg already contains everything set above; adjust it for inference.
cfg.MODEL.WEIGHTS = os.path.join(
    cfg.OUTPUT_DIR, "model_final.pth")  # path to the trained model
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # testing score threshold (0.7 was tried previously)
predictor = DefaultPredictor(cfg)
# NOTE(review): despite its name, dataset_dicts holds a directory path string
# here, not loaded dataset records.
dataset_dicts = "surface_img/val"

예제 #10

0

파일 보기

from detectron2.modeling import build_model

# Register the COCO-format train and validation datasets.
# NOTE(review): both registrations point at './data/train_images' -- confirm
# that the validation annotations really refer to images in that folder.
register_coco_instances('my_dataset_train', {}, './data/train.json',
                        './data/train_images')
register_coco_instances('my_dataset_val', {}, './data/val.json',
                        './data/train_images')

# Start from the Mask R-CNN R50-FPN (3x) model-zoo configuration.
cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file(
        'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml'))
cfg.DATASETS.TRAIN = ('my_dataset_train', )
cfg.DATASETS.TEST = ()  # no evaluation dataset is configured for training
cfg.DATALOADER.NUM_WORKERS = 4
# Let training initialize from the model zoo checkpoint of the same entry.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml')
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.00025  # learning rate
cfg.SOLVER.MAX_ITER = 1000
# Number of training iterations; a practical dataset may need more.
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # ROI samples per image (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 20

# Create the output directory, then train from cfg.MODEL.WEIGHTS
# (resume=False: do not continue from an earlier checkpoint).
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

# Inference should use the config with parameters that are used in training
# cfg now already contains everything we've set previously. We changed it a little bit for inference:
cfg.MODEL.WEIGHTS = os.path.join(

예제 #11

0

파일 보기

파일: test_installation.py 프로젝트: DeqiangWang/EFPN-detectron2

import glob
import multiprocessing as mp
import os
import time
import cv2

from detectron2.config import get_cfg  # returns a copy of the default config
from detectron2.engine import DefaultTrainer
from detectron2.model_zoo import model_zoo

# Build a CPU config for Faster R-CNN with a ResNeXt-101 32x8d FPN backbone,
# initialised from the matching model-zoo checkpoint.
cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file(
        "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
)  # Let training initialize from model zoo
# ResNeXt reference: https://medium.com/@14prakash/understanding-and-implementing-architectures-of-resnet-and-resnext-for-state-of-the-art-image-cc5d0adf648e
cfg.MODEL.RESNETS.NUM_GROUPS = 32  # 1 ==> ResNet; > 1 ==> ResNeXt
cfg.MODEL.RESNETS.WIDTH_PER_GROUP = 8
cfg.MODEL.RESNETS.DEPTH = 101
cfg.MODEL.DEVICE = 'cpu'

# The dataset settings are left at their defaults (COCO):
#cfg.DATASETS.TRAIN = ("",)
#cfg.DATASETS.TEST = ("",)

# Small input sizes for both training and testing (presumably to keep this
# installation test light -- confirm).
cfg.INPUT.MIN_SIZE_TRAIN = (100, )
cfg.INPUT.MAX_SIZE_TRAIN = 166
cfg.INPUT.MIN_SIZE_TEST = (100, )
cfg.INPUT.MAX_SIZE_TEST = 166

예제 #12

0

파일 보기

def run_colmap(video_data, frame_data_path, mask_data_path,
               colmap_workspace_path, logger):
    """Export frames and people masks, then run COLMAP on them.

    Two independent, cached stages:

    1. If ``frame_data_path`` does not exist, every frame of ``video_data``
       is written there as ``NNNN.png`` and a mask that zeroes out detected
       people is written to ``mask_data_path``.
    2. If ``colmap_workspace_path`` does not exist, it is created and
       ``colmap automatic_reconstructor`` is run on the exported data.

    Args:
        video_data: iterable of frames supporting ``len()`` and exposing an
            ``is_rgb`` attribute.
        frame_data_path: folder for the exported frames.
        mask_data_path: folder for the exported masks.
        colmap_workspace_path: COLMAP workspace folder.
        logger: object with a ``log(message, ...)`` method.

    Raises:
        RuntimeError: if COLMAP exits with a non-zero return code.
    """
    zoo_config = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(zoo_config))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # detection score threshold
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_config)
    predictor = DefaultPredictor(cfg)

    def get_mask(frame):
        """Return an (H, W, 3) boolean mask that is True on detected people."""
        instances = predictor(frame)["instances"]
        masks = instances.pred_masks.cpu().numpy()
        pred_classes = instances.pred_classes.cpu().numpy()

        # Class zero is "people". OR all of their masks together; with no
        # matching detections this yields an all-False (H, W) array.
        people_mask = masks[pred_classes == 0].any(axis=0)

        return np.stack((people_mask, ) * 3, axis=-1)

    if not os.path.exists(frame_data_path):
        os.makedirs(frame_data_path)
        logger.log(
            "Created folder for frame data at {}.".format(frame_data_path))

        # exist_ok: the mask folder may be left over from an earlier run even
        # though the frame folder is missing; that must not abort the export.
        os.makedirs(mask_data_path, exist_ok=True)
        logger.log(
            "Created folder for mask data at {}.".format(mask_data_path))

        for frame_i, frame in enumerate(video_data):
            frame_filename = "{:04d}.png".format(frame_i + 1)

            # OpenCV writes BGR, so RGB video data must be converted before
            # it is saved.
            if video_data.is_rgb:
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            frame_path = os.path.join(frame_data_path, frame_filename)
            cv2.imwrite(frame_path, frame)

            # The mask file name is the full image file name plus ".png"
            # (i.e. "0001.png.png"); this double extension is intentional.
            mask_file_name = f"{frame_filename}.png"
            mask_path = os.path.join(mask_data_path, mask_file_name)
            # Invert so that people are False (zero) and everything else is
            # True (one): COLMAP then masks out only the people.
            instance_mask = ~get_mask(frame)
            cv2.imwrite(mask_path, instance_mask.astype(np.uint8))

            logger.log(
                f"Wrote frame and mask {frame_i + 1:,d}/{len(video_data):,d} to {frame_path} and {mask_path}.\r",
                end="")

        # Terminate the carriage-return progress line.
        print()

        logger.log(
            f"Wrote frame data for COLMAP to {frame_data_path} and dynamic object masks to {mask_data_path}."
        )
    else:
        logger.log(f"Found frame data at {frame_data_path}.")

    if not os.path.exists(colmap_workspace_path):
        os.makedirs(colmap_workspace_path)
        logger.log(
            "Created workspace folder at {}.".format(colmap_workspace_path))

        logger.log("Running COLMAP reconstruction. This may take a while...")
        # Argument list (no shell): sparse reconstruction only, single
        # camera, low quality preset.
        colmap_process = subprocess.run([
            'colmap', 'automatic_reconstructor', '--image_path',
            frame_data_path, '--mask_path', mask_data_path, '--workspace_path',
            colmap_workspace_path, '--single_camera', '1', '--quality', 'low',
            '--data_type', 'video', '--camera_model', 'SIMPLE_PINHOLE',
            '--sparse', '1', '--dense', '0'
        ])

        if colmap_process.returncode != 0:
            raise RuntimeError(
                "COLMAP exited with the non-zero return code {}.".format(
                    colmap_process.returncode))

        logger.log("COLMAP finished processing the video.")
    else:
        logger.log(
            "Found COLMAP reconstruction workspace folder at {}.".format(
                colmap_workspace_path))