Example #1
def benchmark_train(args):
    cfg = setup(args)
    model = build_model(cfg)
    logger.info("Model:\n{}".format(model))
    if comm.get_world_size() > 1:
        model = DistributedDataParallel(
            model,
            device_ids=[comm.get_local_rank()],
            broadcast_buffers=False,
            find_unused_parameters=True,
        )
    optimizer = build_optimizer(cfg, model)
    checkpointer = DetectionCheckpointer(model, optimizer=optimizer)
    checkpointer.load(cfg.MODEL.WEIGHTS)

    cfg.defrost()
    cfg.DATALOADER.NUM_WORKERS = 0
    data_loader = build_detection_train_loader(cfg)
    dummy_data = list(itertools.islice(data_loader, 100))

    def f():
        data = DatasetFromList(dummy_data, copy=False)
        while True:
            yield from data

    max_iter = 400
    trainer = SimpleTrainer(model, f(), optimizer)
    trainer.register_hooks(
        [hooks.IterationTimer(), hooks.PeriodicWriter([CommonMetricPrinter(max_iter)])]
    )
    trainer.train(1, max_iter)
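The benchmark above caches 100 batches with itertools.islice and replays them forever through f(), so data-loading cost is excluded from the timed iterations. A framework-free sketch of that pattern (the loader below is a stand-in iterator, not a detectron2 data loader):

import itertools

def cached_infinite_loader(data_loader, num_cached=100):
    # Materialize a fixed number of batches once, then replay them forever.
    cached = list(itertools.islice(data_loader, num_cached))
    while True:
        yield from cached

loader = iter(range(1000))                       # stand-in for a real data loader
stream = cached_infinite_loader(loader, num_cached=3)
print([next(stream) for _ in range(7)])          # [0, 1, 2, 0, 1, 2, 0]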
Example #2
def local_master_get_detection_dataset_dicts(*args, **kwargs):
    logger.info("Only load dataset dicts on local master process ...")

    dataset_dicts = (d2_get_detection_dataset_dicts(*args, **kwargs)
                     if comm.get_local_rank() == 0 else [])
    comm.synchronize()
    dataset_size = comm.all_gather(len(dataset_dicts))[0]

    if comm.get_local_rank() != 0:
        dataset_dicts = _FakeListObj(dataset_size)
    return dataset_dicts
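_FakeListObj is referenced above but not defined in this snippet. A hypothetical reconstruction, assuming non-master processes only need an object that reports the real dataset's length:

class _FakeListObj:
    # Placeholder for the dataset dicts on non-master processes: it has the
    # right length but holds no data, so indexing it is an error by design.
    def __init__(self, size):
        self.size = size

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        raise NotImplementedError(
            "dataset dicts are only materialized on the local master process"
        )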
Example #3
def run(args):
    cfg = setup(args)

    model = build_model(cfg)
    logger.info("Model:\n{}".format(model))

    # count number of parameters for model
    net_params = model.parameters()
    weight_count = 0
    for param in net_params:
        weight_count += np.prod(param.size())
    logger.info("Number of model parameters: %.0f" % weight_count)

    if cfg.EVAL_ONLY:
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=False)
        return do_test(cfg, model)

    distributed = comm.get_world_size() > 1
    if distributed:
        model = DistributedDataParallel(model,
                                        device_ids=[comm.get_local_rank()],
                                        broadcast_buffers=False)

    return do_train(cfg, model, resume=args.resume)
Example #4
def main(sm_args):

    cfg = _setup(sm_args)

    model = build_model(cfg)

    # Convert string params to booleans, since SageMaker does not currently
    # support boolean flags as hyperparameters.
    eval_only = sm_args.eval_only == "True"
    resume = sm_args.resume == "True"

    if eval_only:
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=resume)
        return do_test(cfg, model)

    distributed = comm.get_world_size() > 1

    if distributed:
        model = DistributedDataParallel(model,
                                        device_ids=[comm.get_local_rank()],
                                        broadcast_buffers=False)

    do_train(cfg, model, resume=resume)
    do_test(cfg, model)

    if sm_args.current_host == sm_args.hosts[0]:
        return _save_model(model)
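Since SageMaker passes hyperparameters as strings, the example compares against the literal "True". An alternative sketch (not taken from the example) that does the conversion at argument-parsing time with argparse:

import argparse

def str2bool(value: str) -> bool:
    # Accept the common textual spellings of a boolean flag.
    if value.lower() in ("true", "1", "yes"):
        return True
    if value.lower() in ("false", "0", "no"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean, got {value!r}")

parser = argparse.ArgumentParser()
parser.add_argument("--eval-only", type=str2bool, default=False)
parser.add_argument("--resume", type=str2bool, default=True)
args = parser.parse_args(["--eval-only", "True", "--resume", "False"])
assert args.eval_only is True and args.resume is False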
Example #5
    def __init__(self, cfg):
        """
        Args:
            cfg (CfgNode):
        """
        logger = logging.getLogger("detectron2")
        if not logger.isEnabledFor(logging.INFO):  # setup_logger is not called for d2
            setup_logger()
        # Assume these objects must be constructed in this order.
        model = self.build_model(cfg)
        optimizer = self.build_optimizer(cfg, model)
        data_loader = self.build_train_loader(cfg)

        # For training, wrap with DDP. But don't need this for inference.
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(
                model, device_ids=[comm.get_local_rank()], broadcast_buffers=False
            )
        super().__init__(model, data_loader, optimizer)

        self.scheduler = self.build_lr_scheduler(cfg, optimizer)
        # Assume no other objects need to be checkpointed.
        # We can later make it checkpoint the stateful hooks
        self.checkpointer = DetectionCheckpointer(
            # Assume you want to save checkpoints together with logs/statistics
            model,
            cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=self.scheduler,
        )
        self.start_iter = 0
        self.max_iter = cfg.SOLVER.MAX_ITER
        self.cfg = cfg

        self.register_hooks(self.build_hooks())
Example #6
    def __init__(self, cfg):
        super().__init__(cfg)
        # init Meta Reweighter
        learner = Learner(cfg.MODEL.ROI_HEADS.NUM_CLASSES)
        learner.to(torch.device(cfg.MODEL.DEVICE))
        if comm.get_world_size() > 1:
            learner = DistributedDataParallel(
                learner,
                device_ids=[comm.get_local_rank()],
                broadcast_buffers=False,
            )
        self.learner = learner
        if comm.get_world_size() > 1:
            box_predictor = self.model.module.roi_heads.box_predictor
        else:
            box_predictor = self.model.roi_heads.box_predictor
        if isinstance(box_predictor, torch.nn.ModuleList):
            for predictor in box_predictor:
                predictor.register_meta_reweigher(self.learner)
        else:
            box_predictor.register_meta_reweigher(self.learner)
        self.optimizer_meta = torch.optim.Adam(
            self.learner.parameters(), lr=0.01)
        meta_data_loader = self.build_meta_loader(cfg)
        self._meta_data_loader_iter = iter(meta_data_loader)
Example #7
def main(args):
    cfg = setup(args)
    for d in ["train", "val"]:
        # train has 6998 images, val has 1199 images
        DatasetCatalog.register(
            "chefCap_" + d, lambda d=d: get_chefcap_image_dicts("data/" + d))
        MetadataCatalog.get("chefCap_" + d).set(
            thing_classes=['face-head', 'mask-head', 'face-cap', 'mask-cap'])
        if d == 'val':
            MetadataCatalog.get("chefCap_val").evaluator_type = "pascal_voc"
            MetadataCatalog.get("chefCap_val").year = 2012
            MetadataCatalog.get(
                "chefCap_val").dirname = "/opt/work/chefCap/data/val"

    model = build_model(cfg)
    logger.info("Model:\n{}".format(model))
    # if args.eval_only:
    #     DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
    #         cfg.MODEL.WEIGHTS, resume=args.resume
    #     )
    #     return do_test(cfg, model)

    distributed = comm.get_world_size() > 1
    if distributed:
        model = DistributedDataParallel(model,
                                        device_ids=[comm.get_local_rank()],
                                        broadcast_buffers=False)

    return do_train(cfg, model, resume=args.resume)
Example #8
def main(args):
    cfg = setup(args)
    model = build_model(cfg)

    register_coco_instances("firevysor_train", {},
                            "data/Split_CleanedImage/train_annot.json",
                            "data/Split_CleanedImage/train")
    register_coco_instances("firevysor_val", {},
                            "data/Split_CleanedImage/val_annot.json",
                            "data/Split_CleanedImage/val")
    register_coco_instances("hardcases_val", {},
                            "data/annotations/hard_cases.json",
                            "data/hard_cases")

    if args.eval_only:
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=args.resume)
        return do_test(cfg, model)

    distributed = comm.get_world_size() > 1
    if distributed:
        model = DistributedDataParallel(model,
                                        device_ids=[comm.get_local_rank()],
                                        broadcast_buffers=False)

    do_train(cfg, model, val_set='firevysor_val')
    return do_test(cfg, model)
Example #9
    def __init__(self, cfg):
        """
        Args:
            cfg (CfgNode):
        Use the custom checkpointer, which loads other backbone models
        with matching heuristics.
        """
        cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size())
        model = self.build_model(cfg)
        optimizer = self.build_optimizer(cfg, model)
        data_loader = self.build_train_loader(cfg)

        if comm.get_world_size() > 1:
            model = DistributedDataParallel(model,
                                            device_ids=[comm.get_local_rank()],
                                            broadcast_buffers=False)

        TrainerBase.__init__(self)
        self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else
                         SimpleTrainer)(model, data_loader, optimizer)

        self.scheduler = self.build_lr_scheduler(cfg, optimizer)
        self.checkpointer = DetectionCheckpointer(
            model,
            cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=self.scheduler,
        )
        self.start_iter = 0
        self.max_iter = cfg.SOLVER.MAX_ITER
        self.cfg = cfg

        self.register_hooks(self.build_hooks())
Example #10
def main(args):
    cfg = setup(args)

    # Technically, these mappings should live in a dataset registry object,
    # similar to how ~/detectron2/data/datasets/builtin.py works.
    if cfg.DATASETS.TRAIN == "COCO_n_LVIS_train":
        JSON_ANNOTATION = "./datasets/coco/annotations/COCO_n_LVIS/COCO_n_LVIS_train.json"
        IMG_ROOT = "./datasets/coco/train2017"
    elif cfg.DATASETS.TRAIN == "LVIS80_train":
        JSON_ANNOTATION = "./datasets/coco/annotations/LVIS80/LVIS80_train.json"
        IMG_ROOT = "./datasets/coco/train2017"
    else:
        raise NotImplementedError("Unknown custom dataset: {}".format(
            cfg.DATASETS.TRAIN))

    from detectron2.data.datasets import register_coco_instances
    register_coco_instances(DATA, {}, JSON_ANNOTATION, IMG_ROOT)

    model = build_model(cfg)
    logger.info("Model:\n{}".format(model))

    distributed = comm.get_world_size() > 1
    if distributed:
        model = DistributedDataParallel(model,
                                        device_ids=[comm.get_local_rank()],
                                        broadcast_buffers=False)

    do_train(cfg, model)

    return OrderedDict(
    )  # This is what do_test() would have returned in absence of any testing.
Example #11
    def __init__(self, cfg):
        
        self.logger = logging.getLogger("detectron2")

        model = self.build_model(cfg)
        optimizer = self.build_optimizer(cfg, model)
        
        self.model = model
        self.optimizer = optimizer
        self.al_dataset = self.build_al_dataset(cfg)
        self.object_fusion = ObjectFusion(cfg)
        # It should be moved to ObjectActiveLearningTrainer later when

        # For training, wrap with DDP. But don't need this for inference.
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(
                model, device_ids=[comm.get_local_rank()], broadcast_buffers=False
            )
        
        self.scheduler = self.build_lr_scheduler(cfg, optimizer)
        # Assume no other objects need to be checkpointed.
        # We can later make it checkpoint the stateful hooks
        self.checkpointer = DetectionCheckpointer(
            # Assume you want to save checkpoints together with logs/statistics
            model,
            cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=self.scheduler,
        )
        self.cfg = cfg
Example #12
    def __init__(self, cfg):
        """
        Args:
            cfg (CfgNode):
        Use the custom checkpointer, which loads other backbone models
        with matching heuristics.
        """
        super().__init__(cfg)
        # Assume these objects must be constructed in this order.
        model = self.build_model(cfg)
        optimizer = self.build_optimizer(cfg, model)
        data_loader = self.build_train_loader(cfg)

        # For training, wrap with DDP. But don't need this for inference.
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(
                model, device_ids=[comm.get_local_rank()], broadcast_buffers=False
            )
        #super(DefaultTrainer, self).__init__(model, data_loader, optimizer)

        self.scheduler = self.build_lr_scheduler(cfg, optimizer)
        # Assume no other objects need to be checkpointed.
        # We can later make it checkpoint the stateful hooks
        self.checkpointer = AdetCheckpointer(
            # Assume you want to save checkpoints together with logs/statistics
            model,
            cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=self.scheduler,
        )
        self.start_iter = 0
        self.max_iter = cfg.SOLVER.MAX_ITER
        self.cfg = cfg

        self.register_hooks(self.build_hooks())
Example #13
    def reset_model(self, cfg, model):
        """
        :return: except data_loader, reset the model
        """
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(
                model, device_ids=[comm.get_local_rank()], broadcast_buffers=False
            )
        del self.model
        self.model = model

        optimizer = self.build_optimizer(cfg, model)
        del self.optimizer
        self.optimizer = optimizer

        scheduler = self.build_lr_scheduler(cfg, optimizer)
        del self.scheduler
        self.scheduler = scheduler

        checkpointer = DetectionCheckpointer(
            # Assume you want to save checkpoints together with logs/statistics
            model,
            cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=self.scheduler,
        )
        del self.checkpointer
        self.checkpointer = checkpointer

        self.start_iter = 0
        # self.max_iter = cfg.SOLVER.MAX_ITER
        self.cfg = cfg

        self._hooks = []
        self.register_hooks(self.build_hooks())
Example #14
def main(args):

    # Create the config file
    cfg = setup(args)

    # Build the model
    model = build_model(cfg)

    # Log what's going on
    logger.info("Model:\n{}".format(model))

    # TODO: Fix this (if it doesn't work)
    #wandb.watch(model, log="all")

    # Only do evaluation if the args say so
    if args.eval_only:
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=args.resume)
        return do_test(cfg, model)

    # Do distributed training? (depends on number of GPUs available)
    distributed = comm.get_world_size() > 1
    if distributed:
        # Put the model on multiple devices if available
        model = DistributedDataParallel(model,
                                        device_ids=[comm.get_local_rank()],
                                        broadcast_buffers=False)

    # Train the model
    do_train(cfg, model)
    # TODO - May want to evaluate in a different step?
    return do_test(cfg, model)
Example #15
    def __init__(self, cfg, model=None, data_loader=None):

        if model is None:
            model = self.build_model(cfg)
        optimizer = self.build_optimizer(cfg, model)
        if data_loader is not None:
            self.data_loader = data_loader
            self.data_len = len(data_loader.dataset._dataset._lst)
        else:
            self.data_loader, self.data_len = self.build_train_loader(cfg)
        # For training, wrap with DDP. But don't need this for inference.
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(
                model, device_ids=[comm.get_local_rank()], broadcast_buffers=False
            )
        super().__init__(model, self.data_loader, optimizer)

        self.scheduler = self.build_lr_scheduler(cfg, optimizer)
        # Assume no other objects need to be checkpointed.
        # We can later make it checkpoint the stateful hooks
        self.checkpointer = DetectionCheckpointer(
            # Assume you want to save checkpoints together with logs/statistics
            model,
            cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=self.scheduler,
        )
        self.start_iter = 0
        self.max_iter = int((270000 * self.data_len) / 45174)
        self.cfg = cfg
        self.register_hooks(self.build_hooks())
Example #16
    def __init__(self, cfg):
        """
        Args:
            cfg (CfgNode):
        Use the custom checkpointer, which loads other backbone models
        with matching heuristics.
        """
        # Assume these objects must be constructed in this order.
        model = self.build_model(cfg)
        optimizer = self.build_optimizer(cfg, model)
        data_loader = self.build_train_loader(cfg)

        # Load GAN model
        generator = esrgan_model.GeneratorRRDB(channels=3,
                                               filters=64,
                                               num_res_blocks=23).to(device)
        discriminator = esrgan_model.Discriminator(
            input_shape=(3, *hr_shape)).to(device)
        feature_extractor = esrgan_model.FeatureExtractor().to(device)
        feature_extractor.eval()

        # GAN losses
        criterion_GAN = torch.nn.BCEWithLogitsLoss().to(device)
        criterion_content = torch.nn.L1Loss().to(device)
        criterion_pixel = torch.nn.L1Loss().to(device)

        # GAN optimizers
        optimizer_G = torch.optim.Adam(generator.parameters(),
                                       lr=.0002,
                                       betas=(.9, .999))
        optimizer_D = torch.optim.Adam(discriminator.parameters(),
                                       lr=.0002,
                                       betas=(.9, .999))

        # For training, wrap with DDP. But don't need this for inference.
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(model,
                                            device_ids=[comm.get_local_rank()],
                                            broadcast_buffers=False)
        super(DefaultTrainer,
              self).__init__(model, data_loader, optimizer, discriminator,
                             generator, feature_extractor, optimizer_G,
                             optimizer_D, criterion_pixel, criterion_content,
                             criterion_GAN)

        self.scheduler = self.build_lr_scheduler(cfg, optimizer)
        # Assume no other objects need to be checkpointed.
        # We can later make it checkpoint the stateful hooks
        self.checkpointer = AdetCheckpointer(
            # Assume you want to save checkpoints together with logs/statistics
            model,
            cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=self.scheduler,
        )
        self.start_iter = 0
        self.max_iter = cfg.SOLVER.MAX_ITER
        self.cfg = cfg

        self.register_hooks(self.build_hooks())
Example #17
    def __init__(self, cfg):
        logger = logging.getLogger("detectron2")
        if not logger.isEnabledFor(logging.INFO):
            setup_logger()
        cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size())
        # Assume these objects must be constructed in this order.
        model = self.build_model(cfg)
        optimizer = self.build_optimizer(cfg, model)
        data_loader, num_per_epoch = self.build_train_loader(cfg)

        # update iteration cfg to epoch cfg
        if cfg.SOLVER.EPOCH.ENABLED:
            cfg = self.adjust_epoch_to_iter(cfg, num_per_epoch)

        # For training, wrap with DDP. But don't need this for inference.
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(model, device_ids=[comm.get_local_rank()], broadcast_buffers=False)
        
        super(DefaultTrainer, self).__init__(model, data_loader, optimizer)

        self.scheduler = self.build_lr_scheduler(cfg, optimizer)
        # Assume no other objects need to be checkpointed.
        # We can later make it checkpoint the stateful hooks
        self.checkpointer = DetectionCheckpointer(
            # Assume you want to save checkpoints together with logs/statistics
            model,
            cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=self.scheduler,
        )

        self.start_iter = 0
        self.max_iter = cfg.SOLVER.MAX_ITER
        self.cfg = cfg
        self.register_hooks(self.build_hooks())
Example #18
def trainer(args, visual_, visual_threshold):
    cfg = setup_origin_configs(args)
    print('cfg', cfg)
    regist_datasets(cfg)
    model = build_model(cfg)

    if visual_:
        # defaults to 0.05 if not set
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = visual_threshold
        cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, args.best_model_name)
        visual(cfg)
        return None

    logger.info("Model:\n{}".format(model))
    if args.eval_only:
        # load pretrained model weights:
        cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, args.best_model_name)
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=args.resume)
        return do_test(cfg, model)

    distributed = comm.get_world_size() > 1
    if distributed:
        model = DistributedDataParallel(model,
                                        device_ids=[comm.get_local_rank()],
                                        broadcast_buffers=False)

    do_train(cfg, model)
    return do_test(cfg, model)
Example #19
def benchmark_train(args):
    cfg = setup(args)
    model = build_model(cfg)
    logger.info("Model:\n{}".format(model))
    if comm.get_world_size() > 1:
        model = DistributedDataParallel(model,
                                        device_ids=[comm.get_local_rank()],
                                        broadcast_buffers=False)
    optimizer = build_optimizer(cfg, model)
    checkpointer = DetectionCheckpointer(model, optimizer=optimizer)
    checkpointer.load(cfg.MODEL.WEIGHTS)

    cfg.defrost()
    cfg.DATALOADER.NUM_WORKERS = 2
    data_loader = build_detection_train_loader(cfg)
    dummy_data = list(itertools.islice(data_loader, 100))

    def f():
        data = DatasetFromList(dummy_data, copy=False, serialize=False)
        while True:
            yield from data

    max_iter = 400
    trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)(
        model, f(), optimizer)
    trainer.register_hooks([
        hooks.IterationTimer(),
        hooks.PeriodicWriter([CommonMetricPrinter(max_iter)]),
        hooks.TorchProfiler(lambda trainer: trainer.iter == max_iter - 1,
                            cfg.OUTPUT_DIR,
                            save_tensorboard=True),
    ])
    trainer.train(1, max_iter)
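Example #19 switches between AMPTrainer and SimpleTrainer based on cfg.SOLVER.AMP.ENABLED. As a rough illustration of what the AMP branch amounts to under the hood, here is a toy sketch using PyTorch's torch.cuda.amp utilities (not detectron2's AMPTrainer); it falls back to full precision on CPU:

import torch
import torch.nn.functional as F

use_amp = torch.cuda.is_available()
device = "cuda" if use_amp else "cpu"
model = torch.nn.Linear(8, 1).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

x = torch.randn(16, 8, device=device)
y = torch.randn(16, 1, device=device)
for _ in range(3):
    optimizer.zero_grad()
    # autocast runs the forward pass in mixed precision when enabled.
    with torch.cuda.amp.autocast(enabled=use_amp):
        loss = F.mse_loss(model(x), y)
    # GradScaler scales the loss to avoid underflow in fp16 gradients.
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()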
Example #20
    def __init__(self, cfg):
        """
        Args:
            cfg (CfgNode):
        """
        # Assume these objects must be constructed in this order.
        # self.apply_mul_opts = True if cfg.MODEL.ROI_MASK_HEAD.RECON_NET.NAME != "" else False
        self.apply_mul_opts = False
        model = self.build_model(cfg)
        if self.apply_mul_opts:
            optimizer_main = self.build_optimizer(cfg, model, ty_opt="M")
            optimizer_recon = self.build_optimizer(cfg, model, ty_opt="A")
            optimizer = [optimizer_main, optimizer_recon]

            self.scheduler_main = self.build_lr_scheduler(cfg, optimizer_main)
            self.scheduler_recon = self.build_lr_scheduler(
                cfg, optimizer_recon)
            self.checkpointer = DetectionCheckpointer(
                # Assume you want to save checkpoints together with logs/statistics
                model,
                cfg.OUTPUT_DIR,
                optimizer_gen=optimizer_main,
                optimizer_dis=optimizer_recon,
                scheduler_gen=self.scheduler_main,
                scheduler_dis=self.scheduler_recon,
            )
        else:
            optimizer = self.build_optimizer(cfg,
                                             model,
                                             ty_opt=cfg.SOLVER.OPT_TYPE)
            # optimizer = self.build_optimizer(cfg, model, ty_opt='SGD')
            self.scheduler = self.build_lr_scheduler(cfg, optimizer)
            # Assume no other objects need to be checkpointed.
            # We can later make it checkpoint the stateful hooks
            self.checkpointer = DetectionCheckpointer(
                # Assume you want to save checkpoints together with logs/statistics
                model,
                cfg.OUTPUT_DIR,
                optimizer=optimizer,
                scheduler=self.scheduler,
            )

        logger = logging.getLogger(__name__)
        logger.info("optimizer information:{}".format(type(optimizer)))
        data_loader = self.build_train_loader(cfg)

        # For training, wrap with DDP. But don't need this for inference.
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(model,
                                            device_ids=[comm.get_local_rank()],
                                            broadcast_buffers=False)
        super().__init__(model, data_loader, optimizer, cfg)

        self.start_iter = 0
        self.max_iter = cfg.SOLVER.MAX_ITER
        self.cfg = cfg

        self.register_hooks(self.build_hooks())
Example #21
def convert_coco_text_to_coco_detection_json(
    source_json: str,
    target_json: str,
    set_type: Optional[str] = None,
    min_img_size: int = 100,
    text_cat_id: int = 1,
) -> Dict:
    """
    This function converts a COCOText style JSON to a COCODetection style
    JSON.
    For COCOText see: https://vision.cornell.edu/se3/coco-text-2/
    For COCODetection see: http://cocodataset.org/#overview
    """
    with PathManager.open(source_json, "r") as f:
        coco_text_json = json.load(f)

    coco_text_json["annotations"] = list(coco_text_json["anns"].values())
    coco_text_json["images"] = list(coco_text_json["imgs"].values())
    if set_type is not None:
        # COCO Text style JSONs often mix test, train, and val sets.
        # We need to make sure we only use the data type we want.
        coco_text_json["images"] = [
            x for x in coco_text_json["images"] if x["set"] == set_type
        ]
    coco_text_json["categories"] = [{"name": "text", "id": text_cat_id}]
    del coco_text_json["cats"]
    del coco_text_json["imgs"]
    del coco_text_json["anns"]
    for ann in coco_text_json["annotations"]:
        ann["category_id"] = text_cat_id
        ann["iscrowd"] = 0
        # Don't evaluate the model on illegible words
        if set_type == "val" and ann["legibility"] != "legible":
            ann["ignore"] = True
    # Some datasets seem to have extremely small images which break downstream
    # operations. If min_img_size is set, we can remove these.
    coco_text_json["images"] = [
        x for x in coco_text_json["images"]
        if x["height"] >= min_img_size and x["width"] >= min_img_size
    ]
    # Remap image_ids if necessary
    if isinstance(coco_text_json["images"][0]["id"], str):
        image_id_remap = {
            x["id"]: id_no
            for (id_no, x) in enumerate(coco_text_json["images"])
        }
        for x in coco_text_json["images"]:
            x["id"] = image_id_remap[x["id"]]
        for x in coco_text_json["annotations"]:
            if x["image_id"] in image_id_remap:
                x["image_id"] = image_id_remap[x["image_id"]]

    PathManager.mkdirs(os.path.dirname(target_json))
    if comm.get_local_rank() == 0:
        with PathManager.open(target_json, "w") as f:
            json.dump(coco_text_json, f)

    return coco_text_json
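For reference, a minimal sketch of the COCO-detection style layout that the converter above produces (field names follow the standard COCO format; the values here are made up):

import json

coco_detection_like = {
    "images": [
        {"id": 0, "file_name": "img_0.jpg", "height": 480, "width": 640},
    ],
    "annotations": [
        {"id": 0, "image_id": 0, "category_id": 1,
         "bbox": [10, 20, 100, 50], "area": 5000, "iscrowd": 0},
    ],
    "categories": [{"name": "text", "id": 1}],
}
print(json.dumps(coco_detection_like, indent=2))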
Example #22
    def __init__(self, cfg):
        """
        Args:
            cfg (CfgNode):
        """
        logger = logging.getLogger("detectron2")
        if not logger.isEnabledFor(logging.INFO):  # setup_logger is not called for d2
            logger = setup_logger()
        cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size())
        # Assume these objects must be constructed in this order.

        data_loader = self.build_train_loader(cfg)
        cfg = self.auto_scale_hyperparams(cfg, data_loader)

        model = self.build_model(cfg)
        # KD or not
        self.kd = cfg.MODEL.CENTERNET.KD.ENABLED
        self.model_t = None
        if self.kd:
            self.model_t = self.build_teacher_model(cfg)

        optimizer = self.build_optimizer(cfg, model)

        # For training, wrap with DDP. But don't need this for inference.
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(
                model, device_ids=[comm.get_local_rank()], broadcast_buffers=False
            )
        super(DefaultTrainer, self).__init__(model, data_loader, optimizer)

        self.scheduler = self.build_lr_scheduler(cfg, optimizer)
        # Assume no other objects need to be checkpointed.
        # We can later make it checkpoint the stateful hooks
        self.checkpointer = DetectionCheckpointer(
            # Assume you want to save checkpoints together with logs/statistics
            model,
            cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=self.scheduler,
        )
        self.start_iter = 0

        if cfg.SOLVER.SWA.ENABLED:
            self.max_iter = cfg.SOLVER.MAX_ITER + cfg.SOLVER.SWA.ITER
        else:
            self.max_iter = cfg.SOLVER.MAX_ITER

        self.cfg = cfg
        self.skip_loss = cfg.MODEL.CENTERNET.LOSS.SKIP_LOSS
        self.history_loss = 10e8
        self.skip_weight = cfg.MODEL.CENTERNET.LOSS.SKIP_WEIGHT

        self.communism = cfg.MODEL.CENTERNET.LOSS.COMMUNISM.ENABLE
        self.communism_cls_loss = cfg.MODEL.CENTERNET.LOSS.COMMUNISM.CLS_LOSS
        self.communism_wh_loss = cfg.MODEL.CENTERNET.LOSS.COMMUNISM.WH_LOSS
        self.communism_off_loss = cfg.MODEL.CENTERNET.LOSS.COMMUNISM.OFF_LOSS

        self.register_hooks(self.build_hooks())
Example #23
def main(
    cfg,
    output_dir,
    runner=None,
    eval_only=False,
    # NOTE: always enable resume when running on cluster
    resume=True,
):
    setup_after_launch(cfg, output_dir, runner)

    model = runner.build_model(cfg)
    logger.info("Model:\n{}".format(model))

    if eval_only:
        checkpointer = runner.build_checkpointer(cfg,
                                                 model,
                                                 save_dir=output_dir)
        # checkpointer.resume_or_load() will skip all additional checkpointables,
        # which may not be desired (e.g. EMA states)
        if resume and checkpointer.has_checkpoint():
            checkpoint = checkpointer.resume_or_load(cfg.MODEL.WEIGHTS,
                                                     resume=resume)
        else:
            checkpoint = checkpointer.load(cfg.MODEL.WEIGHTS)
        train_iter = checkpoint.get("iteration", None)
        model.eval()
        metrics = runner.do_test(cfg, model, train_iter=train_iter)
        print_metrics_table(metrics)
        return {
            "accuracy": metrics,
            "model_configs": {},
            "metrics": metrics,
        }

    model = create_ddp_model(
        model,
        fp16_compression=cfg.MODEL.DDP_FP16_GRAD_COMPRESS,
        device_ids=None
        if cfg.MODEL.DEVICE == "cpu" else [comm.get_local_rank()],
        broadcast_buffers=False,
        find_unused_parameters=cfg.MODEL.DDP_FIND_UNUSED_PARAMETERS,
    )

    trained_cfgs = runner.do_train(cfg, model, resume=resume)
    metrics = runner.do_test(cfg, model)
    print_metrics_table(metrics)

    # dump config files for trained models
    trained_model_configs = dump_trained_model_configs(cfg.OUTPUT_DIR,
                                                       trained_cfgs)
    return {
        # for e2e_workflow
        "accuracy": metrics,
        # for unit_workflow
        "model_configs": trained_model_configs,
        "metrics": metrics,
    }
Example #24
    def cuda(self, device):
        self.model.to(torch.device(device))
        logger = logging.getLogger(__name__)
        logger.info("Model:\n{}".format(self.model))
        # For training, wrap with DDP. But don't need this for inference.
        if comm.get_world_size() > 1:
            self.model = DistributedDataParallel(
                self.model, device_ids=[comm.get_local_rank()], broadcast_buffers=False
            )
Example #25
    def wrap_model_with_ddp(self, cfg, model):
        # works with PR: https://github.com/facebookresearch/detectron2/pull/1820
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(
                model,
                device_ids=[comm.get_local_rank()],
                broadcast_buffers=False,
                find_unused_parameters=True
            )
        return model
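comm.get_local_rank() is used throughout these examples but never shown. A minimal stand-in, assuming the processes were launched with torchrun (which exports LOCAL_RANK for every worker):

import os
import torch

def get_local_rank() -> int:
    # torchrun (and torch.distributed.launch --use_env) set LOCAL_RANK;
    # default to 0 for single-process runs.
    return int(os.environ.get("LOCAL_RANK", 0))

# The local rank indexes the GPU on the current machine, which is why it is
# what device_ids expects when wrapping a model in DistributedDataParallel.
device = torch.device(f"cuda:{get_local_rank()}"
                      if torch.cuda.is_available() else "cpu")
print(device)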
Example #26
    def __init__(self, cfg, parser, mapper_object, isShuffleData):
        """
        Args:
            cfg (CfgNode):
        """
        self.isShuffleData = isShuffleData
        self.mapper_object = mapper_object
        logger = logging.getLogger("detectron2")
        if not logger.isEnabledFor(
                logging.INFO):  # setup_logger is not called for d2
            setup_logger()
        # Assume these objects must be constructed in this order.
        model = self.build_model(cfg)
        optimizer = self.build_optimizer(cfg, model)
        data_loader = self.build_train_loader(cfg, self.mapper_object,
                                              self.isShuffleData)
        # If we're shuffling data, we're not doing curriculum learning
        if (isShuffleData):
            curr_data_loader = None
        # If we're NOT shuffling, then we're doing curriculum learning
        else:
            curr_data_loader = my_build_detection_train_loader(
                cfg,
                mapper=mapper_object.train_mapper,
                isShuffleData=isShuffleData,
                curriculum_fraction=0.3)

        # For training, wrap with DDP. But don't need this for inference.
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(model,
                                            device_ids=[comm.get_local_rank()],
                                            broadcast_buffers=False)
        super().__init__(cfg, model, data_loader, curr_data_loader, optimizer)

        self.scheduler = self.build_lr_scheduler(cfg, optimizer)
        # Assume no other objects need to be checkpointed.
        # We can later make it checkpoint the stateful hooks
        self.checkpointer = DetectionCheckpointer(
            # Assume you want to save checkpoints together with logs/statistics
            model,
            cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=self.scheduler,
        )
        self.start_iter = 0
        self.max_iter = cfg.SOLVER.MAX_ITER
        self.cfg = cfg

        self.register_hooks(self.build_hooks())

        if (parser.accuracy == 0):
            self.isTrackAccuracy = False
        else:
            self.isTrackAccuracy = True
Example #27
    def __init__(self, cfg):
        """
        Args:
            cfg (CfgNode):
        Use the custom checkpointer, which loads other backbone models
        with matching heuristics.
        """
        # Assume these objects must be constructed in this order.
        dprint("build model")
        model = self.build_model(cfg)
        dprint('build optimizer')
        optimizer = self.build_optimizer(cfg, model)
        dprint("build train loader")
        data_loader = self.build_train_loader(cfg)

        images_per_batch = cfg.SOLVER.IMS_PER_BATCH
        if isinstance(data_loader, AspectRatioGroupedDataset):
            dataset_len = len(data_loader.dataset.dataset)
            iters_per_epoch = dataset_len // images_per_batch
        else:
            dataset_len = len(data_loader.dataset)
            iters_per_epoch = dataset_len // images_per_batch

        self.iters_per_epoch = iters_per_epoch
        total_iters = cfg.SOLVER.TOTAL_EPOCHS * iters_per_epoch
        dprint("images_per_batch: ", images_per_batch)
        dprint("dataset length: ", dataset_len)
        dprint("iters per epoch: ", iters_per_epoch)
        dprint("total iters: ", total_iters)

        # For training, wrap with DDP. But don't need this for inference.
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(model,
                                            device_ids=[comm.get_local_rank()],
                                            broadcast_buffers=False)
        super(DefaultTrainer, self).__init__(model, data_loader, optimizer)

        self.scheduler = self.build_lr_scheduler(cfg,
                                                 optimizer,
                                                 total_iters=total_iters)
        # Assume no other objects need to be checkpointed.
        # We can later make it checkpoint the stateful hooks
        self.checkpointer = AdetCheckpointer(
            # Assume you want to save checkpoints together with logs/statistics
            model,
            cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=self.scheduler,
        )
        self.start_iter = 0
        self.max_iter = total_iters  # NOTE: ignore cfg.SOLVER.MAX_ITER
        self.cfg = cfg

        self.register_hooks(self.build_hooks())
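Example #27 derives the iteration budget from an epoch budget: iters_per_epoch = dataset_len // IMS_PER_BATCH and max_iter = TOTAL_EPOCHS * iters_per_epoch. A quick arithmetic check with made-up numbers:

# Hypothetical numbers; only the arithmetic mirrors the snippet above.
dataset_len = 45174      # images in the training set
ims_per_batch = 16       # cfg.SOLVER.IMS_PER_BATCH
total_epochs = 12        # cfg.SOLVER.TOTAL_EPOCHS

iters_per_epoch = dataset_len // ims_per_batch
total_iters = total_epochs * iters_per_epoch
print(iters_per_epoch, total_iters)  # 2823 33876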
Example #28
    def wrap_model_with_ddp(self, cfg, model):
        """
        Returns:
            torch.nn.Module:

        Override this function if you'd like to do more with `torch.nn.parallel.DistributedDataParallel`,
        such as adding `find_unused_parameters=True`.
        """
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(model,
                                            device_ids=[comm.get_local_rank()],
                                            broadcast_buffers=False)
        return model
Example #29
def main(sm_args, world):

    cfg = _setup(sm_args)

    is_zero_rank = comm.get_local_rank() == 0

    trainer = DefaultTrainer(cfg)
    resume = sm_args.resume == "True"
    trainer.resume_or_load(resume=resume)
    trainer.train()

    if world["is_master"] and is_zero_rank:
        _save_model()
Example #30
    def __init__(self, cfg):
        """
        Args:
            cfg (CfgNode):
        """
        super().__init__()
        logger = logging.getLogger("detectron2")
        if not logger.isEnabledFor(
                logging.INFO):  # setup_logger is not called for d2
            setup_logger()  # torch.distributed is used to determine the actual number of GPUs in use
        # automatically adjust the settings according to the number of GPUs in use
        cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size())

        # Assume these objects must be constructed in this order.
        model = self.build_model(cfg)
        # build optimizer; the default is SGD, cfg.solver.optimizer is not used
        optimizer = self.build_optimizer(cfg, model)
        # Should this be self.optimizer = ...? It is not stored here because
        # the trainer actually used below is SimpleTrainer(...).
        # This train_loader returns a list[dict] rather than a (tensor, labels)
        # batch as in other tasks.
        data_loader = self.build_train_loader(cfg)

        # For training, wrap with DDP. But don't need this for inference.
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(model,
                                            device_ids=[comm.get_local_rank()],
                                            broadcast_buffers=False,
                                            find_unused_parameters=True)
        # cfg.SOLVER.AMP.ENABLED defaults to False
        # what is the purpose of this?
        self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else
                         SimpleTrainer)(model, data_loader, optimizer)
        # default: warmupMultistep
        self.scheduler = self.build_lr_scheduler(cfg, optimizer)

        # Assume no other objects need to be checkpointed.
        # We can later make it checkpoint the stateful hooks
        # checkpointer.save() / .load() save and load checkpoints
        self.checkpointer = DetectionCheckpointer(
            # Assume you want to save checkpoints together with logs/statistics
            model,
            cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=self.scheduler,
        )
        self.start_iter = 0
        self.max_iter = cfg.SOLVER.MAX_ITER
        self.cfg = cfg

        self.register_hooks(self.build_hooks())