Example #1
    def __init__(self, context: PyTorchTrialContext) -> None:
        self.context = context
        self.hparams = AttrDict(self.context.get_hparams())

        # If the backend is local, download the data in the rank 0 slot.
        if self.hparams.backend == "local":
            if self.context.distributed.get_local_rank() == 0:
                if not all([
                        os.path.isdir(os.path.join(self.hparams.data_dir, d))
                        for d in ["train2017", "val2017"]
                ]):
                    download_coco_from_source(self.hparams.data_dir)
            else:
                # Other slots wait until rank 0 has finished downloading,
                # which it signals by writing a done.txt file.
                while not os.path.isfile(
                        os.path.join(self.hparams.data_dir, "done.txt")):
                    time.sleep(10)

        # Build the model and configure postprocessors for evaluation.
        model, self.criterion, self.postprocessors = build_model(
            self.hparams, world_size=self.context.distributed.get_size())
        self.model = self.context.wrap_model(model)

        n_parameters = sum(p.numel() for p in model.parameters()
                           if p.requires_grad)
        print("number of params:", n_parameters)

        param_dicts = [
            {
                "params": [
                    p for n, p in self.model.named_parameters()
                    if "backbone" not in n and p.requires_grad
                ]
            },
            {
                "params": [
                    p for n, p in self.model.named_parameters()
                    if "backbone" in n and p.requires_grad
                ],
                "lr":
                self.hparams.lr_backbone,
            },
        ]
        self.optimizer = self.context.wrap_optimizer(
            torch.optim.AdamW(param_dicts,
                              lr=self.hparams.lr,
                              weight_decay=self.hparams.weight_decay))

        # Wrap the LR scheduler.
        self.lr_scheduler = self.context.wrap_lr_scheduler(
            torch.optim.lr_scheduler.StepLR(self.optimizer,
                                            self.hparams.lr_drop),
            step_mode=LRScheduler.StepMode.STEP_EVERY_EPOCH,
        )

        # Clip gradients only when a positive max norm is configured.
        self.clip_grads_fn = lambda x: (
            torch.nn.utils.clip_grad_norm_(x, self.hparams.clip_max_norm)
            if self.hparams.clip_max_norm > 0 else None)
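
The rank 0 branch above relies on download_coco_from_source to leave a done.txt marker in data_dir once the train2017 and val2017 folders are in place; the other local ranks simply poll for that file. The helper itself is not shown here, so the following is only a minimal sketch of that contract, assuming the public COCO zip URLs and a plain marker file:

import os
import urllib.request
import zipfile


def download_coco_from_source(data_dir: str) -> None:
    # Hypothetical sketch of the helper assumed above: fetch the COCO 2017
    # splits, unpack them into data_dir, and write the done.txt marker that
    # the non-zero local ranks are polling for.
    os.makedirs(data_dir, exist_ok=True)
    for split in ["train2017", "val2017"]:
        archive = os.path.join(data_dir, split + ".zip")
        urllib.request.urlretrieve(
            "http://images.cocodataset.org/zips/" + split + ".zip", archive)
        with zipfile.ZipFile(archive) as zf:
            zf.extractall(data_dir)
        os.remove(archive)
    with open(os.path.join(data_dir, "done.txt"), "w") as marker:
        marker.write("done")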
Example #2
    def __init__(self, context: PyTorchTrialContext) -> None:
        self.context = context
        self.hparams = AttrDict(self.context.get_hparams())

        # If the backend is local, download the data.
        if self.hparams.backend == "local":
            # Use a file lock so only one worker on each node does the download.
            with filelock.FileLock(
                    os.path.join(self.hparams.data_dir, "download.lock")):
                if not all([
                        os.path.isdir(os.path.join(self.hparams.data_dir, d))
                        for d in ["train2017", "val2017"]
                ]):
                    download_coco_from_source(self.hparams.data_dir)

        self.cat_ids = []

        # Build the model and configure postprocessors for evaluation.
        model, self.criterion, self.postprocessors = build_model(
            self.hparams, world_size=self.context.distributed.get_size())

        # Load checkpoint from DETR repo.
        if "warmstart" in self.hparams and self.hparams.warmstart:
            checkpoint = torch.hub.load_state_dict_from_url(
                url="https://dl.fbaipublicfiles.com/detr/detr-r50-e632da11.pth",
                map_location="cpu",
                check_hash=True,
            )

            # Remove class weights if finetuning.
            if "cat_ids" in self.hparams and len(self.hparams.cat_ids):
                del checkpoint["model"]["class_embed.weight"]
                del checkpoint["model"]["class_embed.bias"]
            model.load_state_dict(checkpoint["model"], strict=False)

        self.model = self.context.wrap_model(model)

        n_parameters = sum(p.numel() for p in model.parameters()
                           if p.requires_grad)
        print("number of params:", n_parameters)

        param_dicts = [
            {
                "params": [
                    p for n, p in self.model.named_parameters()
                    if "backbone" not in n and p.requires_grad
                ]
            },
            {
                "params": [
                    p for n, p in self.model.named_parameters()
                    if "backbone" in n and p.requires_grad
                ],
                "lr":
                self.hparams.lr_backbone,
            },
        ]
        self.optimizer = self.context.wrap_optimizer(
            torch.optim.AdamW(param_dicts,
                              lr=self.hparams.lr,
                              weight_decay=self.hparams.weight_decay))

        # Wrap the LR scheduler.
        self.lr_scheduler = self.context.wrap_lr_scheduler(
            torch.optim.lr_scheduler.StepLR(self.optimizer,
                                            self.hparams.lr_drop),
            step_mode=LRScheduler.StepMode.STEP_EVERY_EPOCH,
        )

        # Clip gradients only when a positive max norm is configured.
        self.clip_grads_fn = lambda x: (
            torch.nn.utils.clip_grad_norm_(x, self.hparams.clip_max_norm)
            if self.hparams.clip_max_norm > 0 else None)
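
All of these constructors read their settings from self.hparams, so the experiment configuration has to supply a matching hyperparameters section. The sketch below lists the fields touched by Examples #1 and #2 with illustrative values (roughly the DETR defaults), not the settings of any particular experiment:

# Illustrative hyperparameters only; real values come from the experiment
# configuration that Determined passes back via context.get_hparams().
hparams = AttrDict({
    "backend": "local",      # "local" triggers the COCO download path
    "data_dir": "/data/coco",
    "warmstart": True,       # Example #2: load the public DETR-R50 checkpoint
    "cat_ids": [],           # non-empty when finetuning on a category subset
    "lr": 1e-4,
    "lr_backbone": 1e-5,
    "weight_decay": 1e-4,
    "lr_drop": 200,          # StepLR step size, in epochs
    "clip_max_norm": 0.1,    # 0 disables gradient clipping
})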
Example #3
    def __init__(self, context: PyTorchTrialContext) -> None:
        self.context = context
        self.hparams = AttrDict(self.context.get_hparams())

        # If the backend is local, download the data in the rank 0 slot.
        if self.hparams.backend == "local":
            if self.context.distributed.get_local_rank() == 0:
                if not all([
                        os.path.isdir(os.path.join(self.hparams.data_dir, d))
                        for d in ["train2017", "val2017"]
                ]):
                    download_coco_from_source(self.hparams.data_dir)
            else:
                # Other slots wait until rank 0 has finished downloading,
                # which it signals by writing a done.txt file.
                while not os.path.isfile(
                        os.path.join(self.hparams.data_dir, "done.txt")):
                    time.sleep(10)

        self.cat_ids = []

        # Build the model and configure postprocessors for evaluation.
        model, self.criterion, self.postprocessors = build_model(
            self.hparams, world_size=self.context.distributed.get_size())

        # Load pretrained weights from the original repo, downloaded by
        # startup-hook.sh.
        if "warmstart" in self.hparams and self.hparams.warmstart:
            checkpoint = torch.load("model.ckpt")
            ckpt = checkpoint["model"]
            # Remove class weights if finetuning.
            if "cat_ids" in self.hparams and len(self.hparams.cat_ids):
                delete_keys = [k for k in ckpt if "class_embed" in k]
                for k in delete_keys:
                    del ckpt[k]
            model.load_state_dict(ckpt, strict=False)

        self.model = self.context.wrap_model(model)

        n_parameters = sum(p.numel() for p in self.model.parameters()
                           if p.requires_grad)
        print("number of params:", n_parameters)
        param_dicts = [
            {
                "params": [
                    p for n, p in self.model.named_parameters() if
                    not match_name_keywords(n, self.hparams.lr_backbone_names)
                    and not match_name_keywords(
                        n, self.hparams.lr_linear_proj_names)
                    and p.requires_grad
                ],
                "lr":
                self.hparams.lr,
            },
            {
                "params": [
                    p for n, p in self.model.named_parameters()
                    if match_name_keywords(n, self.hparams.lr_backbone_names)
                    and p.requires_grad
                ],
                "lr":
                self.hparams.lr_backbone,
            },
            {
                "params": [
                    p for n, p in self.model.named_parameters()
                    if match_name_keywords(n, self.hparams.lr_linear_proj_names)
                    and p.requires_grad
                ],
                "lr": self.hparams.lr * self.hparams.lr_linear_proj_mult,
            },
        ]

        if self.hparams.sgd:
            self.optimizer = self.context.wrap_optimizer(
                torch.optim.SGD(
                    param_dicts,
                    lr=self.hparams.lr,
                    momentum=0.9,
                    weight_decay=self.hparams.weight_decay,
                ))
        else:
            self.optimizer = self.context.wrap_optimizer(
                torch.optim.AdamW(
                    param_dicts,
                    lr=self.hparams.lr,
                    weight_decay=self.hparams.weight_decay,
                ))

        # Wrap the LR scheduler.
        self.lr_scheduler = self.context.wrap_lr_scheduler(
            torch.optim.lr_scheduler.StepLR(self.optimizer,
                                            self.hparams.lr_drop),
            step_mode=LRScheduler.StepMode.STEP_EVERY_EPOCH,
        )

        # Clip gradients only when a positive max norm is configured.
        self.clip_grads_fn = lambda x: (
            torch.nn.utils.clip_grad_norm_(x, self.hparams.clip_max_norm)
            if self.hparams.clip_max_norm > 0 else None)
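
Example #3 routes parameters into three learning-rate groups with a match_name_keywords helper that is not part of the snippet. In the Deformable DETR reference code this is just a substring match over the parameter name; a minimal sketch of that behavior:

def match_name_keywords(name: str, keywords) -> bool:
    # True if any of the keywords occurs in the parameter name, e.g.
    # match_name_keywords("backbone.0.body.layer1.0.conv1.weight",
    #                     ["backbone.0"]) -> True
    return any(keyword in name for keyword in keywords)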