Example #1
 def handle_batch(self, batch):
     model = get_nn_from_ddp_module(self.model)
     student, teacher = model["student"], model["teacher"]
     if self.is_train_loader:
         teacher.eval()
         set_requires_grad(teacher, False)
         t_outputs = teacher(
             batch["features"],
             output_hidden_states=self.output_hidden_states,
             return_dict=True,
         )
     s_outputs = student(
         batch["features"],
         output_hidden_states=self.output_hidden_states,
         return_dict=True,
     )
     self.batch["s_logits"] = s_outputs["logits"]
     if self.is_train_loader:
         self.batch["t_logits"] = t_outputs["logits"]
         if self.apply_probability_shift:
             self.batch["t_logits"] = probability_shift(
                 logits=self.batch["t_logits"], labels=self.batch["targets"]
             )
     if self.output_hidden_states:
         self.batch["s_hidden_states"] = s_outputs["hidden_states"]
         if self.is_train_loader:
             self.batch["t_hidden_states"] = t_outputs["hidden_states"]
Example #2
 def _handle_batch_distillation(self, batch):
     model = get_nn_from_ddp_module(self.model)
     student, teacher = model["student"], model["teacher"]
     if self.is_train_loader:
         teacher.eval()
         set_requires_grad(teacher, False)
         t_outputs = teacher(
             batch["features"],
             output_hidden_states=self.output_hidden_states,
             return_dict=True,
         )
     s_outputs = student(
         batch["features"],
         output_hidden_states=self.output_hidden_states,
         return_dict=True,
     )
     self.batch["s_logits"] = s_outputs["logits"]
     if self.is_train_loader:
         self.batch["t_logits"] = t_outputs["logits"]
     if self.output_hidden_states and self.is_train_loader:
         self.batch["s_hidden_states"] = s_outputs["hidden_states"]
         self.batch["t_hidden_states"] = t_outputs["hidden_states"]
     self.batch_metrics["task_loss"] = self.criterion(
         batch["s_logits"], batch["targets"])
     self.batch["logits"] = self.batch[
         "s_logits"]  # for accuracy callback or other metric callback
Example #3
    def __init__(
        self,
        arch: str = "resnet18",
        pretrained: bool = True,
        frozen: bool = True,
        pooling: str = None,
        pooling_kwargs: dict = None,
        cut_layers: int = 2,
        state_dict: Union[dict, str, Path] = None,
    ):
        """
        Args:
            arch: Name for resnet. Have to be one of
                resnet18, resnet34, resnet50, resnet101, resnet152
            pretrained: If True, returns a model pre-trained on ImageNet
            frozen: If frozen, sets requires_grad to False
            pooling: pooling
            pooling_kwargs: params for pooling
            state_dict (Union[dict, str, Path]): Path to ``torch.Model``
                or a dict containing parameters and persistent buffers.
        """
        super().__init__()

        resnet = torchvision.models.__dict__[arch](pretrained=pretrained)
        if state_dict is not None:
            if isinstance(state_dict, (Path, str)):
                state_dict = torch.load(str(state_dict))
            resnet.load_state_dict(state_dict)

        modules = list(resnet.children())[:-cut_layers]  # delete last layers

        if frozen:
            for module in modules:
                utils.set_requires_grad(module, requires_grad=False)

        if pooling is not None:
            pooling_kwargs = pooling_kwargs or {}
            pooling_layer_fn = MODULE.get(pooling)
            pooling_layer = (
                pooling_layer_fn(
                    in_features=resnet.fc.in_features, **pooling_kwargs
                )
                if "attn" in pooling.lower()
                else pooling_layer_fn(**pooling_kwargs)
            )
            modules += [pooling_layer]

            if hasattr(pooling_layer, "out_features"):
                out_features = pooling_layer.out_features(
                    in_features=resnet.fc.in_features
                )
            else:
                out_features = None
        else:
            out_features = resnet.fc.in_features

        modules += [Flatten()]
        self.out_features = out_features

        self.encoder = nn.Sequential(*modules)
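A minimal usage sketch for the encoder above (illustrative: it assumes this `__init__` belongs to a class named `ResnetEncoder` and that a `GlobalMaxPool2d` module is registered under that name in `MODULE`):

import torch

# class and registry names below are assumptions for illustration
encoder = ResnetEncoder(
    arch="resnet18",
    pretrained=True,
    frozen=True,
    pooling="GlobalMaxPool2d",
)
with torch.no_grad():
    features = encoder.encoder(torch.randn(2, 3, 224, 224))
print(features.shape)  # flattened features, e.g. torch.Size([2, 512])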
Example #4
    def __init__(
        self,
        arch: str = "resnet18",
        pretrained: bool = True,
        requires_grad: bool = True,
        layers_indices: List[int] = None,
        state_dict: Union[dict, str, Path] = None,
    ):
        """
        Specifies encoders for segmentation network
        Args:
            arch (str): Name for resnet. Have to be one of
                resnet18, resnet34, resnet50, resnet101, resnet152
            pretrained (bool): If True, returns a model pre-trained on ImageNet
            requires_grad (bool): Flag for set_requires_grad.
                If None, calculates as ``not requires_grad``
            layers_indices (List[int]): layers of encoders
                used for segmentation
                If None, calculates as ``[1, 2, 3, 4]``
            state_dict (Union[dict, str, Path]): Path to ``torch.Model``
                or a dict containing parameters and persistent buffers.
        Examples:
            >>> encoders = ResnetEncoder(
            >>>    arch="resnet18",
            >>>    pretrained=False,
            >>>    state_dict="/model/path/resnet18-5c106cde.pth"
            >>> )
        """
        super().__init__()

        resnet = torchvision.models.__dict__[arch](pretrained=pretrained)
        resnet_params = RESNET_PARAMS[arch]
        if state_dict is not None:
            if isinstance(state_dict, (Path, str)):
                state_dict = torch.load(str(state_dict))
            resnet.load_state_dict(state_dict)
        self._layers_indices = layers_indices or [1, 2, 3, 4]
        self._channels, self._strides = (
            resnet_params["channels"],
            resnet_params["strides"],
        )
        self._channels = _take(self._channels, self._layers_indices)
        self._strides = _take(self._strides, self._layers_indices)

        layer0 = nn.Sequential(
            OrderedDict([
                ("conv1", resnet.conv1),
                ("bn1", resnet.bn1),
                ("relu", resnet.relu),
            ]))
        self._layers = nn.ModuleList([
            layer0, resnet.layer1, resnet.layer2, resnet.layer3, resnet.layer4
        ])
        self.maxpool0 = resnet.maxpool

        if requires_grad is None:
            requires_grad = not pretrained

        utils.set_requires_grad(self, requires_grad)
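The matching `forward` is not shown above. A plausible sketch of how the pieces defined in `__init__` fit together (an assumption based on the attributes, not the library's actual code):

    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
        # run the backbone stage by stage, keeping each stage's feature map
        outputs = []
        for i, layer in enumerate(self._layers):
            x = layer(x)
            outputs.append(x)
            if i == 0:
                # the stem is followed by max-pooling before layer1
                x = self.maxpool0(x)
        # keep only the feature maps selected via layers_indices
        return _take(outputs, self._layers_indices)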
Example #5
def trace_model(
    model: Model,
    predict_fn: Callable,
    batch=None,
    method_name: str = "forward",
    mode: str = "eval",
    requires_grad: bool = False,
    opt_level: str = None,
    device: Device = "cpu",
    predict_params: dict = None,
) -> jit.ScriptModule:
    """Traces model using runner and batch.

    Args:
        model: Model to trace
        predict_fn: Function to run prediction with the model provided;
            takes the model and inputs as parameters
        batch: Batch to trace the model
        method_name (str): Model's method name that will be
            used as entrypoint during tracing
        mode (str): Mode for model to trace (``train`` or ``eval``)
        requires_grad (bool): Flag to use grads
        opt_level (str): Apex FP16 init level, optional
        device (str): Torch device
        predict_params (dict): additional parameters for model forward

    Returns:
        jit.ScriptModule: Traced model

    Raises:
        ValueError: if ``batch`` or ``predict_fn`` is not specified,
            or if ``mode`` is not 'eval' or 'train'.
    """
    if batch is None or predict_fn is None:
        raise ValueError("Both batch and predict_fn must be specified.")

    if mode not in ["train", "eval"]:
        raise ValueError(f"Unknown mode '{mode}'. Must be 'eval' or 'train'")

    predict_params = predict_params or {}

    tracer = _TracingModelWrapper(model, method_name)
    if opt_level is not None:
        assert_fp16_available()
        # If traced in AMP we need to initialize the model before calling
        # the jit
        # https://github.com/NVIDIA/apex/issues/303#issuecomment-493142950
        from apex import amp

        model = model.to(device)
        model = amp.initialize(model, optimizers=None, opt_level=opt_level)

    getattr(model, mode)()
    set_requires_grad(model, requires_grad=requires_grad)

    predict_fn(tracer, batch, **predict_params)

    return tracer.tracing_result
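A minimal usage sketch (illustrative; `my_model` and `sample_batch` are hypothetical names, and the model's `forward` is assumed to take a single tensor):

def predict_fn(model, inputs):
    # run the entrypoint being traced
    return model(inputs)

traced = trace_model(model=my_model, predict_fn=predict_fn, batch=sample_batch)
traced.save("traced_model.pt")  # jit.ScriptModule supports save()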
Example #6
 def get_model(self, stage: str):
     model = (self.model if self.model is not None else nn.Sequential(
         nn.Flatten(), nn.Linear(784, 128), nn.ReLU(), nn.Linear(128, 10)))
     if stage == "train_freezed":
         # freeze layer
         utils.set_requires_grad(model[1], False)
     else:
         utils.set_requires_grad(model, True)
     return model
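To confirm that the freeze took effect, one can inspect `requires_grad` on the frozen submodule (an illustrative check; `runner` is a hypothetical instance of the class above):

model = runner.get_model(stage="train_freezed")
assert not any(p.requires_grad for p in model[1].parameters())  # frozen layer
assert all(p.requires_grad for p in model[3].parameters())  # still trainable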
Example #7
    def _postprocess_model_for_stage(self, stage: str, model: nn.Module):
        model_ = model
        if isinstance(model, torch.nn.DataParallel):
            model_ = model_.module

        if stage == "stage2":
            for key in ["conv1", "pool", "conv2"]:
                layer = getattr(model_, key)
                utils.set_requires_grad(layer, requires_grad=False)
        return model_
Example #8
 def get_model(self, stage: str):
     if self.model is not None:
         model = utils.get_nn_from_ddp_module(self.model)
     else:
         model = DummyModelFinetune(4, 3, 2)
     if stage == "train_freezed":
         # freeze layer
         utils.set_requires_grad(model.layer1, False)
     else:
         utils.set_requires_grad(model, True)
     return model
Example #9
 def get_model(self, stage: str):
     if self.model is None:
         # first stage
         model = super().get_model(stage=stage)
     else:
         model = self.model
     conv_layers = ["conv_net"]
     if stage == "tune":
         # second stage: freeze the convolutional layers
         for key in conv_layers:
             layer = getattr(model, key)
             utils.set_requires_grad(layer, requires_grad=False)
     return model
Example #10
    def handle_batch(self, batch):
        model = get_nn_from_ddp_module(self.model)
        student, teacher = model["student"], model["teacher"]
        if self.is_train_loader:
            teacher.eval()
            set_requires_grad(teacher, False)
            t_outputs = teacher(**batch, output_hidden_states=True, return_dict=True)

        s_outputs = student(**batch, output_hidden_states=True, return_dict=True)
        if self.is_train_loader:
            self.batch["t_logits"] = t_outputs["logits"]
            self.batch["t_hidden_states"] = t_outputs["hidden_states"]
        self.batch_metrics["task_loss"] = s_outputs["loss"]
        self.batch["s_logits"] = s_outputs["logits"]
        self.batch["s_hidden_states"] = s_outputs["hidden_states"]
Example #11
 def _handle_batch(self, batch: Mapping[str, Any]) -> None:
     self.output = OrderedDict()
     need_hiddens = self.is_train_loader and self.output_hiddens
     student = get_nn_from_ddp_module(self.model["student"])
     teacher = get_nn_from_ddp_module(self.model["teacher"])
     teacher.eval()
     set_requires_grad(teacher, False)
     s_outputs = student(batch["features"], output_hiddens=need_hiddens)
     t_outputs = teacher(batch["features"], output_hiddens=need_hiddens)
     if need_hiddens:
         self.output["logits"] = s_outputs[0]
         self.output["hiddens"] = s_outputs[1]
         self.output["teacher_logits"] = t_outputs[0]
         self.output["teacher_hiddens"] = t_outputs[1]
     else:
         self.output["logits"] = s_outputs
         self.output["teacher_logits"] = t_outputs
Example #12
    def get_model(self, stage: str):
        """
        Model specification for the current stage
        Args:
            stage: current stage name

        Returns:
            model
        """
        model = super().get_model(stage=stage)
        if isinstance(model, torch.nn.DataParallel):
            model = model.module

        if stage == "stage2":
            for key in ["conv1", "pool", "conv2"]:
                layer = getattr(model, key)
                utils.set_requires_grad(layer, requires_grad=False)
        return model
Example #13
def trace_model(
    model: nn.Module,
    experiment: Experiment,
    runner_type: Type[Runner],
    method_name: str = "forward",
    mode: str = "eval",
    requires_grad: bool = False,
) -> ScriptModule:
    """
    Traces model using its native experiment and runner.

    Args:
        model: Model to trace
        experiment: Native experiment that was used to train model
        runner_type: Model's native runner that was used to train model
        method_name (str): Model's method name that will be
            used as entrypoint during tracing
        mode (str): Mode for model to trace (``train`` or ``eval``)
        requires_grad (bool): Flag to use grads

    Returns:
        Traced model ScriptModule
    """

    if mode not in ["train", "eval"]:
        raise ValueError(f"Unknown mode '{mode}'. Must be 'eval' or 'train'")

    getattr(model, mode)()
    utils.set_requires_grad(model, requires_grad=requires_grad)

    tracer = _TracingModelWrapper(model, method_name)
    runner: Runner = runner_type(tracer.cpu(), torch.device("cpu"))

    stage = list(experiment.stages)[0]
    batch = _get_native_batch(experiment, stage)
    batch = runner._batch2device(batch, device=runner.device)

    runner.predict_batch(batch)

    return tracer.tracing_result
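Several of these tracing helpers depend on a `_TracingModelWrapper` that is not shown. One plausible reconstruction (an assumption for illustration; the real wrapper may differ):

from torch import jit, nn

class _TracingModelWrapper(nn.Module):
    """Calls the wrapped model and stores a jit-traced copy as a side effect."""

    def __init__(self, model: nn.Module, method_name: str = "forward"):
        super().__init__()
        self.model = model
        self.method_name = method_name
        self.tracing_result = None  # filled in on the first call

    def forward(self, *args):
        # trace the requested method with the inputs observed at call time
        self.tracing_result = jit.trace_module(
            self.model, {self.method_name: args}
        )
        return getattr(self.model, self.method_name)(*args)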
Example #14
    # env_name = "LunarLander-v2"
    env_name = "CartPole-v1"
    env = gym.make(env_name)

    replay_buffer = OffpolicyReplayBuffer(
        observation_space=env.observation_space,
        action_space=env.action_space,
        epoch_len=epoch_size,
        capacity=buffer_size,
        n_step=1,
        gamma=gamma,
        history_len=1,
    )

    network, target_network = get_network(env), get_network(env)
    utils.set_requires_grad(target_network, requires_grad=False)
    models = {"origin": network, "target": target_network}
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(network.parameters(), lr=lr)
    loaders = {"train_game": DataLoader(replay_buffer, batch_size=batch_size)}

    runner = CustomRunner(gamma=gamma, tau=tau, tau_period=tau_period)
    runner.train(
        # for simplicity, let's run everything on a single GPU
        engine=dl.DeviceEngine("cuda"),
        model=models,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir="./logs_dqn",
        num_epochs=50,
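`CustomRunner` above receives `tau` and `tau_period`, which suggests it periodically soft-updates the frozen target network from the online one. A minimal sketch of such a Polyak update (an assumption about the runner's internals):

import torch
from torch import nn

def soft_update(target: nn.Module, source: nn.Module, tau: float) -> None:
    """Polyak averaging: target <- tau * source + (1 - tau) * target."""
    with torch.no_grad():
        for t_param, s_param in zip(target.parameters(), source.parameters()):
            t_param.copy_(tau * s_param + (1.0 - tau) * t_param)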
Example #15
    env_name = "Pendulum-v0"
    env = NormalizedActions(gym.make(env_name))

    replay_buffer = OffpolicyReplayBuffer(
        observation_space=env.observation_space,
        action_space=env.action_space,
        epoch_len=epoch_size,
        capacity=buffer_size,
        n_step=1,
        gamma=gamma,
        history_len=1,
    )

    actor, target_actor = get_network_actor(env), get_network_actor(env)
    critic, target_critic = get_network_critic(env), get_network_critic(env)
    utils.set_requires_grad(target_actor, requires_grad=False)
    utils.set_requires_grad(target_critic, requires_grad=False)

    models = {
        "actor": actor,
        "critic": critic,
        "target_actor": target_actor,
        "target_critic": target_critic,
    }

    criterion = torch.nn.MSELoss()
    optimizer = {
        "actor": torch.optim.Adam(actor.parameters(), lr_actor),
        "critic": torch.optim.Adam(critic.parameters(), lr=lr_critic),
    }
Example #16
def trace_model_from_runner(
    runner: IRunner,
    checkpoint_name: str = None,
    method_name: str = "forward",
    mode: str = "eval",
    requires_grad: bool = False,
    opt_level: str = None,
    device: Device = "cpu",
) -> ScriptModule:
    """
    Traces model using created experiment and runner.

    Args:
        runner (Runner): Current runner.
        checkpoint_name (str): Name of the model checkpoint to use;
            if None, traces the current model from the runner
        method_name (str): Model's method name that will be
            used as entrypoint during tracing
        mode (str): Mode for model to trace (``train`` or ``eval``)
        requires_grad (bool): Flag to use grads
        opt_level (str): AMP FP16 init level
        device (str): Torch device

    Returns:
        (ScriptModule): Traced model
    """
    logdir = runner.logdir
    model = get_nn_from_ddp_module(runner.model)

    if checkpoint_name is not None:
        dumped_checkpoint = pack_checkpoint(model=model)
        checkpoint_path = logdir / "checkpoints" / f"{checkpoint_name}.pth"
        checkpoint = load_checkpoint(filepath=checkpoint_path)
        unpack_checkpoint(checkpoint=checkpoint, model=model)

    # get the method's input argument names: without a Runner we don't know
    # the input_key needed to preprocess the batch for the method call
    fn = getattr(model, method_name)
    method_argnames = _get_input_argnames(fn=fn, exclude=["self"])

    batch = {}
    for name in method_argnames:
        # TODO: We don't know input_keys without runner
        assert name in runner.input, (
            "Input batch should contain the same keys as input argument "
            "names of `forward` function to be traced correctly")
        batch[name] = runner.input[name]

    batch = any2device(batch, device)

    # Dump the model's current device/mode/grad state; we will restore it later
    _device, _is_training, _requires_grad = (
        runner.device,
        model.training,
        get_requires_grad(model),
    )

    model.to(device)

    # Function to run prediction on batch
    def predict_fn(model: Model, inputs, **kwargs):
        return model(**inputs, **kwargs)

    traced_model = trace_model(
        model=model,
        predict_fn=predict_fn,
        batch=batch,
        method_name=method_name,
        mode=mode,
        requires_grad=requires_grad,
        opt_level=opt_level,
        device=device,
    )

    if checkpoint_name is not None:
        unpack_checkpoint(checkpoint=dumped_checkpoint, model=model)

    # Restore the model's previous state
    getattr(model, "train" if _is_training else "eval")()
    set_requires_grad(model, _requires_grad)
    model.to(_device)

    return traced_model
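A minimal usage sketch (illustrative; it assumes training has populated `runner.input` and that a `best` checkpoint exists under `logdir/checkpoints`):

traced = trace_model_from_runner(runner, checkpoint_name="best", mode="eval")
traced.save("best_traced.pt")  # hypothetical output path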
Example #17
def trace_model(
    model: Model,
    runner: Runner,
    batch=None,
    method_name: str = "forward",
    mode: str = "eval",
    requires_grad: bool = False,
    opt_level: str = None,
    device: Device = "cpu",
    predict_params: dict = None,
) -> ScriptModule:
    """
    Traces model using runner and batch

    Args:
        model: Model to trace
        runner: Model's native runner that was used to train model
        batch: Batch to trace the model
        method_name (str): Model's method name that will be
            used as entrypoint during tracing
        mode (str): Mode for model to trace (``train`` or ``eval``)
        requires_grad (bool): Flag to use grads
        opt_level (str): Apex FP16 init level, optional
        device (str): Torch device
        predict_params (dict): additional parameters for model forward

    Returns:
        (ScriptModule): Traced model

    Raises:
        ValueError: if ``batch`` or ``runner`` is not specified,
            or if ``mode`` is not 'eval' or 'train'.
    """
    if batch is None or runner is None:
        raise ValueError("Both batch and runner must be specified.")

    if mode not in ["train", "eval"]:
        raise ValueError(f"Unknown mode '{mode}'. Must be 'eval' or 'train'")

    predict_params = predict_params or {}

    tracer = _TracingModelWrapper(model, method_name)
    if opt_level is not None:
        utils.assert_fp16_available()
        # If traced in AMP we need to initialize the model before calling
        # the jit
        # https://github.com/NVIDIA/apex/issues/303#issuecomment-493142950
        from apex import amp
        model = model.to(device)
        model = amp.initialize(model, optimizers=None, opt_level=opt_level)
        # TODO: remove `check_trace=False`
        # after fixing this bug https://github.com/pytorch/pytorch/issues/23993
        params = {**predict_params, "check_trace": False}
    else:
        params = predict_params

    getattr(model, mode)()
    utils.set_requires_grad(model, requires_grad=requires_grad)

    _runner_model, _runner_device = runner.model, runner.device

    runner.model, runner.device = tracer, device
    runner.predict_batch(batch, **params)
    result: ScriptModule = tracer.tracing_result

    runner.model, runner.device = _runner_model, _runner_device
    return result
Example #18
    def get_from_params(
        cls,
        backbone_params: Dict = None,
        neck_params: Dict = None,
        heads_params: Dict = None,
    ) -> "GenericModel":

        backbone_params_ = deepcopy(backbone_params)
        neck_params_ = deepcopy(neck_params)
        heads_params_ = deepcopy(heads_params)

        if "requires_grad" in backbone_params_:
            requires_grad = backbone_params_.pop("requires_grad")
        else:
            requires_grad = False

        if "pretrained" in backbone_params_:
            pretrained = backbone_params_.pop("pretrained")
        else:
            pretrained = True

        if backbone_params_["model_name"] in pretrainedmodels.__dict__:
            model_name = backbone_params_.pop("model_name")

            backbone = pretrainedmodels.__dict__[model_name](
                num_classes=1000,
                pretrained="imagenet" if pretrained else None)

            enc_size = backbone.last_linear.in_features

        # elif backbone_params_["model_name"].startswith("efficientnet"):
        #     if pretrained is not None:
        #         backbone = EfficientNet.from_pretrained(**backbone_params_)
        #     else:
        #         backbone = EfficientNet.from_name(**backbone_params_)
        #
        #     backbone.set_swish(memory_efficient=True)
        #
        #     if in_channels != 3:
        #         Conv2d = get_same_padding_conv2d(
        #             image_size=backbone._global_params.image_size)
        #         out_channels = round_filters(32, backbone._global_params)
        #         backbone._conv_stem = Conv2d(in_channels, out_channels,
        #                                      kernel_size=3,
        #                                      stride=2, bias=False)
        #
        #     enc_size = backbone._conv_head.out_channels
        else:
            raise NotImplementedError("This model is not yet implemented")

        del backbone.last_linear
        # backbone._adapt_avg_pooling = nn.AdaptiveAvgPool2d(1)
        # backbone._dropout = nn.Dropout(p=0.2)

        neck = None
        if neck_params_:
            neck_params_["hiddens"].insert(0, enc_size)
            emb_size = neck_params_["hiddens"][-1]
            neck = SequentialNet(**neck_params_)
            # neck.requires_grad = requires_grad
        else:
            emb_size = enc_size

        if heads_params_ is not None:
            head_kwargs_ = {}
            for head, params in heads_params_.items():
                if isinstance(params, int):
                    head_kwargs_[head] = nn.Linear(emb_size, params, bias=True)
                elif isinstance(params, dict):
                    params["hiddens"].insert(0, emb_size)
                    head_kwargs_[head] = SequentialNet(**params)
                # head_kwargs_[head].requires_grad = requires_grad
            heads = nn.ModuleDict(head_kwargs_)
        else:
            heads = None

        model = cls(backbone=backbone, neck=neck, heads=heads)

        utils.set_requires_grad(model, requires_grad)

        print(model)

        return model
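A usage sketch (illustrative; it assumes the enclosing class is named `GenericModel`, as the return annotation suggests, and that `pretrainedmodels` is installed):

model = GenericModel.get_from_params(
    backbone_params={
        "model_name": "resnet18",
        "pretrained": True,
        "requires_grad": True,
    },
    neck_params={"hiddens": [512, 128]},  # enc_size is prepended internally
    heads_params={"logits": 10},  # an int value yields a plain nn.Linear head
)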
Example #19
    # create model and optimizer
    model = nn.ModuleDict(
        {
            "online": get_contrastive_model(
                in_size=DATASETS[args.dataset]["in_size"],
                in_channels=DATASETS[args.dataset]["in_channels"],
                feature_dim=args.feature_dim,
            ),
            "target": get_contrastive_model(
                in_size=DATASETS[args.dataset]["in_size"],
                in_channels=DATASETS[args.dataset]["in_channels"],
                feature_dim=args.feature_dim,
            ),
        }
    )
    utils.set_requires_grad(model["target"], False)
    optimizer = optim.Adam(model["online"].parameters(), lr=args.learning_rate)

    # define criterion
    criterion = NTXentLoss(tau=args.temperature)

    # and callbacks
    callbacks = [
        dl.CriterionCallback(
            input_key="online_projection_left",
            target_key="target_projection_right",
            metric_key="loss",
        ),
        dl.BackwardCallback(metric_key="loss"),
        dl.OptimizerCallback(metric_key="loss"),
        dl.ControlFlowCallbackWrapper(