def handle_batch(self, batch):
    model = get_nn_from_ddp_module(self.model)
    student, teacher = model["student"], model["teacher"]
    if self.is_train_loader:
        teacher.eval()
        set_requires_grad(teacher, False)
        t_outputs = teacher(
            batch["features"],
            output_hidden_states=self.output_hidden_states,
            return_dict=True,
        )

    s_outputs = student(
        batch["features"],
        output_hidden_states=self.output_hidden_states,
        return_dict=True,
    )

    self.batch["s_logits"] = s_outputs["logits"]
    if self.is_train_loader:
        self.batch["t_logits"] = t_outputs["logits"]
        if self.apply_probability_shift:
            self.batch["t_logits"] = probability_shift(
                logits=self.batch["t_logits"], labels=self.batch["targets"]
            )

    if self.output_hidden_states:
        self.batch["s_hidden_states"] = s_outputs["hidden_states"]
        if self.is_train_loader:
            self.batch["t_hidden_states"] = t_outputs["hidden_states"]
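# Illustrative sketch (not part of the runner above): the "s_logits"/"t_logits"
# keys stored in self.batch are typically consumed by a soft-target distillation
# criterion such as a temperature-scaled KL divergence. The helper below is a
# hypothetical example of such a criterion, assuming plain torch tensors.
import torch
import torch.nn.functional as F


def soft_target_kl_loss(
    s_logits: torch.Tensor, t_logits: torch.Tensor, temperature: float = 4.0
) -> torch.Tensor:
    """KL(student || teacher) on temperature-softened distributions."""
    s_log_probs = F.log_softmax(s_logits / temperature, dim=-1)
    t_probs = F.softmax(t_logits / temperature, dim=-1)
    # scale by T^2 to keep gradient magnitudes comparable to the hard-label loss
    return F.kl_div(s_log_probs, t_probs, reduction="batchmean") * temperature ** 2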
def _handle_batch_distillation(self, batch):
    model = get_nn_from_ddp_module(self.model)
    student, teacher = model["student"], model["teacher"]
    if self.is_train_loader:
        teacher.eval()
        set_requires_grad(teacher, False)
        t_outputs = teacher(
            batch["features"],
            output_hidden_states=self.output_hidden_states,
            return_dict=True,
        )

    s_outputs = student(
        batch["features"],
        output_hidden_states=self.output_hidden_states,
        return_dict=True,
    )

    self.batch["s_logits"] = s_outputs["logits"]
    if self.is_train_loader:
        self.batch["t_logits"] = t_outputs["logits"]

    if self.output_hidden_states and self.is_train_loader:
        self.batch["s_hidden_states"] = s_outputs["hidden_states"]
        self.batch["t_hidden_states"] = t_outputs["hidden_states"]

    self.batch_metrics["task_loss"] = self.criterion(
        self.batch["s_logits"], batch["targets"]
    )
    # expose student logits under "logits" for accuracy or other metric callbacks
    self.batch["logits"] = self.batch["s_logits"]
def __init__(
    self,
    arch: str = "resnet18",
    pretrained: bool = True,
    frozen: bool = True,
    pooling: str = None,
    pooling_kwargs: dict = None,
    cut_layers: int = 2,
    state_dict: Union[dict, str, Path] = None,
):
    """
    Args:
        arch: Name of the resnet architecture. Must be one of
            resnet18, resnet34, resnet50, resnet101, resnet152
        pretrained: If True, returns a model pre-trained on ImageNet
        frozen: If True, sets requires_grad to False for the encoder layers
        pooling: name of the pooling layer to append to the encoder
        pooling_kwargs: additional parameters for the pooling layer
        cut_layers: number of last resnet layers to remove
        state_dict (Union[dict, str, Path]): Path to ``torch.Model``
            or a dict containing parameters and persistent buffers.
    """
    super().__init__()

    resnet = torchvision.models.__dict__[arch](pretrained=pretrained)
    if state_dict is not None:
        if isinstance(state_dict, (Path, str)):
            state_dict = torch.load(str(state_dict))
        resnet.load_state_dict(state_dict)

    modules = list(resnet.children())[:-cut_layers]  # delete last layers

    if frozen:
        for module in modules:
            utils.set_requires_grad(module, requires_grad=False)

    if pooling is not None:
        pooling_kwargs = pooling_kwargs or {}
        pooling_layer_fn = MODULE.get(pooling)
        pooling_layer = (
            pooling_layer_fn(in_features=resnet.fc.in_features, **pooling_kwargs)
            if "attn" in pooling.lower()
            else pooling_layer_fn(**pooling_kwargs)
        )
        modules += [pooling_layer]

        if hasattr(pooling_layer, "out_features"):
            out_features = pooling_layer.out_features(
                in_features=resnet.fc.in_features
            )
        else:
            out_features = None
    else:
        out_features = resnet.fc.in_features

    modules += [Flatten()]
    self.out_features = out_features

    self.encoder = nn.Sequential(*modules)
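# Illustrative usage of the encoder defined above (a sketch; the class name
# ResnetEncoder is an assumption based on the surrounding snippets):
import torch

encoder = ResnetEncoder(arch="resnet18", pretrained=False, frozen=False)
with torch.no_grad():
    features = encoder(torch.randn(2, 3, 224, 224))  # flattened feature tensor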
def __init__(
    self,
    arch: str = "resnet18",
    pretrained: bool = True,
    requires_grad: bool = True,
    layers_indices: List[int] = None,
    state_dict: Union[dict, str, Path] = None,
):
    """
    Specifies encoders for segmentation network.

    Args:
        arch (str): Name of the resnet architecture. Must be one of
            resnet18, resnet34, resnet50, resnet101, resnet152
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        requires_grad (bool): Flag for set_requires_grad.
            If None, calculates as ``not pretrained``
        layers_indices (List[int]): layers of encoders used for segmentation.
            If None, calculates as ``[1, 2, 3, 4]``
        state_dict (Union[dict, str, Path]): Path to ``torch.Model``
            or a dict containing parameters and persistent buffers.

    Examples:
        >>> encoders = ResnetEncoder(
        >>>     arch="resnet18",
        >>>     pretrained=False,
        >>>     state_dict="/model/path/resnet18-5c106cde.pth"
        >>> )
    """
    super().__init__()

    resnet = torchvision.models.__dict__[arch](pretrained=pretrained)
    resnet_params = RESNET_PARAMS[arch]
    if state_dict is not None:
        if isinstance(state_dict, (Path, str)):
            state_dict = torch.load(str(state_dict))
        resnet.load_state_dict(state_dict)

    self._layers_indices = layers_indices or [1, 2, 3, 4]
    self._channels, self._strides = (
        resnet_params["channels"],
        resnet_params["strides"],
    )
    self._channels = _take(self._channels, self._layers_indices)
    self._strides = _take(self._strides, self._layers_indices)

    layer0 = nn.Sequential(
        OrderedDict(
            [
                ("conv1", resnet.conv1),
                ("bn1", resnet.bn1),
                ("relu", resnet.relu),
            ]
        )
    )
    self._layers = nn.ModuleList(
        [layer0, resnet.layer1, resnet.layer2, resnet.layer3, resnet.layer4]
    )
    self.maxpool0 = resnet.maxpool

    if requires_grad is None:
        requires_grad = not pretrained
    utils.set_requires_grad(self, requires_grad)
def trace_model(
    model: Model,
    predict_fn: Callable,
    batch=None,
    method_name: str = "forward",
    mode: str = "eval",
    requires_grad: bool = False,
    opt_level: str = None,
    device: Device = "cpu",
    predict_params: dict = None,
) -> jit.ScriptModule:
    """Traces model using runner and batch.

    Args:
        model: Model to trace
        predict_fn: Function to run prediction with the model provided;
            takes model and inputs as parameters
        batch: Batch to trace the model
        method_name (str): Model's method name that will be used
            as entrypoint during tracing
        mode (str): Mode for model to trace (``train`` or ``eval``)
        requires_grad (bool): Flag to use grads
        opt_level (str): Apex FP16 init level, optional
        device (str): Torch device
        predict_params (dict): additional parameters for model forward

    Returns:
        jit.ScriptModule: Traced model

    Raises:
        ValueError: if either batch or predict_fn is not specified,
            or if mode is not one of 'eval' or 'train'.
    """
    if batch is None or predict_fn is None:
        raise ValueError("Both batch and predict_fn must be specified.")

    if mode not in ["train", "eval"]:
        raise ValueError(f"Unknown mode '{mode}'. Must be 'eval' or 'train'")

    predict_params = predict_params or {}

    tracer = _TracingModelWrapper(model, method_name)
    if opt_level is not None:
        assert_fp16_available()
        # If traced in AMP we need to initialize the model before calling
        # the jit
        # https://github.com/NVIDIA/apex/issues/303#issuecomment-493142950
        from apex import amp

        model = model.to(device)
        model = amp.initialize(model, optimizers=None, opt_level=opt_level)

    getattr(model, mode)()
    set_requires_grad(model, requires_grad=requires_grad)

    predict_fn(tracer, batch, **predict_params)

    return tracer.tracing_result
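# Minimal usage sketch for trace_model above. The toy model and batch are
# illustrative assumptions; predict_fn simply forwards the inputs to the model.
import torch
import torch.nn as nn

toy_model = nn.Linear(8, 2)
toy_batch = torch.randn(4, 8)

traced = trace_model(
    model=toy_model,
    predict_fn=lambda model, inputs: model(inputs),
    batch=toy_batch,
)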
def get_model(self, stage: str):
    model = (
        self.model
        if self.model is not None
        else nn.Sequential(
            nn.Flatten(), nn.Linear(784, 128), nn.ReLU(), nn.Linear(128, 10)
        )
    )
    if stage == "train_freezed":
        # freeze layer
        utils.set_requires_grad(model[1], False)
    else:
        utils.set_requires_grad(model, True)
    return model
def _postprocess_model_for_stage(self, stage: str, model: nn.Module):
    model_ = model
    if isinstance(model, torch.nn.DataParallel):
        model_ = model_.module

    if stage == "stage2":
        for key in ["conv1", "pool", "conv2"]:
            layer = getattr(model_, key)
            utils.set_requires_grad(layer, requires_grad=False)
    return model_
def get_model(self, stage: str):
    if self.model is not None:
        model = utils.get_nn_from_ddp_module(self.model)
    else:
        model = DummyModelFinetune(4, 3, 2)
    if stage == "train_freezed":
        # freeze layer
        utils.set_requires_grad(model.layer1, False)
    else:
        utils.set_requires_grad(model, True)
    return model
def get_model(self, stage: str):
    if self.model is None:
        # first stage
        model = super().get_model(stage=stage)
    else:
        model = self.model

    conv_layers = ["conv_net"]
    if stage == "tune":
        # second stage logic
        model = self.model
        for key in conv_layers:
            layer = getattr(model, key)
            utils.set_requires_grad(layer, requires_grad=False)
    return model
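# Optional sanity check (illustrative, not part of the runners above): after
# freezing a submodule with set_requires_grad, only the remaining parameters
# should stay trainable.
def count_trainable_parameters(module):
    return sum(p.numel() for p in module.parameters() if p.requires_grad)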
def handle_batch(self, batch):
    model = get_nn_from_ddp_module(self.model)
    student, teacher = model["student"], model["teacher"]
    if self.is_train_loader:
        teacher.eval()
        set_requires_grad(teacher, False)
        t_outputs = teacher(**batch, output_hidden_states=True, return_dict=True)

    s_outputs = student(**batch, output_hidden_states=True, return_dict=True)

    if self.is_train_loader:
        self.batch["t_logits"] = t_outputs["logits"]
        self.batch["t_hidden_states"] = t_outputs["hidden_states"]

    self.batch_metrics["task_loss"] = s_outputs["loss"]
    self.batch["s_logits"] = s_outputs["logits"]
    self.batch["s_hidden_states"] = s_outputs["hidden_states"]
def _handle_batch(self, batch: Mapping[str, Any]) -> None:
    self.output = OrderedDict()
    need_hiddens = self.is_train_loader and self.output_hiddens
    student = get_nn_from_ddp_module(self.model["student"])
    teacher = get_nn_from_ddp_module(self.model["teacher"])
    teacher.eval()
    set_requires_grad(teacher, False)

    s_outputs = student(batch["features"], output_hiddens=need_hiddens)
    t_outputs = teacher(batch["features"], output_hiddens=need_hiddens)

    if need_hiddens:
        self.output["logits"] = s_outputs[0]
        self.output["hiddens"] = s_outputs[1]
        self.output["teacher_logits"] = t_outputs[0]
        self.output["teacher_hiddens"] = t_outputs[1]
    else:
        self.output["logits"] = s_outputs
        self.output["teacher_logits"] = t_outputs
def get_model(self, stage: str):
    """
    Model specification for the current stage.

    Args:
        stage: current stage name

    Returns:
        model
    """
    model = super().get_model(stage=stage)
    if isinstance(model, torch.nn.DataParallel):
        model = model.module

    if stage == "stage2":
        for key in ["conv1", "pool", "conv2"]:
            layer = getattr(model, key)
            utils.set_requires_grad(layer, requires_grad=False)
    return model
def trace_model(
    model: nn.Module,
    experiment: Experiment,
    runner_type: Type[Runner],
    method_name: str = "forward",
    mode: str = "eval",
    requires_grad: bool = False,
) -> ScriptModule:
    """
    Traces model using its native experiment and runner.

    Args:
        model: Model to trace
        experiment: Native experiment that was used to train model
        runner_type: Model's native runner that was used to train model
        method_name (str): Model's method name that will be used
            as entrypoint during tracing
        mode (str): Mode for model to trace (``train`` or ``eval``)
        requires_grad (bool): Flag to use grads

    Returns:
        Traced model ScriptModule
    """
    if mode not in ["train", "eval"]:
        raise ValueError(f"Unknown mode '{mode}'. Must be 'eval' or 'train'")

    getattr(model, mode)()
    utils.set_requires_grad(model, requires_grad=requires_grad)

    tracer = _TracingModelWrapper(model, method_name)
    runner: Runner = runner_type(tracer.cpu(), torch.device("cpu"))

    stage = list(experiment.stages)[0]
    batch = _get_native_batch(experiment, stage)
    batch = runner._batch2device(batch, device=runner.device)

    runner.predict_batch(batch)
    return tracer.tracing_result
# env_name = "LunarLander-v2"
env_name = "CartPole-v1"
env = gym.make(env_name)

replay_buffer = OffpolicyReplayBuffer(
    observation_space=env.observation_space,
    action_space=env.action_space,
    epoch_len=epoch_size,
    capacity=buffer_size,
    n_step=1,
    gamma=gamma,
    history_len=1,
)

network, target_network = get_network(env), get_network(env)
utils.set_requires_grad(target_network, requires_grad=False)
models = {"origin": network, "target": target_network}
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(network.parameters(), lr=lr)
loaders = {"train_game": DataLoader(replay_buffer, batch_size=batch_size)}

runner = CustomRunner(gamma=gamma, tau=tau, tau_period=tau_period)
runner.train(
    # for simplicity reasons, let's run everything on single gpu
    engine=dl.DeviceEngine("cuda"),
    model=models,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir="./logs_dqn",
    num_epochs=50,
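# Illustrative sketch (an assumption about what CustomRunner does with `tau`):
# the frozen target network is typically synchronized with the online network
# via a soft (Polyak) update every `tau_period` steps.
import torch


def soft_update(target: torch.nn.Module, source: torch.nn.Module, tau: float) -> None:
    """target <- tau * source + (1 - tau) * target, without tracking gradients."""
    with torch.no_grad():
        for t_param, s_param in zip(target.parameters(), source.parameters()):
            t_param.data.mul_(1.0 - tau).add_(s_param.data, alpha=tau)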
env_name = "Pendulum-v0" env = NormalizedActions(gym.make(env_name)) replay_buffer = OffpolicyReplayBuffer( observation_space=env.observation_space, action_space=env.action_space, epoch_len=epoch_size, capacity=buffer_size, n_step=1, gamma=gamma, history_len=1, ) actor, target_actor = get_network_actor(env), get_network_actor(env) critic, target_critic = get_network_critic(env), get_network_critic(env) utils.set_requires_grad(target_actor, requires_grad=False) utils.set_requires_grad(target_critic, requires_grad=False) models = { "actor": actor, "critic": critic, "target_actor": target_actor, "target_critic": target_critic, } criterion = torch.nn.MSELoss() optimizer = { "actor": torch.optim.Adam(actor.parameters(), lr_actor), "critic": torch.optim.Adam(critic.parameters(), lr=lr_critic), }
def trace_model_from_runner(
    runner: IRunner,
    checkpoint_name: str = None,
    method_name: str = "forward",
    mode: str = "eval",
    requires_grad: bool = False,
    opt_level: str = None,
    device: Device = "cpu",
) -> ScriptModule:
    """
    Traces model using created experiment and runner.

    Args:
        runner (Runner): Current runner.
        checkpoint_name (str): Name of model checkpoint to use;
            if None, traces current model from runner
        method_name (str): Model's method name that will be used
            as entrypoint during tracing
        mode (str): Mode for model to trace (``train`` or ``eval``)
        requires_grad (bool): Flag to use grads
        opt_level (str): AMP FP16 init level
        device (str): Torch device

    Returns:
        (ScriptModule): Traced model
    """
    logdir = runner.logdir
    model = get_nn_from_ddp_module(runner.model)

    if checkpoint_name is not None:
        dumped_checkpoint = pack_checkpoint(model=model)
        checkpoint_path = logdir / "checkpoints" / f"{checkpoint_name}.pth"
        checkpoint = load_checkpoint(filepath=checkpoint_path)
        unpack_checkpoint(checkpoint=checkpoint, model=model)

    # getting input names of args for method since we don't have Runner
    # and we don't know input_key to preprocess batch for method call
    fn = getattr(model, method_name)
    method_argnames = _get_input_argnames(fn=fn, exclude=["self"])

    batch = {}
    for name in method_argnames:
        # TODO: We don't know input_keys without runner
        assert name in runner.input, (
            "Input batch should contain the same keys as input argument "
            "names of `forward` function to be traced correctly"
        )
        batch[name] = runner.input[name]

    batch = any2device(batch, device)

    # Dumping previous state of the model, we will need it to restore
    _device, _is_training, _requires_grad = (
        runner.device,
        model.training,
        get_requires_grad(model),
    )

    model.to(device)

    # Function to run prediction on batch
    def predict_fn(model: Model, inputs, **kwargs):
        return model(**inputs, **kwargs)

    traced_model = trace_model(
        model=model,
        predict_fn=predict_fn,
        batch=batch,
        method_name=method_name,
        mode=mode,
        requires_grad=requires_grad,
        opt_level=opt_level,
        device=device,
    )

    if checkpoint_name is not None:
        unpack_checkpoint(checkpoint=dumped_checkpoint, model=model)

    # Restore previous state of the model
    getattr(model, "train" if _is_training else "eval")()
    set_requires_grad(model, _requires_grad)
    model.to(_device)

    return traced_model
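# Illustrative follow-up: a traced ScriptModule, such as the one returned by
# trace_model_from_runner above, can be serialized and reloaded without the
# original Python class definitions. The toy module here is an assumption
# used only to keep the snippet self-contained.
import torch
import torch.nn as nn

toy_traced = torch.jit.trace(nn.Linear(4, 2), torch.randn(1, 4))
torch.jit.save(toy_traced, "traced_model.pt")
reloaded = torch.jit.load("traced_model.pt")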
def trace_model(
    model: Model,
    runner: Runner,
    batch=None,
    method_name: str = "forward",
    mode: str = "eval",
    requires_grad: bool = False,
    opt_level: str = None,
    device: Device = "cpu",
    predict_params: dict = None,
) -> ScriptModule:
    """
    Traces model using runner and batch.

    Args:
        model: Model to trace
        runner: Model's native runner that was used to train model
        batch: Batch to trace the model
        method_name (str): Model's method name that will be used
            as entrypoint during tracing
        mode (str): Mode for model to trace (``train`` or ``eval``)
        requires_grad (bool): Flag to use grads
        opt_level (str): Apex FP16 init level, optional
        device (str): Torch device
        predict_params (dict): additional parameters for model forward

    Returns:
        (ScriptModule): Traced model
    """
    if batch is None or runner is None:
        raise ValueError("Both batch and runner must be specified.")

    if mode not in ["train", "eval"]:
        raise ValueError(f"Unknown mode '{mode}'. Must be 'eval' or 'train'")

    predict_params = predict_params or {}

    tracer = _TracingModelWrapper(model, method_name)
    if opt_level is not None:
        utils.assert_fp16_available()
        # If traced in AMP we need to initialize the model before calling
        # the jit
        # https://github.com/NVIDIA/apex/issues/303#issuecomment-493142950
        from apex import amp

        model = model.to(device)
        model = amp.initialize(model, optimizers=None, opt_level=opt_level)
        # TODO: remove `check_trace=False`
        # after fixing this bug https://github.com/pytorch/pytorch/issues/23993
        params = {**predict_params, "check_trace": False}
    else:
        params = predict_params

    getattr(model, mode)()
    utils.set_requires_grad(model, requires_grad=requires_grad)

    _runner_model, _runner_device = runner.model, runner.device

    runner.model, runner.device = tracer, device
    runner.predict_batch(batch, **params)
    result: ScriptModule = tracer.tracing_result

    runner.model, runner.device = _runner_model, _runner_device
    return result
def get_from_params(
    cls,
    backbone_params: Dict = None,
    neck_params: Dict = None,
    heads_params: Dict = None,
) -> "GenericModel":
    backbone_params_ = deepcopy(backbone_params)
    neck_params_ = deepcopy(neck_params)
    heads_params_ = deepcopy(heads_params)

    if "requires_grad" in backbone_params_:
        requires_grad = backbone_params_.pop("requires_grad")
    else:
        requires_grad = False

    if "pretrained" in backbone_params_:
        pretrained = backbone_params_.pop("pretrained")
    else:
        pretrained = True

    if backbone_params_["model_name"] in pretrainedmodels.__dict__:
        model_name = backbone_params_.pop("model_name")
        backbone = pretrainedmodels.__dict__[model_name](
            num_classes=1000, pretrained="imagenet" if pretrained else None
        )
        enc_size = backbone.last_linear.in_features
    # elif backbone_params_["model_name"].startswith("efficientnet"):
    #     if pretrained is not None:
    #         backbone = EfficientNet.from_pretrained(**backbone_params_)
    #     else:
    #         backbone = EfficientNet.from_name(**backbone_params_)
    #     # backbone.set_swish(memory_efficient=True)
    #
    #     if in_channels != 3:
    #         Conv2d = get_same_padding_conv2d(
    #             image_size=backbone._global_params.image_size)
    #         out_channels = round_filters(32, backbone._global_params)
    #         backbone._conv_stem = Conv2d(in_channels, out_channels,
    #                                      kernel_size=3,
    #                                      stride=2, bias=False)
    #
    #     enc_size = backbone._conv_head.out_channels
    else:
        raise NotImplementedError("This model is not yet implemented")

    del backbone.last_linear
    # backbone._adapt_avg_pooling = nn.AdaptiveAvgPool2d(1)
    # backbone._dropout = nn.Dropout(p=0.2)

    neck = None
    if neck_params_:
        neck_params_["hiddens"].insert(0, enc_size)
        emb_size = neck_params_["hiddens"][-1]
        if neck_params_ is not None:
            neck = SequentialNet(**neck_params_)
            # neck.requires_grad = requires_grad
    else:
        emb_size = enc_size

    if heads_params_ is not None:
        head_kwargs_ = {}
        for head, params in heads_params_.items():
            if isinstance(params, int):
                head_kwargs_[head] = nn.Linear(emb_size, params, bias=True)
            elif isinstance(params, dict):
                params["hiddens"].insert(0, emb_size)
                head_kwargs_[head] = SequentialNet(**params)
                # head_kwargs_[head].requires_grad = requires_grad
        heads = nn.ModuleDict(head_kwargs_)
    else:
        heads = None

    model = cls(backbone=backbone, neck=neck, heads=heads)

    utils.set_requires_grad(model, requires_grad)
    print(model)
    return model
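# Hypothetical configuration sketch for the factory above. The keys follow the
# parameters read inside get_from_params; the backbone name, hidden sizes, and
# head size are illustrative assumptions, as is the GenericModel class name.
config = {
    "backbone_params": {
        "model_name": "resnet18",
        "pretrained": True,
        "requires_grad": True,
    },
    "neck_params": {"hiddens": [512, 128]},
    "heads_params": {"logits": 10},  # an int value yields a single nn.Linear head
}
# model = GenericModel.get_from_params(**config)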
# create model and optimizer
model = nn.ModuleDict(
    {
        "online": get_contrastive_model(
            in_size=DATASETS[args.dataset]["in_size"],
            in_channels=DATASETS[args.dataset]["in_channels"],
            feature_dim=args.feature_dim,
        ),
        "target": get_contrastive_model(
            in_size=DATASETS[args.dataset]["in_size"],
            in_channels=DATASETS[args.dataset]["in_channels"],
            feature_dim=args.feature_dim,
        ),
    }
)
utils.set_requires_grad(model["target"], False)
optimizer = optim.Adam(model["online"].parameters(), lr=args.learning_rate)

# define criterion
criterion = NTXentLoss(tau=args.temperature)

# and callbacks
callbacks = [
    dl.CriterionCallback(
        input_key="online_projection_left",
        target_key="target_projection_right",
        metric_key="loss",
    ),
    dl.BackwardCallback(metric_key="loss"),
    dl.OptimizerCallback(metric_key="loss"),
    dl.ControlFlowCallbackWrapper(