Example #1
def _load_states_from_file_map(*, runner: "IRunner",
                               load_map: Dict[str, str]) -> None:
    """
    Load state of a model, criterion, optimizer, scheduler
    from files specified in ``load_map``.

    Arguments:
        runner: current runner
        load_map (Dict[str, str]): dict with mappings to load.
            Expected keys - ``'model'``, ``'criterion'``,
            ``'optimizer'``, ``'scheduler'``; other keys will be
            ignored.
            Expected values are state names (``'best'``,
            ``'best_full'``, ``'last'``, ``'last_full'``) or
            paths to checkpoints.
            **NOTE:** loading criterion, optimizer, and scheduler
            states requires a full checkpoint.

    Raises:
        FileNotFoundError: when a file or state specified in ``load_map``
            does not exist.
    """
    required_files = _required_files(runner.logdir, load_map)

    for filename in required_files.keys():
        if not os.path.isfile(filename):
            raise FileNotFoundError(f"No checkpoint found at {filename}!")

    # extracting parts from files
    for filename, parts_to_load in required_files.items():
        print(f"=> Loading {', '.join(parts_to_load)} from {filename}")
        checkpoint = load_checkpoint(filename)
        to_unpack = {part: getattr(runner, part) for part in parts_to_load}
        unpack_checkpoint(checkpoint, **to_unpack)
        print(f"   loaded: {', '.join(parts_to_load)}")
Example #2
def load_model_from_path(model: Module, path: Path):
    if os.path.isdir(path):
        return load_model_from_path(model, path / "best.pth")
    model_sd = load_checkpoint(path)
    try:
        unpack_checkpoint(model_sd, model=model)
    except KeyError:
        model.load_state_dict(model_sd)
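A short usage sketch, assuming a hypothetical `nn.Module` and checkpoint layout; passing a directory falls back to its `best.pth`:

from pathlib import Path

# `my_model` is an assumed nn.Module; the paths are illustrative.
load_model_from_path(my_model, Path("logs/checkpoints"))            # resolves to best.pth
load_model_from_path(my_model, Path("logs/checkpoints/last.pth"))   # explicit file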
Example #3
    def predict_loader(
        self,
        *,
        loader: DataLoader,
        model: Model = None,
        resume: str = None,
        fp16: Union[Dict, bool] = None,
        initial_seed: int = 42,
    ) -> Generator:
        """
        Runs model inference on a PyTorch DataLoader and returns a
        python generator with model predictions from `runner.predict_batch`.
        Cleans up the experiment info to avoid possible collisions.
        Sets `is_train_loader` and `is_valid_loader` to `False` while
        keeping `is_infer_loader` as True. Moves model to evaluation mode.

        Args:
            loader: loader to predict
            model: model to use for prediction
            resume: path to checkpoint to resume
            fp16 (Union[Dict, bool]): fp16 usage flag
            initial_seed: seed to use before prediction

        Yields:
            batches with model predictions
        """
        if isinstance(fp16, bool) and fp16:
            fp16 = {"opt_level": "O1"}

        if model is not None:
            self.model = model
        assert self.model is not None

        if resume is not None:
            checkpoint = load_checkpoint(resume)
            unpack_checkpoint(checkpoint, model=self.model)

        self.experiment = None
        set_global_seed(initial_seed)
        (model, _, _, _, device) = process_components(  # noqa: WPS122
            model=self.model,
            distributed_params=fp16,
            device=self.device,
        )
        self._prepare_inner_state(
            stage="infer",
            model=model,
            device=device,
            is_train_loader=False,
            is_valid_loader=False,
            is_infer_loader=True,
        )
        maybe_recursive_call(self.model, "train", mode=False)

        set_global_seed(initial_seed)
        for batch in loader:
            yield self.predict_batch(batch)
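A hedged usage sketch of `predict_loader`, assuming `runner` is a `Runner` instance and `infer_loader` is a PyTorch `DataLoader`; the checkpoint path is illustrative:

predictions = []
for batch_output in runner.predict_loader(
    loader=infer_loader,
    resume="logs/checkpoints/best.pth",  # assumed checkpoint path
):
    predictions.append(batch_output)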
Example #4
def _load_checkpoint(*,
                     filename,
                     runner: "IRunner",
                     load_full: bool = True) -> None:
    """
    Load checkpoint from a file.

    Arguments:
        filename: path to checkpoint
        runner: current runner
        load_full: if True (default), states for the criterion,
            optimizer and scheduler will also be loaded.
            File should contain keys required for
            loading model (``'model_state_dict'``),
            criterion (``'criterion_state_dict'``) (only for full load),
            optimizer (``'optimizer_state_dict'``),
            scheduler (``'scheduler_state_dict'``).

    Raises:
        FileNotFoundError: when the file specified in ``filename``
            does not exist.
    """
    if not os.path.isfile(filename):
        raise FileNotFoundError(f"No checkpoint found at {filename}!")

    print(f"=> Loading checkpoint {filename}")
    checkpoint = load_checkpoint(filename)

    if not runner.stage.startswith("infer") and load_full:
        runner.stage = checkpoint["stage"]
        runner.epoch = checkpoint["epoch"]
        runner.global_epoch = checkpoint["global_epoch"]
        # @TODO: should we also load,
        # checkpoint_data, main_metric, minimize_metric, valid_loader ?
        # epoch_metrics, valid_metrics ?

    if load_full:
        unpack_checkpoint(
            checkpoint,
            model=runner.model,
            criterion=runner.criterion,
            optimizer=runner.optimizer,
            scheduler=runner.scheduler,
        )

        print(f"loaded state checkpoint {filename} "
              f"(global epoch {checkpoint['global_epoch']}, "
              f"epoch {checkpoint['epoch']}, "
              f"stage {checkpoint['stage']})")
    else:
        unpack_checkpoint(
            checkpoint,
            model=runner.model,
        )

        print(f"loaded model checkpoint {filename}")
Example #5
def load_optimizer_from_checkpoint(
    optimizer: Optimizer,
    checkpoint_path: str,
    checkpoint_optimizer_key: str,
    model_parameters,
    optimizer_params,
) -> Optimizer:
    """
    Loads optimizer state from checkpoint

    Args:
        optimizer: optimizer
        checkpoint_path: path to checkpoint file
        checkpoint_optimizer_key: key of the optimizer state
                                  in the checkpoint state dict
        model_parameters: model parameters
        optimizer_params: optimizer config parameters

    Returns:
        optimizer loaded from checkpoint

    """
    checkpoint = load_checkpoint(checkpoint_path)
    dict2load = optimizer
    if checkpoint_optimizer_key is not None:
        dict2load = {checkpoint_optimizer_key: optimizer}
    unpack_checkpoint(checkpoint, optimizer=dict2load)
    # move optimizer to device
    device = get_device()
    for param in model_parameters:
        param = param["params"][0]
        optimizer_state = optimizer.state[param]
        for state_key, state_value in optimizer_state.items():
            optimizer_state[state_key] = any2device(state_value, device)
    # update optimizer params
    for key, value in optimizer_params.items():
        for optimizer_param_group in optimizer.param_groups:
            optimizer_param_group[key] = value

    return optimizer
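A minimal sketch of calling the helper above; the stand-in model, parameter groups, and paths are assumptions:

import torch

model = torch.nn.Linear(10, 2)  # stand-in model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
optimizer = load_optimizer_from_checkpoint(
    optimizer,
    checkpoint_path="logs/checkpoints/best_full.pth",  # assumed path
    checkpoint_optimizer_key=None,                     # optimizer stored under the default key
    model_parameters=[{"params": list(model.parameters())}],
    optimizer_params={"lr": 3e-4},                     # override lr after loading
)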
Example #6
    def _get_optimizer(self, stage: str, model: Union[Model, Dict[str, Model]],
                       **params) -> Optimizer:
        # @TODO 1: refactoring; this method is too long
        # @TODO 2: load state dicts for schedulers & criterion
        layerwise_params = params.pop("layerwise_params", OrderedDict())
        no_bias_weight_decay = params.pop("no_bias_weight_decay", True)

        # linear scaling rule from https://arxiv.org/pdf/1706.02677.pdf
        lr_scaling_params = params.pop("lr_linear_scaling", None)
        if lr_scaling_params:
            data_params = dict(self.stages_config[stage]["data_params"])
            batch_size = data_params.get("batch_size")
            per_gpu_scaling = data_params.get("per_gpu_scaling", False)
            distributed_rank = get_rank()
            distributed = distributed_rank > -1
            if per_gpu_scaling and not distributed:
                num_gpus = max(1, torch.cuda.device_count())
                batch_size *= num_gpus

            base_lr = lr_scaling_params.get("lr")
            base_batch_size = lr_scaling_params.get("base_batch_size", 256)
            lr_scaling = batch_size / base_batch_size
            params["lr"] = base_lr * lr_scaling  # scale default lr
        else:
            lr_scaling = 1.0

        # getting model parameters
        model_key = params.pop("_model", None)
        if model_key is None:
            assert isinstance(
                model, nn.Module
            ), "model is key-value, but optimizer has no specified model"
            model_params = process_model_params(model, layerwise_params,
                                                no_bias_weight_decay,
                                                lr_scaling)
        elif isinstance(model_key, str):
            model_params = process_model_params(
                model[model_key],
                layerwise_params,
                no_bias_weight_decay,
                lr_scaling,
            )
        elif isinstance(model_key, (list, tuple)):
            model_params = []
            for model_key_el in model_key:
                model_params_el = process_model_params(
                    model[model_key_el],
                    layerwise_params,
                    no_bias_weight_decay,
                    lr_scaling,
                )
                model_params.extend(model_params_el)
        else:
            raise ValueError("unknown type of model_params")

        load_from_previous_stage = params.pop("load_from_previous_stage",
                                              False)
        optimizer_key = params.pop("optimizer_key", None)
        optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)

        if load_from_previous_stage and self.stages.index(stage) != 0:
            checkpoint_path = f"{self.logdir}/checkpoints/best_full.pth"
            checkpoint = load_checkpoint(checkpoint_path)

            dict2load = optimizer
            if optimizer_key is not None:
                dict2load = {optimizer_key: optimizer}
            unpack_checkpoint(checkpoint, optimizer=dict2load)

            # move optimizer to device
            device = get_device()
            for param in model_params:
                param = param["params"][0]
                optimizer_state = optimizer.state[param]
                for state_key, state_value in optimizer_state.items():
                    optimizer_state[state_key] = any2device(
                        state_value, device)

            # update optimizer params
            for key, value in params.items():
                for optimizer_param_group in optimizer.param_groups:
                    optimizer_param_group[key] = value

        return optimizer
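For reference, a hypothetical `params` dict this method could receive; the key names mirror the `params.pop(...)` calls above, while the registry key and all values are assumptions:

optimizer_params = {
    "optimizer": "AdamW",               # assumed registry name for OPTIMIZERS
    "lr": 1e-3,
    "weight_decay": 1e-4,
    "no_bias_weight_decay": True,
    "lr_linear_scaling": {"lr": 1e-3, "base_batch_size": 256},
    "load_from_previous_stage": True,
    "optimizer_key": None,
}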
Example #7
                     criterion=nn.CrossEntropyLoss(),
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     logdir=LOGDIR,
                     num_epochs=EPOCHS,
                     fp16=tu.fp16_params,
                     callbacks=callbacks,
                     verbose=True,
                     load_best_on_end=False,
                     resume=RESUME)

    model = make_model()

    cp = load_checkpoint(f"{LOGDIR}/checkpoints/best.pth")
    unpack_checkpoint(cp, model=model)
    model = model.eval()

    labels_fold = []
    for b in tqdm(runner.predict_loader(model=model,
                                        loader=test_loader,
                                        fp16=tu.fp16_params),
                  total=len(test_loader)):

        labels_batch = nn.functional.softmax(b['logits'],
                                             dim=1).data.cpu().numpy()
        labels_fold.extend(list(labels_batch))
    if labels_blend is None:
        labels_blend = np.array(labels_fold)
    else:
        labels_blend += np.array(labels_fold)
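After looping over all folds, the accumulated probabilities would typically be averaged and turned into class predictions; a hypothetical finishing step (`NUM_FOLDS` is an assumption):

labels_blend = labels_blend / NUM_FOLDS          # average the softmax outputs over folds
predicted_classes = labels_blend.argmax(axis=1)  # final class per sample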
Example #8
def trace_model_from_runner(
    runner: "IRunner",
    checkpoint_name: str = None,
    method_name: str = "forward",
    mode: str = "eval",
    requires_grad: bool = False,
    opt_level: str = None,
    device: Device = "cpu",
) -> jit.ScriptModule:
    """
    Traces model using created experiment and runner.

    Args:
        runner: current runner.
        checkpoint_name: Name of the model checkpoint to use; if None,
            traces the current model from the runner
        method_name: Model's method name that will be
            used as entrypoint during tracing
        mode: Mode for model to trace (``train`` or ``eval``)
        requires_grad: Flag to use grads
        opt_level: AMP FP16 init level
        device: Torch device

    Returns:
        ScriptModule: Traced model
    """
    logdir = runner.logdir
    model = get_nn_from_ddp_module(runner.model)

    if checkpoint_name is not None:
        dumped_checkpoint = pack_checkpoint(model=model)
        checkpoint_path = logdir / "checkpoints" / f"{checkpoint_name}.pth"
        checkpoint = load_checkpoint(filepath=checkpoint_path)
        unpack_checkpoint(checkpoint=checkpoint, model=model)

    # getting input names of args for method since we don't have Runner
    # and we don't know input_key to preprocess batch for method call
    fn = getattr(model, method_name)
    method_argnames = _get_input_argnames(fn=fn, exclude=["self"])

    batch = {}
    for name in method_argnames:
        # TODO: We don't know input_keys without runner
        assert name in runner.input, (
            "Input batch should contain the same keys as input argument "
            "names of `forward` function to be traced correctly")
        batch[name] = runner.input[name]

    batch = any2device(batch, device)

    # Dumping previous state of the model, we will need it to restore
    device_dump, is_training_dump, requires_grad_dump = (
        runner.device,
        model.training,
        get_requires_grad(model),
    )

    model.to(device)

    # Function to run prediction on batch
    def predict_fn(model: Model, inputs, **kwargs):  # noqa: WPS442
        return model(**inputs, **kwargs)

    traced_model = trace_model(
        model=model,
        predict_fn=predict_fn,
        batch=batch,
        method_name=method_name,
        mode=mode,
        requires_grad=requires_grad,
        opt_level=opt_level,
        device=device,
    )

    if checkpoint_name is not None:
        unpack_checkpoint(checkpoint=dumped_checkpoint, model=model)

    # Restore previous state of the model
    getattr(model, "train" if is_training_dump else "eval")()
    set_requires_grad(model, requires_grad_dump)
    model.to(device_dump)

    return traced_model
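A hedged usage sketch, assuming `runner` has already finished a run with a populated `runner.input` batch and a `best.pth` checkpoint in its logdir:

from torch import jit

traced = trace_model_from_runner(
    runner=runner,
    checkpoint_name="best",   # trace the "best" checkpoint instead of the current weights
    method_name="forward",
    mode="eval",
)
jit.save(traced, "traced_model.pth")  # illustrative output path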
Example #9
def trace_model_from_checkpoint(
    logdir: Path,
    method_name: str,
    checkpoint_name: str,
    stage: str = None,
    loader: Union[str, int] = None,
    mode: str = "eval",
    requires_grad: bool = False,
    opt_level: str = None,
    device: Device = "cpu",
):
    """
    Traces model using created experiment and runner.

    Args:
        logdir (Union[str, Path]): Path to Catalyst logdir with model
        checkpoint_name: Name of model checkpoint to use
        stage: experiment's stage name
        loader (Union[str, int]): experiment's loader name or its index
        method_name: Model's method name that will be
            used as entrypoint during tracing
        mode: Mode for model to trace (``train`` or ``eval``)
        requires_grad: Flag to use grads
        opt_level: AMP FP16 init level
        device: Torch device

    Returns:
        the traced model
    """
    config_path = logdir / "configs" / "_config.json"
    checkpoint_path = logdir / "checkpoints" / f"{checkpoint_name}.pth"
    logging.info("Load config")
    config: Dict[str, dict] = load_config(config_path)

    # Get expdir name
    config_expdir = Path(config["args"]["expdir"])
    # We will use copy of expdir from logs for reproducibility
    expdir = Path(logdir) / "code" / config_expdir.name

    logger.info("Import experiment and runner from logdir")
    experiment: ConfigExperiment = None
    experiment, runner, _ = prepare_config_api_components(expdir=expdir,
                                                          config=config)

    logger.info(f"Load model state from checkpoints/{checkpoint_name}.pth")
    if stage is None:
        stage = list(experiment.stages)[0]

    model = experiment.get_model(stage)
    checkpoint = load_checkpoint(checkpoint_path)
    unpack_checkpoint(checkpoint, model=model)
    runner.model, runner.device = model, device

    if loader is None:
        loader = 0
    batch = get_native_batch_from_loaders(
        loaders=experiment.get_loaders(stage), loader=loader)

    # function to run prediction on batch
    def predict_fn(model, inputs, **kwargs):  # noqa: WPS442
        model_dump = runner.model
        runner.model = model
        result = runner.predict_batch(inputs, **kwargs)
        runner.model = model_dump
        return result

    logger.info("Tracing is running...")
    traced_model = trace_model(
        model=model,
        predict_fn=predict_fn,
        batch=batch,
        method_name=method_name,
        mode=mode,
        requires_grad=requires_grad,
        opt_level=opt_level,
        device=device,
    )

    logger.info("Done")
    return traced_model
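A hypothetical call to the function above; the logdir layout (`configs/_config.json`, `checkpoints/best.pth`) must match what the function expects:

from pathlib import Path

traced = trace_model_from_checkpoint(
    logdir=Path("logs"),        # assumed Catalyst logdir
    method_name="forward",
    checkpoint_name="best",
)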
Example #10
def main(args, _=None):
    """Run the ``catalyst-contrib text2embeddings`` script."""
    batch_size = args.batch_size
    num_workers = args.num_workers
    max_length = args.max_length
    pooling_groups = args.pooling.split(",")
    bert_level = args.bert_level

    if bert_level is not None:
        assert (args.output_hidden_states
                ), "You need hidden states output for level specification"

    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    if getattr(args, "in_huggingface", False):
        model_config = BertConfig.from_pretrained(args.in_huggingface)
        model_config.output_hidden_states = args.output_hidden_states
        model = BertModel.from_pretrained(args.in_huggingface,
                                          config=model_config)
        tokenizer = BertTokenizer.from_pretrained(args.in_huggingface)
    else:
        model_config = BertConfig.from_pretrained(args.in_config)
        model_config.output_hidden_states = args.output_hidden_states
        model = BertModel(config=model_config)
        tokenizer = BertTokenizer.from_pretrained(args.in_vocab)
    if getattr(args, "in_model", None) is not None:
        checkpoint = load_checkpoint(args.in_model)
        checkpoint = {"model_state_dict": checkpoint}
        unpack_checkpoint(checkpoint=checkpoint, model=model)

    model = model.eval()
    model, _, _, _, device = process_components(model=model)

    df = pd.read_csv(args.in_csv)
    df = df.dropna(subset=[args.txt_col])
    df.to_csv(f"{args.out_prefix}.df.csv", index=False)
    df = df.reset_index().drop("index", axis=1)
    df = list(df.to_dict("index").values())
    num_samples = len(df)

    open_fn = LambdaReader(
        input_key=args.txt_col,
        output_key=None,
        lambda_fn=partial(
            tokenize_text,
            strip=args.strip,
            lowercase=args.lowercase,
            remove_punctuation=args.remove_punctuation,
        ),
        tokenizer=tokenizer,
        max_length=max_length,
    )

    dataloader = get_loader(
        df,
        open_fn,
        batch_size=batch_size,
        num_workers=num_workers,
    )

    features = {}
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for idx, batch_input in enumerate(dataloader):
            batch_input = any2device(batch_input, device)
            batch_output = model(**batch_input)
            mask = (batch_input["attention_mask"].unsqueeze(-1)
                    if args.mask_for_max_length else None)

            if check_ddp_wrapped(model):
                # using several gpu
                hidden_size = model.module.config.hidden_size
                hidden_states = model.module.config.output_hidden_states

            else:
                # using cpu or one gpu
                hidden_size = model.config.hidden_size
                hidden_states = model.config.output_hidden_states

            batch_features = process_bert_output(
                bert_output=batch_output,
                hidden_size=hidden_size,
                output_hidden_states=hidden_states,
                pooling_groups=pooling_groups,
                mask=mask,
            )

            # create storage based on network output
            if idx == 0:
                for layer_name, layer_value in batch_features.items():
                    if bert_level is not None and bert_level != layer_name:
                        continue
                    layer_name = (layer_name if isinstance(layer_name, str)
                                  else f"{layer_name:02d}")
                    _, embedding_size = layer_value.shape
                    features[layer_name] = np.memmap(
                        f"{args.out_prefix}.{layer_name}.npy",
                        dtype=np.float32,
                        mode="w+",
                        shape=(num_samples, embedding_size),
                    )

            indices = np.arange(idx * batch_size,
                                min((idx + 1) * batch_size, num_samples))
            for layer_name2, layer_value2 in batch_features.items():
                if bert_level is not None and bert_level != layer_name2:
                    continue
                layer_name2 = (layer_name2 if isinstance(layer_name2, str) else
                               f"{layer_name2:02d}")
                features[layer_name2][indices] = _detach(layer_value2)

    if args.force_save:
        for key, mmap in features.items():
            mmap.flush()
            np.save(f"{args.out_prefix}.{key}.force.npy",
                    mmap,
                    allow_pickle=False)
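To read the embeddings back, each memmap written above can be reopened with the same dtype and shape; a minimal sketch where the prefix, layer name, sample count, and embedding size are assumptions that must match the actual run:

import numpy as np

num_samples, embedding_size = 10_000, 768  # assumed; must match the script's output
embeddings = np.memmap(
    "embeddings.pooling_mean.npy",         # hypothetical "{out_prefix}.{layer_name}.npy" file
    dtype=np.float32,
    mode="r",
    shape=(num_samples, embedding_size),
)
print(embeddings[0, :5])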