コード例 #1
0
    def load(self, save_path, model: torch.nn.Module = None, optimizer: Optimizer = None):
        """Restore model/optimizer state from a checkpoint file.

        Args:
            save_path: Path to saved model (.pth). If a directory is provided instead, model-best.pth is used
            model: Torch model to restore weights to
            optimizer: Optimizer

        Returns:
            The training step recorded in the checkpoint, or 0 if absent.
        """
        # Directory argument -> fall back to the conventional best-model file.
        if os.path.isdir(save_path):
            save_path = os.path.join(save_path, 'model-best.pth')

        state = torch.load(save_path)

        step = state['step'] if 'step' in state else 0

        # Each component is restored only when both the target object and
        # the corresponding entry in the checkpoint are present.
        if model is not None and 'state_dict' in state:
            model.load_state_dict(state['state_dict'])

        if optimizer is not None and 'optimizer' in state:
            optimizer.load_state_dict(state['optimizer'])

        self._logger.info('Loaded models from {}'.format(save_path))
        return step
コード例 #2
0
def lr_find(model: UNet,
            data_loader,
            optimizer: Optimizer,
            criterion,
            use_gpu,
            min_lr=0.0001,
            max_lr=0.1):
    """Run a learning-rate range test.

    Sweeps the learning rate from ``min_lr`` toward ``max_lr`` with a cyclic
    schedule while training on batches from ``data_loader``, records the loss
    at each step, plots loss vs. learning rate on a log-scaled x axis, and
    finally restores the model and optimizer to their initial states.

    Args:
        model: Network under test.
        data_loader: Iterable yielding (data, target, class_ids) batches.
        optimizer: Optimizer whose learning rate is swept.
        criterion: Loss function applied to (output, target).
        use_gpu: If True, tensors are moved to CUDA.
        min_lr: Lower bound of the learning-rate sweep.
        max_lr: Upper bound of the learning-rate sweep.
    """
    import copy

    # Bug fix: state_dict() returns references to the live parameter
    # tensors, which optimizer.step() mutates in place — so restoring the
    # un-copied dicts at the end was a no-op. Deep-copy to get a snapshot.
    model_state = copy.deepcopy(model.state_dict())
    optimizer_state = copy.deepcopy(optimizer.state_dict())

    losses = []
    lrs = []
    scheduler = CyclicExpLR(optimizer,
                            min_lr,
                            max_lr,
                            step_size_up=100,
                            mode='triangular',
                            cycle_momentum=True)
    model.train()
    for i, (data, target, class_ids) in enumerate(data_loader):
        if use_gpu:
            data = data.cuda()
            target = target.cuda()

        optimizer.zero_grad()
        output_raw = model(data)
        # This step is specific for this project
        output = torch.zeros(output_raw.shape[0], 1, output_raw.shape[2],
                             output_raw.shape[3])

        if use_gpu:
            output = output.cuda()

        # This step is specific for this project: select the channel that
        # corresponds to each sample's class id (ids appear to be 1-based).
        for idx, (raw_o, class_id) in enumerate(zip(output_raw, class_ids)):
            output[idx] = raw_o[class_id - 1]

        loss = criterion(output, target)
        loss.backward()
        current_lr = optimizer.param_groups[0]['lr']
        # Stop if lr stopped increasing (the cyclic schedule turned around).
        if len(lrs) > 0 and current_lr < lrs[-1]:
            break
        lrs.append(current_lr)
        losses.append(loss.item())
        optimizer.step()
        scheduler.step()

    # Plot in log scale
    plt.plot(lrs, losses)
    plt.xscale('log')

    plt.show()

    # Revert to the pre-sweep states captured above.
    model.load_state_dict(model_state)
    optimizer.load_state_dict(optimizer_state)
コード例 #3
0
def load_optimizer(path: str,
                   optimizer: Optimizer,
                   map_location: Union[None, str] = "cpu"):
    """
    Load the state dict into an optimizer from a given file.

    :param path: the path to the pth file to load the state dict from
    :param optimizer: the optimizer to load the state dict into
    :param map_location: the location to map the values to when loading
    :return: the epoch saved in the file, if any (None otherwise)
    """
    model_dict = torch.load(path, map_location=map_location)
    optimizer.load_state_dict(model_dict["optimizer"])
    # Bug fix: the docstring promises the stored epoch, but nothing was
    # returned. get() keeps this backward-compatible (None when absent).
    return model_dict.get("epoch")
コード例 #4
0
def load_ckpt(checkpoint_path: str, model: nn.Module, optim: optimizer.Optimizer) -> Tuple[int, float]:
    """Loads training checkpoint.

    Restores the model and optimizer states in place from the checkpoint
    file, then reports where training should resume.

    :param checkpoint_path: path to checkpoint
    :param model: model to update state
    :param optim: optimizer to update state
    :return: tuple of (starting epoch id, best checkpoint score)
    """
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint[_MODEL_STATE_DICT])
    optim.load_state_dict(checkpoint[_OPTIMIZER_STATE_DICT])
    # Resume one epoch past the one stored in the checkpoint.
    start_epoch_id = checkpoint[_EPOCH] + 1
    best_score = checkpoint[_BEST_SCORE]
    return start_epoch_id,  best_score
コード例 #5
0
File: pipeline_ops.py  Project: xiamenwcy/MINet
def resume_checkpoint(
    model: nn.Module = None,
    optimizer: optim.Optimizer = None,
    scheduler: sche._LRScheduler = None,
    exp_name: str = "",
    load_path: str = "",
    mode: str = "all",
):
    """
    Restore the model from a saved checkpoint.

    Args:
        model (nn.Module): model object
        optimizer (optim.Optimizer): optimizer object
        scheduler (sche._LRScheduler): scheduler object
        exp_name (str): experiment name; must equal the checkpoint's
            "arch" entry when mode == "all"
        load_path (str): path of the stored checkpoint file
        mode (str): which restore mode to use:
            - 'all': restore the complete training state, including the
              in-training parameters (optimizer, scheduler, epoch);
            - 'onlynet': restore only the model's weight parameters

    Returns mode: 'all' start_epoch; 'onlynet' None
    """
    if os.path.exists(load_path) and os.path.isfile(load_path):
        construct_print(f"Loading checkpoint '{load_path}'")
        checkpoint = torch.load(load_path)
        if mode == "all":
            if exp_name == checkpoint["arch"]:
                start_epoch = checkpoint["epoch"]
                model.load_state_dict(checkpoint["net_state"])
                optimizer.load_state_dict(checkpoint["opti_state"])
                scheduler.load_state_dict(checkpoint["sche_state"])
                construct_print(f"Loaded '{load_path}' "
                                f"(will train at epoch"
                                f" {checkpoint['epoch']})")
                return start_epoch
            else:
                raise Exception(f"{load_path} does not match.")
        elif mode == "onlynet":
            # In this mode the checkpoint is the state dict itself.
            model.load_state_dict(checkpoint)
            construct_print(f"Loaded checkpoint '{load_path}' "
                            f"(only has the model's weight params)")
        else:
            raise NotImplementedError
    else:
        raise Exception(f"{load_path}路径不正常,请检查")
コード例 #6
0
def load_checkpoint(model: nn.Module, optim: optimizer.Optimizer,
                    checkpoint_path="./result/fr_en/checkpoint.tar") -> Tuple[int, int, float, float]:
    """Restore a training checkpoint in place.

    :param model: model to update state
    :param optim: optimizer to update state
    :param checkpoint_path: path to the checkpoint file
    :return: tuple of (starting epoch id, starting step id, best score, loss)
    """
    saved = torch.load(checkpoint_path)
    model.load_state_dict(saved[_MODEL_STATE_DICT])
    optim.load_state_dict(saved[_OPTIMIZER_STATE_DICT])
    # Resume one past the stored epoch/step counters.
    return (saved[_EPOCH] + 1,
            saved[_STEP] + 1,
            saved[_BEST_SCORE],
            saved[_LOSS])
コード例 #7
0
def load_checkpoint(checkpoint_dir: str, model: nn.Module, optim: optimizer.Optimizer) -> Tuple[int, int, float]:
    """Loads the training checkpoint <checkpoint_dir>/checkpoint.tar.

    :param checkpoint_dir: directory containing checkpoint.tar
    :param model: model to update state
    :param optim: optimizer to update state
    :raises FileNotFoundError: if checkpoint_dir does not exist
    :return: tuple of (starting epoch id, starting step id, best checkpoint score)
    """
    if not os.path.exists(checkpoint_dir):
        # Bug fix: `raise "..."` raised a bare string, which is itself a
        # TypeError at runtime; raise a proper exception type instead.
        raise FileNotFoundError("File doesn't exist {}".format(checkpoint_dir))
    checkpoint_path = os.path.join(checkpoint_dir, 'checkpoint.tar')
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint[_MODEL_STATE_DICT])
    optim.load_state_dict(checkpoint[_OPTIMIZER_STATE_DICT])
    # Resume one past the stored epoch/step counters.
    start_epoch_id = checkpoint[_EPOCH] + 1
    step = checkpoint[_STEP] + 1
    best_score = checkpoint[_BEST_SCORE]
    return start_epoch_id, step, best_score
コード例 #8
0
def load_model(checkpoint: str, model: Module, optimizer: Optimizer) -> Tuple[int, int, List[dict]]:
    """
    If fresh model then ``iteration == -1``.

    :return: (epoch, iteration, log_stats)
    """
    # Defaults used when no checkpoint file exists (fresh model).
    epoch = 0
    iteration = -1
    log_stats: List[dict] = []
    if path.isfile(checkpoint):
        saved = tr.load(checkpoint)
        model.load_state_dict(saved['model_state_dict'])
        optimizer.load_state_dict(saved['optimizer_state_dict'])
        model.eval()
        epoch = saved.get('epoch', epoch)
        iteration = saved.get('iteration', iteration)
        log_stats = saved.get('log_stats', log_stats)
        # Guard against malformed checkpoints before handing data back.
        bad_types = (not isinstance(log_stats, list)
                     or not isinstance(epoch, int)
                     or not isinstance(iteration, int))
        if bad_types:
            raise RuntimeError('Loaded log_stats should be a list, epoch and iteration should be int.')

    return epoch, iteration, log_stats
コード例 #9
0
File: pipeline_ops.py  Project: skeras/MINet
def resume_checkpoint(
    model: nn.Module = None,
    optimizer: optim.Optimizer = None,
    scheduler: sche._LRScheduler = None,
    amp=None,
    exp_name: str = "",
    load_path: str = "",
    mode: str = "all",
):
    """
    Restore the model from a saved checkpoint.

    Args:
        model (nn.Module): model object
        optimizer (optim.Optimizer): optimizer object
        scheduler (sche._LRScheduler): scheduler object
        amp (): apex.amp
        exp_name (str): experiment name; when given it must equal the
            checkpoint's "arch" entry in mode == "all"
        load_path (str): path of the stored checkpoint file
        mode (str): which restore mode to use:
            - 'all': restore the complete training state, including the
              in-training parameters (optimizer, scheduler, amp, epoch);
            - 'onlynet': restore only the model's weight parameters

    Returns mode: 'all' start_epoch; 'onlynet' None
    """
    if os.path.exists(load_path) and os.path.isfile(load_path):
        construct_print(f"Loading checkpoint '{load_path}'")
        checkpoint = torch.load(load_path)
        if mode == "all":
            if exp_name and exp_name != checkpoint["arch"]:
                # If exp_name is given it must match the checkpoint's
                # "arch" entry; an empty exp_name skips the check.
                raise Exception(
                    f"We can not match {exp_name} with {load_path}.")

            start_epoch = checkpoint["epoch"]
            # DataParallel/DDP wrappers keep the real network in .module.
            if hasattr(model, "module"):
                model.module.load_state_dict(checkpoint["net_state"])
            else:
                model.load_state_dict(checkpoint["net_state"])
            optimizer.load_state_dict(checkpoint["opti_state"])
            scheduler.load_state_dict(checkpoint["sche_state"])
            if checkpoint.get("amp_state", None):
                if amp:
                    amp.load_state_dict(checkpoint["amp_state"])
                else:
                    construct_print("You are not using amp.")
            else:
                construct_print("The state_dict of amp is None.")
            construct_print(f"Loaded '{load_path}' "
                            f"(will train at epoch"
                            f" {checkpoint['epoch']})")
            return start_epoch
        elif mode == "onlynet":
            # In this mode the checkpoint is the state dict itself.
            if hasattr(model, "module"):
                model.module.load_state_dict(checkpoint)
            else:
                model.load_state_dict(checkpoint)
            construct_print(f"Loaded checkpoint '{load_path}' "
                            f"(only has the model's weight params)")
        else:
            raise NotImplementedError
    else:
        raise Exception(f"{load_path}路径不正常,请检查")
コード例 #10
0
def resume_checkpoint(
    model: nn.Module = None,
    optimizer: optim.Optimizer = None,
    amp=None,
    exp_name: str = "",
    load_path: str = "",
    mode: str = "all",
    local_rank: int = 0,
):
    """
    Restore the model from a saved checkpoint.

    Args:
        model (nn.Module): model object
        optimizer (optim.Optimizer): optimizer object
        amp (): apex.amp
        exp_name (str): experiment name; must equal the checkpoint's
            "arch" entry when mode == "all"
        load_path (str): path of the stored checkpoint file
        mode (str): which restore mode to use:
            - 'all': restore the complete training state, including the
              in-training parameters (optimizer, amp, epoch);
            - 'onlynet': restore only the model's weight parameters
        local_rank (int): target GPU onto which the weights are loaded

    Returns mode: 'all' start_epoch; 'onlynet' None
    """
    if os.path.exists(load_path) and os.path.isfile(load_path):
        construct_print(f"Loading checkpoint '{load_path}'")
        # Remap tensors saved from cuda:0 onto this process's GPU.
        checkpoint = torch.load(load_path,
                                map_location={"cuda:0": f"cuda:{local_rank}"})
        if mode == "all":
            if exp_name == checkpoint["arch"]:
                start_epoch = checkpoint["epoch"]
                # "net_state": model.module.state_dict()
                # if hasattr(model, "module") else model.state_dict(),
                if hasattr(model, "module"):
                    model.module.load_state_dict(checkpoint["net_state"])
                else:
                    model.load_state_dict(checkpoint["net_state"])
                optimizer.load_state_dict(checkpoint["opti_state"])
                if checkpoint["amp_state"]:
                    if amp:
                        amp.load_state_dict(checkpoint["amp_state"])
                    else:
                        construct_print("You are not using amp.")
                else:
                    construct_print("The state_dict of amp is None.")
                construct_print(f"Loaded '{load_path}' "
                                f"(will train at epoch"
                                f" {checkpoint['epoch']})")
                return start_epoch
            else:
                raise Exception(f"{load_path} does not match.")
        elif mode == "onlynet":
            if hasattr(model, "module"):
                model.module.load_state_dict(checkpoint["net_state"])
            else:
                model.load_state_dict(checkpoint["net_state"])
            construct_print(f"Loaded checkpoint '{load_path}' "
                            f"(only has the model's weight params)")
        else:
            raise NotImplementedError
    else:
        raise Exception(f"{load_path}路径不正常,请检查")
コード例 #11
0
def load_torch_state(model: nn.Module, optimizer: Optimizer, path: str,
                     device_id: int):
    """Restore model and optimizer state dicts from *path*, mapping all
    tensors onto GPU ``device_id``."""
    snapshot = torch.load(path, map_location=f"cuda:{device_id}")
    model.load_state_dict(snapshot['model_state_dict'])
    optimizer.load_state_dict(snapshot['optimizer_state_dict'])