Example #1
    def create(self, model: Model) -> "Optimizer":
        no_decay = ["bias", "LayerNorm.weight"]
        parameters = [{
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            self.weight_decay,
        }, {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0,
        }]

        optimizer = AdamW(
            parameters,
            betas=(0.9, 0.98),  # betas from the RoBERTa paper
            lr=self.lr,
            eps=self.eps)

        return optimizer
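The grouping above relies on substring matches against parameter names, so which tensors actually receive weight decay depends on how the model names them. Below is a minimal sketch of that check, using a hypothetical stand-in module (TinyClassifier is illustrative, not part of the source) with HuggingFace-style LayerNorm naming:

    from torch import nn

    class TinyClassifier(nn.Module):
        # Stand-in model with both decayed (Linear weight) and non-decayed
        # (bias, LayerNorm weight) parameters; the attribute is deliberately
        # named "LayerNorm" so it matches the "LayerNorm.weight" pattern.
        def __init__(self):
            super().__init__()
            self.linear = nn.Linear(8, 8)
            self.LayerNorm = nn.LayerNorm(8)

    no_decay = ["bias", "LayerNorm.weight"]
    model = TinyClassifier()

    decayed = [n for n, _ in model.named_parameters()
               if not any(nd in n for nd in no_decay)]
    excluded = [n for n, _ in model.named_parameters()
                if any(nd in n for nd in no_decay)]

    print(decayed)   # ['linear.weight']
    print(excluded)  # ['linear.bias', 'LayerNorm.weight', 'LayerNorm.bias']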
Example #2
    def create(self, model: Model) -> "BilstmGATOptimizerFactory":

        if self.optimizer_name == "SGD":
            optimizer = SGD(params=model.parameters(),
                            lr=self.lr,
                            weight_decay=self.weight_decay)
        elif self.optimizer_name == "Adam":
            optimizer = Adam(params=model.parameters(),
                             lr=self.lr,
                             weight_decay=self.weight_decay)
        else:
            raise RuntimeError(f"optimizer_name 必须是 Adam 或 SGD")
        return optimizer
    def create(self, model: Model) -> "Optimizer":
        """
        创建 optimizer
        :param model: 模型
        """

        sentence_embedding_param_name = "_sentence_embedder.weight"

        parameter_dict: Dict[str, torch.nn.Parameter] = \
            {name: parameter for name, parameter in model.named_parameters()}

        sentence_embedding_param = parameter_dict.pop(
            sentence_embedding_param_name)

        if self._is_fine_tuning:
            # Enable gradient updates so the sentence embedding can be fine-tuned.
            sentence_embedding_param.requires_grad = True

            # Group the params: the fine-tuned embedding gets a smaller learning rate.
            params = [{
                "params": parameter_dict.values()
            }, {
                "params": [sentence_embedding_param],
                "lr": 1e-3
            }]

            optimizer = SGD(params=params, lr=0.01)
        else:
            # Not fine-tuning: freeze the sentence embedding so it gets no gradient updates,
            # and keep it out of the optimizer as well.
            sentence_embedding_param.requires_grad = False
            optimizer = SGD(params=parameter_dict.values(), lr=0.01)
        return optimizer
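The per-group "lr": 1e-3 above overrides the optimizer-level lr=0.01 only for the embedding group; the other group keeps the default. A minimal sketch of that behaviour with stand-in modules (the names projection and sentence_embedder are illustrative, not from the source):

    from torch.nn import Embedding, Linear
    from torch.optim import SGD

    # Stand-in parameters: a projection trained at the base lr and an
    # embedding fine-tuned at a smaller lr.
    projection = Linear(4, 4)
    sentence_embedder = Embedding(10, 4)

    params = [
        {"params": projection.parameters()},                     # uses the default lr
        {"params": sentence_embedder.parameters(), "lr": 1e-3},  # per-group override
    ]
    optimizer = SGD(params=params, lr=0.01)

    for group in optimizer.param_groups:
        print(group["lr"])  # prints 0.01, then 0.001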
Example #4
    def __init__(self,
                 serialize_dir: str,
                 num_epoch: int,
                 model: Model,
                 loss: Loss,
                 metrics: ModelMetricAdapter,
                 optimizer_factory: OptimizerFactory,
                 lr_scheduler_factory: LRSchedulerFactory = None,
                 patient: int = None,
                 num_check_point_keep: int = None,
                 cuda_devices: List[str] = None):
        """
        训练器初始化
        :param num_epoch: 训练的 epoch 数量
        :param model: 要训练的模型
        :param loss: 模型的 loss function
        :param metrics: 模型的指标计算
        :param optimizer_factory: 模型的优化器的创建工厂。为什么不直接使用优化器?是因为, 优化器的创建依赖于 model, 所以
        这里传递的参数 optimizer factory, 避免使用者在 trainer 外面生成 optimizer, 导致在 trainer 内 optimizer 依赖于
        model 的参数产生问题。典型问题是: 设置 cuda.
        :param serialize_dir: 训练存储的文件路径
        :param patient: early stopping 的 patient. 如果是 `None`, 将不会进行 early stopping;
        否则, 当前训练的指标超出了 patient 个 epoch 将会 early stopping.
        :param num_check_point_keep: checkpoint 保留的数量。如果是 `None` 则全部保留;
        否则,保留 num_check_point_keep 个checkpoint.
        :param cuda_devices: cuda device列表. 字符串类型,那么就是 "cuda:0" 这种格式。
        """

        if cuda_devices is not None and len(cuda_devices) != 1:
            raise RuntimeError(
                f"Only single-GPU training is currently supported; got cuda devices {cuda_devices}")

        if cuda_devices is not None:
            self._cuda_devices = [
                torch.device(device) for device in cuda_devices
            ]
            self._model = model.cuda(self._cuda_devices[0])
        else:
            self._cuda_devices = None
            self._model = model

        self._loss = loss
        self._metrics = metrics
        self._optimizer = optimizer_factory.create(model=self._model)

        if lr_scheduler_factory is not None:
            self._lr_scheduler = lr_scheduler_factory.create(
                optimizer=self._optimizer, model=self._model)
        else:
            self._lr_scheduler = None

        self._serialize_dir = serialize_dir
        self._metric_tracker = MetricTracker(patient=patient)
        self._num_check_point_keep = num_check_point_keep
        self._num_epoch = num_epoch
        self._current_epoch: int = None
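The optimizer_factory argument above defers optimizer creation until after the trainer has moved the model onto its device, which is the point the docstring makes about cuda. A minimal sketch of that flow under an assumed factory with the same create(model) interface (ConstantLrOptimizerFactory and the toy model are illustrative, not part of the source):

    import torch
    from torch.nn import Linear, Module
    from torch.optim import SGD, Optimizer

    class ConstantLrOptimizerFactory:
        """Illustrative factory: the optimizer is built only when create() is called."""

        def __init__(self, lr: float = 0.01):
            self.lr = lr

        def create(self, model: Module) -> Optimizer:
            # Called by the trainer *after* model.cuda(...), so the optimizer
            # references the parameters that will actually be trained.
            return SGD(params=model.parameters(), lr=self.lr)

    # Usage sketch: the caller hands over a factory; the trainer decides
    # when (and on which device) the optimizer is created.
    model = Linear(4, 2)
    factory = ConstantLrOptimizerFactory(lr=0.01)
    if torch.cuda.is_available():
        model = model.cuda(torch.device("cuda:0"))
    optimizer = factory.create(model=model)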
Example #5
    def create(self, model: Model) -> "Optimizer":

        return Adam(params=model.parameters(), lr=0.01)
Example #6
 def create(self, model: Model) -> "Optimizer":
     return torch.optim.Adam(params=model.parameters(), lr=1e-1)
Example #7
    def create(self, model: Model) -> "RnnWithCrfOptimizerFactory":

        optimizer = Adam(params=model.parameters(), lr=0.01)
        return optimizer
    def create(self, model: Model) -> "LatticeOptimizerFactory":

        optimizer = SGD(params=model.parameters(),
                        lr=self.lr,
                        momentum=self.momentum)
        return optimizer