Example #1
def build_optim_dec(args, model, checkpoint):
    """ Build optimizer """

    if checkpoint is not None:
        optim = checkpoint['optims'][1]
        saved_optimizer_state_dict = optim.optimizer.state_dict()
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        if args.visible_gpus != '-1':
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model" +
                " but optimizer state is empty")

    else:
        optim = Optimizer(args.optim,
                          args.lr_dec,
                          args.max_grad_norm,
                          beta1=args.beta1,
                          beta2=args.beta2,
                          decay_method='noam',
                          warmup_steps=args.warmup_steps_dec)

    params = [(n, p) for n, p in list(model.named_parameters())
              if not n.startswith('bert.model')]
    optim.set_parameters(params)

    return optim
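The detail worth noting above is the name filter: everything under the bert.model prefix is excluded, so this optimizer only ever updates decoder parameters. Below is a minimal, self-contained sketch of the same filtering idea using a plain torch.optim.Adam; the TinyAbsSummarizer module and its field names are hypothetical stand-ins, not code from the source.

import torch
import torch.nn as nn

class TinyAbsSummarizer(nn.Module):
    """Hypothetical stand-in: a 'bert.model' encoder plus a small decoder."""
    def __init__(self):
        super().__init__()
        self.bert = nn.Module()
        self.bert.model = nn.Linear(8, 8)   # excluded from the decoder optimizer
        self.decoder = nn.Linear(8, 4)      # the parameters this optimizer trains

model = TinyAbsSummarizer()

# Same filtering rule as build_optim_dec: drop anything under 'bert.model'.
dec_named_params = [(n, p) for n, p in model.named_parameters()
                    if not n.startswith('bert.model')]
optim_dec = torch.optim.Adam([p for _, p in dec_named_params], lr=2e-3)
print([n for n, _ in dec_named_params])   # ['decoder.weight', 'decoder.bias']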
Example #2
def build_optim(args, model, checkpoint):
    """ Build optimizer """
    saved_optimizer_state_dict = None

    if args.train_from != "":
        optim = checkpoint["optim"]
        saved_optimizer_state_dict = optim.optimizer.state_dict()
    else:
        optim = Optimizer(
            args.optim,
            args.lr,
            args.max_grad_norm,
            beta1=args.beta1,
            beta2=args.beta2,
            decay_method=args.decay_method,
            warmup_steps=args.warmup_steps,
        )

    optim.set_parameters(list(model.named_parameters()))

    if args.train_from != "":
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        if args.visible_gpus != "-1":
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

        if (optim.method == "adam") and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model" +
                " but optimizer state is empty")

    return optim
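A hypothetical call site for build_optim, assuming the Optimizer class from models.optimizers is importable and using an argparse-style namespace with the fields the function reads; the field values are illustrative only.

from argparse import Namespace
import torch.nn as nn

# Hypothetical argument values; the real ones come from the project's CLI parser.
args = Namespace(train_from="", optim="adam", lr=2e-3, max_grad_norm=0.0,
                 beta1=0.9, beta2=0.999, decay_method="noam",
                 warmup_steps=8000, visible_gpus="-1")

model = nn.Linear(8, 2)                     # stand-in for the summarization model
optim = build_optim(args, model, None)      # fresh Optimizer; nothing to restore

# When resuming instead, train_from points at a checkpoint that stores the
# Optimizer wrapper under the 'optim' key:
#   checkpoint = torch.load(args.train_from,
#                           map_location=lambda storage, loc: storage)
#   optim = build_optim(args, model, checkpoint)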
Example #3
def build_optim(args, model, checkpoint):
    """ Build optimizer """
    saved_optimizer_state_dict = None

    if args.train_from != '' and checkpoint is not None:
        optim = checkpoint['optim']
        saved_optimizer_state_dict = optim.optimizer.state_dict()
    else:
        optim = Optimizer(args.optim,
                          args.lr,
                          args.max_grad_norm,
                          beta1=args.beta1,
                          beta2=args.beta2,
                          decay_method=args.decay_method,
                          warmup_steps=args.warmup_steps,
                          weight_decay=args.l2_lambda)
        #self.start_decay_steps take effect when decay_method is not noam

    optim.set_parameters(list(model.named_parameters()))

    if args.train_from != '' and checkpoint is not None:
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        if args.device == "cuda":
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model" +
                " but optimizer state is empty")

    return optim
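With decay_method='noam', the Optimizer wrapper follows the inverse-square-root warmup schedule from the Transformer paper: the rate grows linearly for warmup_steps and then decays as step ** -0.5. Below is a self-contained sketch of that schedule on top of a plain torch optimizer via LambdaLR; the constants are illustrative, not taken from the source.

import torch
import torch.nn as nn
from torch.optim.lr_scheduler import LambdaLR

model = nn.Linear(16, 16)
base_lr, warmup_steps, model_size = 2.0, 8000, 512
adam = torch.optim.Adam(model.parameters(), lr=base_lr, betas=(0.9, 0.998))

def noam_factor(step):
    step = max(step, 1)   # avoid 0 ** -0.5 on the very first call
    return model_size ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)

scheduler = LambdaLR(adam, lr_lambda=noam_factor)
for _ in range(10):
    adam.step()
    scheduler.step()
print(adam.param_groups[0]['lr'])   # still tiny: we are deep inside the warmup phase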
Example #4
def build_optim_dec_inner(args, model, checkpoint, maml_type=None):
    """Builds inner optimizer for decoder.

    We don't need to load the trained optimizer state in the inner loop.

    Args:
        model (models.model_builder.ABsSummarizer/MTLAbsSummarizer)
        checkpoint (dict)
    Returns:
        An optimizer of type models.optimizers.Optimizer.
    """

    assert maml_type == 'maml'  # only support MAML currently

    # NOTE: no warm up
    optim = Optimizer(args.inner_optim,
                      args.lr_dec_inner,
                      args.max_grad_norm,
                      beta1=args.beta1,
                      beta2=args.beta2)

    # NOTE: these params are placeholders; they will be replaced during the forward pass
    params = [(n, p) for n, p in list(model.named_parameters())
              if not n.startswith('bert.model')]
    optim.set_parameters(params)

    return optim
Example #5
def build_optim(args, model, checkpoint):
    """ Build optimizer """

    if checkpoint is not None and not args.new_optim and not args.few_shot:
        optim = checkpoint['optim'][0]
        saved_optimizer_state_dict = optim.optimizer.state_dict()
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        if args.visible_gpus != '-1':
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model" +
                " but optimizer state is empty")

    else:
        optim = Optimizer(
            args.optim, args.lr, args.max_grad_norm,
            beta1=args.beta1, beta2=args.beta2,
            decay_method='noam',
            warmup_steps=args.warmup_steps)

    optim.set_parameters(list(model.named_parameters()))

    return optim
Example #6
def build_optim(args, model, checkpoint):
    saved_optimizer_state_dict = None

    if args.train_from != '':
        optim = checkpoint['optim']
        saved_optimizer_state_dict = optim.optimizer.state_dict()
    else:
        optim = Optimizer(
            args.optim, args.learning_rate, args.max_grad_norm,
            beta1=args.beta1, beta2=args.beta2,
            decay_method=args.decay_method,
            warmup_steps=args.warmup_steps)

    optim.set_parameters(list(model.named_parameters()))

    if args.train_from != '':
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        optim.learning_rate = args.learning_rate
        for param_group in optim.optimizer.param_groups:
            param_group['lr'] = args.learning_rate

        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model" +
                " but optimizer state is empty")

    return optim
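Example #6 additionally forces the command-line learning rate back into the restored optimizer; setting optim.learning_rate alone would not be enough, because the underlying torch optimizer reads 'lr' from its param_groups. The same idea against a plain torch.optim.Adam (file name and values are hypothetical):

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
adam = torch.optim.Adam(model.parameters(), lr=1e-3)
torch.save(adam.state_dict(), 'optim_demo.pt')       # pretend this was a checkpoint

# A later run restores the state, then overrides the learning rate everywhere.
adam.load_state_dict(torch.load('optim_demo.pt'))
new_lr = 5e-4
for param_group in adam.param_groups:
    param_group['lr'] = new_lr
print(adam.param_groups[0]['lr'])                     # 0.0005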
Example #7
def build_optim(args, model, checkpoint):
    """ Build optimizer """
    saved_optimizer_state_dict = None

    if args.train_from != '':
        optim = checkpoint['optim']
        saved_optimizer_state_dict = optim.optimizer.state_dict()
    else:
        optim = Optimizer(args.optim,
                          args.lr,
                          args.max_grad_norm,
                          beta1=args.beta1,
                          beta2=args.beta2,
                          decay_method=args.decay_method,
                          warmup_steps=args.warmup_steps,
                          model_size=args.hidden_size)

    # Stage 1:
    # optim.set_parameters (re-)creates an optimizer using model.parameters()
    # as the parameters that will be stored in the optim.optimizer.param_groups
    # field of the torch optimizer class. Importantly, this method does not yet
    # load the optimizer state: it builds a new optimizer with an empty
    # optimizer state and parameters taken from the model.
    optim.set_parameters(list(model.named_parameters()))

    if args.train_from != '':
        # Stage 2: Only performed when loading an optimizer from a checkpoint.
        # We load saved_optimizer_state_dict into the re-created optimizer to
        # populate the optim.optimizer.state field, which was previously empty.
        # See also: https://github.com/pytorch/pytorch/issues/2830
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        # Convert back the state values to cuda type if applicable
        if args.visible_gpu != '-1':
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

        # We want to make sure that indeed we have a non-empty optimizer state
        # when we loaded an existing model. This should be at least the case
        # for Adam, which saves "exp_avg" and "exp_avg_sq" state
        # (Exponential moving average of gradient and squared gradient values)
        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model" +
                " but optimizer state is empty")

    return optim
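The Stage 2 comments describe a general PyTorch pattern: after load_state_dict, these examples defensively move every tensor in the optimizer state back onto the GPU and then verify that Adam's state (exp_avg, exp_avg_sq) is actually non-empty. Below is a self-contained sketch of that pattern with a plain torch.optim.Adam; the file name is hypothetical.

import torch
import torch.nn as nn

model = nn.Linear(8, 2)
adam = torch.optim.Adam(model.parameters(), lr=1e-3)
model(torch.randn(4, 8)).sum().backward()
adam.step()                                   # populates exp_avg / exp_avg_sq

torch.save(adam.state_dict(), 'adam_state.pt')

# Rebuild the optimizer and restore its state from disk on the CPU.
adam2 = torch.optim.Adam(model.parameters(), lr=1e-3)
adam2.load_state_dict(torch.load('adam_state.pt', map_location='cpu'))

if torch.cuda.is_available():                 # mirrors the visible_gpus != '-1' branch
    model.cuda()
    for state in adam2.state.values():
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.cuda()

if len(adam2.state) < 1:
    raise RuntimeError("loaded Adam optimizer but optimizer state is empty")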
Example #8
def build_optim(args, model, checkpoint=None):
    """ Build optimizer """
    if checkpoint is not None and not args.transfer_learning:
        logger.info('Loading model optimizer...')
        optim = checkpoint['optim']
    else:
        optim = Optimizer(
            args.optim, args.lr, args.max_grad_norm,
            beta1=args.beta1, beta2=args.beta2,
            decay_method='noam',
            warmup_steps=args.warmup_steps)
    optim.set_parameters(list(model.named_parameters()))
    
    # optim = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
    
    return optim
Example #9
def build_optim_dec(args, model, checkpoint):
    """Builds optimizer for decoder.

    Args:
        model (models.model_builder.ABsSummarizer/MTLAbsSummarizer)
        checkpoint (dict)
    Returns:
        An optimizer of type models.optimizers.Optimizer.
    """

    # Load optimizer
    if checkpoint is not None and not args.init_optim:
        optim = checkpoint['optims'][1]  # [0] -> encoder, [1] -> decoder
        optim.optimizer.load_state_dict(optim.optimizer.state_dict())
        if args.visible_gpus != '-1':
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model" +
                " but optimizer state is empty")

    else:
        # Disable warm up
        if (args.outer_no_warm_up):
            optim = Optimizer(args.optim,
                              args.lr_dec,
                              args.max_grad_norm,
                              beta1=args.beta1,
                              beta2=args.beta2)
        else:
            optim = Optimizer(args.optim,
                              args.lr_dec,
                              args.max_grad_norm,
                              beta1=args.beta1,
                              beta2=args.beta2,
                              decay_method='noam',
                              warmup_steps=args.warmup_steps_dec)

    # Feed parameters to be optimized
    params = [(n, p) for n, p in list(model.named_parameters())
              if not n.startswith('bert.model')]
    optim.set_parameters(params)

    return optim
Example #10
def build_optim(args, model, checkpoint):
    """ Build optimizer """
    saved_optimizer_state_dict = None

    if args.train_from != '' or args.recover_from != '':
        optim = checkpoint['optim']
        saved_optimizer_state_dict = optim.optimizer.state_dict()
    else:
        optim = Optimizer(args.optim,
                          args.lr,
                          args.max_grad_norm,
                          beta1=args.beta1,
                          beta2=args.beta2,
                          decay_method=args.decay_method,
                          warmup_steps=args.warmup_steps)
    if isinstance(model, list):
        tmp = []
        for _model in model:
            tmp.extend(list(_model.named_parameters()))
        optim.set_parameters(tmp)
    else:
        optim.set_parameters(list(model.named_parameters()))

    if args.train_from != '' or args.recover_from != '':
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        if args.visible_gpus != '-1':
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model" +
                " but optimizer state is empty")

    return optim
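Unlike the earlier variants, Example #10 also accepts a list of models and concatenates their named parameters before handing everything to one optimizer. A minimal sketch of the same merging step with a plain torch optimizer; the two submodules are hypothetical stand-ins.

import torch
import torch.nn as nn

encoder, decoder = nn.Linear(16, 8), nn.Linear(8, 4)   # stand-ins for a model list

named_params = []
for m in [encoder, decoder]:
    named_params.extend(list(m.named_parameters()))

# One optimizer over every parameter of every model in the list.
adam = torch.optim.Adam([p for _, p in named_params], lr=1e-3)
print(len(named_params), len(adam.param_groups[0]['params']))   # 4 4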
def run(mtd="fold_split"):
    def _eval(data):
        model.eval()  # eval mode: disables Dropout and BatchNorm updates
        # data = dev_data
        y_pred = []
        y_true = []
        with torch.no_grad():
            for batch_data in dataset_processer.data_iter(
                    data, config['test_batch_size'], shuffle=False):
                torch.cuda.empty_cache()
                batch_inputs, batch_labels = dataset_processer.batch2tensor(
                    batch_data)
                batch_outputs = model(batch_inputs)
                y_pred.extend(
                    torch.max(batch_outputs, dim=1)[1].cpu().numpy().tolist())
                y_true.extend(batch_labels.cpu().numpy().tolist())

            score, dev_f1 = scores.get_score(y_true, y_pred)
        return score, dev_f1

    if mtd == "fold_split":
        demo_preprocess.split_dataset(raw_path, train_path, dev_path,
                                      test_path)
    elif mtd == "process_data":
        demo_preprocess.process_data(config, train_path, dev_path)
    elif mtd == "train":
        Train_data = file_utils.read_json(config["train_set"])
        Dev_data = file_utils.read_json(config["dev_set"])
        # Convert the raw data into the format the model expects
        train_data = dataset_processer.get_examples(Train_data, label_encoder)
        dev_data = dataset_processer.get_examples(Dev_data, label_encoder)
        del Train_data, Dev_data
        # Number of batches per epoch
        batch_num = int(
            np.ceil(len(train_data) / float(config["train_batch_size"])))
        print("batch_num:{}".format(batch_num))
        # model = BertSoftmaxModel(cfg.bert_path, label_encoder)
        optimizer = Optimizer(model.all_parameters,
                              steps=batch_num * config["epochs"])  # optimizer

        # loss
        # criterion = nn.CrossEntropyLoss()  # obj
        criterion = loss_factory.focal_loss()
        best_train_f1, best_dev_f1 = 0, 0
        early_stop = -1
        EarlyStopEpochs = 10  # stop early if the dev metric has not improved for this many epochs
        # train
        print("start train")
        for epoch in range(cfg.RESUME_EPOCH + 1, config["epochs"] + 1):
            optimizer.zero_grad()
            model.train()  # train mode: enables Dropout and BatchNorm updates
            overall_losses = 0
            losses = 0
            # batch_idx = 1
            y_pred = []
            y_true = []
            step = 0
            for batch_data in dataset_processer.data_iter(
                    train_data, config["train_batch_size"], shuffle=True):
                torch.cuda.empty_cache()
                batch_inputs, batch_labels = dataset_processer.batch2tensor(
                    batch_data)
                batch_outputs = model(batch_inputs)
                print(batch_outputs.shape)
                loss = criterion(batch_outputs, batch_labels)
                loss.backward()

                loss_value = loss.detach().cpu().item()
                losses += loss_value
                overall_losses += loss_value

                y_pred.extend(
                    torch.max(batch_outputs, dim=1)[1].cpu().numpy().tolist())
                y_true.extend(batch_labels.cpu().numpy().tolist())

                # nn.utils.clip_grad_norm_(optimizer.all_params, max_norm=config["clip"])  # gradient clipping
                for cur_optim, scheduler in zip(optimizer.optims,
                                                optimizer.schedulers):
                    cur_optim.step()
                    scheduler.step()
                optimizer.zero_grad()
                step += 1
                # print(step, time.time())
            overall_losses /= batch_num
            overall_losses = scores.reformat(overall_losses, 4)
            score, train_f1 = scores.get_score(y_true, y_pred)
            print("epoch:{},train_score:{}, train_f1:{}, overall_loss:{} ".
                  format(epoch, train_f1, score, overall_losses))
            # if set(y_true) == set(y_pred):
            #     print("report")
            #     report = classification_report(y_true, y_pred, digits=4, target_names=label_encoder.target_names)
            #     # logging.info('\n' + report)
            #     print(report)

            # eval
            _, dev_f1 = _eval(data=dev_data)

            if best_dev_f1 < dev_f1:
                best_dev_f1 = dev_f1
                early_stop = 0
                best_train_f1 = train_f1
                save_path = model_utils.save_checkpoint(
                    model,
                    epoch,
                    save_folder=os.path.join(cfg.proj_path, "data/bert_nn"))
                print("save_path:{}".format(save_path))
                # torch.save(model.state_dict(), save_model)
            else:
                early_stop += 1
                if early_stop == EarlyStopEpochs:  # stop training once the early-stop limit is reached
                    break
            print(
                "early_stop:{}, score:{}, dev_f1:{}, best_train_f1:{}, best_dev_f1:{}"
                .format(early_stop, score, dev_f1, best_train_f1, best_dev_f1))
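The training loop above keeps gradient clipping commented out; if it is re-enabled it belongs between loss.backward() and the optimizer step. A minimal sketch of that ordering with a plain torch optimizer; the max_norm value is illustrative.

import torch
import torch.nn as nn

model = nn.Linear(8, 2)
adam = torch.optim.Adam(model.parameters(), lr=1e-3)

loss = model(torch.randn(4, 8)).sum()
adam.zero_grad()
loss.backward()
nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)   # clip before stepping
adam.step()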