def __init__(self, config):
    super(UnifiedHGNModel, self).__init__()
    self.config = config
    self.encoder, _ = load_encoder_model(self.config.encoder_name_or_path,
                                         self.config.model_type)
    self.model = HierarchicalGraphNetwork(config=self.config)
    if self.config.fine_tuned_encoder is not None:
        encoder_path = join(self.config.fine_tuned_encoder_path,
                            self.config.fine_tuned_encoder, 'encoder.pkl')
        logging.info("Loading encoder from: {}".format(encoder_path))
        self.encoder.load_state_dict(torch.load(encoder_path))
        logging.info("Loading encoder completed")
Example 2
    def __init__(self, hparams: Namespace):
        super().__init__()
        self.hparams = hparams
        cached_config_file = join(self.hparams.exp_name, 'cached_config.bin')
        if os.path.exists(cached_config_file):
            cached_config = torch.load(cached_config_file)
            encoder_path = join(self.hparams.exp_name,
                                cached_config['encoder'])
        else:
            if self.hparams.fine_tuned_encoder is not None:
                encoder_path = join(self.hparams.fine_tuned_encoder_path,
                                    self.hparams.fine_tuned_encoder,
                                    'encoder.pkl')
            else:
                encoder_path = None

        _, _, tokenizer_class = MODEL_CLASSES[self.hparams.model_type]
        self.tokenizer = tokenizer_class.from_pretrained(
            self.hparams.encoder_name_or_path,
            do_lower_case=self.hparams.do_lower_case)
        # Set Encoder and Model
        self.encoder, _ = load_encoder_model(self.hparams.encoder_name_or_path,
                                             self.hparams.model_type)
        self.model = HierarchicalGraphNetwork(config=self.hparams)
        if encoder_path is not None:
            self.encoder.load_state_dict(torch.load(encoder_path))
            logging.info('Initializing parameters from {}'.format(encoder_path))
        logging.info('Loading encoder and model completed')
        ##########
        self.save_hyperparameters(self.hparams)
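The cached_config.bin branch above reads an 'encoder' entry from a dict deserialized with torch.load. A sketch of a matching writer, assuming the cache is a small dict saved with torch.save; only the 'encoder' key is consumed in __init__ above, the other key is illustrative:

from os.path import join

import torch


def write_cached_config(exp_name, encoder_file, epoch):
    # Illustrative writer for the cache read in __init__ above. 'encoder'
    # holds the file name of the encoder checkpoint inside exp_name.
    cached_config = {
        'encoder': encoder_file,  # e.g. a checkpoint file name inside exp_name
        'epoch': epoch,           # illustrative extra bookkeeping
    }
    torch.save(cached_config, join(exp_name, 'cached_config.bin'))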
Example 3
else:
    model_path = None
    if args.fine_tuned_encoder is not None:
        # encoder_path = join(args.output_dir, args.fine_tuned_encoder, 'encoder.pkl')
        encoder_path = join(args.fine_tuned_encoder_path,
                            args.fine_tuned_encoder, 'encoder.pkl')
        logger.info("Loading encoder from: {}".format(encoder_path))
    else:
        encoder_path = None
    start_epoch = 0
    best_joint_f1 = 0
    learning_rate = args.learning_rate

# Set Encoder and Model
encoder, _ = load_encoder_model(args.encoder_name_or_path, args.model_type)
model = HierarchicalGraphNetwork(config=args)

if encoder_path is not None:
    encoder.load_state_dict(torch.load(encoder_path))
if model_path is not None:
    model.load_state_dict(torch.load(model_path))

#######################################################################################
if args.frozen_layer_number > 0:
    modules = [
        encoder.embeddings, *encoder.encoder.layer[:args.frozen_layer_number]
    ]
    for module in modules:
        for param in module.parameters():
            param.requires_grad = False
    logging.info('Froze the first {} layers'.format(args.frozen_layer_number))
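A quick way to sanity-check the freezing above is to count trainable versus total parameters; this uses only standard PyTorch and is independent of the HGN-specific code:

def count_parameters(module):
    # Returns (trainable, total) parameter counts, so the effect of setting
    # requires_grad = False on the embedding and lower layers is visible.
    trainable = sum(p.numel() for p in module.parameters() if p.requires_grad)
    total = sum(p.numel() for p in module.parameters())
    return trainable, total


# Usage sketch after the freezing block:
# trainable, total = count_parameters(encoder)
# logging.info('Encoder: {}/{} parameters trainable'.format(trainable, total))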
Example 4
config_class, model_encoder, tokenizer_class = MODEL_CLASSES[args.model_type]
config = config_class.from_pretrained(args.encoder_name_or_path)

encoder_path = join(args.exp_name, args.encoder_name)  # uses args.encoder_name instead of the fixed 'encoder.pkl'
model_path = join(args.exp_name, args.model_name)  # uses args.model_name instead of a fixed model file name
logger.info("Loading encoder from: {}".format(encoder_path))
logger.info("Loading model from: {}".format(model_path))

if torch.cuda.is_available():
    device_ids, _ = single_free_cuda()
    device = torch.device('cuda:{}'.format(device_ids[0]))
else:
    device = torch.device('cpu')

encoder, _ = load_encoder_model(args.encoder_name_or_path, args.model_type)
model = HierarchicalGraphNetwork(config=args)

if encoder_path is not None:
    state_dict = torch.load(encoder_path)
    print('loading parameter from {}'.format(encoder_path))
    for key in list(state_dict.keys()):
        if 'module.' in key:
            state_dict[key.replace('module.', '')] = state_dict[key]
            del state_dict[key]
    encoder.load_state_dict(state_dict)
if model_path is not None:
    state_dict = torch.load(model_path)
    print('loading parameter from {}'.format(model_path))
    for key in list(state_dict.keys()):
        if 'module.' in key:
            state_dict[key.replace('module.', '')] = state_dict[key]
            del state_dict[key]
    model.load_state_dict(state_dict)
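The key renaming above strips the 'module.' prefix that torch.nn.DataParallel (and DistributedDataParallel) prepends to parameter names when a wrapped model's state_dict is saved. The same idea as a small reusable sketch:

def strip_module_prefix(state_dict):
    # DataParallel/DistributedDataParallel checkpoints store parameters as
    # 'module.<name>'; drop the prefix so an unwrapped model can load them.
    return {key.replace('module.', '', 1): value
            for key, value in state_dict.items()}


# encoder.load_state_dict(strip_module_prefix(torch.load(encoder_path)))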
Example 5
else:
    model_path = None
    if args.fine_tuned_encoder is not None:
        # encoder_path = join(args.output_dir, args.fine_tuned_encoder, 'encoder.pkl')
        encoder_path = join(args.fine_tuned_encoder_path,
                            args.fine_tuned_encoder, 'encoder.pkl')
        logger.info("Loading encoder from: {}".format(encoder_path))
    else:
        encoder_path = None
    start_epoch = 0
    best_joint_f1 = 0
    learning_rate = args.learning_rate

# Set Encoder and Model
encoder, _ = load_encoder_model(args.encoder_name_or_path, args.model_type)
model = HierarchicalGraphNetwork(config=args)

print('encoder_path: {}'.format(encoder_path))

if encoder_path is not None:
    encoder.load_state_dict(torch.load(encoder_path))
    logging.info('Set parameters via {}'.format(encoder_path))
if model_path is not None:
    model.load_state_dict(torch.load(model_path))

#######################################################################################
if args.frozen_layer_number > 0:
    modules = [
        encoder.embeddings, *encoder.encoder.layer[:args.frozen_layer_number]
    ]
    for module in modules:
        for param in module.parameters():
            param.requires_grad = False
    logging.info('Froze the first {} layers'.format(args.frozen_layer_number))
Example 6
class UnifiedHGNModel(nn.Module):
    def __init__(self, config):
        super(UnifiedHGNModel, self).__init__()
        self.config = config
        self.encoder, _ = load_encoder_model(self.config.encoder_name_or_path,
                                             self.config.model_type)
        self.model = HierarchicalGraphNetwork(config=self.config)
        if self.config.fine_tuned_encoder is not None:
            encoder_path = join(self.config.fine_tuned_encoder_path,
                                self.config.fine_tuned_encoder, 'encoder.pkl')
            logging.info("Loading encoder from: {}".format(encoder_path))
            self.encoder.load_state_dict(torch.load(encoder_path))
            logging.info("Loading encoder completed")

    def forward(self, batch, return_yp=False):
        ###############################################################################################################
        inputs = {
            'input_ids': batch['context_idxs'],
            'attention_mask': batch['context_mask'],
            'token_type_ids': batch['segment_idxs']
            if self.config.model_type in ['bert', 'xlnet'] else None
        }  # XLM doesn't use segment_ids
        ####++++++++++++++++++++++++++++++++++++++
        outputs = self.encoder(**inputs)
        batch['context_encoding'] = outputs[0]
        ####++++++++++++++++++++++++++++++++++++++
        batch['context_mask'] = batch['context_mask'].float().to(
            self.config.device)
        if self.training:
            start, end, q_type, paras, sents, ents = self.model.forward(
                batch, return_yp=False)
            loss_list = compute_loss(self.config, batch, start, end, paras,
                                     sents, ents, q_type)
            return loss_list
        else:
            start, end, q_type, paras, sents, ents, y1, y2 = self.model.forward(
                batch, return_yp=True)
            return start, end, q_type, paras, sents, ents, y1, y2

    def fixed_learning_rate_optimizers(self, total_steps):
        "Prepare optimizer and schedule (linear warmup and decay)"
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in self.named_parameters()
                    if p.requires_grad and not any(nd in n for nd in no_decay)
                ],
                "weight_decay": self.config.weight_decay,
            },
            {
                "params": [
                    p for n, p in self.named_parameters()
                    if p.requires_grad and any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.0,
            },
        ]
        optimizer = AdamW(optimizer_grouped_parameters,
                          lr=self.config.learning_rate,
                          eps=self.config.adam_epsilon)

        if self.config.lr_scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer,
                num_warmup_steps=self.config.warmup_steps,
                num_training_steps=total_steps)
        elif self.config.lr_scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer=optimizer,
                num_warmup_steps=self.config.warmup_steps,
                num_training_steps=total_steps)
        elif self.config.lr_scheduler == 'cosine_restart':
            scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
                optimizer=optimizer,
                num_warmup_steps=self.config.warmup_steps,
                num_training_steps=total_steps)
        else:
            raise ValueError('{} is not supported'.format(self.config.lr_scheduler))
        return optimizer, scheduler

    def rec_adam_learning_optimizer(self, total_steps):
        no_decay = ["bias", "LayerNorm.weight"]
        new_model = self.model
        args = self.config
        pretrained_model = self.encoder
        # Parameters are split into four groups: weight-decay vs. no-decay,
        # crossed with whether the parameter name contains args.model_type;
        # only the encoder-type parameters are annealed toward their
        # pretrained counterparts by RecAdam.
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in new_model.named_parameters()
                    if not any(nd in n for nd in no_decay)
                    and args.model_type in n
                ],
                "weight_decay": args.weight_decay,
                "anneal_w": args.recadam_anneal_w,
                "pretrain_params": [
                    p_p for p_n, p_p in pretrained_model.named_parameters()
                    if not any(nd in p_n for nd in no_decay)
                    and args.model_type in p_n
                ],
            },
            {
                "params": [
                    p for n, p in new_model.named_parameters()
                    if not any(nd in n for nd in no_decay)
                    and args.model_type not in n
                ],
                "weight_decay": args.weight_decay,
                "anneal_w": 0.0,
                "pretrain_params": [
                    p_p for p_n, p_p in pretrained_model.named_parameters()
                    if not any(nd in p_n for nd in no_decay)
                    and args.model_type not in p_n
                ],
            },
            {
                "params": [
                    p for n, p in new_model.named_parameters()
                    if any(nd in n for nd in no_decay) and args.model_type in n
                ],
                "weight_decay": 0.0,
                "anneal_w": args.recadam_anneal_w,
                "pretrain_params": [
                    p_p for p_n, p_p in pretrained_model.named_parameters()
                    if any(nd in p_n for nd in no_decay)
                    and args.model_type in p_n
                ],
            },
            {
                "params": [
                    p for n, p in new_model.named_parameters()
                    if any(nd in n for nd in no_decay)
                    and args.model_type not in n
                ],
                "weight_decay": 0.0,
                "anneal_w": 0.0,
                "pretrain_params": [
                    p_p for p_n, p_p in pretrained_model.named_parameters()
                    if any(nd in p_n for nd in no_decay)
                    and args.model_type not in p_n
                ],
            },
        ]
        optimizer = RecAdam(optimizer_grouped_parameters,
                            lr=args.learning_rate,
                            eps=args.adam_epsilon,
                            anneal_fun=args.recadam_anneal_fun,
                            anneal_k=args.recadam_anneal_k,
                            anneal_t0=args.recadam_anneal_t0,
                            pretrain_cof=args.recadam_pretrain_cof)
        if self.config.lr_scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer,
                num_warmup_steps=self.config.warmup_steps,
                num_training_steps=total_steps)
        elif self.config.lr_scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer=optimizer,
                num_warmup_steps=self.config.warmup_steps,
                num_training_steps=total_steps)
        elif self.config.lr_scheduler == 'cosine_restart':
            scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
                optimizer=optimizer,
                num_warmup_steps=self.config.warmup_steps,
                num_training_steps=total_steps)
        else:
            raise ValueError('{} is not supported'.format(self.config.lr_scheduler))
        return optimizer, scheduler
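Finally, a hedged sketch of how the class above might be driven end to end: building one of the optimizer/scheduler pairs, one training step, and one evaluation step. The config attributes used for total_steps (gradient_accumulation_steps, num_train_epochs) and the use_recadam flag are assumptions, as is treating the first element of the training loss list as the total loss.

import torch


def build_optimizer(model, config, batches_per_epoch):
    # total_steps follows the usual pattern: optimizer updates per epoch times
    # number of epochs. gradient_accumulation_steps, num_train_epochs and
    # use_recadam are assumed config fields, not confirmed by the code above.
    total_steps = (batches_per_epoch // config.gradient_accumulation_steps
                   ) * config.num_train_epochs
    if getattr(config, 'use_recadam', False):
        return model.rec_adam_learning_optimizer(total_steps)
    return model.fixed_learning_rate_optimizers(total_steps)


def train_step(model, batch, optimizer, scheduler, max_grad_norm=1.0):
    # One optimization step; assumes loss_list[0] is the total loss.
    model.train()
    loss = model(batch)[0]
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()
    return loss.item()


def eval_step(model, batch, device):
    # One evaluation step; the batch keys match those used in forward above.
    model.eval()
    batch = {k: v.to(device) if torch.is_tensor(v) else v
             for k, v in batch.items()}
    with torch.no_grad():
        return model(batch)  # start, end, q_type, paras, sents, ents, y1, y2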