def __init__(self, hparams: Namespace):
    super().__init__()
    self.hparams = hparams
    cached_config_file = join(self.hparams.exp_name, 'cached_config.bin')
    if os.path.exists(cached_config_file):
        cached_config = torch.load(cached_config_file)
        encoder_path = join(self.hparams.exp_name, cached_config['encoder'])
    else:
        if self.hparams.fine_tuned_encoder is not None:
            encoder_path = join(self.hparams.fine_tuned_encoder_path,
                                self.hparams.fine_tuned_encoder, 'encoder.pkl')
        else:
            encoder_path = None

    _, _, tokenizer_class = MODEL_CLASSES[self.hparams.model_type]
    self.tokenizer = tokenizer_class.from_pretrained(
        self.hparams.encoder_name_or_path,
        do_lower_case=self.hparams.do_lower_case)

    # Set Encoder and Model
    self.encoder, _ = load_encoder_model(self.hparams.encoder_name_or_path,
                                         self.hparams.model_type)
    self.model = HierarchicalGraphNetwork(config=self.hparams)

    if encoder_path is not None:
        self.encoder.load_state_dict(torch.load(encoder_path))
        logging.info('Initialize parameters with {}'.format(encoder_path))
    logging.info('Loading encoder and model completed')

    self.save_hyperparameters(self.hparams)
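# A minimal usage sketch (not from the source): constructing the lightning-style module
# above from an argparse-style Namespace. The attribute names mirror the ones read in
# __init__; the class name and all values below are placeholders, not the project's
# actual defaults, and a real run needs the remaining HGN hyperparameters as well.
from argparse import Namespace

hparams = Namespace(
    exp_name='output/hgn_albert',              # hypothetical experiment directory
    fine_tuned_encoder=None,                   # or e.g. 'albert_qa_finetuned'
    fine_tuned_encoder_path='models/encoders', # hypothetical path
    model_type='albert',
    encoder_name_or_path='albert-xxlarge-v2',
    do_lower_case=True,
)
# lightning_module = LightningHGN(hparams)     # hypothetical name for the module defined above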
else:
    model_path = None

if args.fine_tuned_encoder is not None:
    # encoder_path = join(args.output_dir, args.fine_tuned_encoder, 'encoder.pkl')
    encoder_path = join(args.fine_tuned_encoder_path, args.fine_tuned_encoder, 'encoder.pkl')
    logger.info("Loading encoder from: {}".format(encoder_path))
else:
    encoder_path = None

start_epoch = 0
best_joint_f1 = 0
learning_rate = args.learning_rate

# Set Encoder and Model
encoder, _ = load_encoder_model(args.encoder_name_or_path, args.model_type)
model = HierarchicalGraphNetwork(config=args)

if encoder_path is not None:
    encoder.load_state_dict(torch.load(encoder_path))
if model_path is not None:
    model.load_state_dict(torch.load(model_path))

# Optionally freeze the embeddings and the first N transformer layers of the encoder
if args.frozen_layer_number > 0:
    modules = [encoder.embeddings, *encoder.encoder.layer[:args.frozen_layer_number]]
    for module in modules:
        for param in module.parameters():
            param.requires_grad = False
    logging.info('Frozen the first {} layers'.format(args.frozen_layer_number))
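# A small sanity check (a sketch, not part of the source): after freezing, count the
# trainable parameters to confirm that the embedding layer and the first
# `frozen_layer_number` transformer blocks no longer receive gradients.
def count_parameters(module):
    trainable = sum(p.numel() for p in module.parameters() if p.requires_grad)
    total = sum(p.numel() for p in module.parameters())
    return trainable, total

trainable, total = count_parameters(encoder)
logging.info('Encoder: {}/{} parameters remain trainable after freezing'.format(trainable, total))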
config_class, model_encoder, tokenizer_class = MODEL_CLASSES[args.model_type]
config = config_class.from_pretrained(args.encoder_name_or_path)

# args.encoder_name / args.model_name replace the hard-coded 'encoder.pkl' file name
encoder_path = join(args.exp_name, args.encoder_name)
model_path = join(args.exp_name, args.model_name)
logger.info("Loading encoder from: {}".format(encoder_path))
logger.info("Loading model from: {}".format(model_path))

if torch.cuda.is_available():
    device_ids, _ = single_free_cuda()
    device = torch.device('cuda:{}'.format(device_ids[0]))
else:
    device = torch.device('cpu')

encoder, _ = load_encoder_model(args.encoder_name_or_path, args.model_type)
model = HierarchicalGraphNetwork(config=args)

if encoder_path is not None:
    state_dict = torch.load(encoder_path)
    print('loading parameter from {}'.format(encoder_path))
    # Strip the 'module.' prefix left by DataParallel/DistributedDataParallel checkpoints
    for key in list(state_dict.keys()):
        if 'module.' in key:
            state_dict[key.replace('module.', '')] = state_dict[key]
            del state_dict[key]
    encoder.load_state_dict(state_dict)

if model_path is not None:
    state_dict = torch.load(model_path)
    print('loading parameter from {}'.format(model_path))
    for key in list(state_dict.keys()):
        if 'module.' in key:
            state_dict[key.replace('module.', '')] = state_dict[key]
            del state_dict[key]
    model.load_state_dict(state_dict)
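# The key renaming above undoes the 'module.' prefix that nn.DataParallel /
# DistributedDataParallel add when a wrapped model is saved. A reusable sketch of the
# same idea (the helper name is ours, not the project's):
def strip_data_parallel_prefix(state_dict):
    return {(k[len('module.'):] if k.startswith('module.') else k): v
            for k, v in state_dict.items()}

# e.g. encoder.load_state_dict(strip_data_parallel_prefix(torch.load(encoder_path)))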
else:
    model_path = None

if args.fine_tuned_encoder is not None:
    # encoder_path = join(args.output_dir, args.fine_tuned_encoder, 'encoder.pkl')
    encoder_path = join(args.fine_tuned_encoder_path, args.fine_tuned_encoder, 'encoder.pkl')
    logger.info("Loading encoder from: {}".format(encoder_path))
else:
    encoder_path = None

start_epoch = 0
best_joint_f1 = 0
learning_rate = args.learning_rate

# Set Encoder and Model
encoder, _ = load_encoder_model(args.encoder_name_or_path, args.model_type)
model = HierarchicalGraphNetwork(config=args)

if encoder_path is not None:
    encoder.load_state_dict(torch.load(encoder_path))
    logging.info('Set parameters via {}'.format(encoder_path))
if model_path is not None:
    model.load_state_dict(torch.load(model_path))

# Optionally freeze the embeddings and the first N transformer layers of the encoder
if args.frozen_layer_number > 0:
    modules = [encoder.embeddings, *encoder.encoder.layer[:args.frozen_layer_number]]
    for module in modules:
        for param in module.parameters():
            param.requires_grad = False
    logging.info('Frozen the first {} layers'.format(args.frozen_layer_number))
class UnifiedHGNModel(nn.Module):
    def __init__(self, config):
        super(UnifiedHGNModel, self).__init__()
        self.config = config
        self.encoder, _ = load_encoder_model(self.config.encoder_name_or_path, self.config.model_type)
        self.model = HierarchicalGraphNetwork(config=self.config)
        if self.config.fine_tuned_encoder is not None:
            encoder_path = join(self.config.fine_tuned_encoder_path, self.config.fine_tuned_encoder, 'encoder.pkl')
            logging.info("Loading encoder from: {}".format(encoder_path))
            self.encoder.load_state_dict(torch.load(encoder_path))
            logging.info("Loading encoder completed")

    def forward(self, batch, return_yp=False):
        inputs = {
            'input_ids': batch['context_idxs'],
            'attention_mask': batch['context_mask'],
            'token_type_ids': batch['segment_idxs'] if self.config.model_type in ['bert', 'xlnet'] else None,  # XLM does not use segment_ids
        }
        outputs = self.encoder(**inputs)
        batch['context_encoding'] = outputs[0]
        batch['context_mask'] = batch['context_mask'].float().to(self.config.device)
        if self.training:
            start, end, q_type, paras, sents, ents = self.model.forward(batch, return_yp=False)
            loss_list = compute_loss(self.config, batch, start, end, paras, sents, ents, q_type)
            return loss_list
        else:
            start, end, q_type, paras, sents, ents, y1, y2 = self.model.forward(batch, return_yp=True)
            return start, end, q_type, paras, sents, ents, y1, y2

    def fixed_learning_rate_optimizers(self, total_steps):
        """Prepare optimizer and schedule (linear warmup and decay)."""
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in self.named_parameters()
                           if p.requires_grad and not any(nd in n for nd in no_decay)],
                "weight_decay": self.config.weight_decay,
            },
            {
                "params": [p for n, p in self.named_parameters()
                           if p.requires_grad and any(nd in n for nd in no_decay)],
                "weight_decay": 0.0,
            },
        ]
        optimizer = AdamW(optimizer_grouped_parameters,
                          lr=self.config.learning_rate,
                          eps=self.config.adam_epsilon)
        if self.config.lr_scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer,
                num_warmup_steps=self.config.warmup_steps,
                num_training_steps=total_steps)
        elif self.config.lr_scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer=optimizer,
                num_warmup_steps=self.config.warmup_steps,
                num_training_steps=total_steps)
        elif self.config.lr_scheduler == 'cosine_restart':
            scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
                optimizer=optimizer,
                num_warmup_steps=self.config.warmup_steps,
                num_training_steps=total_steps)
        else:
            raise ValueError('{} is not supported'.format(self.config.lr_scheduler))
        return optimizer, scheduler

    def rec_adam_learning_optimizer(self, total_steps):
        """Prepare a RecAdam optimizer whose parameter groups are paired with the pre-trained encoder parameters."""
        no_decay = ["bias", "LayerNorm.weight"]
        new_model = self.model
        args = self.config
        pretrained_model = self.encoder
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in new_model.named_parameters()
                           if not any(nd in n for nd in no_decay) and args.model_type in n],
                "weight_decay": args.weight_decay,
                "anneal_w": args.recadam_anneal_w,
                "pretrain_params": [p_p for p_n, p_p in pretrained_model.named_parameters()
                                    if not any(nd in p_n for nd in no_decay) and args.model_type in p_n],
            },
            {
                "params": [p for n, p in new_model.named_parameters()
                           if not any(nd in n for nd in no_decay) and args.model_type not in n],
                "weight_decay": args.weight_decay,
                "anneal_w": 0.0,
                "pretrain_params": [p_p for p_n, p_p in pretrained_model.named_parameters()
                                    if not any(nd in p_n for nd in no_decay) and args.model_type not in p_n],
            },
            {
                "params": [p for n, p in new_model.named_parameters()
                           if any(nd in n for nd in no_decay) and args.model_type in n],
                "weight_decay": 0.0,
                "anneal_w": args.recadam_anneal_w,
                "pretrain_params": [p_p for p_n, p_p in pretrained_model.named_parameters()
                                    if any(nd in p_n for nd in no_decay) and args.model_type in p_n],
            },
            {
                "params": [p for n, p in new_model.named_parameters()
                           if any(nd in n for nd in no_decay) and args.model_type not in n],
                "weight_decay": 0.0,
                "anneal_w": 0.0,
                "pretrain_params": [p_p for p_n, p_p in pretrained_model.named_parameters()
                                    if any(nd in p_n for nd in no_decay) and args.model_type not in p_n],
            },
        ]
        optimizer = RecAdam(optimizer_grouped_parameters,
                            lr=args.learning_rate,
                            eps=args.adam_epsilon,
                            anneal_fun=args.recadam_anneal_fun,
                            anneal_k=args.recadam_anneal_k,
                            anneal_t0=args.recadam_anneal_t0,
                            pretrain_cof=args.recadam_pretrain_cof)
        if self.config.lr_scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer,
                num_warmup_steps=self.config.warmup_steps,
                num_training_steps=total_steps)
        elif self.config.lr_scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer=optimizer,
                num_warmup_steps=self.config.warmup_steps,
                num_training_steps=total_steps)
        elif self.config.lr_scheduler == 'cosine_restart':
            scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
                optimizer=optimizer,
                num_warmup_steps=self.config.warmup_steps,
                num_training_steps=total_steps)
        else:
            raise ValueError('{} is not supported'.format(self.config.lr_scheduler))
        return optimizer, scheduler
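# Usage sketch (an assumption, not the source's training loop): both optimizer factories
# above return an (optimizer, scheduler) pair where the scheduler is a per-step warmup
# schedule from Hugging Face transformers, so scheduler.step() is expected after every
# optimizer.step(), not once per epoch. The helper name, the gradient-clipping value and
# the assumption that the first loss entry is the combined loss are ours.
def training_step(unified_model, batch, optimizer, scheduler, max_grad_norm=1.0):
    loss = unified_model(batch)[0]  # assumed: first element of the loss list is the total loss
    loss.backward()
    torch.nn.utils.clip_grad_norm_(unified_model.parameters(), max_grad_norm)
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()
    return loss.item()

# optimizer, scheduler = unified_model.fixed_learning_rate_optimizers(total_steps)
# for batch in train_dataloader:
#     training_step(unified_model, batch, optimizer, scheduler)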