def __init__(self, model_name, output_token_states=False, from_checkpoint=None, use_segment_id=True, **kwargs):
    super().__init__()
    self.model_type = MODEL_NAME_TO_CLASS[model_name]
    self.output_token_states = output_token_states  # return per-token states instead of a pooled sentence vector
    self.use_segment_id = use_segment_id
    # Token-level outputs are only supported for BERT/RoBERTa-style encoders.
    assert not self.output_token_states or self.model_type in ('bert', 'roberta')

    if self.model_type in ('lstm',):
        self.module = LSTMTextEncoder(**kwargs, output_hidden_states=True)
        self.sent_dim = self.module.output_size
    else:
        config = AutoConfig.from_pretrained(model_name, output_hidden_states=True)
        self.module = AutoModel.from_pretrained(model_name, config=config)
        if from_checkpoint is not None:
            # Optionally reload weights from a fine-tuned checkpoint directory.
            self.module = self.module.from_pretrained(from_checkpoint, output_hidden_states=True)
        if self.model_type in ('gpt',):
            # GPT's vocabulary is extended with special tokens, so the embedding table must be resized.
            self.module.resize_token_embeddings(get_gpt_token_num())
        self.sent_dim = self.module.config.n_embd if self.model_type in ('gpt',) else self.module.config.hidden_size
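A minimal usage sketch for this variant. The enclosing class name TextEncoder and the assumption that MODEL_NAME_TO_CLASS maps 'roberta-large' to 'roberta' are illustrative only; they are not confirmed by the snippet above.

    # Hypothetical class name; sent_dim comes from the wrapped model's config.
    encoder = TextEncoder('roberta-large')
    print(encoder.sent_dim)  # hidden_size of the RoBERTa config (1024 for roberta-large)

    # Per-token states are only allowed for bert/roberta model types (see the assert above).
    token_encoder = TextEncoder('roberta-large', output_token_states=True)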
def __init__(self, model_name, encoder_pooler='module_pooler', output_token_states=False, from_checkpoint=None, use_segment_id=True, aristo_path=None):
    super().__init__()
    self.model_type = MODEL_NAME_TO_CLASS[model_name]
    self.encoder_pooler = encoder_pooler
    self.output_token_states = output_token_states
    self.use_segment_id = use_segment_id
    # Token-level outputs are only supported for BERT/RoBERTa-style encoders.
    assert not self.output_token_states or self.model_type in ('bert', 'roberta')

    config = AutoConfig.from_pretrained(model_name, output_hidden_states=True)
    if encoder_pooler == 'att':
        # Learned attention pooling over token states instead of the model's built-in pooler.
        print('use att pooler')
        self.att_merge = AttentionMerge(config.hidden_size, 1024, 0.1)
    self.module = AutoModel.from_pretrained(model_name, config=config)

    if aristo_path is not None:
        # Load AristoRoberta weights: strip the '_transformer_model.' prefix from checkpoint keys
        # and keep only keys present in the target state_dict (unmatched keys are printed and skipped).
        print('Loading weights for AristoRoberta...')
        weight = torch.load(aristo_path, map_location='cpu')
        new_dict = {}
        for k, v in weight.items():
            nk = k.replace('_transformer_model.', '')
            if nk not in self.module.state_dict():
                print(k)
                continue
            new_dict[nk] = v
        model_dict = self.module.state_dict()
        model_dict.update(new_dict)
        self.module.load_state_dict(model_dict)

    if from_checkpoint is not None:
        self.module = self.module.from_pretrained(from_checkpoint, output_hidden_states=True)
    if self.model_type in ('gpt',):
        self.module.resize_token_embeddings(get_gpt_token_num())
    self.sent_dim = self.module.config.n_embd if self.model_type in ('gpt',) else self.module.config.hidden_size
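The AristoRoberta branch above follows a common partial-load pattern: strip a wrapper prefix from the checkpoint keys, drop anything the target model does not recognize, and merge the rest into the existing state_dict. A self-contained sketch of that pattern, assuming only plain PyTorch; the prefix string and the print-on-mismatch behavior mirror the code above, while the function name remap_and_merge is ours:

    import torch

    def remap_and_merge(module: torch.nn.Module, checkpoint_path: str, prefix: str = '_transformer_model.'):
        """Merge a wrapped checkpoint into `module`, keeping only keys the module recognizes."""
        weight = torch.load(checkpoint_path, map_location='cpu')
        target = module.state_dict()
        new_dict = {}
        for k, v in weight.items():
            nk = k.replace(prefix, '')  # e.g. '_transformer_model.encoder.layer.0...' -> 'encoder.layer.0...'
            if nk not in target:
                print(k)                # report checkpoint keys with no counterpart, then skip them
                continue
            new_dict[nk] = v
        target.update(new_dict)         # target keys absent from the checkpoint keep their current values
        module.load_state_dict(target)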
def __init__(self, model_name, output_token_states=False, from_checkpoint=None, **kwargs):
    super().__init__()
    self.model_type = MODEL_NAME_TO_CLASS[model_name]
    self.output_token_states = output_token_states
    # Token-level outputs are only supported for BERT/RoBERTa/ALBERT-style encoders.
    assert not self.output_token_states or self.model_type in ('bert', 'roberta', 'albert')

    if self.model_type in ('lstm',):
        self.module = LSTMTextEncoder(**kwargs, output_hidden_states=True)
        self.sent_dim = self.module.output_size
    else:
        module_config = AutoConfig.from_pretrained(model_name, output_hidden_states=True, cache_dir='../cache/')
        self.module = AutoModel.from_pretrained(model_name, config=module_config, cache_dir='../cache/')
        # Here from_checkpoint is a path string; the literal string 'None' means "skip loading".
        # The extra `is not None` guard also covers the parameter's default value.
        if from_checkpoint is not None and from_checkpoint != 'None':
            # self.module = self.module.from_pretrained(from_checkpoint, config=module_config, cache_dir='../cache/')
            weight = torch.load(from_checkpoint, map_location='cpu')
            new_dict = {}
            for k, v in weight.items():
                nk = k.replace('_transformer_model.', '')
                if nk not in self.module.state_dict():
                    print(k)
                    continue
                new_dict[nk] = v
            model_dict = self.module.state_dict()
            model_dict.update(new_dict)
            self.module.load_state_dict(model_dict)
        if self.model_type in ('gpt',):
            self.module.resize_token_embeddings(get_gpt_token_num())
        self.sent_dim = self.module.config.n_embd if self.model_type in ('gpt',) else self.module.config.hidden_size
    print(self.model_type)
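This variant treats from_checkpoint as a string with the literal 'None' as the "no checkpoint" sentinel, a convention that usually comes from a command-line default. A hedged sketch of that wiring; the --from_checkpoint flag name and the TextEncoder class name are assumptions for illustration, not taken from the code above:

    import argparse

    parser = argparse.ArgumentParser()
    # Hypothetical flag: the string 'None' (the default) skips loading, any other value
    # is treated as a torch.load()-able checkpoint path.
    parser.add_argument('--from_checkpoint', default='None',
                        help="path to a fine-tuned checkpoint, or 'None' to skip loading")
    args = parser.parse_args()

    encoder = TextEncoder('roberta-large', from_checkpoint=args.from_checkpoint)  # class name assumed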