Example #1
    def __init__(self,
                 model_name,
                 output_token_states=False,
                 from_checkpoint=None,
                 use_segment_id=True,
                 **kwargs):
        super().__init__()
        self.model_type = MODEL_NAME_TO_CLASS[model_name]
        self.output_token_states = output_token_states
        self.use_segment_id = use_segment_id
        # token-level outputs are only supported for BERT/RoBERTa-style encoders
        assert not self.output_token_states or self.model_type in (
            'bert',
            'roberta',
        )

        if self.model_type in ('lstm', ):
            self.module = LSTMTextEncoder(**kwargs, output_hidden_states=True)
            self.sent_dim = self.module.output_size
        else:
            config = AutoConfig.from_pretrained(model_name,
                                                output_hidden_states=True)
            self.module = AutoModel.from_pretrained(model_name, config=config)
            if from_checkpoint is not None:
                self.module = self.module.from_pretrained(
                    from_checkpoint, output_hidden_states=True)
            if self.model_type in ('gpt', ):
                self.module.resize_token_embeddings(get_gpt_token_num())
            self.sent_dim = self.module.config.n_embd if self.model_type in (
                'gpt', ) else self.module.config.hidden_size
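A minimal usage sketch for the constructor above; the class name TextEncoder and the model identifier are assumptions, since the snippet only shows the __init__ body:

# Hypothetical usage sketch; `TextEncoder` is the assumed owning class and the
# model name is only an example key of MODEL_NAME_TO_CLASS.
encoder = TextEncoder('bert-base-uncased', output_token_states=False)
print(encoder.sent_dim)  # hidden size of the wrapped Hugging Face model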
Example #2
    def __init__(self,
                 model_name,
                 encoder_pooler='module_pooler',
                 output_token_states=False,
                 from_checkpoint=None,
                 use_segment_id=True,
                 aristo_path=None):
        super().__init__()
        self.model_type = MODEL_NAME_TO_CLASS[model_name]
        self.encoder_pooler = encoder_pooler
        self.output_token_states = output_token_states
        self.use_segment_id = use_segment_id
        assert not self.output_token_states or self.model_type in (
            'bert',
            'roberta',
        )

        config = AutoConfig.from_pretrained(model_name,
                                            output_hidden_states=True)
        if encoder_pooler == 'att':
            print('use att pooler')
            self.att_merge = AttentionMerge(config.hidden_size, 1024, 0.1)
        self.module = AutoModel.from_pretrained(model_name, config=config)
        if aristo_path is not None:
            print('Loading weights for AristoRoberta...')
            weight = torch.load(aristo_path, map_location='cpu')
            new_dict = {}
            for k, v in weight.items():
                # strip the '_transformer_model.' prefix used by the saved checkpoint
                nk = k.replace('_transformer_model.', '')
                if nk not in self.module.state_dict():
                    # skip checkpoint keys with no counterpart in the model
                    print(k)
                    continue
                new_dict[nk] = v
            model_dict = self.module.state_dict()
            model_dict.update(new_dict)
            self.module.load_state_dict(model_dict)

        if from_checkpoint is not None:
            self.module = self.module.from_pretrained(
                from_checkpoint, output_hidden_states=True)
        if self.model_type in ('gpt', ):
            self.module.resize_token_embeddings(get_gpt_token_num())
        self.sent_dim = self.module.config.n_embd if self.model_type in (
            'gpt', ) else self.module.config.hidden_size
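Constructing the Example #2 variant with the attention pooler could look like the sketch below; the class name, model string, and checkpoint path are assumptions, since only the constructor is shown:

# Hypothetical usage sketch; the class name and checkpoint path are placeholders.
encoder = TextEncoder('roberta-large',
                      encoder_pooler='att',
                      aristo_path='aristo_roberta_weights.pt')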
Example #3
    def __init__(self,
                 model_name,
                 output_token_states=False,
                 from_checkpoint=None,
                 **kwargs):
        super().__init__()
        self.model_type = MODEL_NAME_TO_CLASS[model_name]
        self.output_token_states = output_token_states
        assert not self.output_token_states or self.model_type in ('bert',
                                                                   'roberta',
                                                                   'albert')

        if self.model_type in ('lstm', ):
            self.module = LSTMTextEncoder(**kwargs, output_hidden_states=True)
            self.sent_dim = self.module.output_size
        else:
            module_config = AutoConfig.from_pretrained(
                model_name, output_hidden_states=True, cache_dir='../cache/')
            self.module = AutoModel.from_pretrained(model_name,
                                                    config=module_config,
                                                    cache_dir='../cache/')
            if from_checkpoint != 'None':
                # self.module = self.module.from_pretrained(from_checkpoint, config=module_config, cache_dir='../cache/')
                weight = torch.load(from_checkpoint, map_location='cpu')
                new_dict = {}
                for k, v in weight.items():
                    nk = k.replace('_transformer_model.', '')
                    if nk not in self.module.state_dict():
                        print(k)
                        continue
                    new_dict[nk] = v
                model_dict = self.module.state_dict()
                model_dict.update(new_dict)
                self.module.load_state_dict(model_dict)

            if self.model_type in ('gpt', ):
                self.module.resize_token_embeddings(get_gpt_token_num())
            self.sent_dim = self.module.config.n_embd if self.model_type in (
                'gpt', ) else self.module.config.hidden_size
        print(self.model_type)
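The checkpoint-loading loop shared by Examples #2 and #3 can be read as a small standalone helper: it strips the '_transformer_model.' prefix used in the saved checkpoint, drops keys that have no counterpart in the Hugging Face module, and loads the rest. A sketch of that pattern, assuming a plain torch.nn.Module and a checkpoint saved with that prefix (the helper name is hypothetical):

import torch
import torch.nn as nn


def load_prefixed_checkpoint(module: nn.Module, ckpt_path: str,
                             prefix: str = '_transformer_model.'):
    # Load a checkpoint whose parameter names carry an extra prefix and copy
    # every weight that still matches the module once the prefix is removed.
    weight = torch.load(ckpt_path, map_location='cpu')
    model_dict = module.state_dict()
    new_dict = {}
    for k, v in weight.items():
        nk = k.replace(prefix, '')
        if nk not in model_dict:
            print('skipping unmatched key:', k)
            continue
        new_dict[nk] = v
    model_dict.update(new_dict)
    module.load_state_dict(model_dict)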