Example #1
def load_transformer_model(model_dir):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    config = RobertaConfig.from_json_file('{}/config.json'.format(model_dir))
    model = RobertaForSequenceClassification.from_pretrained(model_dir,
                                                             config=config)
    model = model.to(device)
    return model
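A minimal usage sketch for load_transformer_model (not part of the original example); the './model' directory and the 'roberta-base' tokenizer checkpoint are assumptions.

import torch
from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = load_transformer_model('./model')  # expects config.json and the weights inside ./model
model.eval()
input_ids = tokenizer.encode("an example sentence", return_tensors='pt')
input_ids = input_ids.to(next(model.parameters()).device)
with torch.no_grad():
    logits = model(input_ids)[0]  # classification logits
print(logits.argmax(dim=-1))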
Example #2
    def __init__(
        self,
        pretrained_model_name=None,
        config_filename=None,
        vocab_size=None,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        max_position_embeddings=512,
    ):
        super().__init__()

        # Check that only one of pretrained_model_name, config_filename, and
        # vocab_size was passed in
        total = 0
        if pretrained_model_name is not None:
            total += 1
        if config_filename is not None:
            total += 1
        if vocab_size is not None:
            total += 1

        if total != 1:
            raise ValueError(
                "Only one of pretrained_model_name, vocab_size, "
                + "or config_filename should be passed into the "
                + "ROBERTA constructor."
            )

        # Build the model from whichever argument was provided; the final else
        # branch is unreachable after the check above.
        if vocab_size is not None:
            config = RobertaConfig(
                vocab_size_or_config_json_file=vocab_size,
                vocab_size=vocab_size,
                hidden_size=hidden_size,
                num_hidden_layers=num_hidden_layers,
                num_attention_heads=num_attention_heads,
                intermediate_size=intermediate_size,
                hidden_act=hidden_act,
                max_position_embeddings=max_position_embeddings,
            )
            model = RobertaModel(config)
        elif pretrained_model_name is not None:
            model = RobertaModel.from_pretrained(pretrained_model_name)
        elif config_filename is not None:
            config = RobertaConfig.from_json_file(config_filename)
            model = RobertaModel(config)
        else:
            raise ValueError(
                "One of pretrained_model_name, vocab_size, or "
                "config_filename must be passed into the ROBERTA constructor."
            )

        model.to(self._device)  # self._device is expected to be provided by the base class

        self.add_module("roberta", model)
        self.config = model.config
        self._hidden_size = model.config.hidden_size
Example #3
def model_fn(model_dir):
    model_path = '{}/{}'.format(model_dir, MODEL_NAME)
    model_config_path = '{}/{}'.format(model_dir, 'config.json')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    config = RobertaConfig.from_json_file(model_config_path)
    model = RobertaForSequenceClassification.from_pretrained(model_path,
                                                             config=config)
    model.to(device)
    return model
Example #4
    def __init__(self,
                 atokenizer: AtomTokenizer,
                 tokenizer: Tokenizer,
                 dec_dim: int = 64,
                 device: str = 'cpu',
                 load_pretrained: bool = False):
        super(Parser, self).__init__()
        self.enc_dim = 768
        self.dec_dim = dec_dim
        self.num_embeddings = len(atokenizer)
        self.device = device
        self.atom_tokenizer = atokenizer
        self.type_parser = TypeParser()
        self.tokenizer = tokenizer
        self.dropout = Dropout(0.1)
        self.enc_heads = 8
        self.dec_heads = 8
        self.d_atn_dec = self.dec_dim // self.dec_heads

        if load_pretrained:
            self.word_encoder = RobertaModel.from_pretrained(
                "pdelobelle/robbert-v2-dutch-base").to(device)
        else:
            json_path = path.join(
                path.join(path.dirname(path.dirname(__file__)), 'data'),
                'config.json')
            self.word_encoder = RobertaModel(
                RobertaConfig.from_json_file(json_path)).to(device)
        self.supertagger = make_decoder(num_layers=6,
                                        num_heads_enc=self.enc_heads,
                                        num_heads_dec=self.dec_heads,
                                        d_encoder=self.enc_dim,
                                        d_decoder=self.dec_dim,
                                        d_atn_enc=self.enc_dim //
                                        self.enc_heads,
                                        d_atn_dec=self.d_atn_dec,
                                        d_v_enc=self.enc_dim // self.enc_heads,
                                        d_v_dec=self.dec_dim // self.dec_heads,
                                        d_interm=self.dec_dim * 2,
                                        dropout_rate=0.1).to(device)
        self.atom_embedder = ComplexEmbedding(self.num_embeddings,
                                              self.dec_dim // 2).to(device)
        self.linker = make_encoder(num_layers=3,
                                   num_heads=self.enc_heads,
                                   d_intermediate=self.dec_dim * 4,
                                   dropout=0.15,
                                   d_model=self.dec_dim * 2,
                                   d_k=(self.dec_dim * 2) // self.dec_heads,
                                   d_v=(self.dec_dim * 2) //
                                   self.dec_heads).to(device)
        self.pos_transformation = Sequential(
            FFN(self.dec_dim * 2, self.dec_dim, 0.1, self.dec_dim // 2),
            LayerNorm(self.dec_dim // 2, eps=1e-12)).to(device)
        self.neg_transformation = Sequential(
            FFN(self.dec_dim * 2, self.dec_dim, 0.1, self.dec_dim // 2),
            LayerNorm(self.dec_dim // 2, eps=1e-12)).to(device)
Example #5
    def __init__(self,
                 classifier_config_dir,
                 device,
                 task_type,
                 n_clf_layers=6,
                 use_dm=True,
                 use_pm=True,
                 use_rt=True,
                 use_bio=False,
                 use_name=False,
                 use_network=False,
                 use_count=False):
        super(ConcatenatedClassifier, self).__init__()
        # load text model
        self.device = device
        self.task_type = task_type
        self.use_text = use_dm | use_pm | use_rt
        self.use_bio = use_bio
        self.use_name = use_name
        self.use_etc = use_network | use_count
        self.text_model = RobertaModel.from_pretrained(
            "vinai/bertweet-base",
            output_attentions=False,
            output_hidden_states=False)
        if self.use_name:
            self.charEmbedding = nn.Embedding(
                num_embeddings=302, embedding_dim=300,
                padding_idx=301)  # 302: 300-top frequent + pad + unk
            self.conv3 = nn.Conv1d(in_channels=300,
                                   out_channels=256,
                                   kernel_size=3,
                                   padding=1)
            self.conv4 = nn.Conv1d(in_channels=300,
                                   out_channels=256,
                                   kernel_size=4,
                                   padding=1)
            self.conv5 = nn.Conv1d(in_channels=300,
                                   out_channels=256,
                                   kernel_size=5,
                                   padding=1)

        # load classifier for combining these features
        config = RobertaConfig()
        config = config.from_json_file(classifier_config_dir)
        config.num_hidden_layers = n_clf_layers
        config.num_attention_heads = n_clf_layers
        config.max_position_embeddings = 7
        if self.use_bio:
            config.max_position_embeddings += 2
        if self.use_name:
            config.max_position_embeddings += 4
        self.concat_model = RobertaModel(config)
        self.classifier = ClassifierLayer(use_count=use_count,
                                          use_network=use_network)
        return
Example #6
    def __init__(self, bert_config, my_config):
        super(NqModel, self).__init__()
        #albert_base_configuration = AlbertConfig(vocab_size=30000,hidden_size=768,num_attention_heads=12,intermediate_size=3072,
        #                                        attention_probs_dropout_prob=0)
        self.my_mask = None
        
        roberta_config = RobertaConfig.from_json_file("./configs/roberta-mnli.config")
        roberta_config.hidden_dropout_prob = 0.15
        roberta_config.attention_probs_dropout_prob = 0.15
        roberta_config.layer_norm_eps = 5e-6
        print(roberta_config)
        self.bert = RobertaModel.from_pretrained("roberta-large-mnli",
                                                 config=roberta_config)

        my_config = bert_config = self.bert.config
        #self.bert = RobertaModel.from_pretrained("roberta-base")

        self.right = 0
        self.all = 0
        #self.bert =  AlbertModel(albert_base_configuration)
        
        #self.bert2 = BertModel(bert_config)

        #self.bert = BertModel(BertConfig())
        
        
        #self.bert =  RobertaModel(RobertaConfig(max_position_embeddings=514,vocab_size=50265))

        #print(my_config,bert_config)
        self.tok_dense = nn.Linear(my_config.hidden_size, my_config.hidden_size)
#        self.para_dense = nn.Linear(self.config.hidden_size, self.config.hidden_size)
#        self.doc_dense = nn.Linear(self.config.hidden_size, self.config.hidden_size)
        
        self.dropout = nn.Dropout(my_config.hidden_dropout_prob)

        self.tok_outputs = nn.Linear(my_config.hidden_size, 1)  # tuned to avoid falling into bad local optima
#        self.para_outputs = nn.Linear(self.config.hidden_size, 1)
#        self.answer_type_outputs = nn.Linear(self.config.hidden_size, 2)
        
#        self.tok_to_label = nn.Linear(my_config.max_token_len,2)
#        self.par_to_label = nn.Linear(my_config.max_paragraph_len,2)

        #self.encoder = Encoder(my_config)
#        self.encoder = Encoder(my_config)
#        self.encoder2 = Encoder(my_config)
        self.my_config = my_config
#        self.my_mask = 

        self.ACC = 0
        self.ALL = 0
Example #7
def modify_transformer_config(
    model,
    batch_size=8,
    attention_probs_dropout_prob=0.4,
    learning_rate=5e-7,
    adam_epsilon=1e-8,
    hidden_dropout_prob=0.3,
    lm_model_dir=None,
):
    if model == "bert":
        config = BertConfig.from_json_file(f"{lm_model_dir}/config.json")
    elif model == "distilbert":
        config = DistilBertConfig.from_json_file(f"{lm_model_dir}/config.json")
    elif model == "roberta":
        config = RobertaConfig.from_json_file(f"{lm_model_dir}/config.json")
    else:
        raise ValueError(f"Unsupported model type: {model}")
    config.attention_probs_dropout_prob = attention_probs_dropout_prob
    config.do_sample = True
    config.num_beams = 500
    config.hidden_dropout_prob = hidden_dropout_prob
    config.repetition_penalty = 5
    config.num_labels = 3
    return config
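A hedged usage sketch for modify_transformer_config (not part of the original); the './lm_roberta' directory is an assumption and would need to contain config.json plus the saved weights.

from transformers import RobertaForSequenceClassification

config = modify_transformer_config(
    model="roberta",
    hidden_dropout_prob=0.2,
    lm_model_dir="./lm_roberta",
)
model = RobertaForSequenceClassification.from_pretrained("./lm_roberta", config=config)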
Example #8
    torch.manual_seed(args.seed)
    params.seed = args.seed

    # Set the logger
    utils.set_logger()

    # Create the input data pipeline
    logging.info("Loading the dataset...")
    dataloader = NERDataLoader(params)
    val_loader, test_loader = dataloader.load_data(mode='test')
    logging.info("- done.")

    # Define the model
    logging.info('Loading the model...')
    config_path = os.path.join(params.params_path, 'bert_config.json')
    config = RobertaConfig.from_json_file(config_path)
    model = ElectraForTokenClassification(config, params=params)

    model.to(params.device)
    # Reload weights from the saved file
    utils.load_checkpoint(
        os.path.join(params.model_dir, args.restore_file + '.pth.tar'), model)
    logging.info('- done.')

    logging.info("Starting prediction...")
    if mode == 'test':
        predict(model, test_loader, params, mode=mode)
    elif mode == 'val':
        predict(model, val_loader, params, mode=mode)
    logging.info('- done.')
Example #9
if __name__ == '__main__':
    batch_size = 32
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
    kwargs = {
        'num_workers': 4,
        'pin_memory': True
    } if torch.cuda.is_available() else {}
    # tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    # config = BertConfig.from_json_file(
    #     '../pre_weights/bert-base-uncased_config.json')
    # model = BertForMultipleChoice(config)
    os.chdir('/mnt/ssd/qianqian/semeval2020')

    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    config = RobertaConfig.from_json_file(
        './pre_weights/roberta-base_config.json')
    model = RobertaForMultipleChoice(config)
    model.load_state_dict(
        torch.load('./checkpoints/checkpoint_2019.12.08-09.50.24_pair_91.805.pth'))
    model = model.to(device)

    questions = pd.read_csv(
        './SemEval2020-Task4-Commonsense-Validation-and-Explanation-master/Training Data/subtaskB_data_all.csv')
    answers = pd.read_csv(
        './SemEval2020-Task4-Commonsense-Validation-and-Explanation-master/Training Data/subtaskB_answers_all.csv',
        header=None, names=['id', 'ans'])
    data = pd.merge(questions, answers, how='left', on='id')
    questions = pd.read_csv(
        './SemEval2020-Task4-Commonsense-Validation-and-Explanation-master/Trial Data/taskB_trial_data.csv')
    answers = pd.read_csv(
        './SemEval2020-Task4-Commonsense-Validation-and-Explanation-master/Trial Data/taskB_trial_answer.csv',
        header=None, names=['id', 'ans'])
Example #10
def load_encode_module():
    pass


train_df_1 = pd.read_csv(train_file_1, header=None, sep='\t', index_col=0)
train_df_1.columns = ['text', 'label']
label_map = dict(
    zip(train_df_1['label'].value_counts().index,
        range(len(train_df_1['label'].value_counts()))))
train_df_1['label'] = train_df_1[['label']].applymap(lambda x: label_map[x])
train_df_1, test_df = train_test_split(train_df_1, test_size=0.2)

model_name = "D:/model_file/hfl_chinese-roberta-wwm-ext"

rob_config = RobertaConfig.from_json_file(
    "D:/model_file/hfl_chinese-roberta-wwm-ext/config.json")
rob_config.num_labels = len(label_map)
tokenizer = BertTokenizer.from_pretrained(model_name)

train_sentences_1 = train_df_1.text.values
train_labels_1 = train_df_1.label.values

test_sentences_1 = test_df.text.values
test_labels_1 = test_df.label.values

train_input_ids, test_input_ids = [], []
train_attention_masks, test_attention_masks = [], []

for sent in train_sentences_1:
    encoded_dict = tokenizer.encode_plus(
        text=sent,  # Sentence to encode.
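The encoding loop above is truncated at this point in the listing; a typical completion might look like the following sketch (max_length and the padding options are assumptions, not the original author's settings).

for sent in train_sentences_1:
    encoded_dict = tokenizer.encode_plus(
        text=sent,                   # Sentence to encode.
        add_special_tokens=True,     # Prepend '[CLS]' and append '[SEP]'.
        max_length=128,              # Assumed value; pad/truncate to a fixed length.
        padding='max_length',
        truncation=True,
        return_attention_mask=True,  # Return attention masks over the padding.
        return_tensors='pt')         # Return PyTorch tensors.
    train_input_ids.append(encoded_dict['input_ids'])
    train_attention_masks.append(encoded_dict['attention_mask'])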
Example #11
"""
模型加载方法
"""

import torch
from transformers import RobertaConfig, RobertaModel, BertTokenizer

config = RobertaConfig.from_json_file("data/checkpoint-100/config.json")
# Be sure to use RobertaConfig here rather than BertConfig; the parameter-name
# prefixes differ, and a mismatch makes the weights fail to load.

tokenizer = BertTokenizer.from_pretrained(
    "bert-base-chinese")  # The BERT and RoBERTa tokenizers are the same here, so either works
roberta = RobertaModel.from_pretrained("data/checkpoint-100/pytorch_model.bin",
                                       config=config)  # RoBERTa with its weights loaded
# Use RobertaModel here, not BertModel
"""
PS,
raw_roberta = RobertaModel(config)  # 这里会随机初始化参数
# raw_roberta.load_state_dict(new_state_dic)  # 由于参数命名不一致,会报错
"""