Example #1
    def __init__(
        self,
        config,
        output_attentions=False,
        keep_multihead_output=False,
        n_layers=2,
        activation='relu',
        beta=100,
    ):
        super(BertForCoQA, self).__init__(config)
        self.output_attentions = output_attentions
        self.bert = BertModel(config)
        hidden_size = config.hidden_size
        self.rational_l = Multi_linear_layer(n_layers, hidden_size,
                                             hidden_size, 1, activation)
        self.logits_l = Multi_linear_layer(n_layers, hidden_size, hidden_size,
                                           2, activation)
        self.unk_l = Multi_linear_layer(n_layers, hidden_size, hidden_size, 1,
                                        activation)
        self.attention_l = Multi_linear_layer(n_layers, hidden_size,
                                              hidden_size, 1, activation)
        self.yn_l = Multi_linear_layer(n_layers, hidden_size, hidden_size, 2,
                                       activation)
        self.beta = beta

        self.init_weights()
Example #2
    def __init__(self, config):
        super(BertSentClassifier, self).__init__()
        self.num_labels = config.num_labels
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
        self.classifier = torch.nn.Linear(config.hidden_size, config.num_labels)
Example #3
    def __init__(self):
        super().__init__()
        self.bert = BertModel.from_pretrained('bert_base/')
        if args.bert_freeze:
            for param in self.bert.parameters():
                param.requires_grad = False
        self.dropout = nn.Dropout(args.bert_dropout)
        self.linear = nn.Linear(args.bert_hidden_size, len(labels), bias=True)
Example #4
    def __init__(self, config, pretrained_weights):
        super(PretrainedBert, self).__init__()
        self.num_labels = config.num_labels
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
        self.classifier = torch.nn.Linear(config.hidden_size, config.num_labels)
        self.classifier.weight = torch.nn.Parameter(pretrained_weights['weights'])
        self.classifier.bias = torch.nn.Parameter(pretrained_weights['bias'])
Example #5
def train(**kwargs):
    train_dataset = ClassifierDataset(kwargs["--train_path"])
    valid_dataset = ClassifierDataset(kwargs["--valid_path"])
    print("Dataset loaded successfully")

    train_dl = DataLoader(train_dataset,
                          batch_size=BATCH_SIZE,
                          collate_fn=collate_fn,
                          shuffle=True)
    valid_dl = DataLoader(valid_dataset,
                          batch_size=BATCH_SIZE,
                          collate_fn=collate_fn,
                          shuffle=True)

    model = BertModel()
    optimizer = optim.Adam(model.parameters(), lr)

    BertModel.trainer(model, optimizer, EPOCH)
Example #6
    def __init__(self):
        super().__init__()
        self.bert = BertModel.from_pretrained('bert_large/')
        if args.bert_freeze:
            for param in self.bert.parameters():
                param.requires_grad = False

        self.context_dropout = nn.Dropout(args.context_dropout)
        self.mention_dropout = nn.Dropout(args.mention_dropout)

        self.layer_norm = nn.LayerNorm(args.bert_hidden_size)
        self.multi_head_atten = MultiHeadAttention(args.bert_hidden_size,
                                                   num_heads=8,
                                                   dropout=0.1)
        self.mention_char_atten = MultiHeadAttention(args.bert_hidden_size,
                                                     num_heads=8,
                                                     dropout=0.1)

        self.context_lstm = BiLSTM(input_size=args.bert_hidden_size,
                                   hidden_size=args.rnn_hidden_size,
                                   num_layers=args.rnn_num_layers,
                                   dropout=args.rnn_dropout,
                                   num_dirs=args.rnn_num_dirs)

        self.mention_lstm = BiLSTM(input_size=args.bert_hidden_size,
                                   hidden_size=args.rnn_hidden_size,
                                   num_layers=args.rnn_num_layers,
                                   dropout=args.rnn_dropout,
                                   num_dirs=args.rnn_num_dirs)

        self.context_attn_sum = SelfAttentiveSum(args.bert_hidden_size, 100)
        self.mention_attn_sum = SelfAttentiveSum(args.bert_hidden_size, 1)

        self.char_cnn = CharCNN(embedding_num=len(char_vocab),
                                embedding_dim=args.cnn_embedding_dim,
                                filters=eval(args.cnn_filters),
                                output_dim=args.cnn_output_dim)

        self.linear = nn.Linear(in_features=2 * args.bert_hidden_size +
                                args.cnn_output_dim,
                                out_features=len(labels),
                                bias=True)

        if args.interaction:
            self.mention_linear = nn.Linear(in_features=args.bert_hidden_size +
                                            args.cnn_output_dim,
                                            out_features=args.bert_hidden_size,
                                            bias=True)
            self.affinity_matrix = nn.Linear(args.bert_hidden_size,
                                             args.bert_hidden_size)
            self.fusion = Fusion(args.bert_hidden_size)
            self.normalize = Normalize()
            self.fusion_linear = nn.Linear(in_features=2 *
                                           args.bert_hidden_size,
                                           out_features=len(labels),
                                           bias=True)
Example #7
def create_model(args,
                 pyreader_name,
                 bert_config,
                 num_labels,
                 paradigm_inst,
                 is_prediction=False):
    """create dialogue task model"""
    if args.task_name == 'atis_slot':
        label_dim = [-1, args.max_seq_len]
        lod_level = 1
    elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']:
        label_dim = [-1, num_labels]
        lod_level = 0
    else:
        label_dim = [-1, 1]
        lod_level = 0
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                label_dim],
        dtypes=['int64', 'int64', 'int64', 'float32', 'int64'],
        lod_levels=[0, 0, 0, 0, lod_level],
        name=pyreader_name,
        use_double_buffer=True)

    (src_ids, pos_ids, sent_ids, input_mask,
     labels) = fluid.layers.read_file(pyreader)

    bert = BertModel(src_ids=src_ids,
                     position_ids=pos_ids,
                     sentence_ids=sent_ids,
                     input_mask=input_mask,
                     config=bert_config,
                     use_fp16=args.use_fp16)

    params = {
        'num_labels': num_labels,
        'src_ids': src_ids,
        'pos_ids': pos_ids,
        'sent_ids': sent_ids,
        'input_mask': input_mask,
        'labels': labels,
        'is_prediction': is_prediction
    }

    if is_prediction:
        results = paradigm_inst.paradigm(bert, params)
        results['pyreader'] = pyreader
        return results

    results = paradigm_inst.paradigm(bert, params)
    results['pyreader'] = pyreader
    return results
Example #8
def load_model(model_path, num_hidden_layers=None):
    ckpt_reader = tf.train.load_checkpoint(
        os.path.join(model_path, 'bert_model.ckpt'))
    config = json.load(open(os.path.join(model_path, 'bert_config.json')))

    loaded_params = {k: config[k] for k in params.keys()}
    # import pdb; pdb.set_trace()
    if num_hidden_layers is not None and num_hidden_layers > 0:
        loaded_params['num_hidden_layers'] = num_hidden_layers

    tfbert = BertModel(**loaded_params)
    tfbert([tf.constant([[1]]), tf.constant([[1]]), tf.constant([[1]])])

    tfbert_weights = {w.name: w for w in tfbert.weights}
    official_weights = set(ckpt_reader.get_variable_to_dtype_map().keys())

    skip_tensor = [
        'cls/predictions/transform/dense/kernel',
        'cls/seq_relationship/output_weights',
        'cls/predictions/transform/LayerNorm/beta',
        'cls/predictions/output_bias',
        'cls/predictions/transform/LayerNorm/gamma',
        'cls/seq_relationship/output_bias',
        'cls/predictions/transform/dense/bias',
    ]

    good = True
    for x in official_weights - set(
        [x.split(':')[0] for x in tfbert_weights.keys()]):
        if 'adam' not in x and 'global_step' not in x:
            if x not in skip_tensor:
                print('diff offi', x)
                good = False

    for x in set([x.split(':')[0]
                  for x in tfbert_weights.keys()]) - official_weights:
        if 'adam' not in x and 'global_step' not in x:
            print('diff ours', x)
            good = False

    assert good

    weight_tuples = []
    for k, v in tfbert_weights.items():
        name = k[:-2]
        if ckpt_reader.has_tensor(name):
            ckpt_value = ckpt_reader.get_tensor(name)
            weight_tuples.append((v, ckpt_value))
            assert v.shape == ckpt_value.shape, \
                f'{name} shape invalid {v.shape}, {ckpt_value.shape}'
        else:
            print(f'{name} weight not loaded')
    tf.keras.backend.batch_set_value(weight_tuples)
    return tfbert
Example #9
    def __init__(self, config, head_dropout=None):
        super(BertForQuestRegression, self).__init__(config)
        self.config = config
        self.num_labels = config.num_labels
        if head_dropout is None:
            head_dropout = config.hidden_dropout_prob

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(head_dropout)
        self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)

        self.init_weights()
Example #10
    def __init__(self, config, num_tag, use_cuda):
        super(KBQA, self).__init__(config)
        # BERT
        self.bert = BertModel(config)

        # NER
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, num_tag)
        self.crf = CRF(num_tag, use_cuda)

        # relationship
        self.re_layer = nn.Linear(config.hidden_size, 1)  # yes/no
        self.apply(self.init_bert_weights)
Example #11
    def __init__(self, opt):
        super(BertMapping, self).__init__()
        bert_config = BertConfig.from_json_file(opt.bert_config_file)
        self.bert = BertModel(bert_config)
        self.bert.load_state_dict(
            torch.load(opt.init_checkpoint, map_location='cpu'))
        freeze_layers(self.bert)
        self.txt_stru = opt.txt_stru

        if opt.txt_stru == 'pooling':
            self.dropout = nn.Dropout(bert_config.hidden_dropout_prob)
            self.mapping = nn.Linear(bert_config.hidden_size, opt.final_dims)
        elif opt.txt_stru == 'cnn':
            Ks = [1, 2, 3]
            in_channel = 1
            out_channel = 512
            embedding_dim = bert_config.hidden_size
            self.convs1 = nn.ModuleList([
                nn.Conv2d(in_channel, out_channel, (K, embedding_dim))
                for K in Ks
            ])
            self.dropout = nn.Dropout(bert_config.hidden_dropout_prob)
            self.mapping = nn.Linear(len(Ks) * out_channel, opt.final_dims)
        elif opt.txt_stru == 'rnn':
            embedding_dim = bert_config.hidden_size
            self.bi_gru = opt.bi_gru
            self.rnn = nn.GRU(embedding_dim,
                              opt.embed_size,
                              opt.num_layers,
                              batch_first=True,
                              bidirectional=opt.bi_gru)
            self.dropout = nn.Dropout(bert_config.hidden_dropout_prob)
            self.mapping = nn.Linear(opt.embed_size, opt.final_dims)
        elif opt.txt_stru == 'trans':
            bert_config = BertConfig.from_json_file(opt.img_trans_cfg)
            self.layer = bert.BERTLayer(bert_config)
            self.dropout = nn.Dropout(bert_config.hidden_dropout_prob)
            self.mapping = nn.Linear(768, opt.final_dims)
Example #12
    def __init__(self, num_labels, bret_pretrainded_path):
        """
        在定义task model 的时候必须包含的两部分: self.train_state 和 self.device
        如果不包含着两部分将task model放入Training中进行训练的时候就会报错
        另外模型还要包含两部分,一部分是模型的结构部分,一部分是loss function
        :param num_labels:
        :param bret_pretrainded_path:
        """
        # 初始化
        super().__init__()

        # 构建深度学习的网络结构
        self.bert = BertModel.from_pretrained(bret_pretrainded_path)
        self.fc = nn.Linear(768, num_labels)
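The docstring above describes a convention rather than showing it in full: a task model must expose self.train_state and self.device, and must bundle both a network structure and a loss function. A minimal sketch of a class following that convention, with the import path, the contents of train_state, and the hidden size of 768 all assumed for illustration:

import torch
import torch.nn as nn
from pytorch_pretrained_bert import BertModel  # assumed source of BertModel; the excerpt does not show its import


class ExampleTaskModel(nn.Module):
    """Hypothetical task model following the convention described above."""

    def __init__(self, num_labels, bert_pretrained_path):
        super().__init__()
        # Attributes the training framework reportedly requires.
        self.train_state = {}
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Part 1: the network structure.
        self.bert = BertModel.from_pretrained(bert_pretrained_path)
        self.fc = nn.Linear(768, num_labels)

        # Part 2: the loss function.
        self.loss_fn = nn.CrossEntropyLoss()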
Example #13
    def __init__(self, config):
        super(BertSentClassifier, self).__init__()
        self.num_labels = config.num_labels
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        # pretrain mode does not require updating bert paramters.
        for param in self.bert.parameters():
            if config.option == 'pretrain':
                param.requires_grad = False
            elif config.option == 'finetune':
                param.requires_grad = True

        # todo
        raise NotImplementedError
Example #14
    def __init__(self, config):
        config.output_hidden_states = True
        super(CustomBert, self).__init__(config)
        self.num_labels = config.num_labels
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(p=0.2)
        self.high_dropout = nn.Dropout(p=0.5)

        n_weights = config.num_hidden_layers + 1
        weights_init = torch.zeros(n_weights).float()
        weights_init.data[:-1] = -3
        self.layer_weights = torch.nn.Parameter(weights_init)

        self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)

        self.init_weights()
Example #15
    def __init__(self, config, phrase_size, metric, use_sparse):
        encoder = BertWrapper(BertModel(config))
        sparse_layer = None
        if use_sparse:
            sparse_layer = SparseAttention(config, num_sparse_heads=1)
        super(BertPhraseModel, self).__init__(encoder, sparse_layer, phrase_size, metric)

        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # Slightly different from the TF version which uses truncated_normal for initialization
                # cf https://github.com/pytorch/pytorch/pull/5617
                module.weight.data.normal_(mean=0.0, std=config.initializer_range)
            elif isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0, std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()

        self.apply(init_weights)
Example #16
import torch.optim as optim
import torchtext
import sys

from dataloader import get_chABSA_DataLoaders_and_TEXT
from bert import BertTokenizer
from bert import get_config, BertModel, BertForchABSA, set_learned_params

train_dl, val_dl, TEXT, dataloaders_dict = get_chABSA_DataLoaders_and_TEXT(
    max_length=256, batch_size=32)

# Load the model-configuration JSON file as an object variable
config = get_config(file_path="./weights/bert_config.json")

# Build the BERT model
net_bert = BertModel(config)

# Load the pretrained parameters into the BERT model
net_bert = set_learned_params(net_bert,
                              weights_path="./weights/pytorch_model.bin")

# Construct the full model
net = BertForchABSA(net_bert)

# Set to training mode
net.train()

print('ネットワーク設定完了')  # "Network setup complete"

# Compute gradients only for the last BertLayer module and the added classification adapter
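The snippet stops at the comment announcing that gradients will be computed only for the last BertLayer module and the added classification adapter; the code doing so is cut off. A minimal sketch of that freezing step, assuming the classification head is exposed as net.cls and the encoder layers as net.bert.encoder.layer (attribute names not confirmed by the excerpt):

# 1. Freeze every parameter first.
for param in net.parameters():
    param.requires_grad = False

# 2. Re-enable gradients for the last BertLayer module.
for param in net.bert.encoder.layer[-1].parameters():
    param.requires_grad = True

# 3. Re-enable gradients for the added classification adapter (assumed attribute name).
for param in net.cls.parameters():
    param.requires_grad = True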
Example #17
class BertMapping(nn.Module):
    """
    """
    def __init__(self, opt):
        super(BertMapping, self).__init__()
        bert_config = BertConfig.from_json_file(opt.bert_config_file)
        self.bert = BertModel(bert_config)
        self.bert.load_state_dict(
            torch.load(opt.init_checkpoint, map_location='cpu'))
        freeze_layers(self.bert)
        self.txt_stru = opt.txt_stru

        if opt.txt_stru == 'pooling':
            self.dropout = nn.Dropout(bert_config.hidden_dropout_prob)
            self.mapping = nn.Linear(bert_config.hidden_size, opt.final_dims)
        elif opt.txt_stru == 'cnn':
            Ks = [1, 2, 3]
            in_channel = 1
            out_channel = 512
            embedding_dim = bert_config.hidden_size
            self.convs1 = nn.ModuleList([
                nn.Conv2d(in_channel, out_channel, (K, embedding_dim))
                for K in Ks
            ])
            self.dropout = nn.Dropout(bert_config.hidden_dropout_prob)
            self.mapping = nn.Linear(len(Ks) * out_channel, opt.final_dims)
        elif opt.txt_stru == 'rnn':
            embedding_dim = bert_config.hidden_size
            self.bi_gru = opt.bi_gru
            self.rnn = nn.GRU(embedding_dim,
                              opt.embed_size,
                              opt.num_layers,
                              batch_first=True,
                              bidirectional=opt.bi_gru)
            self.dropout = nn.Dropout(bert_config.hidden_dropout_prob)
            self.mapping = nn.Linear(opt.embed_size, opt.final_dims)
        elif opt.txt_stru == 'trans':
            bert_config = BertConfig.from_json_file(opt.img_trans_cfg)
            self.layer = bert.BERTLayer(bert_config)
            self.dropout = nn.Dropout(bert_config.hidden_dropout_prob)
            self.mapping = nn.Linear(768, opt.final_dims)

    def forward(self, input_ids, attention_mask, token_type_ids, lengths):
        all_encoder_layers, pooled_output = self.bert(
            input_ids,
            token_type_ids=token_type_ids,
            attention_mask=attention_mask)
        if self.txt_stru == 'pooling':
            output = self.mapping(all_encoder_layers[-1])
            output = torch.mean(output, 1)
            code = output
        elif self.txt_stru == 'cnn':
            x = all_encoder_layers[-1].unsqueeze(
                1)  # (batch_size, 1, token_num, embedding_dim)
            x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1
                 ]  # [(batch_size, out_channel, W), ...]*len(Ks)
            x = [F.max_pool1d(i, i.size(2)).squeeze(2)
                 for i in x]  # [(N, Co), ...]*len(Ks)
            output = torch.cat(x, 1)
        elif self.txt_stru == 'rnn':
            x = all_encoder_layers[
                -1]  # (batch_size, token_num, embedding_dim)
            packed = pack_padded_sequence(x, lengths, batch_first=True)
            # Forward propagate RNN
            out, _ = self.rnn(packed)
            # Reshape *final* output to (batch_size, hidden_size)
            padded = pad_packed_sequence(out, batch_first=True)
            cap_emb, cap_len = padded
            if self.bi_gru:
                cap_emb = (cap_emb[:, :, :cap_emb.size(2) // 2] +
                           cap_emb[:, :, cap_emb.size(2) // 2:]) / 2
            else:
                cap_emb = cap_emb
            output = torch.mean(cap_emb, 1)
        elif self.txt_stru == 'trans':

            hidden_states = self.mapping(all_encoder_layers[-1])
            extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
            extended_attention_mask = extended_attention_mask.float()
            extended_attention_mask = (1.0 -
                                       extended_attention_mask) * -10000.0
            hidden_states = self.layer(hidden_states, extended_attention_mask)
            # output = hidden_states[:, 0, :]
            output = torch.mean(hidden_states, 1)

        output = self.dropout(output)
        code = self.mapping(output)
        # code = F.tanh(code)
        code = F.normalize(code, p=2, dim=1)
        return code
Example #18
import torch
from bert import BertModel
sanity_data = torch.load("./sanity_check.data")
# text_batch = ["hello world", "hello neural network for NLP"]
# tokenizer here
sent_ids = torch.tensor([[101, 7592, 2088, 102, 0, 0, 0, 0],
                         [101, 7592, 15756, 2897, 2005, 17953, 2361, 102]])
att_mask = torch.tensor([[1, 1, 1, 1, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1]])

# load our model
bert = BertModel.from_pretrained('bert-base-uncased')
outputs = bert(sent_ids, att_mask)
for k in ['last_hidden_state', 'pooler_output']:
    assert torch.allclose(outputs[k], sanity_data[k], atol=1e-4, rtol=0)
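The sanity check above hard-codes the token ids and attention mask that the commented-out text_batch would produce. For reference, a sketch of how they could be regenerated, assuming the Hugging Face transformers tokenizer for 'bert-base-uncased' rather than the local bert module:

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
text_batch = ["hello world", "hello neural network for NLP"]

# Pad the shorter sentence so both rows have the same length.
enc = tokenizer(text_batch, padding=True, return_tensors='pt')
sent_ids = enc['input_ids']       # should match the hard-coded ids above
att_mask = enc['attention_mask']  # 1 for real tokens, 0 for padding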
Example #19
def load_model(model_path, num_hidden_layers=None):
    ckpt_reader = tf.train.load_checkpoint(
        os.path.join(model_path, 'bert_model.ckpt'))
    config = json.load(open(os.path.join(model_path, 'bert_config.json')))

    loaded_params = {k: config[k] for k in params.keys() if k in config}
    if 'embedding_size' in config:
        loaded_params['embedding_size'] = config['embedding_size']
    # import pdb; pdb.set_trace()
    if num_hidden_layers is not None and num_hidden_layers > 0:
        loaded_params['num_hidden_layers'] = num_hidden_layers

    tfbert = BertModel(**loaded_params)
    tfbert([tf.constant([[1]]), tf.constant([[1]]), tf.constant([[1]])])

    def convert_official_name(x):
        x = x.replace('electra/encoder', 'bert/encoder')
        x = x.replace('discriminator_predictions/dense/kernel',
                      'bert/pooler/dense/kernel')
        x = x.replace('discriminator_predictions/dense/bias',
                      'bert/pooler/dense/bias')
        x = x.replace('electra/embeddings_project/kernel',
                      'bert/encoder/embedding_hidden_mapping_in/kernel')
        x = x.replace('electra/embeddings_project/bias',
                      'bert/encoder/embedding_hidden_mapping_in/bias')
        x = x.replace('electra/embeddings', 'bert/embeddings')
        return x

    skip_tensor = [
        'discriminator_predictions/dense_1/kernel',
        'discriminator_predictions/dense_1/bias',
        'cls/seq_relationship/output_bias',
        'cls/predictions/output_bias',
        'cls/predictions/transform/dense/kernel',
        'cls/predictions/transform/LayerNorm/beta',
        'cls/predictions/transform/LayerNorm/gamma',
        'cls/predictions/transform/dense/bias',
        'cls/seq_relationship/output_weights',
    ]

    tfbert_weights = {
        w.name: w
        for w in tfbert.weights if 'generator' not in w.name
    }
    official_weights = {
        convert_official_name(k): ckpt_reader.get_tensor(k)
        for k in ckpt_reader.get_variable_to_dtype_map().keys()
    }

    good = True
    our_keys = set([x.split(':')[0] for x in tfbert_weights.keys()])
    for x in set(official_weights.keys()) - our_keys:
        if 'adam' not in x and 'global_step' not in x and 'generator' not in x:
            if x not in skip_tensor:
                print('diff offi', x, official_weights[x].shape)
                good = False

    for x in our_keys - set(official_weights.keys()):
        if 'adam' not in x and 'global_step' not in x and 'generator' not in x:
            if x not in skip_tensor:
                print('diff ours', x, tfbert_weights[x + ':0'].shape)
                good = False

    assert good

    weight_tuples = []
    for k, v in tfbert_weights.items():
        name = k[:-2]
        if name in skip_tensor:
            continue
        else:
            off_tensor = None
            for ok in ckpt_reader.get_variable_to_dtype_map().keys():
                if convert_official_name(ok) == name:
                    off_tensor = ckpt_reader.get_tensor(ok)
            if off_tensor is not None:
                weight_tuples.append((v, off_tensor))
                assert v.shape == off_tensor.shape, \
                    f'{name} shape invalid {v.shape}, {off_tensor.shape}'
            else:
                print(f'{name} weight not loaded')
    tf.keras.backend.batch_set_value(weight_tuples)
    return tfbert
Example #20
    def __init__(self, config):
        super(KBQA, self).__init__(config)
        self.bert = BertModel(config)
        self.ner_layer = nn.Linear(config.hidden_size, 2)  # head, tail
        self.re_layer = nn.Linear(config.hidden_size, 1)  # yes/no
        self.apply(self.init_bert_weights)
"""Test load official model, print difference."""

import tensorflow as tf
import tensorflow_hub as hub

from bert import BertModel, params

tfbert = BertModel(**params)

model_path = '../bert-embs/hub/chinese_L-12_H-768_A-12/'
bert_layer = hub.KerasLayer(model_path,
                            signature="tokens",
                            signature_outputs_as_dict=True,
                            trainable=False)

assert len(tfbert.weights) == len(bert_layer.weights)

weight_of_tfbert = set([x.name for x in tfbert.weights])
weight_of_official = set([x.name for x in bert_layer.weights])

fit_weight = len(weight_of_tfbert & weight_of_official)
assert fit_weight == len(bert_layer.weights)


def get_name_values(model):
    names = [x.name for x in model.weights]
    values = tf.keras.backend.batch_get_value(model.weights)
    return dict(zip(names, values))


official_weights = get_name_values(bert_layer)