Example No. 1
    def __init__(self,
                 *,
                 pretrained_model_name=None,
                 config_filename=None,
                 vocab_size=None,
                 hidden_size=768,
                 num_hidden_layers=12,
                 num_attention_heads=12,
                 intermediate_size=3072,
                 hidden_act="gelu",
                 max_position_embeddings=512,
                 random_init=False,
                 **kwargs):
        TrainableNM.__init__(self, **kwargs)

        # Check that exactly one of pretrained_model_name, config_filename,
        # and vocab_size was passed in
        total = 0

        if pretrained_model_name is not None:
            total += 1
        if config_filename is not None:
            total += 1
        if vocab_size is not None:
            total += 1

        if total != 1:
            raise ValueError(
                "Exactly one of pretrained_model_name, vocab_size, " +
                "or config_filename should be passed into the " +
                "BERT constructor.")

        if vocab_size is not None:
            config = BertConfig(
                vocab_size_or_config_json_file=vocab_size,
                hidden_size=hidden_size,
                num_hidden_layers=num_hidden_layers,
                num_attention_heads=num_attention_heads,
                intermediate_size=intermediate_size,
                hidden_act=hidden_act,
                max_position_embeddings=max_position_embeddings)
            model = BertModel(config)
        elif pretrained_model_name is not None:
            model = BertModel.from_pretrained(pretrained_model_name)
        elif config_filename is not None:
            config = BertConfig.from_json_file(config_filename)
            model = BertModel(config)
        else:
            raise ValueError(
                "Either pretrained_model_name or vocab_size must " +
                "be passed into the BERT constructor")

        model.to(self._device)

        self.add_module("bert", model)
        self.config = model.config

        if random_init:
            self.apply(
                lambda module: transformer_weights_init(module, xavier=False))
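The three configuration paths above are mutually exclusive. A minimal usage sketch, assuming the constructor belongs to a NeMo-style module class called here `BERTEncoder` (the class name is an assumption; only the keyword arguments come from the snippet):

# Hypothetical usage of the constructor above; `BERTEncoder` is an assumed name.
bert_pretrained = BERTEncoder(pretrained_model_name="bert-base-uncased")
bert_scratch = BERTEncoder(vocab_size=30522, num_hidden_layers=6,
                           random_init=True)
# Passing more than one of pretrained_model_name, config_filename and
# vocab_size (or none of them) raises ValueError.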
Example No. 2
 def __init__(self, config, num_labels=2):
     super(BertForMultiLabelSequenceClassification, self).__init__(config)
     self.num_labels = num_labels
     self.bert = BertModel(config)
     self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
     self.classifier = torch.nn.Linear(config.hidden_size, num_labels)
     self.apply(self.init_bert_weights)
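The forward pass is not shown in this example. A hedged sketch of the multi-label pattern these layers are usually paired with; the forward signature and the pytorch_pretrained_bert-style call are assumptions, not code from the original:

import torch

def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
    # pytorch_pretrained_bert-style call: returns (encoded_layers, pooled_output).
    _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask,
                                 output_all_encoded_layers=False)
    logits = self.classifier(self.dropout(pooled_output))
    if labels is not None:
        # Multi-label: one independent sigmoid/BCE term per label instead of a softmax.
        loss_fct = torch.nn.BCEWithLogitsLoss()
        return loss_fct(logits.view(-1, self.num_labels),
                        labels.view(-1, self.num_labels).float())
    return logits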
Example No. 3
        def create_and_check_bert_model(
            self,
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
        ):
            model = BertModel(config=config)
            model.eval()
            sequence_output, pooled_output = model(input_ids, token_type_ids,
                                                   input_mask)
            sequence_output, pooled_output = model(input_ids, token_type_ids)
            sequence_output, pooled_output = model(input_ids)

            result = {
                "sequence_output": sequence_output,
                "pooled_output": pooled_output
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].size()),
                [self.batch_size, self.seq_length, self.hidden_size],
            )
            self.parent.assertListEqual(list(result["pooled_output"].size()),
                                        [self.batch_size, self.hidden_size])
Example No. 4
 def __init__(self, config):
     super(BertForSequenceClassification, self).__init__(config)
     self.num_labels = config.num_labels
     self.l2_reg_lambda = config.l2_reg_lambda
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.classifier = nn.Linear(
         config.hidden_size * 3, self.config.num_labels)
     self.tanh = nn.Tanh()
     rnn_hidden_size = 300
     num_layers = 1
     self.rnn = nn.LSTM(config.hidden_size, rnn_hidden_size, num_layers,
                        bidirectional=True, dropout=0.5, batch_first=True)
     self.W2 = nn.Linear(config.hidden_size + 2 * rnn_hidden_size, config.hidden_size)
     #self.convs = Conv1d(config.hidden_size, n_filters, filter_sizes)
     self.W_w = nn.Parameter(torch.Tensor(config.hidden_size, config.hidden_size))
     self.u_w = nn.Parameter(torch.Tensor(config.hidden_size, 1))
     #self.W_w1 = nn.Parameter(torch.Tensor(config.hidden_size, config.hidden_size))
     #self.W_w2 = nn.Parameter(torch.Tensor(config.hidden_size, config.hidden_size))
     #self.W_w3 = nn.Parameter(torch.Tensor(config.hidden_size, config.hidden_size))
     nn.init.uniform_(self.W_w, -0.1, 0.1)
     nn.init.uniform_(self.u_w, -0.1, 0.1)
     #nn.init.uniform_(self.W_w1, -0.1, 0.1)
     #nn.init.uniform_(self.W_w2, -0.1, 0.1)
     #nn.init.uniform_(self.W_w3, -0.1, 0.1)
     
     self.apply(self.init_weights)
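The `W_w` and `u_w` parameters above suggest additive (attention-based) pooling over the token representations. A sketch of how such pooling is typically computed; this is an assumption about the missing forward pass, not code from the original:

import torch

def attention_pool(self, H):
    # H: (batch, seq_len, hidden_size) token representations.
    u = torch.tanh(torch.matmul(H, self.W_w))   # (batch, seq_len, hidden_size)
    scores = torch.matmul(u, self.u_w)          # (batch, seq_len, 1)
    alpha = torch.softmax(scores, dim=1)        # attention weights over tokens
    return torch.sum(alpha * H, dim=1)          # (batch, hidden_size) pooled vector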
Example No. 5
 def __init__(self, config):
     super(BertForTokenClassification, self).__init__(config)
     self.num_labels = config.num_labels
     self.bert = BertModel(config)
     self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
     self.classifier = torch.nn.Linear(config.hidden_size,
                                       config.num_labels)
Example No. 6
 def __init__(self, name='bert-base-uncased', dropout=0.1, num_class=2):
     super(BertC, self).__init__()
     config = BertConfig.from_pretrained(name)
     self.bert = BertModel(config)
     self.proj = nn.Linear(config.hidden_size, num_class)
     self.loss_f = nn.CrossEntropyLoss()
     self.drop = nn.Dropout(p=dropout)
Example No. 7
    def __init__(self, vocab_size, tag_to_ix, hidden_dim, n_layers):
        super(BERT_BiLSTM_CRF, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.vocab_size = vocab_size
        self.tag_to_ix = tag_to_ix
        self.tagset_size = len(tag_to_ix)

        config = BertConfig.from_pretrained('bert-base-multilingual-cased')
        self.model = BertModel(config)

        self.lstm = nn.LSTM(768,
                            hidden_dim,
                            num_layers=n_layers,
                            bidirectional=True)

        # Maps the output of the LSTM into tag space.
        self.hidden2tag = nn.Linear(hidden_dim * 2, self.tagset_size)

        # Matrix of transition parameters.  Entry i,j is the score of
        # transitioning *to* i *from* j.
        self.transitions = nn.Parameter(
            torch.randn(self.tagset_size, self.tagset_size, device=device))

        # These two statements enforce the constraint that we never transfer
        # to the start tag and we never transfer from the stop tag
        self.transitions.data[tag_to_ix[START_TAG], :] = -10000
        self.transitions.data[:, tag_to_ix[STOP_TAG]] = -10000

        self.hidden = self.init_hidden()
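`self.init_hidden()` is not shown in this example. A plausible sketch for a bidirectional LSTM with `n_layers` layers; the zero initialisation, the default batch size of 1, and the reuse of the module-level `device` are assumptions:

import torch

def init_hidden(self, batch_size=1):
    # Shape (num_layers * num_directions, batch, hidden_dim) for both h_0 and c_0.
    return (torch.zeros(self.n_layers * 2, batch_size, self.hidden_dim, device=device),
            torch.zeros(self.n_layers * 2, batch_size, self.hidden_dim, device=device))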
Example No. 8
    def __init__(self, args, device, checkpoint):
        super(ExtSummarizer, self).__init__()
        self.args = args
        self.device = device
        self.bert = Bert(args, args.temp_dir, args.finetune_bert)

        self.ext_layer = ExtTransformerEncoder(self.bert.model.config.hidden_size, args.ext_ff_size, args.ext_heads,
                                               args.ext_dropout, args.ext_layers)
        if (args.encoder == 'baseline'):
            bert_config = BertConfig(self.bert.model.config.vocab_size, hidden_size=args.ext_hidden_size,
                                     num_hidden_layers=args.ext_layers, num_attention_heads=args.ext_heads, intermediate_size=args.ext_ff_size)
            self.bert.model = BertModel(bert_config)
            self.ext_layer = Classifier(self.bert.model.config.hidden_size)

        if(args.max_pos>512):
            my_pos_embeddings = nn.Embedding(args.max_pos, self.bert.model.config.hidden_size)
            my_pos_embeddings.weight.data[:512] = self.bert.model.embeddings.position_embeddings.weight.data
            my_pos_embeddings.weight.data[512:] = self.bert.model.embeddings.position_embeddings.weight.data[-1][None,:].repeat(args.max_pos-512,1)
            self.bert.model.embeddings.position_embeddings = my_pos_embeddings


        if checkpoint is not None:
            self.load_state_dict(checkpoint['model'], strict=True)
        else:
            if args.param_init != 0.0:
                for p in self.ext_layer.parameters():
                    p.data.uniform_(-args.param_init, args.param_init)
            if args.param_init_glorot:
                for p in self.ext_layer.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)

        self.to(device)
Example No. 9
    def __init__(self, args, device, checkpoint=None, bert_from_extractive=None):
        super(AbsSummarizer, self).__init__()
        self.args = args
        self.device = device
        self.bert = Bert(args.large, args.temp_dir, args.finetune_bert)

        if bert_from_extractive is not None:
            self.bert.model.load_state_dict(
                dict([(n[11:], p) for n, p in bert_from_extractive.items() if n.startswith('bert.model')]), strict=True)

        if (args.encoder == 'baseline'):
            bert_config = BertConfig(self.bert.model.config.vocab_size, hidden_size=args.enc_hidden_size,
                                     num_hidden_layers=args.enc_layers, num_attention_heads=8,
                                     intermediate_size=args.enc_ff_size,
                                     hidden_dropout_prob=args.enc_dropout,
                                     attention_probs_dropout_prob=args.enc_dropout)
            self.bert.model = BertModel(bert_config)

        if(args.max_pos>512):
            my_pos_embeddings = nn.Embedding(args.max_pos, self.bert.model.config.hidden_size)
            my_pos_embeddings.weight.data[:512] = self.bert.model.embeddings.position_embeddings.weight.data
            my_pos_embeddings.weight.data[512:] = self.bert.model.embeddings.position_embeddings.weight.data[-1][None,:].repeat(args.max_pos-512,1)
            self.bert.model.embeddings.position_embeddings = my_pos_embeddings
        self.vocab_size = self.bert.model.config.vocab_size
        tgt_embeddings = nn.Embedding(self.vocab_size, self.bert.model.config.hidden_size, padding_idx=0)
        if (self.args.share_emb):
            tgt_embeddings = self.bert.model.embeddings.word_embeddings

        self.decoder = TransformerDecoder(
            self.args.dec_layers,
            self.args.dec_hidden_size, heads=self.args.dec_heads,
            d_ff=self.args.dec_ff_size, dropout=self.args.dec_dropout, embeddings=tgt_embeddings)

        self.generator = get_generator(self.vocab_size, self.args.dec_hidden_size, device)
        self.generator[0].weight = self.decoder.embeddings.weight


        if checkpoint is not None:
            self.load_state_dict(checkpoint['model'], strict=True)
        else:
            for module in self.decoder.modules():
                if isinstance(module, (nn.Linear, nn.Embedding)):
                    module.weight.data.normal_(mean=0.0, std=0.02)
                elif isinstance(module, nn.LayerNorm):
                    module.bias.data.zero_()
                    module.weight.data.fill_(1.0)
                if isinstance(module, nn.Linear) and module.bias is not None:
                    module.bias.data.zero_()
            for p in self.generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
                else:
                    p.data.zero_()
            if(args.use_bert_emb):
                tgt_embeddings = nn.Embedding(self.vocab_size, self.bert.model.config.hidden_size, padding_idx=0)
                tgt_embeddings.weight = copy.deepcopy(self.bert.model.embeddings.word_embeddings.weight)
                self.decoder.embeddings = tgt_embeddings
                self.generator[0].weight = self.decoder.embeddings.weight

        self.to(device)
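`get_generator` is project-specific and not shown here. A sketch consistent with how it is used above, where `self.generator[0]` must be the output projection whose weight is tied to the decoder embeddings; the LogSoftmax output is an assumption:

import torch.nn as nn

def get_generator(vocab_size, dec_hidden_size, device):
    # Linear projection to the vocabulary followed by log-probabilities;
    # generator[0] is the Linear layer whose weight gets tied above.
    generator = nn.Sequential(nn.Linear(dec_hidden_size, vocab_size),
                              nn.LogSoftmax(dim=-1))
    generator.to(device)
    return generator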
Example No. 10
    def __init__(self, config, num_tag):
        super(Bert_SenAnalysis, self).__init__(config)
        self.num_labels = num_tag

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        #self.classifier = nn.Linear(config.hidden_size, num_tag)
        #self.pooling = nn.Linear(config.hidden_size, config.hidden_size)
        self.classifier = nn.Linear(args.lstm_hidden_size * 2, self.num_labels)

        self.W = []
        self.gru = []
        for i in range(args.lstm_layers):
            self.W.append(
                nn.Linear(args.lstm_hidden_size * 2,
                          args.lstm_hidden_size * 2))
            self.gru.append(
                nn.GRU(
                    config.hidden_size if i == 0 else args.lstm_hidden_size *
                    4,
                    args.lstm_hidden_size,
                    num_layers=1,
                    bidirectional=True,
                    batch_first=True).cuda())
        self.W = nn.ModuleList(self.W)
        self.gru = nn.ModuleList(self.gru)
        self.init_weights()
Example No. 11
    def __init__(self, args, device, checkpoint):
        super(ExtSummarizer, self).__init__()
        self.args = args
        self.device = device
        self.bert = Bert(args.large, args.temp_dir, args.finetune_bert)

        self.ext_layer = ExtTransformerEncoder(
            self.bert.model.config.hidden_size, args.ext_ff_size,
            args.ext_heads, args.ext_dropout, args.ext_layers)
        if (args.encoder == 'baseline'):
            bert_config = BertConfig(self.bert.model.config.vocab_size,
                                     hidden_size=args.hidden_size,
                                     num_hidden_layers=6,
                                     num_attention_heads=8,
                                     intermediate_size=args.ff_size)
            self.bert.model = BertModel(bert_config)
            self.ext_layer = Classifier(self.bert.model.config.hidden_size)

        if checkpoint is not None:
            self.load_state_dict(checkpoint['model'], strict=True)
        else:
            if args.param_init != 0.0:
                for p in self.ext_layer.parameters():
                    p.data.uniform_(-args.param_init, args.param_init)
            if args.param_init_glorot:
                for p in self.ext_layer.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)

        self.to(device)
Example No. 12
def createCsvData():
    config = BertConfig.from_pretrained('bert-base-uncased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel(config)
    with Cd("lemmadata"):
        with open("id_to_sent.json") as sent_id_dict_file:
            sent_id_dict = json.load(sent_id_dict_file)
        for dir_item in os.listdir():
            if os.path.isfile(dir_item):
                if dir_item.endswith(".json") and dir_item != "id_to_sent.json":
                    print(dir_item)
                    with open(dir_item, "r") as f:
                        lemma_data = json.load(f)
                    with Cd("vectors"):
                        with open(dir_item[:-5]+".csv", "w") as vector_file:
                            writer = csv.writer(vector_file, delimiter=",")
                            for instance in lemma_data:
                                inst_sent_id = instance["sent_id"]
                                inst_sense = instance["sense"]
                                inst_sent = sent_id_dict[str(inst_sent_id)]
                                if(len(inst_sent) > 511):
                                    continue 
                                vector = vectorizeWordInContext(inst_sent, instance["pos"], tokenizer, model)
                                vec_list = vector.detach().tolist()
                                row_data = [inst_sent_id, instance["pos"], inst_sense] + vec_list
                                writer.writerow(row_data)
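`vectorizeWordInContext` is not defined in this example. A hedged sketch of what such a helper typically does, returning the contextual vector of the token at a given position; the tokenisation details (and the fact that subword splitting can shift the target index) are assumptions, and in practice pretrained weights would usually be loaded with `BertModel.from_pretrained` rather than `BertModel(config)`:

import torch

def vectorizeWordInContext(sentence, position, tokenizer, model):
    # sentence: list of tokens; position: index of the word to embed.
    ids = torch.tensor([tokenizer.convert_tokens_to_ids(sentence)])
    with torch.no_grad():
        outputs = model(ids)
    sequence_output = outputs[0]          # (1, seq_len, hidden_size)
    return sequence_output[0, position]   # contextual vector of the target word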
Example No. 13
 def __init__(self, config, num_labels):
     super(BERTCRF, self).__init__(config)
     self.num_labels = num_labels  # ent label num
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.fc = nn.Linear(config.hidden_size, num_labels)
     self.crf = CRF(num_labels, batch_first=True)
     self.apply(self.init_bert_weights)
Example No. 14
    def __init__(self, config):
        super(BertForLes, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

        self.apply(self.init_weights)
Example No. 15
 def __init__(self, config):
     super(ABSABert, self).__init__(config)
     self.bert = BertModel(config)
     self.num_labels = config.num_labels
     self.classifier = torch.nn.Linear(config.hidden_size, self.num_labels)
     self.cls = DepBertPreTrainingHeads(config)
     self.domain_cls = DomainPredictionHead(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example No. 16
 def __init__(self, bert_config, num_choices):
     super(BertCloze, self).__init__(bert_config)
     self.num_choices = num_choices
     self.bert = BertModel(bert_config)
     self.idiom_embedding = nn.Embedding(len(config.idiom2index),
                                         bert_config.hidden_size)
     self.my_fc = nn.Sequential(nn.Dropout(config.hidden_dropout_prob),
                                nn.Linear(bert_config.hidden_size, 1))
Example No. 17
    def __init__(self, hidden_dim, n_layers, tagset_size):
        super(BertLSTM, self).__init__()
        config = BertConfig.from_pretrained('bert-base-multilingual-cased')
        self.model = BertModel(config)

        self.decoder = nn.LSTM(768, hidden_dim, n_layers)

        self.hiddentotag = nn.Linear(hidden_dim, tagset_size)
Example No. 18
    def __init__(self, config):
        super(NoHiddenLayerClassification, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.fc1 = nn.Linear(in_features=768 * 5, out_features=2)

        self.init_weights()
Example No. 19
    def __init__(self, config, num_choices=1, num_docs_rank=30):
        super(BertForMultipleChoice, self).__init__(config)

        self.num_choices = num_choices
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, num_choices)

        self.init_weights()
Example No. 20
 def __init__(self, config):
     super(BertNer, self).__init__(config)
     self.num_labels = config.num_labels
     self.bert = BertModel(config)  # load the BERT model
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     # a simple linear classification layer
     self.classifier = nn.Linear(config.hidden_size, self.num_labels)
     # initialize the weights
     self.init_weights(self.classifier)
Example No. 21
    def __init__(self, config):
        super(BertForSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)

        self.init_weights()
Example No. 22
 def __init__(self, config):
     super(CredPredictor, self).__init__()
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.reduce = nn.Linear(config.hidden_size, REDUCE_SIZE)
     self.prev_recursive = nn.LSTMCell(REDUCE_SIZE + 1, REDUCE_SIZE)
     self.aftr_recursive = nn.LSTMCell(REDUCE_SIZE + 1, REDUCE_SIZE)
     self.fc1 = nn.Linear(REDUCE_SIZE, int(REDUCE_SIZE / 2))
     self.fc2 = nn.Linear(int(REDUCE_SIZE / 2), 1)
     self.referee = nn.Linear(2, 1)
Example No. 23
    def __init__(self, config):
        super(MultiTaskBert, self).__init__(config)
        self.num_labels = 3
        self.num_categories = 5
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier1 = nn.Linear(config.hidden_size, self.num_labels)
        self.classifier2 = nn.Linear(config.hidden_size, self.num_categories)

        self.init_weights()
Example No. 24
 def __init__(self, bertConfig, num_classes):
     super(BertFine, self).__init__(bertConfig)
     self.bert = BertModel(bertConfig)  # the BERT encoder
     self.dropout = nn.Dropout(bertConfig.hidden_dropout_prob)
     self.classifier = nn.Linear(in_features=bertConfig.hidden_size,
                                 out_features=num_classes)
     self.apply(self.init_weights)
     # By default, all parameters of the BERT encoder are trained; a batch size of 32 uses roughly 8.7 GB of GPU memory.
     # The encoder can instead be frozen so that only the classifier layer is backpropagated, in which case a batch size of 32 uses roughly 1.1 GB.
     self.unfreeze_bert_encoder()
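The comment above refers to `unfreeze_bert_encoder`, whose body is not shown. A typical pair of helpers for freezing and unfreezing the encoder parameters; this is a sketch of the common pattern, not necessarily the original implementation:

def freeze_bert_encoder(self):
    # Exclude the BERT encoder from backpropagation; only the classifier trains.
    for param in self.bert.parameters():
        param.requires_grad = False

def unfreeze_bert_encoder(self):
    # Re-enable gradients for the full encoder.
    for param in self.bert.parameters():
        param.requires_grad = True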
Example No. 25
    def __init__(self, config):
        super(twoHiddenLayer_with_3840_3840_classifier, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.fc1 = nn.Linear(in_features=768 * 5, out_features=768 * 5)
        self.fc2 = nn.Linear(in_features=768 * 5, out_features=768 * 5)
        self.fc3 = nn.Linear(in_features=768 * 5, out_features=2)

        self.init_weights()
Example No. 26
    def __init__(self, config, pooling='max', dropout_prob=0):
        super(BertForDocClassification, self).__init__(config)
        self.num_labels = config.num_labels
        self.pooling = pooling

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, self.num_labels)
        self.bn = nn.BatchNorm1d(768)

        self.init_weights()
Example No. 27
 def __init__(self, config):
     super(BertClassifier, self).__init__()
     # Binary classification problem (num_labels = 2)
     self.num_labels = config.num_labels
     # Pre-trained BERT model
     self.bert = BertModel(config)
     # Dropout to avoid overfitting
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     # A single layer classifier added on top of BERT to fine tune for binary classification
     self.classifier = nn.Linear(config.hidden_size, config.num_labels)
     # Weight initialization
     torch.nn.init.xavier_normal_(self.classifier.weight)
Example No. 28
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Sequential(nn.Linear(config.hidden_size, 50),
                                        nn.ReLU(),
                                        nn.Linear(50, config.num_labels))
        #self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        self.init_weights()
Example No. 29
    def __init__(self, config):
        super(FeatureBert, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # self.classifier = nn.Linear(config.hidden_size + 5, self.config.num_labels)
        self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)

        self.features_bn = nn.BatchNorm1d(5)
        self.features_dense = nn.Linear(5, 5)
        self.init_weights()
Example No. 30
    def __init__(self, config):
        super(BertForLesWithFeatures, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)

        # hidden size after concatenating the extra features
        # self.hidden_size = config.hidden_size + 14 + POS_DIM + NER_DIM + 2 + 2
        self.hidden_size = config.hidden_size + POS_DIM + NER_DIM + 2 + 2

        self.qa_outputs = nn.Linear(self.hidden_size, config.num_labels)

        self.apply(self.init_weights)