Example #1
class BertNER(nn.Module):
    def __init__(self, vocab_size=None, device='cpu', training=False):
        super().__init__()
        # from_pretrained is a classmethod, so the BertModel(config) instance it was
        # called on would be discarded anyway; load the pretrained weights directly.
        self.bert = BertModel.from_pretrained('bert-base-cased').to(device)
        self.classifier = nn.Linear(768, vocab_size)
        self.device = device
        self.training = training
        self.bert.eval()

    def forward(self, x):
        x = x.to(self.device)
        if self.training:
            self.bert.train()
            layers_out, _ = self.bert(x)
            last_layer = layers_out[-1]
        else:
            with torch.no_grad():
                layers_out, _ = self.bert(x)
                last_layer = layers_out[-1]
        logits = self.classifier(last_layer)
        preds = logits.argmax(-1)
        return logits, preds
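
A minimal usage sketch for BertNER (hypothetical: num_tags and the random token ids are placeholders, and the older pytorch-pretrained-bert convention of BertModel returning a list of encoder layers, which the snippet relies on, is assumed):

import torch

num_tags = 9                                  # placeholder size of the NER tag set
model = BertNER(vocab_size=num_tags)          # CPU, inference mode by default
token_ids = torch.randint(0, 1000, (2, 16))   # (N, T) batch of token ids
logits, preds = model(token_ids)              # logits: (2, 16, 9), preds: (2, 16)
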
Example #2
class Net(nn.Module):
    def __init__(self, config, bert_state_dict, vocab_len, device = 'cpu'):
        super().__init__()
        self.bert = BertModel(config)
        if bert_state_dict is not None:
            self.bert.load_state_dict(bert_state_dict)
        self.bert.eval()
        self.rnn = nn.LSTM(bidirectional=True, num_layers=2, input_size=768, hidden_size=768//2, batch_first=True)
        self.fc = nn.Linear(768, vocab_len)
        self.device = device

    def forward(self, x, y):
        '''
        x: (N, T). int64
        y: (N, T). int64

        Returns
        enc: (N, T, VOCAB)
        '''
        x = x.to(self.device)
        y = y.to(self.device)

        with torch.no_grad():
            encoded_layers, _ = self.bert(x)
            enc = encoded_layers[-1]
        enc, _ = self.rnn(enc)
        logits = self.fc(enc)
        y_hat = logits.argmax(-1)
        return logits, y, y_hat
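
A hypothetical call sketch for Net. BertConfig(30522) mirrors the constructor style of Example #1 (BertConfig is assumed to be imported as in the snippets above), bert_state_dict=None skips loading a checkpoint, and the tag count of 10 is a placeholder:

import torch

config = BertConfig(30522)                    # BERT-base geometry, randomly initialized
net = Net(config, bert_state_dict=None, vocab_len=10)
x = torch.randint(0, 1000, (2, 16))           # (N, T) token ids
y = torch.randint(0, 10, (2, 16))             # (N, T) gold tag ids
logits, y_out, y_hat = net(x, y)              # (2, 16, 10), (2, 16), (2, 16)
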
Example #3
def get_kobert_model(model_file, vocab_file, ctx="cpu"):
    bertmodel = BertModel(config=BertConfig.from_dict(bert_config))
    # map_location lets a GPU-saved checkpoint load on a CPU-only machine
    bertmodel.load_state_dict(torch.load(model_file, map_location=torch.device(ctx)))
    device = torch.device(ctx)
    bertmodel.to(device)
    bertmodel.eval()
    vocab_b_obj = nlp.vocab.BERTVocab.from_json(open(vocab_file, 'rt').read())
    return bertmodel, vocab_b_obj
Example #4
def get_kobert_model(model_file, vocab_file, ctx="cpu"):
    bertmodel = BertModel(config=BertConfig.from_dict(bert_config))
    # map_location lets a GPU-saved checkpoint load on a CPU-only machine
    bertmodel.load_state_dict(torch.load(model_file, map_location=torch.device(ctx)))
    device = torch.device(ctx)
    bertmodel.to(device)
    bertmodel.eval()
    vocab_b_obj = nlp.vocab.BERTVocab.from_sentencepiece(vocab_file,
                                                         padding_token='[PAD]')
    return bertmodel, vocab_b_obj
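
A hypothetical call to the loader above; the file paths are placeholders for a downloaded KoBERT checkpoint and its SentencePiece vocabulary, and the token ids are random stand-ins:

import torch

bertmodel, vocab = get_kobert_model('kobert.params', 'kobert.spiece', ctx='cpu')
token_ids = torch.randint(0, len(vocab), (1, 8))    # (N, T) placeholder token ids
with torch.no_grad():
    encoded_layers, pooled = bertmodel(token_ids)   # list of layers, (1, 768) pooled output
sentence_vectors = encoded_layers[-1]               # (1, 8, 768) final-layer embeddings
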
Example #5
def create_bert_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
    model = BertModel(config=config)
    model.eval()
    all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask)
    outputs = {
        "sequence_output": all_encoder_layers[-1],
        "pooled_output": pooled_output,
        "all_encoder_layers": all_encoder_layers,
    }
    return outputs
Example #6
def prepare_bert(params, texts):
    # Load pre-trained model tokenizer (vocabulary)
    model_name = 'bert-large-uncased' if params['bert']['large'] else 'bert-base-uncased'
    tokenizer = BertTokenizer.from_pretrained(model_name)
    if params['bert']['trained']:
        model = BertModel.from_pretrained(model_name)
    else:
        # Keep randomly initialized weights but reuse the pretrained model's config.
        model = BertModel(BertModel.from_pretrained(model_name).config)
    model.eval()
    params['encoder'] = model
    params['tokenizer'] = tokenizer
Example #7
def get_kobert_model(ctx="cpu"):
    model_file = './kobert_model/pytorch_kobert_2439f391a6.params'
    vocab_file = './kobert_model/kobertvocab_f38b8a4d6d.json'
    bertmodel = BertModel(config=BertConfig.from_dict(bert_config))
    # map_location lets a GPU-saved checkpoint load on a CPU-only machine
    bertmodel.load_state_dict(torch.load(model_file, map_location=torch.device(ctx)))
    device = torch.device(ctx)
    bertmodel.to(device)
    bertmodel.eval()
    #print(vocab_file) #./kobertvocab_f38b8a4d6d.json
    vocab_b_obj = nlp.vocab.BERTVocab.from_json(
        open(vocab_file, 'rt').read())
    #print(vocab_b_obj)
    return bertmodel, vocab_b_obj
Example #8
def encode_bert(texts, trained=True, large=False):
    # Load pre-trained model tokenizer (vocabulary)
    model_name = 'bert-large-uncased' if large else 'bert-base-uncased'
    tokenizer = BertTokenizer.from_pretrained(model_name)
    if trained:
        model = BertModel.from_pretrained(model_name)
    else:
        # Keep randomly initialized weights but reuse the pretrained model's config.
        model = BertModel(BertModel.from_pretrained(model_name).config)
    model.eval()
    for text in texts:
        text = "[CLS] {} [SEP]".format(text.lower())
        tokenized_text = tokenizer.tokenize(text)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        tokens_tensor = torch.tensor([indexed_tokens])
        with torch.no_grad():
            encoded_layers, pooled = model(tokens_tensor)
            yield encoded_layers, pooled
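
encode_bert is a generator, so a hypothetical caller drains it one sentence at a time and keeps whichever representation it needs:

sentences = ["BERT encodes text.", "So does this sketch."]
for encoded_layers, pooled in encode_bert(sentences, trained=True, large=False):
    token_vectors = encoded_layers[-1]   # (1, T, 768) final-layer token embeddings
    sentence_vector = pooled             # (1, 768) pooled [CLS] representation
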
Example #9
class STS_NET(nn.Module):
    def __init__(self, config, bert_state_dict, device=Param.device):
        super().__init__()
        self.bert = BertModel(config)
        #print('bert initialized from config')
        if bert_state_dict is not None:
            self.bert.load_state_dict(bert_state_dict)
        self.bert.eval()
        self.dropout = nn.Dropout(p=Param.p)
        self.rnn = nn.LSTM(bidirectional=True,
                           num_layers=1,
                           input_size=768,
                           hidden_size=768 // 2)
        self.f1 = nn.Linear(768 // 2, 128)
        self.f2 = nn.Linear(128, 32)
        self.out = nn.Linear(32, 1)
        self.device = device

    def init_hidden(self, batch_size):
        # h0 and c0 for a single-layer bidirectional LSTM: (2, N, 768 // 2)
        h0 = torch.zeros(2, batch_size, 768 // 2).to(self.device)
        c0 = torch.zeros(2, batch_size, 768 // 2).to(self.device)
        return h0, c0

    def forward(self, x_f, x_r):
        batch_size = x_f.size()[0]
        x_f = x_f.to(self.device)
        x_r = x_r.to(self.device)
        xf_encoded_layers, _ = self.bert(x_f)
        enc_f = xf_encoded_layers[-1]
        enc = enc_f.permute(1, 0, 2)  # (N, T, 768) -> (T, N, 768); this LSTM is not batch_first
        enc = self.dropout(enc)
        self.hidden = self.init_hidden(batch_size)
        rnn_out, self.hidden = self.rnn(enc, self.hidden)
        last_hidden_state, last_cell_state = self.hidden
        rnn_out = self.dropout(last_hidden_state)
        f1_out = F.relu(self.f1(rnn_out[-1]))  # feed the dropout-regularized final hidden state forward
        f2_out = F.relu(self.f2(f1_out))
        out = self.out(f2_out)
        return out
Example #10
def get_bert_word_emb(text_array):
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # Load pre-trained model (weights). Rebinding the name BertModel inside the
    # function would raise UnboundLocalError, so use a distinct local name.
    bert_model = BertModel.from_pretrained('bert-base-uncased').to(device)
    bert_model.eval()
Example #11
    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Load pre-trained model (weights)
bert_model = BertModel.from_pretrained('bert-base-uncased').to(device)  # lowercase name avoids shadowing the class
bert_model.eval()

# Load GloVe
glove_vectors = pickle.load(open('glove.6B/glove_words.pkl', 'rb'))
glove_vectors = torch.tensor(glove_vectors)


#####################
# Encoder ResNet CNN
#####################
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        resnet = models.resnet101(pretrained=True)
        self.resnet = nn.Sequential(*list(resnet.children())[:-2])
        self.adaptive_pool = nn.AdaptiveAvgPool2d((14, 14))
Example #12
class Net(nn.Module):
    def __init__(self, config, bert_state_dict, vocab_len, device='cuda'):
        super().__init__()
        self.bert = BertModel(config)
        self.num_layers = 2
        self.input_size = 768
        self.hidden_size = 768
        self.tagset_size = vocab_len
        # BERT-base returns 768-dimensional token representations, matching hidden_size here.
        if bert_state_dict is not None:
            self.bert.load_state_dict(bert_state_dict)
        self.bert.eval()
        # Each LSTM direction maps 768-dim inputs to 768//2-dim outputs; the bidirectional concat is 768-dim.
        self.lstm = nn.LSTM(self.input_size,
                            self.hidden_size // 2,
                            self.num_layers,
                            batch_first=True,
                            bidirectional=True)
        self.fc = nn.Linear(self.hidden_size, vocab_len)
        self.device = device

    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data

        if self.device == 'cuda':
            hidden = (nn.init.xavier_normal_(
                weight.new(self.num_layers * 2, batch_size,
                           self.hidden_size // 2).zero_()).cuda(),
                      nn.init.xavier_normal_(
                          weight.new(self.num_layers * 2, batch_size,
                                     self.hidden_size // 2).zero_()).cuda())
        else:
            hidden = (nn.init.xavier_normal_(
                weight.new(self.num_layers * 2, batch_size,
                           self.hidden_size // 2).zero_()),
                      nn.init.xavier_normal_(
                          weight.new(self.num_layers * 2, batch_size,
                                     self.hidden_size // 2).zero_()))

        return hidden

    def init_eval_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data

        hidden = (nn.init.xavier_normal_(
            weight.new(self.num_layers * 2, 1, self.hidden_size // 2).zero_()),
                  nn.init.xavier_normal_(
                      weight.new(self.num_layers * 2, 1,
                                 self.hidden_size // 2).zero_()))

        return hidden

    def forward(self, x, hidden):
        with torch.no_grad():
            encoded_layers, _ = self.bert(x)
            enc = encoded_layers[-1]
        out, hidden = self.lstm(enc, hidden)
        logits = self.fc(out)
        # softmax = torch.nn.Softmax(dim=2)
        # logits = softmax(logits)
        y_hat = logits.argmax(-1)
        return logits, hidden, y_hat
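
A hypothetical forward pass for this variant. Unlike Example #2 it threads an explicit LSTM hidden state, so init_hidden (or init_eval_hidden for batch size 1) is called per batch; BertConfig(30522) and vocab_len=10 are placeholders as before:

import torch

config = BertConfig(30522)                    # placeholder BERT-base config
net = Net(config, bert_state_dict=None, vocab_len=10, device='cpu')
x = torch.randint(0, 1000, (4, 16))           # (N, T) token ids
hidden = net.init_hidden(batch_size=4)        # h0 and c0, each (num_layers*2, N, 384)
logits, hidden, y_hat = net(x, hidden)        # (4, 16, 10), updated state, (4, 16)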