import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader, Dataset
import torch.nn.functional as F

from utils import *
from tqdm import tqdm
from trainDataloader import SimDataset, EvalSimDataset, EvalSimWithLabelDataset
from transformers import BertModel, BertConfig, BertTokenizer, BertForSequenceClassification

# %%
# Tokenizer built from the project's local vocabulary files.
tokenizer = BertTokenizer.from_pretrained('./dataset/vocab')

# Labelled dev split plus the evaluation dataset (max sequence length 100).
eval_list = load_sim_dev('./dataset/101/c_dev_with_label')
myData_eval = EvalSimWithLabelDataset(tokenizer, './dataset/std_data', 100)


# %%
class SelfAttention(nn.Module):
    """Self-attention scoring module over a sequence of encoder states.

    NOTE(review): this chunk is truncated — ``forward`` ends right after
    computing ``energy`` and implicitly returns ``None``. Confirm the full
    implementation (attention weights / weighted sum) elsewhere in the repo.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        # Two-layer MLP that scores each timestep's hidden state.
        self.projection = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(True),
            nn.Linear(hidden_dim, hidden_dim),
        )

    def forward(self, encoder_outputs):
        batch_size = encoder_outputs.size(0)
        # Original comment claimed (B, L, H) -> (B, L, 1), but the projection
        # maps H -> H, so energy is (B, L, H) here — verify intended shape.
        energy = self.projection(encoder_outputs)
from utils import *
from tqdm import tqdm
from trainDataloader import SupremeSimDataset, EvalSimWithLabelDataset
from transformers import BertModel, BertConfig, BertTokenizer, BertForSequenceClassification

# %%
# Which label's supreme-court split to train on.
LABEL_ID = '1'

tokenizer = BertTokenizer.from_pretrained('./dataset/vocab')

# Training dataset for the selected label (max sequence length 100),
# materialised up-front and batched for training.
myDataset = SupremeSimDataset(
    tokenizer,
    './dataset/supreme/l{}/s_train'.format(LABEL_ID),
    './dataset/std_data',
    100,
)
myDataset.make_data()
dataiter = DataLoader(myDataset, batch_size=1024)

# Matching dev split plus the labelled evaluation dataset.
eval_list = load_sim_dev('./dataset/supreme/l{}/s_dev'.format(LABEL_ID))
myData_eval = EvalSimWithLabelDataset(tokenizer, './dataset/std_data', 100)


# %%
class SelfAttention(nn.Module):
    """Self-attention scoring module over a sequence of encoder states.

    NOTE(review): this chunk is truncated — ``forward`` breaks off after
    reading the batch size, before any projection or attention computation.
    Confirm the full implementation elsewhere in the repo.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        # Two-layer MLP that scores each timestep's hidden state.
        self.projection = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(True),
            nn.Linear(hidden_dim, hidden_dim),
        )

    def forward(self, encoder_outputs):
        batch_size = encoder_outputs.size(0)
        # Original comment: (B, L, H) -> (B, L, 1) — body truncated here.