def __init__(self, config):
    super(Model, self).__init__()
    # This is where the BERT model class is instantiated
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    # With BERT built, next build the RNN (usually an LSTM or GRU; a bidirectional LSTM here)
    # Arguments:
    # input_size: the size of BERT's output, config.hidden_size = 768
    # hidden_size: the hidden size of the RNN (LSTM) itself, config.rnn_hidden = 256
    # num_layers: the number of LSTM layers
    # batch_first: lay out inputs and outputs as (batch, seq, feature/hidden_size) tensors
    # bidirectional: whether to use a bidirectional LSTM
    self.lstm = nn.LSTM(config.hidden_size,
                        config.rnn_hidden,
                        config.num_layers,
                        batch_first=True,
                        dropout=config.dropout,
                        bidirectional=True)
    self.dropout = nn.Dropout(config.dropout)
    # The LSTM is bidirectional, so the feature size doubles
    self.fc = nn.Linear(config.rnn_hidden * 2, config.num_classes)
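# A minimal forward sketch for this BERT + BiLSTM head -- an illustrative
# assumption, not code from the source. It assumes the pytorch_pretrained
# BertModel interface (returns (encoder_out, pooled) when called with
# output_all_encoded_layers=False) and an x = (token_ids, seq_len, mask)
# input tuple, a common convention for these snippets.
def forward(self, x):
    context, mask = x[0], x[2]
    # encoder_out: (batch, seq_len, hidden_size); the pooled [CLS] vector is unused here
    encoder_out, _ = self.bert(context, attention_mask=mask,
                               output_all_encoded_layers=False)
    out, _ = self.lstm(encoder_out)    # (batch, seq_len, rnn_hidden * 2)
    out = self.dropout(out)
    out = self.fc(out[:, -1, :])       # classify on the final time step
    return out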
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    self.f1 = nn.Linear(32 * 768, 768)
    self.hooklayer = nn.Linear(768, 2)
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    # self.fc = nn.Linear(config.hidden_size, config.num_classes)
    self.hooklayer = nn.Linear(768, 18)
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    # Process BERT's parameters: True means their gradients are updated during training
    for param in self.bert.parameters():
        param.requires_grad = True
    self.fc = nn.Linear(config.hidden_size, config.num_classes)
def __init__(self, config):
    super(Model, self).__init__()
    # Define the BERT backbone
    self.bert = BertModel.from_pretrained(config.bert_path)
    # Decide whether to fine-tune BERT's original parameters
    for param in self.bert.parameters():
        param.requires_grad = True
    # Define the TextCNN head:
    # a ModuleList container holding one convolution per filter size
    self.convs = nn.ModuleList([
        nn.Conv2d(
            in_channels=1,  # for images this is the channel count; text has a single channel
            out_channels=config.num_filters,  # output channels, i.e. the number of kernels
            # kernel_size is 2D (height, width): the height k is how many tokens
            # are convolved at once, and the width is the dimension of BERT's
            # token embeddings, i.e. its hidden_size.
            # Sample kernel_size values: (k, embedding_dim), e.g. (2, 768), (3, 768), (4, 768),
            # since everything coming out of BERT has embedding dimension (hidden_size) 768,
            # so the kernel width must also be 768.
            kernel_size=(k, config.hidden_size))
        for k in config.filter_sizes
    ])
    self.dropout = nn.Dropout(config.dropout)
    # The fc layer is always needed; it takes an input size and an output size.
    # For vanilla BERT the input is the 768-dim BERT output and the output is num_classes.
    # Here the input is config.num_filters * len(config.filter_sizes): as in TextCNN,
    # each kernel size yields num_filters (e.g. 256) channels after pooling, and with
    # three kernel sizes the concatenated feature is 3 * 256 channels.
    # The output is still num_classes.
    self.fc = nn.Linear(config.num_filters * len(config.filter_sizes),
                        config.num_classes)
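# A hedged sketch of the matching forward pass, following the standard TextCNN
# pattern (convolve, ReLU, max-pool over time, concatenate). It assumes
# `import torch` and `import torch.nn.functional as F`, plus the
# (token_ids, seq_len, mask) input convention -- none of which are shown in
# the source.
def conv_and_pool(self, x, conv):
    x = F.relu(conv(x)).squeeze(3)              # (batch, num_filters, seq_len - k + 1)
    x = F.max_pool1d(x, x.size(2)).squeeze(2)   # (batch, num_filters)
    return x

def forward(self, x):
    context, mask = x[0], x[2]
    encoder_out, _ = self.bert(context, attention_mask=mask,
                               output_all_encoded_layers=False)
    out = encoder_out.unsqueeze(1)              # (batch, 1, seq_len, hidden_size)
    out = torch.cat([self.conv_and_pool(out, conv) for conv in self.convs], 1)
    out = self.dropout(out)
    return self.fc(out)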
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        # True means the parameters are not frozen and get fine-tuned; False freezes them
        param.requires_grad = True
    self.fc = nn.Linear(config.hidden_size, config.num_classes)
def __init__(self, config):
    super(Model, self).__init__()
    # Load the pretrained BERT
    self.bert = BertModel.from_pretrained(config.bert_path)
    # True enables fine-tuning of BERT's parameters
    for param in self.bert.parameters():
        param.requires_grad = True
    self.fc = nn.Linear(config.hidden_size, config.num_classes)
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    # self.bert = BertModel.from_pretrained('bert-base-chinese')
    for param in self.bert.parameters():
        param.requires_grad = config.fine_tune
    self.fc = nn.Linear(config.hidden_size, config.num_classes)
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    # Freeze all of BERT except its last four parameter tensors
    for param in list(self.bert.parameters())[:-4]:
        # for param in self.bert.parameters():
        param.requires_grad = False
    self.fc = nn.Linear(config.hidden_size, 192)
    # self.fc1 = nn.Linear(192, 48)
    self.fc2 = nn.Linear(192, config.num_classes)
def __init__(self, config):
    super(Model, self).__init__()
    # Load the pretrained model
    self.bert = BertModel.from_pretrained(config.bert_path)
    # Do not freeze the pretrained parameters' gradients: fine-tune the pretrained model
    for param in self.bert.parameters():
        param.requires_grad = True
    # The downstream classification task uses a linear classifier
    self.fc = nn.Linear(config.hidden_size, config.num_classes)
def __init__(self, config):
    super(BertHAN, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    # self.dropout = nn.Dropout(0.1)
    # An fc layer on top to output the class scores
    self.fc = nn.Linear(config.hidden_size, config.num_classes)
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    self.lstm = nn.LSTM(config.hidden_size,
                        config.rnn_hidden,
                        config.num_layers,
                        bidirectional=True,
                        batch_first=True,
                        dropout=config.dropout)
    self.dropout = nn.Dropout(config.dropout)
    self.fc_rnn = nn.Linear(config.rnn_hidden * 2, config.num_classes)
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    self.fc = nn.Linear(config.hidden_size, config.num_classes)
    self.dropout = nn.Dropout(0.1)
    self.m = nn.Sigmoid()
    torch.nn.init.xavier_normal_(self.fc.weight)
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    self.convs = nn.ModuleList(
        [nn.Conv2d(1, config.num_filters, (k, config.hidden_size))
         for k in config.filter_sizes])
    self.dropout = nn.Dropout(config.dropout)
    self.fc_cnn = nn.Linear(config.num_filters * len(config.filter_sizes),
                            config.num_classes)
def __init__(self, config):
    super(Model, self).__init__()
    self.config = config
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    # Learnable log-variances, one per task, for weighting the task losses
    self.log_var_1 = nn.Parameter(torch.zeros((1,), requires_grad=True))
    self.log_var_2 = nn.Parameter(torch.zeros((1,), requires_grad=True))
    self.log_var_3 = nn.Parameter(torch.zeros((1,), requires_grad=True))
    # One classification head per task
    self.OCNLI_fc = nn.Linear(config.hidden_size, 3)
    self.OCEMOTION_fc = nn.Linear(config.hidden_size, 7)
    self.TNEWS_fc = nn.Linear(config.hidden_size, 15)
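# The three log_var parameters look like the ingredients of
# homoscedastic-uncertainty loss weighting (Kendall et al., 2018). A hedged
# sketch of how they would combine the per-task losses -- the method name and
# the assumption that loss_1..loss_3 are the OCNLI/OCEMOTION/TNEWS
# cross-entropy losses are mine, not the source's:
def combined_loss(self, loss_1, loss_2, loss_3):
    # Each task loss is scaled by exp(-log_var); log_var itself is added as a
    # regularizer, so the model learns how strongly to weight each task.
    total = torch.exp(-self.log_var_1) * loss_1 + self.log_var_1
    total = total + torch.exp(-self.log_var_2) * loss_2 + self.log_var_2
    total = total + torch.exp(-self.log_var_3) * loss_3 + self.log_var_3
    return total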
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    # self.fc = nn.Linear(config.hidden_size, config.num_classes)
    self.conv_region = nn.Conv2d(1, config.num_filters,
                                 (3, config.hidden_size), stride=1)
    self.conv = nn.Conv2d(config.num_filters, config.num_filters,
                          (3, 1), stride=1)
    self.max_pool = nn.MaxPool2d(kernel_size=(3, 1), stride=2)
    self.padding1 = nn.ZeroPad2d((0, 0, 1, 1))  # top bottom
    self.padding2 = nn.ZeroPad2d((0, 0, 0, 1))  # bottom
    self.relu = nn.ReLU()
    self.fc = nn.Linear(config.num_filters, config.num_classes)
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    self.convs = nn.ModuleList([
        nn.Conv2d(in_channels=1,
                  out_channels=config.num_filters,
                  kernel_size=(k, config.bert_hidden))
        for k in config.kernel_size
    ])
    self.dropout = nn.Dropout(config.dropout)
    self.fc = nn.Linear(config.num_filters * len(config.kernel_size),
                        config.num_classes)
def __init__(self, config): """ 构建Bert原生模型 :param config: 模型的配置参数,模型构建过程中,各个部分高多少,宽多少 """ super(Model, self).__init__() self.bert = BertModel.from_pretrained(config.bert_path) """是否对Bert模型中的参数根据自己的数据进行微调,即对梯度进行调整 根据自己的需求,看是否需要对Bert的参数进行微调 通常都是设置为True,进行微调的,用来和自己的业务进行匹配 """ for param in self.bert.parameters(): param.requires_grad = True # 设置为 True 就是对参数进行微调 self.fc = nn.Linear(config.hidden_size, config.num_classes)
def __init__(self, config):
    super(HierarchicalModel, self).__init__()
    self._tree_tools = TreeTools()
    self.tree = config.tree
    self.count_nodes = self._tree_tools.count_nodes(self.tree)
    self.batch_size = config.batch_size
    # create a weight matrix and bias vector for each node in the tree
    self.fc = nn.ModuleList([
        nn.Linear(config.hidden_size, len(subtree[1]))
        for subtree in self._tree_tools.get_subtrees(self.tree)
    ])
    self.value_to_path_and_nodes_dict = config.value_to_path_and_nodes_dict
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
def __init__(self, config):
    super(Model, self).__init__()
    self.embedding_dim = config.embedding_dim
    self.num_layers = config.num_layers
    self.hidden_size = config.hidden_size
    self.device = config.device
    self.bert = BertModel.from_pretrained(config.bert_path)
    self.lstm = nn.LSTM(config.embedding_dim,
                        config.hidden_size,
                        num_layers=config.num_layers,
                        bidirectional=True,
                        dropout=config.dropout,
                        batch_first=True)
    self.dropout = nn.Dropout(config.dropout)
    self.crf = CRF(config.tagset_size)
    self.fc = nn.Linear(config.hidden_size * 2, config.tagset_size)
def __init__(self):
    super(BiLSTMCRF, self).__init__()
    self.tag2id = {
        'B-LAW': 0, 'B-ROLE': 1, 'B-TIME': 2, 'I-LOC': 3, 'I-LAW': 4,
        'B-PER': 5, 'I-PER': 6, 'B-ORG': 7, 'I-ROLE': 8, 'I-CRIME': 9,
        'B-CRIME': 10, 'I-ORG': 11, 'B-LOC': 12, 'I-TIME': 13, 'O': 14,
        START_TAG: 15, STOP_TAG: 16
    }
    self.tag2id_size = len(self.tag2id)
    self.bert_path = './bert_pretrain'
    self.bert = BertModel.from_pretrained(self.bert_path)
    self.batch_size = Config.batch_size
    self.embedding_dim = 768
    self.hidden_dim = Config.hidden_size
    # Transition score matrix; under the apparent convention
    # transitions[i][j] = score of moving from tag i to tag j, the -1000
    # entries forbid transitions into START_TAG and out of STOP_TAG
    self.transitions = nn.Parameter(
        torch.randn(self.tag2id_size, self.tag2id_size))
    self.transitions.data[:, self.tag2id[START_TAG]] = -1000.
    self.transitions.data[self.tag2id[STOP_TAG], :] = -1000.
    self.layerNorm = nn.LayerNorm(self.embedding_dim)
    # embeddings
    # self.word_embeddings = nn.Embedding(self.word2id_size, self.embedding_dim)
    # batch_first=True: batch_size is the first dimension rather than the second.
    # Note: dropout has no effect with num_layers=1, and PyTorch warns about it.
    self.lstm = nn.LSTM(self.embedding_dim,
                        self.hidden_dim // 2,
                        num_layers=1,
                        bidirectional=True,
                        batch_first=True,
                        dropout=0.5)
    self.hidden2tag = nn.Linear(self.hidden_dim, self.tag2id_size)
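# A hedged sketch of the feature extraction this module implies
# (BERT embeddings -> LayerNorm -> BiLSTM -> tag projection). The method name
# and the assumption that inputs arrive as padded token-id batches are mine:
def _get_lstm_features(self, token_ids, attention_mask=None):
    # embeds: (batch, seq_len, 768) token representations from BERT
    embeds, _ = self.bert(token_ids, attention_mask=attention_mask,
                          output_all_encoded_layers=False)
    embeds = self.layerNorm(embeds)
    lstm_out, _ = self.lstm(embeds)    # (batch, seq_len, hidden_dim)
    return self.hidden2tag(lstm_out)   # (batch, seq_len, tag2id_size)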
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    self.lstm = nn.LSTM(config.hidden_size,
                        config.rnn_hidden,
                        config.num_layers,
                        bidirectional=True,
                        batch_first=True,
                        dropout=config.dropout)
    self.maxpool = nn.MaxPool1d(config.pad_size)
    self.length = config.rnn_hidden * 2 + config.hidden_size
    self.OCNLI_fc = nn.Linear(config.rnn_hidden * 2 + config.hidden_size, 3)
    self.OCEMOTION_fc = nn.Linear(config.rnn_hidden * 2 + config.hidden_size, 7)
    self.TNEWS_fc = nn.Linear(config.rnn_hidden * 2 + config.hidden_size, 15)
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    # The arguments passed to __init__ are only used to build the model.
    # With BERT built, next build the RCNN part (in practice just a bidirectional LSTM).
    # Arguments:
    # config.hidden_size: the size of BERT's hidden layer
    self.lstm = nn.LSTM(config.hidden_size,
                        config.rnn_hidden,
                        config.num_layers,
                        bidirectional=True,
                        batch_first=True,
                        dropout=config.dropout)
    # The pooling kernel_size is config.pad_size (the padded sentence length),
    # so one pooling step covers an entire sentence
    self.maxpool = nn.MaxPool1d(config.pad_size)
    self.fc = nn.Linear(config.rnn_hidden * 2, config.num_classes)
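# A hedged forward sketch for this BERT-RCNN head, following the common
# pattern of ReLU over the LSTM output and max-pooling over the sentence
# length. `F` is torch.nn.functional, and the (token_ids, seq_len, mask)
# input convention is an assumption.
def forward(self, x):
    context, mask = x[0], x[2]
    encoder_out, _ = self.bert(context, attention_mask=mask,
                               output_all_encoded_layers=False)
    out, _ = self.lstm(encoder_out)     # (batch, pad_size, rnn_hidden * 2)
    out = F.relu(out)
    out = out.permute(0, 2, 1)          # (batch, rnn_hidden * 2, pad_size)
    out = self.maxpool(out).squeeze(2)  # (batch, rnn_hidden * 2)
    return self.fc(out)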
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    # LSTM input: input, (h0, c0)
    # input: [seq_len, batch, input_size] (or [batch, seq_len, input_size] with batch_first=True)
    # h0 = c0: [num_layers * num_directions, batch, hidden_size]
    # LSTM output: output, (hn, cn)
    # output: [seq_len, batch, num_directions * hidden_size]
    # hn = cn: [num_layers * num_directions, batch, hidden_size]
    self.lstm = nn.LSTM(config.hidden_size,
                        config.rnn_hidden,
                        config.num_layers,
                        batch_first=True,
                        dropout=config.dropout,
                        bidirectional=True)
    self.dropout = nn.Dropout(config.dropout)
    self.fc = nn.Linear(config.rnn_hidden * 2, config.num_classes)
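# A quick standalone check of the shapes documented above, with toy
# dimensions (all values here are illustrative only):
import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=768, hidden_size=256, num_layers=2,
               batch_first=True, bidirectional=True)
x = torch.randn(4, 32, 768)  # (batch, seq_len, input_size)
output, (hn, cn) = lstm(x)
print(output.shape)          # torch.Size([4, 32, 512]): num_directions * hidden_size last
print(hn.shape, cn.shape)    # torch.Size([4, 4, 256]) each: num_layers * num_directions first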
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    # Convolutions via Conv2d:
    # in_channels: 1 for text
    # out_channels: 256, the number of kernels produced
    # kernel_size (tuple): (k, hidden_size)
    self.convs = nn.ModuleList([
        nn.Conv2d(in_channels=1,
                  out_channels=config.num_filters,
                  kernel_size=(k, config.hidden_size))
        for k in config.filter_sizes
    ])
    self.dropout = nn.Dropout(config.dropout)
    # [number of kernels * number of kernel sizes, number of output classes]
    self.fc = nn.Linear(config.num_filters * len(config.filter_sizes),
                        config.num_classes)
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    # With BERT built, next build the DPCNN.
    # Conv2d arguments:
    # in_channels: the input channel count; 1 for text data
    # out_channels: the output channel count; 250 in the paper's setup
    # kernel_size: 3 in the paper's setup.
    # kernel_size has a height and a width: the height is 3 (like an n-gram of 3,
    # convolving three tokens at a time) and the width is the dimension of the
    # word vectors output by BERT.
    self.conv_region = nn.Conv2d(in_channels=1,
                                 out_channels=config.num_filters,
                                 kernel_size=(3, config.hidden_size))
    # A second conv stacked on the first: the previous conv's output is this conv's input
    self.conv = nn.Conv2d(in_channels=config.num_filters,
                          out_channels=config.num_filters,
                          kernel_size=(3, 1))
    # That covers the first two convs in the model diagram; next comes a block
    # containing one pooling layer and two convolutions.
    # The final pooling layer in the diagram
    self.max_pool = nn.MaxPool2d(kernel_size=(3, 1), stride=2)
    # Two paddings used inside the block; ZeroPad2d takes (left, right, top, bottom):
    # (0, 0, 1, 1) pads one row at the top and one at the bottom of the height axis,
    # leaving the batch and channel dimensions alone
    self.padd1 = nn.ZeroPad2d((0, 0, 1, 1))
    # (0, 0, 0, 1) pads one row at the bottom of the height axis only
    self.padd2 = nn.ZeroPad2d((0, 0, 0, 1))
    self.relu = nn.ReLU()
    # As always, finish with a linear classification layer
    self.fc = nn.Linear(config.num_filters, config.num_classes)
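# A hedged sketch of the repeated DPCNN block this sets up (pad, halve the
# sequence length with stride-2 pooling, then two padded convolutions with a
# residual shortcut). The method name is an assumption:
def _block(self, x):
    x = self.padd2(x)       # pad one row at the bottom so the pooling window fits
    px = self.max_pool(x)   # halve the sequence length (stride 2)
    x = self.padd1(px)
    x = self.relu(x)
    x = self.conv(x)
    x = self.padd1(x)
    x = self.relu(x)
    x = self.conv(x)
    return x + px           # residual (shortcut) connection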
def __init__(self):
    self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased',
                                                   do_lower_case=False)
    self.model = BertModel.from_pretrained('bert-base-cased')
    self.model.eval()
from pytorch_pretrained import BertModel, BertTokenizer
import torch

tokenizer = BertTokenizer.from_pretrained('ERNIE_pretrain')
bert = BertModel.from_pretrained('ERNIE_pretrain')

token = tokenizer.tokenize('今天天气很好')  # tokenize into characters
token = ['[CLS]'] + token
print(token)
token_ids = tokenizer.convert_tokens_to_ids(token)
print(token_ids)
token_ids = torch.LongTensor([token_ids])
# The attention mask covers all 7 tokens ([CLS] plus the 6 characters)
encoder_out, pooled = bert(token_ids,
                           attention_mask=torch.LongTensor([[1] * 7]),
                           output_all_encoded_layers=False)
print(pooled)

# import transformers
# transformers.DataCollatorForLanguageModeling
from utils import built_train_dataset, built_dev_dataset
import os
import random
import time
import operator
from functools import reduce

import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from pytorch_pretrained import BertModel, BertTokenizer

from net import BiLSTMCRF
# Config is assumed to be importable from the project's configuration module

random.seed(1)
config = Config()
best_score = float("inf")

print("Loading Datas...")
train_dataset = built_train_dataset(config)
dev_dataset = built_dev_dataset(config)

tokenizer = BertTokenizer.from_pretrained(config.bert_path)
bert_model = BertModel.from_pretrained(config.bert_path)

flag = False
model = BiLSTMCRF()
optimizer = optim.SGD(model.parameters(),
                      lr=config.learning_rate,
                      momentum=0.9)
scheduler = lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

if os.path.exists("./model/params.pkl"):
    model.load_state_dict(torch.load("./model/params.pkl"))

for epoch in range(100):
    scheduler.step()
    total_loss = 0
    batch_count = 0
    start_time = time.time()
    for i, batch in enumerate(train_dataset):
def __init__(self, config):
    super(Model, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    self.fc = nn.Linear(1024, config.num_classes)