def __init__(self, vocab_size, embed_size, hidden_size, output_size,
             pre_word_embed=None, dropout=0.5, use_gpu=False):
    super(Intent_Model, self).__init__()
    self.use_gpu = use_gpu
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.vocab_size = vocab_size
    self.output_size = output_size
    self.dropout = nn.Dropout(dropout)
    self.word_embed = nn.Embedding(vocab_size, embed_size)
    if pre_word_embed is not None:
        self.word_embed.weight = nn.Parameter(torch.FloatTensor(pre_word_embed))
        self.pre_word_embed = True
    else:
        self.pre_word_embed = False
        init_embedding(self.word_embed.weight)
    self.lstm = nn.LSTM(embed_size, hidden_size, bidirectional=True, batch_first=True)
    init_lstm(self.lstm)
    # Project the concatenated forward/backward hidden states onto the intent classes
    # (use output_size rather than a hard-coded class count).
    self.output_layer = nn.Linear(hidden_size * 2, output_size)
    init_linear(self.output_layer)
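# The constructors above and below call init_embedding / init_lstm / init_linear
# (sometimes through a utils module) without defining them. Shown here only as an
# assumed sketch, one common implementation draws weights uniformly from a range
# scaled by the layer dimensions and zeroes the biases; the actual helpers in the
# source may differ.
import math
import torch
import torch.nn as nn


def init_embedding(weight):
    # Uniform initialization in [-sqrt(3/dim), sqrt(3/dim)] so each embedding
    # vector has roughly unit variance.
    bound = math.sqrt(3.0 / weight.size(1))
    nn.init.uniform_(weight, -bound, bound)


def init_linear(layer):
    # Uniform initialization scaled by the fan-in and fan-out of the linear layer.
    bound = math.sqrt(6.0 / (layer.weight.size(0) + layer.weight.size(1)))
    nn.init.uniform_(layer.weight, -bound, bound)
    if layer.bias is not None:
        layer.bias.data.zero_()


def init_lstm(lstm):
    # Apply the same uniform scheme to every input-hidden and hidden-hidden weight
    # matrix of the (possibly bidirectional) LSTM and zero all bias vectors.
    for name, param in lstm.named_parameters():
        if 'weight' in name:
            bound = math.sqrt(6.0 / (param.size(0) / 4 + param.size(1)))
            nn.init.uniform_(param, -bound, bound)
        elif 'bias' in name:
            param.data.zero_()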
def rand_init(self):
    """
    Randomly initialize the word-level LSTM and the output linear layer.
    """
    utils.init_lstm(self.word_lstm)
    utils.init_linear(self.linear_layer)
def rand_init(self, init_embedding=False):
    """
    Random initialization.

    args:
        init_embedding: randomly initialize the word embedding or not
    """
    if init_embedding:
        utils.init_embedding(self.word_embeds.weight)
    if self.position:
        utils.init_embedding(self.position_embeds.weight)
    utils.init_lstm(self.lstm)
    utils.init_linear(self.att2out)
def rand_init(self, init_char_embedding=True, init_word_embedding=False):
    """
    Random initialization.

    args:
        init_char_embedding: randomly initialize the char embedding or not
        init_word_embedding: randomly initialize the word embedding or not
    """
    if init_char_embedding:
        utils.init_embedding(self.char_embeds.weight)
    if init_word_embedding:
        utils.init_embedding(self.word_embeds.weight)
    if self.if_highway:
        self.forw2char.rand_init()
        self.back2char.rand_init()
        self.forw2word.rand_init()
        self.back2word.rand_init()
        self.fb2char.rand_init()
    utils.init_lstm(self.forw_char_lstm)
    utils.init_lstm(self.back_char_lstm)
    utils.init_lstm(self.word_lstm)
    utils.init_linear(self.char_pre_train_out)
    utils.init_linear(self.word_pre_train_out)
    self.crf.rand_init()
def __init__(self, vocab_size, embed_size, hidden_size, tag2id,
             pre_word_embed=None, dropout=0.5, use_gpu=False):
    super(BiLSTM_CRF, self).__init__()
    self.use_gpu = use_gpu
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.vocab_size = vocab_size
    self.tag2id = tag2id
    self.tag_size = len(tag2id)

    # cnn after
    # self.cnn = CNN_Encoder(hidden_size, hidden_size)
    # self.bridge = nn.Linear(hidden_size * 2, self.hidden_size)
    self.lstm = nn.LSTM(embed_size, hidden_size, bidirectional=True)

    # cnn before
    # self.cnn = CNN_Encoder(embed_size, hidden_size)
    # self.bridge = nn.Linear(hidden_size * 2, self.hidden_size)
    # self.lstm = nn.LSTM(hidden_size, hidden_size, bidirectional=True)

    self.dropout = nn.Dropout(dropout)
    self.word_embed = nn.Embedding(vocab_size, embed_size)
    if pre_word_embed is not None:
        self.word_embed.weight = nn.Parameter(torch.FloatTensor(pre_word_embed))
        self.pre_word_embed = True
    else:
        self.pre_word_embed = False
        init_embedding(self.word_embed.weight)

    init_lstm(self.lstm)
    self.hidden2tag = nn.Linear(hidden_size * 2, self.tag_size)
    init_linear(self.hidden2tag)
    self.tanh = nn.Tanh()

    # crf layer
    self.transitions = nn.Parameter(torch.zeros(self.tag_size, self.tag_size))
    self.transitions.data[tag2id['START'], :] = -10000
    self.transitions.data[:, tag2id['STOP']] = -10000
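# Hypothetical usage sketch (not from the source) for the BiLSTM_CRF constructor above.
# The tag set, vocabulary size, and dimensions are made-up placeholders; the only real
# requirement from the constructor is that tag2id contains the 'START' and 'STOP' tags.
tag2id = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'START': 3, 'STOP': 4}
model = BiLSTM_CRF(vocab_size=10000, embed_size=100, hidden_size=200,
                   tag2id=tag2id, pre_word_embed=None, dropout=0.5, use_gpu=False)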
def __init__(self, vocab_size, embed_size, hidden_size, tag2id,
             pre_word_embed=None, dropout=0.5, use_gpu=False):
    super(BiLSTM, self).__init__()
    self.use_gpu = use_gpu
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.vocab_size = vocab_size
    self.tag2id = tag2id
    self.tag_size = len(tag2id)
    self.lstm = nn.LSTM(embed_size, hidden_size, bidirectional=True)
    self.dropout = nn.Dropout(dropout)
    self.word_embed = nn.Embedding(vocab_size, embed_size)
    if pre_word_embed is not None:
        self.word_embed.weight = nn.Parameter(torch.FloatTensor(pre_word_embed))
        self.pre_word_embed = True
    else:
        self.pre_word_embed = False
        init_embedding(self.word_embed.weight)
    init_lstm(self.lstm)
    self.hidden2tag = nn.Linear(hidden_size * 2, self.tag_size)
    init_linear(self.hidden2tag)
def rand_init(self):
    """
    Random initialization of the character embedding and the LSTM / linear layers.
    """
    utils.init_embedding(self.char_embeds.weight)
    if self.char_lstm:
        utils.init_lstm(self.forw_char_lstm)
        utils.init_lstm(self.back_char_lstm)
        utils.init_lstm(self.word_lstm_lm)
        utils.init_linear(self.char_pre_train_out)
        utils.init_linear(self.word_pre_train_out)
        if self.if_highway:
            self.forw2char.rand_init()
            self.back2char.rand_init()
            self.forw2word.rand_init()
            self.back2word.rand_init()
            self.fb2char.rand_init()
    else:
        utils.init_lstm(self.word_lstm_cnn)
def __init__(self, vocab_size, tag_to_ix, embedding_dim, hidden_dim, char_to_ix=None,
             pre_word_embeds=None, char_out_dimension=25, char_embedding_dim=25,
             use_gpu=True, use_crf=True, char_mode='CNN', encoder_mode='LSTM', dropout=0.5):
    '''
    Input parameters:
        vocab_size = size of the vocabulary (int)
        tag_to_ix = dictionary that maps NER tags to indices
        embedding_dim = dimension of the word embeddings (int)
        hidden_dim = hidden dimension of the LSTM layer (int)
        char_to_ix = dictionary that maps characters to indices
        pre_word_embeds = numpy array of pretrained word embeddings, indexed by word id
        char_out_dimension = output dimension of the character-level encoder
        char_embedding_dim = dimension of the character embeddings
        use_gpu = when True, CUDA function calls are made; otherwise the model runs on the CPU
        use_crf = whether to use a CRF layer for output decoding
        char_mode = character encoder type ('LSTM' or 'CNN')
        encoder_mode = sentence encoder type ('LSTM', 'CNN', 'CNN2', 'CNN3', or 'CNN_DILATED')
        dropout = dropout rate applied after the embedding layer
    '''
    super(BiLSTM_CRF, self).__init__()

    # Parameter initialization for the model
    self.use_gpu = use_gpu
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.vocab_size = vocab_size
    self.tag_to_ix = tag_to_ix
    self.use_crf = use_crf
    self.tagset_size = len(tag_to_ix)
    self.out_channels = char_out_dimension
    self.char_mode = char_mode
    self.encoder_mode = encoder_mode
    self.char_lstm_dim = char_out_dimension

    if char_embedding_dim is not None:
        self.char_embedding_dim = char_embedding_dim

        # Character embedding layer
        self.char_embeds = nn.Embedding(len(char_to_ix), char_embedding_dim)
        init_embedding(self.char_embeds.weight)

        # LSTM encoding of the character embeddings
        if self.char_mode == 'LSTM':
            self.char_lstm = nn.LSTM(char_embedding_dim, self.char_lstm_dim,
                                     num_layers=1, bidirectional=True)
            init_lstm(self.char_lstm)

        # CNN encoding of the character embeddings
        if self.char_mode == 'CNN':
            self.char_cnn3 = nn.Conv2d(in_channels=1, out_channels=self.out_channels,
                                       kernel_size=(3, char_embedding_dim), padding=(2, 0))

    # Word embedding layer of shape (number of words, dimension of each word)
    self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
    if pre_word_embeds is not None:
        # Initialize the word embeddings with the pretrained word embeddings
        self.pre_word_embeds = True
        self.word_embeds.weight = nn.Parameter(torch.FloatTensor(pre_word_embeds))
    else:
        self.pre_word_embeds = False

    # Dropout layer, with the dropout rate specified in the parameters
    self.dropout = nn.Dropout(dropout)

    # LSTM encoder
    if self.encoder_mode == 'LSTM':
        if self.char_mode == 'LSTM':
            self.lstm = nn.LSTM(embedding_dim + self.char_lstm_dim * 2, hidden_dim,
                                bidirectional=True)
        if self.char_mode == 'CNN':
            self.lstm = nn.LSTM(embedding_dim + self.out_channels, hidden_dim,
                                bidirectional=True)
        # Initialize the LSTM layer using the predefined initialization function
        init_lstm(self.lstm)
        # Linear layer which maps the output of the bidirectional LSTM into tag space
        self.hidden2tag = nn.Linear(hidden_dim * 2, self.tagset_size)

    # CNN encoder (one layer)
    if self.encoder_mode == 'CNN':
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=hidden_dim * 2,
                               kernel_size=(1, 1), padding=(0, 0))
        # Initialize the conv layer
        nn.init.xavier_uniform_(self.conv1.weight)
        if self.char_mode == 'LSTM':
            print(f'embedding_dim={embedding_dim}, char_lstm_dim={self.char_lstm_dim*2}, in={embedding_dim+self.char_lstm_dim*2}')
            self.maxpool1 = nn.MaxPool2d((1, embedding_dim + self.char_lstm_dim * 2))
        if self.char_mode == 'CNN':
            print(f'embedding_dim={embedding_dim}, self.out_channels={self.out_channels}, in={embedding_dim+self.out_channels}')
            self.maxpool1 = nn.MaxPool2d((1, embedding_dim + self.out_channels))

    # CNN encoder (two layers)
    if self.encoder_mode == 'CNN2':
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=hidden_dim * 2,
                               kernel_size=(1, 1), padding=(0, 0))
        self.conv2 = nn.Conv2d(in_channels=hidden_dim * 2, out_channels=hidden_dim * 2,
                               kernel_size=(1, 1), padding=(0, 0))
        # Initialize the conv layers
        nn.init.xavier_uniform_(self.conv1.weight)
        nn.init.xavier_uniform_(self.conv2.weight)
        self.maxpool1 = nn.MaxPool2d((1, 2))
        self.maxpool2 = nn.MaxPool2d((1, (embedding_dim + self.out_channels) // 2))

    # CNN encoder (three layers)
    if self.encoder_mode == 'CNN3':
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=hidden_dim * 2,
                               kernel_size=(1, 1), padding=(0, 0))
        self.conv2 = nn.Conv2d(in_channels=hidden_dim * 2, out_channels=hidden_dim * 2,
                               kernel_size=(1, 1), padding=(0, 0))
        self.conv3 = nn.Conv2d(in_channels=hidden_dim * 2, out_channels=hidden_dim * 2,
                               kernel_size=(1, 1), padding=(0, 0))
        # Initialize the conv layers
        nn.init.xavier_uniform_(self.conv1.weight)
        nn.init.xavier_uniform_(self.conv2.weight)
        nn.init.xavier_uniform_(self.conv3.weight)
        self.maxpool1 = nn.MaxPool2d((1, 2))
        self.maxpool2 = nn.MaxPool2d((1, 2))
        self.maxpool3 = nn.MaxPool2d((1, (embedding_dim + self.out_channels) // 4))

    # CNN encoder (dilated, three layers)
    if self.encoder_mode == 'CNN_DILATED':
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=hidden_dim * 2,
                               kernel_size=(1, 2), padding=(0, 0), dilation=1)
        self.conv2 = nn.Conv2d(in_channels=hidden_dim * 2, out_channels=hidden_dim * 2,
                               kernel_size=(1, 2), padding=(0, 0), dilation=2)
        self.conv3 = nn.Conv2d(in_channels=hidden_dim * 2, out_channels=hidden_dim * 2,
                               kernel_size=(1, 2), padding=(0, 0), dilation=3)
        # Initialize the conv layers
        nn.init.xavier_uniform_(self.conv1.weight)
        nn.init.xavier_uniform_(self.conv2.weight)
        nn.init.xavier_uniform_(self.conv3.weight)
        self.maxpool1 = nn.MaxPool2d((1, 2))
        self.maxpool2 = nn.MaxPool2d((1, 2))
        self.maxpool3 = nn.MaxPool2d((1, 27))

    # Linear layer which maps the encoder output into tag space
    self.hidden2tag = nn.Linear(hidden_dim * 2, self.tagset_size)
    # Initialize the linear layer using the predefined initialization function
    init_linear(self.hidden2tag)

    if self.use_crf:
        # Matrix of transition parameters. Entry i, j is the score of transitioning
        # *to* i *from* j; its shape is (number of tags, number of tags).
        self.transitions = nn.Parameter(torch.zeros(self.tagset_size, self.tagset_size))
        # These two statements enforce the constraint that we never transition
        # to the start tag and never transition from the stop tag.
        self.transitions.data[tag_to_ix[START_TAG], :] = -10000
        self.transitions.data[:, tag_to_ix[STOP_TAG]] = -10000
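# Assumed illustration (not from the source) of why the word-level LSTM above takes
# embedding_dim + out_channels inputs when char_mode == 'CNN': the character CNN output
# is max-pooled over the character axis to one vector per word and concatenated with
# the word embedding. The shapes and tensors below are toy placeholders.
import torch
import torch.nn as nn
import torch.nn.functional as F

embedding_dim, char_embedding_dim, out_channels = 100, 25, 25
char_cnn3 = nn.Conv2d(in_channels=1, out_channels=out_channels,
                      kernel_size=(3, char_embedding_dim), padding=(2, 0))

chars = torch.randn(7, 1, 12, char_embedding_dim)         # 7 words, up to 12 chars each
char_feats = char_cnn3(chars)                             # (7, out_channels, 14, 1)
char_feats = F.max_pool2d(char_feats,
                          (char_feats.size(2), 1)).view(7, out_channels)

word_embeds = torch.randn(7, embedding_dim)                # toy word embeddings
lstm_input = torch.cat([word_embeds, char_feats], dim=1)   # (7, embedding_dim + out_channels)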
def __init__(self, vocab_size, tag_to_ix, embedding_dim, hidden_dim, char_lstm_dim=25,
             char_to_ix=None, pre_word_embeds=None, char_embedding_dim=25, use_gpu=False,
             n_cap=None, cap_embedding_dim=None, use_crf=True, char_mode="CNN"):
    super(BiLSTM_CRF, self).__init__()
    self.use_gpu = use_gpu
    self.device = torch.device("cuda" if self.use_gpu else "cpu")
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.vocab_size = vocab_size
    self.tag_to_ix = tag_to_ix
    self.n_cap = n_cap                          # Capitalization feature num
    self.cap_embedding_dim = cap_embedding_dim  # Capitalization feature dim
    self.use_crf = use_crf
    self.tagset_size = len(tag_to_ix)
    self.out_channels = char_lstm_dim
    self.char_mode = char_mode
    print("char_mode: %s, out_channels: %d, hidden_dim: %d, "
          % (char_mode, char_lstm_dim, hidden_dim))

    if self.n_cap and self.cap_embedding_dim:
        self.cap_embeds = nn.Embedding(self.n_cap, self.cap_embedding_dim)
        torch.nn.init.xavier_uniform_(self.cap_embeds.weight)

    if char_embedding_dim is not None:
        self.char_lstm_dim = char_lstm_dim
        self.char_embeds = nn.Embedding(len(char_to_ix), char_embedding_dim)
        torch.nn.init.xavier_uniform_(self.char_embeds.weight)
        if self.char_mode == "LSTM":
            self.char_lstm = nn.LSTM(char_embedding_dim, char_lstm_dim,
                                     num_layers=1, bidirectional=True)
            init_lstm(self.char_lstm)
        if self.char_mode == "CNN":
            self.char_cnn3 = nn.Conv2d(in_channels=1, out_channels=self.out_channels,
                                       kernel_size=(3, char_embedding_dim), padding=(2, 0))

    self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
    if pre_word_embeds is not None:
        self.pre_word_embeds = True
        self.word_embeds.weight = nn.Parameter(torch.FloatTensor(pre_word_embeds))
    else:
        self.pre_word_embeds = False

    self.dropout = nn.Dropout(0.5)

    if self.n_cap and self.cap_embedding_dim:
        if self.char_mode == "LSTM":
            self.lstm = nn.LSTM(embedding_dim + char_lstm_dim * 2 + cap_embedding_dim,
                                hidden_dim, bidirectional=True)
        if self.char_mode == "CNN":
            self.lstm = nn.LSTM(embedding_dim + self.out_channels + cap_embedding_dim,
                                hidden_dim, bidirectional=True)
    else:
        if self.char_mode == "LSTM":
            self.lstm = nn.LSTM(embedding_dim + char_lstm_dim * 2, hidden_dim,
                                bidirectional=True)
        if self.char_mode == "CNN":
            self.lstm = nn.LSTM(embedding_dim + self.out_channels, hidden_dim,
                                bidirectional=True)
    init_lstm(self.lstm)

    # # high way
    # self.hw_trans = nn.Linear(self.out_channels, self.out_channels)
    # self.hw_gate = nn.Linear(self.out_channels, self.out_channels)
    # self.h2_h1 = nn.Linear(hidden_dim * 2, hidden_dim)
    # self.tanh = nn.Tanh()
    # init_linear(self.h2_h1)
    # init_linear(self.hw_gate)
    # init_linear(self.hw_trans)

    self.hidden2tag = nn.Linear(hidden_dim * 2, self.tagset_size)
    init_linear(self.hidden2tag)

    if self.use_crf:
        # CRF transition scores; illegal transitions involving START and STOP are forbidden.
        self.transitions = nn.Parameter(torch.randn(self.tagset_size, self.tagset_size))
        self.transitions.data[:, tag_to_ix[START_TAG]] = -10000
        self.transitions.data[tag_to_ix[STOP_TAG], :] = -10000
def reset_parameters(self):
    super().reset_parameters()
    utils.init_lstm(self.lstm)
def reset_parameters(self):
    super(LSTMContextEncoder, self).reset_parameters()
    utils.init_lstm(self.rnn)