def __init__(self, vocab_size, tagset_size, embedding_dim, hidden_dim,
             rnn_layers, dropout_ratio, large_CRF=True):
    """Build a word-level BiLSTM-CRF sequence tagger.

    Args:
        vocab_size: number of rows in the word embedding table.
        tagset_size: number of output tags scored by the CRF.
        embedding_dim: dimension of the word embeddings.
        hidden_dim: total BiLSTM hidden size (split across directions).
        rnn_layers: number of stacked LSTM layers.
        dropout_ratio: dropout probability for the two dropout modules
            and the LSTM's inter-layer dropout.
        large_CRF: pick crf.CRF_L over crf.CRF_S as the output layer.
    """
    super(LSTM_CRF, self).__init__()
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.vocab_size = vocab_size
    self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
    # Each direction gets hidden_dim // 2 so the concatenated output
    # width matches hidden_dim, which the CRF layer consumes.
    self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2,
                        num_layers=rnn_layers, bidirectional=True,
                        dropout=dropout_ratio)
    self.rnn_layers = rnn_layers
    self.dropout1 = nn.Dropout(p=dropout_ratio)
    self.dropout2 = nn.Dropout(p=dropout_ratio)
    self.tagset_size = tagset_size
    self.crf = (crf.CRF_L(hidden_dim, tagset_size) if large_CRF
                else crf.CRF_S(hidden_dim, tagset_size))
    self.batch_size = 1
    self.seq_length = 1
def __init__(self, tagset_size, char_size, char_dim, char_hidden_dim,
             char_rnn_layers, embedding_dim, word_hidden_dim,
             word_rnn_layers, vocab_size, dropout_ratio, char_embeds,
             large_CRF=True, if_highway=False, in_doc_words=2,
             highway_layers=1):
    """Build an LM-augmented char + word BiLSTM-CRF tagger.

    A forward and a backward character LSTM feed, together with word
    embeddings, a word-level BiLSTM whose outputs are scored by a CRF.
    Optional highway transforms adapt the character states, and two
    linear heads support language-model pre-training.

    Args:
        tagset_size: number of output tags.
        char_size: character vocabulary size (char LM head width).
        char_dim: character embedding dimension.
        char_hidden_dim: hidden size of each directional char LSTM.
        char_rnn_layers: stacked layers in each char LSTM.
        embedding_dim: word embedding dimension.
        word_hidden_dim: total hidden size of the word BiLSTM.
        word_rnn_layers: stacked layers in the word BiLSTM.
        vocab_size: word vocabulary size.
        dropout_ratio: dropout probability used throughout.
        char_embeds: pre-built character embedding module supplied by
            the caller (so it can be shared with other components).
        large_CRF: pick crf.CRF_L over crf.CRF_S.
        if_highway: insert highway transforms on the char outputs.
        in_doc_words: output width of the word-level LM head.
        highway_layers: layers in each highway transform.
    """
    super(LM_LSTM_CRF, self).__init__()
    self.char_dim = char_dim
    self.char_hidden_dim = char_hidden_dim
    self.char_size = char_size
    self.word_dim = embedding_dim
    self.word_hidden_dim = word_hidden_dim
    self.word_size = vocab_size
    self.if_highway = if_highway
    # Character embeddings are injected, not constructed here.
    self.char_embeds = char_embeds
    # Two unidirectional char LSTMs: one over the forward character
    # stream, one over the reversed stream.
    self.forw_char_lstm = nn.LSTM(char_dim, char_hidden_dim,
                                  num_layers=char_rnn_layers,
                                  bidirectional=False,
                                  dropout=dropout_ratio)
    self.back_char_lstm = nn.LSTM(char_dim, char_hidden_dim,
                                  num_layers=char_rnn_layers,
                                  bidirectional=False,
                                  dropout=dropout_ratio)
    self.char_rnn_layers = char_rnn_layers
    self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
    # Word LSTM consumes word embeddings concatenated with both char
    # LSTM states; each direction gets half of word_hidden_dim.
    self.word_lstm = nn.LSTM(embedding_dim + char_hidden_dim * 2,
                             word_hidden_dim // 2,
                             num_layers=word_rnn_layers,
                             bidirectional=True,
                             dropout=dropout_ratio)
    self.word_rnn_layers = word_rnn_layers
    self.dropout = nn.Dropout(p=dropout_ratio)
    self.tagset_size = tagset_size
    self.crf = (crf.CRF_L(word_hidden_dim, tagset_size) if large_CRF
                else crf.CRF_S(word_hidden_dim, tagset_size))
    if if_highway:
        # Highway transforms feeding the char-level LM head, the
        # word-level LM head, and the word LSTM input respectively.
        self.forw2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.forw2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.fb2char = highway.hw(2 * char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
    # Language-model pre-training heads.
    self.char_pre_train_out = nn.Linear(char_hidden_dim, char_size)
    self.word_pre_train_out = nn.Linear(char_hidden_dim, in_doc_words)
    self.batch_size = 1
    self.word_seq_length = 1
def __init__(self, char_vocab_size, bichar_vocab_size, word_vocab_size,
             tagset_size, embedding_dim, hidden_dim, rnn_layers,
             dropout_ratio, gpu, isbiChar, large_CRF=True):
    """Build a lattice-LSTM CRF tagger over char/bichar/word inputs.

    Args:
        char_vocab_size: character vocabulary size.
        bichar_vocab_size: character-bigram vocabulary size.
        word_vocab_size: word (lexicon) vocabulary size.
        tagset_size: number of output tags scored by the CRF.
        embedding_dim: shared dimension of all three embedding tables.
        hidden_dim: hidden size of the lattice LSTM (CRF input width).
        rnn_layers: recorded layer count (stored, passed to nothing here).
        dropout_ratio: dropout probability for the dropout modules.
        gpu: device flag forwarded to LatticeLSTM.
        isbiChar: whether character-bigram features are used; adds a
            third dropout module and is forwarded to LatticeLSTM.
        large_CRF: pick crf.CRF_L over crf.CRF_S.
    """
    super(LSTM_CRF, self).__init__()
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.char_vocab_size = char_vocab_size
    self.word_vocab_size = word_vocab_size
    self.bichar_vocab_size = bichar_vocab_size
    self.gpu = gpu
    # Separate embedding tables for words, characters, and character
    # bigrams, all with the same embedding dimension.
    self.word_embeds = nn.Embedding(word_vocab_size, embedding_dim)
    self.char_embeds = nn.Embedding(char_vocab_size, embedding_dim)
    self.bichar_embeds = nn.Embedding(bichar_vocab_size, embedding_dim)
    self.lstm = LatticeLSTM(embedding_dim, hidden_dim, isbiChar, self.gpu)
    self.rnn_layers = rnn_layers
    self.dropout1 = nn.Dropout(p=dropout_ratio)
    self.dropout2 = nn.Dropout(p=dropout_ratio)
    if isbiChar:
        # Extra dropout for the bigram-character channel.
        self.dropout3 = nn.Dropout(p=dropout_ratio)
    self.tagset_size = tagset_size
    # The lattice LSTM output here is hidden_dim wide, which is what
    # the CRF consumes.
    self.crf_in_dim = hidden_dim
    self.crf = (crf.CRF_L(self.crf_in_dim, tagset_size) if large_CRF
                else crf.CRF_S(self.crf_in_dim, tagset_size))
    self.batch_size = 1
    self.seq_length = 1
def __init__(self, tagset_sizes, char_size, char_dim, char_hidden_dim,
             char_rnn_layers, embedding_dim, word_hidden_dim,
             word_rnn_layers, vocab_size, dropout_ratio, file_num,
             large_CRF=True, if_highway=False, in_doc_words=2,
             highway_layers=1):
    """Build a multi-task LM-LSTM-CRF: one word LSTM and one CRF per
    task (file), sharing the character LSTMs and all embeddings.

    Args:
        tagset_sizes: per-task tag counts; indexed 0..file_num-1.
        char_size: character vocabulary size (char LM head width).
        char_dim: character embedding dimension.
        char_hidden_dim: hidden size of each directional char LSTM.
        char_rnn_layers: stacked layers in each char LSTM.
        embedding_dim: word embedding dimension.
        word_hidden_dim: total hidden size of every word BiLSTM.
        word_rnn_layers: stacked layers in the task-0 word BiLSTM.
        vocab_size: word vocabulary size.
        dropout_ratio: dropout probability used throughout.
        file_num: number of tasks; task i uses word_lstms_list[i] and
            crflist[i], each later LSTM stacking on the previous one.
        large_CRF: pick crf.CRF_L over crf.CRF_S for every task.
        if_highway: insert highway transforms on the char outputs.
        in_doc_words: output width of the word-level LM head.
        highway_layers: layers in each highway transform.
    """
    super(LM_LSTM_CRF, self).__init__()
    self.char_dim = char_dim
    self.char_hidden_dim = char_hidden_dim
    self.char_size = char_size
    self.word_dim = embedding_dim
    self.word_hidden_dim = word_hidden_dim
    self.word_size = vocab_size
    self.if_highway = if_highway
    self.char_embeds = nn.Embedding(char_size, char_dim)
    # Forward and backward character LSTMs (two unidirectional nets).
    self.forw_char_lstm = nn.LSTM(char_dim, char_hidden_dim,
                                  num_layers=char_rnn_layers,
                                  bidirectional=False,
                                  dropout=dropout_ratio)
    self.back_char_lstm = nn.LSTM(char_dim, char_hidden_dim,
                                  num_layers=char_rnn_layers,
                                  bidirectional=False,
                                  dropout=dropout_ratio)
    self.char_rnn_layers = char_rnn_layers
    self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
    self.word_lstms_list = nn.ModuleList()
    # Task 0 consumes word embeddings concatenated with both char
    # LSTM states.
    self.word_lstm_task_0 = nn.LSTM(embedding_dim + char_hidden_dim * 2,
                                    word_hidden_dim // 2,
                                    num_layers=word_rnn_layers,
                                    bidirectional=True,
                                    dropout=dropout_ratio)
    self.word_lstms_list.append(self.word_lstm_task_0)
    # Each later task stacks a single-layer BiLSTM on the previous
    # task's output.
    # FIX: the original passed dropout=dropout_ratio here; nn.LSTM
    # applies dropout only BETWEEN layers, so with num_layers=1 the
    # argument is a no-op and PyTorch emits a UserWarning. Omitting
    # it leaves behavior unchanged.
    for _ in range(file_num - 1):
        self.word_lstms_list.append(
            nn.LSTM(word_hidden_dim, word_hidden_dim // 2,
                    num_layers=1, bidirectional=True))
    self.word_rnn_layers = word_rnn_layers
    self.dropout = nn.Dropout(p=dropout_ratio)
    self.tagset_sizes = tagset_sizes
    # One CRF per task, each sized for that task's tagset.
    self.crflist = nn.ModuleList()
    for i in range(file_num):
        if large_CRF:
            self.crflist.append(crf.CRF_L(word_hidden_dim, tagset_sizes[i]))
        else:
            self.crflist.append(crf.CRF_S(word_hidden_dim, tagset_sizes[i]))
    if if_highway:
        # Highway transforms feeding the char-level LM head, the
        # word-level LM head, and the word LSTM input respectively.
        self.forw2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.forw2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.fb2char = highway.hw(2 * char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
    # Language-model pre-training heads.
    self.char_pre_train_out = nn.Linear(char_hidden_dim, char_size)
    self.word_pre_train_out = nn.Linear(char_hidden_dim, in_doc_words)
    self.batch_size = 1
    self.word_seq_length = 1
def __init__(self, tagset_sizes, char_size, char_dim, char_hidden_dim,
             char_rnn_layers, embedding_dim, word_hidden_dim,
             word_rnn_layers, vocab_size, dropout_ratio, file_num,
             pad_value, large_CRF=True, if_highway=False,
             in_doc_words=2, highway_layers=1, n_heads=4, d_model=128):
    """Build a multi-task LM-LSTM-CRF with multi-head self-attention
    after the input representation and after each task's word LSTM.

    NOTE(review): the `d_model` parameter is never used — self.d_model
    is recomputed below as char_hidden_dim * 2 + embedding_dim.
    Confirm no caller relies on the parameter before removing it.

    Args:
        tagset_sizes: per-task tag counts; indexed 0..file_num-1.
        char_size: character vocabulary size (char LM head width).
        char_dim: character embedding dimension.
        char_hidden_dim: hidden size of each directional char LSTM.
        char_rnn_layers: stacked layers in each char LSTM.
        embedding_dim: word embedding dimension.
        word_hidden_dim: total hidden size of every word BiLSTM.
        word_rnn_layers: stacked layers in the task-0 word BiLSTM.
        vocab_size: word vocabulary size.
        dropout_ratio: dropout probability used throughout.
        file_num: number of tasks (one LSTM + attention + CRF each).
        pad_value: padding value stored for downstream masking.
        large_CRF: pick crf.CRF_L over crf.CRF_S for every task.
        if_highway: insert highway transforms on the char outputs.
        in_doc_words: output width of the word-level LM head.
        highway_layers: layers in each highway transform.
        n_heads: attention heads in every MultiHeadedAttention.
        d_model: unused (see NOTE above).
    """
    super(LM_LSTM_CRF, self).__init__()
    self.char_dim = char_dim
    self.char_hidden_dim = char_hidden_dim
    self.char_size = char_size
    self.word_dim = embedding_dim
    self.word_hidden_dim = word_hidden_dim
    self.word_size = vocab_size
    self.if_highway = if_highway
    # Attention width over the input representation: word embedding
    # concatenated with both char LSTM states.
    self.d_model = char_hidden_dim * 2 + embedding_dim
    self.n_heads = n_heads
    self.pad_value = pad_value
    self.self_attn = MultiHeadedAttention(
        n_heads, char_hidden_dim * 2 + embedding_dim)
    self.char_embeds = nn.Embedding(char_size, char_dim)
    # Forward and backward character LSTMs (two unidirectional nets).
    self.forw_char_lstm = nn.LSTM(char_dim, char_hidden_dim,
                                  num_layers=char_rnn_layers,
                                  bidirectional=False,
                                  dropout=dropout_ratio)
    self.back_char_lstm = nn.LSTM(char_dim, char_hidden_dim,
                                  num_layers=char_rnn_layers,
                                  bidirectional=False,
                                  dropout=dropout_ratio)
    self.char_rnn_layers = char_rnn_layers
    self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
    self.word_lstms_list = nn.ModuleList()
    self.lstm_self_attn_list = nn.ModuleList()
    # Task 0 input: embeddings + char states, plus the same-width
    # output of the input self-attention (self.d_model).
    word_input_size = embedding_dim + char_hidden_dim * 2 + self.d_model
    self.word_lstm_task_0 = nn.LSTM(
        embedding_dim + char_hidden_dim * 2 + self.d_model,
        word_hidden_dim // 2, num_layers=word_rnn_layers,
        bidirectional=True, dropout=dropout_ratio)
    self.word_lstm_task_0_self_attn = MultiHeadedAttention(
        n_heads, word_hidden_dim)
    self.word_lstms_list.append(self.word_lstm_task_0)
    self.lstm_self_attn_list.append(self.word_lstm_task_0_self_attn)
    # Because self-attention follows every LSTM, each later task sees
    # the shared input plus 2 * word_hidden_dim from the previous task
    # (LSTM output concatenated with its attention output).
    # FIX: the original passed dropout=dropout_ratio to these
    # single-layer LSTMs; nn.LSTM applies dropout only BETWEEN layers,
    # so with num_layers=1 it is a no-op that triggers a UserWarning.
    # Omitting it leaves behavior unchanged.
    for _ in range(file_num - 1):
        self.word_lstms_list.append(
            nn.LSTM(word_input_size + 2 * word_hidden_dim,
                    word_hidden_dim // 2, num_layers=1,
                    bidirectional=True))
        self.lstm_self_attn_list.append(
            MultiHeadedAttention(n_heads, word_hidden_dim))
    self.word_rnn_layers = word_rnn_layers
    self.dropout = nn.Dropout(p=dropout_ratio)
    self.tagset_sizes = tagset_sizes
    # One CRF per task; input is LSTM output concatenated with its
    # attention output, hence 2 * word_hidden_dim.
    self.crflist = nn.ModuleList()
    for i in range(file_num):
        if large_CRF:
            self.crflist.append(
                crf.CRF_L(2 * word_hidden_dim, tagset_sizes[i]))
        else:
            self.crflist.append(
                crf.CRF_S(2 * word_hidden_dim, tagset_sizes[i]))
    if if_highway:
        # Highway transforms feeding the char-level LM head, the
        # word-level LM head, and the word LSTM input respectively.
        self.forw2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.forw2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.fb2char = highway.hw(2 * char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
    # Language-model pre-training heads.
    self.char_pre_train_out = nn.Linear(char_hidden_dim, char_size)
    self.word_pre_train_out = nn.Linear(char_hidden_dim, in_doc_words)
    self.batch_size = 1
    self.word_seq_length = 1
def __init__(self, tagset_size, char_size, char_dim, char_hidden_dim,
             char_rnn_layers, embedding_dim, word_hidden_dim, vocab_size,
             dropout_ratio, repeats, which_loss, large_CRF=True,
             if_highway=False, in_doc_words=2, highway_layers=1,
             layer_residual=True, block_residual=True):
    """Build an LM-augmented tagger whose word encoder is an iterated
    dilated CNN (ID-CNN) instead of a word LSTM, scored by a CRF.

    Args:
        tagset_size: number of output tags scored by the CRF.
        char_size: character vocabulary size (char LM head width).
        char_dim: character embedding dimension.
        char_hidden_dim: hidden size of each directional char LSTM.
        char_rnn_layers: stacked layers in each char LSTM.
        embedding_dim: word embedding dimension.
        word_hidden_dim: number of CNN filters and CRF input width.
        vocab_size: word vocabulary size.
        dropout_ratio: dropout probability used throughout.
        repeats: how many times the dilated block is iterated
            (coerced to int).
        which_loss: loss-selection flag stored for the training loop
            (compared against strings such as "block" elsewhere).
        large_CRF: pick crf.CRF_L over crf.CRF_S.
        if_highway: insert highway transforms on the char outputs.
        in_doc_words: output width of the word-level LM head.
        highway_layers: layers in each highway transform.
        layer_residual: enable per-layer residual connections.
        block_residual: enable per-block residual connections.
    """
    super(LM_LSTM_CRF, self).__init__()
    self.char_dim = char_dim
    self.char_hidden_dim = char_hidden_dim
    self.char_size = char_size
    self.word_dim = embedding_dim
    self.word_hidden_dim = word_hidden_dim
    self.word_size = vocab_size
    self.if_highway = if_highway
    # Hard-coded ID-CNN hyper-parameters: an initial width-3 conv,
    # then a block of three convs with dilations 1, 2, 1. take_layer
    # presumably marks which layers' outputs are kept per block
    # (only the last) — confirm in the forward pass.
    self.initial_filter_width = 3
    self.initial_padding = 1
    self.padding = [1, 2, 1]
    self.dilation = [1, 2, 1]
    self.take_layer = [False, False, True]
    self.repeats = int(repeats)
    self.which_loss = which_loss
    self.layer_residual = layer_residual
    self.block_residual = block_residual
    # FIX: removed a leftover debug print that dumped `repeats` and
    # `which_loss` (the latter three times, plus its type and a
    # comparison) on every construction.
    self.char_embeds = nn.Embedding(char_size, char_dim)
    # Forward and backward character LSTMs (two unidirectional nets).
    self.forw_char_lstm = nn.LSTM(char_dim, char_hidden_dim,
                                  num_layers=char_rnn_layers,
                                  bidirectional=False,
                                  dropout=dropout_ratio)
    self.back_char_lstm = nn.LSTM(char_dim, char_hidden_dim,
                                  num_layers=char_rnn_layers,
                                  bidirectional=False,
                                  dropout=dropout_ratio)
    self.char_rnn_layers = char_rnn_layers
    # Word embedding layer.
    self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
    # Initial CNN layer projecting word embeddings + both char LSTM
    # states down to the ID-CNN filter width.
    initial_filter_width = self.initial_filter_width
    initial_num_filters = word_hidden_dim
    self.itdicnn0 = nn.Conv1d(embedding_dim + char_hidden_dim * 2,
                              initial_num_filters,
                              kernel_size=initial_filter_width,
                              padding=self.initial_padding, bias=True)
    # Dilated block: padding[i] matches dilation[i] so a width-3
    # kernel preserves sequence length at every layer.
    self.itdicnn = nn.ModuleList([
        nn.Conv1d(initial_num_filters, initial_num_filters,
                  kernel_size=initial_filter_width,
                  padding=self.padding[i], dilation=self.dilation[i],
                  bias=True)
        for i in range(len(self.padding))
    ])
    self.dropout = nn.Dropout(p=dropout_ratio)
    self.tagset_size = tagset_size
    if large_CRF:
        self.crf = crf.CRF_L(word_hidden_dim, tagset_size)
    else:
        self.crf = crf.CRF_S(word_hidden_dim, tagset_size)
    if if_highway:
        # Highway transforms feeding the char-level LM head, the
        # word-level LM head, and the encoder input respectively.
        self.forw2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.forw2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.fb2char = highway.hw(2 * char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
    # Language-model pre-training heads.
    self.char_pre_train_out = nn.Linear(char_hidden_dim, char_size)
    self.word_pre_train_out = nn.Linear(char_hidden_dim, in_doc_words)
    self.batch_size = 1
    self.word_seq_length = 1