def __init__(self, tagset_size, char_size, char_dim, char_hidden_dim, char_rnn_layers, embedding_dim,
             word_hidden_dim, word_rnn_layers, vocab_size, dropout_ratio, char_embeds, large_CRF=True,
             if_highway=False, in_doc_words=2, highway_layers=1):
    super(LM_LSTM_CRF, self).__init__()
    self.char_dim = char_dim
    self.char_hidden_dim = char_hidden_dim
    self.char_size = char_size
    self.word_dim = embedding_dim
    self.word_hidden_dim = word_hidden_dim
    self.word_size = vocab_size
    self.if_highway = if_highway

    # character embeddings are passed in (e.g. shared with another model)
    self.char_embeds = char_embeds
    # self.char_embeds = nn.Embedding(char_size, char_dim)

    # forward and backward character-level language-model LSTMs
    self.forw_char_lstm = nn.LSTM(char_dim, char_hidden_dim, num_layers=char_rnn_layers,
                                  bidirectional=False, dropout=dropout_ratio)
    self.back_char_lstm = nn.LSTM(char_dim, char_hidden_dim, num_layers=char_rnn_layers,
                                  bidirectional=False, dropout=dropout_ratio)
    # self.forw_char_lstm = forw_char_lstm
    # self.back_char_lstm = back_char_lstm
    self.char_rnn_layers = char_rnn_layers

    # word-level embeddings and bidirectional word LSTM over [word emb; char features]
    self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
    self.word_lstm = nn.LSTM(embedding_dim + char_hidden_dim * 2, word_hidden_dim // 2,
                             num_layers=word_rnn_layers, bidirectional=True, dropout=dropout_ratio)
    self.word_rnn_layers = word_rnn_layers

    self.dropout = nn.Dropout(p=dropout_ratio)

    self.tagset_size = tagset_size
    if large_CRF:
        self.crf = crf.CRF_L(word_hidden_dim, tagset_size)
    else:
        self.crf = crf.CRF_S(word_hidden_dim, tagset_size)

    # optional highway layers between the character LSTMs and the word LSTM / LM heads
    if if_highway:
        self.forw2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.forw2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.fb2char = highway.hw(2 * char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)

    # output layers for the character- and word-level language-model pre-training objectives
    self.char_pre_train_out = nn.Linear(char_hidden_dim, char_size)
    # self.char_pre_train_out = char_pre_train_out
    self.word_pre_train_out = nn.Linear(char_hidden_dim, in_doc_words)

    self.batch_size = 1
    self.word_seq_length = 1
def __init__(self, tagset_sizes, char_size, char_dim, char_hidden_dim, char_rnn_layers, embedding_dim,
             word_hidden_dim, word_rnn_layers, vocab_size, dropout_ratio, file_num, large_CRF=True,
             if_highway=False, in_doc_words=2, highway_layers=1):
    super(LM_LSTM_CRF, self).__init__()
    self.char_dim = char_dim
    self.char_hidden_dim = char_hidden_dim
    self.char_size = char_size
    self.word_dim = embedding_dim
    self.word_hidden_dim = word_hidden_dim
    self.word_size = vocab_size
    self.if_highway = if_highway

    self.char_embeds = nn.Embedding(char_size, char_dim)

    # forward and backward character-level LSTMs shared across all tasks
    self.forw_char_lstm = nn.LSTM(char_dim, char_hidden_dim, num_layers=char_rnn_layers,
                                  bidirectional=False, dropout=dropout_ratio)
    self.back_char_lstm = nn.LSTM(char_dim, char_hidden_dim, num_layers=char_rnn_layers,
                                  bidirectional=False, dropout=dropout_ratio)
    self.char_rnn_layers = char_rnn_layers

    self.word_embeds = nn.Embedding(vocab_size, embedding_dim)

    # one word-level LSTM per task (file); task 0 reads [word emb; char features],
    # each later task reads the previous task's word_hidden_dim-sized output
    self.word_lstms_list = nn.ModuleList()
    self.word_lstm_task_0 = nn.LSTM(embedding_dim + char_hidden_dim * 2, word_hidden_dim // 2,
                                    num_layers=word_rnn_layers, bidirectional=True, dropout=dropout_ratio)
    self.word_lstms_list.append(self.word_lstm_task_0)
    for i in range(file_num - 1):
        self.word_lstms_list.append(
            nn.LSTM(word_hidden_dim, word_hidden_dim // 2, num_layers=1,
                    bidirectional=True, dropout=dropout_ratio))
    self.word_rnn_layers = word_rnn_layers

    self.dropout = nn.Dropout(p=dropout_ratio)

    # one CRF per task, each with its own tagset size
    self.tagset_sizes = tagset_sizes
    self.crflist = nn.ModuleList()
    for i in range(file_num):
        if large_CRF:
            self.crflist.append(crf.CRF_L(word_hidden_dim, tagset_sizes[i]))
        else:
            self.crflist.append(crf.CRF_S(word_hidden_dim, tagset_sizes[i]))

    if if_highway:
        self.forw2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.forw2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.fb2char = highway.hw(2 * char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)

    # language-model pre-training heads
    self.char_pre_train_out = nn.Linear(char_hidden_dim, char_size)
    self.word_pre_train_out = nn.Linear(char_hidden_dim, in_doc_words)

    self.batch_size = 1
    self.word_seq_length = 1
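# Standalone sketch of the one-decoder-per-task pattern used by the constructor above,
# with nn.Linear standing in for crf.CRF_L / crf.CRF_S (those classes are not available
# here). The dimension values and the `heads` / `task_id` names are illustrative
# assumptions, not part of the original code.
import torch
import torch.nn as nn

word_hidden_dim = 300
tagset_sizes = [17, 9]                               # hypothetical tagset sizes for two tasks
heads = nn.ModuleList([nn.Linear(word_hidden_dim, t) for t in tagset_sizes])

features = torch.randn(5, 1, word_hidden_dim)        # (seq_len, batch, word_hidden_dim)
task_id = 1
scores = heads[task_id](features)                    # per-token scores for the selected task
assert scores.shape == (5, 1, tagset_sizes[task_id])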
def __init__(self, tagset_size, char_size, char_dim, char_hidden_dim, embedding_dim, word_hidden_dim,
             win_size, vocab_size, dropout_ratio, tag_dim=100, segtgt_size=None, enttgt_size=None,
             if_highway=False, ex_embedding_dim=None, segment_loss=0, entity_loss=0):
    super(LSTM_TH, self).__init__()
    # summed (not averaged) cross-entropy loss
    self.xentropy = nn.CrossEntropyLoss(size_average=False)
    self.char_dim = char_dim
    self.char_hidden_dim = char_hidden_dim
    self.char_size = char_size
    self.word_dim = embedding_dim
    self.ex_word_dim = ex_embedding_dim
    self.win_size = win_size
    self.word_hidden_dim = word_hidden_dim
    self.tag_dim = tag_dim
    self.word_size = vocab_size
    self.if_highway = if_highway
    self.char_embeds = nn.Embedding(char_size, char_dim)
    self.segment_loss = segment_loss
    self.entity_loss = entity_loss

    # interaction parameters between the main and auxiliary hidden states
    self.W1 = nn.Parameter(torch.zeros(word_hidden_dim, word_hidden_dim))
    self.W2 = nn.Parameter(torch.zeros(word_hidden_dim, word_hidden_dim))
    self.b1 = nn.Parameter(torch.zeros(word_hidden_dim))
    self.b2 = nn.Parameter(torch.zeros(word_hidden_dim))

    # forward and backward character-level LSTMs
    self.forw_char_lstm = nn.LSTM(char_dim, char_hidden_dim, bidirectional=False, dropout=dropout_ratio)
    self.back_char_lstm = nn.LSTM(char_dim, char_hidden_dim, bidirectional=False, dropout=dropout_ratio)

    # main word-level BiLSTM over a window of word embeddings plus character features
    if not ex_embedding_dim:
        self.word_lstm = nn.LSTM(self.word_dim * self.win_size + char_hidden_dim * 2,
                                 self.word_hidden_dim // 2, bidirectional=True, dropout=dropout_ratio)
    else:
        # use two embeddings
        self.word_lstm = nn.LSTM((self.word_dim + self.ex_word_dim) * self.win_size + char_hidden_dim * 2,
                                 self.word_hidden_dim // 2, bidirectional=True, dropout=dropout_ratio)

    self.word_embeds = nn.Embedding(vocab_size, self.word_dim)
    # ex_word_dim may be None, so test truthiness rather than comparing it to 0
    if self.ex_word_dim:
        self.ex_word_embeds = nn.Embedding(vocab_size, self.ex_word_dim)
    else:
        self.ex_word_embeds = None
    # pdb.set_trace()

    self.dropout = nn.Dropout(p=dropout_ratio)

    # highway nets
    if if_highway:
        self.fbchar_highway = highway.hw(2 * char_hidden_dim, dropout_ratio=dropout_ratio)

    self.tag_size = tagset_size
    self.seg_size = segtgt_size
    self.ent_size = enttgt_size

    # the tag classifier also consumes the auxiliary hidden states when the
    # corresponding loss mode is 2 (see the sketch after this constructor)
    if self.segment_loss != 2 and self.entity_loss != 2:
        self.hidden2tag = nn.Linear(self.word_hidden_dim, self.tag_size)
    elif self.segment_loss == 2 and self.entity_loss != 2:
        self.hidden2tag = nn.Linear(self.word_hidden_dim * 2, self.tag_size)
    elif self.segment_loss != 2 and self.entity_loss == 2:
        self.hidden2tag = nn.Linear(self.word_hidden_dim * 2, self.tag_size)
    elif self.segment_loss == 2 and self.entity_loss == 2:
        self.hidden2tag = nn.Linear(self.word_hidden_dim * 3, self.tag_size)

    # bilinear layer
    # self.bilinear = nn.Bilinear(self.word_hidden_dim, self.word_hidden_dim, self.tag_size)

    # auxiliary segmentation BiLSTM and classifier
    if self.segment_loss != 0:
        self.segtgt_size = segtgt_size
        if not ex_embedding_dim:
            self.word_lstm_seg = nn.LSTM(self.word_dim * self.win_size + char_hidden_dim * 2,
                                         self.word_hidden_dim // 2, bidirectional=True, dropout=dropout_ratio)
        else:
            # use two embeddings
            self.word_lstm_seg = nn.LSTM((self.word_dim + self.ex_word_dim) * self.win_size + char_hidden_dim * 2,
                                         self.word_hidden_dim // 2, bidirectional=True, dropout=dropout_ratio)
        self.hidden2seg = nn.Linear(self.word_hidden_dim, self.segtgt_size)

    # auxiliary entity BiLSTM and classifier
    if self.entity_loss != 0:
        self.enttgt_size = enttgt_size
        if not ex_embedding_dim:
            self.word_lstm_ent = nn.LSTM(self.word_dim * self.win_size + char_hidden_dim * 2,
                                         self.word_hidden_dim // 2, bidirectional=True, dropout=dropout_ratio)
        else:
            # use two embeddings
            self.word_lstm_ent = nn.LSTM((self.word_dim + self.ex_word_dim) * self.win_size + char_hidden_dim * 2,
                                         self.word_hidden_dim // 2, bidirectional=True, dropout=dropout_ratio)
        self.hidden2ent = nn.Linear(self.word_hidden_dim, self.enttgt_size)

    # '''
    # self.tag_embeddings = nn.Parameter(torch.zeros(self.tag_size + 2, self.tag_dim))  # tag_embeddings
    # self.to_tag = nn.Parameter(torch.zeros(self.tag_size + 2, self.tag_dim))
    # '''
    # pdb.set_trace()
    self.rand_init()
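# Sketch of the hidden2tag input-width rule encoded by the if/elif chain above: the tag
# classifier consumes the main word-LSTM output plus, for each of segment_loss == 2 and
# entity_loss == 2, one extra word_hidden_dim-sized auxiliary feature block. This helper
# is illustrative only and is not part of the original class.
def hidden2tag_in_dim(word_hidden_dim, segment_loss, entity_loss):
    extra = int(segment_loss == 2) + int(entity_loss == 2)
    return word_hidden_dim * (1 + extra)

assert hidden2tag_in_dim(200, 0, 0) == 200
assert hidden2tag_in_dim(200, 2, 0) == 400
assert hidden2tag_in_dim(200, 0, 2) == 400
assert hidden2tag_in_dim(200, 2, 2) == 600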
def __init__(self, tagset_sizes, char_size, char_dim, char_hidden_dim, char_rnn_layers, embedding_dim,
             word_hidden_dim, word_rnn_layers, vocab_size, dropout_ratio, file_num, pad_value,
             large_CRF=True, if_highway=False, in_doc_words=2, highway_layers=1, n_heads=4, d_model=128):
    super(LM_LSTM_CRF, self).__init__()
    self.char_dim = char_dim
    self.char_hidden_dim = char_hidden_dim
    self.char_size = char_size
    self.word_dim = embedding_dim
    self.word_hidden_dim = word_hidden_dim
    self.word_size = vocab_size
    self.if_highway = if_highway

    # token-level self-attention over [word emb; char features]; note that this
    # overrides the d_model argument
    self.d_model = char_hidden_dim * 2 + embedding_dim
    self.n_heads = n_heads
    self.pad_value = pad_value
    self.self_attn = MultiHeadedAttention(n_heads, char_hidden_dim * 2 + embedding_dim)

    self.char_embeds = nn.Embedding(char_size, char_dim)
    self.forw_char_lstm = nn.LSTM(char_dim, char_hidden_dim, num_layers=char_rnn_layers,
                                  bidirectional=False, dropout=dropout_ratio)
    self.back_char_lstm = nn.LSTM(char_dim, char_hidden_dim, num_layers=char_rnn_layers,
                                  bidirectional=False, dropout=dropout_ratio)
    self.char_rnn_layers = char_rnn_layers

    self.word_embeds = nn.Embedding(vocab_size, embedding_dim)

    # one word-level LSTM plus one self-attention block per task
    self.word_lstms_list = nn.ModuleList()
    self.lstm_self_attn_list = nn.ModuleList()
    word_input_size = embedding_dim + char_hidden_dim * 2 + self.d_model
    self.word_lstm_task_0 = nn.LSTM(embedding_dim + char_hidden_dim * 2 + self.d_model, word_hidden_dim // 2,
                                    num_layers=word_rnn_layers, bidirectional=True, dropout=dropout_ratio)
    self.word_lstm_task_0_self_attn = MultiHeadedAttention(n_heads, word_hidden_dim)
    self.word_lstms_list.append(self.word_lstm_task_0)
    self.lstm_self_attn_list.append(self.word_lstm_task_0_self_attn)
    for i in range(file_num - 1):
        # due to the self-attention after every LSTM, the input size to the next
        # LSTM layer grows by 2 * word_hidden_dim
        self.word_lstms_list.append(
            nn.LSTM(word_input_size + 2 * word_hidden_dim, word_hidden_dim // 2, num_layers=1,
                    bidirectional=True, dropout=dropout_ratio))
        self.lstm_self_attn_list.append(MultiHeadedAttention(n_heads, word_hidden_dim))
    self.word_rnn_layers = word_rnn_layers

    self.dropout = nn.Dropout(p=dropout_ratio)

    # one CRF per task over the concatenated LSTM and self-attention features
    self.tagset_sizes = tagset_sizes
    self.crflist = nn.ModuleList()
    for i in range(file_num):
        if large_CRF:
            self.crflist.append(crf.CRF_L(2 * word_hidden_dim, tagset_sizes[i]))
        else:
            self.crflist.append(crf.CRF_S(2 * word_hidden_dim, tagset_sizes[i]))

    if if_highway:
        self.forw2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.forw2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.fb2char = highway.hw(2 * char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)

    # language-model pre-training heads
    self.char_pre_train_out = nn.Linear(char_hidden_dim, char_size)
    self.word_pre_train_out = nn.Linear(char_hidden_dim, in_doc_words)

    self.batch_size = 1
    self.word_seq_length = 1
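# Dimension bookkeeping implied by the attention-augmented constructor above. The forward
# pass is not shown here, so the exact concatenation is an inference from the layer sizes:
# each task's BiLSTM emits word_hidden_dim features, its self-attention emits another
# word_hidden_dim, and the two together give the 2 * word_hidden_dim fed to that task's CRF
# and carried into the next task's LSTM. All concrete values below are illustrative.
char_hidden_dim, embedding_dim, word_hidden_dim = 300, 100, 300

d_model = char_hidden_dim * 2 + embedding_dim                 # width of the token-level self-attention
word_input_size = embedding_dim + char_hidden_dim * 2 + d_model
assert word_input_size == 2 * d_model                         # attention output doubles the token features

lstm_out = word_hidden_dim                                    # bidirectional, hidden size word_hidden_dim // 2
attn_out = word_hidden_dim                                    # MultiHeadedAttention(n_heads, word_hidden_dim)
crf_in = lstm_out + attn_out                                  # matches crf.CRF_L(2 * word_hidden_dim, ...)
next_task_lstm_in = word_input_size + crf_in                  # matches the loop over file_num - 1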
def __init__(self, tagset_size, char_size, char_dim, char_hidden_dim, char_rnn_layers, embedding_dim,
             word_hidden_dim, vocab_size, dropout_ratio, repeats, which_loss, large_CRF=True,
             if_highway=False, in_doc_words=2, highway_layers=1, layer_residual=True, block_residual=True):
    super(LM_LSTM_CRF, self).__init__()
    self.char_dim = char_dim
    self.char_hidden_dim = char_hidden_dim
    self.char_size = char_size
    self.word_dim = embedding_dim
    self.word_hidden_dim = word_hidden_dim
    self.word_size = vocab_size
    self.if_highway = if_highway

    # hyper-parameters of the iterated dilated CNN block; adjust them here if needed
    self.initial_filter_width = 3
    self.initial_padding = 1
    self.padding = [1, 2, 1]
    self.dilation = [1, 2, 1]
    self.take_layer = [False, False, True]
    self.repeats = int(repeats)
    self.which_loss = which_loss
    self.layer_residual = layer_residual
    self.block_residual = block_residual
    # debug print of the loss-selection arguments
    print(repeats, which_loss, type(which_loss), which_loss, self.which_loss == "block")

    self.char_embeds = nn.Embedding(char_size, char_dim)
    self.forw_char_lstm = nn.LSTM(char_dim, char_hidden_dim, num_layers=char_rnn_layers,
                                  bidirectional=False, dropout=dropout_ratio)
    self.back_char_lstm = nn.LSTM(char_dim, char_hidden_dim, num_layers=char_rnn_layers,
                                  bidirectional=False, dropout=dropout_ratio)
    self.char_rnn_layers = char_rnn_layers

    # word embedding layer
    self.word_embeds = nn.Embedding(vocab_size, embedding_dim)

    # initial CNN layer over [word emb; char features], followed by the dilated layers
    initial_filter_width = self.initial_filter_width
    initial_num_filters = word_hidden_dim
    self.itdicnn0 = nn.Conv1d(embedding_dim + char_hidden_dim * 2, initial_num_filters,
                              kernel_size=initial_filter_width, padding=self.initial_padding, bias=True)
    self.itdicnn = nn.ModuleList([
        nn.Conv1d(initial_num_filters, initial_num_filters, kernel_size=initial_filter_width,
                  padding=self.padding[i], dilation=self.dilation[i], bias=True)
        for i in range(0, len(self.padding))
    ])

    self.dropout = nn.Dropout(p=dropout_ratio)

    self.tagset_size = tagset_size
    if large_CRF:
        self.crf = crf.CRF_L(word_hidden_dim, tagset_size)
    else:
        self.crf = crf.CRF_S(word_hidden_dim, tagset_size)

    if if_highway:
        self.forw2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.forw2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.back2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
        self.fb2char = highway.hw(2 * char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)

    # language-model pre-training heads
    self.char_pre_train_out = nn.Linear(char_hidden_dim, char_size)
    self.word_pre_train_out = nn.Linear(char_hidden_dim, in_doc_words)

    self.batch_size = 1
    self.word_seq_length = 1
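# Receptive-field arithmetic for the iterated dilated CNN above, assuming the three-layer
# block is applied `repeats` times as the hyper-parameter suggests: the initial width-3
# convolution covers 3 tokens, and every width-3 layer with dilation d widens the field
# by 2 * d, so one pass over dilation = [1, 2, 1] adds 8 tokens. This is an illustrative
# check, not part of the original class.
def receptive_field(repeats, dilations=(1, 2, 1), kernel_size=3):
    rf = kernel_size                          # initial nn.Conv1d with kernel_size=3
    for _ in range(repeats):
        for d in dilations:
            rf += (kernel_size - 1) * d       # each dilated layer widens the field by 2 * d
    return rf

assert receptive_field(1) == 11
assert receptive_field(4) == 35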