def __init__(self, params, vocab):
    """Build the tagger from an ``Lstm`` module, a linear layer and a ``CRF``.

    Args:
        params: configuration object. ``num_slot``, ``hidden_dim`` and
            ``bidirection`` are read here; the object is also handed to
            ``Lstm``.
        vocab: passed through unchanged to ``Lstm``.
    """
    super(SLUTagger, self).__init__()

    self.lstm = Lstm(params, vocab)
    self.num_slot = params.num_slot

    # The linear layer's input width is doubled when ``bidirection`` is set.
    if params.bidirection:
        self.hidden_dim = params.hidden_dim * 2
    else:
        self.hidden_dim = params.hidden_dim

    self.linear = nn.Linear(self.hidden_dim, self.num_slot)
    self.crf_layer = CRF(self.num_slot)
def __init__(self, params, vocab):
    """Build the tagger from an ``Lstm`` module, a linear layer and a ``CRF``.

    Args:
        params: configuration object. ``num_entity_label``, ``hidden_dim``
            and ``bidirection`` are read here; the object is also handed to
            ``Lstm``.
        vocab: passed through unchanged to ``Lstm``.
    """
    super(BiLSTMCRFTagger, self).__init__()

    self.lstm = Lstm(params, vocab)
    self.num_entity_label = params.num_entity_label

    # The linear layer's input width is doubled when ``bidirection`` is set.
    if params.bidirection:
        self.hidden_dim = params.hidden_dim * 2
    else:
        self.hidden_dim = params.hidden_dim

    self.linear = nn.Linear(self.hidden_dim, self.num_entity_label)
    self.crf_layer = CRF(self.num_entity_label)
def __init__(self, params, vocab):
    """Build the coarse tagger: ``Lstm``, two linear heads and two ``CRF`` layers.

    Args:
        params: configuration object. ``num_binslot``, ``hidden_dim`` and
            ``bidirection`` are read here; the object is also handed to
            ``Lstm``.
        vocab: passed through unchanged to ``Lstm``.
    """
    super(CoarseSLUTagger, self).__init__()

    self.lstm = Lstm(params, vocab)
    self.num_binslot = params.num_binslot

    # Both linear heads take an input whose width is doubled when
    # ``bidirection`` is set.
    if params.bidirection:
        self.hidden_dim = params.hidden_dim * 2
    else:
        self.hidden_dim = params.hidden_dim

    # Coarse head over ``num_binslot`` labels and a 3-label chunking head.
    self.linear = nn.Linear(self.hidden_dim, self.num_binslot)
    self.linear_chunking = nn.Linear(self.hidden_dim, 3)
    self.crf_layer_chunking = CRF(3)
    self.crf_layer = CRF(self.num_binslot)

    # Per-domain additive emission masks, computed once at construction.
    self.domain_coarse_mask = self.gen_emission_mask()
class CoarseSLUTagger(nn.Module):
    """Coarse slot tagger: ``Lstm`` features -> linear emissions -> ``CRF``.

    Two heads share the ``Lstm`` output:

    * a 3-label chunking head (``linear_chunking`` + ``crf_layer_chunking``);
    * a ``num_binslot``-label coarse head (``linear`` + ``crf_layer``).

    At evaluation time the coarse emissions receive a per-domain additive
    mask produced by ``gen_emission_mask``.

    Tensors created by this class follow the device of the module's own
    parameters or activations instead of being forced onto CUDA, so the model
    also runs on CPU-only hosts.
    """

    def __init__(self, params, vocab):
        """Create the submodules and pre-compute the per-domain masks.

        Args:
            params: configuration object. ``num_binslot``, ``hidden_dim`` and
                ``bidirection`` are read here; the object is also handed to
                ``Lstm``.
            vocab: passed through unchanged to ``Lstm``.
        """
        super(CoarseSLUTagger, self).__init__()

        self.lstm = Lstm(params, vocab)
        self.num_binslot = params.num_binslot
        # The linear heads see a doubled width when ``bidirection`` is set.
        self.hidden_dim = params.hidden_dim * 2 if params.bidirection else params.hidden_dim

        self.linear = nn.Linear(self.hidden_dim, self.num_binslot)
        self.linear_chunking = nn.Linear(self.hidden_dim, 3)
        self.crf_layer_chunking = CRF(3)
        self.crf_layer = CRF(self.num_binslot)

        self.domain_coarse_mask = self.gen_emission_mask()

    @staticmethod
    def _trim_to_lengths(decoded, lengths):
        """Cut each decoded row to its own length and return numpy arrays."""
        return [
            decoded[i, :length].detach().cpu().numpy()
            for i, length in enumerate(lengths)
        ]

    def chunking(self, X, y, iseval, lengths):
        """Run the 3-label chunking head.

        Args:
            X: input ids, (bsz, seq_len).
            y: per-example label sequences; only used when training.
            iseval: falsy -> return the CRF loss; truthy -> return decodes.
            lengths: lengths of x (bsz, ).

        Returns:
            Training: the value of ``crf_layer_chunking.loss``.
            Evaluation: list of numpy arrays, one per example, each cut to
            that example's length.
        """
        lstm_hidden = self.lstm(X)  # (bsz, seq_len, hidden_dim)
        emissions = self.linear_chunking(lstm_hidden)

        if not iseval:
            # Labels are only needed (and only padded) for the loss.
            padded_y = self.pad_label(lengths, y)
            return self.crf_layer_chunking.loss(emissions, padded_y)

        decoded = self.crf_layer_chunking(emissions)
        return self._trim_to_lengths(decoded, lengths)

    def forward(self, X, y_dm, iseval=False, lengths=None):
        """Compute coarse-slot emissions, domain-masked when evaluating.

        Input:
            X: (bsz, seq_len)
            y_dm: iterable of per-example domain ids; each element must
                support ``.item()`` and index ``self.domain_coarse_mask``.
            iseval: when truthy, the domain mask is added to the emissions.
            lengths: unused; kept for interface compatibility.
        Output:
            prediction: (bsz, seq_len, num_binslot)
            lstm_hidden: (bsz, seq_len, hidden_size)
        """
        lstm_hidden = self.lstm(X)  # (bsz, seq_len, hidden_dim)
        prediction = self.linear(lstm_hidden)

        if iseval:
            # One mask row per example, (bsz, num_binslot), broadcast over
            # the sequence axis. Built only when it is actually applied.
            domain_masks = torch.stack(
                [self.domain_coarse_mask[dm_id.item()] for dm_id in y_dm]
            )
            domain_masks = domain_masks.float().to(prediction.device)
            prediction = prediction + domain_masks.unsqueeze(1)

        return prediction, lstm_hidden

    def crf_decode(self, inputs, lengths):
        """Decode label sequences with the coarse CRF.

        Input:
            inputs: (bsz, seq_len, num_entity)
            lengths: lengths of x (bsz, )
        Output:
            prediction: list of numpy arrays, one per example, each cut to
                that example's length.
        """
        return self._trim_to_lengths(self.crf_layer(inputs), lengths)

    def crf_loss(self, inputs, lengths, y):
        """Compute the coarse CRF loss.

        Input:
            inputs: (bsz, seq_len, num_entity)
            lengths: lengths of x (bsz, )
            y: label of slot value (bsz, seq_len)
        Output:
            crf_loss: loss of crf
        """
        padded_y = self.pad_label(lengths, y)
        return self.crf_layer.loss(inputs, padded_y)

    def pad_label(self, lengths, y):
        """Pad the label sequences into one (bsz, max_len) long tensor.

        Positions past each example's length are filled with ``SLOT_PAD``.
        The result is placed on the device of ``self.linear``'s weights, so
        it matches the emissions produced by this module.

        Args:
            lengths: per-example lengths (bsz, ).
            y: per-example label sequences; ``y[i]`` must hold exactly
                ``lengths[i]`` labels.

        Returns:
            LongTensor of shape (bsz, max(lengths)).
        """
        bsz = len(lengths)
        max_len = int(torch.as_tensor(lengths).max())
        padded_y = torch.full((bsz, max_len), SLOT_PAD, dtype=torch.long)
        for i in range(bsz):
            length = int(lengths[i])
            padded_y[i, :length] = torch.as_tensor(y[i], dtype=torch.long)
        return padded_y.to(self.linear.weight.device)

    def gen_emission_mask(self):
        """Build one additive emission mask per domain index.

        For domain ``i`` the mask has one entry per label in ``y1_set``:
        0 for index 0 and for the ``B-``/``I-`` labels of every father slot
        of that domain's slots, and -1000000 everywhere else.

        Masks are created on CUDA when it is available (the original
        placement) and on CPU otherwise; ``forward`` moves them to the
        emissions' device before use.

        Returns:
            dict mapping domain index -> 1-D integer tensor of length
            ``len(y1_set)``.
        """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Invert father -> [sons] into son -> father.
        son_to_father = {}
        for father, sons in father_son_slot.items():
            for son in sons:
                son_to_father[son] = father

        mask = {}
        for i, domain in enumerate(domain_set):
            allowed = [-1000000] * len(y1_set)
            allowed[0] = 0  # index 0 is always permitted
            for slot in domain2slot[domain]:
                father = son_to_father[slot]
                allowed[y1_set.index('B-' + father)] = 0
                allowed[y1_set.index('I-' + father)] = 0
            mask[i] = torch.tensor(allowed, device=device)
        return mask
class SLUTagger(nn.Module):
    """Slot tagger: ``Lstm`` features -> linear emissions -> ``CRF``.

    Padded label tensors follow the device of the module's own parameters
    instead of being forced onto CUDA, so the model also runs on CPU-only
    hosts.
    """

    def __init__(self, params, vocab):
        """Create the ``Lstm``, the linear emission layer and the ``CRF``.

        Args:
            params: configuration object. ``num_slot``, ``hidden_dim`` and
                ``bidirection`` are read here; the object is also handed to
                ``Lstm``.
            vocab: passed through unchanged to ``Lstm``.
        """
        super(SLUTagger, self).__init__()

        self.lstm = Lstm(params, vocab)
        self.num_slot = params.num_slot
        # The linear layer sees a doubled width when ``bidirection`` is set.
        self.hidden_dim = params.hidden_dim * 2 if params.bidirection else params.hidden_dim

        self.linear = nn.Linear(self.hidden_dim, self.num_slot)
        self.crf_layer = CRF(self.num_slot)

    def forward(self, X, lengths=None):
        """Compute slot emissions.

        Input:
            X: (bsz, seq_len)
            lengths: unused; kept for interface compatibility.
        Output:
            prediction: (bsz, seq_len, num_slot)
        """
        lstm_hidden = self.lstm(X)  # (bsz, seq_len, hidden_dim)
        return self.linear(lstm_hidden)

    def crf_decode(self, inputs, lengths):
        """Decode label sequences with the CRF.

        Input:
            inputs: (bsz, seq_len, num_entity)
            lengths: lengths of x (bsz, )
        Output:
            prediction: list of numpy arrays, one per example, each cut to
                that example's length.
        """
        decoded = self.crf_layer(inputs)
        return [
            decoded[i, :length].detach().cpu().numpy()
            for i, length in enumerate(lengths)
        ]

    def crf_loss(self, inputs, lengths, y):
        """Compute the CRF loss.

        Input:
            inputs: (bsz, seq_len, num_entity)
            lengths: lengths of x (bsz, )
            y: label of slot value (bsz, seq_len)
        Output:
            crf_loss: loss of crf
        """
        padded_y = self.pad_label(lengths, y)
        return self.crf_layer.loss(inputs, padded_y)

    def pad_label(self, lengths, y):
        """Pad the label sequences into one (bsz, max_len) long tensor.

        Positions past each example's length are filled with ``SLOT_PAD``.
        The result is placed on the device of ``self.linear``'s weights, so
        it matches the emissions produced by this module.

        Args:
            lengths: per-example lengths (bsz, ).
            y: per-example label sequences; ``y[i]`` must hold exactly
                ``lengths[i]`` labels.

        Returns:
            LongTensor of shape (bsz, max(lengths)).
        """
        bsz = len(lengths)
        max_len = int(torch.as_tensor(lengths).max())
        padded_y = torch.full((bsz, max_len), SLOT_PAD, dtype=torch.long)
        for i in range(bsz):
            length = int(lengths[i])
            padded_y[i, :length] = torch.as_tensor(y[i], dtype=torch.long)
        return padded_y.to(self.linear.weight.device)