def position_aware_attn(self, hidden_mat, last_h, start1, end1, start2, end2, seq_len):
    # Position-aware attention over the encoder states: each token gets two
    # position features, its offset to the trigger span and to the entity span.
    tri_pos_list = []
    ent_pos_list = []
    for i in range(seq_len):
        tri_pos_list.append(io_utils.relative_position(start1, end1, i))
        ent_pos_list.append(io_utils.relative_position(start2, end2, i))
    tri_pos_emb = self.position_embed(tri_pos_list)
    tri_pos_mat = ops.cat(tri_pos_emb, 1)
    ent_pos_emb = self.position_embed(ent_pos_list)
    ent_pos_mat = ops.cat(ent_pos_emb, 1)
    #expand_last_h = nn.cat([last_h] * seq_len, 1)
    # (birnn * 2 + pos_emb * 2, seq_len)
    att_input = ops.cat([hidden_mat, tri_pos_mat, ent_pos_mat], 0)
    hidden = self.attn_hidden(att_input)
    attn_out = self.attn_out(hidden)
    # (1, seq_len)
    attn_prob = nn.softmax(attn_out, dim=1)
    # (rnn_dim * 2, 1): attention-weighted sum of the encoder states
    rep = hidden_mat * dy.transpose(attn_prob)
    return rep
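
# The attention above builds its two position-feature sequences via
# io_utils.relative_position. A minimal standalone sketch of what that helper
# is assumed to compute: the clipped offset of token i relative to the span
# [start, end] (negative before the span, 0 inside it, positive after it).
# The real implementation lives in io_utils and may differ (e.g. bucketing).
def _relative_position_sketch(start, end, i):
    if i < start:
        return i - start  # token precedes the span
    if i > end:
        return i - end    # token follows the span
    return 0              # token lies inside the span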
def decode_arg(self, beta_embed, lmda_embed, sigma_embed, delta_embed, out_embed,
               hidden_mat, tri_idx, ent_start, ent_end, seq_len, last_h):
    attn_rep = self.position_aware_attn(hidden_mat, last_h, tri_idx, tri_idx,
                                        ent_start, ent_end, seq_len)
    state_embed = ops.cat([beta_embed, lmda_embed, sigma_embed, delta_embed,
                           out_embed, attn_rep], dim=0)
    hidden = self.hidden_arg(state_embed)
    out = self.output_arg(hidden)
    np_score = out.npvalue().flatten()
    return np.argmax(np_score)
def forward_ent_corel(self, beta_embed, lmda_embed, sigma_embed, delta_embed, out_embed,
                      hidden_mat, start1, end1, start2, end2, seq_len, last_h, gold_arg):
    # Auxiliary task: predict whether the two entity spans belong to the same event.
    attn_rep = self.position_aware_attn(hidden_mat, last_h, start1, end1,
                                        start2, end2, seq_len)
    state_embed = ops.cat([beta_embed, lmda_embed, sigma_embed, delta_embed,
                           out_embed, attn_rep], dim=0)
    state_embed = dy.dropout(state_embed, 0.2)
    hidden = self.hidden_ent_corel(state_embed)
    out = self.output_ent_corel(hidden)
    loss = dy.pickneglogsoftmax(out, gold_arg)
    return loss
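
# The gold label passed to forward_ent_corel is the same-event indicator built
# from self.same(args) in __call__ below. A hedged sketch of that helper,
# assuming each gold argument is an (ent_start, ent_end, tri_idx, role_type)
# tuple: two entity starts form a positive pair when both entities are
# arguments of the same trigger. The actual self.same may differ.
def _same_event_entity_pairs_sketch(gold_args):
    ents_by_trigger = {}
    for ent_start, _ent_end, tri_idx, _role in gold_args:
        ents_by_trigger.setdefault(tri_idx, []).append(ent_start)
    pairs = set()
    for ent_starts in ents_by_trigger.values():
        for a in ent_starts:
            for b in ent_starts:
                if a != b:
                    pairs.add((a, b))
    return pairs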
def forward_arg(self, beta_embed, lmda_embed, sigma_embed, delta_embed, out_embed,
                hidden_mat, tri_idx, ent_start, ent_end, seq_len, last_h, gold_arg):
    # Training loss for the argument-role classifier.
    attn_rep = self.position_aware_attn(hidden_mat, last_h, tri_idx, tri_idx,
                                        ent_start, ent_end, seq_len)
    state_embed = ops.cat([beta_embed, lmda_embed, sigma_embed, delta_embed,
                           out_embed, attn_rep], dim=0)
    state_embed = dy.dropout(state_embed, 0.25)
    hidden = self.hidden_arg(state_embed)
    out = self.output_arg(hidden)
    # Focal-style alternative kept for reference:
    # probs = dy.softmax(out)
    # gold_prob = dy.pick(probs, gold_arg)
    # log_gold_prob = dy.log(gold_prob)
    # loss_weight = dy.pow(1.03 - gold_prob, dy.scalarInput(2))
    # loss = - loss_weight * log_gold_prob
    loss = dy.pickneglogsoftmax(out, gold_arg)
    return loss
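
# The commented-out block in forward_arg describes a focal-style alternative
# to the plain negative log-likelihood: weight the NLL of the gold role by
# (1.03 - p_gold)^2 so easy, high-confidence examples contribute less.
# A standalone sketch of that variant (not what forward_arg currently uses):
def _focal_style_arg_loss_sketch(out, gold_arg, margin=1.03, gamma=2):
    probs = dy.softmax(out)
    gold_prob = dy.pick(probs, gold_arg)
    log_gold_prob = dy.log(gold_prob)
    loss_weight = dy.pow(margin - gold_prob, dy.scalarInput(gamma))
    return -loss_weight * log_gold_prob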
def __call__(self, toks, hidden_state_list, last_h, oracle_actions=None,
             oracle_action_strs=None, is_train=True, ents=None, tris=None, args=None):
    ent_dic = dict()
    tri_dic = dict()
    # (ent_start, tri_idx) -> role_type
    gold_arg_dict = {(arg[0], arg[2]): arg[-1] for arg in args}
    same_event_ents = self.same(args)
    args = []
    hidden_mat = ops.cat(hidden_state_list, 1)
    seq_len = len(toks)
    buffer = nn.Buffer(self.bi_rnn_dim, hidden_state_list)
    losses = []
    loss_rels = []
    loss_roles = []
    pred_action_strs = []
    self.sigma_rnn.init_sequence(not is_train)
    self.delta_rnn.init_sequence(not is_train)
    self.part_ent_rnn.init_sequence(not is_train)
    self.actions_rnn.init_sequence(not is_train)
    self.out_rnn.init_sequence(not is_train)
    steps = 0
    while not (buffer.is_empty() and self.lambda_var.is_empty()
               and self.part_ent_rnn.is_empty()):
        pre_action = None if self.actions_rnn.is_empty() else self.actions_rnn.last_idx()
        # Based on the current parser state, collect the valid actions.
        valid_actions = []
        if pre_action is not None and self.act.is_ent_gen(pre_action):
            valid_actions += [self.act.entity_back_id]  #[self.act.entity_back_id, self.act.entity_shift_id]
        # A partial entity is still open; finish it before any other action.
        elif not self.part_ent_rnn.is_empty():
            valid_actions += [self.act.entity_shift_id]
            valid_actions += self.act.get_ent_gen_list()
        elif not self.lambda_var.is_empty():
            if self.sigma_rnn.is_empty():
                valid_actions += [self.act.shift_id, self.act.copy_shift_id]
            else:
                valid_actions += [self.act.no_pass_id]
                lmda_idx = self.lambda_var.idx
                sigma_idx = self.sigma_rnn.last_idx()
                if lmda_idx in ent_dic and sigma_idx in tri_dic:
                    valid_actions += [self.act.right_pass_id]
                elif lmda_idx in tri_dic and sigma_idx in ent_dic:
                    valid_actions += [self.act.left_pass_id]
        else:
            valid_actions += [self.act.entity_shift_id, self.act.o_del_id]
            valid_actions += self.act.get_tri_gen_list()

        action = None
        if buffer.is_empty():
            self.empty_times += 1
        beta_embed = self.empty_buffer_emb if buffer.is_empty() else buffer.hidden_embedding()
        lmda_embed = self.lambda_var.embedding()
        sigma_embed = self.sigma_rnn.embedding()
        delta_embed = self.delta_rnn.embedding()
        part_ent_embed = self.part_ent_rnn.embedding()
        action_embed = self.actions_rnn.embedding()
        out_embed = self.out_rnn.embedding()
        state_embed = ops.cat([beta_embed, lmda_embed, sigma_embed, delta_embed,
                               part_ent_embed, action_embed, out_embed], dim=0)
        if is_train:
            state_embed = dy.dropout(state_embed, self.config['dp_out'])
        hidden_rep = self.hidden_linear(state_embed)
        logits = self.output_linear(hidden_rep)
        log_probs = dy.log_softmax(logits, valid_actions)

        if is_train:
            action = oracle_actions[steps]
            action_str = oracle_action_strs[steps]
            if action not in valid_actions:
                raise RuntimeError('Action %s is not in valid_actions' % action_str)
            # append the action-specific loss
            #if self.act.is_o_del(action) or self.act.is_tri_gen(action):
            losses.append(dy.pick(log_probs, action))
            #val, idx = log_probs.tensor_value().topk(0, 5)
        else:
            np_log_probs = log_probs.npvalue()
            act_prob = np.max(np_log_probs)
            action = np.argmax(np_log_probs)
            action_str = self.act.to_act_str(action)
            pred_action_strs.append(action_str)
            #print(action_str)
        #if True:continue

        # Execute the action to update the parser state.
        if self.act.is_o_del(action):
            hx, idx = buffer.pop()
            self.out_rnn.push(hx, idx)
        elif self.act.is_tri_gen(action):
            hx, idx = buffer.pop()
            type_id = self.act.to_tri_id(action)
            tri_dic[idx] = (idx, type_id)
            tri_embed = self.tri_table[type_id]
            tri_rep = self.tri_to_lmda(ops.cat([hx, tri_embed], dim=0))
            #tri_rep = self.tri_to_lmda(hx)
            self.lambda_var.push(tri_rep, idx, nn.LambdaVar.TRIGGER)
        elif self.act.is_ent_shift(action):
            if buffer.is_empty():
                break
            hx, idx = buffer.pop()
            self.part_ent_rnn.push(hx, idx)
        elif self.act.is_ent_gen(action):
            start, end = self.part_ent_rnn.idx_range()
            type_id = self.act.to_ent_id(action)
            ent = (start, end, type_id)
            ent_dic[start] = ent
            hx, _ = self.part_ent_rnn.last_state()
            ent_embed = self.ent_table[type_id]
            ent_rep = self.ent_to_lmda(ops.cat([hx, ent_embed], dim=0))
            #ent_rep = self.ent_to_lmda(hx)
            self.lambda_var.push(ent_rep, start, nn.LambdaVar.ENTITY)
        elif self.act.is_ent_back(action):
            # Move the buffer pointer back so scanning resumes just after the
            # entity's first token, then clear the partial-entity stack.
            new_idx = buffer.idx
            new_idx -= len(self.part_ent_rnn) - 1
            buffer.move_pointer(new_idx)
            self.part_ent_rnn.clear()
        elif self.act.is_shift(action):
            while not self.delta_rnn.is_empty():
                self.sigma_rnn.push(*self.delta_rnn.pop())
            self.sigma_rnn.push(*self.lambda_var.pop())
        elif self.act.is_copy_shift(action):
            while not self.delta_rnn.is_empty():
                self.sigma_rnn.push(*self.delta_rnn.pop())
            self.sigma_rnn.push(*self.lambda_var.pop())
            buffer.move_back()
        elif self.act.is_no_pass(action):
            lmda_idx = self.lambda_var.idx
            sigma_last_embed, sigma_last_idx = self.sigma_rnn.pop()
            if lmda_idx in ent_dic and sigma_last_idx in ent_dic:
                # Auxiliary loss: do these two entities belong to the same event?
                ent_start1, ent_end1, _ = ent_dic[lmda_idx]
                ent_start2, ent_end2, _ = ent_dic[sigma_last_idx]
                corel = 1 if (lmda_idx, sigma_last_idx) in same_event_ents else 0
                loss_corel = self.multi_task.forward_ent_corel(
                    beta_embed, lmda_embed, sigma_embed, delta_embed, out_embed,
                    hidden_mat, ent_start1, ent_end1, ent_start2, ent_end2,
                    seq_len, last_h, corel)
                loss_rels.append(loss_corel)
            self.delta_rnn.push(sigma_last_embed, sigma_last_idx)
        elif self.act.is_left_pass(action):
            # lambda holds a trigger, the top of sigma holds an entity.
            lmda_idx = self.lambda_var.idx
            sigma_last_embed, sigma_last_idx = self.sigma_rnn.pop()
            tri_idx = lmda_idx
            ent_start, ent_end, _ = ent_dic[sigma_last_idx]
            if is_train:
                role_label = gold_arg_dict.get((ent_start, tri_idx), self.arg_null_id)
                loss_role = self.multi_task.forward_arg(
                    beta_embed, lmda_embed, sigma_embed, delta_embed, out_embed,
                    hidden_mat, tri_idx, ent_start, ent_end, seq_len, last_h,
                    role_label)
                loss_roles.append(loss_role)
            else:
                role_label = self.multi_task.decode_arg(
                    beta_embed, lmda_embed, sigma_embed, delta_embed, out_embed,
                    hidden_mat, tri_idx, ent_start, ent_end, seq_len, last_h)
            event = (ent_start, ent_end, tri_idx, role_label)
            args.append(event)
            self.delta_rnn.push(sigma_last_embed, sigma_last_idx)
        elif self.act.is_right_pass(action):
            # lambda holds an entity, the top of sigma holds a trigger.
            lmda_idx = self.lambda_var.idx
            sigma_last_embed, sigma_last_idx = self.sigma_rnn.pop()
            tri_idx = sigma_last_idx
            ent_start, ent_end, _ = ent_dic[lmda_idx]
            if is_train:
                role_label = gold_arg_dict.get((ent_start, tri_idx), self.arg_null_id)
                loss_role = self.multi_task.forward_arg(
                    beta_embed, lmda_embed, sigma_embed, delta_embed, out_embed,
                    hidden_mat, tri_idx, ent_start, ent_end, seq_len, last_h,
                    role_label)
                loss_roles.append(loss_role)
            else:
                role_label = self.multi_task.decode_arg(
                    beta_embed, lmda_embed, sigma_embed, delta_embed, out_embed,
                    hidden_mat, tri_idx, ent_start, ent_end, seq_len, last_h)
            event = (ent_start, ent_end, tri_idx, role_label)
            args.append(event)
            self.delta_rnn.push(sigma_last_embed, sigma_last_idx)
        else:
            raise RuntimeError('Unknown action type:' + str(action))

        self.actions_rnn.push(self.act_table[action], action)
        steps += 1
        #if not is_train:print(len(self.actions_rnn.indices), self.actions_rnn.indices)

    pred_args = []
    if is_train:
        pred_args = set(args)
    else:
        # At decode time, keep only arguments whose role type is allowed for
        # the (entity type, trigger type) pair by the event constraints.
        for arg in args:
            ent_start, ent_end, tri_idx, role_type = arg
            ent_type_id = ent_dic[ent_start][-1]
            tri_type_id = tri_dic[tri_idx][-1]
            valid_args = self.event_cons.get_constraint_arg_types(ent_type_id, tri_type_id)
            if valid_args and role_type in valid_args:
                pred_args.append(arg)

    self.clear()
    return (losses, loss_roles, loss_rels, set(ent_dic.values()),
            set(tri_dic.values()), pred_args, pred_action_strs)
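
# A hedged usage sketch of how a training loop might combine the three loss
# lists returned by __call__ above. Everything here is assumed scaffolding:
# `parser` is an instance of this class, `trainer` a DyNet trainer, and the
# encoder outputs (hidden_state_list, last_h) plus the oracle fields come from
# the surrounding training code.
def _train_step_sketch(parser, trainer, toks, hidden_state_list, last_h,
                       oracle_actions, oracle_action_strs, ents, tris, gold_args):
    losses, loss_roles, loss_rels, _, _, _, _ = parser(
        toks, hidden_state_list, last_h,
        oracle_actions=oracle_actions, oracle_action_strs=oracle_action_strs,
        is_train=True, ents=ents, tris=tris, args=gold_args)
    total = -dy.esum(losses)  # action losses are picked log-probs, so negate
    if loss_roles:
        total = total + dy.esum(loss_roles)
    if loss_rels:
        total = total + dy.esum(loss_rels)
    total.backward()
    trainer.update()
    return total.value()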