# Shared dependencies assumed by the snippets in this section: numpy as np, torch,
# torch.nn.functional as F, and the pad_and_truncate() helper used to build the
# training features.
def evaluate(self, line, aspect):
    line = str(line)
    aspect = str(aspect)
    # The dataset format marks the aspect position in the sentence with "$T$".
    text_left, _, text_right = [s.lower().strip() for s in line.partition("$T$")]
    aspect = aspect.lower().strip()

    text_raw_indices = self.tokenizer.text_to_sequence(
        text_left + " " + aspect + " " + text_right)
    aspect_indices = self.tokenizer.text_to_sequence(aspect)
    aspect_len = np.sum(aspect_indices != 0)

    # BERT sentence-pair input: "[CLS] sentence [SEP] aspect [SEP]"
    text_bert_indices = self.tokenizer.text_to_sequence(
        '[CLS] ' + text_left + " " + aspect + " " + text_right + ' [SEP] ' + aspect + " [SEP]")
    bert_segments_ids = np.asarray(
        [0] * (np.sum(text_raw_indices != 0) + 2) + [1] * (aspect_len + 1))
    bert_segments_ids = pad_and_truncate(bert_segments_ids, self.tokenizer.max_seq_len)

    # Batch of size 1; the model expects its inputs as a list of tensors.
    text_bert_indices = torch.tensor([text_bert_indices], dtype=torch.int64).to(self.opt.device)
    bert_segments_ids = torch.tensor([bert_segments_ids], dtype=torch.int64).to(self.opt.device)
    t_outputs = self.model([text_bert_indices, bert_segments_ids])
    t_probs = F.softmax(t_outputs, dim=-1).detach().cpu().numpy()
    return t_probs
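# Usage sketch (illustrative only; `Inferer` and `opt` are hypothetical names for a
# wrapper holding self.model / self.tokenizer / self.opt, and the polarity label
# order is an assumption, not stated in the snippet above):
inferer = Inferer(opt)
t_probs = inferer.evaluate('the staff was friendly but the $T$ was cold', 'food')
print(t_probs.argmax(axis=-1))  # index of the predicted polarity class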
def evaluate(self, text, aspect):
    aspect = aspect.lower().strip()
    text_left, _, text_right = [s.strip() for s in text.lower().partition(aspect)]

    # Index sequences consumed by the non-BERT models.
    text_indices = self.tokenizer.text_to_sequence(text_left + " " + aspect + " " + text_right)
    context_indices = self.tokenizer.text_to_sequence(text_left + " " + text_right)
    left_indices = self.tokenizer.text_to_sequence(text_left)
    left_with_aspect_indices = self.tokenizer.text_to_sequence(text_left + " " + aspect)
    right_indices = self.tokenizer.text_to_sequence(text_right, reverse=True)
    right_with_aspect_indices = self.tokenizer.text_to_sequence(
        aspect + " " + text_right, reverse=True)
    aspect_indices = self.tokenizer.text_to_sequence(aspect)
    left_len = np.sum(left_indices != 0)
    aspect_len = np.sum(aspect_indices != 0)
    aspect_boundary = np.asarray([left_len, left_len + aspect_len - 1], dtype=np.int64)

    # BERT-style inputs.
    text_len = np.sum(text_indices != 0)
    concat_bert_indices = self.tokenizer.text_to_sequence(
        '[CLS] ' + text_left + " " + aspect + " " + text_right + ' [SEP] ' + aspect + " [SEP]")
    concat_segments_indices = [0] * (text_len + 2) + [1] * (aspect_len + 1)
    concat_segments_indices = pad_and_truncate(concat_segments_indices, self.tokenizer.max_seq_len)
    text_bert_indices = self.tokenizer.text_to_sequence(
        "[CLS] " + text_left + " " + aspect + " " + text_right + " [SEP]")
    aspect_bert_indices = self.tokenizer.text_to_sequence("[CLS] " + aspect + " [SEP]")

    data = {
        'concat_bert_indices': concat_bert_indices,
        'concat_segments_indices': concat_segments_indices,
        'text_bert_indices': text_bert_indices,
        'aspect_bert_indices': aspect_bert_indices,
        'text_indices': text_indices,
        'context_indices': context_indices,
        'left_indices': left_indices,
        'left_with_aspect_indices': left_with_aspect_indices,
        'right_indices': right_indices,
        'right_with_aspect_indices': right_with_aspect_indices,
        'aspect_indices': aspect_indices,
        'aspect_boundary': aspect_boundary,
    }

    # Feed only the columns the chosen model consumes, as a batch of size 1.
    t_inputs = [
        torch.tensor([data[col]], device=self.opt.device) for col in self.opt.inputs_cols
    ]
    t_outputs = self.model(t_inputs)
    t_probs = F.softmax(t_outputs, dim=-1).detach().cpu().numpy()
    return t_probs
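# Sketch of the column selection that drives t_inputs above (hypothetical values,
# not taken from this snippet; the real per-model mapping is defined wherever
# `opt` is built):
input_colses = {
    'lstm': ['text_indices'],
    'bert_spc': ['concat_bert_indices', 'concat_segments_indices'],
}
opt.inputs_cols = input_colses[opt.model_name]
# Note that this variant takes raw text containing the aspect literally, rather
# than a "$T$" placeholder:
t_probs = inferer.evaluate('the food was great but the service was slow', 'service')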
key_entity = []
for e in entities:
    content = title
    # Truncate the context around the entity so it fits within max_seq_len.
    index = content.find(e)
    if index + (tokenizer.max_seq_len // 2) < len(content):
        content = content[:index + (tokenizer.max_seq_len // 2)]
    if len(content) > tokenizer.max_seq_len:
        content = content[-(tokenizer.max_seq_len - len(e) - 3):]

    text_raw_indices = tokenizer.text_to_sequence(content)
    aspect_indices = tokenizer.text_to_sequence(e)
    aspect_len = np.sum(aspect_indices != 0)
    # Sentence-pair input: "[CLS] content [SEP] entity [SEP]"
    text_bert_indices = tokenizer.text_to_sequence('[CLS] ' + content + ' [SEP] ' + e + ' [SEP]')
    bert_segments_ids = np.asarray(
        [0] * (np.sum(text_raw_indices != 0) + 2) + [1] * (aspect_len + 1))
    bert_segments_ids = pad_and_truncate(bert_segments_ids, tokenizer.max_seq_len)

    text_bert_indices = torch.tensor([text_bert_indices], dtype=torch.int64).to(opt.device)
    bert_segments_ids = torch.tensor([bert_segments_ids], dtype=torch.int64).to(opt.device)
    inputs = [text_bert_indices, bert_segments_ids]
    outputs = model(inputs)
    t_probs = F.softmax(outputs, dim=-1).detach().cpu().numpy()
    sentiment = t_probs.argmax(axis=-1)
    if sentiment == 1:
        key_entity.append(e)

# Remove inferred key entities that are substrings of other entities.
final_res = []
for e1 in key_entity:
    flag = 0
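    # (The original snippet stops mid-loop here. The completion below is a sketch
    # inferred from the comment above, not the author's code: drop an entity that
    # is contained in another predicted key entity.)
    for e2 in key_entity:
        if e1 != e2 and e1 in e2:
            flag = 1
            break
    if flag == 0:
        final_res.append(e1)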