def predict(self, string: str): """ Tag a string. Parameters ---------- string : str Returns ------- result : Tuple """ input_ids, input_masks, segment_ids, s_tokens = xlnet_tokenization( self._tokenizer, [string], space_after_punct=True) s_tokens = s_tokens[0] r = self._execute( inputs=[input_ids, segment_ids, input_masks], input_labels=['Placeholder', 'Placeholder_1', 'Placeholder_2'], output_labels=['logits', 'heads_seq'], ) tagging, depend = r['logits'], r['heads_seq'] tagging = [self._idx2tag[i] for i in tagging[0]] depend = depend[0] - self._minus for i in range(len(depend)): if depend[i] == 0 and tagging[i] != 'root': tagging[i] = 'root' elif depend[i] != 0 and tagging[i] == 'root': depend[i] = 0 tagging = merge_sentencepiece_tokens_tagging(s_tokens, tagging, model='xlnet') tagging = list(zip(*tagging)) indexing = merge_sentencepiece_tokens_tagging(s_tokens, depend, model='xlnet') indexing = list(zip(*indexing)) result, indexing_ = [], [] for i in range(len(tagging)): index = int(indexing[i][1]) if index > len(tagging): index = len(tagging) elif (i + 1) == index: index = index + 1 elif index == -1: index = i indexing_.append((indexing[i][0], index)) result.append('%d\t%s\t_\t_\t_\t_\t%d\t%s\t_\t_' % (i + 1, tagging[i][0], index, tagging[i][1])) d = DependencyGraph('\n'.join(result), top_relation_label='root') return d, tagging, indexing_
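# A minimal usage sketch, not part of the library: `model` stands in for a
# loaded dependency model exposing the `predict` above, and the example
# sentence is illustrative. `DependencyGraph` is assumed to be NLTK's
# (the `top_relation_label` keyword matches NLTK's signature), so its
# `triples()` helper applies.
def _demo_dependency_predict(model):
    d, tagging, indexing = model.predict('Husein makan nasi di kedai')
    print(tagging)   # [(word, relation label), ...]
    print(indexing)  # [(word, 1-based head index; 0 means root), ...]
    for head, rel, dep in d.triples():
        print(head, rel, dep)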
def predict(self, string: str): """ Tag a string. Parameters ---------- string : str Returns ------- result : Tuple """ input_ids, input_masks, segment_ids, s_tokens = xlnet_tokenization( self._tokenizer, [string]) s_tokens = s_tokens[0] tagging, depend = self._sess.run( [self._logits, self._heads_seq], feed_dict={ self._X: input_ids, self._segment_ids: segment_ids, self._input_masks: input_masks, }, ) tagging = [self._idx2tag[i] for i in tagging[0]] depend = depend[0] - 1 for i in range(len(depend)): if depend[i] == 0 and tagging[i] != 'root': tagging[i] = 'root' elif depend[i] != 0 and tagging[i] == 'root': depend[i] = 0 tagging = merge_sentencepiece_tokens_tagging(s_tokens, tagging, model='xlnet') tagging = list(zip(*tagging)) indexing = merge_sentencepiece_tokens_tagging(s_tokens, depend, model='xlnet') indexing = list(zip(*indexing)) result, indexing_ = [], [] for i in range(len(tagging)): index = int(indexing[i][1]) if index > len(tagging): index = len(tagging) indexing_.append((indexing[i][0], index)) result.append('%d\t%s\t_\t_\t_\t_\t%d\t%s\t_\t_' % (i + 1, tagging[i][0], index, tagging[i][1])) d = DependencyGraph('\n'.join(result), top_relation_label='root') return d, tagging, indexing_
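# Each row handed to `DependencyGraph` follows the 10-column CoNLL layout:
# ID and FORM first, HEAD and DEPREL in columns 7 and 8, and `_` for the
# unused columns. A small sketch of one constructed row:
def _demo_conll_row():
    row = '%d\t%s\t_\t_\t_\t_\t%d\t%s\t_\t_' % (1, 'makan', 0, 'root')
    print(row)  # -> '1\tmakan\t_\t_\t_\t_\t0\troot\t_\t_'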
def predict(self, string: str): """ Tag a string. Parameters ---------- string : str Returns ------- result: Tuple[str, str] """ parsed_sequence, input_mask, bert_sequence = parse_bert_tagging( string, self._tokenizer) predicted = self._sess.run( self._logits, feed_dict={ self._X: [parsed_sequence], self._input_masks: [input_mask], }, )[0] t = [self._settings['idx2tag'][d] for d in predicted] merged = merge_sentencepiece_tokens_tagging(bert_sequence, t) return list(zip(merged[0], merged[1]))
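# A hedged usage sketch for the flat taggers: `model` stands in for a loaded
# instance, and the tags shown are illustrative, not the library's actual
# label set. The return value is a list of (token, tag) pairs.
def _demo_tagger_predict(model):
    result = model.predict('Kuala Lumpur ibu negara Malaysia')
    print(result)  # e.g. [('Kuala', 'location'), ('Lumpur', 'location'), ...]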
def _predict(self, strings):
    sequences = [
        encode_sentencepiece(
            self._tokenizer.sp, string, return_unicode=False, sample=False
        )
        for string in strings
    ]
    # Append the end-of-sequence id (1) to every encoded string, then pad
    # the batch to equal length.
    batch_x = [self._tokenizer.encode(string) + [1] for string in strings]
    batch_x = padding_sequence(batch_x)
    r = self._execute(
        inputs=[batch_x],
        input_labels=['x_placeholder'],
        output_labels=['greedy', 'tag_greedy'],
    )
    p, tag = r['greedy'], r['tag_greedy']
    results = []
    # Count non-padding positions per row to trim the tag predictions.
    nonzero = (p != 0).sum(axis=-1)
    for i in range(len(p)):
        decoded = self._tokenizer.decode(p[i].tolist())
        t = tag[i, :nonzero[i]]
        s = encode_sentencepiece(
            self._tokenizer.sp, decoded, return_unicode=False, sample=False
        )
        merged = merge_sentencepiece_tokens_tagging(s + ['<cls>'], t, model='xlnet')
        results.append(list(zip(merged[0], merged[1])))
    return results
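# How `(p != 0).sum(axis=-1)` recovers per-row lengths, assuming id 0 is the
# pad token: a sketch on toy data, independent of any model.
def _demo_nonzero_lengths():
    import numpy as np

    p = np.array([[13, 7, 42, 0, 0],
                  [5, 9, 0, 0, 0]])
    nonzero = (p != 0).sum(axis=-1)
    print(nonzero)  # [3 2] -> tag[i, :nonzero[i]] drops the pad positions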
def predict(self, string: str): """ Tag a string. Parameters ---------- string : str Returns ------- result : Tuple[str, str] """ input_ids, input_masks, segment_ids, s_tokens = xlnet_tokenization( self._tokenizer, [string]) s_tokens = s_tokens[0] predicted = self._sess.run( self._logits, feed_dict={ self._X: input_ids, self._segment_ids: segment_ids, self._input_masks: input_masks, }, )[0] t = [self._settings['idx2tag'][d] for d in predicted] merged = merge_sentencepiece_tokens_tagging(s_tokens, t, model='xlnet') return list(zip(*merged))
def predict(self, string: str): """ Tag a string. Parameters ---------- string: str Returns ------- result: Tuple """ parsed_sequence, bert_sequence = parse_bert_tagging( string, self._tokenizer ) tagging, depend = self._sess.run( [self._logits, self._heads_seq], feed_dict = {self._X: [parsed_sequence]}, ) tagging = [self._idx2tag[i] for i in tagging[0]] depend = depend[0] - 1 for i in range(len(depend)): if depend[i] == 0 and tagging[i] != 'root': tagging[i] = 'root' elif depend[i] != 0 and tagging[i] == 'root': depend[i] = 0 tagging = merge_sentencepiece_tokens_tagging(bert_sequence, tagging) tagging = list(zip(*tagging)) indexing = merge_sentencepiece_tokens_tagging(bert_sequence, depend) indexing = list(zip(*indexing)) result, indexing_ = [], [] for i in range(len(tagging)): index = int(indexing[i][1]) if index > len(tagging): index = len(tagging) indexing_.append((indexing[i][0], index)) result.append( '%d\t%s\t_\t_\t_\t_\t%d\t%s\t_\t_' % (i + 1, tagging[i][0], index, tagging[i][1]) ) d = DependencyGraph('\n'.join(result), top_relation_label = 'root') return d, tagging, indexing_
def predict(self, string: str): """ Tag a string. Parameters ---------- string: str Returns ------- result: Tuple """ parsed_sequence, input_mask, bert_sequence = parse_bert_tagging( string, self._tokenizer) r = self._execute( inputs=[[parsed_sequence]], input_labels=['Placeholder'], output_labels=['logits', 'heads_seq'], ) tagging, depend = r['logits'], r['heads_seq'] tagging = [self._idx2tag[i] for i in tagging[0]] depend = depend[0] - 1 for i in range(len(depend)): if depend[i] == 0 and tagging[i] != 'root': tagging[i] = 'root' elif depend[i] != 0 and tagging[i] == 'root': depend[i] = 0 tagging = merge_sentencepiece_tokens_tagging(bert_sequence, tagging) tagging = list(zip(*tagging)) indexing = merge_sentencepiece_tokens_tagging(bert_sequence, depend) indexing = list(zip(*indexing)) result, indexing_ = [], [] for i in range(len(tagging)): index = int(indexing[i][1]) if index > len(tagging): index = len(tagging) indexing_.append((indexing[i][0], index)) result.append('%d\t%s\t_\t_\t_\t_\t%d\t%s\t_\t_' % (i + 1, tagging[i][0], index, tagging[i][1])) d = DependencyGraph('\n'.join(result), top_relation_label='root') return d, tagging, indexing_
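# The head/label consistency loop shared by the dependency variants, isolated
# on toy data (function name hypothetical): head 0 forces the 'root' label,
# and a 'root' label anywhere else forces head 0.
def _demo_enforce_root_consistency():
    heads = [2, 0, 2]
    labels = ['nsubj', 'punct', 'root']
    for i in range(len(heads)):
        if heads[i] == 0 and labels[i] != 'root':
            labels[i] = 'root'
        elif heads[i] != 0 and labels[i] == 'root':
            heads[i] = 0
    print(heads, labels)  # [2, 0, 0] ['nsubj', 'root', 'root']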
def predict(self, string: str): """ Tag a string. Parameters ---------- string : str Returns ------- result: Tuple[str, str] """ parsed_sequence, input_mask, bert_sequence = self._tokenize(string) r = self._execute( inputs=[[parsed_sequence], [input_mask]], input_labels=['Placeholder', 'Placeholder_1'], output_labels=['logits'], ) predicted = r['logits'][0] t = [self._settings['idx2tag'][d] for d in predicted] merged = merge_sentencepiece_tokens_tagging(bert_sequence, t) return list(zip(merged[0], merged[1]))
def predict(self, string: str): """ Tag a string. Parameters ---------- string : str Returns ------- result : Tuple[str, str] """ input_ids, input_masks, segment_ids, s_tokens = self._tokenize(string) r = self._execute( inputs=[input_ids, segment_ids, input_masks], input_labels=['Placeholder', 'Placeholder_1', 'Placeholder_2'], output_labels=['logits'], ) predicted = r['logits'][0] t = [self._settings['idx2tag'][d] for d in predicted] merged = merge_sentencepiece_tokens_tagging(s_tokens, t, model='xlnet') return list(zip(*merged))