Example #1
    def predict(self, string: str):
        """
        Tag a string.

        Parameters
        ----------
        string : str

        Returns
        -------
        result : Tuple[DependencyGraph, List[Tuple[str, str]], List[Tuple[str, int]]]
        """

        input_ids, input_masks, segment_ids, s_tokens = xlnet_tokenization(
            self._tokenizer, [string], space_after_punct=True)
        s_tokens = s_tokens[0]
        r = self._execute(
            inputs=[input_ids, segment_ids, input_masks],
            input_labels=['Placeholder', 'Placeholder_1', 'Placeholder_2'],
            output_labels=['logits', 'heads_seq'],
        )
        tagging, depend = r['logits'], r['heads_seq']
        tagging = [self._idx2tag[i] for i in tagging[0]]
        depend = depend[0] - self._minus

        for i in range(len(depend)):
            if depend[i] == 0 and tagging[i] != 'root':
                tagging[i] = 'root'
            elif depend[i] != 0 and tagging[i] == 'root':
                depend[i] = 0

        tagging = merge_sentencepiece_tokens_tagging(s_tokens,
                                                     tagging,
                                                     model='xlnet')
        tagging = list(zip(*tagging))
        indexing = merge_sentencepiece_tokens_tagging(s_tokens,
                                                      depend,
                                                      model='xlnet')
        indexing = list(zip(*indexing))

        result, indexing_ = [], []
        for i in range(len(tagging)):
            index = int(indexing[i][1])
            if index > len(tagging):
                index = len(tagging)
            elif (i + 1) == index:
                index = index + 1
            elif index == -1:
                index = i
            indexing_.append((indexing[i][0], index))
            result.append('%d\t%s\t_\t_\t_\t_\t%d\t%s\t_\t_' %
                          (i + 1, tagging[i][0], index, tagging[i][1]))
        d = DependencyGraph('\n'.join(result), top_relation_label='root')
        return d, tagging, indexing_
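A minimal usage sketch for the parser above. The instance name and the input sentence are hypothetical, and the traversal assumes the returned DependencyGraph follows the NLTK API, where triples() yields ((head word, head tag), rel, (dep word, dep tag)):

    # Hypothetical instance and sentence, for illustration only.
    d, tagging, indexing = model.predict('Dia suka makan nasi di kedai itu')
    # tagging: list of (word, relation); indexing: list of (word, head index)
    for (head, head_tag), rel, (dep, dep_tag) in d.triples():
        print('%s --%s--> %s' % (head, rel, dep))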
Example #2
    def predict(self, string: str):
        """
        Tag a string.

        Parameters
        ----------
        string : str

        Returns
        -------
        result : Tuple[DependencyGraph, List[Tuple[str, str]], List[Tuple[str, int]]]
        """

        input_ids, input_masks, segment_ids, s_tokens = xlnet_tokenization(
            self._tokenizer, [string])
        s_tokens = s_tokens[0]

        tagging, depend = self._sess.run(
            [self._logits, self._heads_seq],
            feed_dict={
                self._X: input_ids,
                self._segment_ids: segment_ids,
                self._input_masks: input_masks,
            },
        )
        tagging = [self._idx2tag[i] for i in tagging[0]]
        depend = depend[0] - 1

        for i in range(len(depend)):
            if depend[i] == 0 and tagging[i] != 'root':
                tagging[i] = 'root'
            elif depend[i] != 0 and tagging[i] == 'root':
                depend[i] = 0

        tagging = merge_sentencepiece_tokens_tagging(s_tokens,
                                                     tagging,
                                                     model='xlnet')
        tagging = list(zip(*tagging))
        indexing = merge_sentencepiece_tokens_tagging(s_tokens,
                                                      depend,
                                                      model='xlnet')
        indexing = list(zip(*indexing))

        result, indexing_ = [], []
        for i in range(len(tagging)):
            index = int(indexing[i][1])
            if index > len(tagging):
                index = len(tagging)
            indexing_.append((indexing[i][0], index))
            result.append('%d\t%s\t_\t_\t_\t_\t%d\t%s\t_\t_' %
                          (i + 1, tagging[i][0], index, tagging[i][1]))
        d = DependencyGraph('\n'.join(result), top_relation_label='root')
        return d, tagging, indexing_
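The repair loop in this example (and in Examples #1, #6 and #7) enforces agreement between the predicted head indices and the 'root' tag: a token whose head is 0 must carry the 'root' relation, and a token tagged 'root' must point at head 0. A toy run of both branches, with made-up values:

    depend = [2, 0, 2]
    tagging = ['nsubj', 'amod', 'obj']
    for i in range(len(depend)):
        if depend[i] == 0 and tagging[i] != 'root':
            tagging[i] = 'root'   # head 0 forces the 'root' relation
        elif depend[i] != 0 and tagging[i] == 'root':
            depend[i] = 0         # the 'root' relation forces head 0
    # tagging is now ['nsubj', 'root', 'obj']; depend is unchanged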
Example #3
    def predict(self, string: str):
        """
        Tag a string.

        Parameters
        ----------
        string : str

        Returns
        -------
        result : List[Tuple[str, str]]
        """

        parsed_sequence, input_mask, bert_sequence = parse_bert_tagging(
            string, self._tokenizer)
        predicted = self._sess.run(
            self._logits,
            feed_dict={
                self._X: [parsed_sequence],
                self._input_masks: [input_mask],
            },
        )[0]
        t = [self._settings['idx2tag'][d] for d in predicted]
        merged = merge_sentencepiece_tokens_tagging(bert_sequence, t)
        return list(zip(merged[0], merged[1]))
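A sketch of how this tagger might be called. The instance name and the tags shown are illustrative assumptions; the real tag set depends on the trained model:

    # Hypothetical call; output tags are made up for illustration.
    result = model.predict('Kuala Lumpur ialah ibu negara Malaysia')
    # e.g. [('Kuala', 'location'), ('Lumpur', 'location'), ('ialah', 'OTHER'), ...]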
Example #4
    def _predict(self, strings):
        # note: `sequences` is computed here but never used below
        sequences = [
            encode_sentencepiece(
                self._tokenizer.sp,
                string,
                return_unicode=False,
                sample=False,
            ) for string in strings
        ]
        batch_x = [self._tokenizer.encode(string) + [1] for string in strings]
        batch_x = padding_sequence(batch_x)
        r = self._execute(
            inputs=[batch_x],
            input_labels=['x_placeholder'],
            output_labels=['greedy', 'tag_greedy'],
        )
        p, tag = r['greedy'], r['tag_greedy']
        results = []
        # length of each decoded sequence, ignoring 0 (padding) ids
        nonzero = (p != 0).sum(axis=-1)
        for i in range(len(p)):
            r = self._tokenizer.decode(p[i].tolist())
            t = tag[i, :nonzero[i]]
            s = encode_sentencepiece(self._tokenizer.sp,
                                     r,
                                     return_unicode=False,
                                     sample=False)
            merged = merge_sentencepiece_tokens_tagging(s + ['<cls>'],
                                                        t,
                                                        model='xlnet')
            results.append(list(zip(merged[0], merged[1])))
        return results
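The (p != 0).sum(axis=-1) line counts the non-padding ids in each row of the batch, so every tag sequence can be truncated to its true length before merging. A self-contained sketch of the trick:

    import numpy as np

    # Two decoded sequences, padded with id 0 to a common length of 5.
    p = np.array([[5, 9, 3, 0, 0],
                  [7, 2, 0, 0, 0]])
    nonzero = (p != 0).sum(axis=-1)   # array([3, 2]): true length of each row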
Example #5
    def predict(self, string: str):
        """
        Tag a string.

        Parameters
        ----------
        string : str

        Returns
        -------
        result : List[Tuple[str, str]]
        """

        input_ids, input_masks, segment_ids, s_tokens = xlnet_tokenization(
            self._tokenizer, [string])
        s_tokens = s_tokens[0]

        predicted = self._sess.run(
            self._logits,
            feed_dict={
                self._X: input_ids,
                self._segment_ids: segment_ids,
                self._input_masks: input_masks,
            },
        )[0]
        t = [self._settings['idx2tag'][d] for d in predicted]

        merged = merge_sentencepiece_tokens_tagging(s_tokens, t, model='xlnet')
        return list(zip(*merged))
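merge_sentencepiece_tokens_tagging returns parallel word and tag lists, so zip(*merged) pairs them element-wise (Example #8's zip(merged[0], merged[1]) is the same pairing written out). A toy demonstration with made-up values:

    merged = (['Kuala', 'Lumpur'], ['PROPN', 'PROPN'])   # (words, tags), made up
    list(zip(*merged))   # [('Kuala', 'PROPN'), ('Lumpur', 'PROPN')]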
Example #6
    def predict(self, string: str):
        """
        Tag a string.

        Parameters
        ----------
        string : str

        Returns
        -------
        result : Tuple[DependencyGraph, List[Tuple[str, str]], List[Tuple[str, int]]]
        """

        parsed_sequence, bert_sequence = parse_bert_tagging(
            string, self._tokenizer
        )
        tagging, depend = self._sess.run(
            [self._logits, self._heads_seq],
            feed_dict={self._X: [parsed_sequence]},
        )
        tagging = [self._idx2tag[i] for i in tagging[0]]
        depend = depend[0] - 1

        for i in range(len(depend)):
            if depend[i] == 0 and tagging[i] != 'root':
                tagging[i] = 'root'
            elif depend[i] != 0 and tagging[i] == 'root':
                depend[i] = 0

        tagging = merge_sentencepiece_tokens_tagging(bert_sequence, tagging)
        tagging = list(zip(*tagging))
        indexing = merge_sentencepiece_tokens_tagging(bert_sequence, depend)
        indexing = list(zip(*indexing))

        result, indexing_ = [], []
        for i in range(len(tagging)):
            index = int(indexing[i][1])
            if index > len(tagging):
                index = len(tagging)
            indexing_.append((indexing[i][0], index))
            result.append(
                '%d\t%s\t_\t_\t_\t_\t%d\t%s\t_\t_'
                % (i + 1, tagging[i][0], index, tagging[i][1])
            )
        d = DependencyGraph('\n'.join(result), top_relation_label='root')
        return d, tagging, indexing_
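Each line appended to result is a 10-column CoNLL-style row: ID, FORM, four placeholder columns (LEMMA, UPOS, XPOS, FEATS), HEAD, DEPREL, and two trailing placeholders, which is the layout an NLTK-style DependencyGraph parser expects. One such row with made-up values:

    # Made-up token: word 'Dia' at position 1 attaches to head 2 as 'nsubj'.
    row = '%d\t%s\t_\t_\t_\t_\t%d\t%s\t_\t_' % (1, 'Dia', 2, 'nsubj')
    # -> '1\tDia\t_\t_\t_\t_\t2\tnsubj\t_\t_'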
Example #7
    def predict(self, string: str):
        """
        Tag a string.

        Parameters
        ----------
        string : str

        Returns
        -------
        result : Tuple[DependencyGraph, List[Tuple[str, str]], List[Tuple[str, int]]]
        """

        parsed_sequence, input_mask, bert_sequence = parse_bert_tagging(
            string, self._tokenizer)
        r = self._execute(
            inputs=[[parsed_sequence]],
            input_labels=['Placeholder'],
            output_labels=['logits', 'heads_seq'],
        )
        tagging, depend = r['logits'], r['heads_seq']
        tagging = [self._idx2tag[i] for i in tagging[0]]
        depend = depend[0] - 1

        for i in range(len(depend)):
            if depend[i] == 0 and tagging[i] != 'root':
                tagging[i] = 'root'
            elif depend[i] != 0 and tagging[i] == 'root':
                depend[i] = 0

        tagging = merge_sentencepiece_tokens_tagging(bert_sequence, tagging)
        tagging = list(zip(*tagging))
        indexing = merge_sentencepiece_tokens_tagging(bert_sequence, depend)
        indexing = list(zip(*indexing))

        result, indexing_ = [], []
        for i in range(len(tagging)):
            index = int(indexing[i][1])
            if index > len(tagging):
                index = len(tagging)
            indexing_.append((indexing[i][0], index))
            result.append('%d\t%s\t_\t_\t_\t_\t%d\t%s\t_\t_' %
                          (i + 1, tagging[i][0], index, tagging[i][1]))
        d = DependencyGraph('\n'.join(result), top_relation_label='root')
        return d, tagging, indexing_
Example #8
    def predict(self, string: str):
        """
        Tag a string.

        Parameters
        ----------
        string : str

        Returns
        -------
        result : List[Tuple[str, str]]
        """
        parsed_sequence, input_mask, bert_sequence = self._tokenize(string)
        r = self._execute(
            inputs=[[parsed_sequence], [input_mask]],
            input_labels=['Placeholder', 'Placeholder_1'],
            output_labels=['logits'],
        )
        predicted = r['logits'][0]
        t = [self._settings['idx2tag'][d] for d in predicted]
        merged = merge_sentencepiece_tokens_tagging(bert_sequence, t)
        return list(zip(merged[0], merged[1]))
Example #9
    def predict(self, string: str):
        """
        Tag a string.

        Parameters
        ----------
        string : str

        Returns
        -------
        result : List[Tuple[str, str]]
        """
        input_ids, input_masks, segment_ids, s_tokens = self._tokenize(string)

        r = self._execute(
            inputs=[input_ids, segment_ids, input_masks],
            input_labels=['Placeholder', 'Placeholder_1', 'Placeholder_2'],
            output_labels=['logits'],
        )
        predicted = r['logits'][0]
        t = [self._settings['idx2tag'][d] for d in predicted]

        merged = merge_sentencepiece_tokens_tagging(s_tokens, t, model='xlnet')
        return list(zip(*merged))