예제 #1
0
 def __init__(self, bigrams=False, encoding_type='bmeso'):
     """Record the bigram flag and pick the tag-conversion callable.

     :param bigrams: stored unchanged on ``self.bigrams``.
     :param encoding_type: one of ``'bmeso'``, ``'bio'`` or ``'bioes'``;
         determines the callable stored on ``self.encoding_func``.
         ``'bmeso'`` selects an identity function (tags are left as-is).
     :raises RuntimeError: if ``encoding_type`` is any other value. The
         parent initialiser has already run and ``self.bigrams`` is already
         set when this is raised.
     """
     super().__init__()
     self.bigrams = bigrams

     # Guard clause: bail out on anything outside the three known schemes.
     if encoding_type not in ('bmeso', 'bio', 'bioes'):
         raise RuntimeError("Only support bio, bmeso, bioes")

     if encoding_type == 'bio':
         converter = bmeso2bio
     elif encoding_type == 'bioes':
         # NOTE(review): this branch runs iob2 first, whereas the 'bio' branch
         # uses bmeso2bio -- confirm iob2 accepts the same input tag scheme.
         converter = lambda words: iob2bioes(iob2(words))
     else:
         # 'bmeso': pass tags through untouched.
         converter = lambda x: x
     self.encoding_func = converter
예제 #2
0
 def __init__(self,
              encoding_type: str = 'bio',
              lower: bool = False,
              word_shape: bool = False):
     """Select the tag converter and remember the preprocessing flags.

     :param encoding_type: ``'bio'`` stores ``iob2`` on ``self.convert_tag``;
         ``'bioes'`` stores a callable that applies ``iob2`` and then
         ``iob2bioes``.
     :param lower: stored unchanged on ``self.lower``.
     :param word_shape: stored unchanged on ``self.word_shape``.
     :raises ValueError: if ``encoding_type`` is neither ``'bio'`` nor
         ``'bioes'``; no attribute is set in that case.
     """
     # Validate first so the happy path below is a single expression.
     if encoding_type not in ('bio', 'bioes'):
         raise ValueError("encoding_type only supports `bio` and `bioes`.")

     self.convert_tag = (
         iob2
         if encoding_type == 'bio'
         else (lambda words: iob2bioes(iob2(words)))
     )
     self.lower = lower
     self.word_shape = word_shape
예제 #3
0
    def __init__(self, encoding_type: str = 'bio', lower: bool = False, word_shape: bool=False):
        """
        Choose the target-tag converter and store the preprocessing options.

        :param str encoding_type: encoding scheme to use for the target column;
            ``bioes`` and ``bio`` are supported.
        :param bool lower: whether to lowercase the words before building the
            vocabulary; in the vast majority of cases this does not need to be
            True.
        :param bool word_shape: whether to add an extra word-shape column
            (5-dimensional).
        :raises ValueError: if ``encoding_type`` is not ``bio`` or ``bioes``.
        """
        if encoding_type == 'bioes':
            # Normalise with iob2 first, then expand to the BIOES scheme.
            tag_converter = lambda words: iob2bioes(iob2(words))
        elif encoding_type == 'bio':
            tag_converter = iob2
        else:
            raise ValueError("encoding_type only supports `bio` and `bioes`.")

        self.convert_tag = tag_converter
        self.lower, self.word_shape = lower, word_shape