def __init__(self, bigrams=False, encoding_type='bmeso'):
    """
    Run the base initialiser, store the bigram flag and pick the tag converter.

    :param bigrams: stored unchanged on ``self.bigrams``.
    :param encoding_type: one of ``'bmeso'``, ``'bio'`` or ``'bioes'``. ``'bmeso'`` keeps the
        tags as they are, ``'bio'`` uses ``bmeso2bio`` and ``'bioes'`` applies ``iob2`` and
        then ``iob2bioes`` to the result.
    :raises RuntimeError: if ``encoding_type`` is not one of the three supported values.
    """
    super().__init__()
    self.bigrams = bigrams

    if encoding_type == 'bmeso':
        # Nothing to convert: hand the tags back untouched.
        def encode(x):
            return x
    elif encoding_type == 'bio':
        encode = bmeso2bio
    elif encoding_type == 'bioes':
        # NOTE(review): this path feeds the tags to iob2 instead of a bmeso-specific
        # converter -- confirm the incoming tags are acceptable to iob2.
        def encode(words):
            return iob2bioes(iob2(words))
    else:
        raise RuntimeError("Only support bio, bmeso, bioes")

    self.encoding_func = encode
def __init__(self, encoding_type: str = 'bio', lower: bool = False, word_shape: bool = False):
    """
    Select the tag converter and remember the ``lower`` / ``word_shape`` options.

    :param str encoding_type: ``'bio'`` uses ``iob2`` as the converter; ``'bioes'`` applies
        ``iob2`` and then ``iob2bioes`` to the result.
    :param bool lower: stored unchanged on ``self.lower``.
    :param bool word_shape: stored unchanged on ``self.word_shape``.
    :raises ValueError: if ``encoding_type`` is neither ``'bio'`` nor ``'bioes'``.
    """
    if encoding_type == 'bio':
        tag_converter = iob2
    elif encoding_type == 'bioes':
        def tag_converter(words):
            # Apply iob2 first, then iob2bioes on its output.
            return iob2bioes(iob2(words))
    else:
        raise ValueError("encoding_type only supports `bio` and `bioes`.")

    # The options are only stored once the encoding type has been accepted.
    self.convert_tag = tag_converter
    self.lower = lower
    self.word_shape = word_shape
def __init__(self, encoding_type: str = 'bio', lower: bool = False, word_shape: bool = False):
    """
    :param str encoding_type: encoding scheme to use for the target column; ``bio`` and
        ``bioes`` are supported.
    :param bool lower: whether to lowercase the words before building the vocabulary; in the
        vast majority of cases this does not need to be set to True.
    :param bool word_shape: whether to add an extra word-shape column (5-dimensional).
    :raises ValueError: if ``encoding_type`` is neither ``bio`` nor ``bioes``.
    """
    is_bio = encoding_type == 'bio'
    if not is_bio and encoding_type != 'bioes':
        raise ValueError("encoding_type only supports `bio` and `bioes`.")

    # 'bio' uses iob2 directly; 'bioes' chains iob2 and then iob2bioes.
    self.convert_tag = iob2 if is_bio else (lambda words: iob2bioes(iob2(words)))
    self.lower = lower
    self.word_shape = word_shape