Ejemplo n.º 1
0
def update_tag_scheme(sentences, tag_scheme):
    """
    Check the tagging scheme of every sentence and rewrite the tags in place.

    :param sentences: iterable of sentences; each sentence is a re-iterable
        sequence of mutable token records whose last element is the tag.
    :param tag_scheme: target scheme, either 'iob' or 'iobes'.
    :return: None; the token records inside ``sentences`` are modified.
    :raises ValueError: if ``tag_scheme`` is unknown, or if ``iob2`` returns
        a falsy value for the tags of a sentence.
    """
    # Validate the scheme once, before touching any data, so the error does
    # not depend on ``sentences`` being non-empty.
    if tag_scheme not in ('iob', 'iobes'):
        raise ValueError('Unknown tagging scheme!')

    for i, s in enumerate(sentences):
        tags = [w[-1] for w in s]
        # ``iob2`` is defined elsewhere; a falsy result means invalid tags.
        # NOTE(review): presumably it also normalizes IOB1 tags to IOB2 in
        # place, which is why 'iob' writes ``tags`` back -- confirm.
        if not iob2(tags):
            s_str = '\n'.join(' '.join(w) for w in s)
            raise ValueError('Sentences should be given in IOB format! ' +
                             'Please check sentence %i:\n%s' % (i, s_str))
        # 'iob' keeps the checked tags; 'iobes' stores the output of
        # ``iob_iobes`` (defined elsewhere).
        new_tags = tags if tag_scheme == 'iob' else iob_iobes(tags)
        for word, new_tag in zip(s, new_tags):
            word[-1] = new_tag
def update_tag_scheme(sentences, tag_scheme):
    """
    Check every sentence's tags and rewrite them in the requested scheme.

    ``sentences`` is modified in place: the last element of each token
    record is replaced by the tag produced for it. Nothing is returned.

    Raises ``ValueError`` when ``tag_scheme`` is neither 'iob' nor 'iobes',
    or when ``iob2`` returns a falsy value for a sentence's tags.
    """
    convert_to_iobes = tag_scheme == 'iobes'
    if not convert_to_iobes and tag_scheme != 'iob':
        # Checked before the loop so an unknown scheme is reported even when
        # ``sentences`` is empty.
        raise ValueError('Unknown tagging scheme!')

    # ``i`` is the zero-based sentence index; it is only used in the error
    # message below.
    for i, s in enumerate(sentences):
        # The tag is the last field of each token record.
        tags = [w[-1] for w in s]

        # ``iob2`` is defined elsewhere; a falsy result means invalid tags.
        if not iob2(tags):
            # Render the sentence one token per line, fields separated by
            # spaces, so it can be shown in the error message.
            s_str = '\n'.join(' '.join(w) for w in s)
            raise ValueError('Sentences should be given in IOB format! ' +
                             'Please check sentence %i:\n%s' % (i, s_str))

        if convert_to_iobes:
            # ``iob_iobes`` (defined elsewhere) builds the tags to store.
            new_tags = iob_iobes(tags)
        else:
            # NOTE(review): writing ``tags`` back only matters if ``iob2``
            # rewrote them in place (IOB1 -> IOB2) -- confirm.
            new_tags = tags

        # Replace the old tag of every token with the new one.
        for word, new_tag in zip(s, new_tags):
            word[-1] = new_tag
Ejemplo n.º 3
0
def update_tag_scheme(sentences, tag_scheme):
    """
    Validate sentence tags and store them back in the requested scheme.

    For an overview of common sequence tagging schemes see:
    http://nathanlvzs.github.io/Several-Tagging-Schemes-for-Sequential-Tagging.html

    Args:
        sentences: iterable of sentences, each a sequence of mutable token
            records whose last element is the tag. Modified in place.
        tag_scheme: 'iob' to keep the checked tags, 'iobes' to store the
            result of ``iob_iobes``.

    Raises:
        ValueError: if ``tag_scheme`` is not one of the two values above, or
            if ``iob2`` returns a falsy value for a sentence.
    """
    # Fail fast on a bad scheme instead of discovering it per sentence
    # (and never discovering it for an empty input).
    if tag_scheme != 'iob' and tag_scheme != 'iobes':
        raise ValueError('Unknown tagging scheme!')

    for i, s in enumerate(sentences):
        tags = [w[-1] for w in s]
        # ``iob2`` is defined elsewhere.
        # NOTE(review): presumably it converts IOB1 to IOB2 in place while
        # checking -- confirm against its definition.
        if not iob2(tags):
            s_str = '\n'.join(map(' '.join, s))
            raise ValueError('Sentences should be given in IOB format! ' +
                             'Please check sentence %i:\n%s' % (i, s_str))
        if tag_scheme == 'iobes':
            tags = iob_iobes(tags)
        for word, new_tag in zip(s, tags):
            word[-1] = new_tag
Ejemplo n.º 4
0
def update_tag_scheme(sentences, tag_scheme):
    """
    Check each sentence's tags and rewrite them in the requested scheme.

    1. ``iob`` is run on the tags of every sentence; when it returns a falsy
       value a warning naming the sentence is printed and processing
       continues.
    2. For 'iob' the checked tags are written back; for 'iobes' the output
       of ``iob_iobes`` is written back. Any other ``tag_scheme`` leaves the
       sentence's records untouched.
    """
    for idx, sentence in enumerate(sentences):
        labels = [token[-1] for token in sentence]

        # ``iob`` is defined elsewhere.
        # NOTE(review): presumably it also repairs ``labels`` in place, which
        # is what makes the 'iob' write-back meaningful -- confirm.
        if not iob(labels):
            dump = '\n'.join(map(' '.join, sentence))
            print('Sentences should be given in IOB format! \n' +
                  'Please check sentence %i:\n%s' % (idx, dump))

        if tag_scheme == 'iob':
            # Training with IOB: store the checked tags.
            replacement = labels
        elif tag_scheme == 'iobes':
            # Training with IOBES: store the converted tags.
            replacement = iob_iobes(labels)
        else:
            continue

        for token, label in zip(sentence, replacement):
            token[-1] = label
Ejemplo n.º 5
0
def update_tag_scheme(sentences, tag_scheme):
    """
    Check the tags of each sentence and rewrite them as 'iob' or 'iobes'.

    ``sentences`` is updated in place and nothing is returned.

    Raises ``ValueError`` if ``tag_scheme`` is not 'iob' or 'iobes', or if
    ``iob2`` returns a falsy value for the tags of a sentence.
    """
    known_schemes = ('iob', 'iobes')
    # Validate before the loop: otherwise a bad scheme is only noticed once
    # there is at least one sentence to process.
    if tag_scheme not in known_schemes:
        raise ValueError('Unknown tagging scheme!')

    # Each sentence is a list of token records, e.g.
    # [['Russian', 'B-MISC'], ['military', 'O']].
    for i, s in enumerate(sentences):
        tags = [w[-1] for w in s]
        # Check that tags are given in the IOB format (``iob2`` is defined
        # elsewhere; presumably it also normalizes them in place -- confirm).
        if not iob2(tags):
            s_str = '\n'.join(' '.join(w) for w in s)
            raise ValueError('Sentences should be given in IOB format! ' +
                             'Please check sentence %i:\n%s' % (i, s_str))

        if tag_scheme == 'iob':
            new_tags = tags
        else:
            new_tags = iob_iobes(tags)

        # zip pairs every record (e.g. ['BRUSSELS', 'B-LOC']) with the tag
        # that replaces its last field.
        for word, new_tag in zip(s, new_tags):
            word[-1] = new_tag
Ejemplo n.º 6
0
def update_tag_scheme(sentences, tag_scheme):
    """
    Validate the tags of every sentence and rewrite them in place.

    ``tag_scheme`` selects what is stored back into the last field of each
    token record: 'iob' stores the checked tags, 'iobes' stores the result
    of ``iob_iobes``.

    Raises ``ValueError`` for any other ``tag_scheme`` and for a sentence
    whose tags make ``iob2`` return a falsy value.
    """
    # Resolve the scheme once. Doing it here (not per sentence) also means an
    # unknown scheme is rejected when ``sentences`` is empty.
    if tag_scheme == 'iob':
        use_iobes = False
    elif tag_scheme == 'iobes':
        use_iobes = True
    else:
        raise ValueError('Unknown tagging scheme!')

    for i, s in enumerate(sentences):
        tags = [w[-1] for w in s]
        # Check that tags are given in the IOB format. ``iob2`` is defined
        # elsewhere.
        # NOTE(review): it presumably upgrades IOB1 tags to IOB2 in place;
        # verify, since the 'iob' branch relies on that to have any effect.
        if not iob2(tags):
            s_str = '\n'.join(' '.join(w) for w in s)
            raise ValueError('Sentences should be given in IOB format! ' +
                             'Please check sentence %i:\n%s' % (i, s_str))
        if use_iobes:
            tags = iob_iobes(tags)
        for word, new_tag in zip(s, tags):
            word[-1] = new_tag
Ejemplo n.º 7
0
def update_tag_scheme(sentences, tag_scheme):
    """
    Check the tags of every sentence and rewrite them in the target scheme.

    The input tags must be accepted by ``iob2``; the stored tags are either
    the checked tags ('iob') or the output of ``iob_iobes`` ('iobes').
    ``sentences`` is modified in place and nothing is returned.

    Raises ``ValueError`` if ``tag_scheme`` is unknown or if ``iob2``
    returns a falsy value for a sentence.
    """
    # An unknown scheme is an argument error, so report it before looking at
    # any sentence (this also covers an empty ``sentences``).
    if tag_scheme not in ('iob', 'iobes'):
        raise ValueError('Unknown tagging scheme!')

    for i, s in enumerate(sentences):  # s is one sentence
        tags = [w[-1] for w in s]  # w[-1] is the tag of one token
        # ``iob2`` is defined elsewhere; a truthy result means the tags are
        # acceptable.
        # NOTE(review): presumably it also converts IOB1 to IOB2 in place --
        # confirm against its definition.
        if iob2(tags):
            new_tags = iob_iobes(tags) if tag_scheme == 'iobes' else tags
            for word, new_tag in zip(s, new_tags):
                word[-1] = new_tag
            continue

        s_str = '\n'.join(' '.join(w) for w in s)
        raise ValueError('Sentences should be given in IOB format! ' +
                         'Please check sentence %i:\n%s' % (i, s_str))
Ejemplo n.º 8
0
def update_tag_scheme(sentences, tag_scheme):
    """
    Check the tags of every sentence and rewrite them in place.

    Args:
        sentences: iterable of sentences, each a sequence of mutable token
            records whose last element is the tag.
        tag_scheme: 'iob' to store the checked tags, 'iobes' to store the
            output of ``iob_iobes``.

    Raises:
        ValueError: if ``tag_scheme`` is unknown, or if ``iob2`` returns a
            falsy value for the tags of a sentence.
    """
    # Validate the argument once, up front, so a bad scheme is reported even
    # for an empty ``sentences``.
    if tag_scheme not in ('iob', 'iobes'):
        raise ValueError('Unknown tagging scheme!')

    for i, s in enumerate(sentences):
        tags = [w[-1] for w in s]
        # ``iob2`` is defined elsewhere.
        # NOTE(review): presumably it normalizes ``tags`` in place while
        # checking them -- confirm.
        if not iob2(tags):
            s_str = '\n'.join(' '.join(w) for w in s)
            # The two literals are concatenated, so the first one needs its
            # trailing space to keep the sentences apart.
            raise ValueError('Sentences should be given in IOB format! ' +
                             'please check sentence %i:\n%s' % (i, s_str))
        new_tags = iob_iobes(tags) if tag_scheme == 'iobes' else tags
        for word, new_tag in zip(s, new_tags):
            word[-1] = new_tag
Ejemplo n.º 9
0
def update_tag_scheme(sentences, tag_scheme):
    """
    Check the tags of every sentence and rewrite them in the target scheme.

    ``sentences`` is a list of sentences, each a list of ``[token, tag]``
    records. The records are mutated in place, so the caller's data changes
    and nothing is returned (background on why the caller sees the change:
    https://blog.csdn.net/nathan_yo/article/details/98639051).

    Example from the original author's notes, with ``tag_scheme='iobes'``:

        input:  [['情', 'B-DRU'], ['况', 'I-DRU'], [':', 'O'],
                 ['言', 'B-SYM'], ['语', 'I-SYM'], ['不', 'I-SYM'],
                 ['清', 'I-SYM'], ['0', 'O']]
        output: [['情', 'B-DRU'], ['况', 'E-DRU'], [':', 'O'],
                 ['言', 'B-SYM'], ['语', 'I-SYM'], ['不', 'I-SYM'],
                 ['清', 'E-SYM'], ['0', 'O']]

    Raises ``ValueError`` if ``tag_scheme`` is neither 'iob' nor 'iobes', or
    if ``iob2`` returns a falsy value for the tags of a sentence.
    """
    # Reject an unknown scheme before processing anything; inside the loop it
    # would go unnoticed for an empty ``sentences``.
    if tag_scheme != 'iob' and tag_scheme != 'iobes':
        raise ValueError('Unknown tagging scheme!')

    # ``i`` is the index of the sentence and ``s`` the sentence itself.
    for i, s in enumerate(sentences):
        tags = [w[-1] for w in s]
        # Check that tags are given in the IOB format. ``iob2`` is defined
        # elsewhere.
        # NOTE(review): presumably it also rewrites IOB1 tags as IOB2 in
        # place -- confirm.
        if not iob2(tags):
            s_str = '\n'.join(' '.join(w) for w in s)
            raise ValueError('Sentences should be given in IOB format! ' +
                             'Please check sentence %i:\n%s' % (i, s_str))

        if tag_scheme == 'iobes':
            new_tags = iob_iobes(tags)
        else:
            new_tags = tags
        for word, new_tag in zip(s, new_tags):
            word[-1] = new_tag
Ejemplo n.º 10
0
def update_tag_scheme(sentences, tag_scheme):
    """
    Validate the tags of every sentence and rewrite them in place.

    Args:
        sentences: iterable of sentences, each a sequence of mutable token
            records whose last element is the tag.
        tag_scheme: 'iob' or 'iobes'.

    Raises:
        ValueError: if ``tag_scheme`` is unknown, or if ``iob2`` returns a
            falsy value for the tags of a sentence.
    """
    def store(records, new_tags):
        # Overwrite the last field of each record with its new tag.
        for record, new_tag in zip(records, new_tags):
            record[-1] = new_tag

    # Checked once here rather than per sentence, so an invalid scheme is
    # also caught when there are no sentences.
    if tag_scheme not in ('iob', 'iobes'):
        raise ValueError('Unknown tagging scheme!')

    for i, s in enumerate(sentences):
        tags = [w[-1] for w in s]
        # Check that tags are given in the IOB format. ``iob2`` is defined
        # elsewhere.
        # NOTE(review): presumably it also normalizes ``tags`` in place --
        # confirm.
        if not iob2(tags):
            s_str = '\n'.join(' '.join(w) for w in s)
            raise ValueError('Sentences should be given in IOB format! ' +
                             'Please check sentence %i:\n%s' % (i, s_str))
        if tag_scheme == 'iob':
            store(s, tags)
        else:
            store(s, iob_iobes(tags))
Ejemplo n.º 11
0
def update_tag_scheme(sentences, tag_scheme):
    """
    Check the tags of every sentence and rewrite them as 'iob' or 'iobes'.

    The tags read from each sentence must be accepted by ``iob2``. The last
    field of every token record is then overwritten in place; nothing is
    returned.

    Raises ``ValueError`` if ``tag_scheme`` is not 'iob' or 'iobes', or if
    ``iob2`` returns a falsy value for the tags of a sentence.
    """
    wants_iobes = tag_scheme == 'iobes'
    # Fail fast: the scheme does not depend on the data, so there is no
    # reason to wait for the first sentence to reject it.
    if not (wants_iobes or tag_scheme == 'iob'):
        raise ValueError('Unknown tagging scheme!')

    for i, s in enumerate(sentences):
        tags = [w[-1] for w in s]
        # Check that tags are given in the IOB format. ``iob2`` is defined
        # elsewhere.
        # NOTE(review): presumably it converts IOB1 tags to IOB2 in place,
        # which the 'iob' write-back relies on -- confirm.
        if not iob2(tags):
            s_str = '\n'.join(map(' '.join, s))
            raise ValueError('Sentences should be given in IOB format! ' +
                             'Please check sentence %i:\n%s' % (i, s_str))
        new_tags = iob_iobes(tags) if wants_iobes else tags
        for word, new_tag in zip(s, new_tags):
            word[-1] = new_tag
Ejemplo n.º 12
0
def update_tag_scheme(sentences, tag_scheme):
    """
    Check the tags of every sentence and rewrite them in the target scheme.

    The input side and the output side are separate concerns: the tags read
    from ``sentences`` must pass ``iob2``, while ``tag_scheme`` ('iob' or
    'iobes') only selects what is written back. ``sentences`` is modified in
    place and nothing is returned.

    Raises ``ValueError`` if ``tag_scheme`` is unknown, or if ``iob2``
    returns a falsy value for the tags of a sentence.
    """
    # Validate the requested output scheme before touching any sentence, so
    # a bad value is reported even when ``sentences`` is empty.
    if tag_scheme not in ('iob', 'iobes'):
        raise ValueError('Unknown tagging scheme!')

    for i, s in enumerate(sentences):
        tags = [w[-1] for w in s]
        # Check that tags are given in the IOB format. ``iob2`` is defined
        # elsewhere.
        # NOTE(review): presumably it accepts IOB1/IOB2 input and rewrites
        # ``tags`` as IOB2 in place -- confirm against its definition.
        if not iob2(tags):
            s_str = '\n'.join(' '.join(w) for w in s)
            raise ValueError('Sentences should be given in IOB format! ' +
                             'Please check sentence %i:\n%s' % (i, s_str))

        # Scheme conversion: ``tags`` is a separate list built above, so the
        # records in ``s`` still hold the original tags at this point. For
        # 'iob' the checked list is copied back; for 'iobes' the list built
        # by ``iob_iobes`` (defined elsewhere) is copied back instead.
        if tag_scheme == 'iobes':
            tags = iob_iobes(tags)
        for word, new_tag in zip(s, tags):
            word[-1] = new_tag