コード例 #1
0
 def test_iob2(self):
     """Print a sample tag sequence before and after ``utils.iob2``.

     This is a smoke test: it makes no assertions and only prints the tag
     list so the effect of ``utils.iob2`` can be inspected by eye.  The
     sample contains an ``E-`` prefixed tag at the end of a ``B-``/``I-``
     run, surrounded by ``'O'`` tags.
     """
     # Single-argument print(...) calls parse on Python 3 and emit the same
     # text as the former Python 2 print statements.
     print('\n\niob2:')
     tags = [
         'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
         'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
         u'B-ORGANIZATION:CORPORATION', u'I-ORGANIZATION:CORPORATION',
         u'I-ORGANIZATION:CORPORATION', u'E-ORGANIZATION:CORPORATION', 'O',
         'O', 'O', 'O', 'O', 'O', 'O', 'O'
     ]
     print('original:\n%s' % (tags,))
     # The return value is ignored; presumably iob2 may rewrite the list in
     # place, which is why the same list is printed again -- TODO confirm.
     utils.iob2(tags)
     print('new:\n%s' % (tags,))
コード例 #2
0
def update_tag_scheme(sentences, tag_scheme, removeTag=None):
    #{{{
    """
    Validate the tags of every sentence and rewrite them in place.

    The tag of a word is its last field (``word[-1]``).  Each sentence's
    tags are first passed to ``iob2``; a falsy result is treated as a
    malformed sentence.

    Args:
        sentences: iterable of sentences, each a sequence of mutable words
            (sequences of strings) whose last item is the tag.
        tag_scheme: ``'iob'`` to write back the tags as they are after the
            ``iob2`` call, or ``'iobes'`` to write back ``iob_iobes(tags)``.
        removeTag: optional container of labels (the part of a tag after
            its two-character prefix).  With ``'iob'``, a word whose label
            is in ``removeTag`` gets the tag ``'O'``.  It is not consulted
            for ``'iobes'``.

    Raises:
        Exception: if ``iob2`` rejects a sentence, or if ``tag_scheme`` is
            neither ``'iob'`` nor ``'iobes'``.
    """
    for i, s in enumerate(sentences):
        tags = [w[-1] for w in s]
        # Check that tags are given in the IOB format.
        # NOTE(review): writing `tags` back below only makes sense if iob2
        # also normalises the list in place -- confirm against iob2.
        if not iob2(tags):
            s_str = '\n'.join(' '.join(w) for w in s)
            raise Exception('Sentences should be given in IOB format! ' +
                            'Please check sentence %i:\n%s' % (i, s_str))
        if tag_scheme == 'iob':
            # If format was IOB1, we convert to IOB2.  The write-back must
            # also happen when no removeTag filter is given; previously it
            # was skipped entirely in that (default) case.
            for word, new_tag in zip(s, tags):
                if removeTag is not None and new_tag[2:] in removeTag:
                    word[-1] = 'O'
                else:
                    word[-1] = new_tag
        elif tag_scheme == 'iobes':
            new_tags = iob_iobes(tags)
            for word, new_tag in zip(s, new_tags):
                word[-1] = new_tag
        else:
            raise Exception('Unknown tagging scheme!')
コード例 #3
0
ファイル: loader.py プロジェクト: natemccoy/tagger
def update_tag_scheme(sentences, tag_scheme):
    """
    Validate sentence tags and rewrite them in place for ``tag_scheme``.

    The tag of a word is its last field.  With ``'generic'`` the function
    returns immediately and nothing is checked or changed.  Otherwise each
    sentence's tags are passed to ``iob2`` (a falsy result raises), and then
    written back either as they are (``'iob'``) or after ``iob_iobes``
    (``'iobes'``).  Any other scheme raises.
    """
    if tag_scheme == 'generic':
        return
    for index, sentence in enumerate(sentences):
        tags = [token[-1] for token in sentence]
        # Reject sentences whose tags iob2 does not accept.
        if not iob2(tags):
            dump = '\n'.join(' '.join(token) for token in sentence)
            raise Exception('Sentences should be given in IOB format! ' +
                            'Please check sentence %i:\n%s' % (index, dump))
        # Pick the tag sequence to store, then store it in one pass.
        if tag_scheme == 'iob':
            converted = tags
        elif tag_scheme == 'iobes':
            converted = iob_iobes(tags)
        else:
            raise Exception('Unknown tagging scheme!')
        for token, tag in zip(sentence, converted):
            token[-1] = tag
コード例 #4
0
def check_tag_scheme(sentences, tag_scheme):
    """
    Check that every sentence's tags are accepted by ``iob2``.

    The tag of a word is its last field.  Nothing is written back to the
    sentences here.

    Args:
        sentences: iterable of sentences, each a sequence of words
            (sequences of strings) whose last item is the tag.
        tag_scheme: accepted for signature compatibility; not used by
            this function.

    Raises:
        Exception: if ``iob2`` returns a falsy value for a sentence; the
            message contains the sentence index and its words.
    """
    for i, s in enumerate(sentences):
        tags = [w[-1] for w in s]
        # Check that tags are given in the IOB format
        if not iob2(tags):
            s_str = '\n'.join(' '.join(w) for w in s)
            raise Exception('Sentences should be given in IOB format! ' +
                            'Please check sentence %i:\n%s' % (i, s_str))
コード例 #5
0
ファイル: loader.py プロジェクト: ManHieu/NER_pytoch
def update_tag_scheme(sentences, tag_scheme):
    """
    Validate sentence tags with ``iob2`` and write them back in place.

    The tag of a word is its last field.  Only the ``'iob'`` scheme is
    supported; any other value raises once a sentence has passed the
    ``iob2`` check.
    """
    for idx, sent in enumerate(sentences):
        labels = [token[-1] for token in sent]
        if not iob2(labels):
            raise Exception("Sentence {}: {} should be given in IOB format!".format(idx, sent))

        # Guard clause: everything except 'iob' is rejected.
        if tag_scheme != 'iob':
            raise Exception('Unknown tagging scheme!')
        for token, label in zip(sent, labels):
            token[-1] = label
コード例 #6
0
def update_tag_scheme(sentences, tag_scheme):
    """
    Validate sentence tags and rewrite them in place.

    ``tag_scheme`` is compared case-insensitively and must be ``'iob'`` or
    ``'iobes'``.  The tag of a word is its last field.
    """
    for idx, sentence in enumerate(sentences):
        tags = [token[-1] for token in sentence]
        # utils.iob2 is used as the validity check; a falsy result aborts.
        if not utils.iob2(tags):
            dump = '\n'.join(' '.join(token) for token in sentence)
            raise Exception('Sentences should be given in IOB format! ' +
                            'Please check sentence %i:\n%s' % (idx, dump))
        # Lower-cased only after validation, as in the original ordering.
        scheme = tag_scheme.lower()
        if scheme == 'iob':
            final_tags = tags
        elif scheme == 'iobes':
            final_tags = utils.iob_iobes(tags)
        else:
            raise Exception('Unknown tagging scheme!')
        for token, tag in zip(sentence, final_tags):
            token[-1] = tag
コード例 #7
0
def update_tag_scheme(sentences, tag_scheme, file_format="conll"):
    """
    Validate sentence tags and rewrite them in place for ``tag_scheme``.

    With ``file_format == "conll"`` the tag is the last field of each word.
    With ``"conllu"`` the tag is read via
    ``extract_correct_ner_tag_from_conllu`` and stored under the
    ``"NER_TAG"`` key of the column at index 9; sentences whose first word
    has no ``"NER_TAG"`` golden label are skipped.  For any other
    ``file_format`` the tag list is empty and nothing is written.

    ``tag_scheme`` must be ``'iob'`` or ``'iobes'``; anything else raises.
    """
    for idx, sentence in enumerate(sentences):
        # Collect the current tags according to the file format.
        if file_format == "conll":
            tags = [token[-1] for token in sentence]
        elif file_format == "conllu":
            if not contains_golden_label(sentence[0], "NER_TAG"):
                continue
            tags = [extract_correct_ner_tag_from_conllu(token)
                    for token in sentence]
        else:
            tags = []
        # A falsy result from iob2 means the sentence is rejected.
        if not iob2(tags):
            dump = '\n'.join(' '.join(token) for token in sentence)
            print(dump.encode("utf8"))
            raise Exception('Sentences should be given in IOB format! ' +
                            'Please check sentence %i:\n%s' % (idx, dump))
        # Choose the tag sequence to store.
        if tag_scheme == 'iob':
            target_tags = tags
        elif tag_scheme == 'iobes':
            target_tags = iob_iobes(tags)
        else:
            raise Exception('Unknown tagging scheme!')
        # Store it back in the place dictated by the file format.
        for token, tag in zip(sentence, target_tags):
            if file_format == "conll":
                token[-1] = tag
            elif file_format == "conllu":
                misc_fields = load_MISC_column_contents(token[9])
                misc_fields["NER_TAG"] = tag
                token[9] = compile_MISC_column_contents(misc_fields)
コード例 #8
0
def update_tag_scheme(sentences,tag_scheme):
    """
    Build new sentences whose tags follow ``tag_scheme``.

    The input sentences are not modified.  Each output word is a two-item
    list ``[word[0], tag]``: only the first field of the input word and the
    new tag are kept.

    Args:
        sentences: iterable of sentences, each a sequence of words
            (sequences of strings) whose last item is the tag.
        tag_scheme: ``'iob'`` to keep the result of ``iob2`` as is, or
            ``'iobes'`` to pass it through ``iob_iobes``.

    Returns:
        A list with one re-tagged sentence per input sentence.

    Raises:
        Exception: if ``iob2`` returns a falsy value for a sentence (the
            message carries the 1-based sentence number), or if
            ``tag_scheme`` is unknown.
    """
    new_sentences = []
    for i, sentence in enumerate(sentences):
        tags = [word[-1] for word in sentence]
        # iob2's return value is used as the new tag sequence; a falsy
        # value is treated as "not IOB".
        new_tags = iob2(tags)
        if not new_tags:
            error_str = '\n'.join(' '.join(word) for word in sentence)
            # The % formatting must be applied to the message string, not
            # to the Exception instance (which would raise TypeError).
            raise Exception("Sentence should be given in IOB format! "
                            "Please check sentence %i \n %s" % (i + 1, error_str))
        # convert tagging scheme
        if tag_scheme == 'iob':
            pass
        elif tag_scheme == 'iobes':
            new_tags = iob_iobes(new_tags)
        else:
            raise Exception('Unknown tag scheme!')
        new_sentences.append(
            [[word[0], tag] for word, tag in zip(sentence, new_tags)])
    return new_sentences
コード例 #9
0
def update_tag_scheme(sentences, tag_scheme):
    """
    Validate sentence tags and rewrite them in place for ``tag_scheme``.

    The tag of a word is its last field.  Each sentence's tags are passed
    to ``iob2``; a falsy result prints the sentence and raises.

    Args:
        sentences: iterable of sentences, each a sequence of mutable words
            (sequences of strings) whose last item is the tag.
        tag_scheme: ``'iobes'`` to write the tags back as they are after
            the ``iob2`` call, or ``'iob'`` to write back
            ``tags_to_iob(tags)``.

    Raises:
        Exception: if ``iob2`` rejects a sentence, or if ``tag_scheme`` is
            neither ``'iobes'`` nor ``'iob'``.
    """

    for i, s in enumerate(sentences):
        tags = [w[-1] for w in s]
        # Check the tags with iob2 before touching the sentence.
        if not iob2(tags):
            s_str = '\n'.join(' '.join(w) for w in s)
            # Function-call form: same output on Python 2, valid on Python 3.
            print(s_str)
            raise Exception('Sentences should be given in IOBES format! ' +
                            'Please check sentence %i:\n%s' % (i, s_str))
        if tag_scheme == 'iobes':
            for word, new_tag in zip(s, tags):
                word[-1] = new_tag
        elif tag_scheme == 'iob':
            new_tags = tags_to_iob(tags)
            for word, new_tag in zip(s, new_tags):
                word[-1] = new_tag
        else:
            raise Exception('Unknown tagging scheme!')
コード例 #10
0
ファイル: loader.py プロジェクト: ebolless/InovationWithNER
def update_tag_scheme(sentences, tag_scheme):
    """
    Rewrite sentence tags in place, skipping sentences ``iob2`` rejects.

    The tag of a word is its last field.  A sentence for which ``iob2``
    returns a falsy value is reported on stdout and left as it is; it is
    not removed from ``sentences``.  Accepted sentences get their tags
    written back as they are (``'iob'``) or after ``iob_iobes``
    (``'iobes'``).  Any other scheme raises.
    """
    for idx, sentence in enumerate(sentences):
        tags = [token[-1] for token in sentence]
        if not iob2(tags):
            # Best-effort: report and move on instead of raising.
            dump = '\n'.join(' '.join(token) for token in sentence)
            print('Removing Problematic sentence: %i:\n%s' % (idx, dump))
            continue
        if tag_scheme == 'iob':
            updated_tags = tags
        elif tag_scheme == 'iobes':
            updated_tags = iob_iobes(tags)
        else:
            raise Exception('Unknown tagging scheme!')
        for token, tag in zip(sentence, updated_tags):
            token[-1] = tag