def test_iob2(self):
    """Print a tag sequence before and after passing it to ``utils.iob2``.

    This is a visual smoke test: it makes no assertions and only prints
    ``tags`` before and after the call so the effect can be inspected.
    """
    print('\n\niob2:')
    tags = [
        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        u'B-ORGANIZATION:CORPORATION',
        u'I-ORGANIZATION:CORPORATION',
        u'I-ORGANIZATION:CORPORATION',
        u'E-ORGANIZATION:CORPORATION',
        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
    ]
    # A single pre-formatted argument keeps the output identical whether
    # ``print`` is the Python 2 statement or the Python 3 function.
    print('original:\n%s' % (tags,))
    # The return value is ignored; the same list object is printed again
    # below so any in-place change made by the call becomes visible.
    utils.iob2(tags)
    print('new:\n%s' % (tags,))
def update_tag_scheme(sentences, tag_scheme, removeTag=None): #{{{
    """
    Check and update sentences tagging scheme to IOB2.
    Only IOB1 and IOB2 schemes are accepted.

    The last field of every word in every sentence is overwritten in place.

    Args:
        sentences: iterable of sentences; each sentence is a sequence of
            mutable word rows whose last element is the tag.
        tag_scheme: 'iob' or 'iobes'.
        removeTag: optional container of tag types (the part of the tag
            after the two-character prefix). With the 'iob' scheme, tags of
            these types are replaced by 'O'. Not consulted for 'iobes'.

    Raises:
        Exception: if ``iob2`` rejects a sentence's tags, or if
            ``tag_scheme`` is neither 'iob' nor 'iobes'.
    """
    for i, s in enumerate(sentences):
        tags = [w[-1] for w in s]
        # Check that tags are given in the IOB format
        # NOTE(review): iob2 presumably also rewrites `tags` in place to
        # IOB2 -- confirm against its definition.
        if not iob2(tags):
            s_str = '\n'.join(' '.join(w) for w in s)
            raise Exception('Sentences should be given in IOB format! ' +
                            'Please check sentence %i:\n%s' % (i, s_str))
        if tag_scheme == 'iob':
            # If format was IOB1, we convert to IOB2
            for word, new_tag in zip(s, tags):
                # Always write a tag back: 'O' for types being removed,
                # otherwise the checked tag. A single condition guarantees
                # the write-back happens whether or not removeTag is given.
                if removeTag is not None and new_tag[2:] in removeTag:
                    word[-1] = 'O'
                else:
                    word[-1] = new_tag
        elif tag_scheme == 'iobes':
            new_tags = iob_iobes(tags)
            for word, new_tag in zip(s, new_tags):
                word[-1] = new_tag
        else:
            raise Exception('Unknown tagging scheme!')
def update_tag_scheme(sentences, tag_scheme):
    """
    Validate the tags of every sentence and rewrite them in place.

    With ``tag_scheme == 'generic'`` nothing is checked or modified.
    Otherwise each sentence's tags (the last field of each word) must pass
    ``iob2``; they are then written back as-is for 'iob' or after
    ``iob_iobes`` for 'iobes'.

    Raises:
        Exception: when a sentence fails the ``iob2`` check, or when
            ``tag_scheme`` is not 'generic', 'iob' or 'iobes'.
    """
    if tag_scheme == 'generic':
        return

    for idx, sent in enumerate(sentences):
        labels = [token[-1] for token in sent]

        # Reject sentences whose tags are not in IOB format.
        if not iob2(labels):
            dump = '\n'.join(' '.join(token) for token in sent)
            message = ('Sentences should be given in IOB format! ' +
                       'Please check sentence %i:\n%s' % (idx, dump))
            raise Exception(message)

        # Pick the tag sequence to store for the requested scheme.
        if tag_scheme == 'iob':
            converted = labels
        elif tag_scheme == 'iobes':
            converted = iob_iobes(labels)
        else:
            raise Exception('Unknown tagging scheme!')

        for token, label in zip(sent, converted):
            token[-1] = label
def check_tag_scheme(sentences, tag_scheme):
    """
    Just check for IOB format.

    Collects the last field of every word in each sentence and passes the
    resulting tag list to ``iob2``. Nothing is written back to the words.

    Args:
        sentences: iterable of sentences; each sentence is an iterable of
            word rows whose last element is the tag.
        tag_scheme: not used by this function.

    Raises:
        Exception: if ``iob2`` returns a falsy value for a sentence; the
            message contains the sentence index and its rows.
    """
    for i, s in enumerate(sentences):
        # Copy the tags into a fresh list, so the word rows themselves are
        # not touched by whatever iob2 does with the list.
        tags = [w[-1] for w in s]
        # Check that tags are given in the IOB format
        if not iob2(tags):
            # One row per line, fields separated by spaces, for the message.
            s_str = '\n'.join(' '.join(w) for w in s)
            raise Exception('Sentences should be given in IOB format! ' +
                            'Please check sentence %i:\n%s' % (i, s_str))
# NOTE(review): the triple quote below opens a string whose terminator is not
# visible in this chunk -- presumably it comments out code that follows;
# confirm before touching it.
"""
def update_tag_scheme(sentences, tag_scheme):
    """
    Validate each sentence's tags with ``iob2`` and store them back in place.

    Only ``tag_scheme == 'iob'`` is supported by this variant.

    Raises:
        Exception: when a sentence fails the ``iob2`` check, or when
            ``tag_scheme`` is anything other than 'iob'.
    """
    for position, tokens in enumerate(sentences):
        labels = [token[-1] for token in tokens]

        # The format check comes first, so a badly tagged sentence is
        # reported before an unsupported scheme is.
        if not iob2(labels):
            message = "Sentence {}: {} should be given in IOB format!".format(position, tokens)
            raise Exception(message)

        if tag_scheme != 'iob':
            raise Exception('Unknown tagging scheme!')

        for token, label in zip(tokens, labels):
            token[-1] = label
def update_tag_scheme(sentences, tag_scheme):
    """Validate every sentence's tags and rewrite them in place.

    ``tag_scheme`` is matched case-insensitively against 'iob' and 'iobes'.
    The tags are the last field of each word. They must pass ``utils.iob2``
    and are then stored back unchanged ('iob') or after
    ``utils.iob_iobes`` ('iobes').

    Raises:
        Exception: when ``utils.iob2`` rejects a sentence, or when the
            scheme is neither 'iob' nor 'iobes'.
    """
    for idx, sent in enumerate(sentences):
        labels = [token[-1] for token in sent]

        # utils.iob2 acts as the validity check for the tag list.
        # NOTE(review): it presumably also normalises `labels` in place --
        # confirm against utils.
        if not utils.iob2(labels):
            dump = '\n'.join(' '.join(token) for token in sent)
            message = ('Sentences should be given in IOB format! ' +
                       'Please check sentence %i:\n%s' % (idx, dump))
            raise Exception(message)

        # Lower-cased only after validation, matching the original order of
        # evaluation.
        scheme = tag_scheme.lower()
        if scheme == 'iob':
            converted = labels
        elif scheme == 'iobes':
            converted = utils.iob_iobes(labels)
        else:
            raise Exception('Unknown tagging scheme!')

        for token, label in zip(sent, converted):
            token[-1] = label
def update_tag_scheme(sentences, tag_scheme, file_format="conll"):
    """
    Validate each sentence's NER tags and rewrite them in place.

    For ``file_format == "conll"`` the tag is the last field of each word.
    For ``"conllu"`` it is read with ``extract_correct_ner_tag_from_conllu``
    and stored under the "NER_TAG" key of the column at index 9; sentences
    whose first word has no "NER_TAG" golden label are skipped.

    Raises:
        Exception: when ``iob2`` rejects a sentence's tags, or when
            ``tag_scheme`` is neither 'iob' nor 'iobes'.
    """
    for idx, sent in enumerate(sentences):
        # Gather the current tags according to the file layout.
        if file_format == "conll":
            labels = [token[-1] for token in sent]
        elif file_format == "conllu":
            if not contains_golden_label(sent[0], "NER_TAG"):
                continue
            labels = [extract_correct_ner_tag_from_conllu(token) for token in sent]
        else:
            # Any other format yields an empty tag list.
            labels = []

        # Reject sentences whose tags are not in IOB format.
        if not iob2(labels):
            dump = '\n'.join(' '.join(token) for token in sent)
            print(dump.encode("utf8"))
            message = ('Sentences should be given in IOB format! ' +
                       'Please check sentence %i:\n%s' % (idx, dump))
            raise Exception(message)

        # Pick the tag sequence to store for the requested scheme.
        if tag_scheme == 'iob':
            converted = labels
        elif tag_scheme == 'iobes':
            converted = iob_iobes(labels)
        else:
            raise Exception('Unknown tagging scheme!')

        # Write each tag back where the file layout keeps it.
        for token, label in zip(sent, converted):
            if file_format == "conll":
                token[-1] = label
            elif file_format == "conllu":
                misc = load_MISC_column_contents(token[9])
                misc["NER_TAG"] = label
                token[9] = compile_MISC_column_contents(misc)
def update_tag_scheme(sentences, tag_scheme):
    """
    Check and update sentences tagging scheme to IOB2.
    Only IOB1 and IOB2 schemes are accepted.

    Unlike an in-place update, this builds and returns a new list. Each
    output word is ``[word[0], tag]``, so any fields between the first and
    the last of the input word are dropped.

    Args:
        sentences: iterable of sentences; each sentence is a sequence of
            word rows whose first element is kept and whose last element
            is the tag.
        tag_scheme: 'iob' or 'iobes'.

    Returns:
        A list of sentences, each a list of ``[word[0], tag]`` pairs.

    Raises:
        Exception: if ``iob2`` returns a falsy value for a sentence, or if
            ``tag_scheme`` is neither 'iob' nor 'iobes'.
    """
    new_sentences = []
    for i, sentence in enumerate(sentences):
        tags = [word[-1] for word in sentence]
        # check whether tagging scheme is IOB format or not
        # NOTE(review): any falsy result counts as failure, so an empty
        # list returned for an empty sentence would raise too -- confirm
        # what iob2 returns in that case.
        new_tags = iob2(tags)
        if not new_tags:
            error_str = '\n'.join([' '.join(word) for word in sentence])
            # The % formatting must be applied to the message string inside
            # the constructor. Applying it to the Exception object raises
            # TypeError and loses the message.
            raise Exception("Sentence should be given in IOB format! "
                            "Please check sentence %i \n %s" % (i + 1, error_str))
        # convert tagging scheme
        if tag_scheme == 'iob':
            pass
        elif tag_scheme == 'iobes':
            new_tags = iob_iobes(new_tags)
        else:
            raise Exception('Unknown tag scheme!')
        new_sentences.append([[word[0], tag] for word, tag in zip(sentence, new_tags)])
    return new_sentences
def update_tag_scheme(sentences, tag_scheme):
    """
    Check every sentence's tags and rewrite them in place.

    The tags (last field of each word) must pass ``iob2``. For
    ``tag_scheme == 'iobes'`` they are then written back as they are; for
    ``'iob'`` they are first passed through ``tags_to_iob``.

    Args:
        sentences: iterable of sentences; each sentence is a sequence of
            mutable word rows whose last element is the tag.
        tag_scheme: 'iobes' or 'iob'.

    Raises:
        Exception: if ``iob2`` rejects a sentence's tags, or if
            ``tag_scheme`` is neither 'iobes' nor 'iob'.
    """
    for i, s in enumerate(sentences):
        tags = [w[-1] for w in s]
        # Check that tags are given in the IOB format
        if not iob2(tags):
            s_str = '\n'.join(' '.join(w) for w in s)
            # Parenthesised single argument: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print(s_str)
            raise Exception('Sentences should be given in IOBES format! ' +
                            'Please check sentence %i:\n%s' % (i, s_str))
        if tag_scheme == 'iobes':
            for word, new_tag in zip(s, tags):
                word[-1] = new_tag
        elif tag_scheme == 'iob':
            new_tags = tags_to_iob(tags)
            for word, new_tag in zip(s, new_tags):
                word[-1] = new_tag
        else:
            raise Exception('Unknown tagging scheme!')
def update_tag_scheme(sentences, tag_scheme):
    """
    Validate each sentence's tags and rewrite the valid ones in place.

    A sentence whose tags fail ``iob2`` is reported on stdout and skipped.
    It is not raised on, and it stays in ``sentences`` with its words
    untouched. Valid sentences get their tags written back as-is for 'iob'
    or after ``iob_iobes`` for 'iobes'.

    Raises:
        Exception: when a valid sentence is processed with a
            ``tag_scheme`` that is neither 'iob' nor 'iobes'.
    """
    for idx, sent in enumerate(sentences):
        labels = [token[-1] for token in sent]

        # Bad sentences used to raise here; now they are only logged and
        # passed over.
        if not iob2(labels):
            dump = '\n'.join(' '.join(token) for token in sent)
            print('Removing Problematic sentence: %i:\n%s' % (idx, dump))
            continue

        # Pick the tag sequence to store for the requested scheme.
        if tag_scheme == 'iob':
            converted = labels
        elif tag_scheme == 'iobes':
            converted = iob_iobes(labels)
        else:
            raise Exception('Unknown tagging scheme!')

        for token, label in zip(sent, converted):
            token[-1] = label