Beispiel #1
0
def calc_types_distribution_for_completly_wrongly_recognized_entities(ner_id, lang = 'nl', model = None):
    """Print the entity-type distribution of recognized tokens that fail the location match.

    Loads the recognized and the matched tokens for ``ner_id``, keeps the
    recognized tokens selected by
    ``TokenSet.NotMatchSet(TokenSet.MatchLocationSet(matched))`` and prints,
    for each of the LOC / PER / ORG / MISC entity types, the number of
    remaining tokens and their integer percentage of the total.

    NOTE(review): judging by the names, the filter presumably keeps the
    recognized tokens whose location matches none of the matched tokens --
    confirm against the ``TokenSet`` implementation.

    Args:
        ner_id: Identifier passed to the token loaders and shown in the
            report header.
        lang: Language code forwarded to the token loaders.
        model: Model forwarded to the token loaders.

    Returns:
        None. The report is written to standard output.
    """
    recognized = load_all_recognized_tokens(ner_id, lang, model)
    matched = load_all_matched_tokens(ner_id, lang, model)

    # Select the recognized tokens rejected by the location matcher that is
    # built from the matched tokens.
    not_matched = TokenSet.NotMatchSet(TokenSet.MatchLocationSet(matched))
    s = TokenSet(TokenSet(recognized).tokens(not_matched))

    misc = s.tokens(Token.NE_MISC)
    loc  = s.tokens(Token.NE_LOC)
    per  = s.tokens(Token.NE_PER)
    org  = s.tokens(Token.NE_ORG)

    total = len(s)

    def percent(count):
        # Integer percentage of the total; report 0 when nothing was selected
        # instead of raising ZeroDivisionError.
        if not total:
            return 0
        return (count * 100) // total

    # Single-argument print(...) produces the same output under Python 2's
    # print statement and Python 3's print function.
    print("======== %s Recognized entities type distribution :" % ner_id)
    print("LOCATIONS    : %4d  %3d" % (len(loc), percent(len(loc))))
    print("PERSONS      : %4d  %3d" % (len(per), percent(len(per))))
    print("ORGANIZATION : %4d  %3d" % (len(org), percent(len(org))))
    print("MISC         : %4d  %3d" % (len(misc), percent(len(misc))))
    print("=============================")
    print("AMOUNT       : %4d  100" % total)