Esempio n. 1
0
def GetScore(Text,Lexicon,Rules=None,verbose=False,hyphen=False):
    """Tokenize Text and count dictionary / substitution-rule matches.

    Tokens are split into two groups using ``str.islower()``: tokens for
    which it returns True are "lower-case"; every other token is counted
    as "capitalized".  Note that this second group therefore also contains
    mixed-case tokens and tokens with no cased characters at all (e.g.
    pure digits or punctuation).

    For each token, a match in Lexicon is counted first; only when the
    token is not in Lexicon is it checked against Rules.

    Args:
        Text: Input handed unchanged to the tokenizer.
        Lexicon: Container supporting ``in``; tokens found in it count as
            dictionary matches.
        Rules: Optional container supporting ``in``; tokens found in it
            (and not in Lexicon) count as valid substitutions.  Defaults
            to an empty set, created fresh on every call.
        verbose: When true, print progress and the six totals to stdout.
        hyphen: When true, tokenize with ``TokenGen.Hyphen`` (which also
            receives Lexicon and Rules); otherwise use ``TokenGen.Basic``.
            NOTE(review): the comments in this file describe the hyphen
            mode as checking for fragmented matches -- confirm against
            TokenGen.

    Returns:
        The tuple ``(CapsCount, CapsMatch, CapsSub, LowCount, LowMatch,
        LowSub)``: total tokens, Lexicon matches and Rules matches for the
        capitalized group, followed by the same three for the lower-case
        group.
    """

    # Build the default here rather than in the signature: a default set
    # in the signature is a single object shared by every call, and it is
    # passed on to TokenGen.Hyphen, which this function cannot vouch for.
    if Rules is None:
        Rules = set()

    if verbose:
        print("Attempting token matching")
        # Truthiness also recognises empty containers that are not sets.
        if not Rules:
            print("No substitution rules loaded")

    # Basic tokenizing is the default; the hyphen-aware tokenizer is only
    # used when explicitly requested.
    if hyphen:
        Tokens = TokenGen.Hyphen(Text,Lexicon,Rules,verbose)
    else:
        Tokens = TokenGen.Basic(Text,verbose)

    CapsCount = CapsMatch = CapsSub = 0
    LowCount = LowMatch = LowSub = 0

    # Separate tallies are kept for lower-case and capitalized tokens, and
    # within each group for dictionary matches versus substitution matches.
    for word in Tokens:
        in_lexicon = word in Lexicon
        # Rules are consulted only when the dictionary lookup fails, and
        # only when there are any rules to consult.
        in_rules = (not in_lexicon) and bool(Rules) and word in Rules

        if word.islower():
            LowCount += 1
            if in_lexicon:
                LowMatch += 1
            elif in_rules:
                LowSub += 1
        else:
            CapsCount += 1
            if in_lexicon:
                CapsMatch += 1
            elif in_rules:
                CapsSub += 1

    if verbose:
        print("\t" + str(CapsCount) + " total capitalized tokens")
        print("\t" + str(CapsMatch) + " total capitalized dictionary matches")
        print("\t" + str(CapsSub) + " total capitalized valid substitutions")
        print("\t" + str(LowCount) + " total lower-case tokens")
        print("\t" + str(LowMatch) + " total lower-case dictionary matches")
        print("\t" + str(LowSub) + " total lower-case valid substitutions\n")

    # Return the six scores as a tuple.
    return (CapsCount,CapsMatch,CapsSub,LowCount,LowMatch,LowSub)