def GetScore(Text, Lexicon, Rules=None, verbose=False, hyphen=False):
    """Tally lexicon and substitution-rule matches for the tokens of ``Text``.

    ``Text`` is tokenized with ``TokenGen.Basic`` by default, or with
    ``TokenGen.Hyphen`` when ``hyphen`` is true.  Every token is then placed
    in one of two buckets and checked first against ``Lexicon`` and, only if
    that fails, against ``Rules``.

    Bucket assignment uses ``str.islower()``: a token for which it returns
    true is "lower-case"; *every other* token (including tokens with no cased
    characters at all, such as digits) is counted in the "capitalized"
    bucket.

    Args:
        Text: Input handed unchanged to the tokenizer.
        Lexicon: Container queried with ``token in Lexicon``.
        Rules: Optional container queried with ``token in Rules`` for tokens
            not found in ``Lexicon``.  ``None`` (the default) means no
            substitution rules.
        verbose: When true, print progress messages and the six totals.
        hyphen: When true, tokenize with ``TokenGen.Hyphen`` (which also
            receives ``Lexicon`` and ``Rules``) instead of ``TokenGen.Basic``.

    Returns:
        A 6-tuple ``(CapsCount, CapsMatch, CapsSub, LowCount, LowMatch,
        LowSub)``: per bucket, the number of tokens, of lexicon matches and
        of rule matches.
    """
    # Build the empty default per call: a ``set()`` default in the signature
    # would be a single object shared by all calls and by TokenGen.Hyphen.
    if Rules is None:
        Rules = set()

    if verbose:
        print("Attempting token matching")
        # Truthiness covers any empty container, not just an empty set.
        if not Rules:
            print("No substitution rules loaded")

    ## Only look for possible fragmented matches when asked to; the basic
    ## tokenizer is the default.
    if hyphen:
        tokens = TokenGen.Hyphen(Text, Lexicon, Rules, verbose)
    else:
        tokens = TokenGen.Basic(Text, verbose)

    ## Separate tallies for capitalized and lower-case tokens, and within
    ## each for dictionary matches versus substitution-rule matches.
    caps_count = caps_match = caps_sub = 0
    low_count = low_match = low_sub = 0

    for token in tokens:
        if token.islower():
            low_count += 1
            if token in Lexicon:
                low_match += 1
            elif token in Rules:
                low_sub += 1
        else:
            caps_count += 1
            if token in Lexicon:
                caps_match += 1
            elif token in Rules:
                caps_sub += 1

    if verbose:
        print("\t" + str(caps_count) + " total capitalized tokens")
        print("\t" + str(caps_match) + " total capitalized dictionary matches")
        print("\t" + str(caps_sub) + " total capitalized valid substitutions")
        print("\t" + str(low_count) + " total lower-case tokens")
        print("\t" + str(low_match) + " total lower-case dictionary matches")
        print("\t" + str(low_sub) + " total lower-case valid substitutions\n")

    ## Return the six scores as a tuple.
    return (caps_count, caps_match, caps_sub, low_count, low_match, low_sub)