def twoGramFrequencyCount(tokens : [str]) -> [Frequency]:
    '''
    Tallies how often each pair of neighbouring tokens (2gram) occurs.

    Empty-string tokens are discarded before pairing, so the tokens on either
    side of a discarded one are treated as neighbours. Each 2gram is keyed as
    the two tokens joined by a single space.

    tokens:  tokenized input; the caller's list is left untouched.
    returns: [] when tokens is empty, otherwise whatever
             Utilities.collateFrequencies produces for the
             {'token token': count} mapping.
    '''
    if not tokens:  # nothing to count
        return []

    # Work on a filtered copy so the caller's list is not modified.
    words = [word for word in tokens if word != '']

    pairCounts = defaultdict(int)
    # Zipping the list with itself shifted by one yields every adjacent pair.
    for pair in zip(words, words[1:]):
        pairCounts[' '.join(pair)] += 1  # pairCounts['token token'] = frequency value

    return Utilities.collateFrequencies(pairCounts)
def palindromeFrequencyCount(tokens : [str]) -> [Frequency]:
    '''
    Counts the frequency of palindromes in a given list of tokens.

    Tokens are concatenated, in order and with no separator, into a running
    string. As soon as the running string reads the same forwards and
    backwards it is counted as one occurrence of that palindrome and the
    accumulation restarts from the next token. A trailing run that never
    becomes a palindrome is not counted.

    tokens:  tokenized input; it is only read, never modified.
    returns: [] when tokens is empty, otherwise whatever
             Utilities.collateFrequencies produces for the
             {palindrome: count} mapping.
    '''
    if not tokens:  # check if list is empty, [] == False
        return []

    tempFreq = defaultdict(int)
    palindromeAccumulator = ''  # accumulates tokens until a palindrome is formed

    for token in tokens:
        # Keep growing while the accumulator is empty (entry case) or is not
        # yet a palindrome.
        if palindromeAccumulator == '' or palindromeAccumulator != palindromeAccumulator[::-1]:
            palindromeAccumulator += token
        else:
            # A palindrome was completed by the previous token: record it and
            # start the next accumulation from the current token.
            tempFreq[palindromeAccumulator] += 1
            palindromeAccumulator = token

    # The loop only records a palindrome when the *next* token arrives, so the
    # final accumulation has to be checked here. This replaces appending a
    # sentinel token to the caller's list, which leaked into the caller's data.
    if palindromeAccumulator != '' and palindromeAccumulator == palindromeAccumulator[::-1]:
        tempFreq[palindromeAccumulator] += 1

    return Utilities.collateFrequencies(tempFreq)