def compute_two_gram_frequencies(words: list):
    """
    Count how often each pair of adjacent words (2-gram) occurs.

    Every word is paired with its immediate successor, the two are joined
    with a single space, and the resulting strings are handed to
    compute_word_frequencies for counting.

    :param words: list of strings, in document order
    :return: an empty list when words is empty; otherwise whatever
             compute_word_frequencies returns for the joined 2-grams
    """
    if not words:
        return []

    # Two independent iterators over the same input; advancing the second
    # by one element makes zip() yield (words[k], words[k + 1]) pairs.
    leading, trailing = itertools.tee(words)
    next(trailing, None)

    joined_pairs = []
    for first, second in zip(leading, trailing):
        joined_pairs.append(" ".join((first, second)))

    return compute_word_frequencies(joined_pairs)
def main():
    """
    Run the interactive text menu until the user chooses to exit.

    Option 0 asks for a file path and tokenizes that file with
    Utilities.tokenize_file. Options 1-3 compute word, 2-gram, or
    palindrome frequencies over the most recently tokenized file and print
    them with Utilities.print_frequencies. Option 4 leaves the loop. Any
    other input simply shows the menu again.
    """
    file_path = ""
    # NOTE(review): the menu text is reproduced exactly as it appears in the
    # source being reviewed; it may originally have spanned several lines.
    instructions = """ Enter 0 to set the input file Enter 1 to compute_word_frequencies Enter 2 to compute_2gram_frequencies Enter 3 to compute_palindrome_frequencies Enter 4 to exit """

    while True:
        response = input(instructions)

        if response == "4":
            break

        if response == "0":
            file_path = input("Enter the path to the file: ")
            print("Tokenizing File")
            tokens = Utilities.tokenize_file(file_path)
            print("File Tokenized")
            continue

        if response not in ("1", "2", "3"):
            # Unrecognised choice: prompt again.
            continue

        # The computations need a tokenized file; a non-empty path is the
        # marker that option 0 has been used.
        if file_path == "":
            print("No File Selected")
            continue

        if response == "1":
            print("Computing Word Frequencies")
            freqs = compute_word_frequencies(tokens)
        elif response == "2":
            freqs = compute_two_gram_frequencies(tokens)
        else:
            freqs = compute_palindrome_frequencies(tokens)

        Utilities.print_frequencies(freqs)
        print("Done")
def _collect_palindromes_from(text, begin, end, word_starts, word_ends, found):
    """Expand outwards from (begin, end) in text, appending hits to found.

    Expansion continues while check_indexes accepts the two positions and
    check_reverse_match accepts the characters at them. A candidate
    text[begin:end + 1] is recorded only if is_valid_palindrome accepts it
    and it starts on a word start and ends on a word end.
    """
    total_length = len(text)
    while (check_indexes(begin, end, total_length)
           and check_reverse_match(begin, end, text)):
        # A boundary sitting on a space is moved one step further out on
        # that side only, then the loop condition is re-evaluated, so a
        # candidate never starts or ends on a space.
        if text[begin] == " ":
            begin -= 1
            continue
        if text[end] == " ":
            end += 1
            continue

        candidate = text[begin:end + 1]
        if (is_valid_palindrome(candidate)
                and begin in word_starts
                and end in word_ends):
            found.append(candidate)

        # Expand outwards on both sides.
        begin -= 1
        end += 1


def compute_palindrome_frequencies(words: list):
    """
    Find palindromes that span whole words and count their frequencies.

    setup(words) supplies three values: the text to scan, the positions at
    which words begin in it, and the positions at which words end in it
    (per the original documentation, a joined string of the words and two
    lists of indices). Every position in the text is used as a centre and
    expanded outwards twice: once between characters i and i + 1, and once
    around character i itself.

    :param words: list of strings
    :return: an empty list when words is empty; otherwise whatever
             compute_word_frequencies returns for the palindromes found
    """
    if not words:
        return []

    complete_words, begin_indices, end_indices = setup(words)

    # Membership is tested inside the innermost loop; building sets once
    # avoids a linear scan of the index collections on every test.
    word_starts = set(begin_indices)
    word_ends = set(end_indices)

    # NOTE(review): no minimum length is enforced here; any such rule
    # presumably lives in is_valid_palindrome - confirm.
    palindromes = []
    for i in range(len(complete_words)):
        # Centre between characters i and i + 1: the first candidate is
        # the two-character slice text[i:i + 2].
        _collect_palindromes_from(
            complete_words, i, i + 1, word_starts, word_ends, palindromes)
        # Centre on character i: the first candidate is the three-character
        # slice text[i - 1:i + 2].
        _collect_palindromes_from(
            complete_words, i - 1, i + 1, word_starts, word_ends, palindromes)

    return compute_word_frequencies(palindromes)