예제 #1
0
def compute_two_gram_frequencies(words: list):
    """
    Count how often each pair of adjacent words occurs.

    :param words: list of strings, in the order they appear in the text
    :return: an empty list when words is empty; otherwise the result of
             compute_word_frequencies applied to the list of two-grams

    A two-gram is two neighbouring words joined by a single space, so
    ["a", "b", "c"] yields the two-grams "a b" and "b c".
    """
    if words:
        # Two independent iterators over the same sequence; the second is
        # advanced by one element so zip() pairs each word with its successor.
        leading, trailing = itertools.tee(words)
        next(trailing, None)
        two_grams = list(map(" ".join, zip(leading, trailing)))
        return compute_word_frequencies(two_grams)
    return []
예제 #2
0
def compute_two_gram_frequencies(words: list):
    """
    Count how often each pair of adjacent words occurs.

    :param words: list of strings, in the order they appear in the text
    :return: an empty list when words is empty; otherwise the result of
             compute_word_frequencies applied to the list of two-grams

    A two-gram is two neighbouring words joined by a single space, so
    ["a", "b", "c"] yields the two-grams "a b" and "b c".
    """
    if words:
        # Two independent iterators over the same sequence; the second is
        # advanced by one element so zip() pairs each word with its successor.
        leading, trailing = itertools.tee(words)
        next(trailing, None)
        two_grams = list(map(" ".join, zip(leading, trailing)))
        return compute_word_frequencies(two_grams)
    return []
예제 #3
0
파일: Main.py 프로젝트: Aznchris/UCI
def main():
    """
    Run the interactive text menu until the user enters 4.

    Option 0 prompts for a file path and tokenizes that file with
    Utilities.tokenize_file. Options 1-3 run one of the frequency
    computations on the tokens from the most recent option 0 and print the
    result with Utilities.print_frequencies; they print "No File Selected"
    while the stored path is still empty. Any other input re-shows the menu.
    """
    menu = """
    Enter 0 to set the input file
    Enter 1 to compute_word_frequencies
    Enter 2 to compute_2gram_frequencies
    Enter 3 to compute_palindrome_frequencies
    Enter 4 to exit

    """
    selected_path = ""
    while True:
        choice = input(menu)

        if choice == "4":
            break

        if choice == "0":
            selected_path = input("Enter the path to the file: ")
            print("Tokenizing File")
            tokens = Utilities.tokenize_file(selected_path)
            print("File Tokenized")
            continue

        # Anything other than an analysis option just shows the menu again.
        if choice not in ("1", "2", "3"):
            continue

        # An empty path means option 0 has not supplied a file yet.
        if selected_path == "":
            print("No File Selected")
            continue

        if choice == "1":
            print("Computing Word Frequencies")
            counts = compute_word_frequencies(tokens)
        elif choice == "2":
            counts = compute_two_gram_frequencies(tokens)
        else:
            counts = compute_palindrome_frequencies(tokens)
        Utilities.print_frequencies(counts)
        print("Done")
예제 #4
0
def main():
    """
    Run the interactive text menu until the user enters 4.

    Option 0 prompts for a file path and tokenizes that file with
    Utilities.tokenize_file. Options 1-3 run one of the frequency
    computations on the tokens from the most recent option 0 and print the
    result with Utilities.print_frequencies; they print "No File Selected"
    while the stored path is still empty. Any other input re-shows the menu.
    """
    menu = """
    Enter 0 to set the input file
    Enter 1 to compute_word_frequencies
    Enter 2 to compute_2gram_frequencies
    Enter 3 to compute_palindrome_frequencies
    Enter 4 to exit

    """
    selected_path = ""
    while True:
        choice = input(menu)

        if choice == "4":
            break

        if choice == "0":
            selected_path = input("Enter the path to the file: ")
            print("Tokenizing File")
            tokens = Utilities.tokenize_file(selected_path)
            print("File Tokenized")
            continue

        # Anything other than an analysis option just shows the menu again.
        if choice not in ("1", "2", "3"):
            continue

        # An empty path means option 0 has not supplied a file yet.
        if selected_path == "":
            print("No File Selected")
            continue

        if choice == "1":
            print("Computing Word Frequencies")
            counts = compute_word_frequencies(tokens)
        elif choice == "2":
            counts = compute_two_gram_frequencies(tokens)
        else:
            counts = compute_palindrome_frequencies(tokens)
        Utilities.print_frequencies(counts)
        print("Done")
예제 #5
0
def compute_palindrome_frequencies(words: list):
    """
    Collect palindromic spans from a list of words and count them.

    :param words: list of strings
    :return: an empty list when words is empty; otherwise the result of
             compute_word_frequencies applied to every span recorded below

    setup(words) supplies three values: the text that is scanned, a
    collection of indices at which a span may begin, and a collection of
    indices at which a span may end (presumably the space-joined words and
    their word-boundary indices -- confirm against setup).

    For every index in the text, two windows are grown outwards one character
    per side: first the two-character window starting at that index, then the
    three-character window centred on it. A window is recorded when
    is_valid_palindrome accepts it and it begins and ends on indices from the
    two collections. The same span may be recorded more than once.
    """
    found = []
    if not words:
        return found

    text, word_starts, word_ends = setup(words)
    text_len = len(text)

    # NOTE(review): defined but never read in this function; presumably the
    # minimum palindrome length is enforced inside is_valid_palindrome -- confirm.
    MIN_LEN = 3

    for center in range(text_len):
        # Starting pointer pairs, tried in this order: the two-character
        # window (center, center + 1), then the three-character window
        # (center - 1, center + 1).
        for left, right in ((center, center + 1), (center - 1, center + 1)):
            while check_indexes(left, right, text_len) and check_reverse_match(left, right, text):
                # A pointer resting on a space steps past it on its own, so a
                # span is never sliced with a space at either edge.
                if text[left] == " ":
                    left -= 1
                    continue
                if text[right] == " ":
                    right += 1
                    continue
                candidate = text[left:right + 1]
                if is_valid_palindrome(candidate) and left in word_starts and right in word_ends:
                    found.append(candidate)
                # Grow the window by one character on each side.
                left -= 1
                right += 1

    return compute_word_frequencies(found)