Beispiel #1
0
 def __init__(self) -> None:
     """Create the empty frequency table and load the reference dictionary.

     Both tables are built from the class-level ``HASH_BASE`` and
     ``TABLE_SIZE`` values, and ``english_large.txt`` is read with no time
     limit.
     """
     base, size = self.HASH_BASE, self.TABLE_SIZE

     # Occurrence counts for the words read from input files.
     self.hash_table = LinearProbeHashTable(base, size)

     # Reference dictionary of known words.
     self.dictionary = Dictionary(base, size)
     self.dictionary.load_dictionary("english_large.txt", time_limit=None)

     # Direct handle on the table that backs the dictionary.
     self.warehouse = self.dictionary.hash_table

     # (word, count) pair for the most frequent word; nothing seen yet.
     self.max_word = (None, 0)

     # Count of the most frequent word in the files read so far.
     self.highest_occurrence = 0
Beispiel #2
0
 def __init__(self, hash_base: int = 27183, table_size: int = 1000081) -> None:
     '''Initiates the frequency hash_table, loads the reference dictionary and resets
     max_word, the (word, frequency) pair for the word with the highest frequency.

     The hash base and table size used to be repeated as literals in both constructor
     calls; they are now parameters whose defaults are those same literals, so calling
     the constructor without arguments behaves exactly as before.

     :param hash_base: hash base passed to both the frequency table and the dictionary
     :param table_size: table size passed to both the frequency table and the dictionary
     '''
     self.hash_table = LinearProbeHashTable(hash_base, table_size)
     self.dictionary = Dictionary(hash_base, table_size)
     self.dictionary.load_dictionary("english_large.txt")
     # No word has been counted yet.
     self.max_word = ("", 0)
 def __init__(
         self,
         hash_base: int = LinearProbeHashTable.DEFAULT_HASH_BASE,
         table_size: int = LinearProbeHashTable.DEFAULT_TABLE_SIZE) -> None:
     """
     Builds the backing linear-probe hash table from the given parameters
     :param hash_base: base handed to the hash table
     :param table_size: size handed to the hash table
     :raises ValueError if either parameter is negative
     :returns None
     """
     # Reject negative settings before any table is allocated.
     if any(setting < 0 for setting in (hash_base, table_size)):
         raise ValueError("Hash base and table size must be >= 0")

     backing_table = LinearProbeHashTable(hash_base, table_size)
     self.hash_table = backing_table
Beispiel #4
0
 def __init__(
         self,
         hash_base: int = LinearProbeHashTable.DEFAULT_HASH_BASE,
         table_size: int = LinearProbeHashTable.DEFAULT_TABLE_SIZE) -> None:
     """
     Sets up the frequency counter and loads the dictionary from the English large file
     :param hash_base: base used for both the frequency table and the dictionary; defaults
     to LinearProbeHashTable.DEFAULT_HASH_BASE
     :param table_size: size used for both the frequency table and the dictionary; defaults
     to LinearProbeHashTable.DEFAULT_TABLE_SIZE
     """
     # Table that will hold the occurrence counts.
     self.hash_table = LinearProbeHashTable(hash_base, table_size)

     # Reference dictionary, filled from the word list file.
     word_dictionary = Dictionary(hash_base, table_size)
     self.dictionary = word_dictionary
     word_dictionary.load_dictionary('english_large.txt')

     # (word, count) of the most frequent word; nothing counted yet.
     self.max_word = (None, 0)
Beispiel #5
0
class Frequency:
    """Count how often dictionary words occur in a set of text files.

    Words read from files are counted in ``hash_table``, but only when they
    are also present in ``warehouse``, the table behind the dictionary loaded
    from ``english_large.txt``.
    """
    HASH_BASE = 250726  # hash base handed to both hash tables
    TABLE_SIZE = 1000081  # table size handed to both hash tables

    def __init__(self) -> None:
        """Create the empty frequency table and load the reference dictionary.

        The dictionary file is read with no time limit; any exception raised
        while loading it propagates to the caller.
        """
        # Occurrence counts for the words read from input files.
        self.hash_table = LinearProbeHashTable(self.HASH_BASE, self.TABLE_SIZE)
        # Reference dictionary of known words.
        self.dictionary = Dictionary(self.HASH_BASE, self.TABLE_SIZE)
        self.dictionary.load_dictionary("english_large.txt", time_limit=None)
        # Direct handle on the dictionary's table, used for membership checks.
        self.warehouse = self.dictionary.hash_table
        # (word, count) of the most frequent word seen so far.
        self.max_word = (None, 0)
        # Count of the most frequent word seen so far.
        self.highest_occurrence = 0

    def normalize_word(self, text: str) -> str:
        """Return ``text`` lower-cased with surrounding whitespace and punctuation removed.

        Only the two ends of the string are cleaned; characters inside the
        word (for example the apostrophe in ``don't``) are kept.

        :param text: raw token, possibly wrapped in whitespace or punctuation
        :returns: the cleaned, lower-case token (may be empty)
        :complexity: O(N) where N is the number of characters in ``text``
        """
        # Whitespace goes first so that punctuation hidden behind a trailing
        # newline or space is still reached; the second whitespace strip
        # removes anything the punctuation had been enclosing.
        return text.strip().strip(string.punctuation).strip().lower()

    def add_file(self, filename: str) -> None:
        """Count the dictionary words of ``filename`` if the file can be opened.

        When the file cannot be opened a message is printed and nothing is
        counted; no exception is raised for that case.

        :param filename: path of the text file to analyse
        """
        try:
            # Probe only: confirm the file opens, and always release the handle.
            with open(filename, 'r'):
                pass
        except IOError:
            print("'" + filename + "'", 'File is not accessible')
        else:
            self.words_in_dictionary(filename)

    def words_in_dictionary(self, filename: str) -> None:
        """Add the occurrences of every dictionary word in ``filename`` to the counts.

        Each whitespace-separated token is normalised with ``normalize_word``.
        Tokens missing from ``warehouse`` are ignored; the others have their
        count in ``hash_table`` created at 1 or incremented. ``max_word`` and
        ``highest_occurrence`` are kept in step with the largest count,
        including when that count is reached on a word's first occurrence.

        :param filename: path of a readable UTF-8 text file
        :complexity: O(N) hash-table operations and normalisations, where N is
            the number of tokens in the file
        """
        with open(filename, 'r', encoding="UTF-8") as handle:
            # Line by line yields the same tokens as splitting the whole text,
            # without holding the entire file in memory.
            for line in handle:
                for word in line.split():
                    key = self.normalize_word(word)
                    try:
                        self.warehouse[key]  # membership test only
                    except KeyError:
                        continue  # not a dictionary word

                    try:
                        count = self.hash_table[key] + 1
                    except KeyError:
                        count = 1  # first occurrence of this word
                    self.hash_table[key] = count

                    # Record the real count (not a +1 step) so the maximum
                    # never lags behind the table.
                    if count > self.highest_occurrence:
                        self.highest_occurrence = count
                        self.max_word = (key, count)

    def rarity(self, word: str) -> Rarity:
        """Classify ``word`` by its count relative to the most frequent word.

        :param word: word to classify; it is looked up exactly as given
        :returns: ``Rarity.MISSPELT`` when the word has no count,
            ``Rarity.COMMON`` when its count is at least 1/100 of
            ``highest_occurrence``, ``Rarity.RARE`` when it is below 1/1000,
            and ``Rarity.UNCOMMON`` otherwise
        """
        try:
            count = self.hash_table[word]
        except KeyError:
            # Never counted, so it is not a dictionary word seen in the files.
            return Rarity.MISSPELT

        if count >= self.highest_occurrence / 100:
            return Rarity.COMMON
        if count < self.highest_occurrence / 1000:
            return Rarity.RARE
        return Rarity.UNCOMMON

    def ranking(self) -> ArrayList[Tuple]:
        """Return the counted items sorted by descending count.

        The non-None items produced by iterating ``hash_table`` are copied
        into an ``ArrayList`` and sorted with ``custom_sort``, which orders
        them by the element at index 1 of each item.

        :returns: the sorted ``ArrayList``, or ``None`` when the frequency
            table is empty
        :complexity: dominated by ``custom_sort``
        """
        if self.hash_table.is_empty():
            return None

        order_by = 1  # 1 = descending, 2 = ascending
        ranking_array = ArrayList(len(self.hash_table))
        index = 0
        for item in self.hash_table:
            if item is not None:
                ranking_array.insert(index, item)
                index += 1
        self.custom_sort(ranking_array, order_by)
        return ranking_array

    def custom_sort(self, the_array: ArrayList, order_by: int) -> ArrayList:
        """Quick-sort ``the_array`` in place by the element at index 1 of each item.

        :param the_array: indexable, sized sequence of items such as
            ``(word, count)``
        :param order_by: 1 sorts descending, 2 sorts ascending
        :returns: ``the_array`` itself, after sorting
        :complexity: O(N log N) comparisons when the pivots split evenly,
            O(N^2) in the worst case (for example when many counts are equal);
            recursion depth is O(log N) in every case
        """
        descending = order_by == 1
        ascending = order_by == 2

        def quick_sort(array: ArrayList) -> ArrayList:
            print("........ Sorting Start .......")
            quick_sort_aux(array, 0, len(array) - 1)
            print("........ Done sorting ........")
            return array

        def quick_sort_aux(array: ArrayList, start: int, end: int) -> None:
            # Recurse only into the smaller side and loop over the larger one.
            # The partitions processed are the same as with plain recursion,
            # but the depth stays logarithmic, so long runs of equal counts
            # can no longer exhaust the interpreter's recursion limit.
            while start < end:
                boundary = partition(array, start, end)
                if boundary - start < end - boundary:
                    quick_sort_aux(array, start, boundary - 1)
                    start = boundary + 1
                else:
                    quick_sort_aux(array, boundary + 1, end)
                    end = boundary - 1

        def partition(array: ArrayList, start: int, end: int) -> int:
            """Partition ``array[start..end]`` around its middle item; return the pivot's final index."""
            mid = (start + end) // 2
            pivot = array[mid][1]
            # Park the pivot at the front while the rest is partitioned.
            array[start], array[mid] = array[mid], array[start]
            boundary = start
            for k in range(start + 1, end + 1):
                value = array[k][1]
                # Items that must precede the pivot are moved just behind the
                # growing boundary.
                if (descending and value > pivot) or (ascending and value < pivot):
                    boundary += 1
                    array[k], array[boundary] = array[boundary], array[k]

            # Put the pivot back in its final place.
            array[start], array[boundary] = array[boundary], array[start]
            return boundary

        return quick_sort(the_array)
Beispiel #6
0
 def __init__(self, hash_base: int, table_size: int) -> None:
     '''Creates the linear-probe hash table that backs this object.

     :param hash_base: base handed to the hash table
     :param table_size: size handed to the hash table
     :complexity: that of constructing the LinearProbeHashTable
     '''
     backing_table = LinearProbeHashTable(hash_base, table_size)
     self.hash_table = backing_table
Beispiel #7
0
class Dictionary(LinearProbeHashTable[T]):
    '''Word dictionary stored in a linear-probe hash table, with a console menu.

    Every word is stored lower-cased as a key whose value is 1.
    '''

    def __init__(self, hash_base: int, table_size: int) -> None:
        '''Initiates a hash_table instance based on given hash_base and table_size

        :param hash_base: base handed to the hash table
        :param table_size: size handed to the hash table
        '''
        # NOTE(review): the base-class initialiser is not called here; all
        # words live in the composed ``self.hash_table``. Confirm intended.
        self.hash_table = LinearProbeHashTable(hash_base, table_size)

    def load_dictionary(self, filename: str, time_limit: int = None) -> int:
        '''Loads words from file name into hash_table. Each line is a word.

        :param filename: path of a UTF-8 text file with one word per line
        :param time_limit: maximum loading time in the units returned by
            timer(); None disables the check
        :returns: number of lines read and added
        :raises TimeoutError: when the time limit is exceeded; words added
            before that point stay in the table
        :complexity: O(N) calls to add_word, where N is the number of lines
        '''
        word_count = 0
        with open(filename, 'r', encoding='utf-8') as file:
            start_time = timer()
            for word in file:
                # The clock is checked before each insertion.
                if time_limit is not None and timer() - start_time > time_limit:
                    raise TimeoutError(
                        "Time limit exceeded. Load dictionary failed.")
                self.add_word(word.strip("\n"))
                word_count += 1
        return word_count

    def add_word(self, word: str) -> None:
        '''Adds a given word and stores into hash_table. Word is paired with 1 as the value-pair.

        :param word: word to add; it is stored lower-cased
        '''
        self.hash_table[word.lower()] = 1

    def find_word(self, word: str) -> bool:
        '''Returns true if the given word is stored in the dictionary with the value 1.

        The word is looked up lower-cased. A missing word does not yield
        False: whatever the hash table lookup raises propagates (menu()
        relies on that being KeyError).

        :param word: word to look up
        :raises KeyError: presumably raised by the hash table when the word
            is not stored
        '''
        return self.hash_table[word.lower()] == 1

    def delete_word(self, word: str) -> None:
        '''Deletes the given word from the dictionary.

        :param word: word to delete; it is looked up lower-cased
        :raises KeyError: presumably raised by the hash table when the word
            is not stored
        '''
        del self.hash_table[word.lower()]

    def menu(self) -> None:
        '''Initiates a menu in terminal. Uses methods in Dictionary class. Runs for as long as
        user uses the menu, in other words, while exit_boolean is false. Terminates when exit_boolean
        is true.

        Invalid selections, missing files, exceeded time limits and unknown
        words are reported with a message and the menu is shown again.
        '''
        exit_boolean = False
        while not exit_boolean:
            print("Select option: ")
            print("1 - Read file")
            print("2 - Add a word")
            print("3 - Find a word")
            print("4 - Delete a word")
            print("5 - Exit")

            try:
                option = int(input())
                if option < 1 or option > 5:
                    raise ValueError
            except ValueError:
                print("Input given is invalid, try again!")
                continue

            if option == 1:
                filename = input("What file would you like to import? ")
                # Anything that is not an integer (including an empty
                # answer) means "no time limit".
                try:
                    time_limit = int(
                        input(
                            "How long do you want to wait? (For no time limit, just press enter)"
                        ))
                except ValueError:
                    time_limit = None

                try:
                    total_words = self.load_dictionary(filename, time_limit)
                except FileNotFoundError:
                    print(
                        "File not found. Input was not given the correct directory. Returning to menu."
                    )
                except TimeoutError as e:
                    # A slow import must not terminate the interactive loop.
                    print(str(e))
                else:
                    print("File successfully imported " +
                          str(total_words) + " words!")
            elif option == 2:
                word = input("What word would you like to add? ")
                self.add_word(word)
            elif option == 3:
                try:
                    word = input("What word do you want to find? ")
                    self.find_word(word)
                except KeyError as e:
                    print("Key doesn't exist: " + str(e))
                else:
                    print("Key found!")
            elif option == 4:
                try:
                    word = input(
                        "What word that exists in the dictionary would you like to delete? "
                    )
                    self.delete_word(word)
                except KeyError as e:
                    print("Key doesn't exist: " + str(e))
                else:
                    print("Key successfully deleted!")
            elif option == 5:
                print("Exiting program. ")
                exit_boolean = True