def _bucket_setitem(self, j, k, v):
    """Store value v under key k in the bucket at slot j of self._table.

    An empty slot (None) first receives a new UnsortedTableMap. The overall
    element count self._n is bumped only when the bucket actually grew,
    so overwriting an existing key leaves the count untouched.
    """
    bucket = self._table[j]
    if bucket is None:
        # First entry hashed to this slot: create its bucket.
        bucket = self._table[j] = UnsortedTableMap()
    size_before = len(bucket)
    bucket[k] = v
    if size_before < len(bucket):
        self._n += 1
def _bucket_setitem(self, j, k, v):
    """Assign v to key k inside bucket j, tracking the overall map size.

    The bucket is created lazily when slot j of self._table is still None.
    self._n grows by one only if the assignment made the bucket longer.
    """
    if self._table[j] is None:
        self._table[j] = UnsortedTableMap()   # slot had no bucket yet
    target = self._table[j]
    length_before = len(target)
    target[k] = v
    key_was_new = len(target) > length_before
    if key_was_new:
        self._n += 1                          # one more entry in the map
def __init__(self, original, preprocessed, typeOfMap, index=None):
    """Record the constructor arguments and build the requested map.

    Args:
        original: Stored unchanged (presumably the original document or its
            path - confirm against the callers).
        preprocessed: Stored unchanged (presumably the preprocessed
            document or its path - confirm against the callers).
        typeOfMap: Short name selecting the map implementation: 'avl',
            'unsorted', 'sorted', 'chain', 'probe', 'splay', 'rb', 'dict'
            or 'od'. A falsy or unrecognised value creates no map.
        index: Optional value stored as the index file reference.
    """
    self.__original = original
    self.__preprocessed = preprocessed
    self.__typeOfMap = typeOfMap

    if typeOfMap:
        # Ordered (name, factory) pairs. The factories are deferred so that
        # only the selected map class is ever looked up and instantiated.
        factories = (
            ('avl', lambda: AVLTreeMap()),
            ('unsorted', lambda: UnsortedTableMap()),
            ('sorted', lambda: SortedTableMap()),
            ('chain', lambda: ChainHashMap()),
            ('probe', lambda: ProbeHashMap()),
            ('splay', lambda: SplayTreeMap()),
            ('rb', lambda: RedBlackTreeMap()),
            ('dict', lambda: dict()),
            ('od', lambda: OrderedDict()),
        )
        for name, build in factories:
            if typeOfMap == name:
                self.__map = build()
                break

    self.__indexFile = index
    # Three zero-initialised counters; their meaning is not visible here.
    self.__stats = [0, 0, 0]
def _bucket_setitem(self, j, k, v):
    """Store value v under key k in the bucket at index j of self._table.

    A slot that is still None receives a new UnsortedTableMap first.
    self._n is incremented only when the bucket grew, i.e. when k was not
    already present; overwriting an existing key leaves self._n unchanged.
    """
    if self._table[j] is None:  # no bucket in this slot yet
        self._table[j] = UnsortedTableMap()
    bucket = self._table[j]
    oldsize = len(bucket)
    bucket[k] = v
    # Compare against the bucket's own size, not the number of table slots.
    if len(bucket) > oldsize:  # key is a new entry in the table
        self._n += 1  # increase the overall map size
def _bucket_setitem(self, j, k, v):
    """Put the pair (k, v) into bucket j and keep the entry count current.

    Setting an item either adds a new key or updates an existing one; only
    an addition makes the bucket longer, and only then is self._n raised.
    A None slot in self._table is replaced by a new UnsortedTableMap first.
    """
    slot = self._table[j]
    if slot is None:
        slot = UnsortedTableMap()
        self._table[j] = slot
    entries_before = len(slot)
    slot[k] = v
    if not len(slot) > entries_before:
        return  # pure update: the count stays as it was
    self._n += 1
def _bucket_setitem(self, j, k, v):
    """Set key k to value v in bucket j, updating self._n when needed.

    If slot j of self._table is None, a new UnsortedTableMap is placed
    there before the assignment. Replacing the value of an existing key
    does not change self._n; inserting a new key increases it by one.
    """
    if self._table[j] is None:
        self._table[j] = UnsortedTableMap()
    bucket = self._table[j]
    # Remember the bucket size so a genuine insertion can be told apart
    # from an update of an existing key.
    oldsize = len(bucket)
    bucket[k] = v
    # The growth check must look at this bucket, not at the table length.
    if len(bucket) > oldsize:
        self._n += 1
def _set_bucket_item(self, bucket_idx, key, value):
    """Store value under key in the bucket at bucket_idx.

    A None slot in self._table is filled with a new UnsortedTableMap
    before the write. self._number_of_elements goes up by one only when
    the write made the bucket longer (the key was not there before).
    """
    if self._table[bucket_idx] is None:
        self._table[bucket_idx] = UnsortedTableMap()
    target = self._table[bucket_idx]
    size_before = len(target)
    target[key] = value
    if len(target) > size_before:
        self._number_of_elements += 1
def _bucket_setitem(self, j, k, v) -> None:
    """Write the pair (k, v) into bucket j of the table.

    Args:
        j (int): Index of the bucket inside self._table.
        k (object): The key to insert or overwrite.
        v (object): The new value to set.
    """
    existing = self._table[j]
    if existing is None:
        # Nothing has been stored in this slot yet: start a new bucket.
        existing = self._table[j] = UnsortedTableMap()
    count_before = len(existing)
    existing[k] = v
    grew = len(existing) > count_before
    if grew:
        # The key was not present before, so the map holds one more entry.
        self._n += 1
class TestSimpleTable(unittest.TestCase):
    """Basic functionality tests using a simple table."""

    def setUp(self):
        """Give every test its own empty UnsortedTableMap."""
        self.table = UnsortedTableMap()

    def test_init(self):
        """Does the constructor produce an UnsortedTableMap instance?"""
        self.assertIsNotNone(self.table)
        self.assertIsInstance(self.table, UnsortedTableMap)

    def test_setitem_getitem(self):
        """Can a stored value be read back under the same key?"""
        key = "test key"
        value = "test value"
        self.table[key] = value
        self.assertEqual(self.table[key], value)

    def test_getitem_raises_keyerror(self):
        """Does __getitem__ raise KeyError when the key isn't in the table?"""
        key = "missing"
        with self.assertRaises(KeyError):
            _ = self.table[key]

    def test_setitem_update_existing(self):
        """Does assigning to an existing key replace its value?"""
        key = "test"
        value = 0
        self.table[key] = value
        new_value = 1
        self.table[key] = new_value
        self.assertEqual(self.table[key], new_value)

    def test_delitem(self):
        """Does __delitem__ remove the pair and shrink the table?"""
        # unittest assertions are used throughout: plain assert statements
        # are stripped when Python runs with -O.
        self.assertEqual(len(self.table), 0)
        key = "delete me"
        value = 1
        self.table[key] = value
        del self.table[key]
        self.assertEqual(len(self.table), 0)
        with self.assertRaises(KeyError):
            _ = self.table[key]

    def test_delitem_raises_keyerror(self):
        """Does __delitem__ raise KeyError when trying to delete a key-value
        pair that's not in the map?"""
        key = "missing"
        with self.assertRaises(KeyError):
            del self.table[key]

    def test_iter(self):
        """Does keys() yield keys that can each be looked up?"""
        items = {"a": 1, "b": 2, "c": 3}
        for key, value in items.items():
            self.table[key] = value
        self.assertEqual(len(self.table), 3)
        for key in self.table.keys():
            self.assertIsNotNone(self.table[key])
class Indexer:
    """A class for indexing preprocessed text documents.

    Every whitespace-separated word of the preprocessed file is mapped to
    the list of 1-based line numbers on which it occurs. Searches print the
    lines of the original file that carry those line numbers.
    """

    # Map classes (not instances) keyed by short name. Each Indexer builds
    # its own map, so two indexers of the same type never share one index.
    __structures = {'avl': AVLTreeMap,
                    'unsorted': UnsortedTableMap,
                    'sorted': SortedTableMap,
                    'chain': ChainHashMap,
                    'probe': ProbeHashMap,
                    'splay': SplayTreeMap,
                    'rb': RedBlackTreeMap,
                    'dict': dict,
                    'od': OrderedDict}

    # Human-readable names shown by startUI().
    __names = {'avl': 'AVL Tree Map',
               'unsorted': 'Unsorted Table Map',
               'sorted': 'Sorted Table Map',
               'chain': 'Chain Hash Map',
               'probe': 'Probe Hash Map',
               'splay': 'Splay Tree Map',
               'rb': 'Red and Black Tree Map',
               'dict': 'Python Dictionary',
               'od': 'Python Ordered Dictionary'}

    def __init__(self, original, preprocessed, indexed=None, map_type='rb'):
        """Open the input files and create an empty map.

        Args:
            original: Path of the file whose lines are printed by searches.
            preprocessed: Path of the file that index() reads.
            indexed: Optional path that dump() writes the index to.
            map_type: Key selecting the map implementation; an unknown key
                falls back to 'avl'.
        """
        self.__pre_file = open(preprocessed, 'r', encoding='utf-8-sig')
        try:
            self.__org_file = open(original, 'r', encoding='utf-8-sig')
        except Exception:
            # Do not leak the first handle when the second open fails.
            self.__pre_file.close()
            raise
        self.__map_type = map_type
        self._mapFix(self.__map_type)
        self.__multimap = self.__structures[self.__map_type]()
        self.__average = 0
        self.__median = 0
        self.__indexing_time = 0
        self.__index_out = indexed

    def _mapFix(self, map_type):
        """Fall back to the 'avl' map when map_type is not a known key."""
        if map_type not in self.__structures:
            self.__map_type = 'avl'

    def close(self):
        """Close both input files opened by the constructor."""
        self.__pre_file.close()
        self.__org_file.close()

    def index(self):
        """Read the preprocessed file and index its words.

        Also records the indexing duration, the average number of
        occurrences per distinct word, and the median occurrence count.
        """
        initial_time = time()
        total_terms = 0
        for line_num, line in enumerate(self.__pre_file, start=1):
            for word in line.split():
                try:
                    self.__multimap[word].append(line_num)
                except KeyError:
                    # First occurrence of this word.
                    self.__multimap[word] = [line_num]
                total_terms += 1
        self.__indexing_time = time() - initial_time
        print('Indexing duration is {} seconds.'.format(
            round(self.__indexing_time, 4)))
        distinct_terms = len(self.__multimap)
        # An empty input has no words; avoid dividing by zero.
        self.__average = total_terms / distinct_terms if distinct_terms else 0
        self._find_median()

    def dump(self):
        """Write the index to the output file, one word per line.

        Each line holds the word followed by its comma-separated line
        numbers. Nothing is written when no output path was given.
        """
        if self.__index_out is None:
            return
        with open(self.__index_out, 'w') as out_file:
            for word in self.__multimap:
                # str(list) minus the surrounding brackets: "1, 2, 3".
                lines = str(self.__multimap[word])[1:-1]
                out_file.write('{} {}\n'.format(word, lines))

    def _find_median(self):
        """Store the median occurrence count over all indexed words.

        For an even number of words the upper of the two middle values is
        used. An empty index leaves the median at 0.
        """
        frequencies = sorted(len(self.__multimap[key])
                             for key in self.__multimap)
        self.__median = frequencies[len(frequencies) // 2] if frequencies else 0

    def _search(self, keyword):
        """Print every original line on which keyword was indexed.

        Raises whatever the underlying map raises for a missing key
        (startUI expects KeyError).
        """
        initial_time = time()
        lines = self.__multimap[keyword]
        search_time = time() - initial_time
        wanted = set(lines)  # O(1) membership test per file line
        for line_num, text in enumerate(self.__org_file, start=1):
            if line_num in wanted:
                print('{1}: {0}'.format(text.strip(), line_num))
        self.__org_file.seek(0)  # rewind the file for the next search
        print('\nIt took {:.12f} seconds to find {} occurrence '
              'of {!r}.'.format(search_time, len(lines), keyword))

    def startUI(self):
        """Run an interactive loop that searches for the words entered.

        For each word, the lines it appears on and the number of
        occurrences are printed. The loop ends when the user answers the
        quit prompt with something starting with 'y'.
        """
        print('This search is powered by {}.'.format(
            self.__names[self.__map_type]))
        while True:
            try:
                keyword = input('Enter a word to search for: ').lower()
                if len(keyword) < 3 or not keyword.isalpha():
                    raise ValueError()
                self._search(keyword)
            except KeyError:
                print("Sorry! We couldn't find {!r} in "
                      "the file.\n".format(keyword))
            except RecursionError:
                print("Structure recursion limit has exceeded, please try"
                      " another map!")
            except ValueError:
                print('Invalid Term!\n\tOnly alphabetical words with three or'
                      ' more characters are allowed!')
            except Exception:
                # KeyboardInterrupt and SystemExit are deliberately not
                # caught, so the user can still abort the program.
                print('Error has been occurred!')
            if input("Quit? (y/n): ").lower().startswith('y'):
                break

    def __repr__(self):
        """Return the statistics table as a string."""
        output = 'Total indexed terms:\t{}\n'.format(len(self.__multimap))
        output += 'Average word frequency:\t{}\n'.format(
            round(self.__average, 2))
        output += 'Median word frequency:\t{}\n'.format(self.__median)
        return output
def setUp(self):
    """Create the empty UnsortedTableMap used as the test fixture."""
    fresh_table = UnsortedTableMap()
    self.table = fresh_table