class CostPerformanceDatabase:
    """Keep only the (cost, performance) pairs that no other pair dominates.

    A pair dominates another when it costs no more and performs at least
    as well.
    """

    def __init__(self):
        """Start with no pairs recorded."""
        # Any sorted map offering find_le/find_gt could be used instead.
        self._M = SortedTableMap()

    def best(self, c):
        """Return the stored (cost, performance) pair with the largest cost <= c.

        The result is None when no stored pair is that cheap.
        """
        affordable = self._M.find_le(c)
        return affordable

    def add(self, c, p):
        """Record cost c with performance p unless an existing pair dominates it.

        After a successful insertion, every more expensive pair whose
        performance is no better than p is removed.
        """
        # The closest pair that is at least as cheap decides whether (c, p)
        # is worth keeping.
        cheaper = self._M.find_le(c)
        if cheaper is not None and cheaper[1] >= p:
            return                      # (c, p) is dominated; ignore it

        self._M[c] = p

        # Sweep upwards from c, discarding pairs that (c, p) now dominates.
        while True:
            pricier = self._M.find_gt(c)
            if pricier is None or not pricier[1] <= p:
                break
            del self._M[pricier[0]]
def __init__(self, original, preprocessed, typeOfMap, index=None):
    """Store the constructor arguments and build the requested map.

    typeOfMap is matched against the short names 'avl', 'unsorted',
    'sorted', 'chain', 'probe', 'splay', 'rb', 'dict' and 'od'.  When it
    is falsy or matches none of them, no map attribute is created.
    """
    self.__original = original
    self.__preprocessed = preprocessed
    self.__typeOfMap = typeOfMap

    # Ordered (name, factory) pairs.  Each factory is deferred so that only
    # the selected map class is ever looked up and instantiated.
    candidates = (
        ('avl', lambda: AVLTreeMap()),
        ('unsorted', lambda: UnsortedTableMap()),
        ('sorted', lambda: SortedTableMap()),
        ('chain', lambda: ChainHashMap()),
        ('probe', lambda: ProbeHashMap()),
        ('splay', lambda: SplayTreeMap()),
        ('rb', lambda: RedBlackTreeMap()),
        ('dict', lambda: dict()),
        ('od', lambda: OrderedDict()),
    )
    if self.__typeOfMap:
        for label, build in candidates:
            if self.__typeOfMap == label:
                self.__map = build()
                break

    self.__indexFile = index
    self.__stats = [0, 0, 0]
class TestBasicTable(unittest.TestCase):
    """Basic-coverage tests."""

    def setUp(self):
        """Give every test its own freshly constructed table."""
        self.stmap = SortedTableMap()

    def test_init(self):
        self.assertIsInstance(self.stmap, SortedTableMap)

    def test_len(self):
        self.assertEqual(len(self.stmap), 0)
        for i in range(5):
            self.stmap[i] = i
        self.assertEqual(len(self.stmap), 5)

    def test_setitem_getitem(self):
        test_key = "key"
        test_value = "value"
        with self.assertRaises(KeyError):
            _ = self.stmap[test_key]  # not in the table yet
        self.stmap[test_key] = test_value
        self.assertEqual(self.stmap[test_key], test_value)
        # cover the overwrite-existing case in __setitem__:
        updated_value = "updated value"
        self.stmap[test_key] = updated_value
        self.assertEqual(self.stmap[test_key], updated_value)

    def test_delitem(self):
        test_key = "key"
        test_value = "value"
        # Should raise key error if not in table
        with self.assertRaises(KeyError):
            del self.stmap[test_key]
        self.stmap[test_key] = test_value
        del self.stmap[test_key]
        self.assertEqual(len(self.stmap), 0)

    def test_iter(self):
        for i in range(5):
            self.stmap[i] = f"value for key {i}"
        # unittest assertions are used instead of bare ``assert`` so the
        # checks survive ``python -O``.
        self.assertEqual(len(self.stmap), 5)
        # Comparing whole sequences also fails when iteration stops early,
        # which a key-by-key loop would silently accept.
        self.assertEqual(list(self.stmap.keys()), [0, 1, 2, 3, 4])

    def test_reversed(self):
        # Insert in descending order; reversed() must still yield 4..0.
        for i in range(4, -1, -1):
            self.stmap[i] = f"value for key {i}"
        self.assertEqual(len(self.stmap), 5)
        self.assertEqual(list(reversed(self.stmap)), [4, 3, 2, 1, 0])
def test_find_key_methods_return_none_when_table_empty(self):
    """On an empty table, every comparison-based finder should return None."""
    table = SortedTableMap()
    probe = 1

    # The extremum finders take no key.
    self.assertIsNone(table.find_min())
    self.assertIsNone(table.find_max())

    # The remaining finders are queried with an arbitrary key, in the same
    # order as before: >=, <, >.
    for finder_name in ("find_ge", "find_lt", "find_gt"):
        self.assertIsNone(getattr(table, finder_name)(probe))
def __init__(self):
    """Start with no entries recorded."""
    # A more efficient sorted-map implementation could be substituted here.
    backing_map = SortedTableMap()
    self._M = backing_map
def setUp(self):
    """Give each test a freshly constructed table."""
    fresh_table = SortedTableMap()
    self.stmap = fresh_table
def setUp(self):
    """Build a table mapping the keys 0..25 to the letters "a".."z"."""
    self.stmap = SortedTableMap()
    first_letter = ord("a")
    for offset in range(26):
        self.stmap[offset] = chr(first_letter + offset)
class TestAccessorsWithAlphabetTable(unittest.TestCase):
    """Uses a table containing 26 (number, letter) k-v pairs to test the
    methods that return min, max, less than, etc."""

    def setUp(self):
        """Map each key 0..25 to the corresponding letter "a".."z"."""
        self.stmap = SortedTableMap()
        for i in range(ord("a"), ord("a") + 26):
            key = i - ord("a")
            self.stmap[key] = chr(i)

    def test_find_min(self):
        expected_min = (0, "a")
        actual_min = self.stmap.find_min()
        self.assertEqual(expected_min, actual_min)

    def test_find_max(self):
        expected_max = (25, "z")
        actual_max = self.stmap.find_max()
        self.assertEqual(expected_max, actual_max)

    def test_find_ge(self):
        # Least key greater than or equal to 15 should be 15.
        key = 15
        expected = (15, "p")
        actual = self.stmap.find_ge(key)
        self.assertEqual(expected, actual)

    def test_find_lt(self):
        # Greatest key strictly less than 15 should be 14.
        key = 15
        expected = (14, "o")
        actual = self.stmap.find_lt(key)
        self.assertEqual(expected, actual)

    def test_find_gt(self):
        # Least key strictly greater than 15 should be 16.
        key = 15
        expected = (16, "q")
        actual = self.stmap.find_gt(key)
        self.assertEqual(expected, actual)

    def test_find_range(self):
        # No start and stop=15 should yield all k-v pairs from (0, "a") to
        # (14, "o"): the pair at the stop key itself is not expected.
        expected = [(k, chr(ord("a") + k)) for k in range(15)]
        actual = list(self.stmap.find_range(start=None, stop=15))
        # unittest assertions are used instead of bare ``assert`` so the
        # checks survive ``python -O``.
        self.assertEqual(len(actual), 15)
        self.assertEqual(expected, actual)

        # start=10 and stop=20 should yield all k-v pairs from (10, "k") to
        # (19, "t").
        expected = [(k, chr(ord("a") + k)) for k in range(10, 20)]
        actual = list(self.stmap.find_range(start=10, stop=20))
        self.assertEqual(len(actual), 10)
        self.assertEqual(expected, actual)

    def test_find_key_methods_return_none_when_table_empty(self):
        """Do the methods that find a key based on an arithmetic comparison
        criterion return None when the table is empty?"""
        empty_stmap = SortedTableMap()
        k = 1
        self.assertIsNone(empty_stmap.find_min())
        self.assertIsNone(empty_stmap.find_max())
        self.assertIsNone(empty_stmap.find_ge(k))
        self.assertIsNone(empty_stmap.find_lt(k))
        self.assertIsNone(empty_stmap.find_gt(k))
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import sys
from unsorted_table_map import UnsortedTableMap
from sorted_table_map import SortedTableMap

# Report the most frequent word in the text file named on the command line.

filename = sys.argv[1]                      # command line arg

# Alternative map implementations that can be swapped in:
#freq = {}
#freq = UnsortedTableMap()
freq = SortedTableMap()

# Read the whole file inside a ``with`` block so the handle is closed
# promptly, even if reading fails.
with open(filename) as source:
    text = source.read()

for piece in text.lower().split():
    # only consider alphabetic characters within this piece
    word = ''.join(c for c in piece if c.isalpha())
    if word:                                # require at least one alphabetic character
        freq[word] = 1 + freq.get(word, 0)

max_word = ''
max_count = 0
for (w, c) in freq.items():                 # (key, value) tuples represent (word, count)
    if c > max_count:
        max_word = w
        max_count = c

print('The most frequent word is', max_word)
print('Its number of occurrences is', max_count)
class Indexer:
    """A class for indexing preprocessed text documents.

    The preprocessed file is indexed word by word into a map from each
    word to the list of line numbers it occurs on; searches then print the
    matching lines of the original file.
    """

    # Map *classes*, not ready-made instances: every Indexer builds its own
    # map, so two indexers of the same type never write into one shared
    # index.
    __structures = {'avl': AVLTreeMap, 'unsorted': UnsortedTableMap,
                    'sorted': SortedTableMap, 'chain': ChainHashMap,
                    'probe': ProbeHashMap, 'splay': SplayTreeMap,
                    'rb': RedBlackTreeMap, 'dict': dict,
                    'od': OrderedDict}

    __names = {'avl': 'AVL Tree Map', 'unsorted': 'Unsorted Table Map',
               'sorted': 'Sorted Table Map', 'chain': 'Chain Hash Map',
               'probe': 'Probe Hash Map', 'splay': 'Splay Tree Map',
               'rb': 'Red and Black Tree Map', 'dict': 'Python Dictionary',
               'od': 'Python Ordered Dictionary'}

    def __init__(self, original, preprocessed, indexed=None, map_type='rb'):
        """Open both input files and create an empty index.

        original     -- path of the original text, printed by searches
        preprocessed -- path of the preprocessed text that gets indexed
        indexed      -- optional output path used by dump()
        map_type     -- short name of the map to use; unknown names fall
                        back to 'avl'
        """
        self.__pre_file = open(preprocessed, 'r', encoding='utf-8-sig')
        try:
            self.__org_file = open(original, 'r', encoding='utf-8-sig')
        except OSError:
            # Do not leak the first handle when the second open fails.
            self.__pre_file.close()
            raise
        self.__map_type = map_type
        self._mapFix(self.__map_type)
        self.__multimap = self.__structures[self.__map_type]()
        self.__average = 0
        self.__median = 0
        self.__indexing_time = 0
        self.__index_out = indexed

    def close(self):
        """Close both input files; calling it more than once is harmless."""
        self.__pre_file.close()
        self.__org_file.close()

    def __enter__(self):
        """Allow ``with Indexer(...) as indexer:`` usage."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the input files when the ``with`` block ends."""
        self.close()
        return False

    def _mapFix(self, map_type):
        """Fall back to the 'avl' map when map_type is not a known name."""
        if map_type not in self.__structures:
            self.__map_type = 'avl'

    def index(self):
        """Read the preprocessed file and index its words by line number.

        Also prints the indexing duration and updates the average and
        median word frequency.
        """
        initial_time = time()
        total_terms = 0
        for line_num, line in enumerate(self.__pre_file, start=1):
            for word in line.split():
                try:
                    self.__multimap[word].append(line_num)
                except KeyError:
                    # First occurrence of this word.
                    self.__multimap[word] = [line_num]
                total_terms += 1
        self.__indexing_time = time() - initial_time
        print('Indexing duration is {} seconds.'.format(
            round(self.__indexing_time, 4)))
        distinct_terms = len(self.__multimap)
        # An empty file has no terms; avoid dividing by zero.
        self.__average = total_terms / distinct_terms if distinct_terms else 0
        self._find_median()

    def dump(self):
        """Write the index to the output file, one 'word l1, l2, ...' per line.

        Does nothing when no output path was given.
        """
        if self.__index_out is None:
            return
        with open(self.__index_out, 'w') as out_file:
            for word in self.__multimap:
                lines = ', '.join(str(num) for num in self.__multimap[word])
                out_file.write('{} {}\n'.format(word, lines))

    def _find_median(self):
        """Store the median number of occurrences per word (0 if no words)."""
        frequencies = sorted(len(self.__multimap[key])
                             for key in self.__multimap)
        if frequencies:
            self.__median = frequencies[len(frequencies) // 2]
        else:
            self.__median = 0

    def _search(self, keyword):
        """Print every original line containing keyword, plus lookup timing.

        Raises whatever the underlying map raises for a missing key
        (startUI handles KeyError).
        """
        initial_time = time()
        lines = self.__multimap[keyword]
        search_time = time() - initial_time
        # A set gives constant-time membership tests while scanning the file;
        # ``lines`` itself is kept for the occurrence count.
        wanted = set(lines)
        try:
            for line_num, text in enumerate(self.__org_file, start=1):
                if line_num in wanted:
                    print('{1}: {0}'.format(text.strip(), line_num))
        finally:
            self.__org_file.seek(0)  # resets buffer for next searches
        print('\nIt took {:.12f} seconds to find {} occurrence '
              'of {!r}.'.format(search_time, len(lines), keyword))

    def startUI(self):
        """Repeatedly prompt for a word and print the lines it appears on.

        The loop ends when the user answers the quit prompt with 'y'.
        """
        print('This search is powered by {}.'.format(
            self.__names[self.__map_type]))
        while True:
            try:
                keyword = input('Enter a word to search for: ').lower()
                if len(keyword) < 3 or not keyword.isalpha():
                    raise ValueError()
                self._search(keyword)
            except KeyError:
                print("Sorry! We couldn't find {!r} in "
                      "the file.\n".format(keyword))
            except RecursionError:
                print("Structure recursion limit has exceeded, please try"
                      " another map!")
            except ValueError:
                print('Invalid Term!\n\tOnly alphabetical words with three or'
                      ' more characters are allowed!')
            except Exception:
                # Exception (not a bare except) so Ctrl-C and SystemExit
                # still terminate the program.
                print('Error has been occurred!')
            if input("Quit? (y/n): ").lower().startswith('y'):
                break

    def __repr__(self):
        """Return the stats table as a string."""
        output = 'Total indexed terms:\t{}\n'.format(len(self.__multimap))
        output += 'Average word frequency:\t{}\n'.format(
            round(self.__average, 2))
        output += 'Median word frequency:\t{}\n'.format(self.__median)
        return output