class CostPerformanceDatabase:
  """Keep only the (cost,performance) pairs that no other stored pair dominates.

  A pair is dominated when some stored pair costs no more and performs
  at least as well.
  """

  def __init__(self):
    """Start out with no stored pairs."""
    self._M = SortedTableMap()             # a more efficient sorted map could be substituted

  def best(self, c):
    """Look up the stored pair whose cost is the largest one not above c.

    The result is whatever the underlying map's find_le(c) returns:
    a (cost,performance) pair, or None when no stored cost is <= c.
    """
    return self._M.find_le(c)

  def add(self, c, p):
    """Record cost c with performance p unless an existing pair dominates it.

    When the pair is stored, every more expensive pair whose
    performance does not exceed p is removed.
    """
    # A pair at least as cheap that performs at least as well makes (c,p) redundant.
    cheaper = self._M.find_le(c)
    dominated = cheaper is not None and cheaper[1] >= p
    if dominated:
      return

    self._M[c] = p

    # Walk upward from c, dropping costlier pairs that (c,p) now dominates.
    while True:
      pricier = self._M.find_gt(c)
      if pricier is None or not (pricier[1] <= p):
        break
      del self._M[pricier[0]]
 def __init__(self, original, preprocessed, typeOfMap, index=None):
     """Store the constructor arguments and build the requested map.

     original, preprocessed and index are stored unchanged. typeOfMap
     selects the map implementation by its short code; when it is falsy
     or matches none of the known codes, no map attribute is created.
     """
     self.__original = original
     self.__preprocessed = preprocessed
     self.__typeOfMap = typeOfMap

     # (code, constructor) pairs, tried in order. Constructors are wrapped
     # in lambdas so that only the selected class is ever looked up.
     known_maps = (
         ('avl', lambda: AVLTreeMap()),
         ('unsorted', lambda: UnsortedTableMap()),
         ('sorted', lambda: SortedTableMap()),
         ('chain', lambda: ChainHashMap()),
         ('probe', lambda: ProbeHashMap()),
         ('splay', lambda: SplayTreeMap()),
         ('rb', lambda: RedBlackTreeMap()),
         ('dict', lambda: dict()),
         ('od', lambda: OrderedDict()),
     )
     if self.__typeOfMap:
         for code, build in known_maps:
             if self.__typeOfMap == code:
                 self.__map = build()
                 break

     self.__indexFile = index
     self.__stats = [0, 0, 0]
class TestBasicTable(unittest.TestCase):
    """Basic-coverage tests for SortedTableMap's container operations."""

    def setUp(self):
        """Give every test its own empty table."""
        self.stmap = SortedTableMap()

    def test_init(self):
        """The constructor yields a SortedTableMap instance."""
        self.assertIsInstance(self.stmap, SortedTableMap)

    def test_len(self):
        """len() is 0 for a new table and grows with each distinct key."""
        self.assertEqual(len(self.stmap), 0)
        for i in range(5):
            self.stmap[i] = i
        self.assertEqual(len(self.stmap), 5)

    def test_setitem_getitem(self):
        """Items can be stored, read back and overwritten."""
        test_key = "key"
        test_value = "value"
        with self.assertRaises(KeyError):
            _ = self.stmap[test_key]  # not in the table yet
        self.stmap[test_key] = test_value
        self.assertEqual(self.stmap[test_key], test_value)
        # cover the overwrite-existing case in __setitem__:
        updated_value = "updated value"
        self.stmap[test_key] = updated_value
        self.assertEqual(self.stmap[test_key], updated_value)

    def test_delitem(self):
        """Deleting a missing key raises; deleting a present key removes it."""
        test_key = "key"
        test_value = "value"
        # Should raise key error if not in table
        with self.assertRaises(KeyError):
            del self.stmap[test_key]
        self.stmap[test_key] = test_value
        del self.stmap[test_key]
        self.assertEqual(len(self.stmap), 0)

    def test_iter(self):
        """keys() yields every key in ascending order."""
        for i in range(5):
            self.stmap[i] = f"value for key {i}"
        # unittest assertions are used instead of bare ``assert`` so the
        # checks still run when Python is started with -O.
        self.assertEqual(len(self.stmap), 5)
        # Comparing whole lists also fails when iteration yields too few
        # keys, which a per-item loop would silently accept.
        self.assertEqual(list(self.stmap.keys()), [0, 1, 2, 3, 4])

    def test_reversed(self):
        """reversed() yields every key in descending order."""
        for i in range(4, -1, -1):
            self.stmap[i] = f"value for key {i}"
        self.assertEqual(len(self.stmap), 5)
        self.assertEqual(list(reversed(self.stmap)), [4, 3, 2, 1, 0])
 def test_find_key_methods_return_none_when_table_empty(self):
     """Every comparison-based lookup on an empty table must give None."""
     table = SortedTableMap()
     probe = 1
     # The extreme-key lookups take no argument.
     for lookup in (table.find_min, table.find_max):
         self.assertIsNone(lookup())
     # The relative lookups are all asked about the same probe key.
     for lookup in (table.find_ge, table.find_lt, table.find_gt):
         self.assertIsNone(lookup(probe))
 def __init__(self):
   """Set up the database with no stored entries."""
   table = SortedTableMap()               # a more efficient sorted map could be substituted
   self._M = table
 def setUp(self):
     """Create an empty SortedTableMap and keep it on self.stmap."""
     fresh_table = SortedTableMap()
     self.stmap = fresh_table
 def setUp(self):
     """Fill self.stmap with the pairs (0, "a") through (25, "z")."""
     self.stmap = SortedTableMap()
     base = ord("a")
     for offset in range(26):
         self.stmap[offset] = chr(base + offset)
class TestAccessorsWithAlphabetTable(unittest.TestCase):
    """Uses a table containing 26 (number, letter) k-v pairs to test the
    methods that return min, max, less than, etc."""

    def setUp(self):
        """Populate the table with (0, "a") through (25, "z")."""
        self.stmap = SortedTableMap()
        for key in range(26):
            self.stmap[key] = chr(ord("a") + key)

    def test_find_min(self):
        """find_min returns the pair with the smallest key."""
        expected_min = (0, "a")
        actual_min = self.stmap.find_min()
        self.assertEqual(expected_min, actual_min)

    def test_find_max(self):
        """find_max returns the pair with the largest key."""
        expected_max = (25, "z")
        actual_max = self.stmap.find_max()
        self.assertEqual(expected_max, actual_max)

    def test_find_ge(self):
        """find_ge returns the key itself when it is present."""
        # least key greater than or equal to 15 should be 15.
        key = 15
        expected = (15, "p")
        actual = self.stmap.find_ge(key)
        self.assertEqual(expected, actual)

    def test_find_lt(self):
        """find_lt returns the nearest strictly smaller key."""
        # Greatest key strictly less than 15 should be 14
        key = 15
        expected = (14, "o")
        actual = self.stmap.find_lt(key)
        self.assertEqual(expected, actual)

    def test_find_gt(self):
        """find_gt returns the nearest strictly larger key."""
        # Least key strictly greater than 15 should be 16.
        key = 15
        expected = (16, "q")
        actual = self.stmap.find_gt(key)
        self.assertEqual(expected, actual)

    def test_find_range(self):
        """find_range yields pairs with start <= key < stop, in key order."""
        # No start and stop=15 should yield all kvps from (0, "a") to
        #   (14, "o"): the stop key itself is excluded.
        expected = [(key, chr(ord("a") + key)) for key in range(15)]
        actual = list(self.stmap.find_range(start=None, stop=15))
        # unittest assertions (not bare ``assert``) so the length check
        # is not stripped when Python runs with -O.
        self.assertEqual(len(actual), 15)
        self.assertEqual(expected, actual)

        # start=10 and stop=20 should yield all k-v pairs from (10, "k") to
        #   (19, "t")
        expected = [(key, chr(ord("a") + key)) for key in range(10, 20)]
        actual = list(self.stmap.find_range(start=10, stop=20))
        self.assertEqual(len(actual), 10)
        self.assertEqual(expected, actual)

    def test_find_key_methods_return_none_when_table_empty(self):
        """Do the methods that find a key based on an arithmetic comparison
        criterion return None when the table is empty?"""
        empty_stmap = SortedTableMap()
        k = 1
        self.assertIsNone(empty_stmap.find_min())
        self.assertIsNone(empty_stmap.find_max())
        self.assertIsNone(empty_stmap.find_ge(k))
        self.assertIsNone(empty_stmap.find_lt(k))
        self.assertIsNone(empty_stmap.find_gt(k))
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import sys
from unsorted_table_map import UnsortedTableMap
from sorted_table_map import SortedTableMap

# Report the most frequent word in the text file named on the command line.
if len(sys.argv) < 2:
    # Fail with a readable message instead of a bare IndexError traceback.
    sys.exit('usage: {} FILENAME'.format(sys.argv[0]))
filename = sys.argv[1]  # command line arg

# Any map type works here, e.g. a plain dict or UnsortedTableMap().
freq = SortedTableMap()

# Read through a context manager so the file is closed even if reading fails.
with open(filename) as source:
    text = source.read()

for piece in text.lower().split():
    # only consider alphabetic characters within this piece
    word = ''.join(c for c in piece if c.isalpha())
    if word:  # require at least one alphabetic character
        freq[word] = 1 + freq.get(word, 0)

# Linear scan for the highest count; on ties the first word encountered wins.
max_word = ''
max_count = 0
for (w, c) in freq.items():  # (key, value) tuples represent (word, count)
    if c > max_count:
        max_word = w
        max_count = c
print('The most frequent word is', max_word)
print('Its number of occurrences is', max_count)
Exemple #10
0
class Indexer:
    """A class for indexing preprocessed text documents.

    The index maps each word of the preprocessed file to the list of
    1-based line numbers it occurs on. Searches print the matching lines
    of the original file.
    """
    # Zero-argument factories keyed by map-type code. Factories (rather than
    # ready-made instances) give every Indexer its own map; storing instances
    # here would make all indexers of one type share a single index. The
    # lambdas defer the class lookup until that type is actually selected.
    __structures = {
        'avl': lambda: AVLTreeMap(),
        'unsorted': lambda: UnsortedTableMap(),
        'sorted': lambda: SortedTableMap(),
        'chain': lambda: ChainHashMap(),
        'probe': lambda: ProbeHashMap(),
        'splay': lambda: SplayTreeMap(),
        'rb': lambda: RedBlackTreeMap(),
        'dict': lambda: dict(),
        'od': lambda: OrderedDict(),
    }
    # Human-readable name for each map-type code, shown by startUI().
    __names = {'avl': 'AVL Tree Map', 'unsorted': 'Unsorted Table Map',
               'sorted': 'Sorted Table Map', 'chain': 'Chain Hash Map',
               'probe': 'Probe Hash Map', 'splay': 'Splay Tree Map',
               'rb': 'Red and Black Tree Map', 'dict': 'Python Dictionary',
               'od': 'Python Ordered Dictionary'}

    def __init__(self, original, preprocessed, indexed=None, map_type='rb'):
        """Open the input files and create an empty index.

        original     -- path of the original text, used to print matches
        preprocessed -- path of the preprocessed text that gets indexed
        indexed      -- optional path dump() writes the index to
        map_type     -- map-type code; unknown codes fall back to 'avl'

        Both files are opened for reading as 'utf-8-sig' and stay open
        until close() is called.
        """
        self.__pre_file = open(preprocessed, 'r', encoding='utf-8-sig')
        try:
            self.__org_file = open(original, 'r', encoding='utf-8-sig')
        except OSError:
            # Do not leak the first handle when the second open fails.
            self.__pre_file.close()
            raise
        self.__map_type = map_type
        self._mapFix(self.__map_type)
        self.__multimap = self.__structures[self.__map_type]()
        self.__average = 0
        self.__median = 0
        self.__indexing_time = 0
        self.__index_out = indexed

    def _mapFix(self, map_type):
        """Replace an unrecognised map-type code with 'avl'."""
        if map_type not in self.__structures:
            self.__map_type = 'avl'

    def close(self):
        """Close the original and preprocessed input files."""
        self.__pre_file.close()
        self.__org_file.close()

    def index(self):
        """Read the preprocessed file and index its words.

        Prints the indexing duration and updates the average and median
        word frequencies. When no word was indexed both statistics are
        set to 0 instead of raising.
        """
        initial_time = time()
        total_terms = 0
        for line_num, line in enumerate(self.__pre_file, start=1):
            for word in line.strip().split():
                try:
                    self.__multimap[word].append(line_num)
                except KeyError:
                    # First occurrence of this word.
                    self.__multimap[word] = [line_num]
                total_terms += 1
        self.__indexing_time = time() - initial_time
        print('Indexing duration is {} seconds.'.format(
            round(self.__indexing_time, 4)))
        distinct_terms = len(self.__multimap)
        if distinct_terms:
            self.__average = total_terms / distinct_terms
            self._find_median()
        else:
            self.__average = 0
            self.__median = 0

    def dump(self):
        """Write the index to the 'indexed' path, one word per line.

        Each line is the word followed by its comma-separated line
        numbers. Nothing is written when no output path was given.
        """
        if self.__index_out is None:
            return
        with open(self.__index_out, 'w') as out_file:
            for word in self.__multimap:
                lines = ', '.join(str(n) for n in self.__multimap[word])
                out_file.write(word + ' ' + lines + '\n')

    def _find_median(self):
        """Store the median word frequency (upper middle for even counts)."""
        frequencies = sorted(
            len(self.__multimap[key]) for key in self.__multimap)
        if not frequencies:
            self.__median = 0
            return
        self.__median = frequencies[len(frequencies) // 2]

    def _search(self, keyword):
        """Print every original line containing keyword, then the timing.

        Raises KeyError (from the map lookup) when keyword is not indexed.
        Only the map lookup itself is timed.
        """
        initial_time = time()
        lines = self.__multimap[keyword]
        search_time = time() - initial_time
        wanted = set(lines)  # constant-time membership while scanning
        for line_num, text in enumerate(self.__org_file, start=1):
            if line_num in wanted:
                print('{1}: {0}'.format(text.strip(), line_num))
        self.__org_file.seek(0)  # resets buffer for next searches
        print('\nIt took {:.12f} seconds to find {} occurrence '
              'of {!r}.'.format(search_time, len(lines), keyword))

    def startUI(self):
        """Run an interactive loop that searches for the words typed in.

        Each valid word is passed to _search(); failures are reported and
        the loop goes on until the user answers the quit prompt with 'y'.
        """
        print('This search is powered by {}.'.format(
            self.__names[self.__map_type]))
        while True:
            try:
                keyword = input('Enter a word to search for: ').lower()
                if len(keyword) < 3 or not keyword.isalpha():
                    raise ValueError()
                self._search(keyword)
            except KeyError:
                print("Sorry! We couldn't find {!r} in "
                      "the file.\n".format(keyword))
            except RecursionError:
                print("Structure recursion limit has exceeded, please try"
                      " another map!")
            except ValueError:
                print('Invalid Term!\n\tOnly alphabetical words with three or'
                      ' more characters are allowed!')
            except Exception:
                # Exception, not a bare except, so Ctrl-C and SystemExit
                # still terminate the loop.
                print('Error has been occurred!')
            if input("Quit? (y/n): ").lower().startswith('y'):
                break

    def __repr__(self):
        """Return the statistics table as a multi-line string."""
        output = 'Total indexed terms:\t{}\n'.format(len(self.__multimap))
        output += 'Average word frequency:\t{}\n'.format(
            round(self.__average, 2))
        output += 'Median word frequency:\t{}\n'.format(self.__median)
        return output