def __init__(self, original, preprocessed, typeOfMap, index=None):
    """Record the constructor arguments and build the requested map.

    Args:
        original: Stored as-is on the instance (presumably the original
            document or its path -- confirm against the caller).
        preprocessed: Stored as-is on the instance (presumably the
            preprocessed counterpart of ``original`` -- confirm).
        typeOfMap: Short name selecting the map implementation: one of
            'avl', 'unsorted', 'sorted', 'chain', 'probe', 'splay', 'rb',
            'dict' or 'od'. When it is falsy or not one of these names,
            no map attribute is created at all.
        index: Stored as the index-file attribute; defaults to ``None``.
    """
    # Each factory is wrapped in a lambda so that, exactly like the
    # branch-per-type form, only the selected class name is looked up.
    factories = {
        'avl': lambda: AVLTreeMap(),
        'unsorted': lambda: UnsortedTableMap(),
        'sorted': lambda: SortedTableMap(),
        'chain': lambda: ChainHashMap(),
        'probe': lambda: ProbeHashMap(),
        'splay': lambda: SplayTreeMap(),
        'rb': lambda: RedBlackTreeMap(),
        'dict': lambda: dict(),
        'od': lambda: OrderedDict(),
    }

    self.__original = original
    self.__preprocessed = preprocessed
    self.__typeOfMap = typeOfMap

    if typeOfMap:
        make_map = factories.get(typeOfMap)
        if make_map is not None:
            self.__map = make_map()

    self.__indexFile = index
    # Three zeroed counters; their individual meaning is not visible here.
    self.__stats = [0, 0, 0]
def spellCheck(para, word_list_path="/home/staff/kurban/public/lists/web2.txt"):
    """Print every word of ``para`` that is missing from the word list.

    The word list is loaded (lower-cased, whitespace-separated) into an
    AVLTreeMap, then each whitespace-separated word of ``para`` is
    lower-cased and looked up. Unknown words are printed one per line,
    each followed by ``</br>``; if there are none, a single confirmation
    message is printed instead.

    Args:
        para: The text to check.
        word_list_path: Path of the reference word list. Defaults to the
            location the function has always read from.

    Raises:
        OSError: If the word list cannot be opened.
    """
    # Local import keeps the function self-contained.
    from html import escape

    words = para.split()

    # Load the reference words; the context manager closes the file even
    # if reading fails part-way.
    dictionary = AVLTreeMap()
    with open(word_list_path) as word_file:
        for entry in word_file.read().lower().split():
            dictionary[entry] = entry

    notWords = []
    for raw in words:
        word = raw.lower()
        nearest = dictionary.find_position(word)
        # find_position appears to return the closest position when the
        # key is absent, hence the key comparison. The None guard assumes
        # an empty tree yields None -- TODO confirm against AVLTreeMap.
        if nearest is None or nearest.key() != word:
            notWords.append(word)

    if not notWords:
        print("No words were mispelled")
    else:
        for miss in notWords:
            # The words come from user-supplied text and are emitted as
            # HTML, so escape them before printing.
            print(escape(miss) + "</br>")
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 18 10:46:05 2017

@author: bryna
"""
from avl_tree import AVLTreeMap
import cgi
import cgitb

# Show tracebacks in the browser instead of a bare server error.
cgitb.enable()
print("Content-type: text/html\n\n")

freq = AVLTreeMap()      # word -> number of occurrences
formInfo = cgi.FieldStorage()
# NOTE(review): the form value is currently bypassed in favour of a fixed
# sample paragraph; re-enable the next line to count the submitted text.
#paragraph = formInfo.getvalue("paragraph")
paragraph = "this is bryna bryna bryna\r\nbryna bryna"

if paragraph is None:
    print('')
else:
    lines = paragraph.lower().split('\r\n')
    for line in lines:
        # Every whitespace-separated token is counted as a word; no
        # filtering of punctuation or digits happens here. split() never
        # yields empty strings, so no emptiness check is needed.
        for word in line.split():
            freq[word] = 1 + freq.get(word, 0)

max_word = ''
#!/usr/local/bin/python3.5 import sys from avl_tree import AVLTreeMap import cgi, cgitb cgitb.enable() print("Content-type: text/html\n\n") freq = AVLTreeMap() formInfo = cgi.FieldStorage() #paragraph = formInfo.getvalue("paragraph") paragraph = "yes this is bryna bryna hey, yes, this is Bryna yeah" #paragraph = '''May indulgence difficulty ham can put especially. Bringing remember for supplied her why was confined. Middleton principle did she procuring extensive believing add. Weather adapted prepare oh is calling. #These wrong of he which there smile to my front. He fruit oh enjoy it of whose table. Cultivated occasional old her unpleasing unpleasant. At as do be against pasture covered viewing started. Enjoyed me settled mr respect no spirits civilly. ''' if paragraph == None: print('') else: lines = paragraph.lower().split('\r\n') for line in lines: # only consider alphabetic characters for word in line.split(): if word.isalpha(): freq[word] = 1 + freq.get(word, 0) #for (w,c) in freq.items(): #print (w,c) print("""
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import sys
from avl_tree import AVLTreeMap

print("Content-type: text/html\n\n")

#filename = sys.argv[1]       # command line arg
filename = "theRoadNotTaken.txt"

# Alternative map implementations that can be swapped in:
#freq = {}
#freq = UnsortedTableMap()
#freq = SortedTableMap()
freq = AVLTreeMap()      # word -> number of occurrences

# Read the whole file up front; the context manager guarantees the handle
# is closed before counting starts.
with open(filename) as source:
    text = source.read()

for piece in text.lower().split():
    # only consider alphabetic characters within this piece
    word = ''.join(c for c in piece if c.isalpha())
    if word:                   # require at least one alphabetic character
        freq[word] = 1 + freq.get(word, 0)

# Find the most frequent word; on ties the first one iterated wins.
max_word = ''
max_count = 0
for (w, c) in freq.items():    # (key, value) tuples represent (word, count)
    if c > max_count:
        max_word = w
        max_count = c

print('The most frequent word is', max_word)
print('Its number of occurrences is', max_count)
class Indexer:
    """Index a preprocessed text file and search it by word.

    Every whitespace-separated word of the preprocessed file is mapped to
    the list of 1-based line numbers it occurs on (one entry per
    occurrence). Search results are printed from the original file using
    those line numbers, so the two files presumably correspond line for
    line -- confirm against the preprocessing step.

    Instances hold two open files; call :meth:`close` (or use the instance
    as a context manager) when done.
    """

    # Zero-argument factories keyed by the short name accepted as
    # ``map_type``. Factories (rather than ready-made instances) give
    # every Indexer its own empty map, so indexers of the same type never
    # share entries. The lambdas defer the class-name lookup until that
    # structure is actually requested.
    __structures = {
        'avl': lambda: AVLTreeMap(),
        'unsorted': lambda: UnsortedTableMap(),
        'sorted': lambda: SortedTableMap(),
        'chain': lambda: ChainHashMap(),
        'probe': lambda: ProbeHashMap(),
        'splay': lambda: SplayTreeMap(),
        'rb': lambda: RedBlackTreeMap(),
        'dict': dict,
        'od': lambda: OrderedDict(),
    }

    # Human-readable structure names shown by startUI.
    __names = {
        'avl': 'AVL Tree Map',
        'unsorted': 'Unsorted Table Map',
        'sorted': 'Sorted Table Map',
        'chain': 'Chain Hash Map',
        'probe': 'Probe Hash Map',
        'splay': 'Splay Tree Map',
        'rb': 'Red and Black Tree Map',
        'dict': 'Python Dictionary',
        'od': 'Python Ordered Dictionary',
    }

    def __init__(self, original, preprocessed, indexed=None, map_type='rb'):
        """Open both input files and create an empty index map.

        Args:
            original: Path of the original text, printed in search results.
            preprocessed: Path of the preprocessed text that gets indexed.
            indexed: Optional path that :meth:`dump` writes the index to.
            map_type: Short name of the map structure to use; unknown
                names fall back to 'avl'.

        Raises:
            OSError: If either input file cannot be opened.
        """
        self.__pre_file = open(preprocessed, 'r', encoding='utf-8-sig')
        try:
            self.__org_file = open(original, 'r', encoding='utf-8-sig')
        except OSError:
            # Do not leak the first handle when the second open fails.
            self.__pre_file.close()
            raise
        self.__map_type = map_type
        self._mapFix(self.__map_type)
        self.__multimap = self.__structures[self.__map_type]()
        self.__average = 0
        self.__median = 0
        self.__indexing_time = 0
        self.__index_out = indexed

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def close(self):
        """Close the preprocessed and original input files."""
        self.__pre_file.close()
        self.__org_file.close()

    def _mapFix(self, map_type):
        """Fall back to the AVL tree map when ``map_type`` is unknown."""
        if map_type not in self.__structures:
            self.__map_type = 'avl'

    def index(self):
        """Read the preprocessed file and index its words.

        Prints the elapsed indexing time and updates the average and
        median word frequencies. An empty input leaves both at 0.
        """
        initial_time = time()
        total_terms = 0
        for line_num, line in enumerate(self.__pre_file, start=1):
            for word in line.split():
                try:
                    self.__multimap[word].append(line_num)
                except KeyError:
                    # First occurrence of this word.
                    self.__multimap[word] = [line_num]
                total_terms += 1
        self.__indexing_time = time() - initial_time
        print('Indexing duration is {} seconds.'.format(
            round(self.__indexing_time, 4)))
        distinct_terms = len(self.__multimap)
        # Guard against an empty file: nothing indexed means average 0.
        self.__average = total_terms / distinct_terms if distinct_terms else 0
        self._find_median()

    def dump(self):
        """Write the index to the ``indexed`` path, one word per line.

        Each line is the word followed by its comma-separated line
        numbers. Does nothing when no output path was given.
        """
        if self.__index_out is None:
            return
        with open(self.__index_out, 'w') as out_file:
            for word in self.__multimap:
                # str(list) without its surrounding brackets: "1, 2, 3".
                lines = str(self.__multimap[word])[1:-1]
                out_file.write(word + ' ' + lines + '\n')

    def _find_median(self):
        """Store the median word frequency (upper middle for even counts)."""
        frequencies = sorted(
            len(self.__multimap[key]) for key in self.__multimap)
        if frequencies:
            self.__median = frequencies[len(frequencies) // 2]
        else:
            self.__median = 0

    def _search(self, keyword):
        """Print every original line on which ``keyword`` was indexed.

        Raises:
            KeyError: If ``keyword`` is not in the index.
        """
        initial_time = time()
        lines = self.__multimap[keyword]
        search_time = time() - initial_time  # times the map lookup only
        wanted = set(lines)  # constant-time membership test per file line
        try:
            for line_num, text in enumerate(self.__org_file, start=1):
                if line_num in wanted:
                    print('{1}: {0}'.format(text.strip(), line_num))
        finally:
            self.__org_file.seek(0)  # rewind for the next search
        print('\nIt took {:.12f} seconds to find {} occurrence '
              'of {!r}.'.format(search_time, len(lines), keyword))

    def startUI(self):
        """Run an interactive loop that searches for words typed by the user.

        Each accepted word is searched and its occurrences and lines are
        printed; the loop ends when the user answers the quit prompt with
        something starting with 'y'.
        """
        print('This search is powered by {}.'.format(
            self.__names[self.__map_type]))
        while True:
            try:
                keyword = input('Enter a word to search for: ').lower()
                if len(keyword) < 3 or not keyword.isalpha():
                    raise ValueError()
                self._search(keyword)
            except KeyError:
                print("Sorry! We couldn't find {!r} in "
                      "the file.\n".format(keyword))
            except RecursionError:
                print("Structure recursion limit has exceeded, please try"
                      " another map!")
            except ValueError:
                print('Invalid Term!\n\tOnly alphabetical words with three or'
                      ' more characters are allowed!')
            except Exception:
                # Last-resort handler; KeyboardInterrupt and SystemExit
                # are deliberately allowed to propagate.
                print('Error has been occurred!')
            if input("Quit? (y/n): ").lower().startswith('y'):
                break

    def __repr__(self):
        """Return the statistics table as a multi-line string."""
        output = 'Total indexed terms:\t{}\n'.format(len(self.__multimap))
        output += 'Average word frequency:\t{}\n'.format(
            round(self.__average, 2))
        output += 'Median word frequency:\t{}\n'.format(self.__median)
        return output