コード例 #1
 def __init__(self, original, preprocessed, typeOfMap, index=None):
     """Record the constructor arguments and create the selected map.

     Args:
         original: Stored unchanged on the instance (presumably the
             original document -- confirm against callers).
         preprocessed: Stored unchanged on the instance (presumably the
             preprocessed document -- confirm against callers).
         typeOfMap: Key naming the map implementation to use: 'avl',
             'unsorted', 'sorted', 'chain', 'probe', 'splay', 'rb',
             'dict' or 'od'.  When the value is falsy or matches none
             of these keys, no map attribute is created.
         index: Stored unchanged as the index-file reference.
     """
     self.__original = original
     self.__preprocessed = preprocessed
     self.__typeOfMap = typeOfMap

     if self.__typeOfMap:
         # Ordered (key, factory) pairs; the first key equal to the
         # requested type decides which map gets instantiated.
         factories = (
             ('avl', AVLTreeMap),
             ('unsorted', UnsortedTableMap),
             ('sorted', SortedTableMap),
             ('chain', ChainHashMap),
             ('probe', ProbeHashMap),
             ('splay', SplayTreeMap),
             ('rb', RedBlackTreeMap),
             ('dict', dict),
             ('od', OrderedDict),
         )
         for tag, make_map in factories:
             if self.__typeOfMap == tag:
                 self.__map = make_map()
                 break

     self.__indexFile = index
     self.__stats = [0, 0, 0]
コード例 #2
def spellCheck(para):
    # Report the words of `para` that are not found in a dictionary
    # held in an AVLTreeMap.
    # NOTE(review): this copy of the function is missing lines (see the
    # notes below) and does not parse as shown.

    # Split the input text on whitespace; create an empty map to act as
    # the dictionary and an empty list for the words that are not found.
    words = para.split()
    dictionary = AVLTreeMap()
    notWords = []

    # Load the dictionary: every item read is stored as both key and value.
    # NOTE(review): the open(...) call below is cut off here, so the
    # name of the dictionary file is not visible.
    for i in open(
        dictionary.__setitem__(i, i)

    # Look up each lower-cased word and compare it with the key of the
    # position returned by find_position.
    # NOTE(review): the body of the `if` below is missing; presumably it
    # appended the word to notWords -- confirm against the original file.
    for j in words:
        word = j.lower()
        findWord = dictionary.find_position(word)
        if findWord.key() != word:

    # Print the misspelled words.
    # NOTE(review): as shown, the loop over notWords is nested under the
    # empty-list branch and therefore prints nothing; an `else:` appears
    # to have been lost before it -- confirm against the original file.
    if len(notWords) == 0:
        print("No words were mispelled")
        for k in notWords:
            print(k + "</br>")
コード例 #3
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 18 10:46:05 2017

@author: bryna
"""

from avl_tree import AVLTreeMap

import cgi, cgitb


# CGI response header; the blank line ends the header section.
print("Content-type: text/html\n\n")

# Maps each word to the number of times it has been seen.
freq = AVLTreeMap()

formInfo = cgi.FieldStorage()
#paragraph = formInfo.getvalue("paragraph")
# Hard-coded sample text used in place of the form field above.
paragraph = "this is bryna bryna bryna\r\nbryna bryna"
# Count only when there is text to process: a missing value is None,
# and None has no .lower().
if paragraph is not None:
    lines = paragraph.lower().split('\r\n')
    for line in lines:
        # Every whitespace-separated token counts as a word; no
        # alphabetic filtering is applied here.
        for word in line.split():
            freq[word] = 1 + freq.get(word, 0)

max_word = ''
コード例 #4

import sys
from avl_tree import AVLTreeMap

import cgi, cgitb

# CGI response header; the blank line ends the header section.
print("Content-type: text/html\n\n")

# Maps each word to the number of times it has been seen.
freq = AVLTreeMap()

formInfo = cgi.FieldStorage()
#paragraph = formInfo.getvalue("paragraph")
# Hard-coded sample text used in place of the form field above.
paragraph = "yes this is bryna bryna hey, yes, this is Bryna yeah"
#paragraph = '''May indulgence difficulty ham can put especially. Bringing remember for supplied her why was confined. Middleton principle did she procuring extensive believing add. Weather adapted prepare oh is calling.
#These wrong of he which there smile to my front. He fruit oh enjoy it of whose table. Cultivated occasional old her unpleasing unpleasant. At as do be against pasture covered viewing started. Enjoyed me settled mr respect no spirits civilly.  '''
# Count only when there is text to process: a missing value is None,
# and None has no .lower().
if paragraph is not None:
    lines = paragraph.lower().split('\r\n')
    for line in lines:
        # Count only tokens made up entirely of letters; a token with
        # punctuation attached (e.g. "hey,") is skipped as a whole.
        for word in line.split():
            if word.isalpha():
                freq[word] = 1 + freq.get(word, 0)

#for (w,c) in freq.items():
#print (w,c)

コード例 #5
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import sys
from avl_tree import AVLTreeMap

# CGI response header; the blank line ends the header section.
print("Content-type: text/html\n\n")

#filename = sys.argv[1]  # command line arg
filename = "theRoadNotTaken.txt"
#freq = {}
#freq = UnsortedTableMap()
#freq = SortedTableMap()
freq = AVLTreeMap()

# Read the whole file inside a context manager so the handle is closed
# as soon as the text is in memory.
with open(filename) as source:
    text = source.read()

for piece in text.lower().split():
    # only consider alphabetic characters within this piece
    word = ''.join(c for c in piece if c.isalpha())
    if word:  # require at least one alphabetic character
        freq[word] = 1 + freq.get(word, 0)

# Linear scan for the highest count; on a tie the first word reached
# in the map's iteration order is kept because the comparison is strict.
max_word = ''
max_count = 0
for (w, c) in freq.items():  # (key, value) tuples represent (word, count)
    if c > max_count:
        max_word = w
        max_count = c
print('The most frequent word is', max_word)
print('Its number of occurrences is', max_count)
コード例 #6
class Indexer:
    """Index the words of a preprocessed text file by line number.

    The index maps each word to the list of line numbers it occurs on
    (one entry per occurrence) and is stored in one of several
    interchangeable map implementations chosen by a short key.
    """
    # Key -> map class.  Classes rather than ready-made instances are
    # stored so that every Indexer builds its own empty map; shared
    # instances would mix the indexes of different documents.
    __structures = {'avl': AVLTreeMap, 'unsorted': UnsortedTableMap,
                    'sorted': SortedTableMap, 'chain': ChainHashMap,
                    'probe': ProbeHashMap, 'splay': SplayTreeMap,
                    'rb': RedBlackTreeMap, 'dict': dict,
                    'od': OrderedDict}
    # Key -> human-readable name of the structure.
    __names = {'avl': 'AVL Tree Map', 'unsorted': 'Unsorted Table Map',
               'sorted': 'Sorted Table Map', 'chain': 'Chain Hash Map',
               'probe': 'Probe Hash Map', 'splay': 'Splay Tree Map',
               'rb': 'Red and Black Tree Map', 'dict': 'Python Dictionary',
               'od': 'Python Ordered Dictionary'}

    def __init__(self, original, preprocessed, indexed=None, map_type='rb'):
        """Create an empty index and open both input files.

        Args:
            original: Path of the original text; searches print its lines.
            preprocessed: Path of the preprocessed text that is indexed.
            indexed: Optional path that dump() writes the index to.
            map_type: Key selecting the map implementation.

        Raises:
            KeyError: If map_type is not one of the supported keys.
        """
        # Resolve the map first so an unknown key fails before any file
        # has been opened.
        self.__multimap = self.__structures[map_type]()
        self.__map_type = map_type
        self.__pre_file = open(preprocessed, 'r', encoding='utf-8-sig')
        self.__org_file = open(original, 'r', encoding='utf-8-sig')
        self.__average = 0
        self.__median = 0
        self.__indexing_time = 0
        self.__index_out = indexed

    def _mapFix(self, map_type):
        """Reset the stored map key to 'avl' when map_type is unsupported.

        Only the stored key changes; the map that was already created
        is left as it is.
        """
        if map_type not in self.__structures:
            self.__map_type = 'avl'

    def index(self):
        """Read the preprocessed file and record every word occurrence.

        Also stores the indexing time and the average number of
        occurrences per distinct word, and prints the duration.
        """
        initial_time = time()
        total_terms = 0
        for line_num, line in enumerate(self.__pre_file, start=1):
            for word in line.split():
                total_terms += 1
                # NOTE(review): the branch that separated "word already
                # indexed" from "new word" was missing from the source;
                # appending is reconstructed from how _search consumes
                # the stored list -- confirm against the original file.
                try:
                    self.__multimap[word].append(line_num)
                except KeyError:
                    self.__multimap[word] = [line_num]
        self.__indexing_time = time() - initial_time
        print('Indexing duration is {} seconds.'.format(
            round(self.__indexing_time, 4)))
        distinct_terms = len(self.__multimap)
        # An empty input leaves the average at 0 instead of dividing by 0.
        self.__average = total_terms / distinct_terms if distinct_terms else 0

    def dump(self):
        """Write the index to the output path, one word per line.

        Each line holds the word, a space, and its line numbers
        separated by ", ".  Nothing happens when no path was given.
        """
        if self.__index_out is None:
            return
        with open(self.__index_out, 'w') as out_file:
            for word in self.__multimap:
                lines = ', '.join(str(num) for num in self.__multimap[word])
                out_file.write('{} {}\n'.format(word, lines))

    def _find_median(self):
        """Store the median number of occurrences per indexed word.

        For an even number of words the upper of the two middle values
        is used.  An empty index leaves the stored median unchanged.
        """
        # NOTE(review): the loop body was missing from the source;
        # collecting the per-word occurrence counts in sorted order is a
        # reconstruction -- confirm against the original file.
        frequencies = sorted(
            len(self.__multimap[key]) for key in self.__multimap)
        if frequencies:
            self.__median = frequencies[len(frequencies) // 2]

    def _search(self, keyword):
        """Print every line of the original file that contains keyword.

        Args:
            keyword: The word to look up in the index.

        Raises:
            KeyError: Propagated from the map when keyword is not indexed.
        """
        initial_time = time()
        lines = self.__multimap[keyword]
        search_time = time() - initial_time
        wanted = set(lines)  # constant-time membership per file line
        for line_num, text in enumerate(self.__org_file, start=1):
            if line_num in wanted:
                print('{1}: {0}'.format(text.strip(), line_num))
        self.__org_file.seek(0)  # resets buffer for next searches
        print('\nIt took {:.12f} seconds to find {} occurrence '
              'of {!r}.'.format(search_time, len(lines), keyword))

    def startUI(self):
        """Prompt for search words in a loop until the user quits.

        A term must be alphabetic and at least three characters long;
        the matching lines are printed for every accepted term.
        """
        # NOTE(review): the format argument, the `try:` line, the call
        # to _search and the final `break` were missing from the source
        # and are reconstructed here -- confirm against the original.
        print('This search is powered by {}.'.format(
            self.__names[self.__map_type]))
        while True:
            try:
                keyword = input('Enter a word to search for: ').lower()
                if len(keyword) < 3 or not keyword.isalpha():
                    raise ValueError()
                self._search(keyword)
            except KeyError:
                print("Sorry! We couldn't find {!r} in "
                      "the file.\n".format(keyword))
            except RecursionError:
                print("Structure recursion limit has exceeded, please try"
                      " another map!")
            except ValueError:
                print('Invalid Term!\n\tOnly alphabetical words with three or'
                      ' more characters are allowed!')
                # NOTE(review): this message may have belonged to a
                # separate catch-all handler whose `except` line was
                # lost; it is kept where the source shows it.
                print('Error has been occurred!')
            if input("Quit? (y/n): ").lower().startswith('y'):
                break

    def __repr__(self):
        """Return the statistics table as a multi-line string."""
        output = 'Total indexed terms:\t{}\n'.format(len(self.__multimap))
        output += 'Average word frequency:\t{}\n'.format(
            round(self.__average, 2))
        output += 'Median word frequency:\t{}\n'.format(self.__median)
        return output