Beispiel #1
0
def preprocess(moDict, moSequence, minlen, maxlen):
    """Collect, for every start index of moSequence, the words found there.

    For each index i and each window length j in minlen..maxlen (inclusive),
    the slice moSequence[i:i + j] is stripped, converted to a lookup key with
    util.orderASCII and searched in moDict. When the key is present, the
    first entry of the stored collection is taken as the matching word.

    Args:
        moDict: mapping from util.orderASCII keys to indexable collections
            of words (entry 0 is the one used).
        moSequence: sliceable sequence to scan; slices must support strip().
        minlen: smallest window length to try.
        maxlen: largest window length to try.

    Returns:
        A list with one slot per index of moSequence. A slot is None when no
        window starting at that index matched; otherwise it is a
        collections.deque of the matched words, longest window first.
    """
    seq_len = len(moSequence)
    matches = [None] * seq_len

    for start in range(seq_len):
        for length in range(minlen, maxlen + 1):
            end = start + length
            if end > seq_len:
                # Window lengths only grow, so no later window can fit either.
                break

            key = util.orderASCII(moSequence[start:end].strip())
            candidates = moDict.get(key)
            if candidates is None:
                continue

            if matches[start] is None:
                matches[start] = collections.deque()
            # Only the first stored word is used; prepending keeps the words
            # found with longer windows ahead of those from shorter windows.
            matches[start].appendleft(candidates[0])

    return matches
Beispiel #2
0
def buildDict(filepath):
    """Read data from file to build the keyword search dictionary.

    Every non-blank line of the file is stripped and treated as one word.
    Words are grouped under the key returned by util.orderASCII(word), so
    words that produce the same key end up in the same collection.

    Args:
        filepath: path of the text file to read, one word per line.

    Returns:
        A tuple (d, minlen, maxlen) where d maps each key to a
        collections.deque of the words sharing it (in file order), and
        minlen / maxlen are the shortest and longest word lengths seen.
        When the file holds no words, d is empty and minlen / maxlen keep
        their sentinel values (the platform integer limit and its negation).
    """
    # sys.maxint exists only on Python 2; sys.maxsize is the portable fallback.
    int_limit = getattr(sys, 'maxint', sys.maxsize)

    d = dict()
    minlen = int_limit
    maxlen = -int_limit

    # The context manager closes the file even if a line fails to process.
    with open(filepath, 'r') as datafile:
        for line in datafile:
            word = line.strip()
            if not word:
                continue  # skip blank lines

            size = len(word)
            minlen = min(minlen, size)
            maxlen = max(maxlen, size)

            key = util.orderASCII(word)
            d.setdefault(key, collections.deque()).append(word)

    return (d, minlen, maxlen)
Beispiel #3
0
 def test_orderASCII(self):
     # orderASCII is expected to return its argument's characters in
     # ascending order, as the two examples below show.
     self.assertEqual(util.orderASCII('char'), 'achr')
     self.assertEqual(util.orderASCII('slight'), 'ghilst')