Ejemplo n.º 1
0
 def searchWordInDef(self, st, opt):
     """Search for the word ``st`` in the definitions of this glossary.

     Every entry of ``self.data`` is read as ``(word, definition, ...)``.
     The definition is split on ``opt['sep']``, each part is scored, and the
     best part score becomes the entry's relation value.

     Options read from ``opt`` (missing ones are expected to be filled in by
     ``addDefaultOptions``, which is defined elsewhere -- confirm):
         minRel: entries whose relation is <= this value are dropped.
         maxNum: keep at most this many results (only applied when > 0).
         sep: separator used to split a definition into parts.
         matchWord: if true, a part's score is the fraction of its words
             (as returned by ``takeStrWords``) equal to ``st``; otherwise
             it is ``len(findAll(part, st)) * len(st)`` divided by the
             number of non-space characters in the part.
         showRel: 'Percent' appends the relation as a percentage,
             'Percent At First' appends it only when it differs from the
             previous result's relation, any other value omits it.
         includeDefs: if true, each result also carries the definition
             (newlines and tabs escaped) after a literal ``\\n``.

     Returns a list of strings ordered by descending relation.
     """
     opt = addDefaultOptions(opt, {
         'minRel': 0.0,
         'maxNum': 100,
         'sep': commaFa,
         'matchWord': True,
         'showRel': 'Percent',
         # This key is read below; it previously had no default here.
         'includeDefs': False,
     })
     sep = opt['sep']
     matchWord = opt['matchWord']
     maxNum = opt['maxNum']
     minRel = opt['minRel']
     defs = opt['includeDefs']
     showRel = opt['showRel']
     outRel = []
     for item in self.data:
         (word, defi) = item[:2]
         # Cheap rejection before any splitting or scoring.
         if defi.find(st) == -1:
             continue
         rel = 0  # relation of the entry: best score among its parts
         for part in defi.split(sep):
             # `sch` is not defined in this method; presumably a module-level
             # collection of characters to blank out -- TODO confirm.
             for ch in sch:
                 part = part.replace(ch, ' ')
             if matchWord:
                 partWords = takeStrWords(part)
                 pLen = len(partWords)
                 if pLen == 0:
                     continue
                 pNum = sum(1 for pw in partWords if pw == st)
             else:
                 pLen = len(part.replace(' ', ''))
                 if pLen == 0:
                     continue
                 pNum = len(findAll(part, st)) * len(st)
             pRel = float(pNum) / pLen  # part relation
             if pRel > rel:
                 rel = pRel
         if rel <= minRel:
             continue
         if defs:
             outRel.append((word, rel, defi))
         else:
             outRel.append((word, rel))
     # Sort by relation, best first. `key` must be a callable; the former
     # `key=1` raised TypeError as soon as there was anything to sort.
     outRel.sort(key=lambda row: row[1], reverse=True)
     if 0 < maxNum < len(outRel):
         outRel = outRel[:maxNum]
     out = []
     prevRel = 0
     for row in outRel:
         (w, num) = row[:2]
         # NOTE(review): int(1.0/num) == 1 holds for every relation in
         # (0.5, 1.0], so the percentage is hidden for all of those, not just
         # for 100% -- confirm this is intended.
         # The `num > 0` guard avoids a ZeroDivisionError, which was possible
         # when minRel is negative and an entry scored 0.
         if num > 0 and int(1.0 / num) == 1:
             text = w
         elif showRel == 'Percent':
             text = '%s(%%%d)' % (w, 100 * num)
         elif showRel == 'Percent At First' and num != prevRel:
             text = '%s(%%%d)' % (w, 100 * num)
         else:
             text = w
         prevRel = num
         if defs:
             # Keep each result on one line by escaping newlines and tabs.
             m = row[2].replace('\n', '\\n').replace('\t', '\\t')
             text = '%s\\n%s' % (text, m)
         out.append(text)
     return out
Ejemplo n.º 2
0
#!/usr/bin/python
import sys
sys.path.append('/usr/share/pyglossary/src')
import text_utils
import string, os, time

t0 = time.time()

# Read the input file named on the command line and extract its words.
# `open` + `with` replaces the Python-2-only `file()` builtin and guarantees
# the handle is closed even if reading or word extraction raises.
with open(sys.argv[1]) as fp:
    words = text_utils.takeStrWords(fp.read(), {'sort': False})

# NOTE: the [:-4] slice assumes the input name ends in a dot plus a
# three-character extension (e.g. ".txt").
with open(sys.argv[1][:-4] + '-words.txt', 'w') as wordsFile:
    # Formatted as a single string so the output is the same on Python 2 and 3.
    print('%d words found. writing to file...' % len(words))
    wordsFile.write('\n'.join(words) + '\n')

print('%f  seconds left.' % (time.time() - t0))
Ejemplo n.º 3
0
 def takeOutputWords(self, opt=None):
     """Return the sorted words found in the definitions of this glossary.

     The second field (``item[1]``) of every entry in ``self.data`` is joined
     with spaces and handed to ``takeStrWords`` together with ``opt``; the
     sorted result is then passed through ``removeRepeats`` (presumably to
     drop duplicates -- defined elsewhere).

     ``opt`` is an options dict forwarded to ``takeStrWords``; when omitted a
     fresh empty dict is used.
     """
     if opt is None:
         # A `{}` default would be one dict shared by every call, so any
         # change made to it by takeStrWords would leak into later calls.
         opt = {}
     words = sorted(takeStrWords(' '.join([item[1] for item in self.data]), opt))
     words = removeRepeats(words)
     return words
Ejemplo n.º 4
0
 def searchWordInDef(self, st, opt):
     """Search for the word ``st`` in the definitions of this glossary.

     Every entry of ``self.data`` is read as ``(word, definition, ...)``.
     The definition is split on ``opt['sep']``, each part is scored, and the
     best part score becomes the entry's relation value.

     Options read from ``opt`` (missing ones are expected to be filled in by
     ``addDefaultOptions``, which is defined elsewhere -- confirm):
         minRel: entries whose relation is <= this value are dropped.
         maxNum: keep at most this many results (only applied when > 0).
         sep: separator used to split a definition into parts.
         matchWord: if true, a part's score is the fraction of its words
             (as returned by ``takeStrWords``) equal to ``st``; otherwise
             it is ``len(findAll(part, st)) * len(st)`` divided by the
             number of non-space characters in the part.
         showRel: 'Percent' appends the relation as a percentage,
             'Percent At First' appends it only when it differs from the
             previous result's relation, any other value omits it.
         includeDefs: if true, each result also carries the definition
             (newlines and tabs escaped) after a literal ``\\n``.

     Returns a list of strings ordered by descending relation.
     """
     opt = addDefaultOptions(opt, {
         'minRel': 0.0,
         'maxNum': 100,
         'sep': commaFa,
         'matchWord': True,
         'showRel': 'Percent',
         # This key is read below; it previously had no default here.
         'includeDefs': False,
     })
     sep = opt['sep']
     matchWord = opt['matchWord']
     maxNum = opt['maxNum']
     minRel = opt['minRel']
     defs = opt['includeDefs']
     showRel = opt['showRel']
     outRel = []
     for item in self.data:
         (word, defi) = item[:2]
         # Cheap rejection before any splitting or scoring.
         if defi.find(st) == -1:
             continue
         rel = 0  # relation of the entry: best score among its parts
         for part in defi.split(sep):
             # `sch` is not defined in this method; presumably a module-level
             # collection of characters to blank out -- TODO confirm.
             for ch in sch:
                 part = part.replace(ch, ' ')
             if matchWord:
                 partWords = takeStrWords(part)
                 pLen = len(partWords)
                 if pLen == 0:
                     continue
                 pNum = sum(1 for pw in partWords if pw == st)
             else:
                 pLen = len(part.replace(' ', ''))
                 if pLen == 0:
                     continue
                 pNum = len(findAll(part, st)) * len(st)
             pRel = float(pNum) / pLen  # part relation
             if pRel > rel:
                 rel = pRel
         if rel <= minRel:
             continue
         if defs:
             outRel.append((word, rel, defi))
         else:
             outRel.append((word, rel))
     # Sort by relation, best first. `key` must be a callable; the former
     # `key=1` raised TypeError as soon as there was anything to sort.
     outRel.sort(key=lambda row: row[1], reverse=True)
     if 0 < maxNum < len(outRel):
         outRel = outRel[:maxNum]
     out = []
     prevRel = 0
     for row in outRel:
         (w, num) = row[:2]
         # NOTE(review): int(1.0/num) == 1 holds for every relation in
         # (0.5, 1.0], so the percentage is hidden for all of those, not just
         # for 100% -- confirm this is intended.
         # The `num > 0` guard avoids a ZeroDivisionError, which was possible
         # when minRel is negative and an entry scored 0.
         if num > 0 and int(1.0 / num) == 1:
             text = w
         elif showRel == 'Percent':
             text = '%s(%%%d)' % (w, 100 * num)
         elif showRel == 'Percent At First' and num != prevRel:
             text = '%s(%%%d)' % (w, 100 * num)
         else:
             text = w
         prevRel = num
         if defs:
             # Keep each result on one line by escaping newlines and tabs.
             m = row[2].replace('\n', '\\n').replace('\t', '\\t')
             text = '%s\\n%s' % (text, m)
         out.append(text)
     return out
Ejemplo n.º 5
0
 def takeOutputWords(self, opt=None):
     """Return the sorted words found in the definitions of this glossary.

     The second field (``item[1]``) of every entry in ``self.data`` is joined
     with spaces and handed to ``takeStrWords`` together with ``opt``; the
     result is sorted in place and then passed through ``removeRepeats``
     (presumably to drop duplicates -- defined elsewhere).

     ``opt`` is an options dict forwarded to ``takeStrWords``; when omitted a
     fresh empty dict is used.
     """
     if opt is None:
         # A `{}` default would be one dict shared by every call, so any
         # change made to it by takeStrWords would leak into later calls.
         opt = {}
     words = takeStrWords(' '.join([item[1] for item in self.data]), opt)
     words.sort()
     words = removeRepeats(words)
     return words
Ejemplo n.º 6
0
#!/usr/bin/python
import sys

sys.path.append("/usr/share/pyglossary/src")
import text_utils
import string, os, time

t0 = time.time()

# Read the input file named on the command line and extract its words.
# The print() calls below are Python 3 style, where the `file()` builtin does
# not exist; `open` works on both major versions, and `with` guarantees the
# handle is closed even if reading or word extraction raises.
with open(sys.argv[1]) as fp:
    words = text_utils.takeStrWords(fp.read(), {"sort": False})

# NOTE: the [:-4] slice assumes the input name ends in a dot plus a
# three-character extension (e.g. ".txt").
with open(sys.argv[1][:-4] + "-words.txt", "w") as wordsFile:
    print(len(words), "words found. writing to file...")
    wordsFile.write("\n".join(words) + "\n")

print("%f  seconds left." % (time.time() - t0))
Ejemplo n.º 7
0
 def takeOutputWords(self, opt=None):
     """Collect the words used in this glossary's definitions, sorted.

     Joins the second field of every entry in ``self._data`` with spaces,
     extracts words from that text with ``takeStrWords`` (forwarding ``opt``,
     or an empty dict when ``opt`` is None), sorts them, and returns whatever
     ``removeRepeats`` yields for the sorted list.
     """
     options = {} if opt is None else opt
     definitions = (entry[1] for entry in self._data)
     found = list(takeStrWords(' '.join(definitions), options))
     found.sort()
     return removeRepeats(found)
Ejemplo n.º 8
0
#!/usr/bin/python
import sys
sys.path.append('/usr/share/pyglossary/src')
import text_utils
import string, os, time

t0 = time.time()

# Read the input file named on the command line and extract its words.
# `open` + `with` replaces the Python-2-only `file()` builtin and guarantees
# the handle is closed even if reading or word extraction raises.
with open(sys.argv[1]) as fp:
    words = text_utils.takeStrWords(fp.read(), {'sort': False})

# NOTE: the [:-4] slice assumes the input name ends in a dot plus a
# three-character extension (e.g. ".txt").
with open(sys.argv[1][:-4] + '-words.txt', 'w') as wordsFile:
    # Formatted as a single string so the output is the same on Python 2 and 3.
    print('%d words found. writing to file...' % len(words))
    wordsFile.write('\n'.join(words) + '\n')

print('%f  seconds left.' % (time.time() - t0))