def searchWordInDef(self, st, opt):
    """Search for the word `st` in the definitions of this glossary.

    Every entry of ``self.data`` whose definition contains `st` gets a
    relation value: the definition is split on ``opt['sep']``, each part
    is scored, and the best-scoring part gives the entry's relation.

    Keys read from `opt` (missing keys are filled in through
    ``addDefaultOptions``; defaults in parentheses):
      minRel      -- entries whose relation is <= this value are dropped
                     (0.0)
      maxNum      -- maximum number of results, values <= 0 mean
                     "no limit" (100)
      sep         -- separator used to split a definition into parts
                     (commaFa)
      matchWord   -- if true, a part's relation is
                     (words equal to `st`) / (words in the part);
                     otherwise it is
                     (characters covered by occurrences of `st`) /
                     (non-space characters in the part) (True)
      showRel     -- 'Percent': append "(%NN)" to every word;
                     'Percent At First': append it only when the relation
                     differs from the previous result's relation;
                     anything else: never append it ('Percent')
      includeDefs -- if true, each result is followed by a literal
                     backslash-n and the definition, with newlines and
                     tabs escaped (False)

    Returns a list with one item per matching entry, ordered by
    descending relation.
    """
    opt = addDefaultOptions(opt, {
        'minRel': 0.0,
        'maxNum': 100,
        'sep': commaFa,
        'matchWord': True,
        'showRel': 'Percent',
        # Was missing from the defaults, so omitting it raised KeyError.
        'includeDefs': False,
    })
    sep = opt['sep']
    matchWord = opt['matchWord']
    maxNum = opt['maxNum']
    minRel = opt['minRel']
    showRel = opt['showRel']
    defs = opt['includeDefs']

    outRel = []  # (word, relation, definition) tuples
    for item in self.data:
        (word, defi) = item[:2]
        if st not in defi:
            continue
        rel = 0  # relation of the entry: the best relation of its parts
        for part in defi.split(sep):
            # `sch` is defined outside this block; presumably the
            # characters to treat as blanks -- TODO confirm.
            for ch in sch:
                part = part.replace(ch, ' ')
            if matchWord:
                partWords = takeStrWords(part)
                pLen = len(partWords)
                if pLen == 0:
                    continue
                pNum = sum(1 for pw in partWords if pw == st)
            else:
                pLen = len(part.replace(' ', ''))
                if pLen == 0:
                    continue
                pNum = len(findAll(part, st)) * len(st)
            pRel = float(pNum) / pLen
            if pRel > rel:
                rel = pRel
        if rel <= minRel:
            continue
        outRel.append((word, rel, defi))

    # Sort by relation, best first. The original passed ``key=1``, which
    # is not callable and raised TypeError on any non-empty result list.
    outRel.sort(key=lambda entry: entry[1], reverse=True)
    if len(outRel) > maxNum > 0:
        outRel = outRel[:maxNum]

    out = []
    prevRel = 0
    for (word, rel, defi) in outRel:
        # NOTE(review): int(1.0 / rel) == 1 holds for every relation in
        # (0.5, 1], so the percentage is hidden for all of those, not only
        # for exact 100% matches -- confirm this is intended.
        # The ``rel > 0`` guard avoids ZeroDivisionError, which was
        # reachable when a negative minRel let a zero relation through.
        if rel > 0 and int(1.0 / rel) == 1:
            showPercent = False
        elif showRel == 'Percent':
            showPercent = True
        elif showRel == 'Percent At First':
            showPercent = (rel != prevRel)
        else:
            showPercent = False
        prevRel = rel

        if showPercent:
            label = '%s(%%%d)' % (word, 100 * rel)
        else:
            label = word

        if defs:
            escaped = defi.replace('\n', '\\n').replace('\t', '\\t')
            out.append('%s\\n%s' % (label, escaped))
        else:
            out.append(label)
    return out
#!/usr/bin/python
"""Write the words of a text file to a sibling "<name>-words.txt" file.

Usage: pass the path of the input file as the only argument.
"""
import sys
sys.path.append('/usr/share/pyglossary/src')  # must precede the import below
import text_utils
import string
import os
import time


def main(argv):
    """Extract the words of the file named by argv[1] and save them.

    The output path is the input path with its extension replaced by
    '-words.txt'. The original sliced off the last four characters, which
    is only correct for three-letter extensions.
    """
    if len(argv) < 2:
        sys.exit('usage: %s FILE' % argv[0])
    t0 = time.time()
    inPath = argv[1]
    outPath = os.path.splitext(inPath)[0] + '-words.txt'

    # ``with`` closes the files even if reading or writing fails.
    with open(inPath) as fp:
        words = text_utils.takeStrWords(fp.read(), {'sort': False})

    print('%d words found. writing to file...' % len(words))
    with open(outPath, 'w') as wordsFile:
        wordsFile.write('\n'.join(words) + '\n')
    print('%f seconds left.' % (time.time() - t0))


if __name__ == '__main__':
    main(sys.argv)
def takeOutputWords(self, opt=None):
    """Return the sorted words found in the second field of every entry.

    The second field of each ``self.data`` item (the definition text, going
    by how entries are unpacked elsewhere in this file) is joined with
    spaces and handed to ``takeStrWords`` together with `opt`. The result
    is sorted and passed through ``removeRepeats`` (presumably dropping
    duplicates -- defined outside this block).

    `opt` defaults to a fresh dict on every call. The previous ``opt={}``
    default was one shared dict, so any change a helper made to it would
    leak into later calls.
    """
    if opt is None:
        opt = {}
    text = ' '.join([item[1] for item in self.data])
    words = sorted(takeStrWords(text, opt))
    return removeRepeats(words)
def searchWordInDef(self, st, opt):
    """Search for the word `st` in the definitions of this glossary.

    Every entry of ``self.data`` whose definition contains `st` gets a
    relation value: the definition is split on ``opt['sep']``, each part
    is scored, and the best-scoring part gives the entry's relation.

    Keys read from `opt` (missing keys are filled in through
    ``addDefaultOptions``; defaults in parentheses):
      minRel      -- entries whose relation is <= this value are dropped
                     (0.0)
      maxNum      -- maximum number of results, values <= 0 mean
                     "no limit" (100)
      sep         -- separator used to split a definition into parts
                     (commaFa)
      matchWord   -- if true, a part's relation is
                     (words equal to `st`) / (words in the part);
                     otherwise it is
                     (characters covered by occurrences of `st`) /
                     (non-space characters in the part) (True)
      showRel     -- 'Percent': append "(%NN)" to every word;
                     'Percent At First': append it only when the relation
                     differs from the previous result's relation;
                     anything else: never append it ('Percent')
      includeDefs -- if true, each result is followed by a literal
                     backslash-n and the definition, with newlines and
                     tabs escaped (False)

    Returns a list with one item per matching entry, ordered by
    descending relation.
    """
    opt = addDefaultOptions(opt, {
        'minRel': 0.0,
        'maxNum': 100,
        'sep': commaFa,
        'matchWord': True,
        'showRel': 'Percent',
        # Was missing from the defaults, so omitting it raised KeyError.
        'includeDefs': False,
    })
    sep = opt['sep']
    matchWord = opt['matchWord']
    maxNum = opt['maxNum']
    minRel = opt['minRel']
    showRel = opt['showRel']
    defs = opt['includeDefs']

    outRel = []  # (word, relation, definition) tuples
    for item in self.data:
        (word, defi) = item[:2]
        if st not in defi:
            continue
        rel = 0  # relation of the entry: the best relation of its parts
        for part in defi.split(sep):
            # `sch` is defined outside this block; presumably the
            # characters to treat as blanks -- TODO confirm.
            for ch in sch:
                part = part.replace(ch, ' ')
            if matchWord:
                partWords = takeStrWords(part)
                pLen = len(partWords)
                if pLen == 0:
                    continue
                pNum = sum(1 for pw in partWords if pw == st)
            else:
                pLen = len(part.replace(' ', ''))
                if pLen == 0:
                    continue
                pNum = len(findAll(part, st)) * len(st)
            pRel = float(pNum) / pLen
            if pRel > rel:
                rel = pRel
        if rel <= minRel:
            continue
        outRel.append((word, rel, defi))

    # Sort by relation, best first. The original passed ``key=1``, which
    # is not callable and raised TypeError on any non-empty result list.
    outRel.sort(key=lambda entry: entry[1], reverse=True)
    if len(outRel) > maxNum > 0:
        outRel = outRel[:maxNum]

    out = []
    prevRel = 0
    for (word, rel, defi) in outRel:
        # NOTE(review): int(1.0 / rel) == 1 holds for every relation in
        # (0.5, 1], so the percentage is hidden for all of those, not only
        # for exact 100% matches -- confirm this is intended.
        # The ``rel > 0`` guard avoids ZeroDivisionError, which was
        # reachable when a negative minRel let a zero relation through.
        if rel > 0 and int(1.0 / rel) == 1:
            showPercent = False
        elif showRel == 'Percent':
            showPercent = True
        elif showRel == 'Percent At First':
            showPercent = (rel != prevRel)
        else:
            showPercent = False
        prevRel = rel

        if showPercent:
            label = '%s(%%%d)' % (word, 100 * rel)
        else:
            label = word

        if defs:
            escaped = defi.replace('\n', '\\n').replace('\t', '\\t')
            out.append('%s\\n%s' % (label, escaped))
        else:
            out.append(label)
    return out
def takeOutputWords(self, opt=None):
    """Return the sorted words found in the second field of every entry.

    The second field of each ``self.data`` item (the definition text, going
    by how entries are unpacked elsewhere in this file) is joined with
    spaces and handed to ``takeStrWords`` together with `opt`. The
    returned list is sorted in place and then passed through
    ``removeRepeats`` (presumably dropping duplicates -- defined outside
    this block).

    `opt` defaults to a fresh dict on every call. The previous ``opt={}``
    default was one shared dict, so any change a helper made to it would
    leak into later calls.
    """
    if opt is None:
        opt = {}
    words = takeStrWords(' '.join([item[1] for item in self.data]), opt)
    words.sort()
    words = removeRepeats(words)
    return words
#!/usr/bin/python
"""Write the words of a text file to a sibling "<name>-words.txt" file.

Usage: pass the path of the input file as the only argument.
"""
import sys
sys.path.append("/usr/share/pyglossary/src")  # must precede the import below
import text_utils
import string
import os
import time


def main(argv):
    """Extract the words of the file named by argv[1] and save them.

    The output path is the input path with its extension replaced by
    "-words.txt". The original sliced off the last four characters, which
    is only correct for three-letter extensions.
    """
    if len(argv) < 2:
        sys.exit("usage: %s FILE" % argv[0])
    t0 = time.time()
    inPath = argv[1]
    outPath = os.path.splitext(inPath)[0] + "-words.txt"

    # The original used the ``file`` builtin, which does not exist on
    # Python 3; ``open`` works on both and ``with`` closes the files even
    # if reading or writing fails.
    with open(inPath) as fp:
        words = text_utils.takeStrWords(fp.read(), {"sort": False})

    # One pre-formatted argument prints the same text on Python 2 and 3;
    # ``print(a, b)`` would print a tuple under Python 2.
    print("%d words found. writing to file..." % len(words))
    with open(outPath, "w") as wordsFile:
        wordsFile.write("\n".join(words) + "\n")
    print("%f seconds left." % (time.time() - t0))


if __name__ == "__main__":
    main(sys.argv)
def takeOutputWords(self, opt=None):
    """Return the sorted words found in the second field of every entry.

    The second field of each ``self._data`` item is joined with spaces and
    handed to ``takeStrWords`` together with `opt` (a fresh dict when
    `opt` is None). The result is sorted and passed through
    ``removeRepeats`` (presumably dropping duplicates -- defined outside
    this block).
    """
    options = {} if opt is None else opt
    secondFields = [entry[1] for entry in self._data]
    allWords = takeStrWords(' '.join(secondFields), options)
    return removeRepeats(sorted(allWords))
#!/usr/bin/python
"""Write the words of a text file to a sibling "<name>-words.txt" file.

Usage: pass the path of the input file as the only argument.
"""
import sys
sys.path.append('/usr/share/pyglossary/src')  # must precede the import below
import text_utils
import string
import os
import time


def main(argv):
    """Extract the words of the file named by argv[1] and save them.

    The output path is the input path with its extension replaced by
    '-words.txt'. The original sliced off the last four characters, which
    is only correct for three-letter extensions.
    """
    if len(argv) < 2:
        sys.exit('usage: %s FILE' % argv[0])
    t0 = time.time()
    inPath = argv[1]
    outPath = os.path.splitext(inPath)[0] + '-words.txt'

    # ``with`` closes the files even if reading or writing fails.
    with open(inPath) as fp:
        words = text_utils.takeStrWords(fp.read(), {'sort': False})

    print('%d words found. writing to file...' % len(words))
    with open(outPath, 'w') as wordsFile:
        wordsFile.write('\n'.join(words) + '\n')
    print('%f seconds left.' % (time.time() - t0))


if __name__ == '__main__':
    main(sys.argv)