def run(self):
    """Run the standard build step, then generate the search-index DB.

    The index is only (re)built when ``othman-data/ix.db`` does not
    already exist, so repeated builds are cheap.
    """
    build.run(self)
    # generate data
    from othman.core import othmanCore, searchIndexer
    if not os.path.isfile('othman-data/ix.db'):
        q = othmanCore(False)
        ix = searchIndexer(True)
        # ayah numbers are 1-based, so start enumerating at 1 instead
        # of adding 1 on every addWord() call
        for n, (o, i) in enumerate(q.getAyatIter(1, 6236), 1):
            for w in i.split():
                ix.addWord(w, n)
        # NOTE(review): dropped the unused local
        # `d = os.path.dirname(sys.argv[0])` — it was assigned but
        # never read.
        ix.save()
#! /usr/bin/python # -*- coding: UTF-8 -*- from __future__ import print_function import sys, os, os.path, time from othman.core import othmanCore, searchIndexer q = othmanCore(False) ix = searchIndexer(True) wc = 0 for n, (o, i) in enumerate(q.getAyatIter(1, 6236)): for w in i.split(): ix.addWord(w, n + 1) wc += 1 d = os.path.dirname(sys.argv[0]) ix.save() print( "got %d words, %d terms (max term length=%d character, term vectors size=%d bytes)." % (wc, ix.terms_count, ix.maxWordLen, ix.term_vectors_size))
from StringIO import StringIO from xml.sax.saxutils import escape, unescape, quoteattr # for xml rendering from dataModel import * from tags import * from meta import MCache, metaDict2Hash, prettyId, makeId, metaVrr from userDb import UserDb from platform import guess_prefixes from whooshSearchEngine import SearchEngine from asyncIndex import AsyncIndex from othman.core import othmanCore from okasha.utils import ObjectsCache, fromFs, toFs th_ext=u'.ki' th_ext_glob=u'*.ki' othman=othmanCore() class ThawabMan (object): def __init__(self, prefixes=None, isMonolithic=True, indexerQueueSize=0): """Create a new Thawab instance given a user writable directory and an optional system-wide read-only directory prefixes a list of directories all are read-only except the first the first writable directory can be os.path.expanduser('~/.thawab') os.path.join([os.path.dirname(sys.argv[0]),'..','data']) isMonolithic=True if we should use locks and reconnect to sqlite indexerQueueSize is the size of threaded index queue (0 infinite, -1 disabled) the first thing you should do is to call loadMCache()
#! /usr/bin/python
# -*- coding: UTF-8 -*-
"""Generate the word-search index database for the Othman Quran browser.

Iterates over all 6236 ayat, feeds every word into the indexer with its
1-based ayah number, saves the index, and prints summary statistics.
"""
import sys, os, os.path, time
from othman.core import othmanCore, searchIndexer

q = othmanCore(False)
ix = searchIndexer(True)
wc = 0  # total number of words fed into the index
# ayah numbers are 1-based: start enumerate at 1 instead of using n + 1
for n, (o, i) in enumerate(q.getAyatIter(1, 6236), 1):
    for w in i.split():
        ix.addWord(w, n)
        wc += 1
# NOTE(review): removed `d = os.path.dirname(sys.argv[0])` — assigned
# but never used.
ix.save()
# single-argument print(...) is valid on both Python 2 and Python 3,
# unlike the original `print "..."` statement form
print(
    "got %d words, %d terms (max term length=%d character, term vectors size=%d bytes)."
    % (wc, ix.terms_count, ix.maxWordLen, ix.term_vectors_size))