Exemple #1
0
    def run(self):
        """Run the regular build step, then generate the search index if absent.

        Delegates to ``build.run`` first. Afterwards, if the file
        ``othman-data/ix.db`` (resolved against the current working
        directory) does not exist, every whitespace-separated word of each
        text yielded by ``getAyatIter(1, 6236)`` is added to a new
        ``searchIndexer`` under the 1-based position of that text, and the
        index is saved.
        """
        build.run(self)
        # generate data
        # Imported inside the method rather than at module top
        # (presumably so othman only has to be importable when the build
        # actually runs -- TODO confirm).
        from othman.core import othmanCore, searchIndexer

        if not os.path.isfile('othman-data/ix.db'):
            q = othmanCore(False)
            ix = searchIndexer(True)
            # Each item is a 2-tuple; only its second element (the text
            # that gets split into words) is used. Positions start at 1.
            for position, (_, text) in enumerate(q.getAyatIter(1, 6236), 1):
                for word in text.split():
                    ix.addWord(word, position)
            ix.save()
Exemple #2
0
#! /usr/bin/python
# -*- coding: UTF-8 -*-
from __future__ import print_function

import sys, os, os.path, time
from othman.core import othmanCore, searchIndexer

# Build the word index: every whitespace-separated word of each text
# yielded by getAyatIter(1, 6236) is registered under the 1-based position
# of that text, then the index is saved and a summary line is printed.
q = othmanCore(False)
ix = searchIndexer(True)
wc = 0  # running count of words handed to the indexer
ayat = q.getAyatIter(1, 6236)
for n, (o, i) in enumerate(ayat):
    for w in i.split():
        ix.addWord(w, n + 1)
        wc += 1
# NOTE(review): d is not read anywhere in the visible code; kept as-is in
# case something outside this view relies on it.
d = os.path.dirname(sys.argv[0])
ix.save()
stats = (wc, ix.terms_count, ix.maxWordLen, ix.term_vectors_size)
print(
    "got %d words, %d terms (max term length=%d character, term vectors size=%d bytes)."
    % stats)
Exemple #3
0
from StringIO import StringIO
from xml.sax.saxutils import escape, unescape, quoteattr # for xml rendering
from dataModel import *
from tags import *
from meta import MCache, metaDict2Hash, prettyId, makeId, metaVrr
from userDb import UserDb
from platform import guess_prefixes

from whooshSearchEngine import SearchEngine
from asyncIndex import AsyncIndex
from othman.core import othmanCore
from okasha.utils import ObjectsCache, fromFs, toFs

# Filename suffix and the matching glob pattern, both built around '.ki'
# (presumably the extension of Thawab book files -- TODO confirm).
th_ext=u'.ki'
th_ext_glob=u'*.ki'
# Module-level othmanCore instance, created once at import time with
# default arguments.
othman=othmanCore()

class ThawabMan (object):
  def __init__(self, prefixes=None, isMonolithic=True, indexerQueueSize=0):
    """Create a new Thawab instance given a user writable directory and an optional system-wide read-only directory

  prefixes a list of directories all are read-only except the first
  the first writable directory can be 
    os.path.expanduser('~/.thawab')
    os.path.join([os.path.dirname(sys.argv[0]),'..','data'])
  
  isMonolithic=True if we should use locks and reconnect to sqlite
  
  indexerQueueSize is the size of threaded index queue (0 infinite, -1 disabled)

the first thing you should do is to call loadMCache()
Exemple #4
0
#! /usr/bin/python
# -*- coding: UTF-8 -*-

import sys, os, os.path, time
from othman.core import othmanCore, searchIndexer

# Build the word index: every whitespace-separated word of each text
# yielded by getAyatIter(1, 6236) is registered under the 1-based position
# of that text, then the index is saved and a summary line is printed.
q = othmanCore(False)
ix = searchIndexer(True)
wc = 0  # running count of words handed to the indexer
for n, (o, i) in enumerate(q.getAyatIter(1, 6236)):
    for w in i.split():
        ix.addWord(w, n + 1)
        wc += 1
# NOTE(review): d is not read anywhere in the visible code; kept as-is in
# case something outside this view relies on it.
d = os.path.dirname(sys.argv[0])
ix.save()
# Single-argument call form: prints the same text under Python 2 (where it
# parses as a print statement with a parenthesised expression) and is valid
# syntax under Python 3, unlike the bare print statement.
print(
    "got %d words, %d terms (max term length=%d character, term vectors size=%d bytes)."
    % (wc, ix.terms_count, ix.maxWordLen, ix.term_vectors_size))