Example #1
0
    def __init__(self, prefixes=None, isMonolithic=True, indexerQueueSize=0):
        """Create a new Thawab instance given a user writable directory and an optional system-wide read-only directory

    prefixes a list of directories all are read-only except the first
    the first writable directory can be 
        os.path.expanduser('~/.thawab')
        os.path.join([os.path.dirname(sys.argv[0]),'..','data'])
    
    isMonolithic = True if we should use locks and reconnect to sqlite
    
    indexerQueueSize is the size of threaded index queue (0 infinite, -1 disabled)

the first thing you should do is to call loadMCache()
"""
        if not prefixes:
            prefixes = guess_prefixes()
        try:
            if not os.path.isdir(prefixes[0]):
                os.makedirs(prefixes[0])
        except:
            raise OSError
        self.prefixes = filter(lambda i: os.path.isdir(i), [os.path.realpath(os.path.abspath(p)) for p in prefixes])
        # make sure it's unique
        p = self.prefixes[0]
        s = set(self.prefixes[1:])
        if p in s:
            s.remove(p)
        if len(s) < len(self.prefixes) - 1:
            self.prefixes = [p] + sorted(list(s))
        # print self.prefixes
        self.othman = othman
        self.__meta = None
        self.read_only = self.assertManagedTree()
        self.conf = self.prase_conf()
        self.searchEngine = SearchEngine(self)
        self.user_db = UserDb(self, os.path.join(self.prefixes[0], "user.db"))
        if indexerQueueSize >= 0:
            self.asyncIndexer = AsyncIndex(self.searchEngine, indexerQueueSize)
        else:
            self.asyncIndexer = None

        self.isMonolithic = isMonolithic
        if not self.isMonolithic:
            import threading

            lock1 = threading.Lock()
        else:
            lock1 = None
        self.kutubCache = ObjectsCache(lock=lock1)
Example #2
0
 def __init__(self, th, typ='web', *args, **kw):
   """
   th is an instance of ThawabMan
   typ selects the front-end flavor: 'app' (desktop) enables access
   by uri, anything else (e.g. 'web', a server) disables it
   """
   self.th = th
   self.isMonolithic = th.isMonolithic
   self.stringSeed = "S3(uR!r7y"
   self._typ = typ
   self._allowByUri = (typ == 'app')
   self._emptyViewResp[u"apptype"] = self._typ
   # FIXME: move ObjectsCache of kitab to routines to core.ThawabMan
   if self.isMonolithic:
     cache_lock = None
   else:
     # only multi-threaded deployments need to pay for locking
     import threading
     cache_lock = threading.Lock()
   self.searchCache = ObjectsCache(lock=cache_lock)
   baseWebApp.__init__(self, *args, **kw)
Example #3
0
class ThawabMan (object):
  """Top-level manager for a Thawab library.

  Holds the list of data prefixes (the first one is writable, the rest
  are read-only), a lazily-loaded metadata cache, the search engine,
  the user database and an in-memory cache of open Kitab objects.
  """
  def __init__(self, prefixes=None, isMonolithic=True, indexerQueueSize=0):
    """Create a new Thawab instance given a user writable directory and an optional system-wide read-only directory

  prefixes a list of directories all are read-only except the first
  the first writable directory can be 
    os.path.expanduser('~/.thawab')
    os.path.join([os.path.dirname(sys.argv[0]),'..','data'])
  
  isMonolithic=True if we should use locks and reconnect to sqlite
  
  indexerQueueSize is the size of threaded index queue (0 infinite, -1 disabled)

the first thing you should do is to call loadMCache()
"""
    if not prefixes: prefixes=guess_prefixes()
    try:
      if not os.path.isdir(prefixes[0]): os.makedirs(prefixes[0])
    except:
      # NOTE(review): this bare except converts *any* failure (even
      # KeyboardInterrupt) into an empty OSError, losing errno/message
      raise OSError
    # NOTE(review): filter() result is indexed below — works on Python 2
    # (returns a list) but would break on Python 3 (returns an iterator)
    self.prefixes=filter(lambda i:os.path.isdir(i), [os.path.realpath(os.path.abspath(p)) for p in prefixes])
    # make sure it's unique
    p=self.prefixes[0]
    s=set(self.prefixes[1:])
    if p in s: s.remove(p)
    if len(s)<len(self.prefixes)-1: self.prefixes=[p]+sorted(list(s))
    #print self.prefixes
    self.othman=othman
    # metadata cache, lazily created by loadMeta()
    self.__meta=None
    # NOTE(review): assertManagedTree() returns True when the first prefix
    # is writable, so "read_only" looks inverted — confirm with callers
    self.read_only = self.assertManagedTree()
    self.conf = self.prase_conf()
    self.searchEngine=SearchEngine(self)
    self.user_db=UserDb(self, os.path.join(self.prefixes[0],"user.db") )
    # indexerQueueSize == -1 disables the threaded indexer entirely
    if indexerQueueSize>=0:
      self.asyncIndexer=AsyncIndex(self.searchEngine, indexerQueueSize)
    else:
      self.asyncIndexer=None

    self.isMonolithic=isMonolithic
    if not self.isMonolithic:
      import threading
      lock1=threading.Lock();
    else:
      lock1=None
    self.kutubCache=ObjectsCache(lock=lock1)

  # NOTE(review): "prase" is a typo for "parse", but the name is referenced
  # by __init__ and is effectively public — renaming would break callers
  def prase_conf(self):
    """Parse conf/main.txt ("key = value" lines) into a dict.

    Returns {} if the file is missing or unreadable; malformed lines
    (no '=') are silently skipped.
    """
    r={}
    fn=os.path.join(self.prefixes[0], 'conf', 'main.txt')
    if not os.path.exists(fn): return {}
    try:
      f=open(fn)
      t=f.readlines()
      f.close()
    except: return {}
    for l in t:
      a=l.strip().split("=",1)
      if len(a)!=2: continue
      r[a[0].strip()]=a[1].strip()
    return r

  def assertManagedTree(self):
     """create the hierarchy inside the user-managed prefix    
     # db	contains Kitab files [.thawab]
     # index	contains search index
     # conf	application configuration
     # cache	contains the metadata cache for all containers"""
     P=self.prefixes[0]
     # returns False when the managed prefix is not writable
     if not os.access(P, os.W_OK): return False
     for i in ['db','index','conf','cache', 'tmp', 'themes']:
       p=os.path.join(P,i)
       if not os.path.isdir(p): os.makedirs(p)
     return True

  def mktemp(self):
    """Create a new empty Kitab backed by a temp file under <prefix>/tmp."""
    h,fn=mkstemp(th_ext, 'THAWAB_',os.path.join(self.prefixes[0],'tmp'))
    return Kitab(fn,True)

  def getCachedKitab(self, uri):
    """
    try to get a kitab by uri from cache,
    if it's not in the cache, it will be opened and cached
    """
    ki=self.kutubCache.get(uri)
    if not ki:
      ki=self.getKitabByUri(uri)
      if ki: self.kutubCache.append(uri, ki)
    #elif not self.isMonolithic: ki.connect() # FIXME: no longer needed, kept to trace other usage of isMonolithic
    return ki

  def getCachedKitabByNameV(self, kitabNameV):
    """Resolve "name", "name-version" or "name-version-release" to a cached Kitab (or None)."""
    a=kitabNameV.split(u'-')
    l=len(a)
    if l==1:
      m=self.getMeta().getLatestKitab(kitabNameV)
    elif l==2:
      m=self.getMeta().getLatestKitabV(*a)
    else:
      m=self.getMeta().getLatestKitabVr(*a)
    if m: return self.getCachedKitab(m['uri'])
    return None

  def getUriByKitabName(self,kitabName):
    """
    return uri for the latest kitab with the given name
    """
    m=self.getMeta().getLatestKitab(kitabName)
    if not m: return None
    return m['uri']

  def getKitab(self,kitabName):
    """Open the latest kitab with the given name, or return None if unknown."""
    m=self.getMeta().getLatestKitab(kitabName)
    if m: return Kitab(m['uri'], th=self, meta=m)
    return None

  def getKitabByUri(self,uri):
    """Open a kitab by uri, attaching cached meta data when available."""
    m=self.getMeta().getByUri(uri)
    if m: return Kitab(uri, th=self, meta=m)
    return Kitab(uri, th=self)

  def getKitabList(self):
    """
    return a list of managed kitab's name
    """
    return self.getMeta().getKitabList()

  def getManagedUriList(self):
    """list of all managed uri (absolute filenames for a Kitab)
     this is low level as the user should work with kitabName, title, and rest of meta data"""
    if self.__meta:
      return self.__meta.getUriList()
    # meta not loaded yet: glob <prefix>/db/*.thawab across all prefixes
    r=[]
    for i in self.prefixes:
      a=glob(toFs(os.path.join(fromFs(i),u'db',th_ext_glob)))
      p=map(lambda j: fromFs(j), a)
      r.extend(p)
    return r

  def getMeta(self):
    """Return the metadata cache, loading it on first use."""
    if not self.__meta: self.loadMeta()
    return self.__meta

  def loadMeta(self):
    """(Re)build the metadata cache from <prefix>/cache/meta.db."""
    self.__meta=None
    p=os.path.join(self.prefixes[0],'cache','meta.db')
    self.__meta=MCache(p, self.getManagedUriList())
    return self.__meta

  def reconstructMetaIndexedFlags(self):
    """Re-sync the meta "indexed" flags with what the search engine actually indexed."""
    m=self.loadMeta() # NOTE: getMeta is not used because we want to make sure we are using a fresh one
    l1=m.getIndexedList()
    l2=m.getUnindexedList()
    #l3=m.getDirtyIndexList() # NOTE: Dirty are kept as is
    for i in l1:
      v=self.searchEngine.getIndexedVersion(i['kitab'])
      if not v or metaVrr(i)!=v: m.setIndexedFlags(i['uri'], 0) # mark as unindexed
    for i in l2:
      v=self.searchEngine.getIndexedVersion(i['kitab'])
      if v and metaVrr(i)==v: m.setIndexedFlags(i['uri']) # mark as indexed if same version
Example #4
0
class webApp(baseWebApp):
    """Web front-end exposing a ThawabMan library over HTTP.

    NOTE(review): this code relies on Python-2-only constructs
    (``unicode``, ``.encode('base64')``, tuple-parameter lambda in
    ``_view``) — it will not run unmodified on Python 3.
    """
    # template of the response dict returned by view handlers.
    # NOTE(review): this is a *class* attribute and __init__ mutates it
    # ("apptype"), so the change is shared by all instances — confirm
    # that only one webApp is ever created per process.
    _emptyViewResp = {
        'apptype': 'web',
        'content': '',
        'childrenLinks': '',
        'prevUrl': '',
        'prevTitle': '',
        'upUrl': '',
        'upTitle': '',
        'nextUrl': '',
        'nextTitle': '',
        'breadcrumbs': ''
    }

    def __init__(self, th, typ='web', *args, **kw):
        """
    th is an instance of ThawabMan
    allowByUri=True for desktop, False for server
    """
        self.th = th
        self.isMonolithic = th.isMonolithic
        # seed mixed into _safeHash; not a secret in any strong sense
        self.stringSeed = "S3(uR!r7y"
        self._typ = typ
        self._allowByUri = (typ == 'app')
        self._emptyViewResp[u"apptype"] = self._typ
        # FIXME: move ObjectsCache of kitab to routines to core.ThawabMan
        if not self.isMonolithic:
            import threading
            lock1 = threading.Lock()
        else:
            lock1 = None
        self.searchCache = ObjectsCache(lock=lock1)
        baseWebApp.__init__(self, *args, **kw)

    def _safeHash(self, o):
        """
    a URL safe hash, it results a 22 byte long string hash based on md5sum
    """
        # Python 2 only: 'unicode' and str.encode('base64') do not exist on py3
        if isinstance(o, unicode): o = o.encode('utf8')
        return hashlib.md5(self.stringSeed +
                           o).digest().encode('base64').replace(
                               '+', '-').replace('/', '_')[:22]

    def _root(self, rq, *args):
        """Dispatch requests for '/': favicon, robots.txt, sitemap.xml or redirect to /index/."""
        if args:
            if args[0] == 'favicon.ico':
                raise redirectException(rq.script + '/_files/img/favicon.ico')
            elif args[0] == 'robots.txt':
                return self._robots(rq, *args)
            elif args[0] == 'sitemap.xml':
                return self._sitemap(rq, *args)
            raise forbiddenException()
        raise redirectException(rq.script + '/index/')

    @expose(contentType='text/plain; charset=utf-8')
    def _robots(self, rq, *args):
        """Serve robots.txt advertising the sitemap for the current host."""
        return """Sitemap: http://%s/sitemap.xml
User-agent: *
Allow: /
""" % (rq.environ['HTTP_HOST'] + rq.script)

    @expose(contentType='text/xml; charset=utf-8')
    def _sitemap(self, rq, *args):
        """Serve a sitemap.xml listing every kitab's static entry page."""
        t = time.gmtime()  # FIXME: use meta to get mime of meta.db
        d = time.strftime("%Y-%m-%dT%H:%M:%S+00:00", t)
        tmp = "\t<url>\n\t\t<loc>http://" + rq.environ[
            'HTTP_HOST'] + rq.script + "/static/%s/_i0.html</loc>\n\t\t<lastmod>" + d + "</lastmod>\n\t\t<changefreq>daily</changefreq>\n\t\t<priority>0.5</priority>\n\t</url>"
        l = self.th.getMeta().getKitabList()
        urls = []
        for k in l:
            urls.append(tmp % (k))
        return """<?xml version='1.0' encoding='UTF-8'?>
<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
	<url>
		<loc>http://thawab.ojuba.org/index/</loc>
		<lastmod>%s</lastmod>
		<changefreq>daily</changefreq>
		<priority>0.8</priority>
	</url>
	%s
</urlset>""" % (d, "\n".join(urls))

    @expose(bottleTemplate, ["main"])
    def index(self, rq, *args):
        """Render the home page with one link per managed kitab."""
        rq.q.title = "الرئيسية"
        l = self.th.getMeta().getKitabList()
        htmlLinks = []
        for k in l:
            # FIXME: it currenly offers only one version for each kitab (the first one)
            htmlLinks.append(
                '\t<li><a href="/view/%s/">%s</a></li>' %
                (k, prettyId(self.th.getMeta().getByKitab(k)[0]['kitab'])))
        htmlLinks = (u"\n".join(htmlLinks))
        return {
            u"lang": u"ar",
            u"dir": u"rtl",
            u"kutublinks": htmlLinks,
            "args": '/'.join(args)
        }

    @expose(percentTemplate, ["stem.html"])
    def stem(self, rq, *args):
        """Show the Arabic stem of the word given in the 'word' query parameter."""
        from stemming import stemArabic
        # Python 2: query values are byte strings and need explicit decoding
        w = rq.q.getfirst('word', '').decode('utf-8')
        s = ''
        if w:
            s = " ".join([stemArabic(i) for i in w.split()])
        return {u"script": rq.script, u"word": w, u"stem": s}

    def _getKitabObject(self, rq, *args):
        """Resolve args[0] (kitab name or '_by_uri') to a (Kitab, meta) pair.

        Raises forbiddenException/fileNotFoundException on bad input.
        """
        # FIXME: cache KitabObjects and update last access
        if not args:
            raise forbiddenException()  # TODO: make it a redirect to index
        k = args[0]
        if k == '_by_uri':
            # direct file access is only allowed in desktop ('app') mode
            if self._allowByUri:
                uri = rq.q.getfirst('uri', None)
                if not uri: raise fileNotFoundException()
                m = self.th.getMeta().getByUri(uri)
            else:
                raise forbiddenException()
        else:
            m = self.th.getMeta().getLatestKitab(k)
            if not m: raise forbiddenException()
            uri = m['uri']
        ki = self.th.getCachedKitab(uri)
        return ki, m

    def _view(self, ki, m, i, d='#', s=""):
        """Build the view-response dict for node id *i* of kitab *ki*.

        d and s are the prefix/suffix wrapped around node links
        (e.g. './' and '.html' for the static pages).
        """
        r = self._emptyViewResp.copy()
        node, p, u, n, c, b = ki.toc.getNodePrevUpNextChildrenBreadcrumbs(i)
        # ub is the upper bound (global order of the next node), -1 = till end
        if n: ub = n.globalOrder
        else: ub = -1
        if not node or i == "_i0":
            r['content'] = "<h1>%s</h1>" % escape(prettyId(m['kitab']))
        else:
            r['content'] = node.toHtml(ub).replace('\n\n', '\n</p><p>\n')
        if c:
            cLinks = ''.join(
                map(
                    lambda cc: '<li><a href="%s">%s</a></li>\n' %
                    (d + "_i" + str(cc.idNum) + s, escape(cc.getContent())),
                    c))
            cLinks = "<ul>\n" + cLinks + "</ul>"
        else:
            cLinks = ''
        r['childrenLinks'] = cLinks
        if n:
            r['nextUrl'] = d + '_i' + str(n.idNum) + s
            r['nextTitle'] = escape(n.getContent())
        if p:
            r['prevUrl'] = d + '_i' + str(p.idNum) + s
            r['prevTitle'] = escape(p.getContent())
        if u:
            r['upUrl'] = d + '_i' + str(u.idNum) + s
            r['upTitle'] = escape(u.getContent())
        if b:
            # NOTE(review): tuple-parameter lambda is Python-2-only (PEP 3113)
            r['breadcrumbs'] = " &gt; ".join(
                map(
                    lambda (i, t):
                    ("<a href='" + d + "_i%i" + s + "'>%s</a>") %
                    (i, escape(t)), b))
        vrr = metaVrr(ki.meta)
        #self.th.searchEngine.related(m['kitab'], vrr, node.idNum)
        return r

    def _get_kitab_details(self, rq, *args):
        """Resolve a kitab and return (Kitab, meta, base response dict)."""
        ki, m = self._getKitabObject(rq, *args)
        if not ki or not m: return None, None, {}
        lang = m.get('lang', 'ar')
        # right-to-left for Arabic, Farsi and Hebrew
        if lang in ('ar', 'fa', 'he'): d = 'rtl'
        else: d = 'ltr'
        kitabId = escape(makeId(m['kitab']))
        t = escape(prettyId(m['kitab']))
        r = self._emptyViewResp.copy()
        r.update({
            u"script": rq.script,
            u"kitabTitle": t,
            u"kitabId": kitabId,
            u"headingId": u"_i0",
            u"app": u"Thawab",
            u"version": u"3.0.1",
            u"lang": lang,
            u"dir": d,
            u"title": t,
            u"content": t,
            "args": '/'.join(args)
        })
        return ki, m, r

    @expose(bottleTemplate, ["view"])
    def static(self, rq, *args):
        """Serve the crawler-friendly static page /static/<kitab>/<node>.html."""
        l = len(args)
        if l < 1:
            raise forbiddenException()  # TODO: make it show a list of books
        elif l == 1:
            raise redirectException(rq.script + '/static/' + args[0] +
                                    "/_i0.html")
        elif l != 2:
            raise forbiddenException()
        ki, m, r = self._get_kitab_details(rq, *args)
        if not ki: raise fileNotFoundException()
        h = args[1]
        if h.endswith(".html"): h = h[:-5]
        r.update(self._view(ki, m, h, './', ".html"))
        if self.th.searchEngine.getIndexedVersion(m['kitab']):
            rq.q.is_indexed = 1
            r['is_indexed'] = 1
        else:
            rq.q.is_indexed = 0
            r['is_indexed'] = 0
        r['is_static'] = 1
        r['d'] = './'
        r['s'] = '.html'
        return r

    @expose(bottleTemplate, ["view"])
    def view(self, rq, *args):
        """Serve the interactive (ajax-driven) view page /view/<kitab>/."""
        if len(args) != 1: raise forbiddenException()
        ki, m, r = self._get_kitab_details(rq, *args)
        if not ki: raise fileNotFoundException()
        if self.th.searchEngine.getIndexedVersion(m['kitab']):
            rq.q.is_indexed = 1
            r['is_indexed'] = 1
        else:
            rq.q.is_indexed = 0
            r['is_indexed'] = 0
        r['is_static'] = 0
        r['d'] = '#'
        r['s'] = ''
        return r

    @expose()
    def ajax(self, rq, *args):
        """Plain-text ajax endpoints: searchExcerpt/<hash>/<i> and kutub."""
        if not args: raise forbiddenException()
        if args[0] == 'searchExcerpt' and len(args) == 3:
            h = args[1]
            try:
                i = int(args[2])
            # NOTE(review): int() raises ValueError for malformed strings;
            # TypeError only for None — likely should catch ValueError too
            except TypeError:
                raise forbiddenException()
            R = self.searchCache.get(h)
            if R == None: return 'انتهت صلاحية هذا البحث'
            r = self.th.searchEngine.resultExcerpt(R, i)
            #r=escape(self.th.searchEngine.resultExcerpt(R,i)).replace('\0','<em>').replace('\010','</em>').replace(u"\u2026",u"\u2026<br/>").encode('utf8')
            return r
        elif args[0] == 'kutub' and len(args) == 1:
            # filter the kitab list by the normalized query string 'q'
            q = rq.q.getfirst(
                'q', '').decode('utf-8').strip().translate(normalize_tb)
            r = []
            l = self.th.getMeta().getKitabList()
            for k in l:
                n = prettyId(k)
                if not q or q in n.translate(normalize_tb):
                    r.append('\t<li><a href="/view/%s/">%s</a></li>' % (k, n))
            return '<ul>%s</ul>\n<div class="clear"></div>' % "\n".join(r)
        raise forbiddenException()

    @expose(jsonDumps)
    def json(self, rq, *args):
        """JSON endpoints: view/<kitab>/<node>, search?q=..., searchResults?h=&i=&c=."""
        # use rq.rhost to impose host-based limits on searching
        if not args: raise forbiddenException()
        ki = None
        r = {}
        if args[0] == 'view':
            a = args[1:]
            ki, m = self._getKitabObject(rq, *a)
            if len(a) == 2:
                r = self._view(ki, m, a[1])
        elif args[0] == 'search':
            q = rq.q.getfirst('q', '')
            h = self._safeHash(q)
            # FIXME: check to see if one already search for that before
            q = q.decode('utf8')
            R = self.th.searchEngine.queryIndex(q)
            # print R
            if not R: return {'t': 0, 'c': 0, 'h': ''}
            # cache the result set keyed by the query hash for paging
            self.searchCache.append(h, R)
            r = {'t': R.runtime, 'c': len(R), 'h': h}
        elif args[0] == 'searchResults':
            h = rq.q.getfirst('h', '')
            # NOTE(review): int() raises ValueError, not TypeError, for
            # malformed input — these guards probably never fire as intended
            try:
                i = int(rq.q.getfirst('i', '0'))
            except TypeError:
                i = 0
            try:
                c = int(rq.q.getfirst('c', '0'))
            except TypeError:
                c = 0
            R = self.searchCache.get(h)
            if R == None: return {'c': 0}
            C = len(R)
            if i >= C: return {'c': 0}
            c = min(c, C - i)
            r = {'c': c, 'a': []}
            # normalize scores so the top hit is 100.0
            # NOTE(review): raises ZeroDivisionError if R[0].score == 0
            n = 100.0 / R[0].score
            j = 0
            for j in range(i, i + c):
                name = R[j]['kitab']
                v = R[j]['vrr'].split('-')[0]
                m = self.th.getMeta().getLatestKitabV(name, v)
                if not m: continue  # book is removed
                r['a'].append({
                    'i': j,
                    'n': '_i' + R[j]['nodeIdNum'],
                    'k': m['kitab'],
                    'a': prettyId(m['author']),
                    'y': tryInt(m['year']),
                    't': R[j]['title'],
                    'r': '%4.1f' % (n * R[j].score)
                })
                j += 1
            # NOTE(review): keys the dict by the *integer* c — probably
            # meant r['c'] = j; confirm against the JS consumer
            r[c] = j
        else:
            r = {}
        return r
Example #5
0
# -*- coding: utf-8 -*-
# Smoke test for okasha.utils.ObjectsCache (Python 3 variant).
import sys, os, os.path, time

print()

# import ObjectsCache; if okasha is not already importable, retry with
# the current directory and then its parent prepended to sys.path
try:
    from okasha.utils import ObjectsCache
except ImportError:
    try:
        sys.path.insert(0, os.path.abspath(os.curdir))
        from okasha.utils import ObjectsCache
    except ImportError:
        sys.path.insert(0, os.path.abspath(os.path.join(os.curdir, '..')))
        from okasha.utils import ObjectsCache

# maxTime=0 disables time-based eviction so only the counts matter
ch = ObjectsCache(lock=None, minCount=10, maxCount=100, maxTime=0)
print("asserting empty start: ", end=' ')
assert (len(ch.objs) == 0)
print("OK")

# the cache must not evict anything while it holds <= minCount objects
print("asserting respecting minCount: ", end=' ')
for i in range(10):
    ch.append(i, i)
    assert (len(ch.objs) == i + 1)
print("OK, after inserting 10 objects we have ", len(ch.objs), " cached")

print("asserting getting least expected while below minCount: ", end=' ')
ch = ObjectsCache(lock=None, minCount=10, maxCount=100, maxTime=0)
for i in range(10):
    ch.append(i, 100 - 3 * i)
    for j in range(i + 1):
Example #6
0
# -*- coding: utf-8 -*-
# Smoke test for okasha.utils.ObjectsCache (Python 2 variant).
import sys, os, os.path, time

print 

# import ObjectsCache; if okasha is not already importable, retry with
# the current directory and then its parent prepended to sys.path
try:
  from okasha.utils import ObjectsCache
except ImportError:
  try: 
    sys.path.insert(0,os.path.abspath(os.curdir))
    from okasha.utils import ObjectsCache
  except ImportError:
    sys.path.insert(0,os.path.abspath(os.path.join(os.curdir,'..')))
    from okasha.utils import ObjectsCache

# maxTime=0 disables time-based eviction so only the counts matter
ch=ObjectsCache(lock=None, minCount=10, maxCount=100, maxTime=0)
print "asserting empty start: ",
assert(len(ch.objs)==0)
print "OK"

# the cache must not evict anything while it holds <= minCount objects
print "asserting respecting minCount: ",
for i in range(10):
  ch.append(i,i)
  assert(len(ch.objs)==i+1)
print "OK, after inserting 10 objects we have ", len(ch.objs), " cached"

print "asserting getting least expected while below minCount: ",
ch=ObjectsCache(lock=None, minCount=10, maxCount=100, maxTime=0)
for i in range(10):
  ch.append(i,100-3*i)
  for j in range(i+1):
Example #7
0
class webApp(baseWebApp):
  """Web front-end exposing a ThawabMan library over HTTP (Python 2).

  NOTE(review): relies on Python-2-only constructs (``unicode``,
  ``.encode('base64')``, tuple-parameter lambdas, print-era idioms).
  """
  # template of the response dict returned by view handlers.
  # NOTE(review): class attribute, mutated in __init__ ("apptype"), so the
  # change is shared across all instances of this class
  _emptyViewResp={
    'apptype':'web',
    'content':'', 'childrenLinks':'',
    'prevUrl':'', 'prevTitle':'',
    'upUrl':'', 'upTitle':'',
    'nextUrl':'', 'nextTitle':'',
    'breadcrumbs':''
  }
  def __init__(self, th, typ='web', *args, **kw):
    """
    th is an instance of ThawabMan
    allowByUri=True for desktop, False for server
    """
    self.th=th
    self.isMonolithic=th.isMonolithic
    # seed mixed into _safeHash; not a secret in any strong sense
    self.stringSeed="S3(uR!r7y"
    self._typ=typ
    self._allowByUri=(typ=='app')
    self._emptyViewResp[u"apptype"]=self._typ
    # FIXME: move ObjectsCache of kitab to routines to core.ThawabMan
    if not self.isMonolithic:
      import threading
      lock1=threading.Lock();
    else:
      lock1=None
    self.searchCache=ObjectsCache(lock=lock1)
    baseWebApp.__init__(self,*args, **kw)

  def _safeHash(self,o):
    """
    a URL safe hash, it results a 22 byte long string hash based on md5sum
    """
    # Python 2 only: 'unicode' and str.encode('base64') do not exist on py3
    if isinstance(o,unicode): o=o.encode('utf8')
    return hashlib.md5(self.stringSeed+o).digest().encode('base64').replace('+','-').replace('/','_')[:22]

  def _root(self, rq, *args):
    """Dispatch '/' requests: favicon, robots.txt, sitemap.xml or redirect to /index/."""
    if args:
      if args[0]=='favicon.ico':
        raise redirectException(rq.script+'/_files/img/favicon.ico')
      elif args[0]=='robots.txt':
        return self._robots(rq, *args)
      elif args[0]=='sitemap.xml':
        return self._sitemap(rq, *args)
      raise forbiddenException()
    raise redirectException(rq.script+'/index/')

  @expose(contentType='text/plain; charset=utf-8')
  def _robots(self, rq, *args):
    """Serve robots.txt advertising the sitemap for the current host."""
    return """Sitemap: http://%s/sitemap.xml
User-agent: *
Allow: /
""" % (rq.environ['HTTP_HOST']+rq.script)

  @expose(contentType='text/xml; charset=utf-8')
  def _sitemap(self, rq, *args):
    """Serve a sitemap.xml listing every kitab's static entry page."""
    t=time.gmtime() # FIXME: use meta to get mime of meta.db
    d=time.strftime("%Y-%m-%dT%H:%M:%S+00:00", t)
    tmp="\t<url>\n\t\t<loc>http://"+rq.environ['HTTP_HOST']+rq.script+"/static/%s/_i0.html</loc>\n\t\t<lastmod>"+d+"</lastmod>\n\t\t<changefreq>daily</changefreq>\n\t\t<priority>0.5</priority>\n\t</url>"
    l=self.th.getMeta().getKitabList()
    urls=[]
    for k in l:
      urls.append(tmp % (k))
    return """<?xml version='1.0' encoding='UTF-8'?>
<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
	<url>
		<loc>http://thawab.ojuba.org/index/</loc>
		<lastmod>%s</lastmod>
		<changefreq>daily</changefreq>
		<priority>0.8</priority>
	</url>
	%s
</urlset>""" % (d,"\n".join(urls))


  @expose(bottleTemplate,["main"])
  def index(self, rq, *args):
    """Render the home page with one link per managed kitab."""
    rq.q.title="الرئيسية"
    l=self.th.getMeta().getKitabList()
    htmlLinks=[]
    for k in l:
      # FIXME: it currenly offers only one version for each kitab (the first one)
      htmlLinks.append('\t<li><a href="/view/%s/">%s</a></li>' % (k,
      prettyId(self.th.getMeta().getByKitab(k)[0]['kitab'])))
    htmlLinks=(u"\n".join(htmlLinks))
    return {
      u"lang":u"ar", u"dir":u"rtl",
      u"kutublinks": htmlLinks,
      "args":'/'.join(args)}

  @expose(percentTemplate,["stem.html"])
  def stem(self, rq, *args):
    """Show the Arabic stem of the word given in the 'word' query parameter."""
    from stemming import stemArabic
    # Python 2: query values are byte strings and need explicit decoding
    w=rq.q.getfirst('word','').decode('utf-8')
    s=''
    if w:
      s=" ".join([stemArabic(i) for i in w.split()])
    return {u"script":rq.script, u"word":w, u"stem":s}

  def _getKitabObject(self, rq, *args):
    """Resolve args[0] (kitab name or '_by_uri') to a (Kitab, meta) pair."""
    # FIXME: cache KitabObjects and update last access
    if not args: raise forbiddenException() # TODO: make it a redirect to index
    k=args[0]
    if k=='_by_uri':
      # direct file access is only allowed in desktop ('app') mode
      if self._allowByUri:
        uri=rq.q.getfirst('uri',None)
        if not uri: raise fileNotFoundException()
        m=self.th.getMeta().getByUri(uri)
      else:
        raise forbiddenException()
    else:
      m=self.th.getMeta().getLatestKitab(k)
      if not m: raise forbiddenException()
      uri=m['uri']
    ki=self.th.getCachedKitab(uri)
    return ki,m

  def _view(self, ki, m, i, d='#', s=""):
    """Build the view-response dict for node id *i* of kitab *ki*.

    d and s are the prefix/suffix wrapped around node links
    (e.g. './' and '.html' for the static pages).
    """
    r=self._emptyViewResp.copy()
    node,p,u,n,c,b=ki.toc.getNodePrevUpNextChildrenBreadcrumbs(i)
    # ub is the upper bound (global order of next node), -1 = till end
    if n: ub=n.globalOrder
    else: ub=-1
    if not node or i=="_i0":
      r['content']="<h1>%s</h1>" % escape(prettyId(m['kitab']))
    else:
      r['content']=node.toHtml(ub).replace('\n\n','\n</p><p>\n')
    if c:
      cLinks=''.join(map(lambda cc: '<li><a href="%s">%s</a></li>\n' % (d+"_i"+str(cc.idNum)+s,escape(cc.getContent())) ,c))
      cLinks="<ul>\n"+cLinks+"</ul>"
    else: cLinks=''
    r['childrenLinks']=cLinks
    if n:
      r['nextUrl']=d+'_i'+str(n.idNum)+s
      r['nextTitle']=escape(n.getContent())
    if p:
      r['prevUrl']=d+'_i'+str(p.idNum)+s
      r['prevTitle']=escape(p.getContent())
    if u:
      r['upUrl']=d+'_i'+str(u.idNum)+s
      r['upTitle']=escape(u.getContent())
    if b:
      # NOTE(review): tuple-parameter lambda is Python-2-only (PEP 3113)
      r['breadcrumbs']=" &gt; ".join(map(lambda (i,t): ("<a href='"+d+"_i%i"+s+"'>%s</a>") % (i,escape(t)),b))
    vrr=metaVrr(ki.meta)
    #self.th.searchEngine.related(m['kitab'], vrr, node.idNum)
    return r

  def _get_kitab_details(self, rq, *args):
    """Resolve a kitab and return (Kitab, meta, base response dict)."""
    ki,m=self._getKitabObject(rq, *args)
    if not ki or not m: return None,None, {}
    lang=m.get('lang','ar')
    # right-to-left for Arabic, Farsi and Hebrew
    if lang in ('ar','fa','he'): d='rtl'
    else: d='ltr'
    kitabId=escape(makeId(m['kitab']))
    t=escape(prettyId(m['kitab']))
    r=self._emptyViewResp.copy()
    r.update({
      u"script":rq.script,
      u"kitabTitle":t,
      u"kitabId":kitabId,
      u"headingId":u"_i0",
      u"app":u"Thawab", u"version":u"3.0.1",
      u"lang":lang, u"dir":d,
      u"title": t,
      u"content": t,
      "args":'/'.join(args)})
    return ki,m,r


  @expose(bottleTemplate,["view"])
  def static(self, rq, *args):
    """Serve the crawler-friendly static page /static/<kitab>/<node>.html."""
    l=len(args)
    if l<1: raise forbiddenException() # TODO: make it show a list of books
    elif l==1: raise redirectException(rq.script+'/static/'+args[0]+"/_i0.html")
    elif l!=2: raise forbiddenException()
    ki,m,r=self._get_kitab_details(rq, *args)
    if not ki: raise fileNotFoundException()
    h=args[1]
    if h.endswith(".html"): h=h[:-5]
    r.update(self._view(ki, m, h, './', ".html"))
    if self.th.searchEngine.getIndexedVersion(m['kitab']): rq.q.is_indexed=1; r['is_indexed']=1
    else: rq.q.is_indexed=0; r['is_indexed']=0
    r['is_static']=1
    r['d']='./'
    r['s']='.html'
    return r

  @expose(bottleTemplate,["view"])
  def view(self, rq, *args):
    """Serve the interactive (ajax-driven) view page /view/<kitab>/."""
    if len(args)!=1: raise forbiddenException()
    ki,m,r=self._get_kitab_details(rq, *args)
    if not ki: raise fileNotFoundException()
    if self.th.searchEngine.getIndexedVersion(m['kitab']): rq.q.is_indexed=1; r['is_indexed']=1
    else: rq.q.is_indexed=0; r['is_indexed']=0
    r['is_static']=0
    r['d']='#'
    r['s']=''
    return r

  @expose()
  def ajax(self, rq, *args):
    """Plain-text ajax endpoints: searchExcerpt/<hash>/<i> and kutub."""
    if not args: raise forbiddenException()
    if args[0]=='searchExcerpt' and len(args)==3:
      h=args[1]
      # NOTE(review): int() raises ValueError for malformed strings;
      # TypeError only for None — likely should catch ValueError too
      try: i=int(args[2])
      except TypeError: raise forbiddenException()
      R=self.searchCache.get(h)
      if R==None: return 'انتهت صلاحية هذا البحث'
      r=self.th.searchEngine.resultExcerpt(R,i)
      #r=escape(self.th.searchEngine.resultExcerpt(R,i)).replace('\0','<em>').replace('\010','</em>').replace(u"\u2026",u"\u2026<br/>").encode('utf8')
      return r
    elif args[0]=='kutub' and len(args)==1:
      # filter the kitab list by the normalized query string 'q'
      q=rq.q.getfirst('q','').decode('utf-8').strip().translate(normalize_tb)
      r=[]
      l=self.th.getMeta().getKitabList()
      for k in l:
        n=prettyId(k)
        if not q or q in n.translate(normalize_tb):
          r.append('\t<li><a href="/view/%s/">%s</a></li>' % (k, n))
      return '<ul>%s</ul>\n<div class="clear"></div>' % "\n".join(r)
    raise forbiddenException()

  @expose(jsonDumps)
  def json(self, rq, *args):
    """JSON endpoints: view/<kitab>/<node>, search?q=..., searchResults?h=&i=&c=."""
    # use rq.rhost to impose host-based limits on searching
    if not args: raise forbiddenException()
    ki=None
    r={}
    if args[0]=='view':
      a=args[1:]
      ki,m=self._getKitabObject(rq, *a)
      if len(a)==2:
        r=self._view(ki, m, a[1])
    elif args[0]=='search':
      q=rq.q.getfirst('q','')
      h=self._safeHash(q)
      # FIXME: check to see if one already search for that before
      q=q.decode('utf8')
      R=self.th.searchEngine.queryIndex(q)
      # print R
      if not R: return {'t':0,'c':0,'h':''}
      # cache the result set keyed by the query hash for paging
      self.searchCache.append(h,R)
      r={'t':R.runtime,'c':len(R),'h':h}
    elif args[0]=='searchResults':
      h=rq.q.getfirst('h','')
      # NOTE(review): int() raises ValueError, not TypeError, for malformed
      # input — these guards probably never fire as intended
      try: i=int(rq.q.getfirst('i','0'))
      except TypeError: i=0
      try: c=int(rq.q.getfirst('c','0'))
      except TypeError: c=0
      R=self.searchCache.get(h)
      if R==None: return {'c':0}
      C=len(R)
      if i>=C: return {'c':0}
      c=min(c,C-i)
      r={'c':c,'a':[]}
      # normalize scores so the top hit is 100.0
      # NOTE(review): raises ZeroDivisionError if R[0].score == 0
      n=100.0/R[0].score
      j=0
      for j in range(i,i+c):
        name=R[j]['kitab']
        v=R[j]['vrr'].split('-')[0]
        m=self.th.getMeta().getLatestKitabV(name,v)
        if not m: continue # book is removed
        r['a'].append({
        'i':j,'n':'_i'+R[j]['nodeIdNum'],
        'k':m['kitab'], 'a':prettyId(m['author']), 'y':tryInt(m['year']),
        't':R[j]['title'], 'r':'%4.1f' % (n*R[j].score)})
        j+=1
      # NOTE(review): keys the dict by the *integer* c — probably meant
      # r['c'] = j; confirm against the JS consumer
      r[c]=j;
    else: r={}
    return r
Example #8
0
class ThawabMan (object):
    def __init__(self, prefixes=None, isMonolithic=True, indexerQueueSize=0):
        """Create a new Thawab instance.

        prefixes: a list of directories, all read-only except the first.
        The first (writable) directory is typically
            os.path.expanduser('~/.thawab')
            os.path.join(os.path.dirname(sys.argv[0]), '..', 'data')

        isMonolithic: True if we should use locks and reconnect to sqlite.

        indexerQueueSize: size of the threaded index queue
        (0 = infinite, -1 = disabled).

        The first thing you should do afterwards is to call loadMeta().

        Raises OSError when the writable prefix cannot be created.
        """
        if not prefixes:
            prefixes = guess_prefixes()
        # os.makedirs raises OSError itself; the old bare
        # "except: raise OSError" discarded the informative message.
        if not os.path.isdir(prefixes[0]):
            os.makedirs(prefixes[0])
        # keep only existing directories, normalized to absolute real paths;
        # a list comprehension (unlike filter()) stays subscriptable on
        # Python 3, where self.prefixes[0] below is used
        self.prefixes = [p for p in
                         (os.path.realpath(os.path.abspath(q)) for q in prefixes)
                         if os.path.isdir(p)]
        # drop duplicates of the writable prefix while keeping it first
        first = self.prefixes[0]
        rest = set(self.prefixes[1:])
        rest.discard(first)
        if len(rest) < len(self.prefixes) - 1:
            self.prefixes = [first] + sorted(rest)
        self.othman = othman
        self.__meta = None          # metadata cache, filled lazily by loadMeta()
        self.read_only = self.assertManagedTree()
        self.conf = self.prase_conf()
        self.searchEngine = SearchEngine(self)
        self.user_db = UserDb(self, os.path.join(self.prefixes[0], "user.db"))
        if indexerQueueSize >= 0:
            self.asyncIndexer = AsyncIndex(self.searchEngine, indexerQueueSize)
        else:
            self.asyncIndexer = None

        self.isMonolithic = isMonolithic
        if not self.isMonolithic:
            # lazy import: threading is only needed in multi-threaded mode
            import threading
            lock1 = threading.Lock()
        else:
            lock1 = None
        self.kutubCache = ObjectsCache(lock=lock1)

    def prase_conf(self):
        r = {}
        fn = os.path.join(self.prefixes[0], 'conf', 'main.txt')
        if not os.path.exists(fn):
            return {}
        try:
            f = open(fn)
            t = f.readlines()
            f.close()
        except:
            return {}
        for l in t:
            a = l.strip().split(" = ",1)
            if len(a) != 2:
                continue
            r[a[0].strip()] = a[1].strip()
        return r

    def assertManagedTree(self):
         """create the hierarchy inside the user-managed prefix        
         # db    contains Kitab files [.thawab]
         # index    contains search index
         # conf    application configuration
         # cache    contains the metadata cache for all containers"""
         P = self.prefixes[0]
         if not os.access(P, os.W_OK):
            return False
         for i in ['db','index','conf','cache', 'tmp', 'themes']:
             p = os.path.join(P,i)
             if not os.path.isdir(p):
                os.makedirs(p)
         return True

    def mktemp(self):
        """Create an empty temporary Kitab under <prefix>/tmp and return it."""
        fd, fn = mkstemp(th_ext, 'THAWAB_', os.path.join(self.prefixes[0], 'tmp'))
        # mkstemp() hands back an *open* OS-level descriptor; close it so the
        # file is only held by the Kitab object (the old code leaked the fd).
        os.close(fd)
        return Kitab(fn, True)

    def getCachedKitab(self, uri):
        """
        try to get a kitab by uri from cache,
        if it's not in the cache, it will be opened and cached
        """
        ki = self.kutubCache.get(uri)
        if not ki:
            ki = self.getKitabByUri(uri)
            if ki:
                self.kutubCache.append(uri, ki)
        #elif not self.isMonolithic: ki.connect() # FIXME: no longer needed, kept to trace other usage of isMonolithic
        return ki

    def getCachedKitabByNameV(self, kitabNameV):
        a = kitabNameV.split(u'-')
        l = len(a)
        if l == 1:
            m = self.getMeta().getLatestKitab(kitabNameV)
        elif l == 2:
            m = self.getMeta().getLatestKitabV(*a)
        else:
            m = self.getMeta().getLatestKitabVr(*a)
        if m:
            return self.getCachedKitab(m['uri'])
        return None

    def getUriByKitabName(self,kitabName):
        """
        return uri for the latest kitab with the given name
        """
        m = self.getMeta().getLatestKitab(kitabName)
        if not m:
            return None
        return m['uri']

    def getKitab(self,kitabName):
        m = self.getMeta().getLatestKitab(kitabName)
        if m:
            return Kitab(m['uri'], th = self, meta = m)
        return None

    def getKitabByUri(self, uri):
        """Open the Kitab stored at *uri*, attaching its cached metadata
        when the metadata cache knows the uri, otherwise opening it bare."""
        known = self.getMeta().getByUri(uri)
        return Kitab(uri, th=self, meta=known) if known else Kitab(uri, th=self)

    def getKitabList(self):
        """
        return a list of managed kitab's name
        """
        return self.getMeta().getKitabList()

    def getManagedUriList(self):
        """list of all managed uri (absolute filenames for a Kitab)
         this is low level as the user should work with kitabName, title, and rest of meta data"""
        if self.__meta:
            return self.__meta.getUriList()
        r = []
        for i in self.prefixes:
            a = glob(toFs(os.path.join(fromFs(i),u'db',th_ext_glob)))
            p = map(lambda j: fromFs(j), a)
            r.extend(p)
        return r

    def getMeta(self):
        if not self.__meta:
            self.loadMeta()
        return self.__meta

    def loadMeta(self):
        """Rebuild the metadata cache from every managed uri and return it."""
        # reset first: getManagedUriList() consults self.__meta and must
        # fall back to scanning the prefixes while we rebuild
        self.__meta = None
        cache_path = os.path.join(self.prefixes[0], 'cache', 'meta.db')
        self.__meta = MCache(cache_path, self.getManagedUriList())
        return self.__meta

    def reconstructMetaIndexedFlags(self):
        # NOTE: getMeta is not used because we want to make sure we are using a fresh one
        m = self.loadMeta() 
        l1 = m.getIndexedList()
        l2 = m.getUnindexedList()
        # NOTE: Dirty are kept as is
        #l3 = m.getDirtyIndexList() 
        for i in l1:
            v = self.searchEngine.getIndexedVersion(i['kitab'])
            # mark as unindexed
            if not v or metaVrr(i) != v:
                m.setIndexedFlags(i['uri'], 0) 
        for i in l2:
            v = self.searchEngine.getIndexedVersion(i['kitab'])
            if v and metaVrr(i) == v:
                # mark as indexed if same version
                m.setIndexedFlags(i['uri'])