Ejemplo n.º 1
0
 def _matchItem(self,parentFti,val,op):
     # usage:
     #    collects the uris held in self._idx that match a value
     # inputs:
     #    parentFti - object supplying the split match, split modes and case settings
     #    val       - value to be matched
     #    op        - match operator: '~' is handled as "like", any other value as "unlike"
     # returns:
     #    r         - slist of matching uris (empty if nothing matched)
     if parentFti._getSplitmatch():                                          #      if splitmatch:
         l_xs = self._genIndexes(val,match=True)                             #       list each set of match indexes for this fragment
         m = parentFti._getSplitmodes()                                      #       get split matching modes
     else:                                                                   #      else simple fti matching:
         l_xs = [[val]]                                                      #       so prepare 1 match set from the match value
         m = ['complete','consistent']                                       #       apply default matching modes
     usecase = parentFti._getUsecase()                                       #      read the case setting once (loop invariant)
     if usecase: i = 'a'                                                     #      select the actual case index
     else: i = 'i'                                                           #       or the case insensitive index
     idx = self._idx[i]                                                      #      the index to search
     r = mtutils.slist()                                                     #      initialise the list of matching uris
     for xs in l_xs:                                                         #      for each set of match indexes:
         r1 = mtutils.slist()                                                #       initialise the list of matching uris for this set
         for x in xs:                                                        #       for each index in this match index set:
             # always bind w: it used to be set only for case insensitive matching,
             # so case sensitive lookups raised NameError or reused a stale value
             if usecase: w = x                                               #        case sensitive: use the index as is
             else: w = x.lower()                                             #        case insensitive: convert to common lowercase
             if w in idx:                                                    #        if index is in the full text index:
                 r2 = idx[w]                                                 #         get matching uris for this index
                 if not r1: r1 += r2                                         #         use as is if 1st match for this set of indexes
                 else:                                                       #         else combine uris according to split modes
                     if 'complete' in m:                                     #          if complete matching:
                         r1 = r1._intersection(r2)                           #           ensure uris match all indexes in set
                     else: r1 = r1._union(r2)                                #          else: collate any unique matching uris
                     if not r1: break                                        #          if collation is empty: unmatched index set - goto next
             else: r1 = []; break                                            #        else: unmatched index set - goto next
         if r1:                                                              #       if uris match this index set: (collate with matches from other index sets)
             if not r: r += r1                                               #        use as is if 1st matching index set
             else:                                                           #        else: combine with other index set matches
                 if 'consistent' in m:                                       #         according to consistency mode (if consistent):
                     if op != '~': r = r._union(r1)                          #          if unlike: all matching uris to be excluded
                     else: r = r._intersection(r1)                           #          if like: only uris in all matching index sets
                 else:                                                       #         if inconsistent matches allowed:
                     if op == '~': r = r._union(r1)                          #          if like: any uris from matching index sets
                     else: r = r._intersection(r1)                           #          if unlike: only uris in all matching index sets to be excluded
     return r
Ejemplo n.º 2
0
 def _genIndexes(self,s,match=False):
     # usage:
     #    runs every compiled regex in ftire over s and gathers what each one finds
     # inputs:
     #    s     - text fragment to be indexed
     #    match - True:  keep the findings of each regex as a separate entry
     #            False: merge the findings of all regex via _union
     # returns:
     #    indexes - slist of the gathered indexes
     indexes = mtutils.slist()                                               #     start with no indexes for this fragment
     for generator in ftire:                                                 #     using each word index generator:
         found = generator.findall(s)                                        #      everything this generator finds in s
         if not match:                                                       #      if collating:
             indexes = indexes._union(found)                                 #       merge into the combined indexes
             continue
         indexes += [found]                                                  #      else keep this generator's findings as one set
     return indexes
Ejemplo n.º 3
0
 def _rt(self,t,order=None,toself=True):
     # usage:
     #    supports bi-directional re-ordering of triple t:
     #     - from order to self (if toself is True)
     #     - to order from self (if toself is False)
     # inputs:
     #    t      - triple to be re-ordered
     #    order  - source or target order of t (s,p,o ordering is assumed if this
     #             fails self._testTorder, which includes None)
     #    toself - direction of re-ordering:
     #              - True:  from order to that of self
     #              - False: to order from that of self
     # returns:
     #    the reordered triple (a tuple if t is a tuple), or t itself when
     #    order already equals the ordering of self
     own = self._getTripleOrder()
     if not self._testTorder(order):
         order = ['s','p','o']                                               # fall back to the default ordering
     if order == own:
         return t                                                            # nothing to re-order
     if toself:
         positions = self._getStoi()                                         # target positions supplied by self
         source = order                                                      # t is currently laid out in order
     else:
         positions = dict((label,pos) for pos,label in enumerate(order))     # target positions taken from order
         source = own                                                        # t is currently laid out as self
     reordered = mtutils.slist()
     for pos,label in enumerate(source):
         reordered[positions[label]] = t[pos]                                # move each element to its target position
     if isinstance(t,tuple):
         return tuple(reordered)                                             # hand back the same type as supplied
     return reordered
Ejemplo n.º 4
0
 def _testTorder(self,order):
     # usage:
     #    tests if order is a valid triple order
     # inputs:
     #    order - triple ordering to test
     # returns:
     #    True if order is a list of length 3 with no symmetric distance
     #    from ['s','p','o'], otherwise False
     reference = mtutils.slist(['s','p','o'])
     if not isinstance(order,list):
         return False                                                        # only lists qualify
     if len(order) != 3:
         return False                                                        # must hold exactly 3 items
     return len(reference._symmetric_distance(order)) == 0                   # and not differ from s,p,o
Ejemplo n.º 5
0
        - the supplied default via settings
        - a backup last resort hardcoded default if this is invalid

@author: Administrator
'''
import re
import metabulate.utils.utils           as mtutils
import metabulate.utils.debug           as mtdebug
import metabulate.singletons.singleton  as mtsingleton

mtprefs  = mtsingleton.Settings()._getItem('prefs')
# extract and compile the list of fti regex from prefs or the hardcoded default
# NOTE(review): eval() executes whatever the 'fti_regex' setting contains, so this is
#               only safe while the settings source is trusted - consider a literal-only
#               parser such as ast.literal_eval (confirm the setting is always a literal)
ws = eval(mtprefs._interpretItem('fti_regex',"['[A-Za-z0-9]+','[A-Za-z0-9\-\_]+']"))
if isinstance(ws,basestring): wr = [ws]
elif isinstance(ws,list):
    wr = mtutils.slist()
    wr += ws
    wr = wr._peel(1)
else:
    # the setting evaluated to neither a string nor a list: wr used to be left undefined
    # here (NameError on the compile line), so fall back to the hardcoded default patterns
    wr = [r'[A-Za-z0-9]+',r'[A-Za-z0-9\-\_]+']
ftire = [re.compile(w) for w in wr]

class fti(object):
    # defaults
    _default_usecase    = mtprefs._interpretItem('fti_usecase','true',2)          # case sensitivity
    _default_splitmatch = mtprefs._interpretItem('fti_splitmatch','true',2)       # discontiguous matching
    _default_splitmodes = mtutils._stripsplit(mtprefs._interpretItem(
                                'fti_splitmodes','complete,consistent'))          # discontiguous matching modes
    _default_splitmodes = (_default_splitmodes,['complete','consistent'])         # include hardcoded in-case settings exist but are wrong
    def __init__(self
                ,usecase=None
                ,splitmatch=None
                ,splitmodes=None
Ejemplo n.º 6
0
 def __init__(self, select=None, filepath=None, filename=None):
     # usage:
     #    sets up empty rule state, then hands the arguments to self._update
     # inputs:
     #    select, filepath, filename - passed through unchanged to self._update
     self._i_rules = mtutils.sdict()
     self._cache = mtutils.sdict()
     self._rulesUsed = mtutils.slist()
     self._rules = []
     # all containers must exist before _update runs
     self._update(filename=filename, filepath=filepath, select=select)
Ejemplo n.º 7
0
 def _setI_Rules(self):
     # pre-process rules into strings or copiled regex
     # splitting regex parts and counting match groups
     # in i_rules indexed by lhs
     # with rhs sorted by inverse (clause count, rhs clause string length)
     i_rules = mtutils.sdict()
     for rule in self._rules:
         equation = rule.split("::-")
         if len(equation) == 2:  # only if lhs and rhs
             ok = 1  # default ok
             uc = 0  # dummy usage count for later sorting by most used first
             ep = mtutils.slist()
             for c in range(2):
                 equation[c] = equation[c].strip()  # strip lhs and rhs
             if equation[1].startswith("/") and equation[1].endswith("/"):  # rhs is a regex
                 ep = mtutils._splitRegex(equation[1])
                 try:  # try processing regex:
                     ec, pc = mtutils._pdepth(ep[0])  #  count and validate inner parenthesis
                     if ec == 1:
                         raise mterrors.UnbalancedParenthesisError(ep[0])  #  test error code to raise errors
                     elif ec == 2:
                         raise mterrors.UnspecifiedParenthesisError()
                     self._showDebug(
                         clas="Parser",
                         method="_setI_Rules",
                         line=177,
                         level=1,
                         vars=[["equation[1]", equation[1]], ["ep", ep], ["ok", ok], ["pc", pc]],
                     )
                     if not ep[0].startswith("^"):
                         ep[0] = "^" + ep[0]  #   ensure it matches from beginning
                     try:
                         equation[1] = re.compile(ep[0])  #   try compiling the pattern
                     except:
                         raise mterrors.ParseInvalidRegexError(ep[0])  #   escalate failure
                 except mterrors.ParseError, X:
                     X._notify(c="Parser", m="_setI_Rules()")  # notify ParseError
             elif equation[1].startswith("{") and equation[1].endswith("}"):  # rhs is a userexit Element method
                 equation[1] = mtpexits.Test(equation[1][1:-1].strip())  #  instantiate rhs a userexit Test
             else:  # rhs is a string or clause(s)
                 equation[1] = safesplit.sub(">~#!@!#~<", equation[1])  #  sub special clause seperator
             self._showDebug(
                 clas="Parser",
                 method="_setI_Rules",
                 line=188,
                 level=1,
                 vars=[["equation[0]", equation[0]], ["equation[1]", equation[1]]],
             )
             if ok:  # add to i_rules if OK
                 if equation[0] not in i_rules:  #  if rhs not in
                     i_rules[equation[0]] = [[equation[1], uc, ep]]  #   add it
                     self._showDebug(
                         clas="Parser", method="_setI_Rules", line=192, level=1, vars=[["equation[0]", equation[0]]]
                     )
                 else:  # insert new lhs into list sorted by size
                     l1 = len(i_rules[equation[0]])  # set up manual break and list counter
                     c = -1
                     while c + 1 < l1:
                         c += 1
                         self._showDebug(
                             clas="Parser",
                             method="_setI_Rules",
                             line=198,
                             level=2,
                             vars=[
                                 ["l1", l1],
                                 ["c", c],
                                 ["i_rules[equation[0]][c]", i_rules[equation[0]][c]],
                                 ["equation[1]", equation[1]],
                             ],
                         )
                         if i_rules[equation[0]][c][1] < uc:  # if new rhs has more clauses
                             i_rules[equation[0]][c:c] = [
                                 [equation[1], uc, ep]
                             ]  #   insert it here into the rhs list
                             c = l1  #   and break
                         elif i_rules[equation[0]][c][0] == equation[1]:
                             c = l1  # elif rhs is not new break
                         elif c + 1 == l1:  # elif at end of list
                             i_rules[equation[0]] += [[equation[1], uc, ep]]  # append new rhs to list
                     self._showDebug(
                         clas="Parser",
                         method="_setI_Rules",
                         line=205,
                         level=1,
                         vars=[["i_rules[equation[0]]", i_rules[equation[0]]]],
                     )