def _matchItem(self,parentFti,val,op):
    # usage:
    #    looks a match value up in the full text index (self._idx)
    #    and collates the uris indexed against it
    # inputs:
    #    parentFti - supplies the matching settings via _getSplitmatch(),
    #                _getSplitmodes() and _getUsecase()
    #    val       - value to match
    #    op        - match operator: '~' is handled as like, any other value as unlike
    # returns:
    #    r - collated list of matching uris (empty if nothing matched)
    if parentFti._getSplitmatch():                          # if splitmatch:
        l_xs = self._genIndexes(val,match=True)             #  list each set of match indexes for this fragment
        m = parentFti._getSplitmodes()                      #  get split matching modes
    else:                                                   # else simple fti matching:
        l_xs = [[val]]                                      #  so prepare 1 match set from the match value
        m = ['complete','consistent']                       #  apply default matching modes
    usecase = parentFti._getUsecase()                       # read once: does not change during this call
    if usecase: i = 'a'                                     # select the actual case index
    else: i = 'i'                                           # or the case insensitive index
    idx = self._idx[i]                                      # full text index to match against
    complete = 'complete' in m                              # True: uris must match every index in a set
    # combining the results of different index sets:
    #    consistent   + unlike -> union
    #    consistent   + like   -> intersection
    #    inconsistent + like   -> union
    #    inconsistent + unlike -> intersection
    unite = ('consistent' in m) != (op == '~')
    r = mtutils.slist()                                     # initialise the list of matching uris
    for xs in l_xs:                                         # for each set of match indexes:
        r1 = mtutils.slist()                                #  initialise the list of matching uris for this set
        for x in xs:                                        #  for each index in this match index set:
            # bug fix: w used to be bound only when matching case insensitively,
            # leaving it unbound (or stale) for a case sensitive index
            if usecase: w = x                               #   case sensitive: look the index up as is
            else: w = x.lower()                             #   case insensitive: convert index to common lowercase
            if w not in idx:                                #   if index is not in the full text index:
                r1 = []; break                              #    unmatched index set - goto next
            r2 = idx[w]                                     #   get matching uris for this index
            if not r1: r1 += r2                             #   use as is (copied, not aliased) if 1st match for this set
            else:                                           #   else combine uris according to split modes
                if complete: r1 = r1._intersection(r2)      #    ensure uris match all indexes in set
                else: r1 = r1._union(r2)                    #    else collate any unique matching uris
                if not r1: break                            #    collation is empty: unmatched index set - goto next
        if not r1: continue                                 #  nothing matched this index set
        # NOTE(review): 'not r' cannot tell "no index set matched yet" from
        # "an intersection emptied r", so a later matching set re-seeds r - confirm this is intended
        if not r: r += r1                                   #  use as is if 1st matching index set
        elif unite: r = r._union(r1)                        #  any uris from matching index sets
        else: r = r._intersection(r1)                       #  only uris in all matching index sets
    return r
def _genIndexes(self,s,match=False):
    # usage:
    #    generates the indexes for a fragment by applying each compiled
    #    pattern in the module level ftire list to it
    # inputs:
    #    s     - fragment to generate indexes from
    #    match - False: collate the indexes of all patterns via _union()
    #            True:  keep the indexes found by each pattern as its own set
    # returns:
    #    indexes - collated indexes (match False) or list of index sets (match True)
    indexes = mtutils.slist()                               # initialise new list of indexes for this fragment
    for pattern in ftire:                                   # using each word index generator:
        found = pattern.findall(s)                          #  indexes this generator finds in the fragment
        if not match:
            indexes = indexes._union(found)                 #  collate all indexes for this fragment
            continue
        indexes += [found]                                  #  matching: add this generator's indexes as one set
    return indexes
def _rt(self,t,order=None,toself=True):
    # usage:
    #    re-orders the elements of triple t between a given ordering
    #    and the triple ordering of self (in either direction)
    # inputs:
    #    t      - triple to be re-ordered
    #    order  - the other ordering: source of t if toself is True, target if False
    #             (anything failing self._testTorder() is replaced by ['s','p','o'])
    #    toself - direction of re-ordering:
    #             - True:  from order to that of self
    #             - False: from that of self to order
    # returns:
    #    t itself if both orderings are equal, otherwise the re-ordered triple
    #    (a tuple if t is a tuple, else the slist it was built in)
    own = self._getTripleOrder()
    if not self._testTorder(order): order = ['s','p','o']   # fall back to default ordering
    if order == own: return t                               # same ordering: nothing to do
    if toself:
        slots = self._getStoi()                             # target slot of each element within self (presumably - confirm)
        walk = order                                        # t is read in the supplied order
    else:
        slots = dict((v,c) for c,v in enumerate(order))     # target slot of each element within order
        walk = own                                          # t is read in the order of self
    r = mtutils.slist()
    # NOTE(review): assigns to indexes of an empty slist - presumably slist grows on assignment; confirm
    for c,i in enumerate(walk): r[slots[i]] = t[c]
    return tuple(r) if isinstance(t,tuple) else r
def _testTorder(self,order):
    # usage:
    #    checks that order can be used as a triple ordering
    # inputs:
    #    order - triple ordering to test
    # returns:
    #    True if order is a list of 3 items for which
    #    _symmetric_distance() against ['s','p','o'] is empty, else False
    default = mtutils.slist(['s','p','o'])
    return (isinstance(order,list)
            and len(order) == 3
            and len(default._symmetric_distance(order)) == 0)
- the supplied default via settings - a backup last resort hardcoded default if this is invalid @author: Administrator ''' import re import metabulate.utils.utils as mtutils import metabulate.utils.debug as mtdebug import metabulate.singletons.singleton as mtsingleton mtprefs = mtsingleton.Settings()._getItem('prefs') # extract and compile list of fti regex from prefs or hardcoded default ws = eval(mtprefs._interpretItem('fti_regex',"['[A-Za-z0-9]+','[A-Za-z0-9\-\_]+']")) if isinstance(ws,basestring): wr = [ws] elif isinstance(ws,list): wr = mtutils.slist() wr += ws wr = wr._peel(1) ftire = [re.compile(w) for w in wr] class fti(object): # defaults _default_usecase = mtprefs._interpretItem('fti_usecase','true',2) # case sensitivity _default_splitmatch = mtprefs._interpretItem('fti_splitmatch','true',2) # discontiguous matching _default_splitmodes = mtutils._stripsplit(mtprefs._interpretItem( 'fti_splitmodes','complete,consistent')) # discontiguous matching modes _default_splitmodes = (_default_splitmodes,['complete','consistent']) # include hardcoded in-case settings exist but are wrong def __init__(self ,usecase=None ,splitmatch=None ,splitmodes=None
def __init__(self, select=None, filepath=None, filename=None):
    # usage:
    #    sets up empty rule stores then hands the supplied arguments to self._update()
    # inputs:
    #    select, filepath, filename - passed through unchanged to self._update()
    self._rules = []                                        # rules as supplied
    self._rulesUsed = mtutils.slist()
    self._cache = mtutils.sdict()
    self._i_rules = mtutils.sdict()                         # starts empty
    settings = dict(select=select, filepath=filepath, filename=filename)
    self._update(**settings)
def _setI_Rules(self): # pre-process rules into strings or copiled regex # splitting regex parts and counting match groups # in i_rules indexed by lhs # with rhs sorted by inverse (clause count, rhs clause string length) i_rules = mtutils.sdict() for rule in self._rules: equation = rule.split("::-") if len(equation) == 2: # only if lhs and rhs ok = 1 # default ok uc = 0 # dummy usage count for later sorting by most used first ep = mtutils.slist() for c in range(2): equation[c] = equation[c].strip() # strip lhs and rhs if equation[1].startswith("/") and equation[1].endswith("/"): # rhs is a regex ep = mtutils._splitRegex(equation[1]) try: # try processing regex: ec, pc = mtutils._pdepth(ep[0]) # count and validate inner parenthesis if ec == 1: raise mterrors.UnbalancedParenthesisError(ep[0]) # test error code to raise errors elif ec == 2: raise mterrors.UnspecifiedParenthesisError() self._showDebug( clas="Parser", method="_setI_Rules", line=177, level=1, vars=[["equation[1]", equation[1]], ["ep", ep], ["ok", ok], ["pc", pc]], ) if not ep[0].startswith("^"): ep[0] = "^" + ep[0] # ensure it matches from beginning try: equation[1] = re.compile(ep[0]) # try compiling the pattern except: raise mterrors.ParseInvalidRegexError(ep[0]) # escalate failure except mterrors.ParseError, X: X._notify(c="Parser", m="_setI_Rules()") # notify ParseError elif equation[1].startswith("{") and equation[1].endswith("}"): # rhs is a userexit Element method equation[1] = mtpexits.Test(equation[1][1:-1].strip()) # instantiate rhs a userexit Test else: # rhs is a string or clause(s) equation[1] = safesplit.sub(">~#!@!#~<", equation[1]) # sub special clause seperator self._showDebug( clas="Parser", method="_setI_Rules", line=188, level=1, vars=[["equation[0]", equation[0]], ["equation[1]", equation[1]]], ) if ok: # add to i_rules if OK if equation[0] not in i_rules: # if rhs not in i_rules[equation[0]] = [[equation[1], uc, ep]] # add it self._showDebug( clas="Parser", method="_setI_Rules", 
line=192, level=1, vars=[["equation[0]", equation[0]]] ) else: # insert new lhs into list sorted by size l1 = len(i_rules[equation[0]]) # set up manual break and list counter c = -1 while c + 1 < l1: c += 1 self._showDebug( clas="Parser", method="_setI_Rules", line=198, level=2, vars=[ ["l1", l1], ["c", c], ["i_rules[equation[0]][c]", i_rules[equation[0]][c]], ["equation[1]", equation[1]], ], ) if i_rules[equation[0]][c][1] < uc: # if new rhs has more clauses i_rules[equation[0]][c:c] = [ [equation[1], uc, ep] ] # insert it here into the rhs list c = l1 # and break elif i_rules[equation[0]][c][0] == equation[1]: c = l1 # elif rhs is not new break elif c + 1 == l1: # elif at end of list i_rules[equation[0]] += [[equation[1], uc, ep]] # append new rhs to list self._showDebug( clas="Parser", method="_setI_Rules", line=205, level=1, vars=[["i_rules[equation[0]]", i_rules[equation[0]]]], )