def _compile(self):
    """
    Compile the regular expression to generate more matchers when necessary.

    An expression starting with '<' is handed to _compileSingleComponent().
    An expression starting with '[' must end with ']' and is handed to
    _compileMultipleComponents(); if the character right after '[' is '^',
    self._isInclusion is set to False and parsing starts one character later.

    :raises NdnRegexMatcherBase.Error: If the expression has fewer than two
      characters, starts with neither '<' nor '[', or starts with '[' but
      does not end with ']'.
    """
    expr = self._expr
    if len(expr) < 2:
        raise NdnRegexMatcherBase.Error(
          "Regexp compile error (cannot parse " + expr + ")")

    opener = expr[0]
    if opener == '<':
        self._compileSingleComponent()
        return

    if opener != '[':
        raise NdnRegexMatcherBase.Error(
          "Regexp compile error (cannot parse " + expr + ")")

    lastIndex = len(expr) - 1
    if expr[lastIndex] != ']':
        raise NdnRegexMatcherBase.Error(
          "Regexp compile error (no matching ']' in " + expr + ")")

    if expr[1] == '^':
        # A leading '^' flips the set from inclusion to exclusion.
        self._isInclusion = False
        firstComponentIndex = 2
    else:
        firstComponentIndex = 1
    self._compileMultipleComponents(firstComponentIndex, lastIndex)
def _parseRepetition(self):
    """
    Parse the repetition suffix of self._expr, which begins at index
    self._indicator, and store the bounds in self._repeatMin and
    self._repeatMax.

    Recognized suffixes: none (exactly once), '?', '+', '*', and the brace
    forms {m,n}, {,n}, {m,} and {m}. An open upper bound is stored as 32767.

    The brace forms must span the whole suffix. Trailing text after the
    closing brace is reported as an unrecognized format instead of leaking a
    ValueError from int() or being silently ignored.

    :return: True if the suffix was recognized and the bounds were stored.
      False if the suffix is a single character other than '?', '+' or '*'.
    :rtype: bool
    :raises NdnRegexMatcherBase.Error: If a multi-character suffix is not
      one of the brace forms, or if the bounds exceed 32767 or the minimum
      is greater than the maximum.
    """
    MAX_REPETITIONS = 32767
    exprSize = len(self._expr)

    if exprSize == self._indicator:
        # No suffix at all: match exactly once.
        self._repeatMin = 1
        self._repeatMax = 1
        return True

    if exprSize == self._indicator + 1:
        symbol = self._expr[self._indicator]
        if symbol == '?':
            self._repeatMin = 0
            self._repeatMax = 1
            return True
        if symbol == '+':
            self._repeatMin = 1
            self._repeatMax = MAX_REPETITIONS
            return True
        if symbol == '*':
            self._repeatMin = 0
            self._repeatMax = MAX_REPETITIONS
            return True
        return False

    repeatStruct = self._expr[self._indicator:exprSize]
    # Each pattern ends with \Z so that the whole suffix has to match; the
    # slices below rely on the final character being the closing brace.
    if re.match(r"\{[0-9]+,[0-9]+\}\Z", repeatStruct) is not None:
        lowText, highText = repeatStruct[1:-1].split(',')
        repeatMin = int(lowText)
        repeatMax = int(highText)
    elif re.match(r"\{,[0-9]+\}\Z", repeatStruct) is not None:
        repeatMin = 0
        repeatMax = int(repeatStruct[2:-1])
    elif re.match(r"\{[0-9]+,\}\Z", repeatStruct) is not None:
        repeatMin = int(repeatStruct[1:-2])
        repeatMax = MAX_REPETITIONS
    elif re.match(r"\{[0-9]+\}\Z", repeatStruct) is not None:
        repeatMin = int(repeatStruct[1:-1])
        repeatMax = repeatMin
    else:
        raise NdnRegexMatcherBase.Error(
          "Error: RegexRepeatMatcher.ParseRepetition(): Unrecognized format " +
          self._expr)

    if (repeatMin > MAX_REPETITIONS or repeatMax > MAX_REPETITIONS or
          repeatMin > repeatMax):
        raise NdnRegexMatcherBase.Error(
          "Error: RegexRepeatMatcher.ParseRepetition(): Wrong number " +
          self._expr)

    self._repeatMin = repeatMin
    self._repeatMax = repeatMax
    return True
def _compile(self):
    """
    Compile an expression of the form "(...)": the text between the outer
    parentheses is wrapped in an NdnRegexPatternListMatcher, which is
    appended to self._matchers.

    :raises NdnRegexMatcherBase.Error: If the expression has fewer than two
      characters or is not enclosed in '(' and ')'.
    """
    expr = self._expr
    isParenthesized = len(expr) >= 2 and expr[0] == '(' and expr[-1] == ')'
    if not isParenthesized:
        raise NdnRegexMatcherBase.Error("Unrecognized format: " + expr)

    inner = NdnRegexPatternListMatcher(expr[1:-1], self._backrefManager)
    self._matchers.append(inner)
def match(self, name, offset, length):
    """
    Try to match the single name component at the given offset.

    self._matchResult is reset on every call. An empty expression matches
    any component. Otherwise the escaped string of the component is searched
    with self._componentRegex; on success each capture group i is stored in
    self._pseudoMatchers[i] and the component becomes the match result.

    :param Name name: The name whose component is examined.
    :param int offset: The index of the component in name.
    :param int length: Not used by this method.
    :return: True if the component matches, otherwise False.
    :rtype: bool
    :raises NdnRegexMatcherBase.Error: If the expression is not empty and
      self._isExactMatch is false.
    """
    self._matchResult = []

    if self._expr == "":
        self._matchResult.append(name.get(offset))
        return True

    if not self._isExactMatch:
        raise NdnRegexMatcherBase.Error(
          "Non-exact component search is not supported yet")

    found = self._componentRegex.search(name.get(offset).toEscapedString())
    if found is None:
        return False

    # Group 0 is the whole match, so capture groups start at 1.
    for groupNo in range(1, self._componentRegex.groups + 1):
        pseudoMatcher = self._pseudoMatchers[groupNo]
        pseudoMatcher.resetMatchResult()
        pseudoMatcher.setMatchResult(found.group(groupNo))

    self._matchResult.append(name.get(offset))
    return True
def _extractRepetition(self, index):
    """
    Find where the repetition suffix that starts at index ends.

    :param int index: The index in self._expr right after a sub pattern.
    :return: index itself if it is at the end of the expression or no
      repetition suffix starts there; index + 1 for '+', '?' or '*'; the
      index right after the closing '}' for a brace suffix.
    :rtype: int
    :raises NdnRegexMatcherBase.Error: If a '{' has no closing '}'.
    """
    expr = self._expr
    exprSize = len(expr)
    if index == exprSize:
        return index

    symbol = expr[index]
    if symbol in ('+', '?', '*'):
        return index + 1
    if symbol != '{':
        return index

    # Walk forward to the closing brace, or to the end if there is none.
    cursor = index
    while cursor < exprSize and expr[cursor] != '}':
        cursor += 1
    if cursor == exprSize:
        raise NdnRegexMatcherBase.Error("Missing right brace bracket")
    return cursor + 1
def _compileSingleComponent(self):
    """
    Compile an expression that consists of exactly one "<...>" component
    and append the resulting NdnRegexComponentMatcher to self._components.

    :raises NdnRegexMatcherBase.Error: If the component that starts at the
      beginning of the expression does not span the whole expression.
    """
    end = self._extractComponent(1)
    if end != len(self._expr):
        raise NdnRegexMatcherBase.Error("Component expr error " + self._expr)

    # Strip the enclosing angle brackets before compiling the component.
    inner = self._expr[1 : end - 1]
    self._components.append(
      NdnRegexComponentMatcher(inner, self._backrefManager))
def _compile(self):
    """
    Split self._expr into consecutive sub patterns by calling
    _extractPattern() repeatedly until the end of the expression.

    :raises NdnRegexMatcherBase.Error: If _extractPattern() returns a false
      value.
    """
    exprLength = len(self._expr)
    # A one-element list so that _extractPattern() can write back the index
    # where the next sub pattern starts.
    cursor = [0]
    while cursor[0] < exprLength:
        extracted = self._extractPattern(cursor[0], cursor)
        if not extracted:
            raise NdnRegexMatcherBase.Error("Compile error")
def _extractPattern(self, index, next):
    """
    Extract the sub pattern that starts at index, together with its
    repetition suffix, and append a matcher for it to self._matchers.

    A parenthesized group without a repetition suffix becomes an
    NdnRegexBackrefMatcher, which is also registered with
    self._backrefManager. Every other sub pattern becomes an
    NdnRegexRepeatMatcher.

    :param int index: The index in self._expr where the sub pattern starts.
    :param Array<int> next: Update next[0] to the index right after the
      sub pattern and its repetition suffix.
    :return: True.
    :rtype: bool
    :raises NdnRegexMatcherBase.Error: If the character at index is not
      '(', '<' or '['.
    """
    start = index
    opener = self._expr[index]

    if opener == '(':
        indicator = self._extractSubPattern('(', ')', index + 1)
        end = self._extractRepetition(indicator)
        subExpr = self._expr[start:end]
        if indicator == end:
            # No repetition suffix: the group is a back reference.
            backref = NdnRegexBackrefMatcher(subExpr, self._backrefManager)
            self._backrefManager.pushRef(backref)
            backref.lateCompile()
            self._matchers.append(backref)
        else:
            self._matchers.append(
              NdnRegexRepeatMatcher(
                subExpr, self._backrefManager, indicator - start))
    elif opener == '<' or opener == '[':
        closer = '>' if opener == '<' else ']'
        indicator = self._extractSubPattern(opener, closer, index + 1)
        end = self._extractRepetition(indicator)
        self._matchers.append(
          NdnRegexRepeatMatcher(
            self._expr[start:end], self._backrefManager, indicator - start))
    else:
        raise NdnRegexMatcherBase.Error("Unexpected syntax")

    next[0] = end
    return True
def _compileMultipleComponents(self, start, lastIndex):
    """
    Compile the consecutive "<...>" components found in self._expr between
    start and lastIndex, appending one NdnRegexComponentMatcher per
    component to self._components.

    :param int start: The index of the first component's '<'.
    :param int lastIndex: The index right after the last component.
    :raises NdnRegexMatcherBase.Error: If a component does not start with
      '<', or if the components do not end exactly at lastIndex.
    """
    cursor = start
    while cursor < lastIndex:
        if self._expr[cursor] != '<':
            raise NdnRegexMatcherBase.Error(
              "Component expr error " + self._expr)

        bodyStart = cursor + 1
        cursor = self._extractComponent(bodyStart)
        # cursor is now right after the closing '>', so the component text
        # ends at cursor - 1.
        self._components.append(
          NdnRegexComponentMatcher(
            self._expr[bodyStart : cursor - 1], self._backrefManager))

    if cursor != lastIndex:
        raise NdnRegexMatcherBase.Error(
          "Not sufficient expr to parse " + self._expr)
def _extractComponent(self, index):
    """
    Find the end of a "<...>" component, honoring nested angle brackets.

    :param int index: The index in self._expr right after the opening '<'.
    :return: The index right after the matching '>'.
    :rtype: int
    :raises NdnRegexMatcherBase.Error: If the expression ends before the
      matching '>' is found.
    """
    # The opening '<' before index is already counted.
    depth = 1
    while depth > 0:
        if index >= len(self._expr):
            raise NdnRegexMatcherBase.Error("Error: angle brackets mismatch")

        symbol = self._expr[index]
        if symbol == '<':
            depth += 1
        elif symbol == '>':
            depth -= 1
        index += 1

    return index
def _getItemFromExpand(expand, offset):
    """
    Extract the item of the expand string that starts at offset[0].

    An item is either a back reference (a backslash followed by one or more
    decimal digits) or a literal component enclosed in angle brackets, which
    may contain nested angle brackets.

    A bracketed item may end exactly at the end of the expand string. An
    error is raised only if the string ends while a bracket is still open.

    :param str expand: The expand string.
    :param Array<int> offset: This updates offset[0] to the index right
      after the returned item.
    :return: The item, including its leading backslash or its enclosing
      angle brackets.
    :rtype: str
    :raises NdnRegexMatcherBase.Error: If the item does not start with a
      backslash or '<', a backslash is not followed by a digit, or a '<' is
      never closed.
    """
    begin = offset[0]
    size = len(expand)

    if expand[begin] == '\\':
        offset[0] += 1
        if offset[0] >= size:
            raise NdnRegexMatcherBase.Error(
              "Wrong format of expand string!")

        while offset[0] < size and '0' <= expand[offset[0]] <= '9':
            offset[0] += 1

        # At least one digit must follow the backslash.
        if offset[0] > begin + 1:
            return expand[begin:offset[0]]
        raise NdnRegexMatcherBase.Error(
          "Wrong format of expand string!")
    elif expand[begin] == '<':
        offset[0] += 1
        if offset[0] >= size:
            raise NdnRegexMatcherBase.Error(
              "Wrong format of expand string!")

        depth = 1
        while depth > 0:
            # Check before reading, so that consuming the final '>' as the
            # last character of the string is not treated as an error.
            if offset[0] >= size:
                raise NdnRegexMatcherBase.Error(
                  "Wrong format of expand string!")
            symbol = expand[offset[0]]
            if symbol == '<':
                depth += 1
            elif symbol == '>':
                depth -= 1
            offset[0] += 1

        return expand[begin:offset[0]]
    else:
        raise NdnRegexMatcherBase.Error("Wrong format of expand string!")
def expand(self, expandStr=""):
    """
    Build a Name from an expand string, using the result of the last match.

    Each "<...>" item of the expand string is appended as a literal
    component. A back reference item with number 0 appends every component
    of self._matchResult; number N appends the match result of back
    reference N - 1 in the active back reference manager.

    :param str expandStr: The expand string. If omitted or empty, use
      self._expand.
    :return: The expanded name.
    :rtype: Name
    :raises NdnRegexMatcherBase.Error: If a back reference number is greater
      than the number of back references, or the expand string is malformed.
    """
    result = Name()

    if self._isSecondaryUsed:
        backrefManager = self._secondaryBackrefManager
    else:
        backrefManager = self._primaryBackrefManager
    backrefNo = backrefManager.size()

    usingExpand = expandStr if expandStr != "" else self._expand

    offset = [0]
    while offset[0] < len(usingExpand):
        item = NdnRegexTopMatcher._getItemFromExpand(usingExpand, offset)
        marker = item[0]

        if marker == '<':
            # Literal component: strip the enclosing angle brackets.
            result.append(item[1:-1])
        elif marker == '\\':
            index = int(item[1:])
            if index == 0:
                components = self._matchResult
            elif index <= backrefNo:
                components = backrefManager.getBackref(
                  index - 1).getMatchResult()
            else:
                raise NdnRegexMatcherBase.Error(
                  "Exceeded the range of back reference")

            for component in components:
                result.append(component)

    return result
def _extractSubPattern(self, left, right, index):
    """
    Find the end of a bracketed sub pattern, honoring nested brackets of
    the same kind.

    :param str left: The opening bracket character.
    :param str right: The closing bracket character.
    :param int index: The index in self._expr right after the opening
      bracket.
    :return: The index right after the matching closing bracket.
    :rtype: int
    :raises NdnRegexMatcherBase.Error: If the expression ends before the
      matching closing bracket is found.
    """
    # The opening bracket before index is already counted.
    openCount = 1
    while openCount > 0:
        if index >= len(self._expr):
            raise NdnRegexMatcherBase.Error("Parenthesis mismatch")

        symbol = self._expr[index]
        if symbol == left:
            openCount += 1
        if symbol == right:
            openCount -= 1
        index += 1

    return index