def test_not_predicate(self):
    """not_predicate wraps a matcher and inverts its success flag."""
    negate = parser.not_predicate
    matcher = parser.match('a')
    predicate = negate(matcher)
    # The combinator must produce a plain function.
    self.assertEqual(type(predicate), FunctionType)
    # A successful inner match makes the predicate fail; input is untouched.
    self.assertEqual(predicate('a'), (False, 'a', None))
    # A failed inner match makes the predicate succeed; input is untouched.
    self.assertEqual(predicate('b'), (True, 'b', None))
def test_and_predicate(self):
    """and_predicate wraps a matcher into a non-consuming lookahead."""
    lookahead = parser.and_predicate
    matcher = parser.match('a')
    predicate = lookahead(matcher)
    # The combinator must produce a plain function.
    self.assertEqual(type(predicate), FunctionType)
    # Success mirrors the inner matcher but never consumes input.
    self.assertEqual(predicate('a'), (True, 'a', None))
    self.assertEqual(predicate('b'), (False, 'b', None))
def __recog(cls, tokens):
    """Try every registered recognizer against *tokens*.

    Returns a dict mapping the name of each recognizer that matched
    to the groups it captured.
    """
    recognized = {}
    for name, recognizer in cls.recognizers.iteritems():
        matched, _start, _end, groups = match(tokens, recognizer)
        if matched:
            recognized[name] = groups
    return recognized
def test_plus(self):
    """parser.plus turns a matcher into a one-or-more repetition matcher."""
    p = parser.plus
    m = parser.match('a')
    # BUG FIX: the original asserted type(p(m('foo'))) — applying the
    # combinator to a match *result* tuple rather than the matcher itself.
    # Every sibling test (test_not_predicate, test_and_predicate) checks
    # the combinator applied to the matcher, so do the same here.
    self.assertEqual(type(p(m)), FunctionType)
    # Zero occurrences fail and leave the input untouched.
    self.assertEqual(p(m)(''), (False, '', None))
    self.assertEqual(p(m)('_'), (False, '_', None))
    # One or more occurrences succeed; matches are collected into a list
    # and the unconsumed tail is returned.
    self.assertEqual(p(m)('a'), (True, '', ['a']))
    self.assertEqual(p(m)('aa'), (True, '', ['a', 'a']))
    self.assertEqual(p(m)('aa_'), (True, '_', ['a', 'a']))
def test_match(self):
    """parser.match builds a matcher function from a (regexp) pattern."""
    matcher = parser.match('ab')
    # The factory returns a plain function.
    self.assertEqual(type(matcher), FunctionType)
    # Exact and prefix matches succeed, returning the unconsumed tail.
    self.assertEqual(matcher('ab'), (True, '', 'ab'))
    self.assertEqual(matcher('abc'), (True, 'c', 'ab'))
    self.assertEqual(matcher('_'), (False, '_', None))
    # Patterns are treated as regular expressions.
    self.assertEqual(parser.match(r'a*')('aaab'), (True, 'b', 'aaa'))
    # An explicit ignore pattern skips leading text before matching.
    self.assertEqual(parser.match('ab', ignore=r' *')(' ab'), (True, '', 'ab'))
    self.assertEqual(parser.match('ab', ignore=r' *')('\tab'), (False, '\tab', None))
    # The default ignore pattern swallows any leading whitespace.
    self.assertEqual(matcher('\n\t ab '), (True, ' ', 'ab'))
def parse(self): ok, start, end, groups = match(tokenize(self.text), 'assignment') if groups is None: #print >> sys.stderr, "Assignment matching failed !" #print >> sys.stderr, " \\_: text", self.text return l, e, r = groups self.lvalue = l[1] # tokens self.rvalue = r[1] # tokens self.effect = e[0] # group name; will be 'set' or 'update' if not ok: print "FAILURE", self.text, groups
def recognize(cls, text, scope):
    """Map raw statement *text* to the most specific statement object.

    Runs every registered recognizer over the tokenized text (after
    stripping a leading label / access-specifier prefix), disambiguates
    multiple candidate classes by derivation and by cls.validate, and
    falls back to a generic CppStatement.

    Raises AmbiguousStatement when validation does not narrow the
    candidates down to exactly one statement.
    """
    tokens = tokenize(text)
    ok, start, end, groups = match(tokens, 'c_label|(scope colon)')
    if ok:
        # Drop the matched label/scope prefix before recognition.
        tokens = tokens[end:]
    # name -> captured groups, for every recognizer that matched.
    ret = cls.__recog(tokens)
    # Resolve recognizer names to the classes of the same name declared
    # in this module.
    clsbyname = lambda n: getattr(sys.modules[__name__], n)
    classes = map(clsbyname, ret.iterkeys())
    print
    print "   classes", classes
    if ret and len(ret) > 1:
        # try and disambiguate stuff
        # first, a derived class has priority over the base class.
        #print "   more than one possible meaning"
        def test_class(c):
            # True when c derives from at least one other candidate.
            others = tuple(cl for cl in classes if cl is not c)
            return issubclass(c, others)
        #print "   BEFORE disambiguation by derivation", classes
        subclasses = filter(test_class, classes)
        #print "   AFTER disambiguation by derivation", subclasses
        if subclasses:
            # Keep only the most-derived candidates.
            classes = subclasses
        if len(classes) != 1:
            print "ambiguity:", text
            print "   classes", classes
        # Let each remaining class validate itself against the text and
        # its captured groups; None means "not this class".
        validate = lambda c: cls.validate(scope, c, text, ret[c.__name__])
        statements = filter(lambda x: x is not None, imap(validate, classes))
        print "   statements", statements
        if len(statements) == 1:
            return statements[0]
        else:
            raise AmbiguousStatement(text, scope, statements)
    # Zero or one recognizer matched: generic statement.
    # NOTE(review): when exactly one recognizer matches, its class is
    # never validated/instantiated here — confirm this is intended.
    return CppStatement(text, scope, [])
def postprocess(statement, context):
    """Fold *statement* into *context* (a stack of enclosing statements)
    and return the statement object that should actually be kept.

    Handles attaching else/while clauses to their opening statement,
    stripping access-specifier prefixes inside class/struct bodies,
    splitting chained assignments, and re-typing local var declarations.
    """
    # TESTING !
    print >> sys.stderr, "EXPERIMENTAL RECOGNITION", \
          CppMeta.recognize(statement.text)
    # END TESTING !
    # Type of the innermost enclosing statement, or None at top level.
    c_type = len(context) and type(context[-1]) or None
    if type(statement) is ElseStatement and c_type is IfStatement:
        # An "else" belongs to the if currently on top of the stack.
        ret = context.pop()
        ret.elses.append(statement)
        return ret
    if type(statement) is WhileStatement and c_type is DoWhileStatement:
        # Trailing "while (...)" clause of a do/while loop.
        ret = context.pop()
        ret.whilecond.append(statement)
        return ret
    if type(statement) in (ClassDeclStatement, StructDeclStatement):
        scopes = ('public', 'private', 'protected')

        def strip_scope(scope, text):
            # Remove a leading "<scope>:" access specifier, if present.
            if text.startswith(scope):
                return text[len(scope) + 1:].strip()
            return text

        def strip_all(text):
            # Same order as the original nesting:
            # protected first, then private, then public.
            for scope in reversed(scopes):
                text = strip_scope(scope, text)
            return text

        # BUG FIX: the original did statement.sub = map(strip_helper, ...)
        # with a helper returning None, which replaced every sub-statement
        # by None. It also called an undefined strip_scope() (the local
        # helper was named strip). Mutate sub-statements in place instead.
        for sub_statement in statement.sub:
            sub_statement.text = strip_all(sub_statement.text)
        return statement
    m = Cpp.assignment_re.match(statement.text)
    if m:
        # Detect chained assignments and split them
        parts = filter(bool, re.split(Cpp.assignment_op, statement.text))
        if len(parts) > 2:
            # chained assignment !
            t = statement.text
            expr = parts[-1]
            exprpos = len(t) - len(expr)
            expr = expr[:-1]  # strip final ;
            exprend = exprpos + len(expr)
            # Walk the lvalues right-to-left, pushing one simple
            # assignment per lvalue onto the context; the leftmost one
            # (pushed last) is popped and returned.
            for i in xrange(len(parts) - 2, -1, -1):
                lvaluepos = t.rfind(parts[i], 0, exprpos)
                tmp_assign = t[lvaluepos:exprend].strip() + ';'
                context.append(AssignmentStatement(tmp_assign))
                exprpos = lvaluepos
                exprend = lvaluepos + len(parts[i])
            return context.pop()  # "much more better" ((C) Jack Sparrow)
                                  # to keep the code simple
        else:
            return AssignmentStatement(statement.text)
    if Cpp.local_var_decl_re.match(statement.text):
        # Local variable declaration: re-wrap, keeping sub-statements.
        ok, start, end, grps = match(tokenize(statement.text), 'var_decl')
        ret = VarDeclStatement(statement.text)
        ret.sub = statement.sub
        return ret
    # Nothing special recognized: keep the statement as-is.
    return statement
def parse(scope, lines, start, level):
    """Recursively parse one statement starting at lines[start].

    Returns (statement, next_index). A literal '{' line opens an
    anonymous <DATA> block; otherwise the line is recognized via
    CppMeta.recognize. The statement may then absorb extra lines
    (ret.absorb), an entire brace-delimited body (ret.absorb_sub) or
    child statements parsed recursively, and trailing lines
    (ret.absorb_post).

    Raises InvalidStatement when recognition or absorption fails.
    """
    dump(lines, start)
    if lines[start] == '{':
        # Anonymous data block; step back so the '{' handling below
        # sees this very line as the opening brace.
        ret = CppStatement('<DATA>', scope, [])
        start -= 1
    else:
        ret = CppMeta.recognize(lines[start], scope)
    if ret is None:
        raise InvalidStatement("Couldn't parse < %s >" % lines[start])
    # Absorb follow-up lines the statement declares it needs
    # (one line per expected expression).
    for abs_expr in ret.absorb:
        start += 1
        dump(lines, start)
        ok, mstart, mend, groups = match(tokenize(lines[start]), abs_expr)
        if not ok:
            raise InvalidStatement(lines[start])
        ret.text += lines[start]
        for g in groups:
            ret.process_payload(g)
    if (start + 1) < len(lines) and lines[start + 1] == '{':
        if ret.absorb_sub:
            # Swallow the whole brace-delimited body as a single
            # expression instead of parsing child statements.
            end = start + 1
            while lines[end] != '}':
                end += 1
            text = tokenize('\n'.join(lines[start + 2:end]))
            ok, mstart, mend, groups = match(text, ret.absorb_sub)
            for g in groups:
                ret.process_payload(g)
            start = end + 1
        else:
            # Parse the body as child statements, one at a time, until
            # the closing brace.
            ret.pre_sub()
            start += 2
            while start < len(lines) and lines[start] != '}':
                statement, start = Cpp.parse(ret, lines, start, level + 1)
                ret.sub.append(statement)
    # Absorb trailing lines (e.g. the "while (...)" of a do/while).
    for abspo in ret.absorb_post:
        start += 1
        dump(lines, start)
        ok, mstart, mend, groups = match(tokenize(lines[start]), abspo)
        if not ok:
            raise InvalidStatement('\n' + lines[start] + '\nwhile expecting ' + abspo + '\nafter ' + type(ret).__name__ + '\n' + ret.text)
        ret.text += lines[start]
        for g in groups:
            ret.process_payload(g)
    ret.post_sub()
    ret.commit()
    return ret, start + 1
def itemize(self):
    """Tokenize this statement's text and run it through its recognizer."""
    tokens = tokenize(self.text)
    return match(tokens, self.recognize)