class LangletUnparser(BaseClass("Unparser", parent_langlet)):
    '''
    Defines langlet specific unparsing / formatting.
    '''
    def format_left(self, c0, c1, text):
        # Decide spacing when the character to emit (c1) opens a bracket;
        # c0 is the previously emitted character.
        # NOTE(review): implicitly returns None when c1 is not an opening
        # bracket -- presumably the caller applies a default rule then;
        # confirm against the base Unparser.
        if c1 in "([":
            if c0 in " ([":
                return text
            else:
                return " " + text

    def format_op(self, c0, c1, text):
        # Operator tokens: '|' gets a leading space, other operators are
        # emitted unchanged.
        # NOTE(review): the original source had lost its indentation; the
        # trailing `return text` is read here as belonging to the isop()
        # branch (non-operators fall through to None) -- verify.
        if self.isop(c1):
            if c1 == "|":
                return " " + text
            return text

    def format_char(self, c0, c1, text):
        # Character tokens get a separating space after '+', '*', '|' or '('.
        # NOTE(review): indentation was lost in the original; the `else`
        # is read here as part of the inner if/elif chain (delegate to the
        # base class for other preceding characters) -- verify.
        if self.ischar(c1):
            if c0 in "+*|":
                return " " + text
            elif c0 in "(":
                return " " + text
            else:
                return super(LangletUnparser, self).format_char(c0, c1, text)
class LangletPostlexer(BaseClass("Postlexer", parent_langlet)):
    '''
    Defines a langlet specific token stream post-processor.
    '''
    @postlex
    def INTRON(self, pos, tok):
        '''Re-tag an INTRON token into the symbol range and forward it.'''
        shifted = list(tok)
        shifted[0] = INTRON_NID + SYMBOL_OFFSET
        self.add_token(shifted)
class LangletTransformer(BaseClass("Transformer", parent_langlet)):
    '''
    Defines langlet specific CST transformations.
    '''
    @transform
    def file_input(self, node):
        # Entry transformation for a whole module: inject the
        # ``__langlet__`` bootstrap import before the first statement that
        # is not a `from __future__ import ...`.
        if self.options.get("interactive"):
            # global transformations for files
            return
        # for i, nd in enumerate(node[1:]):
        #     if self._like_main_transform(nd, node, i+1):
        #         break
        for i, nd in enumerate(node[1:]):
            trans = self._import_langlet_trans(nd)
            if trans:
                node.insert(i + 1, trans)
                break

    def is_main(self, node):
        # True when the node contains an ``if __name__ == ...`` statement.
        _if_stmt = find_node(node, self.symbol.if_stmt, depth=3)
        if _if_stmt is None:
            return False
        elif len(_if_stmt) > 4:
            return self.fn.match_token_seq(_if_stmt, ["if", "__name__", "=="])
        else:
            return False

    def _future(self, node):
        # True when the node is a ``from __future__ import ...`` statement.
        _import_stmt = find_node(node, self.symbol.import_stmt, depth=3)
        if _import_stmt:
            return self.fn.match_token_seq(_import_stmt, ["from", "__future__"])
        return False

    def _import_langlet_trans(self, node):
        # Build the stmt node that imports langscape and loads this
        # langlet. Returns False for __future__ imports, which must stay
        # at the top of the module.
        if self._future(node):
            return False
        load_langlet = 'import langscape; __langlet__ = langscape.load_langlet("%s")\n' % self.langlet.config.langlet_name
        return find_node(self.langlet.parse(load_langlet), self.symbol.stmt)

    def _like_main_transform(self, node, tree, i):
        # Wrap the body of an ``if __name__ == '__main__'`` block into a
        # ``__like_main__`` function definition and replace the suite by a
        # call to it. Currently unused (see the commented loop above).
        if self.is_main(node):
            _suite = find_node(node, self.symbol.suite)
            _func_def_suite = _suite[:]
            def__like_main__ = self.fn.stmt(
                self.fn.compound_stmt(
                    self.fn.funcdef("def",
                                    self.fn.Name("__like_main__"),
                                    self.fn.parameters(),
                                    _func_def_suite)))
            tree.insert(i, def__like_main__)
            call__like_main__ = self.fn.CallFunc("__like_main__", [])
            replace_node(_suite, self.fn.suite(self.fn.stmt(call__like_main__)))
            return True
        return False
class LangletPostlexer(BaseClass("Postlexer", parent_langlet)):
    '''
    Defines a langlet specific token stream post-processor.
    '''
    @postlex
    def NAME(self, pos, tok):
        '''Split a NAME token ending in '-' into a NAME and a MINUS token.'''
        name = tok[1]
        if name[-1] != '-':
            self.add_token(tok)
            return
        line, col = tok[2], tok[-1]
        # emit the identifier without its trailing dash ...
        self.add_token([self.lex_symbol.NAME, name[:-1], line, col])
        # ... followed by a MINUS token at the dash's column
        self.add_token([self.lex_symbol.MINUS, '-', line, col + len(name) - 1])
class LangletTransformer(BaseClass("Transformer", parent_langlet)):
    '''
    Defines langlet specific CST transformations.
    '''
    def __init__(self, *args, **kwd):
        super(LangletTransformer, self).__init__(*args, **kwd)

    @transform
    def repeated(self, node):
        # Expand a bounded repetition ``{n}`` / ``{n0,n1}`` of the
        # preceding grammar item into an equivalent sequence of items.
        # Strip the surrounding braces: "{n0,n1}" -> "n0,n1".
        s = self.langlet.unparse(node).strip()[1:-1]
        n0 = 0
        n1 = 0
        if str.isdigit(s):
            # exact count: {n}
            n0 = int(s)
            n1 = n0
        else:
            # interval: {n0,n1} -- either bound may be empty
            f1, f2 = s.split(",")
            if f1:
                n0 = int(f1)
            else:
                n0 = 0
            if f2:
                n1 = int(f2)
            else:
                n1 = 0
        chain = self.node_stack(node)
        item, _ = chain.step()
        atom = find_node(item, self.symbol.atom)
        satom = self.langlet.unparse(atom)
        if n0 == n1:
            # {n}: exactly n copies
            return self.langlet.parse("(" + " ".join([satom] * n0) + ")",
                                      start_symbol=self.symbol.item)
        elif n1 == 0:
            # min = n0, max = infinity
            return self.langlet.parse("(" + " ".join([satom] * n0) + " " + satom + "*" + ")",
                                      start_symbol=self.symbol.item)
        elif n1 > n0:
            # {n0,n1}: n0 mandatory copies followed by n1-n0 optional ones
            return self.langlet.parse("(" + " ".join([satom] * n0) + " " + ("[" + satom + "]") * (n1 - n0) + ")",
                                      start_symbol=self.symbol.item)
        else:
            raise SyntaxError("Bad repeat interval: {" + s + "}")

    @transform
    def variable(self, node):
        # Record a grammar variable binding (dest <- varname) in the
        # langlet's variable table and reduce the node to a plain atom.
        dest, varname = find_all(node, self.token.NAME)
        self.langlet.variables[varname[1]] = dest[1]
        return self.langlet.fn.atom(varname)
class LangletModuleFilter(BaseClass("importer.ModuleFilter", parent_langlet)):
    # Decides which modules take part in coverage: a module test_xxx.py
    # covers a module xxx.py (see LangletImporter docstring).

    def __init__(self, langlet):
        super(LangletModuleFilter, self).__init__(langlet)

    def define_pattern(self):
        # Remember the start module's bare name and compile the default
        # test-module pattern: "test_<mod_name>".
        sm = self.langlet.options.get("start_module")
        self._start_module = sm.split(".")[0] if sm else ""
        self._pattern_default = re.compile(
            r'(?P<test>test\_(?P<mod_name>\w+))')
        # names of modules whose matching test module has been seen
        self._default_groups = []
        self._deactivate_default = self.langlet.options.get(
            "deactivate_default")

    def accept_module(self, fpth_mod):
        # Return self when the module shall be transformed (covered),
        # None otherwise.
        if not super(LangletModuleFilter, self).accept_module(fpth_mod):
            if self.dbg:
                dbg_import("module not covered: " + fpth_mod)
            return
        if self.is_mainmodule(fpth_mod):
            # the start module itself is always covered; if it is a
            # test_xxx module, remember xxx for later matching
            m = self._pattern_default.match(fpth_mod.basename())
            if m:
                self._default_groups.append(m.group(2).lower())
            self.langlet.transformer.set_module(
                self.langlet.importer.module_descr)
            return self
        if fpth_mod.basename() in self.module_blacklist:
            return
        if self.dbg:
            dbg_import("module_path: %s\n" % fpth_mod)
        if not self._deactivate_default:
            # default relation: any imported test_xxx module is covered
            m = self._pattern_default.match(fpth_mod.basename())
            if m:
                md = LangletModuleDescriptor()
                md.fpth_mod_full = fpth_mod
                self._default_groups.append(m.group(2).lower())
                self.langlet.transformer.set_module(md)
                return self
        else:
            # NOTE(review): indentation was lost in the original; the
            # descriptor construction is read here as being inside the
            # membership test -- verify.
            module_name = fpth_mod.splitext()[0].basename()
            if module_name in self._default_groups:
                self._default_groups.remove(module_name)
                md = LangletModuleDescriptor()
                md.fpth_mod_full = fpth_mod
                self.langlet.transformer.set_module(md)
                return self
class LangletPostlexer(BaseClass("Postlexer", parent_langlet)):
    '''
    Defines a langlet specific token stream post-processor.
    '''
    @postlex
    def NEWLINE(self, pos, tok):
        # Forward a NEWLINE token, peeking two and three tokens ahead for
        # a COLON.
        # NOTE(review): both branches of the COLON test are identical, so
        # the lookahead is currently dead code. The original indentation
        # was lost; if the `else` was meant to pair with the *outer* `if`,
        # the token would be dropped when no COLON follows -- confirm
        # intent before simplifying.
        if pos < len(self.scan) - 3:
            T1 = self.scan[pos + 2]
            T2 = self.scan[pos + 3]
            if self.lex_symbol.COLON in (T1[0], T2[0]):
                self.add_token(tok)
            else:
                self.add_token(tok)

    @postlex
    def WHITE(self, pos, tok):
        # Whitespace containing a line break is handled as a NEWLINE.
        if '\n' in tok[1]:
            self.NEWLINE(pos, [self.lex_symbol.NEWLINE] + tok[1:])

    @postlex
    def COMMENT(self, pos, tok):
        # A comment terminates the logical line: re-emit as NEWLINE.
        self.NEWLINE(pos, [self.lex_symbol.NEWLINE, '\n'] + tok[2:])
class LangletImporter(BaseClass("Importer", parent_langlet)):
    '''
    Defines langlet specific import hooks.

    Specialized Importer for coverage purposes. This is delicate because
    not every module shall be covered. The basic coverage relation
    associates a module test_bla.py with a module bla.py. If for example
    "coverage test_all.py" is executed each test_xxx.py module imported
    by test_all is covered as well as xxx modules.
    '''
    def __init__(self, langlet):
        super(LangletImporter, self).__init__(langlet)
        # wire up the coverage module filter; test_support.py must never
        # be instrumented
        mod_filter = LangletModuleFilter(langlet)
        mod_filter.dbg = self.dbg
        mod_filter.module_blacklist.add("test_support.py")
        self.modulefilter = mod_filter

    def define_pattern(self):
        # pattern construction is delegated to the module filter
        self.modulefilter.define_pattern()

    def prepare(self):
        # force regex compilation of patterns before imports are hooked
        self.langlet.options["re_compile"] = True
        self.define_pattern()
class LangletPrelexer(BaseClass("Prelexer", parent_langlet)): '''
class LangletPostlexer(BaseClass("Postlexer", parent_langlet)):
    '''
    Defines a langlet specific token stream post-processor.

    Turns the raw scan into a parser-ready token stream: INTRON tokens
    (whitespace / comment runs) are analyzed to synthesize NEWLINE,
    INDENT and DEDENT tokens, and the parenthesis nesting level is
    tracked so that no layout tokens are produced inside expressions.
    '''
    def run(self, scanned):
        '''Process the scanned token list and return the token stream.

        Dispatches each token either to the LEFT/RIGHT paren handlers,
        to a registered @postlex handler, or passes it through.
        '''
        self.reset()
        self.set_refactor_mode()
        self.scan = scanned
        sym_left = self.langlet.lex_nfa.reachables[self.lex_symbol.LEFT]
        sym_right = self.langlet.lex_nfa.reachables[self.lex_symbol.RIGHT]
        for pos, tok in enumerate(self.scan):
            tid = tok[0]
            if tid in sym_left:
                self.LEFT(pos, tok)
            elif tid in sym_right:
                self.RIGHT(pos, tok)
            else:
                handler = self._post_handler.get(tid)
                if handler:
                    handler(pos, tok)
                else:
                    self.add_token(tok)
        self.dedent_to(0)
        self.terminate_stream()
        # reset layout state for the next run
        self.indents = []
        # BUGFIX(review): was ``self.parenlevl = 0`` (typo); the counter
        # actually used by LEFT/RIGHT/INTRON is ``parenlev``, so the
        # nesting level was never reset between runs.
        self.parenlev = 0
        return self.stream

    @postlex
    def dot_start(self, pos, tok):
        "dot_start: '.' | '.' A_DIGIT+ [Exponent] ['j'|'J']"
        # dot_start is an optimization hack in the Token definition. It helps preventing a bloated
        # `unit` NFA. A lone '.' is a DOT token, otherwise it is a NUMBER.
        if tok[1] == '.':
            self.add_token([self.lex_symbol.DOT] + tok[1:])
        else:
            self.add_token([self.lex_symbol.NUMBER] + tok[1:])

    @postlex
    def INTRON(self, pos, tok):
        '''Derive NEWLINE / INDENT / DEDENT tokens from an INTRON token.

        In refactor mode the INTRON itself is additionally preserved in
        the stream (re-tagged into the symbol range) so that the exact
        source text can be reproduced.
        '''
        if self._refactor_mode:
            intron = tok[:]
            # print "INTRON-PRE", intron
            intron[0] = INTRON_NID + SYMBOL_OFFSET
            # print "INTRON_POST", intron
            self.add_token(intron)
        if self.parenlev > 0:
            # no action inside expression
            return
        else:
            # extract INDENT and NEWLINE token
            S = tok[1]
            nl_termination = S[-1] in ("\n", "\r")
            nl_inserted = False
            if S in ("\n", "\r"):
                # trivial case: the INTRON is a single line break
                self.add_token([self.lex_symbol.NEWLINE] + tok[1:])
                if self.indents and nl_termination:
                    self.dedent_to(0)
                return
            line, col = tok[2], tok[3]
            _indent = 0     # width of the indentation after the last line break
            _linecont = 0   # >0 while consuming an explicit '\'-line-continuation
            for c in S:
                if c == '\\':
                    _linecont = 2
                elif c in ("\n", "\r"):
                    if _linecont <= 0:
                        if not nl_inserted:
                            # only the first line break yields a NEWLINE token
                            self.add_token([self.lex_symbol.NEWLINE, c, line, col])
                            nl_inserted = True
                        if self.indents and nl_termination:
                            self.dedent_to(0)
                    else:
                        _linecont -= 1
                    _indent = 0
                    line += 1
                    col = -1
                elif c == '#':
                    # a comment cancels any indentation seen on this line
                    _indent = 0
                else:
                    if _linecont > 0:
                        pass
                    elif c == " ":
                        if col == 0:
                            _indent = 1
                        elif _indent > 0:
                            _indent += 1
                    elif c == '\t':
                        if col == 0:
                            _indent = TABWIDTH
                        elif _indent > 0:
                            _indent += TABWIDTH
                col += 1
            if _indent > 0:
                # compare the new indentation width against the stack of
                # open indentation levels
                k = 0
                # NOTE(review): k is never incremented, so the
                # IndentationError branch below is currently unreachable;
                # kept as-is pending clarification.
                while self.indents:
                    last_indent = self.indents[-1]
                    n = len(last_indent[1])
                    if _indent > n:
                        if k > 0:
                            raise IndentationError("(Line %d, column %d): Unindent does not match any outer indentation level."%(line, col))
                        else:
                            indent_tok = [self.lex_symbol.INDENT, " "*_indent, line, 0]
                            self.add_token(indent_tok)
                            self.indents.append(indent_tok)
                        return
                    elif _indent < n:
                        self.indents.pop()
                        self.add_token([self.lex_symbol.DEDENT, "", line, 0])
                    else:
                        break
                else:
                    # while-else: stack ran empty (or was empty) without a
                    # matching level -> open a fresh indentation level
                    indent_tok = [self.lex_symbol.INDENT, " "*_indent, line, 0]
                    self.add_token(indent_tok)
                    self.indents.append(indent_tok)

    def dedent_to(self, k):
        '''Emit DEDENT tokens until the indentation level k is reached.'''
        if not self.stream:
            return
        line = self.stream[-1][2] + 1
        while self.indents:
            n = self.indents[-1]
            # NOTE(review): n is an INDENT *token* (a list) compared with
            # the int k; under Python 2 ``list > int`` is always True, so
            # for k == 0 every level is popped. Deliberately left
            # unchanged -- a "fix" would alter behavior for k != 0.
            if n > k:
                self.indents.pop()
                self.add_token([self.lex_symbol.DEDENT, '', line, 0])
            elif n == k:
                break

    @postlex
    def LEFT(self, pos, tok):
        # opening bracket: one level deeper, suppresses layout tokens
        self.parenlev += 1
        self.add_token(tok)

    @postlex
    def RIGHT(self, pos, tok):
        # closing bracket: one level up
        self.parenlev -= 1
        self.add_token(tok)
class LangletTransformer(BaseClass("Transformer", parent_langlet)):
    '''
    Defines langlet specific CST transformations.
    '''
    @transform
    def IPv4Address(self, node):
        # Rewrite a literal IPv4 address into a call ``ip.IPv4((a,b,c,d))``
        # wrapped in a parenthesized atom.
        nd = find_node(node, self.token.IPv4Address)
        if nd:
            sub = nd[1].split(".")
            T = self.fn.Tuple(*sub)
            return self.fn.atom('(', self.fn.testlist_comp(self.fn.CallFunc("ip.IPv4", [T])), ')')

    @transform
    def thunk_stmt(self, node):
        "thunk_stmt: small_stmt ':' suite"
        small = find_node(node, self.symbol.small_stmt)
        # perform checks on expression form NAME '=' NAME for small_stmt
        # and extract names
        _expr_stmt = find_node(small, self.symbol.expr_stmt)
        if not _expr_stmt:
            raise SyntaxError("thunk_stmt is required to have the form: NAME = NAME ':' SUITE")
        if len(_expr_stmt) == 4:
            nid, tl1, eq, tl2 = _expr_stmt
            if not ( is_node(tl1, self.symbol.testlist) and \
                     is_node(eq, self.token.EQUAL) and \
                     is_node(tl2, self.symbol.testlist)):
                raise SyntaxError("thunk_stmt must have the form: NAME = NAME ':' SUITE")
            a1, a2 = smallest_node(tl1), smallest_node(tl2)
            if not ( is_node(a1, self.token.NAME) and \
                     is_node(a2, self.token.NAME)):
                raise SyntaxError("thunk_stmt must have the form: NAME = NAME ':' SUITE")
            Name = find_node(a1, self.token.NAME, depth = 1)
            Func = find_node(a2, self.token.NAME, depth = 1)
            if Name is None or Func is None:
                raise SyntaxError("thunk_stmt must have the form: NAME = NAME ':' SUITE")
        else:
            raise SyntaxError("thunk_stmt must have the form: NAME = NAME ':' SUITE")
        # Expand ``name = func: SUITE`` into:
        #   def thunk(): SUITE; return locals()
        #   name = func(**thunk())
        #   del thunk
        name, func = Name[1], Func[1]
        returns = self.fn.stmt(self.fn.Return(self.fn.CallFunc("locals", [])))
        BLOCK = self.fn.add_to_suite(find_node(node, self.symbol.suite), returns)
        thunk = self.fn.stmt(self.fn.Function("thunk", BLOCK, ()))
        thunk_call = self.fn.stmt(self.fn.Assign(name,
                                                 self.fn.CallFunc(func, dstar_args = self.fn.CallFunc("thunk", []))))
        del_thunk = self.fn.stmt(self.fn.Del("thunk"))
        return [thunk, thunk_call, del_thunk]

    @transform
    def if_stmt(self, node):
        "if_stmt: 'if' test [ as_name ] ':' suite ('elif' test [ as_name ] ':' suite)* ['else' ':' suite]"
        #
        # if test as x:
        #     BLOCK
        #
        # --------->
        #
        # __d = {}
        # if __d.__setitem__("x", test) or __d["x"]:
        #     x = __d["x"]
        #     BLOCK
        # del __d
        #
        if not find_node(node, self.symbol.as_name, depth = 1):
            return
        # a random suffix avoids clashes with user names
        __d = "__d_" + str(random.randrange(100000))
        __d_assign = self.fn.stmt(self.fn.Assign(__d, self.fn.Dict()))
        __d_del = self.fn.stmt(self.fn.Del(__d))
        nodes = node[1:]
        new_if = [self.symbol.if_stmt]
        i = 0
        while i < len(nodes):
            item = nodes[i]
            if is_node(item, self.symbol.test):
                _test = item
                if is_node(nodes[i+1], self.symbol.as_name):
                    # layout at this point: test, as_name, ':', suite
                    _suite = nodes[i+3]
                    name = find_all(nodes[i+1], self.token.NAME)[-1][1]
                    new_if.append( self.fn.Or(
                        self.fn.CallFunc("%s.%s"%(__d, "__setitem__"), [self.fn.String(name), _test]),
                        self.fn.GetItem(__d, self.fn.String(name))))
                    new_if.append(nodes[i+2])
                    name_assign = self.fn.stmt(self.fn.Assign(name,
                                                              self.fn.GetItem(__d, self.fn.String(name))))
                    # bind the name as first statement of the suite
                    new_if.append(self.fn.add_to_suite(_suite, name_assign, 0))
                    i += 4
                    continue
                else:
                    new_if.append(item)
            else:
                new_if.append(item)
            i += 1
        return [__d_assign, self.fn.stmt(new_if), __d_del]

    @transform
    def repeat_stmt(self, node):
        "repeat_stmt: 'repeat' ':' suite 'until' ':' (NEWLINE INDENT test NEWLINE DEDENT | test NEWLINE )"
        # repeat/until becomes ``while True: SUITE; if TEST: break``
        _suite = find_node(node, self.symbol.suite)
        _test = find_node(node, self.symbol.test, depth=1)
        _until = self.fn.if_stmt(_test, self.fn.suite(self.fn.stmt(self.fn.break_stmt())))
        _suite.insert(-1, self.fn.stmt(_until))
        return self.fn.stmt(self.fn.While(True, _suite))

    @transform
    def switch_stmt(self, node):
        "switch_stmt: 'switch' expr ':' NEWLINE INDENT case_stmt DEDENT ['else' ':' suite]"
        # this implementation uses only basic CST functions
        # derived from grammar rules as well as CST interpolation
        SELECT = "SELECT_" + str(random.randrange(100000))
        _test = node[2]
        _case = find_node(node, self.symbol.case_stmt, depth = 1)
        _else = find_node(node, self.symbol.suite, depth = 1)
        _cond = self.fn.power("isChainlet", self.fn.trailer("(", _test, ")"))
        _select = self.fn.testlist(SELECT)
        assign_else = self.fn.stmt(self.fn.expr_stmt(_select, "=", _test))
        _testlist = map(self.fn.test, find_all(_case, self.symbol.expr, depth = 1))
        select_args = self.fn.arglist(*map(self.fn.argument, _testlist))
        trailer_select = self.fn.trailer(".", "select")
        trailer_select_args = self.fn.trailer("(", select_args, ")")
        call_select = self.fn.power( self.fn.atom("(", _test, ")"),
                                     trailer_select,
                                     trailer_select_args)
        assign_if = self.fn.stmt(self.fn.expr_stmt(SELECT, "=", call_select))
        # SELECT = (test).select(cases...) when test is a Chainlet,
        # SELECT = test otherwise
        if_chainlet = self.fn.stmt(self.fn.if_stmt( _cond,
                                                    self.fn.suite(assign_if),
                                                    'else',
                                                    self.fn.suite(assign_else)))
        if_case = self.fn.stmt(self._handle_case_stmt(_case, _select, _else))
        del_select = self.fn.stmt(self.fn.del_stmt(SELECT))
        return if_chainlet, if_case, del_select

    def _handle_case_stmt(self, node, _select, _else_suite = None):
        "case_stmt: 'case' expr ':' suite ('case' expr ':' suite)*"
        # Build ``if SELECT == case1: suite1 elif SELECT == case2: ... else: ...``
        _tests = map(self.fn.test, find_all(node, self.symbol.expr, depth = 1))
        _suites = find_all(node, self.symbol.suite, depth = 1)
        _select = find_node(_select, self.symbol.expr)
        _conds = [self.fn.comparison(find_node(test, self.symbol.expr), "==", _select) for test in _tests]
        # interleave conditions and suites, then splice in the 'elif' keywords
        if_input = sum(map(list, zip(_conds, _suites)), [])
        for i in range(len(if_input)-2, 1, -2):
            if_input.insert(i, "elif")
        if _else_suite:
            if_input.append("else")
            if_input.append(_else_suite)
        return self.fn.if_stmt(*if_input)
class LangletPostlexer(BaseClass("Postlexer", parent_langlet)): '''
class LangletCSTFunction(BaseClass("CSTFunction", parent_langlet)):
    '''
    Implements langlet specific functions operating on CSTs which are accessed
    through the Langlet object via the ``self.fn`` attribute.
    '''
    def __init__(self, langlet):
        super(LangletCSTFunction, self).__init__(langlet)
        self.symbol = self.langlet.parse_symbol
        self.token = self.langlet.parse_token

    def is_atomic(self, node):
        # True when the node is an atom / STRING / NAME / NUMBER or wraps
        # exactly one child that is.
        try:
            nid = node[0]
            if nid in ( self.symbol.atom,
                        self.token.STRING,
                        self.token.NAME,
                        self.token.NUMBER):
                return True
            else:
                if len(node) >= 3:
                    return False
                else:
                    return self.is_atomic(node[1])
        except TypeError:
            raise

    def atomize(self, node):
        # Wrap a node into an atom; non-trivial nodes get parenthesized.
        if node[0] == self.symbol.atom:
            return node
        elif node[0] in (self.token.STRING, self.token.NAME, self.token.NUMBER):
            return self.atom(node)
        return self.atom("(", node, ")")

    def maybe_projection(self, node):
        '''
        This is a variant of the projection() function. It projects on a Python
        cst only when the first node can be projected.
        '''
        if node[0] > SYMBOL_OFFSET + MAX_PY_SYMBOL:
            node[0] = node[0] % LANGLET_ID_OFFSET
            for item in node[1:]:
                if isinstance(item, (list, tuple)):
                    self.langlet.projection(item)
        return node

    def left_distribute(self, a_atom, a_test, func = None):
        '''
        Suppose a_test is a predicate of the form `A == X or B > Y`. Then we map
        a_atom against the boolean expressions s.t. we yield
        `a_atom.A == X or a_atom.B > Y`. If func is available we distribute as
        `func(a_atom, A) == X or func(a_atom, B) > Y`.
        '''
        # Implementation:
        # 1) We seek for all not_test nodes in test down to depth 3. For each not_test node we seek the comparison
        #    node without limitations of depth.
        # 2) The comparison node has the structure `expr (comp_op expr)*` If func is available we transform like
        #    any_expr(CallFunc([[func]], [a_atom, expr])) (comp_op expr)*.
        #    Otherwise we apply
        #    any_expr(CallFunc("getattr", [a_atom, expr])) (comp_op expr)*.
        _not_tests = find_all(a_test, self.symbol.not_test, depth = 3)
        for nt in _not_tests:
            _comparison = find_node(nt, self.symbol.comparison)
            _expr = _comparison[1]
            _cloned = clone_node(_expr)
            if func:
                if func == ".":
                    # attribute access form: a_atom.NAME ...
                    _power = find_node(_expr, self.symbol.power)
                    _trailer = find_all(_power, self.symbol.trailer, depth = 1)
                    _name = find_node(_power, self.token.NAME)
                    _power[1] = self.atomize(a_atom)
                    _power.insert(2, self.trailer(".", _name))
                else:
                    replace_node(_expr, self.expr(self.CallFunc(func, [a_atom, _cloned])))
            else:
                _cloned = clone_node(_expr)
                replace_node(_expr, self.expr(self.CallFunc("getattr", [a_atom, _cloned])))

    def varargs2arglist(self, varargs):
        """
        This function is used to turn the arguments of a function defintion into
        that of a function call.

        Motivation ::

            Let def f(x,y,*args): ... be a given function. We want to define a
            second function def g(*args,**kwd): ... that shall be called with
            the arguments of f in the body of f:

            def f(x,y,*args):
                ...
                g(x,y,*args)
                ...

            To call g with the correct arguments of f we need to transform the
            varargslist node according to f into the arglist of g.
        """
        if not varargs:
            raise ValueError, "No varargs found"
        self.maybe_projection(varargs)
        arguments = []
        i = 1
        while i < len(varargs):
            arg = varargs[i]
            if arg[0] == self.symbol.fpdef:
                if i+1 < len(varargs):
                    tok = varargs[i+1][0]
                    if tok == self.token.EQUAL:
                        # skip the default value: NAME '=' test ','
                        i += 3
                    elif tok == self.token.COMMA:
                        i += 2
                    arguments.append(self.argument(arg[1]))
                else:
                    arguments.append(self.argument(arg[1]))
                    break
            elif arg[0] == self.token.STAR:
                # NOTE(review): bare ``test`` / ``arglist`` below -- most
                # other code in this class uses self.test / self.arglist;
                # presumably module-level aliases exist -- verify.
                arguments.append("*")
                arguments.append(test(varargs[i+1][1]))
                i += 2
            elif arg[0] == self.token.DOUBLESTAR:
                arguments.append("**")
                arguments.append(test(varargs[i+1]))
                i += 2
            elif arg[0] == self.token.COMMA:
                i += 1
            else:
                raise ValueError,"Unexpected node %s"%(self.token.tok_name[arg[0]])
        return arglist(*arguments)

    def func_name(self, funcdef):
        # Return the name token value of a funcdef, skipping decorators.
        if funcdef[1][0] == self.symbol.decorators:
            return funcdef[3][1]
        else:
            return funcdef[2][1]

    def to_signature(self, varargs):
        """
        Creates a dictionary from a node of type symbol.varargslist.

        @param varargs: node of type varargslist.
        @return: dict of following structure: {'args': dict, 'defaults': dict,
                 'star_args': dict, 'dstar_args': dict}
        """
        #assert proj_nid(varargs) == self.symbol.varargslist, self.symbol.sym_name[proj_nid(varargs)]
        signature = {'args':{}, 'defaults':{}, 'star_args': {}, 'dstar_args':{}, 'arglist': [] }
        n = len(varargs)-2
        i = 0
        current_name = ""
        while i <= n:
            item = varargs[1:][i]
            if proj_nid(item) == self.symbol.fpdef:
                if find_node(item, self.symbol.fplist):
                    raise SyntaxError("Does not support tuple-structured arguments")
                else:
                    current_name = item[1][1]
                    signature['arglist'].append(current_name)
                    signature['args'][current_name] = ()
            elif proj_nid(item) == self.symbol.test:
                # a test following an fpdef is that argument's default
                signature['defaults'][current_name] = item
            elif proj_nid(item) == self.token.STAR:
                i += 1
                signature['star_args'][find_node(varargs[1:][i], self.token.NAME)[1]] = ()
            elif proj_nid(item) == self.token.DOUBLESTAR:
                i += 1
                signature['dstar_args'][find_node(varargs[1:][i], self.token.NAME)[1]] = {}
            i += 1
        return signature

    def power_merge(self, nodeA, nodeB):
        '''
        This function merges a pair of power nodes in the following way::

            nodeA = atomA + trailerA  \\
                                        |  =>  atomB + trailerB + trailerA
            nodeB = atomB + trailerB  /
        '''
        nodeA = self.maybe_projection(nodeA)
        nodeB = self.maybe_projection(nodeB)
        if nodeA[0] == self.symbol.power and nodeB[0] == self.symbol.power:
            trailerA = find_all(nodeA, self.symbol.trailer, depth = 1)
            if not trailerA:
                trailerA = []
            trailerB = find_all(nodeB, self.symbol.trailer, depth = 1)
            if not trailerB:
                trailerB = []
            atomB = find_node(nodeB, self.symbol.atom)
            return self.power(atomB, *(trailerB+trailerA))

    def concat_funcalls(self, funA, funB):
        '''
        Two function calls funA(argsA), funB(argsB) are merged to one call
        funA(args).funB(argsB).
        '''
        if funA[0] == self.symbol.power and funB[0] == self.symbol.power:
            trailerA = find_all(funA, self.symbol.trailer, depth = 1)
            trailerB = find_all(funB, self.symbol.trailer, depth = 1)
            atomA = find_node(funA, self.symbol.atom)
            atomB = find_node(funB, self.symbol.atom)
            # NOTE(review): bare ``trailer`` -- see varargs2arglist note.
            return self.power(atomA, *(trailerA+[trailer(".",atomB[1])]+trailerB))

    def parens(self, node):
        '''
        Like atomize but default for enforced parentheses is true.
        '''
        # NOTE(review): atomize() as defined above takes no ``enforce``
        # keyword -- this call would raise TypeError; presumably a base
        # class variant is intended. Confirm before use.
        return self.atomize(node, enforce = True)

    def split_expr(self, node):
        "splits an expr of the kind a.b(x).c(). ... into factors a, b, (x), c, (), ..."
        pw = find_node(node, self.symbol.power)
        at = find_node(pw, self.symbol.atom)
        tr = find_all(pw, self.symbol.trailer, depth = 1)
        return [at]+tr

    def add_to_suite(self, _suite, _stmt, pos=-1):
        '''
        Inserts statement into suite node.

        @param _suite: suite node in which stmt node is inserted.
        @param _stmt: stmt node to be inserted into suite
        @param pos: optional argument used to characterize the insert position.
                    default value is -1 i.e. stmt node will be appended.
        '''
        n = find_node(_suite, self.symbol.simple_stmt, depth = 1)
        if n:
            # a one-line suite gets promoted to a multi-statement suite
            _args = [self.stmt(n)]
            if pos == 0:
                _args.insert(0, _stmt)
            else:
                _args.append(_stmt)
            return replace_node(_suite, self.suite(*_args))
        else:
            nodes = find_all(_suite, self.symbol.stmt, depth=1)
            if pos == -1:
                nodes.append(_stmt)
            else:
                nodes.insert(pos, _stmt)
            return replace_node(_suite, self.suite(*nodes))

    def pushstmt(self, stmt1, stmt2):
        '''
        If stmt1 has following structure ::

            EXPR1:
                STMT11
                ...
                STMT1k
                EXPR2:
                    STMT21
                    ...
                    STMT2m

        then we insert the second argument stmt2 at the end ::

            EXPR1:
                STMT11
                ...
                STMT1k
                EXPR2:
                    STMT21
                    ...
                    STMT2m
                    --> stmt2
        '''
        SUITE = find_node(stmt1, self.symbol.suite)
        while True:
            _stmts = find_all(SUITE, self.symbol.stmt, depth = 1)
            _stmt = _stmts[-1]
            _suite = find_node(_stmt, self.symbol.suite)
            if not _suite:
                # deepest suite reached: append here
                _stmts.append(stmt2)
                return stmt1
            else:
                SUITE = _suite

    def Name(self, s):
        # NAME token node
        return [self.token.NAME, s]

    def Number(self, s):
        # NUMBER token node
        return [self.token.NUMBER, str(s)]

    def String(self, s):
        # STRING token node; unquoted input is double-quoted
        if s:
            if s[0] not in ("'", '"'):
                s = '"'+s+'"'
        else:
            s = '""'
        return [self.token.STRING, s]

    def Add(self, fst, snd, *args):
        "Add: term ('+' term)+ -> arith_expr"
        addargs = []
        allargs = [fst, snd]+list(args)
        for item in allargs[:-1]:
            addargs.append(self.fit(item, self.symbol.term))
            addargs.append("+")
        addargs.append(self.fit(allargs[-1], self.symbol.term))
        return self.arith_expr(*addargs)

    def Assign(self, name, value):
        "Assign: expr (',' expr)* '=' expr (',' expr)* -> expr_stmt"
        if isinstance(name, str):
            arg1 = self.testlist(self.test(self.Name(name)))
        else:
            arg1 = self.testlist(self.test(name))
        arg2 = self.testlist(self.test(value))
        return self.expr_stmt(arg1, '=', arg2)

    def AugAssign(self, var, augass, val):
        "AugAssign: expr augassign expr -> expr_stmt"
        if type(var) == str:
            v1 = self.testlist(self.test(self.Name(var)))
        else:
            v1 = self.testlist(self.test(var))
        v2 = self.testlist(self.test(val))
        if isinstance(augass, list):
            op = augass
        else:
            op = self.augassign(augass)
        return self.expr_stmt(v1, op, v2)

    def Comparison(self, arg1, op, arg2):
        "Comparison: expr comp_op expr -> test"
        expr1 = find_node(self.expr(arg1), self.symbol.expr)
        expr2 = find_node(self.expr(arg2), self.symbol.expr)
        return self.test(self.comparison(expr1, self.comp_op(op), expr2))

    def Power(self, a, n):
        "Power: atom factor -> power"
        return self.power(self.fit(a, self.symbol.atom), self.fit(n, self.symbol.factor))

    def Sub(self, fst, snd, *args):
        "Sub: term ('-' term)+ -> arith_expr"
        addargs = []
        allargs = [fst, snd]+list(args)
        for item in allargs[:-1]:
            addargs.append(self.fit(item, self.symbol.term))
            addargs.append("-")
        addargs.append(self.fit(allargs[-1], self.symbol.term))
        return self.arith_expr(*addargs)

    def Mul(self, fst, snd, *args):
        "Mul: factor ('+' factor)+ -> term"
        addargs = []
        allargs = [fst, snd]+list(args)
        for item in allargs[:-1]:
            addargs.append(self.fit(item, self.symbol.factor))
            addargs.append("*")
        addargs.append(self.fit(allargs[-1], self.symbol.factor))
        return self.term(*addargs)

    def Div(self, fst, snd, *args):
        "Div: factor ('/' factor)+ -> term"
        addargs = []
        allargs = [fst, snd]+list(args)
        for item in allargs[:-1]:
            addargs.append(self.fit(item, self.symbol.factor))
            addargs.append("/")
        addargs.append(self.fit(allargs[-1], self.symbol.factor))
        return self.term(*addargs)

    def FloorDiv(self, *args):
        "FloorDiv: expr ( '//' expr)+ -> expr"
        addargs = []
        allargs = args
        for item in allargs[:-1]:
            addargs.append(self.fit(item, self.symbol.factor))
            addargs.append("//")
        addargs.append(self.fit(allargs[-1], self.symbol.factor))
        return self.term(*addargs)

    def BitAnd(self, *args):
        "BitAnd: expr ( '&' expr)+ -> expr"
        allargs = [self.fit(arg, self.symbol.shift_expr) for arg in args]
        return self.and_expr(*allargs)

    def BitOr(self, *args):
        "BitOr: expr ( '|' expr)+ -> expr"
        allargs = [self.fit(arg, self.symbol.xor_expr) for arg in args]
        return self.expr(*allargs)

    def BitXor(self, *args):
        "BitXor: expr ( '^' expr)+ -> expr"
        allargs = [self.fit(arg, self.symbol.and_expr) for arg in args]
        return self.xor_expr(*allargs)

    def If(self, *args, **kwd):
        # TODO: to be finished
        # NOTE(review): unfinished -- builds _ifargs but returns None and
        # uses a bare ``test`` name; do not call.
        #_else = kwd.get("_else")
        _ifargs = []
        for _t, _s in zip(args[::2], args[1::2]):
            _ifargs.append(test(_t))

    def Not(self, expr):
        "Not: 'not' expr -> not_test"
        return self.not_test("not", self.fit(expr, self.symbol.not_test))

    def And(self, fst, snd, *args):
        "And: expr ( 'and' expr)+ -> and_test"
        allargs = [self.fit(arg, self.symbol.not_test) for arg in [fst, snd]+list(args)]
        return self.and_test(*allargs)

    def Or(fst, snd, *args):
        "And: expr ( 'or' expr)+ -> or_test"
        # NOTE(review): missing ``self`` parameter -- as written, ``fst``
        # receives the instance. Shadowed by the second Or() definition at
        # the end of this class, so this version is dead code.
        allargs = [self.fit(arg, self.symbol.and_test) for arg in [fst, snd]+list(args)]
        return self.test(self.or_test(*allargs))

    def Del(self, *args):
        # del_stmt from an arbitrary number of expressions
        _args = []
        for arg in args:
            _args.append(self.fit(arg, self.symbol.expr))
        return self.del_stmt(self.exprlist(*_args))

    def GetItem(self, name, arg):
        # name[arg] subscription
        if isinstance(name, str):
            name = self.Name(name)
        return self.power(self.atom(name),
                          self.trailer("[", self.subscriptlist(self.subscript(self.expr(arg))), "]"))

    def CallFuncWithArglist(self, name_or_atom, arglist):
        # Call expression from a prebuilt arglist node; the callee may be
        # a NAME node, an atom node or a (dotted) name string.
        _params = self.trailer("(", arglist, ")")
        if isinstance(name_or_atom, list):
            if name_or_atom[0] % LANGLET_ID_OFFSET == self.symbol.atom:
                _args = [name_or_atom]+[_params]
            elif name_or_atom[0] % LANGLET_ID_OFFSET == self.token.NAME:
                _args = [self.atom(name_or_atom)]+[_params]
            else:
                raise ValueError("Cannot handle function name %s"%name_or_atom)
            return self.power(*_args)
        elif name_or_atom.find(".") > 0:
            names = name_or_atom.split(".")
            _args = [self.atom(self.Name(names[0]))]+[self.trailer(".", n) for n in names[1:]]+[_params]
            return self.power(*_args)
        else:
            return self.power(self.atom(self.Name(name_or_atom)), _params)

    def CallFunc(self, name_or_atom, args = [], star_args = None, dstar_args = None):
        '''
        Instead of a name an atom is allowed as well.
        '''
        # NOTE(review): mutable default argument ``args = []`` -- safe
        # only because it is never mutated here.
        _arglist = []
        for arg in args:
            if isinstance(arg, tuple):
                # keyword argument given as (name, '=', value)
                assert len(arg) == 3, arg
                _param = [self.symbol.argument,
                          self.test(self.Name(arg[0])),
                          [self.token.EQUAL, '=']]
                _param.append(self.test(arg[2]))
                _arglist.append(_param)
            else:
                _arglist.append(self.argument(self.test(arg)))
        "arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) "
        if star_args:
            if type(star_args) == str:
                star_args = self.Name(star_args)
            _arglist.append('*')
            _arglist.append(self.test(star_args))
        if dstar_args:
            if type(dstar_args) == str:
                dstar_args = self.Name(dstar_args)
            _arglist.append('**')
            _arglist.append(self.test(dstar_args))
        if _arglist:
            _params = self.trailer("(", self.arglist(*_arglist), ")")
        else:
            _params = self.trailer("(", ")")
        if isinstance(name_or_atom, list):
            if name_or_atom[0] == self.symbol.atom:
                _args = [name_or_atom]+[_params]
            elif name_or_atom[0] == self.token.NAME:
                _args = [self.atom(name_or_atom)]+[_params]
            else:
                raise ValueError("Cannot handle function name %s"%name_or_atom)
            return self.power(*_args)
        elif name_or_atom.find(".") > 0:
            # dotted name string "a.b.c" -> a . b . c ( ... )
            names = name_or_atom.split(".")
            _args = [self.atom(self.Name(names[0]))]+[self.trailer(".", n) for n in names[1:]]+[_params]
            return self.power(*_args)
        else:
            return self.power(self.atom(self.Name(name_or_atom)), _params)

    def GetAttr(self, expr, *args):
        '''
        (A(EXPR), B, C(EXPR), ...) -> CST (A(EXPR1).B.C(EXPR). ... ) of power
        '''
        if isinstance(expr, str):
            expr = self.Name(expr)
        trailers = []
        for arg in args:
            if isinstance(arg, str):
                # NOTE(review): bare ``trailer`` -- see varargs2arglist note.
                trailers.append(trailer(".", self.Name(arg)))
            elif arg[0] % LANGLET_ID_OFFSET == 101:
                trailers.append(self.trailer(".", arg))
            else:
                call = find_node(arg, self.symbol.power)[1:]
                assert is_node(call[0], self.symbol.atom)
                trailers.append(".")
                trailers.append(call[0][1])
                for item in call[1:]:
                    assert is_node(item, self.symbol.trailer)
                    # NOTE(review): insert(0, ...) prepends call trailers
                    # before the dotted path -- confirm ordering intent.
                    trailers.insert(0, item)
        return self.power(self.atom("(", self.testlist_comp(self.test(expr)), ")"), *trailers)

    def List(self, *args):
        '''
        List: '[' ']' | '[' expr (',' expr)* ']' -> atom
        '''
        if not args:
            return self.atom("[", "]")
        else:
            return self.atom("[", self.listmaker(*[self.expr(arg) for arg in args]), "]")

    def Tuple(self, *args):
        '''
        Tuple: '(' ')' | '(' expr (',' expr)* ')' -> atom
        '''
        # NOTE(review): shadowed by the second Tuple() definition below.
        if not args:
            return self.atom("(", ")")
        else:
            return self.atom("(", self.testlist_comp(*([self.expr(arg) for arg in args]+[","])), ")")

    def Dict(self, pairs = None, **dct):
        '''
        Dict: '{' '}' | '{' expr ':' expr (',' expr ':' expr )* '}' -> atom
        '''
        if dct:
            pairs = dct.items()
        if pairs is None:
            return self.atom("{", "}")
        args = []
        for key, value in pairs:
            args.append(self.expr(key))
            args.append(self.expr(value))
        return self.atom("{", self.dictmaker(*args), "}")

    def ParametersFromSignature(self, sig):
        # Rebuild a parameters node from a to_signature() dictionary.
        return self.FuncParameters(sig['args'],
                                   defaults = sig['defaults'],
                                   star_args = sig['star_args'],
                                   dstar_args = sig['dstar_args'])

    def Lambda(self, body, argnames, defaults = {}, star_args=None, dstar_args=None):
        # lambda argnames...: body
        # NOTE(review): mutable default ``defaults = {}`` -- never mutated here.
        if argnames:
            _param = find_node(self.FuncParameters(argnames, defaults, star_args, dstar_args),
                               self.symbol.varargslist)
            if _param:
                return self.lambdef(_param, self.test(body))
        return self.lambdef(self.test(body))

    def FuncParameters(self, argnames, defaults = {}, star_args=None, dstar_args=None):
        # Build a parameters node from plain names, defaults and */** args.
        _argnames = [self.fpdef(arg) for arg in argnames]
        _star_args = []
        if star_args:
            _star_args = ['*', star_args]
        _dstar_args = []
        if dstar_args:
            _dstar_args = ['**', dstar_args]
        _defaults = []
        for key, val in defaults.items():
            _defaults += [self.fpdef(self.Name(key)), self.expr(val)]
        _all = _argnames+_defaults+_star_args+_dstar_args
        if _all:
            return self.parameters(self.varargslist(*_all))
        else:
            return self.parameters()

    def Function(self, name, BLOCK, argnames, defaults={}, star_args=None, dstar_args=None):
        # def name(argnames...): BLOCK  wrapped into a stmt node
        def _wrap_name(name):
            # accept a plain string or a prebuilt NAME node
            if isinstance(name, str):
                return self.Name(name)
            return name
        return self.stmt(self.funcdef("def",
                                      _wrap_name(name),
                                      self.FuncParameters(argnames, defaults, star_args, dstar_args),
                                      BLOCK))

    def Subscript(self, expression, sub, *subs):
        '''
        Maps to expr[sub1,sub2,...,subn] only
        '''
        # NOTE(review): shadowed by the second Subscript() definition below.
        SUBSCR = [self.symbol.subscriptlist,
                  self.subscript(self.expr(sub))]+[self.subscript(self.expr(arg)) for arg in subs]
        return self.power(self.atom('(', self.testlist_comp(self.expr(expression)), ')'),
                          self.trailer('[', SUBSCR, ']'))

    def Return(self, *args):
        '''
        (EXPR, EXPR, ... ) -> CST ( return_stmt )
        '''
        return self.return_stmt(self.testlist(*[self.expr(arg) for arg in args]))

    def Eval(self, arg):
        # eval_input node from a testlist-compatible argument
        return self.eval_input(self.fit(arg, self.symbol.testlist))

    def Except(self, arg1, arg2 = None):
        # except_clause with optional target expression
        if arg2:
            return self.except_clause(self.expr(arg1), self.expr(arg2))
        else:
            return self.except_clause(self.expr(arg1))

    def TryExcept(self, try_suite, else_suite = None, *args):
        assert len(args)%2 == 0, "pairs of (except_clause, suite) expected"
        try_except_args = [try_suite]
        for i in range(len(args))[::2]:
            arg = args[i]
            if isinstance(arg, list):
                # NOTE(review): indentation was lost in the original; the
                # else-branch is read here as wrapping non-except_clause
                # lists via Except() -- verify.
                if arg[0] == self.symbol.except_clause:
                    try_except_args.append(arg)
                else:
                    try_except_args.append(self.Except(arg))
            try_except_args.append(args[i+1])
        if else_suite:
            try_except_args.append(else_suite)
        return self.try_stmt(*try_except_args)

    def TryFinally(self, try_suite, finally_suite):
        return self.try_stmt(try_suite, 'finally', finally_suite)

    def Import(self, module):
        # import a.b.c  from a dotted name string
        return self.import_name(self.dotted_as_names(self.dotted_as_name(
            self.dotted_name(*[mod for mod in module.split(".")]))))

    def ImportFrom(self, from_module, *names):
        # from a.b import x [, y ...]   or   from a.b import *
        path = self.dotted_name(*[self.Name(mod) for mod in from_module.split(".")])
        if names[0] == "*":
            return self.import_from(path, '*')
        else:
            return self.import_from(path, self.import_as_name(*names))

    def While(self, *args):
        # while args[0]: args[1:] ...
        arg = self.expr(args[0])
        return self.while_stmt(*((arg,)+args[1:]))

    def For(self, *args):
        raise NotImplementedError

    def ListComp(self, *args):
        # [ expr comp_for ] list comprehension atom
        return self.atom("[", self.listmaker( self.expr(args[0]), args[1]), "]")

    def Subscript(self, expression, *subs):
        # effective Subscript(): expects prebuilt subscript nodes
        assert len(subs) > 1
        return self.power( self.atom('(', expression, ')'),
                           self.trailer('[', self.subscriptlist(*subs), ']') )

    def Tuple(self, *args):
        # effective Tuple(): expects prebuilt testlist_comp-compatible nodes
        if not args:
            return self.atom("(", ")")
        else:
            exprs = self.testlist_comp(*list(args)+[","])
            return self.atom("(", exprs, ")")

    def Binary(self, outnode, op, *args):
        # generic left-to-right chaining: a op b op c ... under outnode
        assert len(args) >= 2
        allargs = []
        for arg in args[:-1]:
            allargs.append(arg)
            allargs.append(op)
        allargs.append(args[-1])
        return outnode(*allargs)

    def Or(self, *args):
        # effective Or(): expects prebuilt and_test-compatible nodes
        return self.test(self.or_test(*args))
class LangletTransformer(BaseClass("Transformer", parent_langlet)):
    '''
    Defines langlet specific CST transformations.

    This variant instruments a parsed module with coverage sensors: it walks
    the CST, registers line-range sensors with the global ``monitor`` module,
    and splices ``measure_stmt`` / ``measure_expr`` calls into suites and
    boolean sub-expressions.

    CST node convention (as used below): a non-terminal node is a list
    ``[node_id, child, child, ...]``; a token node is ``[token_id, text,
    lineno]`` — grounded by the ``T[1].count("\\n")`` / ``T[2]`` accesses in
    the line-info helpers.
    '''

    def set_module(self, module_descriptor):
        # Bind the module under transformation and attach a Monitor that
        # manages the sensors for that module's source file.
        self._module_descriptor = module_descriptor
        self.mon = monitor.Monitor()
        self.mon.assign_sensors(module_descriptor.fpth_mod_full)

    def get_line_info_begin(self, node):
        # First line covered by `node`.  Fast path: the first child is a
        # token carrying its line number at index 2.
        try:
            node_begin = node[1][2]
        except IndexError:
            # Slow path: scan forward for the first token that has line
            # info, compensating for newline-only tokens seen on the way
            # (their text shifts the effective start line upward).
            token = find_token_gen(node)
            nl = 0
            for T in token:
                if len(T) > 2:
                    node_begin = T[2] - nl
                    break
                else:
                    nl += T[1].count("\n")
        # NOTE(review): if no token with line info exists, node_begin is
        # unbound and this raises NameError — presumably cannot happen for
        # well-formed CSTs; confirm.
        return node_begin

    def get_line_info_end(self, node):
        # Last line covered by `node`; mirror image of get_line_info_begin,
        # scanning the token list backwards.
        try:
            node_end = node[-1][2] - 1
        except IndexError:
            token = find_all_token(node)
            nl = 0
            for T in token[::-1]:
                if len(T) > 2:
                    node_end = T[2] + nl - 1
                    break
                else:
                    nl += T[1].count("\n")
        return node_end

    def get_line_info(self, node):
        # Convenience wrapper: (first_line, last_line) of `node`.
        node_begin = self.get_line_info_begin(node)
        node_end = self.get_line_info_end(node)
        return node_begin, node_end

    @transform
    def file_input(self, node):
        # Entry point for a whole module: transform each top-level child,
        # then let the base class run its own file_input handling.
        for sub in node[1:]:
            self.run(sub)
        super(LangletTransformer, self).file_input(node)

    @transform
    def if_stmt(self, node):
        # Set sensors in "if __name__ == '__main__':" statements
        # which correspond to __main__ only.
        # In all other cases the statement is unreachable, so the node is
        # unmarked instead of instrumented.
        if self.is_main(node):
            if self._module_descriptor.is_main:
                for sub in node[1:]:
                    self.run(sub)
            else:
                self.unmark_node(node)

    @transform
    @t_dbg("cv", cond=lambda node, **locals: locals.get("line", -1) >= 0)
    def and_test(self, node, line=-1, idx=0):
        # Instrument the operands of an `and` expression.  `line`/`idx`
        # thread the current source line and the running per-line sensor
        # index through recursive calls.
        if find_node(node, self.keyword["and"], depth=1):
            _not_tests = find_all(node, self.symbol.not_test, depth=1)
            for sub in _not_tests:
                if find_node(sub, self.symbol.test):
                    # Operand contains a nested test: recurse instead of
                    # wrapping it directly.
                    self.run(sub, line=line, idx=idx)
                else:
                    # find not_test nodes
                    # Locate the first atom that carries an int line number.
                    for item in find_all_gen(node, self.symbol.atom):
                        if len(item) > 2:
                            first_line = item[1][2]
                        else:
                            continue
                        if isinstance(first_line, int):
                            break
                    else:
                        # No line info found anywhere: skip this operand.
                        continue
                    # Same line as the previous sensor -> next slot on that
                    # line; otherwise start counting at 1 on the new line.
                    if first_line == line:
                        idx += 1
                    else:
                        line = first_line
                        idx = 1
                    # Sensor id = current number of expression sensors.
                    # NOTE(review): monitor.Monitor() appears to act as a
                    # shared registry (ExprSensor registers itself) —
                    # presumably a singleton; confirm.
                    _num = self.fn.Number(len(monitor.Monitor().expr_sensors))
                    monitor.ExprSensor(first_line, idx)
                    self.run(sub, line=line, idx=idx)
                    # Replace the operand with measure_expr(<operand>, id).
                    cloned = clone_node(sub)
                    call_measure_expr = self.fn.CallFunc(
                        "measure_expr", [cloned, _num])
                    replace_node(sub, self.fn.not_test(call_measure_expr))

    @transform
    def or_test(self, node, line=-1, idx=0):
        # Instrument the operands of an `or` expression; structurally the
        # twin of and_test, wrapping and_test operands instead.
        if find_node(node, self.keyword["or"], depth=1):
            and_tests = find_all(node, self.symbol.and_test, depth=1)
            for i, t in enumerate(and_tests):
                # NOTE(review): passes line=0 here, unlike and_test which
                # threads `line` through — looks intentional but verify.
                self.run(t, line=0, idx=idx)
            for sub in and_tests:
                # Locate the first atom with an int line number (see
                # and_test for the same search).
                for item in find_all_gen(node, self.symbol.atom):
                    if len(item) > 2:
                        first_line = item[1][2]
                    else:
                        continue
                    if isinstance(first_line, int):
                        break
                else:
                    continue
                if first_line == line:
                    idx += 1
                else:
                    line = first_line
                    idx = 1
                _num = self.fn.Number(len(monitor.Monitor().expr_sensors))
                monitor.ExprSensor(first_line, idx)
                self.run(sub, line=line, idx=idx)
                cloned = clone_node(sub)
                call_measure_expr = self.fn.CallFunc("measure_expr", [cloned, _num])
                replace_node(sub, self.fn.and_test(call_measure_expr))

    @transform
    # @t_dbg("si")
    def suite(self, node):
        # special case: no use of sensors in 'if __main__...' stmts of
        # modules that are not __main__.
        _stmts = find_all(node, self.symbol.stmt, depth=1)
        # Sensor id = current number of statement sensors.
        _num = self.fn.Number(len(monitor.Monitor().stmt_sensors))
        # compile a call 'measure_stmt(_num)' into each suite
        call_measure_stmt = self.fn.CallFunc("measure_stmt", [_num])
        _sensor_stmt = self.fn.stmt(call_measure_stmt)
        IDX = 0
        # Find the insertion point: the last stmt child, but stop early at
        # the first stmt containing a flow_stmt so the sensor call runs
        # before control leaves the suite.
        for i, item in enumerate(node[1:]):
            if item[0] == self.symbol.stmt:
                if find_node(item, self.symbol.flow_stmt, depth=3):
                    # measure_stmt shall be execed before
                    IDX = i  # return, break, continue
                    break
                IDX = i
        if IDX:
            suite_begin, suite_end = self.get_line_info(node)
            monitor.StmtSensor(suite_begin, suite_end)
            # NOTE(review): `node[i]` reuses the loop variable after the
            # loop (item == node[i+1] inside it) — possibly intended to be
            # node[IDX] / node[IDX+1]; confirm before relying on the
            # docstring-skip branch below.
            _small = find_node(node[i], self.symbol.small_stmt, depth=3)
            if _small and self.fn.is_atomic(_small) and find_node(
                    _small, self.token.STRING):
                # First statement is a bare string (docstring): insert the
                # sensor call after it so the docstring stays first.
                node.insert(IDX + 2, _sensor_stmt)
            else:
                node.insert(IDX + 1, _sensor_stmt)
class LangletTransformer(BaseClass("Transformer", parent_langlet)):
    '''
    Defines langlet specific CST transformations.

    Compiles EBNF-style grammar rules from the parsed CST into rule objects
    (AltRule / SequenceRule / ConstRule / EmptyRule), collected in
    ``self.rules`` keyed by rule name.
    '''

    def __init__(self, *args, **kwd):
        super(LangletTransformer, self).__init__(*args, **kwd)
        # name -> (Rule object, unparsed source text of the rule)
        self.rules = {}
        # running index handed out to each terminal/nonterminal occurrence
        self.cnt = 0
        # running index handed out to each "nullable" position ([...] or *)
        self.nullidx = 0

    @transform
    def rule(self, node):
        "rule: NAME ':' rhs NEWLINE"
        # Indices restart for every rule.
        self.cnt = 0
        self.nullidx = 0
        name = find_node(node, self.token.NAME)[1]
        body = self.rhs(find_node(node, self.symbol.rhs))
        terminator = ConstRule([(FIN, FEX)])
        # Every rule ends in the FIN marker; extend an existing sequence
        # in place, otherwise wrap the body in a fresh one.
        if isinstance(body, SequenceRule):
            body.lst.append(terminator)
        else:
            body = SequenceRule([body, terminator])
        compiled = Rule([(name, 0), body])
        self.rules[name] = (compiled, self.langlet.unparse(node))

    def rhs(self, node):
        "rhs: alt ( '|' alt )*"
        # One branch per alternative; a single branch needs no AltRule.
        branches = [self.alt(alt_node)
                    for alt_node in find_all(node, self.symbol.alt, depth=1)]
        if len(branches) == 1:
            return branches[0]
        return AltRule(branches)

    def alt(self, node):
        "alt: item+"
        # A run of items becomes a sequence; a lone item stands by itself.
        parts = [self.item(item_node)
                 for item_node in find_all(node, self.symbol.item, depth=1)]
        if len(parts) == 1:
            return parts[0]
        return SequenceRule(parts)

    def item(self, node):
        "item: '[' rhs ']' | atom [ '*' | '+' ]"
        optional = find_node(node, self.symbol.rhs, depth=1)
        if optional:
            # [...] is "empty or rhs"; register the nullable slot before
            # descending so nested nullables get later indices.
            self.nullidx += 1
            empty = EmptyRule([(FIN, self.nullidx)])
            return AltRule([empty, self.rhs(optional)])
        core = self.atom(find_node(node, self.symbol.atom))
        if find_node(node, self.token.STAR, depth=1):
            # X* -> empty | X | X X  (repetition approximated by doubling)
            self.nullidx += 1
            empty = EmptyRule([(FIN, self.nullidx)])
            return AltRule([empty, core, SequenceRule([core, core])])
        if find_node(node, self.token.PLUS, depth=1):
            # X+ -> X | X X
            return AltRule([core, SequenceRule([core, core])])
        return core

    def atom(self, node):
        "atom: '(' rhs ')' | NAME | STRING"
        grouped = find_node(node, self.symbol.rhs, depth=1)
        if grouped:
            # Parenthesized group: just compile its contents.
            return self.rhs(grouped)
        # Terminal or nonterminal name: tag it with the next occurrence index.
        self.cnt += 1
        token_text = node[1][1]
        return ConstRule([(token_text, self.cnt)])
class LangletTransformer(BaseClass("Transformer", parent_langlet)): '''
class LangletUnparser(BaseClass("Unparser", parent_langlet)): '''
class LangletCSTFunction(BaseClass("CSTFunction", parent_langlet)): '''