def process_string(self, cur):
    '''
    Turn a string token into an AST node, expanding format sections.

    The token's value has the two-character sequences for newline and tab
    escapes replaced, then is scanned for "\\{ ... }" sections. Text
    between sections becomes ast.Value nodes; the text inside a section
    is parsed with a fresh Parser and reduced with semiAsExpr.

    Returns the single part (if there is exactly one) or an ast.Format
    of all parts, with its origin set to cur.

    A "\\{" with no closing "}" is kept as literal text. Previously the
    failed search returned -1, which reset the scan position to the start
    of the string and looped forever.
    '''
    val = cur.value

    # Lazy hack, there are edge cases where this would be incorrect
    val = val.replace("\\n", "\n").replace("\\t", "\t")

    # Scan for format strings
    parts = []
    upto = 0
    while True:
        start = val.find("\\{", upto)
        if start == -1:
            break

        end = val.find("}", start)
        if end == -1:
            # Unterminated section: stop scanning, the tail (including
            # the opening marker) is emitted below as plain text.
            break

        if upto != start:
            parts.append(ast.Value(val[upto:start]))

        # start + 2 skips the backslash and the opening brace
        sp = Parser(val[start + 2:end]).parse()
        parts.append(semiAsExpr(sp.elems))
        upto = end + 1

    if upto < len(val):
        parts.append(ast.Value(val[upto:]))

    if len(parts) == 1:
        return parts[0].set_origin(cur)
    else:
        return ast.Format(parts).set_origin(cur)
def parse_unary(self, cur):
    '''
    Prefix unary.

    cur is the operator token. The operand is parsed at the operator's
    own precedence (looked up in PRECS).
    '''
    operator = cur.value
    operand = self.expr(PRECS[operator])

    # Increment/decrement are rewritten as an assignment by 1; the first
    # character of the operator ("+" or "-") selects the direction.
    if operator in {"--", "++"}:
        return ast.Assign(operand, ast.Value(1), operator[0]).set_origin(cur)

    # Any other prefix operator becomes an Op with an empty left-hand side.
    return ast.Op(operator, ast.Value(None), operand).set_origin(cur)
def p_textfacet(p):
    """textfacet : catalogslash TEXTFACET '/' string """
    # NOTE: the docstring above is the grammar rule, not documentation.
    # p[1] is the catalogslash production, p[4] the string to match.
    make_facet = p[1].textfacet

    # Predicate comparing the 'value' name against the given string using
    # the 'ciregexp' predicate class (presumably case-insensitive regexp
    # -- confirm against ast.data).
    predicate_cls = ast.data.predicatecls('ciregexp')
    text_filter = predicate_cls(
        ast.Name().with_suffix('value'),
        ast.Value(p[4]),
    )

    # First name list: schema / table / column; second one is left empty.
    grouping = ast.NameList([
        ast.Name().with_suffix('schema'),
        ast.Name().with_suffix('table'),
        ast.Name().with_suffix('column'),
    ])
    p[0] = make_facet(text_filter, grouping, ast.NameList())
def parse_objectliteral(self, cur):
    '''
    Parse an object literal whose opening "{" token (cur) was already
    consumed by the caller.

    An empty literal yields an ast.Value wrapping a fresh EspDict.
    Otherwise returns an ast.ObjectLiteral holding (key, value) pairs.
    Both results have their origin set to cur.

    Raises NotImplementedError for get/set accessors and computed
    ("[...]") properties, and the parser's own error for unexpected
    tokens in key position.
    '''
    if self.maybe("}"):
        # Build the empty-object value the same way the empty list
        # literal is built: wrap the container, then attach the origin.
        # (The token used to be passed as the value itself.)
        return ast.Value(EspDict()).set_origin(cur)

    obj = []
    while not self.maybe("}"):
        tok = self.cur
        if tok.type in {"val", "id", "op"}:
            key = ast.Value(tok.value)
        elif tok.type == "kw":
            if tok.value in {"get", "set"}:
                raise NotImplementedError("get/set")
            key = ast.Value(tok.value)
        elif tok.type == "punc":
            if tok.value == "[":
                raise NotImplementedError("Computed properties")
            raise self.error(tok, "Unexpected punctuation")
        else:
            raise self.error(tok, f"Unknown token {tok!r}")

        key = key.set_origin(tok)
        self.consume()

        if self.peek("("):
            # Object method
            functok = self.cur
            args = self.funcargs()
            value = ast.Func(key, args, self.block()).set_origin(functok)
        elif self.maybe(":"):
            # Normal property
            value = self.expr()
        else:
            # Shorthand property: the key doubles as a variable name.
            # NOTE: Doesn't work for computed properties
            value = ast.Var(key.value)

        obj.append((key, value))

        # Should commas be optional?
        self.maybe(",")

    return ast.ObjectLiteral(obj).set_origin(cur)
def p_value(self, p):
    """
    value : FLOAT
          | INT
          | STRING
          | matrix
          | access
    """
    # NOTE: the docstring above is the grammar rule, not documentation.
    # Each alternative must sit on its own line starting with "|";
    # on a single line the "|" tokens are read as grammar symbols.
    #
    # Wrap the matched item in a Value node together with the line number
    # reported by the underlying lexer.
    p[0] = ast.Value(p[1], p.lexer.lexer.lineno)
def parse_listliteral(self, cur):
    '''
    Parse a list literal whose opening "[" token (cur) was already
    consumed by the caller.

    An empty literal yields an ast.Value wrapping a fresh EspList;
    otherwise an ast.ListLiteral of the comma-separated expressions.
    '''
    if self.maybe("]"):
        return ast.Value(EspList()).set_origin(cur)

    # At least one element, then one more after every comma.
    vals = [self.expr()]
    while self.maybe(","):
        vals.append(self.expr())

    self.expect("]")
    return ast.ListLiteral(vals).set_origin(cur)
def semiAsExpr(block):
    '''
    Collapse a sequence of expressions into one expression.

    None entries are dropped first. Nothing left gives ast.Value(None),
    a single survivor is returned as-is, and anything longer is wrapped
    in an ast.Block.
    '''
    kept = [item for item in block if item is not None]

    if not kept:
        return ast.Value(None)
    if len(kept) == 1:
        return kept[0]
    return ast.Block(kept)
def relaxid(self):
    '''
    Parse an identifier with relaxed restrictions.

    Accepts id, kw, op, cmp, assign and str tokens. String tokens go
    through process_string; every other accepted token is turned into an
    ast.Value of its text (eg "+"). Returns None when the current token
    is not one of the accepted types.
    '''
    tok = self.maybe(type={"id", "kw", "op", "cmp", "assign", "str"})
    if not tok:
        return None

    if tok.type != "str":
        return ast.Value(tok.value).set_origin(tok)
    return self.process_string(tok)
def parse_funcliteral(self, cur):
    '''
    Parse a function literal; cur is the token it originates from.

    An optional identifier right after the keyword names the function.
    Anonymous functions return the ast.Func itself. Named ones register
    a variable with that name via addVar and return an ast.Assign of the
    function to it.
    '''
    head = self.cur
    if head.type == "id":
        name = ast.Value(head.value)
        self.consume()
    else:
        name = None

    # Arguments are parsed before the body.
    func = ast.Func(name, self.funcargs(), self.block()).set_origin(cur)

    if not name:
        return func

    target = ast.Var(str(name.value))
    self.addVar(target)
    return ast.Assign(target, func).set_origin(cur)
def p_expr_const(p):
    """expr : string """
    # NOTE: the docstring above is the grammar rule, not documentation.
    # A bare string is an expression: wrap it in a Value node.
    literal = p[1]
    p[0] = ast.Value(literal)
def expr(self, min_prec=0):
    '''
    Parse an expression. Uses precedence climbing.

    min_prec is the lowest operator precedence this call may consume;
    an operator below it ends the expression and is left for the caller.
    Returns the parsed node, or whatever atom() returned when no
    operators follow (which may be None).
    '''
    lhs = self.atom()

    while self.cur:
        cur = self.cur

        # Calls and indexing are functionally equivalent to binary
        # operators with a parenthesized rhs
        if self.maybe("("):
            args = self.listing(",", self.expr)
            self.expect(")")
            lhs = ast.Call(lhs, args).set_origin(cur)
        elif self.maybe("["):
            args = self.listing(":", self.expr)
            self.expect("]")
            lhs = ast.Index(lhs, args).set_origin(cur)

        # Accessor operators require special parsing because they allow
        # relaxed identifiers
        elif self.peek({".", "->", "::"}):
            prec = PRECS[cur.value]
            if prec < min_prec:
                break
            # Don't consume the token here, accessexpr will do it
            lhs = self.accessexpr(lhs, prec)

        # Postfix operators
        elif self.peek({"++", "--"}):
            # Disable for continuation statements
            if type(lhs) in {ast.If, ast.Loop}:
                break
            self.consume()
            # "++" -> "+", "--" -> "-", mirroring the prefix forms.
            # (Postfix "--" used to be built with "+" and so incremented.)
            step = ast.Assign(lhs, ast.Value(1), cur.value[0])
            lhs = ast.After(lhs, step).set_origin(cur)

        else:
            op = cur.value
            assign = False

            if self.peek(type="assign"):
                # Compound assignment: strip the trailing "=" to get the
                # underlying operator ("+=" -> "+", "=" -> "").
                op = op[:-1]
                assign = True
            elif self.peek(type={"op", "cmp"}):
                pass
            else:
                # Not an operator at all: the expression ends here.
                break

            # Every assignment form binds with the precedence of "=".
            prec = PRECS['=' if assign else op]
            if prec < min_prec:
                break

            self.consume()

            # Operators not listed in RIGHT get prec + 1 for their rhs,
            # so an equal-precedence operator is not absorbed there.
            rhs = self.expr(prec + (op not in RIGHT))
            if not rhs:
                break

            if assign:
                lhs = ast.Assign(lhs, rhs, op).set_origin(cur)
            elif op == ",":
                # Extend an existing tuple rather than nesting a new one.
                if type(lhs) is ast.Tuple:
                    lhs.append(rhs)
                else:
                    lhs = ast.Tuple([lhs, rhs])
            elif op == ".":
                lhs = ast.Index(lhs, [rhs])
            else:
                lhs = ast.Op(op, lhs, rhs)

    return lhs
def atom(self):
    '''
    Parse a single operand: a literal, variable, bracketed construct,
    prefix operator or keyword-introduced construct.

    Returns None without consuming anything at end of input, at a
    closing ")", ";", "}" or "]", or at a "case"/"else" keyword.
    Otherwise the current token is consumed and dispatched on its type.
    '''
    cur = self.cur
    if cur is None:
        return None

    val = cur.value
    ct = cur.type

    # Exceptions which shouldn't be consumed
    closes_construct = ct == "punc" and val in {")", ";", "}", "]"}
    if closes_construct or (ct == "kw" and val in {"case", "else"}):
        return None

    self.consume()

    if ct == "val":
        return ast.Value(cur.value).set_origin(cur)

    if ct == "str":
        return self.process_string(cur)

    if ct == "id":
        var = ast.Var(cur.value).set_origin(cur)
        # Check string call: an identifier directly followed by a string
        # is a call with that string as the only argument.
        text = self.maybe(type="str")
        if text:
            return ast.Call(var, [self.process_string(text)]).set_origin(text)
        return var

    if ct == "punc":
        if val == "(":
            if self.maybe(")"):
                return ast.Tuple([]).set_origin(cur)
            inner = self.semichain()
            self.expect(")")
            return semiAsExpr(inner)
        if val == "{":
            return self.parse_objectliteral(cur)
        if val == "[":
            return self.parse_listliteral(cur)
        # TODO: dot functions
        raise NotImplementedError(val)

    if ct == "op":
        return self.parse_unary(cur)

    if ct == "kw":
        if val == "if":
            return self.parse_if(cur)
        if val == "proto":
            return self.parse_proto(cur)
        if val == "import":
            return self.parse_import(cur)
        if val == "loop":
            return self.parse_loop(cur)
        if val == "while":
            # A while loop becomes a Loop whose body breaks as soon as
            # the condition fails. The third item returned by
            # parse_while is not used here.
            cond, body, _then, tail = self.parse_while()
            guarded = ast.If(cond, body, ast.Branch("break"))
            return ast.Loop(ast.Block([guarded]), el=tail).set_origin(cur)
        if val == "for":
            return self.parse_for(cur)
        if val == "switch":
            return self.parse_switch(cur)
        if val in {"break", "continue", "redo"}:
            # Todo: targeted branches
            return ast.Branch(val).set_origin(cur)
        if val in {"var", "const"}:
            return semiAsExpr(self.parse_declgroup(cur))
        if val == "function":
            return self.parse_funcliteral(cur)
        if val == "try":
            raise NotImplementedError("try")
        raise self.error(f"Unimplemented keyword {val}")

    raise self.error(f"Unknown token {val}")