def read_value(self, indent=""):
    """Read the next token and build the value it introduces.

    Returns None when the current token is the "EOF" marker. An opening
    brace or bracket is delegated to read_object / read_array; a string,
    number, boolean or null token is pushed back and re-read by
    read_litteral. Any other token is handed to self.fail().
    """
    log(f"{indent}read_value")
    self._read_next()
    token = self.current
    if token == "EOF":
        log(f"{indent} EOF")
        return None

    if isinstance(token, lexer.Value.BRACE_LEFT):
        node = self.read_object(indent + " ")
        log(f"{indent} found object")
        return node

    if isinstance(token, lexer.Value.BRACKET_LEFT):
        node = self.read_array(indent + " ")
        log(f"{indent} found array")
        return node

    literal_types = (
        lexer.Value.STRING,
        lexer.Value.NUMBER,
        lexer.Value.BOOLEAN,
        lexer.Value.NULL,
    )
    if isinstance(token, literal_types):
        # read_litteral reads the token itself, so hand it back first.
        self._unread()
        node = self.read_litteral(indent + " ")
        log(f"{indent} found litteral")
        return node

    # brace and bracket right, colon, comma
    self.fail()
def read_null(self, indent=""):
    """Consume the characters of ``null`` starting at the current one.

    Each expected character is compared with self.current; a mismatch is
    logged and passed to self.fail(). After the word has been consumed the
    extra character read past it is pushed back with self._unread().
    Returns a Value.NULL token.
    """
    log(f"{indent}read_null")
    for expected in "null":
        if expected != self.current:
            log(f"{indent} read_null: fail")
            self.fail()
        self._read_next()
    # The loop advanced one character beyond the word.
    self._unread()
    return Value.NULL()
def read_litteral(self, indent=""):
    """Read one token and convert it into the matching literal node.

    String, boolean and number tokens are wrapped in the node type of the
    same name, carrying the token's ``value``; a null token becomes
    Value.NULL(). Any other token is handed to self.fail().
    """
    self._read_next()
    log(f"{indent}read_litteral")
    token = self.current

    # (lexer token type, node type) pairs for literals that carry a value.
    valued = (
        (lexer.Value.STRING, Value.STRING),
        (lexer.Value.BOOLEAN, Value.BOOLEAN),
        (lexer.Value.NUMBER, Value.NUMBER),
    )
    for token_type, node_type in valued:
        if isinstance(token, token_type):
            return node_type(token.value)

    if isinstance(token, lexer.Value.NULL):
        return Value.NULL()
    self.fail()
def is_lexer(obj, indent=""):
    """Tell whether *obj* is a list made only of lexer token instances.

    The accepted token types are every attribute of ``Value`` whose name
    does not contain a double underscore. An empty list is accepted.
    """
    log(f"{indent}is_lexer")
    # [Value.NULL, Value.BRACE_LEFT, ...]
    names = [name for name in dir(Value) if "__" not in name]
    types = [Value.__getattribute__(Value, name) for name in names]

    def is_valid(item):
        return any(isinstance(item, kind) for kind in types)

    if not isinstance(obj, list):
        return False
    # all() over an empty list is True, so [] counts as a lexer result.
    return all(is_valid(item) for item in obj)
def read_string(self, indent=""):
    """Read the characters of a string up to its closing double quote.

    The text is kept raw: escape sequences are validated but not decoded,
    so the returned token still contains the backslashes.

    A backslash always takes the following character with it, so an
    escaped quote (``\\"``) does not terminate the string.

    Returns:
        Value.STRING holding the raw text between the quotes.

    Raises:
        LexerError: if the text contains a backslash that does not start
            one of the JSON escapes (``\\" \\\\ \\/ \\b \\f \\n \\r \\t``
            or ``\\u`` followed by four hex digits).

    Reaching the "EOF" marker before the closing quote is reported through
    self.fail().
    """
    log(f"{indent} read_string")
    chars = []
    self._read_next()
    while self.current != '"':
        if self.current == "EOF":
            log(f"{indent} read_string: EOF")
            self.fail()
        chars.append(self.current)
        if self.current == "\\":
            # Consume the escaped character together with its backslash so
            # that \" and \\ are never mistaken for the end of the string.
            self._read_next()
            if self.current == "EOF":
                log(f"{indent} read_string: EOF")
                self.fail()
            chars.append(self.current)
        self._read_next()
    res = "".join(chars)
    # Plain characters, or a backslash followed by a valid JSON escape.
    if not re.fullmatch(r'(?:[^"\\]|\\["\\/bfnrt]|\\u[0-9a-fA-F]{4})*', res):
        raise LexerError(f"this is not a valid string: {res}")
    return Value.STRING(res)
def parser(lex):
    """Return a parser for *lex*.

    Args:
        lex: JSON text (str), a token list accepted by is_lexer, or an
            object accepted by is_parser.

    Returns:
        A Parser built from the tokens, or *lex* itself when it already is
        a parser.

    Raises:
        Exception: if *lex* is none of the accepted kinds.
    """
    log("api.parser")
    if isinstance(lex, str):
        log(" api.parser: arg is str")
        # Tokenize, then re-dispatch so that text also ends up wrapped in a
        # Parser instead of being returned as a bare token list.
        return parser(lexer(lex))
    if is_lexer(lex):
        log(" api.parser: arg is lexer")
        return Parser(lex)
    if is_parser(lex):
        log(" api.parser: arg is parser")
        return lex
    raise Exception("wrong lexer type")
def unmarshal(ast):
    """Convert *ast* to Python data, whatever stage it is currently at.

    Text is tokenized and a token list is parsed, each time by calling
    this function again on the result; a parser is asked to unmarshal
    itself. Anything else raises Exception.
    """
    log("api.unmarshal")
    if isinstance(ast, str):
        log(" api.unmarshal: arg is str")
        tokens = lexer(ast)
        return unmarshal(tokens)
    if is_lexer(ast):
        log(" api.unmarshal: arg is lexer")
        parsed = parser(ast)
        return unmarshal(parsed)
    if not is_parser(ast):
        raise Exception("wrong ast type")
    log(" api.unmarshal: arg is parser")
    return ast.unmarshal()
def read_boolean(self, indent=""):
    """Consume ``true`` or ``false`` starting at the current character.

    The word is guessed from the current character ("t" or "f"); any other
    character is handed to self.fail(). Each character of the guessed word
    must then match the input, otherwise self.fail() is called. The extra
    character read past the word is pushed back with self._unread().
    Returns a Value.BOOLEAN built from the matched word.
    """
    log(f"{indent}read_boolean")
    word = {"t": "true", "f": "false"}.get(self.current)
    if word is None:
        # NOTE(review): relies on fail() not returning - confirm.
        self.fail()
    else:
        log(f"{indent} read_boolean: guessed {word}")
    for expected in word:
        if self.current != expected:
            log(f"{indent} read_boolean: wrong guessed")
            self.fail()
        self._read_next()
    log(f"{indent} read_boolean: well guessed")
    self._unread()
    return Value.BOOLEAN(word)
def query(ast, request):
    """Run *request* against *ast*, whatever stage it is currently at.

    Text is tokenized and a token list is parsed, each time by calling
    this function again on the result; a parser is wrapped in a Query
    whose exec(request) result is returned. Anything else raises Exception.
    """
    log("api.query")
    if isinstance(ast, str):
        log(" api.query: arg is str")
        tokens = lexer(ast)
        return query(tokens, request)
    if is_lexer(ast):
        log(" api.query: arg is lexer")
        parsed = parser(ast)
        return query(parsed, request)
    if not is_parser(ast):
        raise Exception("wrong ast type")
    log(" api.query: arg is parser")
    return Query(ast).exec(request)
def _read_until(self, read, end, separator=lexer.Value.COMMA, indent=""):
    """Yield items produced by *read* until a token of type *end* is met.

    Args:
        read: callable taking an indent string; called once per item.
        end: token type that closes the sequence.
        separator: token type expected between two items.
        indent: prefix for the log lines.

    An item that is not preceded by a separator, or a separator directly
    followed by the end token, is reported through self.fail().
    """

    def reach_end():
        log(f"{indent} read_end..", end="")
        self._read_next()
        if not isinstance(self.current, end):
            # Not the closing token: hand it back for the item reader.
            self._unread()
            log("not found")
            return False
        log("found")
        return True

    # True at the start and right after a separator: an item may follow.
    after_separator = True
    yielded_any = False
    while not reach_end():
        yielded_any = True
        if not after_separator:
            self.fail()
        after_separator = False
        yield read(indent + " ")
        log(f"{indent} read_comma..", end="")
        self._read_next()
        if isinstance(self.current, separator):
            log("found")
            after_separator = True
        else:
            log("not found")
            self._unread()
    if after_separator and yielded_any:
        # trailing comma
        self.fail()
def reach_end():
    """Read one token and report whether it is of the ``end`` type.

    Uses ``self``, ``end`` and ``indent`` from the enclosing scope. A token
    that is not the end token is pushed back with self._unread().
    """
    log(f"{indent} read_end..", end="")
    self._read_next()
    found = isinstance(self.current, end)
    if not found:
        self._unread()
    log("found" if found else "not found")
    return found
def lexer(json):
    """Return the token list for *json*.

    Text is run through Lexer and collected into a list; a value accepted
    by is_lexer is returned unchanged. Anything else raises Exception.
    """
    log("api.lexer")
    if isinstance(json, str):
        log(" api.lexer: arg is str")
        tokens = Lexer(str(json))
        return list(tokens)
    if not is_lexer(json):
        raise Exception("wrong lexer type")
    log(" api.lexer: arg is lexer")
    return json
def read_number(self, indent=""):
    """Read a number starting at the current character.

    Characters are collected while they can belong to a number (digits,
    ".", "e", "E", "+", "-"); the collected text is then checked against
    the JSON number grammar. The character that stopped the scan is pushed
    back with self._unread().

    Returns:
        Value.NUMBER holding the number's text.

    Raises:
        LexerError: if the collected text is not a valid number.
    """
    log(f"{indent}read_number")
    # "E" and "+" are required for exponents such as 1E5 or 1e+5, which
    # the grammar below accepts. The "EOF" marker is not in this set, so
    # the scan also stops at the end of the input.
    number_chars = {*string.digits, ".", "e", "E", "+", "-"}
    collected = []
    while self.current in number_chars:
        collected.append(self.current)
        self._read_next()
    res = "".join(collected)
    log(f"{indent} read_number: valid digits")
    if not re.fullmatch(r"-?(0|[1-9][0-9]*)(\.[0-9]+)?((e|E)(\+|-)?[0-9]+)?", res):
        raise LexerError(f"this is not a valid number: {res}")
    self._unread()
    log(f"{indent} read_number: valid regex")
    return Value.NUMBER(res)
def main():
    """Run the lexer, parser, unmarshal and query stages on the input file.

    Reads the options from parse_args() (``verbose``, ``input``, ``query``),
    logs the result of every stage and, when not verbose, prints the query
    result.
    """
    args = parse_args()
    lib.VERBOSE = args.verbose
    with open(args.input) as intput_file:
        # read() keeps the text as written; joining readlines() with "\n"
        # would double every newline because each line keeps its own.
        json = intput_file.read()

    log("::: LEXER :::")
    tokens = lexer(json)
    log(f"l: {tokens}")
    log()

    log("::: PARSER :::")
    p = parser(tokens)
    log(f"p: {p}")
    log(f"p.ast: {p.ast}")
    log()

    log("::: UNMARSHAL :::")
    res = unmarshal(json)
    log(f"py: {res}")
    log()

    log("::: QUERY :::")
    res = query(json, args.query)
    log(f"q: {res}")
    log()

    if not lib.VERBOSE:
        print(res)
def __next__(self, indent=""):
    """Return the next token, skipping blanks.

    ``self.line`` and ``self.col`` are updated for every character read
    here (characters consumed by the read_* helpers are not counted by
    this method).

    Returns:
        A Value token: a punctuation token, or the result of read_string,
        read_number, read_boolean or read_null.

    Raises:
        StopIteration: when the "EOF" marker is reached.

    Any other character is handed to self.fail().
    """
    # Blanks are skipped with a loop: recursing once per blank character
    # would hit the interpreter's recursion limit on long whitespace runs.
    while True:
        log(f"{indent}next")
        self._read_next()
        if self.current == "\n":
            log(f"{indent} next: new line")
            self.col = 0
            self.line += 1
        else:
            self.col += 1
        if self.current not in (" ", "\t", "\r", "\n"):
            break
        log(f"{indent} next: blank")

    # Single-character tokens: character -> (log name, token type).
    punctuation = {
        "{": ("brace_left", Value.BRACE_LEFT),
        "}": ("brace_right", Value.BRACE_RIGHT),
        "[": ("bracket_left", Value.BRACKET_LEFT),
        "]": ("bracket_right", Value.BRACKET_RIGHT),
        ":": ("colon", Value.COLON),
        ",": ("comma", Value.COMMA),
    }
    if self.current in punctuation:
        name, token_type = punctuation[self.current]
        log(f"{indent} next: {name}")
        return token_type()

    if self.current == '"':
        log(f"{indent} next: quote")
        return self.read_string(indent=indent + " ")
    if self.current in (*string.digits, "-"):
        log(f"{indent} next: number")
        return self.read_number(indent=indent + " ")
    if self.current in ("t", "f"):
        log(f"{indent} next: boolean")
        return self.read_boolean(indent=indent + " ")
    if self.current == "n":
        log(f"{indent} next: null")
        return self.read_null(indent=indent + " ")
    if self.current == "EOF":
        log(f"{indent} next: EOF")
        raise StopIteration
    log(f"{indent} next: fail")
    self.fail()
def read_object(self, indent=""):
    """Read properties until a closing brace token and return the object.

    Args:
        indent: prefix for the log lines; forwarded to _read_until so the
            log lines of the nested reads stay aligned under this one.

    Returns:
        Value.OBJECT whose ``properties`` is the list of items produced by
        read_property.
    """
    log(f"{indent}read_object")
    res = Value.OBJECT()
    end = lexer.Value.BRACE_RIGHT
    res.properties = list(
        self._read_until(self.read_property, end, indent=indent)
    )
    return res
def read_property(self, indent=""):
    """Read one ``key: value`` pair.

    Expects a string token, then a colon token, then a value read by
    read_value. A missing key, a missing colon or a falsy value is
    reported through self.fail(). Returns Value.PROPERTY(key, value).
    """
    log(f"{indent}read_property")

    def expect(token_type, label):
        # Read one token and report whether it has the expected type.
        log(f"{indent} {label}..", end="")
        self._read_next()
        if not isinstance(self.current, token_type):
            log("not found")
            self.fail()
        log("found")

    expect(lexer.Value.STRING, "read_string")
    key = Value.STRING(self.current.value)

    expect(lexer.Value.COLON, "read_colon")
    value = self.read_value(indent + " ")
    if not value:
        self.fail()
    return Value.PROPERTY(key, value)
def read_array(self, indent=""):
    """Read values until a closing bracket token and return the array.

    Args:
        indent: prefix for the log lines; forwarded to _read_until so the
            log lines of the nested reads stay aligned under this one.

    Returns:
        Value.ARRAY whose ``values`` is the list of items produced by
        read_value.
    """
    log(f"{indent}read_array")
    res = Value.ARRAY()
    end = lexer.Value.BRACKET_RIGHT
    res.values = list(self._read_until(self.read_value, end, indent=indent))
    return res
def unmarshal(self, ast=None, indent=""):
    """Convert an AST node to plain Python data.

    Args:
        ast: node to convert; defaults to ``self.ast`` when None.
        indent: prefix for the log lines, grown by one space per level.

    Returns:
        dict for an object, list for an array, a ``(key, value)`` tuple
        for a property, None for null, str for a string, float for a
        number and bool for a boolean.

    Raises:
        ParserError: if *ast* is not one of the node types above.
    """
    # Only a missing argument selects the root; testing truthiness would
    # also replace any node that happens to be falsy.
    if ast is None:
        ast = self.ast
    child_indent = indent + " "

    if isinstance(ast, Value.OBJECT):
        log(f"{indent}object")
        # Each property unmarshals to a (key, value) pair.
        return dict(
            self.unmarshal(ast=prop, indent=child_indent)
            for prop in ast.properties
        )
    if isinstance(ast, Value.ARRAY):
        log(f"{indent}array")
        return [
            self.unmarshal(ast=item, indent=child_indent)
            for item in ast.values
        ]
    if isinstance(ast, Value.PROPERTY):
        log(f"{indent}property")
        key = self.unmarshal(ast=ast.key, indent=child_indent)
        value = self.unmarshal(ast=ast.value, indent=child_indent)
        return key, value
    if isinstance(ast, Value.NULL):
        log(f"{indent}null")
        return None
    if isinstance(ast, Value.STRING):
        log(f"{indent}string")
        return str(ast.value)
    if isinstance(ast, Value.NUMBER):
        log(f"{indent}number")
        return float(ast.value)
    if isinstance(ast, Value.BOOLEAN):
        log(f"{indent}boolean")
        value = ast.value
        if isinstance(value, str):
            # NOTE(review): the value presumably arrives as the text
            # "true"/"false" - confirm. bool() of any non-empty text is
            # True, so the text is compared instead.
            return value == "true"
        return bool(value)
    raise ParserError(f"unexpected ast: {ast}")