예제 #1
0
class _Lexer(events.EventSource):
    """Drive a state machine from tokenizer events and emit JSON lexer events.

    Every event reported by the internal ``_Tokenizer`` is wrapped in an
    ``Event`` and consumed by a ``StateMachine``.  After a state change the
    lexer fires either the name of the state that was entered (document,
    object and array boundaries) or a ``LITERAL`` event carrying a
    ``JSONLiteralType`` and the decoded value.
    """

    # Optional sign, optional integer part, optional fraction, optional exponent.
    _number_pattern = re.compile('^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?$')
    _true = 'true'
    _false = 'false'
    _null = 'null'
    # Name of the event fired for every string/number/boolean/null value.
    _literal = 'LITERAL'

    # State names.
    _s_new = 'new'
    _s_doc_start = 'doc_start'
    _s_doc_end = 'doc_end'
    _s_o_start = 'object_start'
    _s_o_end = 'object_end'
    _s_a_start = 'array_start'
    _s_a_end = 'array_end'
    _s_s_start = 'string_start'
    _s_s_end = 'string_end'
    _s_literal = 'literal'
    _s_more = 'more'
    _s_escaping = 'string_escaping'
    _k_escaping = 'key_escaping'

    # Event names consumed by the state machine.
    _e_start = 'start'
    _e_end = 'end'
    _e_reset = 'reset'
    _e_lbrace = 'lbrace'
    _e_rbrace = 'rbrace'
    _e_lsquare = 'lsquare'
    _e_rsquare = 'rsquare'
    _e_char = 'char'
    _e_comma = 'comma'
    _e_colon = 'colon'
    _e_dblquote = 'dblquote'
    _e_whitespace = 'whitespace'
    _e_newline = 'newline'
    _e_backslash = 'backslash'

    def __init__(self):
        """Create the tokenizer, the text accumulator and the state machine."""
        super(_Lexer, self).__init__()
        self._started = False
        self._tokenizer = _Tokenizer()
        # Every tokenizer event is forwarded to the state machine.
        self._tokenizer.add_catch_all_listener(self._catch_all)
        self._text_accumulator = _TextAccumulator()
        self._text_accumulator.bind(self._tokenizer)
        self._setup_state_machine()

    def _setup_state_machine(self):
        """Build the lexer's states and transitions and store the machine.

        Each state lists the events that move it elsewhere (``on``), the
        events declared with ``loops`` and ``ignores``, and the events
        declared ``faulty``.  An event is declared at most once per state so
        the outcome does not depend on how ``State`` resolves conflicting
        declarations.
        """
        new = State(_Lexer._s_new)
        doc_start = State(_Lexer._s_doc_start)
        doc_end = State(_Lexer._s_doc_end)
        object_start = State(_Lexer._s_o_start)
        object_end = State(_Lexer._s_o_end)
        array_start = State(_Lexer._s_a_start)
        array_end = State(_Lexer._s_a_end)
        # NOTE(review): key_escaping has no transitions and nothing leads to
        # it; it is kept (and still registered) only for compatibility.
        key_escaping = State(_Lexer._k_escaping)
        string_start = State(_Lexer._s_s_start)
        string_end = State(_Lexer._s_s_end)
        literal = State(_Lexer._s_literal)
        more = State(_Lexer._s_more)
        string_escaping = State(_Lexer._s_escaping)

        e_start = Event(_Lexer._e_start)
        e_end = Event(_Lexer._e_end)
        e_reset = Event(_Lexer._e_reset)
        e_lbrace = Event(_Lexer._e_lbrace)
        e_rbrace = Event(_Lexer._e_rbrace)
        e_lsquare = Event(_Lexer._e_lsquare)
        e_rsquare = Event(_Lexer._e_rsquare)
        e_char = Event(_Lexer._e_char)
        e_comma = Event(_Lexer._e_comma)
        e_colon = Event(_Lexer._e_colon)
        e_dblquote = Event(_Lexer._e_dblquote)
        e_whitespace = Event(_Lexer._e_whitespace)
        e_newline = Event(_Lexer._e_newline)
        e_backslash = Event(_Lexer._e_backslash)

        # Document level states
        new.on(e_start, doc_start)
        new.on(e_end, doc_end)
        new.ignores(e_newline, e_whitespace)
        new.faulty(e_lbrace, e_lsquare, e_rbrace, e_rsquare, e_char, e_comma, e_colon, e_dblquote, e_backslash)

        doc_start.on(e_lbrace, object_start)
        doc_start.on(e_lsquare, array_start)
        doc_start.ignores(e_newline, e_whitespace)
        doc_start.faulty(e_start, e_end, e_rbrace, e_rsquare, e_char, e_comma, e_colon, e_dblquote, e_backslash)

        doc_end.on(e_reset, new)
        doc_end.faulty(e_start, e_end, e_lbrace, e_rbrace, e_lsquare, e_rsquare, e_char, e_comma, e_colon, e_dblquote,
                       e_backslash)

        # JSON Object related states
        object_start.loops(e_lbrace)
        object_start.on(e_rbrace, object_end)
        object_start.on(e_dblquote, string_start)
        object_start.ignores(e_newline, e_whitespace)
        object_start.faulty(e_start, e_end, e_lsquare, e_rsquare, e_comma, e_colon, e_backslash)

        object_end.on(e_end, doc_end)
        object_end.loops(e_lbrace, e_rbrace)
        object_end.on(e_rsquare, array_end)
        object_end.on(e_comma, more)
        object_end.ignores(e_whitespace, e_newline)
        object_end.faulty(e_start, e_reset, e_lsquare, e_char, e_colon, e_dblquote, e_backslash)

        # JSON Array related states
        array_start.on(e_lbrace, object_start)
        array_start.on(e_rsquare, array_end)
        array_start.loops(e_lsquare)
        array_start.on(e_char, literal)
        array_start.on(e_dblquote, string_start)
        array_start.ignores(e_newline, e_whitespace)
        # e_rsquare is a valid transition above (it closes an empty array),
        # so it must not be declared faulty as well.
        array_start.faulty(e_start, e_end, e_reset, e_rbrace, e_comma, e_colon, e_backslash)

        array_end.on(e_end, doc_end)
        array_end.on(e_comma, more)
        array_end.on(e_rbrace, object_end)
        array_end.loops(e_rsquare)
        array_end.ignores(e_whitespace, e_newline)
        # e_rsquare loops above (closing nested arrays), so it is not faulty.
        array_end.faulty(e_start, e_reset, e_lbrace, e_lsquare, e_char, e_colon, e_dblquote,
                         e_backslash)

        # String related states: structural characters inside a string are
        # part of its content, so they loop instead of changing state.
        string_start.loops(e_char, e_comma, e_colon, e_whitespace, e_newline, e_lbrace, e_rbrace, e_lsquare, e_rsquare)
        string_start.on(e_backslash, string_escaping)
        string_start.on(e_dblquote, string_end)
        string_start.faulty(e_start, e_end, e_reset)

        # The single event following a backslash returns to the string body.
        string_escaping.on(e_char, string_start)
        string_escaping.on(e_dblquote, string_start)
        string_escaping.on(e_backslash, string_start)
        string_escaping.faulty(e_start, e_end, e_reset, e_lbrace, e_rbrace, e_lsquare, e_rsquare, e_comma, e_colon,
                               e_whitespace, e_newline)

        string_end.on(e_rbrace, object_end)
        string_end.on(e_rsquare, array_end)
        string_end.on(e_comma, more)
        string_end.on(e_colon, more)
        string_end.ignores(e_whitespace, e_newline)
        # e_colon is a valid transition above (it follows an object key),
        # so it must not be declared faulty as well.
        string_end.faulty(e_start, e_end, e_reset, e_lbrace, e_lsquare, e_char, e_dblquote, e_backslash)

        # Unquoted literals: numbers, true, false, null
        literal.loops(e_char)
        literal.on(e_rbrace, object_end)
        literal.on(e_rsquare, array_end)
        literal.on(e_comma, more)
        literal.ignores(e_newline, e_whitespace)
        literal.faulty(e_start, e_end, e_reset, e_lbrace, e_lsquare, e_colon, e_dblquote, e_backslash)

        # State entered after a comma or colon, waiting for the next value
        more.on(e_lbrace, object_start)
        more.on(e_lsquare, array_start)
        more.on(e_char, literal)
        more.on(e_dblquote, string_start)
        more.ignores(e_whitespace, e_newline)
        more.faulty(e_start, e_end, e_reset, e_rbrace, e_rsquare, e_comma, e_colon, e_backslash)

        self._state_machine = StateMachine(new)
        # string_escaping is reachable from string_start, so it is registered
        # together with every other state.
        self._state_machine.add_states(new, doc_start, doc_end, object_start, object_end, array_start, array_end,
                                       key_escaping, literal, string_start, string_escaping, string_end, more)
        self._state_machine.add_listener('before_state_change', self._on_before_state_change)
        self._state_machine.add_listener('after_state_change', self._on_after_state_change)
        self._state_machine.add_listener('error', self._on_error)

    def _on_error(self, current_state, event):
        """Handle the state machine's 'error' event.

        Raises:
            RuntimeError: always, naming the event and the current state.
        """
        raise RuntimeError("{} event cannot be processed in current state: {}".format(event.name, current_state.name))

    def _on_before_state_change(self, current_state, pending_event):
        """Listener for 'before_state_change'; intentionally does nothing."""
        pass

    def _on_after_state_change(self, previous_state, event, new_state):
        """Fire lexer events that result from a completed state change.

        Args:
            previous_state: State the machine was in before the event.
            event: Event that was consumed (not used here).
            new_state: State the machine is in now.

        Raises:
            RuntimeError: if an unquoted literal is neither a number nor one
                of ``true``, ``false`` or ``null``.
        """
        # TODO reorder new_state by probability for perf
        if previous_state.equals(_Lexer._s_s_end):
            text = self._text_accumulator.pop().strip()
            text = text[1:-1]  # remove surrounding double quotes
            self.fire(_Lexer._literal, JSONLiteralType.STRING, text)

        # An unquoted literal is complete once the machine leaves the
        # literal state.
        if previous_state.equals(_Lexer._s_literal) and not new_state.equals(_Lexer._s_literal):
            literal = self._text_accumulator.pop().strip()
            if _Lexer._number_pattern.fullmatch(literal):
                # Prefer int; fall back to float for fractions and exponents.
                try:
                    number = int(literal)
                except ValueError:
                    number = float(literal)
                self.fire(_Lexer._literal, JSONLiteralType.NUMBER, number)
            elif literal == _Lexer._true:
                self.fire(_Lexer._literal, JSONLiteralType.BOOLEAN, True)
            elif literal == _Lexer._false:
                self.fire(_Lexer._literal, JSONLiteralType.BOOLEAN, False)
            elif literal == _Lexer._null:
                self.fire(_Lexer._literal, JSONLiteralType.NULL, None)
            else:
                raise RuntimeError("Invalid Literal {}".format(literal))

        # Structural states are announced under their own state name.
        if new_state.equals(_Lexer._s_doc_start, _Lexer._s_doc_end, _Lexer._s_o_start,
                            _Lexer._s_o_end, _Lexer._s_a_start, _Lexer._s_a_end):
            self.fire(new_state.name)
        if new_state.equals(_Lexer._s_doc_end):
            # Return the machine to its initial state so that the next
            # consume() call starts a new document.
            self._state_machine.consume(Event(_Lexer._e_reset))
            self._started = False

    def _catch_all(self, event_name, payload):
        """Forward a tokenizer event to the state machine."""
        e = Event(event_name, payload)
        self._state_machine.consume(e)

    def consume(self, data):
        """Feed ``data`` to the tokenizer.

        The ``start`` event is sent to the state machine first when the
        lexer has not been started yet.
        """
        if not self._started:
            self._started = True
            self._state_machine.consume(Event(_Lexer._e_start))
        self._tokenizer.consume(data)

    def close(self):
        """Send the ``end`` event and drop the lexer's collaborators.

        Does nothing when the lexer is not started.  After a successful
        close the tokenizer, text accumulator and state machine references
        are set to ``None``.
        """
        if self._started:
            self._state_machine.consume(Event(_Lexer._e_end))
            self._started = False
            self._tokenizer = None
            self._text_accumulator = None
            self._state_machine = None
예제 #2
0
    def _setup_state_machine(self):
        """Build the lexer's states and transitions and store the machine.

        Each state lists the events that move it elsewhere (``on``), the
        events declared with ``loops`` and ``ignores``, and the events
        declared ``faulty``.  An event is declared at most once per state so
        the outcome does not depend on how ``State`` resolves conflicting
        declarations.  The finished machine is stored in
        ``self._state_machine`` with this object's before/after/error
        listeners attached.
        """
        new = State(_Lexer._s_new)
        doc_start = State(_Lexer._s_doc_start)
        doc_end = State(_Lexer._s_doc_end)
        object_start = State(_Lexer._s_o_start)
        object_end = State(_Lexer._s_o_end)
        array_start = State(_Lexer._s_a_start)
        array_end = State(_Lexer._s_a_end)
        # NOTE(review): key_escaping has no transitions and nothing leads to
        # it; it is kept (and still registered) only for compatibility.
        key_escaping = State(_Lexer._k_escaping)
        string_start = State(_Lexer._s_s_start)
        string_end = State(_Lexer._s_s_end)
        literal = State(_Lexer._s_literal)
        more = State(_Lexer._s_more)
        string_escaping = State(_Lexer._s_escaping)

        e_start = Event(_Lexer._e_start)
        e_end = Event(_Lexer._e_end)
        e_reset = Event(_Lexer._e_reset)
        e_lbrace = Event(_Lexer._e_lbrace)
        e_rbrace = Event(_Lexer._e_rbrace)
        e_lsquare = Event(_Lexer._e_lsquare)
        e_rsquare = Event(_Lexer._e_rsquare)
        e_char = Event(_Lexer._e_char)
        e_comma = Event(_Lexer._e_comma)
        e_colon = Event(_Lexer._e_colon)
        e_dblquote = Event(_Lexer._e_dblquote)
        e_whitespace = Event(_Lexer._e_whitespace)
        e_newline = Event(_Lexer._e_newline)
        e_backslash = Event(_Lexer._e_backslash)

        # Document level states
        new.on(e_start, doc_start)
        new.on(e_end, doc_end)
        new.ignores(e_newline, e_whitespace)
        new.faulty(e_lbrace, e_lsquare, e_rbrace, e_rsquare, e_char, e_comma, e_colon, e_dblquote, e_backslash)

        doc_start.on(e_lbrace, object_start)
        doc_start.on(e_lsquare, array_start)
        doc_start.ignores(e_newline, e_whitespace)
        doc_start.faulty(e_start, e_end, e_rbrace, e_rsquare, e_char, e_comma, e_colon, e_dblquote, e_backslash)

        doc_end.on(e_reset, new)
        doc_end.faulty(e_start, e_end, e_lbrace, e_rbrace, e_lsquare, e_rsquare, e_char, e_comma, e_colon, e_dblquote,
                       e_backslash)

        # JSON Object related states
        object_start.loops(e_lbrace)
        object_start.on(e_rbrace, object_end)
        object_start.on(e_dblquote, string_start)
        object_start.ignores(e_newline, e_whitespace)
        object_start.faulty(e_start, e_end, e_lsquare, e_rsquare, e_comma, e_colon, e_backslash)

        object_end.on(e_end, doc_end)
        object_end.loops(e_lbrace, e_rbrace)
        object_end.on(e_rsquare, array_end)
        object_end.on(e_comma, more)
        object_end.ignores(e_whitespace, e_newline)
        object_end.faulty(e_start, e_reset, e_lsquare, e_char, e_colon, e_dblquote, e_backslash)

        # JSON Array related states
        array_start.on(e_lbrace, object_start)
        array_start.on(e_rsquare, array_end)
        array_start.loops(e_lsquare)
        array_start.on(e_char, literal)
        array_start.on(e_dblquote, string_start)
        array_start.ignores(e_newline, e_whitespace)
        # e_rsquare is a valid transition above (it closes an empty array),
        # so it must not be declared faulty as well.
        array_start.faulty(e_start, e_end, e_reset, e_rbrace, e_comma, e_colon, e_backslash)

        array_end.on(e_end, doc_end)
        array_end.on(e_comma, more)
        array_end.on(e_rbrace, object_end)
        array_end.loops(e_rsquare)
        array_end.ignores(e_whitespace, e_newline)
        # e_rsquare loops above (closing nested arrays), so it is not faulty.
        array_end.faulty(e_start, e_reset, e_lbrace, e_lsquare, e_char, e_colon, e_dblquote,
                         e_backslash)

        # String related states: structural characters inside a string are
        # part of its content, so they loop instead of changing state.
        string_start.loops(e_char, e_comma, e_colon, e_whitespace, e_newline, e_lbrace, e_rbrace, e_lsquare, e_rsquare)
        string_start.on(e_backslash, string_escaping)
        string_start.on(e_dblquote, string_end)
        string_start.faulty(e_start, e_end, e_reset)

        # The single event following a backslash returns to the string body.
        string_escaping.on(e_char, string_start)
        string_escaping.on(e_dblquote, string_start)
        string_escaping.on(e_backslash, string_start)
        string_escaping.faulty(e_start, e_end, e_reset, e_lbrace, e_rbrace, e_lsquare, e_rsquare, e_comma, e_colon,
                               e_whitespace, e_newline)

        string_end.on(e_rbrace, object_end)
        string_end.on(e_rsquare, array_end)
        string_end.on(e_comma, more)
        string_end.on(e_colon, more)
        string_end.ignores(e_whitespace, e_newline)
        # e_colon is a valid transition above (it follows an object key),
        # so it must not be declared faulty as well.
        string_end.faulty(e_start, e_end, e_reset, e_lbrace, e_lsquare, e_char, e_dblquote, e_backslash)

        # Unquoted literals: numbers, true, false, null
        literal.loops(e_char)
        literal.on(e_rbrace, object_end)
        literal.on(e_rsquare, array_end)
        literal.on(e_comma, more)
        literal.ignores(e_newline, e_whitespace)
        literal.faulty(e_start, e_end, e_reset, e_lbrace, e_lsquare, e_colon, e_dblquote, e_backslash)

        # State entered after a comma or colon, waiting for the next value
        more.on(e_lbrace, object_start)
        more.on(e_lsquare, array_start)
        more.on(e_char, literal)
        more.on(e_dblquote, string_start)
        more.ignores(e_whitespace, e_newline)
        more.faulty(e_start, e_end, e_reset, e_rbrace, e_rsquare, e_comma, e_colon, e_backslash)

        self._state_machine = StateMachine(new)
        # string_escaping is reachable from string_start, so it is registered
        # together with every other state.
        self._state_machine.add_states(new, doc_start, doc_end, object_start, object_end, array_start, array_end,
                                       key_escaping, literal, string_start, string_escaping, string_end, more)
        self._state_machine.add_listener('before_state_change', self._on_before_state_change)
        self._state_machine.add_listener('after_state_change', self._on_after_state_change)
        self._state_machine.add_listener('error', self._on_error)