Example #1
    def parse_statement_delimiter(tokens: abc.Generator) -> bool:
        """Parses the tokens for a Statement Delimiter.

        *tokens* is expected to be a *generator iterator* which
        provides ``pvl.token`` objects.

         <Statement-Delimiter> ::= <WSC>*
                     (<white-space-character> | <comment> | ';' | <EOF>)

        Although the above structure comes from Figure 2-4
        of the Blue Book, the <white-space-character> and <comment>
        elements are redundant with the presence of <WSC>*
        so it can be simplified to:

         <Statement-Delimiter> ::= <WSC>* [ ';' | <EOF> ]

        Typically written [<Statement-Delimiter>].
        """
        for t in tokens:
            if t.is_WSC():
                # If there's a comment, could parse here.
                pass
            elif t.is_delimiter():
                return True
            else:
                tokens.send(t)  # Put the next token back into the generator
                return False
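
Nearly every example on this page relies on the same convention: calling
``send()`` on the token generator "puts a token back" so that the very next
``next()`` call yields it again.  The sketch below is a minimal, generic
push-back generator written only to illustrate that convention; it is not
pvl's actual lexer, and the name ``pushback`` is made up for this page.

from collections import abc


def pushback(iterable) -> abc.Generator:
    """Yield items from *iterable*; any item sent back in via ``send()``
    is yielded again on the caller's next ``next()`` call."""
    for item in iterable:
        sent = yield item
        while sent is not None:
            yield                  # this bare yield is what send() returns
            sent = yield sent      # re-deliver the pushed-back item


tokens = pushback(iter(["GROUP", "=", "IMAGE"]))
t = next(tokens)                   # 'GROUP'
tokens.send(t)                     # put it back; send() itself returns None
assert next(tokens) == "GROUP"     # the pushed-back item comes out again
assert next(tokens) == "="         # then the stream continues normally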
Example #2
File: parser.py Project: jessemapel/pvl
    def _parse_set_seq(self, delimiters, tokens: abc.Generator) -> list:
        """The internal parsing of PVL Sets and Sequences are very
        similar, and this function provides that shared logic.

        *delimiters* is a two-tuple containing the start and end
        characters for the PVL Set or Sequence.
        """
        t = next(tokens)
        if t != delimiters[0]:
            tokens.send(t)
            raise ValueError(f'Expecting a begin delimiter "{delimiters[0]}" '
                             f'but found: "{t}"')
        set_seq = list()
        # Initial WSC and/or empty
        if self.parse_WSC_until(delimiters[1], tokens):
            return set_seq

        # First item:
        set_seq.append(self.parse_value(tokens))
        if self.parse_WSC_until(delimiters[1], tokens):
            return set_seq

        # Remaining items, if any
        for t in tokens:
            # print(f'in loop, t: {t}, set_seq: {set_seq}')
            if t == ',':
                self.parse_WSC_until(None, tokens)  # consume WSC after ','
                set_seq.append(self.parse_value(tokens))
                if self.parse_WSC_until(delimiters[1], tokens):
                    return set_seq
            else:
                tokens.send(t)
                tokens.throw(ValueError,
                             'While parsing, expected a comma (,) '
                             f'but found: "{t}"')
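
For context, PVL writes a Set with braces and a Sequence with parentheses,
so the *delimiters* two-tuple passed to this helper would look something
like the hypothetical constants below (the names are made up for
illustration, not taken from pvl's grammar objects):

SET_DELIMITERS = ("{", "}")        # PVL Set,      e.g.  { red, green }
SEQUENCE_DELIMITERS = ("(", ")")   # PVL Sequence, e.g.  ( 1, 2, 3 )

# _parse_set_seq() then accepts token streams shaped like:
#   {  }            an empty Set
#   ( 1 )           a single-item Sequence
#   ( 1, 2, 3 )     comma-separated items with optional whitespace/comments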
Example #3
    def parse_end_statement(self, tokens: abc.Generator) -> None:
        """Parses the tokens for an End Statement.

        <End-Statement> ::= "END" ( <WSC>* | [<Statement-Delimiter>] )

        """
        try:
            end = next(tokens)
            if not end.is_end_statement():
                tokens.send(end)
                raise ValueError(
                    "Expecting an End Statement, like "
                    f'"{self.grammar.end_statements}" but found '
                    f'"{end}"'
                )

            try:
                t = next(tokens)
                if t.is_WSC():
                    # maybe process comment
                    return
                else:
                    tokens.send(t)
                    return
            except LexerError:
                pass
        except StopIteration:
            pass

        return
Example #4
    def parse_end_aggregation(
        self, begin_agg: str, block_name: str, tokens: abc.Generator
    ) -> None:
        """Parses the tokens for an End Aggregation Statement.

        <End-Aggregation-Statement-block> ::=
             <End-Aggregation-Statement> [<WSC>* '=' <WSC>*
             <Block-Name>] [<Statement-Delimiter>]

        Where <Block-Name> ::= <Parameter-Name>

        """
        end_agg = next(tokens)

        # Need to do a little song and dance to case-independently
        # match the keys:
        for k in self.grammar.aggregation_keywords.keys():
            if k.casefold() == begin_agg.casefold():
                truecase_begin = k
                break
        if (
            end_agg.casefold()
            != self.grammar.aggregation_keywords[truecase_begin].casefold()
        ):
            tokens.send(end_agg)
            raise ValueError(
                "Expecting an End-Aggegation-Statement that "
                "matched the Begin-Aggregation_Statement, "
                f'"{begin_agg}" but found: {end_agg}'
            )

        try:
            self.parse_around_equals(tokens)
        except (ParseError, ValueError):  # No equals statement, which is fine.
            self.parse_statement_delimiter(tokens)
            return None

        t = next(tokens)
        if t != block_name:
            tokens.send(t)
            tokens.throw(
                ValueError,
                f'Expecting a Block-Name after "{end_agg} =" '
                f'that matches "{block_name}", but found: '
                f'"{t}"',
            )

        self.parse_statement_delimiter(tokens)

        return None
Example #5
from collections import abc


def bar(successor: abc.Generator = None):
    while True:
        msg = yield
        msg = '%s > %s in %s' % (msg, msg.split()[0], 'bar')
        if successor is not None:
            msg = successor.send(msg)
        yield msg
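
A coroutine like ``bar`` must be primed before it can receive values, and
because of the two ``yield`` points it alternates between accepting a
message and emitting a result.  A short usage sketch (the messages are made
up for illustration):

b = bar()
next(b)                        # prime: run up to the first `yield`
print(b.send("hello world"))   # -> 'hello world > hello in bar'
next(b)                        # step past the second yield, back to `msg = yield`
print(b.send("good morning"))  # -> 'good morning > good in bar'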
Example #6
    def parse_value(self, tokens: abc.Generator):
        """Parses PVL Values.

         <Value> ::= (<Simple-Value> | <Set> | <Sequence>)
                     [<WSC>* <Units Expression>]

        Returns the decoded <Value> as an appropriate Python object.
        """
        value = None

        try:
            t = next(tokens)
            value = self.decoder.decode_simple_value(t)
        except ValueError:
            tokens.send(t)
            for p in (
                self.parse_set,
                self.parse_sequence,
                self.parse_value_post_hook,
            ):
                try:
                    value = p(tokens)
                    break
                except LexerError:
                    # A LexerError is a subclass of ValueError, but
                    # if we get a LexerError, that's a problem and
                    # we need to raise it, and not let it pass.
                    raise
                except ValueError:
                    # Getting a ValueError is a normal consequence of
                    # one of the parsing strategies not working; this
                    # pass allows us to go on to the next one.
                    pass
            else:
                tokens.throw(
                    ValueError,
                    "Was expecting a Simple Value, or the "
                    "beginning of a Set or Sequence, but "
                    f'found: "{t}"',
                )

        # print(f'in parse_value, value is: {value}')
        self.parse_WSC_until(None, tokens)
        try:
            return self.parse_units(value, tokens)
        except (ValueError, StopIteration):
            return value
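
The for/else in the ``except ValueError`` branch above is what makes the
strategy chain work: the ``else`` suite runs only when the loop finishes
without hitting ``break``, i.e. when none of the parsing strategies
succeeded.  A minimal, standalone illustration of that same control flow
(plain Python, not pvl code):

def first_match(text):
    """Try converters in order; raise only if none of them accepts *text*."""
    for convert in (int, float, complex):
        try:
            value = convert(text)
            break              # a strategy worked, stop trying the others
        except ValueError:
            pass               # normal: move on to the next strategy
    else:
        # Runs only if the loop never hit `break`.
        raise ValueError(f"no converter accepted {text!r}")
    return value


print(first_match("3.14"))     # int() fails, float() succeeds -> 3.14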
Example #7
    def parse_units(self, value, tokens: abc.Generator) -> str:
        """Parses PVL Units Expression.

         <Units-Expression> ::= "<" [<white-space>] <Units-Value>
                                    [<white-space>] ">"

        and

         <Units-Value> ::= <units-character>
                             [ [ <units-character> | <white-space> ]*
                                 <units-character> ]

        Returns the *value* and the <Units-Value> as a ``Units()``
        object.
        """
        t = next(tokens)

        if not t.startswith(self.grammar.units_delimiters[0]):
            tokens.send(t)
            raise ValueError(
                "Was expecting the start units delimiter, "
                + '"{}" '.format(self.grammar.units_delimiters[0])
                + f'but found "{t}"'
            )

        if not t.endswith(self.grammar.units_delimiters[1]):
            tokens.send(t)
            raise ValueError(
                "Was expecting the end units delimiter, "
                + '"{}" '.format(self.grammar.units_delimiters[1])
                + f'at the end, but found "{t}"'
            )

        delim_strip = t.strip("".join(self.grammar.units_delimiters))

        units_value = delim_strip.strip("".join(self.grammar.whitespace))

        for d in self.grammar.units_delimiters:
            if d in units_value:
                tokens.throw(
                    ValueError,
                    "Was expecting a units character, but found a "
                    f'unit delimiter, "{d}" instead.',
                )

        return self.decoder.decode_quantity(value, units_value)
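
A tiny, self-contained illustration of the stripping done above, assuming
the units delimiters are "<" and ">" (as shown in the docstring) and
ordinary whitespace; the literal string is invented for the example:

t = "<  m / s  >"
delim_strip = t.strip("<>")             # '  m / s  '
units_value = delim_strip.strip(" \t")  # 'm / s'
print(units_value)                      # -> m / s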
Example #8
    def parse_begin_aggregation_statement(
        self, tokens: abc.Generator
    ) -> tuple:
        """Parses the tokens for a Begin Aggregation Statement, and returns
        the name Block Name as a ``str``.

        <Begin-Aggregation-Statement-block> ::=
             <Begin-Aggregation-Statement> <WSC>* '=' <WSC>*
             <Block-Name> [<Statement-Delimiter>]

        Where <Block-Name> ::= <Parameter-Name>

        """
        try:
            begin = next(tokens)
            if not begin.is_begin_aggregation():
                tokens.send(begin)
                raise ValueError(
                    "Expecting a Begin-Aggegation-Statement, but "
                    f"found: {begin}"
                )
        except StopIteration:
            raise ValueError(
                "Ran out of tokens before starting to parse "
                "a Begin-Aggegation-Statement."
            )

        try:
            self.parse_around_equals(tokens)
        except ValueError:
            tokens.throw(
                ValueError, f'Expecting an equals sign after "{begin}" '
            )

        block_name = next(tokens)
        if not block_name.is_parameter_name():
            tokens.throw(
                ValueError,
                f'Expecting a Block-Name after "{begin} =" '
                f'but found: "{block_name}"',
            )

        self.parse_statement_delimiter(tokens)

        return begin, str(block_name)
Example #9
    def parse_WSC_until(token: str, tokens: abc.Generator) -> bool:
        """Consumes objects from *tokens*, if the object's *.is_WSC()*
        function returns *True*, it will continue until *token* is
        encountered and will return *True*.  If it encounters an object
        that does not meet these conditions, it will 'return' that
        object to *tokens* and will return *False*.

        *tokens* is expected to be a *generator iterator* which
        provides ``pvl.token`` objects.
        """
        for t in tokens:
            if t == token:
                return True
            elif t.is_WSC():
                # If there's a comment, could parse here.
                pass
            else:
                tokens.send(t)
                return False
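
A hedged usage sketch for this helper, reusing the hypothetical
``pushback()`` generator from Example #1 and a made-up stand-in token type
(real pvl code passes ``pvl.token.Token`` objects and calls this as a
method; here it is treated as a free function purely for the demo):

class FakeToken(str):
    """Hypothetical stand-in for pvl.token.Token; only what the demo needs."""
    def is_WSC(self):
        return self.isspace()


tokens = pushback(iter([FakeToken(" "), FakeToken("="), FakeToken("IMAGE")]))
assert parse_WSC_until("=", tokens) is True   # whitespace consumed, '=' found
assert next(tokens) == "IMAGE"                # the rest of the stream is intact

tokens = pushback(iter([FakeToken("IMAGE")]))
assert parse_WSC_until("=", tokens) is False  # 'IMAGE' is neither WSC nor '='
assert next(tokens) == "IMAGE"                # ...because it was pushed back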
Example #10
    def parse_assignment_statement(self, tokens: abc.Generator) -> tuple:
        """Parses the tokens for an Assignment Statement.

        The returned two-tuple contains the Parameter Name in the
        first element, and the Value in the second.

         <Assignment-Statement> ::= <Parameter-Name> <WSC>* '=' <WSC>*
                                     <Value> [<Statement-Delimiter>]

        """
        try:
            t = next(tokens)
            if t.is_parameter_name():
                parameter_name = str(t)
            else:
                tokens.send(t)
                raise ValueError(
                    "Expecting a Parameter Name, but " f'found: "{t}"'
                )
        except StopIteration:
            raise ValueError(
                "Ran out of tokens before starting to parse "
                "an Assignment-Statement."
            )

        self.parse_around_equals(tokens)

        try:
            # print(f'parameter name: {parameter_name}')
            value = self.parse_value(tokens)
        except StopIteration:
            raise ParseError(
                "Ran out of tokens to parse after the equals "
                "sign in an Assignment-Statement: "
                f'"{parameter_name} =".',
                t,
            )

        self.parse_statement_delimiter(tokens)

        return parameter_name, value
Example #11
from collections import abc


def foo(successor: abc.Generator = None):
    """
    notice we use yield in both the
    traditional generator sense and
    also in the coroutine sense.
    """
    while True:
        msg = yield  # coroutine feature
        msg = '%s > %s in %s' % (msg, msg.split()[0], 'foo')
        if successor is not None:
            msg = successor.send(msg)
        yield msg    # generator feature
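
Because ``foo`` forwards into an optional *successor* coroutine, the two
functions can be chained so that each stage appends its own transformation.
A usage sketch (both coroutines must be primed, and advanced with
``next()`` between messages; the messages are made up):

b = bar()
next(b)                  # prime the downstream coroutine
f = foo(successor=b)
next(f)                  # prime the upstream coroutine

print(f.send("hello world"))
# -> 'hello world > hello in foo > hello in bar'

next(f)
next(b)                  # advance both back to their `msg = yield` lines
print(f.send("good day"))
# -> 'good day > good in foo > good in bar'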
Example #12
    def parse_value_post_hook(self, tokens: abc.Generator):
        """Overrides the parent function to allow for more
        permissive parsing.

        If the next token is a reserved word or delimiter,
        then it is returned to the *tokens* and an
        EmptyValueAtLine object is returned as the value.
        """

        t = next(tokens)
        # print(f't: {t}')
        truecase_reserved = [
            x.casefold() for x in self.grammar.reserved_keywords
        ]
        truecase_delim = [x.casefold() for x in self.grammar.delimiters]
        if t.casefold() in (truecase_reserved + truecase_delim):
            # print(f'kw: {kw}')
            # if kw.casefold() == t.casefold():
            # print('match')
            tokens.send(t)
            return self._empty_value(t.pos)
        else:
            raise ValueError
Example #13
    def parse_module_post_hook(
        self, module: MutableMappingSequence, tokens: abc.Generator
    ):
        """Overrides the parent function to allow for more
        permissive parsing.  If an Assignment-Statement
        is blank, then the value will be assigned an
        EmptyValueAtLine object.
        """
        # It enables this by checking to see if the next thing is an
        # '=' which means there was an empty assignment at the previous
        # equals sign, and then unwinding the stack to give the
        # previous assignment the EmptyValueAtLine() object and trying
        # to continue parsing.

        # print('in hook')
        try:
            t = next(tokens)
            if t == "=" and len(module) != 0:
                (last_k, last_v) = module[-1]
                last_token = Token(
                    last_v, grammar=self.grammar, decoder=self.decoder
                )
                if last_token.is_parameter_name():
                    # Fix the previous entry
                    module.pop()
                    module.append(last_k, self._empty_value(t.pos))
                    # Now use last_token as the parameter name
                    # for the next assignment, and we must
                    # reproduce the last part of parse-assignment:
                    try:
                        # print(f'parameter name: {last_token}')
                        self.parse_WSC_until(None, tokens)
                        value = self.parse_value(tokens)
                        self.parse_statement_delimiter(tokens)
                        module.append(str(last_token), value)
                    except StopIteration:
                        module.append(
                            str(last_token), self._empty_value(t.pos + 1)
                        )
                        return module, False  # return through parse_module()
                else:
                    tokens.send(t)
            else:
                # The next token isn't an equals sign or the module is
                # empty, so we want to return the token and signal
                # parse_module() that it should ignore us.
                tokens.send(t)
                raise Exception

            # Peeking at the next token gives us the opportunity to
            # see if we're at the end of tokens, which we want to handle.
            t = next(tokens)
            tokens.send(t)
            return module, True  # keep parsing
        except StopIteration:
            # If we're out of tokens, that's okay.
            return module, False  # return through parse_module()
Example #14
    def parse_around_equals(self, tokens: abc.Generator) -> None:
        """Parses white space and comments on either side
        of an equals sign.

        *tokens* is expected to be a *generator iterator* which
        provides ``pvl.token`` objects.

        This is shared functionality for Begin Aggregation Statements
        and Assignment Statements.  It basically covers parsing
        anything that has a syntax diagram like this:

          <WSC>* '=' <WSC>*

        """
        if not self.parse_WSC_until("=", tokens):
            try:
                t = next(tokens)
                tokens.send(t)
                raise ValueError(f'Expecting "=", got: {t}')
            except StopIteration:
                raise ParseError('Expecting "=", but ran out of tokens.')

        self.parse_WSC_until(None, tokens)
        return