def parse_statement_delimiter(tokens: abc.Generator) -> bool:
    """Parses the tokens for a Statement Delimiter.

    *tokens* is expected to be a *generator iterator* which
    provides ``pvl.token`` objects.

    <Statement-Delimiter> ::= <WSC>*
                (<white-space-character> | <comment> | ';' | <EOF>)

    Although the above structure comes from Figure 2-4
    of the Blue Book, the <white-space-character> and <comment>
    elements are redundant with the presence of [WSC]*
    so it can be simplified to:

    <Statement-Delimiter> ::= <WSC>* [ ';' | <EOF> ]

    Typically written [<Statement-Delimiter>].

    Returns ``True`` if an explicit delimiter was consumed, and
    ``False`` otherwise (the non-delimiter token is returned to
    *tokens* via ``send()``; exhausting *tokens* also yields False).
    """
    for candidate in tokens:
        if candidate.is_WSC():
            # A comment could be parsed here; for now it is absorbed.
            continue
        if candidate.is_delimiter():
            return True
        # Not ours to consume: hand the token back to the lexer.
        tokens.send(candidate)
        return False
    return False
def _parse_set_seq(self, delimiters, tokens: abc.Generator) -> list:
    """The internal parsing of PVL Sets and Sequences are very
    similar, and this function provides that shared logic.

    *delimiters* are a two-tuple containing the start and end
    characters for the PVL Set or Sequence.

    Returns the parsed values as a ``list``.  Raises ``ValueError``
    if *tokens* does not begin with the start delimiter, and throws
    ``ValueError`` into *tokens* when items are not comma-separated.
    """
    t = next(tokens)
    if t != delimiters[0]:
        # Return the token to the lexer before complaining.
        tokens.send(t)
        raise ValueError(
            f'Expecting a begin delimiter "{delimiters[0]}" '
            f'but found: "{t}"'
        )

    set_seq = list()

    # Initial WSC and/or empty set/sequence:
    if self.parse_WSC_until(delimiters[1], tokens):
        return set_seq

    # First item:
    set_seq.append(self.parse_value(tokens))
    if self.parse_WSC_until(delimiters[1], tokens):
        return set_seq

    # Remaining items, if any, must each be preceded by a comma:
    for t in tokens:
        if t == ',':
            self.parse_WSC_until(None, tokens)  # consume WSC after ','
            set_seq.append(self.parse_value(tokens))
            if self.parse_WSC_until(delimiters[1], tokens):
                return set_seq
        else:
            tokens.send(t)
            # Bug fix: the original message read '(,)but found' with
            # a missing space between the literals.
            tokens.throw(
                ValueError,
                'While parsing, expected a comma (,) '
                f'but found: "{t}"'
            )
def parse_end_statement(self, tokens: abc.Generator) -> None:
    """Parses the tokens for an End Statement.

    <End-Statement> ::= "END" ( <WSC>* | [<Statement-Delimiter>] )
    """
    try:
        end = next(tokens)
    except StopIteration:
        # Nothing left to read; a missing End Statement at EOF is fine.
        return

    if not end.is_end_statement():
        # Return the token to the lexer before complaining.
        tokens.send(end)
        raise ValueError(
            "Expecting an End Statement, like "
            f'"{self.grammar.end_statements}" but found '
            f'"{end}"'
        )

    # Absorb a single trailing WSC token if present (a comment could
    # be processed here); anything else is handed back.  Running out
    # of tokens, or a lexing problem after END, is acceptable.
    try:
        trailing = next(tokens)
        if not trailing.is_WSC():
            tokens.send(trailing)
    except (LexerError, StopIteration):
        pass
    return
def parse_end_aggregation(
    self, begin_agg: str, block_name: str, tokens: abc.Generator
) -> None:
    """Parses the tokens for an End Aggregation Statement.

    <End-Aggregation-Statement-block> ::=
         <End-Aggegation-Statement> [<WSC>* '=' <WSC>*
         <Block-Name>] [<Statement-Delimiter>]

    Where <Block-Name> ::= <Parameter-Name>

    *begin_agg* is the keyword that opened the aggregation and
    *block_name* is its Block Name; both are used to validate the
    end statement.  Raises or throws ``ValueError`` on mismatches.
    """
    end_agg = next(tokens)

    # Need to do a little song and dance to case-independently
    # match the keys:
    for k in self.grammar.aggregation_keywords.keys():
        if k.casefold() == begin_agg.casefold():
            truecase_begin = k
            break
    else:
        # Bug fix: previously an unmatched *begin_agg* left
        # truecase_begin unbound and triggered a NameError below.
        raise ValueError(
            "Expecting a Begin-Aggregation-Statement keyword from "
            f'"{tuple(self.grammar.aggregation_keywords.keys())}" '
            f'but was given: "{begin_agg}"'
        )

    if (
        end_agg.casefold()
        != self.grammar.aggregation_keywords[truecase_begin].casefold()
    ):
        tokens.send(end_agg)
        raise ValueError(
            "Expecting an End-Aggegation-Statement that "
            "matched the Begin-Aggregation_Statement, "
            f'"{begin_agg}" but found: {end_agg}'
        )

    try:
        self.parse_around_equals(tokens)
    except (ParseError, ValueError):  # No equals statement, which is fine.
        self.parse_statement_delimiter(tokens)
        return None

    t = next(tokens)
    if t != block_name:
        tokens.send(t)
        tokens.throw(
            ValueError,
            f'Expecting a Block-Name after "{end_agg} =" '
            f'that matches "{block_name}", but found: '
            f'"{t}"',
        )

    self.parse_statement_delimiter(tokens)
    return None
def bar(successor: abc.Generator = None):
    """A coroutine/generator hybrid.

    Each cycle receives a message via ``send`` (coroutine side),
    annotates it, optionally forwards the annotated message through
    *successor*, and then yields the result (generator side).
    """
    while True:
        # Coroutine side: wait for the incoming message.
        received = yield
        annotated = '%s > %s in %s' % (received, received.split()[0], 'bar')
        if successor is not None:
            annotated = successor.send(annotated)
        # Generator side: hand the annotated message back out.
        yield annotated
def parse_value(self, tokens: abc.Generator):
    """Parses PVL Values.

    <Value> ::= (<Simple-Value> | <Set> | <Sequence>)
                [<WSC>* <Units Expression>]

    Returns the decoded <Value> as an appropriate Python object.
    """
    value = None

    try:
        # First try the cheapest interpretation: a Simple Value.
        t = next(tokens)
        value = self.decoder.decode_simple_value(t)
    except ValueError:
        # Not a Simple Value: return the token and try each of the
        # alternative parsing strategies in turn.
        # NOTE(review): if next(tokens) itself raised the ValueError
        # (e.g. a LexerError subclass), *t* would be unbound here and
        # tokens.send(t) would raise UnboundLocalError — presumably
        # next() only raises StopIteration in practice; verify.
        tokens.send(t)
        for p in (
            self.parse_set,
            self.parse_sequence,
            self.parse_value_post_hook,
        ):
            try:
                value = p(tokens)
                break
            except LexerError:
                # A LexerError is a subclass of ValueError, but
                # if we get a LexerError, that's a problem and
                # we need to raise it, and not let it pass.
                raise
            except ValueError:
                # Getting a ValueError is a normal consequence of
                # one of the parsing strategies not working,
                # this pass allows us to go to the next one.
                pass
        else:
            # No strategy succeeded (the for loop never hit break).
            tokens.throw(
                ValueError,
                "Was expecting a Simple Value, or the "
                "beginning of a Set or Sequence, but "
                f'found: "{t}"',
            )

    # A value may optionally be followed by a Units Expression;
    # if parsing one fails (or the tokens run out), the bare value
    # is returned unchanged.
    self.parse_WSC_until(None, tokens)
    try:
        return self.parse_units(value, tokens)
    except (ValueError, StopIteration):
        return value
def parse_units(self, value, tokens: abc.Generator) -> str:
    """Parses PVL Units Expression.

    <Units-Expression> ::= "<" [<white-space>] <Units-Value>
                               [<white-space>] ">"

    and

    <Units-Value> ::= <units-character>
                        [ [ <units-character> | <white-space> ]*
                            <units-character> ]

    Returns the *value* and the <Units-Value> as a ``Units()``
    object.
    """
    delims = self.grammar.units_delimiters
    tok = next(tokens)

    # The lexer provides the whole units expression as one token,
    # so both delimiters must appear on it.
    if not tok.startswith(delims[0]):
        tokens.send(tok)
        raise ValueError(
            f'Was expecting the start units delimiter, "{delims[0]}" '
            f'but found "{tok}"'
        )
    if not tok.endswith(delims[1]):
        tokens.send(tok)
        raise ValueError(
            f'Was expecting the end units delimiter, "{delims[1]}" '
            f'at the end, but found "{tok}"'
        )

    # Peel off the delimiters, then any surrounding white space.
    bare = tok.strip("".join(delims))
    units_value = bare.strip("".join(self.grammar.whitespace))

    # A delimiter left inside the units value means the expression
    # was malformed.
    for delim in delims:
        if delim in units_value:
            tokens.throw(
                ValueError,
                "Was expecting a units character, but found a "
                f'unit delimiter, "{delim}" instead.',
            )

    return self.decoder.decode_quantity(value, units_value)
def parse_begin_aggregation_statement(
    self, tokens: abc.Generator
) -> tuple:
    """Parses the tokens for a Begin Aggregation Statement, and
    returns the aggregation keyword token and the Block Name as
    a ``str``.

    <Begin-Aggregation-Statement-block> ::=
         <Begin-Aggegation-Statement> <WSC>* '=' <WSC>*
         <Block-Name> [<Statement-Delimiter>]

    Where <Block-Name> ::= <Parameter-Name>
    """
    try:
        agg_keyword = next(tokens)
        if not agg_keyword.is_begin_aggregation():
            # Return the token to the lexer before complaining.
            tokens.send(agg_keyword)
            raise ValueError(
                "Expecting a Begin-Aggegation-Statement, but "
                f"found: {agg_keyword}"
            )
    except StopIteration:
        raise ValueError(
            "Ran out of tokens before starting to parse "
            "a Begin-Aggegation-Statement."
        )

    try:
        self.parse_around_equals(tokens)
    except ValueError:
        tokens.throw(
            ValueError, f'Expecting an equals sign after "{agg_keyword}" '
        )

    name_token = next(tokens)
    if not name_token.is_parameter_name():
        tokens.throw(
            ValueError,
            f'Expecting a Block-Name after "{agg_keyword} =" '
            f'but found: "{name_token}"',
        )

    self.parse_statement_delimiter(tokens)
    return agg_keyword, str(name_token)
def parse_WSC_until(token: str, tokens: abc.Generator) -> bool:
    """Consumes objects from *tokens*, if the object's *.is_WSC()*
    function returns *True*, it will continue until *token* is
    encountered and will return *True*.  If it encounters an object
    that does not meet these conditions, it will 'return' that
    object to *tokens* and will return *False*.

    *tokens* is expected to be a *generator iterator* which
    provides ``pvl.token`` objects.
    """
    for candidate in tokens:
        if candidate == token:
            return True
        if not candidate.is_WSC():
            # Not white space (nor the target): give it back.
            tokens.send(candidate)
            return False
        # Otherwise: a WSC token.  A comment could be parsed here;
        # for now it is simply absorbed.
    return False
def parse_assignment_statement(self, tokens: abc.Generator) -> tuple:
    """Parses the tokens for an Assignment Statement.

    The returned two-tuple contains the Parameter Name in the first
    element, and the Value in the second.

    <Assignment-Statement> ::= <Parameter-Name> <WSC>* '='
                                <WSC>* <Value> [<Statement-Delimiter>]
    """
    try:
        name_token = next(tokens)
        if not name_token.is_parameter_name():
            # Return the token before raising.
            tokens.send(name_token)
            raise ValueError(
                "Expecting a Parameter Name, but "
                f'found: "{name_token}"'
            )
        parameter_name = str(name_token)
    except StopIteration:
        raise ValueError(
            "Ran out of tokens before starting to parse "
            "an Assignment-Statement."
        )

    self.parse_around_equals(tokens)

    try:
        value = self.parse_value(tokens)
    except StopIteration:
        raise ParseError(
            "Ran out of tokens to parse after the equals "
            "sign in an Assignment-Statement: "
            f'"{parameter_name} =".',
            name_token,
        )

    self.parse_statement_delimiter(tokens)
    return parameter_name, value
def foo(successor: abc.Generator = None):
    """A coroutine/generator hybrid.

    The first ``yield`` receives a message from the caller's ``send``
    (the coroutine feature); the message is annotated, optionally
    passed through *successor*, and the second ``yield`` emits the
    result (the generator feature).
    """
    while True:
        incoming = yield  # coroutine feature: receive from send()
        outgoing = '%s > %s in %s' % (incoming, incoming.split()[0], 'foo')
        if successor is not None:
            outgoing = successor.send(outgoing)
        yield outgoing  # generator feature: emit the result
def parse_value_post_hook(self, tokens: abc.Generator):
    """Overrides the parent function to allow for more permissive
    parsing.  If the next token is a reserved word or delimiter,
    then it is returned to the *tokens* and an EmptyValueAtLine
    object is returned as the value.

    Raises ``ValueError`` if the next token is not a reserved word
    or delimiter, signalling the caller to try another strategy.
    """
    t = next(tokens)
    # Case-fold both sides so the membership test is case-independent.
    truecase_reserved = [
        x.casefold() for x in self.grammar.reserved_keywords
    ]
    # Fixed local-variable typo: was "trucase_delim".
    truecase_delim = [x.casefold() for x in self.grammar.delimiters]
    if t.casefold() in (truecase_reserved + truecase_delim):
        # Hand the token back so the caller can process it next.
        tokens.send(t)
        return self._empty_value(t.pos)
    else:
        # Was a bare ``raise ValueError``; a message aids debugging
        # and callers only catch the ValueError type.
        raise ValueError(
            "Expecting a reserved word or delimiter, but "
            f'found: "{t}"'
        )
def parse_module_post_hook( self, module: MutableMappingSequence, tokens: abc.Generator ): """Overrides the parent function to allow for more permissive parsing. If an Assignment-Statement is blank, then the value will be assigned an EmptyValueAtLine object. """ # It enables this by checking to see if the next thing is an # '=' which means there was an empty assigment at the previous # equals sign, and then unwinding the stack to give the # previous assignment the EmptyValueAtLine() object and trying # to continue parsing. # print('in hook') try: t = next(tokens) if t == "=" and len(module) != 0: (last_k, last_v) = module[-1] last_token = Token( last_v, grammar=self.grammar, decoder=self.decoder ) if last_token.is_parameter_name(): # Fix the previous entry module.pop() module.append(last_k, self._empty_value(t.pos)) # Now use last_token as the parameter name # for the next assignment, and we must # reproduce the last part of parse-assignment: try: # print(f'parameter name: {last_token}') self.parse_WSC_until(None, tokens) value = self.parse_value(tokens) self.parse_statement_delimiter(tokens) module.append(str(last_token), value) except StopIteration: module.append( str(last_token), self._empty_value(t.pos + 1) ) return module, False # return through parse_module() else: tokens.send(t) else: # The next token isn't an equals sign or the module is # empty, so we want return the token and signal # parse_module() that it should ignore us. tokens.send(t) raise Exception # Peeking at the next token gives us the opportunity to # see if we're at the end of tokens, which we want to handle. t = next(tokens) tokens.send(t) return module, True # keep parsing except StopIteration: # If we're out of tokens, that's okay. return module, False # return through parse_module()
def parse_around_equals(self, tokens: abc.Generator) -> None:
    """Parses white space and comments on either side of an
    equals sign.

    *tokens* is expected to be a *generator iterator* which
    provides ``pvl.token`` objects.

    This is shared functionality for Begin Aggregation Statements
    and Assignment Statements.  It covers parsing anything with a
    syntax diagram of:

        <WSC>* '=' <WSC>*
    """
    if not self.parse_WSC_until("=", tokens):
        # No '=' was found: report whatever was there instead.
        try:
            unexpected = next(tokens)
            tokens.send(unexpected)
        except StopIteration:
            raise ParseError('Expecting "=", but ran out of tokens.')
        raise ValueError(f'Expecting "=", got: {unexpected}')

    # Consume any trailing white space / comments after the '='.
    self.parse_WSC_until(None, tokens)
    return