def DOCSTR_BLOCK(expr, resultsName=None):  # pylint: disable=invalid-name
    """Block with an optional docstring followed by one or more `expr`."""
    # Copied from pyparsing.indentedBlock

    def checkSubIndent(s, l, t):  # pylint: disable=invalid-name
        # Entering the block: the first token must be indented deeper than
        # the enclosing level currently on top of the stack.
        curCol = pp.col(l, s)  # pylint: disable=invalid-name
        if curCol > _indentation_stack[-1]:
            _indentation_stack.append(curCol)
        else:
            raise pp.ParseException(s, l, 'not a subentry')

    def checkUnindent(s, l, t):  # pylint: disable=invalid-name
        del t
        if l >= len(s):
            # End of input counts as an implicit unindent.
            return
        curCol = pp.col(l, s)  # pylint: disable=invalid-name
        # A valid unindent must move left of the current level and at most
        # back to the previous level on the stack.
        if not (_indentation_stack and curCol < _indentation_stack[-1] and
                curCol <= _indentation_stack[-2]):
            raise pp.ParseException(s, l, 'not an unindent')
        _indentation_stack.pop()

    INDENT = (  # pylint: disable=invalid-name
        pp.Empty() + pp.Empty().setParseAction(checkSubIndent)).setName('INDENT')
    UNDENT = pp.Empty().setParseAction(checkUnindent).setName('UNINDENT')  # pylint: disable=invalid-name

    # Optional leading docstring, tagged so later processing can find it.
    docstring = Group(
        Tag('docstring') + pp.QuotedString('"""', multiline=True))
    code = Optional(pp.indentedBlock(expr, _indentation_stack, False), [])
    if resultsName:
        code = code.setResultsName(resultsName)
    body = INDENT + Optional(docstring)('docstring') + code + UNDENT
    # The `-` after NEWLINE makes failure past the colon a hard
    # (non-backtracking) parse error.
    return S(':') + NEWLINE - body
class Tokens(object):
    # shared tokens

    # Characters that delimit values; `pause` asserts (without consuming)
    # that a delimiter or end-of-input follows the current token.
    delim_chars = '[]{},'
    pause = pp.FollowedBy(pp.Word(delim_chars) | pp.StringEnd())
    number = (pp.pyparsing_common.number + pause)
    quoted_string = pp.QuotedString('"', escChar='\\')
    # Boolean spellings convert to real bools via parse actions.
    true = (pp.Regex(r'(True|true|yes|on)') + pause). \
        setParseAction(lambda _: True)
    false = (pp.Regex(r'(False|false|no|off)') + pause). \
        setParseAction(lambda _: False)
    # Anything that is not a delimiter, with surrounding whitespace stripped.
    unquoted_string = pp.CharsNotIn(delim_chars). \
        setParseAction(lambda toks: toks[0].strip())
    # Matches nothing and yields an empty-string value.
    empty_value = pp.Empty(). \
        setParseAction(lambda _: '')

    # tokens for configs
    identifier = pp.pyparsing_common.identifier.copy()
    comma = pp.Literal(',').suppress()
    assign = pp.Literal('=').suppress()
    # NOTE: order matters — more specific alternatives come first.
    config_value = (
        number | true | false | quoted_string | unquoted_string | empty_value)
    # "key=value" becomes a (key, value) tuple.
    key_value_pair = (identifier + assign + config_value). \
        setParseAction(lambda toks: (toks[0], toks[1]))
    key_value_pair_list = pp.Optional(
        key_value_pair + pp.ZeroOrMore(comma + key_value_pair))

    # tokens for tags
    tag = quoted_string | unquoted_string | pp.Empty().suppress()
    tag_list = pp.Optional(tag + pp.ZeroOrMore(comma + tag))
class PortWithProfile(Node):
    """
    Variant of :class:`Port` that is used by "card" records inside
    the "Ports" property. It differs from the normal port syntax by having
    different entries inside the last section. Availability is not listed
    here, only priority. Priority does not have a colon before the actual
    number. This port is followed by profile assignment.
    """

    # Maps friendly attribute names onto pyparsing result names (or
    # callables extracting them from the parse result).
    __fragments__ = {
        'name': 'port-name',
        'label': 'port-label',
        'priority': 'port-priority',
        'latency_offset': 'port-latency-offset',
        'availability': 'port-availability',
        'properties': lambda t: t['port-properties'].asList(),
        'profile_list': lambda t: t['port-profile-list'].asList(),
    }

    __syntax__ = (
        p.Word(p.alphanums + "-;").setResultsName('port-name')
        + p.Suppress(':')
        # This part was very tricky to write. The label is basically arbitrary
        # localized Unicode text. We want to grab all of it in one go but
        # without consuming the upcoming and latest '(' character or the space
        # that comes immediately before.
        #
        # The syntax here combines a sequence of words, as defined by anything
        # other than a space and '(', delimited by a single whitespace.
        #
        # NOTE: the patterns are raw strings so backslash escapes reach the
        # regex engine intact ('\(' in a plain string literal is an invalid
        # escape sequence, deprecated in Python).
        + p.Combine(
            p.OneOrMore(
                ~p.FollowedBy(p.Regex(r'\(.+?\)') + p.LineEnd())
                + p.Regex(r'[^ \n]+')
                + p.White().suppress()),
            ' ').setResultsName('port-label')
        + p.Suppress('(')
        + p.Keyword('priority').suppress()
        # Unlike Port, the colon after "priority" is optional here.
        + p.Optional(p.Suppress(':'))
        + p.Word(p.nums).setParseAction(
            lambda t: int(t[0])
        ).setResultsName('port-priority')
        # Optional ", latency offset: N usec"; defaults to ''.
        + p.Optional(
            p.MatchFirst([
                p.Suppress(',') + p.Keyword('latency offset:').suppress()
                + p.Word(p.nums).setParseAction(lambda t: int(t[0]))
                + p.Literal("usec").suppress(),
                p.Empty().setParseAction(lambda t: '')
            ]).setResultsName('port-latency-offset'))
        # Optional ", available" / ", not available"; defaults to ''.
        + p.Optional(
            p.MatchFirst([
                p.Suppress(',') + p.Literal('not available'),
                p.Suppress(',') + p.Literal('available'),
                p.Empty().setParseAction(lambda t: '')
            ]).setResultsName('port-availability'))
        + p.Suppress(')')
        + p.LineEnd().suppress()
        # Optional tab-indented "Properties:" sub-block; defaults to [].
        + p.Optional(
            p.MatchFirst([
                p.LineStart().suppress()
                + p.NotAny(p.White(' '))
                + p.White('\t').suppress()
                + p.Keyword('Properties:').suppress()
                + p.LineEnd().suppress()
                + PropertyAttributeValue,
                p.Empty().setParseAction(lambda t: [])
            ]).setResultsName('port-properties'))
        + p.White('\t', max=3).suppress()
        + p.Literal("Part of profile(s)").suppress()
        + p.Suppress(":")
        + p.delimitedList(
            p.Word(p.alphanums + "+-:"), ", "
        ).setResultsName("port-profile-list")
    ).setResultsName("port")
def capnpCreateGrammar():
    '''
    Creates the pyparsing grammar for capnproto and creates actions to
    convert to RST
    '''
    # Whitespace normalizers: collapse runs of whitespace to a single space
    # (optional vs. required variants), or delete them entirely.
    ws2space = pp.ZeroOrMore(pp.White()).setParseAction(lambda t: ' ')
    nonopt2space = pp.OneOrMore(pp.White()).setParseAction(lambda t: ' ')
    ws2del = ppDel(pp.ZeroOrMore(pp.White()))

    # Punctuation, wrapped by ppToSpan with CSS-like class names.
    bracket0 = ppToSpan(pp.Empty() + '{\n', 'cp_op_curly_open')
    bracket1 = ppToSpan(pp.Empty() + '}', 'cp_op_curly_close')
    semi = ppToSpan(pp.Empty() + ';', 'cp_op_semi')

    # Identifiers; struct/enum names additionally go through ppToAnchor.
    structNameRaw = pp.Word(pp.alphanums)
    structName = ppToSpan(pp.Empty() + structNameRaw, 'cp_struct_name',
                          ppToAnchor)
    fieldName = ppToSpan(pp.Word(pp.alphanums), 'cp_field_name')
    enumName = ppToSpan(pp.Word(pp.alphanums), 'cp_enum_name', ppToAnchor)
    structKeyword = ppToSpan(pp.Empty() + 'struct', 'cp_struct_keyword')
    enumKeyword = ppToSpan(pp.Empty() + 'enum', 'cp_enum_keyword')

    # The file id line ("@...;") is deleted; field ordinals ("@N") are kept.
    cpid = ppDel('@' + pp.Word(pp.alphanums) + ';')
    ordinal = ppToSpan('@' + ws2del + pp.Word(pp.nums), 'cp_ordinal')

    # A field type is either a bare type name or "List(<name>)".
    fieldType = pp.Or([
        (pp.Empty() + structNameRaw).setParseAction(fieldTypeAction),
        ('List(' + structNameRaw + ')').setParseAction(fieldTypeListAction)])
    field = ppToDiv(
        fieldName + ws2space + ordinal + ws2del + ':' + ws2space + fieldType +
        ws2del + semi, 'cp_field')

    # "#" comments; struct-level comments get their own class name. The
    # final action drops the trailing LineEnd token (t[:-1]).
    comment = ppToDiv(
        ws2del + '#' + ws2space + pp.Word(pp.printables + ' ') + pp.LineEnd(),
        'cp_comment', lambda t: ''.join(t[:-1]))
    comment_struct = ppToDiv(
        ws2del + '#' + ws2space + pp.Word(pp.printables + ' ') + pp.LineEnd(),
        'cp_comment_struct', lambda t: ''.join(t[:-1]))

    # Enum definitions.
    enum_field = ppToDiv(
        ws2del + fieldName + ws2space + ordinal + ws2del + semi + ws2del,
        'cp_enum_field')
    enum_entry = ws2del + pp.Or([comment, enum_field]) + ws2del
    enum_body = ppToDiv(pp.ZeroOrMore(enum_entry), 'cp_scope')
    enum = ppToDiv(
        pp.ZeroOrMore(comment) + enumKeyword + nonopt2space + enumName +
        ws2space + bracket0 + ws2del + enum_body + ws2del + bracket1,
        'cp_enum')

    # Struct definitions; Forward lets structs nest recursively.
    struct = pp.Forward()
    struct_entry = ws2del + pp.Or([comment, field, enum, struct]) + ws2del
    struct_body = ppToDiv(pp.ZeroOrMore(struct_entry), 'cp_scope')
    struct << ppToDiv(
        pp.ZeroOrMore(comment) + structKeyword + nonopt2space + structName +
        ws2space + bracket0 + ws2del + struct_body + ws2del + bracket1,
        'cp_struct')

    # Top-level structs are rendered differently from nested ones.
    mainstruct = pp.Forward()
    mainstruct << ppToDiv(
        pp.ZeroOrMore(comment_struct) + structKeyword + nonopt2space +
        structName + ws2space + bracket0 + ws2del + struct_body + ws2del +
        bracket1, 'cp_mainstruct')

    # "using import" lines are removed from the output.
    using = ppDel(pp.Empty() + 'using import "' +
                  pp.Word(pp.alphanums + "./_") + '".' +
                  pp.Word(pp.alphanums) + ';')

    capnp = ws2del + pp.ZeroOrMore(comment) + cpid + ws2del + pp.ZeroOrMore(
        pp.Or([mainstruct + ws2del, using + ws2del, enum + ws2del]))
    # Whitespace is significant to the normalizers above, so disable
    # pyparsing's implicit whitespace skipping.
    return capnp.leaveWhitespace()
class Port(Node):
    """
    Description of a port on a sink
    """

    # Maps friendly attribute names onto pyparsing result names.
    __fragments__ = {
        'name': 'port-name',
        'label': 'port-label',
        'priority': 'port-priority',
        'availability': 'port-availability'
    }

    __syntax__ = (
        p.Word(p.alphanums + "-;").setResultsName('port-name')
        + p.Suppress(':')
        # This part was very tricky to write. The label is basically
        # arbitrary localized Unicode text. We want to grab all of it in
        # one go but without consuming the upcoming '(' character or the
        # space that comes immediately before.
        #
        # The syntax here combines a sequence of words, as defined by
        # anything other than a space and '(', delimited by a single
        # whitespace.
        + p.delimitedList(
            p.Regex('[^ (\n]+'), ' ', combine=True
        ).setResultsName('port-label')
        + p.Suppress('(')
        + p.Keyword('priority').suppress()
        + p.Suppress(':')
        + p.Word(p.nums).setParseAction(
            lambda t: int(t[0])
        ).setResultsName('port-priority')
        # Optional ", available" / ", not available" suffix; when absent
        # the availability value defaults to an empty string.
        + p.MatchFirst([
            p.Suppress(',') + p.Literal('not available'),
            p.Suppress(',') + p.Literal('available'),
            p.Empty().setParseAction(lambda t: '')
        ]).setResultsName('port-availability')
        + p.Suppress(')')
    ).setResultsName("port")
def _ParseFieldsMetadata(self, parser_mediator, structure):
    """Parses the fields metadata and updates the log line definition to match.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfVFS.
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    fields = self._GetValueFromStructure(structure, 'fields', default_value='')
    fields = fields.strip()
    fields = fields.split(' ')

    log_line_structure = pyparsing.Empty()
    # Guard against a truncated fields line: indexing fields[1] on a
    # single-entry list would raise IndexError.
    if len(fields) >= 2 and fields[0] == 'date' and fields[1] == 'time':
        log_line_structure += self.DATE_TIME.setResultsName('date_time')
        fields = fields[2:]

    for member in fields:
        # Skip empty names produced by consecutive spaces.
        if not member:
            continue

        field_structure = self._LOG_LINE_STRUCTURES.get(member, None)
        if not field_structure:
            # Unknown fields fall back to the URI grammar, with a warning.
            field_structure = self.URI
            parser_mediator.ProduceExtractionWarning(
                'missing definition for field: {0:s} defaulting to URI'.format(
                    member))

        log_line_structure += field_structure

    line_structures = [
        ('comment', self.COMMENT),
        ('logline', log_line_structure)]
    self._SetLineStructures(line_structures)
def _ParseFieldsMetadata(self, structure):
    """Parses the fields metadata and updates the log line definition to match.

    Args:
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    fields = self._GetValueFromStructure(structure, 'fields', default_value='')
    fields = fields.strip()
    fields = fields.split(' ')

    log_line_structure = pyparsing.Empty()
    # Guard against a truncated fields line: indexing fields[1] on a
    # single-entry list would raise IndexError.
    if len(fields) >= 2 and fields[0] == 'date' and fields[1] == 'time':
        log_line_structure += self.DATE_TIME.setResultsName('date_time')
        fields = fields[2:]

    for member in fields:
        # Skip empty names produced by consecutive spaces; unknown names
        # fall back to the URI grammar.
        if not member:
            continue
        log_line_structure += self._LOG_LINE_STRUCTURES.get(member, self.URI)

    # Keep every non-logline structure and replace the logline definition.
    updated_structures = []
    for line_structure in self._line_structures:
        if line_structure[0] != 'logline':
            updated_structures.append(line_structure)
    updated_structures.append(('logline', log_line_structure))

    # TODO: self._line_structures is a work-around and this needs
    # a structural fix.
    self._line_structures = updated_structures
def parse_fchk(fchk_path, keys):
    """Extract the sections named in *keys* from the formatted checkpoint
    file at *fchk_path* and return a dict mapping key -> scalar or array."""
    with open(fchk_path) as handle:
        text = handle.read()

    def _to_float(s, loc, toks):
        return float(toks[0])

    # Matches floats in scientific notation, e.g. -4.10693837E-16
    sci_float = pp.Word(pp.nums + "+-E.").setParseAction(_to_float)

    def _section(key):
        # Skip ahead to the key, keep the rest of its line, then grab any
        # numeric block that follows.
        return pp.Group(
            pp.Suppress(pp.SkipTo(key)) + key + pp.restOfLine
            + pp.ZeroOrMore(sci_float))

    # Start with Empty so the parser can be built up key by key.
    parser = pp.Empty()
    for key in keys:
        parser += _section(key)

    parsed = parser.parseString(text)

    out = {}
    for key, res in zip(keys, parsed):
        # Scalar entries look like:
        # Total Energy [...] R -9.219940072302333E+01
        # -> the value is the last whitespace-separated token of the line.
        if len(res) == 2:
            out[key] = float(res[-1].split()[-1])
        # Matrix entries look like:
        # Cartesian Gradient [...] R N= 9 \
        # [Matrix entries]
        if len(res) > 2:
            out[key] = np.array(res[2:])
    return out
def __init__(self, argumentList):
    """Validate *argumentList* and chain each argument's sub-parser into a
    single combined pyparsing element."""
    self.pyparseItem = pp.Empty()
    for argument in argumentList:
        if not isinstance(argument, ArgumentBase):
            raise CommandDefinitionError(
                "argument %s must be of type ArgumentBase" % argument)
        self.pyparseItem += argument.pyparseItem
    self.argumentList = argumentList
def _ParseCommentRecord(self, structure):
    """Parse a comment and store appropriate attributes.

    Args:
      structure: A pyparsing.ParseResults object from a line in the
                 log file.
    """
    comment = structure[1]
    if comment.startswith(u'Version'):
        _, _, self.version = comment.partition(u':')
    elif comment.startswith(u'Software'):
        _, _, self.software = comment.partition(u':')
    elif comment.startswith(u'Date'):
        self._date = structure.get(u'date', None)
        self._time = structure.get(u'time', None)

    # Check if there's a Fields line. If not, LOG_LINE defaults to IIS 6.0
    # common format.
    elif comment.startswith(u'Fields'):
        log_line = pyparsing.Empty()
        # comment[7:] strips the leading u'Fields:' prefix; unknown field
        # names fall back to the URI grammar.
        for member in comment[7:].split():
            log_line += self._LOG_LINE_STRUCTURES.get(member, self.URI)

        # TODO: self._line_structures is a work-around and this needs
        # a structural fix.
        # NOTE(review): index 1 assumes the logline entry's fixed position
        # in self._line_structures — confirm against the class definition.
        self._line_structures[1] = (u'logline', log_line)
def _compile_sequence(self, element, src_state, dst_state, grammar, kaldi_rule, fst):
    """Compile `element`'s children into `fst` between `src_state` and
    `dst_state`, returning a pyparsing matcher for the sequence.

    An optimizable Repetition compiles its child once and adds a back arc;
    otherwise children are chained child-by-child through fresh states.
    """
    children = element.children
    if len(children) > 1:
        # Handle Repetition elements differently as a special case.
        is_rep = isinstance(element, elements_.Repetition)
        if is_rep and element.optimize:
            # Repetition...
            # Insert new states, so back arc only affects child
            s1 = fst.add_state()
            s2 = fst.add_state()
            fst.add_arc(src_state, s1, None)
            # NOTE: to avoid creating an un-decodable epsilon loop, we must
            # not allow an all-epsilon child here (compile_graph_agf should
            # check)
            matcher = self.compile_element(
                children[0], s1, s2, grammar, kaldi_rule, fst)
            # NOTE: has_eps_path is ~3-5x faster than matcher.parseString()
            # inside try/except block
            if not fst.has_eps_path(s1, s2):
                fst.add_arc(s2, s1, fst.eps_disambig, fst.eps)  # back arc
                fst.add_arc(s2, dst_state, None)
                return pp.OneOrMore(matcher)
            else:
                # Cannot do optimize path, because of epsilon loop, so finish
                # up with Sequence path
                self._log.warning("%s: Cannot optimize Repetition element, because its child element can match empty string; falling back inefficient non-optimize path!" % self)
                # Reuse the already-compiled first child (src_state -> s2)
                # and chain the remaining children onward from s2.
                states = [src_state, s2] + [
                    fst.add_state() for i in range(len(children) - 2)
                ] + [dst_state]
                matchers = [matcher]
                for i, child in enumerate(children[1:], start=1):
                    s1 = states[i]
                    s2 = states[i + 1]
                    matchers.append(
                        self.compile_element(
                            child, s1, s2, grammar, kaldi_rule, fst))
                return pp.And(tuple(matchers))
        else:
            # Sequence...
            # Insert new states for individual children elements
            states = [src_state] + [
                fst.add_state() for i in range(len(children) - 1)
            ] + [dst_state]
            matchers = []
            for i, child in enumerate(children):
                s1 = states[i]
                s2 = states[i + 1]
                matchers.append(
                    self.compile_element(
                        child, s1, s2, grammar, kaldi_rule, fst))
            return pp.And(tuple(matchers))
    elif len(children) == 1:
        # Single child: compile directly between the given states.
        return self.compile_element(
            children[0], src_state, dst_state, grammar, kaldi_rule, fst)
    else:  # len(children) == 0
        # An empty sequence matches the empty string.
        return pp.Empty()
def keep_pos(expr):
    """Wrap a pyparsing grammar so its match carries position information
    (a "pos" attribute describing where the match starts and ends)."""
    start_marker = pp.Empty().setParseAction(lambda s, loc, t: loc)
    end_marker = start_marker.copy()
    # Freeze the cursor so the end marker reports the location immediately
    # after the match instead of skipping over trailing whitespace.
    end_marker.callPreparse = False
    wrapped = (start_marker.setResultsName("pos_start")
               + expr
               + end_marker.setResultsName("pos_end"))
    return wrapped.setParseAction(parse_position)
def parse_logic(input):
    """Parse wiring instructions (e.g. "x AND y -> z") into grouped
    (operation, dest) parse results."""
    const = pp.Word(pp.nums).setParseAction(lambda s, l, t: [int(t[0])])
    wire = pp.Word(pp.alphas)

    # The four instruction shapes: unary NOT, binary gate, shift by a
    # constant, and plain assignment (tagged with a synthetic "ASSIGN" op).
    unary = pp.Literal("NOT")("op") + wire("a")
    binary = ((wire("a") | const("c"))
              + (pp.Literal("AND") | pp.Literal("OR")
                 | pp.Literal("XOR"))("op")
              + wire("b"))
    shift = (wire("a")
             + (pp.Literal("RSHIFT") | pp.Literal("LSHIFT"))("op")
             + const("c"))
    assign = ((pp.Empty().addParseAction(lambda s, l, t: ["ASSIGN"]))("op")
              + (const("c") | wire("a")))

    operation = pp.Group(unary | binary | shift | assign)("operation")
    instruction = pp.Group(
        operation + pp.Literal("->").suppress() + wire("dest"))
    return pp.OneOrMore(instruction).parseString(input)
def parseWithLocation(expr, action):
    """Wrap *expr* so *action* is invoked with a SourceLocation covering the
    matched span plus the inner tokens."""
    begin = p.Empty().setParseAction(lambda s, loc, t: loc)
    finish = begin.copy()
    wrapped = begin + expr + finish
    begin.setName(str(expr))

    def onParse(s, loc, t):
        # First and last tokens are the location markers; the rest belong
        # to the wrapped expression.
        start, inner_tokens, end = t[0], t[1:-1], t[-1]
        return callParseAction(action, SourceLocation(s, start, end),
                               inner_tokens)

    wrapped.setParseAction(onParse)
    return wrapped
def parser_for_string(self, string):
    """Build a pyparsing element for *string*: empty match for '', a Regex
    for pre-compiled patterns, otherwise an And of per-token parsers."""
    if string == '':
        return pp.Empty()
    # Duck-typed check for a compiled regex object.
    if hasattr(string, 'match'):
        return pp.Regex(string)
    parser = pp.And(
        [self.parser_for_token(token) for token in string.split()])
    # Primed rule names (trailing ʹ) skip the shared parse action.
    if not self.name.endswith('ʹ'):
        parser.setParseAction(parse_action)
    return parser
def _make_matcher_element(self):
    """Build the pyparsing element matching this expansion's words, stopping
    early at literals that the following expansions need to consume."""
    # Handle the case where use_current_match is True.
    if self.use_current_match is True:
        current_match = self.current_match
        if current_match is None:
            result = pyparsing.NoMatch()
        elif current_match == "":
            result = pyparsing.Empty()
        else:
            result = pyparsing.Literal(self.current_match)

        # Set the element's attributes and return it.
        return self._set_matcher_element_attributes(result)

    # Otherwise build a list of next possible literals. Make the required
    # stack of child-parent pairs.
    stack = []
    p1, p2 = self, self.parent
    while p1 and p2:
        stack.append((p1, p2))

        # Move both pivots further up the tree.
        p1 = p1.parent
        p2 = p2.parent

    # Build a list of next literals using the stack.
    next_literals, _ = _collect_next_literals(stack, 0, True, False)

    # De-duplicate the list.
    next_literals = set(next_literals)

    word = pyparsing.Regex(_word_regex_str, re.UNICODE)
    if next_literals:
        # Check if there is a next dictation literal. If there is, only
        # match one word for this expansion.
        if _word_regex_str in next_literals:
            result = word

        # Otherwise build an element to match one or more words stopping on
        # any of the next literals so that they aren't matched as dictation.
        else:
            next_literals = list(map(pyparsing.Literal, next_literals))
            result = pyparsing.OneOrMore(
                word, stopOn=pyparsing.Or(next_literals))
    else:
        # Handle the case of no literals ahead by allowing one or more
        # Unicode words without restrictions.
        result = pyparsing.OneOrMore(word)

    return self._set_matcher_element_attributes(result)
class GroovyImportParser(object):
    """
    Given a string containing a single import definition this class
    will parse the import definition and return information regarding it.
    """

    # Simple Groovy sub-grammar definitions
    ImportDef = pyparsing.Suppress(pyparsing.Keyword('import'))
    ImportVarName = pyparsing.Regex(r'[A-Za-z_.\*]*')
    CommentVar = pyparsing.Word(
        pyparsing.alphas, pyparsing.alphanums).setName('comment')
    OptionalSpace = pyparsing.Optional(' ')
    # "import a.b.c; // trailing comment words"
    ImportDefn = ImportDef + \
        pyparsing.delimitedList(
            ImportVarName, delim='.').setResultsName('imports') + \
        pyparsing.Suppress(";") + \
        pyparsing.Optional(
            pyparsing.Suppress('//') +
            pyparsing.delimitedList(
                CommentVar, delim=pyparsing.Empty()).setResultsName('comment')
        )

    @classmethod
    def parse(cls, data):
        """
        Parse the given import and return information regarding the
        contained import statement, or None if parsing fails.

        :param data: The import statement in a string
        :type data: str | basestring
        :rtype: GroovyImport | None
        """
        try:
            # Parse the function here
            result = cls.ImportDefn.parseString(data)

            package_list = []
            if 'imports' in result:
                package_list = result['imports'].asList()

            comment_list = []
            if 'comment' in result:
                comment_list = result['comment'].asList()

            return GroovyImport(
                comment_list, package_list,
                ['import {};'.format(package) for package in package_list])
        # NOTE(review): deliberately broad best-effort catch — any failure
        # yields None rather than propagating. The previously unused
        # `as ex` binding has been removed.
        except Exception:
            return None
def __init__(self, parser, public='commands'):
    """Build the spec-string grammar. `parser` is kept for rule lookups;
    `public` names the entry rule."""
    self.parser = parser
    self.public = public
    self.rules = {}
    self.commands = Commands(self)

    # "<name>" references another rule, resolved through self[...].
    ruleName = pp.Combine(
        pp.Suppress('<') + pp.Word(pp.alphanums + '_.') + pp.Suppress('>'))
    ruleName.setParseAction(lambda toks: self[toks[0]])

    expr = pp.Forward()

    # Adjacent expressions form a sequence (And); a single item is left
    # unchanged (the action returns None, keeping the original token).
    seq = pp.delimitedList(expr, delim=pp.Empty())
    seq.setParseAction(lambda toks: pp.And(toks.asList())
                       if len(toks.asList()) > 1 else None)

    # "a | b" alternatives; this is also the top-level rule element.
    self.rule = alt = pp.delimitedList(seq, delim='|')
    alt.setParseAction(lambda toks: pp.Or(toks.asList())
                       if len(toks.asList()) > 1 else None)

    # "( ... )" grouping.
    groupExpr = pp.Suppress('(') + alt + pp.Suppress(')')
    groupExpr.setParseAction(lambda toks: Grouping(toks[0]))

    # Bare words become keyword matchers.
    word = pp.Word(pp.alphanums + r".&'\"")
    word.setParseAction(lambda toks: pp.Keyword(toks[0]))

    token = groupExpr | ruleName | word

    # "[ ... ]" optional part.
    optExpr = pp.Suppress('[') + alt + pp.Suppress(']')
    optExpr.setParseAction(lambda toks: pp.Optional(toks[0]))

    # "x*" and "x+" repetitions.
    zeroOrMore = token + pp.Suppress(pp.Literal('*'))
    zeroOrMore.setParseAction(lambda toks: pp.ZeroOrMore(toks[0]))
    oneOrMore = token + pp.Suppress(pp.Literal('+'))
    oneOrMore.setParseAction(lambda toks: pp.OneOrMore(toks[0]))

    elem = zeroOrMore | oneOrMore | optExpr | token

    # "elem/tag" attaches a tag name, handled by self.parseExpr.
    tagged = elem + pp.Combine(
        pp.Suppress('/') + pp.Word(pp.alphanums)).setResultsName('tag')
    tagged.setParseAction(self.parseExpr)

    expr << (tagged | elem)
def _ParseFieldsMetadata(self, structure):
    """Parses the fields metadata.

    Args:
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    fields = structure.fields.split(u' ')

    log_line_structure = pyparsing.Empty()
    # Guard against a truncated fields list: indexing fields[1] on a
    # single-entry list would raise IndexError.
    if len(fields) >= 2 and fields[0] == u'date' and fields[1] == u'time':
        log_line_structure += self.DATE_TIME.setResultsName(u'date_time')
        fields = fields[2:]

    for member in fields:
        # Skip empty names produced by consecutive spaces; unknown names
        # fall back to the URI grammar.
        if not member:
            continue
        log_line_structure += self._LOG_LINE_STRUCTURES.get(member, self.URI)

    # TODO: self._line_structures is a work-around and this needs
    # a structural fix.
    self._line_structures[1] = (u'logline', log_line_structure)
def _ParseCommentRecord(self, structure):
    """Parse a comment and store appropriate attributes."""
    comment = structure[1]
    if comment.startswith(u'Version'):
        _, _, self.version = comment.partition(u':')
    elif comment.startswith(u'Software'):
        _, _, self.software = comment.partition(u':')
    elif comment.startswith(u'Date'):
        # TODO: fix this date is not used here.
        _, _, unused_date = comment.partition(u':')

    # Check if there's a Fields line. If not, LOG_LINE defaults to IIS 6.0
    # common format.
    elif comment.startswith(u'Fields'):
        log_line = pyparsing.Empty()
        # comment[7:] strips the leading u'Fields:' prefix; unknown field
        # names fall back to the URI grammar.
        for member in comment[7:].split():
            log_line += self._LOG_LINE_STRUCTURES.get(member, self.URI)

        # TODO: self._line_structures is a work-around and this needs
        # a structural fix.
        # NOTE(review): index 1 assumes the logline entry's fixed position
        # in self._line_structures — confirm against the class definition.
        self._line_structures[1] = ('logline', log_line)
def _make_parser():
    """Build the query parser: "field:value" terms plus bare values.

    Values may be double-quoted, single-quoted or unquoted (anything up to
    whitespace); quoted values have their quotes removed. Bare values with
    no recognized field prefix are attributed to the 'any' field.
    """
    # NOTE: a former local `word` element was created and configured here
    # but never used; it has been removed.
    value = pp.MatchFirst([
        pp.dblQuotedString.copy().setParseAction(pp.removeQuotes),
        pp.sglQuotedString.copy().setParseAction(pp.removeQuotes),
        pp.Empty() + pp.CharsNotIn(''.join(whitespace)),
    ])
    expressions = []
    for field in named_fields:
        # The "field:" prefix is matched case-insensitively and suppressed.
        exp = pp.Suppress(pp.CaselessLiteral(field) + ':') + \
            value.copy().setParseAction(_decorate_match(field))
        expressions.append(exp)

    any_ = value.copy().setParseAction(_decorate_match('any'))
    expressions.append(any_)

    return pp.ZeroOrMore(pp.MatchFirst(expressions))
def _make_parser():
    """Build the query parser: "field:value" terms plus bare values.

    Values may be double-quoted, single-quoted or unquoted (anything up to
    whitespace); quoted values have their quotes removed. Bare values with
    no recognized field prefix are attributed to the 'any' field.
    """
    # NOTE: a former local `word` element was created and configured here
    # but never used; it has been removed.
    value = pp.MatchFirst([
        pp.dbl_quoted_string.copy().set_parse_action(pp.remove_quotes),
        pp.sgl_quoted_string.copy().set_parse_action(pp.remove_quotes),
        pp.Empty() + pp.CharsNotIn("".join(whitespace)),
    ])
    expressions = []
    for field in named_fields:
        # The "field:" prefix is matched case-insensitively and suppressed.
        exp = pp.Suppress(
            pp.CaselessLiteral(field) + ":") + value.copy().set_parse_action(
                _decorate_match(field))
        expressions.append(exp)

    any_ = value.copy().set_parse_action(_decorate_match("any"))
    expressions.append(any_)

    return pp.ZeroOrMore(pp.MatchFirst(expressions))
def _build_sample_parser():
    """Build the grammar for '%key = value' metadata lines followed by rows
    of whitespace-separated numbers."""
    separator = pp.Suppress('=')
    # "%<key>" at line start; the key is any printable text up to '='.
    key = pp.LineStart() + pp.Literal('%').suppress() + pp.Word(
        pp.printables, excludeChars='=')
    # NOTE(review): due to operator precedence this is
    # Regex | (Empty + LineEnd.suppress()), and the Regex can match an
    # empty string, which appears to make the second alternative
    # unreachable — confirm intent.
    value = pp.Regex(r'[^\r%]*') | pp.Empty() + pp.LineEnd().suppress()
    element = pp.Word(pp.alphas)
    # "%elements = A B C ..." (tab- or space-separated element symbols).
    elements = pp.Group(
        pp.LineStart().suppress() + pp.Literal('%').suppress() +
        pp.Literal('elements') + separator + element +
        pp.ZeroOrMore(pp.White(ws='\t ').suppress() + element) +
        pp.LineEnd().suppress())
    # "%compositions = n1 n2 ..." (numbers).
    compositions = pp.Group(
        pp.LineStart().suppress() + pp.Literal('%').suppress() +
        pp.Literal('compositions') + separator + pyparsing_common.number +
        pp.ZeroOrMore(pp.White(ws='\t ').suppress() +
                      pyparsing_common.number) +
        pp.LineEnd().suppress())
    # "%Epoch = n" and "%Sample = n" single-number lines.
    epoch = pp.Group(
        pp.LineStart().suppress() + pp.Literal('%').suppress() +
        pp.Literal('Epoch') + separator + pyparsing_common.number +
        pp.LineEnd().suppress())
    sample = pp.Group(
        pp.LineStart().suppress() + pp.Literal('%').suppress() +
        pp.Literal('Sample') + separator + pyparsing_common.number +
        pp.LineEnd().suppress())
    # Known keys first; anything else falls through to the generic pair.
    key_value = (sample | epoch | elements | compositions |
                 pp.Group(key + separator + value))
    row_separator = pp.White(ws='\t ').suppress()
    # A data row: numbers separated by tabs/spaces, one row per line.
    row = (pp.LineStart().suppress() + pyparsing_common.number +
           pp.ZeroOrMore(row_separator + pyparsing_common.number) +
           pp.LineEnd().suppress())
    return pp.OneOrMore(pp.Dict(key_value)).setResultsName('meta') + \
        pp.Group(pp.ZeroOrMore(pp.Group(row))).setResultsName('values')
class Property(Node):
    """
    A key=value pair.

    A list of properties is a possible syntax for Attribute value.
    """

    # Maps friendly attribute names onto pyparsing result names.
    __fragments__ = {
        'name': 'property-name',
        'value': 'property-value'
    }

    __syntax__ = (
        p.Word(p.alphanums + "-_.").setResultsName("property-name")
        + p.Suppress('=')
        + p.Or([
            # An empty quoted value ("") yields a single space.
            # NOTE(review): presumably a deliberate placeholder so the
            # value is never the empty string — confirm with callers.
            p.Suppress('"') + p.Empty().setParseAction(
                lambda t: " "
            ).setResultsName("property-value") + p.Suppress('"'),
            # Non-empty quoted values are taken verbatim, quotes removed.
            p.QuotedString('"').setResultsName("property-value")
        ])
    ).setResultsName('property')
"[['cast'], ['fireball'], 'grimlock1', 'grimlock2']"), ("I [cast] a [fireball] @grimlock1 and@grimlock2", "[['cast'], ['fireball'], 'grimlock1', 'grimlock2']"), ] _test_sentences_altbot = [ ("VellumTalk: combat", "['combat', '']"), ("vELLUMTAlk aliases shara", "['aliases', 'shara']"), ] # convert scanned sentences into a normalized form and then parse them verb_phrases = P.OneOrMore(P.Group(verb_phrase)).setResultsName('verb_phrases') targets = P.OneOrMore(target).setResultsName('targets') normalized_sentence = (command | P.Optional(actor) + verb_phrases + P.Optional(targets) | Sup(P.Empty())) def parseSentence(s): actor = None verb_phrases = [] targets = [] for item, _, _ in sentence.scanString(s): if item.command: return item elif item.actor: actor = item.actor.character_name elif item.verb_phrase: verb_phrases.append(item.verb_phrase) elif item.target: targets.append(item.target.character_name)
# NaN spellings collapse to a float via "".join of their tokens.
nan_p.setParseAction(lambda tokens: float("".join(tokens)))
number_literal_p = float_literal_p | integer_literal_p | nan_p

string_literal_p = pp.QuotedString(quoteChar='"', escChar="\\")

# "[a, b, ...]" lists of numbers or strings, wrapped in the List type.
number_list_literal_p = pp.Suppress("[") + pp.delimitedList(
    number_literal_p, delim=",") + pp.Suppress("]")
number_list_literal_p.setParseAction(lambda tokens: List(tokens))
string_list_literal_p = pp.Suppress("[") + pp.delimitedList(
    string_literal_p, delim=",") + pp.Suppress("]")
string_list_literal_p.setParseAction(lambda tokens: List(tokens))

# A slice index is an integer, or empty (None) as in "1:" / ":2".
range_index_p = decimal_integer_literal_p.copy().setParseAction(
    lambda tokens: [int("".join(tokens))]) | pp.Empty().setParseAction(
        lambda tokens: [None])
# "start:stop[:step]" becomes a Python slice object.
range_literal_p = range_index_p + pp.Suppress(
    ":") + range_index_p + pp.Optional(pp.Suppress(":") + range_index_p)
range_literal_p.setParseAction(lambda tokens: slice(*tokens))

# "..." becomes the Ellipsis singleton.
ellipsis_literal_p = pp.Literal("...")
ellipsis_literal_p.setParseAction(lambda tokens: Ellipsis)

# A slice is a range, an ellipsis or a plain integer index. "|" separates
# alternative slices; "," separates the dimensions of a hyperslice.
slice_literal_p = range_literal_p | ellipsis_literal_p | decimal_integer_literal_p
slices_literal_p = pp.delimitedList(slice_literal_p, delim="|")
hyperslice_literal_p = pp.delimitedList(slice_literal_p, delim=",")
hyperslice_literal_p.setParseAction(lambda tokens: Hyperslice(tokens))
raise pp.ParseFatalException(s, err.loc, 'invalid statement in %s' % expr) _indentation_stack = [1] # Holds cols to indent from. BLOCK = lambda el: ( S(':') + NEWLINE - # pylint: disable=g-long-lambda pp.indentedBlock(el, _indentation_stack).setFailAction(fail_block)) def reset_indentation(): # pylint: disable=invalid-name _indentation_stack[:] = [1] K = lambda el: pp.Keyword(el).setParseAction(lambda el, loc, t: [t[0], loc]) # pylint: disable=undefined-variable E = lambda el: pp.Empty().setParseAction(lambda: el) Tag = lambda el: pp.Empty().setParseAction(lambda s, loc, t: [el, loc]) # pylint: disable=invalid-name # pylint: enable=undefined-variable NEWLINE = pp.lineEnd.setWhitespaceChars(' ').suppress().ignore( pp.pythonStyleComment) def PARENS(el): # pylint: disable=invalid-name el.ignore(pp.pythonStyleComment) return S('(') - el - S(')') def DOCSTR_BLOCK(expr, resultsName=None): # pylint: disable=invalid-name """Block with an optional docstring followed by one of more `expr`.""" # Copied from pyparsing.indentedBlock
def _generate_grammar(self):
    """Build and return the pyparsing grammar for qmake project (.pro) files.

    The grammar recognizes assignments (key = / += / -= / *= / ~= values),
    load()/include()/option()/requires() directives, scoped condition blocks
    with optional else branches, and deliberately discards defineTest/for
    constructs.  When ``self.debug`` is set, every sub-element is named and
    put into pyparsing debug mode via ``add_element``.

    Returns:
        A pp.ParserElement whose results expose a "statements" list.
    """
    # Define grammar:
    # Newlines are significant in qmake, so only spaces/tabs are skippable.
    pp.ParserElement.setDefaultWhitespaceChars(" \t")

    def add_element(name: str, value: pp.ParserElement):
        # Registration hook: attaches a debug name to each grammar element
        # when self.debug is on; otherwise a no-op pass-through.
        nonlocal self
        if self.debug:
            value.setName(name)
            value.setDebug()
        return value

    EOL = add_element("EOL", pp.Suppress(pp.LineEnd()))
    Else = add_element("Else", pp.Keyword("else"))
    # qmake identifiers may contain '-', '.', '/' after the first character.
    Identifier = add_element(
        "Identifier",
        pp.Word(f"{pp.alphas}_", bodyChars=pp.alphanums + "_-./"))
    # Parenthesized value; re-wrapped in literal "(" ... ")" tokens so the
    # flattened token stream still shows the grouping.
    BracedValue = add_element(
        "BracedValue",
        pp.nestedExpr(ignoreExpr=pp.quotedString
                      | pp.QuotedString(quoteChar="$(",
                                        endQuoteChar=")",
                                        escQuote="\\",
                                        unquoteResults=False)).
        setParseAction(lambda s, l, t: ["(", *t[0], ")"]),
    )
    # The various qmake substitution spellings: $$VAR, $$VAR(...), $(VAR),
    # ${VAR}, $${VAR...}, $$[VAR].
    Substitution = add_element(
        "Substitution",
        pp.Combine(
            pp.Literal("$") +
            (((pp.Literal("$") + Identifier + pp.Optional(pp.nestedExpr()))
              | (pp.Literal("(") + Identifier + pp.Literal(")"))
              | (pp.Literal("{") + Identifier + pp.Literal("}"))
              | (pp.Literal("$") + pp.Literal("{") + Identifier +
                 pp.Optional(pp.nestedExpr()) + pp.Literal("}"))
              | (pp.Literal("$") + pp.Literal("[") + Identifier +
                 pp.Literal("]"))))),
    )
    LiteralValuePart = add_element(
        "LiteralValuePart", pp.Word(pp.printables, excludeChars="$#{}()"))
    # A run of substitutions and literal chunks combined into one token.
    SubstitutionValue = add_element(
        "SubstitutionValue",
        pp.Combine(
            pp.OneOrMore(Substitution | LiteralValuePart | pp.Literal("$"))),
    )
    # $$function(args) on the value side; delegated to handle_function_value
    # (defined elsewhere in this module).
    FunctionValue = add_element(
        "FunctionValue",
        pp.Group(
            pp.Suppress(pp.Literal("$") + pp.Literal("$")) + Identifier +
            pp.nestedExpr(
            )  # .setParseAction(lambda s, l, t: ['(', *t[0], ')'])
        ).setParseAction(lambda s, l, t: handle_function_value(*t)),
    )
    # A single RHS value; NotAny stops value consumption at 'else', '}' or
    # end of line.
    Value = add_element(
        "Value",
        pp.NotAny(Else | pp.Literal("}") | EOL) +
        (pp.QuotedString(quoteChar='"', escChar="\\")
         | FunctionValue
         | SubstitutionValue
         | BracedValue),
    )
    Values = add_element("Values", pp.ZeroOrMore(Value)("value"))
    Op = add_element(
        "OP",
        pp.Literal("=") | pp.Literal("-=") | pp.Literal("+=")
        | pp.Literal("*=") | pp.Literal("~="),
    )
    Key = add_element("Key", Identifier)
    # key <op> values — the operator's source location is captured so later
    # passes can report where the assignment happened.
    Operation = add_element(
        "Operation",
        Key("key") + pp.locatedExpr(Op)("operation") + Values("value"))
    CallArgs = add_element("CallArgs", pp.nestedExpr())

    def parse_call_args(results):
        # Flatten nested call-argument tokens back into a single string,
        # re-inserting parentheses around nested groups.
        out = ""
        for item in chain(*results):
            if isinstance(item, str):
                out += item
            else:
                out += "(" + parse_call_args(item) + ")"
        return out

    CallArgs.setParseAction(parse_call_args)
    Load = add_element("Load", pp.Keyword("load") + CallArgs("loaded"))
    Include = add_element(
        "Include",
        pp.Keyword("include") + pp.locatedExpr(CallArgs)("included"))
    Option = add_element("Option",
                         pp.Keyword("option") + CallArgs("option"))
    RequiresCondition = add_element("RequiresCondition",
                                    pp.originalTextFor(pp.nestedExpr()))

    def parse_requires_condition(s, l_unused, t):
        # The following expression unwraps the condition via the additional info
        # set by originalTextFor.
        condition_without_parentheses = s[t._original_start + 1:t._original_end - 1]

        # And this replaces the colons with '&&' similar how it's done for 'Condition'.
        condition_without_parentheses = (
            condition_without_parentheses.strip().replace(
                ":", " && ").strip(" && "))
        return condition_without_parentheses

    RequiresCondition.setParseAction(parse_requires_condition)
    Requires = add_element(
        "Requires",
        pp.Keyword("requires") +
        RequiresCondition("project_required_condition"))
    FunctionArgumentsAsString = add_element(
        "FunctionArgumentsAsString", pp.originalTextFor(pp.nestedExpr()))
    QtNoMakeTools = add_element(
        "QtNoMakeTools",
        pp.Keyword("qtNomakeTools") +
        FunctionArgumentsAsString("qt_no_make_tools_arguments"),
    )

    # ignore the whole thing...
    DefineTestDefinition = add_element(
        "DefineTestDefinition",
        pp.Suppress(
            pp.Keyword("defineTest") + CallArgs +
            pp.nestedExpr(opener="{", closer="}", ignoreExpr=pp.LineEnd())),
    )

    # ignore the whole thing...
    ForLoop = add_element(
        "ForLoop",
        pp.Suppress(
            pp.Keyword("for") + CallArgs +
            pp.nestedExpr(opener="{", closer="}", ignoreExpr=pp.LineEnd())),
    )

    # ignore the whole thing...
    ForLoopSingleLine = add_element(
        "ForLoopSingleLine",
        pp.Suppress(
            pp.Keyword("for") + CallArgs + pp.Literal(":") + pp.SkipTo(EOL)),
    )

    # ignore the whole thing...
    FunctionCall = add_element("FunctionCall",
                               pp.Suppress(Identifier + pp.nestedExpr()))

    # Forward declaration: Scope is defined at the bottom, after the pieces
    # it recursively contains (Block -> StatementGroup -> Scope).
    Scope = add_element("Scope", pp.Forward())
    Statement = add_element(
        "Statement",
        pp.Group(Load | Include | Option | Requires | QtNoMakeTools | ForLoop
                 | ForLoopSingleLine | DefineTestDefinition | FunctionCall
                 | Operation),
    )
    # A statement ends at EOL, or just before a closing brace.
    StatementLine = add_element("StatementLine",
                                Statement + (EOL | pp.FollowedBy("}")))
    StatementGroup = add_element(
        "StatementGroup",
        pp.ZeroOrMore(StatementLine | Scope | pp.Suppress(EOL)))
    Block = add_element(
        "Block",
        pp.Suppress("{") + pp.Optional(EOL) + StatementGroup +
        pp.Optional(EOL) + pp.Suppress("}") + pp.Optional(EOL),
    )
    # Lookahead that marks where a condition stops: at ':', '{' or '|'.
    ConditionEnd = add_element(
        "ConditionEnd",
        pp.FollowedBy(
            (pp.Optional(pp.White()) +
             (pp.Literal(":") | pp.Literal("{") | pp.Literal("|")))),
    )
    ConditionPart1 = add_element(
        "ConditionPart1",
        (pp.Optional("!") + Identifier + pp.Optional(BracedValue)))
    ConditionPart2 = add_element("ConditionPart2",
                                 pp.CharsNotIn("#{}|:=\\\n"))
    # '^' = longest-match choice, deliberate here since both parts can match
    # a common prefix.
    ConditionPart = add_element(
        "ConditionPart", (ConditionPart1 ^ ConditionPart2) + ConditionEnd)
    ConditionOp = add_element("ConditionOp",
                              pp.Literal("|") ^ pp.Literal(":"))
    ConditionWhiteSpace = add_element(
        "ConditionWhiteSpace", pp.Suppress(pp.Optional(pp.White(" "))))
    ConditionRepeated = add_element(
        "ConditionRepeated",
        pp.ZeroOrMore(ConditionOp + ConditionWhiteSpace + ConditionPart))
    Condition = add_element("Condition",
                            pp.Combine(ConditionPart + ConditionRepeated))
    # qmake's ':' conjunction is rewritten to '&&' for downstream consumers.
    Condition.setParseAction(
        lambda x: " ".join(x).strip().replace(":", " && ").strip(" && "))

    # Weird thing like write_file(a)|error() where error() is the alternative condition
    # which happens to be a function call. In this case there is no scope, but our code expects
    # a scope with a list of statements, so create a fake empty statement.
    ConditionEndingInFunctionCall = add_element(
        "ConditionEndingInFunctionCall",
        pp.Suppress(ConditionOp) + FunctionCall +
        pp.Empty().setParseAction(lambda x: [[]]).setResultsName(
            "statements"),
    )
    SingleLineScope = add_element(
        "SingleLineScope",
        pp.Suppress(pp.Literal(":")) +
        pp.Group(Block | (Statement + EOL))("statements"),
    )
    MultiLineScope = add_element("MultiLineScope", Block("statements"))
    SingleLineElse = add_element(
        "SingleLineElse",
        pp.Suppress(pp.Literal(":")) +
        (Scope | Block | (Statement + pp.Optional(EOL))),
    )
    MultiLineElse = add_element("MultiLineElse", Block)
    ElseBranch = add_element(
        "ElseBranch", pp.Suppress(Else) + (SingleLineElse | MultiLineElse))

    # Scope is already add_element'ed in the forward declaration above.
    Scope <<= pp.Group(
        Condition("condition") +
        (SingleLineScope | MultiLineScope | ConditionEndingInFunctionCall) +
        pp.Optional(ElseBranch)("else_statements"))

    Grammar = StatementGroup("statements")
    Grammar.ignore(pp.pythonStyleComment())

    return Grammar
# Grammar fragment for SMILES atom tokens.  `make_element_literal` and
# `chemistry.elements` come from elsewhere in this project — presumably one
# pyparsing literal per chemical element symbol; confirm against their
# definitions.

# Pair each element with its parser literal so both subsets below can be
# built from one list.
elements = [(make_element_literal(element), element)
            for element in chemistry.elements]

# Elements allowed in the SMILES "organic subset" (writable without
# brackets), combined with '^' (longest match, so e.g. 'Cl' wins over 'C').
organic_element = reduce(operator.xor,
                         (literal for literal, element in elements
                          if element.smiles_organic))

# Any element symbol.
element = reduce(operator.xor, (literal for literal, element in elements))

# '', '@' or '@@' — result is the count of '@' characters (0, 1 or 2).
tetrahedral = (pp.Regex('@{0,2}')
               .setParseAction(lambda t: len(t[0]))
               .setResultsName('tetrahedral'))

# Hydrogen count: absent -> 0, 'H' -> 1, 'H<n>' -> n.
hydrogens = (pp.Empty().setParseAction(lambda: 0) ^
             pp.Literal('H').suppress() +
             (pp.Empty().setParseAction(lambda: 1) ^
              pp.Word(pp.nums).setParseAction(lambda t: int(t[0])))
             ).setResultsName('hydrogens')

# Bond symbols mapped to bond order; '\\' and '/' carry cis/trans direction
# as a (order, side) tuple.  Absent bond defaults to a single bond (1).
bond_type = (pp.Empty().setParseAction(lambda: 1) ^
             pp.Literal('-').setParseAction(lambda: 1) ^
             pp.Literal('\\').setParseAction(lambda: (1, 'left')) ^
             pp.Literal('/').setParseAction(lambda: (1, 'right')) ^
             pp.Literal('=').setParseAction(lambda: 2) ^
             pp.Literal('#').setParseAction(lambda: 3) ^
             pp.Literal('$').setParseAction(lambda: 4)
             )


# NOTE(review): body continues past this chunk.
def parse_charge(tokens):
def _match_boolean(literal):
    """Make a bare flag parse as an explicit ``flag = True`` triple.

    Two zero-width tokens are appended after `literal`: the string "=" and
    the boolean True, so the result tokens have the same shape as an
    explicit key/value assignment.
    """
    implied_equals = pyparsing.Empty().setParseAction(
        pyparsing.replaceWith("="))
    implied_true = pyparsing.Empty().setParseAction(
        pyparsing.replaceWith(True))
    return literal + implied_equals + implied_true