class AsnBoolean(AsnDefinition):
    """X.680 18.3"""

    # Grammar: exactly one of the literal keywords TRUE / FALSE.
    _raw_syntax = pp.Keyword('TRUE') | pp.Keyword('FALSE')

    def __init__(self, toks: pp.ParseResults):
        # Map the matched keyword onto the corresponding Python bool.
        self.value: bool = {'TRUE': True, 'FALSE': False}[toks[0]]
def parse_dbuPerMicron(self):
    """Return a pyparsing grammar for the DEF 'UNITS DISTANCE MICRONS' record.

    The matched integer is exposed under the 'dbuPerMicron' results name.
    Blocks on self.events[0] so this section is parsed with priority over
    the other sections.
    """
    # DEF statements are terminated by ';' followed by a newline.
    linebreak = pp.Suppress(";" + pp.LineEnd())
    number = pp.pyparsing_common.number

    # Removed the unused boilerplate locals (EOL, identifier, word, LPAR,
    # RPAR, ORIENT, pt) that this rule never referenced.

    self.events[0].wait()  # event[0] (parse_dbuPerMicron) has priority

    dbuPerMicron_id = pp.Keyword('UNITS DISTANCE MICRONS')
    dbuPerMicron = dbuPerMicron_id + number('dbuPerMicron') + linebreak
    return dbuPerMicron
def compile():
    """Build a JSON parser.

    Returns:
        A callable that maps a JSON document string to the corresponding
        Python value (dict / list / str / float / bool / None).
    """
    LBRACE, RBRACE, LBRACK, RBRACK, COLON = map(pp.Suppress, '{}[]:')
    value = pp.Forward()
    true = pp.Keyword('true').setParseAction(pp.replaceWith(True))
    false = pp.Keyword('false').setParseAction(pp.replaceWith(False))
    null = pp.Keyword('null').setParseAction(pp.replaceWith(None))
    # JSON number grammar (RFC 8259); all numbers become floats.
    number = (pp.Regex(
        r'-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?').setParseAction(
            pp.tokenMap(float)))
    # JSON string: unescaped chars or backslash escapes, unescaped afterwards.
    string = (pp.Regex(
        r'"([ !#-\[\]-\U0010ffff]+'
        r'|\\(?:["\\/bfnrt]|u[0-9A-Fa-f]{4}))*"').setParseAction(
            pp.tokenMap(json_unescape)))
    items = pp.delimitedList(value)
    array = (pp.Group(LBRACK - pp.Optional(items) +
                      RBRACK).setParseAction(lambda t: t.asList()))
    member = pp.Group(string + COLON + value)
    members = pp.delimitedList(member)
    # Renamed from 'object' so the builtin of that name is not shadowed.
    json_object = (pp.Dict(LBRACE - pp.Optional(members) +
                           RBRACE).setParseAction(lambda t: t.asDict()))
    value << (json_object | array | string | number | true | false | null)
    json = value('top') + pp.StringEnd()
    # NOTE(review): setDefaultWhitespaceChars is a class-level setting in
    # pyparsing; calling it on an instance mutates the global default for
    # subsequently created expressions, not this parser alone — confirm
    # that is intended.
    json.setDefaultWhitespaceChars(' \t\n\r')
    json.parseWithTabs()
    return lambda s: json.parseString(s)['top']
def grammer():
    """Build the pyparsing grammar for a Verilog-style module header and its
    port declarations (input/output/inout, each with an optional bit width).

    Returns the combined grammar; C-style comments and backtick directives
    are ignored.
    """
    colon = pp.Suppress(pp.Literal(":"))
    scolon = pp.Suppress(pp.Literal(";"))
    lparen = pp.Suppress(pp.Literal("("))
    rparen = pp.Suppress(pp.Literal(")"))
    lbrack = pp.Suppress(pp.Literal("["))
    rbrack = pp.Suppress(pp.Literal("]"))
    module = pp.Suppress(pp.Keyword("module"))
    input = pp.Keyword("input")
    output = pp.Keyword("output")
    inout = pp.Keyword("inout")
    # integer literal -> int
    num = pp.Word(pp.nums).addParseAction(lambda s, l, t: int(t[0]))
    # "[msb:lsb]" -> bus width computed as msb + 1
    width = (lbrack + num + colon + num + rbrack).setParseAction(
        lambda s, l, t: t[0] + 1)
    iden = pp.Word(pp.alphanums + "\\_", pp.alphanums + "_")
    idenlist = pp.delimitedList(iden, delim=",")

    def check_module(s, l, t):
        # '\not' appears to be the placeholder emitted when no module_name=
        # attribute was given (see the error text); reject it here.
        if t[0] == '\\not':
            raise pp.ParseException(
                s, l,
                "Error parsing module definition. Ensure the schematic "
                "has a module_name= attribute defined")

    mod_iden = iden.copy()
    mod_iden.addParseAction(check_module)
    # module <name> ( <port list> ) ;
    defmod = module + mod_iden + lparen + pp.Group(idenlist) + rparen + scolon
    # one or more direction [width] name ; declarations
    wires = pp.Group(pp.OneOrMore(pp.Group(
        (input | output | inout) + pp.Optional(width) + iden) + scolon))
    g = defmod + wires
    g.ignore(pp.cStyleComment)
    g.ignore("`" + pp.restOfLine)
    return g
def read_arff(self, _fname):
    """Read and parse an ARFF file.

    Args:
        _fname: path to the .arff file.

    Returns:
        The parsed 'arffdata' ParseResults, exposing the 'relation',
        'identifiers' (attribute names) and 'dataList' results names.
    """
    # 'with' guarantees the handle is closed (the original leaked it), and
    # .read() replaces the needless ''.join(readlines()).
    with open(_fname, 'r') as fobj:
        text = fobj.read()

    relationToken = p.Keyword('@RELATION', caseless=True)
    dataToken = p.Keyword('@DATA', caseless=True)
    attribToken = p.Keyword('@ATTRIBUTE', caseless=True)

    # identifier, optionally wrapped in single quotes
    ident = p.ZeroOrMore(p.Suppress('\'')) + p.Word(
        p.alphas,
        p.alphanums + '_-.').setName('identifier') + p.ZeroOrMore(p.Suppress('\''))
    relation = p.Suppress(relationToken) + p.ZeroOrMore(p.Suppress('"'))\
        + ident.setResultsName('relation') + p.ZeroOrMore(p.Suppress('"'))
    # attribute name only; the type on the rest of the line is discarded
    attribute = p.Suppress(attribToken) + p.quotedString.setParseAction(
        lambda t: t.asList()[0].strip("'")).setResultsName(
            'attrname') + p.Suppress(p.restOfLine)

    int_num = p.Word(p.nums)
    pm_sign = p.Optional(p.Suppress("+") | p.Literal("-"))
    float_num = p.Combine(pm_sign + int_num + p.Optional('.' + int_num) +
                          p.Optional('e' + pm_sign + int_num)
                          ).setParseAction(lambda t: float(t.asList()[0]))

    # sparse-format entry: <index> <value>,
    module_name = p.Group(
        (int_num.setParseAction(lambda t: int(t.asList()[0]))).setName('Key') +
        (p.quotedString.setParseAction(lambda t: t.asList()[0].strip("'"))
         | float_num).setName('Value') + p.Suppress(','))
    dataList = (p.Suppress('{') + p.OneOrMore(module_name) +
                p.Suppress('}')).setParseAction(lambda t: [t.asList()])
    comment = '%' + p.restOfLine
    arffFormat = (p.OneOrMore(p.Suppress(comment)) +
                  relation.setResultsName('relation') +
                  p.OneOrMore(attribute).setResultsName('identifiers') +
                  dataToken +
                  p.OneOrMore(dataList).setResultsName('dataList')
                  ).setResultsName('arffdata')
    tokens = arffFormat.parseString(text)
    # (removed the unused 'featureNames' local)
    return tokens.arffdata
def _add_imports_to_result(self, result: AbstractResult, analysis):
    """Extract '#import' dependencies from an Objective-C file result and
    append them to result.scanned_import_dependencies.

    Comment regions are stripped first so imports inside comments are not
    picked up; dependencies matching the analysis ignore list are skipped.
    Parsing hits/misses are counted in the analysis statistics.
    """
    LOGGER.debug(f'extracting imports from base result {result.scanned_file_name}...')
    list_of_words_with_newline_strings = result.scanned_tokens
    # Drop inline and block comments before tokenizing.
    source_string_no_comments = self._filter_source_tokens_without_comments(
        list_of_words_with_newline_strings, ObjCParsingKeyword.INLINE_COMMENT.value,
        ObjCParsingKeyword.START_BLOCK_COMMENT.value, ObjCParsingKeyword.STOP_BLOCK_COMMENT.value)
    filtered_list_no_comments = self.preprocess_file_content_and_generate_token_list(source_string_no_comments)
    for _, obj, following in self._gen_word_read_ahead(filtered_list_no_comments):
        if obj == ObjCParsingKeyword.IMPORT.value:
            read_ahead_string = self.create_read_ahead_string(obj, following)
            # Matches the entity after '#import <' or '#import "'.
            include_name = pp.Word(pp.alphanums + CoreParsingKeyword.DOT.value + CoreParsingKeyword.SLASH.value + CoreParsingKeyword.UNDERSCORE.value)
            expression_to_match = pp.Keyword(ObjCParsingKeyword.IMPORT.value) + \
                (pp.Keyword(CoreParsingKeyword.OPENING_ANGLE_BRACKET.value) | pp.Keyword(CoreParsingKeyword.DOUBLE_QUOTE.value)) + \
                include_name.setResultsName(CoreParsingKeyword.IMPORT_ENTITY_NAME.value)
            try:
                parsing_result = expression_to_match.parseString(read_ahead_string)
            except Exception as some_exception:
                # Count the miss and continue; one unparsable import must not
                # abort the whole file.
                result.analysis.statistics.increment(Statistics.Key.PARSING_MISSES)
                LOGGER.warning(f'warning: could not parse result {result=}\n{some_exception}')
                LOGGER.warning(f'next tokens: {[obj] + following[:AbstractParsingCore.Constants.MAX_DEBUG_TOKENS_READAHEAD.value]}')
                continue
            analysis.statistics.increment(Statistics.Key.PARSING_HITS)
            # ignore any dependency substring from the config ignore list
            dependency = getattr(parsing_result, CoreParsingKeyword.IMPORT_ENTITY_NAME.value)
            if self._is_dependency_in_ignore_list(dependency, analysis):
                LOGGER.debug(f'ignoring dependency from {result.unique_name} to {dependency}')
            else:
                result.scanned_import_dependencies.append(dependency)
                LOGGER.debug(f'adding import: {dependency}')
def BoolstrResult(expr, true_variables):
    """Determine if a boolean expression is satisfied.

    BoolstrResult('A and B and not C', {'A', 'C'}) -> False

    Args:
        expr: The original boolean expression, like 'A and B'.
        true_variables: Collection to be checked whether satisfy the boolean expr.

    Returns:
        True if the given |true_variables| cause the boolean expression |expr|
        to be satisfied, False otherwise.

    Raises:
        BoolParseError: if |expr| cannot be parsed.
    """
    boolstr = _ExprOverwrite(expr, true_variables)

    # Define the boolean logic
    TRUE = pyparsing.Keyword('True')
    FALSE = pyparsing.Keyword('False')
    boolOperand = TRUE | FALSE
    boolOperand.setParseAction(_BoolOperand)

    # Define expression, based on expression operand and list of operations in
    # precedence order.
    boolExpr = pyparsing.infixNotation(boolOperand, [
        ('not', 1, pyparsing.opAssoc.RIGHT, _BoolNot),
        ('and', 2, pyparsing.opAssoc.LEFT, _BoolAnd),
        ('or', 2, pyparsing.opAssoc.LEFT, _BoolOr),
    ])

    try:
        res = boolExpr.parseString(boolstr)[0]
        return bool(res)
    except (AttributeError, pyparsing.ParseException) as e:
        # Chain the original exception so the underlying parse failure is
        # preserved in the traceback (the original raise dropped it).
        raise BoolParseError(
            'Cannot parse the boolean expression string "%s".' % expr) from e
def generate_entity_results_from_analysis(self, analysis):
    """Generate an entity result for every Groovy class found in the file
    results belonging to *analysis*, then resolve inheritance and imports
    and register each entity under a unique name in self._results.
    """
    LOGGER.debug(f'generating entity results...')
    # Only file results that belong to this analysis are considered.
    filtered_results = {
        k: v
        for (k, v) in self.results.items()
        if v.analysis is analysis and isinstance(v, AbstractFileResult)
    }

    result: AbstractFileResult
    for _, result in filtered_results.items():
        entity_keywords: List[str] = [GroovyParsingKeyword.CLASS.value]
        entity_name = pp.Word(pp.alphanums)
        # Matches: class <Name> [extends <Base>] ... up to the opening scope.
        match_expression = pp.Keyword(GroovyParsingKeyword.CLASS.value) + \
            entity_name.setResultsName(CoreParsingKeyword.ENTITY_NAME.value) + \
            pp.Optional(pp.Keyword(GroovyParsingKeyword.EXTENDS.value) + entity_name.setResultsName(CoreParsingKeyword.INHERITED_ENTITY_NAME.value)) + \
            pp.SkipTo(pp.FollowedBy(GroovyParsingKeyword.OPEN_SCOPE.value))
        # Comment delimiters used to skip commented-out class declarations.
        comment_keywords: Dict[str, str] = {
            CoreParsingKeyword.LINE_COMMENT.value: GroovyParsingKeyword.INLINE_COMMENT.value,
            CoreParsingKeyword.START_BLOCK_COMMENT.value: GroovyParsingKeyword.START_BLOCK_COMMENT.value,
            CoreParsingKeyword.STOP_BLOCK_COMMENT.value: GroovyParsingKeyword.STOP_BLOCK_COMMENT.value
        }
        entity_results = result.generate_entity_results_from_scopes(
            entity_keywords, match_expression, comment_keywords)

        for entity_result in entity_results:
            self._add_inheritance_to_entity_result(entity_result)
            self._add_imports_to_entity_result(entity_result)
            self.create_unique_entity_name(entity_result)
            self._results[entity_result.unique_name] = entity_result
def parse_diearea(self):
    """Return a pyparsing grammar for the DEF 'DIEAREA' statement.

    The matched list of (x, y) points is exposed under the 'DIEAREA'
    results name. Blocks on self.events[0] so that the dbuPerMicron
    section is parsed first.
    """
    # DEF statements are terminated by ';' followed by a newline.
    linebreak = pp.Suppress(";" + pp.LineEnd())
    number = pp.pyparsing_common.number
    LPAR = pp.Suppress('(')
    RPAR = pp.Suppress(')')
    # A coordinate pair '( x y )'; '*' repeats the previous coordinate.
    pt = LPAR + pp.OneOrMore(number | pp.Keyword('*')) + RPAR

    # Removed the unused boilerplate locals (EOL, identifier, word, ORIENT)
    # that this rule never referenced.

    self.events[0].wait()  # event[0] (parse_dbuPerMicron) has priority

    diearea_id = pp.Keyword('DIEAREA')
    diearea = pp.Group(
        pp.Suppress(diearea_id) + pp.OneOrMore(pt)
        + linebreak).setResultsName('DIEAREA')
    return diearea
def build_parser(self):
    """Build and return the query-string parser entry point.

    Returns the bound parseString of the top-level 'or' expression.
    Precedence, loosest to tightest: or > and > parenthesis > quoted
    phrase > term, where a term is a trailing-* wildcard, a field:value
    filter, a '-'-negated term, or a plain word.
    """
    # single search token: wildcard ('foo*'), field filter ('field:value',
    # optionally quoted and/or with comparison prefixes), negation ('-foo'),
    # or a plain alphanumeric term
    parsed_term = pyparsing.Group(pyparsing.Combine(pyparsing.Word(pyparsing.alphanums) + \
                  pyparsing.Suppress('*'))).setResultsName('wildcard') | \
                  pyparsing.Group(pyparsing.Combine(pyparsing.Word(pyparsing.alphanums+"._") + \
                  pyparsing.Word(':') + pyparsing.Group(pyparsing.Optional("\"") + \
                  pyparsing.Optional("<") + pyparsing.Optional(">") + pyparsing.Optional("=") + \
                  pyparsing.Optional("-") + pyparsing.Word(pyparsing.alphanums+"._/") + \
                  pyparsing.Optional("&") + pyparsing.Optional("<") + pyparsing.Optional(">") + \
                  pyparsing.Optional("=") + pyparsing.Optional("-") + \
                  pyparsing.Optional(pyparsing.Word(pyparsing.alphanums+"._/")) + \
                  pyparsing.Optional("\"")))).setResultsName('fields') | \
                  pyparsing.Group(pyparsing.Combine(pyparsing.Suppress('-')+ \
                  pyparsing.Word(pyparsing.alphanums+"."))).setResultsName('not_term') | \
                  pyparsing.Group(pyparsing.Word(pyparsing.alphanums)).setResultsName('term')

    parsed_or = pyparsing.Forward()
    parsed_quote_block = pyparsing.Forward()
    # one or more terms inside a quoted phrase
    parsed_quote_block << ((parsed_term + parsed_quote_block) | parsed_term)

    parsed_quote = pyparsing.Group(pyparsing.Suppress('"') + parsed_quote_block + \
                   pyparsing.Suppress('"')).setResultsName("quotes") | parsed_term

    parsed_parenthesis = pyparsing.Group((pyparsing.Suppress("(") + parsed_or + \
                         pyparsing.Suppress(")"))).setResultsName("parenthesis") | parsed_quote

    parsed_and = pyparsing.Forward()
    # explicit 'and', implicit adjacency (no 'or'/'and' keyword follows),
    # or a single item
    parsed_and << (pyparsing.Group(parsed_parenthesis + pyparsing.Suppress(pyparsing.Keyword("and")) + \
                   parsed_and).setResultsName("and") | \
                   pyparsing.Group(parsed_parenthesis + pyparsing.OneOrMore(~pyparsing.oneOf("or and") + \
                   parsed_and)).setResultsName("and") | parsed_parenthesis)

    parsed_or << (pyparsing.Group(parsed_and + pyparsing.Suppress(pyparsing.Keyword("or")) + \
                  parsed_or).setResultsName("or") | parsed_and)

    return parsed_or.parseString
class Profile(Node):
    """
    Description of a pulseaudio profile.
    """

    # results-name -> attribute mapping consumed by the Node base class
    __fragments__ = {
        'name': 'profile-name',
        'label': 'profile-label',
        'sink_cnt': 'profile-sink-count',
        'source_cnt': 'profile-source-count',
        'priority': 'profile-priority',
    }

    # Matches lines of the form:
    #   <name>: <label> (sinks: N, sources: M, priority. P)
    # where the '.' before the priority value became ':' in newer pactl.
    __syntax__ = (
        p.Word(p.alphanums + "+-:").setParseAction(
            lambda t: t[0].rstrip(':')).setResultsName("profile-name")
        + p.delimitedList(
            p.Literal("(HDMI)") | p.Literal("(IEC958)") | p.Regex('[^ (\n]+'),
            ' ', combine=True).setResultsName('profile-label')
        + p.Suppress('(')
        + p.Keyword('sinks').suppress()
        + p.Suppress(':')
        + p.Word(p.nums).setParseAction(lambda t: int(t[0])).setResultsName(
            'profile-sink-count')
        + p.Suppress(',')
        + p.Keyword('sources').suppress()
        + p.Suppress(':')
        + p.Word(p.nums).setParseAction(lambda t: int(t[0])).setResultsName(
            'profile-source-count')
        + p.Suppress(',')
        + p.Keyword('priority').suppress()
        + p.MatchFirst([
            p.Suppress('.'),
            # Merged on 2013-06-03 (YYYY-MM-DD)
            # http://cgit.freedesktop.org/pulseaudio/pulseaudio/commit/src/utils/pactl.c?id=83c3cf0a65fb05900f81bd2dbb38e6956eb23935
            p.Suppress(':'),
        ])
        + p.Word(p.nums).setParseAction(lambda t: int(t[0])).setResultsName(
            'profile-priority')
        + p.Suppress(')')).setResultsName("profile")
class PortWithProfile(Node):
    """
    Variant of :class:`Port` that is used by "card" records inside the
    "Ports" property. It differs from the normal port syntax by having
    different entries inside the last section. Availability is not listed
    here, only priority. Priority does not have a colon before the actual
    number. This port is followed by profile assignment.
    """

    # results-name -> attribute mapping consumed by the Node base class
    __fragments__ = {
        'name': 'port-name',
        'label': 'port-label',
        'priority': 'port-priority',
        'latency_offset': 'port-latency-offset',
        'availability': 'port-availability',
        'properties': lambda t: t['port-properties'].asList(),
        'profile_list': lambda t: t['port-profile-list'].asList(),
    }

    __syntax__ = (
        p.Word(p.alphanums + "-;").setResultsName('port-name')
        + p.Suppress(':')
        # This part was very tricky to write. The label is basically arbitrary
        # localized Unicode text. We want to grab all of it in one go but
        # without consuming the upcoming and latest '(' character or the space
        # that comes immediately before.
        #
        # The syntax here combines a sequence of words, as defined by anything
        # other than a space and '(', delimited by a single whitespace.
        #
        # Fixed: the regex literals below are now raw strings; the original
        # '\(.+?\)' contained an invalid string escape sequence ('\(' is a
        # DeprecationWarning, and a SyntaxWarning from Python 3.12).
        + p.Combine(
            p.OneOrMore(
                ~p.FollowedBy(p.Regex(r'\(.+?\)') + p.LineEnd())
                + p.Regex(r'[^ \n]+')
                + p.White().suppress()),
            ' ').setResultsName('port-label')
        + p.Suppress('(')
        + p.Keyword('priority').suppress()
        + p.Optional(p.Suppress(':'))
        + p.Word(p.nums).setParseAction(lambda t: int(t[0])).setResultsName(
            'port-priority')
        + p.Optional(
            p.MatchFirst([
                p.Suppress(',') + p.Keyword('latency offset:').suppress()
                + p.Word(p.nums).setParseAction(lambda t: int(t[0]))
                + p.Literal("usec").suppress(),
                p.Empty().setParseAction(lambda t: '')
            ]).setResultsName('port-latency-offset'))
        + p.Optional(
            p.MatchFirst([
                p.Suppress(',') + p.Literal('not available'),
                p.Suppress(',') + p.Literal('available'),
                p.Empty().setParseAction(lambda t: '')
            ]).setResultsName('port-availability'))
        + p.Suppress(')')
        + p.LineEnd().suppress()
        + p.Optional(
            p.MatchFirst([
                p.LineStart().suppress()
                + p.NotAny(p.White(' '))
                + p.White('\t').suppress()
                + p.Keyword('Properties:').suppress()
                + p.LineEnd().suppress()
                + PropertyAttributeValue,
                p.Empty().setParseAction(lambda t: [])
            ]).setResultsName('port-properties'))
        + p.White('\t', max=3).suppress()
        + p.Literal("Part of profile(s)").suppress()
        + p.Suppress(":")
        + p.delimitedList(
            p.Word(p.alphanums + "+-:"),
            ", ").setResultsName("port-profile-list")).setResultsName("port")
def parse_spectre(netlist_string):
    """Parse a Spectre netlist string.

    Recognizes 'subckt ... ends <name>' and 'topckt ... ends <name>' blocks
    containing instance lines; the handle_* parse actions (defined elsewhere
    in this module) transform the raw matches.
    """
    # newlines are part of the grammar, thus redefine the whitespaces without it
    ws = ' \t'
    _p.ParserElement.setDefaultWhitespaceChars(ws)

    # spectre netlist grammar definition
    EOL = _p.LineEnd().suppress()  # end of line
    linebreak = _p.Suppress(
        "\\" + _p.LineEnd())  # breaking a line with backslash newline
    identifier = _p.Word(_p.alphanums + '_!<>-+')  # a name for...
    number = _p.Word(_p.nums + ".")  # a number
    net = identifier  # a net
    nets = _p.Group(_p.OneOrMore(net('net') | linebreak))  # many nets
    cktname = identifier  # name of a subcircuit
    cktname_end = _p.Keyword("ends").suppress()
    comment = _p.Suppress("//" + _p.SkipTo(_p.LineEnd()))
    expression = _p.Word(_p.alphanums + '._*+-/()')
    # key=value instance parameters, possibly wrapped over continued lines
    inst_param_key = identifier + _p.Suppress("=")
    inst_param_value = expression('expression')
    inst_parameter = _p.Group(
        inst_param_key('name')
        + inst_param_value('value')).setResultsName('key')
    parameters = _p.Group(
        _p.ZeroOrMore(inst_parameter | linebreak)).setResultsName('parameters')
    instref = identifier
    instname = identifier
    # <inst> ( <nets> ) <ref> <params> <newline>
    instance = _p.Group(
        instname('name') + _p.Suppress('(') + nets('nets') + _p.Suppress(')')
        + instref('reference') + parameters + EOL).setResultsName('instance')
    subcircuit_content = _p.Group(
        _p.ZeroOrMore(instance | EOL | comment)).setResultsName('subnetlist')
    subcircuit = _p.Group(
        # matches subckt <name> <nets> <newline>
        _p.Keyword("subckt").suppress() + cktname('name') + nets('nets') + EOL
        # matches the content of the subcircuit
        + subcircuit_content
        # matches ends <name> <newline>
        + cktname_end + _p.matchPreviousExpr(cktname).suppress()
        + EOL).setResultsName('subcircuit')
    topcircuit = _p.Group(
        # matches subckt <name> <nets> <newline>
        _p.Keyword("topckt").suppress() + cktname('name') + nets('nets') + EOL
        # matches the content of the subcircuit
        + subcircuit_content
        # matches ends <name> <newline>
        + cktname_end + _p.matchPreviousExpr(cktname).suppress()
        + EOL).setResultsName('topcircuit')
    netlist_element = subcircuit | topcircuit \
        | EOL | comment('comment')
    netlist = _p.ZeroOrMore(netlist_element) + _p.StringEnd()

    # attach the transformation callbacks
    parameters.setParseAction(handle_parameters)
    instance.setParseAction(handle_instance)
    subcircuit.setParseAction(handle_subcircuit)
    topcircuit.setParseAction(handle_topcircuit)

    return netlist.parseString(netlist_string)
def parse_string(logicstr):
    """Parse a boolean logic string into a pyparsing parse tree.

    Supports keyword and symbolic operators (not/~/¬, and/nand/&/∧,
    or/nor/|/∨/+, xor/xnor/⊕/⊻), the constants true/false, and
    identifiers ($-prefixed names allowed).
    """
    and_ = pyparsing.Keyword('and')
    or_ = pyparsing.Keyword('or')
    nor_ = pyparsing.Keyword('nor')
    nand_ = pyparsing.Keyword('nand')
    xor_ = pyparsing.Keyword('xor')
    xnor_ = pyparsing.Keyword('xnor')
    not_ = pyparsing.Keyword('not')
    true_ = pyparsing.Keyword('true')
    false_ = pyparsing.Keyword('false')
    not_op = not_ | '~' | '¬'
    and_op = and_ | nand_ | '&' | '∧'
    xor_op = xor_ | xnor_ | '⊕' | '⊻'
    or_op = or_ | nor_ | '|' | '∨' | '+'
    # Identifiers must not collide with the word operators/constants.
    # NOTE(review): xor_/xnor_ are not in this exclusion list, and 'or'
    # binds tighter than 'xor' in the precedence table below — confirm
    # both are intended.
    identifier = ~(and_ | or_ | nand_ | nor_ | not_ | true_ | false_) + \
        pyparsing.Word('$' + pyparsing.alphas + '_', pyparsing.alphanums + '_' + '$')
    # Removed dead code: a Forward 'expr' plus 'atom'/'factor'/'term'
    # definitions that were immediately shadowed by the infixNotation
    # expression and never used.
    expr = pyparsing.infixNotation(true_ | false_ | identifier,
                                   [(not_op, 1, pyparsing.opAssoc.RIGHT),
                                    (and_op, 2, pyparsing.opAssoc.LEFT),
                                    (or_op, 2, pyparsing.opAssoc.LEFT),
                                    (xor_op, 2, pyparsing.opAssoc.LEFT)])
    return expr.parseString(logicstr)[0]
def _typeof_expression():
    """Parser for a GCC-style typeof expression: typeof(...) or __typeof__(...)."""
    typeof_keyword = (
        pyparsing.Keyword('typeof')
        | pyparsing.Keyword('__typeof__')
    )
    open_paren = pyparsing.Literal('(')
    close_paren = pyparsing.Literal(')')
    # Whatever sits between the balanced parentheses is captured verbatim.
    inner = pyparsing.Combine(_anything_beetween('()'))
    return pyparsing.Combine(typeof_keyword + open_paren + inner + close_paren)
def __init__(self):
    """Build the directive grammar: one or more 'name(args)' directives
    separated by ';', where args are positional values (float / string /
    bool) followed by keyword (name=value) arguments.
    """
    # directive grammar
    lparen = pp.Literal('(').suppress()
    rparen = pp.Literal(')').suppress()
    comma = pp.Literal(',').suppress()
    semicolon = pp.Literal(';').suppress()
    equalTok = pp.Literal('=').suppress()

    # optionally signed decimal number -> float
    self.floatTok = pp.Optional((pp.Literal('-'))|pp.Literal('+')) + pp.Word(pp.nums) + pp.Optional(pp.Literal('.') + pp.Optional(pp.Word(pp.nums)))
    self.floatTok.addParseAction(lambda toks: float("".join(toks)))

    # single- or double-quoted string, quotes stripped
    self.stringTok = pp.Group(pp.dblQuotedString() ^ pp.sglQuotedString())
    self.stringTok.addParseAction(lambda toks: "".join(toks[0]).strip('"').strip("'"))

    # boolean literals -> Python bools
    self.trueTok = pp.Keyword("true")
    self.trueTok.addParseAction(lambda _: True)
    self.falseTok = pp.Keyword("false")
    self.falseTok.addParseAction(lambda _: False)
    self.boolTok = self.trueTok | self.falseTok

    self.identifierTok = pp.Word(pp.alphas + '_', pp.alphanums + '_')('identifier')

    # self.posKeywordTok = \
    #     pp.Keyword("tl") | \
    #     pp.Keyword("tc") | \
    #     pp.Keyword("tr") | \
    #     pp.Keyword("cl") | \
    #     pp.Keyword("cc") | \
    #     pp.Keyword("cr") | \
    #     pp.Keyword("bl") | \
    #     pp.Keyword("bc") | \
    #     pp.Keyword("br")
    # position keywords are currently accepted as any identifier-like word
    # (the stricter keyword list above is kept for reference)
    self.posKeywordTok = \
        pp.Word(pp.alphas + '_', pp.alphanums + '_')
    self.posKeywordTok.addParseAction(lambda toks: str(toks[0]))

    self.positionalArgTok = self.floatTok | self.stringTok | self.boolTok
    self.keywordArgTok = pp.Group(self.identifierTok + equalTok + (self.positionalArgTok | self.posKeywordTok))
    self.keywordArgTok.addParseAction(lambda toks: [x for x in toks])

    # positional args first, then keyword args; or keyword args only
    self.argsTok = pp.Optional(
        (self.positionalArgTok + pp.ZeroOrMore(comma + self.positionalArgTok) + pp.ZeroOrMore(comma + self.keywordArgTok))
        | (self.keywordArgTok + pp.ZeroOrMore(comma + self.keywordArgTok))
    )('args')
    # self.argsTok.addParseAction(lambda toks: [toks])
    self.argsTok.addParseAction(lambda toks: DirectiveArgs(toks))

    self.directiveTok = pp.Group(self.identifierTok + lparen + self.argsTok + rparen)
    # one or more ';'-separated directives, optional trailing ';'
    self.mainTok = self.directiveTok + pp.ZeroOrMore(semicolon + self.directiveTok) + pp.Optional(semicolon)
def _build_grammar():
    """Construct the pyparsing grammar for the TPN net description format.

    Statements are ';'-terminated 'place' or 'trans' declarations; Python
    style '#' comments are ignored.
    """
    pp.ParserElement.setDefaultWhitespaceChars(" \t\n")

    quoted = pp.QuotedString('"')
    # place "<name>" [init <n>]   -- initial marking defaults to 0
    place = (
        pp.Keyword("place")
        + quoted("name")
        + pp.Optional(pp.Suppress("init") + pp.Word(pp.nums), default=0)("init")
    )
    # trans "<name>" ~ "<event>" [in "<p>"...] [out "<p>"...]
    arc_list = pp.Group(pp.OneOrMore(quoted))
    trans = (
        pp.Keyword("trans")
        + quoted("name")
        + pp.Suppress("~")
        + quoted("event")
        + pp.Optional(pp.Suppress("in") + arc_list("inputs"))
        + pp.Optional(pp.Suppress("out") + arc_list("outputs"))
    )
    stmt = pp.Group(place | trans) + pp.Suppress(";")
    grammar = pp.OneOrMore(stmt)
    grammar.ignore(pp.pythonStyleComment)
    return grammar
def _init_parser(self):
    """Build the command-line parsers: the main statement parser
    (self.parser, handling multiline/single-line commands, terminators,
    pipes and I/O redirection) and the input-redirection parser
    (self.inputParser).
    """
    #outputParser = (pyparsing.Literal('>>') | (pyparsing.WordStart() + '>') | pyparsing.Regex('[^=]>'))('output')
    # output redirection: '>>', a bare '>' at a word start, or '>' not
    # preceded by '=' (to avoid matching comparisons)
    outputParser = (pyparsing.Literal(self.redirector *2) | \
                   (pyparsing.WordStart() + self.redirector) | \
                    pyparsing.Regex('[^=]' + self.redirector))('output')

    inputMark = pyparsing.Literal('<')('input')
    # terminators may be plain strings or pre-built parser elements
    terminatorParser = pyparsing.Or([(hasattr(t, 'parseString') and t) or pyparsing.Literal(t) for t in self.terminators])('terminator')
    stringEnd = pyparsing.stringEnd ^ '\nEOF'
    self.multilineCommand = pyparsing.Or([pyparsing.Keyword(c, caseless=self.case_insensitive) for c in self.multilineCommands])('multilineCommand')
    oneLineCommand = (~self.multilineCommand + pyparsing.Word(self.legalChars))('command')
    pipe = pyparsing.Keyword('|', identChars='|')
    self.commentGrammars.ignore(pyparsing.quotedString).setParseAction(lambda x: '')
    # regions whose contents must not be scanned for special characters
    doNotParse = self.commentGrammars | self.commentInProgress | pyparsing.quotedString
    # optional trailing elements: input redirect, pipe target, output redirect
    afterElements = \
        pyparsing.Optional(inputMark + pyparsing.SkipTo(outputParser ^ pipe ^ stringEnd, ignore=doNotParse).setParseAction(lambda x: x[0].strip())('inputFrom')) + \
        pyparsing.Optional(pipe + pyparsing.SkipTo(outputParser ^ stringEnd, ignore=doNotParse)('pipeTo')) + \
        pyparsing.Optional(outputParser + pyparsing.SkipTo(stringEnd, ignore=doNotParse).setParseAction(lambda x: x[0].strip())('outputTo'))
    if self.case_insensitive:
        self.multilineCommand.setParseAction(lambda x: x[0].lower())
        oneLineCommand.setParseAction(lambda x: x[0].lower())
    if self.blankLinesAllowed:
        # NOTE(review): this assigns the NoMatch *class*, not an instance;
        # the later .setResultsName('statement') call on it would fail at
        # runtime — confirm whether 'pyparsing.NoMatch()' was intended.
        self.blankLineTerminationParser = pyparsing.NoMatch
    else:
        # a blank line (two consecutive line ends) terminates a statement
        self.blankLineTerminator = (pyparsing.lineEnd + pyparsing.lineEnd)('terminator')
        self.blankLineTerminator.setResultsName('terminator')
        self.blankLineTerminationParser = ((self.multilineCommand ^ oneLineCommand) + pyparsing.SkipTo(self.blankLineTerminator, ignore=doNotParse).setParseAction(lambda x: x[0].strip())('args') + self.blankLineTerminator)('statement')
    # multiline statements require an explicit terminator before the suffix
    self.multilineParser = (((self.multilineCommand ^ oneLineCommand) + pyparsing.SkipTo(terminatorParser, ignore=doNotParse).setParseAction(lambda x: x[0].strip())('args') + terminatorParser)('statement') + pyparsing.SkipTo(outputParser ^ inputMark ^ pipe ^ stringEnd, ignore=doNotParse).setParseAction(lambda x: x[0].strip())('suffix') + afterElements)
    self.multilineParser.ignore(self.commentInProgress)
    self.singleLineParser = ((oneLineCommand + pyparsing.SkipTo(terminatorParser ^ stringEnd ^ pipe ^ outputParser ^ inputMark, ignore=doNotParse).setParseAction(lambda x:x[0].strip())('args'))('statement') + pyparsing.Optional(terminatorParser) + afterElements)
    #self.multilineParser = self.multilineParser.setResultsName('multilineParser')
    #self.singleLineParser = self.singleLineParser.setResultsName('singleLineParser')
    self.blankLineTerminationParser = self.blankLineTerminationParser.setResultsName('statement')
    self.parser = self.prefixParser + (
        stringEnd |
        self.multilineParser |
        self.singleLineParser |
        self.blankLineTerminationParser |
        self.multilineCommand + pyparsing.SkipTo(stringEnd, ignore=doNotParse)
    )
    self.parser.ignore(self.commentGrammars)

    fileName = pyparsing.Word(self.legalChars + '/\\')
    inputFrom = fileName('inputFrom')
    # a not-entirely-satisfactory way of distinguishing < as in "import from" from <
    # as in "lesser than"
    self.inputParser = inputMark + pyparsing.Optional(inputFrom) + pyparsing.Optional('>') + \
                       pyparsing.Optional(fileName) + (pyparsing.stringEnd | '|')
    self.inputParser.ignore(self.commentInProgress)
def lexical_analysis(self, src):
    """Tokenize CREATE TABLE statements found in *src*.

    The source is whitespace-normalized, split on ';' into statements, and
    each statement is parsed in full.

    Returns:
        A list of pyparsing ParseResults, one per parsed statement.
    """
    # collapse newlines and whitespace runs, then split into statements
    delimited = re.sub(r'\s+', ' ', ' '.join(src.strip().split('\n'))).split(';')
    result = []
    for stmt in delimited:
        # NOTE(review): an empty statement ends the scan immediately. For a
        # trailing ';' this is the normal exit, but an empty statement in
        # the middle would silently drop the remainder — confirm intended.
        if stmt == '':
            return result
        string = pp.Regex('[a-zA-Z0-9=_]+')
        nums = pp.Regex('[0-9]+')
        ws = pp.OneOrMore(pp.White()).suppress()
        lp = pp.Regex('[(]').suppress()
        rp = pp.Regex('[)]').suppress()
        c = pp.Regex('[,]').suppress()
        q = pp.Regex("[']").suppress()

        table_name = string.setResultsName('table_name')
        # CREATE TABLE [IF NOT EXISTS] <name> (
        create_table = (pp.Keyword('CREATE', caseless = True) + ws + pp.Keyword('TABLE', caseless = True) + ws + pp.Optional(pp.Keyword('IF', caseless = True) + ws + pp.Keyword('NOT', caseless = True) + ws + pp.Keyword('EXISTS', caseless = True))).suppress() + table_name + lp

        column_name = string.setResultsName('column_name')
        data_type = string.setResultsName('data_type')
        length = lp + nums.setResultsName('length') + rp
        nullable = (pp.Optional(pp.Keyword('NOT', caseless = True) + ws) + pp.Keyword('NULL', caseless = True)).setResultsName('nullable')
        default_value = pp.Keyword('DEFAULT', caseless = True).suppress() + ws + string.setResultsName('default_value')
        auto_increment = pp.Keyword('AUTO_INCREMENT', caseless = True).setResultsName('auto_increment')
        # column attributes may appear in several orders, hence the cascade
        # of optional MatchFirst groups
        column = pp.Optional(ws) + column_name + ws + data_type + pp.Optional(pp.MatchFirst([length, ws + nullable, ws + default_value, ws + auto_increment])) + pp.Optional(pp.MatchFirst([ws + nullable, ws + default_value, ws + auto_increment])) + pp.Optional(pp.MatchFirst([ws + default_value, ws + auto_increment])) + pp.Optional(ws + auto_increment) + pp.Optional(ws) + c
        primary_key = pp.Keyword('PRIMARY KEY', caseless = True).suppress() + lp + pp.OneOrMore(q + string.setResultsName('primary_key') + q + pp.Optional(c)) + rp + pp.Optional(c)
        key = pp.Keyword('KEY', caseless = True).suppress() + lp + q + string.setResultsName('key') + q + pp.Optional(c) + rp + pp.Optional(c)
        parser = create_table + pp.OneOrMore(pp.Group(column)) + pp.Optional(primary_key) + pp.Optional(key) + rp + \
            pp.OneOrMore(ws + string).suppress()
        result.append(parser.parseString(stmt, parseAll=True))
    return result
def compile(self):
    """Parse the QGL query string (CONNECT / RETRIEVE / optional JOIN).

    Raises:
        QGLSyntaxError: if self.qgl_str cannot be parsed.
    """
    # optional trailing manipulation set: THEN | ... ;
    manipulation_set = pp.Optional(
        pp.Suppress(pp.Keyword("THEN")) + pp.Suppress("|") +
        pp.SkipTo(pp.Suppress(";"), include=True))
    manipulation_set.setParseAction(
        lambda x: self._add_manipulation_set(x[0]))
    # NOTE(review): manipulation_set is not included in 'parser' below —
    # its parse action can never fire from this method; confirm intended.
    parser = (pp.Keyword("CONNECT") + self.connect_block.parser() +
              pp.Keyword("RETRIEVE") + self.retrieve_block.parser() +
              pp.Optional(pp.Keyword("JOIN") + self.join_block.parser()))

    try:
        parser.parseString(self.qgl_str)
    except pp.ParseException as e:
        # Fixed: the original used Python 2 'except X, e' syntax, which is
        # a SyntaxError on Python 3; also chain the cause explicitly.
        raise QGLSyntaxError("Couldn't parse query: \n %s" % e) from e
class AsnReal(AsnDefinition):
    """X.680 21.6

    Limitations:
    - SequenceValue not supported; parsed into dict by AsnSequence
    """

    def __init__(self, toks: pp.ParseResults):
        # float() also accepts the 'inf'/'-inf'/'nan' strings produced by
        # the special-value alternatives below.
        self.value = float(toks[0])

    # Fixed: the negative-number branch previously used
    # pp.Suppress(pp.Literal('-')), which discarded the minus sign so
    # negative reals parsed as positive; the sign is now re-attached.
    _raw_syntax = (realnumber.copy()
                   | (pp.Literal('-') + realnumber.copy()).setParseAction(
                       lambda toks: '-{}'.format(toks[1]))
                   | pp.Keyword('PLUS-INFINITY').setParseAction(lambda: 'inf')
                   | pp.Keyword('MINUS-INFINITY').setParseAction(lambda: '-inf')
                   | pp.Keyword('NOT-A-NUMBER').setParseAction(lambda: 'nan'))
def nline_grammar():
    """Grammar for the comma-separated 'nline' control record.

    The ibrspl and icm_split fields select between four field layouts;
    extra lines (ibrspl_line, icm_split_line) follow depending on those
    values.
    """
    fields = [
        nonNegativeInteger('ncase'),
        nonNegativeInteger('ixxn'),
        nonNegativeInteger('jxxn'),
        floatNumber('timmax'),
        pp.oneOf(['0', '1'])('ibrspl'),
        nonNegativeInteger('nbrspl'),
        pp.oneOf(['0', '1', '2'])('irrltt'),
        None,
    ]
    IBRSPL = 4
    ICM_SPLIT = 7

    # no brem splitting, no icm
    fields[IBRSPL] = pp.oneOf(['0', '1'])('ibrspl')
    fields[ICM_SPLIT] = pp.Keyword('0')('icm_split')
    nline_no_brem_no_icm = commaSeparatedLine(fields)

    # no brem splitting, with icm
    fields[ICM_SPLIT] = positiveInteger('icm_split')
    nline_no_brem_with_icm = commaSeparatedLine(fields)

    # with brem, with icm
    fields[IBRSPL] = pp.oneOf(['2', '29'])('ibrspl')
    nline_with_brem_with_icm = commaSeparatedLine(fields)

    # with brem, no icm
    # Fixed: the results name was 'ICM_SPLIT' (upper case) here,
    # inconsistent with the 'icm_split' name used by every other branch,
    # so lookups of 'icm_split' would miss on this layout.
    fields[ICM_SPLIT] = pp.Keyword('0')('icm_split')
    nline_with_brem_no_icm = commaSeparatedLine(fields)

    # if ibrspl = 2 or ibrspl = 29
    ibrspl_line = commaSeparatedLine([
        floatNumber('fs'),
        floatNumber('ssd'),
        pp.Optional(nonNegativeInteger('nmin')),
        pp.Optional(nonNegativeInteger('icm_dbs')),
        pp.Optional(nonNegativeInteger)('zplane_dbs'),
        pp.Optional(pp.oneOf(['0', '1']))('irad_dbs'),
        pp.Optional(floatNumber)('zrr_dbs')
    ])

    # if ICM_SPLIT > 0
    icm_split_line = commaSeparatedLine(
        [nonNegativeInteger('nsplit_phot'),
         nonNegativeInteger('nsplit_elec')])

    return nline_no_brem_no_icm | \
        (nline_no_brem_with_icm + icm_split_line) | \
        (nline_with_brem_no_icm + ibrspl_line) | \
        (nline_with_brem_with_icm + ibrspl_line + icm_split_line)
def build_element(self):
    """
    Builds an element containing the full command and arguments

    Elements are joined with the `-` operator, which stops parsing
    immediately if the element fails to match. Elements are joined with a
    token that matches one or more whitespace characters.
    """
    # The command name may itself be a pre-built ParserElement; otherwise
    # it is matched as a literal keyword.
    if isinstance(self.name, ParserElement):
        command_element = self.name
    else:
        command_element = pp.Keyword(self.name)

    # SetResultsName creates a copy instead of mutating the element
    command_element = command_element.setResultsName("command_name")

    for arg_name, arg_element in self.params.items():
        # Prefer a human-readable display name when the element carries one.
        if hasattr(arg_element, "name"):
            display_name = f"{arg_name} ({arg_element.name})"
        else:
            display_name = arg_name

        element = WS_TOK - arg_element.setResultsName(arg_name).setName(display_name)

        # If element is optional, make WS + element optional
        # NOTE(review): bare 'Optional' here vs 'pp.Optional' in the check —
        # confirm this resolves to pyparsing's Optional (from-imported) and
        # not typing.Optional.
        if isinstance(arg_element, pp.Optional):
            element = Optional(element)

        command_element -= element

    return command_element
def string(allow_private=False):
    """Return a parser for template strings containing ${...} substitutions.

    Args:
        allow_private: when True, identifiers may start with an underscore.

    Returns:
        A pyparsing expression matching literal text, '$$' escapes, and
        ${var.func...} template references.
    """
    if allow_private:
        # private functions are restricted!
        identifier = pp.Word(pp.alphas + "_", pp.alphanums + "_")
    else:
        identifier = pp.Word(pp.alphas, pp.alphanums + "_")
    template = pp.Forward()
    pool_var = identifier.copy()
    # a variable is a builtin, a pool variable, or a nested template
    var = _Parsers.builtins | pool_var.setParseAction(
        lambda name: _PoolVar(name)) | template
    # '$$' is the escape for a literal '$'
    special_chars = pp.Keyword('$$').setParseAction(pp.replaceWith('$'))
    # ${ var[.func ...] }
    template << (pp.Suppress('$') + pp.Suppress('{') + var + pp.ZeroOrMore(
        pp.Suppress('.') + _Parsers.expression(
            allow_private=allow_private)) + pp.Suppress('}'))

    def template_parse_action(toks):
        # First token is the variable; any remaining tokens are the chained
        # function calls applied to it.
        name = toks[0]
        try:
            funcs = toks[1:]
        except IndexError:
            funcs = []
        return _TemplateVar(name, funcs)

    template.setParseAction(template_parse_action)
    # Literal text may contain anything except the '$' trigger character.
    restricted_chars = '$'
    printables = ''.join(
        c for c in (set(pp.printables) - set(restricted_chars)))
    string = pp.ZeroOrMore(special_chars | template |
                           pp.Combine(pp.Word(printables + ' ')).leaveWhitespace())
    return string
def _cast_expression(expression):
    """A function returning a (pyparsing) parser for parsing cast expressions.

    Args:
        expression: a pyparsing parser for parsing an expression to be cast.

    Returns:
        A (pyparsing) parser for parsing cast expressions.
    """
    token_word = pyparsing.Word(pyparsing.alphanums + '_*[]')
    # Arbitrarily nested parenthesized token groups.
    parenthesized = pyparsing.Forward()
    parenthesized <<= (
        pyparsing.Literal('(')
        + pyparsing.ZeroOrMore(token_word | parenthesized)
        + pyparsing.Literal(')')
    )
    # Either 'typeof(...)' or a plain type name, possibly with pointer or
    # array decorations.
    type_expression = (
        (pyparsing.Keyword('typeof') + parenthesized)
        | (pyparsing.Word(pyparsing.alphanums + ' _[]')
           + pyparsing.Optional(pyparsing.Word(' *')))
    )
    # '(type) expr', rejecting '(x) + y' / '(x) - y' which are arithmetic,
    # not casts.
    return (
        _OPEN_PARENTHESIS
        + pyparsing.Combine(type_expression)
        + _CLOSE_PARENTHESIS
        + ~(_PLUS | _MINUS)
        + expression
    ).setParseAction(_create_cast_expression)
def _add_package_name_to_result(self, result: AbstractResult) -> None:
    """Extract the Java package name from *result* and store it in place.

    Scans the result's tokens for the ``package`` keyword, parses the
    dotted package name that follows (up to the ``;``), and writes it to
    ``result.module_name``.  Parsing hits and misses are recorded in the
    analysis statistics.

    Mutates *result* and returns nothing — the previous ``-> str``
    annotation was wrong, as no code path returns a value.

    Args:
        result: The scanned-file result whose tokens are searched.
    """
    LOGGER.debug(
        f'extracting package name from base result {result.scanned_file_name}...'
    )
    list_of_words = result.scanned_tokens
    for _, obj, following in self._gen_word_read_ahead(list_of_words):
        if obj != JavaParsingKeyword.PACKAGE.value:
            continue
        read_ahead_string = self.create_read_ahead_string(obj, following)
        package_name = pp.Word(pp.alphanums + CoreParsingKeyword.DOT.value)
        expression_to_match = (
            pp.Keyword(JavaParsingKeyword.PACKAGE.value)
            + package_name.setResultsName(JavaParsingKeyword.PACKAGE_NAME.value)
            + pp.FollowedBy(CoreParsingKeyword.SEMICOLON.value)
        )
        try:
            parsing_result = expression_to_match.parseString(read_ahead_string)
        except Exception as some_exception:
            # Best-effort: log the miss and keep scanning later tokens.
            result.analysis.statistics.increment(
                Statistics.Key.PARSING_MISSES)
            LOGGER.warning(
                f'warning: could not parse result {result=}\n{some_exception}'
            )
            LOGGER.warning(f'next tokens: {obj} {following[:10]}')
            continue
        result.module_name = parsing_result.package_name
        result.analysis.statistics.increment(Statistics.Key.PARSING_HITS)
        LOGGER.debug(
            f'package found: {parsing_result.package_name} and added to result'
        )
def NearbyObject(player, priority=None):
    """
    Matches an object in the player's inventory or the player's location.
    Accepts "my" keyword to specify inventory.

    Args:
        player: A Player to search near.
        priority: If "room" or "inventory", the search will favor matches
            there.  If None, perfect matches in either set are preferred
            over partial matches in either.
    """
    if priority is None:
        favored = ObjectIn(player, player.location, exact=True)
    elif priority == "inventory":
        favored = ObjectIn(player)
    elif priority == "room":
        favored = ObjectIn(player.location)
    else:
        raise ValueError("Unknown priority ({}), expected 'room' or "
                         "'inventory'".format(priority))
    # Order matters: the favored matcher is tried first, then an explicit
    # "my"-prefixed inventory lookup, then self/here, then a loose search.
    alternatives = [
        favored.setName('nearby object'),
        pyp.Suppress(pyp.Keyword('my'))
        + ObjectIn(player).setName('object in your inventory'),
        Me(player),
        Here(player),
        ObjectIn(player, player.location, exact=False, location=True),
    ]
    return MatchFirst(alternatives).setName('nearby object')
class Port(Node):
    """ Description of a port on a sink """

    # Maps friendly attribute names to the result names assigned below in
    # __syntax__ — presumably consumed by the Node base class (not visible
    # here; confirm against its implementation).
    __fragments__ = {
        'name': 'port-name',
        'label': 'port-label',
        'priority': 'port-priority',
        'availability': 'port-availability'
    }

    # Grammar for one port description line: a port name, a colon, a
    # free-text label, then a parenthesized priority and availability.
    __syntax__ = (
        p.Word(p.alphanums + "-;").setResultsName('port-name')
        + p.Suppress(':')
        # This part was very tricky to write. The label is basically
        # arbitrary localized Unicode text. We want to grab all of it in
        # one go but without consuming the upcoming '(' character or the
        # space that comes immediately before.
        #
        # The syntax here combines a sequence of words, as defined by
        # anything other than a space and '(', delimited by a single
        # whitespace.
        + p.delimitedList(p.Regex('[^ (\n]+'), ' ', combine=True).setResultsName('port-label')
        + p.Suppress('(')
        + p.Keyword('priority').suppress()
        + p.Suppress(':')
        # Priority is a plain integer; converted during parsing.
        + p.Word(p.nums).setParseAction(lambda t: int(t[0])).setResultsName(
            'port-priority')
        # Availability text is optional; when absent it maps to ''.
        + p.MatchFirst([
            p.Suppress(',') + p.Literal('not available'),
            p.Suppress(',') + p.Literal('available'),
            p.Empty().setParseAction(lambda t: '')
        ]).setResultsName('port-availability')
        + p.Suppress(')')).setResultsName("port")
class GroovyFunctionParser(object):
    """
    Given a string containing a single function definition this class
    will parse the function definition and return information regarding it.
    """

    # Simple Groovy sub-grammar definitions
    KeywordDef = pyparsing.Keyword('def')
    VarName = pyparsing.Regex(r'[A-Za-z_]\w*')
    FuncName = VarName
    FuncDefn = KeywordDef + FuncName + "(" + pyparsing.delimitedList(VarName) + ")" + "{"

    @classmethod
    def parse(cls, data):
        """
        Parse the given function definition and return information regarding
        the contained definition.

        :param data: The function definition in a string
        :type data: str | basestring
        :rtype: GroovyFunction | None

        Returns None when *data* does not match the function grammar.
        """
        try:
            # Only the grammar match can legitimately fail; keep the try
            # minimal and catch the specific parse error instead of the
            # previous blanket `except Exception` that hid real bugs.
            result = cls.FuncDefn.parseString(data)
        except pyparsing.ParseException:
            return None
        result_list = result.asList()
        # Tokens are: 'def', name, '(', arg..., ')', '{' — the args are
        # everything between '(' and ')'.
        args = result_list[3:result_list.index(')')]
        # Strip everything up to and including the first '{' to isolate
        # the function body (single- or multi-line).
        fn_body = re.sub(r'[^\{]+\{', '', data, count=1)
        parts = fn_body.strip().split('\n')
        # Drop the final line, which holds the closing '}'.
        fn_body = '\n'.join(parts[0:-1])
        return GroovyFunction(result[1], args, fn_body, data)
def _band_parser():
    """Parser for a BAND block: keyword, title, settings, and the k-path."""
    keyword = pp.Keyword("BAND")
    title = pp.Word(pp.alphas)('title')
    settings = pp.Group(pc.integer * 7)('settings')
    # Each path leg is a pair of k-points, each given as three integers.
    kpoint = pp.Group(pc.integer * 3)
    leg = pp.Group(kpoint * 2)
    return keyword + title + settings + pp.OneOrMore(leg)('path')