def parse_file(file_name):
    """Parse a C++-like interface/header file into a pyparsing result tree.

    Recognizes namespaces, (optionally templated) class/struct definitions,
    `enum class` declarations with an explicit underlying type, and data
    members with optional `[[...]]` attributes and default values.  C and
    C++ style comments are ignored.

    Args:
        file_name: path of the file to parse.

    Returns:
        pyparsing.ParseResults for the whole file (parseAll=True, so any
        trailing unparsed text raises ParseException).
    """
    number = pp.Word(pp.nums)
    identifier = pp.Word(pp.alphas + "_", pp.alphanums + "_")
    lbrace = pp.Literal('{').suppress()
    rbrace = pp.Literal('}').suppress()
    cls = pp.Keyword('class')
    colon = pp.Literal(":")
    semi = pp.Literal(";").suppress()
    langle = pp.Literal("<")
    rangle = pp.Literal(">")
    equals = pp.Literal("=")
    comma = pp.Literal(",")
    lparen = pp.Literal("(")
    rparen = pp.Literal(")")
    lbrack = pp.Literal("[")
    rbrack = pp.Literal("]")
    mins = pp.Literal("-")
    struct = pp.Keyword('struct')
    template = pp.Keyword('template')
    final = pp.Keyword('final')("final")
    stub = pp.Keyword('stub')("stub")

    # A possibly namespace-qualified name such as "std::size_t".
    with_colon = pp.Word(pp.alphanums + "_" + ":")
    btype = with_colon

    # A type is either a plain name or a template instantiation; Forward is
    # needed because template arguments are themselves types.
    # (Renamed from `type` to avoid shadowing the builtin; the result name
    # "type" used by callers is unchanged.)
    type_ = pp.Forward()
    tmpl = pp.Group(
        btype("template_name") + langle.suppress() +
        pp.Group(pp.delimitedList(type_)) + rangle.suppress())
    type_ << (tmpl | btype)

    enum_lit = pp.Keyword('enum')
    enum_class = pp.Group(enum_lit + cls)
    ns = pp.Keyword("namespace")

    # Enumerator initializer: "= [-]N".
    enum_init = equals.suppress() + pp.Optional(mins) + number
    enum_value = pp.Group(identifier + pp.Optional(enum_init))
    # Trailing comma after the last enumerator is tolerated.
    enum_values = pp.Group(
        lbrace + pp.delimitedList(enum_value) + pp.Optional(comma) + rbrace)

    content = pp.Forward()

    # A member is either a field name or a method-like name "name()".
    member_name = pp.Combine(
        pp.Group(identifier + pp.Optional(lparen + rparen)))

    # [[attribute]] — the payload between the double brackets is kept raw.
    attrib = pp.Group(
        lbrack.suppress() + lbrack.suppress() + pp.SkipTo(']') +
        rbrack.suppress() + rbrack.suppress())
    opt_attribute = pp.Optional(attrib)("attribute")

    namespace = pp.Group(
        ns("type") + identifier("name") + lbrace +
        pp.Group(pp.OneOrMore(content))("content") + rbrace)

    enum = pp.Group(
        enum_class("type") + identifier("name") + colon.suppress() +
        identifier("underline_type") + enum_values("enum_values") +
        pp.Optional(semi).suppress())

    default_value = equals.suppress() + pp.SkipTo(';')

    class_member = pp.Group(
        type_("type") + member_name("name") + opt_attribute +
        pp.Optional(default_value)("default") + semi.suppress())("member")

    template_param = pp.Group(identifier("type") + identifier("name"))
    template_def = pp.Group(
        template + langle +
        pp.Group(pp.delimitedList(template_param))("params") + rangle)

    class_content = pp.Forward()
    class_def = pp.Group(
        pp.Optional(template_def)("template") + (cls | struct)("type") +
        with_colon("name") + pp.Optional(final) + pp.Optional(stub) +
        opt_attribute + lbrace +
        pp.Group(pp.ZeroOrMore(class_content))("members") + rbrace +
        pp.Optional(semi))

    content << (enum | class_def | namespace)
    class_content << (enum | class_def | class_member)

    # Name the main expressions so pyparsing error messages are readable.
    # (The original indexed into locals() by string, which is fragile and
    # non-idiomatic; explicit pairs do the same thing safely.)
    for expr, expr_name in (
            (enum, "enum"),
            (class_def, "class_def"),
            (class_member, "class_member"),
            (content, "content"),
            (namespace, "namespace"),
            (template_def, "template_def")):
        expr.setName(expr_name)

    rt = pp.OneOrMore(content)
    singleLineComment = "//" + pp.restOfLine
    rt.ignore(singleLineComment)
    rt.ignore(pp.cStyleComment)
    return rt.parseFile(file_name, parseAll=True)
def jsParse(inStr):
    """Parse a JavaScript object literal and return the Python equivalent.

    This disaster is a context-free grammar parser for parsing javascript
    object literals.  It needs to be able to handle a lot of the
    definitional messes you find in in-the-wild javascript object literals.
    Unfortunately, Javascript is /way/ more tolerant then JSON when it
    comes to object literals so we can't just parse objects using python's
    `json` library (bare identifiers as keys, trailing commas, comments,
    elided array elements, ...).

    Args:
        inStr: string containing a single JS value/object/array literal.

    Returns:
        The corresponding Python object (dict/list/str/int/float/bool/None).

    Raises:
        pyparsing.ParseException: if inStr is not a recognizable literal.
    """
    TRUE = pp.Keyword("true").setParseAction(pp.replaceWith(True))
    FALSE = pp.Keyword("false").setParseAction(pp.replaceWith(False))
    NULL = pp.Keyword("null").setParseAction(pp.replaceWith(None))

    jsonString = pp.quotedString.setParseAction(pp.removeQuotes)
    jsonNumber = pp.Combine(
        pp.Optional('-') + ('0' | pp.Word('123456789', pp.nums)) +
        pp.Optional('.' + pp.Word(pp.nums)) +
        pp.Optional(pp.Word('eE', exact=1) + pp.Word(pp.nums + '+-', pp.nums)))

    jsonObject = pp.Forward()
    jsonValue = pp.Forward()
    jsonDict = pp.Forward()
    jsonArray = pp.Forward()
    jsonElements = pp.Forward()

    # Bare (unquoted) identifier, legal as a key in JS object literals.
    rawText = pp.Regex('[a-zA-Z_$][0-9a-zA-Z_$]*')

    # A lone comma inside an array denotes an elided element -> None.
    commaToNull = pp.Word(',,', exact=1).setParseAction(pp.replaceWith(None))

    jsonElements << pp.ZeroOrMore(commaToNull) + pp.Optional(
        jsonObject) + pp.ZeroOrMore(
            (pp.Suppress(',') + jsonObject) | commaToNull)

    jsonValue << (jsonString | jsonNumber | TRUE | FALSE | NULL)

    dictMembers = pp.delimitedList(
        pp.Group((rawText | jsonString) + pp.Suppress(':') +
                 (jsonValue | jsonDict | jsonArray)))

    # Trailing commas before '}' are tolerated (JS allows them).
    jsonDict << (pp.Dict(
        pp.Suppress('{') + pp.Optional(dictMembers) +
        pp.ZeroOrMore(pp.Suppress(',')) + pp.Suppress('}')))
    jsonArray << (pp.Group(
        pp.Suppress('[') + pp.Optional(jsonElements) + pp.Suppress(']')))
    jsonObject << (jsonValue | jsonDict | jsonArray)

    jsonComment = pp.cppStyleComment
    jsonObject.ignore(jsonComment)

    def convertDict(s, l, toks):
        # Purpose: turn the Dict token pairs into a real Python dict.
        return dict(toks.asList())

    def convertNumbers(s, l, toks):
        # Prefer int; fall back to float for values with '.'/exponent.
        n = toks[0]
        try:
            return int(n)
        except ValueError:
            return float(n)

    jsonNumber.setParseAction(convertNumbers)
    jsonDict.setParseAction(convertDict)

    # BUGFIX: removed a leftover debug call that parsed the literal string
    # '"inStr"' (and discarded the result) on every invocation.
    return jsonObject.parseString(inStr).pop()
def _multiword_argument():
    """Grammar for two or more consecutive variables.

    The matched variables are grouped and converted into a
    pre_ast.CompositeBlock node via the util.action parse action.
    """
    words = _variable() + pyparsing.OneOrMore(_variable())
    grouped = pyparsing.Group(words)
    return grouped.setParseAction(util.action(pre_ast.CompositeBlock))
+ pp.Optional(dot + basic_expression) + pp.Optional(pp.OneOrMore(operator) + basic_expression)) expression << pp.Optional(operator) + basic_expression mark = pp.Empty().setParseAction(lambda loc, t: loc) full_expression = (mark + expression.suppress() + mark) \ .setParseAction(lambda s, loc, t: s[t[0]:t[1]].strip()) expressions = pp.delimitedList(full_expression, ";") typename << (pp.ZeroOrMore(ident + pp.Optional(template) + pp.Optional(pp.Literal("*")))) edge_config_param = lpar + full_expression + rpar edge_config_item = pp.Group(ident + pp.Optional(edge_config_param, None)) edge_config = lbracket + pp.Group(pp.delimitedList(edge_config_item, ",")) + rbracket edge_inscription = pp.Group( pp.Optional(edge_config, ()) + pp.Optional(full_expression, None) + pp.Optional(pp.Suppress("@") + full_expression, None)) edge_expr = pp.delimitedList(edge_inscription, ";") init_by_expressions = (lbracket + expressions + rbracket) \ .setParseAction(lambda t: ("exprs", tuple(t))) init_by_vector = pp.ParseElementEnhance(full_expression).setParseAction( lambda t: ("vector", t[0])) init_expression = init_by_expressions | init_by_vector
def grammar():
    """Define the query grammar for the external backend that reads host files.

    A query is a single filename token; allowed characters are letters,
    digits and the punctuation '.-_/()'.
    """
    filename_chars = pp.alphanums + '.-_/()'
    filename = pp.Word(filename_chars)('filename')
    return pp.Group(filename)
class SkyDriveOldLogParser(text_parser.PyparsingSingleLineTextParser):
    """Parse SkyDrive old log files."""

    NAME = u'skydrive_log_old'
    DESCRIPTION = u'Parser for OneDrive (or SkyDrive) old log files.'

    _ENCODING = u'UTF-8-SIG'

    _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
    _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

    # Common SDOL (SkyDriveOldLog) pyparsing objects.
    _SDOL_COLON = pyparsing.Literal(u':')
    _SDOL_EXCLAMATION = pyparsing.Literal(u'!')

    # Date and time format used in the header is: DD-MM-YYYY hhmmss.###
    # For example: 08-01-2013 21:22:28.999
    # BUGFIX: the first two-digit token is the day (DD-MM-YYYY), but it was
    # labeled 'month' and unpacked month-first in _ParseLogline, while
    # VerifyStructure unpacked it day-first — the two methods disagreed and
    # _ParseLogline produced swapped day/month values.  The labels and the
    # unpack order in _ParseLogline now both follow the documented format.
    _SDOL_DATE_TIME = pyparsing.Group(
        _TWO_DIGITS.setResultsName(u'day_of_month') + pyparsing.Suppress(u'-') +
        _TWO_DIGITS.setResultsName(u'month') + pyparsing.Suppress(u'-') +
        _FOUR_DIGITS.setResultsName(u'year') +
        text_parser.PyparsingConstants.TIME_MSEC_ELEMENTS).setResultsName(
            u'date_time')

    _SDOL_SOURCE_CODE = pyparsing.Combine(
        pyparsing.CharsNotIn(u':') +
        _SDOL_COLON +
        text_parser.PyparsingConstants.INTEGER +
        _SDOL_EXCLAMATION +
        pyparsing.Word(pyparsing.printables)).setResultsName(u'source_code')

    _SDOL_LOG_LEVEL = (
        pyparsing.Literal(u'(').suppress() +
        pyparsing.SkipTo(u')').setResultsName(u'log_level') +
        pyparsing.Literal(u')').suppress())

    _SDOL_LINE = (
        _SDOL_DATE_TIME + _SDOL_SOURCE_CODE + _SDOL_LOG_LEVEL +
        _SDOL_COLON +
        pyparsing.SkipTo(pyparsing.lineEnd).setResultsName(u'text'))

    # Sometimes the timestamped log line is followed by an empty line,
    # then by a file name plus other data and finally by another empty
    # line. It could happen that a logline is split in two parts.
    # These lines will not be discarded and an event will be generated
    # ad-hoc (see source), based on the last one if available.
    _SDOL_NO_HEADER_SINGLE_LINE = (
        pyparsing.Optional(pyparsing.Literal(u'->').suppress()) +
        pyparsing.SkipTo(pyparsing.lineEnd).setResultsName(u'text'))

    # Define the available log line structures.
    LINE_STRUCTURES = [
        (u'logline', _SDOL_LINE),
        (u'no_header_single_line', _SDOL_NO_HEADER_SINGLE_LINE),
    ]

    def __init__(self):
        """Initializes a parser object."""
        super(SkyDriveOldLogParser, self).__init__()
        # Remembered so that header-less continuation lines can inherit
        # the timestamp and offset of the previous timestamped line.
        self._last_date_time = None
        self._last_event_data = None
        self.offset = 0

    def _ParseLogline(self, parser_mediator, structure):
        """Parse a logline and store appropriate attributes.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfvfs.
            structure (pyparsing.ParseResults): structure of tokens derived
                from a line of a text file.
        """
        # TODO: Verify if date and time value is locale dependent.
        # Tokens appear in grammar order: day, month, year, time elements.
        day_of_month, month, year, hours, minutes, seconds, milliseconds = (
            structure.date_time)

        time_elements_tuple = (
            year, month, day_of_month, hours, minutes, seconds, milliseconds)

        try:
            date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
                time_elements_tuple=time_elements_tuple)
        except ValueError:
            parser_mediator.ProduceExtractionError(
                u'invalid date time value: {0!s}'.format(structure.date_time))
            return

        event_data = SkyDriveOldLogEventData()
        event_data.log_level = structure.log_level
        event_data.offset = self.offset
        event_data.source_code = structure.source_code
        event_data.text = structure.text

        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_ADDED)
        parser_mediator.ProduceEventWithEventData(event, event_data)

        self._last_date_time = date_time
        self._last_event_data = event_data

    def _ParseNoHeaderSingleLine(self, parser_mediator, structure):
        """Parse an isolated header line and store appropriate attributes.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfvfs.
            structure (pyparsing.ParseResults): structure of tokens derived
                from a line of a text file.
        """
        if not self._last_event_data:
            logging.debug(
                u'SkyDrive, found isolated line with no previous events')
            return

        event_data = SkyDriveOldLogEventData()
        event_data.offset = self._last_event_data.offset
        event_data.text = structure.text

        event = time_events.DateTimeValuesEvent(
            self._last_date_time, definitions.TIME_DESCRIPTION_ADDED)
        parser_mediator.ProduceEventWithEventData(event, event_data)

        # TODO think to a possible refactoring for the non-header lines.
        self._last_date_time = None
        self._last_event_data = None

    def ParseRecord(self, parser_mediator, key, structure):
        """Parse each record structure and return an EventObject if applicable.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfvfs.
            key (str): identifier of the structure of tokens.
            structure (pyparsing.ParseResults): structure of tokens derived
                from a line of a text file.

        Raises:
            ParseError: when the structure type is unknown.
        """
        if key not in (u'logline', u'no_header_single_line'):
            raise errors.ParseError(
                u'Unable to parse record, unknown structure: {0:s}'.format(
                    key))

        if key == u'logline':
            self._ParseLogline(parser_mediator, structure)
        elif key == u'no_header_single_line':
            self._ParseNoHeaderSingleLine(parser_mediator, structure)

    def VerifyStructure(self, parser_mediator, line):
        """Verify that this file is a SkyDrive old log file.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfvfs.
            line (bytes): line from a text file.

        Returns:
            bool: True if the line is in the expected format, False if not.
        """
        try:
            structure = self._SDOL_LINE.parseString(line)
        except pyparsing.ParseException:
            logging.debug(u'Not a SkyDrive old log file')
            return False

        day_of_month, month, year, hours, minutes, seconds, milliseconds = (
            structure.date_time)

        time_elements_tuple = (
            year, month, day_of_month, hours, minutes, seconds, milliseconds)

        try:
            dfdatetime_time_elements.TimeElementsInMilliseconds(
                time_elements_tuple=time_elements_tuple)
        except ValueError:
            logging.debug(
                u'Not a SkyDrive old log file, invalid date and time: {0!s}'.
                format(structure.date_time))
            return False

        return True
pyparsing.Literal('char'), pyparsing.Literal('int'), pyparsing.Literal('long'), pyparsing.Literal('double') ]), pyparsing.ZeroOrMore(pyparsing.Literal('*')) ])) domains = pyparsing.Group( pyparsing.And([ datatypes, pyparsing.Optional( pyparsing.And([ pyparsing.Literal('[').suppress(), pyparsing.Or([integers, identifiers]), pyparsing.ZeroOrMore( pyparsing.And([ pyparsing.Literal(',').suppress(), pyparsing.Or([integers, identifiers]) ])), pyparsing.Literal(']').suppress() ])) ])) ranges = pyparsing.Group( pyparsing.And([ datatypes, pyparsing.Optional( pyparsing.And([ pyparsing.Literal('[').suppress(), integers,
def parse_specialnets(self):
    """Return a pyparsing grammar for the DEF SPECIALNETS section.

    Matches 'SPECIALNETS numNets ; ... END SPECIALNETS', where each net
    starts with '-', lists its name and connected (comp pin) pairs, and may
    carry special wiring (shapes/vias/routed segments) plus VOLTAGE, SOURCE,
    FIXEDBUMP, ORIGINAL, USE, PATTERN, ESTCAP, WEIGHT and PROPERTY clauses.

    Returns:
        pyparsing element whose results are under 'SPECIALNETS'.
    """
    # GLOBALS for this class
    EOL = pp.LineEnd().suppress()
    linebreak = pp.Suppress(";" + pp.LineEnd())
    identifier = pp.Word(pp.alphanums + '._“!<>/[]$#$%&‘*+,/:<=>?@[\]^_`{|}~')  # CONFLICT with '();'
    number = pp.pyparsing_common.number
    word = pp.Word(pp.alphas)
    LPAR = pp.Suppress('(')
    RPAR = pp.Suppress(')')
    ORIENT = (pp.Keyword('N') | pp.Keyword('S') | pp.Keyword('E') | pp.Keyword('W') |
              pp.Keyword('FN') | pp.Keyword('FS') | pp.Keyword('FE') | pp.Keyword('FW'))
    pt = LPAR + pp.OneOrMore(number | pp.Keyword('*')) + RPAR  # pair of x,y

    specialnets_id = pp.Suppress(pp.Keyword('SPECIALNETS'))
    end_specialnets_id = pp.Keyword("END SPECIALNETS").suppress()
    begin_specialnet = pp.Suppress(pp.Keyword('-'))
    ws_snet = pp.Suppress(pp.Keyword('+'))  # parameter division in NETS

    # netName: net name followed by "( compName pinName [+ SYNTHESIZED] )" pairs.
    netName_1 = pp.Group(LPAR +
                         identifier('compName') +
                         identifier('pinName') +
                         pp.Optional(ws_snet + pp.Keyword('SYNTHESIZED'))('SYNTHESIZED') +
                         RPAR)
    netName = identifier('netName') + pp.ZeroOrMore(netName_1).setResultsName('nets')

    # MASK keyword used inside routing points.
    MASK_id = pp.Keyword('MASK')

    # routingPoints
    # BUGFIX: the original routingPoints_1 used a Group that already
    # contained 'MASK' + number AND appended another number('maskNum'),
    # requiring "MASK <num> <num>".  DEF routing points carry exactly one
    # mask number after MASK (as parse_nets correctly does).
    routingPoints_1 = (pp.Optional(MASK_id('MASK') + number('maskNum')) +
                       pp.Group(pt))
    routingPoints_2 = (pp.Optional(MASK_id('MASK') + number('viaMaskNum')) +
                       pp.NotAny(pp.Keyword('NEW') | pp.Keyword('RECT')) +
                       identifier('viaName') +
                       pp.Optional(ORIENT('orient')) +
                       pp.Optional(pp.Suppress(pp.Keyword('DO')) + number('numX') +
                                   pp.Suppress(pp.Keyword('BY')) + number('numY') +
                                   pp.Suppress(pp.Keyword('STEP')) + number('stepX') +
                                   number('stepY')))
    routingPoints = (pp.Group(pt) +
                     pp.OneOrMore(routingPoints_1 | routingPoints_2))

    specialWiring_placement = (ws_snet +
                               ((pp.Keyword('COVER')('PLACEMENT')) |
                                (pp.Keyword('FIXED')('PLACEMENT')) |
                                (pp.Keyword('ROUTED')('PLACEMENT')) |
                                (pp.Keyword('SHIELD')('PLACEMENT') +
                                 identifier('shieldNetName'))))

    # Geometry-only special wiring: POLYGON / RECT / VIA.
    specialWiring_1 = (pp.Optional(specialWiring_placement) +
                       pp.Optional(ws_snet + pp.Keyword('SHAPE') +
                                   identifier('shapeType')) +
                       pp.Optional(ws_snet + pp.Keyword('MASK') +
                                   number('maskNum')) +
                       ((ws_snet + pp.Keyword('POLYGON') +
                         identifier('layerName') + pp.OneOrMore(pt)) |
                        (ws_snet + pp.Keyword('RECT') +
                         identifier('layerName') + pt + pt) |
                        (ws_snet + pp.Keyword('VIA') +
                         identifier('viaName') +
                         pp.Optional(ORIENT('orient')) + pp.OneOrMore(pt))))

    SHAPE_elems = (pp.Keyword('RING') | pp.Keyword('PADRING') |
                   pp.Keyword('BLOCKRING') | pp.Keyword('STRIPE') |
                   pp.Keyword('FOLLOWPIN') | pp.Keyword('IOWIRE') |
                   pp.Keyword('COREWIRE') | pp.Keyword('BLOCKWIRE') |
                   pp.Keyword('BLOCKAGEWIRE') | pp.Keyword('FILLWIRE') |
                   pp.Keyword('FILLWIREOPC') | pp.Keyword('DRCFILL'))

    # Routed special wiring: layer, width, then routing points, with NEW
    # segments for subsequent layers.
    specialWiring_2 = (specialWiring_placement +
                       identifier('layerName') +
                       number('routeWidth') +
                       pp.Optional(ws_snet + pp.Keyword('SHAPE') +
                                   SHAPE_elems('SHAPE')) +
                       pp.Optional(ws_snet + pp.Keyword('STYLE') +
                                   number('styleNum')) +
                       routingPoints('routingPoints') +
                       pp.Group(pp.ZeroOrMore(
                           pp.Group(pp.Keyword('NEW') +
                                    identifier('layerName') +
                                    number('routeWidth') +
                                    pp.Optional(ws_snet + pp.Keyword('SHAPE') +
                                                SHAPE_elems('SHAPE')) +
                                    # BUGFIX: styleNum is numeric, matching
                                    # the outer STYLE clause above (was
                                    # identifier in the NEW branch only).
                                    pp.Optional(ws_snet + pp.Keyword('STYLE') +
                                                number('styleNum')) +
                                    routingPoints('routingPoints'))))('NEW'))

    specialWiring = pp.Group(pp.OneOrMore(specialWiring_1 | specialWiring_2))('specialWiring')

    VOLTAGE = ws_snet + pp.Keyword('VOLTAGE') + number('VOLTAGE')
    SOURCE = ws_snet + pp.Keyword('SOURCE') + (pp.Keyword('DIST') |
                                               pp.Keyword('NETLIST') |
                                               pp.Keyword('TIMING') |
                                               pp.Keyword('USER'))
    FIXEDBUMP = ws_snet + pp.Keyword('FIXEDBUMP')('FIXEDBUMP')
    ORIGINAL = ws_snet + pp.Keyword('ORIGINAL') + identifier('ORIGINAL_netName')
    USE_ids = (pp.Keyword('ANALOG') | pp.Keyword('CLOCK') |
               pp.Keyword('GROUND') | pp.Keyword('POWER') |
               pp.Keyword('RESET') | pp.Keyword('SCAN') |
               pp.Keyword('SIGNAL') | pp.Keyword('TIEOFF'))
    USE = ws_snet + pp.Keyword('USE') + USE_ids('USE')
    PATTERN_ids = (pp.Keyword('BALANCED') | pp.Keyword('STEINER') |
                   pp.Keyword('TRUNK') | pp.Keyword('WIREDLOGIC'))
    PATTERN = ws_snet + pp.Keyword('PATTERN') + PATTERN_ids('PATTERN')
    ESTCAP = ws_snet + pp.Keyword('ESTCAP') + number('ESTCAP_wireCapacitance')
    WEIGHT = ws_snet + pp.Keyword('WEIGHT') + number('WEIGHT')
    PROPERTY = pp.Group(ws_snet + pp.Keyword('PROPERTY') +
                        pp.OneOrMore(identifier('propName') +
                                     number('propVal')))('PROPERTY')

    specialnet = pp.Group(begin_specialnet +
                          netName +
                          pp.Optional(VOLTAGE) +
                          pp.ZeroOrMore(specialWiring) +
                          pp.Optional(SOURCE) +
                          pp.Optional(FIXEDBUMP) +
                          pp.Optional(ORIGINAL) +
                          pp.Optional(USE) +
                          pp.Optional(PATTERN) +
                          pp.Optional(ESTCAP) +
                          pp.Optional(WEIGHT) +
                          pp.ZeroOrMore(PROPERTY) +
                          linebreak
                          ).setResultsName('specialnets', listAllMatches=True)

    specialnets = pp.Group(specialnets_id +
                           number('numNets') + linebreak +
                           pp.ZeroOrMore(specialnet) +
                           pp.Suppress(end_specialnets_id)
                           ).setResultsName('SPECIALNETS')

    return specialnets
p.Suppress(',') + p.Literal('available'), p.Empty().setParseAction(lambda t: '') ]).setResultsName('port-availability') + p.Suppress(')') ).setResultsName("port") # ================= # Shared Attributes # ================= PropertyAttributeValue = ( p.Group( p.OneOrMore( p.LineStart().suppress() + p.Optional(p.White('\t')).suppress() + p.Optional(Property.Syntax) + p.LineEnd().suppress() ) ).setResultsName("attribute-value")) @class_with_syntax class PortWithProfile(Node): """ Variant of :class:`Port` that is used by "card" records inside the "Ports" property. It differs from the normal port syntax by having different entries inside the last section. Availability is not listed here, only priority. Priority does not have a colon before the actual number. This port is followed by profile assignment. """ __fragments__ = {
def parse_pins(self):
    """Return a pyparsing grammar for the DEF PINS section.

    Matches 'PINS numPins ; ... END PINS'.  Each pin record begins with '-',
    names the pin and its net, and may carry SPECIAL, DIRECTION, NETEXPR,
    sensitivity, USE, antenna and placement (PORT/LAYER/POLYGON/VIA/
    COVER/FIXED/PLACED) clauses.

    Returns:
        pyparsing element whose results are under 'PINS'.
    """
    # GLOBALS for this class
    EOL = pp.LineEnd().suppress()
    linebreak = pp.Suppress(";" + pp.LineEnd())
    identifier = pp.Word(pp.alphanums + '._“!<>/[]$#$%&‘*+,/:<=>?@[\]^_`{|}~')  # CONFLICT with '();'
    number = pp.pyparsing_common.number
    word = pp.Word(pp.alphas)
    LPAR = pp.Suppress('(')
    RPAR = pp.Suppress(')')
    ORIENT = (pp.Keyword('N') | pp.Keyword('S') | pp.Keyword('E') | pp.Keyword('W') |
              pp.Keyword('FN') | pp.Keyword('FS') | pp.Keyword('FE') | pp.Keyword('FW'))
    pt = LPAR + pp.OneOrMore(number | pp.Keyword('*')) + RPAR  # pair of x,y

    pins_id = pp.Keyword('PINS')
    end_pins_id = pp.Keyword("END PINS").suppress()
    begin_pin = pp.Keyword('-')
    ws_pin = pp.Suppress(pp.Keyword('+'))  # parameter division in pins

    # pinName: "- pinName + NET netName"
    pinName = (identifier('pin_name') + ws_pin +
               pp.Keyword('NET') + identifier('netName'))

    # SPECIAL flag.
    SPECIAL = (ws_pin + pp.Keyword('SPECIAL')('SPECIAL'))

    # DIRECTION clause.
    DIRECTION_ids = (pp.Keyword('INPUT') | pp.Keyword('OUTPUT') |
                     pp.Keyword('INOUT') | pp.Keyword('FEEDTHRU'))
    DIRECTION = (ws_pin + pp.Keyword('DIRECTION') +
                 pp.OneOrMore(DIRECTION_ids('DIRECTION')))

    # NETEXPR clause.
    NETEXPR = pp.Group(ws_pin + pp.Keyword('NETEXPR') +
                       pp.OneOrMore(word)('net_expr')
                       ).setResultsName('NETEXPR')

    # Supply/ground sensitivity clauses.
    SUPPLYSENSITIVITY = pp.Group(ws_pin + pp.Keyword('SUPPLYSENSITIVITY') +
                                 identifier('supply_sensitivity')
                                 ).setResultsName('SUPPLYSENSITIVITY')
    GROUNDSENSITIVITY = pp.Group(ws_pin + pp.Keyword('GROUNDSENSITIVITY') +
                                 identifier('ground_sensitivity')
                                 ).setResultsName('GROUNDSENSITIVITY')

    # USE clause.
    USE_ids = (pp.Keyword('SIGNAL') | pp.Keyword('POWER') |
               pp.Keyword('GROUND') | pp.Keyword('CLOCK') |
               pp.Keyword('TIEOFF') | pp.Keyword('ANALOG') |
               pp.Keyword('SCAN') | pp.Keyword('RESET'))
    USE = (ws_pin + pp.Keyword('USE') + pp.OneOrMore(USE_ids('USE')))

    # Shared "LAYER layerName" tail used by the antenna clauses below.
    LAYER_layerName = pp.Keyword('LAYER') + identifier('layerName')

    # Antenna clauses: a value plus (sometimes optional) layer.
    ANTENNAPINPARTIALMETALAREA = pp.Group(
        ws_pin + pp.Keyword('ANTENNAPINPARTIALMETALAREA') + number +
        pp.Optional(LAYER_layerName)).setResultsName('ANTENNAPINPARTIALMETALAREA')
    ANTENNAPINPARTIALMETALSIDEAREA = pp.Group(
        ws_pin + pp.Keyword('ANTENNAPINPARTIALMETALSIDEAREA') + number +
        pp.Optional(LAYER_layerName)).setResultsName('ANTENNAPINPARTIALMETALSIDEAREA')
    ANTENNAPINPARTIALCUTAREA = pp.Group(
        ws_pin + pp.Keyword('ANTENNAPINPARTIALCUTAREA') + number +
        pp.Optional(LAYER_layerName)).setResultsName('ANTENNAPINPARTIALCUTAREA')
    ANTENNAPINDIFFAREA = pp.Group(
        ws_pin + pp.Keyword('ANTENNAPINDIFFAREA') + number +
        pp.Optional(LAYER_layerName)).setResultsName('ANTENNAPINDIFFAREA')

    ANTENNAMODEL_ids = (pp.Keyword('OXIDE1') | pp.Keyword('OXIDE2') |
                        pp.Keyword('OXIDE3') | pp.Keyword('OXIDE4'))
    ANTENNAMODEL = pp.Group(
        ws_pin + pp.Keyword('ANTENNAMODEL') + ANTENNAMODEL_ids
        ).setResultsName('ANTENNAMODEL')

    ANTENNAPINGATEAREA = pp.Group(
        ws_pin + pp.Keyword('ANTENNAPINGATEAREA') + number +
        pp.Optional(LAYER_layerName)).setResultsName('ANTENNAPINGATEAREA')
    # For the *CAR clauses the layer is mandatory.
    ANTENNAPINMAXAREACAR = pp.Group(
        ws_pin + pp.Keyword('ANTENNAPINMAXAREACAR') + number +
        LAYER_layerName).setResultsName('ANTENNAPINMAXAREACAR')
    ANTENNAPINMAXSIDEAREACAR = pp.Group(
        ws_pin + pp.Keyword('ANTENNAPINMAXSIDEAREACAR') + number +
        LAYER_layerName).setResultsName('ANTENNAPINMAXSIDEAREACAR')
    ANTENNAPINMAXCUTCAR = pp.Group(
        ws_pin + pp.Keyword('ANTENNAPINMAXCUTCAR') + number +
        LAYER_layerName).setResultsName('ANTENNAPINMAXCUTCAR')

    # PLACEMENT_PINS: geometry and placement alternatives.
    PORT = (ws_pin + pp.Keyword('PORT')('PORT'))
    MASK = pp.Group(pp.Suppress(pp.Keyword('MASK')) +
                    number('maskNum')).setResultsName('MASK')
    SPACING = pp.Group(pp.Suppress(pp.Keyword('SPACING')) +
                       number('minSpacing')).setResultsName('SPACING')
    DESIGNRULEWIDTH = pp.Group(pp.Suppress(pp.Keyword('DESIGNRULEWIDTH')) +
                               number('effectiveWidth')
                               ).setResultsName('DESIGNRULEWIDTH')
    LAYER = pp.Group(ws_pin + pp.Suppress(pp.Keyword('LAYER')) +
                     identifier('layerName') +
                     pp.Optional(MASK) +
                     pp.Optional(SPACING | DESIGNRULEWIDTH) +
                     pp.OneOrMore(pp.Group(pt))('coord')
                     ).setResultsName('LAYER')
    POLYGON = pp.Group(ws_pin + pp.Suppress(pp.Keyword('POLYGON')) +
                       identifier('layerName') +
                       pp.Optional(MASK) +
                       pp.Optional(SPACING | DESIGNRULEWIDTH) +
                       pp.OneOrMore(pp.Group(pt))('coord')
                       ).setResultsName('POLYGON')
    VIA = pp.Group(ws_pin + pp.Suppress(pp.Keyword('VIA')) +
                   identifier('viaName') +
                   pp.Optional(MASK) +
                   pp.Group(pt)('coord')
                   ).setResultsName('VIA')
    COVER = pp.Group(ws_pin + pp.Suppress(pp.Keyword('COVER')) +
                     pp.Group(pt)('coord') + ORIENT('orient')
                     ).setResultsName('COVER')
    FIXED = pp.Group(ws_pin + pp.Suppress(pp.Keyword('FIXED')) +
                     pp.Group(pt)('coord') + ORIENT('orient')
                     ).setResultsName('FIXED')
    PLACED = pp.Group(ws_pin + pp.Suppress(pp.Keyword('PLACED')) +
                      pp.Group(pt)('coord') + ORIENT('orient')
                      ).setResultsName('PLACED')
    PLACEMENT_PINS = (PORT |
                      pp.Group(LAYER | POLYGON | VIA) |
                      pp.Group(COVER | FIXED | PLACED))

    # One complete pin record, terminated by ';'.
    pin = pp.Group(pp.Suppress(begin_pin) +
                   pinName +
                   pp.Optional(SPECIAL) +
                   pp.Optional(DIRECTION) +
                   pp.Optional(NETEXPR) +
                   pp.Optional(SUPPLYSENSITIVITY) +
                   pp.Optional(GROUNDSENSITIVITY) +
                   pp.Optional(USE) +
                   pp.ZeroOrMore(ANTENNAPINPARTIALMETALAREA) +
                   pp.ZeroOrMore(ANTENNAPINPARTIALMETALSIDEAREA) +
                   pp.ZeroOrMore(ANTENNAPINPARTIALCUTAREA) +
                   pp.ZeroOrMore(ANTENNAPINDIFFAREA) +
                   pp.ZeroOrMore(ANTENNAMODEL) +
                   pp.ZeroOrMore(ANTENNAPINGATEAREA) +
                   pp.ZeroOrMore(ANTENNAPINMAXAREACAR) +
                   pp.ZeroOrMore(ANTENNAPINMAXSIDEAREACAR) +
                   pp.ZeroOrMore(ANTENNAPINMAXCUTCAR) +
                   pp.ZeroOrMore(PLACEMENT_PINS)('PLACEMENT') +
                   linebreak
                   ).setResultsName('pin', listAllMatches=True)

    pins = pp.Group(pp.Suppress(pins_id) +
                    number('numPins') + linebreak +
                    pp.OneOrMore(pin) +
                    pp.Suppress(end_pins_id)
                    ).setResultsName('PINS')

    return pins
def parse_nets(self):
    """Return a pyparsing grammar for the DEF NETS section.

    Matches 'NETS numNets ; ... END NETS'.  Each net record begins with '-',
    names the net and its (comp pin) / MUSTJOIN connections, and may carry
    SHIELDNET, VPIN, SUBNET, XTALK, NONDEFAULTRULE, regular wiring, SOURCE,
    FIXEDBUMP, FREQUENCY, ORIGINAL, USE, PATTERN, ESTCAP, WEIGHT and
    PROPERTY clauses.  When self.ignore_nets_route is set, the wiring
    detail is skipped instead of parsed.

    Returns:
        pyparsing element whose results are under 'NETS'.
    """
    # GLOBALS for this class
    EOL = pp.LineEnd().suppress()
    linebreak = pp.Suppress(";" + pp.LineEnd())
    identifier = pp.Word(pp.alphanums + '._“!<>/[]$#$%&‘*+,/:<=>?@[\]^_`{|}~')  # CONFLICT with '();'
    number = pp.pyparsing_common.number
    word = pp.Word(pp.alphas)
    LPAR = pp.Suppress('(')
    RPAR = pp.Suppress(')')
    ORIENT = (pp.Keyword('N') | pp.Keyword('S') | pp.Keyword('E') | pp.Keyword('W') |
              pp.Keyword('FN') | pp.Keyword('FS') | pp.Keyword('FE') | pp.Keyword('FW'))
    pt = LPAR + pp.OneOrMore(number | pp.Keyword('*')) + RPAR  # pair of x,y

    nets_id = pp.Keyword('NETS')
    end_nets_id = pp.Keyword("END NETS").suppress()
    begin_net = pp.Keyword('-')
    ws_net = pp.Suppress(pp.Keyword('+'))  # parameter division in NETS

    # netName: "( compName pinName [+ SYNTHESIZED] )" or MUSTJOIN form.
    netName_1 = pp.Group(LPAR +
                         identifier('compName') +
                         identifier('pinName') +
                         pp.Optional(ws_net + pp.Keyword('SYNTHESIZED'))('SYNTHESIZED') +
                         RPAR
                         ).setResultsName('netName')
    netName_2 = pp.Group(pp.Keyword('MUSTJOIN') + LPAR +
                         identifier('compName') +
                         identifier('pinName') + RPAR
                         ).setResultsName('MUSTJOIN')
    netName = (identifier('netName') +
               pp.OneOrMore(netName_1 | netName_2)
               ).setResultsName('netName')

    # SHIELDNET
    SHIELDNET = pp.Group(ws_net + pp.Keyword('SHIELDNET') +
                         identifier('shieldNetName')
                         ).setResultsName('SHIELDNET')

    # VPIN
    VPIN_PLACEMENT_ids = (pp.Keyword('PLACED') |
                          pp.Keyword('FIXED') |
                          pp.Keyword('COVER'))
    VPIN_PLACEMENT = (VPIN_PLACEMENT_ids('PLACEMENT') +
                      pp.Group(pt)('pt') +
                      pp.ZeroOrMore(word('orient')))
    VPIN_LAYER = pp.Keyword('LAYER') + identifier('layerName')
    VPIN = pp.Group(ws_net + pp.Keyword('VPIN') +
                    identifier('vpinName') +
                    pp.Optional(VPIN_LAYER) +
                    pp.Group(pt)('pt1') +
                    pp.Group(pt)('pt2') +
                    pp.Optional(pp.Group(VPIN_PLACEMENT)('PLACEMENT'))
                    )('VPIN')

    # routingPoints (used by regularWiring)
    MASK_id = pp.Keyword('MASK')('MASK')
    RECT_id = pp.Keyword('RECT')('RECT')
    VIRTUAL_id = pp.Keyword('VIRTUAL')('VIRTUAL')
    routingPoints_1 = (pp.Optional(MASK_id + number('maskNum')) +
                       pp.Group(pt))
    routingPoints_2 = (pp.Optional(MASK_id + number('viaMaskNum')) +
                       pp.NotAny(pp.Keyword('NEW') | pp.Keyword('RECT')) +
                       identifier('viaName') +
                       pp.Optional(ORIENT('orient')))
    routingPoints_3 = (pp.Optional(MASK_id + number('maskNum')) +
                       RECT_id +
                       pp.Group(pt))
    routingPoints_4 = (VIRTUAL_id + pp.Group(pt))
    routingPoints = (pp.Group(pt) +
                     pp.OneOrMore(routingPoints_1 |
                                  routingPoints_2 |
                                  routingPoints_3 |
                                  routingPoints_4))

    # regularWiring
    regularWiring_ids = (pp.Keyword('COVER') |
                         pp.Keyword('FIXED') |
                         pp.Keyword('ROUTED') |
                         pp.Keyword('NOSHIELD'))
    TAPER_RULE = ((pp.Keyword('TAPER') | pp.Keyword('TAPERRULE')) +
                  identifier('ruleName'))
    STYLE = (pp.Keyword('STYLE') + identifier('layerName') + pp.OneOrMore(pt))
    regularWiring_Head = pp.Group(regularWiring_ids('WIRING_id') +
                                  identifier('layerName') +
                                  pp.Optional(TAPER_RULE)('TAPER_RULE') +
                                  pp.Optional(STYLE)('STYLE') +
                                  pp.OneOrMore(routingPoints)('routingPoints'))
    NEW_WIRING = pp.Group(pp.Keyword('NEW')('WIRING_id') +
                          identifier('layerName') +
                          pp.Optional(TAPER_RULE)('TAPER_RULE') +
                          pp.Optional(STYLE)('STYLE') +
                          pp.OneOrMore(routingPoints)('routingPoints'))
    regularWiring = pp.Group(ws_net +
                             pp.Group(regularWiring_Head)('WIRING_Head') +
                             pp.Group(pp.ZeroOrMore(NEW_WIRING))('NEW_WIRING')
                             )('WIRING')

    # SUBNET (re-uses the wiring head but without the leading '+').
    SUBNET_regularWiring = pp.Group(
        pp.Group(regularWiring_Head)('WIRING_Head') +
        pp.Group(pp.ZeroOrMore(NEW_WIRING))('NEW_WIRING'))('WIRING')
    SUBNET_NONDEFAULTRULE = (pp.Keyword('NONDEFAULTRULE') +
                             identifier('NONDEFAULTRULE_ruleName'))
    SUBNET_pin_type = (pp.Keyword('VPIN')('VPIN') |
                       pp.Keyword('PIN')('PIN') |
                       identifier('compName'))
    SUBNET = pp.Group(ws_net + pp.Keyword('SUBNET') +
                      identifier('subnetName') +
                      pp.ZeroOrMore(LPAR +
                                    SUBNET_pin_type +
                                    identifier('pinName') +
                                    RPAR) +
                      pp.Optional(SUBNET_NONDEFAULTRULE) +
                      pp.ZeroOrMore(SUBNET_regularWiring)
                      )('SUBNET')

    # XTALK
    XTALK = (ws_net + pp.Keyword('XTALK') + number('XTALK_class'))

    # NONDEFAULTRULE
    NONDEFAULTRULE = (ws_net + pp.Keyword('NONDEFAULTRULE') +
                      identifier('NONDEFAULTRULE_ruleName'))

    # SOURCE
    SOURCE = (ws_net + pp.Keyword('SOURCE') +
              (pp.Keyword('DIST') |
               pp.Keyword('NETLIST') |
               pp.Keyword('TEST') |
               pp.Keyword('TIMING') |
               pp.Keyword('USER'))('SOURCE'))

    # FIXEDBUMP
    FIXEDBUMP = (ws_net + pp.Keyword('FIXEDBUMP')('FIXEDBUMP'))

    # FREQUENCY
    FREQUENCY = (ws_net + pp.Keyword('FREQUENCY') + number('FREQUENCY'))

    # ORIGINAL
    ORIGINAL = (ws_net + pp.Keyword('ORIGINAL') +
                identifier('ORIGINAL_netName'))

    # USE
    USE_ids = (pp.Keyword('ANALOG') | pp.Keyword('CLOCK') |
               pp.Keyword('GROUND') | pp.Keyword('POWER') |
               pp.Keyword('RESET') | pp.Keyword('SCAN') |
               pp.Keyword('SIGNAL') | pp.Keyword('TIEOFF'))
    USE = ws_net + pp.Keyword('USE') + USE_ids('USE')

    # PATTERN
    PATTERN_ids = (pp.Keyword('BALANCED') | pp.Keyword('STEINER') |
                   pp.Keyword('TRUNK') | pp.Keyword('WIREDLOGIC'))
    PATTERN = (ws_net + pp.Keyword('PATTERN') + PATTERN_ids('PATTERN'))

    # ESTCAP
    ESTCAP = (ws_net + pp.Keyword('ESTCAP') + number('ESTCAP_wireCap'))

    # WEIGHT
    WEIGHT = (ws_net + pp.Keyword('WEIGHT') + number('WEIGHT'))

    # PROPERTY
    PROPERTY = pp.Group(ws_net + pp.Keyword('PROPERTY') +
                        pp.OneOrMore(identifier('propName') +
                                     number('propVal'))
                        )('PROPERTY')

    # Refactor this!?
    # When routing detail is not wanted, replace the wiring grammar with a
    # SkipTo that swallows everything up to the next '+' clause or ';'.
    if self.ignore_nets_route:
        regularWiring = pp.SkipTo((EOL + ws_net) | linebreak)

    net = pp.Group(pp.Suppress(begin_net) +
                   netName +
                   pp.Optional(SHIELDNET) +
                   pp.ZeroOrMore(VPIN) +
                   pp.ZeroOrMore(SUBNET) +
                   pp.Optional(XTALK) +
                   pp.Optional(NONDEFAULTRULE) +
                   pp.ZeroOrMore(regularWiring) +
                   pp.Optional(SOURCE) +
                   pp.Optional(FIXEDBUMP) +
                   pp.Optional(FREQUENCY) +
                   pp.Optional(ORIGINAL) +
                   pp.Optional(USE) +
                   pp.Optional(PATTERN) +
                   pp.Optional(ESTCAP) +
                   pp.Optional(WEIGHT) +
                   pp.ZeroOrMore(PROPERTY) +
                   linebreak
                   ).setResultsName('net', listAllMatches=True)

    nets = pp.Group(pp.Suppress(nets_id) +
                    number('numNets') + linebreak +
                    pp.ZeroOrMore(net) +
                    pp.Suppress(end_nets_id)
                    ).setResultsName('NETS')

    return nets
def parse_components(self):
    """Build and return the pyparsing grammar for the COMPONENTS section
    of a DEF (Design Exchange Format) file.

    Returns:
        pyparsing.ParserElement: grammar whose parse results are grouped
        under the 'COMPONENTS' results name, with one 'subcomponents'
        entry per component statement.
    """
    # GLOBALS for this class
    EOL = pp.LineEnd().suppress()
    # A DEF statement ends with ';' followed by a newline.
    linebreak = pp.Suppress(";" + pp.LineEnd())
    # NOTE(review): the character set below contains curly quote characters
    # ('“', '‘') that look like mis-encoded '"' and "'" — confirm intent.
    identifier = pp.Word(pp.alphanums + '._“!<>/[]$#$%&‘*+,/:<=>?@[\]^_`{|}~')  # CONFLICT with '();'
    number = pp.pyparsing_common.number
    word = pp.Word(pp.alphas)
    LPAR = pp.Suppress('(')
    RPAR = pp.Suppress(')')
    # Component placement orientations defined by DEF.
    ORIENT = (pp.Keyword('N') | pp.Keyword('S') | pp.Keyword('E') | pp.Keyword('W')
              | pp.Keyword('FN') | pp.Keyword('FS') | pp.Keyword('FE') | pp.Keyword('FW'))
    # A coordinate point; '*' means "repeat the previous coordinate".
    pt = LPAR + pp.OneOrMore(number | pp.Keyword('*')) + RPAR  # pair of x,y

    # Presumably synchronizes with a parser thread for a preceding
    # section — TODO confirm what sets self.events[0].
    self.events[0].wait()  # Wait for event[0] to finish

    components_id = pp.Keyword('COMPONENTS')
    end_components_id = pp.Keyword("END COMPONENTS").suppress()
    begin_comp = pp.Suppress(pp.Keyword('-'))
    ws_comp = pp.Suppress(pp.Keyword('+'))  # parameter division in components

    # compName
    compName = (identifier('comp_name') + identifier('cell')
                ).setResultsName('compName')

    # EEQMASTER
    EEQMASTER = (ws_comp
                 + identifier
                 + identifier('EEQMASTER'))

    # SOURCE
    SOURCE = (ws_comp + pp.Suppress(pp.Keyword('SOURCE'))
              + identifier('source_type')).setResultsName('SOURCE')

    # PLACEMENT
    PLACEMENT_ids = (pp.Keyword('FIXED')
                     | pp.Keyword('COVER')
                     | pp.Keyword('PLACED')
                     | pp.Keyword('UNPLACED'))
    PLACEMENT_coord = (LPAR
                       + number('placement_x')
                       + number('placement_y')
                       + RPAR)
    # Coordinates and orientation are optional (e.g. for UNPLACED).
    PLACEMENT = (ws_comp + PLACEMENT_ids
                 + pp.Optional(PLACEMENT_coord + ORIENT('orientation'))
                 ).setResultsName('PLACEMENT')

    # MASKSHIFT
    MASKSHIFT = (ws_comp + pp.Suppress(pp.Keyword('MASKSHIFT'))
                 + number('shiftLayerMasks')).setResultsName('MASKSHIFT')

    # HALO: left/bottom/right/top keep-out distances.
    HALO = (ws_comp + pp.Keyword('HALO')
            + pp.Optional(pp.Keyword('SOFT'))
            + number('haloL') + number('haloB')
            + number('haloR') + number('haloT')).setResultsName('HALO')

    # ROUTEHALO
    ROUTEHALO = (ws_comp + pp.Keyword('ROUTEHALO')
                 + number('rhaloDist')
                 + identifier('rhaloMinLayer')
                 + identifier('rhaloMaxLayer')).setResultsName('ROUTEHALO')

    # WEIGHT
    WEIGHT = (ws_comp + pp.Keyword('WEIGHT')
              + number('weight')).setResultsName('WEIGHT')

    # REGION
    REGION = (ws_comp + pp.Keyword('REGION')
              + identifier('region')).setResultsName('REGION')

    # PROPERTY
    PROPERTY = (ws_comp + pp.Keyword('PROPERTY')
                + identifier('propName')
                + identifier('propVal')).setResultsName('PROPERTY')

    # One "- compName cell + OPTION ... ;" statement.
    subcomponent = pp.Group(begin_comp
                            + compName
                            + pp.Optional(EEQMASTER)
                            + pp.Optional(SOURCE)
                            + pp.Optional(PLACEMENT)
                            + pp.Optional(MASKSHIFT)
                            + pp.Optional(HALO)
                            + pp.Optional(ROUTEHALO)
                            + pp.Optional(WEIGHT)
                            + pp.Optional(REGION)
                            + pp.ZeroOrMore(PROPERTY)
                            + linebreak
                            ).setResultsName('subcomponents',
                                             listAllMatches=True)

    # "COMPONENTS n ; ... END COMPONENTS"
    components = pp.Group(pp.Suppress(components_id)
                          + number('numComps')
                          + linebreak
                          + pp.OneOrMore(subcomponent)
                          + pp.Suppress(end_components_id)
                          ).setResultsName('COMPONENTS')

    return components
class SELinuxParser(text_parser.PyparsingSingleLineTextParser):
  """Parser for SELinux audit.log files."""

  NAME = 'selinux'
  DESCRIPTION = 'Parser for SELinux audit.log files.'

  _ENCODING = 'utf-8'

  # One "key=value" token; the value may be double-quoted or a bare word.
  _SELINUX_KEY_VALUE_GROUP = pyparsing.Group(
      pyparsing.Word(pyparsing.alphanums).setResultsName('key') +
      pyparsing.Suppress('=') + (
          pyparsing.QuotedString('"') ^
          pyparsing.Word(pyparsing.printables)).setResultsName('value'))

  # Whole message body parsed as a dictionary of key=value pairs.
  _SELINUX_KEY_VALUE_DICT = pyparsing.Dict(
      pyparsing.ZeroOrMore(_SELINUX_KEY_VALUE_GROUP))

  # Catch-all: the remainder of the line, with an empty key.
  _SELINUX_BODY_GROUP = pyparsing.Group(
      pyparsing.Empty().setResultsName('key') +
      pyparsing.restOfLine.setResultsName('value'))

  # "msg=audit(<seconds>.<milliseconds>:<serial>):"
  _SELINUX_MSG_GROUP = pyparsing.Group(
      pyparsing.Literal('msg').setResultsName('key') +
      pyparsing.Suppress('=audit(') +
      pyparsing.Word(pyparsing.nums).setResultsName('seconds') +
      pyparsing.Suppress('.') +
      pyparsing.Word(pyparsing.nums).setResultsName('milliseconds') +
      pyparsing.Suppress(':') +
      pyparsing.Word(pyparsing.nums).setResultsName('serial') +
      pyparsing.Suppress('):'))

  # "type=<UPPER_CASE_NAME>" or "type=UNKNOWN[1234]"
  _SELINUX_TYPE_GROUP = pyparsing.Group(
      pyparsing.Literal('type').setResultsName('key') +
      pyparsing.Suppress('=') + (
          pyparsing.Word(pyparsing.srange('[A-Z_]')) ^
          pyparsing.Regex(r'UNKNOWN\[[0-9]+\]')).setResultsName('value'))

  # NOTE(review): pyparsing.Word('AVC') matches any run of the characters
  # 'A', 'V', 'C' (e.g. 'AV', 'CCA'), not the literal string 'AVC';
  # pyparsing.Keyword would match literally — confirm intent. This group
  # is not referenced by LINE_STRUCTURES below.
  _SELINUX_TYPE_AVC_GROUP = pyparsing.Group(
      pyparsing.Literal('type').setResultsName('key') +
      pyparsing.Suppress('=') + (
          pyparsing.Word('AVC') ^
          pyparsing.Word('USER_AVC')).setResultsName('value'))

  # A log line is formatted as: type=TYPE msg=audit([0-9]+\.[0-9]+:[0-9]+): .*
  _SELINUX_LOG_LINE = pyparsing.Dict(
      _SELINUX_TYPE_GROUP + _SELINUX_MSG_GROUP + _SELINUX_BODY_GROUP)

  LINE_STRUCTURES = [('line', _SELINUX_LOG_LINE)]

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a structure of tokens derived from a line of a text file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key != 'line':
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    msg_value = self._GetValueFromStructure(structure, 'msg')
    if not msg_value:
      parser_mediator.ProduceExtractionWarning(
          'missing msg value: {0!s}'.format(structure))
      return

    try:
      seconds = int(msg_value[0], 10)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'unsupported number of seconds in msg value: {0!s}'.format(
              structure))
      return

    try:
      milliseconds = int(msg_value[1], 10)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'unsupported number of milliseconds in msg value: {0!s}'.format(
              structure))
      return

    # Timestamp in microseconds: (seconds * 1000 + milliseconds) ms * 1000.
    timestamp = ((seconds * 1000) + milliseconds) * 1000
    # Third top-level group is _SELINUX_BODY_GROUP; [0] is its value.
    body_text = structure[2][0]

    try:
      # Try to parse the body text as key value pairs. Note that not
      # all log lines will be properly formatted key value pairs.
      body_structure = self._SELINUX_KEY_VALUE_DICT.parseString(body_text)
    except pyparsing.ParseException:
      body_structure = pyparsing.ParseResults()

    event_data = SELinuxLogEventData()
    event_data.audit_type = self._GetValueFromStructure(structure, 'type')
    event_data.body = body_text
    event_data.pid = self._GetValueFromStructure(body_structure, 'pid')
    # TODO: pass line number to offset or remove.
    event_data.offset = 0

    event = time_events.TimestampEvent(
        timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyStructure(self, parser_mediator, line):
    """Verifies if a line from a text file is in the expected format.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
    try:
      structure = self._SELINUX_LOG_LINE.parseString(line)
    except pyparsing.ParseException as exception:
      logger.debug(
          'Unable to parse SELinux audit.log file with error: {0!s}'.format(
              exception))
      return False

    return 'type' in structure and 'msg' in structure
def _generate_grammar(self):
    """Build and return the pyparsing grammar for qmake project (.pro) files.

    The returned grammar parses a file into a 'statements' results group;
    individual elements get debug names when self.debug is set.
    """
    # Define grammar:
    # Newlines are significant in qmake files, so only space/tab are
    # skippable whitespace.
    pp.ParserElement.setDefaultWhitespaceChars(" \t")

    def add_element(name: str, value: pp.ParserElement):
        # Attach a name and debug tracing to each grammar element when
        # debugging is enabled; otherwise return the element unchanged.
        nonlocal self  # NOTE(review): redundant — self is only read here.
        if self.debug:
            value.setName(name)
            value.setDebug()
        return value

    EOL = add_element("EOL", pp.Suppress(pp.LineEnd()))
    Else = add_element("Else", pp.Keyword("else"))
    Identifier = add_element(
        "Identifier", pp.Word(f"{pp.alphas}_", bodyChars=pp.alphanums + "_-./")
    )

    # A parenthesized value; quoted strings and "$(...)" expansions inside
    # are left untouched. The parse action restores the stripped parens.
    BracedValue = add_element(
        "BracedValue",
        pp.nestedExpr(
            ignoreExpr=pp.quotedString
            | pp.QuotedString(
                quoteChar="$(", endQuoteChar=")", escQuote="\\", unquoteResults=False
            )
        ).setParseAction(lambda s, l, t: ["(", *t[0], ")"]),
    )

    # The various qmake substitution forms: $$name, $(name), ${name},
    # $${name}, $$[name].
    Substitution = add_element(
        "Substitution",
        pp.Combine(
            pp.Literal("$")
            + (
                (
                    (pp.Literal("$") + Identifier + pp.Optional(pp.nestedExpr()))
                    | (pp.Literal("(") + Identifier + pp.Literal(")"))
                    | (pp.Literal("{") + Identifier + pp.Literal("}"))
                    | (
                        pp.Literal("$")
                        + pp.Literal("{")
                        + Identifier
                        + pp.Optional(pp.nestedExpr())
                        + pp.Literal("}")
                    )
                    | (pp.Literal("$") + pp.Literal("[") + Identifier + pp.Literal("]"))
                )
            )
        ),
    )
    LiteralValuePart = add_element(
        "LiteralValuePart", pp.Word(pp.printables, excludeChars="$#{}()")
    )
    SubstitutionValue = add_element(
        "SubstitutionValue",
        pp.Combine(pp.OneOrMore(Substitution | LiteralValuePart | pp.Literal("$"))),
    )
    # "$$function(args)" — delegated to handle_function_value.
    FunctionValue = add_element(
        "FunctionValue",
        pp.Group(
            pp.Suppress(pp.Literal("$") + pp.Literal("$"))
            + Identifier
            + pp.nestedExpr()  # .setParseAction(lambda s, l, t: ['(', *t[0], ')'])
        ).setParseAction(lambda s, l, t: handle_function_value(*t)),
    )
    # A value must not be the start of an else branch, a closing brace or
    # the end of the line.
    Value = add_element(
        "Value",
        pp.NotAny(Else | pp.Literal("}") | EOL)
        + (
            pp.QuotedString(quoteChar='"', escChar="\\")
            | FunctionValue
            | SubstitutionValue
            | BracedValue
        ),
    )

    Values = add_element("Values", pp.ZeroOrMore(Value)("value"))

    Op = add_element(
        "OP",
        pp.Literal("=")
        | pp.Literal("-=")
        | pp.Literal("+=")
        | pp.Literal("*=")
        | pp.Literal("~="),
    )

    Key = add_element("Key", Identifier)

    # "KEY op value value ..." — the basic qmake assignment statement.
    Operation = add_element(
        "Operation",
        Key("key") + pp.locatedExpr(Op)("operation") + Values("value")
    )
    CallArgs = add_element("CallArgs", pp.nestedExpr())

    def parse_call_args(results):
        # Flatten nested call-argument tokens back into a single string,
        # re-inserting the parentheses stripped by nestedExpr.
        out = ""
        for item in chain(*results):
            if isinstance(item, str):
                out += item
            else:
                out += "(" + parse_call_args(item) + ")"
        return out

    CallArgs.setParseAction(parse_call_args)

    Load = add_element("Load", pp.Keyword("load") + CallArgs("loaded"))
    Include = add_element(
        "Include", pp.Keyword("include") + pp.locatedExpr(CallArgs)("included")
    )
    Option = add_element("Option", pp.Keyword("option") + CallArgs("option"))
    RequiresCondition = add_element("RequiresCondition", pp.originalTextFor(pp.nestedExpr()))

    def parse_requires_condition(s, l, t):
        # The following expression unwraps the condition via the additional info
        # set by originalTextFor.
        condition_without_parentheses = s[t._original_start + 1 : t._original_end - 1]

        # And this replaces the colons with '&&' similar how it's done for 'Condition'.
        condition_without_parentheses = (
            condition_without_parentheses.strip().replace(":", " && ").strip(" && ")
        )
        return condition_without_parentheses

    RequiresCondition.setParseAction(parse_requires_condition)
    Requires = add_element(
        "Requires", pp.Keyword("requires") + RequiresCondition("project_required_condition")
    )

    FunctionArgumentsAsString = add_element(
        "FunctionArgumentsAsString", pp.originalTextFor(pp.nestedExpr())
    )
    QtNoMakeTools = add_element(
        "QtNoMakeTools",
        pp.Keyword("qtNomakeTools") + FunctionArgumentsAsString("qt_no_make_tools_arguments"),
    )

    # ignore the whole thing...
    DefineTestDefinition = add_element(
        "DefineTestDefinition",
        pp.Suppress(
            pp.Keyword("defineTest")
            + CallArgs
            + pp.nestedExpr(opener="{", closer="}", ignoreExpr=pp.LineEnd())
        ),
    )

    # ignore the whole thing...
    ForLoop = add_element(
        "ForLoop",
        pp.Suppress(
            pp.Keyword("for")
            + CallArgs
            + pp.nestedExpr(opener="{", closer="}", ignoreExpr=pp.LineEnd())
        ),
    )

    # ignore the whole thing...
    ForLoopSingleLine = add_element(
        "ForLoopSingleLine",
        pp.Suppress(pp.Keyword("for") + CallArgs + pp.Literal(":") + pp.SkipTo(EOL)),
    )

    # ignore the whole thing...
    FunctionCall = add_element("FunctionCall", pp.Suppress(Identifier + pp.nestedExpr()))

    Scope = add_element("Scope", pp.Forward())

    # Order matters: keyword statements must be tried before the generic
    # FunctionCall / Operation alternatives.
    Statement = add_element(
        "Statement",
        pp.Group(
            Load
            | Include
            | Option
            | Requires
            | QtNoMakeTools
            | ForLoop
            | ForLoopSingleLine
            | DefineTestDefinition
            | FunctionCall
            | Operation
        ),
    )
    StatementLine = add_element("StatementLine", Statement + (EOL | pp.FollowedBy("}")))
    StatementGroup = add_element(
        "StatementGroup", pp.ZeroOrMore(StatementLine | Scope | pp.Suppress(EOL))
    )

    Block = add_element(
        "Block",
        pp.Suppress("{")
        + pp.Optional(EOL)
        + StatementGroup
        + pp.Optional(EOL)
        + pp.Suppress("}")
        + pp.Optional(EOL),
    )

    ConditionEnd = add_element(
        "ConditionEnd",
        pp.FollowedBy(
            (pp.Optional(pp.White()) + (pp.Literal(":") | pp.Literal("{") | pp.Literal("|")))
        ),
    )

    ConditionPart1 = add_element(
        "ConditionPart1", (pp.Optional("!") + Identifier + pp.Optional(BracedValue))
    )
    ConditionPart2 = add_element("ConditionPart2", pp.CharsNotIn("#{}|:=\\\n"))
    ConditionPart = add_element(
        "ConditionPart", (ConditionPart1 ^ ConditionPart2) + ConditionEnd
    )

    ConditionOp = add_element("ConditionOp", pp.Literal("|") ^ pp.Literal(":"))
    ConditionWhiteSpace = add_element(
        "ConditionWhiteSpace", pp.Suppress(pp.Optional(pp.White(" ")))
    )

    ConditionRepeated = add_element(
        "ConditionRepeated", pp.ZeroOrMore(ConditionOp + ConditionWhiteSpace + ConditionPart)
    )

    Condition = add_element("Condition", pp.Combine(ConditionPart + ConditionRepeated))
    # qmake's ':' (logical and) is rewritten to '&&' in the parsed output.
    Condition.setParseAction(lambda x: " ".join(x).strip().replace(":", " && ").strip(" && "))

    # Weird thing like write_file(a)|error() where error() is the alternative condition
    # which happens to be a function call. In this case there is no scope, but our code expects
    # a scope with a list of statements, so create a fake empty statement.
    ConditionEndingInFunctionCall = add_element(
        "ConditionEndingInFunctionCall",
        pp.Suppress(ConditionOp)
        + FunctionCall
        + pp.Empty().setParseAction(lambda x: [[]]).setResultsName("statements"),
    )

    SingleLineScope = add_element(
        "SingleLineScope",
        pp.Suppress(pp.Literal(":")) + pp.Group(Block | (Statement + EOL))("statements"),
    )
    MultiLineScope = add_element("MultiLineScope", Block("statements"))

    SingleLineElse = add_element(
        "SingleLineElse",
        pp.Suppress(pp.Literal(":")) + (Scope | Block | (Statement + pp.Optional(EOL))),
    )
    MultiLineElse = add_element("MultiLineElse", Block)
    ElseBranch = add_element("ElseBranch", pp.Suppress(Else) + (SingleLineElse | MultiLineElse))

    # Scope is already add_element'ed in the forward declaration above.
    Scope <<= pp.Group(
        Condition("condition")
        + (SingleLineScope | MultiLineScope | ConditionEndingInFunctionCall)
        + pp.Optional(ElseBranch)("else_statements")
    )

    Grammar = StatementGroup("statements")
    # qmake comments use '#' to end of line, same as Python.
    Grammar.ignore(pp.pythonStyleComment())

    return Grammar
def select_oemol_atom_idx_by_language(system, mask=''):
    """
    This function selects the atom indexes from the passed oemol molecular
    complex by using a defined language. The language allows the selection
    of the ligand, protein, waters, mono-atomic ions, excipients, residue
    numbers and distance selection. Logic operators not, or, and, noh,
    diff, around can be used to refine the selection.

    Parameters
    ----------
    system : OEMol of the bio-molecular complex protein-ligand
        The molecular complex
    mask : python string
        A string used to select atoms. A Backus-Naur Form grammar
        (https://en.wikipedia.org/wiki/Backus-Naur_form) is defined by the
        python module pyparsing. The defined grammar tokens are: "ligand",
        "protein", "ca_protein", "water", "ions", "excipients" and
        "resid chain1:res_idx1 chain2:res_idx2 ... res_idxn" that
        respectively define the ligand, the protein, carbon alpha protein
        atoms, water molecules, ions, excipients (not protein, ligand,
        water or ions) and residue numbers. The atom selection can be
        refined by using the following operator tokens:
            "not"    = invert selection
            "or"     = add selections
            "and"    = intersect selections
            "diff"   = logic difference between selections
            "noh"    = remove hydrogens from the selection
            "around" = select atoms inside the cutoff distance from a
                       given selection

    Returns
    -------
    atom_set : python set
        the selected atom indexes

    Notes
    -----
    Example of selection strings:
        mask = "ligand or protein"
        mask = "not water or not ions"
        mask = "ligand or protein or excipients"
        mask = "noh protein"
        mask = "resid A:17 B:12 17 18"
        mask = "protein diff resid A:1"
        mask = "5.0 around protein"
    """

    def split(system, ligand_res_name='LIG'):
        """
        This function splits the passed molecule in components and tracks
        the mapping between the original molecule and the split components.
        The mapping is created as separated atom component index sets.

        Parameters:
        -----------
        system: OEMol
            The system to split in components. The components are: the
            protein atoms, the protein carbon alpha atoms, the water
            atoms, the ion atoms, the excipients atoms

        Returns:
        --------
        dic_set: python dictionary
            The system is split in a dictionary with token words as keys
            and for value the related atom set. The token keywords are:
            protein, ca_protein, ligand, water, ions, excipients, system
        """

        # Define Empty sets
        lig_set = set()
        prot_set = set()
        ca_prot_set = set()
        wat_set = set()
        excp_set = set()
        ion_set = set()

        # Atom Bond Set vector used to contains the whole system
        frags = oechem.OEAtomBondSetVector()

        # Define Options for the Filter
        opt = oechem.OESplitMolComplexOptions()

        # The protein filter is set to avoid that multiple
        # chains are separated during the splitting and peptide
        # molecules are recognized as ligands
        pf = oechem.OEMolComplexFilterFactory(
            oechem.OEMolComplexFilterCategory_Protein)
        peptide = oechem.OEMolComplexFilterFactory(
            oechem.OEMolComplexFilterCategory_Peptide)
        protein_filter = oechem.OEOrRoleSet(pf, peptide)
        opt.SetProteinFilter(protein_filter)

        # The ligand filter is set to recognize just the ligand
        lf = oechem.OEMolComplexFilterFactory(
            oechem.OEMolComplexFilterCategory_Ligand)
        not_protein_filter = oechem.OENotRoleSet(protein_filter)
        ligand_filter = oechem.OEAndRoleSet(lf, not_protein_filter)
        opt.SetLigandFilter(ligand_filter)

        # The water filter is set to recognize just water molecules
        wf = oechem.OEMolComplexFilterFactory(
            oechem.OEMolComplexFilterCategory_Water)
        opt.SetWaterFilter(wf)

        # Set Category
        cat = oechem.OEMolComplexCategorizer()
        cat.AddLigandName(ligand_res_name)
        opt.SetCategorizer(cat)

        # Define the system fragments
        if not oechem.OEGetMolComplexFragments(frags, system, opt):
            raise ValueError('Unable to generate the system fragments')

        # Set empty OEMol containers
        prot = oechem.OEMol()
        lig = oechem.OEMol()
        wat = oechem.OEMol()
        excp = oechem.OEMol()

        # Split the protein from the system
        atommap = oechem.OEAtomArray(system.GetMaxAtomIdx())
        if not oechem.OECombineMolComplexFragments(
                prot, frags, opt, opt.GetProteinFilter(), atommap):
            raise ValueError('Unable to split the Protein')
        # Populate the protein set and the protein carbon alpha set
        pred = oechem.OEIsCAlpha()
        for sys_at in system.GetAtoms():
            sys_idx = sys_at.GetIdx()
            at_idx = atommap[sys_idx]
            if at_idx:
                prot_set.add(sys_idx)
                at = system.GetAtom(oechem.OEHasAtomIdx(sys_idx))
                if pred(at):
                    ca_prot_set.add(sys_idx)
                # print(sys_idx, '->', at_idx)

        # Split the ligand from the system
        atommap = oechem.OEAtomArray(system.GetMaxAtomIdx())
        if not oechem.OECombineMolComplexFragments(
                lig, frags, opt, opt.GetLigandFilter(), atommap):
            raise ValueError('Unable to split the Ligand')
        # Populate the ligand set
        for sys_at in system.GetAtoms():
            sys_idx = sys_at.GetIdx()
            at_idx = atommap[sys_idx]
            if at_idx:
                lig_set.add(sys_idx)
                # print(sys_idx, '->', at_idx)

        # Split the water from the system
        atommap = oechem.OEAtomArray(system.GetMaxAtomIdx())
        if not oechem.OECombineMolComplexFragments(
                wat, frags, opt, opt.GetWaterFilter(), atommap):
            raise ValueError('Unable to split the Water')
        # Populate the water set
        for sys_at in system.GetAtoms():
            sys_idx = sys_at.GetIdx()
            at_idx = atommap[sys_idx]
            if at_idx:
                wat_set.add(sys_idx)
                # print(sys_idx, '->', at_idx)

        # Split the excipients from the system
        atommap = oechem.OEAtomArray(system.GetMaxAtomIdx())
        if not oechem.OECombineMolComplexFragments(
                excp, frags, opt, opt.GetOtherFilter(), atommap):
            raise ValueError('Unable to split the Excipients')
        # Populate the excipient set
        for sys_at in system.GetAtoms():
            sys_idx = sys_at.GetIdx()
            at_idx = atommap[sys_idx]
            if at_idx:
                excp_set.add(sys_idx)
                # print(sys_idx, '->', at_idx)

        # Create the mono-atomic ions set: degree 0 means no bonds.
        for exc_idx in excp_set:
            atom = system.GetAtom(oechem.OEHasAtomIdx(exc_idx))
            if atom.GetDegree() == 0:
                ion_set.add(exc_idx)

        # Create the excipients set which are not protein, ligand, waters or ions
        excipients_set = excp_set - ion_set

        # Create the system set
        system_set = prot_set | lig_set | excp_set | wat_set

        # NOTE(review): this error message formats the whole set, which can
        # be very large; len(system_set) would read better — confirm.
        if len(system_set) != system.NumAtoms():
            raise ValueError("The total system atom number {} is different "
                             "from its set representation {}".format(
                                 system.NumAtoms(), system_set))

        # The dictionary is used to link the token keywords to the created molecule sets
        dic_set = {
            'ligand': lig_set,
            'protein': prot_set,
            'ca_protein': ca_prot_set,
            'water': wat_set,
            'ions': ion_set,
            'excipients': excipients_set,
            'system': system_set
        }

        return dic_set

    def build_set(ls, dsets):
        """
        This function selects the atom indexes.

        Parameters:
        -----------
        ls: python list
            the parsed list with tokens and operand tokens for the selection
        dsets: python dictionary
            the dictionary containing the sets for the selection

        Return:
        -------
        atom_set: python set
            the set containing the atom index
        """

        def noh(ls, dsets):
            """ This function removes hydrogens from the selection """
            data_set = build_set(ls[1], dsets)

            noh_set = set()
            pred = oechem.OEIsHydrogen()

            for idx in data_set:
                atom = system.GetAtom(oechem.OEHasAtomIdx(idx))
                if not pred(atom):
                    noh_set.add(idx)

            return noh_set

        def residues(ls):
            """
            This function selects residues based on the residue numbers.
            An example of selection can be: mask = 'resid A:16 17 19 B:1'
            """
            # List residue atom index to be restrained
            res_atom_set = set()

            # Dictionary of lists with the chain residues selected to be restrained
            # e.g. {chainA:[res1, res15], chainB:[res19, res17]}
            chain_dic = {'': []}

            # Fill out the chain dictionary; bare numbers go into the
            # empty-chain bucket, 'X : n' triples into chain X.
            i = 0
            while i < len(ls):
                if ls[i].isdigit():
                    chain_dic[''].append(int(ls[i]))
                    i += 1
                else:
                    # NOTE(review): bare 'except:' — this relies on KeyError
                    # for a first-seen chain but would also mask e.g. a
                    # ValueError from int(); 'except KeyError:' (or
                    # chain_dic.setdefault) would be safer — confirm.
                    try:
                        chain_dic[ls[i]].append(int(ls[i + 2]))
                    except:
                        chain_dic[ls[i]] = []
                        chain_dic[ls[i]].append(int(ls[i + 2]))
                    i += 3

            # Loop over the molecular system to select the atom indexes to be selected
            hv = oechem.OEHierView(
                system,
                oechem.OEAssumption_BondedResidue + oechem.OEAssumption_ResPerceived)
            for chain in hv.GetChains():
                chain_id = chain.GetChainID()
                if chain_id not in chain_dic:
                    continue
                for frag in chain.GetFragments():
                    for hres in frag.GetResidues():
                        res_num = hres.GetOEResidue().GetResidueNumber()
                        if res_num not in chain_dic[chain_id]:
                            continue
                        for oe_at in hres.GetAtoms():
                            res_atom_set.add(oe_at.GetIdx())

            return res_atom_set

        def around(dist, ls):
            """
            This function selects atoms not farther than the threshold
            distance from the current selection. The threshold distance is
            in Angstrom. Example: mask = '5.0 around ligand'
            """
            # at = system.GetAtom(oechem.OEHasAtomIdx(idx))
            # Atom set selection
            atom_set_around = set()

            # Create a OE bit vector mask for each atoms
            bv_around = oechem.OEBitVector(system.GetMaxAtomIdx())

            # Set the mask atom
            for at in system.GetAtoms():
                if at.GetIdx() in ls:
                    bv_around.SetBitOn(at.GetIdx())

            # Predicate
            pred = oechem.OEAtomIdxSelected(bv_around)

            # Create the system molecule based on the atom mask
            molecules = oechem.OEMol()
            oechem.OESubsetMol(molecules, system, pred)

            # Create the Nearest neighbours
            nn = oechem.OENearestNbrs(system, float(dist))
            for nbrs in nn.GetNbrs(molecules):
                for atom in oechem.OEGetResidueAtoms(nbrs.GetBgn()):
                    if atom.GetIdx() in ls:
                        continue
                    atom_set_around.add(atom.GetIdx())

            return atom_set_around

        # Start Body of the selection function by language

        # Terminal Literal return the related set
        if isinstance(ls, str):
            return dsets[ls]
        # Not or Noh
        if len(ls) == 2:
            if ls[0] == 'noh':  # Noh case
                return noh(ls, dsets)
            elif ls[0] == 'not':  # Not case
                return dsets['system'] - build_set(ls[1], dsets)
            else:  # Resid case with one index
                return residues(ls[1])

        if len(ls) == 3:
            if ls[1] == 'or':  # Or Case (set union)
                return build_set(ls[0], dsets) | build_set(ls[2], dsets)
            elif ls[1] == 'and':  # And Case (set intersection)
                return build_set(ls[0], dsets) & build_set(ls[2], dsets)
            elif ls[1] == 'diff':  # Diff case (set difference)
                return build_set(ls[0], dsets) - build_set(ls[2], dsets)
            elif ls[1] == 'around':  # Around case
                return around(ls[0], build_set(ls[2], dsets))
            else:
                return residues(ls[1:])  # Resid case with one or two indexes
        else:
            if ls[0] == 'resid':
                return residues(ls[1:])  # Resid case with multiple indexes
            else:
                raise ValueError(
                    "The passed list have too many tokens: {}".format(ls))

    # Parse Action-Maker: builds a parse action that reshapes the flat
    # token list produced by operatorPrecedence into a left-nested tree.
    def makeLRlike(numterms):
        if numterms is None:
            # None operator can only by binary op
            initlen = 2
            incr = 1
        else:
            initlen = {0: 1, 1: 2, 2: 3, 3: 5}[numterms]
            incr = {0: 1, 1: 1, 2: 2, 3: 4}[numterms]

        # Define parse action for this number of terms,
        # to convert flat list of tokens into nested list
        def pa(s, l, t):
            t = t[0]
            if len(t) > initlen:
                ret = pyp.ParseResults(t[:initlen])
                i = initlen
                while i < len(t):
                    ret = pyp.ParseResults([ret] + t[i:i + incr])
                    i += incr
                return pyp.ParseResults([ret])

        return pa

    # Selection function body

    # Residue number selection: optional "chain:" prefix then a number.
    id = pyp.Optional(pyp.Word(pyp.alphanums) + pyp.Literal(':')) + pyp.Word(
        pyp.nums)
    resid = pyp.Group(pyp.Literal("resid") + pyp.OneOrMore(id))

    # Real number for around operator selection
    real = pyp.Regex(r"\d+(\.\d*)?").setParseAction(lambda t: float(t[0]))

    # Define the tokens for the BNF grammar
    operand = pyp.Literal("protein") | pyp.Literal("ca_protein") | \
        pyp.Literal("ligand") | pyp.Literal("water") | \
        pyp.Literal("ions") | pyp.Literal("excipients") | resid

    # BNF Grammar definition with parseAction makeLRlike
    expr = pyp.operatorPrecedence(
        operand,
        [(None, 2, pyp.opAssoc.LEFT, makeLRlike(None)),
         (pyp.Literal("not"), 1, pyp.opAssoc.RIGHT, makeLRlike(1)),
         (pyp.Literal("noh"), 1, pyp.opAssoc.RIGHT, makeLRlike(1)),
         (pyp.Literal("and"), 2, pyp.opAssoc.LEFT, makeLRlike(2)),
         (pyp.Literal("or"), 2, pyp.opAssoc.LEFT, makeLRlike(2)),
         (pyp.Literal("diff"), 2, pyp.opAssoc.LEFT, makeLRlike(2)),
         (real + pyp.Literal("around"), 1, pyp.opAssoc.RIGHT, makeLRlike(2))])

    # Parse the input string
    try:
        ls = expr.parseString(mask, parseAll=True)
    except Exception as e:
        raise ValueError("The passed restraint mask is not valid: {}".format(
            str(e)))

    # Split the system
    dic_sets = split(system)

    # Select atom indexes
    atom_set = build_set(ls[0], dic_sets)

    return atom_set
def __init__(self):
    """Build the pyparsing grammar for BibTeX files: entries, string
    definitions, preamble declarations and comments. The top-level
    grammar is stored in self.main_expression."""
    # Bibtex keywords

    string_def_start = pp.CaselessKeyword("@string")
    preamble_start = pp.CaselessKeyword("@preamble")
    comment_line_start = pp.CaselessKeyword('@comment')

    # String names
    string_name = pp.Word(pp.alphanums + '_')('StringName')
    self.set_string_name_parse_action(lambda s, l, t: None)
    string_name.addParseAction(self._string_name_parse_action)

    # Values inside bibtex fields
    # Values can be integer or string expressions. The latter may use
    # quoted or braced values.

    # Integer values
    integer = pp.Word(pp.nums)('Integer')

    # Braced values: braced values can contain nested (but balanced) braces
    braced_value_content = pp.CharsNotIn('{}')
    braced_value = pp.Forward()  # Recursive definition for nested braces
    braced_value <<= pp.originalTextFor(
        '{' + pp.ZeroOrMore(braced_value | braced_value_content) + '}'
    )('BracedValue')
    braced_value.setParseAction(remove_braces)
    # TODO add ignore for "\}" and "\{" ?
    # TODO @ are not parsed by bibtex in braces

    # Quoted values: may contain braced content with balanced braces
    brace_in_quoted = pp.nestedExpr('{', '}', ignoreExpr=None)
    text_in_quoted = pp.CharsNotIn('"{}')
    # (quotes should be escaped by braces in quoted value)
    quoted_value = pp.originalTextFor(
        '"' + pp.ZeroOrMore(text_in_quoted | brace_in_quoted) + '"'
    )('QuotedValue')
    quoted_value.addParseAction(pp.removeQuotes)

    # String expressions: '#'-separated concatenation of quoted/braced
    # values and previously defined string names.
    string_expr = pp.delimitedList(
        (quoted_value | braced_value | string_name), delim='#'
    )('StringExpression')
    self.set_string_expression_parse_action(lambda s, l, t: None)
    string_expr.addParseAction(self._string_expr_parse_action)

    value = (integer | string_expr)('Value')

    # Entries

    # @EntryType { ...
    entry_type = (pp.Suppress('@') + pp.Word(pp.alphas))('EntryType')
    entry_type.setParseAction(first_token)

    # Entry key: any character up to a ',' without leading and trailing
    # spaces.
    key = pp.SkipTo(',')('Key')  # Exclude @',\#}{~%
    key.setParseAction(lambda s, l, t: first_token(s, l, t).strip())

    # Field name: word of letters, digits, dashes and underscores
    field_name = pp.Word(pp.alphanums + '_-()')('FieldName')
    field_name.setParseAction(first_token)

    # Field: field_name = value
    field = pp.Group(field_name + pp.Suppress('=') + value)('Field')
    field.setParseAction(field_to_pair)

    # List of fields: comma separated fields, optional trailing comma.
    field_list = (pp.delimitedList(field) + pp.Suppress(pp.Optional(','))
                  )('Fields')
    # reversed() makes the FIRST occurrence of a duplicated field win.
    field_list.setParseAction(
        lambda s, l, t: {k: v for (k, v) in reversed(t.get('Fields'))})

    # Entry: type, key, and fields
    self.entry = (entry_type +
                  in_braces_or_pars(key + pp.Suppress(',') + field_list)
                  )('Entry')

    # Other stuff: comments, string definitions, and preamble declarations

    # Explicit comments: @comment + everything up to next valid declaration
    # starting on new line.
    not_an_implicit_comment = (pp.LineStart() + pp.Literal('@')
                               ) | pp.stringEnd()
    self.explicit_comment = (
        pp.Suppress(comment_line_start) +
        pp.originalTextFor(pp.SkipTo(not_an_implicit_comment),
                           asString=True))('ExplicitComment')
    self.explicit_comment.addParseAction(remove_trailing_newlines)
    self.explicit_comment.addParseAction(remove_braces)
    # Previous implementation included comment until next '}'.
    # This is however not inline with bibtex behavior that is to only
    # ignore until EOL. Brace stripping is arbitrary here but avoids
    # duplication on bibtex write.

    # Empty implicit_comments lead to infinite loop of zeroOrMore
    def mustNotBeEmpty(t):
        if not t[0]:
            raise pp.ParseException("Match must not be empty.")

    # Implicit comments: not anything else
    self.implicit_comment = pp.originalTextFor(
        pp.SkipTo(not_an_implicit_comment).setParseAction(mustNotBeEmpty),
        asString=True)('ImplicitComment')
    self.implicit_comment.addParseAction(remove_trailing_newlines)

    # String definition: @string { name = value }
    self.string_def = (pp.Suppress(string_def_start) + in_braces_or_pars(
        string_name +
        pp.Suppress('=') +
        string_expr('StringValue')
    ))('StringDefinition')

    # Preamble declaration: @preamble { value }
    self.preamble_decl = (pp.Suppress(preamble_start) +
                          in_braces_or_pars(value))('PreambleDeclaration')

    # Main bibtex expression: any number of declarations in any order;
    # implicit_comment must be tried last as it matches almost anything.
    self.main_expression = pp.ZeroOrMore(
        self.string_def |
        self.preamble_decl |
        self.explicit_comment |
        self.entry |
        self.implicit_comment)
from aocd import get_data

lines = get_data(day=16, year=2020).splitlines()

# Break the input into its three blank-line-separated sections:
# field rules, "your ticket", and "nearby tickets".
from itertools import groupby
parts = [list(section) for nonblank, section in groupby(lines, key=bool) if nonblank]

# Grammar for a rule line such as "departure location: 37-479 or 485-954":
# everything before ':' is the field name, then one or more lo-hi ranges.
import pyparsing as pp
range_rule = pp.Group(pp.Word(pp.nums) + pp.Suppress("-") + pp.Word(pp.nums))
field_rule = ... + pp.Suppress(":") + pp.Group(pp.OneOrMore(range_rule + pp.Optional(pp.Suppress("or"))))

# Map each field name to the list of numbers it accepts, and collect the
# union of all acceptable numbers.
valid_rows = {}
valid_numbers = set()
for rule_line in parts[0]:
    parsed = field_rule.parseString(rule_line)
    this_numbers = []
    for low, high in parsed[1]:
        this_numbers += range(int(low), int(high) + 1)
    valid_rows[parsed[0]] = this_numbers
    valid_numbers.update(this_numbers)

# Second section: a header line, then our own ticket as comma-separated ints.
your_ticket = [int(field) for field in parts[1][1].split(",")]
ticket_len = len(your_ticket)

# Third section: skip the "nearby tickets:" header, parse each ticket.
nearby_tickets = [
    [int(field) for field in ticket.split(",")]
    for ticket in parts[2]
    if not ticket.startswith("nearby")
]

### Part A
error_rate = 0
# A JSON value can contain other values, so declare it as a Forward
# reference and fill in its real definition later with the '<<' operator.
expr = pp.Forward()

# Array: '[' followed by an optional comma-separated list of values, ']'.
arr = pp.Suppress('[') + pp.Optional(pp.delimitedList(expr)('elements')) + pp.Suppress(']')

# Object: '{' followed by optional comma-separated "key: value" pairs, '}'.
keyval = string('key') + pp.Suppress(':') + expr('value')
obj = pp.Suppress('{') + pp.Optional(pp.delimitedList(pp.Group(keyval))('elements')) + pp.Suppress('}')

# Parse actions turning the matches into the equivalent Python list / dict.
arr.setParseAction(lambda tok: [list(tok.elements)])
obj.setParseAction(lambda tok: [{pair.key: pair.value for pair in tok.elements}])
class SkyDriveLogParser(text_parser.PyparsingMultiLineTextParser):
  """Parses SkyDrive log files."""

  NAME = u'skydrive_log'
  DESCRIPTION = u'Parser for OneDrive (or SkyDrive) log files.'

  _ENCODING = u'utf-8'

  # Common SDF (SkyDrive Format) structures.
  _COMMA = pyparsing.Literal(u',').suppress()
  _HYPHEN = text_parser.PyparsingConstants.HYPHEN
  _THREE_DIGITS = text_parser.PyparsingConstants.THREE_DIGITS
  _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

  # Up to 3 digits, cast to an integer at parse time.
  # NOTE(review): MSEC is not referenced by the grammars below -- presumably
  # kept for subclasses or legacy callers; confirm before removing.
  MSEC = pyparsing.Word(pyparsing.nums, max=3).setParseAction(
      text_parser.PyParseIntCast)
  # A comma-delimited field whose value is irrelevant to the event data.
  IGNORE_FIELD = pyparsing.CharsNotIn(u',').suppress()

  # Date and time format used in the header is: YYYY-MM-DD-hhmmss.###
  # For example: 2013-07-25-160323.291
  _SDF_HEADER_DATE_TIME = pyparsing.Group(
      text_parser.PyparsingConstants.DATE_ELEMENTS + _HYPHEN +
      _TWO_DIGITS.setResultsName(u'hours') +
      _TWO_DIGITS.setResultsName(u'minutes') +
      _TWO_DIGITS.setResultsName(u'seconds') +
      pyparsing.Literal(u'.').suppress() +
      _THREE_DIGITS.setResultsName(u'milliseconds')).setResultsName(
          u'header_date_time')

  # Date and time format used in lines other than the header is:
  # MM-DD-YY,hh:mm:ss.###
  # For example: 07-25-13,16:06:31.820
  _SDF_DATE_TIME = (
      _TWO_DIGITS.setResultsName(u'month') + _HYPHEN +
      _TWO_DIGITS.setResultsName(u'day') + _HYPHEN +
      _TWO_DIGITS.setResultsName(u'year') + _COMMA +
      text_parser.PyparsingConstants.TIME_ELEMENTS +
      pyparsing.Suppress('.') +
      _THREE_DIGITS.setResultsName(u'milliseconds')).setResultsName(
          u'date_time')

  _SDF_HEADER_START = (
      pyparsing.Literal(u'######').suppress() +
      pyparsing.Literal(u'Logging started.').setResultsName(u'log_start'))

  # Multiline entry end marker, matched from right to left.
  _SDF_ENTRY_END = pyparsing.StringEnd() | _SDF_HEADER_START | _SDF_DATE_TIME

  # A log line: timestamp plus comma-separated fields; only module,
  # source_code, log_level and the trailing multi-line detail are kept.
  _SDF_LINE = (
      _SDF_DATE_TIME + _COMMA +
      IGNORE_FIELD + _COMMA +
      IGNORE_FIELD + _COMMA +
      IGNORE_FIELD + _COMMA +
      pyparsing.CharsNotIn(u',').setResultsName(u'module') + _COMMA +
      pyparsing.CharsNotIn(u',').setResultsName(u'source_code') + _COMMA +
      IGNORE_FIELD + _COMMA +
      IGNORE_FIELD + _COMMA +
      pyparsing.CharsNotIn(u',').setResultsName(u'log_level') + _COMMA +
      pyparsing.SkipTo(_SDF_ENTRY_END).setResultsName(u'detail') +
      pyparsing.ZeroOrMore(pyparsing.lineEnd()))

  _SDF_HEADER = (
      _SDF_HEADER_START +
      pyparsing.Literal(u'Version=').setResultsName(u'version_string') +
      pyparsing.Word(pyparsing.nums + u'.').setResultsName(u'version_number') +
      pyparsing.Literal(u'StartSystemTime:').suppress() +
      _SDF_HEADER_DATE_TIME +
      pyparsing.Literal(u'StartLocalTime:').setResultsName(
          u'local_time_string') +
      pyparsing.SkipTo(pyparsing.lineEnd()).setResultsName(u'details') +
      pyparsing.lineEnd())

  LINE_STRUCTURES = [
      (u'logline', _SDF_LINE),
      (u'header', _SDF_HEADER)]

  def _ParseHeader(self, parser_mediator, structure):
    """Parse header lines and store appropriate attributes.

    [u'Logging started.', u'Version=', u'17.0.2011.0627',
     [2013, 7, 25], 16, 3, 23, 291, u'StartLocalTime', u'<details>']

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
    try:
      date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=structure.header_date_time)
    except ValueError:
      parser_mediator.ProduceExtractionError(
          u'invalid date time value: {0!s}'.format(
              structure.header_date_time))
      return

    event_data = SkyDriveLogEventData()
    # TODO: refactor detail to individual event data attributes.
    event_data.detail = u'{0:s} {1:s} {2:s} {3:s} {4:s}'.format(
        structure.log_start, structure.version_string,
        structure.version_number, structure.local_time_string,
        structure.details)

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def _ParseLine(self, parser_mediator, structure):
    """Parses a logline and store appropriate attributes.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
    # TODO: Verify if date and time value is locale dependent.
    month, day_of_month, year, hours, minutes, seconds, milliseconds = (
        structure.date_time)

    # The log stores a two-digit year; treated here as 2000-based.
    year += 2000
    time_elements_tuple = (
        year, month, day_of_month, hours, minutes, seconds, milliseconds)

    try:
      date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      parser_mediator.ProduceExtractionError(
          u'invalid date time value: {0!s}'.format(structure.date_time))
      return

    event_data = SkyDriveLogEventData()
    # Replace newlines with spaces in structure.detail to preserve output.
    # TODO: refactor detail to individual event data attributes.
    event_data.detail = structure.detail.replace(u'\n', u' ')
    event_data.log_level = structure.log_level
    event_data.module = structure.module
    event_data.source_code = structure.source_code

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parse each record structure and return an EventObject if applicable.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in (u'header', u'logline'):
      raise errors.ParseError(
          u'Unable to parse record, unknown structure: {0:s}'.format(key))

    if key == u'logline':
      self._ParseLine(parser_mediator, structure)
    elif key == u'header':
      self._ParseHeader(parser_mediator, structure)

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is a SkyDrive log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (bytes): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
    # The header line, including a valid date and time, must parse before any
    # other content is attributed to this parser.
    try:
      structure = self._SDF_HEADER.parseString(line)
    except pyparsing.ParseException:
      logging.debug(u'Not a SkyDrive log file')
      return False

    try:
      dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=structure.header_date_time)
    except ValueError:
      logging.debug(
          u'Not a SkyDrive log file, invalid date and time: {0!s}'.format(
              structure.header_date_time))
      return False

    return True
} """ __all__ = ("config_from_file", ) # The tests for this are in test_dhcpformat. from pkgcore.config import dhcpformat, errors from snakeoil import mappings import pyparsing as pyp _section_contents = pyp.Forward() _value = (pyp.Word(pyp.alphanums + './_').setWhitespaceChars(' \t') | pyp.quotedString.copy().setParseAction(pyp.removeQuotes)) _section = pyp.Group( pyp.Suppress('{' + pyp.lineEnd) + _section_contents + pyp.Suppress('}')) # "statement seems to have no effect" # pylint: disable-msg=W0104 _section_contents << pyp.dictOf( _value + pyp.Suppress('='), pyp.Group(pyp.OneOrMore((_value | _section).setWhitespaceChars(' \t'))) + pyp.Suppress(pyp.lineEnd)) parser = (pyp.stringStart + pyp.dictOf( pyp.Suppress('[') + _value + pyp.Suppress(']' + pyp.lineEnd), _section_contents).ignore(pyp.pythonStyleComment) + pyp.stringEnd) def config_from_file(file_obj): try:
def verify_length(s, l, t):
    # Parse action shared by the length-prefixed forms (raw, base64_, qString):
    # t[0] is a Group of [declared_len, data].  When a length was declared,
    # enforce it against the actual data length, then unwrap the data token.
    t = t[0]
    if t.len is not None:
        t1len = len(t[1])
        if t1len != t.len:
            # ParseFatalException aborts the parse outright rather than
            # backtracking into the other simpleString alternatives.
            raise pp.ParseFatalException(
                s, l, "invalid data of length {0}, expected {1}".format(t1len, t.len))
    return t[1]

# define punctuation literals
LPAR, RPAR, LBRK, RBRK, LBRC, RBRC, VBAR, COLON = (
    pp.Suppress(c).setName(c) for c in "()[]{}|:")

# Integer without leading zeros, optionally negative, converted to int.
decimal = pp.Regex(r'-?0|[1-9]\d*').setParseAction(lambda t: int(t[0]))
# '#'-delimited hexadecimal value, e.g. "#0A#" -> 10.
hexadecimal = ("#" + pp.Word(pp.hexnums)[1, ...] + "#").setParseAction(
    lambda t: int("".join(t[1:-1]), 16))
# NOTE(review): this name shadows the `bytes` builtin within this module.
bytes = pp.Word(pp.printables)
# Verbatim form: <len>:<data>, with the length checked by verify_length.
raw = pp.Group(decimal("len") + COLON + bytes).setParseAction(verify_length)
# Base64 form: [len]|...base64...| -- decoded via b64decode (imported
# elsewhere in this module) and checked against the optional declared length.
base64_ = pp.Group(
    pp.Optional(decimal | hexadecimal, default=None)("len") + VBAR +
    pp.Word(pp.alphanums + "+/=")[1, ...].setParseAction(
        lambda t: b64decode("".join(t))) +
    VBAR).setParseAction(verify_length)
real = pp.Regex(r"[+-]?\d+\.\d*([eE][+-]?\d+)?").setParseAction(
    lambda tokens: float(tokens[0]))
token = pp.Word(pp.alphanums + "-./_:*+=!<>")
# Optionally length-prefixed double-quoted string; quotes are stripped.
qString = pp.Group(
    pp.Optional(decimal, default=None)("len") +
    pp.dblQuotedString.setParseAction(pp.removeQuotes)
).setParseAction(verify_length)
# MatchFirst chain: order matters (e.g. `real` must precede `decimal` so the
# fractional part is not left behind).
simpleString = real | base64_ | raw | decimal | token | hexadecimal | qString

display = LBRK + simpleString + RBRK
false = pp.Keyword("false").setParseAction(lambda t: False) null = pp.Keyword("null").setParseAction(Null) value = number("number") | string("string") | array("array") | jobject("object") | true("true") \ | false("false") | null("null") value.runTests(""" true false null 1.0 \"meh\" """) left_sbrack = pp.Suppress("[") right_sbrack = pp.Suppress("]") array << (pp.Group(left_sbrack + pp.delimitedList(value) + right_sbrack) | pp.Group(left_sbrack + right_sbrack)) array.setResultsName("array") array.runTests(""" [ \" meh \" , 1.0 , true , false , null ] [] """) left_cbrace = pp.Suppress("{") right_cbrace = pp.Suppress("}") member = pp.Group(string + pp.Suppress(":") + value) members = pp.ZeroOrMore(member) jobject = left_cbrace + pp.Dict(members) + right_cbrace jobject.runTests("""
def list_of(parser, delim, begin, end):
    """Build a grouped, delimited-list expression enclosed by *begin*/*end*.

    Matches ``begin [parser (delim parser)*] end``; the empty list
    (``begin end``) is accepted.  Delimiter tokens are emitted exactly as
    the caller's *delim* expression produces them.
    """
    items = parser + pp.ZeroOrMore(delim + parser)
    return pp.Group(begin + pp.Optional(items) + end)
# Order is important. Put longer prefix first. relational_op = (less_than_eq_op | less_than_op | greater_than_eq_op | greater_than_op)("relational_op") AND = pyparsing.CaselessLiteral("and") | pyparsing.CaselessLiteral("&&") OR = pyparsing.CaselessLiteral("or") | pyparsing.CaselessLiteral("||") logical_op = (AND | OR)("logical_op") times = pyparsing.CaselessLiteral("times") dimension = dimension_name + EQUAL + dimension_value dimension.setParseAction(lambda tokens: "".join(tokens)) dimension_list = pyparsing.Group( (LBRACE + pyparsing.Optional(pyparsing.delimitedList(dimension)) + RBRACE))("dimensions_list") metric = metric_name + pyparsing.Optional(dimension_list) period = integer_number("period") threshold = decimal_number("threshold") periods = integer_number("periods") function_and_metric = (func + LPAREN + metric + pyparsing.Optional(COMMA + period) + RPAREN) expression = pyparsing.Forward() sub_expression = ((function_and_metric | metric) + relational_op + threshold + pyparsing.Optional(times + periods) | LPAREN + expression + RPAREN)
import pyparsing as pp

# Grammar for entries of the form "name: Brian , type: DOG", collected into a
# pyparsing Dict keyed by pet name.
animal_type = pp.oneOf("CAT DOG HORSE FISH RAT")
type_attr = pp.Suppress("type:") + animal_type("type")
name = pp.Word(pp.alphas)
name_attr = pp.Literal("name:").suppress() + name("pet name")
# BUG FIX: the bare "," auto-converted to a *kept* pp.Literal, so each
# pp.Group(pet_spec) carried a stray ',' token and pp.Dict mapped every pet
# name to [',', <type>] instead of just the type.  Suppress the separator so
# each group is exactly [name, type].
pet_spec = name_attr + pp.Suppress(",") + type_attr
# Dict uses the first token of each group as the key, the rest as the value.
pets_dict = pp.Dict(pp.OneOrMore(pp.Group(pet_spec)))
pets_dict.runTests("""
    name: Brian , type: DOG \
    name: Tom , type: CAT
    """)
print(
    pets_dict.parseString("""
    name: Brian , type: DOG \
    name: Tom , type: CAT
    """).asDict())
def fof_arg_list(x):
    """Grouped, parenthesized, comma-separated list of *x*; parens suppressed."""
    open_paren = pp.Suppress("(")
    close_paren = pp.Suppress(")")
    return pp.Group(open_paren + pp.delimitedList(x) + close_paren)
def _arguments(expression):
    """Grouped, possibly empty, comma-separated argument list for *expression*."""
    single_arg = _argument(expression)
    arg_list = pyparsing.Optional(pyparsing.delimitedList(single_arg))
    return pyparsing.Group(arg_list)
#--------------------- integer = pp.Regex(r'[1-9][0-9]*') indxdir = pp.oneOf(['<','>']) fixbit = pp.oneOf(['0','1']) number = integer|fixbit number.setParseAction(lambda r: int(r[0])) unklen = pp.Literal('*') length = number|unklen unkbit = pp.oneOf(['-']) fixbyte = pp.Regex(r'{[0-9a-fA-F][0-9a-fA-F]}').setParseAction(lambda r: Bits(int(r[0][1:3],16),8)) fixed = fixbyte|fixbit|unkbit option = pp.oneOf(['.','~','#','=']) symbol = pp.Regex(r'[A-Za-z_][A-Za-z0-9_]*') location = pp.Suppress('(')+length+pp.Suppress(')') directive = pp.Group(pp.Optional(option,default='')+symbol+pp.Optional(location,default=1)) speclen = pp.Group(length+pp.Optional(indxdir,default='<')) specformat = pp.Group(pp.Suppress('[')+pp.OneOrMore(directive|fixed)+pp.Suppress(']')) specoption = pp.Optional(pp.Literal('+').setParseAction(lambda r:True),default=False) specdecode = speclen+specformat+specoption def ispec_register(x,module): F = [] try: S = module.ISPECS except AttributeError: logger.error("spec modules must declare ISPECS=[] before @ispec decorators") raise AttributeError logger.progress(len(S),pfx='loading %s instructions '%module.__name__) f = x.fixed() if f in F:
def __init__(self): """ expop :: '^' multop :: '*' | '/' addop :: '+' | '-' integer :: ['+' | '-'] '0'..'9'+ atom :: PI | E | real | fn '(' expr ')' | '(' expr ')' factor :: atom [ expop factor ]* term :: factor [ multop factor ]* expr :: term [ addop term ]* """ point = pyp.Literal(".") e = pyp.CaselessLiteral("E") fnumber = pyp.Combine( pyp.Word("+-" + pyp.nums, pyp.nums) + pyp.Optional(point + pyp.Optional(pyp.Word(pyp.nums))) + pyp.Optional(e + pyp.Word("+-" + pyp.nums, pyp.nums))) ident = pyp.Word(pyp.alphas, pyp.alphas + pyp.nums + "_$") plus = pyp.Literal("+") minus = pyp.Literal("-") mult = pyp.Literal("*") div = pyp.Literal("/") lpar = pyp.Literal("(").suppress() rpar = pyp.Literal(")").suppress() addop = plus | minus multop = mult | div expop = pyp.Literal("^") pi = pyp.CaselessLiteral("PI") expr = pyp.Forward() atom = ((pyp.Optional(pyp.oneOf("- +")) + (pi | e | fnumber | ident + lpar + expr + rpar).setParseAction(self.pushFirst)) | pyp.Optional(pyp.oneOf("- +")) + pyp.Group(lpar + expr + rpar)).setParseAction(self.pushUMinus) # by defining exponentiation as "atom [ ^ factor ]..." instead of # "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right # that is, 2^3^2 = 2^(3^2), not (2^3)^2. factor = pyp.Forward() factor << atom + pyp.ZeroOrMore( (expop + factor).setParseAction(self.pushFirst)) term = factor + pyp.ZeroOrMore( (multop + factor).setParseAction(self.pushFirst)) expr << term + pyp.ZeroOrMore( (addop + term).setParseAction(self.pushFirst)) self.bnf = expr # map operator symbols to corresponding arithmetic operations epsilon = 1e-12 self.opn = { "+": operator.add, "-": operator.sub, "*": operator.mul, "/": operator.truediv, "^": operator.pow } self.fn = { "sin": math.sin, "cos": math.cos, "tan": math.tan, "abs": abs, "trunc": lambda a: int(a), "round": round, "sgn": lambda a: abs(a) > epsilon and cmp(a, 0) or 0 } self.exprStack = []
"Na": 22.9897, "Cl": 35.4527, "C": 12.0107, } digits = "0123456789" # Version 1 element = pp.Word(pp.alphas.upper(), pp.alphas.lower(), max=2).set_name("element") # for stricter matching, use this Regex instead # element = Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|" # "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|" # "M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|" # "S[bcegimnr]?|T[abcehilm]|U(u[bhopqst])?|V|W|Xe|Yb?|Z[nr]") elementRef = pp.Group(element + pp.Optional(pp.Word(digits), default="1")) formula = elementRef[...] def sum_atomic_weights(element_list): return sum(atomicWeight[elem] * int(qty) for elem, qty in element_list) formula.runTests( """\ H2O C6H5OH NaCl """, fullDump=False, postParse=lambda _, tokens: "Molecular weight: {}".format(