def build_attribute_parser():
    quoted = pp.QuotedString('"') ^ pp.QuotedString("'")
    colon = pp.Literal(':').suppress()
    attribute_name = pp.Word(pp.srange('[a-z]'), pp.srange('[a-z0-9_]')).setResultsName('name')
    data_type = (pp.Combine(pp.Word(pp.alphas) + pp.SkipTo("#", ignore=quoted))
                 ^ pp.QuotedString('<', endQuoteChar='>', unquoteResults=False)).setResultsName('type')
    default = pp.Literal('=').suppress() + pp.SkipTo(colon, ignore=quoted).setResultsName('default')
    comment = pp.Literal('#').suppress() + pp.restOfLine.setResultsName('comment')
    return attribute_name + pp.Optional(default) + colon + data_type + comment

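# Minimal usage sketch for build_attribute_parser above (not from the original
# source): the grammar expects a DataJoint-style attribute line that ends in a
# '#'-prefixed comment, so the sample line below is an assumption.
import pyparsing as pp

_attr_parser = build_attribute_parser()
_match = _attr_parser.parseString('duration = 0 : int  # duration in seconds')
print(_match['name'], _match['default'].strip(), _match['type'].strip(),
      _match['comment'].strip())
# -> duration 0 int duration in seconds
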
def depersonalisefolders(parseresults):
    """Removes the PERSONAL_TOOLBAR_FOLDER tag.

    Acts on a ParseResults instance in place (i.e. a procedure).
    """
    folders = top_folders_dict(parseresults)
    tag = pp.Literal('PERSONAL_TOOLBAR_FOLDER="true" ')
    parser = pp.Combine(
        pp.Optional(pp.SkipTo(tag) + tag.suppress()) +
        pp.SkipTo(pp.stringEnd))
    for i in folders.values():
        i = i[0]
        parseresults[i][0] = parser.parseString(parseresults[i][0])[0]

def getSklWriteResGroupDefParser():
    writeResGroup = pp.Word(pp.alphanums)
    resources = pp.SkipTo("]")
    latency = pp.Word(pp.nums)
    microOps = pp.Word(pp.nums)
    resourceCycles = pp.SkipTo("]")
    return (pp.Suppress("def ") + writeResGroup("SKLWriteResGroup") +
            pp.Suppress(": SchedWriteRes<[") + resources("Resources") +
            pp.Suppress(pp.restOfLine) +
            pp.Suppress("let Latency = ") + latency("Latency") +
            pp.Suppress(pp.restOfLine) +
            pp.Suppress("let NumMicroOps = ") + microOps("NumMicroOps") +
            pp.Suppress(pp.restOfLine) +
            pp.Suppress("let ResourceCycles = [") + resourceCycles("ResourceCycles") +
            pp.Suppress(pp.restOfLine))

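# Hedged usage sketch (not from the original source): the grammar above reads
# LLVM-style scheduling definitions; the sample record below is an assumption
# shaped like an SKLWriteResGroup def from an LLVM .td scheduling model.
import pyparsing as pp

_SAMPLE_DEF = """\
def SKLWriteResGroup1 : SchedWriteRes<[SKLPort0]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
"""
_res = getSklWriteResGroupDefParser().parseString(_SAMPLE_DEF)
print(_res['SKLWriteResGroup'], _res['Resources'], _res['Latency'],
      _res['NumMicroOps'], _res['ResourceCycles'])
# -> SKLWriteResGroup1 SKLPort0 1 1 1
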
def compile_attribute(line, in_key=False):
    """
    Convert attribute definition from DataJoint format to SQL

    :param line: attribute definition line
    :param in_key: set to True if attribute is in primary key set
    :returns: (name, sql) -- attribute name and sql code for its declaration
    """
    quoted = pp.Or(pp.QuotedString('"'), pp.QuotedString("'"))
    colon = pp.Literal(':').suppress()
    attribute_name = pp.Word(pp.srange('[a-z]'), pp.srange('[a-z0-9_]')).setResultsName('name')
    data_type = pp.Combine(pp.Word(pp.alphas) + pp.SkipTo("#", ignore=quoted)).setResultsName('type')
    default = pp.Literal('=').suppress() + pp.SkipTo(colon, ignore=quoted).setResultsName('default')
    comment = pp.Literal('#').suppress() + pp.restOfLine.setResultsName('comment')
    attribute_parser = attribute_name + pp.Optional(default) + colon + data_type + comment

    match = attribute_parser.parseString(line + '#', parseAll=True)
    match['comment'] = match['comment'].rstrip('#')
    if 'default' not in match:
        match['default'] = ''
    match = {k: v.strip() for k, v in match.items()}
    match['nullable'] = match['default'].lower() == 'null'

    literals = ['CURRENT_TIMESTAMP']  # not to be enclosed in quotes
    if match['nullable']:
        if in_key:
            raise DataJointError(
                'Primary key attributes cannot be nullable in line %s' % line)
        match['default'] = 'DEFAULT NULL'  # nullable attributes default to null
    else:
        if match['default']:
            quote = (match['default'].upper() not in literals and
                     match['default'][0] not in '"\'')
            match['default'] = ('NOT NULL DEFAULT ' +
                                ('"%s"' if quote else "%s") % match['default'])
        else:
            match['default'] = 'NOT NULL'

    match['comment'] = match['comment'].replace('"', '\\"')  # escape double quotes in comment
    sql = ('`{name}` {type} {default}' +
           (' COMMENT "{comment}"' if match['comment'] else '')).format(**match)
    return match['name'], sql

def _parse_map_tables(report_str: str) -> Dict[str, str]:
    """
    Parse the tables from an ISE map report.

    Keys are the title of the table, values are the table body.
    """
    # Capture the title from section headings like:
    #
    #    Section 12 - Control Set Information
    #    ------------------------------------
    title = (
        pp.lineStart()
        + "Section"
        + ppc.integer
        + "-"
        + pp.SkipTo(pp.lineEnd())("title").setParseAction(pp.tokenMap(str.strip))
        + pp.lineEnd()
    )
    sec_hline = pp.Suppress(pp.lineStart() + pp.Word("-") + pp.lineEnd() * (1,))

    # Table horizontal lines like:
    #
    #    +-------------------------------+
    hline = pp.lineStart() + pp.Word("+", "+-") + pp.lineEnd()

    # Most tables have the format:
    #
    #    +-----------------------+
    #    | Col 1 | Col 2 | Col 3 |
    #    +-----------------------+
    #    | D1    | D2    | D3    |
    #    ...
    #    +-----------------------+
    #
    # However, "Control Set Information" appears to use horizontal lines to
    # separate clocks within the data section. Therefore, just grab
    # everything until a horizontal line followed by a blank line rather
    # than something more precise.
    table = pp.Combine(hline + pp.SkipTo(hline + pp.LineEnd(), include=True))("body")

    table_section = title + sec_hline + table

    # Make line endings significant
    table_section.setWhitespaceChars(" \t")

    return {t.title: t.body for t in table_section.searchString(report_str)}

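# Hedged usage sketch for _parse_map_tables: the sample report text is an
# assumption shaped like an ISE map report section; only the structure
# matters to the grammar. The snippet relies on these module-level names:
import pyparsing as pp
from pyparsing import pyparsing_common as ppc
from typing import Dict

_SAMPLE_REPORT = """\
Section 12 - Control Set Information
------------------------------------
+----------------+
| Clock | Slices |
+----------------+
| clk   | 42     |
+----------------+

"""
print(_parse_map_tables(_SAMPLE_REPORT))
# -> {'Control Set Information': '+----------------+\n| Clock | Slices |...'}
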
def build_attribute_parser():
    quoted = pp.QuotedString('"') ^ pp.QuotedString("'")
    colon = pp.Literal(":").suppress()
    attribute_name = pp.Word(pp.srange("[a-z]"), pp.srange("[a-z0-9_]")).setResultsName("name")
    data_type = (
        pp.Combine(pp.Word(pp.alphas) + pp.SkipTo("#", ignore=quoted))
        ^ pp.QuotedString("<", endQuoteChar=">", unquoteResults=False)).setResultsName("type")
    default = pp.Literal("=").suppress() + pp.SkipTo(
        colon, ignore=quoted).setResultsName("default")
    comment = pp.Literal("#").suppress() + pp.restOfLine.setResultsName("comment")
    return attribute_name + pp.Optional(default) + colon + data_type + comment

def _preprocessing_artifact():
    return (
        pyparsing.Literal('#')
        + _natural()
        + pyparsing.dblQuotedString
        + pyparsing.SkipTo(pyparsing.LineEnd())
    ).suppress()

def _bit_field():
    expression = expression_parser.expression_parser()
    return (
        pyparsing.Optional(_identifier(), None)
        + _COLON
        + pyparsing.SkipTo(_SEMICOLON | _COMA)
    ).setParseAction(_construct_bitfield(expression))

class PyparsingConstants(object):
    """Constants for pyparsing-based parsers."""

    # Numbers.
    INTEGER = pyparsing.Word(pyparsing.nums).setParseAction(PyParseIntCast)

    IPV4_ADDRESS = pyparsing.pyparsing_common.ipv4_address
    IPV6_ADDRESS = pyparsing.pyparsing_common.ipv6_address
    IP_ADDRESS = (IPV4_ADDRESS | IPV6_ADDRESS)

    # TODO: deprecate and remove, use THREE_LETTERS instead.
    # TODO: fix Python 3 compatibility of .uppercase and .lowercase.
    # pylint: disable=no-member
    MONTH = pyparsing.Word(
        pyparsing.string.ascii_uppercase, pyparsing.string.ascii_lowercase,
        exact=3)

    # Define date structures.
    HYPHEN = pyparsing.Literal('-').suppress()

    ONE_OR_TWO_DIGITS = pyparsing.Word(
        pyparsing.nums, min=1, max=2).setParseAction(PyParseIntCast)
    TWO_DIGITS = pyparsing.Word(
        pyparsing.nums, exact=2).setParseAction(PyParseIntCast)
    THREE_DIGITS = pyparsing.Word(
        pyparsing.nums, exact=3).setParseAction(PyParseIntCast)
    FOUR_DIGITS = pyparsing.Word(
        pyparsing.nums, exact=4).setParseAction(PyParseIntCast)

    THREE_LETTERS = pyparsing.Word(pyparsing.alphas, exact=3)

    DATE_ELEMENTS = (
        FOUR_DIGITS.setResultsName('year') + pyparsing.Suppress('-') +
        TWO_DIGITS.setResultsName('month') + pyparsing.Suppress('-') +
        TWO_DIGITS.setResultsName('day_of_month'))
    TIME_ELEMENTS = (
        TWO_DIGITS.setResultsName('hours') + pyparsing.Suppress(':') +
        TWO_DIGITS.setResultsName('minutes') + pyparsing.Suppress(':') +
        TWO_DIGITS.setResultsName('seconds'))
    TIME_MSEC_ELEMENTS = (
        TIME_ELEMENTS + pyparsing.Word('.,', exact=1).suppress() +
        INTEGER.setResultsName('microseconds'))

    # Date structures defined as a single group.
    DATE = pyparsing.Group(DATE_ELEMENTS)
    DATE_TIME = pyparsing.Group(DATE_ELEMENTS + TIME_ELEMENTS)
    DATE_TIME_MSEC = pyparsing.Group(DATE_ELEMENTS + TIME_MSEC_ELEMENTS)
    TIME = pyparsing.Group(TIME_ELEMENTS)

    TIME_MSEC = TIME + pyparsing.Suppress('.') + INTEGER
    # TODO: replace by
    # TIME_MSEC = pyparsing.Group(TIME_MSEC_ELEMENTS)

    COMMENT_LINE_HASH = pyparsing.Literal('#') + pyparsing.SkipTo(
        pyparsing.LineEnd())

    # TODO: Add more commonly used structs that can be used by parsers.
    PID = pyparsing.Word(
        pyparsing.nums, min=1, max=5).setParseAction(PyParseIntCast)

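# Standalone usage sketch (assumption: mirrors DATE_ELEMENTS + TIME_ELEMENTS
# above, substituting pyparsing.tokenMap(int) for plaso's PyParseIntCast
# parse action, which is not defined in this excerpt).
import pyparsing

_two_digits = pyparsing.Word(pyparsing.nums, exact=2).setParseAction(
    pyparsing.tokenMap(int))
_four_digits = pyparsing.Word(pyparsing.nums, exact=4).setParseAction(
    pyparsing.tokenMap(int))
_date_time = pyparsing.Group(
    _four_digits('year') + pyparsing.Suppress('-') +
    _two_digits('month') + pyparsing.Suppress('-') +
    _two_digits('day_of_month') +
    _two_digits('hours') + pyparsing.Suppress(':') +
    _two_digits('minutes') + pyparsing.Suppress(':') +
    _two_digits('seconds'))
print(_date_time.parseString('2021-07-04 12:30:05')[0].asDict())
# -> {'year': 2021, 'month': 7, 'day_of_month': 4,
#     'hours': 12, 'minutes': 30, 'seconds': 5}
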
def getBoldUrls(lines=[], sub=0):
    abstart, abend = pyparsing.makeHTMLTags('B')
    grammer2 = abstart + pyparsing.SkipTo(abend) + abend.suppress()
    for x1, x2, x3 in grammer2.scanString(''.join(lines)):
        print(x1)
        print(x2)
        print(x3)

def generate_entity_results_from_analysis(self, analysis):
    LOGGER.debug(f'generating entity results...')
    filtered_results = {
        k: v for (k, v) in self.results.items()
        if v.analysis is analysis and isinstance(v, AbstractFileResult)
    }

    result: AbstractFileResult
    for _, result in filtered_results.items():
        entity_keywords: List[str] = [GroovyParsingKeyword.CLASS.value]
        entity_name = pp.Word(pp.alphanums)

        match_expression = pp.Keyword(GroovyParsingKeyword.CLASS.value) + \
            entity_name.setResultsName(CoreParsingKeyword.ENTITY_NAME.value) + \
            pp.Optional(pp.Keyword(GroovyParsingKeyword.EXTENDS.value) +
                        entity_name.setResultsName(CoreParsingKeyword.INHERITED_ENTITY_NAME.value)) + \
            pp.SkipTo(pp.FollowedBy(GroovyParsingKeyword.OPEN_SCOPE.value))

        comment_keywords: Dict[str, str] = {
            CoreParsingKeyword.LINE_COMMENT.value: GroovyParsingKeyword.INLINE_COMMENT.value,
            CoreParsingKeyword.START_BLOCK_COMMENT.value: GroovyParsingKeyword.START_BLOCK_COMMENT.value,
            CoreParsingKeyword.STOP_BLOCK_COMMENT.value: GroovyParsingKeyword.STOP_BLOCK_COMMENT.value
        }

        entity_results = result.generate_entity_results_from_scopes(
            entity_keywords, match_expression, comment_keywords)

        for entity_result in entity_results:
            self._add_inheritance_to_entity_result(entity_result)
            self._add_imports_to_entity_result(entity_result)
            self.create_unique_entity_name(entity_result)
            self._results[entity_result.unique_name] = entity_result

def enumeratedItems(baseExpr=None, form='[1]', **min_max):
    """Parser for enumerated items

    Examples:
        [1] abc
        [2] def
        ==> ['abc', 'def']
    """
    if form is None:
        form = '[1]'
    if '1' in form:
        no = pp.Regex(re.escape(form).replace('1', '(?P<no>\\d+)'))
        # .setParseAction(lambda x: x.no)
    else:
        no = pp.Regex(re.escape(form))
    # no.suppress()
    if 'exact' in min_max and min_max['exact'] > 0:
        max_ = min_ = min_max['exact']
    else:
        min_ = min_max.get('min', 0)
        max_ = min_max.get('max', None)
    if baseExpr is None:
        return (pp.Group(no +
                         pp.SkipTo(pp.StringEnd() | no).setParseAction(_strip()))) * (min_, max_)
    return (pp.Group(no + baseExpr.setParseAction(_strip()))) * (min_, max_)

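# Hedged usage sketch; `_strip` and `re` are not defined in the snippet above,
# so we assume a module-level helper that returns a whitespace-stripping parse
# action and provide a stand-in with that shape.
import re
import pyparsing as pp

def _strip():
    return pp.tokenMap(str.strip)

print(enumeratedItems().parseString('[1] abc [2] def').asList())
# -> [['[1]', 'abc'], ['[2]', 'def']]
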
def parse_first_case_line(first_case_line):
    data = {"case_order": first_case_line[0]}
    gender = (p.Suppress(p.Literal("(")) +
              p.Word(p.alphas, exact=1).setResultsName("gender") +
              p.Suppress(p.Literal(")")))
    dob = (p.Suppress(p.Literal("DOB:")) + date.setResultsName("dob") +
           p.Suppress(p.Literal("Age:")) +
           p.Word(p.nums).setResultsName("age"))
    linked_case = p.Suppress(p.Literal("LINKED CASE"))
    provisional = p.Suppress(p.Literal("PROVISIONAL"))
    first_case_line_detail = p.And([
        p.SkipTo(p.White(" ", min=10) ^ gender).setResultsName("name"),
        p.Optional(gender),
        p.Optional(dob),
        p.Optional(linked_case),
        p.Optional(provisional),
        p.Word(p.nums),
    ])
    for key, value in first_case_line_detail.parseString(
            first_case_line[1]).asDict().items():
        data[key] = value.strip()
    return data

def parse_cosmo_ricc2(text):
    def to_float(s, loc, toks):
        try:
            return float(toks[0])
        except ValueError:
            return 0.

    float_ = pp.Word(pp.nums + ".-").setParseAction(to_float)
    int_ = pp.Word(pp.nums).setParseAction(lambda t: int(t[0]))
    big_sep = pp.Suppress(pp.Word("+="))
    small_sep = pp.Suppress(pp.Word("+-"))
    bar = pp.Suppress(pp.Literal("|"))
    sym = pp.Word(pp.alphanums + "'" + '"' + "*")
    multi = int_
    state = int_
    E_tot = float_
    E_diff = float_
    E_exci = float_
    E_exc = float_
    line = pp.Group(
        bar + sym + bar + multi + bar + state + bar + E_tot + bar +
        E_diff + bar + E_exci + bar + E_exc + bar
    )
    parser = (
        pp.Suppress(pp.SkipTo("E(exc(OCC))/eV|", include=True))
        + big_sep
        + pp.OneOrMore(line + small_sep)
    )
    res = parser.parseString(text)
    return res

class CronPlugin(interface.SyslogPlugin):
    """A syslog plugin for parsing cron messages."""

    NAME = u'cron'
    DESCRIPTION = u'Parser for syslog cron messages.'

    REPORTER = u'CRON'

    _PYPARSING_COMPONENTS = {
        u'command': pyparsing.Combine(
            pyparsing.SkipTo(
                pyparsing.Literal(u')') +
                pyparsing.StringEnd())).setResultsName(u'command'),
        u'username': pyparsing.Word(pyparsing.alphanums).setResultsName(
            u'username'),
    }

    _TASK_RUN_GRAMMAR = (
        pyparsing.Literal(u'(') + _PYPARSING_COMPONENTS[u'username'] +
        pyparsing.Literal(u')') + pyparsing.Literal(u'CMD') +
        pyparsing.Literal(u'(') + _PYPARSING_COMPONENTS[u'command'] +
        pyparsing.Literal(u')') + pyparsing.StringEnd())

    MESSAGE_GRAMMARS = [(u'task_run', _TASK_RUN_GRAMMAR)]

    def ParseMessage(self, parser_mediator, key, timestamp, tokens):
        """Parses a syslog body that matched one of defined grammars.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfvfs.
            key (str): name of the matching grammar.
            timestamp (int): the timestamp, which contains the number of
                micro seconds since January 1, 1970, 00:00:00 UTC or 0 on
                error.
            tokens (dict[str, str]): tokens derived from a syslog message
                based on the defined grammar.

        Raises:
            AttributeError: If an unknown key is provided.
        """
        # TODO: change AttributeError into ValueError or equiv.
        if key != u'task_run':
            raise AttributeError(u'Unknown grammar key: {0:s}'.format(key))

        event_data = CronTaskRunEventData()
        event_data.body = tokens.get(u'body', None)
        event_data.command = tokens.get(u'command', None)
        event_data.hostname = tokens.get(u'hostname', None)
        # TODO: pass line number to offset or remove.
        event_data.offset = 0
        event_data.pid = tokens.get(u'pid', None)
        event_data.reporter = tokens.get(u'reporter', None)
        event_data.severity = tokens.get(u'severity', None)
        event_data.username = tokens.get(u'username', None)

        event = time_events.TimestampEvent(
            timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
        parser_mediator.ProduceEventWithEventData(event, event_data)

def _parse_utilization_tables(util_str: str) -> Dict[str, str]:
    """
    Find all of the section titles and tables in a Vivado utilization report.

    These are returned as a dict with the section titles as keys and the
    table as the value.
    """
    # Find section headings, discarding the number and following horizontal
    # line. For example:
    #
    #    1.1 Summary of Registers by Type
    #    --------------------------------
    sec_num = pp.Suppress(pp.lineStart() + pp.Word(pp.nums + "."))
    sec_title = sec_num + pp.SkipTo(
        pp.lineEnd())("title") + pp.lineEnd().suppress()
    sec_hline = pp.Suppress(pp.lineStart() + pp.Word("-") + pp.lineEnd())
    sec_head = sec_title + sec_hline + pp.lineEnd().suppress()

    # Tables use horizontal lines like the following to mark column
    # headings and the end of the table:
    #
    #    +------+------+-------+
    table_hline = pp.lineStart() + pp.Word("+", "-+") + pp.lineEnd()

    # Tables may be just a header with no data rows, or a full header and
    # data rows, so there will be one or two more horizontal lines.
    data = pp.SkipTo(table_hline, failOn=pp.lineEnd() * 2, include=True)
    table = pp.Combine(table_hline + data * (1, 2))

    section = sec_head + table("table")

    # Make line endings significant
    section.setWhitespaceChars(" \t")

    return {x["title"]: x["table"] for x in section.searchString(util_str)}

def _struct_typedef():
    return (_TYPEDEF
            + (_STRUCT.setResultsName("type") | _UNION.setResultsName("type"))
            + pyparsing.Optional(_IDENTIFIER).setResultsName("id")
            + parsers.anything_in_curly()
            + pyparsing.Optional(_STAR)
            + _IDENTIFIER.setResultsName("typedef_name")
            + pyparsing.SkipTo(_SEMICOLON)
            + _SEMICOLON).setResultsName("_struct_typedef")

def _attributeParser():
    # --- attribute parser ---
    attributeIndicator = p.LineStart() + p.Suppress(p.Literal('@'))
    attributeName = p.Word(p.alphanums).setResultsName('attributename')
    attributeSeparator = p.Suppress(p.Literal('::'))
    # TODO force case insensitivity in attributeMode keyword match
    # TODO add debug names
    # TODO add a conditional debug flag
    attributeMode = (p.Word(MODE_KEYWORD_SINGLE) |
                     p.Word(MODE_KEYWORD_MULTIPLE)).setResultsName(
                         'attributemode') + p.Literal(':').suppress()
    attributeType = (p.Word(
        p.alphanums).setResultsName('attributetype')).setParseAction(caps)
    attributePosargs = p.ZeroOrMore(
        (p.Word(p.alphanums) |
         p.Combine(p.Literal('[') + p.SkipTo(']') + p.Literal(']'))) +
        ~p.FollowedBy(p.Literal('=')) +
        p.Optional(p.Literal(',').suppress())).setResultsName('posargs')
    kwargprintables = p.printables.translate(
        str.maketrans('', '', '=,[]()'))
    attributeKwargs = p.ZeroOrMore(
        p.Group(
            p.Word(p.alphanums).setResultsName('keyword') +
            p.Literal('=').suppress() +
            (p.Word(kwargprintables) |
             p.Combine(p.Literal('[').suppress() + p.SkipTo(']') +
                       p.Literal(']').suppress())).setResultsName('value') +
            p.Optional(p.Literal(',').suppress())
            # TODO figure out how to make quotes work as enclosers instead of []
        )).setResultsName('kwargs')
    attributeArgs = (
        p.Literal('(').suppress() + attributePosargs + attributeKwargs +
        p.Literal(')').suppress()).setResultsName('attributeargs')
    attributeList = (attributeIndicator + attributeName +
                     attributeSeparator + attributeMode + attributeType +
                     p.Optional(attributeArgs))
    return attributeList

class CronSyslogPlugin(interface.SyslogPlugin):
    """A syslog plugin for parsing cron messages."""

    NAME = 'cron'
    DATA_FORMAT = 'Cron syslog line'

    REPORTER = 'CRON'

    _PYPARSING_COMPONENTS = {
        'command': pyparsing.Combine(
            pyparsing.SkipTo(
                pyparsing.Literal(')') +
                pyparsing.StringEnd())).setResultsName('command'),
        'username': pyparsing.Word(pyparsing.alphanums).setResultsName(
            'username'),
    }

    _TASK_RUN_GRAMMAR = (
        pyparsing.Literal('(') + _PYPARSING_COMPONENTS['username'] +
        pyparsing.Literal(')') + pyparsing.Literal('CMD') +
        pyparsing.Literal('(') + _PYPARSING_COMPONENTS['command'] +
        pyparsing.Literal(')') + pyparsing.StringEnd())

    MESSAGE_GRAMMARS = [('task_run', _TASK_RUN_GRAMMAR)]

    def ParseMessage(self, parser_mediator, key, date_time, tokens):
        """Parses a syslog body that matched one of defined grammars.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfvfs.
            key (str): name of the matching grammar.
            date_time (dfdatetime.DateTimeValues): date and time values.
            tokens (dict[str, str]): tokens derived from a syslog message
                based on the defined grammar.

        Raises:
            ValueError: If an unknown key is provided.
        """
        if key != 'task_run':
            raise ValueError('Unknown grammar key: {0:s}'.format(key))

        event_data = CronTaskRunEventData()
        event_data.body = tokens.get('body', None)
        event_data.command = tokens.get('command', None)
        event_data.hostname = tokens.get('hostname', None)
        # TODO: pass line number to offset or remove.
        event_data.offset = 0
        event_data.pid = tokens.get('pid', None)
        event_data.reporter = tokens.get('reporter', None)
        event_data.severity = tokens.get('severity', None)
        event_data.username = tokens.get('username', None)

        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_WRITTEN)
        parser_mediator.ProduceEventWithEventData(event, event_data)

def _add_imports_to_result(self, result: AbstractResult, analysis):
    LOGGER.debug(f'extracting imports from base result {result.scanned_file_name}...')

    list_of_words_with_newline_strings = result.scanned_tokens
    source_string_no_comments = self._filter_source_tokens_without_comments(
        list_of_words_with_newline_strings,
        JavaScriptParsingKeyword.INLINE_COMMENT.value,
        JavaScriptParsingKeyword.START_BLOCK_COMMENT.value,
        JavaScriptParsingKeyword.STOP_BLOCK_COMMENT.value)
    filtered_list_no_comments = self.preprocess_file_content_and_generate_token_list_by_mapping(
        source_string_no_comments, self._token_mappings)

    for _, obj, following in self._gen_word_read_ahead(filtered_list_no_comments):
        if obj == JavaScriptParsingKeyword.IMPORT.value:
            read_ahead_string = self.create_read_ahead_string(obj, following)

            valid_name = pp.Word(
                pp.alphanums + CoreParsingKeyword.AT.value +
                CoreParsingKeyword.DOT.value +
                CoreParsingKeyword.ASTERISK.value +
                CoreParsingKeyword.UNDERSCORE.value +
                CoreParsingKeyword.DASH.value +
                CoreParsingKeyword.SLASH.value)

            expression_to_match = pp.SkipTo(pp.Literal(JavaScriptParsingKeyword.FROM.value)) + \
                pp.Literal(JavaScriptParsingKeyword.FROM.value) + \
                pp.Suppress(pp.Literal(CoreParsingKeyword.SINGLE_QUOTE.value)) + \
                pp.FollowedBy(pp.OneOrMore(valid_name.setResultsName(
                    CoreParsingKeyword.IMPORT_ENTITY_NAME.value)))

            try:
                parsing_result = expression_to_match.parseString(read_ahead_string)
            except Exception as some_exception:
                result.analysis.statistics.increment(Statistics.Key.PARSING_MISSES)
                LOGGER.warning(f'warning: could not parse result {result=}\n{some_exception}')
                LOGGER.warning(
                    f'next tokens: {[obj] + following[:AbstractParsingCore.Constants.MAX_DEBUG_TOKENS_READAHEAD.value]}')
                continue

            analysis.statistics.increment(Statistics.Key.PARSING_HITS)

            # ignore any dependency substring from the config ignore list
            dependency = getattr(
                parsing_result, CoreParsingKeyword.IMPORT_ENTITY_NAME.value)
            if self._is_dependency_in_ignore_list(dependency, analysis):
                LOGGER.debug(f'ignoring dependency from {result.unique_name} to {dependency}')
            else:
                result.scanned_import_dependencies.append(dependency)
                LOGGER.debug(f'adding import: {dependency}')

def __init__(self):
    '''
    See `notes.md` for notes on the structure of the file and see
    `pyparsing_notes.md` for notes on pyparsing stuff

    example full lines we are parsing:

    getJsonValue json:string = JsonValue;
    setPollAnswer chat_id:int53 message_id:int53 option_ids:vector<int32> = Ok;
    getInlineQueryResults bot_user_id:int32 chat_id:int53 user_location:location query:string offset:string = InlineQueryResults;
    '''

    # a literal newline
    self.pe_newline = pyparsing.Literal('\n')

    # a semicolon literal
    self.pe_semicolon_literal = pyparsing.Literal(";")

    # a literal colon
    self.pe_colon_literal = pyparsing.Literal(":")

    # literal equal sign
    self.pe_equal_sign_literal = pyparsing.Literal("=")

    # a literal for the start of a comment
    self.pe_comment_literal = pyparsing.Literal('//')

    # token that skips to the end of a line
    # used for the program argument to skip 'N' number of lines
    self.pe_skip_line = pyparsing.SkipTo(pyparsing.lineEnd, include=True)

    # words that can appear in a class name
    self.pe_class_name = pyparsing.Word(pyparsing.alphanums)

    # characters that appear in a parameter name or type
    self.pe_param_name = pyparsing.Word(pyparsing.alphanums + "_")

    # need the angle brackets for stuff like `vector<String>`
    self.pe_param_type = pyparsing.Word(pyparsing.alphanums + "<>")

    # a single param and type pair, so like `message:string`
    self.pe_param_listing = pyparsing.Group(
        self.pe_param_name(constants.RESULT_NAME_PARAM_NAME) +
        self.pe_colon_literal.suppress() +
        self.pe_param_type(constants.RESULT_NAME_PARAM_TYPE))

    # grouping of zero or more parameters
    self.pe_zero_or_more_params = pyparsing.ZeroOrMore(
        self.pe_param_listing(f"{constants.RESULT_NAME_PARAMS}*"))

    # the actual name of the class/type that is being defined
    self.pe_tdlib_class_name = self.pe_class_name(
        constants.RESULT_NAME_CLASS_OR_FUNCTION_NAME)

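# Standalone sketch (assumption: mirrors the param grammar above without the
# enclosing class or its `constants` module, so result names are hard-coded).
import pyparsing

_param = pyparsing.Group(
    pyparsing.Word(pyparsing.alphanums + "_")('param_name') +
    pyparsing.Literal(':').suppress() +
    pyparsing.Word(pyparsing.alphanums + "<>")('param_type'))
print(pyparsing.ZeroOrMore(_param).parseString(
    'chat_id:int53 option_ids:vector<int32>').asList())
# -> [['chat_id', 'int53'], ['option_ids', 'vector<int32>']]
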
def parse_buffer(cls, sensor_uuid, buf):
    # Defining generic pyparsing objects.
    integer = pyp.Word(pyp.nums)
    ip_addr = pyp.Combine(integer + '.' + integer + '.' + integer + '.' + integer)
    port = pyp.Suppress(':') + integer

    # Defining pyparsing objects from expected format:
    #
    #    [**] [1:160:2] COMMUNITY SIP TCP/IP message flooding directed to SIP proxy [**]
    #    [Classification: Attempted Denial of Service] [Priority: 2]
    #    01/10-00:08:23.598520 201.233.20.33:63035 -> 192.234.122.1:22
    #    TCP TTL:53 TOS:0x10 ID:2145 IpLen:20 DgmLen:100 DF
    #    ***AP*** Seq: 0xD34C30CE  Ack: 0x6B1F7D18  Win: 0x2000  TcpLen: 32
    #
    # Note: This format is known to change over versions.
    # Works with Snort version 2.9.2 IPv6 GRE (Build 78)

    header = (
        pyp.Suppress("[**] [") +
        pyp.Combine(integer + ":" + integer + ":" + integer) +
        pyp.Suppress("]"))
    signature = (
        pyp.Combine(pyp.SkipTo("[**]", include=False)) +
        pyp.Suppress("[**]"))
    classif = (
        pyp.Suppress(pyp.Literal("[Classification:")) +
        pyp.Regex("[^]]*") +
        pyp.Suppress(']'))
    pri = pyp.Suppress("[Priority:") + integer + pyp.Suppress("]")
    date = pyp.Combine(
        # day/month/year (year is optional, depends on snort being started with -y)
        integer + "/" + integer +
        pyp.Optional(pyp.Combine("/" + integer),
                     default="/" + str(datetime.now().year)[2:4]) +
        '-' + integer + ':' + integer + ':' + integer + '.' + integer)
    src_ip = ip_addr
    src_port = port
    arrow = pyp.Suppress("->")
    dest_ip = ip_addr
    dest_port = port
    proto = pyp.Regex(r"\S+")

    bnf = (header + signature + pyp.Optional(classif, default='') + pri +
           date + src_ip + pyp.Optional(src_port, default='') + arrow +
           dest_ip + pyp.Optional(dest_port, default='') + proto)

    fields = bnf.searchString(buf)
    if fields:
        if abs(datetime.utcnow() - datetime.now()).total_seconds() > 1:
            # Since snort doesn't log in UTC, a correction is needed to
            # convert the logged time to UTC. The following code calculates
            # the delta between local time and UTC and uses it to convert
            # the logged time to UTC. Additional time formatting makes
            # sure the previous code doesn't break.
            fields[0] = [f.strip() for f in fields[0]]
        return cls(sensor_uuid, *fields[0])
    else:
        return None

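# Hedged demo (assumption: `cls` above is an alert record class we don't have,
# so instead of calling parse_buffer we re-run just the header/signature part
# of the grammar on the sample alert documented in the comments above).
import pyparsing as pyp

_SAMPLE_ALERT = (
    "[**] [1:160:2] COMMUNITY SIP TCP/IP message flooding directed to SIP proxy [**]\n"
    "[Classification: Attempted Denial of Service] [Priority: 2]\n"
    "01/10-00:08:23.598520 201.233.20.33:63035 -> 192.234.122.1:22\n")
_integer = pyp.Word(pyp.nums)
_header = (pyp.Suppress("[**] [") +
           pyp.Combine(_integer + ":" + _integer + ":" + _integer) +
           pyp.Suppress("]"))
_signature = pyp.Combine(pyp.SkipTo("[**]")) + pyp.Suppress("[**]")
print((_header + _signature).searchString(_SAMPLE_ALERT)[0].asList())
# -> roughly ['1:160:2', 'COMMUNITY SIP TCP/IP message flooding directed to SIP proxy ']
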
def parse_spectre(netlist_string):
    # newlines are part of the grammar, thus redefine the whitespace chars without it
    ws = ' \t'
    _p.ParserElement.setDefaultWhitespaceChars(ws)

    # spectre netlist grammar definition
    EOL = _p.LineEnd().suppress()  # end of line
    linebreak = _p.Suppress("\\" + _p.LineEnd())  # breaking a line with backslash newline
    identifier = _p.Word(_p.alphanums + '_!<>-+')  # a name for...
    number = _p.Word(_p.nums + ".")  # a number
    net = identifier  # a net
    nets = _p.Group(_p.OneOrMore(net('net') | linebreak))  # many nets
    cktname = identifier  # name of a subcircuit
    cktname_end = _p.Keyword("ends").suppress()
    comment = _p.Suppress("//" + _p.SkipTo(_p.LineEnd()))
    expression = _p.Word(_p.alphanums + '._*+-/()')
    inst_param_key = identifier + _p.Suppress("=")
    inst_param_value = expression('expression')
    inst_parameter = _p.Group(
        inst_param_key('name') + inst_param_value('value')).setResultsName('key')
    parameters = _p.Group(
        _p.ZeroOrMore(inst_parameter | linebreak)).setResultsName('parameters')
    instref = identifier
    instname = identifier
    instance = _p.Group(
        instname('name') + _p.Suppress('(') + nets('nets') + _p.Suppress(')') +
        instref('reference') + parameters + EOL).setResultsName('instance')
    subcircuit_content = _p.Group(
        _p.ZeroOrMore(instance | EOL | comment)).setResultsName('subnetlist')
    subcircuit = _p.Group(
        # matches subckt <name> <nets> <newline>
        _p.Keyword("subckt").suppress() + cktname('name') + nets('nets') + EOL
        # matches the content of the subcircuit
        + subcircuit_content
        # matches ends <name> <newline>
        + cktname_end + _p.matchPreviousExpr(cktname).suppress() + EOL
    ).setResultsName('subcircuit')
    topcircuit = _p.Group(
        # matches topckt <name> <nets> <newline>
        _p.Keyword("topckt").suppress() + cktname('name') + nets('nets') + EOL
        # matches the content of the subcircuit
        + subcircuit_content
        # matches ends <name> <newline>
        + cktname_end + _p.matchPreviousExpr(cktname).suppress() + EOL
    ).setResultsName('topcircuit')
    netlist_element = subcircuit | topcircuit | EOL | comment('comment')
    netlist = _p.ZeroOrMore(netlist_element) + _p.StringEnd()

    parameters.setParseAction(handle_parameters)
    instance.setParseAction(handle_instance)
    subcircuit.setParseAction(handle_subcircuit)
    topcircuit.setParseAction(handle_topcircuit)

    return netlist.parseString(netlist_string)

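# Hedged usage sketch; the handle_* parse actions are not defined in the
# snippet above, so pass-through stand-ins are assumed here just to exercise
# the grammar, and the sample netlist is an assumption in spectre syntax.
import pyparsing as _p

def handle_parameters(tokens):
    return tokens

handle_instance = handle_subcircuit = handle_topcircuit = handle_parameters

_SAMPLE_NETLIST = """\
// a two-instance subcircuit
subckt inv in out vdd vss
M1 (out in vss vss) nmos
M2 (out in vdd vdd) pmos
ends inv
"""
print(parse_spectre(_SAMPLE_NETLIST))
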
def __parse_netem_param(self, line, parse_param_name, word_pattern):
    pattern = (pp.SkipTo(parse_param_name, include=True) +
               pp.Word(word_pattern))

    try:
        result = pattern.parseString(_to_unicode(line))[-1]
        if dataproperty.is_not_empty_string(result):
            self.__parsed_param[parse_param_name] = result
    except pp.ParseException:
        pass

def table_row(start_tag, end_tag):
    body = pp.SkipTo(end_tag)
    body.addParseAction(pp.tokenMap(str.strip), pp.tokenMap(strip_html))
    row = pp.Group(tr.suppress() +
                   pp.ZeroOrMore(start_tag.suppress() + body + end_tag.suppress()) +
                   tr_end.suppress())
    return row

def __parse_netem_delay_distro(self, line):
    parse_param_name = "delay"
    pattern = (pp.SkipTo(parse_param_name, include=True) +
               pp.Word(pp.nums + ".msu") + pp.Word(pp.nums + ".msu"))

    try:
        parsed_list = pattern.parseString(line)
        self.__parsed_param[parse_param_name] = parsed_list[2]
        self.__parsed_param["delay-distro"] = parsed_list[3]
    except pp.ParseException:
        pass

def _parse_duplicate(self, line):
    packet_pattern = (
        pp.SkipTo(pp.Word(pp.nums) + pp.Literal("duplicates,")) +
        pp.Word(pp.nums) + pp.Literal("duplicates,"))
    try:
        duplicate_parse_list = packet_pattern.parseString(_to_unicode(line))
    except pp.ParseException:
        return 0

    return int(duplicate_parse_list[-2])

def getUrls(lines=[]):
    astart, aend = pyparsing.makeHTMLTags('a')
    grammer = astart + pyparsing.SkipTo(aend) + aend.suppress()
    urls = []
    for x1, x2, x3 in grammer.scanString(''.join(lines)):
        urls.append(str(x1[1][1]))
    return urls

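# Hedged usage sketch for getUrls above (the expected output assumes the
# makeHTMLTags token layout where x1[1] is the ['href', value] pair):
import pyparsing

print(getUrls(['<a href="http://example.com">example</a>']))
# -> ['http://example.com']
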
def __parse_bandwidth_rate(self, line):
    parse_param_name = "rate"
    pattern = (pp.SkipTo(parse_param_name, include=True) +
               pp.Word(pp.alphanums + "." + ":"))

    try:
        result = pattern.parseString(line)[-1]
        if typepy.is_not_null_string(result):
            result = result.rstrip("bit")
            self.__parsed_param[parse_param_name] = result
    except pp.ParseException:
        pass

def __parse_netem_param(self, line, parse_param_name, word_pattern, key_name=None):
    pattern = pp.SkipTo(parse_param_name, include=True) + pp.Word(word_pattern)
    if not key_name:
        key_name = parse_param_name

    try:
        result = pattern.parseString(line)[-1]
        if typepy.is_not_null_string(result):
            self.__parsed_param[key_name] = result
    except pp.ParseException:
        pass

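# Standalone sketch of the SkipTo(..., include=True) idiom shared by the
# netem/rate parsers above; the sample `tc qdisc` line is an assumption.
import pyparsing as pp

_line = 'qdisc netem 8001: root refcnt 2 limit 1000 delay 100.0ms loss 0.1%'
_pattern = pp.SkipTo('delay', include=True) + pp.Word(pp.nums + '.msu')
print(_pattern.parseString(_line)[-1])
# -> 100.0ms
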