'six 1.3 or later is required; you have %s' % ( six.__version__)) try: import pyparsing except ImportError: raise ImportError("matplotlib requires pyparsing") else: if not compare_versions(pyparsing.__version__, '1.5.6'): raise ImportError( "matplotlib requires pyparsing >= 1.5.6") # pyparsing 2.0.0 bug, but it may be patched in distributions try: f = pyparsing.Forward() f <<= pyparsing.Literal('a') bad_pyparsing = f is None except TypeError: bad_pyparsing = True # pyparsing 1.5.6 does not have <<= on the Forward class, but # pyparsing 2.0.0 and later will spew deprecation warnings if # using << instead. Additionally, the <<= in pyparsing 1.5.7 is # broken, since it doesn't return self. In order to support # pyparsing 1.5.6 and above with a common code base, this small # monkey patch is applied. if bad_pyparsing: def _forward_ilshift(self, other): self.__lshift__(other) return self pyparsing.Forward.__ilshift__ = _forward_ilshift
UniformIntegerHyperparameter, UniformFloatHyperparameter, \ NumericalHyperparameter, Constant, IntegerHyperparameter, \ NormalIntegerHyperparameter, NormalFloatHyperparameter from ConfigSpace.conditions import EqualsCondition, NotEqualsCondition,\ InCondition, AndConjunction, OrConjunction, ConditionComponent # from ConfigSpace.forbidden import ForbiddenEqualsClause, \ # ForbiddenAndConjunction, ForbiddenInClause, AbstractForbiddenComponent, MultipleValueForbiddenClause from ConfigSpace.forbidden import ForbiddenEqualsClause, \ ForbiddenAndConjunction, ForbiddenInClause, AbstractForbiddenComponent, MultipleValueForbiddenClause # Build pyparsing expressions for params pp_param_name = pyparsing.Word(pyparsing.alphanums + "_" + "-" + "@" + "." + ":" + ";" + "\\" + "/" + "?" + "!" + "$" + "%" + "&" + "*" + "+" + "<" + ">") pp_digits = "0123456789" pp_plusorminus = pyparsing.Literal('+') | pyparsing.Literal('-') pp_int = pyparsing.Combine( pyparsing.Optional(pp_plusorminus) + pyparsing.Word(pp_digits)) pp_float = pyparsing.Combine( pyparsing.Optional(pp_plusorminus) + pyparsing.Optional(pp_int) + "." + pp_int) pp_eorE = pyparsing.Literal('e') | pyparsing.Literal('E') pp_floatorint = pp_float | pp_int pp_e_notation = pyparsing.Combine(pp_floatorint + pp_eorE + pp_int) pp_number = pp_e_notation | pp_float | pp_int pp_numberorname = pp_number | pp_param_name pp_il = pyparsing.Word("il") pp_choices = pp_param_name + pyparsing.Optional( pyparsing.OneOrMore("," + pp_param_name)) pp_cont_param = pp_param_name + "[" + pp_number + "," + pp_number + "]" + \
def parse_alg_expr(estr):
    """
    Parse an algebraic expression written in Kappa syntax.

    Parameters
    ----------
    estr : str
        String containing a Kappa algebraic expression.

    Returns
    -------
    pyparsing.ParseResults
        The sequence of tokens recognized in the expression.
    """
    lit = pp.Literal

    # Number with optional sign, fraction and scientific-notation suffix.
    exp_marker = pp.CaselessLiteral("E")
    number = pp.Combine(
        pp.Word("+-" + pp.nums, pp.nums)
        + pp.Optional(lit(".") + pp.Optional(pp.Word(pp.nums)))
        + pp.Optional(exp_marker + pp.Word("+-" + pp.nums, pp.nums)))

    # Infix operators, grouped by precedence level.
    add_op = lit("+") | lit("-")
    mul_op = lit("*") | lit("/") | lit("[mod]")
    pow_op = lit("^")
    open_paren = lit("(")
    close_paren = lit(")")

    # Built-in Kappa constants and simulation counters.  Alternation order
    # matters ("[E]" must be tried before "[E-]" would otherwise shadow it);
    # the order below mirrors the precedence used throughout this module.
    constant = (lit("inf") | lit("[pi]") | lit("[E]") | lit("[E-]")
                | lit("[Emax]") | lit("[T]") | lit("[Tsim]") | lit("[Tmax]")
                | lit("[pp]"))

    # Single-quoted variable names.
    variable = pp.QuotedString("'")

    # Agent patterns delimited by '|'; their contents are parsed elsewhere.
    pattern = pp.Combine(lit("|") + pp.CharsNotIn("|") + lit("|"))

    # Unary functions taking a single argument.
    one_arg_func = (lit("[log]") | lit("[exp]") | lit("[sin]") | lit("[cos]")
                    | lit("[tan]") | lit("[sqrt]") | lit("[int]"))

    # Unary functions taking two arguments.
    two_arg_func = lit("[max]") | lit("[min]")

    # Grammar: atoms combine via ^ (factor), then * / [mod] (term),
    # then + - (expr).  Forward declarations allow the recursion.
    expr = pp.Forward()
    atom = pp.Optional("-") + (
        constant
        | variable
        | number
        | open_paren + expr + close_paren
        | one_arg_func + expr
        | two_arg_func + expr + expr
        | pattern)

    factor = pp.Forward()
    factor << atom + pp.ZeroOrMore(pow_op + factor)
    term = factor + pp.ZeroOrMore(mul_op + factor)
    expr << term + pp.ZeroOrMore(add_op + term)

    return expr.parseString(estr.strip())
class TcFilterParser(AbstractParser):
    """Parses text produced by the ``tc filter show`` subcommand into
    row dictionaries and stores them via the given database connection.

    NOTE(review): the expected input format is inferred from the pyparsing
    patterns below ("filter parent ... flowid ...", "match .../... at N");
    confirm against the tc(8) / tc-u32 output for the supported tc versions.
    """

    class FilterMatchIdIpv4(object):
        # u32 match offsets (bytes into the IPv4 header) for each field.
        INCOMING_NETWORK = 12
        OUTGOING_NETWORK = 16
        PORT = 20

    class FilterMatchIdIpv6(object):
        # IPv6 addresses span several 32-bit u32 matches, hence the lists.
        INCOMING_NETWORK_LIST = [8, 12, 16, 20]
        OUTGOING_NETWORK_LIST = [24, 28, 32, 36]
        PORT = 40

    # Each pattern skips ahead to a keyword and captures the token after it.
    __FILTER_FLOWID_PATTERN = (
        pp.Literal("filter parent") +
        pp.SkipTo("flowid", include=True) +
        pp.Word(pp.hexnums + ":"))

    __FILTER_PROTOCOL_PATTERN = (
        pp.Literal("filter parent") +
        pp.SkipTo("protocol", include=True) +
        pp.Word(pp.alphanums))

    __FILTER_PRIORITY_PATTERN = (
        pp.Literal("filter parent") +
        pp.SkipTo("pref", include=True) +
        pp.Word(pp.nums))

    __FILTER_ID_PATTERN = (
        pp.Literal("filter parent") +
        pp.SkipTo("fh", include=True) +
        pp.Word(pp.hexnums + ":"))

    # Matches lines such as "match c0a80000/ffffff00 at 16".
    __FILTER_MATCH_PATTERN = (
        pp.Literal("match") +
        pp.Word(pp.alphanums + "/") +
        pp.Literal("at") +
        pp.Word(pp.nums))

    __FILTER_MANGLE_MARK_PATTERN = (
        pp.Literal("filter parent") +
        pp.SkipTo("handle", include=True) +
        pp.Word(pp.hexnums) +
        pp.SkipTo("classid", include=True) +
        pp.Word(pp.hexnums + ":"))

    @property
    def protocol(self):
        # Protocol captured from the most recently parsed filter line.
        return self.__protocol

    @property
    def _tc_subcommand(self):
        return TcSubCommand.FILTER.value

    def __init__(self, con, ip_version):
        """
        Args:
            con: database connection used to persist parsed filter rows.
            ip_version: 4 or 6; selects the address-parsing strategy.
        """
        super(TcFilterParser, self).__init__()

        self.__con = con
        self.__ip_version = ip_version
        self.__buffer = None
        self.__parse_idx = 0
        self.__protocol = None

        self._clear()

    def parse(self, device, text):
        """Parse ``tc filter show`` output for *device*.

        Returns a list of filter dictionaries; also writes them to the
        database connection when any were found.
        """
        self._clear()

        if typepy.is_null_string(text):
            return []

        filter_data_matrix = []
        # Keep the split lines and a cursor as instance state because the
        # IPv6 network parser consumes additional lines ahead of the cursor.
        self.__buffer = self._to_unicode(text).splitlines()
        self.__parse_idx = 0

        while self.__parse_idx < len(self.__buffer):
            line = self._to_unicode(self.__buffer[self.__parse_idx].strip())
            self.__parse_idx += 1

            if typepy.is_null_string(line):
                continue

            self.__device = device

            # First, try the line as a mangle-mark filter; on success the
            # else-branch records it and moves on to the next line.
            try:
                self.__parse_mangle_mark(line)
            except pp.ParseException:
                logger.debug("failed to parse mangle: {}".format(line))
            else:
                filter_data_matrix.append({
                    Tc.Param.DEVICE: self.__device,
                    Tc.Param.CLASS_ID: self.__classid,
                    Tc.Param.HANDLE: self.__handle,
                })
                self._clear()
                continue

            # Snapshot the filter built so far before a "filter parent"
            # header line replaces the current parse state.
            tc_filter = self.__get_filter()

            try:
                self.__parse_flow_id(line)
                self.__parse_protocol(line)
                self.__parse_priority(line)
                self.__parse_filter_id(line)

                if tc_filter.get(Tc.Param.FLOW_ID):
                    # A previous filter was pending: flush it, then re-parse
                    # this header line into the freshly cleared state.
                    logger.debug("store filter: {}".format(tc_filter))
                    filter_data_matrix.append(tc_filter)
                    self._clear()

                    self.__parse_flow_id(line)
                    self.__parse_protocol(line)
                    self.__parse_priority(line)
                    self.__parse_filter_id(line)

                continue
            except pp.ParseException:
                logger.debug("failed to parse flow id: {}".format(line))

            # Otherwise the line should be a "match" line for the current
            # filter; dispatch on the configured IP version.
            try:
                if self.__ip_version == 4:
                    self.__parse_filter_ipv4(line)
                elif self.__ip_version == 6:
                    self.__parse_filter_ipv6(line)
                else:
                    raise ValueError("unknown ip version: {}".format(
                        self.__ip_version))
            except pp.ParseException:
                logger.debug("failed to parse filter: {}".format(line))

        # Flush the last pending filter after the loop ends.
        if self.__flow_id:
            filter_data_matrix.append(self.__get_filter())

        if filter_data_matrix:
            self.__con.create_table_from_data_matrix(
                table_name=self._tc_subcommand,
                attr_name_list=list(self.__get_filter()),
                data_matrix=filter_data_matrix,
            )

        logger.debug("tc {:s} parse result: {}".format(
            self._tc_subcommand, json.dumps(filter_data_matrix, indent=4)))

        return filter_data_matrix

    def parse_incoming_device(self, text):
        """Return the ifb device name from an "Egress Redirect" line,
        or None when *text* is empty or has no such line."""
        if typepy.is_null_string(text):
            return None

        match = re.search(
            r"Egress Redirect to device ifb[\d]+",
            self._to_unicode(text), re.MULTILINE)
        if match is None:
            return None

        return re.search(r"ifb[\d]+", match.group()).group()

    def _clear(self):
        # Reset all per-filter parse state between filters.
        self.__device = None
        self.__filter_id = None
        self.__flow_id = None
        self.__protocol = None
        self.__priority = None
        self.__filter_src_network = None
        self.__filter_dst_network = None
        self.__filter_src_port = None
        self.__filter_dst_port = None
        self.__handle = None
        self.__classid = None

    def __get_filter(self):
        """Assemble the current parse state into an ordered row dict.

        Key order matters: it defines the column order passed to
        create_table_from_data_matrix() in parse().
        """
        tc_filter = OrderedDict()
        tc_filter[Tc.Param.DEVICE] = self.__device
        tc_filter[Tc.Param.FILTER_ID] = self.__filter_id
        tc_filter[Tc.Param.FLOW_ID] = self.__flow_id
        tc_filter[Tc.Param.PROTOCOL] = self.protocol
        tc_filter[Tc.Param.PRIORITY] = self.__priority
        tc_filter[Tc.Param.SRC_NETWORK] = sanitize_network(
            self.__filter_src_network, self.__ip_version)
        tc_filter[Tc.Param.DST_NETWORK] = sanitize_network(
            self.__filter_dst_network, self.__ip_version)
        tc_filter[Tc.Param.SRC_PORT] = self.__filter_src_port
        tc_filter[Tc.Param.DST_PORT] = self.__filter_dst_port

        return tc_filter

    def __parse_flow_id(self, line):
        # Raises pp.ParseException when the line is not a flowid header.
        parsed_list = self.__FILTER_FLOWID_PATTERN.parseString(line)
        self.__flow_id = parsed_list[-1]
        logger.debug("succeed to parse flow id: flow-id={}, line={}".format(
            self.__flow_id, line))

    def __parse_protocol(self, line):
        parsed_list = self.__FILTER_PROTOCOL_PATTERN.parseString(line)
        self.__protocol = parsed_list[-1]
        logger.debug("succeed to parse protocol: protocol={}, line={}".format(
            self.__protocol, line))

    def __parse_priority(self, line):
        parsed_list = self.__FILTER_PRIORITY_PATTERN.parseString(line)
        self.__priority = int(parsed_list[-1])
        logger.debug("succeed to parse priority: priority={}, line={}".format(
            self.__priority, line))

    def __parse_filter_id(self, line):
        parsed_list = self.__FILTER_ID_PATTERN.parseString(line)
        self.__filter_id = parsed_list[-1]
        logger.debug(
            "succeed to parse filter id: filter-id={}, line={}".format(
                self.__filter_id, line))

    def __parse_mangle_mark(self, line):
        parsed_list = self.__FILTER_MANGLE_MARK_PATTERN.parseString(line)
        self.__classid = parsed_list[-1]
        # "0" prefix guards int() against an empty capture.
        self.__handle = int("0" + parsed_list[-3], 16)
        logger.debug(
            "succeed to parse mangle mark: "
            "classid={}, handle={}, line={}".format(
                self.__classid, self.__handle, line))

    def __parse_filter_ip_line(self, line):
        """Split a u32 "match VALUE/MASK at OFFSET" line into
        (value_hex, mask_hex, match_id)."""
        parsed_list = self.__FILTER_MATCH_PATTERN.parseString(line)
        value_hex, mask_hex = parsed_list[1].split("/")
        match_id = int(parsed_list[3])

        return (value_hex, mask_hex, match_id)

    def __parse_filter_ipv4_network(self, value_hex, mask_hex, match_id):
        # Convert 8 hex digits to dotted-quad; prefix length is the
        # population count of the mask.
        ipaddr = ".".join([
            text_type(int(value_hex[i:i + 2], 16))
            for i in range(0, len(value_hex), 2)
        ])
        netmask = bin(int(mask_hex, 16)).count("1")
        network = "{:s}/{:d}".format(ipaddr, netmask)

        if match_id == self.FilterMatchIdIpv4.INCOMING_NETWORK:
            self.__filter_src_network = network
        elif match_id == self.FilterMatchIdIpv4.OUTGOING_NETWORK:
            self.__filter_dst_network = network
        else:
            logger.warn("unknown match id: {}".format(match_id))

    def __parse_filter_ipv6_network(self, value_hex, mask_hex, match_id):
        """Reassemble an IPv6 network from the series of 32-bit u32 match
        lines that follow the current one, advancing the shared line cursor
        past every consumed continuation line."""
        from collections import namedtuple

        Ipv6Entry = namedtuple("Ipv6Entry", "match_id octet_list mask_hex")
        OCTET_LEN = 4

        ipv6_entry_list = [
            Ipv6Entry(
                match_id=match_id,
                octet_list=[
                    value_hex[i:i + OCTET_LEN]
                    for i in range(0, len(value_hex), OCTET_LEN)
                ],
                mask_hex=mask_hex,
            )
        ]

        # Pull in subsequent match lines that belong to the same address.
        while True:
            try:
                line = self.__buffer[self.__parse_idx].strip()
            except IndexError:
                break

            try:
                value_hex, mask_hex, match_id = self.__parse_filter_ip_line(
                    line)
            except pp.ParseException:
                break

            if (match_id in self.FilterMatchIdIpv6.INCOMING_NETWORK_LIST or
                    match_id in self.FilterMatchIdIpv6.OUTGOING_NETWORK_LIST):
                ipv6_entry_list.append(
                    Ipv6Entry(
                        match_id=match_id,
                        octet_list=[
                            value_hex[i:i + OCTET_LEN]
                            for i in range(0, len(value_hex), OCTET_LEN)
                        ],
                        mask_hex=mask_hex,
                    ))
            else:
                break

            self.__parse_idx += 1

        # Accumulate hextets and prefix bits per direction.
        src_octet_list = []
        dst_octet_list = []
        src_netmask = 0
        dst_netmask = 0

        for ipv6_entry in ipv6_entry_list:
            part_netmask = bin(int(ipv6_entry.mask_hex, 16)).count("1")

            if ipv6_entry.match_id in self.FilterMatchIdIpv6.INCOMING_NETWORK_LIST:
                src_octet_list.extend(ipv6_entry.octet_list)
                src_netmask += part_netmask
            elif ipv6_entry.match_id in self.FilterMatchIdIpv6.OUTGOING_NETWORK_LIST:
                dst_octet_list.extend(ipv6_entry.octet_list)
                dst_netmask += part_netmask
            else:
                raise ValueError(
                    "unexpected ipv6 entry: {}".format(ipv6_entry))

        # Pad to the full 8 hextets of an IPv6 address.
        while len(src_octet_list) < 8:
            src_octet_list.append("0000")
        while len(dst_octet_list) < 8:
            dst_octet_list.append("0000")

        self.__filter_dst_network = ipaddress.IPv6Network("{:s}/{:d}".format(
            ":".join(dst_octet_list), dst_netmask)).compressed
        self.__filter_src_network = ipaddress.IPv6Network("{:s}/{:d}".format(
            ":".join(src_octet_list), src_netmask)).compressed

    def __parse_filter_port(self, value_hex):
        # Port filter consists eight hex digits.
        # The upper-half represents source port filter and
        # the bottom-half represents destination port filter.
        if len(value_hex) != 8:
            raise ValueError("invalid port filter value: {}".format(value_hex))

        src_port_hex = value_hex[:4]
        dst_port_hex = value_hex[4:]

        logger.debug(
            "parse ipv4 port: src-port-hex={}, dst-port-hex={}".format(
                src_port_hex, dst_port_hex))

        # Port value 0 means "no filter on this side" and maps to None.
        src_port_decimal = int(src_port_hex, 16)
        self.__filter_src_port = src_port_decimal if src_port_decimal != 0 else None

        dst_port_decimal = int(dst_port_hex, 16)
        self.__filter_dst_port = dst_port_decimal if dst_port_decimal != 0 else None

    def __parse_filter_ipv4(self, line):
        value_hex, mask_hex, match_id = self.__parse_filter_ip_line(line)

        if match_id in [
                self.FilterMatchIdIpv4.INCOMING_NETWORK,
                self.FilterMatchIdIpv4.OUTGOING_NETWORK,
        ]:
            self.__parse_filter_ipv4_network(value_hex, mask_hex, match_id)
        elif match_id == self.FilterMatchIdIpv4.PORT:
            self.__parse_filter_port(value_hex)
        elif match_id in (self.FilterMatchIdIpv6.INCOMING_NETWORK_LIST +
                          self.FilterMatchIdIpv6.OUTGOING_NETWORK_LIST +
                          [self.FilterMatchIdIpv6.PORT]):
            # An IPv6-shaped offset while parsing as IPv4 suggests the
            # caller chose the wrong IP version.
            logger.warn(
                "unknown match id for an IPv4 filter: might be an IPv6 filter. "
                "try to use --ipv6 option. (id={})".format(match_id))
            return
        else:
            logger.debug("unknown match id: {}".format(match_id))
            return

        logger.debug("succeed to parse ipv4 filter: " + ", ".join([
            "src_network={}".format(self.__filter_src_network),
            "dst_network={}".format(self.__filter_dst_network),
            "src_port={}".format(self.__filter_src_port),
            "dst_port={}".format(self.__filter_dst_port),
            "line={}".format(line),
        ]))

    def __parse_filter_ipv6(self, line):
        value_hex, mask_hex, match_id = self.__parse_filter_ip_line(line)

        if (match_id in self.FilterMatchIdIpv6.INCOMING_NETWORK_LIST or
                match_id in self.FilterMatchIdIpv6.OUTGOING_NETWORK_LIST):
            self.__parse_filter_ipv6_network(value_hex, mask_hex, match_id)
        elif match_id == self.FilterMatchIdIpv6.PORT:
            self.__parse_filter_port(value_hex)
        else:
            logger.debug("unknown match id: {}".format(match_id))
            return

        logger.debug("succeed to parse ipv6 filter: " + ", ".join([
            "src_network={}".format(self.__filter_src_network),
            "dst_network={}".format(self.__filter_dst_network),
            "src_port={}".format(self.__filter_src_port),
            "dst_port={}".format(self.__filter_dst_port),
            "line={}".format(line),
        ]))
sub_operand for operand in self.operands for sub_operand in operand.operands_list ]) class AndSubExpr(BinaryOp): """Expand later as needed.""" pass class OrSubExpr(BinaryOp): """Expand later as needed.""" pass COMMA = pyparsing.Suppress(pyparsing.Literal(",")) LPAREN = pyparsing.Suppress(pyparsing.Literal("(")) RPAREN = pyparsing.Suppress(pyparsing.Literal(")")) EQUAL = pyparsing.Literal("=") LBRACE = pyparsing.Suppress(pyparsing.Literal("{")) RBRACE = pyparsing.Suppress(pyparsing.Literal("}")) # Initialize non-ascii unicode code points in the Basic Multilingual Plane. unicode_printables = u''.join( unichr(c) for c in xrange(128, 65536) if not unichr(c).isspace()) # Does not like comma. No Literals from above allowed. valid_identifier_chars = ((unicode_printables + pyparsing.alphanums + ".-_#!$%&'*+/:;?@[\\]^`|~")) metric_name = (pyparsing.Word(valid_identifier_chars, min=1,
def _build_tgrep_parser(set_parse_actions = True):
    '''
    Builds a pyparsing-based parser object for tokenizing and
    interpreting tgrep search strings.

    When ``set_parse_actions`` is False the grammar only tokenizes the
    input; when True (the default) each grammar element is wired to the
    module-level ``_tgrep_*_action`` callbacks that build predicate
    functions out of the parsed tokens.
    '''
    # A relation operator, optionally negated with '!'.
    tgrep_op = (pyparsing.Optional('!') +
                pyparsing.Regex('[$%,.<>][%,.<>0-9-\':]*'))
    # Quoted node literals and regexes, kept verbatim (unquoteResults=False).
    tgrep_qstring = pyparsing.QuotedString(quoteChar='"', escChar='\\',
                                           unquoteResults=False)
    tgrep_node_regex = pyparsing.QuotedString(quoteChar='/', escChar='\\',
                                              unquoteResults=False)
    # Case-insensitive variants, prefixed with "i@".
    tgrep_qstring_icase = pyparsing.Regex(
        'i@\\"(?:[^"\\n\\r\\\\]|(?:\\\\.))*\\"')
    tgrep_node_regex_icase = pyparsing.Regex(
        'i@\\/(?:[^/\\n\\r\\\\]|(?:\\\\.))*\\/')
    # A bare node label: any run of characters with no tgrep syntax in it.
    tgrep_node_literal = pyparsing.Regex('[^][ \r\t\n;:.,&|<>()$!@%\'^=]+')
    # Forward declarations: expressions and relations are mutually recursive.
    tgrep_expr = pyparsing.Forward()
    tgrep_relations = pyparsing.Forward()
    tgrep_parens = pyparsing.Literal('(') + tgrep_expr + ')'
    # NLTK tree-position addressing, e.g. N(0,1).
    tgrep_nltk_tree_pos = (
        pyparsing.Literal('N(') +
        pyparsing.Optional(pyparsing.Word(pyparsing.nums) + ',' +
                           pyparsing.Optional(pyparsing.delimitedList(
                               pyparsing.Word(pyparsing.nums), delim=',') +
                                              pyparsing.Optional(','))) + ')')
    tgrep_node_label = pyparsing.Regex('[A-Za-z0-9]+')
    # "=label" references a previously bound node label.
    tgrep_node_label_use = pyparsing.Combine('=' + tgrep_node_label)
    # see _tgrep_segmented_pattern_action
    tgrep_node_label_use_pred = tgrep_node_label_use.copy()
    macro_name = pyparsing.Regex('[^];:.,&|<>()[$!@%\'^=\r\t\n ]+')
    # No whitespace may intervene between '@' and the macro name.
    macro_name.setWhitespaceChars('')
    macro_use = pyparsing.Combine('@' + macro_name)
    # Alternation order matters: the more specific forms are tried first.
    tgrep_node_expr = (tgrep_node_label_use_pred |
                       macro_use |
                       tgrep_nltk_tree_pos |
                       tgrep_qstring_icase |
                       tgrep_node_regex_icase |
                       tgrep_qstring |
                       tgrep_node_regex |
                       '*' |
                       tgrep_node_literal)
    # A node expression optionally followed by "=label" (binding a label);
    # whitespace is disallowed around the '=' so it binds tightly.
    tgrep_node_expr2 = ((tgrep_node_expr +
                         pyparsing.Literal('=').setWhitespaceChars('') +
                         tgrep_node_label.copy().setWhitespaceChars('')) |
                        tgrep_node_expr)
    tgrep_node = (tgrep_parens |
                  (pyparsing.Optional("'") +
                   tgrep_node_expr2 +
                   pyparsing.ZeroOrMore("|" + tgrep_node_expr)))
    # "[...]" groups relations; '!' negates the whole group.
    tgrep_brackets = pyparsing.Optional('!') + '[' + tgrep_relations + ']'
    tgrep_relation = tgrep_brackets | (tgrep_op + tgrep_node)
    # Conjunction of relations; '&' is optional between conjuncts.
    tgrep_rel_conjunction = pyparsing.Forward()
    tgrep_rel_conjunction << (tgrep_relation +
                              pyparsing.ZeroOrMore(pyparsing.Optional('&') +
                                                   tgrep_rel_conjunction))
    # Disjunction of conjunctions, separated by '|'.
    tgrep_relations << tgrep_rel_conjunction + pyparsing.ZeroOrMore(
        "|" + tgrep_relations)
    tgrep_expr << tgrep_node + pyparsing.Optional(tgrep_relations)
    # A labeled segment: "=label" plus optional relations (segmented patterns).
    tgrep_expr_labeled = tgrep_node_label_use + pyparsing.Optional(tgrep_relations)
    tgrep_expr2 = tgrep_expr + pyparsing.ZeroOrMore(':' + tgrep_expr_labeled)
    # "@name <pattern>;" defines a macro usable later as "@name".
    macro_defn = (pyparsing.Literal('@') +
                  pyparsing.White().suppress() +
                  macro_name +
                  tgrep_expr2)
    tgrep_exprs = (pyparsing.Optional(macro_defn + pyparsing.ZeroOrMore(';' + macro_defn) + ';') +
                   tgrep_expr2 +
                   pyparsing.ZeroOrMore(';' + (macro_defn | tgrep_expr2)) +
                   pyparsing.ZeroOrMore(';').suppress())
    if set_parse_actions:
        tgrep_node_label_use.setParseAction(_tgrep_node_label_use_action)
        tgrep_node_label_use_pred.setParseAction(_tgrep_node_label_pred_use_action)
        macro_use.setParseAction(_tgrep_macro_use_action)
        tgrep_node.setParseAction(_tgrep_node_action)
        tgrep_node_expr2.setParseAction(_tgrep_bind_node_label_action)
        tgrep_parens.setParseAction(_tgrep_parens_action)
        tgrep_nltk_tree_pos.setParseAction(_tgrep_nltk_tree_pos_action)
        tgrep_relation.setParseAction(_tgrep_relation_action)
        tgrep_rel_conjunction.setParseAction(_tgrep_conjunction_action)
        tgrep_relations.setParseAction(_tgrep_rel_disjunction_action)
        macro_defn.setParseAction(_macro_defn_action)
        # the whole expression is also the conjunction of two
        # predicates: the first node predicate, and the remaining
        # relation predicates
        tgrep_expr.setParseAction(_tgrep_conjunction_action)
        tgrep_expr_labeled.setParseAction(_tgrep_segmented_pattern_action)
        tgrep_expr2.setParseAction(functools.partial(_tgrep_conjunction_action,
                                                     join_char = ':'))
        tgrep_exprs.setParseAction(_tgrep_exprs_action)
    # '#' starts a comment that runs to the end of the line.
    return tgrep_exprs.ignore('#' + pyparsing.restOfLine)
class PortWithProfile(Node):
    """
    Variant of :class:`Port` that is used by "card" records inside the
    "Ports" property. It differs from the normal port syntax by having
    different entries inside the last section. Availability is not listed
    here, only priority. Priority does not have a colon before the actual
    number. This port is followed by profile assignment.
    """
    # Maps attribute names on the parsed node to named results (or
    # extractor callables) produced by __syntax__ below.
    __fragments__ = {
        'name': 'port-name',
        'label': 'port-label',
        'priority': 'port-priority',
        'latency_offset': 'port-latency-offset',
        'availability': 'port-availability',
        'properties': lambda t: t['port-properties'].asList(),
        'profile_list': lambda t: t['port-profile-list'].asList(),
    }

    __syntax__ = (
        p.Optional('[Out] ').suppress()
        + p.Optional('[In] ').suppress()
        + p.Word(p.alphanums + " -;").setResultsName('port-name')
        + p.Suppress(':')
        # This part was very tricky to write. The label is basically
        # arbitrary localized Unicode text. We want to grab all of it in
        # one go but without consuming the upcoming and latest '('
        # character or the space that comes immediately before.
        #
        # The syntax here combines a sequence of words, as defined by
        # anything other than a space and '(', delimited by a single
        # whitespace.
        + p.Combine(
            p.OneOrMore(
                ~p.FollowedBy(
                    p.Regex('\(.+?\)')
                    + p.LineEnd()
                )
                + p.Regex('[^ \n]+')
                + p.White().suppress()
            ),
            ' '
        ).setResultsName('port-label')
        + p.Suppress('(')
        + p.Keyword('priority').suppress()
        # Unlike Port, the priority number may appear without a colon.
        + p.Optional(
            p.Suppress(':')
        )
        + p.Word(p.nums).setParseAction(
            lambda t: int(t[0])
        ).setResultsName('port-priority')
        # Optional ", latency offset: N usec"; empty alternative yields ''.
        + p.Optional(
            p.MatchFirst([
                p.Suppress(',')
                + p.Keyword('latency offset:').suppress()
                + p.Word(p.nums).setParseAction(lambda t: int(t[0]))
                + p.Literal("usec").suppress(),
                p.Empty().setParseAction(lambda t: '')
            ]).setResultsName('port-latency-offset')
        )
        # Optional ", available" / ", not available"; order matters because
        # 'available' is a suffix of 'not available'.
        + p.Optional(
            p.MatchFirst([
                p.Suppress(',') + p.Literal('not available'),
                p.Suppress(',') + p.Literal('available'),
                p.Empty().setParseAction(lambda t: '')
            ]).setResultsName('port-availability')
        )
        + p.Suppress(')')
        + p.LineEnd().suppress()
        # Optional tab-indented "Properties:" block; empty alternative
        # yields an empty list so 'properties' is always present.
        + p.Optional(
            p.MatchFirst([
                p.LineStart().suppress()
                + p.NotAny(p.White(' '))
                + p.White('\t').suppress()
                + p.Keyword('Properties:').suppress()
                + p.LineEnd().suppress()
                + PropertyAttributeValue,
                p.Empty().setParseAction(lambda t: [])
            ]).setResultsName('port-properties')
        )
        # Trailing "Part of profile(s): a, b, c" assignment line.
        + p.White('\t', max=3).suppress()
        + p.Literal("Part of profile(s)").suppress()
        + p.Suppress(":")
        + p.delimitedList(
            p.Word(p.alphanums + "+-:"), ", "
        ).setResultsName("port-profile-list")
    ).setResultsName("port")
def _create_parser(self):
    """Build and return the pyparsing grammar for the query language.

    The grammar recognizes field terms (``field:value`` / ``field=value``),
    comparisons (``<``, ``<=``, ``>``, ``>=``), grouped multi-value terms,
    and boolean combinations with NOT/AND/OR plus ``+``/``-`` prefixes.
    Each recognized construct is turned into an AST node through the
    ``self.create_*`` factory callbacks.
    """
    #----------------------------------------------------------------------#
    # TOKENS                                                               #
    #----------------------------------------------------------------------#
    START = pp.StringStart().suppress()
    END = pp.StringEnd().suppress()

    #
    # NUMBER
    #
    #NUMBER = pp.Regex(r"[+-]?\d+(:?\.\d*)?(:?[eE][+-]?\d+)?") .setParseAction( lambda s, loc, toks: [ self.create_NumberLiteral(int(toks[0])) ] )

    #
    # -foo_bar:
    #
    TERM = pp.Word(pp.alphanums, pp.alphanums + '.:-+_/')

    #
    # "..."
    # '...'
    #
    QUOTED = pp.QuotedString('"', escChar='\\') | pp.QuotedString("'", escChar='\\')

    #
    # r"..."
    # r'...'
    #
    REGEXP = pp.Combine(pp.Suppress('r') + QUOTED).setParseAction(
        self.create_RegExLiteral)

    # Try regex first, then quoted, then bare term — order matters.
    STRINGS = (REGEXP
               | QUOTED.setParseAction(self.create_StringLiteral)
               | TERM.setParseAction(self.create_StringLiteral))

    #
    # SYNTAX
    #
    LPAR, RPAR = map(pp.Suppress, "()")
    PLUS = pp.Suppress('+')
    MINUS = pp.Suppress('-')
    COLON = pp.Suppress(':')
    EQ = pp.Suppress('=')
    LT = pp.Literal('<')
    LTE = pp.Literal('<=')
    GT = pp.Literal('>')
    GTE = pp.Literal('>=')
    NOT = pp.Suppress('NOT')
    AND = pp.Suppress('AND')
    OR = pp.Suppress('OR')

    TOKENS = COLON | LPAR | RPAR | NOT | AND | OR | PLUS | MINUS

    #
    # IDENTIFIER (field_names)
    #
    FIELD = pp.Word(pp.alphas, pp.alphanums + ".").setParseAction(lambda s, loc, toks: [toks[0]])
    #FIELD = (~(TOKENS)) .setParseAction( lambda s, loc, toks: [ toks[0] ] )

    # A value may not start with a syntax token (negative lookahead).
    basic_value = (~(TOKENS) + STRINGS)

    #----------------------------------------------------------------------#
    # TERMS                                                                #
    #----------------------------------------------------------------------#
    #
    # Simple TERM
    #
    simple_term = (
        # bool_term
        #|
        basic_value.copy()).setParseAction(self.create_SimpleTerm)

    #
    # COMPLEX TERM
    #
    # <field name> ':' <field_value>
    #
    multi_term_expr = (
        (PLUS + basic_value).setParseAction(self.create_BoolMust)
        | (MINUS + basic_value).setParseAction(self.create_BoolMustNot)
        | basic_value)

    multi_term_sequence = (LPAR
                           + pp.OneOrMore(multi_term_expr).setParseAction(
                               self.create_MultiValue)
                           + RPAR)

    # LTE/GTE before LT/GT so '<=' is not consumed as '<' then '='.
    compare_term = ((LTE | LT | GTE | GT) + basic_value).setParseAction(
        self.create_CompareValue)

    complex_value = (simple_term | multi_term_sequence | compare_term)

    complex_term = (FIELD + (EQ | COLON) + complex_value).setParseAction(
        self.create_ComplexTerm)

    #-------------------------------------------------------------------
    # EXPRESSION
    #-------------------------------------------------------------------
    query = pp.Forward()

    #
    # <field>:<query>
    # <term>
    # ( <query> )
    #
    base_expr = (
        complex_term
        | simple_term
        | (LPAR + query + RPAR).setParseAction(lambda s, loc, toks: [toks[0]]))

    #-------------------------------------------------------------------
    # BOOLEAN EXPRESSION
    #-------------------------------------------------------------------
    # NOT expr
    # expr
    unary_expr = ((NOT + base_expr).setParseAction(self.create_NotExpr)
                  | (PLUS + base_expr).setParseAction(self.create_BoolMust)
                  | (MINUS + base_expr).setParseAction(self.create_BoolMustNot)
                  | base_expr)
    #simple_expr = unary_expr

    #
    # expr ( AND expr ) *
    #
    and_expr = (unary_expr + pp.ZeroOrMore(AND + unary_expr)).setParseAction(
        self.create_AndExpr)

    #
    # expr ( OR expr ) *
    #
    or_expr = (and_expr + pp.ZeroOrMore(OR + and_expr)).setParseAction(
        self.create_OrExpr)

    boolean_expr = or_expr
    full_expr = boolean_expr

    #
    # clause ::= cond_expr +
    #
    clauses = pp.OneOrMore(full_expr)

    query <<= clauses

    #
    # PARSER
    #
    parser = (START + query.setParseAction(self.create_Query) + END)

    return parser
def parse_file(file_name):
    """Parse an IDL-like schema file (namespaces, enums, classes/structs
    with templated members and ``[[...]]`` attributes).

    Parameters
    ----------
    file_name : str
        Path to the file to parse; the whole file must match the grammar
        (parseAll=True).

    Returns
    -------
    pyparsing.ParseResults
        The parsed top-level content items.
    """
    # Basic lexical elements.
    number = pp.Word(pp.nums)
    identifier = pp.Word(pp.alphas + "_", pp.alphanums + "_")
    lbrace = pp.Literal('{').suppress()
    rbrace = pp.Literal('}').suppress()
    cls = pp.Keyword('class')
    colon = pp.Literal(":")
    semi = pp.Literal(";").suppress()
    langle = pp.Literal("<")
    rangle = pp.Literal(">")
    equals = pp.Literal("=")
    comma = pp.Literal(",")
    lparen = pp.Literal("(")
    rparen = pp.Literal(")")
    lbrack = pp.Literal("[")
    rbrack = pp.Literal("]")
    mins = pp.Literal("-")
    struct = pp.Keyword('struct')
    template = pp.Keyword('template')
    final = pp.Keyword('final')("final")
    stub = pp.Keyword('stub')("stub")

    # A possibly namespace-qualified type name, e.g. "ns::Type".
    # NOTE: 'type' shadows the builtin; kept for compatibility with the
    # existing grammar wiring below.
    with_colon = pp.Word(pp.alphanums + "_" + ":")
    btype = with_colon
    type = pp.Forward()
    nestedParens = pp.nestedExpr('<', '>')
    # Templated type: Name<T1, T2, ...> (recursive through 'type').
    tmpl = pp.Group(
        btype("template_name") + langle.suppress() +
        pp.Group(pp.delimitedList(type)) + rangle.suppress())
    type << (tmpl | btype)

    # "enum class Name : underlying { A = 1, B, ... };"
    enum_lit = pp.Keyword('enum')
    enum_class = pp.Group(enum_lit + cls)
    ns = pp.Keyword("namespace")
    enum_init = equals.suppress() + pp.Optional(mins) + number
    enum_value = pp.Group(identifier + pp.Optional(enum_init))
    enum_values = pp.Group(lbrace + pp.delimitedList(enum_value) +
                           pp.Optional(comma) + rbrace)

    content = pp.Forward()

    # Member name, optionally a no-arg function-style "name()".
    member_name = pp.Combine(
        pp.Group(identifier + pp.Optional(lparen + rparen)))
    # "[[...]]" attribute; the payload is captured raw via SkipTo.
    attrib = pp.Group(lbrack.suppress() + lbrack.suppress() +
                      pp.SkipTo(']') + rbrack.suppress() + rbrack.suppress())
    opt_attribute = pp.Optional(attrib)("attribute")
    namespace = pp.Group(
        ns("type") + identifier("name") + lbrace +
        pp.Group(pp.OneOrMore(content))("content") + rbrace)
    enum = pp.Group(
        enum_class("type") + identifier("name") + colon.suppress() +
        identifier("underline_type") + enum_values("enum_values") +
        pp.Optional(semi).suppress())
    default_value = equals.suppress() + pp.SkipTo(';')
    class_member = pp.Group(
        type("type") + member_name("name") + opt_attribute +
        pp.Optional(default_value)("default") + semi.suppress())("member")
    template_param = pp.Group(identifier("type") + identifier("name"))
    template_def = pp.Group(template + langle +
                            pp.Group(pp.delimitedList(template_param))
                            ("params") + rangle)
    class_content = pp.Forward()
    class_def = pp.Group(
        pp.Optional(template_def)("template") + (cls | struct)("type") +
        with_colon("name") + pp.Optional(final) + pp.Optional(stub) +
        opt_attribute + lbrace +
        pp.Group(pp.ZeroOrMore(class_content))("members") + rbrace +
        pp.Optional(semi))
    content << (enum | class_def | namespace)
    class_content << (enum | class_def | class_member)

    # Name the grammar elements for nicer error messages/debug output.
    # This reads the elements out of locals() by their exact variable
    # names, so those names must not be changed.
    for varname in "enum class_def class_member content namespace template_def".split(
    ):
        locals()[varname].setName(varname)

    rt = pp.OneOrMore(content)
    # Skip C++-style comments anywhere in the input.
    singleLineComment = "//" + pp.restOfLine
    rt.ignore(singleLineComment)
    rt.ignore(pp.cStyleComment)
    return rt.parseFile(file_name, parseAll=True)
pp_identifier = ( # keywords is not identifier, pp.NotAny( pp.Keyword("void") | pp.Keyword("unsigned") | pp.Keyword("signed") | pp.Keyword("int") | pp.Keyword("float") | pp.Keyword("const") | pp.Keyword("volatile") | pp.Keyword("extern") | pp.Keyword("static")) + pp.Word(pp.alphas + "_", pp.alphanums + "_", asKeyword=True)) #pp_identifier = pp.Word(pp.alphas+"_", pp.alphanums+"_") pp_semicolon = pp.Literal(";") def get_type_spec(p): print("get_type_spec: " + str(p)) pp_type_spec = (pp.Keyword("void") | (pp.Optional(pp.Keyword("unsigned") | pp.Keyword("signed")) + pp.Keyword("int")) | pp.Keyword("float") | pp_identifier).setParseAction(get_type_spec) pp_type_qual = (pp.Keyword("const") | pp.Keyword("volatile")) pp_strage_spec = (pp.Keyword("extern") | pp.Keyword("static")) decl_spec = (pp.Optional(pp_type_qual) & pp.Optional(pp_strage_spec)
class XChatScrollbackParser(text_parser.PyparsingSingleLineTextParser):
    """Parses XChat scrollback log files."""

    NAME = 'xchatscrollback'
    DESCRIPTION = 'Parser for XChat scrollback log files.'

    _ENCODING = 'utf-8'

    # Define how a log line should look like.
    # Lines are "T <posix-timestamp> <text>"; 'T' is discarded.
    LOG_LINE = (
        pyparsing.Literal('T').suppress() +
        pyparsing.Word(pyparsing.nums).setResultsName('timestamp') +
        pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName('text'))
    LOG_LINE.parseWithTabs()

    # Define the available log line structures.
    LINE_STRUCTURES = [
        ('logline', LOG_LINE),
    ]

    # Define for the stripping phase.
    # Removes mIRC color codes (\x03 plus up to 3 digits) and other
    # single-character formatting control codes.
    STRIPPER = (
        pyparsing.Word('\x03', pyparsing.nums, max=3).suppress() |
        pyparsing.Word('\x02\x07\x08\x0f\x16\x1d\x1f', exact=1).suppress())

    # Define the structure for parsing <text> and get <nickname> and <text>
    MSG_NICK_START = pyparsing.Literal('<')
    MSG_NICK_END = pyparsing.Literal('>')
    MSG_NICK = pyparsing.SkipTo(MSG_NICK_END).setResultsName('nickname')
    MSG_ENTRY_NICK = pyparsing.Optional(MSG_NICK_START + MSG_NICK + MSG_NICK_END)
    MSG_ENTRY_TEXT = pyparsing.SkipTo(
        pyparsing.LineEnd()).setResultsName('text')
    MSG_ENTRY = MSG_ENTRY_NICK + MSG_ENTRY_TEXT
    MSG_ENTRY.parseWithTabs()

    def __init__(self):
        """Initializes a parser."""
        super(XChatScrollbackParser, self).__init__()
        self._offset = 0

    def _StripThenGetNicknameAndText(self, text):
        """Strips decorators from text and gets <nickname> if available.

        This method implements the XChat strip_color2 and fe_print_text
        functions, slightly modified to get pure text. From the parsing point
        of view, after having stripped, the code takes everything as is,
        simply replacing tabs with spaces (as the original XChat code).
        So the VerifyStructure plays an important role in checking if
        the source file has the right format, since the method will not raise
        any parse exception and every content will be good.

        Args:
          text (str): text obtained from the log record.

        Returns:
          tuple: containing:

            nickname (str): nickname.
            text (str): text sent by nickname or service messages.
        """
        stripped = self.STRIPPER.transformString(text)
        structure = self.MSG_ENTRY.parseString(stripped)
        text = structure.text.replace('\t', ' ')
        return structure.nickname, text

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a log record structure.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): structure parsed from the log
              file.
        """
        if key != 'logline':
            logger.warning(
                'Unable to parse record, unknown structure: {0:s}'.format(key))
            return

        try:
            timestamp = int(structure.timestamp)
        except ValueError:
            logger.debug(
                'Invalid timestamp string {0:s}, skipping record'.format(
                    structure.timestamp))
            return

        try:
            nickname, text = self._StripThenGetNicknameAndText(structure.text)
        except pyparsing.ParseException:
            logger.debug('Error parsing entry at offset {0:d}'.format(
                self._offset))
            return

        event_data = XChatScrollbackEventData()
        event_data.nickname = nickname
        event_data.offset = self._offset
        event_data.text = text

        date_time = dfdatetime_posix_time.PosixTime(timestamp=timestamp)
        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_ADDED)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def VerifyStructure(self, parser_mediator, line):
        """Verify that this file is a XChat scrollback log file.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          line (str): line from a text file.

        Returns:
          bool: True if the line was successfully parsed.
        """
        structure = self.LOG_LINE

        try:
            parsed_structure = structure.parseString(line)
        except pyparsing.ParseException:
            logger.debug('Not a XChat scrollback log file')
            return False

        # The timestamp must be a valid base-10 integer for the line to
        # count as a scrollback record.
        try:
            int(parsed_structure.timestamp, 10)
        except ValueError:
            logger.debug(
                'Not a XChat scrollback log file, invalid timestamp string')
            return False

        return True
class PopularityContestParser(text_parser.PyparsingSingleLineTextParser):
  """Parse popularity contest log files."""

  NAME = 'popularity_contest'
  DATA_FORMAT = 'Popularity Contest log file'

  _ASCII_PRINTABLES = pyparsing.printables
  # All non-whitespace characters of the basic multilingual plane; MRU paths
  # may contain non-ASCII characters while package names are ASCII only.
  _UNICODE_PRINTABLES = ''.join(
      chr(character) for character in range(65536)
      if not chr(character).isspace())

  MRU = pyparsing.Word(_UNICODE_PRINTABLES).setResultsName('mru')
  PACKAGE = pyparsing.Word(_ASCII_PRINTABLES).setResultsName('package')
  TAG = pyparsing.QuotedString('<', endQuoteChar='>').setResultsName('tag')

  HEADER = (
      pyparsing.Literal('POPULARITY-CONTEST-').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('session') +
      pyparsing.Literal('TIME:').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('timestamp') +
      pyparsing.Literal('ID:').suppress() +
      pyparsing.Word(pyparsing.alphanums, exact=32).setResultsName('id') +
      pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName('details'))

  FOOTER = (
      pyparsing.Literal('END-POPULARITY-CONTEST-').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('session') +
      pyparsing.Literal('TIME:').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('timestamp'))

  # A log line is: <atime> <ctime> <package> followed by either a tag or an
  # MRU path with an optional tag.
  LOG_LINE = (
      text_parser.PyparsingConstants.INTEGER.setResultsName('atime') +
      text_parser.PyparsingConstants.INTEGER.setResultsName('ctime') +
      (PACKAGE + TAG | PACKAGE + MRU + pyparsing.Optional(TAG)))

  LINE_STRUCTURES = [
      ('logline', LOG_LINE),
      ('header', HEADER),
      ('footer', FOOTER),
  ]

  _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

  _ENCODING = 'UTF-8'

  def _ParseLogLine(self, parser_mediator, structure):
    """Extracts events from a log line.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    # Required fields are <mru> and <atime> and we are not interested in
    # log lines without <mru>.
    mru = self._GetValueFromStructure(structure, 'mru')
    if not mru:
      return

    event_data = PopularityContestEventData()
    event_data.mru = mru
    event_data.package = self._GetValueFromStructure(structure, 'package')
    event_data.record_tag = self._GetValueFromStructure(structure, 'tag')

    # The <atime> field (as <ctime>) is always present but could be 0.
    # In case of <atime> equal to 0, we are in <NOFILES> case, safely return
    # without logging.
    access_time = self._GetValueFromStructure(structure, 'atime')
    if access_time:
      # TODO: not doing any check on <tag> fields, even if only informative
      # probably it could be better to check for the expected values.
      date_time = dfdatetime_posix_time.PosixTime(timestamp=access_time)
      event = time_events.DateTimeValuesEvent(
          date_time, definitions.TIME_DESCRIPTION_LAST_ACCESS)
      parser_mediator.ProduceEventWithEventData(event, event_data)

    change_time = self._GetValueFromStructure(structure, 'ctime')
    if change_time:
      date_time = dfdatetime_posix_time.PosixTime(timestamp=change_time)
      event = time_events.DateTimeValuesEvent(
          date_time, definitions.TIME_DESCRIPTION_ENTRY_MODIFICATION)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure parsed from the log file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in self._SUPPORTED_KEYS:
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    # TODO: Add anomaly objects for abnormal timestamps, such as when the log
    # timestamp is greater than the session start.
    if key == 'logline':
      self._ParseLogLine(parser_mediator, structure)
    else:
      # 'header' and 'footer' structures produce session start/end events.
      timestamp = self._GetValueFromStructure(structure, 'timestamp')
      if timestamp is None:
        logger.debug('[{0:s}] {1:s} with invalid timestamp.'.format(
            self.NAME, key))
        return

      session = self._GetValueFromStructure(structure, 'session')

      event_data = PopularityContestSessionEventData()
      # TODO: determine why session is formatted as a string.
      event_data.session = '{0!s}'.format(session)

      if key == 'header':
        event_data.details = self._GetValueFromStructure(structure, 'details')
        event_data.hostid = self._GetValueFromStructure(structure, 'id')
        event_data.status = 'start'

      elif key == 'footer':
        event_data.status = 'end'

      date_time = dfdatetime_posix_time.PosixTime(timestamp=timestamp)
      event = time_events.DateTimeValuesEvent(
          date_time, definitions.TIME_DESCRIPTION_ADDED)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is a Popularity Contest log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line was successfully parsed.
    """
    try:
      self.HEADER.parseString(line)
    except pyparsing.ParseException:
      logger.debug('Not a Popularity Contest log file, invalid header')
      return False

    return True
regs_str = [] regs_expr = [] regs_init = [] for rname in rnames: r = m2_expr.ExprId(rname, sz) r_init = m2_expr.ExprId(rname + '_init', sz) regs_str.append(rname) regs_expr.append(r) regs_init.append(r_init) env[rname] = r reginfo = reg_info(regs_str, regs_expr) return regs_expr, regs_init, reginfo LPARENTHESIS = pyparsing.Literal("(") RPARENTHESIS = pyparsing.Literal(")") def int2expr(tokens): v = tokens[0] return (m2_expr.ExprInt, v) def parse_op(tokens): v = tokens[0] return (m2_expr.ExprOp, v) def parse_id(tokens): v = tokens[0]
def normalize(normal_value):
    '''returns function to normalize output of keyword lists

    The returned callable is a pyparsing parse action that overwrites the
    first matched token with the canonical form *normal_value*.
    '''
    return lambda s, l, t, n=normal_value: t.__setitem__(0, n)


# ========Grammar definitions============

# ***Comparison Operators***
# Each spoken-language alias is rewritten to its canonical symbol by the
# normalize() parse action, so downstream code only ever sees the symbol.
EQUALS = keyword_list(
    ['=', 'equal', 'equals', 'is', 'to']).setParseAction(normalize('='))
GT = keyword_list(
    ['>', 'greater than', 'more than', 'is greater than',
     'not less than']).setParseAction(normalize('>'))
LT = keyword_list(
    ['<', 'less than', 'is less than',
     'not more than']).setParseAction(normalize('<'))
# NOTE(review): GTE/LTE match only the literal symbols and get no
# spoken-language aliases or normalization — presumably intentional; confirm.
GTE = pp.Literal('>=')
LTE = pp.Literal('<=')
NOTEQUAL = keyword_list(
    ['<>', 'not equal', '!=']).setParseAction(normalize('!='))
# '^' (Or) picks the longest alternative match.
compare = (GT ^ LT ^ GTE ^ LTE ^ EQUALS ^ NOTEQUAL).setResultsName('compare')

ASSIGN = pp.Literal(':=')
operator = (compare ^ ASSIGN).setResultsName('operator')

# ***Logicals***
OR = pp.CaselessKeyword('OR')
AND = pp.CaselessKeyword('AND')
NOT = pp.CaselessKeyword('NOT')
logical = (AND ^ OR ^ NOT)

# ***Primitive data types for comparisons***
# Integer or decimal number (e.g. 42, 3.14, 2.).
NUMBER = pp.Regex(r'\d+(\.\d*)?')
    def _generate_grammar(self):
        """Build and return the pyparsing grammar for qmake project files.

        Returns the top-level ``Grammar`` element (a ``StatementGroup`` with
        results name ``statements``). When ``self.debug`` is set, every
        sub-expression is named and traced via ``add_element``.
        """
        # Define grammar: newlines are significant statement terminators,
        # so only spaces and tabs are skippable whitespace.
        pp.ParserElement.setDefaultWhitespaceChars(" \t")

        def add_element(name: str, value: pp.ParserElement):
            # Attach a display name and enable pyparsing debug tracing on the
            # element when debugging; otherwise pass it through unchanged.
            nonlocal self
            if self.debug:
                value.setName(name)
                value.setDebug()
            return value

        EOL = add_element("EOL", pp.Suppress(pp.LineEnd()))
        Else = add_element("Else", pp.Keyword("else"))
        Identifier = add_element(
            "Identifier", pp.Word(f"{pp.alphas}_", bodyChars=pp.alphanums + "_-./")
        )
        # Parenthesized value; quoted strings and $( ) substitutions inside it
        # are opaque. The parse action re-wraps the tokens in literal parens.
        BracedValue = add_element(
            "BracedValue",
            pp.nestedExpr(
                ignoreExpr=pp.quotedString
                | pp.QuotedString(
                    quoteChar="$(", endQuoteChar=")", escQuote="\\", unquoteResults=False
                )
            ).setParseAction(lambda s, l, t: ["(", *t[0], ")"]),
        )

        # The various $-prefixed variable/property reference forms of qmake.
        Substitution = add_element(
            "Substitution",
            pp.Combine(
                pp.Literal("$")
                + (
                    (
                        (pp.Literal("$") + Identifier + pp.Optional(pp.nestedExpr()))
                        | (pp.Literal("(") + Identifier + pp.Literal(")"))
                        | (pp.Literal("{") + Identifier + pp.Literal("}"))
                        | (
                            pp.Literal("$")
                            + pp.Literal("{")
                            + Identifier
                            + pp.Optional(pp.nestedExpr())
                            + pp.Literal("}")
                        )
                        | (pp.Literal("$") + pp.Literal("[") + Identifier + pp.Literal("]"))
                    )
                )
            ),
        )
        LiteralValuePart = add_element(
            "LiteralValuePart", pp.Word(pp.printables, excludeChars="$#{}()")
        )
        SubstitutionValue = add_element(
            "SubstitutionValue",
            pp.Combine(pp.OneOrMore(Substitution | LiteralValuePart | pp.Literal("$"))),
        )
        # $$function(args) calls; handle_function_value maps them to values.
        FunctionValue = add_element(
            "FunctionValue",
            pp.Group(
                pp.Suppress(pp.Literal("$") + pp.Literal("$"))
                + Identifier
                + pp.nestedExpr()  # .setParseAction(lambda s, l, t: ['(', *t[0], ')'])
            ).setParseAction(lambda s, l, t: handle_function_value(*t)),
        )
        Value = add_element(
            "Value",
            pp.NotAny(Else | pp.Literal("}") | EOL)
            + (
                pp.QuotedString(quoteChar='"', escChar="\\")
                | FunctionValue
                | SubstitutionValue
                | BracedValue
            ),
        )
        Values = add_element("Values", pp.ZeroOrMore(Value)("value"))

        # Assignment operators of qmake variable statements.
        Op = add_element(
            "OP",
            pp.Literal("=")
            | pp.Literal("-=")
            | pp.Literal("+=")
            | pp.Literal("*=")
            | pp.Literal("~="),
        )
        Key = add_element("Key", Identifier)
        Operation = add_element(
            "Operation",
            Key("key") + pp.locatedExpr(Op)("operation") + Values("value")
        )
        CallArgs = add_element("CallArgs", pp.nestedExpr())

        def parse_call_args(results):
            # Flatten nested argument tokens back into a single string,
            # restoring the parentheses stripped by nestedExpr.
            out = ""
            for item in chain(*results):
                if isinstance(item, str):
                    out += item
                else:
                    out += "(" + parse_call_args(item) + ")"
            return out

        CallArgs.setParseAction(parse_call_args)

        Load = add_element("Load", pp.Keyword("load") + CallArgs("loaded"))
        Include = add_element(
            "Include", pp.Keyword("include") + pp.locatedExpr(CallArgs)("included")
        )
        Option = add_element("Option", pp.Keyword("option") + CallArgs("option"))
        RequiresCondition = add_element("RequiresCondition", pp.originalTextFor(pp.nestedExpr()))

        def parse_requires_condition(s, l, t):
            # The following expression unwraps the condition via the additional info
            # set by originalTextFor.
            condition_without_parentheses = s[t._original_start + 1 : t._original_end - 1]

            # And this replaces the colons with '&&' similar how it's done for 'Condition'.
            condition_without_parentheses = (
                condition_without_parentheses.strip().replace(":", " && ").strip(" && ")
            )
            return condition_without_parentheses

        RequiresCondition.setParseAction(parse_requires_condition)
        Requires = add_element(
            "Requires", pp.Keyword("requires") + RequiresCondition("project_required_condition")
        )

        FunctionArgumentsAsString = add_element(
            "FunctionArgumentsAsString", pp.originalTextFor(pp.nestedExpr())
        )
        QtNoMakeTools = add_element(
            "QtNoMakeTools",
            pp.Keyword("qtNomakeTools") + FunctionArgumentsAsString("qt_no_make_tools_arguments"),
        )

        # ignore the whole thing...
        DefineTestDefinition = add_element(
            "DefineTestDefinition",
            pp.Suppress(
                pp.Keyword("defineTest")
                + CallArgs
                + pp.nestedExpr(opener="{", closer="}", ignoreExpr=pp.LineEnd())
            ),
        )

        # ignore the whole thing...
        ForLoop = add_element(
            "ForLoop",
            pp.Suppress(
                pp.Keyword("for")
                + CallArgs
                + pp.nestedExpr(opener="{", closer="}", ignoreExpr=pp.LineEnd())
            ),
        )

        # ignore the whole thing...
        ForLoopSingleLine = add_element(
            "ForLoopSingleLine",
            pp.Suppress(pp.Keyword("for") + CallArgs + pp.Literal(":") + pp.SkipTo(EOL)),
        )

        # ignore the whole thing...
        FunctionCall = add_element("FunctionCall", pp.Suppress(Identifier + pp.nestedExpr()))

        # Forward declaration: Scope is defined recursively further below.
        Scope = add_element("Scope", pp.Forward())

        Statement = add_element(
            "Statement",
            pp.Group(
                Load
                | Include
                | Option
                | Requires
                | QtNoMakeTools
                | ForLoop
                | ForLoopSingleLine
                | DefineTestDefinition
                | FunctionCall
                | Operation
            ),
        )
        StatementLine = add_element("StatementLine", Statement + (EOL | pp.FollowedBy("}")))
        StatementGroup = add_element(
            "StatementGroup", pp.ZeroOrMore(StatementLine | Scope | pp.Suppress(EOL))
        )

        Block = add_element(
            "Block",
            pp.Suppress("{")
            + pp.Optional(EOL)
            + StatementGroup
            + pp.Optional(EOL)
            + pp.Suppress("}")
            + pp.Optional(EOL),
        )

        ConditionEnd = add_element(
            "ConditionEnd",
            pp.FollowedBy(
                (pp.Optional(pp.White()) + (pp.Literal(":") | pp.Literal("{") | pp.Literal("|")))
            ),
        )

        ConditionPart1 = add_element(
            "ConditionPart1", (pp.Optional("!") + Identifier + pp.Optional(BracedValue))
        )
        ConditionPart2 = add_element("ConditionPart2", pp.CharsNotIn("#{}|:=\\\n"))
        ConditionPart = add_element(
            "ConditionPart", (ConditionPart1 ^ ConditionPart2) + ConditionEnd
        )

        ConditionOp = add_element("ConditionOp", pp.Literal("|") ^ pp.Literal(":"))
        ConditionWhiteSpace = add_element(
            "ConditionWhiteSpace", pp.Suppress(pp.Optional(pp.White(" ")))
        )
        ConditionRepeated = add_element(
            "ConditionRepeated", pp.ZeroOrMore(ConditionOp + ConditionWhiteSpace + ConditionPart)
        )

        Condition = add_element("Condition", pp.Combine(ConditionPart + ConditionRepeated))
        # qmake's ':' separator maps to a logical AND.
        Condition.setParseAction(lambda x: " ".join(x).strip().replace(":", " && ").strip(" && "))

        # Weird thing like write_file(a)|error() where error() is the alternative condition
        # which happens to be a function call. In this case there is no scope, but our code expects
        # a scope with a list of statements, so create a fake empty statement.
        ConditionEndingInFunctionCall = add_element(
            "ConditionEndingInFunctionCall",
            pp.Suppress(ConditionOp)
            + FunctionCall
            + pp.Empty().setParseAction(lambda x: [[]]).setResultsName("statements"),
        )

        SingleLineScope = add_element(
            "SingleLineScope",
            pp.Suppress(pp.Literal(":")) + pp.Group(Block | (Statement + EOL))("statements"),
        )
        MultiLineScope = add_element("MultiLineScope", Block("statements"))

        SingleLineElse = add_element(
            "SingleLineElse",
            pp.Suppress(pp.Literal(":")) + (Scope | Block | (Statement + pp.Optional(EOL))),
        )
        MultiLineElse = add_element("MultiLineElse", Block)
        ElseBranch = add_element("ElseBranch", pp.Suppress(Else) + (SingleLineElse | MultiLineElse))

        # Scope is already add_element'ed in the forward declaration above.
        Scope <<= pp.Group(
            Condition("condition")
            + (SingleLineScope | MultiLineScope | ConditionEndingInFunctionCall)
            + pp.Optional(ElseBranch)("else_statements")
        )

        Grammar = StatementGroup("statements")
        # '#' starts a comment that runs to end of line.
        Grammar.ignore(pp.pythonStyleComment())

        return Grammar
class SkyDriveOldLogParser(text_parser.PyparsingSingleLineTextParser):
  """Parse SkyDrive old log files."""

  NAME = u'skydrive_log_old'
  DESCRIPTION = u'Parser for OneDrive (or SkyDrive) old log files.'

  _ENCODING = u'UTF-8-SIG'

  _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
  _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

  # Common SDOL (SkyDriveOldLog) pyparsing objects.
  _SDOL_COLON = pyparsing.Literal(u':')
  _SDOL_EXCLAMATION = pyparsing.Literal(u'!')

  # Date and time format used in the header is: MM-DD-YYYY hhmmss.###
  # For example: 08-01-2013 21:22:28.999
  _SDOL_DATE_TIME = pyparsing.Group(
      _TWO_DIGITS.setResultsName(u'month') + pyparsing.Suppress(u'-') +
      _TWO_DIGITS.setResultsName(u'day_of_month') + pyparsing.Suppress(u'-') +
      _FOUR_DIGITS.setResultsName(u'year') +
      text_parser.PyparsingConstants.TIME_MSEC_ELEMENTS).setResultsName(
          u'date_time')

  # Source code location, for example: AUTH.cpp:1042!Function
  _SDOL_SOURCE_CODE = pyparsing.Combine(
      pyparsing.CharsNotIn(u':') +
      _SDOL_COLON +
      text_parser.PyparsingConstants.INTEGER +
      _SDOL_EXCLAMATION +
      pyparsing.Word(pyparsing.printables)).setResultsName(u'source_code')

  _SDOL_LOG_LEVEL = (
      pyparsing.Literal(u'(').suppress() +
      pyparsing.SkipTo(u')').setResultsName(u'log_level') +
      pyparsing.Literal(u')').suppress())

  _SDOL_LINE = (
      _SDOL_DATE_TIME + _SDOL_SOURCE_CODE + _SDOL_LOG_LEVEL +
      _SDOL_COLON + pyparsing.SkipTo(pyparsing.lineEnd).setResultsName(u'text'))

  # Sometimes the timestamped log line is followed by an empy line,
  # then by a file name plus other data and finally by another empty
  # line. It could happen that a logline is split in two parts.
  # These lines will not be discarded and an event will be generated
  # ad-hoc (see source), based on the last one if available.
  _SDOL_NO_HEADER_SINGLE_LINE = (
      pyparsing.Optional(pyparsing.Literal(u'->').suppress()) +
      pyparsing.SkipTo(pyparsing.lineEnd).setResultsName(u'text'))

  # Define the available log line structures.
  LINE_STRUCTURES = [
      (u'logline', _SDOL_LINE),
      (u'no_header_single_line', _SDOL_NO_HEADER_SINGLE_LINE),
  ]

  def __init__(self):
    """Initializes a parser object."""
    super(SkyDriveOldLogParser, self).__init__()
    # Date/time and event data of the last timestamped log line; used to
    # attribute no-header continuation lines to the preceding event.
    self._last_date_time = None
    self._last_event_data = None
    self.offset = 0

  def _ParseLogline(self, parser_mediator, structure):
    """Parse a logline and store appropriate attributes.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
    # TODO: Verify if date and time value is locale dependent.
    month, day_of_month, year, hours, minutes, seconds, milliseconds = (
        structure.date_time)

    time_elements_tuple = (
        year, month, day_of_month, hours, minutes, seconds, milliseconds)

    try:
      date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      parser_mediator.ProduceExtractionError(
          u'invalid date time value: {0!s}'.format(structure.date_time))
      return

    event_data = SkyDriveOldLogEventData()
    event_data.log_level = structure.log_level
    event_data.offset = self.offset
    event_data.source_code = structure.source_code
    event_data.text = structure.text

    event = time_events.DateTimeValuesEvent(
        date_time, eventdata.EventTimestamp.ADDED_TIME)
    parser_mediator.ProduceEventWithEventData(event, event_data)

    # Remember this event so that following no-header lines can reuse its
    # date and time and offset.
    self._last_date_time = date_time
    self._last_event_data = event_data

  def _ParseNoHeaderSingleLine(self, parser_mediator, structure):
    """Parse an isolated header line and store appropriate attributes.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
    if not self._last_event_data:
      logging.debug(
          u'SkyDrive, found isolated line with no previous events')
      return

    event_data = SkyDriveOldLogEventData()
    event_data.offset = self._last_event_data.offset
    event_data.text = structure.text

    event = time_events.DateTimeValuesEvent(
        self._last_date_time, eventdata.EventTimestamp.ADDED_TIME)
    parser_mediator.ProduceEventWithEventData(event, event_data)

    # TODO think to a possible refactoring for the non-header lines.
    self._last_date_time = None
    self._last_event_data = None

  def ParseRecord(self, parser_mediator, key, structure):
    """Parse each record structure and return an EventObject if applicable.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in (u'logline', u'no_header_single_line'):
      raise errors.ParseError(
          u'Unable to parse record, unknown structure: {0:s}'.format(
              key))

    if key == u'logline':
      self._ParseLogline(parser_mediator, structure)

    elif key == u'no_header_single_line':
      self._ParseNoHeaderSingleLine(parser_mediator, structure)

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is a SkyDrive old log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (bytes): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
    try:
      structure = self._SDOL_LINE.parseString(line)
    except pyparsing.ParseException:
      logging.debug(u'Not a SkyDrive old log file')
      return False

    # Note: the grammar yields month before day of month; unpack in the same
    # order as _ParseLogline. Previously day and month were swapped here,
    # which rejected valid files with a day of month greater than 12 and
    # accepted invalid dates.
    month, day_of_month, year, hours, minutes, seconds, milliseconds = (
        structure.date_time)

    time_elements_tuple = (
        year, month, day_of_month, hours, minutes, seconds, milliseconds)

    try:
      dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      logging.debug(
          u'Not a SkyDrive old log file, invalid date and time: {0!s}'.
          format(structure.date_time))
      return False

    return True
class SELinuxParser(text_parser.PyparsingSingleLineTextParser):
  """Parser for SELinux audit.log files."""

  NAME = 'selinux'
  DESCRIPTION = 'Parser for SELinux audit.log files.'

  _ENCODING = 'utf-8'

  # key="quoted value" or key=bare-token pair in the message body.
  _SELINUX_KEY_VALUE_GROUP = pyparsing.Group(
      pyparsing.Word(pyparsing.alphanums).setResultsName('key') +
      pyparsing.Suppress('=') + (
          pyparsing.QuotedString('"') ^
          pyparsing.Word(pyparsing.printables)).setResultsName('value'))

  _SELINUX_KEY_VALUE_DICT = pyparsing.Dict(
      pyparsing.ZeroOrMore(_SELINUX_KEY_VALUE_GROUP))

  # Remainder of the line after the audit message header; captured raw.
  _SELINUX_BODY_GROUP = pyparsing.Group(
      pyparsing.Empty().setResultsName('key') +
      pyparsing.restOfLine.setResultsName('value'))

  # Matches: msg=audit(<seconds>.<milliseconds>:<serial>):
  _SELINUX_MSG_GROUP = pyparsing.Group(
      pyparsing.Literal('msg').setResultsName('key') +
      pyparsing.Suppress('=audit(') +
      pyparsing.Word(pyparsing.nums).setResultsName('seconds') +
      pyparsing.Suppress('.') +
      pyparsing.Word(pyparsing.nums).setResultsName('milliseconds') +
      pyparsing.Suppress(':') +
      pyparsing.Word(pyparsing.nums).setResultsName('serial') +
      pyparsing.Suppress('):'))

  # Matches: type=<UPPERCASE_TYPE> or type=UNKNOWN[<number>]
  _SELINUX_TYPE_GROUP = pyparsing.Group(
      pyparsing.Literal('type').setResultsName('key') +
      pyparsing.Suppress('=') + (
          pyparsing.Word(pyparsing.srange('[A-Z_]')) ^
          pyparsing.Regex(r'UNKNOWN\[[0-9]+\]')).setResultsName('value'))

  # NOTE(review): defined but not referenced by _SELINUX_LOG_LINE below.
  _SELINUX_TYPE_AVC_GROUP = pyparsing.Group(
      pyparsing.Literal('type').setResultsName('key') +
      pyparsing.Suppress('=') + (
          pyparsing.Word('AVC') ^
          pyparsing.Word('USER_AVC')).setResultsName('value'))

  # A log line is formatted as: type=TYPE msg=audit([0-9]+\.[0-9]+:[0-9]+): .*
  _SELINUX_LOG_LINE = pyparsing.Dict(
      _SELINUX_TYPE_GROUP + _SELINUX_MSG_GROUP + _SELINUX_BODY_GROUP)

  LINE_STRUCTURES = [('line', _SELINUX_LOG_LINE)]

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a structure of tokens derived from a line of a text file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key != 'line':
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    msg_value = self._GetValueFromStructure(structure, 'msg')
    if not msg_value:
      parser_mediator.ProduceExtractionWarning(
          'missing msg value: {0!s}'.format(structure))
      return

    try:
      seconds = int(msg_value[0], 10)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'unsupported number of seconds in msg value: {0!s}'.format(
              structure))
      return

    try:
      milliseconds = int(msg_value[1], 10)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'unsupported number of milliseconds in msg value: {0!s}'.format(
              structure))
      return

    # Convert seconds and milliseconds to microseconds.
    timestamp = ((seconds * 1000) + milliseconds) * 1000
    body_text = structure[2][0]

    try:
      # Try to parse the body text as key value pairs. Note that not
      # all log lines will be properly formatted key value pairs.
      body_structure = self._SELINUX_KEY_VALUE_DICT.parseString(body_text)
    except pyparsing.ParseException:
      body_structure = pyparsing.ParseResults()

    event_data = SELinuxLogEventData()
    event_data.audit_type = self._GetValueFromStructure(structure, 'type')
    event_data.body = body_text
    event_data.pid = self._GetValueFromStructure(body_structure, 'pid')
    # TODO: pass line number to offset or remove.
    event_data.offset = 0

    event = time_events.TimestampEvent(
        timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyStructure(self, parser_mediator, line):
    """Verifies if a line from a text file is in the expected format.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
    try:
      structure = self._SELINUX_LOG_LINE.parseString(line)
    except pyparsing.ParseException as exception:
      logger.debug(
          'Unable to parse SELinux audit.log file with error: {0!s}'.format(
              exception))
      return False

    return 'type' in structure and 'msg' in structure
class SkyDriveLogParser(text_parser.PyparsingMultiLineTextParser):
  """Parses SkyDrive log files."""

  NAME = u'skydrive_log'
  DESCRIPTION = u'Parser for OneDrive (or SkyDrive) log files.'

  _ENCODING = u'utf-8'

  # Common SDF (SkyDrive Format) structures.
  _COMMA = pyparsing.Literal(u',').suppress()
  _HYPHEN = text_parser.PyparsingConstants.HYPHEN

  _THREE_DIGITS = text_parser.PyparsingConstants.THREE_DIGITS
  _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

  MSEC = pyparsing.Word(pyparsing.nums, max=3).setParseAction(
      text_parser.PyParseIntCast)
  # A comma-delimited field whose content is not used.
  IGNORE_FIELD = pyparsing.CharsNotIn(u',').suppress()

  # Date and time format used in the header is: YYYY-MM-DD-hhmmss.###
  # For example: 2013-07-25-160323.291
  _SDF_HEADER_DATE_TIME = pyparsing.Group(
      text_parser.PyparsingConstants.DATE_ELEMENTS + _HYPHEN +
      _TWO_DIGITS.setResultsName(u'hours') +
      _TWO_DIGITS.setResultsName(u'minutes') +
      _TWO_DIGITS.setResultsName(u'seconds') +
      pyparsing.Literal(u'.').suppress() +
      _THREE_DIGITS.setResultsName(u'milliseconds')).setResultsName(
          u'header_date_time')

  # Date and time format used in lines other than the header is:
  # MM-DD-YY,hh:mm:ss.###
  # For example: 07-25-13,16:06:31.820
  _SDF_DATE_TIME = (
      _TWO_DIGITS.setResultsName(u'month') + _HYPHEN +
      _TWO_DIGITS.setResultsName(u'day') + _HYPHEN +
      _TWO_DIGITS.setResultsName(u'year') + _COMMA +
      text_parser.PyparsingConstants.TIME_ELEMENTS + pyparsing.Suppress('.') +
      _THREE_DIGITS.setResultsName(u'milliseconds')).setResultsName(
          u'date_time')

  _SDF_HEADER_START = (
      pyparsing.Literal(u'######').suppress() +
      pyparsing.Literal(u'Logging started.').setResultsName(u'log_start'))

  # Multiline entry end marker, matched from right to left.
  _SDF_ENTRY_END = pyparsing.StringEnd() | _SDF_HEADER_START | _SDF_DATE_TIME

  # A log line: date/time, three ignored fields, module, source code
  # location, two ignored fields, log level and the (possibly multi-line)
  # detail text which runs until the next entry.
  _SDF_LINE = (
      _SDF_DATE_TIME + _COMMA +
      IGNORE_FIELD + _COMMA +
      IGNORE_FIELD + _COMMA +
      IGNORE_FIELD + _COMMA +
      pyparsing.CharsNotIn(u',').setResultsName(u'module') + _COMMA +
      pyparsing.CharsNotIn(u',').setResultsName(u'source_code') + _COMMA +
      IGNORE_FIELD + _COMMA +
      IGNORE_FIELD + _COMMA +
      pyparsing.CharsNotIn(u',').setResultsName(u'log_level') + _COMMA +
      pyparsing.SkipTo(_SDF_ENTRY_END).setResultsName(u'detail') +
      pyparsing.ZeroOrMore(pyparsing.lineEnd()))

  _SDF_HEADER = (
      _SDF_HEADER_START +
      pyparsing.Literal(u'Version=').setResultsName(u'version_string') +
      pyparsing.Word(pyparsing.nums + u'.').setResultsName(u'version_number') +
      pyparsing.Literal(u'StartSystemTime:').suppress() +
      _SDF_HEADER_DATE_TIME +
      pyparsing.Literal(
          u'StartLocalTime:').setResultsName(u'local_time_string') +
      pyparsing.SkipTo(pyparsing.lineEnd()).setResultsName(u'details') +
      pyparsing.lineEnd())

  LINE_STRUCTURES = [
      (u'logline', _SDF_LINE),
      (u'header', _SDF_HEADER)]

  def _ParseHeader(self, parser_mediator, structure):
    """Parse header lines and store appropriate attributes.

    [u'Logging started.', u'Version=', u'17.0.2011.0627',
     [2013, 7, 25], 16, 3, 23, 291, u'StartLocalTime', u'<details>']

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
    try:
      date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=structure.header_date_time)
    except ValueError:
      parser_mediator.ProduceExtractionError(
          u'invalid date time value: {0!s}'.format(
              structure.header_date_time))
      return

    event_data = SkyDriveLogEventData()
    # TODO: refactor detail to individual event data attributes.
    event_data.detail = u'{0:s} {1:s} {2:s} {3:s} {4:s}'.format(
        structure.log_start, structure.version_string,
        structure.version_number, structure.local_time_string,
        structure.details)

    event = time_events.DateTimeValuesEvent(
        date_time, eventdata.EventTimestamp.ADDED_TIME)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def _ParseLine(self, parser_mediator, structure):
    """Parses a logline and store appropriate attributes.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
    # TODO: Verify if date and time value is locale dependent.
    month, day_of_month, year, hours, minutes, seconds, milliseconds = (
        structure.date_time)

    # The log line only carries a two-digit year.
    year += 2000
    time_elements_tuple = (
        year, month, day_of_month, hours, minutes, seconds, milliseconds)

    try:
      date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      parser_mediator.ProduceExtractionError(
          u'invalid date time value: {0!s}'.format(structure.date_time))
      return

    event_data = SkyDriveLogEventData()
    # Replace newlines with spaces in structure.detail to preserve output.
    # TODO: refactor detail to individual event data attributes.
    event_data.detail = structure.detail.replace(u'\n', u' ')
    event_data.log_level = structure.log_level
    event_data.module = structure.module
    event_data.source_code = structure.source_code

    event = time_events.DateTimeValuesEvent(
        date_time, eventdata.EventTimestamp.ADDED_TIME)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parse each record structure and return an EventObject if applicable.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in (u'header', u'logline'):
      raise errors.ParseError(
          u'Unable to parse record, unknown structure: {0:s}'.format(
              key))

    if key == u'logline':
      self._ParseLine(parser_mediator, structure)

    elif key == u'header':
      self._ParseHeader(parser_mediator, structure)

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is a SkyDrive log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (bytes): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
    try:
      structure = self._SDF_HEADER.parseString(line)
    except pyparsing.ParseException:
      logging.debug(u'Not a SkyDrive log file')
      return False

    try:
      dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=structure.header_date_time)
    except ValueError:
      logging.debug(
          u'Not a SkyDrive log file, invalid date and time: {0!s}'.
          format(structure.header_date_time))
      return False

    return True
+ p.delimitedList( p.Word(p.alphanums + "+-:"), ", " ).setResultsName("port-profile-list") ).setResultsName("port") # ========================= # Non-collection attributes # ========================= AttributeName = p.Regex("[a-zA-Z][^:\n]+").setResultsName("attribute-name") ActivePortAttributeValue = ( p.Combine( p.Or([p.Literal('[Out] '), p.Literal('[In] ')]).suppress() + p.Regex("[^\n]*") + p.LineEnd().suppress(), adjacent=False ).setResultsName("attribute-value") ) VolumeAttributeValue = ( p.Combine( p.Or([ p.Or([ p.Literal("(invalid)"), p.Regex("([0-9]+: +[0-9]+% ?)+") ]), p.Or([
def sl(s):
    """Return a parser element that matches the literal *s* but suppresses it
    from the parse results."""
    literal = pp.Literal(s)
    return pp.Suppress(literal)
    def __init__(self):
        """Build the pyparsing grammar for BibTeX files.

        Constructs the expressions for string definitions, preamble
        declarations, entries, and explicit/implicit comments, and exposes
        them through ``self.entry``, ``self.string_def``,
        ``self.preamble_decl``, ``self.explicit_comment``,
        ``self.implicit_comment`` and ``self.main_expression``.
        """
        # Bibtex keywords
        string_def_start = pp.CaselessKeyword("@string")
        preamble_start = pp.CaselessKeyword("@preamble")
        comment_line_start = pp.CaselessKeyword('@comment')

        # String names
        string_name = pp.Word(pp.alphanums + '_')('StringName')
        self.set_string_name_parse_action(lambda s, l, t: None)
        string_name.addParseAction(self._string_name_parse_action)

        # Values inside bibtex fields
        # Values can be integer or string expressions. The latter may use
        # quoted or braced values.

        # Integer values
        integer = pp.Word(pp.nums)('Integer')

        # Braced values: braced values can contain nested (but balanced)
        # braces
        braced_value_content = pp.CharsNotIn('{}')
        braced_value = pp.Forward()  # Recursive definition for nested braces
        braced_value <<= pp.originalTextFor(
            '{' + pp.ZeroOrMore(braced_value | braced_value_content) + '}'
        )('BracedValue')
        braced_value.setParseAction(remove_braces)
        # TODO add ignore for "\}" and "\{" ?
        # TODO @ are not parsed by bibtex in braces

        # Quoted values: may contain braced content with balanced braces
        brace_in_quoted = pp.nestedExpr('{', '}', ignoreExpr=None)
        text_in_quoted = pp.CharsNotIn('"{}')
        # (quotes should be escaped by braces in quoted value)
        quoted_value = pp.originalTextFor(
            '"' + pp.ZeroOrMore(text_in_quoted | brace_in_quoted) + '"'
        )('QuotedValue')
        quoted_value.addParseAction(pp.removeQuotes)

        # String expressions: '#'-delimited concatenation of quoted/braced
        # values and string names.
        string_expr = pp.delimitedList(
            (quoted_value | braced_value | string_name), delim='#'
        )('StringExpression')
        self.set_string_expression_parse_action(lambda s, l, t: None)
        string_expr.addParseAction(self._string_expr_parse_action)

        value = (integer | string_expr)('Value')

        # Entries

        # @EntryType { ...
        entry_type = (pp.Suppress('@') + pp.Word(pp.alphas))('EntryType')
        entry_type.setParseAction(first_token)

        # Entry key: any character up to a ',' without leading and trailing
        # spaces.
        key = pp.SkipTo(',')('Key')  # Exclude @',\#}{~%
        key.setParseAction(lambda s, l, t: first_token(s, l, t).strip())

        # Field name: word of letters, digits, dashes and underscores
        field_name = pp.Word(pp.alphanums + '_-()')('FieldName')
        field_name.setParseAction(first_token)

        # Field: field_name = value
        field = pp.Group(field_name + pp.Suppress('=') + value)('Field')
        field.setParseAction(field_to_pair)

        # List of fields: comma separated fields, optional trailing comma.
        field_list = (pp.delimitedList(field) + pp.Suppress(pp.Optional(','))
                      )('Fields')
        # reversed() makes the FIRST occurrence of a duplicated field win
        # when building the dict.
        field_list.setParseAction(
            lambda s, l, t: {k: v for (k, v) in reversed(t.get('Fields'))})

        # Entry: type, key, and fields
        self.entry = (entry_type +
                      in_braces_or_pars(key + pp.Suppress(',') + field_list)
                      )('Entry')

        # Other stuff: comments, string definitions, and preamble
        # declarations

        # Explicit comments: @comment + everything up to next valid
        # declaration starting on new line.
        not_an_implicit_comment = (pp.LineStart() + pp.Literal('@')
                                   ) | pp.stringEnd()
        self.explicit_comment = (
            pp.Suppress(comment_line_start) +
            pp.originalTextFor(pp.SkipTo(not_an_implicit_comment),
                               asString=True))('ExplicitComment')
        self.explicit_comment.addParseAction(remove_trailing_newlines)
        self.explicit_comment.addParseAction(remove_braces)
        # Previous implementation included comment until next '}'.
        # This is however not in line with bibtex behavior that is to only
        # ignore until EOL. Brace stripping is arbitrary here but avoids
        # duplication on bibtex write.

        # Empty implicit_comments lead to infinite loop of zeroOrMore
        def mustNotBeEmpty(t):
            if not t[0]:
                raise pp.ParseException("Match must not be empty.")

        # Implicit comments: not anything else
        self.implicit_comment = pp.originalTextFor(
            pp.SkipTo(not_an_implicit_comment).setParseAction(mustNotBeEmpty),
            asString=True)('ImplicitComment')
        self.implicit_comment.addParseAction(remove_trailing_newlines)

        # String definition
        self.string_def = (pp.Suppress(string_def_start) + in_braces_or_pars(
            string_name +
            pp.Suppress('=') +
            string_expr('StringValue')
        ))('StringDefinition')

        # Preamble declaration
        self.preamble_decl = (pp.Suppress(preamble_start) +
                              in_braces_or_pars(value))('PreambleDeclaration')

        # Main bibtex expression: any number of the declarations above, in
        # any order.
        self.main_expression = pp.ZeroOrMore(
            self.string_def |
            self.preamble_decl |
            self.explicit_comment |
            self.entry |
            self.implicit_comment)
class Catch2:
    """Grammar for recognising Catch2 test-failure report lines."""

    # Matches "<filename>:<line-number>: FAILED:" anchored at line start.
    error_line = (
        pp.LineStart()
        + pp.SkipTo(":")("filename")
        + pp.Literal(":")
        + pp.Word(pp.nums)
        + pp.Literal(": FAILED:")
    )
# # JSON Parser exercise # import pyparsing as pp pp.ParserElement.setDefaultWhitespaceChars(' ') escapechar = (pp.Literal("\\t") | pp.Literal("\\n")) characters = (pp.Word(pp.alphanums) | escapechar | pp.White(' ')) string = pp.Literal("\"").suppress() + pp.Combine( pp.OneOrMore(characters)) + pp.Literal("\"").suppress() print(string.parseString("\"meh \\t \"")) string.runTests("\"meh \\t \"") # # It is also possible to use the quoted string class # string2 = pp.QuotedString('"', unquoteResults=True) string2.runTests(""" \"meh\" """) """ <number> ::= <int> <frac> <int> ::= <digit> | <onenine> <digits> | - <digit> | - <onenine><digits> <frac> ::= "" | . <digits> <digits> ::= <digit> | <digit> <digits>
class UnitTestPlusPlus:
    """Grammar for recognising UnitTest++ error report lines."""

    # Matches "<filename>:<num>:<num>: error: <message>" anchored at line
    # start; the two numbers are presumably line and column — confirm.
    error_line = (
        pp.LineStart()
        + pp.SkipTo(":")("filename")
        + pp.Literal(":")
        + pp.Word(pp.nums)
        + pp.Literal(":")
        + pp.Word(pp.nums)
        + pp.Literal(": error:")
        + pp.SkipTo(pp.LineEnd())
    )
t = f(i) if isinstance(t,str): t = [(Token.Literal,t)] s.extend(t) return s if toks else highlight(s) # ispec format parser: #--------------------- integer = pp.Regex(r'[1-9][0-9]*') indxdir = pp.oneOf(['<','>']) fixbit = pp.oneOf(['0','1']) number = integer|fixbit number.setParseAction(lambda r: int(r[0])) unklen = pp.Literal('*') length = number|unklen unkbit = pp.oneOf(['-']) fixbyte = pp.Regex(r'{[0-9a-fA-F][0-9a-fA-F]}').setParseAction(lambda r: Bits(int(r[0][1:3],16),8)) fixed = fixbyte|fixbit|unkbit option = pp.oneOf(['.','~','#','=']) symbol = pp.Regex(r'[A-Za-z_][A-Za-z0-9_]*') location = pp.Suppress('(')+length+pp.Suppress(')') directive = pp.Group(pp.Optional(option,default='')+symbol+pp.Optional(location,default=1)) speclen = pp.Group(length+pp.Optional(indxdir,default='<')) specformat = pp.Group(pp.Suppress('[')+pp.OneOrMore(directive|fixed)+pp.Suppress(']')) specoption = pp.Optional(pp.Literal('+').setParseAction(lambda r:True),default=False) specdecode = speclen+specformat+specoption def ispec_register(x,module): F = []
class WinIISParser(text_parser.PyparsingSingleLineTextParser):
  """Parses a Microsoft IIS log file."""

  NAME = 'winiis'
  DATA_FORMAT = 'Microsoft IIS log file'

  # Common Fields (6.0: date time s-sitename s-ip cs-method cs-uri-stem
  # cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status
  # sc-substatus sc-win32-status.
  # Common Fields (7.5): date time s-ip cs-method cs-uri-stem cs-uri-query
  # s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus
  # sc-win32-status time-taken

  # A single '-' marks an empty (unset) field in IIS logs.
  BLANK = pyparsing.Literal('-')

  WORD = pyparsing.Word(pyparsing.alphanums + '-') | BLANK

  INTEGER = (
      pyparsing.Word(pyparsing.nums, min=1).setParseAction(
          text_parser.ConvertTokenToInteger) | BLANK)

  IP_ADDRESS = (
      text_parser.PyparsingConstants.IPV4_ADDRESS |
      text_parser.PyparsingConstants.IPV6_ADDRESS | BLANK)

  PORT = (
      pyparsing.Word(pyparsing.nums, min=1, max=6).setParseAction(
          text_parser.ConvertTokenToInteger) | BLANK)

  _URI_SAFE_CHARACTERS = '/.?&+;_=()-:,%'
  _URI_UNSAFE_CHARACTERS = '{}|\\^~[]`'

  URI = pyparsing.Word(pyparsing.alphanums + _URI_SAFE_CHARACTERS) | BLANK

  # Per https://blogs.iis.net/nazim/use-of-special-characters-like-in-an-iis-url
  # IIS does not require the a query comply with RFC1738 restrictions on valid
  # URI characters
  QUERY = (pyparsing.Word(
      pyparsing.alphanums + _URI_SAFE_CHARACTERS + _URI_UNSAFE_CHARACTERS) |
      BLANK)

  DATE_TIME = (
      text_parser.PyparsingConstants.DATE_ELEMENTS +
      text_parser.PyparsingConstants.TIME_ELEMENTS)

  DATE_METADATA = (
      pyparsing.Literal('Date:') + DATE_TIME.setResultsName('date_time'))

  FIELDS_METADATA = (
      pyparsing.Literal('Fields:') +
      pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName('fields'))

  COMMENT = pyparsing.Literal('#') + (
      DATE_METADATA | FIELDS_METADATA | pyparsing.SkipTo(pyparsing.LineEnd()))

  LOG_LINE_6_0 = (
      DATE_TIME.setResultsName('date_time') +
      URI.setResultsName('s_sitename') +
      IP_ADDRESS.setResultsName('dest_ip') +
      WORD.setResultsName('http_method') +
      URI.setResultsName('cs_uri_stem') +
      URI.setResultsName('cs_uri_query') +
      PORT.setResultsName('dest_port') +
      WORD.setResultsName('cs_username') +
      IP_ADDRESS.setResultsName('source_ip') +
      URI.setResultsName('user_agent') +
      INTEGER.setResultsName('sc_status') +
      INTEGER.setResultsName('sc_substatus') +
      INTEGER.setResultsName('sc_win32_status'))

  # Maps an IIS "Fields:" token to the pyparsing element used for it when
  # rebuilding the log line grammar dynamically.
  _LOG_LINE_STRUCTURES = {}

  # Common fields. Set results name with underscores, not hyphens because regex
  # will not pick them up.
  _LOG_LINE_STRUCTURES['date'] = (
      text_parser.PyparsingConstants.DATE.setResultsName('date'))
  _LOG_LINE_STRUCTURES['time'] = (
      text_parser.PyparsingConstants.TIME.setResultsName('time'))
  _LOG_LINE_STRUCTURES['s-sitename'] = URI.setResultsName('s_sitename')
  _LOG_LINE_STRUCTURES['s-ip'] = IP_ADDRESS.setResultsName('dest_ip')
  _LOG_LINE_STRUCTURES['cs-method'] = WORD.setResultsName('http_method')
  _LOG_LINE_STRUCTURES['cs-uri-stem'] = URI.setResultsName(
      'requested_uri_stem')
  _LOG_LINE_STRUCTURES['cs-uri-query'] = QUERY.setResultsName('cs_uri_query')
  _LOG_LINE_STRUCTURES['s-port'] = PORT.setResultsName('dest_port')
  _LOG_LINE_STRUCTURES['cs-username'] = WORD.setResultsName('cs_username')
  _LOG_LINE_STRUCTURES['c-ip'] = IP_ADDRESS.setResultsName('source_ip')
  _LOG_LINE_STRUCTURES['cs(User-Agent)'] = URI.setResultsName('user_agent')
  _LOG_LINE_STRUCTURES['sc-status'] = INTEGER.setResultsName('http_status')
  _LOG_LINE_STRUCTURES['sc-substatus'] = INTEGER.setResultsName(
      'sc_substatus')
  _LOG_LINE_STRUCTURES['sc-win32-status'] = INTEGER.setResultsName(
      'sc_win32_status')

  # Less common fields.
  _LOG_LINE_STRUCTURES['s-computername'] = URI.setResultsName(
      's_computername')
  _LOG_LINE_STRUCTURES['sc-bytes'] = INTEGER.setResultsName('sent_bytes')
  _LOG_LINE_STRUCTURES['cs-bytes'] = INTEGER.setResultsName('received_bytes')
  _LOG_LINE_STRUCTURES['time-taken'] = INTEGER.setResultsName('time_taken')
  _LOG_LINE_STRUCTURES['cs-version'] = URI.setResultsName('protocol_version')
  _LOG_LINE_STRUCTURES['cs-host'] = URI.setResultsName('cs_host')
  _LOG_LINE_STRUCTURES['cs(Cookie)'] = URI.setResultsName('cs_cookie')
  _LOG_LINE_STRUCTURES['cs(Referrer)'] = URI.setResultsName('cs_referrer')
  _LOG_LINE_STRUCTURES['cs(Referer)'] = URI.setResultsName('cs_referrer')

  # Define the available log line structures. Default to the IIS v. 6.0
  # common format.
  LINE_STRUCTURES = [
      ('comment', COMMENT),
      ('logline', LOG_LINE_6_0)]

  # Define a signature value for the log file.
  _SIGNATURE = '#Software: Microsoft Internet Information Services'

  # Per https://msdn.microsoft.com/en-us/library/ms525807(v=vs.90).aspx:
  # "log file format(s) are all ASCII text formats (unless UTF-8 is enabled for
  # your Web sites)
  _ENCODING = 'utf-8'

  def __init__(self):
    """Initializes a parser."""
    super(WinIISParser, self).__init__()
    # Date components captured from a "#Date:" comment, used as a fallback
    # when a log line only carries a time value.
    self._day_of_month = None
    self._month = None
    self._year = None

  def _ParseComment(self, structure):
    """Parses a comment.

    Args:
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    # TODO: refactor. Why is this method named _ParseComment when it extracts
    # the date and time?
    if structure[1] == 'Date:':
      time_elements_tuple = self._GetValueFromStructure(structure, 'date_time')
      self._year, self._month, self._day_of_month, _, _, _ = (
          time_elements_tuple)

    elif structure[1] == 'Fields:':
      self._ParseFieldsMetadata(structure)

  def _ParseFieldsMetadata(self, structure):
    """Parses the fields metadata and updates the log line definition to match.

    Args:
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    fields = self._GetValueFromStructure(structure, 'fields', default_value='')
    fields = fields.strip()
    fields = fields.split(' ')

    log_line_structure = pyparsing.Empty()
    if fields[0] == 'date' and fields[1] == 'time':
      log_line_structure += self.DATE_TIME.setResultsName('date_time')
      fields = fields[2:]

    # Unknown field names fall back to the generic URI element.
    for member in fields:
      log_line_structure += self._LOG_LINE_STRUCTURES.get(member, self.URI)

    updated_structures = []
    for line_structure in self._line_structures:
      if line_structure[0] != 'logline':
        updated_structures.append(line_structure)

    updated_structures.append(('logline', log_line_structure))

    # TODO: self._line_structures is a work-around and this needs
    # a structural fix.
    self._line_structures = updated_structures

  def _ParseLogLine(self, parser_mediator, structure):
    """Parse a single log line and produce an event object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    time_elements_structure = structure.get('date_time', None)
    if time_elements_structure:
      # Ensure time_elements_tuple is not a pyparsing.ParseResults otherwise
      # copy.deepcopy() of the dfDateTime object will fail on Python 3.8 with:
      # "TypeError: 'str' object is not callable" due to pyparsing.ParseResults
      # overriding __getattr__ with a function that returns an empty string
      # when named token does not exists.
      year, month, day_of_month, hours, minutes, seconds = (
          time_elements_structure)
      time_elements_tuple = (
          year, month, day_of_month, hours, minutes, seconds)

    else:
      time_tuple = self._GetValueFromStructure(structure, 'time')
      if not time_tuple:
        parser_mediator.ProduceExtractionWarning('missing time values')
        return

      # Without an explicit date on the line fall back to the date stored
      # from the most recent "#Date:" comment.
      date_tuple = self._GetValueFromStructure(structure, 'date')
      if not date_tuple:
        time_elements_tuple = (
            self._year, self._month, self._day_of_month, time_tuple[0],
            time_tuple[1], time_tuple[2])

      else:
        time_elements_tuple = (
            date_tuple[0], date_tuple[1], date_tuple[2], time_tuple[0],
            time_tuple[1], time_tuple[2])

    try:
      date_time = dfdatetime_time_elements.TimeElements(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(time_elements_tuple))
      return

    event_data = IISEventData()
    for key, value in structure.items():
      # Skip time keys and empty ('-') fields.
      if key in ('date', 'date_time', 'time') or value == '-':
        continue

      if isinstance(value, pyparsing.ParseResults):
        value = ''.join(value)

      setattr(event_data, key, value)

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure parsed from the log file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in ('comment', 'logline'):
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    if key == 'logline':
      self._ParseLogLine(parser_mediator, structure)
    elif key == 'comment':
      self._ParseComment(structure)

  # pylint: disable=unused-argument
  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is an IIS log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line was successfully parsed.
    """
    # TODO: self._line_structures is a work-around and this needs
    # a structural fix.
    self._line_structures = self.LINE_STRUCTURES

    self._day_of_month = None
    self._month = None
    self._year = None

    # TODO: Examine other versions of the file format and if this parser
    # should support them. For now just checking if it contains the IIS
    # header.
    if self._SIGNATURE in line:
      return True

    return False
def select_oemol_atom_idx_by_language(system, mask=''):
    """
    This function selects the atom indexes from the passed oemol molecular
    complex by using a defined language. The language allows the selection
    of the ligand, protein, waters, mono-atomic ions, excipients, residue
    numbers and distance selection. Logic operators not, or, and, noh,
    diff, around can be used to refine the selection

    Parameters
    ----------
    system : OEMol of the bio-molecular complex protein-ligand
        The molecular complex

    mask : python string
        A string used to select atoms. A Backus-Naur Form grammar
        (https://en.wikipedia.org/wiki/Backus-Naur_form) is defined by the
        python module pyparsing. The defined grammar tokens are: "ligand",
        "protein", "ca_protein", "water", "ions", "excipients" and "resid
        chain1:res_idx1 chain2:res_idx2 ... res_idxn" that respectively
        define the ligand, the protein, carbon alpha protein atoms, water
        molecules, ions, excipients (not protein, ligand, water or ions)
        and residue numbers. The atom selection can be refined by using
        the following operator tokens:

        "not" = invert selection
        "or" = add selections
        "and" = intersect selections
        "diff" = logic difference between selections
        "noh" = remove hydrogens from the selection
        "around" = select atoms inside the cutoff distance from a given
        selection

    Returns
    -------
    atom_set : python set
        the selected atom indexes

    Notes
    -----
    Example of selection strings:
    mask = "ligand or protein"
    mask = "not water or not ions"
    mask = "ligand or protein or excipients"
    mask = "noh protein"
    mask = "resid A:17 B:12 17 18"
    mask = "protein diff resid A:1"
    mask = "5.0 around protein"
    """

    def split(system, ligand_res_name='LIG'):
        """
        This function splits the passed molecule in components and tracks
        the mapping between the original molecule and the split components.
        The mapping is created as separated atom component index sets.

        Parameters:
        -----------
        system: OEMol
            The system to split in components. The components are: the
            protein atoms, the protein carbon alpha atoms, the water atoms,
            the ion atoms, the excipients atoms

        Returns:
        --------
        dic_set: python dictionary
            The system is split in a dictionary with token words as keys
            and for value the related atom set. The token keywords are:
            protein, ca_protein, ligand, water, ions, excipients, system
        """

        # Define Empty sets
        lig_set = set()
        prot_set = set()
        ca_prot_set = set()
        wat_set = set()
        excp_set = set()
        ion_set = set()

        # Atom Bond Set vector used to contain the whole system
        frags = oechem.OEAtomBondSetVector()

        # Define Options for the Filter
        opt = oechem.OESplitMolComplexOptions()

        # The protein filter is set to avoid that multiple
        # chains are separated during the splitting and peptide
        # molecules are recognized as ligands
        pf = oechem.OEMolComplexFilterFactory(
            oechem.OEMolComplexFilterCategory_Protein)
        peptide = oechem.OEMolComplexFilterFactory(
            oechem.OEMolComplexFilterCategory_Peptide)
        protein_filter = oechem.OEOrRoleSet(pf, peptide)
        opt.SetProteinFilter(protein_filter)

        # The ligand filter is set to recognize just the ligand
        lf = oechem.OEMolComplexFilterFactory(
            oechem.OEMolComplexFilterCategory_Ligand)
        not_protein_filter = oechem.OENotRoleSet(protein_filter)
        ligand_filter = oechem.OEAndRoleSet(lf, not_protein_filter)
        opt.SetLigandFilter(ligand_filter)

        # The water filter is set to recognize just water molecules
        wf = oechem.OEMolComplexFilterFactory(
            oechem.OEMolComplexFilterCategory_Water)
        opt.SetWaterFilter(wf)

        # Set Category
        cat = oechem.OEMolComplexCategorizer()
        cat.AddLigandName(ligand_res_name)
        opt.SetCategorizer(cat)

        # Define the system fragments
        if not oechem.OEGetMolComplexFragments(frags, system, opt):
            raise ValueError('Unable to generate the system fragments')

        # Set empty OEMol containers
        prot = oechem.OEMol()
        lig = oechem.OEMol()
        wat = oechem.OEMol()
        excp = oechem.OEMol()

        # Split the protein from the system.  atommap maps each system atom
        # index to the index in the split component (0 when not present).
        atommap = oechem.OEAtomArray(system.GetMaxAtomIdx())
        if not oechem.OECombineMolComplexFragments(
                prot, frags, opt, opt.GetProteinFilter(), atommap):
            raise ValueError('Unable to split the Protein')
        # Populate the protein set and the protein carbon alpha set
        pred = oechem.OEIsCAlpha()
        for sys_at in system.GetAtoms():
            sys_idx = sys_at.GetIdx()
            at_idx = atommap[sys_idx]
            if at_idx:
                prot_set.add(sys_idx)
                at = system.GetAtom(oechem.OEHasAtomIdx(sys_idx))
                if pred(at):
                    ca_prot_set.add(sys_idx)
                # print(sys_idx, '->', at_idx)

        # Split the ligand from the system
        atommap = oechem.OEAtomArray(system.GetMaxAtomIdx())
        if not oechem.OECombineMolComplexFragments(
                lig, frags, opt, opt.GetLigandFilter(), atommap):
            raise ValueError('Unable to split the Ligand')
        # Populate the ligand set
        for sys_at in system.GetAtoms():
            sys_idx = sys_at.GetIdx()
            at_idx = atommap[sys_idx]
            if at_idx:
                lig_set.add(sys_idx)
                # print(sys_idx, '->', at_idx)

        # Split the water from the system
        atommap = oechem.OEAtomArray(system.GetMaxAtomIdx())
        if not oechem.OECombineMolComplexFragments(
                wat, frags, opt, opt.GetWaterFilter(), atommap):
            raise ValueError('Unable to split the Water')
        # Populate the water set
        for sys_at in system.GetAtoms():
            sys_idx = sys_at.GetIdx()
            at_idx = atommap[sys_idx]
            if at_idx:
                wat_set.add(sys_idx)
                # print(sys_idx, '->', at_idx)

        # Split the excipients from the system
        atommap = oechem.OEAtomArray(system.GetMaxAtomIdx())
        if not oechem.OECombineMolComplexFragments(
                excp, frags, opt, opt.GetOtherFilter(), atommap):
            raise ValueError('Unable to split the Excipients')
        # Populate the excipient set
        for sys_at in system.GetAtoms():
            sys_idx = sys_at.GetIdx()
            at_idx = atommap[sys_idx]
            if at_idx:
                excp_set.add(sys_idx)
                # print(sys_idx, '->', at_idx)

        # Create the mono-atomic ions set: degree 0 means no bonds.
        for exc_idx in excp_set:
            atom = system.GetAtom(oechem.OEHasAtomIdx(exc_idx))
            if atom.GetDegree() == 0:
                ion_set.add(exc_idx)

        # Create the excipients set which are not protein, ligand,
        # waters or ions
        excipients_set = excp_set - ion_set

        # Create the system set
        system_set = prot_set | lig_set | excp_set | wat_set

        if len(system_set) != system.NumAtoms():
            raise ValueError("The total system atom number {} is different "
                             "from its set representation {}".format(
                                 system.NumAtoms(), system_set))

        # The dictionary is used to link the token keywords to the created
        # molecule sets
        dic_set = {
            'ligand': lig_set,
            'protein': prot_set,
            'ca_protein': ca_prot_set,
            'water': wat_set,
            'ions': ion_set,
            'excipients': excipients_set,
            'system': system_set}

        return dic_set

    def build_set(ls, dsets):
        """
        This function selects the atom indexes

        Parameters:
        -----------
        ls: python list
            the parsed list with tokens and operand tokens for the selection
        dsets: python dictionary
            the dictionary containing the sets for the selection

        Return:
        -------
        atom_set: python set
            the set containing the atom index
        """

        def noh(ls, dsets):
            """
            This function removes hydrogens from the selection
            """
            data_set = build_set(ls[1], dsets)

            noh_set = set()
            pred = oechem.OEIsHydrogen()

            for idx in data_set:
                atom = system.GetAtom(oechem.OEHasAtomIdx(idx))
                if not pred(atom):
                    noh_set.add(idx)

            return noh_set

        def residues(ls):
            """
            This function selects residues based on the residue numbers. An
            example of selection can be:
            mask = 'resid A:16 17 19 B:1'
            """
            # List residue atom index to be restrained
            res_atom_set = set()

            # Dictionary of lists with the chain residues selected to be
            # restrained e.g. {chainA:[res1, res15], chainB:[res19, res17]}
            chain_dic = {'': []}

            # Fill out the chain dictionary.  A bare number belongs to the
            # '' (no chain) bucket; otherwise tokens come in triples
            # (chain, ':', number).
            i = 0
            while i < len(ls):
                if ls[i].isdigit():
                    chain_dic[''].append(int(ls[i]))
                    i += 1
                else:
                    # NOTE(review): bare except silently swallows all errors;
                    # a KeyError check would be more precise.
                    try:
                        chain_dic[ls[i]].append(int(ls[i + 2]))
                    except:
                        chain_dic[ls[i]] = []
                        chain_dic[ls[i]].append(int(ls[i + 2]))
                    i += 3

            # Loop over the molecular system to select the atom indexes to
            # be selected
            hv = oechem.OEHierView(
                system,
                oechem.OEAssumption_BondedResidue +
                oechem.OEAssumption_ResPerceived)
            for chain in hv.GetChains():
                chain_id = chain.GetChainID()
                if chain_id not in chain_dic:
                    continue
                for frag in chain.GetFragments():
                    for hres in frag.GetResidues():
                        res_num = hres.GetOEResidue().GetResidueNumber()
                        if res_num not in chain_dic[chain_id]:
                            continue
                        for oe_at in hres.GetAtoms():
                            res_atom_set.add(oe_at.GetIdx())

            return res_atom_set

        def around(dist, ls):
            """
            This function selects atoms not farther than the threshold
            distance from the current selection. The threshold distance is
            in Angstrom. An example of selection can be:
            mask = '5.0 around ligand'
            """
            # at = system.GetAtom(oechem.OEHasAtomIdx(idx))

            # Atom set selection
            atom_set_around = set()

            # Create a OE bit vector mask for each atoms
            bv_around = oechem.OEBitVector(system.GetMaxAtomIdx())

            # Set the mask atom
            for at in system.GetAtoms():
                if at.GetIdx() in ls:
                    bv_around.SetBitOn(at.GetIdx())

            # Predicate
            pred = oechem.OEAtomIdxSelected(bv_around)

            # Create the system molecule based on the atom mask
            molecules = oechem.OEMol()
            oechem.OESubsetMol(molecules, system, pred)

            # Create the Nearest neighbours
            nn = oechem.OENearestNbrs(system, float(dist))
            for nbrs in nn.GetNbrs(molecules):
                for atom in oechem.OEGetResidueAtoms(nbrs.GetBgn()):
                    if atom.GetIdx() in ls:
                        continue
                    atom_set_around.add(atom.GetIdx())

            return atom_set_around

        # Start Body of the selection function by language

        # Terminal Literal return the related set
        if isinstance(ls, str):
            return dsets[ls]
        # Not or Noh
        if len(ls) == 2:
            if ls[0] == 'noh':  # Noh case
                return noh(ls, dsets)
            elif ls[0] == 'not':  # Not case
                return dsets['system'] - build_set(ls[1], dsets)
            else:  # Resid case with one index
                return residues(ls[1])

        if len(ls) == 3:
            if ls[1] == 'or':  # Or Case (set union)
                return build_set(ls[0], dsets) | build_set(ls[2], dsets)
            elif ls[1] == 'and':  # And Case (set intersection)
                return build_set(ls[0], dsets) & build_set(ls[2], dsets)
            elif ls[1] == 'diff':  # Diff case (set difference)
                return build_set(ls[0], dsets) - build_set(ls[2], dsets)
            elif ls[1] == 'around':  # Around case
                return around(ls[0], build_set(ls[2], dsets))
            else:
                return residues(ls[1:])  # Resid case with one or two indexes
        else:
            if ls[0] == 'resid':
                return residues(ls[1:])  # Resid case with multiple indexes
            else:
                raise ValueError(
                    "The passed list have too many tokens: {}".format(ls))

    # Parse Action-Maker: builds a parse action that left-nests a flat token
    # list produced by operatorPrecedence into binary/unary groupings.
    def makeLRlike(numterms):
        if numterms is None:
            # None operator can only by binary op
            initlen = 2
            incr = 1
        else:
            initlen = {0: 1, 1: 2, 2: 3, 3: 5}[numterms]
            incr = {0: 1, 1: 1, 2: 2, 3: 4}[numterms]

        # Define parse action for this number of terms,
        # to convert flat list of tokens into nested list
        # NOTE(review): pa implicitly returns None when len(t) <= initlen,
        # leaving the original tokens unchanged — TODO confirm intended.
        def pa(s, l, t):
            t = t[0]
            if len(t) > initlen:
                ret = pyp.ParseResults(t[:initlen])
                i = initlen
                while i < len(t):
                    ret = pyp.ParseResults([ret] + t[i:i + incr])
                    i += incr
                return pyp.ParseResults([ret])

        return pa

    # Selection function body

    # Residue number selection
    # NOTE(review): 'id' shadows the Python builtin.
    id = pyp.Optional(pyp.Word(pyp.alphanums) + pyp.Literal(':')) + pyp.Word(
        pyp.nums)
    resid = pyp.Group(pyp.Literal("resid") + pyp.OneOrMore(id))

    # Real number for around operator selection
    real = pyp.Regex(r"\d+(\.\d*)?").setParseAction(lambda t: float(t[0]))

    # Define the tokens for the BNF grammar
    operand = pyp.Literal("protein") | pyp.Literal("ca_protein") | \
        pyp.Literal("ligand") | pyp.Literal("water") | \
        pyp.Literal("ions") | pyp.Literal("excipients") | resid

    # BNF Grammar definition with parseAction makeLRlike
    expr = pyp.operatorPrecedence(
        operand,
        [(None, 2, pyp.opAssoc.LEFT, makeLRlike(None)),
         (pyp.Literal("not"), 1, pyp.opAssoc.RIGHT, makeLRlike(1)),
         (pyp.Literal("noh"), 1, pyp.opAssoc.RIGHT, makeLRlike(1)),
         (pyp.Literal("and"), 2, pyp.opAssoc.LEFT, makeLRlike(2)),
         (pyp.Literal("or"), 2, pyp.opAssoc.LEFT, makeLRlike(2)),
         (pyp.Literal("diff"), 2, pyp.opAssoc.LEFT, makeLRlike(2)),
         (real + pyp.Literal("around"), 1, pyp.opAssoc.RIGHT, makeLRlike(2))])

    # Parse the input string
    try:
        ls = expr.parseString(mask, parseAll=True)
    except Exception as e:
        raise ValueError("The passed restraint mask is not valid: {}".format(
            str(e)))

    # Split the system
    dic_sets = split(system)

    # Select atom indexes
    atom_set = build_set(ls[0], dic_sets)

    return atom_set
class WinIISParser(text_parser.PyparsingSingleLineTextParser):
  """Parses a Microsoft IIS log file."""

  NAME = 'winiis'
  DESCRIPTION = 'Parser for Microsoft IIS log files.'

  # Common Fields (6.0: date time s-sitename s-ip cs-method cs-uri-stem
  # cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status
  # sc-substatus sc-win32-status.
  # Common Fields (7.5): date time s-ip cs-method cs-uri-stem cs-uri-query
  # s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus
  # sc-win32-status time-taken

  # A single '-' marks an empty (unset) field in IIS logs.
  BLANK = pyparsing.Literal('-')

  WORD = pyparsing.Word(pyparsing.alphanums + '-') | BLANK

  INTEGER = (pyparsing.Word(pyparsing.nums, min=1).setParseAction(
      text_parser.ConvertTokenToInteger) | BLANK)

  IP_ADDRESS = (text_parser.PyparsingConstants.IPV4_ADDRESS |
                text_parser.PyparsingConstants.IPV6_ADDRESS | BLANK)

  PORT = (pyparsing.Word(pyparsing.nums, min=1, max=6).setParseAction(
      text_parser.ConvertTokenToInteger) | BLANK)

  URI = pyparsing.Word(pyparsing.alphanums + '/.?&+;_=()-:,%') | BLANK

  DATE_TIME = (text_parser.PyparsingConstants.DATE_ELEMENTS +
               text_parser.PyparsingConstants.TIME_ELEMENTS)

  DATE_METADATA = (pyparsing.Literal('Date:') +
                   DATE_TIME.setResultsName('date_time'))

  FIELDS_METADATA = (
      pyparsing.Literal('Fields:') +
      pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName('fields'))

  COMMENT = pyparsing.Literal('#') + (
      DATE_METADATA | FIELDS_METADATA | pyparsing.SkipTo(pyparsing.LineEnd()))

  LOG_LINE_6_0 = (DATE_TIME.setResultsName('date_time') +
                  URI.setResultsName('s_sitename') +
                  IP_ADDRESS.setResultsName('dest_ip') +
                  WORD.setResultsName('http_method') +
                  URI.setResultsName('cs_uri_stem') +
                  URI.setResultsName('cs_uri_query') +
                  PORT.setResultsName('dest_port') +
                  WORD.setResultsName('cs_username') +
                  IP_ADDRESS.setResultsName('source_ip') +
                  URI.setResultsName('user_agent') +
                  INTEGER.setResultsName('sc_status') +
                  INTEGER.setResultsName('sc_substatus') +
                  INTEGER.setResultsName('sc_win32_status'))

  # Maps an IIS "Fields:" token to the pyparsing element used for it when
  # rebuilding the log line grammar dynamically.
  _LOG_LINE_STRUCTURES = {}

  # Common fields. Set results name with underscores, not hyphens because
  # regex will not pick them up.
  _LOG_LINE_STRUCTURES['date'] = (
      text_parser.PyparsingConstants.DATE.setResultsName('date'))
  _LOG_LINE_STRUCTURES['time'] = (
      text_parser.PyparsingConstants.TIME.setResultsName('time'))
  _LOG_LINE_STRUCTURES['s-sitename'] = URI.setResultsName('s_sitename')
  _LOG_LINE_STRUCTURES['s-ip'] = IP_ADDRESS.setResultsName('dest_ip')
  _LOG_LINE_STRUCTURES['cs-method'] = WORD.setResultsName('http_method')
  _LOG_LINE_STRUCTURES['cs-uri-stem'] = URI.setResultsName(
      'requested_uri_stem')
  _LOG_LINE_STRUCTURES['cs-uri-query'] = URI.setResultsName('cs_uri_query')
  _LOG_LINE_STRUCTURES['s-port'] = PORT.setResultsName('dest_port')
  _LOG_LINE_STRUCTURES['cs-username'] = WORD.setResultsName('cs_username')
  _LOG_LINE_STRUCTURES['c-ip'] = IP_ADDRESS.setResultsName('source_ip')
  _LOG_LINE_STRUCTURES['cs(User-Agent)'] = URI.setResultsName('user_agent')
  _LOG_LINE_STRUCTURES['sc-status'] = INTEGER.setResultsName('http_status')
  _LOG_LINE_STRUCTURES['sc-substatus'] = INTEGER.setResultsName(
      'sc_substatus')
  _LOG_LINE_STRUCTURES['sc-win32-status'] = INTEGER.setResultsName(
      'sc_win32_status')

  # Less common fields.
  _LOG_LINE_STRUCTURES['s-computername'] = URI.setResultsName(
      's_computername')
  _LOG_LINE_STRUCTURES['sc-bytes'] = INTEGER.setResultsName('sent_bytes')
  _LOG_LINE_STRUCTURES['cs-bytes'] = INTEGER.setResultsName('received_bytes')
  _LOG_LINE_STRUCTURES['time-taken'] = INTEGER.setResultsName('time_taken')
  _LOG_LINE_STRUCTURES['cs-version'] = URI.setResultsName('protocol_version')
  _LOG_LINE_STRUCTURES['cs-host'] = URI.setResultsName('cs_host')
  _LOG_LINE_STRUCTURES['cs(Cookie)'] = URI.setResultsName('cs_cookie')
  _LOG_LINE_STRUCTURES['cs(Referrer)'] = URI.setResultsName('cs_referrer')
  _LOG_LINE_STRUCTURES['cs(Referer)'] = URI.setResultsName('cs_referrer')

  # Define the available log line structures. Default to the IIS v. 6.0
  # common format.
  LINE_STRUCTURES = [('comment', COMMENT), ('logline', LOG_LINE_6_0)]

  # Define a signature value for the log file.
  _SIGNATURE = '#Software: Microsoft Internet Information Services'

  # Per https://msdn.microsoft.com/en-us/library/ms525807(v=vs.90).aspx:
  # "log file format(s) are all ASCII text formats (unless UTF-8 is enabled
  # for your Web sites)
  _ENCODING = 'utf-8'

  def __init__(self):
    """Initializes a parser object."""
    super(WinIISParser, self).__init__()
    # Date components captured from a "#Date:" comment, used as a fallback
    # when a log line only carries a time value.
    self._day_of_month = None
    self._month = None
    self._year = None

  def _ParseComment(self, structure):
    """Parses a comment.

    Args:
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    if structure[1] == 'Date:':
      self._year, self._month, self._day_of_month, _, _, _ = (
          structure.date_time)
    elif structure[1] == 'Fields:':
      self._ParseFieldsMetadata(structure)

  def _ParseFieldsMetadata(self, structure):
    """Parses the fields metadata.

    Args:
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    fields = structure.fields.split(' ')

    log_line_structure = pyparsing.Empty()
    if fields[0] == 'date' and fields[1] == 'time':
      log_line_structure += self.DATE_TIME.setResultsName('date_time')
      fields = fields[2:]

    # Unknown field names fall back to the generic URI element.
    for member in fields:
      log_line_structure += self._LOG_LINE_STRUCTURES.get(
          member, self.URI)

    # TODO: self._line_structures is a work-around and this needs
    # a structural fix.
    self._line_structures[1] = ('logline', log_line_structure)

  def _ParseLogLine(self, parser_mediator, structure):
    """Parse a single log line and produce an event object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    if structure.date_time:
      time_elements_tuple = structure.date_time

    elif structure.date and structure.time:
      year, month, day_of_month = structure.date
      hours, minutes, seconds = structure.time
      time_elements_tuple = (
          year, month, day_of_month, hours, minutes, seconds)

    elif structure.time:
      # Fall back to the date stored from the most recent "#Date:" comment.
      hours, minutes, seconds = structure.time
      time_elements_tuple = (
          self._year, self._month, self._day_of_month, hours, minutes,
          seconds)

    else:
      parser_mediator.ProduceExtractionError(
          'missing date and time values')
      return

    try:
      date_time = dfdatetime_time_elements.TimeElements(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      parser_mediator.ProduceExtractionError(
          'invalid date time value: {0!s}'.format(time_elements_tuple))
      return

    event_data = IISEventData()

    for key, value in iter(structure.items()):
      # Skip time keys and empty ('-') fields.
      if key in ('date', 'date_time', 'time') or value == '-':
        continue

      if isinstance(value, pyparsing.ParseResults):
        value = ''.join(value)

      setattr(event_data, key, value)

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure parsed from the log file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in ('comment', 'logline'):
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    if key == 'logline':
      self._ParseLogLine(parser_mediator, structure)
    elif key == 'comment':
      self._ParseComment(structure)

  def VerifyStructure(self, unused_parser_mediator, line):
    """Verify that this file is an IIS log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line was successfully parsed.
    """
    # TODO: self._line_structures is a work-around and this needs
    # a structural fix.
    self._line_structures = self.LINE_STRUCTURES

    self._day_of_month = None
    self._month = None
    self._year = None

    # TODO: Examine other versions of the file format and if this parser
    # should support them. For now just checking if it contains the IIS
    # header.
    if self._SIGNATURE in line:
      return True

    return False
def test_str(self): expr = pp.Literal('abc') s_expr = StyledElement({}, 'class:abc', expr) self.assertEqual(str(expr), str(s_expr))
def banana_grammar(emitter=None):
    """Generate a banana parser that can then be used to parse banana content.

    It builds an AST on which operations can then be applied.

    :param emitter: Emitter used to report diagnostics while connections are
        created. Defaults to a fresh ``emit.PrintEmitter`` per call. (The old
        ``emitter=emit.PrintEmitter()`` default was evaluated once at import
        time, silently sharing a single emitter instance between every
        default-argument call — the classic mutable-default pitfall.)
    :return: Return a banana parser
    :rtype: BananaScopeParser
    """
    if emitter is None:
        emitter = emit.PrintEmitter()

    # Should debug
    debug_grammar = False

    # Actions: each builds the AST node for the tokens it receives.
    def action_str_lit(s, l, t):
        return ast.StringLit(ast.make_span(s, l, t), t[0])

    def action_num_lit(s, l, t):
        return ast.Number(ast.make_span(s, l, t), t[0])

    def action_ident(s, l, t):
        return ast.Ident(ast.make_span(s, l, t), t[0])

    def action_expr(s, l, t):
        if len(t) != 1:
            raise exception.BananaGrammarBug(
                'Bug found in the grammar for expression,'
                ' Please report this bug.')
        if isinstance(t[0], ast.Expr):
            return t[0]
        return ast.Expr(ast.make_span(s, l, t), t[0])

    def action_dot_path(s, l, t):
        # First token is the name of the variable
        # The rest is the property path
        if isinstance(t[0], ast.StringLit) and len(t[1:]) == 0:
            return t[0]
        return ast.DotPath(ast.make_span(s, l, t), t[0], t[1:])

    def action_json_obj(s, l, t):
        return ast.JsonObj(ast.make_span(s, l, t), t)

    def action_parse_ctor_arg(s, l, t):
        # Two tokens means a named argument (name, value); one means
        # positional.
        if len(t) > 1:
            return ast.ComponentCtorArg(ast.make_span(s, l, t), t[1], t[0])
        else:
            return ast.ComponentCtorArg(ast.make_span(s, l, t), t[0])

    def action_parse_comp_ctor(s, l, tokens):
        comp = ast.Component(ast.make_span(s, l, tokens))
        for tok in tokens:
            if isinstance(tok, ast.Ident):
                comp.set_ctor(tok)
            elif isinstance(tok, ast.ComponentCtorArg):
                comp.add_arg(tok)
            else:
                raise exception.BananaGrammarBug(
                    'Bug found in the grammar, Please report this bug')
        return comp

    def action_assignment(s, l, t):
        return ast.Assignment(ast.make_span(s, l, t), t[0], t[1])

    def action_create_connections(s, l, t):
        # Chain every connection onto the first one, left to right.
        ast_conn = ast.into_connection(t[0])
        ast_conn.span = ast.make_span(s, l, t)
        for i in range(1, len(t)):
            next_conn = ast.into_connection(t[i])
            ast_conn.connect_to(next_conn, emitter)
        return ast_conn

    def action_merge_connections(s, l, t):
        ast_conn = ast.Connection(ast.make_span(s, l, t))
        ast_conn.merge_all(t, emitter)
        return ast_conn

    def action_root_ast(s, l, tokens):
        root = ast.BananaFile(emitter)
        for tok in tokens:
            if isinstance(tok, ast.Assignment):
                if isinstance(tok.rhs, ast.Component):
                    root.add_component_ctor(tok.lhs, tok.rhs)
                else:
                    root.add_assignment(tok.lhs, tok.rhs)
            elif isinstance(tok, ast.Connection):
                root.add_connections(tok)
            else:
                raise exception.BananaGrammarBug(
                    'Bug found in the grammar, Please report this bug.')
        return root

    # TODO(Joan): Remove once it is no longer needed
    def print_stmt(s, l, t):
        print("\nPRINT AST")
        print((l, [str(x) for x in t]))
        print("END PRINT AST\n")

    def action_unimplemented(s, l, t):
        raise exception.BananaGrammarBug("unimplemented code reached")

    # Tokens
    equals = p.Literal("=").suppress().setName('"="').setDebug(debug_grammar)
    arrow = p.Literal("->").suppress().setName('"->"').setDebug(debug_grammar)
    lbra = p.Literal("[").suppress().setName('"["').setDebug(debug_grammar)
    rbra = p.Literal("]").suppress().setName('"]"').setDebug(debug_grammar)
    colon = p.Literal(":").suppress().setName('":"')
    comma = p.Literal(",").suppress().setName(",")
    less = p.Literal("<").suppress().setName('"<"')
    greater = p.Literal(">").suppress().setName('">"')
    lbrace = p.Literal("{").suppress().setName('"{"').setDebug(debug_grammar)
    rbrace = p.Literal("}").suppress().setName('"}"').setDebug(debug_grammar)
    lpar = p.Literal("(").suppress().setName('"("')
    rpar = p.Literal(")").suppress().setName('")"')

    # Keywords
    ing = p.Literal("ing").suppress()
    imp = p.Literal("import").suppress()
    fro = p.Literal("from").suppress()

    # String Literal, Numbers, Identifiers
    # NOTE(review): the number regex does not accept a sign in the exponent
    # (e.g. '1e-5') — confirm whether that is intended by the language spec.
    string_lit = p.quotedString()\
        .setParseAction(action_str_lit)\
        .setName(const.STRING_LIT)
    number_lit = p.Regex(r'\d+(\.\d*)?([eE]\d+)?')\
        .setParseAction(action_num_lit)\
        .setName(const.NUMBER)
    ident = p.Word(p.alphas + "_", p.alphanums + "_")\
        .setParseAction(action_ident)\
        .setName(const.IDENT)

    # Path for properties
    dot_prop = ident | string_lit
    dot_path = p.delimitedList(dot_prop, ".")\
        .setParseAction(action_dot_path)\
        .setName(const.DOT_PATH)\
        .setDebug(debug_grammar)

    # Expressions
    # Here to simplify the logic, we can match directly
    # against ident and string_lit to avoid having to deal
    # only with dot_path. It also allow to remove the confusion
    # where '"a"' could be interpreted as a dot_path and would thus
    # be the same as 'a'. With the following, the first we
    # always be type-checked as a String whereas the latter will
    # be as the type of the variable.
    expr = p.infixNotation(number_lit | dot_path, [
        (p.oneOf('* /'), 2, p.opAssoc.LEFT),
        (p.oneOf('+ -'), 2, p.opAssoc.LEFT),
    ], lpar=lpar, rpar=rpar)
    expr.setParseAction(action_expr)\
        .setName(const.EXPR)\
        .setDebug(debug_grammar)

    # Json-like object (value are much more)
    json_obj = p.Forward()
    json_value = p.Forward()
    json_array = p.Group(lbra + p.Optional(p.delimitedList(json_value)) + rbra)
    json_array.setDebug(debug_grammar)
    json_array.setName(const.JSON_ARRAY)
    json_value <<= expr | json_obj | json_array
    json_value.setDebug(debug_grammar)\
        .setName(const.JSON_VALUE)
    json_members = p.delimitedList(p.Group(dot_path + colon - json_value)) +\
        p.Optional(comma)
    json_members.setDebug(debug_grammar)\
        .setName(const.JSON_MEMBERS)
    json_obj <<= p.Dict(lbrace + p.Optional(json_members) - rbrace)
    json_obj.setParseAction(action_json_obj)\
        .setName(const.JSON_OBJ)\
        .setDebug(debug_grammar)

    # Component constructor
    arg = (ident + equals - (expr | json_obj)) | expr | json_obj
    arg.setParseAction(action_parse_ctor_arg)
    params = p.delimitedList(arg)
    comp_ctor = ident + lpar - p.Optional(params) + rpar
    comp_ctor.setParseAction(action_parse_comp_ctor)\
        .setName(const.COMP_CTOR)\
        .setDebug(debug_grammar)

    # Assignments
    assignment = dot_path + equals - (comp_ctor | expr | json_obj)
    assignment.setParseAction(action_assignment)

    # Connections
    connection = p.Forward()
    array_of_connection = p.Group(
        lbra + p.Optional(p.delimitedList(connection)) + rbra)
    array_of_connection.setParseAction(action_merge_connections)
    last_expr = ident | array_of_connection
    this_expr = p.Forward()
    # FollowedBy acts as a lookahead so a lone identifier is not consumed
    # as a connection chain.
    match_expr = p.FollowedBy(last_expr + arrow - last_expr) + \
        (last_expr + p.OneOrMore(arrow - last_expr))
    this_expr <<= match_expr | last_expr
    connection <<= this_expr
    match_expr.setDebug(debug_grammar)\
        .setName(const.CONNECTION) \
        .setParseAction(action_create_connections)

    # Definitions
    definition = ing - less - string_lit - greater - ident - lbrace - rbrace
    definition.setDebug(debug_grammar)\
        .setName(const.DEFINITION)\
        .setParseAction(action_unimplemented)

    # Import directive
    module_def = (imp - ident) | fro - ident - imp - ident
    module_def.setDebug(debug_grammar)\
        .setName(const.MOD_IMPORT)\
        .setParseAction(action_unimplemented)

    # Comments
    comments = "#" + p.restOfLine

    statement = assignment | \
        match_expr | \
        definition | \
        module_def

    statement.setName(const.STATEMENT)
    statement.setDebug(debug_grammar)
    statement.setParseAction(print_stmt)

    # Grammar
    grammar = p.OneOrMore(statement).ignore(comments)
    grammar.setParseAction(action_root_ast)

    return BananaScopeParser(grammar)