Example #1
if not compare_versions(six.__version__, '1.3'):
    raise ImportError(
        'six 1.3 or later is required; you have %s' % (
            six.__version__))

try:
    import pyparsing
except ImportError:
    raise ImportError("matplotlib requires pyparsing")
else:
    if not compare_versions(pyparsing.__version__, '1.5.6'):
        raise ImportError(
            "matplotlib requires pyparsing >= 1.5.6")

    # Check for the pyparsing 2.0.0 <<= bug; it may be patched in some
    # distributions.
    try:
        f = pyparsing.Forward()
        f <<= pyparsing.Literal('a')
        bad_pyparsing = f is None
    except TypeError:
        bad_pyparsing = True

    # pyparsing 1.5.6 does not have <<= on the Forward class, but
    # pyparsing 2.0.0 and later will spew deprecation warnings if
    # using << instead.  Additionally, the <<= in pyparsing 1.5.7 is
    # broken, since it doesn't return self.  In order to support
    # pyparsing 1.5.6 and above with a common code base, this small
    # monkey patch is applied.
    if bad_pyparsing:
        def _forward_ilshift(self, other):
            self.__lshift__(other)
            return self
        pyparsing.Forward.__ilshift__ = _forward_ilshift
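
# A quick sanity check of the monkey patch (a minimal sketch): once
# pyparsing.Forward.__ilshift__ returns self, the augmented assignment
# leaves `f` bound to the Forward and the grammar is usable.
f = pyparsing.Forward()
f <<= pyparsing.Literal('a')
assert f is not None
print(f.parseString('a'))  # -> ['a']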
Example #2
from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
    UniformIntegerHyperparameter, UniformFloatHyperparameter, \
    NumericalHyperparameter, Constant, IntegerHyperparameter, \
    NormalIntegerHyperparameter, NormalFloatHyperparameter
from ConfigSpace.conditions import EqualsCondition, NotEqualsCondition,\
    InCondition, AndConjunction, OrConjunction, ConditionComponent
from ConfigSpace.forbidden import ForbiddenEqualsClause, \
    ForbiddenAndConjunction, ForbiddenInClause, AbstractForbiddenComponent, MultipleValueForbiddenClause

# Build pyparsing expressions for params
pp_param_name = pyparsing.Word(pyparsing.alphanums + "_" + "-" + "@" + "." +
                               ":" + ";" + "\\" + "/" + "?" + "!" + "$" + "%" +
                               "&" + "*" + "+" + "<" + ">")
pp_digits = "0123456789"
pp_plusorminus = pyparsing.Literal('+') | pyparsing.Literal('-')
pp_int = pyparsing.Combine(
    pyparsing.Optional(pp_plusorminus) + pyparsing.Word(pp_digits))
pp_float = pyparsing.Combine(
    pyparsing.Optional(pp_plusorminus) + pyparsing.Optional(pp_int) + "." +
    pp_int)
pp_eorE = pyparsing.Literal('e') | pyparsing.Literal('E')
pp_floatorint = pp_float | pp_int
pp_e_notation = pyparsing.Combine(pp_floatorint + pp_eorE + pp_int)
pp_number = pp_e_notation | pp_float | pp_int
pp_numberorname = pp_number | pp_param_name
pp_il = pyparsing.Word("il")
pp_choices = pp_param_name + pyparsing.Optional(
    pyparsing.OneOrMore("," + pp_param_name))

pp_cont_param = pp_param_name + "[" + pp_number + "," + pp_number + "]" + \
    "[" + pp_number + "]" + pyparsing.Optional(pp_il)
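
# A few spot checks of the number grammar above (a sketch; ParseResults
# shown in pyparsing's default repr):
print(pp_number.parseString("42"))      # -> ['42']
print(pp_number.parseString("-3.5"))    # -> ['-3.5']
print(pp_number.parseString("1.5e-3"))  # -> ['1.5e-3']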
Example #3
    def parse_alg_expr(estr):
        """
        Parses algebraic expressions compatible with Kappa syntax

        Parameters
        ----------
        estr : str
            String corresponding to an algebraic expression

        Returns
        -------
        list
            Returns a list of tokens corresponding to elements in a Kappa algebraic expression
        """
        point = pp.Literal(".")
        e = pp.CaselessLiteral("E")
        fnumber = pp.Combine(pp.Word("+-" + pp.nums, pp.nums) +
                             pp.Optional(point + pp.Optional(pp.Word(pp.nums))) +
                             pp.Optional(e + pp.Word("+-" + pp.nums, pp.nums)))

        # infix operators
        plus = pp.Literal("+")
        minus = pp.Literal("-")
        mult = pp.Literal("*")
        div = pp.Literal("/")
        mod = pp.Literal("[mod]")
        lpar = pp.Literal("(")
        rpar = pp.Literal(")")
        expop = pp.Literal("^")

        addop = plus | minus
        multop = mult | div | mod

        # constants
        inf = pp.Literal("inf")
        pi = pp.Literal("[pi]")
        events = pp.Literal("[E]")
        null_events = pp.Literal("[E-]")
        event_limit = pp.Literal("[Emax]")
        time = pp.Literal("[T]")
        cpu_time = pp.Literal("[Tsim]")
        time_limit = pp.Literal("[Tmax]")
        plot_points = pp.Literal("[pp]")

        constant = inf | pi | events | null_events | event_limit | time | cpu_time | time_limit | plot_points

        # variables
        variable = pp.QuotedString("'")

        # patterns
        pattern = pp.Combine(
            pp.Literal("|") + pp.CharsNotIn("|") + pp.Literal("|"))  # parse what's in between later

        # unary functions (one arg)
        logfunc = pp.Literal("[log]")
        expfunc = pp.Literal("[exp]")
        sinfunc = pp.Literal("[sin]")
        cosfunc = pp.Literal("[cos]")
        tanfunc = pp.Literal("[tan]")
        sqrtfunc = pp.Literal("[sqrt]")
        floorfunc = pp.Literal("[int]")

        unary_one_funcs = logfunc | expfunc | sinfunc | cosfunc | tanfunc | sqrtfunc | floorfunc

        # unary functions (two args)
        maxfunc = pp.Literal("[max]")
        minfunc = pp.Literal("[min]")

        unary_two_funcs = maxfunc | minfunc

        expr = pp.Forward()
        atom = (pp.Optional("-") + (
            constant | variable | fnumber | lpar + expr + rpar | unary_one_funcs + expr | unary_two_funcs + expr + expr | pattern))

        factor = pp.Forward()
        factor << atom + pp.ZeroOrMore((expop + factor))

        term = factor + pp.ZeroOrMore((multop + factor))
        expr << term + pp.ZeroOrMore((addop + term))
        fullExpr = expr

        return fullExpr.parseString(estr.strip())
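
# A quick sanity check (a sketch; assumes parse_alg_expr is reachable here,
# e.g. as a staticmethod or module-level function). Nothing above uses
# pp.Group, so the tokens come back as a flat list:
tokens = parse_alg_expr("2 + 2 * [pi]")
print(tokens.asList())  # -> ['2', '+', '2', '*', '[pi]']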
Example #4
class TcFilterParser(AbstractParser):
    class FilterMatchIdIpv4(object):
        INCOMING_NETWORK = 12
        OUTGOING_NETWORK = 16
        PORT = 20

    class FilterMatchIdIpv6(object):
        INCOMING_NETWORK_LIST = [8, 12, 16, 20]
        OUTGOING_NETWORK_LIST = [24, 28, 32, 36]
        PORT = 40

    __FILTER_FLOWID_PATTERN = (pp.Literal("filter parent") +
                               pp.SkipTo("flowid", include=True) +
                               pp.Word(pp.hexnums + ":"))
    __FILTER_PROTOCOL_PATTERN = (pp.Literal("filter parent") +
                                 pp.SkipTo("protocol", include=True) +
                                 pp.Word(pp.alphanums))
    __FILTER_PRIORITY_PATTERN = (pp.Literal("filter parent") +
                                 pp.SkipTo("pref", include=True) +
                                 pp.Word(pp.nums))
    __FILTER_ID_PATTERN = (pp.Literal("filter parent") +
                           pp.SkipTo("fh", include=True) +
                           pp.Word(pp.hexnums + ":"))
    __FILTER_MATCH_PATTERN = (pp.Literal("match") +
                              pp.Word(pp.alphanums + "/") + pp.Literal("at") +
                              pp.Word(pp.nums))
    __FILTER_MANGLE_MARK_PATTERN = (pp.Literal("filter parent") +
                                    pp.SkipTo("handle", include=True) +
                                    pp.Word(pp.hexnums) +
                                    pp.SkipTo("classid", include=True) +
                                    pp.Word(pp.hexnums + ":"))

    @property
    def protocol(self):
        return self.__protocol

    @property
    def _tc_subcommand(self):
        return TcSubCommand.FILTER.value

    def __init__(self, con, ip_version):
        super(TcFilterParser, self).__init__()

        self.__con = con
        self.__ip_version = ip_version
        self.__buffer = None
        self.__parse_idx = 0

        self.__protocol = None

        self._clear()

    def parse(self, device, text):
        self._clear()

        if typepy.is_null_string(text):
            return []

        filter_data_matrix = []
        self.__buffer = self._to_unicode(text).splitlines()
        self.__parse_idx = 0

        while self.__parse_idx < len(self.__buffer):
            line = self._to_unicode(self.__buffer[self.__parse_idx].strip())
            self.__parse_idx += 1

            if typepy.is_null_string(line):
                continue

            self.__device = device

            try:
                self.__parse_mangle_mark(line)
            except pp.ParseException:
                logger.debug("failed to parse mangle: {}".format(line))
            else:
                filter_data_matrix.append({
                    Tc.Param.DEVICE: self.__device,
                    Tc.Param.CLASS_ID: self.__classid,
                    Tc.Param.HANDLE: self.__handle,
                })
                self._clear()
                continue

            tc_filter = self.__get_filter()

            try:
                self.__parse_flow_id(line)
                self.__parse_protocol(line)
                self.__parse_priority(line)
                self.__parse_filter_id(line)

                if tc_filter.get(Tc.Param.FLOW_ID):
                    logger.debug("store filter: {}".format(tc_filter))
                    filter_data_matrix.append(tc_filter)
                    self._clear()

                    self.__parse_flow_id(line)
                    self.__parse_protocol(line)
                    self.__parse_priority(line)
                    self.__parse_filter_id(line)

                continue
            except pp.ParseException:
                logger.debug("failed to parse flow id: {}".format(line))

            try:
                if self.__ip_version == 4:
                    self.__parse_filter_ipv4(line)
                elif self.__ip_version == 6:
                    self.__parse_filter_ipv6(line)
                else:
                    raise ValueError("unknown ip version: {}".format(
                        self.__ip_version))
            except pp.ParseException:
                logger.debug("failed to parse filter: {}".format(line))

        if self.__flow_id:
            filter_data_matrix.append(self.__get_filter())

        if filter_data_matrix:
            self.__con.create_table_from_data_matrix(
                table_name=self._tc_subcommand,
                attr_name_list=list(self.__get_filter()),
                data_matrix=filter_data_matrix,
            )

        logger.debug("tc {:s} parse result: {}".format(
            self._tc_subcommand, json.dumps(filter_data_matrix, indent=4)))

        return filter_data_matrix

    def parse_incoming_device(self, text):
        if typepy.is_null_string(text):
            return None

        match = re.search(r"Egress Redirect to device ifb[\d]+",
                          self._to_unicode(text), re.MULTILINE)
        if match is None:
            return None

        return re.search(r"ifb[\d]+", match.group()).group()

    def _clear(self):
        self.__device = None
        self.__filter_id = None
        self.__flow_id = None
        self.__protocol = None
        self.__priority = None
        self.__filter_src_network = None
        self.__filter_dst_network = None
        self.__filter_src_port = None
        self.__filter_dst_port = None

        self.__handle = None
        self.__classid = None

    def __get_filter(self):
        tc_filter = OrderedDict()
        tc_filter[Tc.Param.DEVICE] = self.__device
        tc_filter[Tc.Param.FILTER_ID] = self.__filter_id
        tc_filter[Tc.Param.FLOW_ID] = self.__flow_id
        tc_filter[Tc.Param.PROTOCOL] = self.protocol
        tc_filter[Tc.Param.PRIORITY] = self.__priority
        tc_filter[Tc.Param.SRC_NETWORK] = sanitize_network(
            self.__filter_src_network, self.__ip_version)
        tc_filter[Tc.Param.DST_NETWORK] = sanitize_network(
            self.__filter_dst_network, self.__ip_version)
        tc_filter[Tc.Param.SRC_PORT] = self.__filter_src_port
        tc_filter[Tc.Param.DST_PORT] = self.__filter_dst_port

        return tc_filter

    def __parse_flow_id(self, line):
        parsed_list = self.__FILTER_FLOWID_PATTERN.parseString(line)
        self.__flow_id = parsed_list[-1]
        logger.debug("succeed to parse flow id: flow-id={}, line={}".format(
            self.__flow_id, line))

    def __parse_protocol(self, line):
        parsed_list = self.__FILTER_PROTOCOL_PATTERN.parseString(line)
        self.__protocol = parsed_list[-1]
        logger.debug("succeed to parse protocol: protocol={}, line={}".format(
            self.__protocol, line))

    def __parse_priority(self, line):
        parsed_list = self.__FILTER_PRIORITY_PATTERN.parseString(line)
        self.__priority = int(parsed_list[-1])
        logger.debug("succeed to parse priority: priority={}, line={}".format(
            self.__priority, line))

    def __parse_filter_id(self, line):
        parsed_list = self.__FILTER_ID_PATTERN.parseString(line)
        self.__filter_id = parsed_list[-1]
        logger.debug(
            "succeed to parse filter id: filter-id={}, line={}".format(
                self.__filter_id, line))

    def __parse_mangle_mark(self, line):
        parsed_list = self.__FILTER_MANGLE_MARK_PATTERN.parseString(line)
        self.__classid = parsed_list[-1]
        self.__handle = int("0" + parsed_list[-3], 16)
        logger.debug("succeed to parse mangle mark: "
                     "classid={}, handle={}, line={}".format(
                         self.__classid, self.__handle, line))

    def __parse_filter_ip_line(self, line):
        parsed_list = self.__FILTER_MATCH_PATTERN.parseString(line)
        value_hex, mask_hex = parsed_list[1].split("/")
        match_id = int(parsed_list[3])

        return (value_hex, mask_hex, match_id)

    def __parse_filter_ipv4_network(self, value_hex, mask_hex, match_id):
        ipaddr = ".".join([
            text_type(int(value_hex[i:i + 2], 16))
            for i in range(0, len(value_hex), 2)
        ])
        netmask = bin(int(mask_hex, 16)).count("1")
        network = "{:s}/{:d}".format(ipaddr, netmask)

        if match_id == self.FilterMatchIdIpv4.INCOMING_NETWORK:
            self.__filter_src_network = network
        elif match_id == self.FilterMatchIdIpv4.OUTGOING_NETWORK:
            self.__filter_dst_network = network
        else:
            logger.warn("unknown match id: {}".format(match_id))

    def __parse_filter_ipv6_network(self, value_hex, mask_hex, match_id):
        from collections import namedtuple

        Ipv6Entry = namedtuple("Ipv6Entry", "match_id octet_list mask_hex")

        OCTET_LEN = 4
        ipv6_entry_list = [
            Ipv6Entry(
                match_id=match_id,
                octet_list=[
                    value_hex[i:i + OCTET_LEN]
                    for i in range(0, len(value_hex), OCTET_LEN)
                ],
                mask_hex=mask_hex,
            )
        ]

        while True:
            try:
                line = self.__buffer[self.__parse_idx].strip()
            except IndexError:
                break

            try:
                value_hex, mask_hex, match_id = self.__parse_filter_ip_line(
                    line)
            except pp.ParseException:
                break

            if (match_id in self.FilterMatchIdIpv6.INCOMING_NETWORK_LIST or
                    match_id in self.FilterMatchIdIpv6.OUTGOING_NETWORK_LIST):
                ipv6_entry_list.append(
                    Ipv6Entry(
                        match_id=match_id,
                        octet_list=[
                            value_hex[i:i + OCTET_LEN]
                            for i in range(0, len(value_hex), OCTET_LEN)
                        ],
                        mask_hex=mask_hex,
                    ))
            else:
                break

            self.__parse_idx += 1

        src_octet_list = []
        dst_octet_list = []
        src_netmask = 0
        dst_netmask = 0

        for ipv6_entry in ipv6_entry_list:
            part_netmask = bin(int(ipv6_entry.mask_hex, 16)).count("1")

            if ipv6_entry.match_id in self.FilterMatchIdIpv6.INCOMING_NETWORK_LIST:
                src_octet_list.extend(ipv6_entry.octet_list)
                src_netmask += part_netmask
            elif ipv6_entry.match_id in self.FilterMatchIdIpv6.OUTGOING_NETWORK_LIST:
                dst_octet_list.extend(ipv6_entry.octet_list)
                dst_netmask += part_netmask
            else:
                raise ValueError(
                    "unexpected ipv6 entry: {}".format(ipv6_entry))

        while len(src_octet_list) < 8:
            src_octet_list.append("0000")
        while len(dst_octet_list) < 8:
            dst_octet_list.append("0000")

        self.__filter_dst_network = ipaddress.IPv6Network("{:s}/{:d}".format(
            ":".join(dst_octet_list), dst_netmask)).compressed
        self.__filter_src_network = ipaddress.IPv6Network("{:s}/{:d}".format(
            ":".join(src_octet_list), src_netmask)).compressed

    def __parse_filter_port(self, value_hex):
        # A port filter consists of eight hex digits: the upper half
        # represents the source port filter and the lower half the
        # destination port filter.
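        # For example, value_hex == "00160050" encodes source port 0x0016
        # (22) and destination port 0x0050 (80).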

        if len(value_hex) != 8:
            raise ValueError("invalid port filter value: {}".format(value_hex))

        src_port_hex = value_hex[:4]
        dst_port_hex = value_hex[4:]

        logger.debug(
            "parse ipv4 port: src-port-hex={}, dst-port-hex={}".format(
                src_port_hex, dst_port_hex))

        src_port_decimal = int(src_port_hex, 16)
        self.__filter_src_port = src_port_decimal if src_port_decimal != 0 else None

        dst_port_decimal = int(dst_port_hex, 16)
        self.__filter_dst_port = dst_port_decimal if dst_port_decimal != 0 else None

    def __parse_filter_ipv4(self, line):
        value_hex, mask_hex, match_id = self.__parse_filter_ip_line(line)

        if match_id in [
                self.FilterMatchIdIpv4.INCOMING_NETWORK,
                self.FilterMatchIdIpv4.OUTGOING_NETWORK,
        ]:
            self.__parse_filter_ipv4_network(value_hex, mask_hex, match_id)
        elif match_id == self.FilterMatchIdIpv4.PORT:
            self.__parse_filter_port(value_hex)
        elif match_id in (self.FilterMatchIdIpv6.INCOMING_NETWORK_LIST +
                          self.FilterMatchIdIpv6.OUTGOING_NETWORK_LIST +
                          [self.FilterMatchIdIpv6.PORT]):
            logger.warning(
                "unknown match id for an IPv4 filter: might be an IPv6 filter. "
                "try to use --ipv6 option. (id={})".format(match_id))
            return
        else:
            logger.debug("unknown match id: {}".format(match_id))
            return

        logger.debug("succeed to parse ipv4 filter: " + ", ".join([
            "src_network={}".format(self.__filter_src_network),
            "dst_network={}".format(self.__filter_dst_network),
            "src_port={}".format(self.__filter_src_port),
            "dst_port={}".format(self.__filter_dst_port),
            "line={}".format(line),
        ]))

    def __parse_filter_ipv6(self, line):
        value_hex, mask_hex, match_id = self.__parse_filter_ip_line(line)

        if (match_id in self.FilterMatchIdIpv6.INCOMING_NETWORK_LIST
                or match_id in self.FilterMatchIdIpv6.OUTGOING_NETWORK_LIST):
            self.__parse_filter_ipv6_network(value_hex, mask_hex, match_id)
        elif match_id == self.FilterMatchIdIpv6.PORT:
            self.__parse_filter_port(value_hex)
        else:
            logger.debug("unknown match id: {}".format(match_id))
            return

        logger.debug("succeed to parse ipv6 filter: " + ", ".join([
            "src_network={}".format(self.__filter_src_network),
            "dst_network={}".format(self.__filter_dst_network),
            "src_port={}".format(self.__filter_src_port),
            "dst_port={}".format(self.__filter_dst_port),
            "line={}".format(line),
        ]))
Example #5
            sub_operand for operand in self.operands
            for sub_operand in operand.operands_list
        ])


class AndSubExpr(BinaryOp):
    """Expand later as needed."""
    pass


class OrSubExpr(BinaryOp):
    """Expand later as needed."""
    pass


COMMA = pyparsing.Suppress(pyparsing.Literal(","))
LPAREN = pyparsing.Suppress(pyparsing.Literal("("))
RPAREN = pyparsing.Suppress(pyparsing.Literal(")"))
EQUAL = pyparsing.Literal("=")
LBRACE = pyparsing.Suppress(pyparsing.Literal("{"))
RBRACE = pyparsing.Suppress(pyparsing.Literal("}"))

# Initialize non-ascii unicode code points in the Basic Multilingual Plane.
unicode_printables = u''.join(
    unichr(c) for c in xrange(128, 65536) if not unichr(c).isspace())

# Comma is excluded, and none of the Literal tokens defined above are allowed.
valid_identifier_chars = ((unicode_printables + pyparsing.alphanums +
                           ".-_#!$%&'*+/:;?@[\\]^`|~"))

metric_name = (pyparsing.Word(valid_identifier_chars, min=1,
Example #6
def _build_tgrep_parser(set_parse_actions = True):
    '''
    Builds a pyparsing-based parser object for tokenizing and
    interpreting tgrep search strings.
    '''
    tgrep_op = (pyparsing.Optional('!') +
                pyparsing.Regex('[$%,.<>][%,.<>0-9-\':]*'))
    tgrep_qstring = pyparsing.QuotedString(quoteChar='"', escChar='\\',
                                           unquoteResults=False)
    tgrep_node_regex = pyparsing.QuotedString(quoteChar='/', escChar='\\',
                                              unquoteResults=False)
    tgrep_qstring_icase = pyparsing.Regex(
        'i@\\"(?:[^"\\n\\r\\\\]|(?:\\\\.))*\\"')
    tgrep_node_regex_icase = pyparsing.Regex(
        'i@\\/(?:[^/\\n\\r\\\\]|(?:\\\\.))*\\/')
    tgrep_node_literal = pyparsing.Regex('[^][ \r\t\n;:.,&|<>()$!@%\'^=]+')
    tgrep_expr = pyparsing.Forward()
    tgrep_relations = pyparsing.Forward()
    tgrep_parens = pyparsing.Literal('(') + tgrep_expr + ')'
    tgrep_nltk_tree_pos = (
        pyparsing.Literal('N(') +
        pyparsing.Optional(
            pyparsing.Word(pyparsing.nums) + ',' +
            pyparsing.Optional(
                pyparsing.delimitedList(pyparsing.Word(pyparsing.nums),
                                        delim=',') +
                pyparsing.Optional(','))) +
        ')')
    tgrep_node_label = pyparsing.Regex('[A-Za-z0-9]+')
    tgrep_node_label_use = pyparsing.Combine('=' + tgrep_node_label)
    # see _tgrep_segmented_pattern_action
    tgrep_node_label_use_pred = tgrep_node_label_use.copy()
    macro_name = pyparsing.Regex('[^];:.,&|<>()[$!@%\'^=\r\t\n ]+')
    macro_name.setWhitespaceChars('')
    macro_use = pyparsing.Combine('@' + macro_name)
    tgrep_node_expr = (tgrep_node_label_use_pred |
                       macro_use |
                       tgrep_nltk_tree_pos |
                       tgrep_qstring_icase |
                       tgrep_node_regex_icase |
                       tgrep_qstring |
                       tgrep_node_regex |
                       '*' |
                       tgrep_node_literal)
    tgrep_node_expr2 = ((tgrep_node_expr +
                         pyparsing.Literal('=').setWhitespaceChars('') +
                         tgrep_node_label.copy().setWhitespaceChars('')) |
                        tgrep_node_expr)
    tgrep_node = (tgrep_parens |
                  (pyparsing.Optional("'") +
                   tgrep_node_expr2 +
                   pyparsing.ZeroOrMore("|" + tgrep_node_expr)))
    tgrep_brackets = pyparsing.Optional('!') + '[' + tgrep_relations + ']'
    tgrep_relation = tgrep_brackets | (tgrep_op + tgrep_node)
    tgrep_rel_conjunction = pyparsing.Forward()
    tgrep_rel_conjunction << (tgrep_relation +
                              pyparsing.ZeroOrMore(pyparsing.Optional('&') +
                                                   tgrep_rel_conjunction))
    tgrep_relations << tgrep_rel_conjunction + pyparsing.ZeroOrMore(
        "|" + tgrep_relations)
    tgrep_expr << tgrep_node + pyparsing.Optional(tgrep_relations)
    tgrep_expr_labeled = tgrep_node_label_use + pyparsing.Optional(tgrep_relations)
    tgrep_expr2 = tgrep_expr + pyparsing.ZeroOrMore(':' + tgrep_expr_labeled)
    macro_defn = (pyparsing.Literal('@') +
                  pyparsing.White().suppress() +
                  macro_name +
                  tgrep_expr2)
    tgrep_exprs = (pyparsing.Optional(macro_defn + pyparsing.ZeroOrMore(';' + macro_defn) + ';') +
                   tgrep_expr2 +
                   pyparsing.ZeroOrMore(';' + (macro_defn | tgrep_expr2)) +
                   pyparsing.ZeroOrMore(';').suppress())
    if set_parse_actions:
        tgrep_node_label_use.setParseAction(_tgrep_node_label_use_action)
        tgrep_node_label_use_pred.setParseAction(_tgrep_node_label_pred_use_action)
        macro_use.setParseAction(_tgrep_macro_use_action)
        tgrep_node.setParseAction(_tgrep_node_action)
        tgrep_node_expr2.setParseAction(_tgrep_bind_node_label_action)
        tgrep_parens.setParseAction(_tgrep_parens_action)
        tgrep_nltk_tree_pos.setParseAction(_tgrep_nltk_tree_pos_action)
        tgrep_relation.setParseAction(_tgrep_relation_action)
        tgrep_rel_conjunction.setParseAction(_tgrep_conjunction_action)
        tgrep_relations.setParseAction(_tgrep_rel_disjunction_action)
        macro_defn.setParseAction(_macro_defn_action)
        # the whole expression is also the conjunction of two
        # predicates: the first node predicate, and the remaining
        # relation predicates
        tgrep_expr.setParseAction(_tgrep_conjunction_action)
        tgrep_expr_labeled.setParseAction(_tgrep_segmented_pattern_action)
        tgrep_expr2.setParseAction(functools.partial(_tgrep_conjunction_action,
                                                     join_char = ':'))
        tgrep_exprs.setParseAction(_tgrep_exprs_action)
    return tgrep_exprs.ignore('#' + pyparsing.restOfLine)
Example #7
class PortWithProfile(Node):
    """
    Variant of :class:`Port` that is used by "card" records inside
    the "Ports" property. It differs from the normal port syntax by having
    different entries inside the last section. Availability is not listed
    here, only priority. Priority does not have a colon before the actual
    number. This port is followed by profile assignment.
    """
    __fragments__ = {
        'name': 'port-name',
        'label': 'port-label',
        'priority': 'port-priority',
        'latency_offset': 'port-latency-offset',
        'availability': 'port-availability',
        'properties': lambda t: t['port-properties'].asList(),
        'profile_list': lambda t: t['port-profile-list'].asList(),
    }

    __syntax__ = (
        p.Optional('[Out] ').suppress()
        + p.Optional('[In] ').suppress()
        + p.Word(p.alphanums + " -;").setResultsName('port-name')
        + p.Suppress(':')
        # This part was very tricky to write. The label is basically arbitrary
        # localized Unicode text. We want to grab all of it in one go but
        # without consuming the upcoming and latest '(' character or the space
        # that comes immediately before.
        #
        # The syntax here combines a sequence of words, as defined by anything
        # other than a space and '(', delimited by a single whitespace.
        + p.Combine(
            p.OneOrMore(
                ~p.FollowedBy(
                    p.Regex(r'\(.+?\)')
                    + p.LineEnd()
                )
                + p.Regex('[^ \n]+')
                + p.White().suppress()
            ),
            ' '
        ).setResultsName('port-label')
        + p.Suppress('(')
        + p.Keyword('priority').suppress()
        + p.Optional(
            p.Suppress(':')
        )
        + p.Word(p.nums).setParseAction(
            lambda t: int(t[0])
        ).setResultsName('port-priority')
        + p.Optional(
            p.MatchFirst([
                p.Suppress(',') + p.Keyword('latency offset:').suppress()
                + p.Word(p.nums).setParseAction(lambda t: int(t[0]))
                + p.Literal("usec").suppress(),
                p.Empty().setParseAction(lambda t: '')
            ]).setResultsName('port-latency-offset')
        )
        + p.Optional(
            p.MatchFirst([
                p.Suppress(',') + p.Literal('not available'),
                p.Suppress(',') + p.Literal('available'),
                p.Empty().setParseAction(lambda t: '')
            ]).setResultsName('port-availability')
        )
        + p.Suppress(')')
        + p.LineEnd().suppress()
        + p.Optional(
            p.MatchFirst([
                p.LineStart().suppress()
                + p.NotAny(p.White(' '))
                + p.White('\t').suppress()
                + p.Keyword('Properties:').suppress()
                + p.LineEnd().suppress()
                + PropertyAttributeValue,
                p.Empty().setParseAction(lambda t: [])
            ]).setResultsName('port-properties')
        )
        + p.White('\t', max=3).suppress()
        + p.Literal("Part of profile(s)").suppress()
        + p.Suppress(":")
        + p.delimitedList(
            p.Word(p.alphanums + "+-:"), ", "
        ).setResultsName("port-profile-list")
    ).setResultsName("port")
Example #8
def _create_parser(self):

    #----------------------------------------------------------------------#
    # TOKENS                                                               #
    #----------------------------------------------------------------------#

    START = pp.StringStart().suppress()
    END = pp.StringEnd().suppress()

    #
    # NUMBER
    #
    #NUMBER = pp.Regex(r"[+-]?\d+(:?\.\d*)?(:?[eE][+-]?\d+)?")           .setParseAction( lambda s, loc, toks: [ self.create_NumberLiteral(int(toks[0])) ] )

    #
    # -foo_bar:
    TERM = pp.Word(pp.alphanums, pp.alphanums + '.:-+_/')

    #
    # "..."
    # '...'
    #
    QUOTED = pp.QuotedString('"', escChar='\\') | pp.QuotedString("'",
                                                                  escChar='\\')

    #
    # r"..."
    # r'...'
    #
    REGEXP = pp.Combine(pp.Suppress('r') + QUOTED).setParseAction(
        self.create_RegExLiteral)

    STRINGS = (REGEXP
               | QUOTED.setParseAction(self.create_StringLiteral)
               | TERM.setParseAction(self.create_StringLiteral))

    #
    # SYNTAX
    #
    LPAR, RPAR = map(pp.Suppress, "()")

    PLUS = pp.Suppress('+')
    MINUS = pp.Suppress('-')

    COLON = pp.Suppress(':')
    EQ = pp.Suppress('=')

    LT = pp.Literal('<')
    LTE = pp.Literal('<=')
    GT = pp.Literal('>')
    GTE = pp.Literal('>=')

    NOT = pp.Suppress('NOT')
    AND = pp.Suppress('AND')
    OR = pp.Suppress('OR')

    TOKENS = COLON | LPAR | RPAR | NOT | AND | OR | PLUS | MINUS

    #
    # IDENTIFIER (field_names)
    #
    FIELD = pp.Word(pp.alphas, pp.alphanums +
                    ".").setParseAction(lambda s, loc, toks: [toks[0]])

    #FIELD = (~(TOKENS))            .setParseAction( lambda s, loc, toks: [ toks[0] ] )

    basic_value = (~(TOKENS) + STRINGS)

    #----------------------------------------------------------------------#
    # TERMS                                                                #
    #----------------------------------------------------------------------#

    #
    # Simple TERM
    #
    simple_term = (
        # bool_term
        #|
        basic_value.copy()).setParseAction(self.create_SimpleTerm)

    #
    # COMPLEX TERM
    #
    #     <field name> ':' <field_value>
    #

    multi_term_expr = (
        (PLUS + basic_value).setParseAction(self.create_BoolMust)
        | (MINUS + basic_value).setParseAction(self.create_BoolMustNot)
        | basic_value)

    multi_term_sequence = (LPAR + pp.OneOrMore(multi_term_expr).setParseAction(
        self.create_MultiValue) + RPAR)

    compare_term = ((LTE | LT | GTE | GT) + basic_value).setParseAction(
        self.create_CompareValue)

    complex_value = (simple_term | multi_term_sequence | compare_term)

    complex_term = (FIELD + (EQ | COLON) + complex_value).setParseAction(
        self.create_ComplexTerm)

    #-------------------------------------------------------------------
    # EXPRESSION
    #-------------------------------------------------------------------

    query = pp.Forward()

    #
    #   <field>:<query>
    #   <term>
    #   ( <query> )
    #
    base_expr = (
        complex_term
        | simple_term
        | (LPAR + query + RPAR).setParseAction(lambda s, loc, toks: [toks[0]]))

    #-------------------------------------------------------------------
    # BOOLEAN EXPRESSION
    #-------------------------------------------------------------------

    # NOT expr
    #     expr
    unary_expr = ((NOT + base_expr).setParseAction(self.create_NotExpr)
                  | (PLUS + base_expr).setParseAction(self.create_BoolMust)
                  | (MINUS + base_expr).setParseAction(self.create_BoolMustNot)
                  | base_expr)

    #simple_expr = unary_expr

    #
    # expr ( AND expr ) *
    #
    and_expr = (unary_expr + pp.ZeroOrMore(AND + unary_expr)).setParseAction(
        self.create_AndExpr)

    #
    # expr ( OR expr ) *
    #
    or_expr = (and_expr + pp.ZeroOrMore(OR + and_expr)).setParseAction(
        self.create_OrExpr)

    boolean_expr = or_expr

    full_expr = boolean_expr

    #
    # clause ::= cond_expr +
    #
    clauses = pp.OneOrMore(full_expr)

    query <<= clauses

    #
    # PARSER
    #
    parser = (START + query.setParseAction(self.create_Query) + END)

    return parser
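
# The unary -> AND -> OR layering above is the classic hand-rolled encoding
# of operator precedence. For comparison, a minimal self-contained sketch of
# the same precedence using pyparsing's built-in infixNotation helper
# (illustrative only; not part of the original parser):
import pyparsing as pp

term = pp.Word(pp.alphanums)
bool_query = pp.infixNotation(term, [
    ("NOT", 1, pp.opAssoc.RIGHT),  # binds tightest
    ("AND", 2, pp.opAssoc.LEFT),
    ("OR", 2, pp.opAssoc.LEFT),    # binds loosest
])
print(bool_query.parseString("NOT a AND (b OR c)"))
# -> [[['NOT', 'a'], 'AND', ['b', 'OR', 'c']]]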
Example #9
def parse_file(file_name):

    number = pp.Word(pp.nums)
    identifier = pp.Word(pp.alphas + "_", pp.alphanums + "_")

    lbrace = pp.Literal('{').suppress()
    rbrace = pp.Literal('}').suppress()
    cls = pp.Keyword('class')
    colon = pp.Literal(":")
    semi = pp.Literal(";").suppress()
    langle = pp.Literal("<")
    rangle = pp.Literal(">")
    equals = pp.Literal("=")
    comma = pp.Literal(",")
    lparen = pp.Literal("(")
    rparen = pp.Literal(")")
    lbrack = pp.Literal("[")
    rbrack = pp.Literal("]")
    mins = pp.Literal("-")
    struct = pp.Keyword('struct')
    template = pp.Keyword('template')
    final = pp.Keyword('final')("final")
    stub = pp.Keyword('stub')("stub")
    with_colon = pp.Word(pp.alphanums + "_" + ":")
    btype = with_colon
    type = pp.Forward()
    nestedParens = pp.nestedExpr('<', '>')

    tmpl = pp.Group(
        btype("template_name") + langle.suppress() +
        pp.Group(pp.delimitedList(type)) + rangle.suppress())
    type << (tmpl | btype)
    enum_lit = pp.Keyword('enum')
    enum_class = pp.Group(enum_lit + cls)
    ns = pp.Keyword("namespace")

    enum_init = equals.suppress() + pp.Optional(mins) + number
    enum_value = pp.Group(identifier + pp.Optional(enum_init))
    enum_values = pp.Group(lbrace + pp.delimitedList(enum_value) +
                           pp.Optional(comma) + rbrace)
    content = pp.Forward()

    member_name = pp.Combine(
        pp.Group(identifier + pp.Optional(lparen + rparen)))
    attrib = pp.Group(lbrack.suppress() + lbrack.suppress() + pp.SkipTo(']') +
                      rbrack.suppress() + rbrack.suppress())
    opt_attribute = pp.Optional(attrib)("attribute")
    namespace = pp.Group(
        ns("type") + identifier("name") + lbrace +
        pp.Group(pp.OneOrMore(content))("content") + rbrace)
    enum = pp.Group(
        enum_class("type") + identifier("name") + colon.suppress() +
        identifier("underline_type") + enum_values("enum_values") +
        pp.Optional(semi).suppress())
    default_value = equals.suppress() + pp.SkipTo(';')
    class_member = pp.Group(
        type("type") + member_name("name") + opt_attribute +
        pp.Optional(default_value)("default") + semi.suppress())("member")
    template_param = pp.Group(identifier("type") + identifier("name"))
    template_def = pp.Group(template + langle +
                            pp.Group(pp.delimitedList(template_param))
                            ("params") + rangle)
    class_content = pp.Forward()
    class_def = pp.Group(
        pp.Optional(template_def)("template") + (cls | struct)("type") +
        with_colon("name") + pp.Optional(final) + pp.Optional(stub) +
        opt_attribute + lbrace +
        pp.Group(pp.ZeroOrMore(class_content))("members") + rbrace +
        pp.Optional(semi))
    content << (enum | class_def | namespace)
    class_content << (enum | class_def | class_member)
    for varname in "enum class_def class_member content namespace template_def".split(
    ):
        locals()[varname].setName(varname)
    rt = pp.OneOrMore(content)
    singleLineComment = "//" + pp.restOfLine
    rt.ignore(singleLineComment)
    rt.ignore(pp.cStyleComment)
    return rt.parseFile(file_name, parseAll=True)
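
# Hypothetical usage sketch for the grammar above (the file name and input
# are illustrative): write a small fragment the grammar accepts, then parse.
sample = """
namespace demo {
    enum class Color : int { Red = 1, Green, Blue };
    struct Point {
        int x = 0;
        int y;
    };
}
"""
with open("demo.txt", "w") as f:
    f.write(sample)
print(parse_file("demo.txt"))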
Example #10
pp_identifier = (
    # keywords are not identifiers,
    pp.NotAny(
        pp.Keyword("void")
        | pp.Keyword("unsigned")
        | pp.Keyword("signed")
        | pp.Keyword("int")
        | pp.Keyword("float")
        | pp.Keyword("const")
        | pp.Keyword("volatile")
        | pp.Keyword("extern")
        | pp.Keyword("static")) +
    pp.Word(pp.alphas + "_", pp.alphanums + "_", asKeyword=True))
#pp_identifier = pp.Word(pp.alphas+"_", pp.alphanums+"_")
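
# Spot check of the NotAny guard above (a sketch): reserved words are
# rejected as identifiers while ordinary names pass.
print(pp_identifier.parseString("my_var"))  # -> ['my_var']
try:
    pp_identifier.parseString("unsigned")
except pp.ParseException:
    print("keyword rejected")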
pp_semicolon = pp.Literal(";")


def get_type_spec(p):
    print("get_type_spec: " + str(p))


pp_type_spec = (pp.Keyword("void")
                | (pp.Optional(pp.Keyword("unsigned") | pp.Keyword("signed")) +
                   pp.Keyword("int"))
                | pp.Keyword("float")
                | pp_identifier).setParseAction(get_type_spec)
pp_type_qual = (pp.Keyword("const") | pp.Keyword("volatile"))
pp_strage_spec = (pp.Keyword("extern") | pp.Keyword("static"))
decl_spec = (pp.Optional(pp_type_qual)
             & pp.Optional(pp_strage_spec)
Example #11
class XChatScrollbackParser(text_parser.PyparsingSingleLineTextParser):
    """Parses XChat scrollback log files."""

    NAME = 'xchatscrollback'
    DESCRIPTION = 'Parser for XChat scrollback log files.'

    _ENCODING = 'utf-8'

    # Define what a log line should look like.
    LOG_LINE = (pyparsing.Literal('T').suppress() +
                pyparsing.Word(pyparsing.nums).setResultsName('timestamp') +
                pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName('text'))
    LOG_LINE.parseWithTabs()

    # Define the available log line structures.
    LINE_STRUCTURES = [
        ('logline', LOG_LINE),
    ]

    # Define for the stripping phase.
    STRIPPER = (pyparsing.Word('\x03', pyparsing.nums, max=3).suppress()
                | pyparsing.Word('\x02\x07\x08\x0f\x16\x1d\x1f',
                                 exact=1).suppress())

    # Define the structure for parsing <text> and get <nickname> and <text>
    MSG_NICK_START = pyparsing.Literal('<')
    MSG_NICK_END = pyparsing.Literal('>')
    MSG_NICK = pyparsing.SkipTo(MSG_NICK_END).setResultsName('nickname')
    MSG_ENTRY_NICK = pyparsing.Optional(MSG_NICK_START + MSG_NICK +
                                        MSG_NICK_END)
    MSG_ENTRY_TEXT = pyparsing.SkipTo(
        pyparsing.LineEnd()).setResultsName('text')
    MSG_ENTRY = MSG_ENTRY_NICK + MSG_ENTRY_TEXT
    MSG_ENTRY.parseWithTabs()

    def __init__(self):
        """Initializes a parser."""
        super(XChatScrollbackParser, self).__init__()
        self._offset = 0

    def _StripThenGetNicknameAndText(self, text):
        """Strips decorators from text and gets <nickname> if available.

        This method implements the XChat strip_color2 and fe_print_text
        functions, slightly modified to extract pure text. From the parsing
        point of view, once stripping is done the code takes everything as
        is, simply replacing tabs with spaces (as the original XChat code
        does). VerifyStructure therefore plays an important role in checking
        that the source file has the right format, since this method raises
        no parse exception and accepts any content.

        Args:
          text (str): text obtained from the log record.

        Returns:
          tuple: containing:

            nickname (str): nickname.
            text (str): text sent by nickname or service messages.
        """
        stripped = self.STRIPPER.transformString(text)
        structure = self.MSG_ENTRY.parseString(stripped)
        text = structure.text.replace('\t', ' ')
        return structure.nickname, text

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a log record structure.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): structure parsed from the log
              file.
        """
        if key != 'logline':
            logger.warning(
                'Unable to parse record, unknown structure: {0:s}'.format(key))
            return

        try:
            timestamp = int(structure.timestamp)
        except ValueError:
            logger.debug(
                'Invalid timestamp string {0:s}, skipping record'.format(
                    structure.timestamp))
            return

        try:
            nickname, text = self._StripThenGetNicknameAndText(structure.text)
        except pyparsing.ParseException:
            logger.debug('Error parsing entry at offset {0:d}'.format(
                self._offset))
            return

        event_data = XChatScrollbackEventData()
        event_data.nickname = nickname
        event_data.offset = self._offset
        event_data.text = text

        date_time = dfdatetime_posix_time.PosixTime(timestamp=timestamp)
        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_ADDED)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def VerifyStructure(self, parser_mediator, line):
        """Verify that this file is a XChat scrollback log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line was successfully parsed.
    """
        structure = self.LOG_LINE

        try:
            parsed_structure = structure.parseString(line)
        except pyparsing.ParseException:
            logger.debug('Not a XChat scrollback log file')
            return False

        try:
            int(parsed_structure.timestamp, 10)
        except ValueError:
            logger.debug(
                'Not a XChat scrollback log file, invalid timestamp string')
            return False

        return True
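
# The STRIPPER above relies on transformString to delete mIRC-style colour
# and formatting codes before MSG_ENTRY parsing. A standalone sketch of the
# same idea, on a hypothetical sample string:
STRIP = (pyparsing.Word('\x03', pyparsing.nums, max=3).suppress()
         | pyparsing.Word('\x02\x07\x08\x0f\x16\x1d\x1f', exact=1).suppress())
raw = '\x0304<nick>\x03 says \x02hello\x02'
print(STRIP.transformString(raw))  # -> '<nick> says hello'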
Example #12
class PopularityContestParser(text_parser.PyparsingSingleLineTextParser):
  """Parse popularity contest log files."""

  NAME = 'popularity_contest'
  DATA_FORMAT = 'Popularity Contest log file'

  _ASCII_PRINTABLES = pyparsing.printables
  _UNICODE_PRINTABLES = ''.join(
      chr(character) for character in range(65536)
      if not chr(character).isspace())

  MRU = pyparsing.Word(_UNICODE_PRINTABLES).setResultsName('mru')
  PACKAGE = pyparsing.Word(_ASCII_PRINTABLES).setResultsName('package')
  TAG = pyparsing.QuotedString('<', endQuoteChar='>').setResultsName('tag')

  HEADER = (
      pyparsing.Literal('POPULARITY-CONTEST-').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('session') +
      pyparsing.Literal('TIME:').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('timestamp') +
      pyparsing.Literal('ID:').suppress() +
      pyparsing.Word(pyparsing.alphanums, exact=32).setResultsName('id') +
      pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName('details'))

  FOOTER = (
      pyparsing.Literal('END-POPULARITY-CONTEST-').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('session') +
      pyparsing.Literal('TIME:').suppress() +
      text_parser.PyparsingConstants.INTEGER.setResultsName('timestamp'))

  LOG_LINE = (
      text_parser.PyparsingConstants.INTEGER.setResultsName('atime') +
      text_parser.PyparsingConstants.INTEGER.setResultsName('ctime') +
      (PACKAGE + TAG | PACKAGE + MRU + pyparsing.Optional(TAG)))

  LINE_STRUCTURES = [
      ('logline', LOG_LINE),
      ('header', HEADER),
      ('footer', FOOTER),
  ]

  _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

  _ENCODING = 'UTF-8'

  def _ParseLogLine(self, parser_mediator, structure):
    """Extracts events from a log line.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    # The required fields are <mru> and <atime>; log lines without <mru>
    # are not of interest.
    mru = self._GetValueFromStructure(structure, 'mru')
    if not mru:
      return

    event_data = PopularityContestEventData()
    event_data.mru = mru
    event_data.package = self._GetValueFromStructure(structure, 'package')
    event_data.record_tag = self._GetValueFromStructure(structure, 'tag')

    # The <atime> field (like <ctime>) is always present but can be 0. An
    # <atime> of 0 indicates the <NOFILES> case, so return safely without
    # logging.
    access_time = self._GetValueFromStructure(structure, 'atime')
    if access_time:
      # TODO: not doing any check on <tag> fields, even if only informative
      # probably it could be better to check for the expected values.
      date_time = dfdatetime_posix_time.PosixTime(timestamp=access_time)
      event = time_events.DateTimeValuesEvent(
          date_time, definitions.TIME_DESCRIPTION_LAST_ACCESS)
      parser_mediator.ProduceEventWithEventData(event, event_data)

    change_time = self._GetValueFromStructure(structure, 'ctime')
    if change_time:
      date_time = dfdatetime_posix_time.PosixTime(timestamp=change_time)
      event = time_events.DateTimeValuesEvent(
          date_time, definitions.TIME_DESCRIPTION_ENTRY_MODIFICATION)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure parsed from the log file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in self._SUPPORTED_KEYS:
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    # TODO: Add anomaly objects for abnormal timestamps, such as when the log
    # timestamp is greater than the session start.
    if key == 'logline':
      self._ParseLogLine(parser_mediator, structure)

    else:
      timestamp = self._GetValueFromStructure(structure, 'timestamp')
      if timestamp is None:
        logger.debug('[{0:s}] {1:s} with invalid timestamp.'.format(
            self.NAME, key))
        return

      session = self._GetValueFromStructure(structure, 'session')

      event_data = PopularityContestSessionEventData()
      # TODO: determine why session is formatted as a string.
      event_data.session = '{0!s}'.format(session)

      if key == 'header':
        event_data.details = self._GetValueFromStructure(structure, 'details')
        event_data.hostid = self._GetValueFromStructure(structure, 'id')
        event_data.status = 'start'

      elif key == 'footer':
        event_data.status = 'end'

      date_time = dfdatetime_posix_time.PosixTime(timestamp=timestamp)
      event = time_events.DateTimeValuesEvent(
          date_time, definitions.TIME_DESCRIPTION_ADDED)
      parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is a Popularity Contest log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line was successfully parsed.
    """
    try:
      self.HEADER.parseString(line)
    except pyparsing.ParseException:
      logger.debug('Not a Popularity Contest log file, invalid header')
      return False

    return True
Example #13
    regs_str = []
    regs_expr = []
    regs_init = []
    for rname in rnames:
        r = m2_expr.ExprId(rname, sz)
        r_init = m2_expr.ExprId(rname + '_init', sz)
        regs_str.append(rname)
        regs_expr.append(r)
        regs_init.append(r_init)
        env[rname] = r

    reginfo = reg_info(regs_str, regs_expr)
    return regs_expr, regs_init, reginfo


LPARENTHESIS = pyparsing.Literal("(")
RPARENTHESIS = pyparsing.Literal(")")


def int2expr(tokens):
    v = tokens[0]
    return (m2_expr.ExprInt, v)


def parse_op(tokens):
    v = tokens[0]
    return (m2_expr.ExprOp, v)


def parse_id(tokens):
    v = tokens[0]
Example #14
def normalize(normal_value):
    '''returns function to normalize output of keyword lists'''
    return lambda s, l, t, n=normal_value: t.__setitem__(0, n)

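# keyword_list is defined elsewhere in this module; a minimal sketch of what
# it is assumed to do (hypothetical reconstruction): build a MatchFirst of
# caseless literals, longest first so that phrases beat their own prefixes.
def _keyword_list_sketch(words):
    return pp.MatchFirst(pp.CaselessLiteral(word)
                         for word in sorted(words, key=len, reverse=True))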

# ========Grammar definitions============

# ***Comparison Operators***
EQUALS = keyword_list(['=', 'equal', 'equals', 'is',
                       'to']).setParseAction(normalize('='))
GT = keyword_list(
    ['>', 'greater than', 'more than', 'is greater than',
     'not less than']).setParseAction(normalize('>'))
LT = keyword_list(['<', 'less than', 'is less than',
                   'not more than']).setParseAction(normalize('<'))
GTE = pp.Literal('>=')
LTE = pp.Literal('<=')
NOTEQUAL = keyword_list(['<>', 'not equal',
                         '!=']).setParseAction(normalize('!='))
compare = (GT ^ LT ^ GTE ^ LTE ^ EQUALS ^ NOTEQUAL).setResultsName('compare')
ASSIGN = pp.Literal(':=')
operator = (compare ^ ASSIGN).setResultsName('operator')

# ***Logicals***
OR = pp.CaselessKeyword('OR')
AND = pp.CaselessKeyword('AND')
NOT = pp.CaselessKeyword('NOT')
logical = (AND ^ OR ^ NOT)

# ***Primitive data types for comparisons***
NUMBER = pp.Regex(r'\d+(\.\d*)?')
Example #15
    def _generate_grammar(self):
        # Define grammar:
        pp.ParserElement.setDefaultWhitespaceChars(" \t")

        def add_element(name: str, value: pp.ParserElement):
            nonlocal self
            if self.debug:
                value.setName(name)
                value.setDebug()
            return value

        EOL = add_element("EOL", pp.Suppress(pp.LineEnd()))
        Else = add_element("Else", pp.Keyword("else"))
        Identifier = add_element(
            "Identifier", pp.Word(f"{pp.alphas}_", bodyChars=pp.alphanums + "_-./")
        )
        BracedValue = add_element(
            "BracedValue",
            pp.nestedExpr(
                ignoreExpr=pp.quotedString
                | pp.QuotedString(
                    quoteChar="$(", endQuoteChar=")", escQuote="\\", unquoteResults=False
                )
            ).setParseAction(lambda s, l, t: ["(", *t[0], ")"]),
        )

        Substitution = add_element(
            "Substitution",
            pp.Combine(
                pp.Literal("$")
                + (
                    (
                        (pp.Literal("$") + Identifier + pp.Optional(pp.nestedExpr()))
                        | (pp.Literal("(") + Identifier + pp.Literal(")"))
                        | (pp.Literal("{") + Identifier + pp.Literal("}"))
                        | (
                            pp.Literal("$")
                            + pp.Literal("{")
                            + Identifier
                            + pp.Optional(pp.nestedExpr())
                            + pp.Literal("}")
                        )
                        | (pp.Literal("$") + pp.Literal("[") + Identifier + pp.Literal("]"))
                    )
                )
            ),
        )
        LiteralValuePart = add_element(
            "LiteralValuePart", pp.Word(pp.printables, excludeChars="$#{}()")
        )
        SubstitutionValue = add_element(
            "SubstitutionValue",
            pp.Combine(pp.OneOrMore(Substitution | LiteralValuePart | pp.Literal("$"))),
        )
        FunctionValue = add_element(
            "FunctionValue",
            pp.Group(
                pp.Suppress(pp.Literal("$") + pp.Literal("$"))
                + Identifier
                + pp.nestedExpr()  # .setParseAction(lambda s, l, t: ['(', *t[0], ')'])
            ).setParseAction(lambda s, l, t: handle_function_value(*t)),
        )
        Value = add_element(
            "Value",
            pp.NotAny(Else | pp.Literal("}") | EOL)
            + (
                pp.QuotedString(quoteChar='"', escChar="\\")
                | FunctionValue
                | SubstitutionValue
                | BracedValue
            ),
        )

        Values = add_element("Values", pp.ZeroOrMore(Value)("value"))

        Op = add_element(
            "OP",
            pp.Literal("=")
            | pp.Literal("-=")
            | pp.Literal("+=")
            | pp.Literal("*=")
            | pp.Literal("~="),
        )

        Key = add_element("Key", Identifier)

        Operation = add_element(
            "Operation", Key("key") + pp.locatedExpr(Op)("operation") + Values("value")
        )
        CallArgs = add_element("CallArgs", pp.nestedExpr())

        def parse_call_args(results):
            out = ""
            for item in chain(*results):
                if isinstance(item, str):
                    out += item
                else:
                    out += "(" + parse_call_args(item) + ")"
            return out

        CallArgs.setParseAction(parse_call_args)

        Load = add_element("Load", pp.Keyword("load") + CallArgs("loaded"))
        Include = add_element(
            "Include", pp.Keyword("include") + pp.locatedExpr(CallArgs)("included")
        )
        Option = add_element("Option", pp.Keyword("option") + CallArgs("option"))
        RequiresCondition = add_element("RequiresCondition", pp.originalTextFor(pp.nestedExpr()))

        def parse_requires_condition(s, l, t):
            # The following expression unwraps the condition via the additional info
            # set by originalTextFor.
            condition_without_parentheses = s[t._original_start + 1 : t._original_end - 1]

            # And this replaces the colons with '&&' similar how it's done for 'Condition'.
            condition_without_parentheses = (
                condition_without_parentheses.strip().replace(":", " && ").strip(" && ")
            )
            return condition_without_parentheses

        RequiresCondition.setParseAction(parse_requires_condition)
        Requires = add_element(
            "Requires", pp.Keyword("requires") + RequiresCondition("project_required_condition")
        )

        FunctionArgumentsAsString = add_element(
            "FunctionArgumentsAsString", pp.originalTextFor(pp.nestedExpr())
        )
        QtNoMakeTools = add_element(
            "QtNoMakeTools",
            pp.Keyword("qtNomakeTools") + FunctionArgumentsAsString("qt_no_make_tools_arguments"),
        )

        # ignore the whole thing...
        DefineTestDefinition = add_element(
            "DefineTestDefinition",
            pp.Suppress(
                pp.Keyword("defineTest")
                + CallArgs
                + pp.nestedExpr(opener="{", closer="}", ignoreExpr=pp.LineEnd())
            ),
        )

        # ignore the whole thing...
        ForLoop = add_element(
            "ForLoop",
            pp.Suppress(
                pp.Keyword("for")
                + CallArgs
                + pp.nestedExpr(opener="{", closer="}", ignoreExpr=pp.LineEnd())
            ),
        )

        # ignore the whole thing...
        ForLoopSingleLine = add_element(
            "ForLoopSingleLine",
            pp.Suppress(pp.Keyword("for") + CallArgs + pp.Literal(":") + pp.SkipTo(EOL)),
        )

        # ignore the whole thing...
        FunctionCall = add_element("FunctionCall", pp.Suppress(Identifier + pp.nestedExpr()))

        Scope = add_element("Scope", pp.Forward())

        Statement = add_element(
            "Statement",
            pp.Group(
                Load
                | Include
                | Option
                | Requires
                | QtNoMakeTools
                | ForLoop
                | ForLoopSingleLine
                | DefineTestDefinition
                | FunctionCall
                | Operation
            ),
        )
        StatementLine = add_element("StatementLine", Statement + (EOL | pp.FollowedBy("}")))
        StatementGroup = add_element(
            "StatementGroup", pp.ZeroOrMore(StatementLine | Scope | pp.Suppress(EOL))
        )

        Block = add_element(
            "Block",
            pp.Suppress("{")
            + pp.Optional(EOL)
            + StatementGroup
            + pp.Optional(EOL)
            + pp.Suppress("}")
            + pp.Optional(EOL),
        )

        ConditionEnd = add_element(
            "ConditionEnd",
            pp.FollowedBy(
                (pp.Optional(pp.White()) + (pp.Literal(":") | pp.Literal("{") | pp.Literal("|")))
            ),
        )

        ConditionPart1 = add_element(
            "ConditionPart1", (pp.Optional("!") + Identifier + pp.Optional(BracedValue))
        )
        ConditionPart2 = add_element("ConditionPart2", pp.CharsNotIn("#{}|:=\\\n"))
        ConditionPart = add_element(
            "ConditionPart", (ConditionPart1 ^ ConditionPart2) + ConditionEnd
        )

        ConditionOp = add_element("ConditionOp", pp.Literal("|") ^ pp.Literal(":"))
        ConditionWhiteSpace = add_element(
            "ConditionWhiteSpace", pp.Suppress(pp.Optional(pp.White(" ")))
        )

        ConditionRepeated = add_element(
            "ConditionRepeated", pp.ZeroOrMore(ConditionOp + ConditionWhiteSpace + ConditionPart)
        )

        Condition = add_element("Condition", pp.Combine(ConditionPart + ConditionRepeated))
        Condition.setParseAction(lambda x: " ".join(x).strip().replace(":", " && ").strip(" && "))
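        # e.g. the qmake condition "win32:debug" is rewritten as "win32 && debug".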

        # Weird thing like write_file(a)|error() where error() is the alternative condition
        # which happens to be a function call. In this case there is no scope, but our code expects
        # a scope with a list of statements, so create a fake empty statement.
        ConditionEndingInFunctionCall = add_element(
            "ConditionEndingInFunctionCall",
            pp.Suppress(ConditionOp)
            + FunctionCall
            + pp.Empty().setParseAction(lambda x: [[]]).setResultsName("statements"),
        )

        SingleLineScope = add_element(
            "SingleLineScope",
            pp.Suppress(pp.Literal(":")) + pp.Group(Block | (Statement + EOL))("statements"),
        )
        MultiLineScope = add_element("MultiLineScope", Block("statements"))

        SingleLineElse = add_element(
            "SingleLineElse",
            pp.Suppress(pp.Literal(":")) + (Scope | Block | (Statement + pp.Optional(EOL))),
        )
        MultiLineElse = add_element("MultiLineElse", Block)
        ElseBranch = add_element("ElseBranch", pp.Suppress(Else) + (SingleLineElse | MultiLineElse))

        # Scope is already add_element'ed in the forward declaration above.
        Scope <<= pp.Group(
            Condition("condition")
            + (SingleLineScope | MultiLineScope | ConditionEndingInFunctionCall)
            + pp.Optional(ElseBranch)("else_statements")
        )

        Grammar = StatementGroup("statements")
        Grammar.ignore(pp.pythonStyleComment())

        return Grammar
Example no. 16
0
class SkyDriveOldLogParser(text_parser.PyparsingSingleLineTextParser):
    """Parse SkyDrive old log files."""

    NAME = u'skydrive_log_old'
    DESCRIPTION = u'Parser for OneDrive (or SkyDrive) old log files.'

    _ENCODING = u'UTF-8-SIG'

    _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
    _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

    # Common SDOL (SkyDriveOldLog) pyparsing objects.
    _SDOL_COLON = pyparsing.Literal(u':')
    _SDOL_EXCLAMATION = pyparsing.Literal(u'!')

    # Date and time format used in the header is: DD-MM-YYYY hhmmss.###
    # For example: 08-01-2013 21:22:28.999
    _SDOL_DATE_TIME = pyparsing.Group(
        _TWO_DIGITS.setResultsName(u'month') + pyparsing.Suppress(u'-') +
        _TWO_DIGITS.setResultsName(u'day_of_month') +
        pyparsing.Suppress(u'-') + _FOUR_DIGITS.setResultsName(u'year') +
        text_parser.PyparsingConstants.TIME_MSEC_ELEMENTS).setResultsName(
            u'date_time')

    _SDOL_SOURCE_CODE = pyparsing.Combine(
        pyparsing.CharsNotIn(u':') + _SDOL_COLON +
        text_parser.PyparsingConstants.INTEGER + _SDOL_EXCLAMATION +
        pyparsing.Word(pyparsing.printables)).setResultsName(u'source_code')

    _SDOL_LOG_LEVEL = (pyparsing.Literal(u'(').suppress() +
                       pyparsing.SkipTo(u')').setResultsName(u'log_level') +
                       pyparsing.Literal(u')').suppress())

    _SDOL_LINE = (_SDOL_DATE_TIME + _SDOL_SOURCE_CODE + _SDOL_LOG_LEVEL +
                  _SDOL_COLON +
                  pyparsing.SkipTo(pyparsing.lineEnd).setResultsName(u'text'))

    # Sometimes the timestamped log line is followed by an empty line,
    # then by a file name plus other data and finally by another empty
    # line. It can also happen that a log line is split in two parts.
    # These lines are not discarded; an event is generated ad-hoc (see
    # source), based on the last timestamped line if available.
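    # Illustrative continuation lines (assumed shape):
    #   SomeFile.doc other data
    #   -> rest of a message split across lines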
    _SDOL_NO_HEADER_SINGLE_LINE = (
        pyparsing.Optional(pyparsing.Literal(u'->').suppress()) +
        pyparsing.SkipTo(pyparsing.lineEnd).setResultsName(u'text'))

    # Define the available log line structures.
    LINE_STRUCTURES = [
        (u'logline', _SDOL_LINE),
        (u'no_header_single_line', _SDOL_NO_HEADER_SINGLE_LINE),
    ]

    def __init__(self):
        """Initializes a parser object."""
        super(SkyDriveOldLogParser, self).__init__()
        self._last_date_time = None
        self._last_event_data = None
        self.offset = 0

    def _ParseLogline(self, parser_mediator, structure):
        """Parse a logline and store appropriate attributes.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
        # TODO: Verify if date and time value is locale dependent.
        month, day_of_month, year, hours, minutes, seconds, milliseconds = (
            structure.date_time)

        time_elements_tuple = (year, month, day_of_month, hours, minutes,
                               seconds, milliseconds)

        try:
            date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
                time_elements_tuple=time_elements_tuple)
        except ValueError:
            parser_mediator.ProduceExtractionError(
                u'invalid date time value: {0!s}'.format(structure.date_time))
            return

        event_data = SkyDriveOldLogEventData()
        event_data.log_level = structure.log_level
        event_data.offset = self.offset
        event_data.source_code = structure.source_code
        event_data.text = structure.text

        event = time_events.DateTimeValuesEvent(
            date_time, eventdata.EventTimestamp.ADDED_TIME)
        parser_mediator.ProduceEventWithEventData(event, event_data)

        self._last_date_time = date_time
        self._last_event_data = event_data

    def _ParseNoHeaderSingleLine(self, parser_mediator, structure):
        """Parse an isolated header line and store appropriate attributes.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
        if not self._last_event_data:
            logging.debug(
                u'SkyDrive, found isolated line with no previous events')
            return

        event_data = SkyDriveOldLogEventData()
        event_data.offset = self._last_event_data.offset
        event_data.text = structure.text

        event = time_events.DateTimeValuesEvent(
            self._last_date_time, eventdata.EventTimestamp.ADDED_TIME)
        parser_mediator.ProduceEventWithEventData(event, event_data)

        # TODO think to a possible refactoring for the non-header lines.
        self._last_date_time = None
        self._last_event_data = None

    def ParseRecord(self, parser_mediator, key, structure):
        """Parse each record structure and return an EventObject if applicable.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
        if key not in (u'logline', u'no_header_single_line'):
            raise errors.ParseError(
                u'Unable to parse record, unknown structure: {0:s}'.format(
                    key))

        if key == u'logline':
            self._ParseLogline(parser_mediator, structure)

        elif key == u'no_header_single_line':
            self._ParseNoHeaderSingleLine(parser_mediator, structure)

    def VerifyStructure(self, parser_mediator, line):
        """Verify that this file is a SkyDrive old log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (bytes): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
        try:
            structure = self._SDOL_LINE.parseString(line)
        except pyparsing.ParseException:
            logging.debug(u'Not a SkyDrive old log file')
            return False

        # Note: the positional order follows the result names set in
        # _SDOL_DATE_TIME (month first), as in _ParseLogline.
        month, day_of_month, year, hours, minutes, seconds, milliseconds = (
            structure.date_time)

        time_elements_tuple = (year, month, day_of_month, hours, minutes,
                               seconds, milliseconds)

        try:
            dfdatetime_time_elements.TimeElementsInMilliseconds(
                time_elements_tuple=time_elements_tuple)
        except ValueError:
            logging.debug(
                u'Not a SkyDrive old log file, invalid date and time: {0!s}'.
                format(structure.date_time))
            return False

        return True
Example no. 17
0
class SELinuxParser(text_parser.PyparsingSingleLineTextParser):
  """Parser for SELinux audit.log files."""

  NAME = 'selinux'
  DESCRIPTION = 'Parser for SELinux audit.log files.'

  _ENCODING = 'utf-8'

  _SELINUX_KEY_VALUE_GROUP = pyparsing.Group(
      pyparsing.Word(pyparsing.alphanums).setResultsName('key') +
      pyparsing.Suppress('=') + (
          pyparsing.QuotedString('"') ^
          pyparsing.Word(pyparsing.printables)).setResultsName('value'))

  _SELINUX_KEY_VALUE_DICT = pyparsing.Dict(
      pyparsing.ZeroOrMore(_SELINUX_KEY_VALUE_GROUP))

  _SELINUX_BODY_GROUP = pyparsing.Group(
      pyparsing.Empty().setResultsName('key') +
      pyparsing.restOfLine.setResultsName('value'))

  _SELINUX_MSG_GROUP = pyparsing.Group(
      pyparsing.Literal('msg').setResultsName('key') +
      pyparsing.Suppress('=audit(') +
      pyparsing.Word(pyparsing.nums).setResultsName('seconds') +
      pyparsing.Suppress('.') +
      pyparsing.Word(pyparsing.nums).setResultsName('milliseconds') +
      pyparsing.Suppress(':') +
      pyparsing.Word(pyparsing.nums).setResultsName('serial') +
      pyparsing.Suppress('):'))

  _SELINUX_TYPE_GROUP = pyparsing.Group(
      pyparsing.Literal('type').setResultsName('key') +
      pyparsing.Suppress('=') + (
          pyparsing.Word(pyparsing.srange('[A-Z_]')) ^
          pyparsing.Regex(r'UNKNOWN\[[0-9]+\]')).setResultsName('value'))

  _SELINUX_TYPE_AVC_GROUP = pyparsing.Group(
      pyparsing.Literal('type').setResultsName('key') +
      pyparsing.Suppress('=') + (
          pyparsing.Word('AVC') ^
          pyparsing.Word('USER_AVC')).setResultsName('value'))

  # A log line is formatted as: type=TYPE msg=audit([0-9]+\.[0-9]+:[0-9]+): .*
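  # An illustrative line (assumed):
  # type=AVC msg=audit(1105758604.519:420): avc: denied { getattr } for pid=2986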
  _SELINUX_LOG_LINE = pyparsing.Dict(
      _SELINUX_TYPE_GROUP +
      _SELINUX_MSG_GROUP +
      _SELINUX_BODY_GROUP)

  LINE_STRUCTURES = [('line', _SELINUX_LOG_LINE)]

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a structure of tokens derived from a line of a text file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key != 'line':
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    msg_value = self._GetValueFromStructure(structure, 'msg')
    if not msg_value:
      parser_mediator.ProduceExtractionWarning(
          'missing msg value: {0!s}'.format(structure))
      return

    try:
      seconds = int(msg_value[0], 10)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'unsupported number of seconds in msg value: {0!s}'.format(
              structure))
      return

    try:
      milliseconds = int(msg_value[1], 10)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'unsupported number of milliseconds in msg value: {0!s}'.format(
              structure))
      return

    timestamp = ((seconds * 1000) + milliseconds) * 1000
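    # The computed timestamp is in microseconds; the audit value itself only
    # carries millisecond precision.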
    body_text = structure[2][0]

    try:
      # Try to parse the body text as key value pairs. Note that not
      # all log lines will be properly formatted key value pairs.
      body_structure = self._SELINUX_KEY_VALUE_DICT.parseString(body_text)
    except pyparsing.ParseException:
      body_structure = pyparsing.ParseResults()

    event_data = SELinuxLogEventData()
    event_data.audit_type = self._GetValueFromStructure(structure, 'type')
    event_data.body = body_text
    event_data.pid = self._GetValueFromStructure(body_structure, 'pid')
    # TODO: pass line number to offset or remove.
    event_data.offset = 0

    event = time_events.TimestampEvent(
        timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyStructure(self, parser_mediator, line):
    """Verifies if a line from a text file is in the expected format.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
    try:
      structure = self._SELINUX_LOG_LINE.parseString(line)
    except pyparsing.ParseException as exception:
      logger.debug(
          'Unable to parse SELinux audit.log file with error: {0!s}'.format(
              exception))
      return False

    return 'type' in structure and 'msg' in structure
Example no. 18
0
class SkyDriveLogParser(text_parser.PyparsingMultiLineTextParser):
    """Parses SkyDrive log files."""

    NAME = u'skydrive_log'
    DESCRIPTION = u'Parser for OneDrive (or SkyDrive) log files.'

    _ENCODING = u'utf-8'

    # Common SDF (SkyDrive Format) structures.
    _COMMA = pyparsing.Literal(u',').suppress()
    _HYPHEN = text_parser.PyparsingConstants.HYPHEN

    _THREE_DIGITS = text_parser.PyparsingConstants.THREE_DIGITS
    _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

    MSEC = pyparsing.Word(pyparsing.nums,
                          max=3).setParseAction(text_parser.PyParseIntCast)
    IGNORE_FIELD = pyparsing.CharsNotIn(u',').suppress()

    # Date and time format used in the header is: YYYY-MM-DD-hhmmss.###
    # For example: 2013-07-25-160323.291
    _SDF_HEADER_DATE_TIME = pyparsing.Group(
        text_parser.PyparsingConstants.DATE_ELEMENTS + _HYPHEN +
        _TWO_DIGITS.setResultsName(u'hours') +
        _TWO_DIGITS.setResultsName(u'minutes') +
        _TWO_DIGITS.setResultsName(u'seconds') +
        pyparsing.Literal(u'.').suppress() +
        _THREE_DIGITS.setResultsName(u'milliseconds')).setResultsName(
            u'header_date_time')

    # Date and time format used in lines other than the header is:
    # MM-DD-YY,hh:mm:ss.###
    # For example: 07-25-13,16:06:31.820
    _SDF_DATE_TIME = (_TWO_DIGITS.setResultsName(u'month') + _HYPHEN +
                      _TWO_DIGITS.setResultsName(u'day') + _HYPHEN +
                      _TWO_DIGITS.setResultsName(u'year') + _COMMA +
                      text_parser.PyparsingConstants.TIME_ELEMENTS +
                      pyparsing.Suppress('.') +
                      _THREE_DIGITS.setResultsName(u'milliseconds')
                      ).setResultsName(u'date_time')

    _SDF_HEADER_START = (
        pyparsing.Literal(u'######').suppress() +
        pyparsing.Literal(u'Logging started.').setResultsName(u'log_start'))

    # Multiline entry end marker, matched from right to left.
    _SDF_ENTRY_END = pyparsing.StringEnd() | _SDF_HEADER_START | _SDF_DATE_TIME

    _SDF_LINE = (_SDF_DATE_TIME + _COMMA + IGNORE_FIELD + _COMMA +
                 IGNORE_FIELD + _COMMA + IGNORE_FIELD + _COMMA +
                 pyparsing.CharsNotIn(u',').setResultsName(u'module') +
                 _COMMA +
                 pyparsing.CharsNotIn(u',').setResultsName(u'source_code') +
                 _COMMA + IGNORE_FIELD + _COMMA + IGNORE_FIELD + _COMMA +
                 pyparsing.CharsNotIn(u',').setResultsName(u'log_level') +
                 _COMMA +
                 pyparsing.SkipTo(_SDF_ENTRY_END).setResultsName(u'detail') +
                 pyparsing.ZeroOrMore(pyparsing.lineEnd()))
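    # An illustrative line matched by _SDF_LINE (assumed field layout):
    # 07-25-13,16:06:31.820,f,g,h,WNS,absconn.cpp:177,i,j,VRB,detail text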

    _SDF_HEADER = (
        _SDF_HEADER_START +
        pyparsing.Literal(u'Version=').setResultsName(u'version_string') +
        pyparsing.Word(pyparsing.nums + u'.').setResultsName(u'version_number')
        + pyparsing.Literal(u'StartSystemTime:').suppress() +
        _SDF_HEADER_DATE_TIME + pyparsing.Literal(
            u'StartLocalTime:').setResultsName(u'local_time_string') +
        pyparsing.SkipTo(pyparsing.lineEnd()).setResultsName(u'details') +
        pyparsing.lineEnd())

    LINE_STRUCTURES = [(u'logline', _SDF_LINE), (u'header', _SDF_HEADER)]

    def _ParseHeader(self, parser_mediator, structure):
        """Parse header lines and store appropriate attributes.

    [u'Logging started.', u'Version=', u'17.0.2011.0627',
    [2013, 7, 25], 16, 3, 23, 291, u'StartLocalTime', u'<details>']

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
        try:
            date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
                time_elements_tuple=structure.header_date_time)
        except ValueError:
            parser_mediator.ProduceExtractionError(
                u'invalid date time value: {0!s}'.format(
                    structure.header_date_time))
            return

        event_data = SkyDriveLogEventData()
        # TODO: refactor detail to individual event data attributes.
        event_data.detail = u'{0:s} {1:s} {2:s} {3:s} {4:s}'.format(
            structure.log_start, structure.version_string,
            structure.version_number, structure.local_time_string,
            structure.details)

        event = time_events.DateTimeValuesEvent(
            date_time, eventdata.EventTimestamp.ADDED_TIME)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def _ParseLine(self, parser_mediator, structure):
        """Parses a logline and store appropriate attributes.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
        # TODO: Verify if date and time value is locale dependent.
        month, day_of_month, year, hours, minutes, seconds, milliseconds = (
            structure.date_time)

        year += 2000
        time_elements_tuple = (year, month, day_of_month, hours, minutes,
                               seconds, milliseconds)

        try:
            date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
                time_elements_tuple=time_elements_tuple)
        except ValueError:
            parser_mediator.ProduceExtractionError(
                u'invalid date time value: {0!s}'.format(structure.date_time))
            return

        event_data = SkyDriveLogEventData()
        # Replace newlines with spaces in structure.detail to preserve output.
        # TODO: refactor detail to individual event data attributes.
        event_data.detail = structure.detail.replace(u'\n', u' ')
        event_data.log_level = structure.log_level
        event_data.module = structure.module
        event_data.source_code = structure.source_code

        event = time_events.DateTimeValuesEvent(
            date_time, eventdata.EventTimestamp.ADDED_TIME)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def ParseRecord(self, parser_mediator, key, structure):
        """Parse each record structure and return an EventObject if applicable.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
        if key not in (u'header', u'logline'):
            raise errors.ParseError(
                u'Unable to parse record, unknown structure: {0:s}'.format(
                    key))

        if key == u'logline':
            self._ParseLine(parser_mediator, structure)

        elif key == u'header':
            self._ParseHeader(parser_mediator, structure)

    def VerifyStructure(self, parser_mediator, line):
        """Verify that this file is a SkyDrive log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (bytes): line from a text file.

    Returns:
      bool: True if the line is in the expected format, False if not.
    """
        try:
            structure = self._SDF_HEADER.parseString(line)
        except pyparsing.ParseException:
            logging.debug(u'Not a SkyDrive log file')
            return False

        try:
            dfdatetime_time_elements.TimeElementsInMilliseconds(
                time_elements_tuple=structure.header_date_time)
        except ValueError:
            logging.debug(
                u'Not a SkyDrive log file, invalid date and time: {0!s}'.
                format(structure.header_date_time))
            return False

        return True
Example no. 19
0
        + p.delimitedList(
            p.Word(p.alphanums + "+-:"), ", "
        ).setResultsName("port-profile-list")
    ).setResultsName("port")


# =========================
# Non-collection attributes
# =========================

AttributeName = p.Regex("[a-zA-Z][^:\n]+").setResultsName("attribute-name")


ActivePortAttributeValue = (
    p.Combine(
        p.Or([p.Literal('[Out] '), p.Literal('[In] ')]).suppress()
        + p.Regex("[^\n]*")
        + p.LineEnd().suppress(),
        adjacent=False
    ).setResultsName("attribute-value")
)


VolumeAttributeValue = (
    p.Combine(
        p.Or([
            p.Or([
                p.Literal("(invalid)"),
                p.Regex("([0-9]+: +[0-9]+% ?)+")
            ]),
            p.Or([
Example no. 20
0
def sl(s):
    return pp.Suppress(pp.Literal(s))
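# e.g. sl(",") builds a parser that matches a literal comma and suppresses it
# from the parse results.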
Example no. 21
0
    def __init__(self):

        # Bibtex keywords

        string_def_start = pp.CaselessKeyword("@string")
        preamble_start = pp.CaselessKeyword("@preamble")
        comment_line_start = pp.CaselessKeyword('@comment')

        # String names
        string_name = pp.Word(pp.alphanums + '_')('StringName')
        self.set_string_name_parse_action(lambda s, l, t: None)
        string_name.addParseAction(self._string_name_parse_action)

        # Values inside bibtex fields
        # Values can be integer or string expressions. The latter may use
        # quoted or braced values.

        # Integer values
        integer = pp.Word(pp.nums)('Integer')

        # Braced values: braced values can contain nested (but balanced) braces
        braced_value_content = pp.CharsNotIn('{}')
        braced_value = pp.Forward()  # Recursive definition for nested braces
        braced_value <<= pp.originalTextFor(
            '{' + pp.ZeroOrMore(braced_value | braced_value_content) + '}'
            )('BracedValue')
        braced_value.setParseAction(remove_braces)
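        # e.g. '{A {nested} value}' is captured whole and, assuming remove_braces
        # strips only the outermost pair, yields 'A {nested} value'.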
        # TODO add ignore for "\}" and "\{" ?
        # TODO @ are not parsed by bibtex in braces

        # Quoted values: may contain braced content with balanced braces
        brace_in_quoted = pp.nestedExpr('{', '}', ignoreExpr=None)
        text_in_quoted = pp.CharsNotIn('"{}')
        # (quotes should be escaped by braces in quoted value)
        quoted_value = pp.originalTextFor(
            '"' + pp.ZeroOrMore(text_in_quoted | brace_in_quoted) + '"'
            )('QuotedValue')
        quoted_value.addParseAction(pp.removeQuotes)

        # String expressions
        string_expr = pp.delimitedList(
            (quoted_value | braced_value | string_name), delim='#'
            )('StringExpression')
        self.set_string_expression_parse_action(lambda s, l, t: None)
        string_expr.addParseAction(self._string_expr_parse_action)

        value = (integer | string_expr)('Value')

        # Entries

        # @EntryType { ...
        entry_type = (pp.Suppress('@') + pp.Word(pp.alphas))('EntryType')
        entry_type.setParseAction(first_token)

        # Entry key: any character up to a ',' without leading and trailing
        # spaces.
        key = pp.SkipTo(',')('Key')  # Exclude @',\#}{~%
        key.setParseAction(lambda s, l, t: first_token(s, l, t).strip())

        # Field name: word of letters, digits, dashes and underscores
        field_name = pp.Word(pp.alphanums + '_-()')('FieldName')
        field_name.setParseAction(first_token)

        # Field: field_name = value
        field = pp.Group(field_name + pp.Suppress('=') + value)('Field')
        field.setParseAction(field_to_pair)

        # List of fields: comma-separated fields
        field_list = (pp.delimitedList(field) + pp.Suppress(pp.Optional(','))
                      )('Fields')
        field_list.setParseAction(
            lambda s, l, t: {k: v for (k, v) in reversed(t.get('Fields'))})

        # Entry: type, key, and fields
        self.entry = (entry_type +
                      in_braces_or_pars(key + pp.Suppress(',') + field_list)
                      )('Entry')
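        # Matches, for instance: @article{knuth84, title = {The {TeX}book}, year = 1984}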

        # Other stuff: comments, string definitions, and preamble declarations

        # Explicit comments: @comment + everything up to next valid declaration
        # starting on new line.
        not_an_implicit_comment = (pp.LineStart() + pp.Literal('@')
                                   ) | pp.stringEnd()
        self.explicit_comment = (
            pp.Suppress(comment_line_start) +
            pp.originalTextFor(pp.SkipTo(not_an_implicit_comment),
                               asString=True))('ExplicitComment')
        self.explicit_comment.addParseAction(remove_trailing_newlines)
        self.explicit_comment.addParseAction(remove_braces)
        # Previous implementation included comment until next '}'.
        # This is however not in line with bibtex behavior, which is to only
        # ignore until EOL. Brace stripping is arbitrary here but avoids
        # duplication on bibtex write.

        # An empty implicit_comment match leads to an infinite loop in ZeroOrMore
        def mustNotBeEmpty(t):
            if not t[0]:
                raise pp.ParseException("Match must not be empty.")

        # Implicit comments: not anything else
        self.implicit_comment = pp.originalTextFor(
            pp.SkipTo(not_an_implicit_comment).setParseAction(mustNotBeEmpty),
            asString=True)('ImplicitComment')
        self.implicit_comment.addParseAction(remove_trailing_newlines)

        # String definition
        self.string_def = (pp.Suppress(string_def_start) + in_braces_or_pars(
            string_name +
            pp.Suppress('=') +
            string_expr('StringValue')
            ))('StringDefinition')

        # Preamble declaration
        self.preamble_decl = (pp.Suppress(preamble_start) +
                              in_braces_or_pars(value))('PreambleDeclaration')

        # Main bibtex expression

        self.main_expression = pp.ZeroOrMore(
                self.string_def |
                self.preamble_decl |
                self.explicit_comment |
                self.entry |
                self.implicit_comment)
Example no. 22
0
File: cli.py Project: CD3/cccpt
class Catch2:
    error_line = pp.LineStart() + pp.SkipTo(":")("filename") + pp.Literal(":") + pp.Word(pp.nums) + pp.Literal(": FAILED:")
Example no. 23
0
#
# JSON Parser exercise
#
import pyparsing as pp
pp.ParserElement.setDefaultWhitespaceChars(' ')

escapechar = (pp.Literal("\\t") | pp.Literal("\\n"))
characters = (pp.Word(pp.alphanums) | escapechar | pp.White(' '))

string = pp.Literal("\"").suppress() + pp.Combine(
    pp.OneOrMore(characters)) + pp.Literal("\"").suppress()

print(string.parseString("\"meh \\t \""))

string.runTests("\"meh \\t \"")

#
# It is also possible to use the quoted string class
#
string2 = pp.QuotedString('"', unquoteResults=True)
string2.runTests("""
        \"meh\"
""")
"""
<number> ::= <int> <frac>

<int> ::=  <digit> | <onenine> <digits> | - <digit> | - <onenine><digits>

<frac> ::= "" | . <digits>

<digits> ::= <digit> | <digit> <digits>
Example no. 24
0
File: cli.py Project: CD3/cccpt
class UnitTestPlusPlus:
    error_line = pp.LineStart() + pp.SkipTo(":")("filename") + pp.Literal(":") + pp.Word(pp.nums) + pp.Literal(":") + pp.Word(pp.nums) + pp.Literal(": error:") + pp.SkipTo(pp.LineEnd())
Example no. 25
0
                t = f(i)
            if isinstance(t,str):
                t = [(Token.Literal,t)]
            s.extend(t)
        return s if toks else highlight(s)


# ispec format parser:
#---------------------

integer    = pp.Regex(r'[1-9][0-9]*')
indxdir    = pp.oneOf(['<','>'])
fixbit     = pp.oneOf(['0','1'])
number     = integer|fixbit
number.setParseAction(lambda r: int(r[0]))
unklen     = pp.Literal('*')
length     = number|unklen
unkbit     = pp.oneOf(['-'])
fixbyte    = pp.Regex(r'{[0-9a-fA-F][0-9a-fA-F]}').setParseAction(lambda r: Bits(int(r[0][1:3],16),8))
fixed      = fixbyte|fixbit|unkbit
option     = pp.oneOf(['.','~','#','='])
symbol     = pp.Regex(r'[A-Za-z_][A-Za-z0-9_]*')
location   = pp.Suppress('(')+length+pp.Suppress(')')
directive  = pp.Group(pp.Optional(option,default='')+symbol+pp.Optional(location,default=1))
speclen    = pp.Group(length+pp.Optional(indxdir,default='<'))
specformat = pp.Group(pp.Suppress('[')+pp.OneOrMore(directive|fixed)+pp.Suppress(']'))
specoption = pp.Optional(pp.Literal('+').setParseAction(lambda r:True),default=False)
specdecode = speclen+specformat+specoption
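# Illustrative only (assumed spec syntax): a 32-bit spec with an explicit index
# direction, one fixed byte, a 24-bit 'imm' directive and the '+' option:
#   specdecode.parseString('32<[ {0f} imm(24) ]+')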

def ispec_register(x,module):
    F = []
Example no. 26
0
File: iis.py Project: tomchop/plaso
class WinIISParser(text_parser.PyparsingSingleLineTextParser):
  """Parses a Microsoft IIS log file."""

  NAME = 'winiis'
  DATA_FORMAT = 'Microsoft IIS log file'

  # Common Fields (6.0): date time s-sitename s-ip cs-method cs-uri-stem
  # cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status
  # sc-substatus sc-win32-status
  # Common Fields (7.5): date time s-ip cs-method cs-uri-stem cs-uri-query
  # s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus
  # sc-win32-status time-taken

  BLANK = pyparsing.Literal('-')
  WORD = pyparsing.Word(pyparsing.alphanums + '-') | BLANK

  INTEGER = (
      pyparsing.Word(pyparsing.nums, min=1).setParseAction(
          text_parser.ConvertTokenToInteger) | BLANK)

  IP_ADDRESS = (
      text_parser.PyparsingConstants.IPV4_ADDRESS |
      text_parser.PyparsingConstants.IPV6_ADDRESS | BLANK)

  PORT = (
      pyparsing.Word(pyparsing.nums, min=1, max=6).setParseAction(
          text_parser.ConvertTokenToInteger) | BLANK)

  _URI_SAFE_CHARACTERS = '/.?&+;_=()-:,%'
  _URI_UNSAFE_CHARACTERS = '{}|\\^~[]`'

  URI = pyparsing.Word(pyparsing.alphanums + _URI_SAFE_CHARACTERS) | BLANK

  # Per https://blogs.iis.net/nazim/use-of-special-characters-like-in-an-iis-url
  # IIS does not require that a query comply with RFC 1738 restrictions on
  # valid URI characters.
  QUERY = (pyparsing.Word(
      pyparsing.alphanums + _URI_SAFE_CHARACTERS + _URI_UNSAFE_CHARACTERS) |
           BLANK)

  DATE_TIME = (
      text_parser.PyparsingConstants.DATE_ELEMENTS +
      text_parser.PyparsingConstants.TIME_ELEMENTS)

  DATE_METADATA = (
      pyparsing.Literal('Date:') + DATE_TIME.setResultsName('date_time'))

  FIELDS_METADATA = (
      pyparsing.Literal('Fields:') +
      pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName('fields'))

  COMMENT = pyparsing.Literal('#') + (
      DATE_METADATA | FIELDS_METADATA | pyparsing.SkipTo(pyparsing.LineEnd()))

  LOG_LINE_6_0 = (
      DATE_TIME.setResultsName('date_time') +
      URI.setResultsName('s_sitename') +
      IP_ADDRESS.setResultsName('dest_ip') +
      WORD.setResultsName('http_method') +
      URI.setResultsName('cs_uri_stem') +
      URI.setResultsName('cs_uri_query') +
      PORT.setResultsName('dest_port') +
      WORD.setResultsName('cs_username') +
      IP_ADDRESS.setResultsName('source_ip') +
      URI.setResultsName('user_agent') +
      INTEGER.setResultsName('sc_status') +
      INTEGER.setResultsName('sc_substatus') +
      INTEGER.setResultsName('sc_win32_status'))
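  # An illustrative 6.0-format line (space-separated fields):
  # 2013-07-30 00:00:00 W3SVC1 10.10.10.100 GET /index.htm - 80 - 10.10.10.1 Mozilla/4.0 200 0 0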

  _LOG_LINE_STRUCTURES = {}

  # Common fields. Set results names with underscores, not hyphens, because
  # the regular expressions will not pick up hyphenated names.
  _LOG_LINE_STRUCTURES['date'] = (
      text_parser.PyparsingConstants.DATE.setResultsName('date'))
  _LOG_LINE_STRUCTURES['time'] = (
      text_parser.PyparsingConstants.TIME.setResultsName('time'))
  _LOG_LINE_STRUCTURES['s-sitename'] = URI.setResultsName('s_sitename')
  _LOG_LINE_STRUCTURES['s-ip'] = IP_ADDRESS.setResultsName('dest_ip')
  _LOG_LINE_STRUCTURES['cs-method'] = WORD.setResultsName('http_method')
  _LOG_LINE_STRUCTURES['cs-uri-stem'] = URI.setResultsName(
      'requested_uri_stem')
  _LOG_LINE_STRUCTURES['cs-uri-query'] = QUERY.setResultsName('cs_uri_query')
  _LOG_LINE_STRUCTURES['s-port'] = PORT.setResultsName('dest_port')
  _LOG_LINE_STRUCTURES['cs-username'] = WORD.setResultsName('cs_username')
  _LOG_LINE_STRUCTURES['c-ip'] = IP_ADDRESS.setResultsName('source_ip')
  _LOG_LINE_STRUCTURES['cs(User-Agent)'] = URI.setResultsName('user_agent')
  _LOG_LINE_STRUCTURES['sc-status'] = INTEGER.setResultsName('http_status')
  _LOG_LINE_STRUCTURES['sc-substatus'] = INTEGER.setResultsName(
      'sc_substatus')
  _LOG_LINE_STRUCTURES['sc-win32-status'] = INTEGER.setResultsName(
      'sc_win32_status')

  # Less common fields.
  _LOG_LINE_STRUCTURES['s-computername'] = URI.setResultsName(
      's_computername')
  _LOG_LINE_STRUCTURES['sc-bytes'] = INTEGER.setResultsName('sent_bytes')
  _LOG_LINE_STRUCTURES['cs-bytes'] = INTEGER.setResultsName('received_bytes')
  _LOG_LINE_STRUCTURES['time-taken'] = INTEGER.setResultsName('time_taken')
  _LOG_LINE_STRUCTURES['cs-version'] = URI.setResultsName('protocol_version')
  _LOG_LINE_STRUCTURES['cs-host'] = URI.setResultsName('cs_host')
  _LOG_LINE_STRUCTURES['cs(Cookie)'] = URI.setResultsName('cs_cookie')
  _LOG_LINE_STRUCTURES['cs(Referrer)'] = URI.setResultsName('cs_referrer')
  _LOG_LINE_STRUCTURES['cs(Referer)'] = URI.setResultsName('cs_referrer')

  # Define the available log line structures. Default to the IIS v. 6.0
  # common format.
  LINE_STRUCTURES = [
      ('comment', COMMENT),
      ('logline', LOG_LINE_6_0)]

  # Define a signature value for the log file.
  _SIGNATURE = '#Software: Microsoft Internet Information Services'

  # Per https://msdn.microsoft.com/en-us/library/ms525807(v=vs.90).aspx:
  # "log file format(s) are all ASCII text formats (unless UTF-8 is enabled for
  #  your Web sites)
  _ENCODING = 'utf-8'

  def __init__(self):
    """Initializes a parser."""
    super(WinIISParser, self).__init__()
    self._day_of_month = None
    self._month = None
    self._year = None

  def _ParseComment(self, structure):
    """Parses a comment.

    Args:
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    # TODO: refactor. Why is this method named _ParseComment when it extracts
    # the date and time?
    if structure[1] == 'Date:':
      time_elements_tuple = self._GetValueFromStructure(structure, 'date_time')
      self._year, self._month, self._day_of_month, _, _, _ = time_elements_tuple
    elif structure[1] == 'Fields:':
      self._ParseFieldsMetadata(structure)

  def _ParseFieldsMetadata(self, structure):
    """Parses the fields metadata and updates the log line definition to match.

    Args:
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    fields = self._GetValueFromStructure(structure, 'fields', default_value='')
    fields = fields.strip()
    fields = fields.split(' ')
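    # e.g. the header '#Fields: date time c-ip cs-method cs-uri-stem sc-status'
    # yields ['date', 'time', 'c-ip', 'cs-method', 'cs-uri-stem', 'sc-status'].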

    log_line_structure = pyparsing.Empty()
    if fields[0] == 'date' and fields[1] == 'time':
      log_line_structure += self.DATE_TIME.setResultsName('date_time')
      fields = fields[2:]

    for member in fields:
      log_line_structure += self._LOG_LINE_STRUCTURES.get(member, self.URI)

    updated_structures = []
    for line_structure in self._line_structures:
      if line_structure[0] != 'logline':
        updated_structures.append(line_structure)
    updated_structures.append(('logline', log_line_structure))
    # TODO: self._line_structures is a work-around and this needs
    # a structural fix.
    self._line_structures = updated_structures

  def _ParseLogLine(self, parser_mediator, structure):
    """Parse a single log line and produce an event object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
    time_elements_structure = structure.get('date_time', None)
    if time_elements_structure:
      # Ensure time_elements_tuple is not a pyparsing.ParseResults otherwise
      # copy.deepcopy() of the dfDateTime object will fail on Python 3.8 with:
      # "TypeError: 'str' object is not callable" due to pyparsing.ParseResults
      # overriding __getattr__ with a function that returns an empty string
      # when a named token does not exist.
      year, month, day_of_month, hours, minutes, seconds = (
          time_elements_structure)

      time_elements_tuple = (year, month, day_of_month, hours, minutes, seconds)

    else:
      time_tuple = self._GetValueFromStructure(structure, 'time')
      if not time_tuple:
        parser_mediator.ProduceExtractionWarning('missing time values')
        return

      date_tuple = self._GetValueFromStructure(structure, 'date')
      if not date_tuple:
        time_elements_tuple = (
            self._year, self._month, self._day_of_month, time_tuple[0],
            time_tuple[1], time_tuple[2])

      else:
        time_elements_tuple = (
            date_tuple[0], date_tuple[1], date_tuple[2], time_tuple[0],
            time_tuple[1], time_tuple[2])

    try:
      date_time = dfdatetime_time_elements.TimeElements(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(time_elements_tuple))
      return

    event_data = IISEventData()

    for key, value in structure.items():
      if key in ('date', 'date_time', 'time') or value == '-':
        continue

      if isinstance(value, pyparsing.ParseResults):
        value = ''.join(value)

      setattr(event_data, key, value)

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure parsed from the log file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in ('comment', 'logline'):
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    if key == 'logline':
      self._ParseLogLine(parser_mediator, structure)
    elif key == 'comment':
      self._ParseComment(structure)

  # pylint: disable=unused-argument
  def VerifyStructure(self, parser_mediator, line):
    """Verify that this file is an IIS log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between
          parsers and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line was successfully parsed.
    """
    # TODO: self._line_structures is a work-around and this needs
    # a structural fix.
    self._line_structures = self.LINE_STRUCTURES

    self._day_of_month = None
    self._month = None
    self._year = None

    # TODO: Examine other versions of the file format and if this parser should
    # support them. For now just checking if it contains the IIS header.
    if self._SIGNATURE in line:
      return True

    return False
Example no. 27
0
def select_oemol_atom_idx_by_language(system, mask=''):
    """
    This function selects the atom indexes from the passed oemol molecular complex
    by using a defined language. The language allows the selection of the ligand,
    protein, waters, mono-atomic ions, excipients, residue numbers and distance-based
    selections. The logic operators not, or, and, noh, diff and around can be
    used to refine the selection

    Parameters
    ----------
    system : OEMol of the bio-molecular complex protein-ligand
        The molecular complex

    mask : python string
        A string used to select atoms. A Backus–Naur Form grammar
        (https://en.wikipedia.org/wiki/Backus–Naur_form) is defined using the
        python module pyparsing.
        The defined grammar tokens are: "ligand", "protein", "ca_protein" ,"water",
        "ions", "excipients" and "resid chain1:res_idx1 chain2:res_idx2 ... res_idxn"
        that respectively define the ligand, the protein, carbon alpha protein atoms,
        water molecules, ions, excipients (not protein, ligand, water or ions) and
        residue numbers. The atom selection can be refined by using the following
        operator tokens:

        "not" = invert selection
        "or" = add selections
        "and" = intersect selections
        "diff" = logic difference between selections
        "noh" = remove hydrogens from the selection
        "around" = select atoms inside the cutoff distance from a given selection

    Returns
    -------
    atom_set : python set
        the select atom indexes

    Notes
    -----
        Example of selection string:
        mask = "ligand or protein"
        mask = "not water or not ions"
        mask = "ligand or protein or excipients"
        mask = "noh protein"
        mask = "resid A:17 B:12 17 18"
        mask = "protein diff resid A:1"
        mask = "5.0 around protein"
    """
    def split(system, ligand_res_name='LIG'):
        """
        This function splits the passed molecule into components and tracks the
        mapping between the original molecule and the split components. The
        mapping is created as separate atom component index sets.

        Parameters:
        -----------
        system: OEMol
            The system to split in components. The components are:
                the protein atoms,
                the protein carbon alpha atoms
                the water atoms,
                the ion atoms,
                the excipients atoms
        Returns:
        --------
        dic_set: python dictionary
            The system is split into a dictionary with token words as keys
            and the related atom set as value. The token keywords are:
                protein,
                ca_protein,
                ligand,
                water,
                ions,
                excipients,
                system
        """

        # Define Empty sets
        lig_set = set()
        prot_set = set()
        ca_prot_set = set()
        wat_set = set()
        excp_set = set()
        ion_set = set()

        # Atom Bond Set vector used to contain the whole system
        frags = oechem.OEAtomBondSetVector()

        # Define Options for the Filter
        opt = oechem.OESplitMolComplexOptions()

        # The protein filter is set to avoid multiple chains being separated
        # during the splitting, and to avoid peptide molecules being
        # recognized as ligands
        pf = oechem.OEMolComplexFilterFactory(
            oechem.OEMolComplexFilterCategory_Protein)
        peptide = oechem.OEMolComplexFilterFactory(
            oechem.OEMolComplexFilterCategory_Peptide)
        protein_filter = oechem.OEOrRoleSet(pf, peptide)
        opt.SetProteinFilter(protein_filter)

        # The ligand filter is set to recognize just the ligand
        lf = oechem.OEMolComplexFilterFactory(
            oechem.OEMolComplexFilterCategory_Ligand)
        not_protein_filter = oechem.OENotRoleSet(protein_filter)
        ligand_filter = oechem.OEAndRoleSet(lf, not_protein_filter)
        opt.SetLigandFilter(ligand_filter)

        # The water filter is set to recognize just water molecules
        wf = oechem.OEMolComplexFilterFactory(
            oechem.OEMolComplexFilterCategory_Water)
        opt.SetWaterFilter(wf)

        # Set Category
        cat = oechem.OEMolComplexCategorizer()
        cat.AddLigandName(ligand_res_name)
        opt.SetCategorizer(cat)

        # Define the system fragments
        if not oechem.OEGetMolComplexFragments(frags, system, opt):
            raise ValueError('Unable to generate the system fragments')

        # Set empty OEMol containers
        prot = oechem.OEMol()
        lig = oechem.OEMol()
        wat = oechem.OEMol()
        excp = oechem.OEMol()

        # Split the protein from the system
        atommap = oechem.OEAtomArray(system.GetMaxAtomIdx())
        if not oechem.OECombineMolComplexFragments(
                prot, frags, opt, opt.GetProteinFilter(), atommap):
            raise ValueError('Unable to split the Protein')
        # Populate the protein set and the protein carbon alpha set
        pred = oechem.OEIsCAlpha()
        for sys_at in system.GetAtoms():
            sys_idx = sys_at.GetIdx()
            at_idx = atommap[sys_idx]
            if at_idx:
                prot_set.add(sys_idx)
                at = system.GetAtom(oechem.OEHasAtomIdx(sys_idx))
                if pred(at):
                    ca_prot_set.add(sys_idx)
                # print(sys_idx, '->', at_idx)

        # Split the ligand from the system
        atommap = oechem.OEAtomArray(system.GetMaxAtomIdx())
        if not oechem.OECombineMolComplexFragments(
                lig, frags, opt, opt.GetLigandFilter(), atommap):
            raise ValueError('Unable to split the Ligand')
        # Populate the ligand set
        for sys_at in system.GetAtoms():
            sys_idx = sys_at.GetIdx()
            at_idx = atommap[sys_idx]
            if at_idx:
                lig_set.add(sys_idx)
                # print(sys_idx, '->', at_idx)

        # Split the water from the system
        atommap = oechem.OEAtomArray(system.GetMaxAtomIdx())
        if not oechem.OECombineMolComplexFragments(
                wat, frags, opt, opt.GetWaterFilter(), atommap):
            raise ValueError('Unable to split the Water')
        # Populate the water set
        for sys_at in system.GetAtoms():
            sys_idx = sys_at.GetIdx()
            at_idx = atommap[sys_idx]
            if at_idx:
                wat_set.add(sys_idx)
                # print(sys_idx, '->', at_idx)

        # Split the excipients from the system
        atommap = oechem.OEAtomArray(system.GetMaxAtomIdx())
        if not oechem.OECombineMolComplexFragments(
                excp, frags, opt, opt.GetOtherFilter(), atommap):
            raise ValueError('Unable to split the Excipients')
        # Populate the excipient set
        for sys_at in system.GetAtoms():
            sys_idx = sys_at.GetIdx()
            at_idx = atommap[sys_idx]
            if at_idx:
                excp_set.add(sys_idx)
                # print(sys_idx, '->', at_idx)

        # Create the mono-atomic ions set
        for exc_idx in excp_set:
            atom = system.GetAtom(oechem.OEHasAtomIdx(exc_idx))
            if atom.GetDegree() == 0:
                ion_set.add(exc_idx)

        # Create the excipients set which are not protein, ligand, waters or ions
        excipients_set = excp_set - ion_set

        # Create the system set
        system_set = prot_set | lig_set | excp_set | wat_set

        if len(system_set) != system.NumAtoms():
            raise ValueError("The total system atom number {} is different "
                             "from its set representation {}".format(
                                 system.NumAtoms(), len(system_set)))

        # The dictionary is used to link the token keywords to the created molecule sets
        dic_set = {
            'ligand': lig_set,
            'protein': prot_set,
            'ca_protein': ca_prot_set,
            'water': wat_set,
            'ions': ion_set,
            'excipients': excipients_set,
            'system': system_set
        }

        return dic_set

    def build_set(ls, dsets):
        """
        This function selects the atom indexes

        Parameters:
        -----------
        ls: python list
            the parsed list with tokens and operator tokens for the selection
        dsets: python dictionary
            the dictionary containing the sets for the selection

        Return:
        -------
        atom_set: python set
            the set containing the atom indexes
        """
        def noh(ls, dsets):
            """
            This function removes hydrogens from the selection
            """
            data_set = build_set(ls[1], dsets)

            noh_set = set()
            pred = oechem.OEIsHydrogen()

            for idx in data_set:
                atom = system.GetAtom(oechem.OEHasAtomIdx(idx))
                if not pred(atom):
                    noh_set.add(idx)

            return noh_set

        def residues(ls):
            """
            This function selects residues based on the residue numbers. An
            example of selection can be:
            mask = 'resid A:16 17 19 B:1'
            """
            # List residue atom index to be restrained
            res_atom_set = set()

            # Dictionary of lists with the chain residues selected to be restrained
            # e.g. {chainA:[res1, res15], chainB:[res19, res17]}
            chain_dic = {'': []}

            # Fill out the chain dictionary
            i = 0
            while i < len(ls):
                if ls[i].isdigit():
                    chain_dic[''].append(int(ls[i]))
                    i += 1
                else:
                    try:
                        chain_dic[ls[i]].append(int(ls[i + 2]))
                    except KeyError:
                        chain_dic[ls[i]] = []
                        chain_dic[ls[i]].append(int(ls[i + 2]))
                    i += 3

            # Loop over the molecular system to collect the selected atom indexes
            hv = oechem.OEHierView(
                system, oechem.OEAssumption_BondedResidue +
                oechem.OEAssumption_ResPerceived)
            for chain in hv.GetChains():
                chain_id = chain.GetChainID()
                if chain_id not in chain_dic:
                    continue
                for frag in chain.GetFragments():
                    for hres in frag.GetResidues():
                        res_num = hres.GetOEResidue().GetResidueNumber()
                        if res_num not in chain_dic[chain_id]:
                            continue
                        for oe_at in hres.GetAtoms():
                            res_atom_set.add(oe_at.GetIdx())

            return res_atom_set

        def around(dist, ls):
            """
            This function selects atoms no farther than the threshold distance
            from the current selection. The threshold distance is in Angstrom.

            An example of selection can be:
            mask = '5.0 around ligand'
            """
            # at = system.GetAtom(oechem.OEHasAtomIdx(idx))

            # Atom set selection
            atom_set_around = set()

            # Create an OE bit vector mask for the atoms
            bv_around = oechem.OEBitVector(system.GetMaxAtomIdx())

            # Set the mask atom
            for at in system.GetAtoms():
                if at.GetIdx() in ls:
                    bv_around.SetBitOn(at.GetIdx())

            # Predicate
            pred = oechem.OEAtomIdxSelected(bv_around)

            # Create the system molecule based on the atom mask
            molecules = oechem.OEMol()
            oechem.OESubsetMol(molecules, system, pred)

            # Create the Nearest neighbours
            nn = oechem.OENearestNbrs(system, float(dist))

            for nbrs in nn.GetNbrs(molecules):
                for atom in oechem.OEGetResidueAtoms(nbrs.GetBgn()):
                    if atom.GetIdx() in ls:
                        continue
                    atom_set_around.add(atom.GetIdx())

            return atom_set_around

        # Start of the body of the selection-by-language function

        # A terminal literal returns the related set
        if isinstance(ls, str):
            return dsets[ls]
        # Not or Noh
        if len(ls) == 2:
            if ls[0] == 'noh':  # Noh case
                return noh(ls, dsets)
            elif ls[0] == 'not':  # Not case
                return dsets['system'] - build_set(ls[1], dsets)
            else:  # Resid case with one index
                return residues(ls[1])

        if len(ls) == 3:
            if ls[1] == 'or':  # Or Case (set union)
                return build_set(ls[0], dsets) | build_set(ls[2], dsets)
            elif ls[1] == 'and':  # And Case (set intersection)
                return build_set(ls[0], dsets) & build_set(ls[2], dsets)
            elif ls[1] == 'diff':  # Diff case (set difference)
                return build_set(ls[0], dsets) - build_set(ls[2], dsets)
            elif ls[1] == 'around':  # Around case
                return around(ls[0], build_set(ls[2], dsets))
            else:
                return residues(ls[1:])  # Resid case with one or two indexes
        else:
            if ls[0] == 'resid':
                return residues(ls[1:])  # Resid case with multiple indexes
            else:
                raise ValueError(
                    "The passed list has too many tokens: {}".format(ls))

    # Parse Action-Maker
    def makeLRlike(numterms):
        if numterms is None:
            # The None operator can only be a binary op
            initlen = 2
            incr = 1
        else:
            initlen = {0: 1, 1: 2, 2: 3, 3: 5}[numterms]
            incr = {0: 1, 1: 1, 2: 2, 3: 4}[numterms]

        # Define parse action for this number of terms,
        # to convert flat list of tokens into nested list
        def pa(s, l, t):
            t = t[0]
            if len(t) > initlen:
                ret = pyp.ParseResults(t[:initlen])
                i = initlen
                while i < len(t):
                    ret = pyp.ParseResults([ret] + t[i:i + incr])
                    i += incr
                return pyp.ParseResults([ret])

        return pa
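    # e.g. makeLRlike(2) regroups the flat token list [a, 'or', b, 'or', c]
    # into the left-nested form [[a, 'or', b], 'or', c].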

    # Selection function body

    # Residue number selection
    id = pyp.Optional(pyp.Word(pyp.alphanums) + pyp.Literal(':')) + pyp.Word(
        pyp.nums)
    resid = pyp.Group(pyp.Literal("resid") + pyp.OneOrMore(id))

    # Real number for around operator selection
    real = pyp.Regex(r"\d+(\.\d*)?").setParseAction(lambda t: float(t[0]))

    # Define the tokens for the BNF grammar
    operand = pyp.Literal("protein") | pyp.Literal("ca_protein") | \
              pyp.Literal("ligand") | pyp.Literal("water") | \
              pyp.Literal("ions") | pyp.Literal("excipients") | resid

    # BNF Grammar definition with parseAction makeLRlike
    expr = pyp.operatorPrecedence(
        operand,
        [(None, 2, pyp.opAssoc.LEFT, makeLRlike(None)),
         (pyp.Literal("not"), 1, pyp.opAssoc.RIGHT, makeLRlike(1)),
         (pyp.Literal("noh"), 1, pyp.opAssoc.RIGHT, makeLRlike(1)),
         (pyp.Literal("and"), 2, pyp.opAssoc.LEFT, makeLRlike(2)),
         (pyp.Literal("or"), 2, pyp.opAssoc.LEFT, makeLRlike(2)),
         (pyp.Literal("diff"), 2, pyp.opAssoc.LEFT, makeLRlike(2)),
         (real + pyp.Literal("around"), 1, pyp.opAssoc.RIGHT, makeLRlike(2))])
    # Parse the input string
    try:
        ls = expr.parseString(mask, parseAll=True)
    except Exception as e:
        raise ValueError("The passed restraint mask is not valid: {}".format(
            str(e)))

    # Split the system
    dic_sets = split(system)

    # Select atom indexes
    atom_set = build_set(ls[0], dic_sets)

    return atom_set
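
A minimal standalone sketch of the makeLRlike regrouping used above (an added
illustration: only pyparsing is assumed, and make_lr_like/operand are
simplified stand-ins, not the original code):

import pyparsing as pyp

def make_lr_like(initlen, incr):
    # Same regrouping as makeLRlike above, with the arity table inlined.
    def pa(s, l, t):
        t = t[0]
        if len(t) > initlen:
            ret = pyp.ParseResults(t[:initlen])
            i = initlen
            while i < len(t):
                ret = pyp.ParseResults([ret] + t[i:i + incr])
                i += incr
            return pyp.ParseResults([ret])
    return pa

operand = pyp.Word(pyp.alphas)
expr = pyp.operatorPrecedence(
    operand,
    [(pyp.Literal("or"), 2, pyp.opAssoc.LEFT, make_lr_like(3, 2))])

# Prints [[['a', 'or', 'b'], 'or', 'c']]: left-associated nesting.
print(expr.parseString("a or b or c", parseAll=True))
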
Esempio n. 28
0
File: iis.py Progetto: noodled/plaso
class WinIISParser(text_parser.PyparsingSingleLineTextParser):
    """Parses a Microsoft IIS log file."""

    NAME = 'winiis'
    DESCRIPTION = 'Parser for Microsoft IIS log files.'

    # Common Fields (6.0): date time s-sitename s-ip cs-method cs-uri-stem
    # cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status
    # sc-substatus sc-win32-status.
    # Common Fields (7.5): date time s-ip cs-method cs-uri-stem cs-uri-query
    # s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus
    # sc-win32-status time-taken

    BLANK = pyparsing.Literal('-')
    WORD = pyparsing.Word(pyparsing.alphanums + '-') | BLANK

    INTEGER = (pyparsing.Word(pyparsing.nums, min=1).setParseAction(
        text_parser.ConvertTokenToInteger) | BLANK)

    IP_ADDRESS = (text_parser.PyparsingConstants.IPV4_ADDRESS
                  | text_parser.PyparsingConstants.IPV6_ADDRESS | BLANK)

    PORT = (pyparsing.Word(pyparsing.nums, min=1, max=6).setParseAction(
        text_parser.ConvertTokenToInteger) | BLANK)

    URI = pyparsing.Word(pyparsing.alphanums + '/.?&+;_=()-:,%') | BLANK

    DATE_TIME = (text_parser.PyparsingConstants.DATE_ELEMENTS +
                 text_parser.PyparsingConstants.TIME_ELEMENTS)

    DATE_METADATA = (pyparsing.Literal('Date:') +
                     DATE_TIME.setResultsName('date_time'))

    FIELDS_METADATA = (
        pyparsing.Literal('Fields:') +
        pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName('fields'))

    COMMENT = pyparsing.Literal('#') + (DATE_METADATA | FIELDS_METADATA |
                                        pyparsing.SkipTo(pyparsing.LineEnd()))

    LOG_LINE_6_0 = (DATE_TIME.setResultsName('date_time') +
                    URI.setResultsName('s_sitename') +
                    IP_ADDRESS.setResultsName('dest_ip') +
                    WORD.setResultsName('http_method') +
                    URI.setResultsName('cs_uri_stem') +
                    URI.setResultsName('cs_uri_query') +
                    PORT.setResultsName('dest_port') +
                    WORD.setResultsName('cs_username') +
                    IP_ADDRESS.setResultsName('source_ip') +
                    URI.setResultsName('user_agent') +
                    INTEGER.setResultsName('sc_status') +
                    INTEGER.setResultsName('sc_substatus') +
                    INTEGER.setResultsName('sc_win32_status'))
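
    # Illustrative IIS 6.0 record that LOG_LINE_6_0 matches (assumed sample,
    # not from a real log):
    #   2013-07-30 00:00:00 W3SVC1 10.0.0.1 GET /index.htm - 80 -
    #   10.0.0.2 Mozilla/4.0+(compatible) 200 0 0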

    _LOG_LINE_STRUCTURES = {}

    # Common fields. Set results names with underscores, not hyphens, because
    # the regex will not pick them up.
    _LOG_LINE_STRUCTURES['date'] = (
        text_parser.PyparsingConstants.DATE.setResultsName('date'))
    _LOG_LINE_STRUCTURES['time'] = (
        text_parser.PyparsingConstants.TIME.setResultsName('time'))
    _LOG_LINE_STRUCTURES['s-sitename'] = URI.setResultsName('s_sitename')
    _LOG_LINE_STRUCTURES['s-ip'] = IP_ADDRESS.setResultsName('dest_ip')
    _LOG_LINE_STRUCTURES['cs-method'] = WORD.setResultsName('http_method')
    _LOG_LINE_STRUCTURES['cs-uri-stem'] = URI.setResultsName(
        'requested_uri_stem')
    _LOG_LINE_STRUCTURES['cs-uri-query'] = URI.setResultsName('cs_uri_query')
    _LOG_LINE_STRUCTURES['s-port'] = PORT.setResultsName('dest_port')
    _LOG_LINE_STRUCTURES['cs-username'] = WORD.setResultsName('cs_username')
    _LOG_LINE_STRUCTURES['c-ip'] = IP_ADDRESS.setResultsName('source_ip')
    _LOG_LINE_STRUCTURES['cs(User-Agent)'] = URI.setResultsName('user_agent')
    _LOG_LINE_STRUCTURES['sc-status'] = INTEGER.setResultsName('http_status')
    _LOG_LINE_STRUCTURES['sc-substatus'] = INTEGER.setResultsName(
        'sc_substatus')
    _LOG_LINE_STRUCTURES['sc-win32-status'] = INTEGER.setResultsName(
        'sc_win32_status')

    # Less common fields.
    _LOG_LINE_STRUCTURES['s-computername'] = URI.setResultsName(
        's_computername')
    _LOG_LINE_STRUCTURES['sc-bytes'] = INTEGER.setResultsName('sent_bytes')
    _LOG_LINE_STRUCTURES['cs-bytes'] = INTEGER.setResultsName('received_bytes')
    _LOG_LINE_STRUCTURES['time-taken'] = INTEGER.setResultsName('time_taken')
    _LOG_LINE_STRUCTURES['cs-version'] = URI.setResultsName('protocol_version')
    _LOG_LINE_STRUCTURES['cs-host'] = URI.setResultsName('cs_host')
    _LOG_LINE_STRUCTURES['cs(Cookie)'] = URI.setResultsName('cs_cookie')
    _LOG_LINE_STRUCTURES['cs(Referrer)'] = URI.setResultsName('cs_referrer')
    _LOG_LINE_STRUCTURES['cs(Referer)'] = URI.setResultsName('cs_referrer')

    # Define the available log line structures. Default to the IIS v. 6.0
    # common format.
    LINE_STRUCTURES = [('comment', COMMENT), ('logline', LOG_LINE_6_0)]

    # Define a signature value for the log file.
    _SIGNATURE = '#Software: Microsoft Internet Information Services'

    # Per https://msdn.microsoft.com/en-us/library/ms525807(v=vs.90).aspx:
    # "log file format(s) are all ASCII text formats (unless UTF-8 is enabled for
    #  your Web sites)
    _ENCODING = 'utf-8'

    def __init__(self):
        """Initializes a parser object."""
        super(WinIISParser, self).__init__()
        self._day_of_month = None
        self._month = None
        self._year = None

    def _ParseComment(self, structure):
        """Parses a comment.

    Args:
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
        if structure[1] == 'Date:':
            self._year, self._month, self._day_of_month, _, _, _ = structure.date_time
        elif structure[1] == 'Fields:':
            self._ParseFieldsMetadata(structure)

    def _ParseFieldsMetadata(self, structure):
        """Parses the fields metadata.

    Args:
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
        fields = structure.fields.split(' ')
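        # Illustrative value of structure.fields at this point (assumed
        # sample): 'date time s-ip cs-method cs-uri-stem cs-uri-query s-port
        # cs-username c-ip cs(User-Agent) sc-status sc-substatus
        # sc-win32-status time-taken'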

        log_line_structure = pyparsing.Empty()
        if fields[0] == 'date' and fields[1] == 'time':
            log_line_structure += self.DATE_TIME.setResultsName('date_time')
            fields = fields[2:]

        for member in fields:
            log_line_structure += self._LOG_LINE_STRUCTURES.get(
                member, self.URI)

        # TODO: self._line_structures is a work-around and this needs
        # a structural fix.
        self._line_structures[1] = ('logline', log_line_structure)

    def _ParseLogLine(self, parser_mediator, structure):
        """Parse a single log line and produce an event object.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure parsed from the log file.
    """
        if structure.date_time:
            time_elements_tuple = structure.date_time

        elif structure.date and structure.time:
            year, month, day_of_month = structure.date
            hours, minutes, seconds = structure.time
            time_elements_tuple = (year, month, day_of_month, hours, minutes,
                                   seconds)

        elif structure.time:
            hours, minutes, seconds = structure.time
            time_elements_tuple = (self._year, self._month, self._day_of_month,
                                   hours, minutes, seconds)

        else:
            parser_mediator.ProduceExtractionError(
                'missing date and time values')
            return

        try:
            date_time = dfdatetime_time_elements.TimeElements(
                time_elements_tuple=time_elements_tuple)
        except ValueError:
            parser_mediator.ProduceExtractionError(
                'invalid date time value: {0!s}'.format(time_elements_tuple))
            return

        event_data = IISEventData()

        for key, value in iter(structure.items()):
            if key in ('date', 'date_time', 'time') or value == '-':
                continue

            if isinstance(value, pyparsing.ParseResults):
                value = ''.join(value)

            setattr(event_data, key, value)

        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_WRITTEN)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure parsed from the log file.

    Raises:
      ParseError: when the structure type is unknown.
    """
        if key not in ('comment', 'logline'):
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        if key == 'logline':
            self._ParseLogLine(parser_mediator, structure)
        elif key == 'comment':
            self._ParseComment(structure)

    def VerifyStructure(self, unused_parser_mediator, line):
        """Verify that this file is an IIS log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): line from a text file.

    Returns:
      bool: True if the line was successfully parsed.
    """
        # TODO: self._line_structures is a work-around and this needs
        # a structural fix.
        self._line_structures = self.LINE_STRUCTURES

        self._day_of_month = None
        self._month = None
        self._year = None

        # TODO: Examine other versions of the file format and whether this
        # parser should support them. For now just check whether the line
        # contains the IIS header.
        if self._SIGNATURE in line:
            return True

        return False
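
A minimal standalone sketch of the dynamic-grammar rebuild performed by
_ParseFieldsMetadata above (an added illustration: the field table, the
build_log_line helper and the sample line are assumptions, not plaso code):

import pyparsing

FIELD_STRUCTURES = {
    'c-ip': pyparsing.Word(pyparsing.nums + '.').setResultsName('source_ip'),
    's-port': pyparsing.Word(pyparsing.nums).setResultsName('dest_port'),
}
# Unknown fields fall back to a generic token, like self.URI above.
DEFAULT_FIELD = pyparsing.Word(pyparsing.printables)

def build_log_line(fields_directive):
    # Sum one expression per field named in the '#Fields:' directive.
    log_line = pyparsing.Empty()
    for member in fields_directive.split(' '):
        log_line += FIELD_STRUCTURES.get(member, DEFAULT_FIELD)
    return log_line

log_line = build_log_line('c-ip s-port cs-method')
tokens = log_line.parseString('10.0.0.2 80 GET')
print(tokens.source_ip, tokens.dest_port)  # prints: 10.0.0.2 80
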
Esempio n. 29
0
    def test_str(self):
        expr = pp.Literal('abc')
        s_expr = StyledElement({}, 'class:abc', expr)
        self.assertEqual(str(expr), str(s_expr))
Esempio n. 30
0
def banana_grammar(emitter=emit.PrintEmitter()):
    """
    Generate a banana parser that can then be used to
    parse banana content. It builds an AST on which
    operations can then be applied.
    :return: Return a banana parser
    :rtype: BananaScopeParser
    """
    # Set to True to enable pyparsing debug output for the grammar
    debug_grammar = False

    # Actions
    def action_str_lit(s, l, t):
        return ast.StringLit(ast.make_span(s, l, t), t[0])

    def action_num_lit(s, l, t):
        return ast.Number(ast.make_span(s, l, t), t[0])

    def action_ident(s, l, t):
        return ast.Ident(ast.make_span(s, l, t), t[0])

    def action_expr(s, l, t):
        if len(t) != 1:
            raise exception.BananaGrammarBug(
                'Bug found in the grammar for expression,'
                ' Please report this bug.')
        if isinstance(t[0], ast.Expr):
            return t[0]
        return ast.Expr(ast.make_span(s, l, t), t[0])

    def action_dot_path(s, l, t):
        # First token is the name of the variable
        # The rest is the property path
        if isinstance(t[0], ast.StringLit) and len(t[1:]) == 0:
            return t[0]
        return ast.DotPath(ast.make_span(s, l, t), t[0], t[1:])

    def action_json_obj(s, l, t):
        return ast.JsonObj(ast.make_span(s, l, t), t)

    def action_parse_ctor_arg(s, l, t):
        if len(t) > 1:
            return ast.ComponentCtorArg(ast.make_span(s, l, t), t[1], t[0])
        else:
            return ast.ComponentCtorArg(ast.make_span(s, l, t), t[0])

    def action_parse_comp_ctor(s, l, tokens):
        comp = ast.Component(ast.make_span(s, l, tokens))
        for tok in tokens:
            if isinstance(tok, ast.Ident):
                comp.set_ctor(tok)
            elif isinstance(tok, ast.ComponentCtorArg):
                comp.add_arg(tok)
            else:
                raise exception.BananaGrammarBug(
                    'Bug found in the grammar, Please report this bug')
        return comp

    def action_assignment(s, l, t):
        return ast.Assignment(ast.make_span(s, l, t), t[0], t[1])

    def action_create_connections(s, l, t):
        ast_conn = ast.into_connection(t[0])
        ast_conn.span = ast.make_span(s, l, t)
        for i in range(1, len(t)):
            next_conn = ast.into_connection(t[i])
            ast_conn.connect_to(next_conn, emitter)
        return ast_conn

    def action_merge_connections(s, l, t):
        ast_conn = ast.Connection(ast.make_span(s, l, t))
        ast_conn.merge_all(t, emitter)
        return ast_conn

    def action_root_ast(s, l, tokens):
        root = ast.BananaFile(emitter)
        for tok in tokens:
            if isinstance(tok, ast.Assignment):
                if isinstance(tok.rhs, ast.Component):
                    root.add_component_ctor(tok.lhs, tok.rhs)
                else:
                    root.add_assignment(tok.lhs, tok.rhs)
            elif isinstance(tok, ast.Connection):
                root.add_connections(tok)
            else:
                raise exception.BananaGrammarBug(
                    'Bug found in the grammar, Please report this bug.')
        return root

    # TODO(Joan): Remove once it is no longer needed
    def print_stmt(s, l, t):
        print("\nPRINT AST")
        print((l, [str(x) for x in t]))
        print("END PRINT AST\n")

    def action_unimplemented(s, l, t):
        raise exception.BananaGrammarBug("unimplemented code reached")

    # Tokens
    equals = p.Literal("=").suppress().setName('"="').setDebug(debug_grammar)
    arrow = p.Literal("->").suppress().setName('"->"').setDebug(debug_grammar)
    lbra = p.Literal("[").suppress().setName('"["').setDebug(debug_grammar)
    rbra = p.Literal("]").suppress().setName('"]"').setDebug(debug_grammar)
    colon = p.Literal(":").suppress().setName('":"')
    comma = p.Literal(",").suppress().setName(",")
    less = p.Literal("<").suppress().setName('"<"')
    greater = p.Literal(">").suppress().setName('">"')
    lbrace = p.Literal("{").suppress().setName('"{"').setDebug(debug_grammar)
    rbrace = p.Literal("}").suppress().setName('"}"').setDebug(debug_grammar)
    lpar = p.Literal("(").suppress().setName('"("')
    rpar = p.Literal(")").suppress().setName('")"')

    # Keywords
    ing = p.Literal("ing").suppress()
    imp = p.Literal("import").suppress()
    fro = p.Literal("from").suppress()

    # String Literal, Numbers, Identifiers
    string_lit = p.quotedString()\
        .setParseAction(action_str_lit)\
        .setName(const.STRING_LIT)
    number_lit = p.Regex(r'\d+(\.\d*)?([eE]\d+)?')\
        .setParseAction(action_num_lit)\
        .setName(const.NUMBER)
    ident = p.Word(p.alphas + "_", p.alphanums + "_")\
        .setParseAction(action_ident)\
        .setName(const.IDENT)

    # Path for properties
    dot_prop = ident | string_lit
    dot_path = p.delimitedList(dot_prop, ".")\
        .setParseAction(action_dot_path)\
        .setName(const.DOT_PATH)\
        .setDebug(debug_grammar)

    # Expressions

    # To simplify the logic, we match directly against ident and string_lit
    # here rather than dealing only with dot_path. This also removes the
    # ambiguity where '"a"' could be interpreted as a dot_path and would thus
    # be the same as 'a': with the following, the former is always
    # type-checked as a String, whereas the latter takes the type of the
    # variable it names.
    expr = p.infixNotation(number_lit | dot_path, [
        (p.oneOf('* /'), 2, p.opAssoc.LEFT),
        (p.oneOf('+ -'), 2, p.opAssoc.LEFT),
    ],
                           lpar=lpar,
                           rpar=rpar)
    expr.setParseAction(action_expr)\
        .setName(const.EXPR)\
        .setDebug(debug_grammar)

    # JSON-like object (values are richer than in plain JSON: they may be
    # expressions, nested objects or arrays)
    json_obj = p.Forward()
    json_value = p.Forward()
    json_array = p.Group(lbra + p.Optional(p.delimitedList(json_value)) + rbra)
    json_array.setDebug(debug_grammar)
    json_array.setName(const.JSON_ARRAY)
    json_value <<= expr | json_obj | json_array
    json_value.setDebug(debug_grammar)\
        .setName(const.JSON_VALUE)
    json_members = p.delimitedList(p.Group(dot_path + colon - json_value)) +\
        p.Optional(comma)
    json_members.setDebug(debug_grammar)\
        .setName(const.JSON_MEMBERS)
    json_obj <<= p.Dict(lbrace + p.Optional(json_members) - rbrace)
    json_obj.setParseAction(action_json_obj)\
        .setName(const.JSON_OBJ)\
        .setDebug(debug_grammar)
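
    # Illustrative JSON-like literal accepted above (assumed sample; values
    # may be full expressions):
    #   { a: 1, "b": x + 2, c: [1, 2.5] }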

    # Component constructor
    arg = (ident + equals - (expr | json_obj)) | expr | json_obj
    arg.setParseAction(action_parse_ctor_arg)
    params = p.delimitedList(arg)
    comp_ctor = ident + lpar - p.Optional(params) + rpar
    comp_ctor.setParseAction(action_parse_comp_ctor)\
        .setName(const.COMP_CTOR)\
        .setDebug(debug_grammar)

    # Assignments
    assignment = dot_path + equals - (comp_ctor | expr | json_obj)
    assignment.setParseAction(action_assignment)

    # Connections
    connection = p.Forward()
    array_of_connection = p.Group(lbra +
                                  p.Optional(p.delimitedList(connection)) +
                                  rbra)
    array_of_connection.setParseAction(action_merge_connections)
    last_expr = ident | array_of_connection
    this_expr = p.Forward()
    match_expr = p.FollowedBy(last_expr + arrow - last_expr) + \
        (last_expr + p.OneOrMore(arrow - last_expr))
    this_expr <<= match_expr | last_expr
    connection <<= this_expr
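
    # Illustrative connection statements matched here (assumed samples):
    # 'src -> sink' and 'src -> [a, b] -> out'.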

    match_expr.setDebug(debug_grammar)\
        .setName(const.CONNECTION) \
        .setParseAction(action_create_connections)

    # Definitions
    definition = ing - less - string_lit - greater - ident - lbrace - rbrace
    definition.setDebug(debug_grammar)\
        .setName(const.DEFINITION)\
        .setParseAction(action_unimplemented)

    # Import directive
    module_def = (imp - ident) | fro - ident - imp - ident
    module_def.setDebug(debug_grammar)\
        .setName(const.MOD_IMPORT)\
        .setParseAction(action_unimplemented)

    # Comments
    comments = "#" + p.restOfLine

    statement = assignment | \
        match_expr | \
        definition | \
        module_def

    statement.setName(const.STATEMENT)
    statement.setDebug(debug_grammar)
    statement.setParseAction(print_stmt)

    # Grammar
    grammar = p.OneOrMore(statement).ignore(comments)
    grammar.setParseAction(action_root_ast)

    return BananaScopeParser(grammar)
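
For context, a minimal standalone sketch of the outer pattern used above,
i.e. p.OneOrMore(statement).ignore(comments) plus a root-building parse
action (an added illustration: the assignment grammar and the dict standing
in for the AST are assumptions, not the banana AST):

import pyparsing as p

ident = p.Word(p.alphas + '_', p.alphanums + '_')
number = p.Regex(r'\d+(\.\d*)?').setParseAction(lambda t: float(t[0]))
assignment = p.Group(ident + p.Literal('=').suppress() + number)
comments = '#' + p.restOfLine

grammar = p.OneOrMore(assignment).ignore(comments)
# The root action folds all parsed statements into one result, much as
# action_root_ast above folds them into a BananaFile.
grammar.setParseAction(lambda s, l, t: dict((k, v) for k, v in t))

source = '''
a = 1      # first binding
b = 2.5    # second binding
'''
print(grammar.parseString(source, parseAll=True)[0])  # {'a': 1.0, 'b': 2.5}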