Example #1
0
    def parse(self, header):
        """Parse an ARFF header and record the result on this object.

        A pyparsing grammar is assembled for the ``@relation`` line, the
        ``@attribute`` declarations (``numeric``, ``string``, ``{...}``
        enumerations, ``date`` and nested ``relational`` attributes) and
        the ``@data`` marker, and is then applied to the whole of *header*.

        On success the relation name, the line number of ``@data`` and the
        parsed attributes are stored via ``_relation_name``,
        ``_data_start``, ``_find_relational`` and ``_linearize_attrs``, and
        ``_index`` is reset to 0.

        :param header: header text to parse.
        :raises HeaderError: when *header* does not match the grammar.
        """
        comment_line = self._comment()
        unquoted = quotedString.copy().setParseAction(removeQuotes)
        token = unquoted | Word(printables, excludeChars='{},%')
        # Enumeration values use the plain quotedString, i.e. without the
        # removeQuotes action attached to ``unquoted`` above.
        enum_token = quotedString | Word(printables, excludeChars='{},%')

        # "@relation <rest of line>"; the stripped remainder is the name.
        relation_name = Optional(restOfLine, default='default_name')('rel_name')
        relation_name.setParseAction(lambda t: t.rel_name.strip())
        relation_stmt = Suppress(CaselessLiteral("@relation")) + relation_name
        relation_block = (ZeroOrMore(comment_line) + relation_stmt +
                          ZeroOrMore(comment_line))

        # "{a, b, c}": an empty match injects self.ENUM as the type token.
        enum_marker = Empty().copy().setParseAction(lambda t: self.ENUM)
        enum_values = Group(
            delimitedList(enum_token, delim=self._separator))("next_arg")
        enum_values.setParseAction(self.get_values)
        nominal_type = (enum_marker + Suppress(Literal("{")) + enum_values +
                        Suppress(Literal("}")))

        # "date [format]"
        date_format = Optional(CharsNotIn("{},\n"))("next_arg")
        date_format.setParseAction(self._adapt_date_format)
        date_type = CaselessLiteral("date") + date_format

        # Relational attributes contain an attribute list of their own,
        # hence the forward declaration.
        nested_attrs = Forward()
        relational_type = (CaselessLiteral("relational") + nested_attrs +
                           Suppress(CaselessLiteral("@end")) + token)

        type_spec = (CaselessLiteral("numeric") | CaselessLiteral("string") |
                     nominal_type | date_type | relational_type)("attr_type")
        attribute_decl = (Suppress(CaselessLiteral("@attribute")) +
                          (token.copy())("attr_name") + type_spec)
        nested_attrs << (
            Group(OneOrMore(comment_line | attribute_decl)))("children")

        # The "@data" token is replaced by the line number it was found on.
        data_marker = (CaselessLiteral("@data"))("data_start")
        data_marker.setParseAction(lambda s, p, k: (lineno(p, s)))

        grammar = relation_block + nested_attrs + data_marker
        attribute_decl.setParseAction(self._create_attribute)

        try:
            parsed = grammar.parseString(header, parseAll=True)
        except ParseException as err:
            raise HeaderError(FileType.ARFF, err.lineno, err.col, err.line, err)

        self._relation_name = parsed.rel_name
        self._find_relational(parsed.children)
        self._linearize_attrs(parsed.children)
        self._data_start = parsed.data_start
        self._index = 0
Example #2
0
    def init_parser(self):
        """Assemble and return the pyparsing grammar for an alignment.

        The grammar is: a header line starting with two integers
        (``species_count`` and ``sequence_length``), a first block of
        ``<name> <bases>`` lines, then any number of further blocks, each
        introduced by a line end and made of lines holding bases only.
        The ``set_*`` methods of this object are attached as parse actions.
        """
        def to_int(tokens):
            return int(tokens[0])

        number = Word(nums).setParseAction(to_int)

        header_line = (number("species_count") + number("sequence_length") +
                       Suppress(restOfLine))
        header_line.setParseAction(self.set_header)

        name_chars = alphas + nums + "!#$%&\'*+-./;<=>?@[\\]^_`{|}~"
        taxon_name = Word(name_chars, max=100)

        # Work on a copy of BASES whose skippable whitespace is limited to
        # space and tab, so a run of bases cannot continue over a line break.
        line_bases = self.BASES.copy()
        line_bases.setWhitespaceChars(" \t")

        first_row = (taxon_name("species") + line_bases("sequence") +
                     Suppress(LineEnd()))
        first_row.setParseAction(self.set_seq_start)
        first_block = OneOrMore(first_row)
        first_block.setParseAction(self.set_start_block)

        next_row = line_bases("sequence") + Suppress(LineEnd())
        next_row.setParseAction(self.set_seq_continue)
        next_block = Suppress(LineEnd()) + OneOrMore(next_row)
        next_block.setParseAction(self.set_continue_block)

        return header_line + first_block + ZeroOrMore(next_block)
Example #3
0
    def __init__(self, cfg):
        """Build the pyparsing grammar stored in ``self.root_parser``.

        :param cfg: configuration object. ``cfg.datatype`` must be
            ``"protein"``, ``"DNA"`` or ``"morphology"`` and selects the
            single-character alphabet; ``cfg.data_layout`` provides
            ``rate_indexes`` and ``letter_indexes``.
        :raises util.PartitionFinderError: for any other datatype (an
            error is logged first).
        """
        self.cfg = cfg

        kind = cfg.datatype
        if kind == "protein":
            alphabet = _protein_letters
        elif kind == "DNA":
            alphabet = _dna_letters
        elif kind == "morphology":
            alphabet = "0123456789"
        else:
            log.error("Unknown datatype '%s', please check" % self.cfg.datatype)
            raise util.PartitionFinderError

        layout = self.cfg.data_layout
        self.rate_indexes = layout.rate_indexes
        self.freq_indexes = layout.letter_indexes

        number = Word(nums + '.-').setParseAction(lambda x: float(x[0]))
        state = Word(alphabet, exact=1)
        colon = Suppress(":")

        def after_label(label, callback):
            # Skip everything up to *label*, drop the label itself and
            # pass the number that follows to *callback*.
            expr = Suppress(SkipTo(label)) + Suppress(label) + number
            expr.setParseAction(callback)
            return expr

        lnl = after_label(
            Regex("Final GAMMA.+:") | Literal("Likelihood:"), self.set_lnl)
        seconds = after_label(
            Regex("Overall Time.+:") | Regex("Overall Time.+tion "),
            self.set_seconds)
        alpha = after_label(Literal("alpha:"), self.set_alpha)
        tree_size = after_label(Literal("Tree-Length:"), self.set_tree_size)

        # Optional pair of lines that start with "LG4X".
        lg4x_line = "LG4X" + restOfLine
        lg4x = Optional(lg4x_line + lg4x_line)

        # "rate X <-> Y: value"
        rate = Suppress("rate") + state + Suppress("<->") + state + colon + number
        rate.setParseAction(self.set_rate)

        # "freq pi(X): value"
        freq = Suppress("freq pi(") + state + Suppress("):") + number
        freq.setParseAction(self.set_freq)

        lgm_line = "LGM" + restOfLine
        rate_block = Optional(lgm_line) + OneOrMore(rate) + OneOrMore(freq)
        rate_block.setParseAction(self.rate_block)

        # Just look for these things
        self.root_parser = (seconds + lnl + alpha + tree_size +
                            lg4x + OneOrMore(rate_block))
Example #4
0
    def parse(date_string):
        """Parse *date_string* as a single date or as a date range.

        A single date is an optional day name, a day of the month with an
        optional ordinal suffix (``st``, ``nd``, ``rd``, ``th``), an
        optional month name and an optional year; it is handed to
        ``SingleDate``. A range is two single dates separated by ``-``,
        ``until`` or ``to``, optionally preceded by ``From``; it is handed
        to ``DateRange``. The grammar ends with ``stringEnd``.

        Returns the pyparsing result of the match.
        """
        # Grammar for one date
        weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday',
                         'Mon', 'Tue', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun']
        ordinal = Literal('nd') | Literal('rd') | Literal('st') | Literal('th')

        day = Group(Word(nums) + Suppress(Optional(ordinal))).setResultsName('day')
        weekday = Optional(DateParser._build_literal(weekday_names)).setResultsName('dow')
        month = Optional(DateParser._build_literal(LONG_MONTHS + SHORT_MONTHS)).setResultsName('month')
        year = Optional(Word(nums)).setResultsName('year')

        one_date = weekday + day + month + year
        one_date.setParseAction(SingleDate)

        # Grammar for a range: [From] <date> <separator> <date>
        separator = DateParser._build_literal(['-', 'until', 'to'])
        span = (Suppress(Optional('From')) + one_date.setResultsName('start_date') +
                Suppress(separator) + one_date.setResultsName('end_date'))
        span.setParseAction(DateRange)

        # The range is tried first, then the single date.
        grammar = (span | one_date) + stringEnd
        return grammar.parseString(date_string)
Example #5
0
    def __init__(self, cfg):
        """Build the pyparsing grammar stored in ``self.root_parser``.

        :param cfg: configuration object. ``cfg.datatype`` must be
            ``"protein"`` or ``"DNA"`` and selects the single-character
            alphabet; ``cfg.data_layout`` provides ``rate_indexes`` and
            ``letter_indexes``.
        :raises util.PartitionFinderError: for any other datatype (an
            error is logged first).
        """
        self.cfg = cfg

        kind = cfg.datatype
        if kind == "protein":
            alphabet = _protein_letters
        elif kind == "DNA":
            alphabet = _dna_letters
        else:
            log.error("Unknown datatype '%s', please check" %
                      self.cfg.datatype)
            raise util.PartitionFinderError

        layout = self.cfg.data_layout
        self.rate_indexes = layout.rate_indexes
        self.freq_indexes = layout.letter_indexes

        number = Word(nums + '.-').setParseAction(lambda x: float(x[0]))
        state = Word(alphabet, exact=1)
        colon = Suppress(":")

        def after_label(label, callback):
            # Skip everything up to *label*, drop the label itself and
            # pass the number that follows to *callback*.
            expr = Suppress(SkipTo(label)) + Suppress(label) + number
            expr.setParseAction(callback)
            return expr

        lnl = after_label(
            Regex("Final GAMMA.+:") | Literal("Likelihood:"), self.set_lnl)
        seconds = after_label(
            Regex("Overall Time.+:") | Regex("Overall Time.+tion "),
            self.set_seconds)
        alpha = after_label(Literal("alpha:"), self.set_alpha)
        tree_size = after_label(Literal("Tree-Length:"), self.set_tree_size)

        # Optional pair of lines that start with "LG4X".
        lg4x_line = "LG4X" + restOfLine
        lg4x = Optional(lg4x_line + lg4x_line)

        # "rate X <-> Y: value"
        rate = Suppress("rate") + state + Suppress("<->") + state + colon + number
        rate.setParseAction(self.set_rate)

        # "freq pi(X): value"
        freq = Suppress("freq pi(") + state + Suppress("):") + number
        freq.setParseAction(self.set_freq)

        lgm_line = "LGM" + restOfLine
        rate_block = Optional(lgm_line) + OneOrMore(rate) + OneOrMore(freq)
        rate_block.setParseAction(self.rate_block)

        # Just look for these things
        self.root_parser = (seconds + lnl + alpha + tree_size +
                            lg4x + OneOrMore(rate_block))
Example #6
0
 def get_parser(self, EXPRESSION):
     """Build the parser for a call of this function.

     The grammar is ``<name> ( EXPRESSION [, EXPRESSION ...] )``: one
     EXPRESSION, plus one comma-separated EXPRESSION for every argument
     beyond the first up to ``self._n_args``. The name, parentheses and
     commas are suppressed, and ``self`` is installed as the parse action.
     """
     call = Suppress(self._name) + Suppress('(') + EXPRESSION
     extra_args = self._n_args - 1
     for _ in range(extra_args):
         call += Suppress(',') + EXPRESSION
     call += Suppress(')')
     call.setName('Function({name})'.format(name=self._name))
     call.setParseAction(self)
     return call
Example #7
0
 def get_parser(self, EXPRESSION):
     """Build the parser for this constant, if it has a textual name.

     When ``self.value`` is a string, the returned parser matches that
     text, suppresses it and uses ``self`` as the parse action. For any
     other value no parser is built and None is returned. EXPRESSION is
     not used.
     """
     if not isinstance(self.value, str):
         # TODO Detect constants?
         return None

     constant = Suppress(self.value)
     constant.setName('NamedConstant({value})'.format(value=self.value))
     constant.setParseAction(self)
     return constant
    def setup(self):
        """Build and return the parser for relative time expressions.

        Three forms are recognised and tried in this order:

        * a special keyword taken from ``special`` (e.g. ``yesterday``),
        * ``[<modifier>] <unit> ago`` (e.g. ``5 days ago``),
        * ``[in the] last|past [<modifier>] <unit>``
          (e.g. ``in the last 10 days``).

        ``<unit>`` is one of the keys of ``time_units``; ``<modifier>`` is
        either a run of digits or one of the entries of ``add_modifiers``.
        The ``parse*`` methods of this object are attached as parse actions.
        """
        # some expressions that will be reused
        units = get_match_first([Keyword(unit) for unit in time_units])
        units = units.setResultsName("unit")
        units.setParseAction(lambda s, l, tok: time_units[tok[0]])

        multiplier = Word(nums)
        multiplier = multiplier.setResultsName("multiply")
        multiplier.setParseAction(self.parseMulti)

        adder = get_match_first([CL(add) for add in add_modifiers])
        adder = adder.setResultsName("add")
        adder.setParseAction(self.parseAdd)
        modifier = (multiplier | adder)  # + FollowedBy(units)

        # ago
        #
        # e.g 5 days ago
        #
        # Keyword matches the exact word. Word("ago") would treat "ago" as a
        # set of characters and also accept strings such as "goa" or "aaa".
        ago = Optional(modifier) + units + Suppress(Keyword("ago"))
        ago.setParseAction(self.parseAgo)

        # time range
        #
        # e.g in the last 10 days
        time_range = (Suppress(Optional(CL("in the"))) +
                      Suppress(Keyword("last") | Keyword("past")) +
                      Optional(modifier) +
                      units)
        time_range.setParseAction(self.parseRange)

        # special keyword handling
        #
        # e.g yesterday
        # only handles yesterday right now, maybe need to be modified to do
        # more
        special_expr = get_match_first([
            Keyword(expr).setParseAction(lambda s, l, tok: special[tok[0]])
            for expr in special
        ])
        special_expr = special_expr.setResultsName("unit")
        special_expr.setParseAction(self.parseAgo)

        parser = (special_expr | ago | time_range)

        return parser
Example #9
0
    def setup(self):
        """Build and return the parser for relative time expressions.

        Three forms are recognised and tried in this order:

        * a special keyword taken from ``special`` (e.g. ``yesterday``),
        * ``[<modifier>] <unit> ago`` (e.g. ``5 days ago``),
        * ``[in the] last|past [<modifier>] <unit>``
          (e.g. ``in the last 10 days``).

        ``<unit>`` is one of the keys of ``time_units``; ``<modifier>`` is
        either a run of digits or one of the entries of ``add_modifiers``.
        The ``parse*`` methods of this object are attached as parse actions.
        """
        # some expressions that will be reused
        units = get_match_first([Keyword(unit) for unit in time_units])
        units = units.setResultsName("unit")
        units.setParseAction(lambda s, l, tok: time_units[tok[0]])

        multiplier = Word(nums)
        multiplier = multiplier.setResultsName("multiply")
        multiplier.setParseAction(self.parseMulti)

        adder = get_match_first([CL(add) for add in add_modifiers])
        adder = adder.setResultsName("add")
        adder.setParseAction(self.parseAdd)
        modifier = (multiplier | adder)  # + FollowedBy(units)

        # ago
        #
        # e.g 5 days ago
        #
        # Keyword matches the exact word. Word("ago") would treat "ago" as a
        # set of characters and also accept strings such as "goa" or "aaa".
        ago = Optional(modifier) + units + Suppress(Keyword("ago"))
        ago.setParseAction(self.parseAgo)

        # time range
        #
        # e.g in the last 10 days
        time_range = (Suppress(Optional(CL("in the"))) +
                      Suppress(Keyword("last") | Keyword("past")) +
                      Optional(modifier) +
                      units)
        time_range.setParseAction(self.parseRange)

        # special keyword handling
        #
        # e.g yesterday
        # only handles yesterday right now, maybe need to be modified to do
        # more
        special_expr = get_match_first([
            Keyword(expr).setParseAction(lambda s, l, tok: special[tok[0]])
            for expr in special
        ])
        special_expr = special_expr.setResultsName("unit")
        special_expr.setParseAction(self.parseAgo)

        parser = (special_expr | ago | time_range)

        return parser
Example #10
0
def stream_query():
    '''
    Build the parser for a stream query.

    <stream-query> ::=
        'SELECT' <stream-operation> '(' <table-window> ')'

    The SELECT keyword is suppressed; what follows it is parsed by
    stream_term(), and ParsedStreamQuery is attached as the parse action.
    '''
    from grammar.keywords import SELECT_KEYWORD
    from grammar.parsed import ParsedStreamQuery

    select = Suppress(SELECT_KEYWORD)
    query = select + stream_term()
    return query.setParseAction(ParsedStreamQuery)
Example #11
0
class JenkinsFileParser:
    """pyparsing-based reader for the ``stage(...) { ... }`` blocks of a Jenkinsfile.

    The grammar is built once in the constructor; ``evaluate_stages`` applies
    it to the file named by ``filename`` and ``find_stage_by_name`` searches
    a string for one stage.
    """

    # Base results names; create_grammar appends '*' to them, which is
    # pyparsing's shorthand for listAllMatches=True.
    STAGE_KEY = 'stage'
    COMMENTED_STAGE_KEY = 'commented_stage'

    def __init__(self, filename='Jenkinsfile'):
        """Remember *filename* and build the grammar."""
        self.filename = filename
        self.create_grammar()

    def create_grammar(self):
        """Create the grammar elements and store them as attributes."""
        # Everything up to the first line that starts with 'stage',
        # optionally preceded by '/*'; 'stages' is ignored while skipping.
        self.beg = SkipTo(LineStart() + Literal('/*')*(0, 1) + Literal('stage'), ignore=Literal('stages'))
        # Forward declaration: parallel sections contain blocks and blocks
        # may in turn be parallel sections.
        self.block = Forward()
        self.parallel = Suppress('parallel') + self.nested(self.block)
        # Keep only the first token of a parallel section.
        self.parallel.setParseAction(lambda t: t[0])
        self.environment = Suppress('environment') + self.nested()
        # Body of a stage: either braces whose content is a parallel section
        # or a (suppressed) environment section, stored under the name
        # 'parallel', or any other brace-delimited body, which is suppressed.
        self.stage_content = (
            self.nested((self.parallel | self.environment.suppress()), 'parallel') |
            self.nested().suppress()
        )

        # NOTE: 'stage' + '(' is ordinary string concatenation, so the first
        # element suppresses the single literal "stage(".
        self.stage = Group(
            Suppress('stage' + '(') +
            quotedString('stage_name').setParseAction(removeQuotes) +
            Suppress(')') +
            self.stage_content)(
                self.STAGE_KEY + '*'
            )
        # A stage wrapped in a /* ... */ comment.
        self.commented_stage = Group(Suppress('/*') + self.stage + Suppress('*/'))(self.COMMENTED_STAGE_KEY + '*')
        self.any_stage = self.stage | self.commented_stage
        self.block << Group(self.parallel | self.any_stage)('block*')

    @staticmethod
    def nested(elem=None, name=None):
        """Return a ``{ ... }`` nestedExpr, optionally with a results name.

        *elem* is passed as the ``content`` expression, ``'*/'`` is passed
        as the expression to ignore, and *name*, when truthy, is applied
        with setResultsName.
        """
        expr = nestedExpr('{', '}', content=elem, ignoreExpr=Literal('*/'))
        if name:
            return expr.setResultsName(name)
        return expr

    def evaluate_stages(self):
        """Parse ``self.filename`` and return the result as a dict.

        The grammar applied is the skipped preamble (``self.beg``) followed
        by any number of blocks.
        """
        a = self.beg.suppress() + self.block[...]
        test = a.parseFile(self.filename)
        #  print(test.asDict())
        #  print(json.dumps(test.asDict(), indent=4))
        return test.asDict()

    def find_stage_by_name(self, name, content):
        """Return the first match in *content* of a stage called *name*.

        Both plain and ``/* ... */``-wrapped stages are searched for. The
        value returned is the first item produced by scanString; ``next``
        raises StopIteration when there is no match.
        """
        # The name surrounded by single or double quote characters; the
        # quote characters themselves are suppressed.
        quoted_name = (Literal('"') | Literal("'")).suppress() + name + (Literal('"') | Literal("'")).suppress()
        #  named_stage = Literal('/*')*(0, 1) + 'stage' + '(' + quoted_name + ')' + self.nested() + Literal('*/')*(0, 1)
        # Here 'stage' + '(' is joined into the one string "stage(" before
        # pyparsing sees it; in the commented variant below the two strings
        # are added to a Literal one after the other instead.
        named_stage = 'stage' + '(' + quoted_name + ')' + self.nested()
        commented_named_stage = Literal('/*') + 'stage' + '(' + quoted_name + ')' + self.nested() + Literal('*/')
        return next((named_stage | commented_named_stage).scanString(content))
Example #12
0
def expr_parser():
    """Return a parser for ``<letters>_<integer> <relation> <number>``.

    ``<relation>`` is ``<=`` (mapped to ``Relation.LE``) or ``>`` (mapped
    to ``Relation.GT``) and ``<number>`` is whatever ``stl.num_parser()``
    matches. The leading letters and the underscore are dropped, and the
    first three remaining tokens are passed to ``LLTSignal``.
    """
    def to_relation(tokens):
        if tokens[0] == "<=":
            return Relation.LE
        return Relation.GT

    def to_signal(tokens):
        return LLTSignal(tokens[0], tokens[1], tokens[2])

    number = stl.num_parser()

    underscore = Suppress(Literal("_"))
    less_equal = Literal("<=")
    greater = Literal(">")

    index = Word(nums).setParseAction(lambda t: int(t[0]))
    comparison = (less_equal | greater).setParseAction(to_relation)

    signal = Suppress(Word(alphas)) + underscore + index + comparison + number
    signal.setParseAction(to_signal)
    return signal
Example #13
0
def parser():
    """Return the regular-expression grammar, building it on first use.

    The grammar is cached in the module-level ``_parser``. Building it
    sets pyparsing's default whitespace characters to the empty string
    through ``ParserElement.setDefaultWhitespaceChars``.
    """
    global _parser
    if _parser is not None:
        return _parser

    # Has to come before the elements below are created.
    ParserElement.setDefaultWhitespaceChars("")

    open_bracket = Literal("[")
    close_bracket = Literal("]")
    open_brace = Literal("{")
    close_brace = Literal("}")
    open_paren = Literal("(")
    close_paren = Literal(")")

    # \d, \w, \s, \Z
    macro = Suppress("\\") + oneOf(list("dwsZ"))
    # Any other backslash-escaped printable character.
    escaped = ~macro + Combine("\\" + oneOf(list(printables)))
    plain_chars = "".join(
        c for c in string.printable if c not in r"\[]{}().*?+|")

    # [...] character class
    char_range = Combine(open_bracket.suppress() +
                         SkipTo(close_bracket, ignore=escaped) +
                         close_bracket.suppress())
    literal = (escaped | oneOf(list(plain_chars)))
    dot = Literal(".")

    # {n}, {m,n}, or one of * + ?
    exact_count = open_brace + Word(nums).setResultsName("count") + close_brace
    bounded_count = (open_brace + Word(nums).setResultsName("minCount") + "," +
                     Word(nums).setResultsName("maxCount") + close_brace)
    repetition = (exact_count | bounded_count | oneOf(list("*+?")))

    expr = Forward()
    group_option = Optional(
        Literal("?").suppress() + oneOf(list(":P"))).setResultsName("option")
    group = (open_paren.suppress() +
             group_option +
             expr.setResultsName("expr") +
             close_paren.suppress())

    term = (literal | char_range | macro | dot | group)
    expr << operatorPrecedence(term, [
        (repetition, 1, opAssoc.LEFT, create(Repetition)),
        (None, 2, opAssoc.LEFT, create(Sequence)),
        (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)),
    ])

    group.setParseAction(create(Group))
    char_range.setParseAction(create(Range))
    literal.setParseAction(create(Character))
    macro.setParseAction(create(Macro))
    dot.setParseAction(create(Dot))

    _parser = expr
    return _parser
Example #14
0
def parser():
    """Return the regular-expression grammar, building it on first use.

    The grammar is cached in the module-level ``_parser``. Building it
    sets pyparsing's default whitespace characters to the empty string
    through ``ParserElement.setDefaultWhitespaceChars``.
    """
    global _parser
    if _parser is not None:
        return _parser

    # Has to come before the elements below are created.
    ParserElement.setDefaultWhitespaceChars("")

    open_bracket = Literal("[")
    close_bracket = Literal("]")
    open_brace = Literal("{")
    close_brace = Literal("}")
    open_paren = Literal("(")
    close_paren = Literal(")")

    # \d, \w, \s, \Z
    macro = Suppress("\\") + oneOf(list("dwsZ"))
    # Any other backslash-escaped printable character.
    escaped = ~macro + Combine("\\" + oneOf(list(printables)))
    plain_chars = "".join(
        c for c in string.printable if c not in r"\[]{}().*?+|")

    # [...] character class
    char_range = Combine(open_bracket.suppress() +
                         SkipTo(close_bracket, ignore=escaped) +
                         close_bracket.suppress())
    literal = (escaped | oneOf(list(plain_chars)))
    dot = Literal(".")

    # {n}, {m,n}, or one of * + ?
    exact_count = open_brace + Word(nums).setResultsName("count") + close_brace
    bounded_count = (open_brace + Word(nums).setResultsName("minCount") + "," +
                     Word(nums).setResultsName("maxCount") + close_brace)
    repetition = (exact_count | bounded_count | oneOf(list("*+?")))

    expr = Forward()
    group_option = Optional(
        Literal("?").suppress() + oneOf(list(":P"))).setResultsName("option")
    group = (open_paren.suppress() +
             group_option +
             expr.setResultsName("expr") +
             close_paren.suppress())

    term = (literal | char_range | macro | dot | group)
    expr << operatorPrecedence(term, [
        (repetition, 1, opAssoc.LEFT, create(Repetition)),
        (None, 2, opAssoc.LEFT, create(Sequence)),
        (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)),
    ])

    group.setParseAction(create(Group))
    char_range.setParseAction(create(Range))
    literal.setParseAction(create(Character))
    macro.setParseAction(create(Macro))
    dot.setParseAction(create(Dot))

    _parser = expr
    return _parser
Example #15
0
class Compiler:
    """Runs the preprocessor and substitutes ``$name`` variables in queries."""

    def __init__(self):
        # Parser for "$name"; built on first use by pythonVar().
        self._pythonVar = None
        # Variable names collected by onPythonVar() while a query is
        # being transformed.
        self.varNames = []
        Preprocess.setSocialiteModule(getModuleVar())

    def pythonVar(self):
        """Return the parser for ``$identifier``, creating it on first use.

        The identifier must not be followed by ``.identifier`` or ``(``.
        Creating the parser sets pyparsing's default whitespace characters
        to space and tab.
        """
        if not self._pythonVar:
            from pyparsing import (ParserElement, Word, alphas, alphanums,
                                   Literal, Suppress, FollowedBy)
            _ws = ' \t'
            ParserElement.setDefaultWhitespaceChars(_ws)
            ident = Word(alphas + "_", alphanums + "_")
            lparen = Literal("(")
            dot = Literal(".")
            dollar = Literal("$")

            self._pythonVar = Suppress(dollar) + ident + ~FollowedBy(
                (dot + ident) | lparen)
            self._pythonVar.setParseAction(self.onPythonVar)
        return self._pythonVar

    def compile(self, src):
        """Return the result of ``Preprocess.run(src)``."""
        gen = Preprocess.run(src)
        return gen

    def processPythonVars(self, query):
        """Parenthesise *query* and replace its ``$var`` references.

        Every ``$var`` matched by pythonVar() is replaced with ``%s``. If
        any were found, ``%<func>(var1,var2,...)`` is appended, where
        ``<func>`` is the value returned by getPassVarsFunc(). The list of
        collected names is emptied before returning.
        """
        query = '(' + query + ')'

        tmp = query
        if "$" in tmp:
            tmp = self.pythonVar().transformString(query)
        if self.varNames:
            query = ''.join([
                tmp, "%" + getPassVarsFunc() + "(", ','.join(self.varNames),
                ")"
            ])
        else:
            query = tmp

        # Empty the list in place. The earlier xrange-based pop loop only
        # ran on Python 2; slice deletion works on Python 2 and 3.
        del self.varNames[:]
        return query

    def onPythonVar(self, inputStr, loc, tokens):
        """Parse action: record the variable name and return ``"%s"``."""
        varName = ''.join(tokens)
        self.varNames.append(varName)
        return "%s"
Example #16
0
def detect_token(jade):
    """Parse the start of *jade* as a doctype, a tag or a comment.

    The three alternatives are tried in that order with parseString, and
    the resulting tokens are returned joined by single spaces. The
    ``parse_*`` functions attached as parse actions decide what those
    tokens are.
    """

    # '!!!' or 'doctype' at the start of a line, optionally followed by
    # one of the listed doctype names (matched caselessly).
    doctype = LineStart() + oneOf('!!! doctype') + Optional(oneOf('5 html xml' \
            + ' default transitional strict frameset 1.1 basic mobile', True))
    doctype.setParseAction(parse_doctype)

    # '#name' and '.name'; the leading '#' / '.' is suppressed.
    element_id = Suppress('#') + Word(alphanums + '_' + '-')
    element_class = Suppress('.') + Word(alphanums + '_' + '-')

    # Either an id followed by any number of classes, or one or more
    # classes followed by an optional id.
    selectors = (element_id.setResultsName('element_id') \
        + ZeroOrMore(element_class).setResultsName('element_class')) \
        | (OneOrMore(element_class).setResultsName('element_class') \
        + Optional(element_id).setResultsName('element_id'))
    selectors.setParseAction(parse_selectors)

    # Either selectors on their own, or an alphabetic element name with
    # optional selectors. setResultsName returns a copy, so the copies
    # taken here carry the parse action attached just above.
    element = selectors.setResultsName('selectors') \
        | (Word(alphas).setResultsName('element_name') \
        + Optional(selectors).setResultsName('selectors'))
    element.setParseAction(parse_element)

    # Attribute text is any run of characters other than parentheses;
    # nestedExpr uses '(' and ')' as its default delimiters.
    attribute = CharsNotIn('('+')')
    attributes = nestedExpr(content=attribute)

    tag = element.setResultsName('element') \
        + Optional(attributes).setResultsName('attributes')
    tag.setParseAction(parse_tag)


    # TODO: block-comment and conditional-comment
    # '//-' comments are suppressed entirely; '//' comments keep the rest
    # of the line and pass it to parse_buffered_comment.
    unbuffered_comment = Suppress(Suppress('//-') + restOfLine)
    buffered_comment = Suppress('//') + restOfLine
    buffered_comment.setParseAction(parse_buffered_comment)

    # Order matters here, as buffered will pick up
    # unbuffered comments if set first
    comment = unbuffered_comment | buffered_comment

    source = doctype | tag | comment
    parsed = source.parseString(jade)


    return ' '.join(parsed)


    '''
    def __init__(self, datatype):
        """Build the pyparsing grammar stored in ``self.root_parser``.

        :param datatype: ``"protein"`` or ``"DNA"``; selects the alphabet
            of the single-letter states used in rate and frequency lines.
        :raises RaxmlError: for any other *datatype* (an error is logged
            first).
        """
        if datatype == "protein":
            alphabet = "ARNDCQEGHILKMFPSTWYV"
        elif datatype == "DNA":
            alphabet = "ATCG"
        else:
            log.error("Unknown datatype '%s', please check" % datatype)
            raise RaxmlError

        number = Word(nums + '.-').setParseAction(lambda x: float(x[0]))
        state = Word(alphabet, exact=1)
        colon = Suppress(":")

        def after_label(label, callback):
            # Skip everything up to *label*, drop the label itself and
            # pass the number that follows to *callback*.
            expr = Suppress(SkipTo(label)) + Suppress(label) + number
            expr.setParseAction(callback)
            return expr

        lnl = after_label(
            Regex("Final GAMMA.+:") | Literal("Likelihood:"), self.set_lnl)
        seconds = after_label(
            Regex("Overall Time.+:") | Regex("Overall Time.+tion "),
            self.set_seconds)
        alpha = after_label(Literal("alpha:"), self.set_alpha)
        tree_size = after_label(Literal("Tree-Length:"), self.set_tree_size)

        # "rate X <-> Y: value"
        rate = Suppress("rate") + state + Suppress("<->") + state + colon + number
        rate.setParseAction(self.set_rate)
        rates = OneOrMore(rate)

        # "freq pi(X): value"
        freq = Suppress("freq pi(") + state + Suppress("):") + number
        freq.setParseAction(self.set_freq)
        freqs = OneOrMore(freq)

        # Just look for these things
        self.root_parser = seconds + lnl + alpha + tree_size + rates + freqs
Example #18
0
    def __init__(self, datatype):
        """Build the pyparsing grammar stored in ``self.root_parser``.

        :param datatype: ``"protein"`` or ``"DNA"``; selects the alphabet
            of the single-letter states used in rate and frequency lines.
        :raises RaxmlError: for any other *datatype* (an error is logged
            first).
        """
        if datatype == "protein":
            alphabet = "ARNDCQEGHILKMFPSTWYV"
        elif datatype == "DNA":
            alphabet = "ATCG"
        else:
            log.error("Unknown datatype '%s', please check" % datatype)
            raise RaxmlError

        number = Word(nums + '.-').setParseAction(lambda x: float(x[0]))
        state = Word(alphabet, exact=1)
        colon = Suppress(":")

        def after_label(label, callback):
            # Skip everything up to *label*, drop the label itself and
            # pass the number that follows to *callback*.
            expr = Suppress(SkipTo(label)) + Suppress(label) + number
            expr.setParseAction(callback)
            return expr

        lnl = after_label(
            Regex("Final GAMMA.+:") | Literal("Likelihood:"), self.set_lnl)
        seconds = after_label(
            Regex("Overall Time.+:") | Regex("Overall Time.+tion "),
            self.set_seconds)
        alpha = after_label(Literal("alpha:"), self.set_alpha)
        tree_size = after_label(Literal("Tree-Length:"), self.set_tree_size)

        # "rate X <-> Y: value"
        rate = Suppress("rate") + state + Suppress("<->") + state + colon + number
        rate.setParseAction(self.set_rate)
        rates = OneOrMore(rate)

        # "freq pi(X): value"
        freq = Suppress("freq pi(") + state + Suppress("):") + number
        freq.setParseAction(self.set_freq)
        freqs = OneOrMore(freq)

        # Just look for these things
        self.root_parser = seconds + lnl + alpha + tree_size + rates + freqs
Example #19
0
    def parse(self, header):
        """Parse an ARFF header and record the result on this object.

        A pyparsing grammar is assembled for the ``@relation`` line, the
        ``@attribute`` declarations (``numeric``, ``string``, ``{...}``
        enumerations, ``date`` and nested ``relational`` attributes) and
        the ``@data`` marker, and is then applied to the whole of *header*.

        On success the relation name, the line number of ``@data`` and the
        parsed attributes are stored via ``_relation_name``,
        ``_data_start``, ``_find_relational`` and ``_linearize_attrs``, and
        ``_index`` is reset to 0.

        :param header: header text to parse.
        :raises HeaderError: when *header* does not match the grammar.
        """
        comment_line = self._comment()
        unquoted = quotedString.copy().setParseAction(removeQuotes)
        token = unquoted | Word(printables, excludeChars='{},%')
        # Enumeration values use the plain quotedString, i.e. without the
        # removeQuotes action attached to ``unquoted`` above.
        enum_token = quotedString | Word(printables, excludeChars='{},%')

        # "@relation <rest of line>"; the stripped remainder is the name.
        relation_name = Optional(restOfLine, default='default_name')('rel_name')
        relation_name.setParseAction(lambda t: t.rel_name.strip())
        relation_stmt = Suppress(CaselessLiteral("@relation")) + relation_name
        relation_block = (ZeroOrMore(comment_line) + relation_stmt +
                          ZeroOrMore(comment_line))

        # "{a, b, c}": an empty match injects self.ENUM as the type token.
        enum_marker = Empty().copy().setParseAction(lambda t: self.ENUM)
        enum_values = Group(
            delimitedList(enum_token, delim=self._separator))("next_arg")
        enum_values.setParseAction(self.get_values)
        nominal_type = (enum_marker + Suppress(Literal("{")) + enum_values +
                        Suppress(Literal("}")))

        # "date [format]"
        date_format = Optional(CharsNotIn("{},\n"))("next_arg")
        date_format.setParseAction(self._adapt_date_format)
        date_type = CaselessLiteral("date") + date_format

        # Relational attributes contain an attribute list of their own,
        # hence the forward declaration.
        nested_attrs = Forward()
        relational_type = (CaselessLiteral("relational") + nested_attrs +
                           Suppress(CaselessLiteral("@end")) + token)

        type_spec = (CaselessLiteral("numeric") | CaselessLiteral("string") |
                     nominal_type | date_type | relational_type)("attr_type")
        attribute_decl = (Suppress(CaselessLiteral("@attribute")) +
                          (token.copy())("attr_name") + type_spec)
        nested_attrs << (
            Group(OneOrMore(comment_line | attribute_decl)))("children")

        # The "@data" token is replaced by the line number it was found on.
        data_marker = (CaselessLiteral("@data"))("data_start")
        data_marker.setParseAction(lambda s, p, k: (lineno(p, s)))

        grammar = relation_block + nested_attrs + data_marker
        attribute_decl.setParseAction(self._create_attribute)

        try:
            parsed = grammar.parseString(header, parseAll=True)
        except ParseException as err:
            raise HeaderError(FileType.ARFF, err.lineno, err.col, err.line, err)

        self._relation_name = parsed.rel_name
        self._find_relational(parsed.children)
        self._linearize_attrs(parsed.children)
        self._data_start = parsed.data_start
        self._index = 0
Example #20
0
class Compiler:
    """Runs the preprocessor and substitutes ``$name`` variables in queries."""

    def __init__(self):
        # Parser for "$name"; built on first use by pythonVar().
        self._pythonVar = None
        # Variable names collected by onPythonVar() while a query is
        # being transformed.
        self.varNames = []
        Preprocess.setSocialiteModule(getModuleVar())

    def pythonVar(self):
        """Return the parser for ``$identifier``, creating it on first use.

        The identifier must not be followed by ``.identifier`` or ``(``.
        Creating the parser sets pyparsing's default whitespace characters
        to space and tab.
        """
        if not self._pythonVar:
            from pyparsing import (ParserElement, Word, alphas, alphanums,
                                   Literal, Suppress, FollowedBy)
            _ws = ' \t'
            ParserElement.setDefaultWhitespaceChars(_ws)
            ident = Word(alphas+"_", alphanums+"_")
            lparen = Literal("(")
            dot = Literal(".")
            dollar = Literal("$")

            self._pythonVar = Suppress(dollar) + ident + ~FollowedBy((dot+ident) | lparen)
            self._pythonVar.setParseAction(self.onPythonVar)
        return self._pythonVar

    def compile(self, src):
        """Return the result of ``Preprocess.run(src)``."""
        gen = Preprocess.run(src)
        return gen

    def processPythonVars(self, query):
        """Parenthesise *query* and replace its ``$var`` references.

        Every ``$var`` matched by pythonVar() is replaced with ``%s``. If
        any were found, ``%<func>(var1,var2,...)`` is appended, where
        ``<func>`` is the value returned by getPassVarsFunc(). The list of
        collected names is emptied before returning.
        """
        query = '('+query+')'

        tmp = query
        if "$" in tmp:
            tmp = self.pythonVar().transformString(query)
        if self.varNames:
            query = ''.join([tmp, "%"+getPassVarsFunc()+"(", ','.join(self.varNames), ")"])
        else:
            query = tmp

        # Empty the list in place. The earlier xrange-based pop loop only
        # ran on Python 2; slice deletion works on Python 2 and 3.
        del self.varNames[:]
        return query

    def onPythonVar(self, inputStr, loc, tokens):
        """Parse action: record the variable name and return ``"%s"``."""
        varName = ''.join(tokens)
        self.varNames.append(varName)
        return "%s"
def ListParser():
    """
    Build a parser for list columns whose entries are pairs of values.

    The returned function takes text shaped like ``[(a, b), (c, d)]``,
    converts every number with float() and returns ``asList()`` of the
    match. A pyparsing parse error is re-raised as ValueError.
    """

    number = Regex(r'[-+]?[0-9]+(?:\.[0-9]*)?(?:e[-+]?[0-9]+)?', IGNORECASE)
    number.setParseAction(lambda toks: float(toks[0]))

    # "(x, y)"
    pair = Suppress('(') + number + Suppress(',') + number + Suppress(')')
    pair.setParseAction(tuple)

    # "[pair, pair, ...]"
    pairs = Suppress('[') + delimitedList(pair) + Suppress(']')
    pairs.setParseAction(list)

    def parse(s):
        try:
            matched = pairs.parseString(s)
        except ParseBaseException as e:
            raise ValueError(e)
        else:
            return matched.asList()

    return parse
Example #22
0
def ListParser():
	"""
	Build a parser for list columns whose entries are pairs of values.

	The returned function takes text shaped like ``[(a, b), (c, d)]``,
	converts every number with float() and returns ``asList()`` of the
	match. A pyparsing parse error is re-raised as ValueError.
	"""

	number = Regex(r'[-+]?[0-9]+(?:\.[0-9]*)?(?:e[-+]?[0-9]+)?', IGNORECASE)
	number.setParseAction(lambda toks: float(toks[0]))

	# "(x, y)"
	pair = Suppress('(') + number + Suppress(',') + number + Suppress(')')
	pair.setParseAction(tuple)

	# "[pair, pair, ...]"
	pairs = Suppress('[') + delimitedList(pair) + Suppress(']')
	pairs.setParseAction(list)

	def parse(s):
		try:
			matched = pairs.parseString(s)
		except ParseBaseException as e:
			raise ValueError(e)
		else:
			return matched.asList()

	return parse
Example #23
0
def getEbnfParser(symbols):
    """ Returns an EBNF parser for the command language.

    While the returned parser runs, its parse actions append
    ``(value, TokenType)`` pairs to *symbols*: string literals (without
    their surrounding quote characters), integers, ``$variables``,
    function names at a call, function names at a ``let`` definition, and
    the terminating ``;``.

    :param symbols: list that receives the pairs.
    """
    identifier = Word(alphas + '_', alphanums + '_')
    # Attach the action to a copy: quotedString is a module-level pyparsing
    # object and setParseAction modifies the element it is called on, so
    # using it directly would change quotedString for every other user.
    string = quotedString.copy().setParseAction(
        lambda t: symbols.append((t[0][1:-1], TokenType.StrLit))
    )
    integer = Word(nums).setParseAction(
        lambda t: symbols.append((int(t[0]), TokenType.NumLit))
    )
    var = Suppress("$") + identifier
    var.setParseAction(
        lambda t: symbols.append((t[0], TokenType.Var))
    )
    literal = var | string | integer
    # A function name, optionally preceded by a (suppressed) ".".
    fnid = Suppress(Optional(".")) + identifier
    fnid.setParseAction(
        lambda t: symbols.append((t[0], TokenType.Call))
    )
    # A call is a function name followed by nested calls or literals,
    # optionally wrapped in parentheses.
    call = Forward()
    callb = fnid + ZeroOrMore(call | literal)
    call << ((Suppress("(") + callb + Suppress(")")) | callb)
    # "let <name> [$var ...] = <call>"
    fndef_head = Suppress("let") + identifier
    fndef_head.setParseAction(
        lambda t: symbols.append((t[0], TokenType.Def))
    )
    definition = fndef_head + ZeroOrMore(var) + Suppress("=") + call
    cmd = OneOrMore((definition | call) + Word(";").setParseAction(
        lambda t: symbols.append((t[0], TokenType.End))
    ))
    msg = OneOrMore(cmd)
    return msg
Example #24
0
def _parse_data(data: str) -> List[_PackageData]:
    """Parse a Lisp-style package listing into ``_PackageData`` records.

    The accepted input has the shape ``(<int> <entry>*)`` where each entry is
    ``(name . [version deps "description" kind keyvals])``. The dependency
    list and the ``kind`` word are matched but discarded, so every record is
    built as ``_PackageData(name, version, description, keyvals)`` with
    ``version`` joined by dots and ``keyvals`` collected into a dict.

    :param data: full text of the listing; the whole string must match.
    :return: the pyparsing result holding one ``_PackageData`` per entry.
    :raises pyparsing.ParseException: if ``data`` does not match the grammar.
    """
    lpar, rpar, lbrk, rbrk, dot = map(Suppress, '()[].')
    nil = Suppress('nil')

    pkgname = Word(printables)
    decimal = Regex(r'0|-?[1-9]\d*').setParseAction(lambda t: int(t[0]))
    qstring = QuotedString(quoteChar='"', escChar='\\')

    # "(1 2 3)" -> "1.2.3"
    version = (lpar + OneOrMore(decimal) +
               rpar).setParseAction(lambda s, l, t: ['.'.join(map(str, t))])

    dependency_entry = lpar + pkgname + version + rpar
    dependency_list = ((lpar + OneOrMore(dependency_entry) + rpar) | nil)

    # Dots and nils are suppressed, so only the quoted strings survive.
    people_list = OneOrMore(qstring | dot | nil)

    # Each key/value rule yields a single ``(key, value)`` tuple.
    keyval_url = (lpar + (Suppress(':url') | Suppress(':homepage')) + dot +
                  qstring +
                  rpar).setParseAction(lambda s, l, t: [('url', t[0])])
    keyval_keywords = (lpar + Suppress(':keywords') + ZeroOrMore(qstring) +
                       rpar).setParseAction(
                           lambda s, l, t: [('keywords', [str(k) for k in t])])
    keyval_commit = (lpar + Suppress(':commit') + dot + qstring +
                     rpar).setParseAction(lambda s, l, t: [('commit', t[0])])
    keyval_maintainer = (
        lpar + Suppress(':maintainer') + people_list + rpar
    ).setParseAction(lambda s, l, t: [('maintainer', [str(m) for m in t])])
    keyval_author = (
        lpar + Suppress(':author') + people_list +
        rpar).setParseAction(lambda s, l, t: [('author', [str(a) for a in t])])
    keyval_authors = (lpar + Suppress(':authors') +
                      OneOrMore(lpar + people_list + rpar) +
                      rpar).setParseAction(
                          lambda s, l, t: [('authors', [str(a) for a in t])])

    # ':authors' is tried before ':author' because the shorter literal would
    # otherwise match the beginning of the longer keyword.
    keyval_item = (keyval_url | keyval_keywords | keyval_commit |
                   keyval_maintainer | keyval_authors | keyval_author)

    # A bare ``nil`` in the keyvals slot stands for an empty mapping. The
    # action goes on a *copy* of ``nil``: setParseAction modifies the element
    # in place, and the same ``nil`` object is part of ``people_list`` and
    # ``dependency_list``, where it must keep producing no token at all.
    keyvals = (lpar + ZeroOrMore(keyval_item) + rpar
               ).setParseAction(lambda s, l, t: [{k: v
                                                  for k, v in t}]
                                ) | nil.copy().setParseAction(
                                    lambda s, l, t: [{}])

    package_entry = (lpar + pkgname + dot + lbrk + version +
                     Suppress(dependency_list) + qstring +
                     Suppress(Word(alphas)) + keyvals + rbrk +
                     rpar).setParseAction(lambda s, l, t: [_PackageData(*t)])

    # The leading integer of the listing is matched and dropped.
    root = lpar + Suppress(decimal) + ZeroOrMore(package_entry) + rpar

    return root.parseString(data, parseAll=True)  # type: ignore
Example #25
0
# One field: "<required|optional|repeated> <type> <name> = <number>;".
# The matched tokens are handed to Field.
message_field = (
    (Keyword("required") | Keyword("optional") | Keyword("repeated"))
    + field_type + field_name + Suppress("=") + field_number + Suppress(";")
).setParseAction(Field)

# One message: "message <name> { <field>* }", handed to Message as the name
# followed by the group of fields.
message_name = Regex("[A-Za-z_]+")
message_spec = (
    Suppress(Keyword("message"))
    + message_name
    + Suppress("{")
    + Group(ZeroOrMore(message_field))
    + Suppress("}")
).setParseAction(Message)

# One option: 'option <name> = "<value>";' becomes a (name, value) pair with
# the surrounding double quotes stripped from the value.
option_spec = (
    Suppress(Keyword("option")) + Regex("[a-z_]+") + Suppress("=") + Regex('"[^"]*"') + Suppress(";")
).setParseAction(lambda toks: (toks[0], toks[1][1:-1]))

# All options collapse into a single dict token.
option_list = ZeroOrMore(option_spec).setParseAction(lambda toks: dict(toks.asList()))
message_list = Group(ZeroOrMore(message_spec))

# A file is the options followed by the messages, with comments ignored.
proto_file = (option_list + message_list).ignore(comment)

# Both the .proto file and the output folder are required. Stop after printing
# the usage text; otherwise the sys.argv lookups below raise IndexError.
if len(sys.argv) < 3:
    print("usage: simpleproto some_file.proto outputfolder")
    print("The output will be placed in a folder within outputfolder")
    print("appropriate for the package specified in the file's java_package")
    print("option, in the file specified by java_outer_classname.")
    sys.exit(1)

# The grammar yields exactly two top-level items: the options dict and the
# list of messages.
options, messages = proto_file.parseFile(sys.argv[1], parseAll=True).asList()
output_class = options["java_outer_classname"]
output_package = options["java_package"]
# Dots in the package name become directory separators under the output folder.
output_file_path = os.path.join(sys.argv[2], output_package.replace(".", os.path.sep), output_class + ".java")
Example #26
0
# parameters: "<datatype>: <name> [ <attributes> ]"
DATATYPE = oneOf(PARAMETER_CLASSES.keys())
PARAMETER = (
    DATATYPE('datatype')
    + Suppress(':')
    + NAME('name')
    + Suppress('[')
    + ATTRIBUTES_LIST('properties')
    + Suppress(']')
)
def _get_parameter(token):
    """Pass the 'name', 'datatype' and 'properties' results to get_parameter."""
    name, datatype, properties = (
        token['name'], token['datatype'], token['properties'])
    return get_parameter(name, datatype, properties)
PARAMETER.setParseAction(_get_parameter)
PARAMETERS_LIST = Group(ZeroOrMore(PARAMETER))('parameters')

# variables: the text 'variable: ', an alphanumeric name, then a quoted value
# whose quotes are removed
VARIABLE = (
    Suppress('variable: ')
    + Word(alphanums)('name')
    + quotedString('value').addParseAction(removeQuotes)
)
def _get_variable(token):
    """Return a Variable built from the 'name' and 'value' results."""
    name = token['name']
    value = token['value']
    return Variable(name, value)
VARIABLE.setParseAction(_get_variable)

# Declared up front so SECTION can refer to its children before they are
# defined.
SECTION_CHILDREN_LIST = Forward()

SECTIONS_LIST = Forward()
# "section: <name> [ <attributes> ] <children> endsection: <name>"; the
# closing name is matched but discarded.
SECTION = (
    Suppress('section:')
    + NAME('name')
    + Suppress('[')
    + ATTRIBUTES_LIST('properties')
    + Suppress(']')
    + SECTION_CHILDREN_LIST('children')
    + Suppress('endsection:')
    + Suppress(NAME)
)
def _get_section(token):
    """Return a Section built from the 'name', 'properties' and 'children' results."""
    name = token['name']
    return Section(name,
                   properties=token['properties'],
                   children=token['children'])
SECTION.setParseAction(_get_section)
SECTIONS_LIST << Group(ZeroOrMore(SECTION))
Example #27
0
    string.ascii_letters + string.digits + ';-',
)
attr.leaveWhitespace().setName('attr')
# Exactly two hexadecimal digits, as used after a backslash escape.
hexdigits = Word(string.hexdigits, exact=2).setName('hexdigits')
# A backslash (dropped from the result) followed by the two hex digits.
escaped = (Suppress(Literal('\\')) + hexdigits).setName('escaped')


def _p_escaped(s, l, t):
    """Parse action: replace a hexadecimal token by the character it encodes.

    Only the first token is used; ``s`` and ``l`` are accepted for the
    parse-action signature and ignored.
    """
    code_point = int(t[0], 16)
    return chr(code_point)


escaped.setParseAction(_p_escaped)
# A value is any run of ordinary characters and escapes, merged into one token.
value = Combine(OneOrMore(CharsNotIn('*()\\\0') | escaped)).setName('value')
# Each operator token is replaced by the matching pureldap.LDAPFilter_* object.
equal = Literal("=").setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_equalityMatch)
approx = Literal("~=").setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_approxMatch)
greater = Literal(">=").setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_greaterOrEqual)
less = Literal("<=").setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_lessOrEqual)
filtertype = (equal | approx | greater | less).setName('filtertype')
# attr, operator and value must follow each other without skipped whitespace.
simple = (attr + filtertype + value).leaveWhitespace().setName('simple')
Example #28
0
    def __init__(self, processor, baseiri, strict=False):
        """
        See class docstring.

        Passes ``processor``, ``baseiri`` and ``strict`` to ``self.reset``,
        then assembles the patch grammar, stores it in ``self.grammar`` and
        attaches this instance's handlers as parse actions.

        When ``strict`` is false the grammar additionally accepts a
        case-insensitive ``prefix`` declaration (no ``@``, no trailing
        period) and ignores ``#`` comments up to the end of the line.
        """
        # pylint: disable=R0914,R0915
        self.reset(processor, baseiri, strict)

        # --- Terms -------------------------------------------------------
        PrefixedName = PNAME_LN | PNAME_NS
        Iri = IRIREF | PrefixedName
        BNode = BLANK_NODE_LABEL | ANON

        # --- Triples -----------------------------------------------------
        # A literal may carry either a language tag or a "^^"-introduced datatype.
        RDFLiteral = STRING + Optional(LANGTAG("langtag") | Group(Suppress("^^") + Iri)("datatype"))
        # Object and PredicateObjectList are mutually recursive with the
        # rules below, so they are declared here and defined later with "<<".
        Object = Forward()
        Collection = Suppress("(") + ZeroOrMore(Object) + Suppress(")")
        PredicateObjectList = Forward()
        BlankNodePropertyList = Suppress("[") + PredicateObjectList + Suppress("]")
        TtlLiteral = RDFLiteral | NUMERIC_LITERAL | BOOLEAN_LITERAL
        Subject = Iri | BNode | Collection | VARIABLE  # added for LD Patch
        Predicate = Iri
        Object << (  # pylint: disable=W0104
            Iri | BNode | Collection | BlankNodePropertyList | TtlLiteral | VARIABLE
        )  # added for LD Patch
        Verb = Predicate | Keyword("a")
        ObjectList = Group(Object + ZeroOrMore(COMMA + Object))
        # The Optional after SEMICOLON allows a dangling ";" with nothing after it.
        PredicateObjectList << (  # pylint: disable=W0106
            Verb + ObjectList + ZeroOrMore(SEMICOLON + Optional(Verb + ObjectList))
        )
        Triples = (Subject + PredicateObjectList) | (BlankNodePropertyList + Optional(PredicateObjectList))

        Value = Iri | TtlLiteral | VARIABLE

        # --- Paths -------------------------------------------------------
        InvPredicate = Suppress("^") + Predicate
        Step = Suppress("/") + (Predicate | InvPredicate | INDEX)
        # Filters contain paths which may themselves contain filters.
        Filter = Forward()
        Constraint = Filter | UNICITY_CONSTRAINT
        Path = Group(OneOrMore(Step | Constraint))
        Filter << (
            Suppress("[")  # pylint: disable=W0106
            + Group(ZeroOrMore(Step | Constraint))("path")  # = Path (*)
            + Optional(Suppress("=") + Object)("value")
            + Suppress("]")
        )
        # (*) we can not reuse the Path rule defined above,
        #     because we want to set a name for that component
        Turtle = Triples + ZeroOrMore(PERIOD + Triples) + Optional(PERIOD)
        Graph = Suppress("{") + Optional(Turtle) + Suppress("}")

        # --- Statements --------------------------------------------------
        Prefix = Literal("@prefix") + PNAME_NS + IRIREF + PERIOD
        if not strict:
            # Prefix is rebound to the alternation, so the parse action set
            # on Prefix further down covers both spellings.
            SparqlPrefix = CaselessKeyword("prefix") + PNAME_NS + IRIREF
            Prefix = Prefix | SparqlPrefix
        Bind = BIND_CMD + VARIABLE + Value + Optional(Path) + PERIOD
        Add = ADD_CMD + Graph + PERIOD
        AddNew = ADDNEW_CMD + Graph + PERIOD
        Delete = DELETE_CMD + Graph + PERIOD
        DeleteExisting = DELETEEXISTING_CMD + Graph + PERIOD
        Cut = CUT_CMD + VARIABLE + PERIOD
        UpdateList = UPDATELIST_CMD + Subject + Predicate + SLICE + Collection + PERIOD

        Statement = Prefix | Bind | Add | AddNew | Delete | DeleteExisting | Cut | UpdateList
        Patch = ZeroOrMore(Statement)
        if not strict:
            Patch.ignore("#" + restOfLine)  # Comment
        # Ask pyparsing not to expand tab characters before parsing.
        Patch.parseWithTabs()

        self.grammar = Patch

        # --- Parse actions -----------------------------------------------
        # Attached after the grammar is assembled; the rules above hold
        # references to these same objects, so the actions still apply.
        # NOTE(review): IRIREF is not created in this method, so this call
        # modifies an object that presumably outlives this instance; confirm
        # that creating several parser instances is safe.
        IRIREF.setParseAction(self._parse_iri)
        PrefixedName.setParseAction(self._parse_pname)
        RDFLiteral.setParseAction(self._parse_turtleliteral)
        Collection.setParseAction(self._parse_collection)
        BlankNodePropertyList.setParseAction(self._parse_bnpl)
        Verb.setParseAction(self._parse_verb)
        ObjectList.setParseAction(self._parse_as_list)
        Triples.setParseAction(self._parse_tss)
        InvPredicate.setParseAction(self._parse_invpredicate)
        Filter.setParseAction(self._parse_filter)
        Path.setParseAction(self._parse_as_list)
        Prefix.setParseAction(self._do_prefix)
        Bind.setParseAction(self._do_bind)
        Add.setParseAction(self._do_add)
        AddNew.setParseAction(self._do_add_new)
        Delete.setParseAction(self._do_delete)
        DeleteExisting.setParseAction(self._do_delete_existing)
        Cut.setParseAction(self._do_cut)
        UpdateList.setParseAction(self._do_updatelist)
Example #29
0
class ControlParser(BaseParser):
    """A parser for BEL control statements.

    The parser is stateful: ``SET`` and ``UNSET`` statements update the
    current statement group, citation, evidence and annotations held on the
    instance, which can be read back with :meth:`get_annotations`.

    .. seealso::

        BEL 1.0 specification on `control records
        <http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_control_records>`_
    """
    def __init__(
        self,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        citation_clearing: bool = True,
        required_annotations: Optional[List[str]] = None,
    ) -> None:
        """Initialize the control statement parser.

        :param annotation_to_term: A dictionary of {annotation: set of valid values} defined with URL for parsing
        :param annotation_to_pattern: A dictionary of {annotation: regular expression string}
        :param annotation_to_local: A dictionary of {annotation: set of valid values} for parsing defined with LIST
        :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
        :param required_annotations: Annotations that are required
        """
        self.citation_clearing = citation_clearing

        self.annotation_to_term = annotation_to_term or {}
        self.annotation_to_pattern = annotation_to_pattern or {}
        self.annotation_to_local = annotation_to_local or {}

        # Mutable parser state, updated by the handle_* parse actions.
        self.statement_group = None
        self.citation_db = None
        self.citation_db_id = None
        self.evidence = None
        self.annotations = {}
        self.required_annotations = required_annotations or []

        annotation_key = ppc.identifier('key').setParseAction(
            self.handle_annotation_key)

        # setParseAction modifies the element it is called on. The stubs are
        # not created here, so each one is copied first; otherwise a second
        # ControlParser would replace the handlers bound to the first one.
        self.set_statement_group = set_statement_group_stub().setParseAction(
            self.handle_set_statement_group)
        self.set_citation = set_citation_stub.copy().setParseAction(
            self.handle_set_citation)
        self.set_evidence = set_evidence_stub.copy().setParseAction(
            self.handle_set_evidence)

        set_command_prefix = And([annotation_key('key'), Suppress('=')])
        self.set_command = set_command_prefix + qid('value')
        self.set_command.setParseAction(self.handle_set_command)

        self.set_command_list = set_command_prefix + delimited_quoted_list(
            'values')
        self.set_command_list.setParseAction(self.handle_set_command_list)

        # addParseAction keeps handle_annotation_key and runs the unset
        # handler after it.
        self.unset_command = annotation_key('key')
        self.unset_command.addParseAction(self.handle_unset_command)

        self.unset_evidence = supporting_text_tags(EVIDENCE)
        self.unset_evidence.setParseAction(self.handle_unset_evidence)

        self.unset_citation = Suppress(BEL_KEYWORD_CITATION)
        self.unset_citation.setParseAction(self.handle_unset_citation)

        self.unset_statement_group = Suppress(BEL_KEYWORD_STATEMENT_GROUP)
        self.unset_statement_group.setParseAction(
            self.handle_unset_statement_group)

        self.unset_list = delimited_unquoted_list('values')
        self.unset_list.setParseAction(self.handle_unset_list)

        # Copied for the same reason as the SET stubs above.
        self.unset_all = unset_all.copy().setParseAction(
            self.handle_unset_all)

        # MatchFirst tries the alternatives in order, so the specific
        # statements come before the generic key-based commands.
        self.set_statements = set_tag + MatchFirst([
            self.set_statement_group,
            self.set_citation,
            self.set_evidence,
            self.set_command,
            self.set_command_list,
        ])

        self.unset_statements = unset_tag + MatchFirst([
            self.unset_all,
            self.unset_citation,
            self.unset_evidence,
            self.unset_statement_group,
            self.unset_command,
            self.unset_list,
        ])

        self.language = self.set_statements | self.unset_statements

        super().__init__(self.language)

    @property
    def _in_debug_mode(self) -> bool:
        """Return True when neither enumerated nor regex annotations are defined.

        In this mode annotation keys and values are not validated.
        """
        return not self.annotation_to_term and not self.annotation_to_pattern

    @property
    def citation_is_set(self) -> bool:
        """Check if the citation is set."""
        return self.citation_db is not None and self.citation_db_id is not None

    def has_enumerated_annotation(self, annotation: str) -> bool:
        """Check if the annotation is defined as an enumeration."""
        return annotation in self.annotation_to_term

    def has_regex_annotation(self, annotation: str) -> bool:
        """Check if the annotation is defined as a regular expression."""
        return annotation in self.annotation_to_pattern

    def has_local_annotation(self, annotation: str) -> bool:
        """Check if the annotation is defined locally."""
        return annotation in self.annotation_to_local

    def has_annotation(self, annotation: str) -> bool:
        """Check if the annotation is defined."""
        return (self.has_enumerated_annotation(annotation)
                or self.has_regex_annotation(annotation)
                or self.has_local_annotation(annotation))

    def raise_for_undefined_annotation(self, line: str, position: int,
                                       annotation: str) -> None:
        """Raise an exception if the annotation is not defined.

        Nothing is checked while the parser is in debug mode.

        :raises: UndefinedAnnotationWarning
        """
        if self._in_debug_mode:
            return

        if not self.has_annotation(annotation):
            raise UndefinedAnnotationWarning(self.get_line_number(), line,
                                             position, annotation)

    def raise_for_invalid_annotation_value(self, line: str, position: int,
                                           key: str, value: str) -> None:
        """Raise an exception if the value is not valid for the annotation.

        The enumerated, regex and local definitions of ``key`` are checked in
        that order and the first violated one raises. Nothing is checked
        while the parser is in debug mode.

        :raises: IllegalAnnotationValueWarning or MissingAnnotationRegexWarning
        """
        if self._in_debug_mode:
            return

        if self.has_enumerated_annotation(
                key) and value not in self.annotation_to_term[key]:
            raise IllegalAnnotationValueWarning(self.get_line_number(), line,
                                                position, key, value)

        elif self.has_regex_annotation(
                key) and not self.annotation_to_pattern[key].match(value):
            raise MissingAnnotationRegexWarning(self.get_line_number(), line,
                                                position, key, value)

        elif self.has_local_annotation(
                key
        ) and value not in self.annotation_to_local[key]:  # TODO condense
            raise IllegalAnnotationValueWarning(self.get_line_number(), line,
                                                position, key, value)

    def raise_for_missing_citation(self, line: str, position: int) -> None:
        """Raise an exception if there is no citation present in the parser.

        The check only applies when citation clearing is enabled.

        :raises: MissingCitationException
        """
        if self.citation_clearing and not self.citation_is_set:
            raise MissingCitationException(self.get_line_number(), line,
                                           position)

    def handle_annotation_key(self, line: str, position: int,
                              tokens: ParseResults) -> ParseResults:
        """Handle an annotation key before parsing to validate that it's either enumerated or as a regex.

        :raise: MissingCitationException or UndefinedAnnotationWarning
        """
        key = tokens['key']
        self.raise_for_missing_citation(line, position)
        self.raise_for_undefined_annotation(line, position, key)
        return tokens

    def handle_set_statement_group(self, _, __,
                                   tokens: ParseResults) -> ParseResults:
        """Handle a ``SET STATEMENT_GROUP = "X"`` statement."""
        self.statement_group = tokens['group']
        return tokens

    def handle_set_citation(self, line: str, position: int,
                            tokens: ParseResults) -> ParseResults:
        """Handle a ``SET Citation = {"X", "Y", "Z", ...}`` statement.

        The current citation is cleared first. With two values they are read
        as (type, identifier); with three to six values the identifier is
        taken from the third position and the remaining entries are dropped.

        :raises: CitationTooShortException, InvalidCitationType,
            CitationTooLongException or InvalidPubMedIdentifierWarning
        """
        self.clear_citation()

        values = tokens['values']

        if len(values) < 2:
            raise CitationTooShortException(self.get_line_number(), line,
                                            position)

        citation_db = values[0]

        if citation_db not in CITATION_TYPES:
            raise InvalidCitationType(self.get_line_number(), line, position,
                                      citation_db)

        if 2 == len(values):
            citation_db_id = values[1]

        elif 6 < len(values):
            raise CitationTooLongException(self.get_line_number(), line,
                                           position)

        else:
            if 3 == len(values):
                logger.warning('Throwing away JOURNAL entry in position 2')
            else:
                logger.warning(
                    'Throwing away JOURNAL entry in position 2 and everything after position 3'
                )

            citation_db_id = values[2]

        if citation_db == CITATION_TYPE_PUBMED and not is_int(citation_db_id):
            raise InvalidPubMedIdentifierWarning(self.get_line_number(), line,
                                                 position, citation_db_id)

        self.citation_db = citation_db
        self.citation_db_id = citation_db_id
        return tokens

    def handle_set_evidence(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle a ``SET Evidence = ""`` statement."""
        self.evidence = tokens['value']
        return tokens

    def handle_set_command(self, line: str, position: int,
                           tokens: ParseResults) -> ParseResults:
        """Handle a ``SET X = "Y"`` statement."""
        key, value = tokens['key'], tokens['value']
        self.raise_for_invalid_annotation_value(line, position, key, value)
        self.annotations[key] = value
        return tokens

    def handle_set_command_list(self, line: str, position: int,
                                tokens: ParseResults) -> ParseResults:
        """Handle a ``SET X = {"Y", "Z", ...}`` statement.

        All values are validated before the annotation is stored as a set.
        """
        key, values = tokens['key'], tokens['values']
        for value in values:
            self.raise_for_invalid_annotation_value(line, position, key, value)
        self.annotations[key] = set(values)
        return tokens

    def handle_unset_statement_group(self, line: str, position: int,
                                     tokens: ParseResults) -> ParseResults:
        """Unset the statement group, or raises an exception if it is not set.

        :raises: MissingAnnotationKeyWarning
        """
        if self.statement_group is None:
            raise MissingAnnotationKeyWarning(self.get_line_number(), line,
                                              position,
                                              BEL_KEYWORD_STATEMENT_GROUP)
        self.statement_group = None
        return tokens

    def handle_unset_citation(self, line: str, position: int,
                              tokens: ParseResults) -> ParseResults:
        """Unset the citation, or raise an exception if it is not set.

        :raises: MissingAnnotationKeyWarning
        """
        if not self.citation_is_set:
            raise MissingAnnotationKeyWarning(self.get_line_number(), line,
                                              position, BEL_KEYWORD_CITATION)

        self.clear_citation()
        return tokens

    def handle_unset_evidence(self, line: str, position: int,
                              tokens: ParseResults) -> ParseResults:
        """Unset the evidence, or throws an exception if it is not already set.

        The value for ``tokens[EVIDENCE]`` corresponds to which alternate of SupportingText or Evidence was used in
        the BEL script.

        :raises: MissingAnnotationKeyWarning
        """
        if self.evidence is None:
            raise MissingAnnotationKeyWarning(self.get_line_number(), line,
                                              position, tokens[EVIDENCE])
        self.evidence = None
        return tokens

    def validate_unset_command(self, line: str, position: int,
                               annotation: str) -> None:
        """Raise an exception when trying to ``UNSET X`` if ``X`` is not already set.

        :raises: MissingAnnotationKeyWarning
        """
        if annotation not in self.annotations:
            raise MissingAnnotationKeyWarning(self.get_line_number(), line,
                                              position, annotation)

    def handle_unset_command(self, line: str, position: int,
                             tokens: ParseResults) -> ParseResults:
        """Handle an ``UNSET X`` statement or raises an exception if it is not already set.

        :raises: MissingAnnotationKeyWarning
        """
        key = tokens['key']
        self.validate_unset_command(line, position, key)
        del self.annotations[key]
        return tokens

    def handle_unset_list(self, line: str, position: int,
                          tokens: ParseResults) -> ParseResults:
        """Handle ``UNSET {A, B, ...}`` or raises an exception of any of them are not present.

        Consider that all unsets are in peril if just one of them is wrong!
        Keys are processed in order, so the keys listed before an unknown one
        have already been unset when the exception is raised.

        :raises: MissingAnnotationKeyWarning
        """
        for key in tokens['values']:
            # Evidence is stored separately from the annotations dictionary.
            if key in {BEL_KEYWORD_EVIDENCE, BEL_KEYWORD_SUPPORT}:
                self.evidence = None
            else:
                self.validate_unset_command(line, position, key)
                del self.annotations[key]

        return tokens

    def handle_unset_all(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle an ``UNSET_ALL`` statement."""
        self.clear()
        return tokens

    def get_annotations(self) -> Dict:
        """Get the current annotations.

        The returned annotations mapping is a shallow copy of the parser's own.
        """
        return {
            EVIDENCE: self.evidence,
            CITATION: self.get_citation(),
            ANNOTATIONS: self.annotations.copy(),
        }

    def get_citation(self) -> Mapping[str, str]:
        """Get the citation dictionary."""
        return citation_dict(db=self.citation_db, db_id=self.citation_db_id)

    def get_missing_required_annotations(self) -> List[str]:
        """Return missing required annotations."""
        return [
            required_annotation
            for required_annotation in self.required_annotations
            if required_annotation not in self.annotations
        ]

    def clear_citation(self) -> None:
        """Clear the citation and if citation clearing is enabled, clear the evidence and annotations."""
        self.citation_db = None
        self.citation_db_id = None

        if self.citation_clearing:
            self.evidence = None
            self.annotations.clear()

    def clear(self) -> None:
        """Clear the statement_group, citation, evidence, and annotations."""
        self.statement_group = None
        self.citation_db = None
        self.citation_db_id = None
        self.evidence = None
        self.annotations.clear()
Example #30
0
# DECIMAL: optional integer digits, a dot, then at least one digit
DECIMAL = Regex(r'[0-9]*\.[0-9]+').setParseAction(  # (?![eE])
    lambda toks: rdflib.Literal(toks[0], datatype=rdflib.XSD.decimal))

# [148] DOUBLE ::= [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT
DOUBLE = Regex(
    r'[0-9]+\.[0-9]*%(e)s|\.([0-9])+%(e)s|[0-9]+%(e)s' % {'e': EXPONENT_re}
).setParseAction(
    lambda toks: rdflib.Literal(toks[0], datatype=rdflib.XSD.double))


# The signed forms below match the sign, drop it from the tokens, and require
# the number to follow immediately (no whitespace is skipped after the sign).

# [149] INTEGER_POSITIVE ::= '+' INTEGER
INTEGER_POSITIVE = (
    Suppress('+') + INTEGER.copy().leaveWhitespace()
).setParseAction(
    lambda toks: rdflib.Literal("+" + toks[0], datatype=rdflib.XSD.integer))

# [150] DECIMAL_POSITIVE ::= '+' DECIMAL
DECIMAL_POSITIVE = Suppress('+') + DECIMAL.copy().leaveWhitespace()

# [151] DOUBLE_POSITIVE ::= '+' DOUBLE
DOUBLE_POSITIVE = Suppress('+') + DOUBLE.copy().leaveWhitespace()

# [152] INTEGER_NEGATIVE ::= '-' INTEGER
INTEGER_NEGATIVE = (
    Suppress('-') + INTEGER.copy().leaveWhitespace()
).setParseAction(lambda toks: neg(toks[0]))

# [153] DECIMAL_NEGATIVE ::= '-' DECIMAL
DECIMAL_NEGATIVE = (
    Suppress('-') + DECIMAL.copy().leaveWhitespace()
).setParseAction(lambda toks: neg(toks[0]))
Example #31
0
def _build_asn1_grammar():
    """Build and return the pyparsing grammar for ASN.1 module definitions.

    The returned start rule matches one or more module definitions. Parse
    actions attached near the end of this function wrap the tokens of most
    rules in ``AnnotatedToken`` objects tagged with a rule name.
    """
    def build_identifier(prefix_pattern):
        """Return a rule for one character from *prefix_pattern* followed by
        an optional run of letters, digits and hyphens, combined into a
        single token."""
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        identifier = Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)  # todo: more rigorous? trailing hyphens and -- forbidden
        return identifier

    def braced_list(element_rule):
        """Return a rule for a delimited list of *element_rule* inside
        suppressed braces; the elements are grouped. At least one element
        is required."""
        return Suppress('{') + Group(delimitedList(element_rule)) + Suppress('}')

    def annotate(name):
        """Return a parse action that wraps the matched tokens in an
        ``AnnotatedToken`` tagged with *name*."""
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    # NOTE(review): the two-word keywords below are matched as one literal
    # string containing a single space -- confirm that other spacing between
    # the words never needs to be accepted.
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String =  Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    # Binary and hex strings: quoted digits followed by a B or H suffix.
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments
    # A hyphen comment starts with '--' and runs lazily to the next '--' or,
    # because of re.MULTILINE, to the end of a line.
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # BUG: These are badly specified and cause the grammar to break if used generally.
    # todo: consider more literals from 16.9
    real_value = Regex(r'-?\d+(\.\d*)?') # todo: this doesn't really follow the spec
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring     # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    # NOTE(review): real_value's regex also matches plain integers and is
    # tried before integer_value, so integer_value looks unreachable in this
    # alternation -- confirm whether that is intended.
    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    defined_value = valuereference # todo: more options from 13.1

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
                              (objid_components_list | (defined_value + objid_components_list)) + \
                              Suppress('}')

    value = builtin_value | defined_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number) # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS | empty

    # extensions
    extension_default = EXTENSIBILITY_IMPLIED | empty

    # types
    defined_type = Unique(typereference)  # todo: consider other defined types from 13.1
    referenced_type = Unique(defined_type)  # todo: consider other ref:d types from 16.3

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    # but for now, just implement a simple integer value range.
    value_range_constraint = (signed_number | valuereference | MIN) + Suppress('..') + (signed_number | valuereference | MAX)
    # The outer parentheses around SIZE(...) are each optional and suppressed.
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + Suppress('(') + (value_range_constraint | signed_number) + Suppress(')') + Optional(Suppress(')'))
    constraint = Suppress('(') + value_range_constraint + Suppress(')')

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    # The plain named_type alternative comes last so the OPTIONAL and DEFAULT
    # forms are tried first.
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    enumeration = named_number | identifier

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + Optional(braced_list(named_number))
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING + Optional(size_constraint)

    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
                                      GraphicString | IA5String | \
                                      ISO646String | NumericString | \
                                      PrintableString | TeletexString | \
                                      T61String | UniversalString | \
                                      UTF8String | VideotexString | VisibleString
    characterstring_type = restricted_characterstring_type | unrestricted_characterstring_type
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # todo: consider other builtins from 16.2
    simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type

    # Fill in the forward-declared type rule now that all type forms exist.
    type_ << (builtin_type | referenced_type)

    # EXT: identifier should not be Optional here, but
    # our other ASN.1 code generator supports unnamed members,
    # and we use them.
    named_type << (Optional(identifier) + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    assigned_identifier = Optional(object_identifier_value | defined_value)
    global_module_reference = module_reference + assigned_identifier

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = Group(delimitedList(symbol))
    symbols_from_module = symbol_list + Suppress(FROM) + global_module_reference
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Optional(symbols_from_module_list)
    exports = Optional(Suppress(EXPORTS) + symbol_list + Suppress(';'))
    imports = Optional(Suppress(IMPORTS) + symbols_imported + Suppress(';'))

    module_body = (exports + imports + assignment_list)
    module_defaults = Suppress(tag_default + extension_default)  # we don't want these in the AST
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END

    # Comments are skipped wherever they appear inside a module definition.
    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    referenced_type.setParseAction(annotate('ReferencedType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    constraint.setParseAction(annotate('Constraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    valuereference.setParseAction(annotate('ValueReference'))
    module_reference.setParseAction(annotate('ModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
    imports.setParseAction(annotate('Imports'))
    exports.setParseAction(annotate('Exports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))

    # A source text may contain several module definitions back to back.
    start = OneOrMore(module_definition)
    return start
Example #32
0
            return [toks]
        self.setParseAction(listify)

# Only newline and tab are treated as skippable whitespace; the space
# character is not in this set.
ParserElement.setDefaultWhitespaceChars("\n\t")
backslash = chr(92)  # chr(92) is the backslash character

# texcmd is recursive (a command's parameters may contain commands), so it is
# declared first and filled in further down.
texcmd = Forward()
# Plain text: anything up to the next backslash or '$'.
filler = CharsNotIn(backslash + '$')
# Plain text inside a braced parameter: additionally stops at '{' and '}'.
filler2 = CharsNotIn(backslash + '$' + '{}')

# Bracketed argument: '[' ... ']'
arg = '[' + CharsNotIn("]") + ']'
arg.setParseAction(argfun)

# '$'-delimited text; the delimiters are kept in the result.
dollarmath = QuotedString('$',  multiline=True, unquoteResults=False)
# Braced parameter: '{' ... '}' whose content may be '$'-quoted text, plain
# text, a nested brace-quoted string, or another command.
param = Suppress(Literal('{')) + ZeroOrMoreAsList(dollarmath | filler2 | QuotedString('{', endQuoteChar='}', unquoteResults=False) | texcmd) + Suppress(Literal('}'))
param.setParseAction(paramfun)
# bs(c) builds a Literal matching a backslash followed by the string c.
def bs(c): return Literal("\\" + c)
# Commands made of a backslash plus one non-alphanumeric character.
singles = bs("[") | bs("]") | bs("{") | bs("}") | bs("\\") | bs("&") | bs("_") | bs(",") | bs("#") | bs("\n") | bs(";") | bs("|") | bs("%") | bs("*") | bs("~") | bs("^")
# '+' binds tighter than '<<', so the whole sum on the right is assigned to
# texcmd: a command name followed by any bracketed arguments and then any
# braced parameters.
texcmd << (singles | Word("\\", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", min = 2)) + ZeroOrMoreAsList(arg) + ZeroOrMoreAsList(param)
def texcmdfun(s, loc, toks):
    """Parse action for texcmd: wrap the match in a TexCmd object."""
    return TexCmd(s, loc, toks)
texcmd.setParseAction(texcmdfun)

#legal = "".join([chr(x) for x in set(range(32, 127)) - set(backslash)])
#filler = Word(legal)
# A document is any mix of '$'-quoted text, commands and plain text, and must
# be consumed up to the end of the input.
document = ZeroOrMore(dollarmath | texcmd | filler) + StringEnd().suppress()

if 0:
    s = "This is \\\\ test"
    print s
    for t in document.parseString(s):
Example #33
0
# DECIMAL: optional leading digits, a mandatory '.', then at least one digit.
# The matched text is turned into an xsd:decimal literal.
DECIMAL = Regex(r'[0-9]*\.[0-9]+').setParseAction(  # (?![eE])
    lambda toks: rdflib.Literal(toks[0], datatype=rdflib.XSD.decimal))
# DECIMAL.setResultsName('decimal')

# [148] DOUBLE ::= [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT
# The matched text is turned into an xsd:double literal.
DOUBLE = Regex(
    r'[0-9]+\.[0-9]*%(e)s|\.([0-9])+%(e)s|[0-9]+%(e)s' % {'e': EXPONENT_re}
).setParseAction(
    lambda toks: rdflib.Literal(toks[0], datatype=rdflib.XSD.double))
# DOUBLE.setResultsName('double')


# [149] INTEGER_POSITIVE ::= '+' INTEGER
# The '+' dropped by Suppress is prepended again before building the literal.
INTEGER_POSITIVE = (
    Suppress('+') + INTEGER.copy().leaveWhitespace()
).setParseAction(
    lambda toks: rdflib.Literal("+"+toks[0], datatype=rdflib.XSD.integer))

# [150] DECIMAL_POSITIVE ::= '+' DECIMAL
DECIMAL_POSITIVE = Suppress('+') + DECIMAL.copy().leaveWhitespace()

# [151] DOUBLE_POSITIVE ::= '+' DOUBLE
DOUBLE_POSITIVE = Suppress('+') + DOUBLE.copy().leaveWhitespace()

# [152] INTEGER_NEGATIVE ::= '-' INTEGER
# The sign is suppressed and the parsed value is passed through neg().
INTEGER_NEGATIVE = (
    Suppress('-') + INTEGER.copy().leaveWhitespace()
).setParseAction(lambda toks: neg(toks[0]))

# [153] DECIMAL_NEGATIVE ::= '-' DECIMAL
DECIMAL_NEGATIVE = (
    Suppress('-') + DECIMAL.copy().leaveWhitespace()
).setParseAction(lambda toks: neg(toks[0]))
Example #34
0
    data = single | tuple_

    # should not match a single (tr)
    simple_data = Group(NotAny('(tr)') + data + ZeroOrMore(Optional(Suppress(',')) + data))
    # the first element of a set data record  cannot be 'dimen', or else
    # these would match set_def_stmts
    non_dimen_simple_data = ~Literal('dimen') + simple_data

    matrix_row = Group(single + OneOrMore(PLUS | MINUS))
    matrix_data = ":" + OneOrMore(single).setResultsName('columns') \
            + ":=" + OneOrMore(matrix_row).setResultsName('data')
    matrix_data.setParseAction(MatrixData)

    tr_matrix_data = Suppress("(tr)") + matrix_data
    tr_matrix_data.setParseAction(mark_transposed)

    set_slice_component = number | symbol | '*'
    set_slice_record = LPAREN + NotAny('tr') + delimitedList(set_slice_component) + RPAREN
    set_slice_record.setParseAction(SliceRecord)

    _set_record = set_slice_record | matrix_data | tr_matrix_data | Suppress(":=")
    set_record = simple_data | _set_record
    non_dimen_set_record = non_dimen_simple_data | _set_record

    set_def_stmt = "set" + symbol + Optional(subscript_domain) + \
            Optional("dimen" + integer.setResultsName('dimen')) + END
    set_def_stmt.setParseAction(SetDefStmt)

    set_member = LBRACKET + delimitedList(data) + RBRACKET
Example #35
0
# Attach an Eval* callable as the parse action of each operand rule, so a
# match of the rule is handed to the corresponding Eval* callable.
variable_operand.setParseAction(EvalVariable)
explicit_variable_operand.setParseAction(EvalExplicitVariable)
integer_operand.setParseAction(EvalInteger)
real_operand.setParseAction(EvalReal)
string_operand.setParseAction(EvalString)
constant.setParseAction(EvalConstant)
regexp.setParseAction(EvalRegExp)
timespan.setParseAction(EvalTimespan)

# An identifier-like name immediately followed by a colon.
modifier = Regex(r"([a-zA-Z][a-zA-Z0-9_]*)\:")

# Bare delimited list of expressions, grouped into one token.
simple_list_operand = Group(delimitedList(expr)).setParseAction(EvalSimpleList)

# Bracketed list of expressions; the brackets are dropped from the result.
list_operand = (
    Suppress("[") + delimitedList(expr) + Suppress("]")
).setParseAction(EvalList)

# The empty list needs its own rule because list_operand requires an element.
empty_list_operand = Literal("[]").setParseAction(EvalEmptyList)

# "key: value" entries inside braces.
dict_item = Group(expr + Suppress(Literal(":")) + expr)
dict_operand = Group(
    Suppress("{") + delimitedList(dict_item) + Suppress("}")
).setParseAction(EvalDict)

# The empty dict needs its own rule because dict_operand requires an entry.
empty_dict_operand = Literal("{}").setParseAction(EvalEmptyDict)

# "name=value" pairs; the WordEnd check rejects an '=' that is directly
# followed by one of the characters "=!+-*/".
key_pair = Group(
    Regex(r"([a-zA-Z0-9_]+)")
    + Suppress(Literal("=") + WordEnd("=!+-*/"))
    + expr
)
key_pair_dict_operand = delimitedList(key_pair).setParseAction(EvalKeyPairDict)
Example #36
0
# DOUBLE.setResultsName('double')
# The matched DOUBLE text is turned into an xsd:double literal.
DOUBLE.setParseAction(
    lambda toks: rdflib.Literal(toks[0], datatype=rdflib.XSD.double)
)


# [149] INTEGER_POSITIVE ::= '+' INTEGER
INTEGER_POSITIVE = Suppress("+") + INTEGER.copy().leaveWhitespace()

# [150] DECIMAL_POSITIVE ::= '+' DECIMAL
DECIMAL_POSITIVE = Suppress("+") + DECIMAL.copy().leaveWhitespace()

# [151] DOUBLE_POSITIVE ::= '+' DOUBLE
DOUBLE_POSITIVE = Suppress("+") + DOUBLE.copy().leaveWhitespace()

# For the negative forms the sign is suppressed and the parsed value is
# passed through neg().

# [152] INTEGER_NEGATIVE ::= '-' INTEGER
INTEGER_NEGATIVE = (
    Suppress("-") + INTEGER.copy().leaveWhitespace()
).setParseAction(lambda toks: neg(toks[0]))

# [153] DECIMAL_NEGATIVE ::= '-' DECIMAL
DECIMAL_NEGATIVE = (
    Suppress("-") + DECIMAL.copy().leaveWhitespace()
).setParseAction(lambda toks: neg(toks[0]))

# [154] DOUBLE_NEGATIVE ::= '-' DOUBLE
DOUBLE_NEGATIVE = (
    Suppress("-") + DOUBLE.copy().leaveWhitespace()
).setParseAction(lambda toks: neg(toks[0]))

# [160] ECHAR ::= '\' [tbnrf\"']
# ECHAR = Regex('\\\\[tbnrf"\']')


# [158] STRING_LITERAL_LONG1 ::= "'''" ( ( "'" | "''" )? ( [^'\] | ECHAR ) )* "'''"
# STRING_LITERAL_LONG1 = Literal("'''") + ( Optional( Literal("'") | "''" ) + ZeroOrMore( ~ Literal("'\\") | ECHAR ) ) + "'''"
Example #37
0
# For
# Loop header: for <identifier> = <expression> to <expression> [step <expression>].
# The parsed tokens are passed positionally to ForHeader.
for_header = (
    for_kw + identifier + Suppress("=") + expression + to_kw + expression +
    pOptional(step_kw + expression)).setParseAction(lambda r: ForHeader(*r))
# Loop footer: next [<identifier>]; the tokens are passed to ForFooter.
for_footer = (next_kw +
              pOptional(identifier)).setParseAction(lambda r: ForFooter(*r))

loop_statement = for_header | for_footer

# If
# Block-style headers; the trailing "then" keyword is optional.
if_header = (if_kw + expression +
             pOptional(then_kw)).setParseAction(lambda r: IfHeader(*r))
elseif_header = (elseif_kw + expression +
                 pOptional(then_kw)).setParseAction(lambda r: ElseIfHeader(*r))
# NOTE(review): setParseAction modifies else_kw in place and returns it, so
# the else_kw used inside if_oneliner below presumably also emits an
# ElseHeader token -- confirm against the definition of else_kw.
else_header = else_kw.setParseAction(lambda r: ElseHeader())
if_footer = (end_kw + if_kw).setParseAction(lambda r: IfFooter())

# Single-line form: if <expression> then <statement> [else <statement>].
# NOTE(review): the else statement is read from r[3] while the guard only
# checks len(r) >= 3; this is consistent only if the else branch always adds
# two tokens (a token for else_kw plus the statement) -- confirm.
if_oneliner = (if_kw + expression + then_kw + statement +
               pOptional(else_kw + statement)).setParseAction(lambda r: If(
                   condition=r[0],
                   body=[r[1]],
                   else_block=Block([r[3]]) if len(r) >= 3 else None,
               ))

# if_oneliner is tried before if_header, which starts with the same tokens.
conditional_statement = (if_oneliner | if_header | elseif_header | else_header
                         | if_footer)

####################
#  Error handling  #
####################
Example #38
0
                  | Literal("resize") + Suppress("(") + _basic_expr + ","
                  + _basic_expr + Suppress(")"))
# The first three tokens of a word-function match are passed to WordFunction.
_word_function.setParseAction(lambda s, l, t: WordFunction(t[0], t[1], t[2]))

# count(<expr>, ...): t[0] is the "count" literal, so t[1] is the first
# argument.
# NOTE(review): only t[1] is passed to Count although delimitedList accepts
# several arguments -- confirm that dropping the others is intended.
_count = (Literal("count") + Suppress("(") + delimitedList(_basic_expr)
          + Suppress(")"))
_count.setParseAction(lambda s, l, t: Count(t[1]))

# next(<expr>): t[0] is the "next" literal, t[1] the wrapped expression.
_next = Literal("next") + Suppress("(") + _basic_expr + Suppress(")")
_next.setParseAction(lambda s, l, t: Next(t[1]))

# case <expr> : <expr> ; ... esac
# Each branch contributes two tokens, so the even positions of the body are
# paired with the odd positions to build an OrderedDict in source order.
_case_case = _basic_expr + Suppress(":") + _basic_expr + Suppress(";")
_case_body = OneOrMore(_case_case)
_case_body.setParseAction(lambda s, l, t: OrderedDict(zip(t[::2], t[1::2])))
_case = Suppress("case") + _case_body + Suppress("esac")
_case.setParseAction(lambda s, l, t: Case(t[0]))

# '^' selects the longest match between a complex identifier and the
# first-match alternation of the other base forms.
_base = (complex_identifier ^
         (_conversion
          | _word_function
          | _count
          | _next
          | Suppress("(") + _basic_expr + Suppress(")")
          | _case
          | constant))

_ap = Forward()
# [<expr>]
_array_subscript = Group(Suppress("[") + _basic_expr + Suppress("]"))

# [<expr> : <expr>]
_word_bit_selection = Group(Suppress("[") + _basic_expr + Suppress(":")
                            + _basic_expr + Suppress("]"))
Example #39
0
class Parser(object):
    """Lexical and Syntax analysis

    The pyparsing grammar is built in ``__init__``. Parse actions attached to
    the grammar call into the ``Syntax_tree`` held in ``AST`` and into its
    semantic analyser while the input is being parsed.
    """
    @property
    def semantic_analyser(self):
        """The semantic analyser owned by the current syntax tree."""
        return self._AST.semantic_analyser

    def __init__(self):
        """Create a fresh syntax tree and build all grammar rules."""
        self._AST = Syntax_tree()

        # keywords
        self.int_ = Keyword('Int')
        self.false_ = Keyword('False')
        self.true_ = Keyword('True')
        self.bit_ = Combine(Optional(Literal("@")) + Keyword('Bit'))
        self.sbox_ = Keyword('Sbox')
        self.l_shift_ = Keyword('<<')
        self.r_shift_ = Keyword('>>')
        self.circ_l_shift_ = Keyword('<<<')
        self.circ_r_shift_ = Keyword('>>>')
        self.bit_val = self.false_ ^ self.true_
        self.if_ = Keyword('if')
        self.for_ = Keyword('for')
        self.return_ = Keyword('return')
        self.void_ = Keyword('void')
        # Identifiers: a word that is not one of the listed keywords. ID and
        # ID_ are separate objects; only ID gets a parse action further down.
        self.ID = NotAny(self.sbox_ ^ self.int_ ^ self.bit_ ^ self.false_ ^ self.true_ ^ self.if_ ^ self.for_ ^ self.sbox_) + Word(alphas + '_', alphanums + '_')  # NOQA
        self.ID_ = NotAny(self.sbox_ ^ self.int_ ^ self.bit_ ^ self.false_ ^ self.true_ ^ self.if_ ^ self.for_ ^ self.sbox_) + Word(alphas + '_', alphanums + '_')
        # Other Tokens
        self.l_bracket = Literal('(')
        self.r_bracket = Literal(')')
        self.eq_set = Literal('=')("set")
        self.term_st = Literal(';')
        self.b_2_num = Combine(Literal("0b") + Word("01"))
        self.b_2_num.setParseAction(self.semantic_analyser.convert_base_to_str)
        self.b_16_num = Combine(Literal("0x") + Word(srange("[0-9a-fA-F]")))
        self.b_16_num.setParseAction(self.semantic_analyser.convert_base_to_str)
        self.b_10_num = Word(nums)
        self.bit_and = Literal('&')
        self.bit_or = Keyword('|')
        self.bit_xor = Keyword('^')
        self.bit_not = Literal('~')
        self.eq_compare = Literal('==')
        self.neq_compare = Literal('!=')
        self.l_brace = Literal('{')
        self.r_brace = Literal('}')
        self.bin_add = Literal('+')
        self.bin_mult = Literal('*')
        self.bin_sub = Literal('-')
        self.bin_mod = Literal('%')
        self.bin_div = Literal('/')
        self.g_than = Literal('>')
        self.ge_than = Literal('>=')
        self.l_than = Literal('<')
        self.le_than = Literal('<=')
        self.log_and = Keyword('&&')
        self.log_or = Keyword('||')
        self.l_sq_b = Literal('[')
        self.r_sq_b = Literal(']')

        # Operator Productions
        self.log_op = self.log_and ^ self.log_or
        self.comparison_op = self.g_than ^ self.ge_than ^ self.l_than ^ self.le_than ^ self.eq_compare ^ self.neq_compare
        self.arith_op = self.bin_add ^ self.bin_mult ^ self.bin_sub ^ self.bin_mod ^ self.bin_div
        self.bitwise_op = self.bit_and ^ self.bit_or ^ self.bit_xor ^ self.bit_not ^ self.l_shift_ ^ self.r_shift_ ^ self.circ_l_shift_ ^ self.circ_r_shift_

        # Grammar
        # Forward declarations for the mutually recursive rules. Note that
        # ``expr`` and ``operand`` are rebound to concrete rules below rather
        # than filled in with ``<<``.
        self.stmt = Forward()
        self.for_loop = Forward()
        self.cast = Forward()
        self.seq_val = Forward()
        self.int_value = self.b_2_num ^ self.b_16_num ^ self.b_10_num
        self.expr = Forward()
        self.function_call = Forward()
        self.index_select = Forward()
        self.seq_ = Forward()
        self.operand = Forward()
        self.seq_range = Forward()
        #  #######Operands

        self.sbox_call = Group((self.ID ^ self.seq_val) + ~White() + Literal(".") + ~White() + self.sbox_ + ~White() +
                               self.l_bracket + (self.ID ^ self.int_value) + self.r_bracket)

        self.operand = self.index_select | self.seq_val | self.function_call | self.ID | self.int_value | self.cast | self.bit_val
        # Tag each operand kind: the result is [tag, [tokens]].
        self.seq_val.setParseAction(lambda t: ['Seq_val'] + [t.asList()])
        self.index_select.setParseAction(lambda t: ['index_select'] + [t.asList()])
        self.function_call.setParseAction(lambda t: ['function_call'] + [t.asList()])
        self.ID.setParseAction(lambda t: ['ID'] + [t.asList()])
        self.int_value.setParseAction(lambda t: ['Int_val'] + [t.asList()])
        self.cast.setParseAction(lambda t: ['cast'] + [t.asList()])
        self.bit_val.setParseAction(lambda t: ['Bit_val'] + [t.asList()])
        self.seq_range.setParseAction(lambda t: ['seq_range'] + [t.asList()])
        #  #######Expressions

        self.expr = Group(infixNotation(Group(self.operand), [(self.bitwise_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                                              (self.comparison_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                                              (self.log_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                                              (self.arith_op, 2, opAssoc.LEFT, self.nest_operand_pairs)]))

        # self.expr.setParseAction(self.expr_p)
        self.int_size = Combine(Optional(Literal("@")) + self.int_)("decl") + ~White() + Suppress(self.l_bracket) + self.expr + Suppress(self.r_bracket)

        self.sbox_size = self.sbox_ + ~White() + Suppress(self.l_bracket) + self.expr + Suppress(self.r_bracket)

        self.seq_range << self.expr + Suppress(Literal(":")) + self.expr

        self.seq_val << Suppress(self.l_sq_b) + Optional(Group(delimitedList(self.expr))) + Suppress(self.r_sq_b)

        self.seq_ << (self.int_size | self.bit_ | self.sbox_size)("type") +\
            Group(OneOrMore(~White() + Suppress(self.l_sq_b) + self.expr + Suppress(self.r_sq_b)))("seq_size")

        self.function_call << self.ID("function_name") + ~White() + Suppress(self.l_bracket) +\
            Optional(Group(delimitedList(self.expr)))("param_list") + Suppress(self.r_bracket)

        self.cast << Suppress(self.l_bracket) + Group((self.seq_ | self.int_size | self.bit_)) +\
            Suppress(self.r_bracket) + (self.expr)("target")

        self.index_select << (self.ID("ID") ^ (Suppress(self.l_bracket) + self.cast + Suppress(self.r_bracket))("cast")) + ~White() +\
            Group(OneOrMore(Suppress(self.l_sq_b) + Group(delimitedList(self.expr ^ Group(Group(self.seq_range))))("index") + Suppress(self.r_sq_b)))
        #  ####### Declarations

        self.id_set = Group((Group(self.index_select) | self.ID_) + self.eq_set + self.expr)
        self.id_set.setParseAction(self.AST.id_set)

        self.int_decl = Group(self.int_size + delimitedList(Group((self.ID_("ID") + self.eq_set + self.expr("set_value")) |
                              self.ID_("ID")))("value"))  # NOQA
        self.int_decl.setParseAction(self.AST.int_decl)
        self.bit_decl = Group(self.bit_("decl") + delimitedList(Group(self.ID_("ID")) ^
                              Group(self.ID_("ID") + self.eq_set + self.expr("set_value")))("value"))
        self.bit_decl.setParseAction(self.AST.bit_decl)
        self.seq_decl = Group(self.seq_("decl") + Group(self.ID)("ID") + Optional(self.eq_set + Group(self.expr))("value"))
        self.seq_decl.setParseAction(self.AST.seq_decl)

        self.decl = self.bit_decl ^ self.int_decl ^ self.seq_decl

        # ###### Statements

        self.return_stmt = Group(self.return_ + self.expr)
        self.return_stmt.setParseAction(self.AST.return_stmt)

        # Separate Literal objects are used for braces, parentheses and
        # semicolons in different contexts so that each can carry its own
        # parse action.
        self.function_start = Literal("{")
        self.function_start.setParseAction(self.AST.function_start)
        self.function_end = Literal("}")
        self.function_decl = Group((Group(self.seq_) | Group(self.int_size) | Group(self.bit_) | Group(self.void_))("return_type") + Group(self.ID)("func_ID") +
                                   Suppress(self.l_bracket) + Group(Optional(delimitedList(Group((self.seq_ | self.int_size | self.bit_) + Group(self.ID)))))("func_param") +  # NOQA
                                   Suppress(self.r_bracket) + Suppress(self.function_start) + Group(self.stmt)("body") + Suppress(self.r_brace))
        self.function_decl.setParseAction(self.AST.function_decl)

        self.for_init = Literal('(')
        self.for_init.setParseAction(self.AST.begin_for)

        self.for_terminator = Literal(';')
        self.for_terminator.setParseAction(self.AST.for_terminator)

        self.for_increment = Literal(';')
        self.for_increment.setParseAction(self.AST.for_increment)

        self.terminator_expr = Group(infixNotation(Group(self.operand), [(self.log_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                                                         (self.bitwise_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                                                         (self.comparison_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                                                         (self.arith_op, 2, opAssoc.LEFT, self.nest_operand_pairs)]))

        self.terminator_expr.setParseAction(self.AST.terminator_expr)
        self.for_body = Literal('{')
        self.for_body.setParseAction(self.AST.for_body)

        self.end_for = Literal('}')
        self.end_for.setParseAction(self.AST.end_for)

        self.for_loop << Group(self.for_ + ~White() + Suppress(self.for_init) +
                               Optional(delimitedList(self.decl ^ self.id_set))("init") + Suppress(self.for_terminator) +
                               Optional(self.terminator_expr) + Suppress(self.for_increment) +
                               Optional(delimitedList(self.id_set))("increm") + Suppress(self.r_bracket) +
                               Suppress(self.for_body) + self.stmt("loop_body") + Suppress(self.end_for))

        self.if_condition = Suppress(self.l_bracket) + self.expr + Suppress(self.r_bracket)
        self.if_condition.setParseAction(self.AST.if_cond)

        self.if_.setParseAction(self.AST.begin_if)
        self.if_body_st = Literal('{')
        self.if_body_st.setParseAction(self.AST.if_body_st)
        self.if_body_end = Literal('}')
        self.if_body_end.setParseAction(self.AST.if_body_end)
        self.if_stmt = Group(self.if_ + self.if_condition("if_cond") + Suppress(self.if_body_st) + Group(self.stmt).setResultsName("body") + Suppress(self.if_body_end))
        self.single_expr = self.expr + Suppress(self.term_st)
        self.single_expr.setParseAction(self.AST.stand_alone_expr)

        self.stmt << ZeroOrMore(self.decl + Suppress(self.term_st)
                                ^ self.function_decl
                                ^ self.id_set + Suppress(self.term_st)
                                ^ self.single_expr
                                ^ self.for_loop
                                ^ self.if_stmt
                                ^ self.return_stmt + Suppress(self.term_st)
                                ^ self.sbox_call + Suppress(self.term_st))

        self.grammar_test = self.stmt + StringEnd()  # Allows single statements to be parsed

        self.grammar = ZeroOrMore(self.function_decl
                                  ^ self.seq_decl + Suppress(self.term_st)) + StringEnd()

    def nest_operand_pairs(self, tokens):
        """Left-nest a flat ``a op b op c ...`` token sequence.

        Used as the parse action of every operator level. The first three
        tokens form the innermost result; each following pair of tokens
        wraps the result built so far in a new level and is appended to it.

        Returns a one-element list holding the nested ParseResults.
        """
        tokens = tokens[0]
        ret = ParseResults(tokens[:3])
        remaining = iter(tokens[3:])
        while True:
            next_pair = (next(remaining, None), next(remaining, None))
            if next_pair == (None, None):
                # No tokens left to fold in.
                break
            ret = ParseResults([ret])
            ret += ParseResults(list(next_pair))
        return [ret]

    @property
    def AST(self):
        """The syntax tree that the grammar's parse actions write to."""
        return self._AST

    @AST.setter
    def AST(self, value):
        self._AST = value

    def analyse_tree_test(self, AST):
        """Run the semantic analyser on *AST* and return its result."""
        return self.semantic_analyser.analyse(AST)

    def parse_test_unit(self, data_in):
        """Parses single statements

        Returns ``[parse_results, True]`` on success. When a ParseException
        is raised, the error is printed and ``False`` is returned.
        """
        try:
            res = self.grammar_test.parseString(data_in)
        except ParseException as details:
            print("The following error occured:")
            print(details)
            return False
        # The results can be empty (``stmt`` is a ZeroOrMore), so they are
        # returned as-is without indexing into them.
        return [res, True]

    def parse_test_AST_semantic(self, data_in):
        """Parses single statements and returns AST

        When a ParseException is raised, the error is printed and ``False``
        is returned instead.
        """
        try:
            self.grammar_test.parseString(data_in)
        except ParseException as details:
            print("The following error occured:")
            print(details)
            return False
        return self.AST

    def parse_test_integration(self, data_in):
        """Only Parses Statements in functions

        Returns ``[parse_results, True]`` on success. When a ParseException
        is raised, the error is printed and ``False`` is returned.
        """
        try:
            res = self.grammar.parseString(data_in)
        except ParseException as details:
            print("The following error occured:")
            print(details)
            return False
        # if type(res[0]) is not bool:
            # print(res[0].dump())
        return [res, True]

    def parse(self, data_in):
        """Prod parsing entry point

        Parses *data_in* with the full grammar (parse errors propagate to
        the caller), then runs semantic analysis. Returns the translated IR
        when the analysis returns ``True``; otherwise returns ``None``.
        """
        self.grammar.parseString(data_in)
        if self.semantic_analyser.analyse(self.AST, True) is True:
            return self.semantic_analyser.IR.translate()
Example #40
0
def _build_asn1_grammar():
    """Assemble and return the pyparsing grammar for ASN.1 module definitions.

    The returned expression is ``OneOrMore(module_definition)``. Most rules
    get a parse action (see ``annotate``) that wraps their tokens in an
    ``AnnotatedToken(name, tokens)``. ``StringOf``, ``Unique`` and
    ``AnnotatedToken`` are helpers defined outside this function.

    Alternatives built with ``|`` are tried in the order written, so the
    order of operands below is significant.
    """
    def build_identifier(prefix_pattern):
        """Build an identifier rule whose first character matches ``prefix_pattern``."""
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        # todo: more rigorous? trailing hyphens and -- forbidden
        return Combine(
            Word(srange(prefix_pattern), exact=1) + identifier_suffix)

    def braced_list(element_rule):
        """Build ``{ a, b, ... }``: braces suppressed, elements grouped, list may be empty."""
        elements_rule = Optional(delimitedList(element_rule))
        return Suppress('{') + Group(elements_rule) + Suppress('}')

    def annotate(name):
        """Return a parse action that wraps the matched tokens in ``AnnotatedToken(name, ...)``."""
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    # NOTE(review): multi-word keywords are written with one literal space, so
    # presumably only that exact spacing is matched — confirm this is intended.
    ANY = Keyword('ANY')
    DEFINED_BY = Keyword('DEFINED BY')
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') +
                            number)  # todo: consider defined values from 18.1
    # Quoted binary ('0101'B) and hexadecimal ('0AF'H) strings; the quotes and
    # the B/H suffix are suppressed from the results.
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments
    # A hyphen comment starts with "--" and ends at the next "--" or at the
    # end of the line, whichever comes first (lazy match, MULTILINE "$").
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # todo: consider more literals from 16.9
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    exponent = CaselessLiteral('e') + signed_number
    real_value = Combine(signed_number +
                         Optional(Literal('.') + Optional(number)) +
                         Optional(exponent))

    # In value range constraints, decimal points must be followed by number, or
    # the grammar becomes ambiguous: ([1.].100) vs ([1]..[100])
    constraint_real_value = Combine(signed_number +
                                    Optional(Literal('.') + number) +
                                    Optional(exponent))

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    external_value_reference = module_reference + Suppress(
        '.') + valuereference
    defined_value = external_value_reference | valuereference  # todo: more options from 13.1
    referenced_value = Unique(defined_value)  # todo: more options from 16.11

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(
        ')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
                              (objid_components_list | (defined_value + objid_components_list)) + \
                              Suppress('}')

    value = builtin_value | referenced_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress(
        '(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(
        Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS

    # extensions
    extension_default = Unique(EXTENSIBILITY_IMPLIED)

    # values

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    lower_bound = (constraint_real_value | signed_number | referenced_value
                   | MIN)
    upper_bound = (constraint_real_value | signed_number | referenced_value
                   | MAX)
    single_value_constraint = Suppress('(') + value + Suppress(')')
    value_range_constraint = Suppress('(') + lower_bound + Suppress(
        '..') + upper_bound + Suppress(')')
    # TODO: Include contained subtype constraint here if we ever implement it.
    # The outer parentheses around SIZE(...) are each optional on their own.
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + (
        single_value_constraint | value_range_constraint) + Optional(
            Suppress(')'))

    # types
    # todo: consider other defined types from 13.1
    defined_type = Optional(module_reference + Suppress('.'),
                            default=None) + typereference + Optional(
                                size_constraint, default=None)

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    # Bare named_type comes last so the OPTIONAL / DEFAULT forms are tried first.
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT, default=None) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    named_nonumber = Unique(identifier)
    enumeration = named_number | named_nonumber

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(
        size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(
        size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    selection_type = identifier + Suppress('<') + type_
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + Optional(
        braced_list(named_number), default=[]) + Optional(
            single_value_constraint | size_constraint, default=None)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number) + Optional(
        single_value_constraint, default=None)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING + Optional(size_constraint)
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
                                      GraphicString | IA5String | \
                                      ISO646String | NumericString | \
                                      PrintableString | TeletexString | \
                                      T61String | UniversalString | \
                                      UTF8String | VideotexString | \
                                      VisibleString
    characterstring_type = (
        restricted_characterstring_type
        | unrestricted_characterstring_type) + Optional(size_constraint)
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # ANY type
    any_type = ANY + Optional(Suppress(DEFINED_BY + identifier))

    # todo: consider other builtins from 16.2
    simple_type = (any_type | boolean_type | null_type | octetstring_type
                   | characterstring_type | real_type | plain_integer_type
                   | object_identifier_type
                   | useful_type) + Optional(value_range_constraint
                                             | single_value_constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    # value_list_type precedes simple_type, so INTEGER { ... } is tried before
    # plain INTEGER.
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type

    referenced_type = defined_type | selection_type  # todo: consider other ref:d types from 16.3

    # Fill in the forward declarations now that every type rule exists.
    type_ << (builtin_type | referenced_type)
    named_type << (identifier + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    # TODO: Maybe handle full assigned-identifier syntax with defined values
    # described in 12.1, but I haven't been able to find examples of it, and I
    # can't say for sure what acceptable syntax is.
    global_module_reference = module_reference + Optional(
        object_identifier_value, default=None)

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = delimitedList(symbol, delim=',')
    symbols_from_module = Group(
        Group(symbol_list) + Suppress(FROM) + global_module_reference)
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Unique(symbols_from_module_list)
    exports = Suppress(EXPORTS) + Optional(symbol_list) + Suppress(';')
    imports = Suppress(IMPORTS) + Optional(symbols_imported) + Suppress(';')

    module_body = Optional(exports, default=None) + Optional(
        imports, default=None) + assignment_list
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + Suppress(DEFINITIONS) + Optional(tag_default, default=None) + \
                        Optional(extension_default, default=None) + Suppress('::=') + \
                        Suppress(BEGIN) + module_body + Suppress(END)

    # Comments are skipped everywhere inside a module definition.
    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    named_nonumber.setParseAction(annotate('NamedValue'))
    single_value_constraint.setParseAction(annotate('SingleValueConstraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    value_range_constraint.setParseAction(annotate('ValueRangeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(
        annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    module_reference.setParseAction(annotate('ModuleReference'))
    global_module_reference.setParseAction(annotate('GlobalModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(
        annotate('DefinitiveNameAndNumberForm'))
    exports.setParseAction(annotate('Exports'))
    imports.setParseAction(annotate('Imports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))
    defined_type.setParseAction(annotate('DefinedType'))
    selection_type.setParseAction(annotate('SelectionType'))
    referenced_value.setParseAction(annotate('ReferencedValue'))

    # A source text may contain several module definitions back to back.
    start = OneOrMore(module_definition)
    return start
Example #41
0
    # Excerpt of a larger grammar: `single`, `tuple_`, `number`, `symbol`,
    # `integer`, `subscript_domain`, PLUS, MINUS, LPAREN, RPAREN and END are
    # defined outside the visible code.
    data = single | tuple_

    # should not match a single (tr)
    simple_data = Group(
        NotAny('(tr)') + data + ZeroOrMore(Optional(Suppress(',')) + data))
    # the first element of a set data record  cannot be 'dimen', or else
    # these would match set_def_stmts
    non_dimen_simple_data = ~Literal('dimen') + simple_data

    # One matrix row: a `single` followed by one or more PLUS / MINUS tokens.
    matrix_row = Group(single + OneOrMore(PLUS | MINUS))
    # ":" <columns> ":=" <rows>; the match is handed to MatrixData.
    matrix_data = ":" + OneOrMore(single).setResultsName('columns') \
            + ":=" + OneOrMore(matrix_row).setResultsName('data')
    matrix_data.setParseAction(MatrixData)

    # Same layout prefixed by a suppressed "(tr)"; mark_transposed then
    # post-processes the result.
    tr_matrix_data = Suppress("(tr)") + matrix_data
    tr_matrix_data.setParseAction(mark_transposed)

    # Slice record: delimited numbers, symbols or '*' between LPAREN and
    # RPAREN. NotAny('tr') rejects input continuing with "tr" right after
    # LPAREN, presumably so "(tr)" is not read as a slice — confirm.
    set_slice_component = number | symbol | '*'
    set_slice_record = LPAREN + NotAny('tr') + delimitedList(
        set_slice_component) + RPAREN
    set_slice_record.setParseAction(SliceRecord)

    # Record alternatives shared by both set-record flavours below.
    _set_record = set_slice_record | matrix_data | tr_matrix_data | Suppress(
        ":=")
    set_record = simple_data | _set_record
    non_dimen_set_record = non_dimen_simple_data | _set_record

    # "set" <symbol> [subscript_domain] ["dimen" <integer>] END
    set_def_stmt = "set" + symbol + Optional(subscript_domain) + \
            Optional("dimen" + integer.setResultsName('dimen')) + END
    set_def_stmt.setParseAction(SetDefStmt)
Example #42
0
# Attribute name: a letter followed by letters, digits, ';' or '-'.
# Leading whitespace is not skipped before it.
attr = Word(
    string.ascii_letters,
    string.ascii_letters + string.digits + ';-',
).leaveWhitespace().setName('attr')

# Exactly two hexadecimal digits, as used after a backslash escape.
hexdigits = Word(string.hexdigits, exact=2).setName('hexdigits')

# Backslash escape: the backslash is dropped, the two hex digits are kept.
escaped = (Suppress(Literal('\\')) + hexdigits).setName('escaped')


def _p_escaped(s, l, t):
    """Parse action: convert the hex digits in ``t[0]`` to the character with that code."""
    return chr(int(t[0], 16))


# Escapes are decoded to a single character while parsing.
escaped.setParseAction(_p_escaped)

# Assertion value: runs of ordinary characters and decoded escapes, joined
# into one string.
value = Combine(OneOrMore(CharsNotIn('*()\\\0') | escaped)).setName('value')

# Each comparison operator is replaced by the matching pureldap filter class.
equal = Literal("=").setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_equalityMatch)
approx = Literal("~=").setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_approxMatch)
greater = Literal(">=").setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_greaterOrEqual)
less = Literal("<=").setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_lessOrEqual)
filtertype = (equal | approx | greater | less).setName('filtertype')

# Simple filter item: <attr><operator><value> with no whitespace skipping.
simple = (attr + filtertype + value).leaveWhitespace().setName('simple')
Example #43
0
# Reserved words (MESSAGE, the punctuation tokens, `identifier` and `integer`
# are defined outside this excerpt).
(RETURNS, SERVICE, OPTION, ENUM, ONEOF,
 REQUIRED, OPTIONAL, REPEATED, TRUE, FALSE) = map(Keyword, (
     'returns', 'service', 'option', 'enum', 'oneof',
     'required', 'optional', 'repeated', 'true', 'false',
 ))

# The message body is defined later; declare it up front.
message_body = Forward()

# message <id> { <body> } -- keyword and braces are suppressed; "-" means a
# failure after the keyword is reported as an error instead of backtracking.
message_definition = (
    Suppress(MESSAGE) - identifier("message_id")
    + Suppress(LBRACE) + message_body("message_body") + Suppress(RBRACE)
).setParseAction(message_definition_fn)

# enum <id> { <name> = <int>; ... } -- each entry is grouped.
enum_definition = (
    ENUM - identifier + LBRACE
    + ZeroOrMore(Group(identifier + EQ + integer + SEMI))
    + RBRACE
)

# Scalar type keywords
DOUBLE, INT32, UINT32, BOOL, STRING = map(Keyword, (
    "double", "int32", "uint32", "bool", "string",
))

# NOTE(review): INT32 is defined above but is not one of the alternatives
# here; "int32" presumably falls through to `identifier` -- confirm.
type_ = (DOUBLE | UINT32 | BOOL | STRING | identifier).setParseAction(type_fn)

qualifier = (REQUIRED | OPTIONAL | REPEATED)("qualifier").setParseAction(
    qualifier_fn)

# <qualifier> <type> <name> = <field number> ;
field = (
    qualifier - type_("type_") + identifier("identifier")
    + EQ + integer("field_number") + SEMI
)
Example #44
0
def parse(string=None, filename=None, token=None, lang=None):
    """
    Parse a feature from a string or a file, or raise a LettuceSyntaxError

    This function includes the parser grammar.

    Args:
        string: text to parse; used when truthy.
        filename: path of a UTF-8 file to parse when ``string`` is falsy. It
            is also passed to ``guess_language`` and to any
            LettuceSyntaxError raised.
        token: name of one of the grammar elements defined below (e.g.
            ``'TABLE'``) to use as the start rule; defaults to ``FEATURE``.
        lang: language object supplying the ``STATEMENT``, ``BACKGROUND``,
            ``SCENARIO``, ``EXAMPLES`` and ``FEATURE`` keyword rules; when
            falsy it is obtained from ``guess_language(string, filename)``.

    Returns:
        The pyparsing results produced by the selected rule.

    Raises:
        LettuceSyntaxError: if the input does not match the grammar.
        RuntimeError: if neither ``string`` nor ``filename`` is given.
    """

    if not lang:
        lang = guess_language(string, filename)

    #
    # End of Line
    #
    EOL = Suppress(lineEnd)
    UTFWORD = Word(unicodePrintables)

    #
    # @tag
    #
    TAG = Suppress('@') + UTFWORD

    #
    # A table
    #
    # A table is made up of rows of cells, e.g.
    #
    #   | column 1 | column 2 |
    #
    # Table cells need to be able to handle escaped tokens such as \| and \n
    #
    def handle_esc_char(tokens):
        """Translate a two-character escape (\\|, \\n or \\\\) to its value."""
        token = tokens[0]

        if token == r'\|':
            return u'|'
        elif token == r'\n':
            return u'\n'
        elif token == r'\\':
            return u'\\'

        raise NotImplementedError(u"Unknown token: %s" % token)

    ESC_CHAR = Word(initChars=r'\\', bodyChars=unicodePrintables, exact=2)
    ESC_CHAR.setParseAction(handle_esc_char)

    #
    # A cell can contain anything except a cell marker, new line or the
    # beginning of a cell marker, we then handle escape characters separately
    # and recombine the cell afterwards
    #
    CELL = OneOrMore(CharsNotIn('|\n\\') + Optional(ESC_CHAR))
    CELL.setParseAction(lambda tokens: u''.join(tokens))

    TABLE_ROW = Suppress('|') + OneOrMore(CELL + Suppress('|')) + EOL
    TABLE_ROW.setParseAction(lambda tokens: [v.strip() for v in tokens])
    TABLE = Group(OneOrMore(Group(TABLE_ROW)))

    #
    # Multiline string
    #
    def clean_multiline_string(s, loc, tokens):
        """
        Clean a multiline string

        The indent level of a multiline string is the indent level of the
        triple-". We have to derive this by walking backwards from the
        location of the quoted string token to the newline before it.

        We also want to remove the leading and trailing newline if they exist.

        FIXME: assumes UNIX newlines
        """

        def remove_indent(multiline, indent):
            """
            Generate the lines removing the indent
            """

            for line in multiline.splitlines():
                if line and not line[:indent].isspace():
                    warn("%s: %s: under-indented multiline string "
                         "truncated: '%s'" %
                         (lineno(loc, s), col(loc, s), line),
                         LettuceSyntaxWarning)

                # for those who are surprised by this, slicing a string
                # shorter than indent will yield empty string, not IndexError
                yield line[indent:]

        # determine the indentation offset
        indent = loc - s.rfind('\n', 0, loc) - 1

        multiline = '\n'.join(remove_indent(tokens[0], indent))

        # remove leading and trailing newlines; startswith/endswith are used
        # instead of indexing so an empty string does not raise IndexError
        if multiline.startswith('\n'):
            multiline = multiline[1:]

        if multiline.endswith('\n'):
            multiline = multiline[:-1]

        return multiline

    MULTILINE = QuotedString('"""', multiline=True)
    MULTILINE.setParseAction(clean_multiline_string)

    # A Step
    #
    # Steps begin with a keyword such as Given, When, Then or And They can
    # contain an optional inline comment, although it's possible to encapsulate
    # it in a string. Finally they can contain a table or a multiline 'Python'
    # string.
    #
    # <variables> are not parsed as part of the grammar as it's not easy to
    # distinguish between a variable and XML. Instead scenarios will replace
    # instances in the steps based on the outline keys.
    #
    STATEMENT_SENTENCE = Group(
        lang.STATEMENT +  # Given, When, Then, And
        OneOrMore(UTFWORD.setWhitespaceChars(' \t') |
                  quotedString.setWhitespaceChars(' \t')) +
        EOL
    )

    STATEMENT = Group(
        STATEMENT_SENTENCE('sentence') +
        Optional(TABLE('table') | MULTILINE('multiline'))
    )
    STATEMENT.setParseAction(Step)

    STATEMENTS = Group(ZeroOrMore(STATEMENT))

    #
    # Background:
    #
    BACKGROUND_DEFN = \
        lang.BACKGROUND('keyword') + Suppress(':') + EOL
    BACKGROUND_DEFN.setParseAction(Background)

    BACKGROUND = Group(
        BACKGROUND_DEFN('node') +
        STATEMENTS('statements')
    )
    BACKGROUND.setParseAction(Background.add_statements)

    #
    # Scenario: description
    #
    SCENARIO_DEFN = Group(
        Group(ZeroOrMore(TAG))('tags') +
        lang.SCENARIO('keyword') + Suppress(':') +
        restOfLine('name') +
        EOL
    )
    SCENARIO_DEFN.setParseAction(Scenario)

    SCENARIO = Group(
        SCENARIO_DEFN('node') +
        STATEMENTS('statements') +
        Group(ZeroOrMore(
            Suppress(lang.EXAMPLES + ':') + EOL + TABLE
        ))('outlines')
    )
    SCENARIO.setParseAction(Scenario.add_statements)

    #
    # Feature: description
    #
    FEATURE_DEFN = Group(
        Group(ZeroOrMore(TAG))('tags') +
        lang.FEATURE('keyword') + Suppress(':') +
        restOfLine('name') +
        EOL
    )
    FEATURE_DEFN.setParseAction(Feature)

    #
    # A description composed of zero or more lines, before the
    # Background/Scenario block
    #
    DESCRIPTION_LINE = Group(
        ~BACKGROUND_DEFN + ~SCENARIO_DEFN +
        OneOrMore(UTFWORD).setWhitespaceChars(' \t') +
        EOL
    )
    DESCRIPTION = Group(ZeroOrMore(DESCRIPTION_LINE | EOL))
    DESCRIPTION.setParseAction(Description)

    #
    # Complete feature file definition
    #
    FEATURE = Group(
        FEATURE_DEFN('node') +
        DESCRIPTION('description') +
        Optional(BACKGROUND('background')) +
        Group(OneOrMore(SCENARIO))('scenarios') +
        stringEnd)
    FEATURE.ignore(pythonStyleComment)
    FEATURE.setParseAction(Feature.add_blocks)

    #
    # Try parsing the string
    #

    if not token:
        token = FEATURE
    else:
        # The start rule is looked up by the name of a local variable above,
        # so those local names are part of this function's interface.
        token = locals()[token]

    try:
        if string:
            tokens = token.parseString(string)
        elif filename:
            # encoding is passed by keyword: positionally the third argument
            # of the built-in open() is the buffering size, not the encoding
            with open(filename, 'r', encoding='utf-8') as fp:
                tokens = token.parseFile(fp)
        else:
            raise RuntimeError("Must pass string or filename")

        return tokens
    except ParseException as e:
        if e.parserElement == stringEnd:
            msg = "Expected EOF (max one feature per file)"
        else:
            msg = e.msg

        raise LettuceSyntaxError(
            filename,
            u"{lineno}:{col} Syntax Error: {msg}\n{line}\n{space}^".format(
                msg=msg,
                lineno=e.lineno,
                col=e.col,
                line=e.line,
                space=' ' * (e.col - 1)))
    except LettuceSyntaxError as e:
        # reraise the exception with the filename
        raise LettuceSyntaxError(filename, e.string)
Example #45
0
    def build_jimple_parser(self):
        # Literals
        op_add     =   Literal("+")
        op_sub     =   Literal("-")
        op_mul     =   Literal("*")
        op_div     =   Literal("/")
        op_xor     =   Literal("^")
        op_lt      =   Literal("<")
        op_gt      =   Literal(">")
        op_eq      =   Literal("==")
        op_neq     =   Literal("!=")
        op_lte     =   Literal("<=")
        op_gte     =   Literal(">=")
        op_sls     =   Literal("<<")
        op_srs     =   Literal(">>")
        op_urs     =   Literal(">>>")
        op_mod     =   Literal("%")
        op_rem     =   Literal("rem")
        op_bwa     =   Literal("&")
        op_bwo     =   Literal("|")
        op_cmp     =   Literal("cmp")
        op_cmpg    =   Literal("cmpg")
        op_cmpl    =   Literal("cmpl")
        lit_lcb    =   Literal("{").suppress()
        lit_rcb    =   Literal("}").suppress()
        lit_lp     =   Literal("(").suppress()
        lit_rp     =   Literal(")").suppress()
        lit_dot    =   Literal(".").suppress()
        lit_asgn   =   Literal("=").suppress()
        lit_ident  =   Literal(":=").suppress()
        lit_strm   =   Literal(";").suppress()
        lit_cln    =   Literal(":").suppress()
        lit_lsb    =   Literal("[").suppress()
        lit_rsb    =   Literal("]").suppress()
        
        binop =   op_add ^ op_sub ^ op_mul ^ op_div ^ op_xor \
                ^ op_bwa ^ op_mod ^ op_rem ^ op_urs ^ op_lte \
                ^ op_gte ^ op_sls ^ op_srs ^ op_lt  ^ op_gt  \
                ^ op_eq  ^ op_neq ^ op_bwo ^ op_cmp ^ op_cmpg \
                ^ op_cmpl \
                
        cond_op =  op_gte ^ op_lte ^ op_lt ^ op_gt ^ op_eq ^ op_neq 
        
        # Keywords
        kw_specialinvoke   = Keyword("specialinvoke")
        kw_interfaceinvoke = Keyword("interfaceinvoke")
        kw_virtualinvoke   = Keyword("virtualinvoke")
        kw_staticinvoke    = Keyword("staticinvoke")
        kw_instanceof      = Keyword("instanceof")
        kw_new             = Keyword("new")
        kw_newarray        = Keyword("newarray")
        kw_newmultiarray   = Keyword("newmultiarray")
        kw_length          = Keyword("lengthof")
        kw_neg             = Keyword("neg")
        kw_goto            = Keyword("goto")
        kw_if              = Keyword("if")
        kw_this            = Keyword("@this")
        kw_caughtexception = Keyword("@caughtexception")
        kw_lookupswitch    = Keyword("lookupswitch")
        kw_case            = Keyword("case")
        kw_default         = Keyword("default")
        kw_return          = Keyword("return")
        kw_entermonitor    = Keyword("entermonitor")
        kw_exitmonitor     = Keyword("exitmonitor")
        kw_throw           = Keyword("throw")
        kw_throws          = Keyword("throws")
        kw_catch           = Keyword("catch")
        kw_transient       = Keyword("transient")
        kw_from            = Keyword("from")
        kw_to              = Keyword("to")
        kw_with            = Keyword("with")
        kw_breakpoint      = Keyword("breakpoint")
        kw_nop             = Keyword("nop")
        kw_public          = Keyword("public")
        kw_protected       = Keyword("protected")
        kw_private         = Keyword("private")
        kw_volatile        = Keyword("volatile")
        kw_static          = Keyword("static")
        kw_annotation      = Keyword("annotation")
        kw_final           = Keyword("final")
        kw_class           = Keyword("class")
        kw_enum            = Keyword("enum")
        kw_interface       = Keyword("interface")
        kw_abstract        = Keyword("abstract")
        kw_extends         = Keyword("extends")
        kw_implements      = Keyword("implements")
        kw_null            = Keyword("null")
        
        modifier = \
                kw_public | kw_protected | kw_private \
            |   kw_static | kw_abstract | kw_final \
            |   kw_volatile | kw_enum | kw_transient \
            |   kw_annotation
        
        #Identifiers
        id_local  = Combine(Optional(Literal("$")) + Word(alphas) + Word(nums))
        id_java = Word(alphas + "'$_", alphanums + "'$_")
        id_class_comp = Word(alphas + "_", alphanums + "$_")
        id_type = Combine(id_class_comp + ZeroOrMore(Combine(Literal(".") + (id_class_comp))) + Optional(Word("[]")))
        id_method_name = id_java | Word("<clinit>") | Word("<init>") 
        id_label = Combine(Literal("label") + Word(nums)) 
        id_parameter = Combine(Literal("@parameter") + Word(nums)) 
        
        # Field
        field_specifier = \
                Suppress(Literal("<")) \
            +   id_type + lit_cln + id_type + id_java \
            +   Suppress(Literal(">"))
        field_specifier.setParseAction(self.field_specifier_parse_action)
        
        # Method
        method_param_list = delimitedList(id_type, delim=",")
        id_method = \
                Suppress(Literal("<")) \
            +   id_type + lit_cln + id_type + id_method_name \
            +   lit_lp + Group(Optional(method_param_list)) + lit_rp \
            +   Suppress(Literal(">"))
        
        number_suffix = Optional(Literal("F") | Literal("L"))
        
        # Numeric constant
        expr_number = \
                Combine( 
                    Word("+-" + nums, nums) 
                +   Optional(Literal(".") + Optional(Word(nums))) 
                +   Optional(Literal("E") + Optional(Word("+-")) + Word(nums)) 
                +   number_suffix) \
            |   Combine(Literal("#Infinity") + number_suffix) \
            |   Combine(Literal("#-Infinity") + number_suffix) \
            |   Combine(Literal("#NaN") + number_suffix)
                
        expr_number.setParseAction(self.expr_numeric_const_parse_action)
        expr_str = QuotedString(quoteChar='"', escChar="\\")
        expr_str.setParseAction(self.expr_str_const_parse_action)
        
        # Null constant
        expr_null = kw_null
        expr_null.setParseAction(self.expr_null_parse_action)
       
        # Group all constants
        expr_constant = \
                expr_str \
            ^   expr_number \
            ^   expr_null 

        # A 'class' expression (class + classname)
        expr_class = kw_class + QuotedString(quoteChar='"')
        expr_class.setParseAction(self.expr_class_parse_action)
        
        # A local variable expression
        expr_local = id_local
        expr_local.setParseAction(self.expr_local_parse_action)
       
        # Group together all "immediate" values
        expr_imm =  expr_local ^ expr_constant ^ expr_class 
        expr_imm.setParseAction(self.expr_imm_parse_action)
         
        # Conditional expression
        expr_cond = expr_imm + cond_op + expr_imm
        expr_cond.setParseAction(self.expr_cond_parse_action)
        
        # Array index
        array_idx = lit_lsb + expr_imm + lit_rsb
        empty_array_idx = lit_lsb + lit_rsb
        
        expr_binop = expr_imm + binop + expr_imm
        expr_binop.setParseAction(self.expr_binop_parse_action)
        
        expr_cast = lit_lp + id_type + lit_rp + expr_imm
        expr_cast.setParseAction(self.expr_cast_parse_action)
        
        expr_instanceof = expr_imm + kw_instanceof + id_type
        expr_instanceof.setParseAction(self.expr_instanceof_parse_action)
        
        expr_new = Suppress(kw_new) + id_type
        expr_new.setParseAction(self.expr_new_parse_action)
        
        expr_newarray = kw_newarray + lit_lp + id_type + lit_rp + array_idx
        expr_newarray.setParseAction(self.expr_newarray_parse_action)
                
        expr_newmultiarray = kw_newmultiarray + lit_lp + id_type + lit_rp + OneOrMore(array_idx | empty_array_idx)
        expr_newmultiarray.setParseAction(self.expr_newmultiarray_parse_action)
        
        expr_lengthof = kw_length + expr_imm 
        expr_lengthof.setParseAction(self.expr_lengthof_parse_action)
        
        expr_neg = kw_neg + expr_imm
        expr_neg.setParseAction(self.expr_neg_parse_action)
        
        # Invoke Expressions
        method_arg_list = delimitedList(expr_imm, delim=",")
        expr_invoke = \
                kw_specialinvoke \
                    + id_local + lit_dot + id_method \
                    + lit_lp + Group(Optional(method_arg_list)) + lit_rp \
            |   kw_interfaceinvoke \
                    + id_local + lit_dot + id_method \
                    + lit_lp + Group(Optional(method_arg_list)) + lit_rp \
            |   kw_virtualinvoke \
                    + id_local + lit_dot + id_method \
                    + lit_lp + Group(Optional(method_arg_list)) + lit_rp \
            |   kw_staticinvoke + id_method \
                    + lit_lp + Group(Optional(method_arg_list)) + lit_rp 
        expr_invoke.setParseAction(self.expr_invoke_parse_action)
                    
        expr = \
                expr_binop \
            ^   expr_cast \
            ^   expr_instanceof \
            ^   expr_invoke \
            ^   expr_new \
            ^   expr_newarray \
            ^   expr_newmultiarray \
            ^   expr_lengthof \
            ^   expr_neg 
        expr.setParseAction(self.expr_parse_action)
                    
        # Concrete Reference Expression
        expr_field_ref =  Group(Optional(id_local + lit_dot)) + field_specifier 
        expr_field_ref.setParseAction(self.expr_field_ref_parse_action)
        
        expr_array_ref = id_local + array_idx
        expr_array_ref.setParseAction(self.expr_array_ref_parse_action)
        
        # L and R values
        expr_lvalue = \
                id_local \
            ^   expr_field_ref \
            ^   expr_array_ref
            
        expr_lvalue.setParseAction(self.expr_lvalue_parse_action)
        expr_rvalue = \
                expr \
            ^   expr_field_ref \
            ^   expr_array_ref \
            ^   expr_imm 
        
        # Declaration
        stmt_decl = \
                id_type \
            +   Group(delimitedList(id_local, delim=",")) \
            +   lit_strm
        stmt_decl.setParseAction(self.stmt_decl_parse_action)
        
        # Statements 
        stmt_assign = \
                expr_lvalue + lit_asgn + expr_rvalue + lit_strm
        '''
                id_local + lit_asgn + expr_rvalue + lit_strm \
            ^   field_specifier + lit_asgn + expr_imm + lit_strm \
            ^   id_local + lit_dot + field_specifier + lit_asgn + expr_imm + lit_strm \
            ^   id_local + lit_lsb + expr_imm + lit_rsb + lit_asgn + expr_imm + lit_strm
        '''
        
        stmt_assign.setParseAction(self.stmt_assign_parse_action)
        
        stmt_identity = \
                id_local + lit_ident + kw_this + lit_cln + id_type + lit_strm \
            ^   id_local + lit_ident + id_parameter + lit_cln + id_type + lit_strm \
            ^   id_local + lit_ident + kw_caughtexception + lit_strm
            
        stmt_identity.setParseAction(self.stmt_identity_parse_action)
        
        stmt_goto = kw_goto + id_label + lit_strm
        stmt_goto.setParseAction(self.stmt_goto_parse_action)
        
        stmt_if = Suppress(kw_if) + expr_cond + Suppress(kw_goto) + id_label + lit_strm
        stmt_if.setParseAction(self.stmt_if_parse_action)
        
        stmt_invoke = expr_invoke + lit_strm
        stmt_invoke.setParseAction(self.stmt_invoke_parse_action)
        
        switch_case = kw_case + expr_number + lit_cln + kw_goto + id_label + lit_strm
        switch_default = kw_default + lit_cln + kw_goto + id_label + lit_strm
        switch_body = ZeroOrMore(switch_case) + Optional(switch_default)
        stmt_switch = kw_lookupswitch + lit_lp + expr_imm + lit_rp + lit_lcb + switch_body + lit_rcb + lit_strm
        stmt_switch.setParseAction(self.stmt_switch_parse_action)
        
        stmt_enter_monitor = kw_entermonitor + expr_imm + lit_strm
        stmt_enter_monitor.setParseAction(self.stmt_enter_monitor_parse_action)
        
        stmt_exit_monitor = kw_exitmonitor + expr_imm + lit_strm
        stmt_exit_monitor.setParseAction(self.stmt_exit_monitor_parse_action)
        
        stmt_return = Suppress(kw_return) + expr_imm + lit_strm | Suppress(kw_return) + lit_strm
        stmt_return.setParseAction(self.stmt_return_parse_action)
        
        stmt_throw = kw_throw + expr_imm + lit_strm
        stmt_throw.setParseAction(self.stmt_throw_parse_action)
        
        stmt_catch = kw_catch + id_type \
                +   kw_from + id_label \
                +   kw_to + id_label \
                +   kw_with + id_label + lit_strm
        stmt_catch.setParseAction(self.stmt_catch_parse_action)
        
        stmt_breakpoint = kw_breakpoint + lit_strm
        stmt_breakpoint.setParseAction(self.stmt_breakpoint_parse_action)
        
        stmt_nop = kw_nop + lit_strm
        stmt_nop.setParseAction(self.stmt_nop_parse_action)
        
        jimple_stmt = \
                stmt_decl           \
            ^   stmt_assign         \
            ^   stmt_identity       \
            ^   stmt_goto           \
            ^   stmt_if             \
            ^   stmt_invoke         \
            ^   stmt_switch         \
            ^   stmt_enter_monitor  \
            ^   stmt_exit_monitor   \
            ^   stmt_return         \
            ^   stmt_throw          \
            ^   stmt_catch          \
            ^   stmt_breakpoint     \
            ^   stmt_nop
        jimple_stmt.setParseAction(self.stmt_parse_action)
        
        throws_clause = kw_throws + delimitedList(id_type, delim=",")
        
        method_sig = \
                    Group(ZeroOrMore(modifier)) \
                +   id_type + id_method_name \
                +   lit_lp + Group(Optional(method_param_list)) + lit_rp \
                +   Group(Optional(throws_clause))
        
        method_decl = method_sig + lit_strm
        
        field_decl = ZeroOrMore(modifier) + id_type + id_java + lit_strm
        field_decl.setParseAction(self.field_decl_parse_action)
                    
        class_decl = \
                    Group(ZeroOrMore(modifier)) + Suppress(kw_class) + id_type \
                +   Optional(kw_extends + delimitedList(id_type, delim=",")) \
                +   Optional(kw_implements + delimitedList(id_type, delim=","))

        interface_decl = \
                    Group(ZeroOrMore(modifier)) + Suppress(kw_interface) + id_type \
                +   Optional(kw_extends + delimitedList(id_type, delim=",")) \
                +   Optional(kw_implements + delimitedList(id_type, delim=","))

        
        jimple_method_item = \
                jimple_stmt \
            |   Combine(id_label + lit_cln).setParseAction(self.label_parse_action)
            
        jimple_method_body = ZeroOrMore(jimple_method_item)
        jimple_method = \
                Group(method_sig) + lit_lcb \
            +   Group(jimple_method_body) \
            +   lit_rcb
        jimple_method.setParseAction(self.method_defn_parse_action)

        jimple_class_item = field_decl | method_decl | jimple_method
        jimple_class_body = ZeroOrMore(jimple_class_item)
        jimple_class = Group(class_decl | interface_decl) + lit_lcb + Group(jimple_class_body) + lit_rcb
        jimple_class.setParseAction(self.class_defn_parse_action)
        return jimple_class
Example #46
0
# Word functions: extend(<expr>, <expr>) and resize(<expr>, <expr>).
# The separating comma is wrapped in Suppress: a bare "," is promoted by
# pyparsing to a plain Literal whose token is kept, which would make t[2]
# the comma itself instead of the second operand.
_word_function = (
    Literal("extend") + Suppress("(") + _basic_expr + Suppress(",") + _basic_expr + Suppress(")")
    | Literal("resize") + Suppress("(") + _basic_expr + Suppress(",") + _basic_expr + Suppress(")")
)
# Tokens are [function name, first operand, second operand].
_word_function.setParseAction(lambda s, l, t: WordFunction(t[0], t[1], t[2]))

# count(<expr>, <expr>, ...)
# NOTE(review): only t[1] (the first expression of the delimited list) is
# passed to Count -- confirm against Count's constructor whether all
# expressions were meant to be forwarded.
_count = Literal("count") + Suppress("(") + delimitedList(_basic_expr) + Suppress(")")
_count.setParseAction(lambda s, l, t: Count(t[1]))

# next(<expr>)
_next = Literal("next") + Suppress("(") + _basic_expr + Suppress(")")
_next.setParseAction(lambda s, l, t: Next(t[1]))

# case <cond> : <value> ; ... esac
# Each branch contributes two tokens, so even positions are the conditions
# and odd positions the values; the mapping keeps the branch order.
_case_case = _basic_expr + Suppress(":") + _basic_expr + Suppress(";")
_case_body = OneOrMore(_case_case)
_case_body.setParseAction(lambda s, l, t: OrderedDict(zip(t[::2], t[1::2])))
_case = Suppress("case") + _case_body + Suppress("esac")
_case.setParseAction(lambda s, l, t: Case(t[0]))

# Atomic expressions. "^" picks the longest match between a complex
# identifier and the first matching alternative of the parenthesised group.
_base = complex_identifier ^ (
    _conversion
    | _word_function
    | _count
    | _next
    | Suppress("(") + _basic_expr + Suppress(")")
    | _case
    | constant
)

# Postfix accessors: any number of [index] subscripts or [high:low] bit
# selections following a base expression.
_ap = Forward()
_array_subscript = Group(Suppress("[") + _basic_expr + Suppress("]"))

_word_bit_selection = Group(Suppress("[") + _basic_expr + Suppress(":") + _basic_expr + Suppress("]"))

_ap <<= Optional(_array_subscript + _ap | _word_bit_selection + _ap)
_array = _base + _ap


def _handle_array(tokens):
Example #47
0
def _build_asn1_grammar():
    """Build the pyparsing grammar used to parse ASN.1 module definitions.

    The rules are assembled bottom-up (keywords, literals, values,
    constraints, types, assignments, modules). Once every rule exists, parse
    actions are attached so that matched tokens are wrapped in
    ``AnnotatedToken`` objects tagged with a rule name.

    Returns:
        The start rule, ``OneOrMore(module_definition)``.
    """
    def build_identifier(prefix_pattern):
        """Return a rule for one character from ``prefix_pattern`` followed by
        an optional run of letters, digits and hyphens, combined into a
        single token."""
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        # todo: more rigorous? trailing hyphens and -- forbidden
        return Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)

    def braced_list(element_rule):
        """Return a rule for ``{ elem, elem, ... }`` (possibly empty); the
        braces are suppressed and the elements are grouped."""
        elements_rule = Optional(delimitedList(element_rule))
        return Suppress('{') + Group(elements_rule) + Suppress('}')

    def annotate(name):
        """Return a parse action that wraps the matched tokens in an
        ``AnnotatedToken`` labelled ``name``."""
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    # NOTE: multi-word keywords such as 'DEFINED BY' are given to Keyword as
    # one string containing a single space.
    ANY = Keyword('ANY')
    DEFINED_BY = Keyword('DEFINED BY')
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    # Binary ('0101'B) and hexadecimal ('0AF'H) strings; the quotes and the
    # B/H suffix are suppressed.
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments
    # A hyphen comment starts at '--' and runs up to the next '--' or the end
    # of the line, whichever comes first.
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # todo: consider more literals from 16.9
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    exponent = CaselessLiteral('e') + signed_number
    real_value = Combine(signed_number + Optional(Literal('.') + Optional(number)) + Optional(exponent))

    # In value range constraints, decimal points must be followed by number, or
    # the grammar becomes ambiguous: ([1.].100) vs ([1]..[100])
    constraint_real_value = Combine(signed_number + Optional(Literal('.') + number) + Optional(exponent))

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    external_value_reference = module_reference + Suppress('.') + valuereference
    defined_value = external_value_reference | valuereference  # todo: more options from 13.1
    referenced_value = Unique(defined_value)  # todo: more options from 16.11

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
                              (objid_components_list | (defined_value + objid_components_list)) + \
                              Suppress('}')

    value = builtin_value | referenced_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS

    # extensions
    extension_default = Unique(EXTENSIBILITY_IMPLIED)

    # forward declarations

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    lower_bound = (constraint_real_value | signed_number | referenced_value | MIN)
    upper_bound = (constraint_real_value | signed_number | referenced_value | MAX)
    single_value_constraint = Suppress('(') + value + Suppress(')')
    value_range_constraint = Suppress('(') + lower_bound + Suppress('..') + upper_bound + Suppress(')')
    # TODO: Include contained subtype constraint here if we ever implement it.
    # The parentheses around the SIZE clause itself are optional here.
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + (single_value_constraint | value_range_constraint) + Optional(Suppress(')'))

    # types
    # todo: consider other defined types from 13.1
    defined_type = Optional(module_reference + Suppress('.'), default=None) + typereference + Optional(size_constraint, default=None)

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    # The plain named_type alternative comes last so the OPTIONAL/DEFAULT
    # forms are tried before it.
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT, default=None) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    named_nonumber = Unique(identifier)
    enumeration = named_number | named_nonumber

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    selection_type = identifier + Suppress('<') + type_
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + Optional(braced_list(named_number), default=[]) + Optional(single_value_constraint | size_constraint, default=None)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number) + Optional(single_value_constraint, default=None)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING + Optional(size_constraint)
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
                                      GraphicString | IA5String | \
                                      ISO646String | NumericString | \
                                      PrintableString | TeletexString | \
                                      T61String | UniversalString | \
                                      UTF8String | VideotexString | \
                                      VisibleString
    characterstring_type = (restricted_characterstring_type | unrestricted_characterstring_type) + Optional(size_constraint)
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # ANY type
    any_type = ANY + Optional(Suppress(DEFINED_BY + identifier))

    # todo: consider other builtins from 16.2
    simple_type = (any_type | boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(value_range_constraint | single_value_constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type

    referenced_type = defined_type | selection_type  # todo: consider other ref:d types from 16.3

    # Every type rule now exists, so the forward declarations can be resolved.
    type_ << (builtin_type | referenced_type)
    named_type << (identifier + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    # TODO: Maybe handle full assigned-identifier syntax with defined values
    # described in 12.1, but I haven't been able to find examples of it, and I
    # can't say for sure what acceptable syntax is.
    global_module_reference = module_reference + Optional(object_identifier_value, default=None)

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = delimitedList(symbol, delim=',')
    symbols_from_module = Group(Group(symbol_list) + Suppress(FROM) + global_module_reference)
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Unique(symbols_from_module_list)
    exports = Suppress(EXPORTS) + Optional(symbol_list) + Suppress(';')
    imports = Suppress(IMPORTS) + Optional(symbols_imported) + Suppress(';')

    module_body = Optional(exports, default=None) + Optional(imports, default=None) + assignment_list
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + Suppress(DEFINITIONS) + Optional(tag_default, default=None) + \
                        Optional(extension_default, default=None) + Suppress('::=') + \
                        Suppress(BEGIN) + module_body + Suppress(END)

    # Comments are skipped wherever they occur inside a module definition.
    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    named_nonumber.setParseAction(annotate('NamedValue'))
    single_value_constraint.setParseAction(annotate('SingleValueConstraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    value_range_constraint.setParseAction(annotate('ValueRangeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    module_reference.setParseAction(annotate('ModuleReference'))
    global_module_reference.setParseAction(annotate('GlobalModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
    exports.setParseAction(annotate('Exports'))
    imports.setParseAction(annotate('Imports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))
    defined_type.setParseAction(annotate('DefinedType'))
    selection_type.setParseAction(annotate('SelectionType'))
    referenced_value.setParseAction(annotate('ReferencedValue'))

    # A source text may contain several module definitions back to back.
    start = OneOrMore(module_definition)
    return start
Example #48
0
                  | Literal("resize") + Suppress("(") + _basic_expr + "," +
                  _basic_expr + Suppress(")"))
_word_function.setParseAction(lambda s, l, t: WordFunction(t[0], t[1], t[2]))

# count(<expr>, <expr>, ...): the parse action hands token 1 to Count.
_count = Literal("count") + Suppress("(") + delimitedList(_basic_expr) + Suppress(")")
_count.setParseAction(lambda string, location, tokens: Count(tokens[1]))

# next(<expr>)
_next = (
    Literal("next")
    + Suppress("(")
    + _basic_expr
    + Suppress(")")
)
_next.setParseAction(lambda string, location, tokens: Next(tokens[1]))

# case <cond> : <value> ; ... esac
# Every branch yields two tokens; even positions become the keys and odd
# positions the values of an ordered mapping.
_case_case = (
    _basic_expr
    + Suppress(":")
    + _basic_expr
    + Suppress(";")
)
_case_body = OneOrMore(_case_case)
_case_body.setParseAction(
    lambda string, location, tokens: OrderedDict(
        zip(tokens[::2], tokens[1::2])
    )
)
_case = Suppress("case") + _case_body + Suppress("esac")
_case.setParseAction(lambda string, location, tokens: Case(tokens[0]))

# Atomic expressions: the longest match between a complex identifier and the
# first matching alternative of the parenthesised group.
_base = complex_identifier ^ (
    _conversion
    | _word_function
    | _count
    | _next
    | Suppress("(") + _basic_expr + Suppress(")")
    | _case
    | constant
)

# Postfix accessors applied to a base expression.
_ap = Forward()
_array_subscript = Group(
    Suppress("[") + _basic_expr + Suppress("]")
)

_word_bit_selection = Group(
    Suppress("[")
    + _basic_expr
    + Suppress(":")
    + _basic_expr
    + Suppress("]")
)
Example #49
0
from .qa import ApplicationRef, FilePattern, FileGroup, Qa, CommandLine, \
    InputLine

# "ID <name>": the test identifier is stored under the results name 'id'.
TEST_ID = Suppress("ID") + Word(alphanums + '-_').setResultsName('id')


def _get_application_ref(token):
    """Build an ApplicationRef from 'appname' and the optional 'embassypack'."""
    embassy_pack = token.get('embassypack')
    return ApplicationRef(token['appname'], embassy_pack)


# "AP|AA|AQ <appname> [AB <embassypack>]"
APPLICATION_REF = (
    oneOf(['AP', 'AA', 'AQ'])
    + Word(alphas).setResultsName('appname')
    + Optional(Suppress('AB') + Word(alphas).setResultsName('embassypack'))
)
APPLICATION_REF.setParseAction(_get_application_ref)


def _get_cl_line(token):
    """Wrap the text following "CL " in a CommandLine."""
    text = token['line']
    return CommandLine(text)


# One "CL <rest of line>" entry.
CL_LINE = Suppress("CL ") + restOfLine.setResultsName('line')
CL_LINE.setParseAction(_get_cl_line)


def _get_cl_lines(token):
    """Return the grouped command lines stored under 'cl_lines'."""
    grouped = token['cl_lines']
    return grouped


# Zero or more consecutive "CL" entries, grouped under 'cl_lines'.
CL_LINES = Group(ZeroOrMore(CL_LINE)).setResultsName('cl_lines')
CL_LINES.setParseAction(_get_cl_lines)


def _get_in_line(token):
    """Wrap the text following "IN " in an InputLine."""
    text = token['line']
    return InputLine(text)


# One "IN <rest of line>" entry.
IN_LINE = Suppress("IN ") + restOfLine.setResultsName('line')
IN_LINE.setParseAction(_get_in_line)


def _get_in_lines(token):
    """Return the grouped input lines stored under 'in_lines'."""
    grouped = token['in_lines']
    return grouped


# Zero or more consecutive "IN" entries, grouped under 'in_lines'.
IN_LINES = Group(ZeroOrMore(IN_LINE)).setResultsName('in_lines')
IN_LINES.setParseAction(_get_in_lines)
Example #50
0
    def __init__(self, fragment_file, sdkconfig):
        """Parse a fragment file and store the parsed fragments.

        The result of the parse is stored in ``self.fragments``; every parsed
        fragment additionally gets a ``path`` attribute holding the real path
        of the file it came from.

        Args:
            fragment_file: path of the fragment file, or an already opened
                file object exposing a ``name`` attribute. A file opened here
                is closed again once parsing has finished; a file object
                supplied by the caller is left open.
            sdkconfig: configuration object whose ``evaluate_expression()``
                decides which ``if``/``elif``/``else`` branches apply.

        Raises:
            ParseFatalException: raised from the parse actions for a
                duplicate or unsupported key, too few or too many values for
                a key, a value rejected by the fragment, or a missing
                required key.
        """
        opened_here = False
        try:
            fragment_file = open(fragment_file, "r")
            opened_here = True
        except TypeError:
            # open() rejects non-path arguments: treat the argument as an
            # already opened file object.
            pass

        path = os.path.realpath(fragment_file.name)

        indent_stack = [1]

        class parse_ctx:
            # Mutable state shared by the parse actions below.
            fragment = None  # current fragment
            key = ""  # current key
            keys = list()  # list of keys parsed
            key_grammar = None  # current key grammar

            @staticmethod
            def reset():
                # Clear the same attributes that are declared above.
                parse_ctx.fragment = None
                parse_ctx.key = ""
                parse_ctx.keys = list()
                parse_ctx.key_grammar = None

        def fragment_type_parse_action(toks):
            parse_ctx.reset()
            parse_ctx.fragment = FRAGMENT_TYPES[
                toks[0]]()  # create instance of the fragment
            return None

        def expand_conditionals(toks, stmts):
            # Collect into stmts every "value" reachable from toks, following
            # only the first conditional branch whose condition holds.
            try:
                stmt = toks["value"]
                stmts.append(stmt)
            except KeyError:
                try:
                    conditions = toks["conditional"]
                    for condition in conditions:
                        try:
                            _toks = condition[1]
                            _cond = condition[0]
                            if sdkconfig.evaluate_expression(_cond):
                                expand_conditionals(_toks, stmts)
                                break
                        except IndexError:
                            # A group with a single element is the "else"
                            # branch: it has a body but no condition.
                            expand_conditionals(condition[0], stmts)
                except KeyError:
                    for tok in toks:
                        expand_conditionals(tok, stmts)

        def key_body_parsed(pstr, loc, toks):
            stmts = []
            expand_conditionals(toks, stmts)

            if parse_ctx.key_grammar.min and len(
                    stmts) < parse_ctx.key_grammar.min:
                raise ParseFatalException(
                    pstr, loc,
                    "fragment requires at least %d values for key '%s'" %
                    (parse_ctx.key_grammar.min, parse_ctx.key))

            if parse_ctx.key_grammar.max and len(
                    stmts) > parse_ctx.key_grammar.max:
                raise ParseFatalException(
                    pstr, loc,
                    "fragment requires at most %d values for key '%s'" %
                    (parse_ctx.key_grammar.max, parse_ctx.key))

            try:
                parse_ctx.fragment.set_key_value(parse_ctx.key, stmts)
            except Exception as e:
                # str(e) instead of e.message: exceptions have no "message"
                # attribute on Python 3, so the handler itself would fail.
                raise ParseFatalException(
                    pstr, loc,
                    "unable to add key '%s'; %s" % (parse_ctx.key, str(e)))
            return None

        key = Word(alphanums + "_") + Suppress(":")
        key_stmt = Forward()

        condition_block = indentedBlock(key_stmt, indent_stack)
        key_stmts = OneOrMore(condition_block)
        key_body = Suppress(key) + key_stmts
        key_body.setParseAction(key_body_parsed)

        condition = originalTextFor(
            SDKConfig.get_expression_grammar()).setResultsName("condition")
        if_condition = Group(
            Suppress("if") + condition + Suppress(":") + condition_block)
        elif_condition = Group(
            Suppress("elif") + condition + Suppress(":") + condition_block)
        else_condition = Group(
            Suppress("else") + Suppress(":") + condition_block)
        conditional = (if_condition + Optional(OneOrMore(elif_condition)) +
                       Optional(else_condition)).setResultsName("conditional")

        def key_parse_action(pstr, loc, toks):
            key = toks[0]

            if key in parse_ctx.keys:
                # Report the key that is duplicated; parse_ctx.key still
                # holds the previously parsed key at this point.
                raise ParseFatalException(
                    pstr, loc,
                    "duplicate key '%s' value definition" % key)

            parse_ctx.key = key
            parse_ctx.keys.append(key)

            try:
                parse_ctx.key_grammar = parse_ctx.fragment.get_key_grammars(
                )[key]
                key_grammar = parse_ctx.key_grammar.grammar
            except KeyError:
                raise ParseFatalException(
                    pstr, loc, "key '%s' is not supported by fragment" % key)
            except Exception as e:
                raise ParseFatalException(
                    pstr, loc,
                    "unable to parse key '%s'; %s" % (key, str(e)))

            # The grammar of the key's statements depends on the key just
            # read, so the forward declaration is (re)bound here.
            key_stmt << (conditional
                         | Group(key_grammar).setResultsName("value"))

            return None

        def name_parse_action(pstr, loc, toks):
            parse_ctx.fragment.name = toks[0]

        key.setParseAction(key_parse_action)

        ftype = Word(alphas).setParseAction(fragment_type_parse_action)
        fid = Suppress(":") + Word(alphanums + "_.").setResultsName("name")
        fid.setParseAction(name_parse_action)
        header = Suppress("[") + ftype + fid + Suppress("]")

        def fragment_parse_action(pstr, loc, toks):
            key_grammars = parse_ctx.fragment.get_key_grammars()
            required_keys = {k for (k, v) in key_grammars.items() if v.required}
            present_keys = required_keys.intersection(set(parse_ctx.keys))
            if present_keys != required_keys:
                raise ParseFatalException(
                    pstr, loc, "required keys %s for fragment not found" %
                    list(required_keys - present_keys))
            return parse_ctx.fragment

        fragment_stmt = Forward()
        fragment_block = indentedBlock(fragment_stmt, indent_stack)

        fragment_if_condition = Group(
            Suppress("if") + condition + Suppress(":") + fragment_block)
        fragment_elif_condition = Group(
            Suppress("elif") + condition + Suppress(":") + fragment_block)
        fragment_else_condition = Group(
            Suppress("else") + Suppress(":") + fragment_block)
        fragment_conditional = (
            fragment_if_condition +
            Optional(OneOrMore(fragment_elif_condition)) +
            Optional(fragment_else_condition)).setResultsName("conditional")

        fragment = (header +
                    OneOrMore(indentedBlock(key_body, indent_stack,
                                            False))).setResultsName("value")
        fragment.setParseAction(fragment_parse_action)
        fragment.ignore("#" + restOfLine)

        deprecated_mapping = DeprecatedMapping.get_fragment_grammar(
            sdkconfig, fragment_file.name).setResultsName("value")

        fragment_stmt << (Group(deprecated_mapping) | Group(fragment)
                          | Group(fragment_conditional))

        def fragment_stmt_parsed(pstr, loc, toks):
            stmts = []
            expand_conditionals(toks, stmts)
            return stmts

        parser = ZeroOrMore(fragment_stmt)
        parser.setParseAction(fragment_stmt_parsed)

        try:
            self.fragments = parser.parseFile(fragment_file, parseAll=True)
        finally:
            # Only close what was opened here; a caller-supplied file object
            # stays under the caller's control.
            if opened_here:
                fragment_file.close()

        for parsed_fragment in self.fragments:
            parsed_fragment.path = path
                r = vb_str.get_ms_ascii_value(c_str)

        # Return the result.
        if (log.getEffectiveLevel() == logging.DEBUG):
            log.debug("Asc: return %r" % r)
        return r

    def __repr__(self):
        """Return ``Asc(<repr of the argument>)``."""
        arg_repr = repr(self.arg)
        return 'Asc(%s)' % arg_repr


# Asc() / AscW(): the keyword is suppressed and the parenthesised argument is
# optional; matched tokens are handed to the Asc class.
# TODO: see MS-VBAL 6.1.2.11.1.1 page 240 => AscB, AscW
asc = (
    Suppress(CaselessKeyword('Asc') | CaselessKeyword('AscW'))
    + Optional(Suppress('(') + expression + Suppress(')'))
)
asc.setParseAction(Asc)

# --- StrReverse() --------------------------------------------------------------------


class StrReverse(VBA_Object):
    """Emulator for VBA StrReverse function.

    """
    def __init__(self, original_str, location, tokens):
        """Initialise the node from a parser match.

        All three arguments are forwarded unchanged to the base-class
        constructor; the first token is then stored as the function's
        argument in ``self.arg``.
        """
        super(StrReverse, self).__init__(original_str, location, tokens)
        # extract argument from the tokens:
        # Here the arg is expected to be either a string or a VBA_Object
        self.arg = tokens[0]

    def return_type(self):
Example #52
0
# Numeric terminals. The bracketed numbers are the grammar production numbers
# quoted in the rule comments below.

# [147] DECIMAL ::= [0-9]* '.' [0-9]+
# (a trailing `(?![eE])` lookahead is noted in the original but is not part
# of the pattern)
DECIMAL = Regex(r'[0-9]*\.[0-9]+').setParseAction(
    lambda toks: rdflib.Literal(toks[0], datatype=rdflib.XSD.decimal))

# [148] DOUBLE ::= [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT
# EXPONENT_re is spliced into each of the three alternatives.
DOUBLE = Regex(
    r'[0-9]+\.[0-9]*%(e)s|\.([0-9])+%(e)s|[0-9]+%(e)s' % {'e': EXPONENT_re}
).setParseAction(
    lambda toks: rdflib.Literal(toks[0], datatype=rdflib.XSD.double))

# The signed forms copy the unsigned terminal and call leaveWhitespace() on
# the copy, so no whitespace is skipped between the sign and the digits.

# [149] INTEGER_POSITIVE ::= '+' INTEGER
# The suppressed '+' is put back in front of the lexical form.
INTEGER_POSITIVE = (
    Suppress('+') + INTEGER.copy().leaveWhitespace()
).setParseAction(
    lambda toks: rdflib.Literal("+" + toks[0], datatype=rdflib.XSD.integer))

# [150] DECIMAL_POSITIVE ::= '+' DECIMAL
DECIMAL_POSITIVE = Suppress('+') + DECIMAL.copy().leaveWhitespace()

# [151] DOUBLE_POSITIVE ::= '+' DOUBLE
DOUBLE_POSITIVE = Suppress('+') + DOUBLE.copy().leaveWhitespace()

# [152] INTEGER_NEGATIVE ::= '-' INTEGER
# neg() receives the parsed literal; presumably it returns the negated
# literal -- confirm against its definition.
INTEGER_NEGATIVE = (
    Suppress('-') + INTEGER.copy().leaveWhitespace()
).setParseAction(lambda toks: neg(toks[0]))

# [153] DECIMAL_NEGATIVE ::= '-' DECIMAL
DECIMAL_NEGATIVE = (
    Suppress('-') + DECIMAL.copy().leaveWhitespace()
).setParseAction(lambda toks: neg(toks[0]))
Example #53
0
# Only newline and tab count as skippable whitespace from here on; a space
# character is therefore significant to every element created below.
ParserElement.setDefaultWhitespaceChars("\n\t")
backslash = "\\"

# texcmd is recursive (a parameter may contain another command), so it is
# declared first and given its definition later.
texcmd = Forward()
filler = CharsNotIn(backslash + '$')
filler2 = CharsNotIn(backslash + '${}')

# Optional argument: '[' ... ']' with anything but ']' in between.
arg = ('[' + CharsNotIn("]") + ']').setParseAction(argfun)

# Inline math keeps its '$' delimiters (unquoteResults=False).
dollarmath = QuotedString('$', multiline=True, unquoteResults=False)

# Braced parameter: the outer braces are dropped; the content is any mix of
# inline math, plain text, a nested brace group (kept verbatim, braces
# included) or a nested command.
param = (
    Suppress(Literal('{'))
    + ZeroOrMoreAsList(
        dollarmath
        | filler2
        | QuotedString('{', endQuoteChar='}', unquoteResults=False)
        | texcmd)
    + Suppress(Literal('}'))
).setParseAction(paramfun)


def bs(c):
    """Return a ``Literal`` matching a backslash immediately followed by *c*."""
    escaped = "\\" + c
    return Literal(escaped)


# Backslash followed by exactly one of these characters.
singles = (
    bs("[") | bs("]")
    | bs("{") | bs("}")
    | bs("\\") | bs("&")
    | bs("_") | bs(",")
    | bs("#") | bs("\n")
    | bs(";") | bs("|")
    | bs("%") | bs("*")
    | bs("~") | bs("^")
)

# A command is either one of the single-character escapes or a backslash
# followed by at least one letter/digit (min=2 counts the backslash), then any
# number of [..] arguments and {..} parameters.
# '+' binds tighter than '<<', so the whole sequence is the operand of '<<';
# the outer parentheses only make that explicit.
texcmd << (
    (singles | Word(
        "\\",
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
        min=2))
    + ZeroOrMoreAsList(arg)
    + ZeroOrMoreAsList(param)
)

Example #54
0
# Reserved words, each built as a Keyword from the lowercase spelling of its
# constant name. Names and spellings are paired positionally.
(RETURNS, SERVICE, OPTION, ENUM, ONEOF,
 REQUIRED, OPTIONAL, REPEATED, TRUE, FALSE) = map(Keyword, (
     'returns', 'service', 'option', 'enum', 'oneof',
     'required', 'optional', 'repeated', 'true', 'false',
 ))


# Declared up front so message_definition can refer to it before it is given
# a definition.
message_body = Forward()

# message <id> { <body> }
# The keyword and the braces are suppressed; the identifier and body are
# available under the result names "message_id" and "message_body". The '-'
# after the keyword means a failure past that point is reported as a hard
# syntax error, with no backtracking.
message_definition = (
    Suppress(MESSAGE)
    - identifier("message_id")
    + Suppress(LBRACE)
    + message_body("message_body")
    + Suppress(RBRACE)
).setParseAction(message_definition_fn)

# enum <id> { <name> = <int> ; ... }
# Each "<name> = <int> ;" entry is grouped separately.
enum_definition = (
    ENUM
    - identifier
    + LBRACE
    + ZeroOrMore(Group(identifier + EQ + integer + SEMI))
    + RBRACE
)

# Built-in scalar type names.
DOUBLE = Keyword("double")
INT32 = Keyword("int32")
UINT32 = Keyword("uint32")
BOOL = Keyword("bool")
STRING = Keyword("string")

# A field type is one of the built-in scalars or, failing that, any
# identifier. INT32 is listed explicitly like its siblings; it was declared
# above but missing from this alternation, so "int32" was only ever matched
# through the generic `identifier` branch.
type_ = (DOUBLE | INT32 | UINT32 | BOOL | STRING | identifier)
type_.setParseAction(type_fn)

# Field qualifier, exposed under the result name "qualifier".
qualifier = (REQUIRED | OPTIONAL | REPEATED)("qualifier")
qualifier.setParseAction(qualifier_fn)

# <qualifier> <type> <name> = <number> ;
# The '-' after the qualifier means a failure past that point is reported as
# a hard syntax error, with no backtracking.
field = (
    qualifier
    - type_("type_")
    + identifier("identifier")
    + EQ
    + integer("field_number")
    + SEMI
)
field.setParseAction(field_fn)
Example #55
0
# '#' starts a comment running to the end of the line; comments are skipped
# wherever a music object is parsed.
comment = '#' + restOfLine
musicobject.ignore(comment)

# A number is a run of digits, dots and slashes, evaluated and converted to
# float.
# NOTE(review): the matched text is passed to eval(). The regex restricts it
# to digits, '.' and '/', but malformed input such as "1..2" or "1/0" still
# raises from inside the parse action, and '/' follows the running
# interpreter's division rules. Consider a dedicated numeric parser.
number = Regex(r'[\d./]+').setParseAction(
    lambda s, loc, toks: [float(eval(toks[0]))])

# A frequency is either a number or a symbol: one of a-g or '_', an optional
# digit, then an optional '#' or '-'.
frequency_symbol = Regex(r'[abcdefg_]\d?[#-]?')
frequency_number = number
frequency = frequency_number ^ frequency_symbol

duration = number

# A tone is a bare frequency or a "(frequency, duration)" pair; the matched
# tokens become the positional arguments of Tone.
tone = (
    frequency
    ^ (Suppress('(') + frequency + Suppress(',') + duration + Suppress(')'))
).setParseAction(lambda s, loc, toks: Tone(*toks))

# A group is a brace-enclosed, comma-separated list; each list item is one or
# more music objects collected with Grp. `Group` here receives the whole
# token list -- presumably a project music class, not the pyparsing
# combinator (confirm against the imports).
group = (
    Suppress('{')
    + delimitedList(Grp(OneOrMore(musicobject)), ',')
    + Suppress('}')
).setParseAction(lambda s, loc, toks: Group(toks))

# "tone * musicobject": token 0 is the tone, token 1 the literal '*', and
# token 2 the music object.
transformed = (tone + '*' + musicobject).setParseAction(
    lambda s, loc, toks: Transformed(toks[0], toks[2]))

# '^' tries every alternative and keeps the longest match.
musicobject << (tone ^ group ^ transformed)


# Script driver: parse the file 'example.music' with the grammar and print
# the first element of the parse result.
result = musicobject.parseFile('example.music')
print(result[0])

# The converter is imported here, immediately before its first use.
from to_music21 import construct_music21
# construct_music21 is called once per output, so the conversion runs twice:
# once for the 'musicxml' write to foo.xml and once for the 'text' show.
construct_music21(result[0]).write('musicxml', 'foo.xml')
construct_music21(result[0]).show('text')