Example #1
0
def parse_mapping(data):
    '''
    Take a list of strings and return the (character, configuration) mapping in it.

    The 'data' passed is the metadata at the start of the file, which specifies
    characters that have to be replaced with configurations.
    For example, 'u sp 5d' indicates that u is to be replaced with 'sp 5d'.

    The function relies on different mappings being separated by a gap of at
    least two spaces: each row is consumed one mapping at a time, and the part
    after the character is joined to form the configuration.

    Returns the dict built so far as soon as a row fragment cannot be parsed,
    or the complete dict once every row has been consumed.
    '''
    mapping = {}
    # Two grammars are defined because Optional() wasn't working as expected.
    # Token layout: [char, ws, word, (ws, word,) ws, rest-of-line].
    two_level = CharsNotIn(' ') + White() + Word(alphanums) + White(exact=1) + Word(alphanums) + White(min=2) + restOfLine
    one_level = CharsNotIn(' ') + White() + Word(alphanums) + White(min=2) + restOfLine
    for row in data:
        remaining = row
        while len(remaining) > 0:
            try:
                # Two-level configuration, e.g. 'u sp 5d'.
                tokens = two_level.parseString(remaining)
                mapping[tokens[0]] = ' '.join([tokens[2], tokens[4]])
            except Exception:
                try:
                    # Single-level configuration.
                    tokens = one_level.parseString(remaining)
                    mapping[tokens[0]] = tokens[2]
                except Exception:
                    # Parsing error, so mappings must be complete.
                    return mapping
            # Continue with whatever followed the mapping just consumed.
            remaining = tokens[-1]
    # Every row was consumed without a parse error; the original fell off the
    # end here and implicitly returned None.
    return mapping
Example #2
0
def param_substitude(lines, param_dict):
    """Rewrite ``{{...}}`` placeholders in *lines*, in place, and return the list.

    Each match of the placeholder grammar is handed to ``param_set`` together
    with *param_dict*; whatever that callback returns replaces the matched
    text (via ``transformString``).

    Lines that are empty after ``rstrip()`` are left untouched.  Every other
    line is replaced by the transformed version of its right-stripped text,
    so trailing whitespace (including the newline) is dropped from it.
    """
    def _substitute(tokens):
        return param_set(tokens, param_dict)

    # One or more "<text>{{<WRD_p>}}<text>" runs, then at most one newline.
    placeholder_runs = (CharsNotIn('{{}}')[0, ] + '{{' + WRD_p + '}}' +
                        CharsNotIn('{{}}')[0, ])[1, ]
    mask_par = placeholder_runs + Char('\n')[0, 1]
    mask_par.setParseAction(_substitute)

    for position, raw_line in enumerate(lines):
        stripped = raw_line.rstrip()
        if stripped:
            lines[position] = mask_par.transformString(stripped)

    return lines
Example #3
0
    def init_latex_parser(self):
        """Build the pyparsing grammar pieces for LaTeX text and set ``self.parser``.

        The grammar distinguishes words, punctuation runs, backslash commands
        and "white" characters (anything that is none of those).  A sentence is
        zero or more words followed by punctuation.
        """
        # These variables define the valid lists of characters.
        lowers = 'qwertyuiopasdfghjklzxcvbnm'
        uppers = lowers.upper()
        alphas = lowers + uppers
        digits = '1234567890'
        other_word_symbols = '-_'
        word_chars = alphas + other_word_symbols + digits
        punctuation_symbols = '.!?:;…'
        command_symbol = '\\'
        # Anything that cannot start a word, punctuation run or command.
        white_characters = CharsNotIn(word_chars + punctuation_symbols +
                                      command_symbol)

        # Define grammar.
        word = Word(word_chars)
        # As far as I can tell, only alphas are allowed in a LaTeX command
        # (bar the special commands such as \&).  Word(alphas) matches a run
        # of letters; OneOrMore(alphas) would have wrapped the *string* in a
        # Literal and only matched the exact 52-character sequence.
        command = command_symbol + Word(alphas)
        punc = Word(punctuation_symbols)
        white_and_word = Group(Suppress(Optional(white_characters)) + word)
        white_and_command = Group(
            Suppress(Optional(white_characters)) + command)
        white_and_punc = Group(Suppress(Optional(white_characters)) + punc)
        sentence = ZeroOrMore(white_and_word) + white_and_punc
        pure_piece = ZeroOrMore(sentence)
        bracketed_piece = '{' + pure_piece + Suppress(
            ZeroOrMore(white_characters)) + '}'

        # NOTE(review): `content` is not defined anywhere in this method, and
        # none of the pieces above are combined into it.  Unless it is a
        # module-level name, this line raises NameError -- confirm which
        # top-level expression was intended.
        self.parser = content.parseString
Example #4
0
    class NLPyParser(NLBaseParser):
        """NLBaseParser implementation built on a pyparsing grammar.

        A line consists of ``name=value`` pairs, among them one ``ts`` and one
        ``event`` pair (in any order, via ``Each``), terminated by a newline.
        Values are either double-quoted strings (quotes kept) or runs of
        non-space text.
        """
        notSpace = CharsNotIn(" \n")
        eq = Literal('=').suppress()
        value = (QuotedString('"', escChar=chr(92), unquoteResults=False)
                 ^ OneOrMore(notSpace))
        ts = Group(Literal('ts') + eq + value)
        event = Group(Literal('event') + eq + value)
        # Any other attribute name; the lookahead rejects 'ts' and 'event'.
        name = ~oneOf("ts event") + Word(alphanums + '-_.')
        nv = ZeroOrMore(Group(name + eq + value))
        nvp = Each([ts, event, nv]) + White('\n').suppress() + StringEnd()

        def parseLine(self, line):
            """Parse one line into a ``{name: value}`` dict.

            The ``ts`` value is passed through ``parse_ts`` when
            ``self.parse_date`` is truthy.  Raises ValueError if the line does
            not match the grammar.
            """
            try:
                pairs = self.nvp.parseString(line).asList()
            except ParseException as E:
                raise ValueError(E)
            return {
                pair[0]: (parse_ts(pair[1])
                          if self.parse_date and pair[0] == 'ts'
                          else pair[1])
                for pair in pairs
            }
Example #5
0
def PIs():
    """Return a pyparsing expression for XML Processing Instructions.

    PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'

    The target is exposed as ``pitarget`` and the remaining text (if any) as
    ``picontents``; the ``<?`` and ``?>`` delimiters are suppressed.

    >>> tests=(
    ...  '''<?foo?>''',
    ...  '''<?foo ?>''',
    ...  '''<?foo abc def ghi ?>''',
    ...  '''<?xml-stylesheet foo="bla" bar="x" ?>''',
    ...  '''<?xml-stylesheet
    ...    foo="bla"
    ...    foo="x"
    ...  ?>''',
    ... )
    >>> p=PIs()
    >>> for t in tests:
    ...   result=p.parseString(t)
    ...   print result
    ['foo']
    ['foo', ' ']
    ['foo', ' abc def ghi ']
    ['xml-stylesheet', ' foo="bla" bar="x" ']
    ['xml-stylesheet', '\\n   foo="bla"\\n   foo="x"\\n ']
    """
    opening = Suppress('<?')
    target = Word(alphas, alphanums+'-_').setResultsName('pitarget')
    # Everything up to the first '?' or '>' character.
    contents = ZeroOrMore(CharsNotIn('?>')).setResultsName('picontents')
    closing = Suppress('?>')
    return opening + target + contents + closing
Example #6
0
    def _parser_piece_text():
        """
        Build the PyParsing element matching the text part of a markdown link.

        The element matches text inside balanced square brackets (escaped
        brackets are ignored while balancing), forbids a double line break
        inside the text, names its result "text" and strips the outermost
        bracket pair from the matched string.
        """
        # A line break, optional blanks, and another line break: not allowed
        # inside markdown link text.
        blank_line_gap = (Word("\n\r", exact=1) + Optional(Word(" \t")) +
                          Word("\n\r", exact=1))

        # Escaped square brackets do not take part in bracket balancing.
        escaped_bracket = Literal("\\[") | Literal("\\]")

        # One non-bracket character that does not start a double line break.
        # Per the original author's note, whitespaceChars is set to ''
        # elsewhere, so nestedExpr applies this content parser one character
        # at a time; if that changes, the content would need to become
        # something like Combine(OneOrMore(~ignore + content_character)).
        single_character = ~FollowedBy(blank_line_gap) + CharsNotIn(
            "[]", exact=1)

        def strip_outer_brackets(s, l, toks):
            return toks[0][1:-1]

        balanced = nestedExpr(
            opener="[",
            closer="]",
            content=single_character,
            ignoreExpr=escaped_bracket,
        )
        text = originalTextFor(balanced).setResultsName("text")
        text.addParseAction(strip_outer_brackets)
        return text
Example #7
0
def pyparse_blk(text):
    """Parse block-diagram markup in *text* and return the root block.

    The markup consists of (possibly nested) ``[color: title ...]`` blocks,
    empty ``[]`` blocks and runs of ``/`` that are converted to their length.
    The parsed structure is handed to ``populate_children`` together with a
    stack whose first element is the root block, which is then returned.

    Raises:
        ValueError: if *text* does not match the grammar.  The message keeps a
            literal ``{0}`` placeholder followed by the failing line number.
    """
    def create_add_block(tokens):
        # Blocks without an explicit color default to white.
        return Block.Block(tokens.title,
                           tokens.color if tokens.color else 'white')

    left_bracket, right_bracket = map(Suppress, '[]')
    # Either '#' plus six hex digits, or a name that starts with a letter and
    # continues with letters/digits.  (The original had the two Word()
    # character sets swapped, so a name could start with a digit but could
    # not contain one afterwards.)
    color = (Word('#', hexnums, exact=7) | Word(alphas, alphanums))('color')
    empty_block = (
        left_bracket +
        right_bracket)('empty_block').setParseAction(lambda: EmptyBlock)
    # A run of slashes is reduced to the number of slashes.
    new_lines = Word('/')('new_lines').setParseAction(
        lambda tokens: len(tokens.new_lines))
    title = CharsNotIn('[]/\n')('title').setParseAction(
        lambda tokens: tokens.title.strip())
    block_data = Optional(color + Suppress(':')) + Optional(title)
    block_data.addParseAction(create_add_block)
    # Forward declaration: a block contains blocks.
    blocks = Forward()
    block = left_bracket + block_data + blocks + right_bracket
    blocks << Group(
        ZeroOrMore(Optional(new_lines) + OneOrMore(empty_block | block)))

    stack = [Block.create_root_block()]
    try:
        result = blocks.parseString(text, parseAll=True)
        assert len(result) == 1
        blocks_list = result.asList()[0]
        populate_children(blocks_list, stack)
    except (ParseSyntaxException, ParseException) as parse_err:
        raise ValueError('Error {{0}}: {0}'.format(parse_err.lineno))
    return stack[0]
Example #8
0
def func_tokens(dictionary, parse_action):
    """Build the pyparsing pieces for ``$func(args)`` and ``$N`` tokens.

    Args:
        dictionary: lookup used to replace ``$<number>`` tokens (keyed by the
            integer); missing keys yield ``''``.  When it is falsy, ``$N``
            tokens are not recognised inside argument lists.
        parse_action: parse action attached to the function-call token.

    Returns:
        A ``(func_tok, arglist, rx_tok)`` tuple of pyparsing expressions.
    """
    identifier = Word(alphas + '_', alphanums + '_')

    # "$name" immediately followed by a balanced, parenthesised argument text.
    func_ident = Combine('$' + identifier.copy()('funcname'))
    func_tok = func_ident + originalTextFor(nestedExpr())('args')
    func_tok.leaveWhitespace()
    func_tok.setParseAction(parse_action)
    # NOTE(review): pyparsing documents enablePackrat as a ParserElement-wide
    # switch, so this presumably enables packrat parsing globally -- confirm.
    func_tok.enablePackrat()

    def lookup_numbered(tokens):
        return dictionary.get(int(tokens.num), '')

    rx_tok = Combine(Literal('$').suppress() + Word(nums)('num'))
    rx_tok.setParseAction(lookup_numbered)

    def strip_text(s, l, tok):
        return tok[0].strip()

    text_tok = CharsNotIn(',').setParseAction(strip_text)
    quote_tok = QuotedString('"')

    if dictionary:
        argument = quote_tok | rx_tok | text_tok
    else:
        argument = quote_tok | text_tok
    arglist = Optional(delimitedList(argument))

    return func_tok, arglist, rx_tok
Example #9
0
def pyparsing_parse(text):
    """Parse block-diagram markup in *text* and return the root block.

    Raises ValueError (with a literal ``{0}`` placeholder and the failing
    line number in the message) when the text does not match the grammar.

    >>> import os
    >>> dirname = os.path.join(os.path.dirname(__file__), "data")
    >>> filename = os.path.join(dirname, "error1.blk")
    >>> pyparsing_parse(open(filename, encoding="utf8").read())
    Traceback (most recent call last):
    ...
    ValueError: Error {0}: syntax error, line 8
    >>> filename = os.path.join(dirname, "error2.blk")
    >>> pyparsing_parse(open(filename, encoding="utf8").read())
    Traceback (most recent call last):
    ...
    ValueError: Error {0}: syntax error, line 1
    >>> filename = os.path.join(dirname, "error3.blk")
    >>> pyparsing_parse(open(filename, encoding="utf8").read())
    Traceback (most recent call last):
    ...
    ValueError: Error {0}: syntax error, line 4
    >>> expected = "[white: ]\\n[lightblue: Director]\\n/\\n/\\n[white: ]\\n[lightgreen: Secretary]\\n/\\n/\\n[white: Minion #1]\\n[white: ]\\n[white: Minion #2]"
    >>> filename = os.path.join(dirname, "hierarchy.blk")
    >>> blocks = pyparsing_parse(open(filename, encoding="utf8").read())
    >>> str(blocks).strip() == expected
    True

    >>> expected = "[#00CCDE: MessageBox Window\\n[lightgray: Frame\\n[white: ]\\n[white: Message text]\\n/\\n/\\n[goldenrod: OK Button]\\n[white: ]\\n[#ff0505: Cancel Button]\\n/\\n[white: ]\\n]\\n]"
    >>> filename = os.path.join(dirname, "messagebox.blk")
    >>> blocks = pyparsing_parse(open(filename, encoding="utf8").read())
    >>> str(blocks).strip() == expected
    True
    """
    def add_block(tokens):
        # Nodes without an explicit color default to white.
        return Block.Block(tokens.name,
                           tokens.color if tokens.color else "white")

    def count_slashes(tokens):
        return len(tokens.new_rows)

    def stripped_name(tokens):
        return tokens.name.strip()

    def make_empty():
        return EmptyBlock

    open_br, close_br = map(Suppress, "[]")
    new_rows = Word("/")("new_rows").setParseAction(count_slashes)
    name = CharsNotIn("[]/\n")("name").setParseAction(stripped_name)
    color = (Word("#", hexnums, exact=7) | Word(alphas, alphanums))("color")
    empty_node = (open_br + close_br).setParseAction(make_empty)

    # Forward declaration: a node contains nodes.
    nodes = Forward()
    node_data = Optional(color + Suppress(":")) + Optional(name)
    node_data.setParseAction(add_block)
    # '-' (rather than '+') after the opening bracket turns later mismatches
    # into ParseSyntaxException.
    node = open_br - node_data + nodes + close_br
    nodes << Group(
        ZeroOrMore(Optional(new_rows) + OneOrMore(node | empty_node)))

    stack = [Block.get_root_block()]
    try:
        parsed = nodes.parseString(text, parseAll=True)
        assert len(parsed) == 1
        populate_children(parsed.asList()[0], stack)
    except (ParseException, ParseSyntaxException) as err:
        raise ValueError("Error {{0}}: syntax error, line "
                         "{0}".format(err.lineno))
    return stack[0]
Example #10
0
	def fromString(inputText):
		"""Parse IDSL source text and return the pyparsing result tree.

		``/* ... */`` blocks are stripped from the input first and C++-style
		comments are ignored by the grammar.  The result exposes the import
		paths under ``imports`` and the module (name plus struct, enum,
		exception, dictionary, sequence and interface definitions) under
		``module``.

		Keywords are matched with Literal(): Word("struct") would treat the
		argument as a *set of characters* and accept any word spelled with
		those letters (e.g. "trust").
		"""
		text = nestedExpr("/*", "*/").suppress().transformString(inputText)

		# Punctuation (suppressed in the results)
		semicolon = Suppress(Word(";"))
		quote     = Suppress(Word("\""))
		op        = Suppress(Word("{"))
		cl        = Suppress(Word("}"))
		opp       = Suppress(Word("("))
		clp       = Suppress(Word(")"))
		lt        = Suppress(Word("<"))
		gt        = Suppress(Word(">"))
		eq        = Suppress(Word("="))
		identifier        = Word(alphas+"_",alphanums+"_")
		typeIdentifier    = Word(alphas+"_",alphanums+"_:")
		# One struct member: "<type> <identifier> [= <default>];"
		structIdentifer   = Group(typeIdentifier.setResultsName('type') + identifier.setResultsName('identifier') + Optional(eq) + Optional(CharsNotIn(";").setResultsName('defaultValue')) + semicolon)
		structIdentifers  = Group(OneOrMore(structIdentifer))

		## Imports
		idslImport  = Suppress(Literal("import")) + quote +  CharsNotIn("\";").setResultsName('path') + quote + semicolon
		idslImports = ZeroOrMore(idslImport)

		## Type definitions
		structDef     = Literal("struct").setResultsName('type') + identifier.setResultsName('name') + op + structIdentifers.setResultsName("structIdentifiers") + cl + semicolon
		dictionaryDef = Literal("dictionary").setResultsName('type') + lt + CharsNotIn("<>").setResultsName('content') + gt + identifier.setResultsName('name') + semicolon
		sequenceDef   = Literal("sequence").setResultsName('type')   + lt + typeIdentifier.setResultsName('typeSequence') + gt + identifier.setResultsName('name') + semicolon
		enumDef       = Literal("enum").setResultsName('type')       + identifier.setResultsName('name') + op + CharsNotIn("{}").setResultsName('content') + cl + semicolon
		exceptionDef  = Literal("exception").setResultsName('type')  + identifier.setResultsName('name') + op + CharsNotIn("{}").setResultsName('content') + cl + semicolon

		## Interface methods
		raiseDef       = Suppress(Literal("throws")) + typeIdentifier + ZeroOrMore( Literal(',') + typeIdentifier )
		# NOTE(review): Literal('out') also matches the start of a type such as
		# "output"; a keyword-style match may be wanted here -- confirm.
		decoratorDef    = Literal('idempotent') | Literal('out')
		retValDef       = typeIdentifier.setResultsName('ret')

		firstParam    = Group( Optional(decoratorDef.setResultsName('decorator')) + typeIdentifier.setResultsName('type') + identifier.setResultsName('name'))
		nextParam     = Suppress(Word(',')) + firstParam
		params        = firstParam + ZeroOrMore(nextParam)

		remoteMethodDef  = Group(Optional(decoratorDef.setResultsName('decorator')) + retValDef.setResultsName('ret') + typeIdentifier.setResultsName('name') + opp + Optional(          params).setResultsName('params') + clp + Optional(raiseDef.setResultsName('raise')) + semicolon )
		interfaceDef    = Literal('interface').setResultsName('type')  + typeIdentifier.setResultsName('name') + op + Group(ZeroOrMore(remoteMethodDef)).setResultsName('methods') + cl + semicolon

		## Module
		moduleContent = Group(structDef | enumDef | exceptionDef | dictionaryDef | sequenceDef | interfaceDef)
		module = Suppress(Literal("module")) + identifier.setResultsName("name") + op + ZeroOrMore(moduleContent).setResultsName("contents") + cl + semicolon

		IDSL = idslImports.setResultsName("imports") + module.setResultsName("module")
		IDSL.ignore( cppStyleComment )
		tree = IDSL.parseString(text)
		return tree
Example #11
0
class Include(object):
    """An ``#include <header>`` directive.

    ``rule`` is the pyparsing expression for the directive; its parse action
    turns a match into an ``Include`` built from the text between the
    brackets.
    """
    rule = (INCLUDE + LOPBRACK + CharsNotIn('>').setResultsName("header") +
            ROPBRACK).setParseAction(lambda toks: Include(toks.header))

    def __init__(self, header, parent=''):
        self.parent = parent
        self.header = header

    def __repr__(self):
        template = "#include <{}>"
        return template.format(self.header)
Example #12
0
class NginxParser(object):
    """
    pyparsing-based parser for nginx configuration text.

    The grammar is held in class attributes; an instance only stores the
    source text to parse.  Python-style ``#`` comments are ignored.
    """

    # --- terminals -------------------------------------------------------
    left_bracket = Literal("{").suppress()
    right_bracket = Literal("}").suppress()
    semicolon = Literal(";").suppress()
    space = White().suppress()
    key = Word(alphanums + "_/")
    value = CharsNotIn("{};,")
    location = CharsNotIn("{};," + string.whitespace)
    # location uri modifier: one of  =  ~*  ~  ^~
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # --- rules -----------------------------------------------------------
    # "key [value];"
    assignment = key + Optional(space + value) + semicolon

    # "key [modifier] [location] { (assignment | block)* }"
    block = Forward()
    block << Group(
        Group(key + Optional(space + modifier) + Optional(space + location))
        + left_bracket
        + Group(ZeroOrMore(Group(assignment) | block))
        + right_bracket
    )

    script = OneOrMore(Group(assignment) | block).ignore(pythonStyleComment)

    def __init__(self, source):
        self.source = source

    def parse(self):
        """
        Parse the stored source and return the pyparsing results.
        """
        grammar = self.script
        return grammar.parseString(self.source)

    def as_list(self):
        """
        Parse the stored source and return the tree as nested lists.
        """
        tree = self.parse()
        return tree.asList()
Example #13
0
class RawNginxParser(object):
    # pylint: disable=expression-not-assigned
    """A class that parses nginx configuration with pyparsing.

    The grammar lives in class attributes and keeps whitespace as tokens
    (``space`` is not suppressed), while braces and semicolons are
    suppressed.  An instance only stores the source text to parse.
    """

    # constants
    # Optional run of whitespace, kept in the results.
    space = Optional(White())
    nonspace = Regex(r"\S+")
    left_bracket = Literal("{").suppress()
    # NOTE(review): leaveWhitespace() is called on `space` itself here, not on
    # a copy; pyparsing documents it as modifying the element, so this
    # presumably affects every later use of `space` -- statement order matters.
    right_bracket = space.leaveWhitespace() + Literal("}").suppress()
    semicolon = Literal(";").suppress()
    key = Word(alphanums + "_/+-.")
    # '$' immediately followed by non-whitespace, combined into one token.
    dollar_var = Combine(Literal('$') + nonspace)
    # A parenthesised condition, as used after "if".
    condition = Regex(r"\(.+\)")
    # Matches anything that is not a special character AND any chars in single
    # or double quotes
    value = Regex(r"((\".*\")?(\'.*\')?[^\{\};,]?)+")
    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    # "# ..." up to the end of the line.
    comment = space + Literal('#') + restOfLine()

    # "key [value];" -- a missing value is reported as None.
    assignment = space + key + Optional(space + value,
                                        default=None) + semicolon
    location_statement = space + Optional(modifier) + Optional(space +
                                                               location +
                                                               space)
    if_statement = space + Literal("if") + space + condition + space
    map_statement = space + Literal(
        "map") + space + nonspace + space + dollar_var + space
    # Forward declaration: blocks may contain blocks.
    block = Forward()

    block << Group(
        # key could for instance be "server" or "http", or "location" (in which case
        # location_statement needs to have a non-empty location)
        (Group(space + key + location_statement) ^ Group(if_statement)
         ^ Group(map_statement)).leaveWhitespace() + left_bracket +
        Group(ZeroOrMore(Group(comment | assignment) | block) +
              space).leaveWhitespace() + right_bracket)

    # A whole file: one or more comments/assignments/blocks, then optional
    # trailing whitespace, then end of input.
    script = OneOrMore(Group(comment | assignment) ^ block) + space + stringEnd
    # Per the pyparsing docs, this keeps <TAB> characters from being expanded
    # to spaces before parsing.
    script.parseWithTabs()

    def __init__(self, source):
        # Text of the nginx configuration to parse.
        self.source = source

    def parse(self):
        """Returns the parsed tree."""
        return self.script.parseString(self.source)

    def as_list(self):
        """Returns the parsed tree as a list."""
        return self.parse().asList()
Example #14
0
    def grammar(self, value):
        """Build the token table for this grammar and store it in ``self.tokens``.

        Args:
            value: object providing the ``list_sep``, ``range_sep`` and
                ``step_sep`` separator strings.

        ``self.tokens`` is an OrderedDict mapping token names to pyparsing
        expressions, in the order regex, quoted, colnum, sep, name.
        """
        # De-duplicate the separators (two of them may be the same string).
        seps = list({value.list_sep, value.range_sep, value.step_sep})
        # Work on a copy: setParseAction() on pp.quotedString itself would
        # attach removeQuotes to the shared pyparsing module-level object.
        quotedstr = pp.quotedString.copy().setParseAction(pp.removeQuotes)

        # Built from a list of pairs so the order is guaranteed; an
        # OrderedDict built from a dict literal only preserves order when
        # plain dicts do (Python 3.7+).
        self.tokens = OrderedDict([
            ('regex', Literal('r').suppress() + quotedstr),
            ('quoted', quotedstr),
            ('colnum', Grammar.integer),
            ('sep', Or(seps)),
            ('name', CharsNotIn(seps)),
        ])
Example #15
0
    def initGrammar(self):
        """Build the wiki-markup grammar and store its top-level rule in ``self.N_S``.

        Recognised elements: HTML comments, plain text up to the next markup
        token, ``<ref>`` elements, ''' quoted text, ``[[links]]``,
        ``=headers=``, ``{{templates}}`` and ``{labels}``.  Links and
        templates are post-processed by ``self.genLink`` / ``self.genTemplate``;
        the other tagged elements are wrapped in single-key dicts.
        """
        def tagged(label):
            # Parse action wrapping the matched tokens as {label: tokens}.
            return lambda s, l, t: {label: t}

        equals_run = Word("=")
        comment = htmlComment()

        name = CharsNotIn("{}|[]")
        simple_text = SkipTo(
            oneOf(["{{", "|", "[[", "]]", "}}", "'''", "<ref"]))

        # Forward declarations for the mutually recursive rules.
        elements = Forward()
        template = Forward()

        apostrofs = QuotedString("'''").setParseAction(tagged('APOSTROFS'))

        link_content = name + Optional(
            "|" + delimitedList(CharsNotIn("[]"), delim="|"))
        link = nestedExpr(
            opener="[[", closer="]]", content=link_content
        ).setParseAction(self.genLink)

        header = Group(equals_run + SkipTo("=") + equals_run).setParseAction(
            tagged('HEADER'))

        # "|key=value|key=value..." where either side may be missing.
        key = CharsNotIn("{}|=")
        key_value = Group(Optional(key) + Optional("=" + elements))
        key_values = "|" + delimitedList(key_value, delim="|")

        label_content = template | ("{{" + OneOrMore("!") +
                                    "}}") | CharsNotIn("{}|")
        label = nestedExpr(opener="{", closer="}", content=label_content)

        template << nestedExpr(
            opener="{{", closer="}}", content=name +
            Optional(key_values)).setParseAction(self.genTemplate)

        ref_start, ref_end = makeHTMLTags("ref")
        named_ref = ref_start + SkipTo(ref_end) + ref_end
        named_ref.setParseAction(tagged('REF'))

        element = (comment | simple_text | named_ref | apostrofs | link
                   | header | template | label)
        elements << ZeroOrMore(element)

        self.N_S = elements
Example #16
0
    def initGrammar(self):
        """Build the wiki-markup grammar and store its top-level rule in ``self.N_S``.

        An element is an HTML comment, a ``[[link]]``, a ``=header=`` or a
        ``{{template}}``; each is post-processed by the matching ``self.gen*``
        handler.  Template values may themselves contain elements, which is
        why ``N_element`` is a Forward.  Debug tracing is enabled on several
        rules.
        """
        N_comment = htmlComment().setParseAction(self.genComment)

        N_name = CharsNotIn("{}|[]")
        N_link = nestedExpr(
            opener="[[",
            closer="]]",
            content=N_name +
            Optional("|" + delimitedList(CharsNotIn("[]"), delim="|"))
        ).setParseAction(self.genLink).setDebug(True)
        L_Equals = Word("=")
        N_header = Group(L_Equals + SkipTo("=") + L_Equals).setParseAction(
            self.genHeader)

        # Forward declarations for the mutually recursive rules.
        N_element = Forward()
        N_template = Forward().setDebug(True)

        N_key = CharsNotIn("{}|=")
        # Plain text inside a template value, up to the next markup token.
        N_internalText = CharsNotIn("{}|=<[") + SkipTo(
            Literal("{{") | Literal("[[") | Literal("<!--") | Literal("<ref")
            | Literal("|") | Literal("}}"))
        N_insideElements = OneOrMore(N_element | N_internalText).setDebug(True)
        N_keyValue = Group(
            Optional(N_key) +
            Optional(Literal("=") + N_insideElements)).setDebug(True)
        N_keyValues = "|" + delimitedList(N_keyValue, delim="|")
        N_keyValues.setDebug(True)
        N_template << nestedExpr(
            opener="{{", closer="}}", content=N_name +
            Optional(N_keyValues)).setParseAction(self.genTemplate)

        # Fill in the Forward with '<<' instead of rebinding the name with
        # '=': rebinding left the Forward referenced by N_insideElements
        # permanently undefined, so nested elements could never match.  The
        # parentheses are required because '<<' binds tighter than '|'.
        N_element << (N_comment | N_link | N_header | N_template)
        N_element.setDebug(True)

        self.N_S = N_element
Example #17
0
    def pattern():
        """Build the pyparsing pattern for this command.

        The pattern accepts ``f `` followed by an optional path, or a path of
        at least two characters, or a path starting with ``/``; an optional
        whitespace-separated number is captured as ``line``.  Whitespace is
        significant, and a match is turned into a command by
        ``CommandOpen.create``.
        """

        def attachLocation(s, loc, tocs):
            """pyparsing callback. Pairs the path with its position in the
            original string.
            """
            return [(loc, tocs[0])]

        path = CharsNotIn(" \t").setResultsName("path").setParseAction(
            attachLocation)
        longPath = CharsNotIn(" \t", min=2).setResultsName(
            "path").setParseAction(attachLocation)
        slashPath = Combine(
            Literal('/') + Optional(CharsNotIn(" \t"))).setResultsName(
                "path").setParseAction(attachLocation)

        explicit = Literal('f ') + Optional(White()) + Optional(path)
        target = explicit ^ longPath ^ slashPath
        line_suffix = Optional(White() + Word(nums)("line"))

        pat = target + line_suffix
        pat.leaveWhitespace()
        pat.setParseAction(CommandOpen.create)
        return pat
Example #18
0
def get_parser():
    """Return a pyparsing parser for dotted / bracketed access paths.

    A path is a leading segment followed by any number of ``PERIOD segment``
    or ``LBRACK segment RBRACK`` accessors; the delimiters are suppressed and
    the parse action is ``PERIOD.join``, which presumably joins the remaining
    segments with ``PERIOD``.  Packrat caching is enabled as a side effect.
    """
    from pyparsing import CharsNotIn, ParserElement, Suppress, ZeroOrMore

    ParserElement.enablePackrat()

    # Any run of characters that is not one of the three delimiters.
    segment = CharsNotIn(f"{PERIOD}{LBRACK}{RBRACK}")
    dotted_access = Suppress(PERIOD) + segment
    bracket_access = Suppress(LBRACK) + segment + Suppress(RBRACK)

    path = segment + ZeroOrMore(dotted_access ^ bracket_access)
    path.setParseAction(PERIOD.join)

    return path
def build_parser():
    """Build a pyparsing grammar for INI-style text.

    The result exposes ``sections``; each section has a ``section`` name (the
    text between square brackets) and ``properties``, a group of
    ``key = value`` entries whose value is the stripped rest of the line.
    Python-style ``#`` comments are ignored.
    """
    key = Word(alphanums).setResultsName('key')
    # Copy restOfLine before attaching the parse action: setParseAction()
    # modifies the element it is called on, and restOfLine is a shared
    # pyparsing module-level object.
    value = restOfLine.copy().setParseAction(
        lambda string, location, tokens: tokens[0].strip()
    ).setResultsName('value')
    property_ = Group(key + Suppress(Literal('=')) + value)
    properties = Group(OneOrMore(property_)).setResultsName('properties')
    section_name = (Suppress('[') + OneOrMore(CharsNotIn(']')) +
                    Suppress(']')).setResultsName('section')
    section = Group(section_name + properties)
    ini_file = ZeroOrMore(section).setResultsName('sections')
    ini_file.ignore(pythonStyleComment)
    return ini_file
Example #20
0
 def _getControls(self, index=1):
     """Parse the comma-separated rows of ``self.doc`` (skipping the first).

     Fields are either double-quoted strings or runs of non-comma text.
     With a truthy *index*, return the stripped field at that position from
     every row; otherwise return every row as a list of stripped fields.

     NOTE(review): ``index=0`` is falsy and therefore selects the
     "all fields" branch rather than column 0 -- confirm this is intended.
     """
     field = QuotedString('"') | CharsNotIn(',')
     row_parser = delimitedList(field)
     rows = self.doc[1:]
     if not index:
         return [[cell.strip() for cell in row_parser.parseString(row)]
                 for row in rows]
     return [row_parser.parseString(row)[index].strip() for row in rows]
Example #21
0
    def query_from_string(cls, filter_string):
        """
        Build a query from a ``key=value [and|or key=value ...]`` filter string.

        Keys and values are either bare atoms (no whitespace, parentheses,
        ``=`` or ``"``) or double-quoted strings.  Each ``key=value`` is
        turned into ``cls.BoolOperand``; ``and`` / ``or`` combine operands via
        ``cls.BoolAnd`` / ``cls.BoolOr``.  Returns the ``result`` attribute of
        the first parsed item and raises RuntimeError when parsing fails or
        yields nothing.

        TODO:
        * handle values with " via: a.b.c.d="hello\"world"
        * handle keys with " via: a.\"b.c="yeah"
        * handle key with __ in it
        """
        original_input = filter_string
        filter_string = str(filter_string)

        # Every distinct whitespace character that occurs in the input.
        whitespace_chars = list(
            {str(ch) for ch in filter_string if ch.isspace()})
        delimiters = whitespace_chars + [u'(', u')', u'=', u'"']

        bare_atom = CharsNotIn(delimiters)
        quoted_body = CharsNotIn(u'"')
        quoted_atom = (Literal('"') + Optional(quoted_body) + Literal('"'))
        equals = Literal('=')

        operand = ((quoted_atom | bare_atom) + equals +
                   Optional((quoted_atom | bare_atom)))
        operand.setParseAction(cls.BoolOperand)

        operators = [
            ("and", 2, opAssoc.LEFT, cls.BoolAnd),
            ("or", 2, opAssoc.LEFT, cls.BoolOr),
        ]
        expression = infixNotation(operand, operators)

        try:
            # The whole filter is wrapped in parentheses before parsing.
            parsed = expression.parseString('(' + filter_string + ')')
        except ParseException:
            raise RuntimeError(u"Invalid query %s" % original_input)

        if len(parsed) == 0:
            raise RuntimeError(
                "Parsing the filter_string %s went terribly wrong" %
                filter_string)

        return parsed[0].result
Example #22
0
    def __init__(self):
        """Build the pyparsing expressions used to pull values out of a report.

        Two families are created: per-label extractors stored as public
        attributes (``alpha``, ``lnl``, ``frq``, ``names``, ``rates``,
        ``model``), each skipping ahead to its label, and
        ``_dash_f_e_parser``, a single expression for a whole report with
        model names, a likelihood and one group per partition.
        """
        def skip_past(label):
            # Discard everything up to and including `label`.
            return Suppress(SkipTo(label)) + Suppress(label)

        # --- labels for the per-value extractors ---
        self.ALPHA_LABEL = Regex(r'alpha\[\d+\]:')
        self.LNL_LABEL = Literal('Final GAMMA-based Score of best tree')
        self.FRQ_LABEL = Regex(r'Base frequencies: (?=\d+)') ^ Regex(
            r'ML estimate base freqs\[\d+\]:')
        self.NAMES_LABEL = Regex(r'Partition: \d+ with name:\s+')
        self.RATES_LABEL = Regex(r'rates\[\d+\].+?:')
        self.MODEL_LABEL = Literal('Substitution Matrix:')

        # --- per-value extractors ---
        self.alpha = OneOrMore(skip_past(self.ALPHA_LABEL) + FLOAT)
        self.lnl = skip_past(self.LNL_LABEL) + FLOAT
        self.frq = OneOrMore(
            Group(skip_past(self.FRQ_LABEL) + OneOrMore(FLOAT)))
        self.names = OneOrMore(
            skip_past(self.NAMES_LABEL) + CharsNotIn('\n') +
            Suppress(LineEnd()))
        self.rates = OneOrMore(
            Group(skip_past(self.RATES_LABEL) + OneOrMore(FLOAT)))
        self.model = skip_past(self.MODEL_LABEL) + WORD

        # --- labels for the whole-report parser ---
        MODEL_LABEL = Literal('Substitution Matrix:')
        SCORE_LABEL = Literal('Final GAMMA  likelihood:')
        DESC_LABEL = Literal('Model Parameters of Partition')
        NAME_LEADIN = Literal(', Name:')
        DATATYPE_LEADIN = Literal(', Type of Data:')
        ALPHA_LEADIN = Literal('alpha:')
        TREELENGTH_LEADIN = Literal('Tree-Length:')
        RATES_LABEL = Regex(r'rate \w <-> \w:')
        FREQS_LABEL = Regex(r'freq pi\(\w\):')

        model = skip_past(MODEL_LABEL) + WORD
        likelihood = skip_past(SCORE_LABEL) + FLOAT
        description = (skip_past(DESC_LABEL) + INT + Suppress(NAME_LEADIN) +
                       SPACEDWORD + Suppress(DATATYPE_LEADIN) + WORD)
        alpha = Suppress(ALPHA_LEADIN) + FLOAT
        rates = Suppress(RATES_LABEL) + FLOAT
        freqs = Suppress(FREQS_LABEL) + FLOAT

        # One group per partition; the tree length value is parsed but dropped.
        partition = Group(description + alpha + Suppress(TREELENGTH_LEADIN) +
                          Suppress(FLOAT) + Group(OneOrMore(rates)) +
                          Group(OneOrMore(freqs)))

        self._dash_f_e_parser = (Group(OneOrMore(model)) + likelihood +
                                 Group(OneOrMore(partition)))
Example #23
0
def grammar():
    """Return a pyparsing grammar for a SQL schema dump.

    The grammar is one or more of: ``--`` comments, ``CREATE TABLE``
    statements, ``ALTER TABLE ONLY ... ADD CONSTRAINT ... FOREIGN KEY``
    statements, or any other ``;``-terminated statement.  Each alternative
    reports to its ``*_act`` callback (comments share
    ``other_statement_act``).
    """
    def name_word():
        # A fresh identifier-like word (letters, digits, underscore).
        return Word(alphanums + "_")

    # Arbitrarily nested parenthesised text.
    nested_parens = Forward()
    nested_parens <<= "(" + ZeroOrMore(CharsNotIn("()")
                                       | nested_parens) + ")"

    column = OneOrMore(Word(alphanums + "_\"'`:-") | nested_parens)
    column.setParseAction(field_act)

    table_name = (Word(alphas + "`_") | QuotedString("\""))

    column_list = column + ZeroOrMore(Suppress(",") + column)
    column_list.setParseAction(field_list_act)

    create_table = (Literal("CREATE") + "TABLE"
                    + table_name.setResultsName("tableName")
                    + "(" + column_list.setResultsName("fields") + ")"
                    + ";")
    create_table.setParseAction(create_table_act)

    add_fkey = (Literal("ALTER") + "TABLE" + "ONLY"
                + table_name.setResultsName("tableName")
                + "ADD" + "CONSTRAINT" + name_word()
                + "FOREIGN" + "KEY"
                + "(" + name_word().setResultsName("keyName") + ")"
                + "REFERENCES" + name_word().setResultsName("fkTable")
                + "(" + name_word().setResultsName("fkCol") + ")"
                + Optional(Literal("DEFERRABLE")) + ";")
    add_fkey.setParseAction(add_fkey_act)

    other_statement = OneOrMore(CharsNotIn(";")) + ";"
    other_statement.setParseAction(other_statement_act)

    comment = "--" + ZeroOrMore(CharsNotIn("\n"))
    comment.setParseAction(other_statement_act)

    return OneOrMore(comment | create_table | add_fkey | other_statement)
Example #24
0
class Include:
    """
    Rule to parse #include directives.

    ``rule`` matches the directive and its parse action builds an ``Include``
    from the text found between the brackets.
    """
    rule = (INCLUDE + LOPBRACK + CharsNotIn('>')("header") +
            ROPBRACK).setParseAction(lambda t: Include(t.header))

    # `header` is the matched header text handed over by the parse action,
    # not the CharsNotIn parser element, hence the `str` annotation.
    def __init__(self, header: str, parent: str = ''):
        self.header = header
        self.parent = parent

    def __repr__(self) -> str:
        return "#include <{}>".format(self.header)
Example #25
0
def getSignatures(signatureFile):
    """Parse a signature description file into a list of ``Signature`` objects.

    Each statement in the file has the shape::

        [#tag ...] return_type class_name : method_name ( [params] ) { [body] }

    where ``return_type`` and the parameter list may each be ``*``, and
    ``//`` comments are ignored.

    :param signatureFile: path of the file to read.
    :return: one ``Signature`` per parsed statement, in file order.
    :raises: whatever ``open`` raises for an unreadable path, and the
        pyparsing parse error if the content does not match the grammar.
    """
    # The context manager closes the handle even if read() raises.
    # NOTE(review): the file is read in binary mode as in the original; on
    # Python 3 this yields bytes -- confirm parseString accepts that here.
    with open(signatureFile, "rb") as fp:
        content = fp.read()

    litteral = Word(alphas + nums + "_")
    # Fallback method name: anything up to the opening parenthesis.
    regex_pattern = CharsNotIn("(")
    tags = (OneOrMore(Group('#' + litteral))).setResultsName("tags")
    hierarchy_modifier = oneOf("<= =")
    java_type = Group(
        Optional(hierarchy_modifier) +
        Word(alphas + nums + "_" + "." + "[" + "]" + "$"))
    return_type = (java_type | "*").setResultsName("return_type")
    method_name = (litteral | "<init>"
                   | regex_pattern).setResultsName("method_name")
    parameter = Group((java_type + litteral))
    parameter_list = (delimitedList(parameter)
                      | "*").setResultsName("parameters")
    # Body instructions are ';'-separated runs free of '{', ';' and '}'.
    body_instruction = (CharsNotIn("{;}"))
    signature_body = (delimitedList(body_instruction,
                                    ";")).setResultsName("signature_body")
    class_name = (java_type
                  | Group(Optional(hierarchy_modifier) +
                          CharsNotIn(":"))).setResultsName("class_name")

    signature_stmt = Group(
        Optional(tags) + return_type + class_name + ":" + method_name +
        "(" + Optional(parameter_list) + ")" +
        "{" + Optional(signature_body) + "}")
    grammar = OneOrMore(signature_stmt)

    grammar.ignore(dblSlashComment)
    result = grammar.parseString(content)

    return [Signature(sig) for sig in result]
Example #26
0
    def __init__(self, debug=False):
        """Build the recursive tag grammar and store it in ``self.parser``.

        An aggregate (results name "OFC") is an opening tag, one or more
        nested aggregates or content entries, and a closing tag. A content
        entry is an unclosed tag followed by text free of '<', CR and LF.
        The finished parser is grouped under the results name "document".

        :param debug: when truthy, install the ofxtools debug actions on
            the parser.
        """
        node = Forward().setResultsName("OFC")
        node_open, node_close = self._tag()
        leaf_open = self._tag(closed=False)
        leaf = Group(leaf_open + CharsNotIn("<\r\n"))
        children = Dict(OneOrMore(node | leaf))
        node << Group(node_open + children + node_close)

        self.parser = Group(node).setResultsName("document")
        if debug:
            self.parser.setDebugActions(
                ofxtools._ofxtoolsStartDebugAction,
                ofxtools._ofxtoolsSuccessDebugAction,
                ofxtools._ofxtoolsExceptionDebugAction,
            )
Example #27
0
    def __init__(self, debug=False):
        """Build the header-plus-body grammar and store it in ``self.parser``.

        Headers are one or more ``NAME:value`` entries (value optional,
        running to the end of the line), collected as a Dict named "header".
        The body is a recursive aggregate (results name "OFX"): an opening
        tag, zero or more nested aggregates or content entries, and a closing
        tag, grouped under the results name "body".

        :param debug: when truthy, install the module's debug actions on the
            parser.
        """
        # Header section
        header_value = Optional(CharsNotIn("\r\n"))
        header_entry = Group(
            Word(alphas) + Literal(":").suppress() + header_value)
        header_block = Dict(OneOrMore(header_entry)).setResultsName("header")

        # Body section
        node = Forward().setResultsName("OFX")
        node_open, node_close = self._tag()
        leaf_open = self._tag(closed=False)
        leaf = Group(leaf_open + CharsNotIn("<\r\n"))
        children = Dict(ZeroOrMore(node | leaf))
        node << Group(node_open + children + node_close)
        body_block = Group(node).setResultsName("body")

        # Complete document: headers followed by the body
        self.parser = header_block + body_block
        if debug:
            self.parser.setDebugActions(
                _ofxStartDebugAction,
                _ofxSuccessDebugAction,
                _ofxExceptionDebugAction,
            )
Example #28
0
 def Syntax():
     """Return the grammar for a comma-separated list of ``key = value`` pairs.

     A value is ``true``, ``false``, a run of digits and dots, a
     double-quoted string, or a brace-delimited block. A block holds either
     nested assignments, a quoted string followed by a comma and a number,
     or a list of further blocks. Lists tolerate one trailing comma.
     """
     def comma_separated(expr):
         # One or more `expr` separated by commas, with an optional
         # trailing comma; the commas themselves are dropped.
         return (expr + ZeroOrMore(Suppress(",") + expr) +
                 Optional(Suppress(",")))

     quoted = Suppress('"') + Optional(CharsNotIn('"')) + Suppress('"')
     key = Word(alphanums + "-_")
     value = (Literal("true") | Literal("false") | Word(nums + ".")
              | quoted)
     block = Forward()
     entry = key + Suppress("=") + (value | block)
     block << Suppress("{") + Group(
         Dict(comma_separated(Group(entry)))
         | (quoted + Suppress(",") + Word(nums + "."))
         | comma_separated(block)) + Suppress("}")
     return Dict(comma_separated(Group(entry)))
Example #29
0
def parse_final():
	'''
	Fetches the parsed data (a list of rows) together with two mappings via parse_data(),
	then substitutes the leading part of every field that matches the grammar below.
	Rows with more than six fields are substituted using the first mapping; all other
	rows use the second mapping.
	A field matches when it looks like '<prefix>(<alphanumerics>)<rest>'; the prefix is
	replaced by its entry in the mapping. Fields that do not match, or whose prefix has
	no entry in the mapping, are left unchanged.
	Returns False if parse_data() fails, otherwise the (modified) parsed data.
	'''

	first_mapping = []
	second_mapping = []
	parsed_data = []

	if not parse_data(first_mapping, second_mapping, parsed_data):
		return False

	SUBS = CharsNotIn('(') + "(" + Word(alphanums) + ")" + restOfLine
	for row in parsed_data:
		# The row length decides which mapping applies to every field of the row.
		mapping = first_mapping if len(row) > 6 else second_mapping
		for i, field in enumerate(row):
			try:
				parsed = SUBS.parseString(field)
				parsed[0] = mapping[parsed[0]]
				row[i] = ''.join(parsed)
			except Exception:
				# Best effort: a field that cannot be parsed or mapped stays as it is.
				# Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
				continue
	return parsed_data
Example #30
0
    def makeNewickParser():
        """Build a pyparsing grammar for tree strings and return its parser.

        A subtree is either a parenthesised, comma-separated list of subtrees
        (results name "subtree") or a name (results name "name"), optionally
        followed by free-form text containing none of ',', ')' or ';'
        (results name "data"). A tree is a subtree terminated by ';'.

        :return: the bound ``parseString`` method of the top-level rule.
        """
        # pyparsing
        from pyparsing import Combine, Optional, Literal, CaselessLiteral, \
                           Word, alphanums, \
                           nums, oneOf, Group, Dict, Forward, \
                           ParseResults, CharsNotIn, ZeroOrMore


        # literals
        lparen    = Literal("(").suppress()
        rparen    = Literal(")").suppress()
        colon     = Literal(":").suppress()
        # Previously bound to ":" (copy-paste slip); the name is not used by
        # the rules below, so correcting it does not change what is parsed.
        semicolon = Literal(";").suppress()
        comma     = Literal(",").suppress()
        point     = Literal(".")
        e         = CaselessLiteral("E")


        # terminal rules
        name    = Word(alphanums + "_" + "-" + "." + "+")
        # fnumber/dist/bootstrap are defined but not referenced by the
        # recursive rules: everything after a node is captured as "data".
        fnumber = Combine(Word("+-"+nums, nums) +
                          Optional(point + Optional(Word(nums))) +
                          Optional(e + Word("+-"+nums, nums)))
        dist      = fnumber
        bootstrap = fnumber


        # recursive rules
        subtree = Forward()
        subtreelist = Forward()

        subtree << \
            Group(
                (
                    (lparen + subtreelist + rparen).setResultsName("subtree") |
                    name.setResultsName("name")
                ) +
                Optional(
                    CharsNotIn(",);").setResultsName("data")
                )
            )
        subtreelist << subtree + Optional(comma + subtreelist)


        # top level rule: Word(";") accepts one or more terminating ';'
        tree = subtree + Word(";").suppress()


        return tree.parseString
def parse_variadic_templates(txt):
    """Tokenise *txt*, keeping variadic template declarations as single tokens.

    A declaration is an optional non-variadic ``template<...>`` header, a
    ``template<...>`` header containing exactly one ``Type ... name``
    parameter, the text up to the first '{', and the brace-balanced block
    that follows (with an optional trailing ';'). Such declarations are
    returned as their original text; everything else is returned as
    whitespace runs or words of printable characters.

    :param txt: source text to scan.
    :return: the pyparsing results of parsing *txt*.
    """
    type_name = Word(alphas)
    dots = Literal('...')
    ident = Word(alphas)

    variadic_param = Group(type_name + dots + ident)
    plain_param = Group(type_name + ident)

    # Exactly one variadic parameter, with plain parameters on either side.
    params_before = Optional(OneOrMore(plain_param + ','))
    params_after = Optional(OneOrMore(',' + plain_param))
    variadic_params = params_before + variadic_param + params_after

    plain_params = plain_param + Optional(OneOrMore(',' + plain_param))

    # Optional outer non-variadic header, then the variadic header.
    outer_header = Optional(
        "template" + Literal("<") + plain_params + Literal(">"))
    headers = (outer_header + "template" + Literal("<") + variadic_params +
               Literal(">"))

    # Brace-balanced body; nested blocks recurse through the Forward.
    body_item = Forward()
    braces = nestedExpr('{', '}', content=body_item) + Literal(';') * (0, 1)
    body_item << (CharsNotIn('{}') | braces)

    declaration = originalTextFor(headers + CharsNotIn('{') + braces)

    whole_file = Forward()
    chunk = declaration | White() | Word(printables)
    whole_file << (Optional(OneOrMore(chunk)) | whole_file)

    return whole_file.parseString(txt)
Example #32
0
def process_task_lists(content: str) -> str:
    """Rewrite task lists found in *content* and return the new text.

    A task list starts at the beginning of the string or after a blank line
    and consists of one or more items. Each item is: zero or more indents
    (four spaces or a tab), a list marker ('+', '-', '*' or digits followed
    by '.'), a checkbox ('[ ]' or '[x]') and the rest of the line. Every
    matched list is handed to the ``replace_list`` parse action.
    """
    line_end = StringEnd() | '\n'
    text = Group(CharsNotIn('\n') + line_end).leaveWhitespace()
    box = oneOf(['[ ]', '[x]'])
    bullet = Suppress(oneOf(['+', '-', '*']) | Word(nums) + '.')

    indent_unit = oneOf(['    ', '\t']).leaveWhitespace()
    indentation = Group(ZeroOrMore(indent_unit))

    entry = Group(indentation + bullet + box + text)

    list_start = Suppress(StringStart() | Literal('\n\n')).leaveWhitespace()
    task_list = list_start + OneOrMore(entry)

    return task_list.setParseAction(replace_list).transformString(content)
Example #33
0
    def pattern():
        """pyparsing pattern of the command

        Matches the literal ``'s '``, optional extra whitespace and an
        optional path (a run of characters other than space and tab). The
        path token is replaced by a ``(location, text)`` tuple, and the whole
        match is passed to ``CommandSaveAs.create``.
        """
        from pyparsing import CharsNotIn, Literal, Optional, White  # delayed import, performance optimization

        def with_location(text, position, tokens):
            # Pair the first token with the offset at which it was matched.
            return [(position, tokens[0])]

        path_expr = CharsNotIn(" \t")("path").setParseAction(with_location)

        command = Literal('s ') + Optional(White()) + Optional(path_expr)
        return command.leaveWhitespace().setParseAction(CommandSaveAs.create)
Example #34
0
# Not referenced by the definitions visible below.
# NOTE(review): presumably consulted further down the module -- confirm.
SUPPORT_MISSING_VALUES = True

# from the RFCs ABNF description
# NOTE(review): the names look like the syslog (RFC 5424) grammar -- confirm.
nilvalue = Word("-")  # one or more '-' characters
digit = Regex("[0-9]{1}")
nonzero_digit = Regex("[1-9]{1}")
printusascii = printables
sp = White(" ", exact=1)  # exactly one space
octet = Regex("[\x00-\xFF]")
utf_8_string = Regex("[\x00-\xFF]*")
BOM = "\xef\xbb\xbf"
bom = Regex(BOM)
msg_utf8 = bom + utf_8_string
msg_any = utf_8_string
# Message text, with or without the leading BOM, merged into one token.
msg = Combine(Or([msg_utf8, msg_any])).setResultsName("MSG")
# 1 to 32 characters, none of which is '=', space, ']' or '"'.
sd_name = CharsNotIn('= ]"', 1, 32)
param_name = sd_name.setResultsName("SD_PARAM_NAME")
# Double-quoted value with backslash escapes; may span several lines.
param_value = QuotedString(quoteChar='"', escChar="\\", multiline=True)
param_value = param_value.setResultsName("SD_PARAM_VALUE")
sd_id = sd_name.setResultsName("SD_ID")
sd_param = Group(param_name + Regex("=") + param_value)
# Zero or more parameters, each preceded by exactly one space.
sd_params = Group(ZeroOrMore(Group(sp + sd_param.setResultsName("SD_PARAM"))))
sd_element = Group("[" + sd_id + sd_params.setResultsName("SD_PARAMS") + "]")
sd_element = sd_element.setResultsName("SD_ELEMENT")
sd_elements = Group(OneOrMore(sd_element))
# Either the nil value or one or more bracketed elements.
structured_data = Or([nilvalue, sd_elements.setResultsName("SD_ELEMENTS")])
structured_data = structured_data.setResultsName("STRUCTURED_DATA")
time_hour = Regex("0[0-9]|1[0-9]|2[0-3]")  # 00-23
time_minute = Regex("[0-5][0-9]")  # 00-59
time_second = time_minute
# Raw string: "\." in a plain literal is an invalid escape sequence and
# triggers a warning on modern Python; the pattern itself is unchanged.
time_secfrac = Regex(r"\.[0-9]{1,6}")  # '.' followed by 1 to 6 digits
Example #35
0
# Not referenced by the definitions visible below.
# NOTE(review): presumably consulted further down the module -- confirm.
SUPPORT_MISSING_VALUES = True

# from the RFCs ABNF description
# NOTE(review): the names look like the syslog (RFC 5424) grammar -- confirm.
nilvalue = Word("-")  # one or more '-' characters
digit = Regex("[0-9]{1}")
nonzero_digit = Regex("[1-9]{1}")
printusascii = printables
sp = White(" ", exact=1)  # exactly one space
octet = Regex('[\x00-\xFF]')
utf_8_string = Regex('[\x00-\xFF]*')
BOM = '\xef\xbb\xbf'
bom = Regex(BOM)
msg_utf8 = bom + utf_8_string
msg_any = utf_8_string
# Message text, with or without the leading BOM, merged into one token.
msg = Combine(Or([msg_utf8, msg_any])).setResultsName('MSG')
# 1 to 32 characters, none of which is '=', space, ']' or '"'.
sd_name = CharsNotIn('= ]"', 1, 32)
param_name = sd_name.setResultsName('SD_PARAM_NAME')
# Double-quoted value with backslash escapes; may span several lines.
param_value = QuotedString(quoteChar='"', escChar='\\', multiline=True)
param_value = param_value.setResultsName('SD_PARAM_VALUE')
sd_id = sd_name.setResultsName('SD_ID')
sd_param = Group(param_name + Regex('=') + param_value)
# Zero or more parameters, each preceded by exactly one space.
sd_params = Group(ZeroOrMore(Group(sp+sd_param.setResultsName('SD_PARAM'))))
sd_element = Group('['+sd_id+sd_params.setResultsName('SD_PARAMS')+']')
sd_element = sd_element.setResultsName('SD_ELEMENT')
sd_elements = Group(OneOrMore(sd_element))
# Either the nil value or one or more bracketed elements.
structured_data = Or([nilvalue, sd_elements.setResultsName('SD_ELEMENTS')])
structured_data = structured_data.setResultsName('STRUCTURED_DATA')
time_hour = Regex('0[0-9]|1[0-9]|2[0-3]')  # 00-23
time_minute = Regex('[0-5][0-9]')  # 00-59
time_second = time_minute
# Raw string: '\.' in a plain literal is an invalid escape sequence and
# triggers a warning on modern Python; the pattern itself is unchanged.
time_secfrac = Regex(r'\.[0-9]{1,6}')  # '.' followed by 1 to 6 digits