コード例 #1
0
ファイル: parsing.py プロジェクト: iamgroot42/basic-TARDIS
def parse_mapping(data):
	'''
	Takes a list of strings and returns the (character,configuration) mapping in it
	The 'data' passed is actually the metadata at ths tartting of the file, which specifies
	characters which have to be replaced with configurations.
	For example, 'u sp 5d' indicates that u is to be replaced with 'sp 5d'
	The function uses the fact that different mappings are separated by a gap of at least two spaces.
	So, data is parsed to separate out individual mappings. Then, the part after the character is joined
	to create a mapping. 
	'''
	mapping = {}
	# Defined two grammars, as Optional() wasn't working as expected:
	map_parse1 = CharsNotIn(' ') + White() + Word(alphanums) + White(exact=1) + Word(alphanums) + White(min=2) + restOfLine
	map_parse2 = CharsNotIn(' ') + White() + Word(alphanums) + White(min=2) + restOfLine
	for row in data:
		row_copy = row
		while len(row_copy) > 0:
			# If it's a two-level confiugration:
			try:
				temp = map_parse1.parseString(row_copy)
				mapping[temp[0]] = ' '.join([temp[2],temp[4]])
				row_copy = temp[-1]
			except:
				# If it's a single-level configuration:
				try:
					temp = map_parse2.parseString(row_copy)
					mapping[temp[0]] = temp[2]
					row_copy = temp[-1]
				except:
					# Parsing error, so mappings must be complete
					return mapping
コード例 #2
0
def param_substitude(lines, param_dict):
    mask_par = (CharsNotIn('{{}}')[0, ] + '{{' + WRD_p + '}}' +
                CharsNotIn('{{}}')[0, ])[1, ] + Char('\n')[0, 1]
    # mask_par.setParseAction(param_set)
    mask_par.setParseAction(lambda tokens: param_set(tokens, param_dict))

    for line_to, line in enumerate(lines):
        line = line.rstrip()

        if not line:
            continue

        lines[line_to] = mask_par.transformString(line)

    return lines
コード例 #3
0
ファイル: parse.py プロジェクト: ghseeli/trunconomics
    def init_latex_parser(self):
        # these variables will be used to define valid lists of characters
        lowers = 'qwertyuiopasdfghjklzxcvbnm'
        uppers = lowers.upper()
        alphas = lowers + uppers
        digits = '1234567890'
        other_word_symbols = '-_'
        word_chars = alphas + other_word_symbols + digits
        punctuation_symbols = '.!?:;…'
        command_symbol = '\\'
        white_characters = CharsNotIn(word_chars + punctuation_symbols +
                                      command_symbol)

        # define grammar
        word = Word(word_chars)
        command = command_symbol + OneOrMore(
            alphas
        )  # as far as i can tell, only alphas are allowed in a latex command (bar the special commands such as \&).
        punc = Word(punctuation_symbols)
        white_and_word = Group(Suppress(Optional(white_characters)) + word)
        white_and_command = Group(
            Suppress(Optional(white_characters)) + command)
        white_and_punc = Group(Suppress(Optional(white_characters)) + punc)
        sentence = ZeroOrMore(white_and_word) + white_and_punc
        pure_piece = ZeroOrMore(sentence)
        bracketed_piece = '{' + pure_piece + Suppress(
            ZeroOrMore(white_characters)) + '}'

        self.parser = content.parseString
コード例 #4
0
ファイル: base.py プロジェクト: Freeguy1/pegasus-2
    class NLPyParser(NLBaseParser):
        """pyparsing--based implementation of the NLBaseParser
        """
        notSpace = CharsNotIn(" \n")
        eq = Literal('=').suppress()
        value = (QuotedString('"', escChar=chr(92), unquoteResults=False) \
                     ^ OneOrMore(notSpace))
        ts = Group(Literal('ts') + eq + value)
        event = Group(Literal('event') + eq + value)
        name = ~oneOf("ts event") + Word(alphanums + '-_.')
        nv = ZeroOrMore(Group(name + eq + value))
        nvp = Each([ts, event, nv]) + White('\n').suppress() + StringEnd()

        def parseLine(self, line):
            try:
                rlist = self.nvp.parseString(line).asList()
            except ParseException as E:
                raise ValueError(E)
            result = {}
            for a in rlist:
                if self.parse_date and a[0] == 'ts':
                    result[a[0]] = parse_ts(a[1])
                else:
                    result[a[0]] = a[1]
            return result
コード例 #5
0
ファイル: dtdparsing.py プロジェクト: termdew/daps
def PIs():
    """Parses Processing Instructions
   
    PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
    
    >>> tests=(
    ...  '''<?foo?>''',
    ...  '''<?foo ?>''',
    ...  '''<?foo abc def ghi ?>''',
    ...  '''<?xml-stylesheet foo="bla" bar="x" ?>''',
    ...  '''<?xml-stylesheet
    ...    foo="bla"
    ...    foo="x"
    ...  ?>''',
    ... )
    >>> p=PIs()
    >>> for t in tests:
    ...   result=p.parseString(t)
    ...   print result
    ['foo']
    ['foo', ' ']
    ['foo', ' abc def ghi ']
    ['xml-stylesheet', ' foo="bla" bar="x" ']
    ['xml-stylesheet', '\\n   foo="bla"\\n   foo="x"\\n ']
   """
    pi=Suppress('<?') + \
         Word(alphas, alphanums+'-_')('pitarget') + \
         ZeroOrMore(CharsNotIn('?>'))('picontents') + \
         Suppress('?>')
    return pi
コード例 #6
0
ファイル: link_parser.py プロジェクト: mitodl/ocw-studio
    def _parser_piece_text():
        """
        Return PyParsing element to the text of a markdown link.
        """
        # No double line breaks in markdown links
        double_line_break = (Word("\n\r", exact=1) + Optional(Word(" \t")) +
                             Word("\n\r", exact=1))

        # We will ignore escaped square brackets when match finding balanced
        # square brackets.
        ignore = Literal("\\[") | Literal("\\]")

        # The text parser will match text inside balanced brackets using the
        # nestedExpr helper function from PyParsing.
        #
        # Next we define the content that is allowed inside the brackets.
        content_character = ~FollowedBy(double_line_break) + CharsNotIn(
            "[]", exact=1)
        # Normally with nestedExpr, the content parser would be separately applied
        # to each whitespace-separated string within the nested expression.
        # However, since we set whitespaceChars to '', the content parser is
        # applied to characters one-at-a-time.
        #
        # If this ever changes, we would need to change content to something
        # like Combine(OneOrMore(~ignore + content_character))
        content = content_character
        text = originalTextFor(
            nestedExpr(
                opener="[",
                closer="]",
                content=content,
                ignoreExpr=ignore,
            )).setResultsName("text")
        text.addParseAction(lambda s, l, toks: toks[0][1:-1])
        return text
コード例 #7
0
def pyparse_blk(text):
    def create_add_block(tokens):
        return Block.Block(tokens.title,
                           tokens.color if tokens.color else 'white')

    left_bracket, right_bracket, equal_sign = map(Suppress, '[]=')
    color = (Word('#', hexnums, exact=7) | Word(alphanums, alphas))('color')
    empty_block = (
        left_bracket +
        right_bracket)('empty_block').setParseAction(lambda: EmptyBlock)
    new_lines = Word('/')('new_lines').setParseAction(
        lambda tokens: len(tokens.new_lines))
    title = CharsNotIn('[]/\n')('title').setParseAction(
        lambda tokens: tokens.title.strip())
    block_data = Optional(color + Suppress(':')) + Optional(title)
    block_data.addParseAction(create_add_block)
    blocks = Forward()
    block = left_bracket + block_data + blocks + right_bracket
    blocks << Group(
        ZeroOrMore(Optional(new_lines) + OneOrMore(empty_block | block)))

    stack = [Block.create_root_block()]
    try:
        result = blocks.parseString(text, parseAll=True)
        assert len(result) == 1
        blocks_list = result.asList()[0]
        populate_children(blocks_list, stack)
    except (ParseSyntaxException, ParseException) as parse_err:
        raise ValueError('Error {{0}}: {0}'.format(parse_err.lineno))
    return stack[0]
コード例 #8
0
def func_tokens(dictionary, parse_action):
    func_name = Word(alphas + '_', alphanums + '_')

    func_ident = Combine('$' + func_name.copy()('funcname'))
    func_tok = func_ident + originalTextFor(nestedExpr())('args')
    func_tok.leaveWhitespace()
    func_tok.setParseAction(parse_action)
    func_tok.enablePackrat()

    rx_tok = Combine(Literal('$').suppress() + Word(nums)('num'))

    def replace_token(tokens):
        index = int(tokens.num)
        return dictionary.get(index, '')

    rx_tok.setParseAction(replace_token)

    strip = lambda s, l, tok: tok[0].strip()
    text_tok = CharsNotIn(',').setParseAction(strip)
    quote_tok = QuotedString('"')

    if dictionary:
        arglist = Optional(delimitedList(quote_tok | rx_tok | text_tok))
    else:
        arglist = Optional(delimitedList(quote_tok | text_tok))

    return func_tok, arglist, rx_tok
コード例 #9
0
def pyparsing_parse(text):
    """
    >>> import os
    >>> dirname = os.path.join(os.path.dirname(__file__), "data")
    >>> filename = os.path.join(dirname, "error1.blk")
    >>> pyparsing_parse(open(filename, encoding="utf8").read())
    Traceback (most recent call last):
    ...
    ValueError: Error {0}: syntax error, line 8
    >>> filename = os.path.join(dirname, "error2.blk")
    >>> pyparsing_parse(open(filename, encoding="utf8").read())
    Traceback (most recent call last):
    ...
    ValueError: Error {0}: syntax error, line 1
    >>> filename = os.path.join(dirname, "error3.blk")
    >>> pyparsing_parse(open(filename, encoding="utf8").read())
    Traceback (most recent call last):
    ...
    ValueError: Error {0}: syntax error, line 4
    >>> expected = "[white: ]\\n[lightblue: Director]\\n/\\n/\\n[white: ]\\n[lightgreen: Secretary]\\n/\\n/\\n[white: Minion #1]\\n[white: ]\\n[white: Minion #2]"
    >>> filename = os.path.join(dirname, "hierarchy.blk")
    >>> blocks = pyparsing_parse(open(filename, encoding="utf8").read())
    >>> str(blocks).strip() == expected
    True

    >>> expected = "[#00CCDE: MessageBox Window\\n[lightgray: Frame\\n[white: ]\\n[white: Message text]\\n/\\n/\\n[goldenrod: OK Button]\\n[white: ]\\n[#ff0505: Cancel Button]\\n/\\n[white: ]\\n]\\n]"
    >>> filename = os.path.join(dirname, "messagebox.blk")
    >>> blocks = pyparsing_parse(open(filename, encoding="utf8").read())
    >>> str(blocks).strip() == expected
    True
    """
    def add_block(tokens):
        return Block.Block(tokens.name,
                           tokens.color if tokens.color else "white")

    left_bracket, right_bracket = map(Suppress, "[]")
    new_rows = Word("/")("new_rows").setParseAction(
        lambda tokens: len(tokens.new_rows))
    name = CharsNotIn("[]/\n")("name").setParseAction(
        lambda tokens: tokens.name.strip())
    color = (Word("#", hexnums, exact=7) | Word(alphas, alphanums))("color")
    empty_node = (left_bracket +
                  right_bracket).setParseAction(lambda: EmptyBlock)
    nodes = Forward()
    node_data = Optional(color + Suppress(":")) + Optional(name)
    node_data.setParseAction(add_block)
    node = left_bracket - node_data + nodes + right_bracket
    nodes << Group(
        ZeroOrMore(Optional(new_rows) + OneOrMore(node | empty_node)))
    stack = [Block.get_root_block()]
    try:
        results = nodes.parseString(text, parseAll=True)
        assert len(results) == 1
        items = results.asList()[0]
        populate_children(items, stack)
    except (ParseException, ParseSyntaxException) as err:
        raise ValueError("Error {{0}}: syntax error, line "
                         "{0}".format(err.lineno))
    return stack[0]
コード例 #10
0
	def fromString(inputText):
		text = nestedExpr("/*", "*/").suppress().transformString(inputText)

		semicolon = Suppress(Word(";"))
		quote     = Suppress(Word("\""))
		op        = Suppress(Word("{"))
		cl        = Suppress(Word("}"))
		opp       = Suppress(Word("("))
		clp       = Suppress(Word(")"))
		lt        = Suppress(Word("<"))
		gt        = Suppress(Word(">"))
		eq        = Suppress(Word("="))
		identifier        = Word(alphas+"_",alphanums+"_")
		typeIdentifier    = Word(alphas+"_",alphanums+"_:")
		structIdentifer   = Group(typeIdentifier.setResultsName('type') + identifier.setResultsName('identifier') + Optional(eq) + Optional(CharsNotIn(";").setResultsName('defaultValue')) + semicolon)
		structIdentifers  = Group(OneOrMore(structIdentifer))

		## Imports
		idslImport  = Suppress(Word("import")) + quote +  CharsNotIn("\";").setResultsName('path') + quote + semicolon
		idslImports = ZeroOrMore(idslImport)

		structDef     = Word("struct").setResultsName('type') + identifier.setResultsName('name') + op + structIdentifers.setResultsName("structIdentifiers") + cl + semicolon
		dictionaryDef = Word("dictionary").setResultsName('type') + lt + CharsNotIn("<>").setResultsName('content') + gt + identifier.setResultsName('name') + semicolon
		sequenceDef   = Word("sequence").setResultsName('type')   + lt + typeIdentifier.setResultsName('typeSequence') + gt + identifier.setResultsName('name') + semicolon
		enumDef       = Word("enum").setResultsName('type')       + identifier.setResultsName('name') + op + CharsNotIn("{}").setResultsName('content') + cl + semicolon
		exceptionDef  = Word("exception").setResultsName('type')  + identifier.setResultsName('name') + op + CharsNotIn("{}").setResultsName('content') + cl + semicolon

		raiseDef       = Suppress(Word("throws")) + typeIdentifier + ZeroOrMore( Literal(',') + typeIdentifier )
		decoratorDef    = Literal('idempotent') | Literal('out')
		retValDef       = typeIdentifier.setResultsName('ret')

		firstParam    = Group( Optional(decoratorDef.setResultsName('decorator')) + typeIdentifier.setResultsName('type') + identifier.setResultsName('name'))
		nextParam     = Suppress(Word(',')) + firstParam
		params        = firstParam + ZeroOrMore(nextParam)


		remoteMethodDef  = Group(Optional(decoratorDef.setResultsName('decorator')) + retValDef.setResultsName('ret') + typeIdentifier.setResultsName('name') + opp + Optional(          params).setResultsName('params') + clp + Optional(raiseDef.setResultsName('raise')) + semicolon )
		interfaceDef    = Word('interface').setResultsName('type')  + typeIdentifier.setResultsName('name') + op + Group(ZeroOrMore(remoteMethodDef)).setResultsName('methods') + cl + semicolon

		moduleContent = Group(structDef | enumDef | exceptionDef | dictionaryDef | sequenceDef | interfaceDef)
		module = Suppress(Word("module")) + identifier.setResultsName("name") + op + ZeroOrMore(moduleContent).setResultsName("contents") + cl + semicolon

		IDSL = idslImports.setResultsName("imports") + module.setResultsName("module")
		IDSL.ignore( cppStyleComment )
		tree = IDSL.parseString(text)
		return tree
コード例 #11
0
class Include(object):
    rule = (INCLUDE + LOPBRACK + CharsNotIn('>')("header") +
            ROPBRACK).setParseAction(lambda t: Include(t.header))

    def __init__(self, header, parent=''):
        self.header = header
        self.parent = parent

    def __repr__(self):
        return "#include <{}>".format(self.header)
コード例 #12
0
class NginxParser(object):
    """
    A class that parses nginx configuration with pyparsing
    """

    # constants
    left_bracket = Literal("{").suppress()
    right_bracket = Literal("}").suppress()
    semicolon = Literal(";").suppress()
    space = White().suppress()
    key = Word(alphanums + "_/")
    value = CharsNotIn("{};,")
    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    assignment = (key + Optional(space + value) + semicolon)
    block = Forward()

    block << Group(
        Group(key + Optional(space + modifier) + Optional(space + location))
        + left_bracket
        + Group(ZeroOrMore(Group(assignment) | block))
        + right_bracket)

    script = OneOrMore(Group(assignment) | block).ignore(pythonStyleComment)

    def __init__(self, source):
        self.source = source

    def parse(self):
        """
        Returns the parsed tree.
        """
        return self.script.parseString(self.source)

    def as_list(self):
        """
        Returns the list of tree.
        """
        return self.parse().asList()
コード例 #13
0
class RawNginxParser(object):
    # pylint: disable=expression-not-assigned
    """A class that parses nginx configuration with pyparsing."""

    # constants
    space = Optional(White())
    nonspace = Regex(r"\S+")
    left_bracket = Literal("{").suppress()
    right_bracket = space.leaveWhitespace() + Literal("}").suppress()
    semicolon = Literal(";").suppress()
    key = Word(alphanums + "_/+-.")
    dollar_var = Combine(Literal('$') + nonspace)
    condition = Regex(r"\(.+\)")
    # Matches anything that is not a special character AND any chars in single
    # or double quotes
    value = Regex(r"((\".*\")?(\'.*\')?[^\{\};,]?)+")
    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    comment = space + Literal('#') + restOfLine()

    assignment = space + key + Optional(space + value,
                                        default=None) + semicolon
    location_statement = space + Optional(modifier) + Optional(space +
                                                               location +
                                                               space)
    if_statement = space + Literal("if") + space + condition + space
    map_statement = space + Literal(
        "map") + space + nonspace + space + dollar_var + space
    block = Forward()

    block << Group(
        # key could for instance be "server" or "http", or "location" (in which case
        # location_statement needs to have a non-empty location)
        (Group(space + key + location_statement) ^ Group(if_statement)
         ^ Group(map_statement)).leaveWhitespace() + left_bracket +
        Group(ZeroOrMore(Group(comment | assignment) | block) +
              space).leaveWhitespace() + right_bracket)

    script = OneOrMore(Group(comment | assignment) ^ block) + space + stringEnd
    script.parseWithTabs()

    def __init__(self, source):
        self.source = source

    def parse(self):
        """Returns the parsed tree."""
        return self.script.parseString(self.source)

    def as_list(self):
        """Returns the parsed tree as a list."""
        return self.parse().asList()
コード例 #14
0
    def grammar(self, value):
        seps = list({value.list_sep, value.range_sep, value.step_sep})
        quotedstr = pp.quotedString.setParseAction(pp.removeQuotes)

        self.tokens = OrderedDict({
            'regex': Literal('r').suppress() + quotedstr,
            'quoted': quotedstr,
            'colnum': Grammar.integer,
            'sep': Or(seps),
            'name': CharsNotIn(seps),
        })
コード例 #15
0
    def initGrammar(self):
        L_Equals = Word("=")
        N_comment = htmlComment()

        N_name = CharsNotIn("{}|[]")
        N_simpleText = SkipTo(
            oneOf(["{{", "|", "[[", "]]", "}}", "'''", "<ref"]))

        N_elements = Forward()
        N_apostrofs = QuotedString("'''").setParseAction(
            lambda s, l, t: {'APOSTROFS': t})
        N_link = nestedExpr(
            opener="[[",
            closer="]]",
            content=N_name +
            Optional("|" + delimitedList(CharsNotIn("[]"), delim="|"))
        ).setParseAction(self.genLink)
        N_header = Group(L_Equals + SkipTo("=") + L_Equals).setParseAction(
            lambda s, l, t: {'HEADER': t})
        N_template = Forward()
        N_key = CharsNotIn("{}|=")
        # N_value = ZeroOrMore(CharsNotIn("{}|")) + ZeroOrMore(N_template + ZeroOrMore(CharsNotIn("{}|"))).setResultsName('VALUE')
        N_keyValues = "|" + delimitedList(
            Group(Optional(N_key) + Optional("=" + N_elements)), delim="|")
        N_label_content = N_template | ("{{" + OneOrMore("!") +
                                        "}}") | CharsNotIn("{}|")
        N_label = nestedExpr(opener="{", closer="}", content=N_label_content)
        N_template << nestedExpr(
            opener="{{", closer="}}", content=N_name +
            Optional(N_keyValues)).setParseAction(self.genTemplate)

        ref_start, ref_end = makeHTMLTags("ref")
        N_named_ref = ref_start + SkipTo(ref_end) + ref_end
        N_named_ref.setParseAction(lambda s, l, t: {'REF': t})

        N_element = N_comment | N_simpleText | N_named_ref | N_apostrofs | N_link | N_header | N_template | N_label

        # N_ref = nestedExpr( opener="<ref>", closer="</ref>", content=N_elements).setParseAction( lambda s,l,t: {'REF' : t} )
        N_elements << ZeroOrMore(N_element)

        self.N_S = N_elements
コード例 #16
0
    def initGrammar(self):
        N_comment = htmlComment().setParseAction(self.genComment)

        N_name = CharsNotIn("{}|[]")
        N_link = nestedExpr(
            opener="[[",
            closer="]]",
            content=N_name +
            Optional("|" + delimitedList(CharsNotIn("[]"), delim="|"))
        ).setParseAction(self.genLink).setDebug(True)
        L_Equals = Word("=")
        N_header = Group(L_Equals + SkipTo("=") + L_Equals).setParseAction(
            self.genHeader)

        N_element = Forward()

        N_template = Forward().setDebug(True)
        N_key = CharsNotIn("{}|=")
        N_internalText = CharsNotIn("{}|=<[") + SkipTo(
            Literal("{{") | Literal("[[") | Literal("<!--") | Literal("<ref")
            | Literal("|") | Literal("}}"))  #CharsNotIn("{}|[]<")
        N_insideElements = OneOrMore(N_element | N_internalText).setDebug(True)
        N_keyValue = Group(
            Optional(N_key) +
            Optional(Literal("=") + N_insideElements)).setDebug(True)
        N_keyValues = "|" + delimitedList(N_keyValue, delim="|")
        N_keyValues.setDebug(True)
        #N_label_content = N_template | ("{{"+OneOrMore("!")+"}}") | CharsNotIn("{}|")
        #N_label = nestedExpr( opener="{", closer="}", content = N_label_content)
        N_template << nestedExpr(
            opener="{{", closer="}}", content=N_name +
            Optional(N_keyValues)).setParseAction(self.genTemplate)

        #ref_start, ref_end = makeHTMLTags("ref")
        #N_named_ref = ref_start + SkipTo(ref_end) + ref_end
        #N_named_ref.setParseAction( lambda s,l,t: {'REF' : t} )

        N_element = N_comment | N_link | N_header | N_template
        N_element.setDebug(True)

        self.N_S = N_element
コード例 #17
0
    def pattern():
        """pyparsing pattern
        """
        
        def attachLocation(s, loc, tocs):
            """pyparsing callback. Saves path position in the original string
            """
            return [(loc, tocs[0])]

        path = CharsNotIn(" \t")("path")
        path.setParseAction(attachLocation)
        longPath = CharsNotIn(" \t", min=2)("path")
        longPath.setParseAction(attachLocation)
        slashPath = Combine(Literal('/') + Optional(CharsNotIn(" \t")))("path")
        slashPath.setParseAction(attachLocation)

        pat = ((Literal('f ') + Optional(White()) + Optional(path)) ^ longPath ^ slashPath) + \
                    Optional(White() + Word(nums)("line"))
        pat.leaveWhitespace()
        pat.setParseAction(CommandOpen.create)
        return pat
コード例 #18
0
def get_parser():
    from pyparsing import CharsNotIn, ParserElement, Suppress, ZeroOrMore

    ParserElement.enablePackrat()

    word = CharsNotIn(f"{PERIOD}{LBRACK}{RBRACK}")
    idx = Suppress(LBRACK) + word + Suppress(RBRACK)
    attr = Suppress(PERIOD) + word
    parser = word + ZeroOrMore(attr ^ idx)
    parser.setParseAction(PERIOD.join)

    return parser
コード例 #19
0
def build_parser():
    key = Word(alphanums).setResultsName('key')
    value = restOfLine.setParseAction(lambda string, location, tokens: tokens[
        0].strip()).setResultsName('value')
    property_ = Group(key + Suppress(Literal('=')) + value)
    properties = Group(OneOrMore(property_)).setResultsName('properties')
    section_name = (Suppress('[') + OneOrMore(CharsNotIn(']')) +
                    Suppress(']')).setResultsName('section')
    section = Group(section_name + properties)
    ini_file = ZeroOrMore(section).setResultsName('sections')
    ini_file.ignore(pythonStyleComment)
    return ini_file
コード例 #20
0
 def _getControls(self, index=1):
     identifier = QuotedString('"') | CharsNotIn(',')
     arglist = delimitedList(identifier)
     docstr = self.doc[1:]
     if index:
         return [(arglist.parseString(line)[index]).strip()
                 for line in docstr]
     else:
         ret = []
         for line in docstr:
             ret.append([z.strip() for z in arglist.parseString(line)])
         return ret
コード例 #21
0
    def query_from_string(cls, filter_string):
        """
        TODO:
        * handle values with " via: a.b.c.d="hello\"world"
        * handle keys with " via: a.\"b.c="yeah"
        * handle key with __ in it
        """
        filter_string_raw = filter_string
        filter_string = str(filter_string)

        unicode_spaces = list(set(
            str(c) for c in filter_string if c.isspace()))
        unicode_spaces_other = unicode_spaces + [u'(', u')', u'=', u'"']
        atom = CharsNotIn(unicode_spaces_other)
        atom_inside_quotes = CharsNotIn(u'"')
        atom_quoted = Literal('"') + Optional(atom_inside_quotes) + Literal(
            '"')
        EQUAL = Literal('=')

        grammar = (atom_quoted | atom) + EQUAL + Optional((atom_quoted | atom))
        grammar.setParseAction(cls.BoolOperand)

        boolExpr = infixNotation(
            grammar,
            [
                ("and", 2, opAssoc.LEFT, cls.BoolAnd),
                ("or", 2, opAssoc.LEFT, cls.BoolOr),
            ],
        )

        try:
            res = boolExpr.parseString('(' + filter_string + ')')
        except ParseException:
            raise RuntimeError(u"Invalid query %s" % filter_string_raw)

        if len(res) > 0:
            return res[0].result

        raise RuntimeError("Parsing the filter_string %s went terribly wrong" %
                           filter_string)
コード例 #22
0
ファイル: parsers.py プロジェクト: dvdylus/treeCl
    def __init__(self):
        self.ALPHA_LABEL = Regex(r'alpha\[\d+\]:')
        self.LNL_LABEL = Literal('Final GAMMA-based Score of best tree')
        self.FRQ_LABEL = Regex(r'Base frequencies: (?=\d+)') ^ Regex(
            r'ML estimate base freqs\[\d+\]:')
        self.NAMES_LABEL = Regex(r'Partition: \d+ with name:\s+')
        self.RATES_LABEL = Regex(r'rates\[\d+\].+?:')
        self.MODEL_LABEL = Literal('Substitution Matrix:')
        self.alpha = OneOrMore(
            Suppress(SkipTo(self.ALPHA_LABEL)) + Suppress(self.ALPHA_LABEL) +
            FLOAT)
        self.lnl = Suppress(SkipTo(self.LNL_LABEL)) + Suppress(
            self.LNL_LABEL) + FLOAT
        self.frq = OneOrMore(
            Group(
                Suppress(SkipTo(self.FRQ_LABEL)) + Suppress(self.FRQ_LABEL) +
                OneOrMore(FLOAT)))
        self.names = OneOrMore(
            Suppress(SkipTo(self.NAMES_LABEL)) + Suppress(self.NAMES_LABEL) +
            CharsNotIn('\n') + Suppress(LineEnd()))
        self.rates = OneOrMore(
            Group(
                Suppress(SkipTo(self.RATES_LABEL)) +
                Suppress(self.RATES_LABEL) + OneOrMore(FLOAT)))
        self.model = Suppress(SkipTo(self.MODEL_LABEL)) + Suppress(
            self.MODEL_LABEL) + WORD

        MODEL_LABEL = Literal('Substitution Matrix:')
        SCORE_LABEL = Literal('Final GAMMA  likelihood:')
        DESC_LABEL = Literal('Model Parameters of Partition')
        NAME_LEADIN = Literal(', Name:')
        DATATYPE_LEADIN = Literal(', Type of Data:')
        ALPHA_LEADIN = Literal('alpha:')
        TREELENGTH_LEADIN = Literal('Tree-Length:')
        RATES_LABEL = Regex(r'rate \w <-> \w:')
        FREQS_LABEL = Regex(r'freq pi\(\w\):')

        model = Suppress(SkipTo(MODEL_LABEL)) + Suppress(MODEL_LABEL) + WORD
        likelihood = Suppress(
            SkipTo(SCORE_LABEL)) + Suppress(SCORE_LABEL) + FLOAT
        description = Suppress(
            SkipTo(DESC_LABEL)) + Suppress(DESC_LABEL) + INT + Suppress(
                NAME_LEADIN) + SPACEDWORD + Suppress(DATATYPE_LEADIN) + WORD
        alpha = Suppress(ALPHA_LEADIN) + FLOAT
        rates = Suppress(RATES_LABEL) + FLOAT
        freqs = Suppress(FREQS_LABEL) + FLOAT

        self._dash_f_e_parser = (Group(OneOrMore(model)) + likelihood + Group(
            OneOrMore(
                Group(description + alpha + Suppress(TREELENGTH_LEADIN) +
                      Suppress(FLOAT) + Group(OneOrMore(rates)) +
                      Group(OneOrMore(freqs))))))
コード例 #23
0
def grammar():
    parenthesis = Forward()
    parenthesis <<= "(" + ZeroOrMore(CharsNotIn("()") | parenthesis) + ")"

    field_def = OneOrMore(Word(alphanums + "_\"'`:-") | parenthesis)
    field_def.setParseAction(field_act)

    tablename_def = (Word(alphas + "`_") | QuotedString("\""))

    field_list_def = field_def + ZeroOrMore(Suppress(",") + field_def)
    field_list_def.setParseAction(field_list_act)

    create_table_def = Literal(
        "CREATE") + "TABLE" + tablename_def.setResultsName(
            "tableName") + "(" + field_list_def.setResultsName(
                "fields") + ")" + ";"
    create_table_def.setParseAction(create_table_act)

    add_fkey_def = Literal(
        "ALTER") + "TABLE" + "ONLY" + tablename_def.setResultsName(
            "tableName") + "ADD" + "CONSTRAINT" + Word(
                alphanums + "_") + "FOREIGN" + "KEY" + "(" + Word(
                    alphanums +
                    "_").setResultsName("keyName") + ")" + "REFERENCES" + Word(
                        alphanums +
                        "_").setResultsName("fkTable") + "(" + Word(
                            alphanums +
                            "_").setResultsName("fkCol") + ")" + Optional(
                                Literal("DEFERRABLE")) + ";"
    add_fkey_def.setParseAction(add_fkey_act)

    other_statement_def = OneOrMore(CharsNotIn(";")) + ";"
    other_statement_def.setParseAction(other_statement_act)

    comment_def = "--" + ZeroOrMore(CharsNotIn("\n"))
    comment_def.setParseAction(other_statement_act)

    return OneOrMore(comment_def | create_table_def | add_fkey_def
                     | other_statement_def)
コード例 #24
0
ファイル: declaration.py プロジェクト: borglab/gtsam
class Include:
    """
    Rule to parse #include directives.
    """
    rule = (INCLUDE + LOPBRACK + CharsNotIn('>')("header") +
            ROPBRACK).setParseAction(lambda t: Include(t.header))

    def __init__(self, header: CharsNotIn, parent: str = ''):
        self.header = header
        self.parent = parent

    def __repr__(self) -> str:
        return "#include <{}>".format(self.header)
コード例 #25
0
ファイル: SignatureParser.py プロジェクト: ucsb-seclab/LoopMC
def getSignatures(signatureFile):
    fp = open(signatureFile, "rb")
    content = fp.read()
    fp.close()

    litteral = Word(alphas + nums + "_")
    regex_pattern = CharsNotIn("(")
    tags = (OneOrMore(Group('#' + litteral))).setResultsName("tags")
    hierarchy_modifier = oneOf("<= =")
    java_type = Group(
        Optional(hierarchy_modifier) +
        Word(alphas + nums + "_" + "." + "[" + "]" + "$"))
    return_type = (java_type | "*").setResultsName("return_type")
    method_name = (litteral | "<init>"
                   | regex_pattern).setResultsName("method_name")
    parameter = Group((java_type + litteral))
    parameter_list = (delimitedList(parameter)
                      | "*").setResultsName("parameters")
    body_instruction = (CharsNotIn("{;}"))
    signature_body = (delimitedList(body_instruction,
                                    ";")).setResultsName("signature_body")
    class_name = (java_type
                  | Group(Optional(hierarchy_modifier) +
                          CharsNotIn(":"))).setResultsName("class_name")

    signature_stmt =    Group((Optional(tags) + return_type + class_name+":"+method_name+ \
                        "(" + Optional(parameter_list) + ")"+"{"+Optional(signature_body)+"}"))
    grammar = OneOrMore(signature_stmt)

    grammar.ignore(dblSlashComment)
    result = grammar.parseString(content)

    #IPython.embed()
    #debug_db_data(content,result)

    signatures = [Signature(sig) for sig in result]

    return signatures
コード例 #26
0
ファイル: ofc_parser.py プロジェクト: m8nky/fixofx
    def __init__(self, debug=False):
        aggregate = Forward().setResultsName("OFC")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        aggregate << Group(aggregate_open_tag \
            + Dict(OneOrMore(aggregate | content)) \
            + aggregate_close_tag)

        self.parser = Group(aggregate).setResultsName("document")
        if (debug):
            self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                        ofxtools._ofxtoolsSuccessDebugAction,
                                        ofxtools._ofxtoolsExceptionDebugAction)
コード例 #27
0
ファイル: parser.py プロジェクト: captainmarkos/howling_hound
    def __init__(self, debug=False):
        # Parser definition for headers
        header = Group(
            Word(alphas) + Literal(":").suppress() +
            Optional(CharsNotIn("\r\n")))
        headers = Dict(OneOrMore(header)).setResultsName("header")

        # Parser definition for OFX body
        aggregate = Forward().setResultsName("OFX")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        aggregate << Group(aggregate_open_tag \
            + Dict(ZeroOrMore(aggregate | content)) \
            + aggregate_close_tag)
        body = Group(aggregate).setResultsName("body")

        # The parser as a whole
        self.parser = headers + body
        if (debug):
            self.parser.setDebugActions(_ofxStartDebugAction,
                                        _ofxSuccessDebugAction,
                                        _ofxExceptionDebugAction)
コード例 #28
0
 def Syntax():
     delimitedList = (lambda x: x + ZeroOrMore(Suppress(",") + x) +
                      Optional(Suppress(",")))
     dbl_quoted = Suppress('"') + Optional(CharsNotIn('"')) + Suppress('"')
     lelem = Word(alphanums + "-_")
     relem = Literal("true") | Literal("false") | Word(nums +
                                                       ".") | dbl_quoted
     dict_ = Forward()
     assignment = lelem + Suppress("=") + (relem | (dict_))
     dict_ << Suppress("{") + Group(
         Dict(delimitedList(Group(assignment)))
         | (dbl_quoted + Suppress(",") + Word(nums + "."))
         | delimitedList(dict_)) + Suppress("}")
     return Dict(delimitedList(Group(assignment)))
コード例 #29
0
ファイル: parsing.py プロジェクト: iamgroot42/basic-TARDIS
def parse_final():
	'''
	Takes the parsed data( list of lists), and performs substitutions according to the mapping specified
	as the start of the file. Applies first mapping for first half of the document, and second mapping 
	for the second half.
	The substitutions are performed by trying to match every field with the grammar below, and making
	substitutions whenever the grammar's rule s satisfied.
	'''

	first_mapping = []
	second_mapping = []
	parsed_data = []

	if not parse_data(first_mapping, second_mapping,parsed_data):
		return False

	SUBS = CharsNotIn('(') + "(" + Word(alphanums) + ")" + restOfLine
	for row in parsed_data:
		for i in range(len(row)):
			# First half of data ; first mapping will apply:
			if len(row)>6:
				try:
					parsed = SUBS.parseString(row[i])
					parsed[0] = first_mapping[parsed[0]]
					row[i] = ''.join(parsed)
					# print parsed
				except:
					pass
			# Second half of data ; second mapping will apply:
			else:	
				try:
					parsed = SUBS.parseString(row[i])
					parsed[0] = second_mapping[parsed[0]]
					row[i] = ''.join(parsed)
				except:
					pass
	return parsed_data
コード例 #30
0
    def makeNewickParser():
        # pyparsing
        from pyparsing import Combine, Optional, Literal, CaselessLiteral, \
                           Word, alphanums, \
                           nums, oneOf, Group, Dict, Forward, \
                           ParseResults, CharsNotIn, ZeroOrMore


        # literals
        lparen    = Literal("(").suppress()
        rparen    = Literal(")").suppress()
        colon     = Literal(":").suppress()
        semicolon = Literal(":").suppress()
        comma     = Literal(",").suppress()
        point     = Literal(".")
        e         = CaselessLiteral("E")


        # terminal rules
        name    = Word(alphanums + "_" + "-" + "." + "+")
        fnumber = Combine(Word("+-"+nums, nums) + 
                          Optional(point + Optional(Word(nums))) +
                          Optional(e + Word("+-"+nums, nums)))
        dist      = fnumber
        bootstrap = fnumber


        # recursive rules
        subtree = Forward()
        subtreelist = Forward()

        subtree << \
            Group(
                (
                    (lparen + subtreelist + rparen).setResultsName("subtree") |
                    name.setResultsName("name")
                ) +
                Optional(
                    CharsNotIn(",);").setResultsName("data")
                )
            )
        subtreelist << subtree + Optional(comma + subtreelist)


        # top level rule
        tree = subtree + Word(";").suppress()


        return tree.parseString
コード例 #31
0
def parse_variadic_templates(txt):
    template_param_type = Word(alphas)
    template_variadic = Literal('...')
    template_id = Word(alphas)

    template_variadic_param = Group(template_param_type + template_variadic +
                                    template_id)

    template_param = Group(template_param_type + template_id)

    # template_params = Group ( delimitedList( template_variadic_param | Optional(template_param) ) )
    template_params = (Optional(OneOrMore(template_param + ',')) +
                       template_variadic_param +
                       Optional(OneOrMore(',' + template_param)))

    template_params_no_variadic = (template_param +
                                   Optional(OneOrMore(',' + template_param)))

    template_decl = Optional("template" + Literal("<") +
                             template_params_no_variadic +
                             Literal(">")) + "template" + Literal(
                                 "<") + template_params + Literal(">")

    block_content = Forward()
    block = nestedExpr('{', '}', content=block_content) + Literal(';') * (0, 1)
    block_content << (CharsNotIn('{}') | block)

    decl = originalTextFor(template_decl + CharsNotIn('{') + block)

    template_file = Forward()
    code_block = decl | White() | Word(printables)
    template_file << (Optional(OneOrMore(code_block)) | template_file)

    parsed = template_file.parseString(txt)

    return parsed
コード例 #32
0
def process_task_lists(content: str) -> str:
    item = Group(CharsNotIn('\n') + (StringEnd() | '\n')).leaveWhitespace()
    checkbox = oneOf(['[ ]', '[x]'])
    marker = Suppress(oneOf(['+', '-', '*']) | Word(nums) + '.')
    #
    indent = oneOf(['    ', '\t']).leaveWhitespace()
    indents = Group(ZeroOrMore(indent))
    #
    list_item = Group(indents + marker + checkbox + item)
    #
    before = Suppress(StringStart() | Literal('\n\n')).leaveWhitespace()
    list_ = before + OneOrMore(list_item)
    #
    list_.setParseAction(replace_list)
    return list_.transformString(content)
コード例 #33
0
    def pattern():
        """pyparsing pattern of the command
        """
        def attachLocation(s, loc, tocs):
            return [(loc, tocs[0])]

        from pyparsing import CharsNotIn, Literal, Optional, White  # delayed import, performance optimization

        path = CharsNotIn(" \t")("path")
        path.setParseAction(attachLocation)

        pat = (Literal('s ') + Optional(White()) + Optional(path))
        pat.leaveWhitespace()
        pat.setParseAction(CommandSaveAs.create)
        return pat
コード例 #34
0
ファイル: rfc5424.py プロジェクト: AlekSi/loggerglue
SUPPORT_MISSING_VALUES = True

# from the RFCs ABNF description
nilvalue = Word("-")
digit = Regex("[0-9]{1}")
nonzero_digit = Regex("[1-9]{1}")
printusascii = printables
sp = White(" ", exact=1)
octet = Regex("[\x00-\xFF]")
utf_8_string = Regex("[\x00-\xFF]*")
BOM = "\xef\xbb\xbf"
bom = Regex(BOM)
msg_utf8 = bom + utf_8_string
msg_any = utf_8_string
msg = Combine(Or([msg_utf8, msg_any])).setResultsName("MSG")
sd_name = CharsNotIn('= ]"', 1, 32)
param_name = sd_name.setResultsName("SD_PARAM_NAME")
param_value = QuotedString(quoteChar='"', escChar="\\", multiline=True)
param_value = param_value.setResultsName("SD_PARAM_VALUE")
sd_id = sd_name.setResultsName("SD_ID")
sd_param = Group(param_name + Regex("=") + param_value)
sd_params = Group(ZeroOrMore(Group(sp + sd_param.setResultsName("SD_PARAM"))))
sd_element = Group("[" + sd_id + sd_params.setResultsName("SD_PARAMS") + "]")
sd_element = sd_element.setResultsName("SD_ELEMENT")
sd_elements = Group(OneOrMore(sd_element))
structured_data = Or([nilvalue, sd_elements.setResultsName("SD_ELEMENTS")])
structured_data = structured_data.setResultsName("STRUCTURED_DATA")
time_hour = Regex("0[0-9]|1[0-9]|2[0-3]")
time_minute = Regex("[0-5][0-9]")
time_second = time_minute
time_secfrac = Regex("\.[0-9]{1,6}")
コード例 #35
0
ファイル: rfc5424.py プロジェクト: zenweasel/loggerglue
SUPPORT_MISSING_VALUES = True

# from the RFCs ABNF description
nilvalue = Word("-")
digit = Regex("[0-9]{1}")
nonzero_digit = Regex("[1-9]{1}")
printusascii = printables
sp = White(" ", exact=1)
octet = Regex('[\x00-\xFF]')
utf_8_string = Regex('[\x00-\xFF]*')
BOM = '\xef\xbb\xbf'
bom = Regex(BOM)
msg_utf8 = bom + utf_8_string
msg_any = utf_8_string
msg = Combine(Or([msg_utf8, msg_any])).setResultsName('MSG')
sd_name = CharsNotIn('= ]"', 1, 32)
param_name = sd_name.setResultsName('SD_PARAM_NAME')
param_value = QuotedString(quoteChar='"', escChar='\\', multiline=True)
param_value = param_value.setResultsName('SD_PARAM_VALUE')
sd_id = sd_name.setResultsName('SD_ID')
sd_param = Group(param_name + Regex('=') + param_value)
sd_params = Group(ZeroOrMore(Group(sp+sd_param.setResultsName('SD_PARAM'))))
sd_element = Group('['+sd_id+sd_params.setResultsName('SD_PARAMS')+']')
sd_element = sd_element.setResultsName('SD_ELEMENT')
sd_elements = Group(OneOrMore(sd_element))
structured_data = Or([nilvalue, sd_elements.setResultsName('SD_ELEMENTS')])
structured_data = structured_data.setResultsName('STRUCTURED_DATA')
time_hour = Regex('0[0-9]|1[0-9]|2[0-3]')
time_minute = Regex('[0-5][0-9]')
time_second = time_minute
time_secfrac = Regex('\.[0-9]{1,6}')