Example #1
0
    def parse_block(self, block_text):
        """Parse a sql wiki block into pyparsing tokens.

        Expected input looks like:
            {<TAGNAME>: title='Some string' | other params...}
            <sql text>
            {<TAGNAME>}

        ``block_text`` may be a string or an iterable of strings; it is
        joined into one string before parsing.

        Returns the ParseResults; the SQL body is exposed under the
        'sqltext' results name and any params via Dict access.
        """
        # grammar building blocks
        block_start = Literal("{")
        sql_start = Keyword(self.TAGNAME, caseless=True)
        colon = Literal(":")
        sql_end = Literal("}")
        block_end = Keyword("{" + self.TAGNAME + "}", caseless=True)

        # params: name=value, where value is anything except '|' or '}'
        field_name = Word(alphanums)
        equal_sign = Suppress(Literal("="))
        field_value = CharsNotIn("|}")
        param_group = Group(field_name + equal_sign + field_value)

        # '|'-separated param list, exposed dict-style by name
        param_dict = Dict(delimitedList(param_group, '|'))

        # everything up to the closing {TAGNAME} marker is the sql body
        sql_text = SkipTo(block_end)

        # NOTE: the original wrapped this in a Forward(), but the grammar
        # is not recursive, so a plain expression is equivalent.  An unused
        # 'separator' literal was also dropped.
        sqldecl = (block_start +
                   sql_start +
                   Optional(colon) +
                   Optional(param_dict) +
                   sql_end +
                   sql_text.setResultsName('sqltext') +
                   block_end)

        block_str = "".join(block_text)
        return sqldecl.parseString(block_str)
Example #2
0
    def parse_block(self, block_text):
        """Parse a {sqltable: ...} wiki block into pyparsing tokens.

        Expected input looks like:
            {sqltable: title='Some string' | other params...}
            <sql text>
            {sqltable}

        ``block_text`` may be a string or an iterable of strings; it is
        joined into one string before parsing.

        Returns the ParseResults; the SQL body is exposed under the
        'sqltext' results name and any params via Dict access.
        """
        # grammar building blocks
        block_start = Literal("{")
        sql_start = Keyword("sqltable", caseless=True)
        colon = Literal(":")
        sql_end = Literal("}")
        block_end = Keyword("{sqltable}", caseless=True)

        # params: name=value, where value is anything except '|' or '}'
        field_name = Word(alphanums)
        equal_sign = Suppress(Literal("="))
        field_value = CharsNotIn("|}")
        param_group = Group(field_name + equal_sign + field_value)

        # '|'-separated param list, exposed dict-style by name
        param_dict = Dict(delimitedList(param_group, '|'))

        # everything up to the closing {sqltable} marker is the sql body
        sql_text = SkipTo(block_end)

        # NOTE: the original wrapped this in a Forward(), but the grammar
        # is not recursive, so a plain expression is equivalent.  An unused
        # 'separator' literal was also dropped.
        sqldecl = (block_start +
                   sql_start +
                   Optional(colon) +
                   Optional(param_dict) +
                   sql_end +
                   sql_text.setResultsName('sqltext') +
                   block_end)

        block_str = "".join(block_text)
        return sqldecl.parseString(block_str)
Example #3
0
class CreateParser(object):
    """
    This class can take a plain "CREATE TABLE" SQL as input and parse it into
    a Table object, so that we have more insight on the detail of this SQL.

    Example:
    sql = 'create table foo ( bar int primary key )'
    parser = CreateParser(sql)
    try:
        tbl_obj = parser.parse()
    except ParseError:
        log.error("Failed to parse SQL")

    This set of BNF rules are basically translated from the MySQL manual:
    http://dev.mysql.com/doc/refman/5.6/en/create-table.html
    If you don't know how to change the rule or fix the bug,
    <Getting Started with Pyparsing> is probably the best book to start with.
    Also this wiki has all supported functions listed:
    https://pyparsing.wikispaces.com/HowToUsePyparsing
    If you want to have more information how these characters are
    matching, add .setDebug(True) after the specific token you want to debug
    """
    # Basic token
    WORD_CREATE = CaselessLiteral("CREATE").suppress()
    WORD_TABLE = CaselessLiteral("TABLE").suppress()
    COMMA = Literal(',').suppress()
    DOT = Literal('.')
    LEFT_PARENTHESES = Literal('(').suppress()
    RIGHT_PARENTHESES = Literal(')').suppress()
    QUOTE = Literal("'") | Literal('"')
    BACK_QUOTE = Optional(Literal('`')).suppress()
    LENGTH = Word(nums)
    OBJECT_NAME = Word(alphanums + "_" + "-" + "<" + ">" + ":")
    QUOTED_STRING_WITH_QUOTE = QuotedString(
        quoteChar="'",
        escQuote="''",
        escChar='\\',
        multiline=True,
        unquoteResults=False) | QuotedString(quoteChar='"',
                                             escQuote='""',
                                             escChar='\\',
                                             multiline=True,
                                             unquoteResults=False)
    QUOTED_STRING = QuotedString(
        quoteChar="'", escQuote="''", escChar='\\',
        multiline=True) | QuotedString(
            quoteChar='"', escQuote='""', escChar='\\', multiline=True)
    # Start of a create table statement
    # Sample: this part of rule will match following section
    # `table_name` IF NOT EXISTS
    IF_NOT_EXIST = Optional(
        CaselessLiteral("IF") + CaselessLiteral("NOT") +
        CaselessLiteral("EXISTS")).suppress()
    TABLE_NAME = (QuotedString(
        quoteChar="`", escQuote="``", escChar='\\', unquoteResults=True)
                  | OBJECT_NAME)('table_name')

    # Column definition
    # Sample: this part of rule will match following section
    # `id` bigint(20) unsigned NOT NULL DEFAULT '0',
    COLUMN_NAME = (QuotedString(
        quoteChar="`", escQuote="``", escChar='\\', unquoteResults=True)
                   | OBJECT_NAME)('column_name')
    COLUMN_NAME_WITH_QUOTE = (QuotedString(
        quoteChar="`", escQuote="``", escChar='\\', unquoteResults=False)
                              | OBJECT_NAME)('column_name')
    UNSIGNED = Optional(CaselessLiteral("UNSIGNED"))('unsigned')
    ZEROFILL = Optional(CaselessLiteral("ZEROFILL"))('zerofill')
    COL_LEN = Combine(LEFT_PARENTHESES + LENGTH + RIGHT_PARENTHESES,
                      adjacent=False)('length')
    # BUG FIX: the original rule spelled it "INTERGER" and placed it after
    # "INT".  Since CaselessLiteral("INT") matches the first three chars of
    # "INTEGER", the longer keyword must come first or it is never matched
    # in full (and the misspelled alternative was unreachable anyway).
    INT_TYPE = (CaselessLiteral("TINYINT") | CaselessLiteral("SMALLINT")
                | CaselessLiteral("MEDIUMINT") | CaselessLiteral("INTEGER")
                | CaselessLiteral("INT") | CaselessLiteral("BIGINT")
                | CaselessLiteral("BINARY") | CaselessLiteral("BIT"))
    INT_DEF = (INT_TYPE('column_type') + Optional(COL_LEN) + UNSIGNED +
               ZEROFILL)
    VARBINARY_DEF = (CaselessLiteral('VARBINARY')('column_type') + COL_LEN)
    FLOAT_TYPE = \
        CaselessLiteral("REAL") | CaselessLiteral("DOUBLE") |\
        CaselessLiteral("FLOAT") | CaselessLiteral("DECIMAL") |\
        CaselessLiteral("NUMERIC")
    FLOAT_LEN = Combine(LEFT_PARENTHESES + LENGTH + Optional(COMMA + LENGTH) +
                        RIGHT_PARENTHESES,
                        adjacent=False,
                        joinString=', ')('length')
    FLOAT_DEF = (FLOAT_TYPE('column_type') + Optional(FLOAT_LEN) + UNSIGNED +
                 ZEROFILL)
    # time type definition. They contain type_name and an optional FSP section
    # Sample: DATETIME[(fsp)]
    FSP = COL_LEN
    DT_DEF = (
        Combine(CaselessLiteral("TIME") + Optional(CaselessLiteral("STAMP")))
        | CaselessLiteral("DATETIME"))('column_type') + Optional(FSP)
    SIMPLE_DEF = (CaselessLiteral("DATE") | CaselessLiteral("YEAR")
                  | CaselessLiteral("TINYBLOB") | CaselessLiteral("BLOB")
                  | CaselessLiteral("MEDIUMBLOB") | CaselessLiteral("LONGBLOB")
                  | CaselessLiteral("BOOL")
                  | CaselessLiteral("BOOLEAN"))('column_type')
    OPTIONAL_COL_LEN = Optional(COL_LEN)
    BINARY = Optional(CaselessLiteral("BINARY"))('binary')
    CHARSET_NAME = (Optional(QUOTE).suppress() +
                    Word(alphanums + '_')('charset') +
                    Optional(QUOTE).suppress())
    COLLATION_NAME = (Optional(QUOTE).suppress() +
                      Word(alphanums + '_')('collate') +
                      Optional(QUOTE).suppress())
    CHARSET_DEF = Optional(
        CaselessLiteral("CHARACTER SET").suppress() + CHARSET_NAME)
    COLLATE_DEF = Optional(
        CaselessLiteral("COLLATE").suppress() + COLLATION_NAME)
    CHAR_DEF = (CaselessLiteral("CHAR")('column_type') + OPTIONAL_COL_LEN +
                BINARY + CHARSET_DEF + COLLATE_DEF)
    VARCHAR_DEF = (CaselessLiteral("VARCHAR")('column_type') + COL_LEN +
                   BINARY + CHARSET_DEF + COLLATE_DEF)
    TEXT_TYPE = (CaselessLiteral("TINYTEXT") | CaselessLiteral("TEXT")
                 | CaselessLiteral("MEDIUMTEXT") | CaselessLiteral("LONGTEXT")
                 | CaselessLiteral("DOCUMENT"))
    TEXT_DEF = (TEXT_TYPE('column_type') + BINARY + CHARSET_DEF + COLLATE_DEF)
    ENUM_VALUE_LIST = Group(QUOTED_STRING_WITH_QUOTE +
                            ZeroOrMore(COMMA + QUOTED_STRING_WITH_QUOTE))(
                                'enum_value_list')
    ENUM_DEF = (CaselessLiteral("ENUM")('column_type') + LEFT_PARENTHESES +
                ENUM_VALUE_LIST + RIGHT_PARENTHESES + CHARSET_DEF +
                COLLATE_DEF)
    SET_VALUE_LIST = Group(QUOTED_STRING_WITH_QUOTE +
                           ZeroOrMore(COMMA + QUOTED_STRING_WITH_QUOTE))(
                               'set_value_list')
    SET_DEF = (CaselessLiteral("SET")('column_type') + LEFT_PARENTHESES +
               SET_VALUE_LIST + RIGHT_PARENTHESES + CHARSET_DEF + COLLATE_DEF)
    DATA_TYPE = (INT_DEF | FLOAT_DEF | DT_DEF | SIMPLE_DEF | TEXT_DEF
                 | CHAR_DEF | VARCHAR_DEF | ENUM_DEF | SET_DEF | VARBINARY_DEF)

    # Column attributes come after column type and length
    NULLABLE = (CaselessLiteral("NULL") | CaselessLiteral("NOT NULL"))
    DEFAULT_VALUE = (CaselessLiteral("DEFAULT").suppress() + (
        Optional(Literal('b'))('is_bit') + QUOTED_STRING_WITH_QUOTE('default')
        | Combine(
            CaselessLiteral("CURRENT_TIMESTAMP")('default') + Optional(COL_LEN)
            ('ts_len')) | Word(alphanums + '_' + '-' + '+')('default')))
    ON_UPDATE = (CaselessLiteral("ON") + CaselessLiteral("UPDATE") +
                 (CaselessLiteral("CURRENT_TIMESTAMP")('on_update') +
                  Optional(COL_LEN)('on_update_ts_len')))
    AUTO_INCRE = CaselessLiteral("AUTO_INCREMENT")
    UNIQ_KEY = (CaselessLiteral("UNIQUE") +
                Optional(CaselessLiteral("KEY")).suppress())
    PRIMARY_KEY = (CaselessLiteral("PRIMARY") +
                   Optional(CaselessLiteral("KEY")).suppress())
    COMMENT = Combine(CaselessLiteral("COMMENT").suppress() +
                      QUOTED_STRING_WITH_QUOTE,
                      adjacent=False)
    COLUMN_DEF = Group(COLUMN_NAME + DATA_TYPE + ZeroOrMore(
        NULLABLE('nullable') | DEFAULT_VALUE | ON_UPDATE
        | AUTO_INCRE('auto_increment') | UNIQ_KEY('uniq_key')
        | PRIMARY_KEY('primary') | COMMENT('comment')))
    COLUMN_LIST = Group(COLUMN_DEF +
                        ZeroOrMore(COMMA + COLUMN_DEF))('column_list')

    DOCUMENT_PATH = Combine(COLUMN_NAME_WITH_QUOTE +
                            ZeroOrMore(DOT + COLUMN_NAME_WITH_QUOTE))
    IDX_COL = ((Group(DOCUMENT_PATH + CaselessLiteral('AS') +
                      (CaselessLiteral('INT') | CaselessLiteral('STRING')) +
                      Optional(COL_LEN, default=''))) |
               (Group(COLUMN_NAME + Optional(COL_LEN, default=''))))

    # Primary key section
    COL_NAME_LIST = Group(IDX_COL + ZeroOrMore(COMMA + IDX_COL))
    IDX_COLS = (LEFT_PARENTHESES + COL_NAME_LIST + RIGHT_PARENTHESES)
    WORD_PRI_KEY = (CaselessLiteral("PRIMARY").suppress() +
                    CaselessLiteral("KEY").suppress())
    KEY_BLOCK_SIZE = (CaselessLiteral("KEY_BLOCK_SIZE").suppress() +
                      Optional(Literal('=')) +
                      Word(nums)('idx_key_block_size'))
    INDEX_USING = (
        CaselessLiteral("USING").suppress() +
        (CaselessLiteral("BTREE") | CaselessLiteral("HASH"))('idx_using'))

    INDEX_OPTION = (ZeroOrMore(KEY_BLOCK_SIZE | COMMENT('idx_comment')
                               | INDEX_USING))
    PRI_KEY_DEF = (COMMA + WORD_PRI_KEY + IDX_COLS('pri_list') + INDEX_OPTION)

    # Index section
    KEY_TYPE = (CaselessLiteral("FULLTEXT")
                | CaselessLiteral("SPATIAL"))('key_type')
    WORD_UNIQUE = CaselessLiteral("UNIQUE")('unique')
    WORD_KEY = (CaselessLiteral("INDEX").suppress()
                | CaselessLiteral("KEY").suppress())
    IDX_NAME = Optional(COLUMN_NAME)
    IDX_DEF = (ZeroOrMore(
        Group(COMMA + Optional(WORD_UNIQUE | KEY_TYPE) + WORD_KEY +
              IDX_NAME('index_name') + IDX_COLS('index_col_list') +
              INDEX_OPTION)))('index_section')

    # Constraint section as this is not a recommended way of using MySQL
    # we'll treat the whole section as a string
    CONSTRAINT = Combine(
        ZeroOrMore(COMMA + Optional(CaselessLiteral('CONSTRAINT')) +
                   # foreign key name except the key word 'FOREIGN'
                   Optional((~CaselessLiteral('FOREIGN') + COLUMN_NAME)) +
                   CaselessLiteral('FOREIGN') + CaselessLiteral('KEY') +
                   LEFT_PARENTHESES + COL_NAME_LIST + RIGHT_PARENTHESES +
                   CaselessLiteral('REFERENCES') + COLUMN_NAME +
                   LEFT_PARENTHESES + COL_NAME_LIST + RIGHT_PARENTHESES +
                   ZeroOrMore(Word(alphanums))),
        adjacent=False,
        joinString=' ')('constraint')

    # Table option section
    ENGINE = (CaselessLiteral("ENGINE").suppress() +
              Optional(Literal('=')).suppress() +
              COLUMN_NAME('engine').setParseAction(upcaseTokens))
    DEFAULT_CHARSET = (Optional(CaselessLiteral("DEFAULT")).suppress() +
                       ((CaselessLiteral("CHARACTER").suppress() +
                         CaselessLiteral("SET").suppress()) |
                        (CaselessLiteral("CHARSET").suppress())) +
                       Optional(Literal('=')).suppress() +
                       Word(alphanums + '_')('charset'))
    TABLE_COLLATE = (Optional(CaselessLiteral("DEFAULT")).suppress() +
                     CaselessLiteral("COLLATE").suppress() +
                     Optional(Literal('=')).suppress() + COLLATION_NAME)
    ROW_FORMAT = (
        CaselessLiteral("ROW_FORMAT").suppress() +
        Optional(Literal('=')).suppress() +
        Word(alphanums + '_')('row_format').setParseAction(upcaseTokens))
    TABLE_KEY_BLOCK_SIZE = (
        CaselessLiteral("KEY_BLOCK_SIZE").suppress() +
        Optional(Literal('=')).suppress() + Word(nums)
        ('key_block_size').setParseAction(lambda s, l, t: [int(t[0])]))
    COMPRESSION = (
        CaselessLiteral("COMPRESSION").suppress() +
        Optional(Literal('=')).suppress() +
        Word(alphanums + '_')('compression').setParseAction(upcaseTokens))
    # Parse and make sure auto_increment is an integer
    # parseAction function is defined as fn( s, loc, toks ), where:
    # s is the original parse string
    # loc is the location in the string where matching started
    # toks is the list of the matched tokens, packaged as a ParseResults_
    # object
    TABLE_AUTO_INCRE = (
        CaselessLiteral("AUTO_INCREMENT").suppress() +
        Optional(Literal('=')).suppress() + Word(nums)
        ('auto_increment').setParseAction(lambda s, l, t: [int(t[0])]))
    TABLE_COMMENT = (CaselessLiteral("COMMENT").suppress() +
                     Optional(Literal('=')).suppress() +
                     QUOTED_STRING_WITH_QUOTE('comment'))
    TABLE_OPTION = ZeroOrMore(ENGINE | DEFAULT_CHARSET | TABLE_COLLATE
                              | ROW_FORMAT | TABLE_KEY_BLOCK_SIZE | COMPRESSION
                              | TABLE_AUTO_INCRE | TABLE_COMMENT)

    # Partition section
    PARTITION = Optional(
        Combine(Combine(Optional(Literal('/*!') + Word(nums))) +
                CaselessLiteral("PARTITION") + CaselessLiteral("BY") +
                SkipTo(StringEnd()),
                adjacent=False,
                joinString=" ")('partition'))

    @classmethod
    def generate_rule(cls):
        """Return the combined pyparsing rule for a full CREATE TABLE stmt."""
        # The final rule for the whole statement match
        return (cls.WORD_CREATE + cls.WORD_TABLE + cls.IF_NOT_EXIST +
                cls.TABLE_NAME + cls.LEFT_PARENTHESES + cls.COLUMN_LIST +
                Optional(cls.PRI_KEY_DEF) + cls.IDX_DEF + cls.CONSTRAINT +
                cls.RIGHT_PARENTHESES + cls.TABLE_OPTION('table_options') +
                cls.PARTITION)

    @classmethod
    def parse(cls, sql):
        """Parse ``sql`` (a CREATE TABLE statement) into a models.Table.

        Raises ParseError when the statement cannot be parsed or when
        multiple primary keys are defined.
        """
        try:
            result = cls.generate_rule().parseString(sql)
        except ParseException as e:
            raise ParseError(
                "Failed to parse SQL, unsupported syntax: {}".format(e),
                e.line, e.column)

        inline_pri_exists = False
        table = models.Table()
        table.name = result.table_name
        table_options = [
            'engine', 'charset', 'collate', 'row_format', 'key_block_size',
            'compression', 'auto_increment', 'comment'
        ]
        for table_option in table_options:
            if table_option in result.table_options:
                setattr(table, table_option,
                        result.table_options.get(table_option))
        if 'partition' in result:
            table.partition = result.partition
        if 'constraint' in result:
            table.constraint = result.constraint
        for column_def in result.column_list:
            if column_def.column_type == 'ENUM':
                column = models.EnumColumn()
                for enum_value in column_def.enum_value_list:
                    column.enum_list.append(enum_value)
            elif column_def.column_type == 'SET':
                column = models.SetColumn()
                for set_value in column_def.set_value_list:
                    column.set_list.append(set_value)
            elif column_def.column_type in ('TIMESTAMP', 'DATETIME'):
                column = models.TimestampColumn()
                if 'on_update' in column_def:
                    if 'on_update_ts_len' in column_def:
                        column.on_update_current_timestamp = \
                            "{}({})".format(
                                column_def.on_update,
                                column_def.on_update_ts_len)
                    else:
                        column.on_update_current_timestamp = \
                            column_def.on_update
            else:
                column = models.Column()

            column.name = column_def.column_name
            column.column_type = column_def.column_type

            # We need to check whether each column property exist in the
            # create table string, because not specifying a "COMMENT" is
            # different from specifying "COMMENT" equals to empty string.
            # The former one will ends up being
            #   column=None
            # and the later one being
            #   column=''
            if 'comment' in column_def:
                column.comment = column_def.comment
            if 'nullable' in column_def:
                if column_def.nullable == 'NULL':
                    column.nullable = True
                elif column_def.nullable == 'NOT NULL':
                    column.nullable = False
            if 'unsigned' in column_def:
                if column_def.unsigned == 'UNSIGNED':
                    column.unsigned = True
            if 'default' in column_def:
                if 'ts_len' in column_def:
                    column.default = "{}({})".format(column_def.default,
                                                     column_def.ts_len)
                else:
                    column.default = column_def.default
                if 'is_bit' in column_def:
                    column.is_default_bit = True
            if 'charset' in column_def:
                column.charset = column_def.charset
            if 'length' in column_def:
                column.length = column_def.length
            if 'collate' in column_def:
                column.collate = column_def.collate
            if 'auto_increment' in column_def:
                column.auto_increment = True
            if 'primary' in column_def:
                idx_col = models.IndexColumn()
                idx_col.name = column_def.column_name
                table.primary_key.column_list.append(idx_col)
                inline_pri_exists = True
            table.column_list.append(column)
        if 'pri_list' in result:
            if inline_pri_exists:
                raise ParseError("Multiple primary keys defined")
            table.primary_key.name = 'PRIMARY'
            for col in result.pri_list:
                for name, length in col:
                    idx_col = models.IndexColumn()
                    idx_col.name = name
                    if length:
                        idx_col.length = length
                    table.primary_key.column_list.append(idx_col)
            if 'idx_key_block_size' in result:
                # BUG FIX: the original read the nonexistent attribute
                # ``result.pri_key_block_size`` (always empty); the value
                # is stored under the 'idx_key_block_size' results name,
                # which is exactly what the guard above checks for.
                table.primary_key.key_block_size = result.idx_key_block_size
            if 'idx_comment' in result:
                table.primary_key.comment = result.idx_comment
        if 'index_section' in result:
            for idx_def in result.index_section:
                idx = models.TableIndex()
                idx.name = idx_def.index_name
                if 'idx_key_block_size' in idx_def:
                    idx.key_block_size = idx_def.idx_key_block_size
                if 'idx_comment' in idx_def:
                    idx.comment = idx_def.idx_comment
                if 'idx_using' in idx_def:
                    idx.using = idx_def.idx_using
                if 'key_type' in idx_def:
                    idx.key_type = idx_def.key_type
                if 'unique' in idx_def:
                    idx.is_unique = True
                for col in idx_def.index_col_list:
                    for col_def in col:
                        if len(col_def) == 4 and col_def[1].upper() == 'AS':
                            (document_path, word_as, key_type,
                             length) = col_def
                            idx_col = models.DocStoreIndexColumn()
                            idx_col.document_path = document_path
                            idx_col.key_type = key_type
                            if length:
                                idx_col.length = length
                            idx.column_list.append(idx_col)
                        else:
                            (name, length) = col_def
                            idx_col = models.IndexColumn()
                            idx_col.name = name
                            if length:
                                idx_col.length = length
                            idx.column_list.append(idx_col)
                table.indexes.append(idx)
        return table
Example #4
0
# BibTeX grammar (fragment).
# NOTE(review): string, HASH, field_name, EQUALS, COMMA, AT, entry_type,
# bracketed, cite_key, macro_def and comment are defined elsewhere in this
# file -- TODO confirm their definitions before modifying these rules.

# a field value is one or more strings joined by '#' (BibTeX concatenation)
field_value = string + ZeroOrMore(HASH + string)
field_def = Group(field_name + EQUALS + field_value)
# fields are comma-separated; the trailing comma after the last one is optional
entry_contents = Dict(ZeroOrMore(field_def + COMMA) + Optional(field_def))

# Entry is surrounded either by parentheses or curlies
entry = (AT + entry_type + bracketed(cite_key + COMMA + entry_contents))

# Preamble is a macro-like thing with no name
preamble = AT + CaselessLiteral('preamble') + bracketed(field_value)

# Macros (aka strings)
macro_contents = macro_def + EQUALS + field_value
macro = AT + CaselessLiteral('string') + bracketed(macro_contents)

# Implicit comments
# the parse action tags the skipped text with an 'icomment' marker token
icomment = SkipTo('@').setParseAction(lambda t : t.insert(0, 'icomment'))

# entries are last in the list (other than the fallback) because they have
# arbitrary start patterns that would match comments, preamble or macro
definitions = Group(comment |
                    preamble |
                    macro |
                    entry |
                    icomment)

# Start symbol
bibfile = ZeroOrMore(definitions)


def parse_str(str):
    """Parse a whole BibTeX document given as a string; return ParseResults.

    NOTE(review): the parameter shadows the builtin ``str``; renaming it
    (e.g. to ``text``) would be cleaner, but would change the keyword
    interface for existing callers -- confirm before renaming.
    """
    return bibfile.parseString(str)
Example #5
0
File: parsers.py  Project: MichelPate/wowdb
    def compute(self, text, verbose=True):
        """Expand spell-description markup in ``text`` and return the result.

        The method builds several pyparsing transformers (icons, language
        choices, variable calls, spell references, expressions, conditions)
        and applies them to ``text`` one after another via transformString.
        Each transformer's parse action delegates to a ``self.set*`` /
        ``self.call*`` helper defined elsewhere in this class.

        Finally it substitutes accumulated ``self.variables`` placeholders
        (when ``verbose``), applies display fixups, and chains to the parent
        class' compute().
        """

        # Literals
        dollar = Literal('$')
        amper = Literal('&')
        at = Literal('@')
        qm = Literal('?')
        em = Literal('!')
        dot = Literal('.')
        colon = Literal(":")
        vbar = Literal("|")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lcurly = Literal("{")
        rcurly = Literal("}")
        lparen = Literal("(")
        rparen = Literal(")")
        lt = Literal("<")
        gt = Literal(">")
        eq = Literal("=")
        deq = Literal("==")

        # Reusables
        # spell ids are 2-6 digit integers; single digits are indexes
        spellId = Word(nums, min=2, max=6).addParseAction(
            tokenMap(int)).setResultsName("spellId")
        idx = Word(nums,
                   max=1).addParseAction(tokenMap(int)).setResultsName("id")
        var = Word(alphas).setResultsName("var")

        # Spell References
        # forms: $@var[id] | $<id><var>[effect] | $<var>[effect]
        effectId = Optional(
            Word(nums, max=2).addParseAction(
                tokenMap(int)).setResultsName("effectId"))
        references = (dollar.suppress() +
                      ((at.suppress() + var + Optional(spellId)) |
                       (spellId + var + effectId) |
                       (var + effectId))).addParseAction(self.setReferences)

        # Conditions
        # '[...]' bracketed statement; exposed under the 'statement' name
        brackets = Suppress(lbrack) + SkipTo(rbrack).setResultsName(
            "statement") + Suppress(rbrack)
        value = Word(nums, max=5).addParseAction(
            tokenMap(int)).setResultsName("value")
        # a condition operand: optional '!' negation + variable/spell/index,
        # or a (possibly negative) numeric value, or an 8-char hash variable
        conditionVar = Group(
            Optional(em).setResultsName("not") + Optional(var) +
            (spellId | idx)
            | Optional("-") + value
            | Word(alphanums, exact=8).setResultsName("hashVariable"))
        # '$?cond[then]cond[then]...[else]' chains; operators join operands
        conditions = ((dollar + qm).suppress() + OneOrMore(
            Group(
                Optional(Suppress(qm)) + Optional(Suppress(lparen)) +
                OneOrMore(
                    conditionVar.setResultsName("variables*") +
                    Optional(Combine(em + eq) | amper | vbar | deq | lt
                             | gt).setResultsName("operators*")) +
                Optional(Suppress(rparen)) +
                brackets).setResultsName("conditions*")) +
                      brackets).addParseAction(lambda t: self.setConditions(
                          t, verbose=verbose)) + Optional(dot.suppress())

        # Call Variable
        # '<$name>' or '$<name>' -- both orderings are accepted
        callVariables = (Suppress((lt + dollar) | (dollar + lt)) +
                         SkipTo(gt).setResultsName("name") +
                         Suppress(gt)).addParseAction(self.callVariables)

        # Expressions
        # '${content}' with an optional '.N' precision modifier
        expressions = (
            Suppress(dollar + lcurly) +
            SkipTo(rcurly).setResultsName("content") + rcurly + Optional(
                dot.suppress() + Word(nums, exact=1).addParseAction(
                    tokenMap(int)).setResultsName("mod"), )
        ).addParseAction(lambda t: self.setExpressions(t, verbose=verbose))

        # Language Choices
        # '$Lword:word:...;' picks one of several word options
        languageChoices = (
            (Literal('$L') | Literal('$l')).suppress() +
            OneOrMore(Word(alphas) + Optional(Literal(":").suppress())
                      ).setResultsName("options*") +
            Literal(';').suppress()).addParseAction(self.setLanguageChoices)

        # Icons
        # '|Tpath:NN|t' -- texture path plus 2-digit size
        icons = (Literal("|T").suppress() +
                 SkipTo(colon).setResultsName("path") +
                 colon.suppress() + Word(nums, exact=2).addParseAction(
                     tokenMap(int)).setResultsName("size") +
                 Literal("|t").suppress()).addParseAction(self.setIcons)

        # Parsing layer by layer
        # NOTE: order matters -- each transformer consumes its own markup
        # before the next one sees the text
        parsingOrder = [
            icons, languageChoices, callVariables, references, expressions,
            conditions
        ]
        steps = [text]
        for parser in parsingOrder:
            steps.append(parser.transformString(steps[-1]))
        result = steps[-1]

        # Replace each Sha1 Hash placeholder by refering value
        if verbose:
            for k, v in self.variables.items():
                result = result.replace(k, str(v))

        # Display fixes
        displayFixes = [["*% of", "% of"], ["power)%", "power)"]]
        for bef, aft in displayFixes:
            result = result.replace(bef, aft)

        return super(SpellDescriptionParser, self).compute(result, verbose)
Example #6
0
 def labeled_float(label):
     """Build a rule matching the FLOAT that follows *label*.

     Everything up to and including *label* is suppressed; only the
     FLOAT token is kept in the results.
     """
     skip_to_label = Suppress(SkipTo(label))
     label_itself = Suppress(label)
     return skip_to_label + label_itself + FLOAT
Example #7
0
class GiftParser(object):
    """Parser for GIFT format questions."""

    # separators, which have been suppressed
    double_colon = sep('::')

    colon = sep(':')

    span = sep('..')

    left_curly = sep('{')

    right_curly = sep('}')

    equals = sep('=')

    tilda = sep('~')

    percent = sep('%')

    arrow = sep('->')

    pound = sep('#')

    dbl_fwd_slash = sep('//')

    # integer signs
    plus = Literal('+')
    minus = Literal('-')

    bool_true = (Literal('TRUE') | Literal('T')).setParseAction(make_true)

    bool_false = (Literal('FALSE') | Literal('F')).setParseAction(make_false)

    boolean = bool_true | bool_false

    plus_or_minus = plus | minus

    number = Word(nums)

    integer = Combine(Optional(plus_or_minus) +
                      number).setParseAction(make_int)

    unsigned_float = Combine(
        Word(nums) + Optional(Word('.', nums))).setParseAction(make_float)

    signed_float = Combine(
        Optional(plus_or_minus) + Word(nums) +
        Optional(Word('.', nums))).setParseAction(make_float)

    blank_lines = Suppress(LineEnd() + OneOrMore(LineEnd()))

    comment = dbl_fwd_slash + restOfLine

    title = (double_colon +
             SkipTo(double_colon).setParseAction(strip_spaces)('title') +
             double_colon)

    task = SkipTo(left_curly).setParseAction(strip_spaces)('task')

    # Multiple choice questions with one correct answer.
    #
    # // question: 1 name: Grants tomb
    # ::Grants tomb::Who is buried in Grant's tomb in New York City? {
    # =Grant
    # ~No one
    # #Was true for 12 years
    # ~Napoleon
    # #He was buried in France
    # ~Churchill
    # #He was buried in England
    # ~Mother Teresa
    # #She was buried in India }
    #
    # Multiple choice questions with multiple right answers.
    #
    # What two people are entombed in Grant's tomb? {
    # ~%-100%No one
    # ~%50%Grant
    # ~%50%Grant's wife
    # ~%-100%Grant's father}

    # End-of-answer sentinels for multiple-choice: the next answer marker
    # ('=' correct, '~' wrong/partial) or the closing '}' of the block.
    eof_multi_choice_answer = equals | tilda | right_curly

    # Same sentinels plus '#', which introduces per-answer feedback.
    ext_eof_multi_choice_answer = pound | eof_multi_choice_answer

    # '# hello world ~'
    multi_choice_feedback = Combine(
        pound + SkipTo(eof_multi_choice_answer).setParseAction(strip_spaces))

    # 'answer #'
    multi_choice_answer_text = SkipTo(
        ext_eof_multi_choice_answer).setParseAction(strip_spaces)

    # Answer weight such as '%50%' or '%-100%'; folded to an int.
    weight = Combine(percent + integer + percent).setParseAction(make_int)

    # One answer: '=' marks a correct choice; '~' a wrong/partial one with an
    # optional weight (default 0).  Trailing '#feedback' is optional.
    multi_choice_answer = (
        (Literal('=')('sign')
         | Literal('~')('sign') + Optional(weight, default=0)('weight')) +
        multi_choice_answer_text('answer') +
        Optional(multi_choice_feedback, default='')('feedback')
    ).setParseAction(set_multi_choice_answer)

    multi_choice_answers = OneOrMore(multi_choice_answer)

    # Whole question: optional '::title::', task text, then '{ answers }'.
    multi_choice_question = (
        Optional(title, default='') + task + left_curly +
        multi_choice_answers.setParseAction(set_multi_choice_answers) +
        right_curly).setParseAction(set_multi_choice_question)

    multi_choice_question.ignore(comment)

    # True-false questions.
    # Sample:
    # // question: 0 name: TrueStatement using {T} style
    # ::TrueStatement about Grant::Grant was buried in a tomb in NY.{T}
    #
    # // question: 0 name: FalseStatement using {FALSE} style
    # ::FalseStatement about sun::The sun rises in the West.{FALSE}

    # '#feedback' runs until the closing '}'.
    true_false_feedback = Combine(
        pound + SkipTo(right_curly).setParseAction(strip_spaces))

    # '{T}' / '{FALSE}' etc., with optional feedback before '}'.
    true_false_answer = (
        left_curly + boolean('answer') +
        Optional(true_false_feedback, default='')('feedback') + right_curly)

    true_false_question = (
        Optional(title, default='') + task +
        true_false_answer).setParseAction(set_true_false_question)

    true_false_question.ignore(comment)

    # Short answer questions.
    # Samples:
    # Who's buried in Grant's tomb?{=Grant =Ulysses S. Grant =Ulysses Grant}
    # Two plus two equals {=four =4}

    # Short answers are all introduced by '='; '#' starts feedback.
    eof_short_answer_answer = equals | right_curly

    ext_eof_short_answer = pound | eof_short_answer_answer

    short_answer_feedback = Combine(
        pound + SkipTo(eof_short_answer_answer).setParseAction(strip_spaces))

    short_answer_text = SkipTo(ext_eof_short_answer).setParseAction(
        strip_spaces)

    short_answer = (equals + short_answer_text('answer') +
                    Optional(short_answer_feedback, default='')('feedback')
                    ).setParseAction(set_short_answer)

    # LineEnd() anchors the closing '}' to the end of the question's line.
    short_answers = (OneOrMore(short_answer) + right_curly + LineEnd())

    short_answer_question = (Optional(title, default='') + task + left_curly +
                             short_answers.setParseAction(set_short_answers)
                             ).setParseAction(set_short_answer_question)

    short_answer_question.ignore(comment)

    # Matching questions.
    # Sample:
    # Match the following countries with their corresponding capitals. {
    # =Canada -> Ottawa
    # =Italy  -> Rome
    # =Japan  -> Tokyo
    # =India  -> New Delhi
    # }

    eof_match_answer = equals | right_curly

    ext_eof_match_answer = pound | equals | right_curly

    match_feedback = Combine(
        pound + SkipTo(eof_match_answer).setParseAction(strip_spaces))

    # Left-hand side of one '=lhs -> rhs' pair.
    lhs = SkipTo(arrow).setParseAction(strip_spaces)

    match_answer = (equals + lhs('lhs') + arrow +
                    SkipTo(ext_eof_match_answer)('rhs') +
                    Optional(match_feedback, default='')('feedback')
                    ).setParseAction(set_match_answer)

    # Two explicit answers + OneOrMore: a match question needs >= 3 pairs,
    # which also disambiguates it from short-answer questions.
    match_answers = (left_curly + match_answer + match_answer +
                     OneOrMore(match_answer) + right_curly)

    match_question = (Optional(title, default='') + task +
                      match_answers.setParseAction(set_match_answers)
                      ).setParseAction(set_match_answer_question)

    match_question.ignore(comment)

    # Missing word questions.
    #
    # CB costs {~lots of money =nothing ~a small amount} to download.

    # Same answer syntax as multiple choice, embedded mid-sentence.
    missing_word_answers = multi_choice_answers

    prefix = SkipTo(left_curly)

    suffix = Combine(OneOrMore(Word(alphanums)))

    missing_word_question = (
        prefix('prefix') + left_curly +
        missing_word_answers.setParseAction(set_multi_choice_answers) +
        right_curly +
        suffix('suffix')).setParseAction(set_missing_word_question)

    # Numeric questions.
    # No support for multiple numeric answers.
    # Sample: When was Ulysses S. Grant born?{#1822:5}

    # '{#value:error}' — error margin is optional.
    # NOTE(review): feedback reuses match_feedback ('#...' up to '=' or '}');
    # presumably intentional since the delimiters coincide — confirm.
    numeric_single_answer = (
        left_curly + pound +
        signed_float.setParseAction(make_float)('answer') +
        Optional(colon + unsigned_float.setParseAction(make_float)('error')) +
        Optional(match_feedback, default='')('feedback') + right_curly)

    # '{#min..max}' range form.
    numeric_range_answer = (left_curly + pound +
                            signed_float.setParseAction(make_float)('min') +
                            span +
                            signed_float.setParseAction(make_float)('max') +
                            right_curly)

    # Try the range form first; it is a strict superset prefix-wise.
    numeric_answer = (numeric_range_answer | numeric_single_answer)

    numeric_question = (Optional(title, default='') + task +
                        numeric_answer).setParseAction(set_numeric_question)

    numeric_question.ignore(comment)

    # Essay questions.
    # Write a short biography of Dag Hammarskjold. {}

    # An empty '{}' marks an essay (free-text) answer.
    essay_answer = left_curly + right_curly

    essay_question = (Optional(title, default='') + task +
                      essay_answer).setParseAction(set_essay_question)

    essay_question.ignore(comment)

    # Ordered alternation: more specific/anchored forms are tried first.
    question = (essay_question | match_question | numeric_question
                | missing_word_question | multi_choice_question
                | true_false_question | short_answer_question)

    # Top-level grammar: one or more questions.
    bnf = OneOrMore(question)

    @classmethod
    def parse(cls, text):
        """Parse GIFT markup *text* with the class grammar (``cls.bnf``).

        Returns the pyparsing result tree.  On invalid syntax the offending
        text is logged with a traceback and a ParseError carrying the
        pyparsing message is raised.
        """
        try:
            return cls.bnf.parseString(text)
        except ParseException as e:
            logging.exception('Invalid GIFT syntax: %s', text)
            raise ParseError(e.msg)

    @classmethod
    def parse_questions(cls, text):
        """Parses newline-separated GIFT questions.

        Returns a list with one converted question object per parsed node.
        """
        tree = cls.parse(text)
        return [GiftAdapter().convert_to_question(node) for node in tree]
예제 #8
0

def parsePossibleURL(t):
    """Parse action: split a (possibleURL, fallback) token pair.

    Returns ``[urlsplit(possibleURL), fallback]``.  PyParsing versions
    before 2.1.0 wrap the tokens in one extra level of nesting (see
    enthought/enable#224), so a singleton is unwrapped first.
    """
    tokens = t[0] if len(t) == 1 else t
    raw_url, fallback = tokens
    return [urlparse.urlsplit(raw_url), fallback]


#Normal color declaration
colorDeclaration = none | currentColor | colourValue

# End of a URL(...) paint: closing paren, then an optional fallback color
# declaration, then end of input.
urlEnd = (Literal(")").suppress() +
          Optional(Group(colorDeclaration), default=()) + StringEnd())

# 'URL(' + everything up to urlEnd; parsePossibleURL splits the raw text
# into (urlsplit tuple, fallback).
url = (CaselessLiteral("URL") + Literal("(").suppress() +
       Group(SkipTo(urlEnd, include=True).setParseAction(parsePossibleURL)))

#paint value will parse into a (type, details) tuple.
#For none and currentColor, the details tuple will be the empty tuple
#for CSS color declarations, it will be (type, (R,G,B))
#for URLs, it will be ("URL", ((url tuple), fallback))
#The url tuple will be as returned by urlparse.urlsplit, and can be
#an empty tuple if the parser has an error
#The fallback will be another (type, details) tuple as a parsed
#colorDeclaration, but may be the empty tuple if it is not present
paintValue = url | colorDeclaration
예제 #9
0
# Python 2 example script: scrape <A>...</A> anchors from a live page.
# (urllib.urlopen and the print statement are Python 2 only.)
from pyparsing import makeHTMLTags, SkipTo, htmlComment
import urllib

# Fetch the page to scan.
serverListPage = urllib.urlopen("http://www.yahoo.com")
htmlText = serverListPage.read()
serverListPage.close()

# makeHTMLTags returns matchers for the opening and closing <A> tags,
# tolerant of attributes and whitespace.
aStart, aEnd = makeHTMLTags("A")

# Anchor = open tag + link text (captured as 'link') + close tag.
link = aStart + SkipTo(aEnd).setResultsName("link") + aEnd
link.ignore(htmlComment)

# scanString yields (tokens, start, end) for each match; the opening-tag
# attributes are exposed via toks.startA (e.g. .href).
for toks, start, end in link.scanString(htmlText):
    print toks.link, "->", toks.startA.href
예제 #10
0
# vim: set encoding=utf-8
from pyparsing import (LineStart, Literal, OneOrMore, Optional, Regex, SkipTo,
                       srange, Suppress, Word, ZeroOrMore)

from regparser.grammar import atomic, unified
from regparser.grammar.utils import (DocLiteral, keep_pos, Marker,
                                     QuickSearchable)

# A defined term wrapped in smart quotes; the term text (with position,
# via keep_pos) lands in the 'term' result name.
smart_quotes = QuickSearchable(
    Suppress(DocLiteral(u'“', "left-smart-quote")) + keep_pos(
        SkipTo(DocLiteral(u'”', "right-smart-quote"))).setResultsName("term"))

# An <E ...>emphasized term</E> XML element; only the words inside are kept.
e_tag = (
    Suppress(Regex(r"<E[^>]*>")) +
    keep_pos(OneOrMore(Word(srange("[a-zA-Z-]")))).setResultsName("term") +
    Suppress(Literal("</E>")))

# A definition sentence: leading <E> term ('head'), optional conjoined
# extra terms ('tail'), optional parenthetical commas and "this/the term"
# filler, then a defining verb phrase (means / refers to / has the same
# meaning as).
xml_term_parser = QuickSearchable(
    LineStart() + Optional(Suppress(unified.any_depth_p)) +
    e_tag.setResultsName("head") +
    ZeroOrMore((atomic.conj_phrases +
                e_tag).setResultsName("tail", listAllMatches=True)) +
    Suppress(ZeroOrMore(Regex(r",[a-zA-Z ]+,"))) +
    Suppress(ZeroOrMore((Marker("this") | Marker("the")) + Marker("term"))) +
    ((Marker("mean") | Marker("means"))
     | (Marker("refers") + ZeroOrMore(Marker("only")) + Marker("to")) | (
         (Marker("has") | Marker("have")) + Marker("the") + Marker("same") +
         Marker("meaning") + Marker("as"))))

key_term_parser = QuickSearchable(
    LineStart() + Optional(Suppress(unified.any_depth_p)) +
예제 #11
0
# Python 2 example script: hand-rolled <a href=...> grammar (contrast with
# makeHTMLTags).  urllib.urlopen and the print statement are Python 2 only.
from pyparsing import Literal,Suppress,CharsNotIn,CaselessLiteral,\
        Word,dblQuotedString,alphanums,SkipTo
import urllib
import pprint

# Define the pyparsing grammar for a URL, that is:
#    URLlink ::= <a href= URL>linkText</a>
#    URL ::= doubleQuotedString | alphanumericWordPath
# Note that whitespace may appear just about anywhere in the link.  Note also
# that it is not necessary to explicitly show this in the pyparsing grammar; by default,
# pyparsing skips over whitespace between tokens.
linkOpenTag = (Literal("<") + "a" + "href" + "=").suppress() + \
                ( dblQuotedString | Word(alphanums+"/") ) + \
                Suppress(">")
linkCloseTag = Literal("<") + "/" + CaselessLiteral("a") + ">"
link = linkOpenTag + SkipTo(linkCloseTag) + linkCloseTag.suppress()

# Go get some HTML with some links in it.
serverListPage = urllib.urlopen("http://www.yahoo.com")
htmlText = serverListPage.read()
serverListPage.close()

# scanString is a generator that loops through the input htmlText, and for each
# match yields the tokens and start and end locations (for this application, we are
# not interested in the start and end values).
for toks, strt, end in link.scanString(htmlText):
    print toks.asList()

# Rerun scanString, but this time create a dict of text:URL key-value pairs.
# Need to reverse the tokens returned by link, using a parse action.
# After this action the tokens come back as [linkText, url].
link.setParseAction(lambda st, loc, toks: [toks[1], toks[0]])
예제 #12
0
    def __init__(self):
        """Build pyparsing extractors for RAxML info-file output.

        Each parser is a Suppress(SkipTo(LABEL)) + Suppress(LABEL) + value
        pattern: scan forward to a known label, drop everything up to and
        including it, and keep only the value that follows.  FLOAT / INT /
        WORD / SPACEDWORD are token parsers defined elsewhere in this module.
        """
        # Labels found in the per-partition model section of the info file.
        self.ALPHA_LABEL = Regex(r'alpha\[\d+\]:')
        self.LNL_LABEL = Literal('Final GAMMA-based Score of best tree')
        self.FRQ_LABEL = Regex(r'Base frequencies: (?=\d+)') ^ \
                         Regex(r'ML estimate base freqs\[\d+\]:')
        self.NAMES_LABEL = Regex(r'Partition: \d+ with name:\s+')
        self.RATES_LABEL = Regex(r'rates\[\d+\].+?:')
        self.MODEL_LABEL = Literal('Substitution Matrix:')
        # One alpha shape parameter per partition.
        self.alpha = OneOrMore(
            Suppress(SkipTo(self.ALPHA_LABEL)) + Suppress(self.ALPHA_LABEL) +
            FLOAT)
        # Single final log-likelihood.
        self.lnl = Suppress(SkipTo(self.LNL_LABEL)) + \
                   Suppress(self.LNL_LABEL) + FLOAT
        # Base frequencies, one group of floats per partition.
        self.frq = OneOrMore(
            Group(
                Suppress(SkipTo(self.FRQ_LABEL)) + Suppress(self.FRQ_LABEL) +
                OneOrMore(FLOAT)))
        # Partition names: rest of the line after the label.
        self.names = OneOrMore(
            Suppress(SkipTo(self.NAMES_LABEL)) + Suppress(self.NAMES_LABEL) +
            CharsNotIn('\n') + Suppress(LineEnd()))
        # Substitution rates, one group per partition.
        self.rates = OneOrMore(
            Group(
                Suppress(SkipTo(self.RATES_LABEL)) +
                Suppress(self.RATES_LABEL) + OneOrMore(FLOAT)))
        self.model = Suppress(SkipTo(self.MODEL_LABEL)) + \
                     Suppress(self.MODEL_LABEL) + WORD

        # Labels for the '-f e' style evaluation output.
        MODEL_LABEL = Literal('Substitution Matrix:')
        SCORE_LABEL = Literal('Final GAMMA  likelihood:')
        BOOT_SCORE_LABEL = Literal('Final ML Optimization Likelihood:')
        DESC_LABEL = Literal('Model Parameters of Partition')
        NAME_LEADIN = Literal(', Name:')
        DATATYPE_LEADIN = Literal(', Type of Data:')
        ALPHA_LEADIN = Literal('alpha:')
        TREELENGTH_LEADIN = Literal('Tree-Length:')
        RATES_LABEL = Regex(r'rate \w <-> \w:')
        FREQS_LABEL = Regex(r'freq pi\(\w\):')

        likelihood = Suppress(
            SkipTo(SCORE_LABEL)) + Suppress(SCORE_LABEL) + FLOAT
        boot_likelihood = Suppress(
            SkipTo(BOOT_SCORE_LABEL)) + Suppress(BOOT_SCORE_LABEL) + FLOAT
        # Partition header: index, name, data type.
        description = Suppress(
            SkipTo(DESC_LABEL)) + Suppress(DESC_LABEL) + INT + Suppress(
                NAME_LEADIN) + SPACEDWORD + Suppress(DATATYPE_LEADIN) + WORD
        treelen = Suppress(
            SkipTo(TREELENGTH_LEADIN)) + Suppress(TREELENGTH_LEADIN) + FLOAT

        alpha = Suppress(SkipTo(ALPHA_LEADIN)) + Suppress(ALPHA_LEADIN) + FLOAT

        # Per-partition exchange rates and base frequencies.
        rates = OneOrMore(
            Group(
                Suppress(SkipTo(RATES_LABEL)) + Suppress(RATES_LABEL) +
                OneOrMore(FLOAT)))
        freqs = OneOrMore(
            Group(
                Suppress(SkipTo(FREQS_LABEL)) + Suppress(FREQS_LABEL) +
                OneOrMore(FLOAT)))

        # output of running different set of raxml analysis
        # Tree-certainty (TC/TCA) labels, stochastic and uniform variants.
        self.TC_STOCHBI_LABEL = Literal(
            'Tree certainty under stochastic bipartition '
            'adjustment for this tree:')
        self.RTC_STOCHBI_LABEL = Literal(
            'Relative tree certainty under stochastic bipartition adjustment for this tree:'
        )
        self.TCA_STOCHBI_LABEL = Literal(
            'Tree certainty including all conflicting bipartitions (TCA) under '
            'stochastic bipartition adjustment for this tree:')
        self.RTCA_STOCHBI_LABEL = Literal(
            'Relative tree certainty including all conflicting bipartitions (TCA) '
            'under stochastic bipartition adjustment for this tree:')
        self.TC_UNIBI_LABEL = Literal(
            'Tree certainty under uniform bipartition '
            'adjustment for this tree:')
        self.RTC_UNIBI_LABEL = Literal('Relative tree certainty under uniform '
                                       'bipartition adjustment for this tree:')
        self.TCA_UNIBI_LABEL = Literal(
            'Tree certainty including all conflicting bipartitions (TCA) under '
            'uniform bipartition adjustment for this tree:')
        self.RTCA_UNIBI_LABEL = Literal(
            'Relative tree certainty including all conflicting bipartitions (TCA) '
            'under uniform bipartition adjustment for this tree:')

        # One float extractor per tree-certainty metric.
        self.tc_stochbi = Suppress(SkipTo(self.TC_STOCHBI_LABEL)) + Suppress(
            self.TC_STOCHBI_LABEL) + FLOAT
        self.rtc_stochbi = Suppress(SkipTo(self.RTC_STOCHBI_LABEL)) + Suppress(
            self.RTC_STOCHBI_LABEL) + FLOAT
        self.tca_stochbi = Suppress(SkipTo(self.TCA_STOCHBI_LABEL)) + Suppress(
            self.TCA_STOCHBI_LABEL) + FLOAT
        self.rtca_stochbi = Suppress(SkipTo(
            self.RTCA_STOCHBI_LABEL)) + Suppress(
                self.RTCA_STOCHBI_LABEL) + FLOAT
        self.tc_unibi = Suppress(SkipTo(self.TC_UNIBI_LABEL)) + Suppress(
            self.TC_UNIBI_LABEL) + FLOAT
        self.rtc_unibi = Suppress(SkipTo(self.RTC_UNIBI_LABEL)) + Suppress(
            self.RTC_UNIBI_LABEL) + FLOAT
        self.tca_unibi = Suppress(SkipTo(self.TCA_UNIBI_LABEL)) + Suppress(
            self.TCA_UNIBI_LABEL) + FLOAT
        self.rtca_unibi = Suppress(SkipTo(self.RTCA_UNIBI_LABEL)) + Suppress(
            self.RTCA_UNIBI_LABEL) + FLOAT

        # Use these for flag 'a' option
        self.boot_likelihood = boot_likelihood
        self.freqs = freqs
        self.rates = rates
        self.alpha = alpha
        self.name = description
        self.treelen = treelen

        # Composite parser for '-f e' output: models, overall likelihood,
        # then per-partition (description, alpha, rates, freqs) groups.
        self._dash_f_e_parser = (
            Group(OneOrMore(self.model)) + likelihood + Group(
                OneOrMore(
                    Group(description + alpha + Suppress(TREELENGTH_LEADIN) +
                          Suppress(FLOAT) + Group(OneOrMore(rates)) +
                          Group(OneOrMore(freqs))))))
예제 #13
0
파일: rcgParser.py 프로젝트: rwu17/P6
    def strParsing(self, rcg_string):
        """Parse one line of an RCG (RoboCup game log) file.

        Builds the whole grammar per call and returns the pyparsing result
        for *rcg_string*.  Parse actions on rcgParsing fire as side effects
        (score tracking, frame tracking, end-of-game notification).

        NOTE(review): Word("some literal") matches any run of the characters
        in that string, not the literal itself — Literal/Keyword may have
        been intended throughout; confirm against real log input.
        """
        left_p = Literal("(")
        right_p = Literal(")")
        frame_number = Word(nums)
        teamscore_result_name = Word(alphanums)
        teamscore_result_value = Word(alphanums)
        teamscore_result_score = Word(nums)
        # This needs to be taken care of by AST because some teams have '_' in their names
        teamscore_result = (
            teamscore_result_name + "_" + teamscore_result_value +
            Optional("_" + teamscore_result_score)).setParseAction(
                rcgParsing.get_team_result)

        # Playmode
        # Playmode list
        play_mode_list = (Word(" play_on") ^ Word(" time_over")
                          ^ Word(" free_kick_r") ^ Word(" free_kick_l")
                          ^ Word(" indirect_free_kick_l")
                          ^ Word(" indirect_free_kick_r") ^ Word(" kick_in_l")
                          ^ Word(" kick_in_r") ^ Word(" foul_charge_r")
                          ^ Word(" foul_charge_l") ^ Word(" kick_off_l")
                          ^ Word(" kick_off_r") ^ Word(" corner_kick_l")
                          ^ Word(" corner_kick_r") ^ Word(" offside_r")
                          ^ Word(" offside_l") ^ Word(" foul_charge_l")
                          ^ Word(" foul_charge_r") ^ Word(" goal_kick_l")
                          ^ Word(" goal_kick_r") ^ Word(" penalty_setup_l")
                          ^ Word(" penalty_setup_r") ^ Word(" penalty_ready_l")
                          ^ Word(" penalty_ready_r") ^ Word(" penalty_taken_l")
                          ^ Word(" penalty_taken_r") ^ Word(" penalty_miss_l")
                          ^ Word(" penalty_miss_r") ^ Word(" penalty_score_r")
                          ^ Word(" penalty_score_l"))
        # '(playmode <frame> <mode>)' — goal_notification watches for goals.
        play_mode = (Word("playmode ") + Word(nums) +
                     play_mode_list).setParseAction(
                         rcgParsing.goal_notification)

        # Teamname
        # Alphanumeric segments joined by '-' or '_'.
        team_name = Combine(
            Word(alphanums) +
            Optional(OneOrMore((Literal("-") | Literal("_")) +
                               Word(alphanums))))

        # Teamscore
        # Regular: 2 trailing numbers; penalty shoot-out: 6.
        team_score = Word("team ") + Word(
            nums) + team_name + team_name + Word(nums) * 2
        team_score_penalty = Word("team ") + Word(
            nums) + team_name + team_name + Word(nums) * 6

        # Frame and ball information
        show_frame = Word("show ") + frame_number.setParseAction(
            rcgParsing.get_current_frame)
        # '((b) x y vx vy)' — ball position and velocity.
        ball = left_p + left_p + Literal(
            "b") + right_p + Word(nums + "-.") * 4 + right_p

        # Player information
        # '(<side> <number>)' where side is 'l' or 'r'.
        player_number = left_p + (Word("r") ^ Word("l")) + Word(nums) + right_p

        # Player positions
        player_position = Word(alphanums + "-.")

        # Player view mode - H for high and L for low
        view_mode = left_p + Literal("v") + (
            Word("h") ^ Word("l")) + Word(nums) + right_p
        # '(s stamina effort recovery capacity)'
        stamina = left_p + Literal("s") + Word(nums + "-.") * 4 + right_p

        # Outer flag rules
        flag_pos = Word("lrbtc", max=1)
        field_side = Word("lr", max=1)
        distance_from_center = Word(nums)
        outer_flag = flag_pos + ZeroOrMore(field_side) + distance_from_center

        # Inner flag rules
        inner_flag_pos = Word("lrc", max=1)
        inner_flag = inner_flag_pos + (Word("b") ^ Word("t"))

        # Center flag
        center_flag = Literal("c")
        flag = left_p + Literal("f") + (outer_flag ^ inner_flag
                                        ^ center_flag) + right_p

        # Additional information
        # '(c ...)' — 11 numeric counters per player.
        additional = left_p + Literal("c") + Word(nums + "-.") * 11 + right_p

        # Full player record: number, positions, view, stamina, flags, counts.
        player = left_p + player_number + ZeroOrMore(
            player_position) + view_mode + stamina + ZeroOrMore(
                flag) + additional + right_p

        # Start of game
        # 'ULG5' file-format magic; the param lines are kept as raw text.
        start = Word("ULG5")
        server_param = "server_param " + SkipTo(lineEnd)
        player_param = "player_param " + SkipTo(lineEnd)
        player_type = "player_type " + SkipTo(lineEnd)

        # End game - (msg 6000 1 "(result 201806211300 CYRUS2018_0-vs-HELIOS2018_1)")
        end_game = Word("result") + Word(nums) + teamscore_result + Suppress(
            "-vs-") + teamscore_result + Suppress(right_p) + Suppress(
                '"').setParseAction(rcgParsing.game_has_ended)
        team_graphic = (Word("team_graphic_l")
                        ^ Word("team_graphic_r")) + SkipTo(lineEnd)

        msg = "msg" + frame_number + Word(nums) + Suppress('"') + Suppress(
            left_p) + (end_game | team_graphic)

        # Frame lines
        # A 'show' frame: header + ball + 11 players, then 11 more players.
        frame_line1 = show_frame + ball + (player * 11)
        frame_line2 = (player * 11)

        # Top-level alternation over every recognized line shape.
        read_line = start ^ (left_p +
                             (server_param ^ player_param ^ player_type ^ msg ^
                              ((frame_line1 + frame_line2) ^ play_mode
                               ^ team_score ^ team_score_penalty) + right_p))

        return read_line.parseString(rcg_string)
예제 #14
0
    def _generate_report(self, params):
        """
        _generate_report: generate summary report

        Reads the vConTACT2 genome-by-genome overview CSV, renders it as an
        HTML table (duplicating the table header as a footer, which
        DataFrame.to_html does not emit), uploads the page to Shock, and
        returns a KBaseReport handle.

        This will contain ALL the logic to generate the report, including
        areas that should/will be re-factored later.

        :param params: dict with at least 'workspace_name'.
        :returns: dict with 'report_name' and 'report_ref'.
        """

        # Get a DataFileUtil client for the upload step.
        self.dfu = dfu(self.callback_url)

        # Get filepath of summary file
        summary_fp = os.path.join(os.getcwd(), 'outdir', 'genome_by_genome_overview.csv')

        summary_df = pd.read_csv(summary_fp, header=0, index_col=0)
        html = summary_df.to_html(index=False, classes='my_class table-striped" id = "my_id')

        # Need to file write below
        direct_html = html_template.substitute(html_table=html)

        # Find header so it can be copied to footer, as dataframe.to_html doesn't include footer
        start_header = Literal("<thead>")
        end_header = Literal("</thead>")

        text = start_header + SkipTo(end_header)

        # scanString yields (tokens, start, end); only the last match's
        # rewritten copy (thead -> tfoot) is kept.
        new_text = ''
        for data, start_pos, end_pos in text.scanString(direct_html):
            new_text = ''.join(data).replace(' style="text-align: right;"', '').replace('thead>',
                                                                                        'tfoot>\n  ') + '\n</tfoot>'

        # Get start and end positions to insert new text
        end_tbody = Literal("</tbody>")
        end_table = Literal("</table>")

        insertion_pos = end_tbody + SkipTo(end_table)

        # Splice the footer in right after '</tbody>' (start_pos + 8, where
        # 8 == len('</tbody>')).
        final_html = ''
        for data, start_pos, end_pos in insertion_pos.scanString(direct_html):
            final_html = direct_html[:start_pos + 8] + '\n' + new_text + direct_html[start_pos + 8:]

        # Write the page to a fresh scratch directory and zip it into Shock.
        output_dir = os.path.join(self.scratch, str(uuid.uuid4()))
        self._mkdir_p(output_dir)
        result_fp = os.path.join(output_dir, 'index.html')

        with open(result_fp, 'w') as result_fh:
            result_fh.write(final_html)

        report_shock_id = self.dfu.file_to_shock({
            'file_path': output_dir,
            'pack': 'zip'
        })['shock_id']

        html_report = [{
            'shock_id': report_shock_id,
            'name': os.path.basename(result_fp),
            'label': os.path.basename(result_fp),
            'description': 'HTML summary report for vConTACT2'
        }]

        report_params = {'message': 'Basic message to show in the report',
                         'workspace_name': params['workspace_name'],
                         'html_links': html_report,
                         'direct_html_link_index': 0,
                         'report_object_name': 'vConTACT_report_{}'.format(str(uuid.uuid4())),
                         # Don't use until have files to attach to report
                         # 'file_links': [{}],
                         # Don't use until data objects that are created as result of running app
                         # 'objects_created': [{'ref': matrix_obj_ref,
                         #                      'description': 'Imported Matrix'}],
                         }

        kbase_report_client = KBaseReport(self.callback_url, token=self.token)
        output = kbase_report_client.create_extended_report(report_params)

        report_output = {'report_name': output['name'], 'report_ref': output['ref']}

        return report_output
예제 #15
0
#!/usr/bin/env python

import sys
from pyparsing import (Word, Group, SkipTo, StringEnd, Suppress, ZeroOrMore,
                       alphas, nums, alphanums, printables, oneOf)

# Grammar fragments for the VCD (Value Change Dump) file format.
s = Suppress

# A VCD identifier code.
identifier = Word(printables)('id')

# '$var' payload: type, bit width, id code, reference name.
definition = Word(alphas)('type') + Word(nums)('size') + \
             identifier + Word(printables)('name')

signal = Group(s('$var') + definition + s('$end'))('signal')

# Generic '$<keyword> ... $end' section body.
content = SkipTo('$end')('content') + s('$end')
section = Group(s('$') + Word(alphas)('name') + content)('section')

# Timescale, e.g. '$timescale 1ns $end'.
# NOTE(review): only a magnitude of 1 is accepted; VCD also allows 10/100.
unit = s('1') + oneOf('s ms ns us ps fs')
timescale = (s('$timescale') + unit + s('$end'))('timescale')

scope = Group(s('$scope module') + Word(printables)('module') +
              s('$end'))('scope')
upscope = Group(s('$upscope') + s(content))('upscope')

enddefinitions = s('$enddefinitions' + content)

# '#<n>' simulation timestamp.
time = s('#') + Word(nums)('time')

# Scalar value change: one of the nine std_logic states.
# Fixed: the original "H-" made 'H-' a single oneOf alternative (oneOf
# splits on whitespace), so a lone 'H' or '-' value could never match.
std_logic = oneOf('U X 0 1 Z W L H -')('std_logic')
# Vector value change: leading 'b', then a run of std_logic characters.
std_logic_vector = Word('b', 'UX01ZWLH-')('std_logic_vector')
예제 #16
0
    porter = PorterStemmer()
    snowball = SnowballStemmer("english")
    isri = ISRIStemmer()
    rslp = RSLPStemmer()
    porter2 = Stemmer('english')

    endOfString = StringEnd()
    prefix = oneOf(
        "uni inter intro de con com anti pre pro per an ab ad af ac at as re in im ex en em un dis over sub syn out thermo philo geo for fore back"
    )
    suffix = oneOf("ish")
    #suffix = oneOf("or er ed ish ian ary ation tion al ing ible able ate ly ment ism ous ness ent ic ive "
    #               "ative tude ence ance ise ant age cide ium ion")

    word = (Optional(prefix)("prefixes") +
            SkipTo(suffix | suffix + FollowedBy(endOfString)
                   | endOfString)("root") +
            ZeroOrMore(suffix | suffix + FollowedBy(endOfString))("suffix"))
    #word = (Optional(prefix)("prefixes") + SkipTo(FollowedBy(endOfString))("root"))

    for wd in wordlist:
        print wd
        stem = lanster.stem(wd)
        print "LansterStemmer:" + stem
        print "PorterStemmer2:" + porter2.stemWord(wd)
        #res = word.parseString(stem)
        #print res.dump()
        #print

finally:
    file.close()
예제 #17
0
    s_list_append = s_list.append  #lookup append func once, instead of many times
    for tok in toks:
        if isinstance(tok, basestring):  #See if it's a string
            s_list_append(' ' + tok)
        else:
            #If it's not a string
            s_list_append(normalise_templates(tok))
    s_list_append(' >')
    return ''.join(s_list)


#Skip pairs of brackets.
# Template argument lists '<...>', fully nested; the parse action flattens
# the nested result back into a string.
angle_bracket_pair = nestedExpr(
    opener='<', closer='>').setParseAction(turn_parseresults_to_list)
#TODO Fix for nesting brackets
# SkipTo does not balance nested '(' / '[' — see TODO above.
parentheses_pair = LPAR + SkipTo(RPAR) + RPAR
square_bracket_pair = LBRACK + SkipTo(RBRACK) + RBRACK

#The raw type of the input, i.e. 'int' in (unsigned const int * foo)
#TODO I guess this should be a delimited list (by '::') of name and angle brackets
input_type = Combine(
    Word(alphanums + ':_') +
    Optional(angle_bracket_pair + Optional(Word(alphanums + ':_'))))

#A number. e.g. -1, 3.6 or 5
number = Word('-.' + nums)

#The name of the argument. We will ignore this but it must be matched anyway.
input_name = OneOrMore(
    Word(alphanums + '_') | angle_bracket_pair | parentheses_pair
    | square_bracket_pair)
예제 #18
0
 def get_symptom(self):
     """Build the crash-symptom matcher.

     Returns a pyparsing expression that captures everything from the
     'KERNEL:' banner line up to (and including) the 'crash> quit' line,
     with the quit marker itself suppressed from the combined text.
     """
     end_marker = Suppress(Literal("crash> quit"))
     banner = Regex(".*KERNEL:")
     return Combine(banner + SkipTo(end_marker, include=True))
예제 #19
0
파일: python.py 프로젝트: yodamaster/undebt
# Augmented/assignment operators like '+=', '|=', plain '=' — but not '=='.
ASSIGN_OP = Combine((Word("~%^&*-+|/") | ~Literal("==")) + Literal("="))

# Prefix operators: runs of ~ - + and the 'not' keyword, space-joined.
UNARY_OP = addspace(OneOrMore(Word("~-+") | Keyword("not")))

# Infix operators: symbolic runs or keyword combos (and/or/is/not/in),
# explicitly excluding anything ASSIGN_OP would match.
BINARY_OP = ~ASSIGN_OP + (
    Word("!%^&*-+=|/<>")
    | Keyword("and")
    | Keyword("or")
    | addspace(OneOrMore(Keyword("is")
                         | Keyword("not")
                         | Keyword("in"))))

OP = ASSIGN_OP | UNARY_OP | BINARY_OP

# Attribute access, call, or subscript suffixes chained after an atom.
TRAILER = DOT + NAME | PARENS | BRACKETS
TRAILERS = condense(ZeroOrMore(TRAILER))

ATOM_BASE = NAME | NUM | PARENS | BRACKETS | BRACES | STRING
ATOM = condense(ATOM_BASE + TRAILERS)
UNARY_OP_ATOM = addspace(Optional(UNARY_OP) + ATOM)

# Binary expression: atoms joined by infix operators.
EXPR = addspace(UNARY_OP_ATOM + ZeroOrMore(BINARY_OP + UNARY_OP_ATOM))

# File header: leading docstrings, comments, and import lines, captured as
# the original source text.
HEADER = originalTextFor(
    START_OF_FILE + ZeroOrMore(SKIP_TO_TEXT +
                               (STRING
                                | pythonStyleComment
                                | Optional(Keyword("from") + DOTTED_NAME) +
                                Keyword("import") + SkipTo(NO_BS_NL)) + NL))
예제 #20
0
 def __init__(self):
     """Build the grammar and make it skip line comments."""
     self.parser = self.__init_parser()
     # Ignore everything from a comment marker to the end of the line.
     line_comment = T_COM + SkipTo(lineEnd)
     self.parser.ignore(line_comment)
예제 #21
0
파일: amply.py 프로젝트: willu47/amply
    + END
)
param_tabbing_stmt.setParseAction(ParamTabbingStmt)

# 'param <name> [domain] <default>;' declaration.
param_def_stmt = (
    KW_PARAM
    + symbol.setResultsName("name")
    + Optional(subscript_domain)
    + param_default
    + END
)
param_def_stmt.setParseAction(ParamDefStmt)

# Any top-level AMPL-data statement; whole input is zero or more of them.
stmts = set_stmt | set_def_stmt | param_def_stmt | param_stmt | param_tabbing_stmt
grammar = ZeroOrMore(stmts) + StringEnd()
# '#' comments and anything after 'end;' are ignored to end of line.
grammar.ignore("#" + SkipTo(lineEnd))
grammar.ignore("end;" + SkipTo(lineEnd))


class Amply(object):
    """
    Data parsing interface
    """

    def __init__(self, s=""):
        """
        Create an Amply parser instance

        @param s (default ""): initial string to parse
        """
예제 #22
0
#   Only used as the top of the appendix hierarchy
a1 = Word(string.digits).setResultsName("a1")
aI = Word("IVXLCDM").setResultsName("aI")

#   Catches the A in 12A but not in 12Awesome
markerless_upper = Word(string.ascii_uppercase).setResultsName(
    'markerless_upper') + ~FollowedBy(Word(string.ascii_lowercase))

# Parenthesized markers: (A), (a), (1).
paren_upper = parenthesize(string.ascii_uppercase, "paren_upper")
paren_lower = parenthesize(string.ascii_lowercase, "paren_lower")
paren_digit = parenthesize(string.digits, "paren_digit")

# Period-terminated markers: A., a., 1.
period_upper = decimalize(string.ascii_uppercase, "period_upper")
period_lower = decimalize(string.ascii_lowercase, "period_lower")
period_digit = decimalize(string.digits, "period_digit")

# Section heading: marker + citation, consuming the rest of the line.
section = (atomic.section_marker.copy().leaveWhitespace() +
           unified.part_section + SkipTo(LineEnd()))

# Paragraph heading starting with a bare section citation.
par = (atomic.section.copy().leaveWhitespace() + unified.depth1_p +
       SkipTo(LineEnd()))

# Same, but with an explicit paragraph marker in front.
marker_par = (atomic.paragraph_marker.copy().leaveWhitespace() +
              atomic.section + unified.depth1_p)

# Appendix heading.
appendix = (atomic.appendix_marker.copy().leaveWhitespace() + atomic.appendix +
            SkipTo(LineEnd()))

# Any recognized heading, anchored at the start of a line.
headers = utils.QuickSearchable(LineStart() +
                                (section | marker_par | par | appendix))
예제 #23
0
    s["content"] = toks[0]


def set_head(string, loc, toks):
    """Parse action: tag the current item (module dict ``s``) as a headline."""
    s["type"] = type_head


def set_subhead(string, loc, toks):
    """Parse action: tag the current item (module dict ``s``) as a subhead."""
    s["type"] = type_subhead


def set_title(string, loc, toks):
    """Parse action: tag the current item (module dict ``s``) as a title."""
    s["type"] = type_title


# '[label]' annotation; each parse action records into the shared dict `s`.
Label = (Literal("[") + SkipTo(']') + "]").setParseAction(set_label)
# Free text of the item (letters, digits, spaces).
Sentence = (Word(alphanums + " ")).setParseAction(set_content)
# '+ [labels] text'
Subhead = (Literal("+") + ZeroOrMore(Label) +
           Sentence).setParseAction(set_subhead)
# '<n>. [labels] text'
Headline = (Word(nums) + Literal(".") + ZeroOrMore(Label) +
            Sentence).setParseAction(set_head)
# '* text'
Title = (Literal("*") + Sentence).setParseAction(set_title)

# One input line is exactly one of the three item kinds.
Line = StringStart() + (Subhead | Headline | Title)

# Read a todo list, generate a schedule for the scheduler to optimize.
curr_headline = ""

items = []

for line in open(sys.argv[1], "r"):
예제 #24
0
    print(sexp.parseString(t))
    print()

# Load the sample LaTeX document to search for \tikzfig / \xymatrix macros.
with open('../tests/tex_files/reinhardt/reinhardt-optimal-control.tex', 'r') as rein_file:
    rein = rein_file.read()
#with open('../tests/tex_files/short_xymatrix_example.tex') as xymatrix_file:
#    short_example = xymatrix_file.read()
#with open('../../stacks-tests/orig/perfect.tex') as xymatrix_file:
#    stacks_example = xymatrix_file.read()

# +
cstikzfig = oneOf(["\\tikzfig", "\\mathcal"]).suppress()
lbrace = Literal('{').suppress()
rbrace = Literal('}').suppress()
parens = Word("()%\\")
# First binding is dead: `inside` is immediately rebound two lines below.
inside = SkipTo(rbrace)
allchars = Word(printables, excludeChars="{}")
inside = ZeroOrMore(allchars)
inside.setParseAction(lambda tok: " ".join(tok))
# Brace-balanced body: words and nested {...} groups, joined with spaces.
content = Forward()
content << OneOrMore(allchars|(lbrace + ZeroOrMore(content) + rbrace))
#content << (allchars + lbrace + ZeroOrMore(content) + rbrace)
content.setParseAction(lambda tok: " ".join(tok))
# \tikzfig{name}{label}{body}
tikzfig = cstikzfig + lbrace + inside + rbrace + lbrace + inside + rbrace + lbrace + content + rbrace

csxymatrix = oneOf(["\\xymatrix","\\mathcal"]).suppress()
xymatrix = csxymatrix + lbrace + content + rbrace

search_res = tikzfig.searchString(rein)
# NOTE(review): `short_example` is only assigned in the commented-out block
# above, so this line raises NameError as written — re-enable the load or
# search `rein` instead.
search_res = xymatrix.searchString(short_example)
Example #25
0
    def parse(cls,
              content,
              basedir=None,
              resolve=True,
              unresolved_value=DEFAULT_SUBSTITUTION):
        """parse a HOCON content

        :param content: HOCON content to parse
        :type content: basestring
        :param basedir: directory used to resolve relative ``include`` paths
        :param resolve: if true, resolve substitutions
        :type resolve: boolean
        :param unresolved_value: assigned value to unresolved substitution.
        If overridden with a default value, it will replace all unresolved values by the default value.
        If it is set to pyhocon.STR_SUBSTITUTION then it will replace the value by its substitution expression (e.g., ${x})
        :type unresolved_value: boolean
        :return: a ConfigTree or a list
        """

        unescape_pattern = re.compile(r'\\.')

        def replace_escape_sequence(match):
            # Map a recognized escape sequence to its character; leave any
            # sequence not in cls.REPLACEMENTS unchanged.
            value = match.group(0)
            return cls.REPLACEMENTS.get(value, value)

        def norm_string(value):
            # Expand every backslash escape in a raw string value.
            return unescape_pattern.sub(replace_escape_sequence, value)

        def unescape_string(tokens):
            # Parse action: wrap an unquoted string token after unescaping.
            return ConfigUnquotedString(norm_string(tokens[0]))

        def parse_multi_string(tokens):
            # remove the first and last 3 "
            return tokens[0][3:-3]

        def convert_number(tokens):
            # Parse action: prefer int, fall back to float.
            n = tokens[0]
            try:
                return int(n, 10)
            except ValueError:
                return float(n)

        # ${path} or ${?path} for optional substitution
        SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

        def create_substitution(instring, loc, token):
            # Re-match the token to split out the variable name, the optional
            # '?' marker and any trailing whitespace.
            match = re.match(SUBSTITUTION_PATTERN, token[0])
            variable = match.group('variable')
            ws = match.group('ws')
            optional = match.group('optional') == '?'
            substitution = ConfigSubstitution(variable, optional, ws, instring,
                                              loc)
            return substitution

        # double-quoted string with escapes, capturing trailing whitespace
        STRING_PATTERN = '"(?P<value>(?:[^"\\\\]|\\\\.)*)"(?P<ws>[ \t]*)'

        def create_quoted_string(instring, loc, token):
            # strip the surrounding quotes and unescape the contents
            match = re.match(STRING_PATTERN, token[0])
            value = norm_string(match.group('value'))
            ws = match.group('ws')
            return ConfigQuotedString(value, ws, instring, loc)

        def include_config(instring, loc, token):
            # Resolve an `include` directive into a parsed sub-config,
            # honoring the optional required(...) wrapper.
            url = None
            file = None
            required = False

            if token[0] == 'required':
                required = True
                final_tokens = token[1:]
            else:
                final_tokens = token

            if len(final_tokens) == 1:  # include "test"
                value = final_tokens[0].value if isinstance(
                    final_tokens[0], ConfigQuotedString) else final_tokens[0]
                if value.startswith("http://") or value.startswith(
                        "https://") or value.startswith("file://"):
                    url = value
                else:
                    file = value
            elif len(final_tokens) == 2:  # include url("test") or file("test")
                value = final_tokens[1].value if isinstance(
                    token[1], ConfigQuotedString) else final_tokens[1]
                if final_tokens[0] == 'url':
                    url = value
                else:
                    file = value

            if url is not None:
                logger.debug('Loading config from url %s', url)
                obj = ConfigFactory.parse_URL(url,
                                              resolve=False,
                                              required=required,
                                              unresolved_value=NO_SUBSTITUTION)
            elif file is not None:
                path = file if basedir is None else os.path.join(basedir, file)
                logger.debug('Loading config from file %s', path)
                obj = ConfigFactory.parse_file(
                    path,
                    resolve=False,
                    required=required,
                    unresolved_value=NO_SUBSTITUTION)
            else:
                raise ConfigException(
                    'No file or URL specified at: {loc}: {instring}',
                    loc=loc,
                    instring=instring)

            return ConfigInclude(obj if isinstance(obj, list) else obj.items())

        @contextlib.contextmanager
        def set_default_white_spaces():
            # Temporarily restrict pyparsing whitespace to spaces/tabs so
            # newlines stay significant, restoring the old setting after.
            default = ParserElement.DEFAULT_WHITE_CHARS
            ParserElement.setDefaultWhitespaceChars(' \t')
            yield
            ParserElement.setDefaultWhitespaceChars(default)

        with set_default_white_spaces():
            assign_expr = Forward()
            true_expr = Keyword("true", caseless=True).setParseAction(
                replaceWith(True))
            false_expr = Keyword("false", caseless=True).setParseAction(
                replaceWith(False))
            null_expr = Keyword("null", caseless=True).setParseAction(
                replaceWith(NoneValue()))
            key = QuotedString(
                '"', escChar='\\',
                unquoteResults=False) | Word(alphanums + alphas8bit + '._- /')

            eol = Word('\n\r').suppress()
            eol_comma = Word('\n\r,').suppress()
            comment = (Literal('#') | Literal('//')) - SkipTo(eol
                                                              | StringEnd())
            comment_eol = Suppress(Optional(eol_comma) + comment)
            comment_no_comma_eol = (comment | eol).suppress()
            number_expr = Regex(
                r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
                re.DOTALL).setParseAction(convert_number)

            # multi line string using """
            # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
            multiline_string = Regex(
                '""".*?"*"""',
                re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
            # single quoted line string
            quoted_string = Regex(
                r'"(?:[^"\\\n]|\\.)*"[ \t]*',
                re.UNICODE).setParseAction(create_quoted_string)
            # unquoted string that takes the rest of the line until an optional comment
            # we support .properties multiline support which is like this:
            # line1  \
            # line2 \
            # so a backslash precedes the \n
            unquoted_string = Regex(
                r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*',
                re.UNICODE).setParseAction(unescape_string)
            substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*'
                                      ).setParseAction(create_substitution)
            string_expr = multiline_string | quoted_string | unquoted_string

            value_expr = number_expr | true_expr | false_expr | null_expr | string_expr

            # include "x" | include url("x") | include file("x"),
            # optionally wrapped in required(...)
            include_content = (quoted_string | (
                (Keyword('url') | Keyword('file')) - Literal('(').suppress() -
                quoted_string - Literal(')').suppress()))
            include_expr = (Keyword("include", caseless=True).suppress() +
                            (include_content |
                             (Keyword("required") - Literal('(').suppress() -
                              include_content - Literal(')').suppress()))
                            ).setParseAction(include_config)

            root_dict_expr = Forward()
            dict_expr = Forward()
            list_expr = Forward()
            multi_value_expr = ZeroOrMore(comment_eol | include_expr
                                          | substitution_expr | dict_expr
                                          | list_expr | value_expr
                                          | (Literal('\\') - eol).suppress())
            # for a dictionary : or = is optional
            # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
            inside_dict_expr = ConfigTreeParser(
                ZeroOrMore(comment_eol | include_expr | assign_expr
                           | eol_comma))
            inside_root_dict_expr = ConfigTreeParser(
                ZeroOrMore(comment_eol | include_expr | assign_expr
                           | eol_comma),
                root=True)
            dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
            root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress(
                '}')
            list_entry = ConcatenatedValueParser(multi_value_expr)
            list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(
                eol_comma - list_entry)) - Suppress(']')

            # special case when we have a value assignment where the string can potentially be the remainder of the line
            assign_expr << Group(key - ZeroOrMore(comment_no_comma_eol) - (
                dict_expr | (Literal('=') | Literal(':') | Literal('+=')) -
                ZeroOrMore(comment_no_comma_eol) -
                ConcatenatedValueParser(multi_value_expr)))

            # the file can be { ... } where {} can be omitted or []
            config_expr = ZeroOrMore(comment_eol | eol) + (
                list_expr | root_dict_expr
                | inside_root_dict_expr) + ZeroOrMore(comment_eol | eol_comma)
            config = config_expr.parseString(content, parseAll=True)[0]

            if resolve:
                # Unresolved substitutions are tolerated only when the caller
                # supplied a specific replacement value.
                allow_unresolved = resolve and unresolved_value is not DEFAULT_SUBSTITUTION and unresolved_value is not MANDATORY_SUBSTITUTION
                has_unresolved = cls.resolve_substitutions(
                    config, allow_unresolved)
                if has_unresolved and unresolved_value is MANDATORY_SUBSTITUTION:
                    raise ConfigSubstitutionException(
                        'resolve cannot be set to True and unresolved_value to MANDATORY_SUBSTITUTION'
                    )

            if unresolved_value is not NO_SUBSTITUTION and unresolved_value is not DEFAULT_SUBSTITUTION:
                cls.unresolve_substitutions_to_value(config, unresolved_value)
        return config
# URL extractor
# Copyright 2004, Paul McGuire
# NOTE(review): Python 2 code — uses the `print` statement and
# `urllib.urlopen`; it will not run under Python 3 as written.
from pyparsing import Literal,Suppress,CharsNotIn,CaselessLiteral,\
        Word,dblQuotedString,alphanums,SkipTo,makeHTMLTags
import urllib
import pprint

# Define the pyparsing grammar for a URL, that is:
#    URLlink ::= <a href= URL>linkText</a>
#    URL ::= doubleQuotedString | alphanumericWordPath
# Note that whitespace may appear just about anywhere in the link.  Note also
# that it is not necessary to explicitly show this in the pyparsing grammar; by default,
# pyparsing skips over whitespace between tokens.
linkOpenTag, linkCloseTag = makeHTMLTags("a")
link = linkOpenTag + SkipTo(linkCloseTag).setResultsName(
    "body") + linkCloseTag.suppress()

# Go get some HTML with some links in it.
serverListPage = urllib.urlopen("http://www.google.com")
htmlText = serverListPage.read()
serverListPage.close()

# scanString is a generator that loops through the input htmlText, and for each
# match yields the tokens and start and end locations (for this application, we are
# not interested in the start and end values).
for toks, strt, end in link.scanString(htmlText):
    print toks.startA.href, "->", toks.body

# Create dictionary from list comprehension, assembled from each pair of tokens returned
# from a matched URL.
pprint.pprint(
Example #27
0
    def parse(content, basedir=None, resolve=True):
        """parse a HOCON content

        :param content: HOCON content to parse
        :type content: basestring
        :param basedir: directory used to resolve relative ``include`` paths
        :param resolve: If true, resolve substitutions
        :type resolve: boolean
        :return: a ConfigTree or a list
        """
        def norm_string(value):
            # Expand every escape sequence listed in ConfigParser.REPLACEMENTS.
            for k, v in ConfigParser.REPLACEMENTS.items():
                value = value.replace(k, v)
            return value

        def unescape_string(tokens):
            # Parse action: wrap an unquoted string token after unescaping.
            return ConfigUnquotedString(norm_string(tokens[0]))

        def parse_multi_string(tokens):
            # remove the first and last 3 "
            return tokens[0][3:-3]

        def convert_number(tokens):
            # Parse action: prefer int, fall back to float.
            n = tokens[0]
            try:
                return int(n)
            except ValueError:
                return float(n)

        # ${path} or ${?path} for optional substitution
        SUBSTITUTION = "\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>\s*)"

        def create_substitution(instring, loc, token):
            # remove the ${ and }
            match = re.match(SUBSTITUTION, token[0])
            variable = match.group('variable')
            ws = match.group('ws')
            optional = match.group('optional') == '?'
            substitution = ConfigSubstitution(variable, optional, ws, instring,
                                              loc)
            return substitution

        def include_config(token):
            # Resolve an `include` directive into a parsed sub-config.
            url = None
            file = None
            if len(token) == 1:  # include "test"
                if token[0].startswith("http://") or token[0].startswith(
                        "https://") or token[0].startswith("file://"):
                    url = token[0]
                else:
                    file = token[0]
            elif len(token) == 2:  # include url("test") or file("test")
                if token[0] == 'url':
                    url = token[1]
                else:
                    file = token[1]

            if url is not None:
                logger.debug('Loading config from url %s', url)
                obj = ConfigFactory.parse_URL(url, resolve=False)

            if file is not None:
                path = file if basedir is None else os.path.join(basedir, file)
                logger.debug('Loading config from file %s', path)
                obj = ConfigFactory.parse_file(path,
                                               required=False,
                                               resolve=False)

            # NOTE(review): if neither url nor file was assigned (a token
            # shape other than the two handled above), `obj` is unbound here
            # and the next line raises UnboundLocalError — confirm inputs
            # cannot reach this state.
            return ConfigInclude(obj if isinstance(obj, list) else obj.items())

        # HOCON is newline-sensitive: only spaces/tabs are skippable whitespace.
        ParserElement.setDefaultWhitespaceChars(' \t')

        assign_expr = Forward()
        true_expr = Keyword("true",
                            caseless=True).setParseAction(replaceWith(True))
        false_expr = Keyword("false",
                             caseless=True).setParseAction(replaceWith(False))
        null_expr = Keyword("null", caseless=True).setParseAction(
            replaceWith(NoneValue()))
        key = QuotedString('"', escChar='\\',
                           unquoteResults=False) | Word(alphanums + '._- ')

        eol = Word('\n\r').suppress()
        eol_comma = Word('\n\r,').suppress()
        comment = (Literal('#') | Literal('//')) - SkipTo(eol)
        comment_eol = Suppress(Optional(eol_comma) + comment)
        comment_no_comma_eol = (comment | eol).suppress()
        number_expr = Regex(
            '[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE]\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
            re.DOTALL).setParseAction(convert_number)

        # multi line string using """
        # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
        multiline_string = Regex(
            '""".*?"""',
            re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
        # single quoted line string
        quoted_string = QuotedString(quoteChar='"',
                                     escChar='\\',
                                     multiline=True)
        # unquoted string that takes the rest of the line until an optional comment
        # we support .properties multiline support which is like this:
        # line1  \
        # line2 \
        # so a backslash precedes the \n
        unquoted_string = Regex(
            r'(\\[ \t]*[\r\n]|[^\[\{\n\r\]\}#,=\$])+?(?=($|\$|[ \t]*(//|[\}\],#\n\r])))',
            re.DOTALL).setParseAction(unescape_string)
        substitution_expr = Regex('[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(
            create_substitution)
        string_expr = multiline_string | quoted_string | unquoted_string

        value_expr = number_expr | true_expr | false_expr | null_expr | string_expr

        # include "x" | include url("x") | include file("x")
        include_expr = (Keyword("include", caseless=True).suppress() - (
            quoted_string | (
                (Keyword('url') | Keyword('file')) - Literal('(').suppress() - quoted_string - Literal(')').suppress()))) \
            .setParseAction(include_config)

        dict_expr = Forward()
        list_expr = Forward()
        multi_value_expr = ZeroOrMore((Literal('\\') - eol).suppress()
                                      | comment_eol | include_expr
                                      | substitution_expr | dict_expr
                                      | list_expr | value_expr)
        # for a dictionary : or = is optional
        # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
        inside_dict_expr = ConfigTreeParser(
            ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
        dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
        list_entry = ConcatenatedValueParser(multi_value_expr)
        list_expr << Suppress('[') - ListParser(
            list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

        # special case when we have a value assignment where the string can potentially be the remainder of the line
        assign_expr << Group(key - ZeroOrMore(comment_no_comma_eol) -
                             (dict_expr
                              | Suppress(Literal('=') | Literal(':')) -
                              ZeroOrMore(comment_no_comma_eol) -
                              ConcatenatedValueParser(multi_value_expr)))

        # the file can be { ... } where {} can be omitted or []
        config_expr = ZeroOrMore(comment_eol | eol) + (
            list_expr | dict_expr | inside_dict_expr) + ZeroOrMore(comment_eol
                                                                   | eol_comma)
        config = config_expr.parseString(content, parseAll=True)[0]
        if resolve:
            ConfigParser.resolve_substitutions(config)
        return config
Example #28
0
def make_parser():
    """Build and return the pyparsing grammar for a rules file.

    A rules file is a sequence of [name]-headed rules, each with a kind
    clause, zero or more match clauses, and one or more action clauses.
    """
    # Newlines are significant, so only spaces/tabs are skippable whitespace.
    ParserElement.setDefaultWhitespaceChars(' \t')

    EOL = OneOrMore(LineEnd()).suppress().setName("end of line")
    Spaces = OneOrMore(" ").suppress()

    # NOTE: These are not all 'printable' Unicode characters.
    # If needed, expand the alphas_extra variable.
    alphas_extra = ''.join(chr(x) for x in range(0x100, 0x350))
    chars = printables + alphas8bit + alphas_extra
    Token = Word(chars)

    InlineComment = '#' - SkipTo(EOL)
    WholelineComment = LineStart() + '#' - restOfLine - EOL

    Argument = Token('arg').setName('argument')
    Variable = Token('var').setName('variable')

    # "kind is url|raw|text"
    KindObject = Keyword('kind')('object')
    KindVerb = Keyword('is')('verb')
    Kind = Named(Keyword('url') | Keyword('raw') | Keyword('text'))('arg')

    MatchObject = Named(Keyword('arg'))('object')
    data = Named(Keyword('data'))('object')
    MatchVerb = Named(
        Keyword('is') | Keyword('istype') | Keyword('matches')
        | Keyword('rewrite'))('verb').setName('verb')
    # A pattern is one or more indented argument lines.
    Pattern = Named(Group(OneOrMore(Spaces + Argument +
                                    EOL)))('arg').leaveWhitespace()

    ActionObject = Keyword('plumb')('object')
    ActionVerb = Named(
        Keyword('run') | Keyword('notify') | Keyword('download'))('verb')
    Action = Named(originalTextFor(OneOrMore(Argument)))('arg')

    ArgMatchClause = Group(MatchObject - MatchVerb - Variable - Pattern)
    DataMatchClause = Group(data - MatchVerb - Pattern)

    # Transform every 'data match' rule to an equivalent 'arg match' rule
    def data_to_arg(toks):
        assert (len(toks) == 1)
        toks[0][0] = 'arg'
        toks[0].insert(2, '{data}')
        return toks

    DataMatchClause.setParseAction(data_to_arg)

    KindClause = Group(KindObject - KindVerb - Kind) - EOL
    MatchClause = (DataMatchClause | ArgMatchClause)
    ActionClause = Group(ActionObject - ActionVerb - Action) - EOL

    MatchBlock = Group(ZeroOrMore(MatchClause('match-clause')))
    ActionBlock = Group(OneOrMore(ActionClause('action-clause')))

    # TODO: allow the excluded chars if they are escaped.
    RuleName = Word(chars, excludeChars='{ } [ ]')('rule-name')
    RuleHeading = Suppress('[') - RuleName - Suppress(']') - EOL
    Rule = Group(RuleHeading - KindClause('kind-clause') -
                 MatchBlock('match-block') - ActionBlock('action-block'))
    RulesFile = OneOrMore(Rule)
    RulesFile.ignore(WholelineComment)
    RulesFile.ignore(InlineComment)

    # The setName calls below only improve pyparsing's error messages.
    for v in [MatchObject, ActionObject]:
        v.setName('object')

    for v in [MatchVerb, ActionVerb]:
        v.setName('verb')

    Kind.setName('kind')
    data.setName('object')
    Pattern.setName('pattern')
    Action.setName('action or url')
    KindClause.setName('kind clause')
    MatchClause.setName('match clause')
    ActionClause.setName('action clause')
    MatchBlock.setName('match block')
    ActionBlock.setName('action block')
    Rule.setName('rule')
    RuleName.setName('rule name')
    RulesFile.setName('rules')

    return RulesFile
Example #29
0
# Journal-file grammar: only spaces/tabs are skippable whitespace so that
# line boundaries and indentation stay significant.
ws = ' \t'
ParserElement.setDefaultWhitespaceChars(ws)

EOL = LineEnd().suppress()
SOL = LineStart().leaveWhitespace()
blankline = SOL + LineEnd()

# A line with no leading whitespace vs. an indented line.
noIndentation = SOL + ~Word(ws).leaveWhitespace().suppress()
indentation = SOL + Word(ws).leaveWhitespace().suppress()

# Date in YYYY-MM-DD form, kept as a single token.
date = Combine(
    Word(nums, exact=4) + '-' + Word(nums, exact=2) + '-' +
    Word(nums, exact=2))

# Description: everything up to a ';' comment or the end of the line.
description = SkipTo(';' | EOL)

# Account names end at two spaces, a ';' comment, or a newline.
accountName = SkipTo(Literal('  ') | Literal(';') | Literal('\n'))
currency = Word(alphas + '£$')
number = Word(nums + '-.,')
amount = currency('currency') + number('value')
# A posting: an indented account name with an optional amount.
postingLine = (indentation + accountName('account') +
               Optional(amount)('amount') + restOfLine + EOL)
postings = OneOrMore(Group(postingLine))

# A transaction: date + description line followed by its postings.
transaction = (date('date') + description('description') + EOL +
               Group(postings)('postings'))

# # Single statements
# keyword = Word(alphanums)
# singleValue = restOfLine
Example #30
0
File: parser.py  Project: SCM-NV/qmflows
def skipSupress(z: str) -> ParserElement:
    """Advance past everything up to `z`, discarding the skipped text."""
    skipped = SkipTo(z)
    return Suppress(skipped)
Example #31
0
# URL extractor
# Copyright 2004, Paul McGuire
from pyparsing import makeHTMLTags, SkipTo, pyparsing_common
import urllib.request
from contextlib import closing
import pprint

# Grammar for an HTML anchor: opening <a ...> tag, body text, closing </a>.
linkOpenTag, linkCloseTag = makeHTMLTags('a')

# The body is everything up to the closing tag; strip any inner HTML markup
# and collapse runs of whitespace into single spaces.
linkBody = SkipTo(linkCloseTag)
linkBody.setParseAction(pyparsing_common.stripHTMLTags)
linkBody.addParseAction(lambda t: ' '.join(t[0].strip().split()))

link = linkOpenTag + linkBody("body") + linkCloseTag.suppress()

# Fetch a page that contains some links.
with closing(urllib.request.urlopen("http://www.yahoo.com")) as serverListPage:
    htmlText = serverListPage.read().decode("UTF-8")

# scanString lazily yields (tokens, start, end) for every match in the page;
# the start/end locations are not needed here.
for tokens, _start, _end in link.scanString(htmlText):
    print(tokens.asList())

# Build a mapping from each link body to its href target.
pprint.pprint(
    {match.body: match.href for match, _s, _e in link.scanString(htmlText)}
)
Example #32
0
File: parser.py  Project: SCM-NV/qmflows
def parse_section(start: str, end: str) -> ParserElement:
    """Read the lines from `start` to `end`."""
    open_marker = Literal('{}'.format(start))
    close_marker = Literal('{}'.format(end))
    # Discard everything before the opening marker, skip its line,
    # then capture up to the closing marker.
    return Suppress(SkipTo(open_marker)) + skipLine + SkipTo(close_marker)
Example #33
0
        print(len(input_text.split("\n")))
        for line in input_text.split("\n"):
            # print("Word " + str(i) + " --- " + line)
            line_p = line.replace("\xa0", " ")
            # line_p = unicodedata.normalize("NFC", line)
            word_text.append(line_p)

# Bold text: alpha words between ** markers; a parenthesized word is also
# accepted with its parentheses stripped. Concat/Cleanup are project helpers
# defined elsewhere in the file — presumably they join/normalize the tokens.
word_bold = (Literal("**").suppress() + Concat(
    OneOrMore(
        Word(alphas) ^ Cleanup(
            Literal("(").suppress() + Word(alphas) + Literal(")").suppress())))
             + Literal("**").suppress())

word_def = (
    LineStart() + Optional(Word(nums + " /")).suppress() +
    Concat(SkipTo(Word("►¶"))).setResultsName("definition") + OneOrMore(
        Literal("►").suppress() + NotAny(Literal("►")).suppress() +
        Concat(SkipTo(oneOf(genders) ^ Word("|¶►")
                      ^ LineEnd())).setResultsName("words") +
        Concat(
            Optional(OneOrMore(
                oneOf(genders) + Optional(Literal(" ")).suppress()),
                     default="na").setResultsName("gender")) +
        Optional(
            (
                SkipTo(Literal("¶")).suppress() + Literal("¶").suppress() +
                Concat(SkipTo(Literal("►") ^ LineEnd()))
                # SkipTo(Word("►¶")).suppress()
            ).setResultsName("sources"),
            default="na")) + Optional(
                (SkipTo(Literal("►►")).suppress() + Literal("►►").suppress() +
from pyparsing import Literal, Word, Optional, Combine, delimitedList, printables, alphas, commaSeparatedList, SkipTo

# Skip everything up to the literal "in", then match a comma-separated list.
expr = SkipTo("in") + commaSeparatedList

reference = "Ted has a beard and moustache in the flashback to him meeting Barney for the first time. He is shown with a goatee in the flashback to 2002 in Double Date, and with similar facial hair in the flashback to Barney's days as Insane Duane's best friend in Symphony of Illumination."

# NOTE(review): Python 2 print statement — a SyntaxError under Python 3.
print expr.parseString( reference )