Example #1
    def define_identifier(self):
        """
        Return the syntax definition for an identifier.
        
        """
        # --- Defining the individual identifiers:
        # Getting all the Unicode numbers in a single string:
        unicode_numbers = "".join(
            [unichr(n) for n in xrange(0x10000) if unichr(n).isdigit()])
        unicode_number_expr = Regex("[%s]" % unicode_numbers, re.UNICODE)
        space_char = re.escape(self._grammar.get_token("identifier_spacing"))
        identifier0 = Regex("[\w%s]+" % space_char, re.UNICODE)
        # Identifiers cannot start with a number:
        identifier0 = Combine(~unicode_number_expr + identifier0)
        identifier0.setName("individual_identifier")

        # --- Defining the namespaces:
        namespace_sep = Suppress(
            self._grammar.get_token("namespace_separator"))
        namespace = Group(ZeroOrMore(identifier0 + namespace_sep))
        namespace.setName("namespace")

        # --- The full identifier, which could have a namespace:
        identifier = Combine(
            namespace.setResultsName("namespace_parts") +
            identifier0.setResultsName("identifier"))
        identifier.setName("full_identifier")

        return identifier
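For context, a minimal standalone sketch of the same pattern (the ':' namespace separator, the '-' spacing character and the ASCII-only digit check are illustrative assumptions; the real tokens come from self._grammar):

import re
from pyparsing import Regex, Combine, Group, Suppress, ZeroOrMore

space_char = re.escape("-")    # stand-in for the "identifier_spacing" token
namespace_sep = Suppress(":")  # stand-in for the "namespace_separator" token
# Simplified to ASCII digits; the method above builds the full Unicode digit set.
identifier0 = Combine(~Regex(r"[0-9]") + Regex(r"[\w%s]+" % space_char, re.UNICODE))
namespace = Group(ZeroOrMore(identifier0 + namespace_sep))
identifier = Combine(namespace.setResultsName("namespace_parts") +
                     identifier0.setResultsName("identifier"))

print(identifier.parseString("acme:tools:wrench"))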
Example #2
 def define_identifier(self):
     """
     Return the syntax definition for an identifier.
     
     """
     # --- Defining the individual identifiers:
     # Getting all the Unicode numbers in a single string:
     unicode_numbers = "".join([unichr(n) for n in xrange(0x10000)
                                if unichr(n).isdigit()])
     unicode_number_expr = Regex("[%s]" % unicode_numbers, re.UNICODE)
     space_char = re.escape(self._grammar.get_token("identifier_spacing"))
     identifier0 = Regex("[\w%s]+" % space_char, re.UNICODE)
     # Identifiers cannot start with a number:
     identifier0 = Combine(~unicode_number_expr + identifier0)
     identifier0.setName("individual_identifier")
     
     # --- Defining the namespaces:
     namespace_sep = Suppress(self._grammar.get_token("namespace_separator"))
     namespace = Group(ZeroOrMore(identifier0 + namespace_sep))
     namespace.setName("namespace")
     
     # --- The full identifier, which could have a namespace:
     identifier = Combine(namespace.setResultsName("namespace_parts") +
                          identifier0.setResultsName("identifier"))
     identifier.setName("full_identifier")
     
     return identifier
Example #3
def nexus_iter(infile):
    import pyparsing
    pyparsing.ParserElement.enablePackrat()
    from pyparsing import Word, Literal, QuotedString, CaselessKeyword, \
         OneOrMore, Group, Optional, Suppress, Regex, Dict
    ## beginblock = Suppress(CaselessKeyword("begin") +
    ##                       CaselessKeyword("trees") + ";")
    ## endblock = Suppress((CaselessKeyword("end") |
    ##                      CaselessKeyword("endblock")) + ";")
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    ## translate = CaselessKeyword("translate").suppress()
    name = Word(string.letters+string.digits+"_.") | QuotedString("'")
    ## ttrec = Group(Word(string.digits).setResultsName("number") +
    ##               name.setResultsName("name") +
    ##               Optional(",").suppress())
    ## ttable = Group(translate + OneOrMore(ttrec) + Suppress(";"))
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() +
            Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") +
            Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))
    ## treesblock = Group(beginblock +
    ##                    Optional(ttable.setResultsName("ttable")) +
    ##                    Group(OneOrMore(tree)) +
    ##                    endblock)

    def not_begin(s): return s.strip().lower() != "begin trees;"
    def not_end(s): return s.strip().lower() not in ("end;", "endblock;")
    def parse_ttable(f):
        ttable = {}
        while True:
            s = f.next().strip()
            if not s: continue
            if s.lower() == ";": break
            if s[-1] == ",": s = s[:-1]
            k, v = s.split()
            ttable[k] = v
            if s[-1] == ";": break
        return ttable
            
    # read lines between "begin trees;" and "end;"
    f = itertools.takewhile(not_end, itertools.dropwhile(not_begin, infile))
    s = f.next().strip().lower()
    if s != "begin trees;":
        print >>sys.stderr, "Expecting 'begin trees;', got %s" % s
        raise StopIteration
    ttable = {}
    while True:
        try: s = f.next().strip()
        except StopIteration: break
        if not s: continue
        if s.lower() == "translate":
            ttable = parse_ttable(f)
            print "ttable: %s" % len(ttable)
        elif s.split()[0].lower()=='tree':
            match = tree.parseString(s)
            yield nexus.Newick(match, ttable)
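As a self-contained illustration, the core tree expression above can be exercised on a single (illustrative) NEXUS tree statement; this sketch uses Python 3's string.ascii_letters in place of the Python 2 string.letters used above:

import string
from pyparsing import (Word, QuotedString, CaselessKeyword, Optional,
                       Suppress, Regex)

comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
name = Word(string.ascii_letters + string.digits + "_.") | QuotedString("'")
newick = Regex(r'[^;]+;')
tree = (CaselessKeyword("tree").suppress() +
        Optional("*").suppress() +
        name.setResultsName("tree_name") +
        comment.setResultsName("tree_comment") +
        Suppress("=") +
        comment.setResultsName("root_comment") +
        newick.setResultsName("newick"))

match = tree.parseString("tree tree1 = [&R] ((taxon1,taxon2),taxon3);")
print(match.tree_name, match.newick)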
Example #4
    def __get_spark_grammar():
        ints = Word(nums)

        date = Optional(Combine(ints + '/' + ints + '/' + ints))
        time = Optional(Combine(ints + ":" + ints + ":" + ints))
        status = Optional(Word(string.ascii_uppercase))
        service = Optional(Word(alphas + nums + '/' + '-' + '_' + '.' + '[' + ']' + ':' + '$'))
        message = Regex('.*')

        spark_grammar = date.setResultsName('date') + time.setResultsName('time') + status.setResultsName('status') + \
            service.setResultsName('service') + message.setResultsName('message')

        return spark_grammar
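A self-contained sketch of the same grammar applied to one (illustrative) Spark-style log line:

import string
from pyparsing import Word, Combine, Optional, Regex, alphas, nums

ints = Word(nums)
date = Optional(Combine(ints + '/' + ints + '/' + ints))
time = Optional(Combine(ints + ":" + ints + ":" + ints))
status = Optional(Word(string.ascii_uppercase))
service = Optional(Word(alphas + nums + '/' + '-' + '_' + '.' + '[' + ']' + ':' + '$'))
message = Regex('.*')

grammar = (date.setResultsName('date') + time.setResultsName('time') +
           status.setResultsName('status') + service.setResultsName('service') +
           message.setResultsName('message'))

line = "17/06/09 20:10:40 INFO executor.Executor: Finished task 0.0 in stage 0.0"
print(grammar.parseString(line).asDict())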
Example #5
    def __get_windows_grammar():
        ints = Word(nums)

        date = Optional(Combine(ints + '-' + ints + '-' + ints))
        time = Optional(Combine(ints + ":" + ints + ":" + ints + ','))
        status = Optional(Word(string.ascii_uppercase + string.ascii_lowercase))
        service = Optional(Word(string.ascii_uppercase))
        message = Regex('.*')

        windows_grammar = date.setResultsName('date') + time.setResultsName('time') + \
            status.setResultsName('status') + service.setResultsName('service') + message.setResultsName('message')

        return windows_grammar
Example #6
File: nexus.py Project: rhr/ivy
def parse_treesblock(infile):
    import string
    from pyparsing import Optional, Word, Regex, CaselessKeyword, Suppress
    from pyparsing import QuotedString
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    name = Word(alphanums+"_") | QuotedString("'")
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() +
            Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") +
            Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))
    ## treesblock = Group(beginblock +
    ##                    Optional(ttable.setResultsName("ttable")) +
    ##                    Group(OneOrMore(tree)) +
    ##                    endblock)

    def parse_ttable(f):
        ttable = {}
        while True:
            s = f.next().strip()
            if s.lower() == ";":
                break
            if s[-1] in ",;":
                s = s[:-1]
            k, v = s.split()
            ttable[k] = v
            if s[-1] == ";":
                break
        return ttable

    ttable = {}
    while True:
        try:
            s = infile.next().strip()
        except StopIteration:
            break
        if s.lower() == "translate":
            ttable = parse_ttable(infile)
            # print("ttable: %s" % len(ttable))
        else:
            match = tree.parseString(s)
            yield Newick(match, ttable)
Example #7
def parse_treesblock(infile):
    import string
    from pyparsing import Optional, Word, Regex, CaselessKeyword, Suppress
    from pyparsing import QuotedString
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    name = Word(alphanums + "_") | QuotedString("'")
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() + Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") + Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))

    ## treesblock = Group(beginblock +
    ##                    Optional(ttable.setResultsName("ttable")) +
    ##                    Group(OneOrMore(tree)) +
    ##                    endblock)

    def parse_ttable(f):
        ttable = {}
        while True:
            s = f.next().strip()
            if s.lower() == ";":
                break
            if s[-1] in ",;":
                s = s[:-1]
            k, v = s.split()
            ttable[k] = v
            if s[-1] == ";":
                break
        return ttable

    ttable = {}
    while True:
        try:
            s = infile.next().strip()
        except StopIteration:
            break
        if s.lower() == "translate":
            ttable = parse_ttable(infile)
            # print("ttable: %s" % len(ttable))
        else:
            match = tree.parseString(s)
            yield Newick(match, ttable)
Example #8
    def define_identifier(self):
        """
		Return the syntax definition for an identifier.

		"""
        # --- Defining the individual identifiers:
        # Getting all the Unicode numbers in a single string:
        try:
            unicode_numbers = "".join(
                [unichr(n) for n in xrange(0x10000) if unichr(n).isdigit()])
        except NameError:
            unicode_numbers = "".join(
                [chr(n) for n in range(0x10000) if chr(n).isdigit()])

        unicode_number_expr = Regex("[%s]" % unicode_numbers, re.UNICODE)
        space_char = re.escape(self._grammar.get_token("identifier_spacing"))
        identifier0 = Regex("[\w%s]+" % space_char, re.UNICODE)
        # Identifiers cannot start with a number:
        identifier0 = Combine(~unicode_number_expr + identifier0)
        identifier0.setName("individual_identifier")

        # --- Defining the namespaces:
        namespace_sep = Suppress(
            self._grammar.get_token("namespace_separator"))
        namespace = Group(ZeroOrMore(identifier0 + namespace_sep))
        namespace.setName("namespace")

        # --- The full identifier, which could have a namespace:
        identifier = Combine(
            namespace.setResultsName("namespace_parts") +
            identifier0.setResultsName("identifier"))
        identifier.setName("full_identifier")

        expop = Literal('^')
        multop = oneOf('* /')
        factop = Literal('!')
        modop = Literal('%')
        signop = oneOf('+ -')
        opers = expop | signop | multop | factop | modop

        identifier = identifier + NotAny(opers)

        return identifier
Example #9
# quoted string is either just stuff within quotes, or stuff within quotes, within
# which there is nested curliness
quotedItem = Group(curlyString) | charsNoQuotecurly
quotedString = QUOTE + ZeroOrMore(quotedItem) + QUOTE

number = Regex("[0-9]+")
# Basis characters (by exclusion) for variable / field names.  The following
# list of characters is from the btparse documentation
anyName = Regex("[^\s\"#%'(),={}]+")

# btparse says, and the test bibs show by experiment, that macro and field names
# cannot start with a digit.  In fact entry type names cannot start with a digit
# either (see tests/bibs). Cite keys can start with a digit
notDigname = Regex("[^\d\s\"#%'(),={}][^\s\"#%'(),={}]*")

comment = AT + CaselessLiteral("comment") + LCURLY + charsNoCurly.setResultsName("comment") + RCURLY
comment.setParseAction(Comment.fromParseResult)

# The name types with their digiteyness
notDigLower = notDigname.copy().setParseAction(lambda t: t[0].lower())

macroDef = notDigLower.copy()

macroRef = notDigLower.copy().setParseAction(MacroReference.fromParseResult)
fieldName = notDigLower.copy()
entryType = notDigLower.setResultsName("entry type")
citeKey = anyName.setResultsName("cite key")
string = number | macroRef | quotedString | curlyString

# There can be hash concatenation
fieldValue = string + ZeroOrMore(HASH + string)
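The hash-concatenation rule can be exercised in isolation; a minimal sketch in which a double-quoted QuotedString stands in for the full quotedString/curlyString definitions above:

from pyparsing import Regex, QuotedString, Suppress, ZeroOrMore

number = Regex("[0-9]+")
HASH = Suppress("#")
simple_string = number | QuotedString('"')
field_value = simple_string + ZeroOrMore(HASH + simple_string)

print(field_value.parseString('"Proc. of the " # "10th" # " Workshop"').asList())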
Example #10
# either (see tests/bibs). Cite keys can start with a digit
not_digname = Regex('[^\d\s"#%\'(),={}][^\s"#%\'(),={}]*')

# Comment comments out to end of line
comment = (AT + CaselessLiteral('comment') +
           Regex("[\s{(].*").leaveWhitespace())

# The name types with their digiteyness
not_dig_lower = not_digname.copy().setParseAction(
    lambda t: t[0].lower())
macro_def = not_dig_lower.copy()
macro_ref = not_dig_lower.copy().setParseAction(lambda t : Macro(t[0].lower()))
field_name = not_dig_lower.copy()
# Spaces in names mean they cannot clash with field names
entry_type = not_dig_lower.setResultsName('entry type')
cite_key = any_name.setResultsName('cite key')
# Number has to be before macro name
string = (number | macro_ref | quoted_string |
          curly_string)

# There can be hash concatenation
field_value = string + ZeroOrMore(HASH + string)
field_def = Group(field_name + EQUALS + field_value)
entry_contents = Dict(ZeroOrMore(field_def + COMMA) + Optional(field_def))

# Entry is surrounded either by parentheses or curlies
entry = (AT + entry_type +
         bracketed(cite_key + COMMA + entry_contents))

# Preamble is a macro-like thing with no name
preamble = AT + CaselessLiteral('preamble') + bracketed(field_value)
Example #11
def nexus_iter(infile):
    import pyparsing
    pyparsing.ParserElement.enablePackrat()
    from pyparsing import Word, Literal, QuotedString, CaselessKeyword, \
         OneOrMore, Group, Optional, Suppress, Regex, Dict
    ## beginblock = Suppress(CaselessKeyword("begin") +
    ##                       CaselessKeyword("trees") + ";")
    ## endblock = Suppress((CaselessKeyword("end") |
    ##                      CaselessKeyword("endblock")) + ";")
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    ## translate = CaselessKeyword("translate").suppress()
    name = Word(string.letters + string.digits + "_") | QuotedString("'")
    ## ttrec = Group(Word(string.digits).setResultsName("number") +
    ##               name.setResultsName("name") +
    ##               Optional(",").suppress())
    ## ttable = Group(translate + OneOrMore(ttrec) + Suppress(";"))
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() + Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") + Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))

    ## treesblock = Group(beginblock +
    ##                    Optional(ttable.setResultsName("ttable")) +
    ##                    Group(OneOrMore(tree)) +
    ##                    endblock)

    def not_begin(s):
        return s.strip().lower() != "begin trees;"

    def not_end(s):
        return s.strip().lower() not in ("end;", "endblock;")

    def parse_ttable(f):
        ttable = {}
        while True:
            s = f.next().strip()
            if s.lower() == ";": break
            if s[-1] in ",;": s = s[:-1]
            k, v = s.split()
            ttable[k] = v
            if s[-1] == ";": break
        return ttable

    # read lines between "begin trees;" and "end;"
    f = itertools.takewhile(not_end, itertools.dropwhile(not_begin, infile))
    s = f.next().strip().lower()
    if s != "begin trees;":
        print >>sys.stderr, "Expecting 'begin trees;', got %s" % s
        raise StopIteration
    ttable = {}
    while True:
        try:
            s = f.next().strip()
        except StopIteration:
            break
        if s.lower() == "translate":
            ttable = parse_ttable(f)
            print "ttable: %s" % len(ttable)
        else:
            match = tree.parseString(s)
            yield nexus.Newick(match, ttable)
Example #12
bs_sp = Group(shell_sp + OneOrMore(exponent_sp))

endbs = Word('*').suppress()

basis_set = Group(
    Str.setResultsName('bdescr') + Number +
    OneOrMore(bs
              | bs_sp).setResultsName('basis') + Optional(endbs))

ecp_body = OneOrMore(eol + SkipTo(eol).suppress() +
                     Group(Number +
                           OneOrMore(Group(Number + Number + Number))))

ecp = Group(
    Str.setResultsName('edescr') + Number.suppress() +
    SkipTo(Number).suppress() + Number.setResultsName('lmax') +
    Number.setResultsName('core') + ecp_body.setResultsName('ecp'))

# define grammar here

grammar = Optional(OneOrMore(comment)) + Optional(endbs) \
    + Group(OneOrMore(basis_set)) + Optional(OneOrMore(comment)) \
    + Optional(Group(OneOrMore(ecp)))

lstr = {
    'S': 0,
    'SP': 1,
    'P': 2,
    'D': 3,
    'F': 4,
    'G': 5,
Example #13
def parse_line(line):
    # generic terms
    _name_re = '[a-zA-Z_][a-zA-Z0-9_]*'
    name_re = Regex(_name_re)
    perm = Or(['public', 'private']).setResultsName('permission')
    name = name_re.setResultsName('name')

    # script tags
    script_start = Regex('<script.*lang="ts">').setResultsName('script_start')
    script_stop = Regex('</script>').setResultsName('script_stop')

    # interface
    interface = Group(Suppress('interface') + name).setResultsName('interface')

    # class
    ext = Optional(
        Suppress('extends') + name_re.setResultsName('parent_class'))
    exp = Optional(ZeroOrMore(Regex('export|default')))
    class_ = Suppress(exp) + Suppress('class') + name + ext + Suppress('{')

    # decorator
    decorator = Suppress('@') + name + Suppress(Optional(Regex("\(.*\)")))

    def func(s, l, t):
        output = t.asDict()
        params = []
        for item in output['parameters']:
            temp = dict(name=None, type=None, description=None)
            temp.update(item)
            params.append(temp)

        output['parameters'] = params
        return t

    # method
    val = Regex('[a-zA-Z_][a-zA-Z0-9_]*\[?\]?')
    ptype = val.setResultsName('type')
    dfal = Regex('".*"|[.*]|{.*}|' + _name_re).setResultsName('default')
    ret = val.setResultsName('returns')
    rtype = Optional(Suppress(':') + ret)
    opt = Optional(Suppress('=') + dfal)
    param = Group(name + Optional(Suppress(':') + ptype) + Optional(opt))
    params = delimitedList(
        param, delim=',').setResultsName('parameters').setParseAction(func)
    method = perm + name + Suppress('(') + Optional(params) + Suppress(
        ')') + rtype + Suppress('{')

    # constructor
    constructor = perm + Regex("constructor") + Suppress('(') + Optional(
        params) + Suppress(')') + rtype + Suppress('{')

    # getter
    getter = perm + Suppress('get') + name + Suppress('()') + rtype + Suppress(
        '{')

    # setter
    setter = perm + Suppress('set') + name + Suppress('(') + params + Suppress(
        ')') + rtype + Suppress('{')

    # property
    atype = Optional(Suppress(':') + name_re.setResultsName('type'))
    value = Regex('.*').setResultsName('value')
    val = Optional(Suppress('=') + value)
    prop = perm + name + atype + val + Suppress(';')

    # docstring start and stop
    docstart = Regex('/\*\*').setResultsName('docstart')
    docstop = Regex('\*/').setResultsName('docstop')

    # line of docstring
    doc_com = Regex('\*(?!/)')
    name_re = Regex('[a-zA-Z_][a-zA-Z0-9_]*')
    desc = Regex('.*').setResultsName('description')
    info = doc_com + desc
    param = Group(Suppress(doc_com) + Suppress('@param') + name +
                  desc).setResultsName('params')
    returns = Regex('.*').setResultsName('returns')
    returns = doc_com + Regex('@returns?') + returns
    docline = returns | param | info

    parsers = [('setter', setter), ('method', method), ('getter', getter),
               ('constructor', constructor), ('script_start', script_start),
               ('script_stop', script_stop), ('interface', interface),
               ('class', class_), ('decorator', decorator),
               ('docstart', docstart), ('docstop', docstop),
               ('docline', docline), ('property', prop)]

    for ctype, parser in parsers:
        # return parser.parseString(line).asDict()
        content = {}
        try:
            content = parser.parseString(line).asDict()
        except:
            continue

        return {'content_type': ctype, 'content': content}
    return {}
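A hedged usage sketch: it assumes the pyparsing names used above (Regex, Or, Group, Suppress, Optional, ZeroOrMore, delimitedList) are imported at module scope, and the sample line is illustrative:

line = 'public getName(): string {'
result = parse_line(line)
# Expected to classify the line as a 'method', with the permission, name and
# return type available under result['content'].
print(result)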
Example #14
# published on PyPI.
signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(
    tokenMap(int))

variable = Word(alphas, bodyChars=alphanums)

stack_item = Suppress(",") + (signed_integer | Suppress("*") | variable)

flag = oneOf(list(VTT_MNEMONIC_FLAGS.keys()))
# convert flag to binary string
flag.setParseAction(tokenMap(lambda t: VTT_MNEMONIC_FLAGS[t]))
flags = Combine(OneOrMore(flag)).setResultsName("flags")

delta_point_index = pyparsing_common.integer.setResultsName("point_index")
delta_rel_ppem = pyparsing_common.integer.setResultsName("rel_ppem")
delta_step_no = signed_integer.setResultsName("step_no")
# the step denominator is only used in VTT's DELTA[CP]* instructions,
# and must always be 8 (sic!), so we can suppress it.
delta_spec = (delta_point_index + Suppress("@") + delta_rel_ppem +
              delta_step_no + Optional(Literal("/8")).suppress())

delta = nestedExpr("(", ")", delta_spec, ignoreExpr=None)

deltas = Group(OneOrMore(delta)).setResultsName("deltas")

args = deltas | flags

stack_items = OneOrMore(stack_item).setResultsName("stack_items")

instruction = Group(mnemonic + Suppress("[") + Optional(args) + Suppress("]") +
                    Optional(stack_items))
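The delta sub-grammar is self-contained enough to try on its own; a minimal sketch (the argument string is illustrative, not taken from real VTT output):

from pyparsing import (Regex, Suppress, Optional, Literal, Group, OneOrMore,
                       nestedExpr, pyparsing_common, tokenMap)

signed_integer = Regex(r'[+-]?\d+').setParseAction(tokenMap(int))
delta_point_index = pyparsing_common.integer.setResultsName("point_index")
delta_rel_ppem = pyparsing_common.integer.setResultsName("rel_ppem")
delta_step_no = signed_integer.setResultsName("step_no")
delta_spec = (delta_point_index + Suppress("@") + delta_rel_ppem +
              delta_step_no + Optional(Literal("/8")).suppress())
delta = nestedExpr("(", ")", delta_spec, ignoreExpr=None)
deltas = Group(OneOrMore(delta)).setResultsName("deltas")

print(deltas.parseString("(1 @12 8)(2 @13 -8/8)"))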
Example #15
normalized_port_range = (port ^ port_range).setParseAction(to_port_range)

ports  = delimitedList(normalized_port_range)('ports')

# IP addresses, name of another group, or sg-*
security_group = Regex("sg-[\w\d]+")
group_name = Regex("[\w\d\-]+")

mask = Word("/") + Word(nums).setParseAction(to_int)('mask')
ip = (Combine(Word(nums) + ('.' + Word(nums))*3)('ip') + Optional(mask)('mask')).setParseAction(normalize_ip)

parser = Optional(protocol)('protocol') + \
         Optional(port_) + \
         ports + \
         (ip.setResultsName('ip_and_mask') ^ security_group.setResultsName('security_group') ^ group_name('group_name'))


class Rule(object):

    def __init__(self, protocol, from_port, to_port, address=None, group=None, group_name=None):
        """constructs a new rule
        :param protocol tcp or udp
        :param from_port
        :param to_port
        :param address
        :param group sg-style (should almost never be used)
        :param group_name
        """
        self.protocol = protocol or "tcp"
        self.from_port = from_port
Example #16
#     )
#     + tld_label
# )
#  Problem with above domain_fqdn is that PyParsing cannot do lookahead in time, so
#  we use the much-vaunted Regex() for domain_fqdn
domain_fqdn_regex = '('\
                        + '(' \
                            + subdomain_label_regex \
                            + '\.' \
                        + '){0,16}' + \
                        domain_label_regex + '\.' \
                    + '){0,1}'\
                    + tld_label_regex
domain_fqdn = Regex(domain_fqdn_regex)
domain_fqdn.setName('<strict-fqdn>')
domain_fqdn.setResultsName('domain_name')

# Generic fully-qualified domain name (less stringent)
domain_generic_fqdn = Combine(
    domain_generic_label
    + ZeroOrMore(
        Literal('.')
        + domain_generic_label
    )
    + Optional(Char('.'))
)
domain_generic_fqdn.setName('<generic-fqdn>')
domain_generic_fqdn.setResultsName('domain_name')

quoted_domain_generic_fqdn = (
        Combine(squote - domain_generic_fqdn - squote)
Example #17
def sql2table_list(tables, show_columns=True):
    def field_act(s, loc, tok):
        return " ".join(tok).replace('\n', '\\n')

    def field_list_act(s, loc, tok):
        return tok

    def create_table_act(s, loc, tok):
        table = Table(tok["tableName"], None, {}, {})
        for t in tok["fields"]:
            if str(t).startswith("FK:"):
                l = t[3:].split(":")
                if len(l) > 2:
                    table.fkeys[l[0]] = {"ftable": l[1], "fcoloumn": l[2]}
                else:
                    table.fkeys[l[0]] = {"ftable": l[1]}

            elif str(t).startswith("PK:"):
                table.pk = t[3:]
            elif str(t).startswith("KEY:"):
                pass
            else:
                l = t.split(" ")
                table.columns[l[0]] = " ".join(l[1:])
        tables.append(table)

    def add_fkey_act(s, loc, tok):
        return '{tableName}:{keyName}:{fkTable}:{fkCol}'.format(**tok)

    def fkey_act(s, loc, tok):
        return 'FK:{keyName}:{fkTable}:{fkCol}'.format(**tok)

    def fkey_nocols_act(s, loc, tok):
        return 'FK:{keyName}:{fkTable}'.format(**tok)

    # def fkey_list_act(s, loc, tok):
    #     return "\n        ".join(tok)

    def other_statement_act(s, loc, tok):
        pass

    def join_string_act(s, loc, tok):
        return "".join(tok).replace('\n', '\\n')

    def quoted_default_value_act(s, loc, tok):
        return tok[0] + " " + "".join(tok[1::])

    def pk_act(s, loc, tok):
        return 'PK:{primary_key}'.format(**tok)

    def k_act(s, loc, tok):
        pass

    def no_act(s, loc, tok):
        pass

    string = Regex('[a-zA-Z0-9=_]+')
    ws = OneOrMore(White()).suppress()
    lp = Regex('[(]').suppress()
    rp = Regex('[)]').suppress()
    c = Regex('[,]').suppress()
    q = Regex("[`]").suppress()

    parenthesis = Forward()
    parenthesis <<= "(" + ZeroOrMore(CharsNotIn("()") | parenthesis) + ")"
    parenthesis.setParseAction(join_string_act)

    quoted_string = "'" + ZeroOrMore(CharsNotIn("'")) + "'"
    quoted_string.setParseAction(join_string_act)

    quoted_default_value = "DEFAULT" + quoted_string + OneOrMore(
        CharsNotIn(", \n\t"))
    quoted_default_value.setParseAction(quoted_default_value_act)

    column_comment = CaselessKeyword("COMMENT") + quoted_string

    primary_key = CaselessKeyword('PRIMARY').suppress() + CaselessKeyword(
        "KEY").suppress() + lp + string.setResultsName('primary_key') + rp
    primary_key.ignore("`")
    primary_key.setParseAction(pk_act)

    key_def = Optional(CaselessKeyword('UNIQUE').suppress()) + CaselessKeyword(
        'KEY').suppress() + Word(alphanums + "_") + lp + delimitedList(
            string.setResultsName('key'), delim=",") + rp
    key_def.ignore("`")
    key_def.setParseAction(k_act)

    fkey_def = CaselessKeyword("CONSTRAINT") + Word(
        alphanums + "_"
    ) + CaselessKeyword("FOREIGN") + CaselessKeyword("KEY") + lp + Word(
        alphanums + "_"
    ).setResultsName("keyName") + rp + CaselessKeyword("REFERENCES") + Word(
        alphanums + "._").setResultsName("fkTable") + lp + Word(
            alphanums + "_").setResultsName("fkCol") + rp + Optional(
                CaselessKeyword("DEFERRABLE")
            ) + Optional(
                CaselessKeyword("ON") +
                (CaselessKeyword("DELETE") | CaselessKeyword("UPDATE")) +
                (CaselessKeyword("CASCADE") | CaselessKeyword("RESTRICT")
                 | CaselessKeyword("NO ACTION") | CaselessKeyword("SET NULL"))
            ) + Optional(
                CaselessKeyword("ON") +
                (CaselessKeyword("DELETE") | CaselessKeyword("UPDATE")) +
                (CaselessKeyword("CASCADE") | CaselessKeyword("RESTRICT")
                 | CaselessKeyword("NO ACTION") | CaselessKeyword("SET NULL")))
    fkey_def.ignore("`")
    if show_columns:
        fkey_def.setParseAction(fkey_act)
    else:
        fkey_def.setParseAction(fkey_nocols_act)

    #fkey_list_def = ZeroOrMore(Suppress(",") + fkey_def)
    #fkey_list_def.setParseAction(fkey_list_act)

    field_def = Word(alphanums + "_\"':-/[].") + Word(
        alphanums + "_\"':-/[].") + Optional(
            CaselessKeyword("NOT NULL") | CaselessKeyword("DEFAULT") +
            Word(alphanums + "_\"':-/[].")) + Optional(
                OneOrMore(quoted_default_value | column_comment
                          | Word(alphanums + "_\"'`:-/[].") | parenthesis))
    field_def.ignore("`")

    #    if columns:
    field_def.setParseAction(field_act)
    #    else:
    #        field_def.setParseAction(no_act)

    field_list_def = delimitedList(\
        (primary_key.suppress() | \
        key_def.suppress() | \
        fkey_def | \
        field_def \
        ), delim=","\
    )
    #if columns else field_def.suppress()
    field_list_def.setParseAction(field_list_act)

    tablename_def = (Word(alphanums + "_.") | QuotedString("\""))
    tablename_def.ignore("`")

    create_table_def = CaselessKeyword("CREATE").suppress() + CaselessKeyword(
        "TABLE").suppress() + tablename_def.setResultsName(
            "tableName") + lp + field_list_def.setResultsName(
                "fields") + rp + ZeroOrMore(
                    Word(alphanums + "_\"'`:-/[].=")) + Word(";").suppress()
    create_table_def.setParseAction(create_table_act)

    add_fkey_def = CaselessKeyword(
        "ALTER") + "TABLE" + "ONLY" + tablename_def.setResultsName(
            "tableName") + "ADD" + "CONSTRAINT" + Word(
                alphanums + "_"
            ) + "FOREIGN" + "KEY" + "(" + Word(alphanums + "_").setResultsName(
                "keyName") + ")" + "REFERENCES" + Word(
                    alphanums + "._").setResultsName("fkTable") + "(" + Word(
                        alphanums + "_"
                    ).setResultsName("fkCol") + ")" + Optional(
                        Literal("DEFERRABLE")) + Optional(
                            Literal("ON") + "DELETE" +
                            (Literal("CASCADE") | Literal("RESTRICT"))) + ";"
    add_fkey_def.setParseAction(add_fkey_act)

    other_statement_def = OneOrMore(CharsNotIn(";")) + ";"
    other_statement_def.setParseAction(other_statement_act)

    comment_def = "--" + ZeroOrMore(CharsNotIn("\n"))
    comment_def.setParseAction(other_statement_act)

    return OneOrMore(comment_def | create_table_def | add_fkey_def
                     | other_statement_def)
Example #18
_rank = _quoted(Optional(Word(alphanums)))


def _parse_date(s, l, t):
    try:
        return datetime.strptime(t[0], "%Y-%m-%d")
    except ValueError:
        # If the date is invalid, return the epoch
        return datetime.utcfromtimestamp(0)


_date = _quoted(Word(nums + '-')).setParseAction(_parse_date)

# Define format expected for each field
_fields = [
    _int.setResultsName('Game_ID'),
    _quoted(Word(alphanums)).setResultsName('Tournament_Code'),
    _date.setResultsName('Game_Date'),
    _int.setResultsName('Round'),
    _int.setResultsName('Pin_Player_1'),
    _color.setResultsName('Color_1'),
    _rank.setResultsName('Rank_1'),
    _int.setResultsName('Pin_Player_2'),
    _color.setResultsName('Color_2'),
    _rank.setResultsName('Rank_2'),
    _int.setResultsName('Handicap'),
    _int.setResultsName('Komi'),
    _color.setResultsName('Result'),
    # If Sgf_Code is NULL then the key will not be inserted into the results dict
    _quoted(Optional(Word(alphanums + '-').setResultsName('Sgf_Code')))
    | Literal('NULL'),
Example #19
# Version 1
element = Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|"
                "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|"
                "M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|"
                "S[bcegimnr]?|T[abcehilm]|Uu[bhopqst]|U|V|W|Xe|Yb?|Z[nr]")
elementRef = Group( element + Optional( Word( digits ), default="1" ) )
formula = OneOrMore( elementRef )

fn = lambda elemList : sum( [ atomicWeight[elem]*int(qty) for elem,qty in elemList ] )
test( formula, "H2O", fn )
test( formula, "C6H5OH", fn )
test( formula, "NaCl", fn )
print

# Version 2 - access parsed items by field name
elementRef = Group( element.setResultsName("symbol") + \
                Optional( Word( digits ), default="1" ).setResultsName("qty") )
formula = OneOrMore( elementRef )

fn = lambda elemList : sum( [ atomicWeight[elem.symbol]*int(elem.qty) for elem in elemList ] )
test( formula, "H2O", fn )
test( formula, "C6H5OH", fn )
test( formula, "NaCl", fn )
print

# Version 3 - convert integers during parsing process
integer = Word( digits ).setParseAction(lambda t:int(t[0]))
elementRef = Group( element.setResultsName("symbol") + \
                Optional( integer, default=1 ).setResultsName("qty") )
formula = OneOrMore( elementRef )
Example #20
skeletonName = Keyword(":name") + bonename.setResultsName('name')
unitDefinition = Group(Word(alphas) + (floatValue | intValue | Word(alphas)))
unitSection = Keyword(":units") + \
        Dict(ZeroOrMore(unitDefinition)).setResultsName('units')
documentationSection = Keyword(':documentation') + \
        SkipTo(":").setResultsName('documentation')
rootSection = Group(
    Keyword(":root") & (Keyword("order") + channels.setResultsName('channels'))
    & (Keyword("position") + floatVector.setResultsName('position'))
    & (Keyword("axis") + rotationOrder.setResultsName("axisRotationOrder"))
    & (Keyword("orientation") +
       floatVector.setResultsName("axis"))).setResultsName('root')
bone = Group(begin + Keyword("id") + intValue + Keyword("name") +
             bonename.setResultsName("name") + Keyword("direction") +
             floatVector.setResultsName("direction") + Keyword("length") +
             floatValue.setResultsName("length") + Keyword("axis") +
             floatVector.setResultsName("axis") +
             rotationOrder.setResultsName("axisRotationOrder") + Optional(
                 Keyword("dof") + channels.setResultsName("channels") +
                 Keyword("limits") + limits.setResultsName("limits")) + end)

bonedataSection = (Keyword(":bonedata") +
                   Group(ZeroOrMore(bone)).setResultsName("bones"))
hierarchyEntry = Group(
    bonename.setResultsName("parent") +
    Group(OneOrMore(bonename)).setResultsName("children") +
    Suppress(LineEnd()))
hierarchySection = (
    Keyword(":hierarchy") + begin + LineEnd() +
    Dict(OneOrMore(hierarchyEntry)).setResultsName("hierarchy") + end)
Example #21
def parser(text):
    var_any = Literal("_")
    p = Regex("[\w:]+").setResultsName("text")
    var_any = Regex("_") #handled by p anyway
    attribute = Literal("@").suppress()
    eq = Literal("=").suppress()
    closure = (Literal("?") | Literal("*") | Literal("+")).setResultsName("closure")

    test = Literal("^").setResultsName("modifier") + p | p + Literal("$").setResultsName("modifier") | p #| var_any
    axis = (Literal("\\\\*") | \
            Literal("\\\\") | \
            Literal("\\") | \
            Literal(".") | \
            Literal("//*") | \
            Literal("//") | \
            Literal("/") | \
            Literal("-->") | \
            Literal("<--") | \
            Literal("->") | \
            Literal("<-") | \
            Literal("==>") | \
            Literal("<==") | \
            Literal("=>") | \
            Literal("<=")).setResultsName("connector")

    g_left_brack = Literal("[").suppress()
    g_right_brack = Literal("]").suppress()

    # working
    """
    abspath = Forward()
    locstep = Forward()
    
    node = test.setResultsName("node")
    attr_test = Group(attribute.suppress() + node.setResultsName("attr") + eq.suppress() + node.setResultsName("attr_val")).setResultsName("attr_test")
    predicate = (Group(Literal("[").suppress() + attr_test + Literal("]").suppress()).setResultsName("predicate") |\
                 Group(Literal("[").suppress() + abspath + Literal("]").suppress()).setResultsName("predicate"))
    locstep << Group(axis.setResultsName("axis") + node + \
              Optional(predicate + Optional(closure).setResultsName("closure"))).setResultsName("locstep")

    abs2 = abspath
    abspath << ( Group(locstep.setResultsName("left_step") + abs2).setResultsName("abspath") | \
                 locstep.setResultsName("right_step") )

    # TODO
    locpath = abspath
    fexpr = locpath.setResultsName("exp")
    """

    # clean
    locpath = Forward()
    steps = Forward()

    fexpr = locpath.setResultsName("exp")

    attr_test = Group(attribute + p.setResultsName("attr") + eq + p.setResultsName("attr_val"))
    pred_opt = (fexpr.setResultsName("predicate") | attr_test.setResultsName("attr_test"))

    # connector order handling is the same as EmuQL, but the root lacks a left, as it refers to context node
    nodetest = Group(test + Optional(g_left_brack + pred_opt + g_right_brack + Optional(closure)))
    steps << ( Group(nodetest("left") + axis + steps("right")) | \
               Group(test + Optional(g_left_brack + pred_opt + g_right_brack + Optional(closure))))

    locpath << Group(axis + steps.setResultsName("right"))
    
    return fexpr.parseString(text)
Example #22
            INTO|VALUES|DELETE|UPDATE|SET|CREATE|INDEX|USING|BTREE|HASH|
            ON|INTEGER|FLOAT|DATETIME|DATE|VARCHAR|CHAR|TABLE|DATABASE|
            DROP|ORDER|BY|ASC|DESC)

# Define basic symbols
LPAR, RPAR = map(Suppress, '()')
dot = Literal(".").suppress()
comma = Literal(",").suppress()
semi_colon  = Literal(";").suppress()

# Basic identifier used to define vars, tables, columns
identifier = ~keywords + Word(alphas, alphanums + '_')

# Literal Values
integer_literal = Regex(r"([+-]?[1-9][0-9]*|0)")
integer_literal = integer_literal.setResultsName('integer_literal')
float_literal = Regex(r"([+-]?[1-9][0-9]*|0)\.[0-9]+")
float_literal = float_literal.setResultsName('float_literal')
numeric_literal = float_literal | integer_literal
string_literal = QuotedString("'").setResultsName('string_literal')
literal_value = (numeric_literal|string_literal|NULL)

# SQL-Type-names
INTEGER = INTEGER.setResultsName('type_name')
FLOAT = FLOAT.setResultsName('type_name')
DATETIME = DATETIME.setResultsName('type_name')
DATE = DATE.setResultsName('type_name')
VARCHAR = VARCHAR.setResultsName('type_name')
CHAR = CHAR.setResultsName('type_name')

# SQL-Data-types
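The literal rules above place float_literal before integer_literal; a standalone sketch (illustrative) of why that order matters:

from pyparsing import Regex

integer_literal = Regex(r"([+-]?[1-9][0-9]*|0)")
float_literal = Regex(r"([+-]?[1-9][0-9]*|0)\.[0-9]+")

print((float_literal | integer_literal).parseString("3.14"))  # matches the full '3.14'
print((integer_literal | float_literal).parseString("3.14"))  # stops after '3'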
Example #23
    elif root.high:
        return f'{root}({write_tree(root.high)},)'
    elif root.low:
        return f'{root}(,{write_tree(root.low)})'
    else:
        return f'{root}'


# Strategy tree grammar
node = Regex(rf'\w+[{NEVER_FIND_FLAG}]?')
LPAREN, COMMA, RPAREN = map(Suppress, '(,)')
tree = Forward()
subtree = Group(Optional(tree))
subtrees = LPAREN - subtree.setResultsName(
    'high') - COMMA - subtree.setResultsName('low') - RPAREN
tree << node.setResultsName('root') - Optional(subtrees)


def read_tree(tree_str, gusher_map, start=BASKET_LABEL):
    """Read the strategy encoded in tree_str and build the corresponding decision tree.
    V(H, L) represents the tree with root node V, high subtree H, and low subtree L.
    A node name followed by * indicates that the gusher is being opened solely for information and the Goldie will
    never be found there."""
    def build_tree(
        tokens
    ):  # recursively convert ParseResults object into GusherNode tree
        findable = tokens.root[-1] != NEVER_FIND_FLAG
        rootname = tokens.root.rstrip(NEVER_FIND_FLAG)
        try:
            root = GusherNode(rootname,
                              gusher_map=gusher_map,
Example #24
                               LessThanCondition, LessThanOrEqualCondition,
                               RegexCondition, RegexNegatedCondition)


end_of_line = Regex(r' *\n') ^ LineEnd()

settings_table = Literal('*** Settings ***') + Regex(r'[^\*]+(?=\*)')
settings_table.setParseAction(lambda t: '\n'.join(t))
variables_table = Literal('*** Variables ***') + Regex(r'[^\*]+(?=\*)')
variables_table.setParseAction(lambda t: '\n'.join(t))
keywords_table = Literal('*** Keywords ***') + CharsNotIn('') + StringEnd()
keywords_table.setParseAction(lambda t: '\n'.join(t))

state_name = Regex(r'\w+( \w+)*')
state_name.leaveWhitespace()
state_name = state_name.setResultsName('state_name')

robo_step = Regex(r'([\w\$\{\}][ \w\$\{\}]*[\w\}]|\w)')
robo_step.leaveWhitespace()
robo_step = robo_step.setResultsName('robo_step')

variable = Regex(Variable.REGEX)

variable_value = Regex(r'[\w\$\{\}!?\-\=\_\.\/]+( [\w\$\{\}!?\-\=\_\.\/]+)*')

splitter = Literal(' ') + OneOrMore(' ')
splitter.setParseAction(lambda t: '  ')

variable_values = (variable_value + ZeroOrMore(splitter + variable_value)).setResultsName('variable_values')
variable_values.setParseAction(lambda t: [[t[2 * i] for i in range(int((len(t) + 1) / 2))]])
Example #25
File: nexus.py Project: rhr/ivy
def iter_trees(infile):
    import pyparsing
    pyparsing.ParserElement.enablePackrat()
    from pyparsing import (
        Word, Literal, QuotedString, CaselessKeyword, CharsNotIn,
        OneOrMore, Group, Optional, Suppress, Regex, Dict, ZeroOrMore,
        alphanums, nums)
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    name = Word(alphanums+"_.") | QuotedString("'")
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() +
            Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") +
            Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))

    def not_begin(s):
        # print('not_begin', s)
        return s.strip().lower() != "begin trees;"
    def not_end(s):
        # print('not_end', s)
        return s.strip().lower() not in ("end;", "endblock;")
    def parse_ttable(f):
        ttable = {}
        # com = Suppress('[') + ZeroOrMore(CharsNotIn(']')) + Suppress(']')
        com = Suppress('[' + ZeroOrMore(CharsNotIn(']') + ']'))
        while True:
            s = next(f).strip()
            if not s:
                continue
            s = com.transformString(s).strip()
            if s.lower() == ";":
                break
            b = False
            if s[-1] in ",;":
                if s[-1] == ';':
                    b = True
                s = s[:-1]
            # print(s)
            k, v = s.split()
            ttable[k] = v
            if b:
                break
        return ttable

    # read lines between "begin trees;" and "end;"
    f = itertools.takewhile(not_end, itertools.dropwhile(not_begin, infile))
    s = next(f).strip().lower()
    if s != "begin trees;":
        print("Expecting 'begin trees;', got %s" % s, file=sys.stderr)
        return
    ttable = {}
    while True:
        try:
            s = next(f).strip()
        except StopIteration:
            break
        if not s:
            continue
        if s.lower() == "translate":
            ttable = parse_ttable(f)
            # print "ttable: %s" % len(ttable)
        elif s.split()[0].lower()=='tree':
            match = tree.parseString(s)
            yield Newick(match, ttable)
Example #26
        floatVector.setResultsName('position')) &
        (Keyword("axis") +
        rotationOrder.setResultsName("axisRotationOrder")) &
        (Keyword("orientation") +
        floatVector.setResultsName("axis"))
        ).setResultsName('root')
bone = Group(
        begin +
        Keyword("id") +
        intValue +
        Keyword("name") +
        bonename.setResultsName("name") +
        Keyword("direction") +
        floatVector.setResultsName("direction") +
        Keyword("length") +
        floatValue.setResultsName("length") +
        Keyword("axis") +
        floatVector.setResultsName("axis") +
        rotationOrder.setResultsName("axisRotationOrder") +
        Optional(
            Keyword("dof") +
            channels.setResultsName("channels") +
            Keyword("limits") +
            limits.setResultsName("limits")
            ) +
        end
        )

bonedataSection = (
        Keyword(":bonedata") +
        Group(ZeroOrMore(bone)).setResultsName("bones")
Example #27
nonzero_digits = Word('123456789')
integer_literal = Regex(r"([+-]?[1-9][0-9]*|0)")
Literal(".")
num_dot = Literal(".")
real_number_literal = Regex(r"([+-]?[1-9][0-9]*|0)\.[0-9]+")
numeric_literal = real_number_literal | integer_literal
string_literal = QuotedString("'")
literal_value = (numeric_literal | string_literal | NULL)
literal_value = literal_value.setName('literal_value')

# Data-types
integer_type = INTEGER
float_type = FLOAT
datetime_type = DATETIME
date_type = DATE
string_size = integer_literal.setResultsName('size')
nvarchar_type = Group(VARCHAR + LPAR + string_size + RPAR)
nchar_type = Group(CHAR + LPAR + string_size + RPAR)
data_type = (integer_type | float_type | datetime_type | date_type
             | nvarchar_type | nchar_type).setResultsName('data_type')

# Table
alias = identifier.copy().setResultsName('alias')
simple_table_name = identifier.setResultsName("table_name")
table_name = simple_table_name.copy()

# Column
simple_column_name = identifier.setResultsName("column_name")
fully_qualified_column_name = Group(simple_table_name + dot +
                                    simple_column_name)
column_name = fully_qualified_column_name | simple_column_name
Example #28
# either (see tests/bibs). Cite keys can start with a digit
not_digname = Regex('[^\d\s"#%\'(),={}][^\s"#%\'(),={}]*')

# Comment comments out to end of line
comment = (AT + CaselessLiteral('comment') +
           Regex("[\s{(].*").leaveWhitespace())

# The name types with their digiteyness
not_dig_lower = not_digname.copy().setParseAction(
    lambda t: t[0].lower())
macro_def = not_dig_lower.copy()
macro_ref = not_dig_lower.copy().setParseAction(lambda t : Macro(t[0].lower()))
field_name = not_dig_lower.copy()
# Spaces in names mean they cannot clash with field names
entry_type = not_dig_lower.setResultsName('entry type')
cite_key = any_name.setResultsName('cite key')
# Number has to be before macro name
string = (number | macro_ref | quoted_string |
          curly_string)

# There can be hash concatenation
field_value = string + ZeroOrMore(HASH + string)
field_def = Group(field_name + EQUALS + field_value)
entry_contents = Dict(ZeroOrMore(field_def + COMMA) + Optional(field_def))

# Entry is surrounded either by parentheses or curlies
entry = (AT + entry_type +
         bracketed(cite_key + COMMA + entry_contents))

# Preamble is a macro-like thing with no name
preamble = AT + CaselessLiteral('preamble') + bracketed(field_value)
Example #29
def iter_trees(infile):
    import pyparsing
    pyparsing.ParserElement.enablePackrat()
    from pyparsing import (Word, Literal, QuotedString, CaselessKeyword,
                           CharsNotIn, OneOrMore, Group, Optional, Suppress,
                           Regex, Dict, ZeroOrMore, alphanums, nums)
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    name = Word(alphanums + "_.") | QuotedString("'")
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() + Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") + Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))

    def not_begin(s):
        # print('not_begin', s)
        return s.strip().lower() != "begin trees;"

    def not_end(s):
        # print('not_end', s)
        return s.strip().lower() not in ("end;", "endblock;")

    def parse_ttable(f):
        ttable = {}
        # com = Suppress('[') + ZeroOrMore(CharsNotIn(']')) + Suppress(']')
        com = Suppress('[' + ZeroOrMore(CharsNotIn(']') + ']'))
        while True:
            s = next(f).strip()
            if not s:
                continue
            s = com.transformString(s).strip()
            if s.lower() == ";":
                break
            b = False
            if s[-1] in ",;":
                if s[-1] == ';':
                    b = True
                s = s[:-1]
            # print(s)
            k, v = s.split()
            ttable[k] = v
            if b:
                break
        return ttable

    # read lines between "begin trees;" and "end;"
    f = itertools.takewhile(not_end, itertools.dropwhile(not_begin, infile))
    s = next(f).strip().lower()
    if s != "begin trees;":
        print("Expecting 'begin trees;', got %s" % s, file=sys.stderr)
        return
    ttable = {}
    while True:
        try:
            s = next(f).strip()
        except StopIteration:
            break
        if not s:
            continue
        if s.lower() == "translate":
            ttable = parse_ttable(f)
            # print "ttable: %s" % len(ttable)
        elif s.split()[0].lower() == 'tree':
            match = tree.parseString(s)
            yield Newick(match, ttable)
Example #30
# Version 1
element = Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|"
                "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|"
                "M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|"
                "S[bcegimnr]?|T[abcehilm]|Uu[bhopqst]|U|V|W|Xe|Yb?|Z[nr]")
elementRef = Group( element + Optional( Word( digits ), default="1" ) )
formula = OneOrMore( elementRef )

fn = lambda elemList : sum( [ atomicWeight[elem]*int(qty) for elem,qty in elemList ] )
test( formula, "H2O", fn )
test( formula, "C6H5OH", fn )
test( formula, "NaCl", fn )
print

# Version 2 - access parsed items by field name
elementRef = Group( element.setResultsName("symbol") + \
                Optional( Word( digits ), default="1" ).setResultsName("qty") )
formula = OneOrMore( elementRef )

fn = lambda elemList : sum( [ atomicWeight[elem.symbol]*int(elem.qty) for elem in elemList ] )
test( formula, "H2O", fn )
test( formula, "C6H5OH", fn )
test( formula, "NaCl", fn )
print

# Version 3 - convert integers during parsing process
integer = Word( digits ).setParseAction(lambda t:int(t[0]))
elementRef = Group( element.setResultsName("symbol") + \
                Optional( integer, default=1 ).setResultsName("qty") )
formula = OneOrMore( elementRef )
Example #31
# btparse says, and the test bibs show by experiment, that macro and field names
# cannot start with a digit.  In fact entry type names cannot start with a digit
# either (see tests/bibs). Cite keys can start with a digit
not_digname = Regex("[^\d\s\"#%'(),={}][^\s\"#%'(),={}]*")

# Comment comments out to end of line
comment = AT + CaselessLiteral("comment") + Regex("[\s{(].*").leaveWhitespace()

# The name types with their digiteyness
not_dig_lower = not_digname.copy().setParseAction(lambda t: t[0].lower())
macro_def = not_dig_lower.copy()
macro_ref = not_dig_lower.copy().setParseAction(lambda t: Macro(t[0].lower()))
field_name = not_dig_lower.copy()
# Spaces in names mean they cannot clash with field names
entry_type = not_dig_lower.setResultsName("entry type")
cite_key = any_name.setResultsName("cite key")
# Number has to be before macro name
string = number | macro_ref | quoted_string | curly_string

# There can be hash concatenation
field_value = string + ZeroOrMore(HASH + string)
field_def = Group(field_name + EQUALS + field_value)
entry_contents = Dict(ZeroOrMore(field_def + COMMA) + Optional(field_def))

# Entry is surrounded either by parentheses or curlies
entry = AT + entry_type + bracketed(cite_key + COMMA + entry_contents)

# Preamble is a macro-like thing with no name
preamble = AT + CaselessLiteral("preamble") + bracketed(field_value)

# Macros (aka strings)