Python regex 예제들, parsec.regex Python 예제들

예제 #1

0

파일 보기

    def __init__(self):
        """Assemble the line grammar and expose its ``parse`` method.

        A line is tried, in order, as a mapping
        (``U+XXXX ... => U+XXXX ...`` with an optional ``%%%`` comment),
        as a bare ``%%%`` comment, and finally as whitespace, which is
        mapped to ``EMPTY``.
        """
        blanks = spaces()
        maybe_blanks = optional(blanks)

        # Whitespace maps to the EMPTY marker.
        blank_line = blanks.parsecmap(lambda _: EMPTY)

        # '%%%' followed by the rest of the line, wrapped in Comment.
        remark = (string('%%%') >> regex('.*')).parsecmap(Comment)

        # 'U+' followed by upper-case hex digits, converted to an int.
        hex_value = regex('[0-9A-F]+').parsecmap(
            lambda digits: int(digits, 16))
        code_point = string('U+') >> hex_value
        code_points = sepBy(code_point, blanks).parsecmap(tuple)

        # '=>' with optional whitespace on either side.
        arrow = maybe_blanks >> string('=>') << maybe_blanks

        mapping = joint(
            code_points << arrow,
            code_points,
            optional(remark),
        ).parsecmap(lambda parts: Mapping(parts[0], parts[1], parts[2]))

        line = try_choice(mapping, try_choice(remark, blank_line))
        self.parse = line.parse

예제 #2

0

파일 보기

    def parse_ECOSTRESS(filename):
        """
        Load a spectrum from an ECOSTRESS file.

        The file is read as ISO-8859-1 text and parsed as a run of
        ``key: value`` header lines, any number of blank lines, and then
        rows made of two floating-point columns.

        :param filename: path of the file to read.
        :return: a ``Spectrum`` built from the header fields ``Name``,
            ``Type``, ``Class``, ``First X Value``, ``Last X Value`` and
            ``Number of X Values``, the remaining header entries, and the
            two data columns as lists of floats.
        :raises KeyError: if one of the header fields above is missing.
        """
        spaces = parsec.regex(r'\s*', re.MULTILINE)

        @parsec.generate
        def header_field():
            k = yield spaces >> parsec.regex('[^:]*') << spaces
            yield parsec.string(':')
            v = yield parsec.ends_with(parsec.regex('[^\n]*'), parsec.string('\n'))
            return {k: v.strip()}

        @parsec.generate
        def header():
            items = yield parsec.many(header_field)
            d = {}
            for item in items:
                d.update(item)
            return d

        # Raw string: '\.' in an ordinary literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        floating = parsec.regex(r'[-+]?([0-9]+(\.[0-9]+)?|\.[0-9]+)')

        @parsec.generate
        def sample():
            fwhm = yield spaces >> floating << spaces
            level = yield floating << spaces
            yield parsec.optional(parsec.string('\n'))
            return (float(fwhm), float(level))

        @parsec.generate
        def parser():
            head = yield header
            yield parsec.many(parsec.string('\n'))
            samps = yield parsec.many(sample)
            return head, samps

        def parse_spectrum(filename):
            try:
                with open(filename, 'r', encoding='iso-8859-1') as f:
                    parsed = parser.parse(f.read())
                return parsed
            except Exception:
                # Report which file failed, then let the error propagate.
                print('Error parsing '+filename)
                raise

        # Use a separate name for the parsed header so the nested ``header``
        # parser, which ``parser`` closes over, is never rebound to a dict.
        fields, data = parse_spectrum(filename)

        name = fields.pop('Name')
        type_ = fields.pop('Type')
        class_ = fields.pop('Class')
        x0 = float(fields.pop('First X Value'))
        x1 = float(fields.pop('Last X Value'))
        x_min, x_max = min(x0, x1), max(x0, x1)
        n_samples = fields.pop('Number of X Values')

        x = [row[0] for row in data]
        y = [row[1] for row in data]

        return Spectrum(name, type_, class_, x_min, x_max, n_samples, fields, x, y)

예제 #3

0

파일 보기

파일: ibis_parser.py 프로젝트: jdpatt/PyAMI

def param():
    """Parse one IBIS parameter.

    Generator body for a generator-based parser: it yields sub-parsers and
    finally returns ``(lower-cased parameter name, parsed value)``.
    """
    # Parameter names must begin with a letter in column 1.
    name_at_line_start = regex(r"^[a-zA-Z]\w*", re.MULTILINE)
    # Value alternatives, tried in this order.
    value = (word(string("=")) >> number) | typminmax | name | rest_line

    pname = yield name_at_line_start
    if DBG:
        print(pname)
    parsed = yield regex(r"\s*") >> value
    yield ignore  # Skip ignorable input so ``param`` acts as a lexeme.
    return (pname.lower(), parsed)

예제 #4

0

파일 보기

파일: rinexer.py 프로젝트: umeat/RINEX-Parser

def n_ANY(n):
    """Return a parser for a run of exactly ``n`` characters.

    Each character is matched with the regex ``.``. Handy for skipping
    ``n`` characters with the ``>>`` / ``<<`` operators, or for capturing
    fixed-length fields.
    """
    pattern = '(.){' + str(n) + '}'
    return p.regex(pattern)

예제 #5

0

파일 보기

 def string_esc():
     """Parser for a backslash escape sequence inside a quoted string.

     After the backslash it accepts another backslash, ``/``, one of the
     letters ``b f n r t`` (producing the matching control character),
     ``u`` plus four hex digits (converted by ``to_unichr``), or the
     closing-quote text ``end_quote``.
     """
     # Single-letter escapes that stand for a control character.
     control = (string('b').result('\b')
                | string('f').result('\f')
                | string('n').result('\n')
                | string('r').result('\r')
                | string('t').result('\t'))
     unicode_escape = regex(r'u[0-9a-fA-F]{4}').parsecmap(to_unichr)
     escaped = (string('\\')
                | string('/')
                | control
                | unicode_escape
                | string(end_quote))
     return string('\\') >> escaped

예제 #6

0

파일 보기

 def _parse_fasta(self, filehandle, sep="|"):
     """
     Parse a fasta file. The header is split into fields on 'sep'. The
     sequence is added as a final field.

     :param filehandle: an object with a ``read()`` method; an object
         without one is handed to the parser as-is (used in tests).
     :param sep: separator used to split each header line into fields.
     :return: one list per entry: the header fields followed by the
         sequence.
     """
     p_header = parsec.string(">") >> parsec.regex("[^\n\r]*") << parsec.spaces()
     p_seq = (
         parsec.sepBy1(
             parsec.regex("[^>\n\r]*"), sep=parsec.regex("[\r\n\t ]+")
         ).parsecmap(concat)
         << parsec.spaces()
     )
     p_entry = p_header + p_seq
     p_fasta = parsec.many1(p_entry)
     log(f"Reading {file_str(filehandle)} as a fasta file:")
     # Guard only the ``read`` call. Wrapping the parse as well would treat
     # an AttributeError raised while parsing as "no read method" and
     # re-run the parser on the handle object, hiding the real error.
     try:
         text = filehandle.read()
     except AttributeError:
         # in case the input is passed in directly, e.g., in tests
         text = filehandle
     entries = p_fasta.parse(text)
     return [h.split(sep) + [q] for (h, q) in entries]

예제 #7

0

파일 보기

파일: ibis_parser.py 프로젝트: jdpatt/PyAMI

 def fn():
     """Parse an IBIS keyword written in square brackets at line start.

     The wordlets inside the brackets are joined with ``_``. When the
     enclosing ``kywrd`` is set and does not match case-insensitively, a
     failing parser carrying an explanatory description is returned.
     """
     yield regex(r"^\[", re.MULTILINE)
     # ``name`` gobbles up trailing space, hence ``name_only`` here.
     wordlets = yield sepBy1(name_only, one_of(" _"))
     yield string("]")
     yield ignore  # So that ``keyword`` functions as a lexeme.
     # Canonical form: "<wordlet1>_<wordlet2>_...".
     res = "_".join(wordlets)
     if kywrd and res.lower() != kywrd.lower():
         # The original author noted that a plain ``assert`` does not work
         # here, so a failing parser is returned instead.
         return fail.desc(f"Expecting: {kywrd}; got: {res}.")
     return res

예제 #8

0

파일 보기

파일: ibis_parser.py 프로젝트: jdpatt/PyAMI

def number():
    """Parse an IBIS numerical value and return it as a float.

    A trailing non-digit run is treated as a suffix: its first character
    is looked up in ``IBIS_num_suf`` and the replacement text is appended
    to the numeric part before conversion. An unknown suffix raises
    ``ParseError``.
    """
    token = word(
        regex(
            r"[-+]?[0-9]*\.?[0-9]+(([eE][-+]?[0-9]+)|([TknGmpMuf][a-zA-Z]*))?")
        << many(letter()))
    s = yield token
    suffix = re.search(r'[^\d]+$', s)
    if suffix is None:
        return float(s)
    ix = suffix.start()
    if s[ix] not in IBIS_num_suf:
        raise ParseError("IBIS numerical suffix", s[ix:], ix)
    return float(s[:ix] + IBIS_num_suf[s[ix]])

예제 #9

0

파일 보기

파일: sdfp_parser.py 프로젝트: crizzy9/py_tools

'''
""        => ""
"1"       => "One"
"234"     => "Two,Three,Four"
"10,000"  => "One,Zero,Zero,Zero,Zero"
"  4619 " => "Four,Six,One,Nine"
'''

examples = ['', '1', '234', '10,000', '  4619 ']
expected = [
    '', 'One', 'Two,Three,Four', 'One,Zero,Zero,Zero,Zero', 'Four,Six,One,Nine'
]

print(parsec.string("1").parse("111"))

# Input that carries no digit: whitespace runs and comma runs.
whitespace = parsec.regex(r'\s+')
commas = parsec.regex(r',+')
ignore = parsec.many((whitespace | commas))


def lexeme(p):
    """Return ``p`` followed by any amount of ignorable input."""
    return p << ignore


def _digit_word(digit, word):
    """Lexeme for the character ``digit`` whose result is ``word``."""
    return lexeme(parsec.string(digit)).result(word)


zero = _digit_word('0', 'Zero')
one = _digit_word('1', 'One')
two = _digit_word('2', 'Two')
three = _digit_word('3', 'Three')
four = _digit_word('4', 'Four')
five = _digit_word('5', 'Five')
six = _digit_word('6', 'Six')
seven = _digit_word('7', 'Seven')
eight = _digit_word('8', 'Eight')
nine = _digit_word('9', 'Nine')

예제 #10

0

파일 보기

import parsec as ps
import weakref
from wurlitzer import sys_pipes

__author__    = "Craig Ramsay"
__copyright__ = "Copyright 2019, Xilinx"
__email__     = "*****@*****.**"


# The HWH file includes a lot of information about all of the available code
# parameters. This includes nested lists, etc. so we use [parser
# combinators](https://en.wikipedia.org/wiki/Parsec_(parser)) to keep this
# manageable.


# Every token parser below is wrapped in ``_lexeme`` so that whitespace
# following the token is consumed as well.
_whitespace = ps.regex(r'\s*')
_lexeme = lambda p: p << _whitespace

_lbrace      = _lexeme(ps.string('{'))
_rbrace      = _lexeme(ps.string('}'))
_separator   = _lexeme(ps.regex(r'[ ,]'))
_name        = _lexeme(ps.regex(r'[\w]+'))
_num_hex     = _lexeme(ps.regex(r'0x[0-9a-fA-F]+')).parsecmap(lambda h: int(h, base=16))
# Integer pattern only. Reusing the float pattern here would let text such
# as "1.5" or "1e3" match and then make ``int()`` raise ValueError.
_num_int     = _lexeme(ps.regex(r'-?(0|[1-9][0-9]*)')).parsecmap(int)
_num_float   = _lexeme(ps.regex(r'-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?')).parsecmap(float)
# Brace-delimited lists: elements back to back, or separated by ' ' / ','.
_list_of     = lambda elems: _lbrace >> ps.many(elems) << _rbrace
_sep_list_of = lambda elems: _lbrace >> ps.sepBy(elems, _separator) << _rbrace

# A parameter value is a single integer or a braced list of integers.
_param_value = _num_int | _list_of(_num_int)

@ps.generate

예제 #11

0

파일 보기

 def int_number():
     """Parser for an optionally negated integer without leading zeros.

     The matched text is converted with ``int``.
     """
     digits = psc.regex('-?(0|[1-9][0-9]*)')
     return digits.parsecmap(int)

예제 #12

0

파일 보기

def number():
    """Lexeme for a decimal number with optional fraction and exponent.

    The matched text is converted with ``float``.
    """
    pattern = regex(r'-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?')
    return lexeme(pattern).parsecmap(float)

예제 #13

0

파일 보기

파일: rinexer.py 프로젝트: umeat/RINEX-Parser

def n_I(n):
    """Return a parser for an integer that is one to ``n`` digits long.

    The matched digits are converted with ``int``.
    """
    digit_run = '([0-9]){1,' + str(n) + '}'
    return p.regex(digit_run).parsecmap(int)

예제 #14

0

파일 보기

파일: codegen.py 프로젝트: xiaming9880/libvineyard

        if self.is_plain:
            return '%s%s' % (self.element_type, star_str)
        if self.is_list:
            return '[%s%s]' % (self.element_type, star_str)
        if self.is_dlist:
            return '[[%s%s]]' % (self.element_type, star_str)
        if self.is_set:
            return '{%s%s}' % (self.element_type, star_str)
        if self.is_dict:
            return '{%s: %s%s}' % (self.element_type[0], self.element_type[1],
                                   star_str)
        raise RuntimeError('Invalid codegen kind: %s' % self.kind)


# A type name, optionally qualified with ``::`` segments, wrapped in
# ``parsec.spaces()`` on both sides.
name_pattern = (
    parsec.spaces()
    >> parsec.regex(
        r'[_a-zA-Z][_a-zA-Z0-9<>, ]*(::[_a-zA-Z][_a-zA-Z0-9<>, ]*)*')
    << parsec.spaces()
)

# An optional '*'; the result is '' when it is absent.
star_pattern = (
    parsec.spaces()
    >> parsec.optional(parsec.string('*'), '')
    << parsec.spaces()
)

parse_meta = parsec.spaces().parsecmap(lambda _: CodeGenKind('meta'))

# Name followed by the optional star, shared by the two forms below.
_element = name_pattern + star_pattern

parse_plain = (
    parsec.spaces() >> _element << parsec.spaces()
).parsecmap(lambda value: CodeGenKind('plain', value))
parse_list = (
    parsec.string('[') >> _element << parsec.string(']')
).parsecmap(lambda value: CodeGenKind('list', value))
parse_dlist = (parsec.string('[[') >>
               (name_pattern + star_pattern) << parsec.string(']]')

예제 #15

0

파일 보기

import collections
import re

import parsec


# Result types produced by the parsers: inserted text, a new chapter,
# a new paragraph and a line break.
Text = collections.namedtuple('Text', ['text'])
NewChapter = collections.namedtuple('NewChapter', ['text'])
NewParagraph = collections.namedtuple('NewParagraph', [])
Break = collections.namedtuple('Break', [])

# Whitespace that follows a reserved word is parsed and thrown away.
whitespace = parsec.regex(r'\s*', re.MULTILINE)


def skip_whitespace(p):
    """Return ``p`` followed by ``whitespace``."""
    return p << whitespace


# '#break' and '#par' are plain reserved words with fixed results.
break_command = skip_whitespace(parsec.string('#break')).result(Break())
par_command = skip_whitespace(parsec.string('#par')).result(NewParagraph())

# Building block for the newch_command parser defined further down.
newch_parser = skip_whitespace(parsec.string('#newch'))
# Text is everything between reserved words: it must not start with one
# and it stops right before the next one (or at the end).
commands = '#newch|#par|#break'
text_parser = parsec.regex(
    '(?!({0}))(.+?)(?={0}|$)'.format(commands))


@parsec.Parser
def text_command(text, index):

예제 #16

0

파일 보기

 def header_field():
     """Parse one ``key: value`` header line into a one-entry dict.

     The key is everything up to the first ':'; the value is the rest of
     the line, stripped of surrounding whitespace.
     """
     key_parser = spaces >> parsec.regex('[^:]*') << spaces
     rest_of_line = parsec.ends_with(parsec.regex('[^\n]*'), parsec.string('\n'))

     k = yield key_parser
     yield parsec.string(':')
     v = yield rest_of_line
     return {k: v.strip()}

예제 #17

0

파일 보기

파일: calculator.py 프로젝트: crizzy9/py_tools

def number():
    """Lexeme for a decimal number: integer part plus optional fraction.

    The fraction is a single '.' followed by digits. A character class
    such as ``[.0-9]+`` would also accept text like "1.2.3", which
    ``float()`` cannot convert.
    """
    return lexeme(parsec.regex(r'-?(0|[1-9][0-9]*)([.][0-9]+)?'))

예제 #18

0

파일 보기

파일: calculator.py 프로젝트: crizzy9/py_tools

import parsec

# "1 - (2 * (3 + 4) / 5)" => -1.8

example = "1 - (2 * (3 + 4) / 5)"
expected = -1.8

whitespace = parsec.regex(r'\s*')


def lexeme(p):
    """Return ``p`` followed by ``whitespace``."""
    return p << whitespace


def _symbol(text):
    """Lexeme for the literal ``text``."""
    return lexeme(parsec.string(text))


plus = _symbol('+')
minus = _symbol('-')
divide = _symbol('/')
multiply = _symbol('*')
lbrace = _symbol('(')
rbrace = _symbol(')')


def number():
    """Lexeme for a decimal number: integer part plus optional fraction.

    The fraction is a single '.' followed by digits. A character class
    such as ``[.0-9]+`` would also accept text like "1.2.3", which
    ``float()`` cannot convert.
    """
    return lexeme(parsec.regex(r'-?(0|[1-9][0-9]*)([.][0-9]+)?'))


@parsec.generate
def addition():
    """Parse ``value + value`` and return the sum as a float."""
    left = yield value
    yield plus
    right = yield value
    return float(left) + float(right)

예제 #19

0

파일 보기

파일: animal.py 프로젝트: flu-crew/octofludb

import parsec as p
import re


def clean_host(x):
    """Normalise a host description.

    The text is stripped and lower-cased, and everything from the first
    ';' to the end of that line is removed. If the result contains
    "scrofa", "pig", "porcine" or "boar" it becomes "swine"; otherwise,
    if it contains "sapiens" it becomes "human". Anything else is
    returned as cleaned.
    """
    host = re.sub(";.*", "", x.strip().lower())
    swine_markers = ("scrofa", "pig", "porcine", "boar")
    if any(marker in host for marker in swine_markers):
        return "swine"
    if "sapiens" in host:
        return "human"
    return host


# Case-insensitive match of the two host labels "swine" and "human".
_HOST_PATTERN = re.compile("swine|human", re.IGNORECASE)
p_host = p.regex(_HOST_PATTERN)

예제 #20

0

파일 보기

파일: machine.py 프로젝트: kevinkjt2000/advent-of-code

def parse_program():
    """Parse a program: a sequence of ``nop``/``acc``/``jmp`` instructions.

    Each instruction is an opcode, one space character and a signed
    integer, optionally followed by a newline. The result is the list of
    parsed instructions.
    """
    signed_int = p.regex(r"[+-][0-9]+").parsecmap(int)

    def instruction(opcode):
        # Opcode, a space, then the integer argument.
        return (p.string(opcode) << p.space()) + signed_int

    expression = instruction("nop") | instruction("acc") | instruction("jmp")
    line_end = p.optional(p.string("\n"))
    program = yield p.many(expression << line_end)
    return program

예제 #21

0

파일 보기

import re

from parsec import string, sepBy, regex, sepEndBy1, spaces, Parser, separated, Value, generate, many1, digit

# A double-quoted field; the quotes are part of the matched text.
quoted_string = regex(r'"[^"]*"', re.MULTILINE)
# A cell is a quoted field or, failing that, a (possibly empty) run that
# contains no comma, quote or line-break character.
cell = quoted_string ^ regex(r'[^,"\r\n]*')
# NOTE(review): this matches "\r\n" or a lone "\r" but not a lone "\n" --
# confirm that LF-only input is not expected.
end_line = regex(r'\r\n?', re.MULTILINE)
# Cells separated by a comma and any spaces after it.
row = sepBy(cell, string(",") << spaces())
header = row
# A header row, a line end, then one or more rows separated (and optionally
# terminated) by line ends.
csv = (header << end_line) + sepEndBy1(row, end_line)


def parser_by_count(value):
    """Return a parser for exactly ``int(value)`` comma-separated cells.

    :param value: text of the leading count cell.
    :return: a ``separated`` parser with ``mint == maxt == int(value)``,
        or a parser that always fails with "expected a number" when
        ``value`` is not an integer literal.
    """
    # Only the conversion is guarded, so a ValueError raised elsewhere is
    # not misreported as a bad count.
    try:
        num_cells = int(value)
    except ValueError:
        # Parser callables are invoked as fn(text, index); the failure
        # must be reported at ``index``, not at the text.
        return Parser(
            lambda text, index: Value.failure(index, "expected a number"))
    return separated(cell,
                     string(",") << spaces(),
                     mint=num_cells,
                     maxt=num_cells)


# The first cell of a row, followed by a comma and any spaces after it.
first_cell = (cell << string(",") << spaces())
# The first cell's value is passed to ``parser_by_count`` to obtain the
# parser that continues from there.
counting_parser = first_cell.bind(parser_by_count)


# @generate
def matrix_parser():
    cell = many1(digit()).parsecmap(''.join).parsecmap(int)

예제 #22

0

파일 보기

def day_minus_one(day):
    """Return the ``(month, day)`` pair one day before ``day``.

    Stepping back from the first of a month yields the last day of the
    previous month, looked up in ``max_days_in_month``; January wraps
    around to December.
    """
    month, day_of_month = day
    if day_of_month - 1 > 0:
        return (month, day_of_month - 1)

    previous_month = month - 1
    if previous_month <= 0:
        previous_month = 12
    return (previous_month, max_days_in_month[previous_month])


# === DAYS AND RANGES === #

# One or two digits, either full-width or ASCII.
number = parsec.regex(r"[０-９0-9]{1,2}")
# A number followed by "月"; when that does not match, the empty string.
month_def = (number << parsec.string("月")) ^ parsec.string("")
# A number optionally followed by "日".
day_def = number << parsec.regex(r"日?")


@parsec.generate
def range_def():
    start_month = yield month_def
    start_day = yield day_def

    yield parsec.string("～")

    end_month = yield month_def
    end_day = yield day_def

    start_month = int(start_month) if start_month else None

예제 #23

0

파일 보기

파일: msch.py 프로젝트: notoriginal1/mindustry-modding

class Schematics(namedtuple("Schematics", "width height tags tiles")):
    """Record with the fields ``width``, ``height``, ``tags`` and ``tiles``."""


HEADER = b"msch"
VERSION = b"\x00"

########################################
## Reader
########################################

header = string(HEADER)
version = string(VERSION)

everything = regex(b"(?s).*")  # (?s) so that newlines are matched too


def _unpacked(pattern, fmt):
    """Match ``pattern`` and decode the matched bytes with ``unpack(fmt, ...)``."""
    return regex(pattern).parsecmap(lambda raw: unpack(fmt, raw)[0])


byte = regex(b"(?s).")
char = byte.parsecmap(lambda raw: unpack("b", raw)[0])
short = _unpacked(b"(?s).{2}", ">h")
intp = _unpacked(b"(?s).{4}", ">i")


def nbytes(x):
    """Parser for ``x`` single bytes, joined into one bytes object."""
    return times(byte, x).parsecmap(lambda chunks: b"".join(chunks))


@generate
def utf8_bytes():
    """Parse a UTF-8 string that is prefixed with its length as a ``short``."""
    size = yield short
    raw = yield nbytes(size)
    return raw.decode("utf8")

예제 #24

0

파일 보기

파일: ami_parse.py 프로젝트: jdpatt/PyAMI

                    res[pname] = self.get(pname + "_")[pname + "_"]
                except:  # If we get an exception, we have an ordinary (i.e. - not mapped) trait.
                    res[pname] = self.get(pname)[pname]
        elif isinstance(param, dict):  # We received a dictionary of subparameters, in 'param'.
            subs = {}
            for sname in param.keys():
                subs.update(self.input_ami_param(param, sname))
            res[pname] = subs
        return res

#####
# AMI file parser.
#####

# Input skipped between tokens: runs of whitespace, and comments that start
# with "|" and extend to the end of the line.
whitespace = regex(r"\s+", re.MULTILINE)
comment = regex(r"\|.*")
ignore = many((whitespace | comment))


def lexeme(p):
    """Return ``p`` followed by ``ignore``, so ignorable input after it is skipped."""
    followed_by_ignorable = p << ignore
    return followed_by_ignorable

def int2tap(x):
    """Convert an integer string to a tap position name.

    A leading '-' gives ``"pre"`` plus the remaining characters; anything
    else gives ``"post"`` plus the whole string.
    """
    return "pre" + x[1:] if x[0] == '-' else "post" + x

예제 #25

0

파일 보기

Uses the parser-combinator library Parsec (https://github.com/sighingnow/parsec.py)

To install from pypi::

    pip install relaxedjson

To install as an egg-link in development mode::

    python setup.py develop -N

"""

import re
from parsec import (sepBy, regex, string, generate, many, endBy)

whitespace = regex(r'\s*', re.MULTILINE)


def lexeme(p):
    """Return ``p`` followed by ``whitespace``."""
    return p << whitespace


def _symbol(text):
    """Lexeme for the literal ``text``."""
    return lexeme(string(text))


# A /* ... */ comment; the result is the text between the delimiters.
_comment_body = regex(r'(?:[^*]|\*(?!\/))+', re.MULTILINE)
comment = lexeme(string('/*') >> _comment_body << string('*/'))

lbrace = _symbol('{')
rbrace = _symbol('}')
lbrack = _symbol('[')
rbrack = _symbol(']')
colon = _symbol(':')
comma = _symbol(',')
true = _symbol('true').result(True)
false = _symbol('false').result(False)

예제 #26

0

파일 보기

파일: parsers.py 프로젝트: uktrade/tamato

    def __init__(
        self,
        duty_expressions: Iterable[DutyExpression],
        monetary_units: Iterable[MonetaryUnit],
        permitted_measurements: Iterable[Measurement],
        component_output: Type[TrackedModel] = MeasureComponent,
    ):
        """Build the parsers for monetary units, measurements and sentences.

        :param duty_expressions: each one is turned into a component parser;
            the parsers are joined in order of ascending ``sid``. When
            empty, the sentence parser is built from ``fail`` alone.
        :param monetary_units: combined with ``try_choice`` into
            ``self._monetary_unit``; ``fail`` is used when empty.
        :param permitted_measurements: combined with ``try_choice`` into
            ``self._measurement``, those with a unit qualifier first;
            ``fail`` is used when empty.
        :param component_output: called with the keyword arguments
            ``duty_expression``, ``duty_amount``, ``monetary_unit`` and
            ``component_measurement`` for every parsed component.
        """
        # Decimal numbers are a sequence of digits (without a leading zero)
        # followed optionally by a decimal point and a number of digits (we have seen
        # some percentage values have three decimal digits).  Money values are similar
        # but only 2 digits are allowed after the decimal.
        # TODO: work out if float will cause representation problems.
        decimal = regex(r"(0|[1-9][0-9]*)([.][0-9]+)?").parsecmap(float)

        # Specific duty amounts reference various types of unit.
        # For monetary units, the expression just contains the same code as is
        # present in the sentence. Percentage values correspond to no unit.
        self._monetary_unit = (reduce(try_choice, map(code, monetary_units))
                               if monetary_units else fail)
        percentage_unit = token("%").result(None)

        # We have to try and parse measurements with qualifiers first
        # else we may match the first part of a unit without the qualifier
        with_qualifier = [
            m for m in permitted_measurements
            if m.measurement_unit_qualifier is not None
        ]
        no_qualifier = [
            m for m in permitted_measurements
            if m.measurement_unit_qualifier is None
        ]
        measurements = [
            measurement(m) for m in chain(with_qualifier, no_qualifier)
        ]
        self._measurement = reduce(try_choice,
                                   measurements) if measurements else fail

        # Each measure component can have an amount, monetary unit and measurement.
        # Which expression elements are allowed in a component is controlled by
        # the duty expression applicability codes. We convert the duty expressions
        # into parsers that will only parse the elements that are permitted for this type.
        def component(duty_exp: DutyExpression) -> Parser:
            """Matches a string prefix and returns the associated type id, along
            with any parsed amounts and units according to their applicability,
            as a 4-tuple of (id, amount, monetary unit, measurement)."""
            prefix = duty_exp.prefix
            has_amount = duty_exp.duty_amount_applicability_code
            has_measurement = duty_exp.measurement_unit_applicability_code
            has_monetary = duty_exp.monetary_unit_applicability_code

            # NOTE: ``id`` shadows the builtin of the same name inside this helper.
            id = token(prefix).result(duty_exp)
            this_value = if_applicable(has_amount, decimal)
            this_monetary_unit = if_applicable(
                has_monetary,
                spaces() >> self._monetary_unit,
                # We must match the percentage if the amount should be there
                # and no monetary unit matches.
                default=(percentage_unit
                         if has_amount == ApplicabilityCode.MANDATORY else
                         optional(percentage_unit)),
            )
            this_measurement = if_applicable(
                has_measurement,
                optional(token("/")) >> self._measurement,
            )

            # NOTE: this local reuses the name of the enclosing helper function.
            component = joint(id, this_value, this_monetary_unit,
                              this_measurement)
            # Same shape as ``component`` but with no amount or monetary unit.
            measurement_only = joint(id, this_measurement).parsecmap(
                lambda t: (t[0], None, None, t[1]), )

            # It's possible for units that contain numbers (e.g. DTN => '100 kg')
            # to be confused with a simple specific duty (e.g 100.0 + kg)
            # So in the case that amounts are only optional and measurements are present,
            # we have to check for just measurements first.
            return (measurement_only
                    ^ component if has_amount == ApplicabilityCode.PERMITTED
                    and has_measurement != ApplicabilityCode.NOT_PERMITTED else
                    component).parsecmap(
                        lambda exp: component_output(
                            duty_expression=exp[0],
                            duty_amount=exp[1],
                            monetary_unit=exp[2],
                            component_measurement=exp[3],
                        ), )

        # Duty sentences can only be of a finite length – each expression may only
        # appear once and in order of increasing expression id. So we try all expressions
        # in order and filter out the None results for ones that did not match.
        expressions = ([
            component(exp) ^ empty
            for exp in sorted(duty_expressions, key=lambda e: e.sid)
        ] if duty_expressions else [fail])
        self._sentence = joint(*expressions).parsecmap(
            lambda sentence: [exp for exp in sentence if exp is not None], )

예제 #27

0

파일 보기

 def string_part():
     """Parser for a run of characters that are neither ``end_quote`` nor a backslash."""
     pattern = r'[^{quote}\\]+'.format(quote=end_quote)
     return regex(pattern)

예제 #28

0

파일 보기

def is_any(parsers):
    """Fold ``parsers`` together with ``|=``, left to right.

    Returns ``None`` for an empty sequence (the original author marked
    this case with a question mark).
    """
    if not parsers:
        return None
    combined = parsers[0]
    remaining = parsers[1:]
    for alternative in remaining:
        combined |= alternative
    return combined


def is_a(enum_cls):
    """Parser that matches any member name of ``enum_cls``.

    The matched text is passed to ``enum_cls`` to produce the result.
    """
    member_parsers = [string(member_name)
                      for member_name in enum_cls.__members__]
    return is_any(member_parsers).parsecmap(enum_cls)


# A word character that is not a digit, followed by any word characters.
identifier = lexeme(regex(r'[^\d\W]\w*'))
# Decimal number with optional fraction and exponent; the result is the
# matched text (no numeric conversion is applied here).
number = lexeme(
    regex('-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?'))  # eugh


@lexeme
@generate
def quoted_string():
    """Parse a double-quoted string and return the text between the quotes.

    No escape sequences are recognised: the body is every character up to
    the next '"'.
    """
    quote = string('"')
    yield quote
    body = yield many(none_of('"'))
    yield quote
    return ''.join(body)


# A literal is a number or, failing that, a quoted string.
literal = number | quoted_string
# A value is an identifier or, failing that, a literal.
value = identifier | literal

예제 #29

0

파일 보기

def object_pair():
    """Parse one ``key : value`` member and produce ``(key, value)``.

    The key is a ``quoted`` string or a bare name (a letter followed by
    letters, digits, '-' or '_'). Comments are allowed on either side of
    the colon.
    """
    bare_name = lexeme(regex(r'[a-zA-Z][-_a-zA-Z0-9]*'))
    name = yield quoted | bare_name
    yield many(comment) << colon << many(comment)
    payload = yield value
    # NOTE(review): if ``StopGenerator`` derives from ``StopIteration``,
    # raising it inside a generator becomes a RuntimeError on Python 3.7+
    # (PEP 479) -- confirm against the parsec version in use.
    raise StopGenerator((name, payload))

예제 #30

0

파일 보기

파일: gas_diff_stats.py 프로젝트: vdavalon01/solidity

def number() -> int:
    """Parse number.

    Returns a parser that matches one or more decimal digits and converts
    the matched text with ``int``.

    NOTE(review): the ``-> int`` annotation describes the parsed value;
    the function itself returns a parser object.
    """
    # ``+`` rather than ``*``: an empty match would be handed to
    # ``int('')``, which raises ValueError instead of a parse failure.
    return regex(r"([0-9]+)").parsecmap(int)