def __init__(self):
    """Assemble the single-line parser and expose it as ``self.parse``.

    A line is tried as a mapping first, then as a comment, and finally as
    whitespace-only input.
    """
    #
    # Parser combinators
    #
    whitespace = spaces()
    padding = optional(whitespace)

    # Whitespace-only input is reported as the EMPTY marker.
    blank = whitespace.parsecmap(lambda _: EMPTY)

    # '%%%' introduces a comment; the remainder of the line is its text.
    remark = (string('%%%') >> regex('.*')).parsecmap(Comment)

    # 'U+' followed by upper-case hex digits, converted to an int.
    hex_value = regex('[0-9A-F]+').parsecmap(lambda digits: int(digits, 16))
    code_point = string('U+') >> hex_value
    code_points = sepBy(code_point, whitespace).parsecmap(tuple)

    # '=>' with optional whitespace on either side.
    separator = padding >> string('=>') << padding

    # <code points> => <code points> [comment]
    entry = joint(
        code_points << separator,
        code_points,
        optional(remark),
    ).parsecmap(lambda parts: Mapping(parts[0], parts[1], parts[2]))

    any_line = try_choice(entry, try_choice(remark, blank))
    self.parse = any_line.parse
def field():
    """Parse one field declaration ending in ';' and build a ``Field``.

    Generator-style parser body: every ``yield`` hands a parser to whatever
    drives this generator and receives that parser's result back.
    """
    qual = yield qualifier
    kind = yield field_type
    name = yield identifier
    yield equals  # matched for syntax only; its value is not used
    number = yield field_id
    # Options may be absent, in which case an empty list is used.
    opts = yield optional(field_options, default_value=[])
    yield lexeme(string(';'))
    return Field(qual, kind, name, number, opts)
def component(duty_exp: DutyExpression) -> Parser:
    """Build the parser for a single duty expression.

    The parser matches the expression's prefix and then, depending on the
    expression's applicability codes, an amount, a monetary (or percentage)
    unit and a measurement. The matched pieces are passed to
    ``component_output`` as duty_expression, duty_amount, monetary_unit and
    component_measurement.
    """
    prefix = duty_exp.prefix
    amount_code = duty_exp.duty_amount_applicability_code
    measurement_code = duty_exp.measurement_unit_applicability_code
    monetary_code = duty_exp.monetary_unit_applicability_code

    # Matching the prefix yields the duty expression itself.
    expression = token(prefix).result(duty_exp)
    amount = if_applicable(amount_code, decimal)

    # We must match the percentage if the amount should be there
    # and no monetary unit matches.
    if amount_code == ApplicabilityCode.MANDATORY:
        fallback_unit = percentage_unit
    else:
        fallback_unit = optional(percentage_unit)
    monetary = if_applicable(
        monetary_code,
        spaces() >> self._monetary_unit,
        default=fallback_unit,
    )

    measurement = if_applicable(
        measurement_code,
        optional(token("/")) >> self._measurement,
    )

    full_form = joint(expression, amount, monetary, measurement)

    # It's possible for units that contain numbers (e.g. DTN => '100 kg')
    # to be confused with a simple specific duty (e.g 100.0 + kg)
    # So in the case that amounts are only optional and measurements are present,
    # we have to check for just measurements first.
    measurement_first = (
        amount_code == ApplicabilityCode.PERMITTED
        and measurement_code != ApplicabilityCode.NOT_PERMITTED
    )
    if measurement_first:
        bare_measurement = joint(expression, measurement).parsecmap(
            lambda pair: (pair[0], None, None, pair[1]),
        )
        chosen = bare_measurement ^ full_form
    else:
        chosen = full_form

    return chosen.parsecmap(
        lambda fields: component_output(
            duty_expression=fields[0],
            duty_amount=fields[1],
            monetary_unit=fields[2],
            component_measurement=fields[3],
        ),
    )
def fn():
    """Parse a single component pin line.

    Returns ``("<name>(<signal>)", (model, rlc_map))``. ``rlc_map`` pairs the
    enclosing scope's ``rlcs`` with up to three numbers found in the rest of
    the line, and is empty when ``rlcs`` is empty.
    """
    pin_name, signal = yield count(name, 2)
    model = yield name_only
    remainder = yield rest_line
    # The trailing numbers are optional; fall back to an empty list.
    values = optional(count(number, 3), []).parse(remainder)
    rlc_map = dict(zip(rlcs, values)) if rlcs else {}
    label = pin_name + "(" + signal + ")"
    return (label, (model, rlc_map))
def typminmax():
    """Parse a Typ value, optionally followed by Min and Max values.

    Returns a list that starts with the Typ value; Min and Max follow when
    present. Two ``na`` entries, or nothing at all, contribute no elements.
    """
    typical = yield number
    if DBG:
        print(f"Typ.: {typical}")
    limits = yield optional(count(number, 2) | count(na, 2).result([]), [])
    if DBG:
        print(f"Min./Max.: {limits}")
    yield ignore  # So that ``typminmax`` behaves as a lexeme.
    return [typical, *limits]
def pins():
    """Parse the [Pin] section of a [Component].

    Returns a dict built from the parsed pin entries, leaving out entries
    whose model name is POWER, GND or NC (compared case-insensitively).
    """
    excluded_models = ("POWER", "GND", "NC")

    def is_kept(entry):
        # Each entry has the shape (key, (model, extra)).
        _, (model, _) = entry
        return model.upper() not in excluded_models

    # Column headers; only the optional R/L/C column names are kept.
    yield lexeme(string("signal_name")) << lexeme(string("model_name"))
    rlcs = yield optional(count(rlc, 3), [])
    entries = yield many1(pin(rlcs))
    return dict(entry for entry in entries if is_kept(entry))
def fix_image_url(url, repo_name, branch="master"):
    """Rewrite a GitHub image link so that it points at the raw file.

    Links on ``github.com`` are not usable as images because they return
    *html* content; the image itself is served from
    ``raw.githubusercontent.com``. For example:

        - This returns html:
          https://github.com/Retrothopter/Niobium-Nanotech/blob/master/Preview.png
        - This returns a png:
          https://raw.githubusercontent.com/Retrothopter/Niobium-Nanotech/master/Preview.png

    Links without a domain are relative to the repository and are resolved
    against it. For example:

        - preview.png
        - sprites/preview.png
        - /sprites/preview.png

    Args:
        url: The link to fix.
        repo_name: Repository in ``owner/name`` form.
        branch: Branch the link refers to. Defaults to ``master``; pass the
            repository's default branch (e.g. ``main``) when it differs.

    Returns:
        The raw-content url when ``url`` is a ``github.com`` blob link into
        ``repo_name`` on ``branch`` or a relative path; otherwise ``url``
        unchanged.
    """
    from urllib.parse import urlparse

    parts = urlparse(url)
    if parts.netloc == "raw.githubusercontent.com":
        return url

    raw_root = f"https://raw.githubusercontent.com/{repo_name}/{branch}"

    if parts.netloc == "github.com":
        # Expected path: [/]<repo_name>/blob/<branch>/<file path>
        path = parts.path
        if path.startswith("/"):
            path = path[1:]
        blob_prefix = f"{repo_name}/blob/{branch}/"
        file_path = path[len(blob_prefix):] if path.startswith(blob_prefix) else ""
        # Anything else on github.com (other repo, no file part) is left alone.
        return f"{raw_root}/{file_path}" if file_path else url

    if parts.netloc == "":
        # Drop leading slashes so a rooted relative path does not produce
        # ".../<branch>//sprites/preview.png".
        return f"{raw_root}/{parts.path.lstrip('/')}"

    return url
def fix_image_url(url, repo_name, branch="master"):
    """Fix a GitHub url that is supposed to point to an image.

    Links on ``github.com`` are html pages, whereas the image itself is
    served from ``raw.githubusercontent.com``. For example, this link is an
    html page and gets rewritten:

        - https://github.com/Retrothopter/Niobium-Nanotech/blob/master/Preview.png

    Links without a domain are relative and therefore unusable on their own,
    which is also why a repo name is required. For example:

        - preview.png
        - sprites/preview.png
        - /sprites/preview.png

    Args:
        url: The link to fix.
        repo_name: Repository in ``owner/name`` form.
        branch: Branch the link refers to. Defaults to ``master``; pass the
            repository's default branch (e.g. ``main``) when it differs.

    Returns:
        The raw-content url when ``url`` is a ``github.com`` blob link into
        ``repo_name`` on ``branch`` or a relative path; otherwise ``url``
        unchanged.
    """
    from urllib.parse import urlparse

    parsed = urlparse(url)
    host = parsed.netloc
    if host == "raw.githubusercontent.com":
        return url

    raw_base = f"https://raw.githubusercontent.com/{repo_name}/{branch}"

    if host == "github.com":
        # Expected path: [/]<repo_name>/blob/<branch>/<file path>
        repo_path = parsed.path[1:] if parsed.path.startswith("/") else parsed.path
        marker = f"{repo_name}/blob/{branch}/"
        if repo_path.startswith(marker) and len(repo_path) > len(marker):
            return f"{raw_base}/{repo_path[len(marker):]}"
        # Any other github.com link (other repo, no file part) is left alone.
        return url

    if host == "":
        # Strip leading slashes so "/sprites/preview.png" does not become
        # ".../<branch>//sprites/preview.png".
        return f"{raw_base}/{parsed.path.lstrip('/')}"

    return url
# NOTE(review): the statements down to the ``raise`` are the tail of a method
# whose header (and the ``if`` guarding the first ``return``) lies above this
# chunk; the indentation below is reconstructed - confirm against the full file.
            return '[%s%s]' % (self.element_type, star_str)
        if self.is_dlist:
            return '[[%s%s]]' % (self.element_type, star_str)
        if self.is_set:
            return '{%s%s}' % (self.element_type, star_str)
        if self.is_dict:
            return '{%s: %s%s}' % (self.element_type[0], self.element_type[1], star_str)
        # None of the kind checks above matched.
        raise RuntimeError('Invalid codegen kind: %s' % self.kind)


# A name that may be '::'-qualified; each segment starts with a letter or
# underscore and may contain '<', '>', ',' and spaces. Surrounding whitespace
# is consumed.
name_pattern = parsec.spaces() >> parsec.regex(
    r'[_a-zA-Z][_a-zA-Z0-9<>, ]*(::[_a-zA-Z][_a-zA-Z0-9<>, ]*)*'
) << parsec.spaces()

# An optional '*' surrounded by whitespace; yields '' when the star is absent.
star_pattern = parsec.spaces() >> parsec.optional(parsec.string('*'), '') << parsec.spaces()

# Whitespace-only input yields the 'meta' kind.
parse_meta = parsec.spaces().parsecmap(lambda _: CodeGenKind('meta'))

# Each parser below passes the combined result of ``name_pattern + star_pattern``
# to CodeGenKind (presumably a (name, star) pair - confirm against parsec's ``+``).

# Bare name: 'plain' kind.
parse_plain = (parsec.spaces() >> (name_pattern + star_pattern) << parsec.spaces()
               ).parsecmap(lambda value: CodeGenKind('plain', value))

# '[name]': 'list' kind.
parse_list = (parsec.string('[') >> (name_pattern + star_pattern) << parsec.string(']')
              ).parsecmap(lambda value: CodeGenKind('list', value))

# '[[name]]': 'dlist' kind.
parse_dlist = (parsec.string('[[') >> (name_pattern + star_pattern) << parsec.string(']]')
               ).parsecmap(lambda value: CodeGenKind('dlist', value))

# '{name}': 'set' kind.
parse_set = (parsec.string('{') >> (name_pattern + star_pattern) << parsec.string('}')
             ).parsecmap(lambda value: CodeGenKind('set', value))
def sample():
    """Parse one sample: two floating-point fields and an optional newline.

    Returns the pair ``(fwhm, level)`` converted to ``float``.
    """
    width_text = yield spaces >> floating << spaces
    level_text = yield floating << spaces
    # The line terminator may be missing.
    yield parsec.optional(parsec.string('\n'))
    return float(width_text), float(level_text)
def parse_program():
    """Parse a program: a sequence of ``nop``/``acc``/``jmp`` instructions.

    Each instruction is an opcode, one whitespace character and a signed
    integer, optionally followed by a newline. The result is whatever
    ``p.many`` yields for the sequence of instructions.
    """
    # The sign is mandatory: "+3" or "-12".
    integer_arg = p.regex(r"[+-][0-9]+").parsecmap(int)

    def instruction(opcode):
        # <opcode><space><signed int>; the space itself is discarded.
        return (p.string(opcode) << p.space()) + integer_arg

    expression = instruction("nop") | instruction("acc") | instruction("jmp")
    line = expression << p.optional(p.string("\n"))
    program = yield p.many(line)
    return program
#
from __future__ import absolute_import
from __future__ import print_function

import logging

from parsec import joint
from parsec import optional
from parsec import regex
from parsec import string
from parsec import spaces
from parsec import sepBy
from parsec import sepBy1

logger = logging.getLogger(__name__)

# Whitespace that may be absent, and the '->' separator padded with it.
optionalspaces = optional(spaces())
arrow = optionalspaces >> string('->') << optionalspaces

# An identifier, or one of the special names '<init>' / '<clinit>'.
identifier = (
    regex('[a-zA-Z_$][a-zA-Z_$0-9]*')
    ^ string('<init>')
    ^ string('<clinit>')
)

# '$'-separated and '.'-separated identifier sequences.
className = sepBy1(identifier, string('$'))
packagedFullName = sepBy1(identifier, string('.'))

# The dotted name joined back into a single string.
packagedClassName = packagedFullName.parsecmap('.'.join)

# A type is a dotted name (or a lower-case word), optionally followed by '[]'.
typeName = packagedClassName | regex('[a-z]+')
javatype = joint(typeName, optional(string('[]')))

methodName = identifier

# '(' type, type, ... ')' with optional whitespace around each type.
methodArguments = (
    string('(')
    >> sepBy(optionalspaces >> javatype << optionalspaces, string(','))
    << string(')')
)