class KarmaParser(TextParsers):
    """Grammar that extracts karma operations such as ``topic++`` from text.

    A karma item is a topic, immediately followed by a two-character
    operator, optionally followed by a reason.  Anything that is not part of
    a karma item is consumed one character at a time and mapped to ``None``
    so that ``parse_all`` can scan arbitrary text.
    """

    # Fallback: swallow a single character (whatever ``.`` matches).
    anything = reg(r".") > constant(None)

    # --- Topics -----------------------------------------------------------
    # Both forms use a lookahead so the topic must be directly followed by
    # two operator characters.
    word_topic = reg(r'[^"\s]+?(?=[+-]{2})')
    string_topic = reg(r'".*?(?<!\\)(\\\\)*?"(?=[+-]{2})')
    # Result is [text, was_quoted]; quoted topics lose their outer quotes.
    topic = (
        (word_topic > (lambda bare: [bare, False]))
        | (string_topic > (lambda quoted: [quoted[1:-1], True]))
    )

    # --- Operators --------------------------------------------------------
    # Lookbehind/lookahead reject runs of more than two +/- characters.
    op_positive = reg(r"(?<![+-])\+\+(?![+-])") > constant(
        KarmaOperation.POSITIVE
    )
    op_neutral = (
        reg(r"(?<![+-])\+-(?![+-])") | reg(r"(?<![+-])-\+(?![+-])")
    ) > constant(KarmaOperation.NEUTRAL)
    op_negative = reg(r"(?<![+-])--(?![+-])") > constant(
        KarmaOperation.NEGATIVE
    )
    operator = op_positive | op_neutral | op_negative

    # --- Reasons ----------------------------------------------------------
    # "(reason)" and "\"reason\"" both drop their delimiters.
    bracket_reason = reg(r"\(.+?\)") > (lambda wrapped: wrapped[1:-1])
    quote_reason = reg(r'".*?(?<!\\)(\\\\)*?"(?![+-]{2})') > (
        lambda wrapped: wrapped[1:-1]
    )
    # Case-insensitive lead-in words for a free-text reason.
    reason_words = reg(r"(?i)because") | reg(r"(?i)for")
    text_reason = reason_words >> (reg(r'[^",]+') | quote_reason)
    reason = bracket_reason | quote_reason | text_reason

    # --- Entry points -----------------------------------------------------
    # make_karma / filter_out_none are defined outside this class.
    karma = (topic & operator & opt(reason)) > make_karma
    parse_all = rep(karma | anything) > filter_out_none
class PrologParser(TextParsers, whitespace='[ \t\n]*'):
    """Grammar for a small Prolog-like language.

    Terminals are converted to tagged values: identifiers become
    ``["ID", name]``, literals become ``["Atom", name]`` and punctuation
    becomes short string tags ("DIS", "CON", "DOT", "TSTILE").
    """

    module = lit('module') > constant("module")
    literal = reg(r'[a-z_][a-zA-Z_0-9]*') > (lambda x: ["Atom", str(x)])
    id = reg(r'[a-z_][a-zA-Z_0-9]*') > (lambda x: ["ID", str(x)])
    # The predicate sees the *converted* value of ``id``, i.e. ["ID", name].
    # Comparing against ['module'] could never be equal, which let the
    # keyword ``module`` through as an ordinary identifier; compare against
    # the tagged form instead.
    identificator = pred(id, lambda x: x != ["ID", "module"], 'ID')
    disunction = lit(';') > constant("DIS")
    conjunction = lit(',') > constant("CON")
    dot = lit('.') > constant("DOT")
    lbr = lit('(')
    rbr = lit(')')
    tstile = lit(':-') > constant("TSTILE")
    # ---------------------------------------------------------------
    # Module declaration: ``module <identifier> .``
    mod = module & identificator & dot
    # ---------------------------------------------------------------
    # Clause: ``head :- body .`` or a bare ``head .``
    head = head_atom & tstile & expression & dot | head_atom & dot
    # ';' binds looser than ',' (expression -> M -> P); both recurse on the
    # right-hand side.
    expression = M & disunction & expression | M
    M = P & conjunction & M | P
    P = lbr & expression & rbr | atom1
    atom = head_atom & dot
    # Atoms: a name optionally followed by arguments, which may themselves
    # be atoms or parenthesised atoms.
    atom1 = literal & atom2 | literal
    head_atom = identificator & atom2 | identificator
    atom2 = lbr & atom3 & rbr & atom2 | atom1 | lbr & atom3 & rbr
    atom3 = atom1 | lbr & atom3 & rbr
class FormatTextParsers(TextParsers, whitespace=None):
    """Grammar for compact format strings made of mode letters.

    ``d`` maps to ``Mode.dense`` and ``s`` to ``Mode.compressed``.  A format
    is either a bare run of mode letters, or a run of (mode letter, integer)
    pairs.  No whitespace is skipped.
    """

    integer = reg(r'[0-9]+') > int

    dense = lit('d') > constant(Mode.dense)
    compressed = lit('s') > constant(Mode.compressed)
    mode = dense | compressed

    # Both variants are anchored with ``eof`` so each one must consume the
    # entire input to succeed.
    format_without_orderings = (rep(mode) << eof) > (
        lambda parsed_modes: Format(
            tuple(parsed_modes), tuple(range(len(parsed_modes)))
        )
    )
    # make_format_with_orderings is defined outside this class.
    format_with_orderings = (rep(mode & integer) << eof) > make_format_with_orderings

    format = format_without_orderings | format_with_orderings
class Homework(TextParsers):
    """Two arithmetic grammars over ``+``, ``*``, integers and parentheses.

    ``unprecedented`` treats both operators alike; ``multiplication`` nests
    ``addition`` inside it so that ``+`` groups before ``*``.  ``reduce`` is
    a helper defined outside this class.
    """

    number = reg(r"\d+") > int
    plus = lit("+") > constant(add)
    times = lit("*") > constant(mul)
    operator = plus | times

    # No precedence
    # NOTE: ``base`` is bound twice in this class body.  ``unprecedented``
    # captures this first ``base`` object when its expression is evaluated;
    # the later rebinding only affects the rules that follow it.
    base = ("(" >> unprecedented << ")") | number
    unprecedented = (base & rep(operator & base)) > reduce

    # Addition first, then multiplication
    base = ("(" >> multiplication << ")") | number
    addition = (base & rep(plus & base)) > reduce
    multiplication = (addition & rep(times & addition)) > reduce
class JsonParsers(TextParsers, whitespace=r'[ \t\n\r]*'):
    """JSON grammar producing plain Python values.

    Numbers become ``float``, the three keywords become ``False`` / ``True``
    / ``None``, and objects become ``dict``.  String parsing is delegated to
    ``JsonStringParsers.string``.
    """

    number = reg(r'-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?') > float

    false = lit('false') > constant(False)
    true = lit('true') > constant(True)
    null = lit('null') > constant(None)

    # NOTE(review): this attribute access happens while the class body is
    # executing, so JsonStringParsers must already be defined at that point.
    string = JsonStringParsers.string

    # Containers refer to ``value`` before it is assigned below.
    array = ('[' >> repsep(value, ',')) << ']'

    # An entry is a (key, value) pair; the ':' is discarded.
    entry = (string << ':') & value
    obj = (('{' >> repsep(entry, ',')) << '}') > dict

    value = number | false | true | null | string | array | obj
class TypicalUrlParsers(TextParsers, whitespace=None):
    """Grammar for typical URLs: scheme, user info, host, port, path, query
    and fragment.

    No whitespace is skipped.  The pieces are assembled by ``splat(Url)``;
    ``Url``, ``UserInfo``, ``DomainName``, ``IPv4Address`` and
    ``IPv6Address`` are defined outside this class.
    """

    # Percent-encoded byte: '%' followed by two upper-case hex digits,
    # converted to the corresponding character.
    encoded = '%' >> reg(r'[0-9A-F]{2}') > (lambda x: chr(int(x, 16)))

    scheme = reg(r'[A-Za-z][-+.A-Za-z0-9]*') > str.lower

    username = reg(r'[A-Za-z][-_+A-Za-z0-9]*([.][-_+A-Za-z0-9]+)*') > str.lower
    password = rep(reg(r'[-_.+A-Za-z0-9]+') | encoded) > ''.join
    userinfo = username << ':' & password > splat(UserInfo)

    # Labels are lower-cased, an optional trailing '.' is dropped, and the
    # label list is reversed before being handed to DomainName.
    domain_name = rep1sep(reg('[A-Za-z0-9]+([-]+[A-Za-z0-9]+)*') > str.lower, '.') << opt('.') > (
        lambda x: DomainName(list(reversed(x))))
    ipv4_address = reg(r'[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}') > IPv4Address
    ipv6_address = reg(r'\[([A-Fa-f0-9]{1,4}(:[A-Fa-f0-9]{1,4}){1,7})\]'
                       r'|\[(([A-Fa-f0-9]{1,4}:){1,7})(:[A-Fa-f0-9]{1,4}){1,6}\]'
                       r'|\[:(:[A-Fa-f0-9]{1,4}){0,7}\]'
                       r'|\[([A-Fa-f0-9]{1,4}:){0,7}:\]') > IPv6Address
    host = ipv4_address | ipv6_address | domain_name

    port = ':' >> reg(r'[0-9]+') > int

    path = rep('/' >> (reg(r'[-._~A-Za-z0-9]*') | encoded))

    # The previous class '[*-._A-Za-z0-9]' contained the range '*-.', which
    # silently included '+' (and ','), so '+' was consumed as-is and the
    # query_space rule below never translated it to a space.  The characters
    # are now listed explicitly ('-' first so it is literal); ',' is kept so
    # the same inputs are still accepted, and '+' is left to query_space.
    query_as_is = reg(r'[-*,._A-Za-z0-9]+')
    query_space = lit('+') > constant(' ')
    query_string = rep1(query_as_is | query_space | encoded) > ''.join
    # 'key=value' pairs separated by '&', collected in order.
    query = '?' >> repsep(query_string << '=' & query_string, '&') > OrderedDict

    fragment = '#' >> reg(r'[-._~/?A-Za-z0-9]*')

    url = scheme << '://' & opt(userinfo << '@') & host & opt(port) & path & opt(query) & opt(fragment) > splat(Url)
class LineParser(TextParsers, whitespace=None):
    """Grammar for a single line of an assembly-like language.

    A line is an ``END`` marker, a label, a function header or a function
    call.  Whitespace is not skipped implicitly; ``s`` matches the explicit
    separator between tokens.
    """

    # Explicit separator: one or more spaces or tabs.
    s = reg(r'[ \t]+')
    valid = reg(r'[A-Za-z][A-Za-z0-9_]*')

    # "<name>:" -> Label
    label = (valid << ':') > Label

    # Disabled rule kept from the original source:
    # arg = reg(r'[vimVIM][A-Za-z0-9_]*')

    # "FUNC name arg..." or "PRIM name arg..."
    function_header = (
        lit('FUNC', 'PRIM') & (s >> valid) & rep(s >> valid)
    ) > splat(FunctionHeader)

    # "name operand...", where an operand is a map, an immediate, a plain
    # name, or one of the literals $POP / $TOP.  MapParser and ImmParser are
    # defined outside this class.
    function_call = (
        valid
        & rep(s >> (MapParser.map_ | ImmParser.imm2 | valid | '$POP' | '$TOP'))
    ) > splat(FunctionCall)

    # Not referenced by ``line`` below.
    memory_command = lit('LOAD', 'STORE') & opt('NEXT') & opt('BIG') & opt('RED')

    # The End instance is built once, when the class body runs, and that
    # same object is returned for every match.
    end = lit('END') > constant(End(False, []))

    line = end | label | function_header | function_call
class JsonStringParsers(TextParsers, whitespace=None):
    """Grammar for the contents of a JSON string literal.

    Each escape sequence is converted to the character it denotes and runs
    of unescaped characters are kept verbatim; ``string`` joins the pieces
    into one ``str``.  No whitespace is skipped.
    """

    # Two-character escapes: a backslash followed by one character.
    quote = lit(r'\"') > constant('"')
    reverse_solidus = lit(r'\\') > constant('\\')
    solidus = lit(r'\/') > constant('/')
    backspace = lit(r'\b') > constant('\b')
    form_feed = lit(r'\f') > constant('\f')
    line_feed = lit(r'\n') > constant('\n')
    carriage_return = lit(r'\r') > constant('\r')
    tab = lit(r'\t') > constant('\t')
    # Unicode escape: backslash, 'u', four hex digits.  ``reg`` yields the
    # matched text as a str (not a match object), so calling ``.group(1)``
    # on it raised AttributeError; skip the two-character prefix and decode
    # the four hex digits instead.
    uni = reg(r'\\u([0-9a-fA-F]{4})') > (lambda x: chr(int(x[2:], 16)))

    escaped = (quote | reverse_solidus | solidus | backspace | form_feed |
               line_feed | carriage_return | tab | uni)
    # Any run of characters from U+0020 upward except '"' (U+0022) and
    # backslash (U+005C).
    unescaped = reg(r'[\u0020-\u0021\u0023-\u005B\u005D-\U0010FFFF]+')

    string = '"' >> rep(escaped | unescaped) << '"' > ''.join
class Parser(TextParsers, whitespace=r'[ \t\n\r]*'):
    """Grammar for a Prolog-like language that renders its parse as text.

    Every rule converts what it matched into a string such as ``ID(...)``,
    ``Atom(...)``, ``Conj(...)``, ``Disj(...)``, ``Rel(...)``,
    ``Typedef(...)`` or ``Module(...)``.  ``concatenateall`` and
    ``correctprefix`` are helpers defined outside this class.
    """

    # --- Lexical elements -------------------------------------------------
    idreg = reg(r'[a-z_A-Z][a-z_A-Z0-9]*')
    # An identifier is any idreg match other than the two keywords.
    id = pred(
        idreg,
        (lambda name: name != 'module' and name != 'type'),
        "Identificator name cannot be 'module' or 'type'",
    ) > concatenateall

    # Punctuation is matched but contributes an empty string.
    rthen = lit(':-') > constant('')
    conj = lit(',') > constant('')
    disj = lit(';') > constant('')
    lbracket = lit('(') > constant('')
    rbracket = lit(')') > constant('')
    dot = lit('.') > constant('')
    mod = lit('module')
    type = lit('type')

    # --- Atoms ------------------------------------------------------------
    # Contents of a bracketed atom; redundant nested brackets are unwrapped.
    atom_in = (
        ((id & atom_close) > (lambda pair: "ID(" + pair[0] + "), " + pair[1]))
        | (id > (lambda name: "ID(" + name + ")"))
        | ((lbracket & atom_in & rbracket) > (lambda seq: seq[1]))
    ) > concatenateall

    # A bracketed group rendered as a nested Atom(...).
    atom_in_gen = (lbracket & atom_in & rbracket) > (
        lambda seq: "Atom(" + seq[1] + ")"
    )

    # Argument tail of an atom: identifiers and bracketed groups in sequence.
    atom_close = (
        ((id & atom_close) > (lambda pair: "ID(" + pair[0] + "), " + pair[1]))
        | (id > (lambda name: "ID(" + name + ")"))
        | ((atom_in_gen & atom_close) > (lambda pair: pair[0] + ", " + pair[1]))
        | atom_in_gen
    ) > concatenateall

    # An identifier with arguments is an Atom(...); alone it is a bare ID(...).
    atom = (
        ((id & atom_close) > (lambda pair: "Atom(ID(" + pair[0] + "), " + pair[1] + ")"))
        | (id > (lambda name: "ID(" + name + ")"))
    ) > concatenateall

    # --- Clause bodies ----------------------------------------------------
    # A literal is a parenthesised disjunction or an atom.
    liter = (
        ((lbracket & Disj & rbracket) > (lambda seq: seq[1]))
        | atom
    ) > concatenateall

    # ',' binds tighter than ';'; both recurse on the right-hand side.
    Conj = (
        ((liter & conj & Conj) > (lambda seq: "Conj(" + seq[0] + ", " + seq[2] + ")"))
        | liter
    ) > concatenateall

    Disj = (
        ((Conj & disj & Disj) > (lambda seq: "Disj(" + seq[0] + ", " + seq[2] + ")"))
        | Conj
    ) > concatenateall

    # "head :- body ." or "head ."
    relation = (
        (
            ((atom & rthen & Disj) > (lambda seq: "Head(" + seq[0] + "), Body(" + seq[2] + ")"))
            | atom
        )
        & dot
    ) > (lambda seq: "Rel(" + seq[0] + ")")

    # --- Types ------------------------------------------------------------
    # A type operand: parenthesised type or atom, optionally followed by
    # one or more '->' continuations.
    possible_t = (
        ((lbracket & possible_t & rbracket & rep1('->' >> possible_t))
         > (lambda seq: "Type(" + seq[1] + ", " + ", ".join(seq[3]) + ")"))
        | ((lbracket & possible_t & rbracket) > (lambda seq: seq[1]))
        | (atom & rep1('->' >> possible_t)
           > (lambda seq: "Type(" + seq[0] + ", " + ", ".join(seq[1]) + ")"))
        | atom
    ) > (lambda pieces: "".join(pieces))

    types = (
        rep1sep(possible_t, '->') > (lambda items: ", ".join(items))
    ) > correctprefix

    type_block = (type & id & types & dot) > (
        lambda seq: "Typedef(" + seq[1] + ", " + seq[2] + ")"
    )

    # --- Top level --------------------------------------------------------
    mod_block = (mod & id & dot) > (lambda seq: "Module(" + seq[1] + ")")

    # Optional module declaration, then type definitions, then relations;
    # each section and the overall result are joined with newlines.
    program = (
        (opt(mod_block) > (lambda lines: "\n".join(lines)))
        & (rep(type_block) > (lambda lines: "\n".join(lines)))
        & (rep(relation) > (lambda lines: "\n".join(lines)))
    ) > (lambda sections: "\n".join(sections))
class ProgramParser(TextParsers):
    """Expression grammar for a small dice/expression language.

    Literals are wrapped in ``TokenNumber`` / ``TokenString`` /
    ``TokenVariable`` and operators map to ``Operator`` members.  The
    conversion helpers (``bin_operator``, ``maybe_ternary``, ``let``,
    ``anon``, ``function``, ``Program`` and so on) are defined outside this
    class.  ``main`` is the entry point.
    """

    # Actual grammar
    def split1(item, separator):
        """Return a parser for one or more ``item`` joined by ``separator``."""
        return item & rep(separator & item)

    def split(item, separator):
        """Return a parser for zero or more ``item`` joined by ``separator``."""
        # Built directly instead of calling split1: names bound in a class
        # body are not visible from functions defined there, so the earlier
        # ``opt(split1(...))`` form raised NameError when called.
        return opt(item & rep(separator & item))

    identifier = reg(r"[a-zA-Z]\w*")

    # --- Literals ---------------------------------------------------------
    # Double- or single-quoted string; the outer quotes are stripped.
    string = reg(r'".*?(?<!\\)(\\\\)*?"') | reg(r"'.*?(?<!\\)(\\\\)*?'") > (
        lambda s: TokenString(s[1:-1])
    )
    num_int = reg(r"\d+") > int
    num_float = reg(r"(\d*\.\d+|\d+\.\d*)") > float
    num_positive = num_float | num_int
    # Recursive, so repeated leading minus signs are accepted.
    num_negative = "-" >> num > (lambda x: -x)
    num = num_negative | num_positive
    number = num > TokenNumber

    # --- Operator tokens --------------------------------------------------
    op_eq = lit("==") > constant(Operator.EQ)
    op_ne = lit("!=") > constant(Operator.NE)
    op_ge = lit(">=") > constant(Operator.GE)
    op_gt = lit(">") > constant(Operator.GT)
    op_le = lit("<=") > constant(Operator.LE)
    op_lt = lit("<") > constant(Operator.LT)
    op_and = lit("&") > constant(Operator.AND)
    op_or = lit("|") > constant(Operator.OR)
    op_add = lit("+") > constant(Operator.ADD)
    op_sub = lit("-") > constant(Operator.SUB)
    op_mul = lit("*") > constant(Operator.MUL)
    op_div = lit("/") > constant(Operator.DIV)
    op_pow = lit("^") > constant(Operator.POW)
    op_not = lit("!") > constant(Operator.NOT)
    op_neg = lit("-") > constant(Operator.NEG)

    # Operator groups, one per precedence level.
    equality_op = op_eq | op_ne
    comparison_op = op_ge | op_gt | op_le | op_lt
    logic_op = op_and | op_or
    term_op = op_add | op_sub
    factor_op = op_mul | op_div
    power_op = op_pow
    unary_op = op_neg | op_not

    # --- Compound forms ---------------------------------------------------
    case_pair = expr << "->" & expr
    assignment = identifier << "=" & expr
    # "^ a = 1; b = 2 $ body"
    let_stmt = "^" >> rep1sep(assignment, ";") << "$" & expr > let
    # A backslash (single or doubled), parameter names, "->", then the body.
    anon_func = (lit("\\") | lit("\\\\")) >> rep1(identifier) & "->" >> expr > anon
    variable = identifier > TokenVariable

    # --- Precedence chain, loosest binding first --------------------------
    # Adjacent equality-level expressions are handed to maybe_application.
    expr = rep1sep(equality, reg(r"\s*")) > maybe_application
    equality = split1(comparison, equality_op) > bin_operator
    comparison = split1(logic, comparison_op) > bin_operator
    logic = split1(term, logic_op) > bin_operator
    term = split1(factor, term_op) > bin_operator
    factor = split1(power, factor_op) > bin_operator
    power = split1(ternary, power_op) > bin_operator_right
    ternary = case & opt("?" >> expr << ":" & expr) > maybe_ternary
    case = dice & opt(lit("$") >> "(" >> rep1sep(case_pair, ";") << ")") > maybe_case
    dice = unary & opt("d" >> unary) > maybe_dice
    unary = unary_op & unary | primary > mon_operator
    primary = number | string | bracketed | let_stmt | anon_func | variable
    bracketed = "(" >> expr << ")"

    # --- Top level --------------------------------------------------------
    func = identifier & "=" >> expr > function
    # ';'-separated items, each "@name = expr" or a plain expression; a
    # trailing ';' is allowed.
    program = repsep("@" >> func | expr, ";") << opt(";") > Program
    main = program
class DiceParser(TextParsers):
    """Expression grammar for dice expressions.

    Literals are wrapped in ``ValueNumber`` / ``ValueString`` and operators
    map to ``Operator`` members.  The conversion helpers (``bin_operator``,
    ``maybe_dice``, ``maybe_ternary``, ``maybe_case``, ``mon_operator``,
    ``Program`` and so on) are defined outside this class.  ``parse_all`` is
    the entry point.
    """

    def split1(item, separator):
        """Return a parser for one or more ``item`` joined by ``separator``."""
        return item & rep(separator & item)

    def split(item, separator):
        """Return a parser for zero or more ``item`` joined by ``separator``."""
        # Built directly instead of calling split1: names bound in a class
        # body are not visible from functions defined there, so the earlier
        # ``opt(split1(...))`` form raised NameError when called.
        return opt(item & rep(separator & item))

    # Not referenced by any other rule in this class.
    comment = "/*" >> expr << "*/" > constant(None)

    # --- Literals ---------------------------------------------------------
    # Double-quoted string; the outer quotes are stripped.
    string = reg(r'".*?(?<!\\)(\\\\)*?"') > (lambda s: ValueString(s[1:-1]))
    num_int = reg(r"\d+") > int
    num_float = reg(r"(\d*\.\d+|\d+\.\d*)") > float
    num_positive = num_float | num_int
    # Recursive, so repeated leading minus signs are accepted.
    num_negative = "-" >> num > (lambda x: -x)
    num = num_negative | num_positive
    number = num > ValueNumber

    # Disabled rule kept from the original source:
    # set = "(" >> repsep(expr, ",") << ")" > ValueSet

    # --- Operator tokens --------------------------------------------------
    op_eq = lit("==") > constant(Operator.EQ)
    op_ne = lit("!=") > constant(Operator.NE)
    op_ge = lit(">=") > constant(Operator.GE)
    op_gt = lit(">") > constant(Operator.GT)
    op_le = lit("<=") > constant(Operator.LE)
    op_lt = lit("<") > constant(Operator.LT)
    op_and = lit("&") > constant(Operator.AND)
    op_or = lit("|") > constant(Operator.OR)
    op_add = lit("+") > constant(Operator.ADD)
    op_sub = lit("-") > constant(Operator.SUB)
    op_mul = lit("*") > constant(Operator.MUL)
    op_div = lit("/") > constant(Operator.DIV)
    op_pow = lit("^") > constant(Operator.POW)
    op_not = lit("!") > constant(Operator.NOT)
    op_neg = lit("-") > constant(Operator.NEG)

    # Operator groups, one per precedence level.
    equality_op = op_eq | op_ne
    comparison_op = op_ge | op_gt | op_le | op_lt
    logic_op = op_and | op_or
    term_op = op_add | op_sub
    factor_op = op_mul | op_div
    power_op = op_pow
    unary_op = op_neg | op_not

    case_pair = expr << "," & expr

    # --- Top level --------------------------------------------------------
    program = repsep(expr, ";") > Program

    # --- Precedence chain, loosest binding first --------------------------
    expr = equality
    equality = split1(comparison, equality_op) > bin_operator
    comparison = split1(logic, comparison_op) > bin_operator
    logic = split1(term, logic_op) > bin_operator
    term = split1(factor, term_op) > bin_operator
    factor = split1(power, factor_op) > bin_operator
    power = split1(dice, power_op) > bin_operator_right
    # Optional "<count>d" prefix (the count itself is optional) before the
    # operand.
    dice = opt(opt(ternary) << "d") & ternary > maybe_dice
    ternary = case & opt("?" >> expr << ":" & expr) > maybe_ternary
    case = unary & opt(
        lit(":") >> "(" >> repsep(case_pair, ";") << ")") > maybe_case
    unary = unary_op & unary | primary > mon_operator
    primary = number | string | bracketed
    bracketed = "(" >> expr << ")"

    parse_all = program