예제 #1
0
파일: ctoj3.py 프로젝트: zvolsky/jsforhuman
def get_parse_rules():
    """Build the pyparsing expression that matches one source token.

    pyparsing is used here mainly to
        - recognise comments and quoted strings and pass them through,
        - split off line breaks together with the indentation (blanks)
          that immediately follows them,
        - pick out the ``var`` keyword as its own token, since it does not
          always start a command but can sit inside one,
          e.g. ``for (var i=0; ..)``.

    The first character of each results name marks the token category:
        + javascript significant characters (spaces included),
        / comments,
        | line breaks and the indentation after them.

    Returns:
        A pyparsing expression whose alternatives are tried in the order
        they are listed below.
    """
    # runs of blanks between other tokens
    blanks = Word(' \t')('+Space')

    # a line break and, when present, the blanks that follow it
    two_char_break = Word('\r\n', exact=2)
    one_char_break = Word('\n', exact=1)
    newline = (two_char_break | one_char_break)('|Linebreak')
    following_indent = Optional(Word(' \t'))('|Indent')
    newline_with_indent = newline + following_indent

    # delimited literals; the delimiters stay part of the token text
    double_quoted = QuotedString('"', unquoteResults=False)
    single_quoted = QuotedString("'", unquoteResults=False)
    slash_delimited = QuotedString("/", unquoteResults=False)
    backtick_quoted = QuotedString('`', multiline=True, unquoteResults=False)
    delimited = (double_quoted
                 | single_quoted
                 | slash_delimited
                 | backtick_quoted)('+Cmd')

    var_keyword = Keyword('var')('+CmVar')
    comment = cppStyleComment()('/Comment')

    # any run of characters that cannot start one of the tokens above ...
    ordinary_run = CharsNotIn(';"\'/` (\r\n\t')('+Cmd')
    # ... and a single special character that none of the rules consumed
    single_special = Word(';"\'/`(\r', exact=1)('+Cmd')

    return (blanks
            | comment
            | delimited
            | newline_with_indent
            | var_keyword
            | ordinary_run
            | single_special)
예제 #2
0
def getParseRules():
    """Build the pyparsing expression that matches one source token.

    Alternatives are tried in the order listed; each token carries a
    results name:

        Comment     a C/C++ style comment (``cppStyleComment``)
        Quoted      a "...", '...' or multi-line `...` literal, with the
                    quote characters kept in the token
        Linebreak   a line break, optionally followed by ``Indent``
                    (the blanks right after it)
        CmdSep      a single ``;``
        Command     any other text

    Returns:
        The combined pyparsing expression.
    """
    lineBreak = (Word('\r\n', exact=2) | Word('\n', exact=1))('Linebreak')
    breakIndent = lineBreak + Optional(Word(' \t'))('Indent')
    quotedString = (QuotedString('"', unquoteResults=False)
                    | QuotedString("'", unquoteResults=False)
                    | QuotedString('`', multiline=True, unquoteResults=False))('Quoted')
    return (cppStyleComment()('Comment')
            | quotedString
            | breakIndent
            | Word(';', exact=1)('CmdSep')
            | CharsNotIn(';"\'`/\r\n')('Command')
            # Fallback: the run above stops at quote characters, '/' and
            # '\r' so that the earlier rules get a chance at them.  When
            # none of those rules matches (a '/' used for division, an
            # unterminated quote, a bare '\r') the character would match
            # no alternative at all, so take it as a one-character Command.
            | Word('"\'`/\r', exact=1)('Command'))
예제 #3
0
def preprocessor(source):
    """Split raw source code into parenthesized statements.

    The source is parsed as a sequence of (possibly nested) parenthesized
    groups, ignoring C/C++ style comments.  Each top-level group is passed
    to ``code_reassembly`` and the results are returned as a list.
    """
    # every printable character except the two parentheses
    non_paren_chars = ''.join(
        ch for ch in pyparsing.printables if ch not in '()')
    atom = pyparsing.Word(non_paren_chars)

    # recursive rule: a group contains atoms and/or further groups
    group = pyparsing.Forward()
    group << pyparsing.nestedExpr(content=pyparsing.OneOrMore(group | atom))

    statements = pyparsing.OneOrMore(group)
    statements.ignore(pyparsing.cppStyleComment())

    parsed = statements.parseString(source)
    return list(map(code_reassembly, parsed))
예제 #4
0
def preprocessor(source):
    """Divide raw source code into statements.

    Statements are the top-level parenthesized groups found in ``source``
    (nested groups belong to their enclosing one); C/C++ style comments
    are ignored while parsing.  Returns the list obtained by calling
    ``code_reassembly`` on each parsed group.
    """
    # characters allowed in a plain word: all printables but parentheses
    word_chars = pyparsing.printables
    for paren in '()':
        word_chars = word_chars.replace(paren, '')
    plain_word = pyparsing.Word(word_chars)

    # a parenthesized expression may contain words and nested expressions
    nested = pyparsing.Forward()
    inner = pyparsing.OneOrMore(nested | plain_word)
    nested << pyparsing.nestedExpr(content=inner)

    parser = pyparsing.OneOrMore(nested)
    parser.ignore(pyparsing.cppStyleComment())

    statements = []
    for token in parser.parseString(source):
        statements.append(code_reassembly(token))
    return statements
예제 #5
0
파일: tojs.py 프로젝트: zvolsky/jsforhuman
def get_parse_rules():
    """Build the pyparsing expression that matches one source token.

    pyparsing is used here mainly to
        - recognise comments and quoted strings and pass them through,
        - split off line breaks together with the indentation (blanks)
          that immediately follows them.

    The first character of each results name marks the token category:
        + javascript significant characters (spaces included),
        / comments,
        | line breaks and the indentation after them.

    Returns:
        A pyparsing expression whose alternatives are tried in the order
        they are listed below.
    """
    # line break, then the optional indentation of the next line
    eol = (Word('\r\n', exact=2) | Word('\n', exact=1))('|Linebreak')
    indent_after_eol = Optional(Word(' \t'))('|Indent')
    eol_and_indent = eol + indent_after_eol

    # delimited literals, kept verbatim including their delimiters
    literal = (
        QuotedString('"', unquoteResults=False)
        | QuotedString("'", unquoteResults=False)
        | QuotedString("/", unquoteResults=False)
        | QuotedString('`', multiline=True, unquoteResults=False)
    )('+Cmd')

    whitespace_run = Word(' \t')('+Space')
    comment = cppStyleComment()('/Comment')
    # text that cannot begin any of the tokens above
    command_text = CharsNotIn(';"\'/` (\r\n\t')('+Cmd')
    # one special character left over when no other rule consumed it
    leftover_char = Word(';"\'/`(\r', exact=1)('+Cmd')

    return (whitespace_run
            | comment
            | literal
            | eol_and_indent
            | command_text
            | leftover_char)
예제 #6
0
파일: tocs.py 프로젝트: zvolsky/jsforhuman
def get_parse_rules():
    """Build the pyparsing expression that matches one source token.

    pyparsing is used here mainly to
        - recognise comments and quoted strings and pass them through,
        - split off line breaks together with the indentation (blanks)
          that immediately follows them,
        - separate names from the other characters of a command.

    The first character of each results name marks the token category:
        + javascript significant characters
          (+CmdQuoted, +CmdName, +CmdSep, +CmdBlock, +CmdOther),
        / comments,
        | line breaks and the indentation after them.

    Returns:
        A pyparsing expression whose alternatives are tried in the order
        they are listed below.
    """
    # Python 2 spells these unichr/xrange; fall back to the Python 3 names.
    try:
        to_char, code_points = unichr, xrange(65536)
    except NameError:
        to_char, code_points = chr, range(65536)

    # Characters that must not be swallowed by the generic "other" run,
    # because an earlier, more specific rule has to see them first.
    reserved = u';{}"\'/`\r\n'

    # Classify every BMP code point in a single pass.
    alphas, alphanums, non_starters = [], [], []
    for code in code_points:
        char = to_char(code)
        if char.isalnum():
            alphanums.append(char)
        if char.isalpha():
            alphas.append(char)
        elif char not in reserved:
            non_starters.append(char)
    unicodeAlphas = u''.join(alphas)
    unicodeAlphanums = u''.join(alphanums)
    unicodeNonStarters = u''.join(non_starters)

    # Blanks get no rule of their own here: they are not in `reserved`,
    # so they fall into the generic +CmdOther run.
    lineBreak = (Word('\r\n', exact=2) | Word('\n', exact=1))('|Linebreak')
    breakIndent = lineBreak + Optional(Word(' \t'))('|Indent')
    # '/'-delimited text is not treated as a quoted string in this variant.
    quotedString = (QuotedString('"', unquoteResults=False)
                    | QuotedString("'", unquoteResults=False)
                    | QuotedString('`', multiline=True, unquoteResults=False))('+CmdQuoted')
    jsName = Word(unicodeAlphas, unicodeAlphanums)('+CmdName')
    jsNotName = Word(unicodeNonStarters)('+CmdOther')
    return (cppStyleComment()('/Comment')
            | quotedString
            | breakIndent
            | jsName
            | Literal(u';')('+CmdSep')
            | Word(u'{}', exact=1)('+CmdBlock')
            # Reserved characters that no earlier rule consumed, one at a
            # time -- all of them except '/'.
            # NOTE(review): a '/' that does not start a comment matches no
            # alternative here; confirm this is intended.
            | Word(u'"\'`\r', exact=1)('+CmdOther')
            | jsNotName
            )
예제 #7
0
파일: parser.py 프로젝트: tstanisl/rlang
def prepare_grammar():
	"""Build and return the pyparsing grammar for a sequence of declarations.

	The returned element matches zero or more top-level declarations
	(variable, struct, template or sequence); C/C++ style comments are
	ignored.

	While parsing, expressions are also assembled into nested lists on a
	private ``stack`` shared by the parse actions: operands are pushed as
	they are matched, and each operator's action replaces its operands on
	the stack with ``[operator, operand, ...]``.

	Side effects: the parse actions print debug traces to stdout, and the
	action attached to the whole grammar prints the stack.

	NOTE(review): the stack is created once per call of this function and
	nothing here clears it, so it appears to carry over between parses made
	with the same grammar object -- confirm whether that is intended.
	"""
	import pyparsing as pp

	stack = []

	def pop(op, n, extra=None):
		"""Replace the top n stack entries by [op, *entries, *extra]."""
		print("pop(id={}, n={})".format(op, n))
		extra = [] if extra is None else extra
		tail = stack[-n:]
		del stack[-n:]
		stack.append([op] + tail + extra)
		return stack[-1]

	def RIGHT_UNARY(sym, arg):
		"""Return a rule for `arg` preceded by any number of prefix `sym`."""
		parser = pp.Forward()
		body = sym + parser
		# the operand is already on the stack when the action runs
		body.setParseAction(lambda t: pop(t[0], 1))
		parser << (arg ^ body)
		return parser

	def LEFT_BINARY(sym, arg):
		"""Return a rule for `arg (sym arg)*`, folding left to right."""
		body = sym + arg
		# runs after every right operand, so the fold is left-associative
		body.setParseAction(lambda t: pop(t[0], 2))
		return arg + pp.ZeroOrMore(body)

	def RIGHT_BINARY(sym, arg):
		"""Return a rule for `arg (sym <same rule>)?`, folding right to left."""
		parser = pp.Forward()
		body = sym + parser
		# runs only once the whole right-hand side has been reduced
		body.setParseAction(lambda t: pop(t[0], 2))
		parser << (arg + pp.Optional(body))
		return parser

	def push(t):
		"""Push the matched operand (t[0]) onto the stack."""
		print('push0')
		print(stack)
		print(t)
		stack.append(t[0])
		print(stack)
		return t[0]

	def push1(t):
		"""Pop two operands a, b and push [t[0], a, b]."""
		print('push1')
		print(stack)
		print(t)
		b = stack.pop()
		a = stack.pop()
		r = [t[0], a, b]
		stack.append(r)
		print(stack)
		return r

	# Punctuation that is dropped from the parse results.
	LBRA, RBRA, SCOLON, LPAR, RPAR, COMMA = [pp.Suppress(c) for c in '{};(),']
	ASSIGN, COLON = [pp.Suppress(c) for c in '=:']
	# Operators are kept in the results: the parse actions use t[0] as the
	# tag of the node they build.
	PLUS, MINUS, NOT, MUL, DIV, MOD = [pp.Literal(c) for c in '+-!*/%']

	ident = pp.Word(pp.alphas + '_', pp.alphanums + '_')
	# identifier used as an expression operand: pushed onto the stack
	eident = ident.copy()
	eident.setParseAction(push)

	# integer literals, converted to int and pushed onto the stack
	dec_digit = pp.Regex(r'0|([1-9]\d*)').setParseAction(lambda toks: int(toks[0]))
	hex_digit = pp.Regex(r'0x[0-9a-fA-F]+').setParseAction(lambda toks: int(toks[0][2:],16))
	bin_digit = pp.Regex(r'0b[01]+').setParseAction(lambda toks: int(toks[0][2:],2))
	digit = dec_digit ^ hex_digit ^ bin_digit
	digit.setParseAction(push)

	expr = pp.Forward()

	# a.b -> ['.', a, b]
	DOT = pp.Literal('.')
	struct_access = DOT + eident
	struct_access.setParseAction(push1)

	# a[i] -> ['[', a, i]
	LSPAR = pp.Literal('[')
	RSPAR = pp.Literal(']')
	array_access = LSPAR + expr + RSPAR
	array_access.setParseAction(push1)
	access_expr = eident + pp.Group(pp.ZeroOrMore(struct_access ^ array_access))

	par_expr = LPAR + expr + RPAR
	# TODO: what about casts
	top_expr = digit ^ access_expr ^ par_expr

	unr_expr = RIGHT_UNARY(PLUS ^ MINUS ^ NOT, top_expr)
	mul_expr = LEFT_BINARY(MUL ^ DIV ^ MOD, unr_expr)
	add_expr = LEFT_BINARY(PLUS ^ MINUS, mul_expr)

	LT = pp.Literal("<")
	LE = pp.Literal("<=")
	EQ = pp.Literal("==")
	NEQ = pp.Literal("!=")
	GE = pp.Literal(">=")
	GT = pp.Literal(">")
	CMP = LT ^ LE ^ EQ ^ NEQ ^ GE ^ GT

	def cmp_tail2_merge(t):
		"""Fold one more comparison into a chained-comparison ('<>') node."""
		print('cmp_tail2_merge')
		print(stack)
		print(t)
		z = stack.pop()
		print("z={}".format(z))
		h = stack.pop()
		print("h={}".format(h))
		if h[0] == '<>':
			# stack = ..., [<> ... op1 a], z
			#           -> [<> ... op1 a op2 z]
			ret = h + [t[0], z]
		else:
			# stack = ..., [op1 a b], z
			#           -> [<> a op1 b op2 z]
			ret = ['<>', h[1], h[0], h[2], t[0], z]
		print("ret={}".format(ret))
		stack.append(ret)
		return ret

	# second and later comparisons of a chain
	cmp_tail2 = CMP + add_expr
	cmp_tail2.setParseAction(cmp_tail2_merge)

	# first comparison of a chain
	cmp_tail1 = CMP + add_expr
	cmp_tail1.setParseAction(lambda t: pop(t[0], 2))

	# a < b -> ['<', a, b]
	# a < b < c -> ['<>', a, '<', b, '<', c]
	cmp_expr = add_expr + pp.Optional(cmp_tail1 + pp.ZeroOrMore(cmp_tail2))

	AND = pp.Literal('&&')
	and_expr = RIGHT_BINARY(AND, cmp_expr)
	OR = pp.Literal('||')
	or_expr = RIGHT_BINARY(OR, and_expr)
	IND = pp.Literal("==>")
	induc_expr = RIGHT_BINARY(IND, or_expr)
	# at most one '<==>' per expression
	equiv_expr = induc_expr + pp.Optional((pp.Literal('<==>') + induc_expr).setParseAction(push1))

	# c ? a : b -> ['?', c, a, b]
	cond_expr = pp.Forward()
	QUEST = pp.Literal('?')
	cond_tail = QUEST + cond_expr + COLON + cond_expr
	cond_tail.setParseAction(lambda t: pop(t[0], 3))
	cond_expr << (equiv_expr + pp.Optional(cond_tail))

	expr << cond_expr

	buildin_type = pp.Keyword('int')

	# Optional 'extern' modifier, reported as a boolean token.
	extern_mod = pp.Optional(pp.Keyword('extern'))
	extern_mod.setParseAction(lambda toks: len(toks) != 0)
	# setResultsName returns a copy, so the result has to be kept for the
	# name to take effect.
	extern_mod = extern_mod.setResultsName('extern')

	var_decl_body = pp.Forward()

	stmt = pp.Forward()
	block_stmt = LBRA + pp.Group(pp.ZeroOrMore(stmt)) + RBRA
	assign_stmt = access_expr + ASSIGN + expr + SCOLON
	RUN = pp.Suppress(pp.Keyword('run'))
	run_stmt = RUN + ident + SCOLON
	IF = pp.Suppress(pp.Keyword('if'))
	ELSE = pp.Suppress(pp.Keyword('else'))
	if_stmt = pp.Forward()
	if_stmt << (IF + LPAR + expr + RPAR + block_stmt + pp.Optional(ELSE + (block_stmt ^ if_stmt)))

	# name!(.field = value, ...);
	arg_bind = pp.Group(DOT + ident + ASSIGN + access_expr)
	arg_bind_list = pp.Group(pp.Optional(arg_bind + pp.ZeroOrMore(COMMA + arg_bind) + pp.Optional(COMMA)))
	template_stmt = ident + NOT + LPAR + arg_bind_list + RPAR + SCOLON

	stmt << pp.Group(block_stmt ^ if_stmt ^ run_stmt ^ assign_stmt ^ template_stmt)

	# placeholder: matches nothing and consumes no input
	contracts_decl = pp.Empty()
	arg_list = pp.Group(pp.Optional(
		var_decl_body + pp.ZeroOrMore(COMMA + var_decl_body) + pp.Optional(COMMA)))
	TEMPLATE = pp.Suppress(pp.Keyword('template'))
	template_decl = (extern_mod + TEMPLATE + ident + LPAR
		+ arg_list + RPAR + contracts_decl + block_stmt)

	SEQUENCE = pp.Suppress(pp.Keyword('sequence'))
	sequence_decl = extern_mod + SEQUENCE + ident + contracts_decl + block_stmt

	STRUCT = pp.Keyword('struct')
	struct_def = LBRA + pp.Group(pp.ZeroOrMore(var_decl_body + SCOLON)) + RBRA
	struct_decl = STRUCT + ident + struct_def + SCOLON

	struct_type = STRUCT + ident
	type_decl = buildin_type ^ struct_type

	var_decl_body << (type_decl + ident)
	var_decl = extern_mod + var_decl_body + pp.Optional(ASSIGN + expr, default = None) + SCOLON

	decl = pp.Group(var_decl ^ struct_decl ^ template_decl ^ sequence_decl)

	grammar = pp.ZeroOrMore(decl)

	comment = pp.cppStyleComment()
	grammar.ignore(comment)

	def show(t):
		"""Debug aid: print the expression stack."""
		print(stack)
	grammar.setParseAction(show)

	return grammar