Example #1
    def __init__(self, lp, lex_ws, ptrees, sen=None):
        self.lex = Lexer.parse(open(lp, 'r').read())
        self.lex_ws = lex_ws
        self.ptrees = ptrees
        self.sen = sen
        self.amb_type = 'horizontal'
        for l in iter(self.ptrees.splitlines()):
            if re.match(VERTICAL_AMBIGUITY, l):
                self.amb_type = 'vertical'
                break

        amb1, amb2 = None, None
        if self.amb_type == 'vertical':
            amb1, amb2 = self.parse_vamb()
        else:
            amb1, amb2 = self.parse_hamb()

        # extract the ambiguous string from sentence
        amb1s = self.ambiguous_string(amb1)
        amb2s = self.ambiguous_string(amb2)
        assert amb1s == amb2s
        self.amb_str = amb1s

        # extract the ambiguous grammar rules from parse trees
        _cfg = self.ambiguous_cfg_subset(amb1, amb2)
        # first, minimise the lex based on the cfg
        self.sym_toks, self.toks = self.minimise_lex(_cfg)
        tp = tempfile.mktemp()
        Lexer.write(self.sym_toks, self.toks, self.lex_ws, tp)
        lex = Lexer.parse(open(tp, 'r').read())

        # convert _cfg to a CFG instance.
        self.min_cfg = self.to_CFG(_cfg, lex)
Example #2
    def verify_ambiguity(self, mingp, minlp, minsen, duration=None):
        print "==> verify grammar %s with minimiser %s \n" % \
                (mingp, self._sin.minp)
        self._sin.lex = Lexer.parse(open(self._sin.lp, 'r').read())
        self._sin.cfg = CFG.parse(self._sin.lex, open(self._sin.gp, "r").read())
        self._sin.parser = Accent.compile(self._sin.gp, self._sin.lp)

        minlex = Lexer.parse(open(minlp, 'r').read())
        mincfg = CFG.parse(minlex, open(mingp, 'r').read())
        seq = mincfg.get_rule('root').seqs[0]
        # check if the root rule of minimised cfg == root of original cfg
        if (len(seq) == 1) and (str(seq[0]) == self._sin.cfg.start_rulen):
            out = Accent.run(self._sin.parser, minsen)
            if Accent.was_ambiguous(out):
                print "** verified **"

        minbend = "%sm" % self._sin.backend
        if minbend in Backends.BACKENDS:
            bend = Backends.BACKENDS[minbend](self._sin, mincfg, minsen)
        else:
            bend = Backends.WGTBACKENDS[minbend](self._sin, mincfg, minsen)

        # we keep trying until we hit the subseq
        while not bend.found:
            bend.run(self._sin.t_depth, self._sin.wgt, duration)

        print "** verified **"
Example #3
	def run(self):
		tst = Lexer("")
		prs = Parser(tst)
		
		while True:
			try:
				compound = 0
				tst.flush()
				text = ""
				descend = False;
				text = input('mpr> ')
				if '{' in text:
					descend = True;
					compound += 1
				while compound > 0 or (text != "" and text[-1] != ';' and text[-1] != '}'):
					inpt = input('...  ')
					if '{' in inpt:
						compound += 1
					if '}' in inpt:
						compound -= 1
					text += inpt
			except EOFError:
				break;
			tst.append(text)
			try:
				self.interpret(descend, prs.compound())
			except ValueError as err:
				print(err)
			except SyntaxError as err:
				print(err)
			except TypeError as err:
				print(err)
			except KeyError as err:
				print("Variable {var} not defined!".format(var=err))
Example #4
	def parse(self):
		lexer = Lexer()
		self.token = lexer.nextToken()
		self.expr(lexer)
		print 'PRINT'

		if lexer.count < len(lexer.text)+1:
			print 'Syntax error!'
Example #5
def make_include(sourceText):
    """
    """
    global token

    moduleFile   = ""
    waveformFile = ""
    signalFile   = ""
    sequenceFile = ""

    lexer.initialize(sourceText)

    getToken()
    while True:
        if token.type == EOF:
            break
        elif found("MODULE_FILE"):
            consume("MODULE_FILE")
            consume("=")
            if found(STRING):
                moduleFile = token.cargo
            consume(STRING)
        elif found("WAVEFORM_FILE"):
            consume("WAVEFORM_FILE")
            consume("=")
            if found(STRING):
                waveformFile = token.cargo
            consume(STRING)
        elif found("SIGNAL_FILE"):
            consume("SIGNAL_FILE")
            consume("=")
            if found(STRING):
                signalFile = token.cargo
            consume(STRING)
        elif found("SEQUENCE_FILE"):
            consume("SEQUENCE_FILE")
            consume("=")
            if found(STRING):
                sequenceFile = token.cargo
            consume(STRING)
        else:
            error("unrecognized keyword: " + dq(token.cargo))

    if len(waveformFile) == 0:
        raise ParserError("missing WAVEFORM_FILE")

    if len(signalFile) == 0:
        raise ParserError("missing SIGNAL_FILE")

    if len(sequenceFile) == 0:
        raise ParserError("missing SEQUENCE_FILE")

    print( "MODULE_FILE " + moduleFile )  # PHM requires this to appear in the output
    print( "SIGNAL_FILE " + signalFile )  # PHM requires this to appear in the output

    print( "#include " + signalFile   )
    print( "#include " + waveformFile )
    print( "#include " + sequenceFile )
Example #6
    def test_bug_with_quotation(self):
        lexer = Lexer('A = "word" B;')

        token = lexer.get_next_token()
        self.assertEquals(IDENTIFIER, token.type)
        self.assertEquals('A', token.value)

        token = lexer.get_next_token()
        self.assertEquals(EQUAL, token.type)
        self.assertEquals('EQUAL', token.value)

        token = lexer.get_next_token()
        self.assertEquals(QUOTATION_MARK, token.type)
        self.assertEquals('QUOTATION_MARK', token.value)

        token = lexer.get_next_token()
        self.assertEquals(IDENTIFIER, token.type)
        self.assertEquals('word', token.value)

        token = lexer.get_next_token()
        self.assertEquals(QUOTATION_MARK, token.type)
        self.assertEquals('QUOTATION_MARK', token.value)

        token = lexer.get_next_token()
        self.assertEquals(IDENTIFIER, token.type)
        self.assertEquals('B', token.value)

        token = lexer.get_next_token()
        self.assertEquals(SEMICOLON, token.type)
        self.assertEquals('SEMICOLON', token.value)

        token = lexer.get_next_token()
        self.assertEquals(EOF, token.type)
        self.assertEquals('EOF', token.value)
Example #7
def parse_waveform(sourceText):
    """
    """
    global token

    lexer.initialize(sourceText)

    while True:
        getToken()
        if token.type == EOF: break
        waveform()
Example #8
def main(sourceText):
	global f
	f = open(outputFilename, "w")
	writeln("Here are the tokens returned by the lexer:")

	Lexer.initialize(sourceText)

	while True:
		token = Lexer.get()
		writeln(token.show(True))
		if token.type == EOF:
			break
	f.close()
Example #9
def parse(sourceText):
    """
    """
    global token
    global paramList

    lexer.initialize(sourceText)

    waveformText = ""
    sequenceText = ""
    mainText     = ""
    moduleFile   = ""
    signalFile   = ""

    getToken()
    while True:
        if token.type == EOF:
            break
        elif found("WAVEFORM"):
            waveformText += waveform()
        elif found("param"):
            param()
        elif found("MAIN"):
            mainText = main()
        elif found(IDENTIFIER):
            sequenceText += sequence()
        elif found("MODULE_FILE"):
            consume("MODULE_FILE")
            moduleFile = token.cargo.strip('"')
            consume(STRING)
        elif found("SIGNAL_FILE"):
            consume("SIGNAL_FILE")
            signalFile = token.cargo.strip('"')
            consume(STRING)
        else:
            error("unrecognized token " + token.show(align=False) )
            break

    if not gotMain:
        error("must define at least one MAIN in the .seq file")

    retval =""
    retval += "modulefile " + moduleFile + "\n"
    retval += "signalfile " + signalFile + "\n\n"
    retval += mainText
    retval += sequenceText
    retval += waveformText
    for p in paramList:
        retval += p + "\n"

    return retval
Example #10
def main():
    verbosity = False
    args = sys.argv
    if len(args) == 3 and sys.argv[1] in ["-v", "--verbose"]:
        verbosity = True
        args.pop(1)
    f = open(args[1], 'r')
    program = f.read()
    f.close()
    token_list = Lexer.lex_program(program)
    if verbosity:
        print("Token list:\n", [x["type"] for x in token_list])
    Parser.parse_program(token_list, verbosity)
    cst_root = Tree.Tree({"type": "Program"})
    cst_root.generate_cst(token_list)
    if verbosity:
        print("CST:")
        cst_root.print_tree(1)
        print()
    ast_root = Tree.Tree({"type": "Block"})
    ast_root.generate_ast(cst_root.children[0])
    if verbosity:
        print("AST:")
        ast_root.print_tree(1)
        print()
    symbol_table = Semantics.Scope(ast_root)
    if verbosity:
        symbol_table.print_table(0)
    code = CodeGen.ExecEnv(ast_root, symbol_table)
    print("\nCompilation successful!")
Example #11
def getToken():
    """
    Gets the next token from the source text and assigns it to the
    global variable 'token'.
    """
    global token
    token = lexer.get()
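Several of the recursive-descent snippets here (Examples #5, #9, #15, and #22) call found() and consume() alongside getToken(), but neither helper appears in any example. A minimal sketch of what they might look like, assuming the same global token and a token object with type and cargo fields; the names and the error handling are assumptions, not the original project's API:

def found(argTokenType):
    # Hypothetical helper: match the current token against a token type or a literal keyword.
    return token.type == argTokenType or token.cargo == argTokenType

def consume(argTokenType):
    # Hypothetical helper: check that the current token matches, then advance via getToken().
    if not found(argTokenType):
        error("expected " + str(argTokenType) + ", found " + dq(token.cargo))
    getToken()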
Example #12
def parse_sequence(sourceText):
    """
    """
    global token
    global paramList

    lexer.initialize(sourceText)

    getToken()
    param()
    main()

    while True:
        getToken()
        if token.type == EOF: break
        param()
        sequence()
Example #13
 def __init__(self, namespace, module, outFile, force=False):
     self.namespace = namespace
     self.lexer = Lexer(module)
     self.outFile = outFile
     self.moduleFile = module.__file__
     self._force = force
     self.actionTable = {}
     self._tokenChars = {}
     self._closedTokens = [self.lexer.default.name]
Example #14
    def test_alternatives(self):
        lexer = Lexer('A | B')

        token = lexer.get_next_token()
        self.assertEquals(IDENTIFIER, token.type)
        self.assertEquals('A', token.value)

        token = lexer.get_next_token()
        self.assertEquals(ALTERNATIVE, token.type)
        self.assertEquals('ALTERNATIVE', token.value)

        token = lexer.get_next_token()
        self.assertEquals(IDENTIFIER, token.type)
        self.assertEquals('B', token.value)

        token = lexer.get_next_token()
        self.assertEquals(EOF, token.type)
        self.assertEquals('EOF', token.value)
Example #15
def get_params(sourceText):
    """
    """
    global token
    global paramNames

    lexer.initialize(sourceText)

    while True:
        getToken()
        if token.type == EOF: break
        if found("param"):
            consume("param")
            paramNames.append(token.cargo)
            consume(IDENTIFIER)
            consume("=")
            consume(NUMBER)

    return paramNames
Example #16
File: LexNeo.py Project: danmt/NEO
def main():
	if (len(sys.argv) > 1):
		if (sys.argv[1].split(".")[1].upper() != 'NEO') : 
			print("Advertencia: se recomienda usar archivos .neo con este Lexer\n")

		try:
		    stream = open(sys.argv[1])
		    data = stream.read()
		    stream.close()
		except IOError:
		    print ('ERROR al abrir el archivo \"%s\"' % sys.argv[1])
		    exit()

		neoLexer = Lexer()
		neoLexer.build()
		neoLexer.tokenize(data)
		
	else:
		print("Verifique que se ejecuto el programa correctamente:")
		print("./LexNeo <nombre del archivo>\n")
Example #17
def compare(gp1, gp2, lp):
    lex = Lexer.parse(open(lp,"r").read())
    cfg1 = CFG.parse(lex, open(gp1, "r").read())
    cfg2 = CFG.parse(lex, open(gp2, "r").read())
    _cfg1 = _cfg(cfg1)
    _cfg2 = _cfg(cfg2)
    
    if _cfg1 == _cfg2:
        return True

    return False
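Example #17 treats two grammar files as equivalent when, read against the same lexer, they reduce to the same internal form via _cfg(). A hypothetical call, with placeholder file names:

# Placeholder paths; compare() returns True only when both grammars yield identical rule sets.
if compare("candidate1.acc", "candidate2.acc", "shared.lex"):
    print("the two grammars are structurally identical")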
Example #18
    def write_stat(self, gp, lp, tag=''):
        """ write no of rules, alts, symbols
            Use the tag to mark the final line
        """
        s = "-,-,-" 
        if gp is not None:
            lex = Lexer.parse(open(lp, 'r').read())
            cfg = CFG.parse(lex, open(gp, 'r').read())
            rules, alts, syms = cfg.size()
            s = "%s,%s,%s" % (rules, alts, syms)

        with open(self.statslog, "a") as logp:
            logp.write("%s%s\n" % (tag, s))
Example #19
    def __init__(self):
        # Create a Lexer
        self.the_Lex = Lexer()
        # Calls getData() to get all of the Tokens from input
        self.statements = self.the_Lex.getData()
        # A stack that we will use to load all of the Tokens that make
        # up a single statement
        self.the_stack = []

        #self.state_pos = 0

        # Calls the statements function
        self.Statements()
Example #20
    def test_one_line(self):
        lexer = Lexer('PROGRAM = RULE, {RULE};')

        token = lexer.get_next_token()
        self.assertEquals(IDENTIFIER, token.type)
        self.assertEquals('PROGRAM', token.value)

        token = lexer.get_next_token()
        self.assertEquals(EQUAL, token.type)
        self.assertEquals('EQUAL', token.value)

        token = lexer.get_next_token()
        self.assertEquals(IDENTIFIER, token.type)
        self.assertEquals('RULE', token.value)

        token = lexer.get_next_token()
        self.assertEquals(COMMA, token.type)
        self.assertEquals('COMMA', token.value)

        token = lexer.get_next_token()
        self.assertEquals(BRACKET_CURLY_OPEN, token.type)
        self.assertEquals('BRACKET_CURLY_OPEN', token.value)

        token = lexer.get_next_token()
        self.assertEquals(IDENTIFIER, token.type)
        self.assertEquals('RULE', token.value)

        token = lexer.get_next_token()
        self.assertEquals(BRACKET_CURLY_CLOSE, token.type)
        self.assertEquals('BRACKET_CURLY_CLOSE', token.value)

        token = lexer.get_next_token()
        self.assertEquals(SEMICOLON, token.type)
        self.assertEquals('SEMICOLON', token.value)

        token = lexer.get_next_token()
        self.assertEquals(EOF, token.type)
        self.assertEquals('EOF', token.value)
Example #21
 def eval(self, s):
     if self.debug:
         print "Input: " + s
     lexemes = Lexer.lex(s)
     if self.debug:
         print "Lexemes: " + str(lexemes)
     ast = self.parser.parse(lexemes)
     if self.debug:
         print "AST:"
         self.printAST(ast, 0)
     result = self.interpreter.eval(ast)
     if self.debug:
         print "Result: ",
     print str(result)
Example #22
def get_subroutines(sourceText):
    """
    """
    global token
    global subroutines

    lexer.initialize(sourceText)

    subroutines = []

    while True:
        getToken()
        if token.type == EOF: break
        if found(IDENTIFIER):
            name = token.cargo
            consume(IDENTIFIER)
            if found("{"):
                subroutines.append(name)
                consume("{")
                while not found("}"):
                    getToken()

    return subroutines
Example #23
def parse_modules(sourceText):
    """
    """
    global token
    global drvOutput
    global adcOutput
    global hvlOutput
    global hvhOutput
    global sysOutput

    lexer.initialize(sourceText)

    # initialize the system output string
    sysOutput = ""
    sysOutput += "BACKPLANE_ID=0000000000000000\n"
    sysOutput += "BACKPLANE_REV=0\n"
    sysOutput += "BACKPLANE_TYPE=1\n"
    sysOutput += "BACKPLANE_VERSION=0.0.0\n"

    getToken()
    while True:
        if token.type == EOF:
            break
        elif found("SLOT"):
            slot()
        else:
            error("unrecognized token " + token.show(align=False) )
            break

    retval = ""
    retval += drvOutput
    retval += adcOutput
    retval += hvlOutput
    retval += hvhOutput
    retval += dioOutput

    return retval
Example #24
File: OC.py Project: Arty52/OC_gen
def main():
    #initialize variables
    tokens = []
    lexemes = []
    
    #global init lets us change the global variable
    global toProcess                    
    global _filename

    #main loop will go until the user is finished
    while True:
        #reset global variables after loop
        reset()
        
        #call lexer
        #returned from Lexer.main() is a deque containing the lexemes and the filename of the processed file
        toProcess, _filename = Lexer.main()             
        
        #if there are contents in toProcess (sent from Lexer.main()), proceed to the Syntax Analyser
        if toProcess:   
            #set the output filehandle so that we can print to a file  
            setFileHandle()
            
            #Syntax Analyser
            print('\nObject Code Generator running...')
            getNext()                                       #get input
            rat15S()                                        #call Syntax Analyser
            print('\n...Object Code Generator finished!\n')
            
            #report to user if error or no error in syntax analysis
            print('There were no errors!') if _error else print('An error was found!')
            
            #report to user where the contents of the file have been saved
            print('Your syntactic analysis of {} has been saved as {} in the working directory.'.format(_filename,_filename + '.SA'))
        
            #print object code tables
            instr_table.print_table()
            print('\n', file = out_fh_OC)
            symbol_table.list()
            
            print('Your object code of {} has been saved as {} in the working directory.'.format(_filename,_filename + '.OC'))
        
        #ask user if they would like to run another file    
        _continue = input('\nWould you like to process another file? (yes/no): ')
        if _continue == 'no' or _continue == 'quit':
            print('Goodbye!')
            sys.exit()
Example #25
    def to_accent(self, sen, lp):
        """ sen contains symbolic tokens, convert to 'actual' tokens using
            the lex
        """
        lex = Lexer.parse(open(lp, "r").read())
        _sen = []
        for tok in sen.split():
            if tok in lex.keys():
                _sen.append(lex[tok])
            else:
                # single char quoted tokens
                _sen.append(tok.replace("'", ""))

        if not self._sin.lex_ws:
            return " ".join(_sen)

        return "".join(_sen)
Example #26
def valid(gf, lf, max_alts_allowed=None, empty_alts_ratio=None):
    """ Generated grammar is valid if it:
        a) has no empty rule
        b) number of alternatives/rule < max_alts_allowed
        c) %age of empty alternatives < empty_alts_ratio 
        d) has no unreachable rules 
        e) doesn't contain a subset which taken no input 
        f) is not trivially ambiguous """
       
    lex = Lexer.parse(open(lf, "r").read())
    cfg = CFG.parse(lex, open(gf, "r").read())

    # check for empty rules
    if empty_rule(cfg):
        return False

    # check if any of the rule has > N alts
    if max_alts_allowed is not None:
        if has_too_many_alts(cfg, max_alts_allowed):
            return False

    # check if we have too many empty alts
    if empty_alts_ratio is not None:
        if has_too_many_empty_alts(cfg, empty_alts_ratio):
            return False

    # Check if all the rules are reachable from the start rule.
    if (len(unreachable(cfg)) > 0):
        print "unreachable: " , unreachable(cfg)
        sys.stdout.write("r")
        sys.stdout.flush()
        return False       

    # Check if the grammar is unproductive        
    if unproductive(cfg,lex):
        sys.stdout.write("u")
        sys.stdout.flush()    
        return False
                        
    # Check the grammar for trivial ambiguities
    if ambiguous(cfg):
        sys.stdout.write("a")
        sys.stdout.flush()
        return False

    return True        
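valid() folds the six checks above into a single accept/reject decision for a generated grammar. A hypothetical call, with placeholder paths and thresholds:

# Placeholder paths and limits; reject the grammar unless every check in valid() passes.
if not valid("gen.acc", "gen.lex", max_alts_allowed=5, empty_alts_ratio=0.3):
    print("generated grammar rejected")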
Example #27
    def run(self):
        currgp = self.mingp
        currlp = self.minlp
        currparse = self._sin.ambi_parse
        n = 1
        found = True

        while found:
            found = False
            lex = Lexer.parse(open(currlp, "r").read())
            cfg = CFG.parse(lex, open(currgp, "r").read())
            # work on rules with no of alts > 1
            keys = [r.name for r in cfg.rules if len(r.seqs) > 1]
            for key in keys:
                seqs = cfg.get_rule(key).seqs
                for i in range(len(seqs)):
                    _cfg = self.cfg_minus_alt(cfg, key, i)
                    if self.valid_cfg(_cfg):
                        # we could minimise lex first before pruning
                        _cfg_p = self.prune_cfg(_cfg, lex)
                        _gf, _lf = "%s.acc" % n, "%s.lex" % n
                        _gp = os.path.join(self._sin.td, "pruned.%s" % _gf)
                        CFG.write(_cfg_p, _gp)
                        n += 1
                        amb, _, ptrees = self._sin.find_ambiguity(_gp, currlp, self._sin.backend, self._sin.mint)
                        if amb:
                            ambi_parse = AmbiParse.parse(currlp, self._sin.lex_ws, ptrees)
                            __gp = os.path.join(self._sin.td, "min.%s" % _gf)
                            __lp = os.path.join(self._sin.td, "min.%s" % _lf)
                            self.write_cfg_lex(ambi_parse, __gp, __lp)
                            self.write_stat(__gp, __lp)
                            found = True
                            currparse = ambi_parse
                            currgp = __gp
                            currlp = __lp
                            break

                if found:
                    break

        return currgp, currlp, currparse.amb_str
Example #28
    def run(self):
        currgp = self.mingp
        currlp = self.minlp
        currparse = self._sin.ambi_parse
        n = 1
        found = True

        while found:
            found = False
            lex = Lexer.parse(open(currlp, 'r').read())
            cfg = CFG.parse(lex, open(currgp, 'r').read())
            combs = self.rule_alts_combs(cfg)
            random.shuffle(combs)
            while combs:
                key, i = combs.pop()
                _cfg = self.cfg_minus_alt(cfg, key, i)
                if self.valid_cfg(_cfg):
                    # we could minimise lex first before pruning
                    _cfg_p = self.prune_cfg(_cfg, lex)
                    _gf, _lf = "%s.acc" % n, "%s.lex" % n
                    _gp = os.path.join(self._sin.td, "pruned.%s" % _gf)
                    CFG.write(_cfg_p, _gp)
                    n += 1
                    amb, _, ptrees = self._sin.find_ambiguity(_gp, currlp,
                                       self._sin.backend, self._sin.mint)
                    if amb:
                        ambi_parse = AmbiParse.parse(currlp, self._sin.lex_ws, ptrees)
                        __gp = os.path.join(self._sin.td, "min.%s" % _gf)
                        __lp = os.path.join(self._sin.td, "min.%s" % _lf)
                        self.write_cfg_lex(ambi_parse, __gp, __lp)
                        self.write_stat(__gp, __lp)
                        found = True
                        currparse = ambi_parse
                        currgp = __gp
                        currlp = __lp
                        break

        return currgp, currlp, currparse.amb_str
Example #29
def mutate_cfg(gp, lp, type):
    lex = Lexer.parse(open(lp, "r").read())
    cfg = CFG.parse(lex, open(gp, "r").read())
    sym_toks = Utils.sym_tokens(gp)

    _cfg = cfg.clone()

    if type == 'empty':
        empty(_cfg)
    elif type == 'add':
        tok = Utils.randomTok(cfg, lex, sym_toks)
        add(_cfg, tok)
    elif type == 'mutate':
        tok = Utils.randomTok(cfg, lex, sym_toks)
        mutate(_cfg, tok)
    elif type == 'delete':
        delete(_cfg)
    elif type == 'switch':
        switch(_cfg)
    else:
        assert "mutation type '%s' is not supported" % type

    return _cfg
Example #30
import sys
from Lexer import *
from TokenTypes import *


if __name__ == "__main__":
    input = sys.argv[1]
    lexer = Lexer(input)
    print("Tokenizing ", end="")
    print(input)
    while True:
        t = lexer.lex()
        if t.get_token().value == TokenTypes.EOF.value:
            break
Example #31
    def for_expr(self):
        res = ParseResult()

        if not self.current_tok.matches(Lexer.TT_KEYWORD, 'Untuk_kita'):
            return res.failure(
                Lexer.InvalidSyntaxError(
                    self.current_tok.pos_start, self.current_tok.pos_end,
                    f"Mengharapkan seperti (senja kala itu) 'Untuk_kita'"))

        res.register_advancement()
        self.advance()

        if self.current_tok.type != Lexer.TT_IDENTIFIER:
            return res.failure(
                Lexer.InvalidSyntaxError(
                    self.current_tok.pos_start, self.current_tok.pos_end,
                    f"Mengharapkan seperti (senja kala itu) identifier"))

        var_name = self.current_tok
        res.register_advancement()
        self.advance()

        if self.current_tok.type != Lexer.TT_EQ:
            return res.failure(
                Lexer.InvalidSyntaxError(
                    self.current_tok.pos_start, self.current_tok.pos_end,
                    f"Mengharapkan seperti (senja kala itu) '='"))

        res.register_advancement()
        self.advance()

        start_value = res.register(self.expr())
        if res.error: return res

        if not self.current_tok.matches(Lexer.TT_KEYWORD, 'Ke'):
            return res.failure(
                Lexer.InvalidSyntaxError(
                    self.current_tok.pos_start, self.current_tok.pos_end,
                    f"Mengharapkan seperti (senja kala itu) 'Ke'"))

        res.register_advancement()
        self.advance()

        end_value = res.register(self.expr())
        if res.error: return res

        if self.current_tok.matches(Lexer.TT_KEYWORD, 'Melangkah'):
            res.register_advancement()
            self.advance()

            step_value = res.register(self.expr())
            if res.error: return res
        else:
            step_value = None

        if not self.current_tok.matches(Lexer.TT_KEYWORD, 'Maka_cakrawala'):
            return res.failure(
                Lexer.InvalidSyntaxError(
                    self.current_tok.pos_start, self.current_tok.pos_end,
                    f"Mengharapkan seperti (senja kala itu) 'Maka_cakrawala'"))

        res.register_advancement()
        self.advance()

        body = res.register(self.expr())
        if res.error: return res

        return res.success(
            Lexer.ForNode(var_name, start_value, end_value, step_value, body))
Example #32
 [email protected]@[email protected]@
 0
 1
 Where the first line is the RE and the next ones are the alphabet
 '''
 regular_expressions = automata.readFile("RE.txt")
 with open('quintuple_NFA.json', 'w') as file:
     file.write("")
 with open('quintuple_DFA.json', 'w') as file:
     file.write("")
 for regular_expression in regular_expressions:
     #print(regular_expression)
     identifier = regular_expressions[regular_expression][0]
     alphabet = regular_expressions[regular_expression][1]
     REPostfix = automata.convertREToPostfix(regular_expression, alphabet)
     #print(REPostfix)
     automata.convertREToNFA(identifier, REPostfix, alphabet)
     '''
     nfa_example = automata_IO.nfa_json_importer('quintuple_NFA.json')
     automata_IO.nfa_to_dot(nfa_example, 'graphic_NFA', './')
     '''
     automata.createTransitionMatrix()
     automata.NFA_to_DFA()
 '''
 dfa_example = automata_IO.dfa_json_importer('quintuple_DFA.json')
 automata_IO.dfa_to_dot(dfa_example, 'graphic_DFA', './')
 '''
 lexer = Lexer.Lexer()
 DFAs = lexer.readFileDFAs("quintuple_DFA.json")
 tokens = lexer.readFileTokens("code.txt")
 lexer.evalauteTokens(DFAs, tokens)
Example #33
 def __init__(self, asm_file):
     self.lex = Lexer.Lex(asm_file)
     self.cmd_info()
Example #34
class Parser:
    def __init__(self, file_path):
        self.ERROR = 0
        self.RIGHT = 1
        self.path = file_path
        self.lexer = Lexer(file_path)
        self.token = Token(Token_Type.ERRTOKEN, "", 0.0, None)
        self.state = self.RIGHT
        self.count = 0
        self.iters = 0
        self.origin_x = 0.0
        self.origin_y = 0.0
        self.rot_ang = 0.0
        self.scale_x = 1.0
        self.scale_y = 1.0

        self.tree = Tree()
        self.root = Node()

    def typecheck(self, _type):
        if (self.token.type != _type):
            self.state = self.ERROR

    def add_node(self, node_name, parents=None, _data=None):
        node = Node(tag=node_name, data=_data)
        self.tree.add_node(node, parent=parents)
        return node

    def getValue(self):
        fig = plt.figure()
        pic = plt.subplot()
        with open(self.path, 'r') as f:
            lines = f.readline()

            while (lines):
                lines = lines.lower()
                if (lines.find('pi') != -1):
                    lines = lines.replace('pi', '3.1415926')
                if (lines.find('origin') != -1):
                    start = lines.find('(')
                    end = lines.find(',')
                    endd = lines.find(')')
                    self.origin_x = eval(lines[start + 1:end])
                    self.origin_y = eval(lines[end + 1:endd])
                elif (lines.find('rot') != -1):
                    start = lines.find('is')
                    self.rot_ang = eval(lines[start + 2:-2])
                elif (lines.find('scale') != -1):
                    start = lines.find('(')
                    end = lines.find(',')
                    endd = lines.find(')')
                    self.scale_x = eval(lines[start + 1:end])
                    self.scale_y = eval(lines[end + 1:endd])
                elif (lines.find('for') != -1):
                    first = lines.find('from')
                    second = lines.find('to')
                    third = lines.find('step')
                    fourth = lines.find('draw')
                    start = eval(lines[first + 4:second])
                    end = eval(lines[second + 2:third])
                    steps = eval(lines[third + 4:fourth])
                    ax = []
                    ay = []
                    l_c = lines.find('(')
                    comma = lines.find(',')
                    r_c = lines.rfind(')')
                    # for iters in range (start, end, steps) :
                    #     t = iters
                    #     ax.append ( eval (lines[l_c + 1 : comma]) )
                    #     ay.append ( eval (lines[comma + 1 : r_c]) )
                    iters = start
                    while (iters < end):
                        t = iters
                        ax.append(eval(lines[l_c + 1:comma]))
                        ay.append(eval(lines[comma + 1:r_c]))
                        iters += steps
                    ax = np.array(ax)

                    ay = np.array(ay)
                    ax = ax * self.scale_x
                    ay = ay * self.scale_y
                    temp = ax * np.cos(self.rot_ang) + ay * np.sin(
                        self.rot_ang)
                    ay = ay * np.cos(self.rot_ang) - ax * np.sin(self.rot_ang)
                    ax = temp
                    ax += self.origin_x
                    ay += self.origin_y
                    color = ['blue', 'green', 'yellow', 'red']
                    ax = ax.tolist()
                    ay = ay.tolist()

                    self.count = self.count % 4
                    pic.scatter(ax, ay, s=2, c=color[self.count])
                    # plt.show ()
                    self.count += 1
                    self.origin_x = 0
                    self.origin_y = 0
                    self.scale_x = 1
                    self.scale_y = 1
                    self.rot_ang = 0

                lines = f.readline()
        print(self.origin_x, self.origin_y)
        print(self.scale_x, self.scale_y)
        print(self.rot_ang)
        plt.show()

    def program(self):
        '''
        Program → Statement SEMICO Program |ε
        P -> S ; P
        '''
        # node = Node (tag= 'a')
        self.root = Node(tag='Program')
        self.token = self.lexer.getToken()
        self.tree.add_node(self.root)
        node = self.root
        while (self.token.type != Token_Type.NONTOKEN):

            node1 = Node(tag='Statement')
            node2 = Node(tag=';')
            node3 = Node(tag='Program')
            self.tree.add_node(node1, node)
            self.tree.add_node(node2, node)
            self.tree.add_node(node3, node)

            self.statement(node1)
            # self.token = self.lexer.getToken ()
            # print (self.token.type)
            self.typecheck(Token_Type.SEMICO)
            self.token = self.lexer.getToken()
            node = node3
            if (self.state == self.ERROR):
                raise SyntaxError('SyntaxError !')
        self.add_node('Empty', node)
        print('---------------------Object Tree----------------------')
        self.tree.show()
        self.getValue()

    def statement(self, node):
        '''
        Statement →  OriginStatment | ScaleStatment
        |  RotStatment    | ForStatment
        '''
        print('--Enter Statement--')
        if (self.token.type == Token_Type.ORIGIN):
            node_temp = self.add_node('OriginStatment', node)
            self.originstatement(node_temp)
        elif (self.token.type == Token_Type.SCALE):
            node_temp = self.add_node('ScaleStatment', node)
            self.scalestatement(node_temp)
        elif (self.token.type == Token_Type.ROT):
            node_temp = self.add_node('RotStatment', node)
            self.rotstatement(node_temp)
        elif (self.token.type == Token_Type.FOR):
            node_temp = self.add_node('forstatement', node)
            self.forstatement(node_temp)
        else:
            self.state = self.ERROR

        print(self.state)
        print('--End statement--')

    def originstatement(self, node):
        '''
        OriginStatment → ORIGIN is
        L_BRACKET Expression COMMA Expression R_BRACKET

        '''
        print('--Enter originstatement--')
        temp_node = Node(tag=' ')
        if (self.token.type == Token_Type.ORIGIN):
            temp_node = self.add_node('ORIGIN', node)
            self.token = self.lexer.getToken()
            if (self.token.type == Token_Type.IS):
                self.token = self.lexer.getToken()
                temp_node = self.add_node('IS', node)
                if (self.token.type == Token_Type.L_BRACKET):
                    temp_node = self.add_node('L_BRACKET', node)
                    self.token = self.lexer.getToken()
                    temp_node = self.add_node('Expression', node)
                    self.expression(temp_node)
                    # self.token = self.lexer.getToken ()
                    self.typecheck(Token_Type.COMMA)
                    temp_node = self.add_node('COMMA', node)
                    self.token = self.lexer.getToken()
                    temp_node = self.add_node('Expression', node)
                    self.expression(temp_node)

                    # self.token = self.lexer.getToken ()
                    if (self.token.type != Token_Type.R_BRACKET):
                        self.state = self.ERROR
                    temp_node = self.add_node('R_BRACKET', node)
                    self.token = self.lexer.getToken()
                else:
                    self.state = self.ERROR

            else:
                self.state = self.ERROR

        print(self.state)
        print('--End originstatement--')

    def scalestatement(self, node):
        '''
        ScaleStatment  → SCALE IS
            L_BRACKET Expression COMMA Expression R_BRACKET
        '''
        print('--Enter scalestatement--')
        temp_node = self.add_node('SCALE', node)
        temp_node = self.add_node('IS', node)
        temp_node = self.add_node('L_BRACKET', node)

        self.token = self.lexer.getToken()
        if (self.token.type != Token_Type.IS):
            self.state = self.ERROR

        self.token = self.lexer.getToken()
        if (self.token.type != Token_Type.L_BRACKET):
            self.state = self.ERROR

        self.token = self.lexer.getToken()
        temp_node = self.add_node('Expression', node)
        self.expression(temp_node)
        temp_node = self.add_node('COMMA', node)
        temp_node = self.add_node('Expression', node)
        # self.token = self.lexer.getToken ()
        self.typecheck(Token_Type.COMMA)

        self.token = self.lexer.getToken()

        self.expression(temp_node)
        # self.token = self.lexer.getToken ()
        if (self.token.type != Token_Type.R_BRACKET):
            self.state = self.ERROR
        temp_node = self.add_node('R_BRACKET', node)
        self.token = self.lexer.getToken()
        print(self.state)
        print('--End scalestatement--')

    def rotstatement(self, node):
        '''
        RotStatment → ROT IS Expression
        '''
        print('--Enter rotstatement --')
        temp_node = self.add_node('ROT', node)
        temp_node = self.add_node('IS', node)
        temp_node = self.add_node('Expression', node)
        self.token = self.lexer.getToken()
        if (self.token.type != Token_Type.IS):
            self.state = self.ERROR
        self.token = self.lexer.getToken()
        # print (self.token.type)

        self.expression(temp_node)
        print(self.state)
        print('--End rotstatement--')

    def forstatement(self, node):
        '''
        ForStatment → FOR T
        FROM Expression
        TO   Expression
        STEP Expression
        DRAW L_BRACKET Expression COMMA Expression R_BRACKET
        '''
        print('--Enter forstatement--')
        temp_node = self.add_node('FOR', node)
        temp_node = self.add_node('T', node)
        temp_node = self.add_node('FROM', node)
        temp_node = self.add_node('Expression', node)
        self.token = self.lexer.getToken()
        if (self.token.type != Token_Type.T):
            self.state = self.ERROR
        self.token = self.lexer.getToken()
        if (self.token.type != Token_Type.FROM):
            self.state = self.ERROR
        self.token = self.lexer.getToken()
        self.expression(temp_node)
        temp_node = self.add_node('TO', node)
        temp_node = self.add_node('Expression', node)
        # self.token = self.lexer.getToken ()
        self.typecheck(Token_Type.TO)
        self.token = self.lexer.getToken()
        self.expression(temp_node)
        temp_node = self.add_node('STEP', node)
        temp_node = self.add_node('Expression', node)
        # self.token = self.lexer.getToken ()
        self.typecheck(Token_Type.STEP)
        self.token = self.lexer.getToken()
        self.expression(temp_node)
        temp_node = self.add_node('DRAW', node)
        temp_node = self.add_node('L_BRACKET', node)
        temp_node = self.add_node('Expression', node)
        # self.token = self.lexer.getToken ()
        self.typecheck(Token_Type.DRAW)
        self.token = self.lexer.getToken()
        self.typecheck(Token_Type.L_BRACKET)
        self.token = self.lexer.getToken()
        self.expression(temp_node)
        temp_node = self.add_node('COMMA', node)
        temp_node = self.add_node('Expression', node)
        # self.token = self.lexer.getToken ()
        self.typecheck(Token_Type.COMMA)
        self.token = self.lexer.getToken()
        self.expression(temp_node)
        temp_node = self.add_node('R_BRACKET', node)
        # self.token = self.lexer.getToken ()
        self.typecheck(Token_Type.R_BRACKET)
        self.token = self.lexer.getToken()
        print(self.state)
        print('--End forstatement--')

    def expression(self, node):
        '''
        Expression → Term {(PLUS|MINUS)Term }
        E -> T {(PLUS | MINUS) T}
        '''
        print('--Enter expression--')
        temp_node = self.add_node('Term', node)
        self.term(temp_node)
        while (self.token.type == Token_Type.PLUS
               or self.token.type == Token_Type.MINUS):
            if (self.token.type == Token_Type.PLUS):
                temp_node = self.add_node('PLUS', node, _data='+')
            else:
                temp_node = self.add_node('MINUS', node, _data='-')
            temp_node = self.add_node('Term', node)
            self.token = self.lexer.getToken()
            self.term(temp_node)
            # self.token = self.lexer.getToken ()
        print(self.state)
        print('--End expression--')

    def term(self, node):
        '''
        Term       	→ Factor { ( MUL | DIV ) Factor }
        '''
        print('--Enter term--')
        temp_node = self.add_node('Factor', node)
        self.factor(temp_node)
        while (self.token.type == Token_Type.MUL
               or self.token.type == Token_Type.DIV):
            if (self.token.type == Token_Type.MUL):
                temp_node = self.add_node('*', node)
            else:
                temp_node = self.add_node('/', node)
            temp_node = self.add_node('Factor', node)
            self.token = self.lexer.getToken()
            self.factor(temp_node)
            # self.token = self.lexer.getToken ()
        print(self.state)
        print('--End term--')

    def factor(self, node):
        '''
        Factor  	→ PLUS Factor | MINUS Factor | Component
        '''
        print('--Enter factor--')
        if (self.token.type == Token_Type.PLUS
                or self.token.type == Token_Type.MINUS):
            if (self.token.type == Token_Type.PLUS):
                temp_node = self.add_node('+', node)
            else:
                temp_node = self.add_node('-', node)

            temp_node = self.add_node('Factor', node)
            self.token = self.lexer.getToken()
            self.factor(temp_node)
        else:
            # print (self.token.type, self.token.value)
            temp_node = self.add_node('Component', node)
            self.component(temp_node)
        print(self.state)
        print('--End Factor--')

    def component(self, node):
        '''
        Component 	→ Atom [POWER Component]
        '''
        print('--Enter component--')
        temp_node = self.add_node('Atom', node)
        self.atom(temp_node)
        self.token = self.lexer.getToken()

        if (self.token.type == Token_Type.POWER):
            # self.token = self.lexer.getToken ()
            self.token = self.lexer.getToken()
            temp_node = self.add_node('POWER', node)
            temp_node = self.add_node('Component', node)
            self.component(temp_node)

        print(self.state)
        # print (self.token.type)
        print('--End component--')

    def atom(self, node):
        '''
        Atom → CONST_ID
         | T
	     | FUNC L_BRACKET Expression R_BRACKET
         | L_BRACKET Expression R_BRACKET
        '''
        print('--Enter atom--')
        if (self.token.type == Token_Type.CONST_ID):
            value = self.token.value
            print(value)
            temp_node = self.add_node('CONST_ID', node, _data=value)

        elif (self.token.type == Token_Type.T):
            temp_node = self.add_node('T', node)

        elif (self.token.type == Token_Type.FUNC):
            temp_node = self.add_node('FUNC', node)
            temp_node = self.add_node('L_BRACKET', node)
            temp_node = self.add_node('Expression', node)
            self.token = self.lexer.getToken()
            self.typecheck(Token_Type.L_BRACKET)
            self.token = self.lexer.getToken()
            self.expression(temp_node)
            self.typecheck(Token_Type.R_BRACKET)
            temp_node = self.add_node('R_BRACKET', node)
        elif (self.token.type == Token_Type.L_BRACKET):
            temp_node = self.add_node('L_BRACKET', node)
            temp_node = self.add_node('Expression', node)
            self.token = self.lexer.getToken()
            self.typecheck(Token_Type.R_BRACKET)
            temp_node = self.add_node('R_BRACKET', node)
        else:
            self.state = self.ERROR
        print(self.state)
        print('--End Atom--')

    def start(self):
        print('Begin !')
        self.program()
        print('End !')
Example #35
import Lexer
from Parser import Parser
from Token import Token
from IllegalCharError import IllegalCharError
while True:
    text = input('Compiler> ')
    
    result,error = Lexer.run('<file> ',text)
    
    if error: 
        print(error.as_string())
    else: 
        print(result)
Example #36
def test_lex_tokens():
    l = Lexer()
    l.lex("./Tok_test.txt")
    for i in range(len(l.Tok_list)):
        assert (l.Tok_list[i][0] == exp_tok[i])
Example #37
types = {"INT": r"[0-9]+", "string": r"[a-zA-Z]+", "VARCHAR": r"[a-zA-Z]+"}
dic = {}
for sig, params in util.parse_signature(sig_loc):
    if params == [('', )]:
        rules.append((sig + r"\(\)", SIGNATURE))
        dic[sig] = ([], [])
    else:
        rules.append(
            (sig + r"\(" + ",".join([types.get("string")
                                     for _ in params]) + r"\)", SIGNATURE))
        dic[sig] = ([
            e.replace("int", "INT").replace("string", "VARCHAR")
            for _, e in params
        ], [])

rules.extend(Lexer.get_rules())
tokens = Lexer.lex(util.parse_formula(formula_loc), rules)
node = MyParser.parse(tokens)
print(node.to_str())

conn = sqlite3.connect(":memory:")
context = Context(conn, ["A", "B"])

with open(log_loc, 'r') as f:
    for line in f:
        parse_ts = re.compile(r"@[0-9]+").findall(line)
        if len(parse_ts) == 1:
            ts = int(parse_ts[0][1:])
        else:
            raise RuntimeError("No timestamp found")
        context.set_ts(ts)
Example #38
global l
print("Enter list")
if next_token.get_token().value == TokenTypes.LPAREN.value:
  next_token = l.lex()
list()
if next_token.get_token().value == TokenTypes.RPAREN.value:
  next_token = l.lex()
list()
print("Exit list")

def seq():
  global next_token
  global l
  print("Enter seq")
  while next_token.get_token().value == TokenTypes.INT.value:
    next_token = l.lex()
  if next_token.get_token().value != TokenTypes.RPAREN.value:
    seq()
  print("Exit seq")

def main():
  global next_token
  global l
  l = Lexer(sys.argv[1])
  next_token = l.lex()
  lisp()
  if next_token.get_token().value == TokenTypes.EOF.value:
    print("PARSE SUCCEEDED")
  else:
    print("PARSE FAILED")
Example #39
# -*- coding: utf-8 -*-
import sys
import Lexer
import Parser
import CodeGenerator

tl_file = sys.argv[1]
file_name = tl_file.split('.')[0]
tok_file = file_name + '.tok'
ast_file = file_name + '.ast.dot'
cfg_file = file_name + '.3A.cfg.dot'
s_file = file_name + '.s'

if Lexer.lexer(tl_file, tok_file):
    temp = Parser.parser(tok_file, ast_file)
    if temp:
        ast_tree = temp[0]
        symbol_table = temp[1]
        CodeGenerator.code_generator(ast_tree, symbol_table, cfg_file, s_file)
Example #40
    def atom(self):
        res = ParseResult()
        tok = self.current_tok

        if tok.type in (Lexer.TT_INT, Lexer.TT_FLOAT):
            res.register_advancement()
            self.advance()
            return res.success(Lexer.NumberNode(tok))

        elif tok.type == Lexer.TT_STRING:
            res.register_advancement()
            self.advance()
            return res.success(Lexer.StringNode(tok))

        elif tok.type == Lexer.TT_IDENTIFIER:
            res.register_advancement()
            self.advance()
            return res.success(Lexer.VarAccessNode(tok))

        elif tok.type == Lexer.TT_LPAREN:
            res.register_advancement()
            self.advance()
            expr = res.register(self.expr())
            if res.error: return res
            if self.current_tok.type == Lexer.TT_RPAREN:
                res.register_advancement()
                self.advance()
                return res.success(expr)
            else:
                return res.failure(
                    Lexer.InvalidSyntaxError(
                        self.current_tok.pos_start, self.current_tok.pos_end,
                        "Mengharapkan seperti (senja kala itu) ')'"))

        elif tok.type == Lexer.TT_LSQUARE:
            list_expr = res.register(self.list_expr())
            if res.error: return res
            return res.success(list_expr)

        elif tok.matches(Lexer.TT_KEYWORD, 'Ketika_nada'):
            if_expr = res.register(self.if_expr())
            if res.error: return res
            return res.success(if_expr)

        elif tok.matches(Lexer.TT_KEYWORD, 'Untuk_kita'):
            for_expr = res.register(self.for_expr())
            if res.error: return res
            return res.success(for_expr)

        elif tok.matches(Lexer.TT_KEYWORD, 'Sedangkan'):
            while_expr = res.register(self.while_expr())
            if res.error: return res
            return res.success(while_expr)

        elif tok.matches(Lexer.TT_KEYWORD, 'Fungsi'):
            func_def = res.register(self.func_def())
            if res.error: return res
            return res.success(func_def)

        return res.failure(
            Lexer.InvalidSyntaxError(
                tok.pos_start, tok.pos_end,
                "Mengharapkan seperti (senja kala itu) int, float, identifier, '+', '-', '(', '[', 'Ketika_nada', 'Untuk_kita', 'Sedangkan', 'Fungsi'"
            ))
Example #41
File: main.py Project: flash548/KLP
        # set the perl $0 to name of the script
        vm.set_variable('0', Value(filename), 'scalar')
        vm.set_variable('ARGV', Value(argv_list), 'list')
        fp = open(filename)
        code = fp.read()
        fp.close()
    else:
        # run the -e code
        vm.set_variable('ARGV', Value(argv_list), 'list')
        # set the perl $0 to <stdin>
        vm.set_variable('0', Value('-'), 'scalar')

if is_n:
    code = "while (<>) { " + code + "}"

lex = Lexer(code)
if (is_dump_toks):
    # dump tokens and exit
    lex.dump_tokens()
    sys.exit(0)

# scan, parse, to IR (AST)
p = Parser(lex)
ast = p.parse()

# walk and emit bytecode from AST (compile)
ast.emit(vm)

# run the VM
#try:
if (not is_dump):
Example #42
 def Unit(self, priority):
     if priority == 0:
         if self.current_token.type == LPAREN:
             self.eat(LPAREN)
             ans = self.Unit(Max_Priority)
             self.eat(RPAREN)
         elif self.current_token.type == NUM:
             ans = AST_Num(self.current_token.value)
             self.eat(NUM)
         elif self.current_token.type == STRING:
             ans = AST_String(self.current_token.value)
             self.eat(STRING)
         elif self.current_token.type in UnaryOp:
             token = self.current_token
             self.eat(token.type)
             ans = AST_UnaryOp(token, self.Unit(0))
         elif self.current_token.type == NAME:
             name = self.current_token.value
             self.eat(NAME)
             if self.current_token.type == LPAREN:
                 self.eat(LPAREN)
                 arglist = []
                 if self.current_token.type != RPAREN:
                     arglist.append(self.Unit(Max_Priority))
                     while self.current_token.type != RPAREN:
                         self.eat(COMMA)
                         arglist.append(self.Unit(Max_Priority))
                 self.eat(RPAREN)
                 ans = AST_FuncCall(name, arglist)
             else:
                 ans = AST_ID(name)
         elif self.current_token.type == LBRACK:
             self.eat(LBRACK)
             lst = []
             if self.current_token.type != RBRACK:
                 lst.append(self.Unit(Max_Priority))
                 while self.current_token.type == COMMA:
                     self.eat(COMMA)
                     lst.append(self.Unit(Max_Priority))
             self.eat(RBRACK)
             ans = AST_Array(lst)
         else:
             self.Error('invalid syntax at "%r" at pos %d' %
                        (self.lexer.get_local_text(), self.lexer.pos - 1l))
         while self.current_token.type == LBRACK:
             self.eat(LBRACK)
             ind = self.Unit(Max_Priority)
             self.eat(RBRACK)
             ans = AST_BinOp(Lexer.Token(INDEX, '[]'), ans, ind)
     elif Associativity[priority] == LeftAssoc:
         ans = self.Unit(priority - 1)
         while self.current_token.type in Prio and Prio[
                 self.current_token.type] == priority:
             token = self.current_token
             self.eat(token.type)
             ans = AST_BinOp(token, ans, self.Unit(priority - 1))
     else:
         ans = self.Unit(priority - 1)
         rightest_node = ans
         first = True
         while self.current_token.type in Prio and Prio[
                 self.current_token.type] == priority:
             token = self.current_token
             self.eat(token.type)
             if first:
                 ans = AST_BinOp(token, ans, self.Unit(priority - 1))
                 rightest_node = ans
                 first = False
             else:
                 rightest_node.rson = AST_BinOp(token, rightest_node.rson,
                                                self.Unit(priority - 1))
                 rightest_node = rightest_node.rson
     return ans
Example #43
class Parser(object):


    def __init__(self, lexer):
        self.lexer = lexer
        self.tokens_list = lexer.lex()
        self.current_token = self.tokens_list[0]
        self.states = []
        self.transitions = []
        self.final = []
        self.alphabet = []
        self.initial_state = ''

    # Method that reports an error.
    def error(self, type_got):
        print('Token type {expected} expected, received {received}!'.format(expected=type_got, received=self.current_token.type))

    # Method that goes to the next token if the current one has already been processed.
    def pop_token(self, token_type):
        if self.current_token.type == token_type:
            if not self.lexer.expr_end():
                self.current_token = self.lexer.token_next()
        else:
            self.error(token_type)

    def process_statement(self):
        token = self.current_token
        if token.type == RESERVED:
            if token.value == 'alphabet:':
                self.pop_token(RESERVED)
                while self.current_token.type == LETTER_SMALL:
                    self.alphabet.append(self.current_token)
                    if self.lexer.expr_end():
                        break
                    self.pop_token(LETTER_SMALL)
            elif token.value == 'states:':
                self.pop_token(RESERVED)
                while self.current_token.type == LETTER_CAPITAL:
                    self.states.append(State(name=self.current_token.value))
                    if self.lexer.expr_end():
                        break
                    self.pop_token(LETTER_CAPITAL)
                    if self.current_token.type == COMMA:
                        self.pop_token(COMMA)
            elif token.value == 'final:':
                self.pop_token(RESERVED)
                while self.current_token.type == LETTER_CAPITAL:
                    self.final.append(State(self.current_token.value))
                    if self.lexer.expr_end():
                        break
                    self.pop_token(LETTER_CAPITAL)
                    if self.current_token.type == COMMA:
                        self.pop_token(COMMA)
            elif token.value == 'transitions:':
                self.pop_token(RESERVED)
                while not self.current_token.value == 'end.':
                    origin = self.current_token.value
                    if type(self.initial_state) == str:
                        for state in self.states:
                            if state.state_name == origin:
                                self.initial_state = state
                    self.pop_token(LETTER_CAPITAL)
                    self.pop_token(COMMA)
                    edge = self.current_token.value
                    self.pop_token(LETTER_SMALL)
                    self.pop_token(DASH)
                    self.pop_token(DASH)
                    self.pop_token(ANGLE_BRACKET)
                    destination = self.current_token.value
                    self.pop_token(LETTER_CAPITAL)
                    self.transitions.append(Transition(origin=origin, edge=edge, destination=destination))
                    if self.lexer.expr_end():
                        break
                self.pop_token(RESERVED)
        else:
            print('Unexpected type!')

    def process_regex(self):
        token = self.current_token
        node = ""

        if token.type == LETTER_SMALL:
            # probably for assigning an alphabet letter!
            # self.pred_list.append(token.value)
            node = Letter(self.current_token)
            self.pop_token(LETTER_SMALL)

            if self.current_token.type == COMMA:
                self.pop_token(COMMA)

            if self.current_token.type == RPAR:
                self.pop_token(RPAR)

            return node

        # Logic if the current token is a star (the repetition operator).
        elif token.type == STAR:
            op = Token(type=STAR, value='*')
            self.pop_token(STAR)
            if self.current_token.type == LPAR:
                self.pop_token(LPAR)
            node = Repeat(op=op, letter=self.process_regex())
            return node

        elif token.type == UNDERSCORE:
            # Logic if the current token is an underscore (the empty-string / epsilon symbol).
            op = Token(type=UNDERSCORE, value='e')
            self.pop_token(UNDERSCORE)
            if self.current_token.type == LPAR:
                self.pop_token(LPAR)
            node = ET(symbol=self.process_regex())
            return node

        # Logic if the current token is one of the binary operators (concatenation or alternation).
        elif token.type in (DOT, PIPE):
            if token.type == DOT:
                op = Token(type=DOT, value='.')
            elif token.type == PIPE:
                op = Token(type=PIPE, value='|')
            self.pop_token(token.type)
            self.pop_token(LPAR)
            node = TransitionOp(left=self.op_statement(), op=op, right=self.op_statement())
            return node

        # Logic if the current token is a right parenthesis.
        elif token.type == RPAR:
            self.pop_token(RPAR)
            node = self.op_statement()
            return node

        # Logic if the current token is a comma.
        elif token.type == COMMA:
            self.pop_token(COMMA)
            node = self.op_statement()
            return node

        return node

    def new_lexer(self, _expression):
        self.lexer = Lexer(_expression)
        self.tokens_list = self.lexer.lex()
        self.current_token = self.tokens_list[0]


    def parse(self):
        node = self.process_regex()
        return node
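
For context, process_statement in Example #43 consumes one reserved-word section of an automaton description ('alphabet:', 'states:', 'final:', 'transitions:' ... 'end.') per call. The driver below is only a sketch: the Lexer constructor, its expr_end() behaviour, and the exact description syntax are assumptions inferred from the token checks above, not confirmed by the original project.

description = """alphabet: a b
states: A, B
final: B
transitions: A, a --> B
end."""

parser = Parser(Lexer(description))     # Lexer(text) signature is an assumption
while not parser.lexer.expr_end():      # one reserved-word section per call (inferred)
    parser.process_statement()
print(parser.states, parser.final, parser.transitions)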
Example #44
0
    def UploadAction(self, event=None, arg=None):
        """
        Uploading a file of rules.

        :param event: Any
        :param arg: a filename.
        :return: None
        """
        self.interpreter.deleted = False
        if self.searching:
            return
        if arg is not None:
            filename = arg
        else:
            filename = filedialog.askopenfilename()
            if filename == "":
                return
            print('Selected:', filename)
        try:
            z = open(filename, 'r')
        except FileNotFoundError:
            self.sendMessage("Error: File Not Found")
            return
        self.interpreter.setFilePath("/".join(filename.split("/")[:-1]))
        data = z.read()
        z.close()
        self.lexer.input(data)
        tokens = []

        while True:
            tok = self.lexer.token()
            if not tok:
                break  # No more input
            tokens.append(tok)

        if len(Lexer.SyntaxErrors) != 0:
            for e in Lexer.SyntaxErrors:
                self.sendMessage(e)
            Lexer.SyntaxErrors = []
            return

        try:

            def reader():
                self.interpreter.read(tokens)
                if len(self.interpreter.errorLoad) == 0:
                    self.sendMessage(f"File {filename} has been uploaded.\n")
                else:
                    self.viewErrorsAndMessages()
                self.searching = False
                if "on-start" in self.interpreter.predicates:
                    self.queryReceived(given="on-start()")

            self.searching = True
            threading.Thread(target=reader).start()
        except Exception as e:
            ed, md = self.viewErrorsAndMessages()
            if not ed:
                self.sendMessage(f"Unknown Error: {e}")

        self.lexer = Lexer.build()
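
The tokenising loop inside UploadAction follows the common two-call lexer interface also used by PLY-style lexers: input() loads the text, token() returns the next token or None once the input is exhausted. Factored out on its own it would look like the sketch below (a sketch only; it assumes a lexer object exposing input() and token() exactly as used in Example #44).

def tokenize(lexer, data):
    """Collect every token produced by a lexer exposing input()/token()."""
    lexer.input(data)
    tokens = []
    while True:
        tok = lexer.token()
        if tok is None:
            break  # lexer exhausted, no more tokens
        tokens.append(tok)
    return tokens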
Example #45
0
class Parser:

# The syntax of regular expressions is the following:
# regex -> alt EOF
# alt -> concat alt_tail*
# alt_tail -> '' | '|' concat alt_tail
# concat -> quant concat_tail
# concat_tail -> '' | quant concat_tail
# quant -> paren quant_tail
# quant_tail -> '?' | '+' | '*' | ''
# paren -> '(' alt ')' | lit

# The implementation is a hand-coded LL parser, written by feel, roughly following: https://www.cs.helsinki.fi/i/vihavain/k10/okk/content3.html
# Inside the parser, NFAs are passed around as a pair (first, out_transitions), where first is the start state and out_transitions is a List() of Transition objects through which an accepting state can be reached
# After parsing, accepting states are tracked via the nodes' accepting attribute
################ grammar productions #############
	def regex(self):
		(first, accept_transitions) = self.alt()
		accepting_state = Node()
		accepting_state.accepting = True
		for x in accept_transitions:
			x.attach_destination(accepting_state)

		self.assert_match("<EOF>")
		return first
	def alt(self):
		(first, transitions) = self.concat()
		branch_node = None

		# alt_tail
		while self.match('|'):
			(second, snd_transitions) = self.concat()
			if branch_node == None:
				branch_node = Node()
				branch_node.add_epsilon_transition(first)
			branch_node.add_epsilon_transition(second)
			transitions += snd_transitions
			self.append_postfix('|')
		if branch_node == None:
			return (first, transitions)
		else:
			return (branch_node, transitions)
	def concat(self):
		(first, transitions) = self.quant()
		
		while self.quant_matches():
			(second, second_transitions) = self.quant()
			for tr in transitions:
				tr.attach_destination(second)
			transitions = second_transitions
			self.append_postfix('#')
		return (first, transitions)
	def quant(self):
		(first, transitions) = self.paren()

		# quant_tail
		res = self.match('+', '?', '*')
		if res:
			self.append_postfix(res)
			if res == '*':
				loop_node = Node()
				for n in transitions:
					n.attach_destination(loop_node)
				loop_node.add_epsilon_transition(first)
				transition = Transition(loop_node, None)
				return (loop_node, List(transition))
			elif res == '+':
				loop_node = Node()
				for tr in transitions:
					tr.attach_destination(loop_node)
				loop_node.add_epsilon_transition(first)
				out_transition = Transition(loop_node, None)
				return (first, List(out_transition))
			elif res == '?':
				skip_node = Node()
				skip_node.add_epsilon_transition(first)
				return (skip_node, transitions + List(Transition(skip_node, None)))
		return (first, transitions)
	def quant_matches(self):
		return self.__token == '(' or isinstance(self.__token, CharSet)
	def paren(self):
		ret = None
		if self.match('('):
			ret = self.alt()
			self.assert_match(')')
		else:
			ret = self.lit()
		return ret
	def lit(self):
		token = self.advance()
		if isinstance(token, CharSet):
			self.append_postfix(token)
			first = Node()
			
			return (first, token.to_transition_list(first))
		raise ParseError('Unexpected input: ' + str(token))
############# the actual methods of the class #####
	def match(self, *lits):
		"""Tarkistaa, onko seuraavana syötteessä oleva tekstialkio jokin parametrina annetuista.
		Palauttaa sen ja siirtyy eteenpäin syötteessä jos on, palauttaa False muuten"""
		if self.__token in lits:
			old_token = self.advance()
			return old_token
		return False
	
	def assert_match(self, *lits):
		"""Sama kuin match(), mutta heittää fataalin poikkeuksen mikäli seuraava tekstialkio ei vastaa jotain parametrinä annetuista"""
		if self.match(*lits) is False:
			raise ParseError('Unexpected `' + str(self.__token) + "', expecting one of " + ", ".join(lits))
	def advance(self):
		"""Palauttaa nykyisen tekstialkion ja siirtyy eteenpäin syötteessä"""
		old_token = self.__token
		self.__token = self.__lexer.next_token()
		return old_token
	def __init__(self, str):
		self.__lexer = Lexer(str)
		self.__token = self.__lexer.next_token()
		self.postfix = ''

	def append_postfix(self, c):
		"""Lisää merkki postfix-sivutuotteeseen"""
		self.postfix += str(c)
	def parse(self):
		"""Jäsentää konstruktorissa annetun lausekkeen, ja palauttaa viitteen NFA:n alkutilaan"""
		return self.regex()
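
Because Example #45's constructor takes the raw expression string and parse() simply returns regex(), driving the parser reduces to the sketch below (assuming the Lexer, Node, Transition and CharSet classes used internally are available as in the original project).

parser = Parser("(a|b)*c")
start_state = parser.parse()   # start state of the resulting NFA; accepting states have accepting == True
print(parser.postfix)          # postfix form accumulated as a by-product during parsing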
Example #46
0
 def __init__(self):
     self.tokens = Lexer.TPTPLexer.tokens
     self.lexer = Lexer.TPTPLexer()
     self.parser = yacc.yacc(module=self)
Example #47
0
 def new_lexer(self, _expression):
     self.lexer = Lexer(_expression)
     self.tokens_list = self.lexer.lex()
     self.current_token = self.tokens_list[0]
Example #48
0
def genericError(structure):
    global val
    Lex.markActualError(val, structure)
    match("Error")
Example #49
0
 def illegal_operation(self, other=None):
     if not other: other = self
     return Lexer.RTError(self.pos_start, other.pos_end,
                          ' Engkau tak diperkenankan (Operasi illegal)',
                          self.context)
Example #50
0
    BEGIN
        BEGIN
            number := 2;
            a := number;
            b := 10 * a + 10 * number / 4;
            c := a - - b
        END;
        x := 11;
    END.
    """
    text = """PROGRAM Part10;
        VAR
        number     : INTEGER;
        a, b, c, x : INTEGER;
        y          : REAL;
    BEGIN
    BEGIN
        number := 2;
        a := number;
        b := 10 * a + 10 * number DIV 4;
        c := a - - b
    END;
        x := 11;
        y := 20 / 7 + 3.14;
    END."""
    lexer = Lexer.Lexer(text)
    parser = Parser.Parser(lexer)
    interpreter = Interpret.Interpreter(parser)
    interpreter.interpret()
    print(interpreter.GLOBAL_SCOPE)
Example #51
0
    """
    if deep == 0:
        return []
    result = []
    for seq in rule.seqs:
        genrule = []
        for sym in seq:
            if isinstance(sym, CFG.Term):
                gen = [sym.tok]
            else:
                if isinstance(sym, CFG.Sym_Term):
                    gen = [sym.tok[3:]] # remove TK_ prefix
                else:
                    if isinstance(sym, CFG.Non_Term_Ref):
                        gen = list(be(grammar, grammar.get_rule(sym.name), deep-1))
            genrule = combine(result, gen)
        result = result + genrule
    return set(result)

if __name__ == "__main__":
    if len(sys.argv) < 4:
        print "Usage: " + sys.argv[0] + " grammar lex unrool-level"
    else:
        l = open(sys.argv[2], "r")
        g = open(sys.argv[1], "r")
        n = int(sys.argv[3])
        lex = Lexer.parse( l.read() )
        grammar = CFG.parse(lex, g.read())
        r = be(grammar, grammar.rules[0], n)
        print r, ": ", len(r)
Example #52
0
    def __init__(self, version, time_limit, recursion_limit, imports):
        """
        Creates an empty console design using tkinter.

        :param version: the current version of the console
        :param time_limit: the time limit in searches
        :param recursion_limit: the recursion limit in the language
        :param imports: a list of possible libraries to import.
        """
        self.version = version
        self.time_limit = time_limit
        self.recursion_limit = recursion_limit

        # Build Lexer and Interpreter
        self.lexer = Lexer.build()
        self.interpreter = Interpreter.Interpreter(self.time_limit, imports)

        # For Queries
        self.asked_for_more, self.found_more = False, False
        self.requested_input, self.got_input = False, False

        # Get the width and the height of the Page
        width = GetSystemMetrics(0)
        height = GetSystemMetrics(1)

        # define TK
        self.root = Tk()
        self.root.geometry(
            f"1500x750+{width // 2 - 750}+{height // 2 - 750 // 2}")

        # Handle Arrow Press
        self.root.bind('<Down>', self.downPress)
        self.root.bind('<Up>', self.upPress)

        # define image for tkinter
        p1 = PhotoImage(file=sys.path[0] + '\\Images\\LCL.png')

        # Setting icon of master window
        self.root.iconphoto(False, p1)
        self.root.title("Local")

        # define the main frame
        self.mainFrame = Frame(self.root,
                               highlightbackground="black",
                               highlightthickness=1)
        self.mainFrame.grid(row=0,
                            column=0,
                            padx=(10),
                            pady=(10),
                            sticky=W + E + S + N)

        # configure the grid
        self.root.rowconfigure(0, weight=1)
        self.root.columnconfigure(0, weight=1)
        self.mainFrame.columnconfigure(1, weight=1)
        self.mainFrame.rowconfigure(1, weight=100)
        self.mainFrame.rowconfigure(2, weight=1)

        # set a variable for the query text and console text
        self.queryText = StringVar()
        self.consoleText = '\n' * 100 + f'Local Version {self.version} Loaded...\n\n'

        # A variable to save the generator in between the first and later results
        self.solutions = None
        self.pastQueries = ['', '']
        self.currentIndex = 1
        self.searching = False

        # Frame for console, and console definition
        self.textFrame = Frame(self.mainFrame, width=10000, height=27000)
        self.textFrame.grid(row=0,
                            column=0,
                            rowspan=2,
                            columnspan=3,
                            sticky=N + S + W + E)
        self.console = Text(self.textFrame,
                            height=1650,
                            width=500,
                            padx=4,
                            pady=4,
                            wrap=WORD,
                            font=("Courier", 12),
                            borderwidth=4,
                            relief="groove")
        self.console.insert(END, self.consoleText)
        self.console.config(state=DISABLED)
        self.console.pack()
        self.scrolling = Scrollbar(self.textFrame)
        self.scrolling.pack(side=RIGHT, fill=Y)
        self.scrolling.config(command=self.console.yview)
        self.console.config(yscrollcommand=self.scrolling.set)
        self.console.see(END)

        # Query button and text
        self.directionsQuery = Label(self.mainFrame,
                                     text="Enter Query Here:",
                                     font=("Helvetica", 14, "bold"))
        self.directionsQuery.grid(row=3,
                                  column=0,
                                  padx=(10),
                                  pady=(10, 15),
                                  sticky=W)
        self.query = Entry(self.mainFrame,
                           font=("Courier", 14),
                           textvariable=self.queryText)
        self.query.grid(row=3,
                        column=1,
                        padx=(10),
                        pady=(10, 20),
                        sticky=S + W + E)
        self.root.bind("<Return>", self.moreSolutions)
        self.sendQuery = Button(self.mainFrame,
                                text='Send Query',
                                command=self.queryReceived,
                                font=("Helvetica", 14))
        self.sendQuery.grid(row=3, column=2, padx=(10), pady=(10, 20), sticky=W)

        # upload query
        self.upload = Button(self.mainFrame,
                             text='Upload Rules',
                             command=self.UploadAction,
                             height=4,
                             width=24,
                             font=("Helvetica", 10, "bold"))
        self.upload.grid(row=0, column=0, padx=(10), pady=(10))

        # delete button
        self.delete = Button(self.mainFrame,
                             text='Delete All Rules',
                             command=self.DeleteRules,
                             height=4,
                             width=24,
                             font=("Helvetica", 10, "bold"))
        self.delete.grid(row=0, column=1, padx=(10), pady=(10), sticky='W')

        self.clear = Button(self.mainFrame,
                            text='Clear Console',
                            command=self.ClearConsole,
                            height=4,
                            width=24,
                            font=("Helvetica", 10, "bold"))
        self.clear.grid(row=0, column=2, padx=(10), pady=(10), sticky='E')

        self.sendMessage("", False)
Example #53
0
import Lexer
input = 'let result = add(five, ten);'
lexer = Lexer.Lexer(input,'',0,0)
l = lexer.next_token()
m = lexer.next_token()
n = lexer.next_token()
print('Read the next tokens: {} {}'.format(m.type.name, n.type.name))

Example #54
0
 def __init__(self, text):
     self.level = 0
     self.lexer = Lexer.Lexer(text)
     self.current_token = self.lexer.get_next_token()
Example #55
0
    def func_def(self):
        res = ParseResult()

        if not self.current_tok.matches(Lexer.TT_KEYWORD, 'Fungsi'):
            return res.failure(
                Lexer.InvalidSyntaxError(
                    self.current_tok.pos_start, self.current_tok.pos_end,
                    f"Mengharapkan seperti (senja kala itu) 'Fungsi'"))

        res.register_advancement()
        self.advance()

        if self.current_tok.type == Lexer.TT_IDENTIFIER:
            var_name_tok = self.current_tok
            res.register_advancement()
            self.advance()
            if self.current_tok.type != Lexer.TT_LPAREN:
                return res.failure(
                    Lexer.InvalidSyntaxError(
                        self.current_tok.pos_start, self.current_tok.pos_end,
                        f"Mengharapkan seperti (senja kala itu) '('"))
        else:
            var_name_tok = None
            if self.current_tok.type != Lexer.TT_LPAREN:
                return res.failure(
                    Lexer.InvalidSyntaxError(
                        self.current_tok.pos_start, self.current_tok.pos_end,
                        f"Mengharapkan seperti (senja kala itu) identifier or '('"
                    ))

        res.register_advancement()
        self.advance()
        arg_name_toks = []

        if self.current_tok.type == Lexer.TT_IDENTIFIER:
            arg_name_toks.append(self.current_tok)
            res.register_advancement()
            self.advance()

            while self.current_tok.type == Lexer.TT_COMMA:
                res.register_advancement()
                self.advance()

                if self.current_tok.type != Lexer.TT_IDENTIFIER:
                    return res.failure(
                        Lexer.InvalidSyntaxError(
                            self.current_tok.pos_start,
                            self.current_tok.pos_end,
                            f"Mengharapkan seperti (senja kala itu) identifier"
                        ))

                arg_name_toks.append(self.current_tok)
                res.register_advancement()
                self.advance()

            if self.current_tok.type != Lexer.TT_RPAREN:
                return res.failure(
                    Lexer.InvalidSyntaxError(
                        self.current_tok.pos_start, self.current_tok.pos_end,
                        f"Mengharapkan seperti (senja kala itu) ',' or ')'"))
        else:
            if self.current_tok.type != Lexer.TT_RPAREN:
                return res.failure(
                    Lexer.InvalidSyntaxError(
                        self.current_tok.pos_start, self.current_tok.pos_end,
                        f"Mengharapkan seperti (senja kala itu) identifier or ')'"
                    ))

        res.register_advancement()
        self.advance()

        if self.current_tok.type != Lexer.TT_ARROW:
            return res.failure(
                Lexer.InvalidSyntaxError(
                    self.current_tok.pos_start, self.current_tok.pos_end,
                    f"Mengharapkan seperti (senja kala itu) '->'"))

        res.register_advancement()
        self.advance()
        node_to_return = res.register(self.expr())
        if res.error: return res

        return res.success(
            Lexer.FuncDefNode(var_name_tok, arg_name_toks, node_to_return))