Esempio n. 1
0
    def verify_ambiguity(self, mingp, minlp, minsen, duration=None):
        """Verify a minimised grammar against the original one.

        The original lexer/grammar (``self._sin.lp`` / ``self._sin.gp``) are
        parsed and compiled with Accent, then the minimised lexer/grammar are
        parsed.  If the minimised root rule is a single reference to the
        original start rule, ``minsen`` is run through the compiled parser.
        Finally the minimiser backend (``"<backend>m"``) is run repeatedly
        until it reports ``found``.

        Args:
            mingp: path of the minimised grammar file.
            minlp: path of the minimised lexer file.
            minsen: value handed to ``Accent.run`` and to the backend
                constructor (presumably the ambiguous sentence).
            duration: forwarded unchanged to the backend's ``run``.
        """
        print("==> verify grammar %s with minimiser %s \n" %
              (mingp, self._sin.minp))
        # Read every input through a context manager so that no file handle
        # is left open.
        with open(self._sin.lp, 'r') as lexf:
            self._sin.lex = Lexer.parse(lexf.read())
        with open(self._sin.gp, "r") as gramf:
            self._sin.cfg = CFG.parse(self._sin.lex, gramf.read())
        self._sin.parser = Accent.compile(self._sin.gp, self._sin.lp)

        with open(minlp, 'r') as lexf:
            minlex = Lexer.parse(lexf.read())
        with open(mingp, 'r') as gramf:
            mincfg = CFG.parse(minlex, gramf.read())
        seq = mincfg.get_rule('root').seqs[0]
        # check if the root rule of minimised cfg == root of original cfg
        if (len(seq) == 1) and (str(seq[0]) == self._sin.cfg.start_rulen):
            out = Accent.run(self._sin.parser, minsen)
            if Accent.was_ambiguous(out):
                # NOTE(review): there is no return here, so the backend loop
                # below still runs and prints the message a second time.
                print("** verified **")

        # The minimiser variant of a backend is registered under "<name>m",
        # either among the plain or among the weighted backends.
        minbend = "%sm" % self._sin.backend
        if minbend in Backends.BACKENDS:
            bend = Backends.BACKENDS[minbend](self._sin, mincfg, minsen)
        else:
            bend = Backends.WGTBACKENDS[minbend](self._sin, mincfg, minsen)

        # we keep trying until we hit the subseq
        while not bend.found:
            bend.run(self._sin.t_depth, self._sin.wgt, duration)

        print("** verified **")
Esempio n. 2
0
    def remove_ε(self):
        """Return a grammar built from this one with the "ε" options rewritten.

        For every non-terminal the literal "ε" option is dropped and, for each
        subset of ``self.Nε``, a variant of every option with those symbols
        (and any "ε" character) deleted is added when it is non-empty.  If
        some option of the start symbol consists only of symbols from
        ``self.Nε``, a new start symbol "S'" is introduced.

        Returns:
            The object produced by ``CFG(N, self.Σ, P, S)``.
        """
        import CFG
        import re

        # presumably the set of nullable non-terminals — TODO confirm
        Nε = self.Nε

        # NOTE(review): N aliases self.N (no copy), so the `N |= ...` below may
        # modify self.N in place, depending on how Set implements `|=`.
        N = self.N
        S = self.S
        P = CFG.P()

        for A in self.N:
            P[A] = self.P[A] - Set("ε")
            # NOTE(review): P[A] is extended while it is being iterated;
            # whether newly added options are visited depends on Set — confirm.
            for opt in P[A]:
                for subset in all_subsets(Nε):
                    # Delete every "ε" and every character of the subset's
                    # joined names from the option (regex character class).
                    new_opt = re.sub(f"[ε{''.join(subset)}]", "", str(opt))
                    if new_opt != "":
                        P[A].add(new_opt)

        # Introduce a fresh start symbol as soon as one start option is made
        # up entirely of symbols in Nε.
        for opt in self.P[self.S]:
            if all(X in Nε for X in opt):
                N |= (Set("S'"))
                S = "S'"
                # NOTE(review): stored as a plain string, unlike the values
                # assigned to P[A] above — confirm CFG accepts this form.
                P["S'"] = f"ε | {self.S}"
                break

        G = CFG(N, self.Σ, P, S)
        return G
Esempio n. 3
0
def REMOVE_NULL_PRODUCTIONS(productions, variables, Vars):
    """Eliminate null productions from ``productions`` in place.

    As long as ``CFG.isExistNullProduction`` reports a null production, the
    rules flagged by ``CFG.isNullProduct`` are removed and, for each of their
    left-hand sides, the rules produced by ``CFG.replaceNullVar`` are appended
    (a rule whose right-hand side ends up empty receives the symbol "e").

    Returns:
        The ``(productions, variables, Vars)`` triple.
    """
    while CFG.isExistNullProduction(productions, variables):
        nullable_heads = []
        # Walk a snapshot because null rules are removed from the live list.
        for candidate in productions.copy():
            head, _body = candidate
            if not CFG.isNullProduct(candidate):
                continue
            nullable_heads.append(head)
            productions.remove(candidate)

        for nullable in nullable_heads:
            # Collect every variant first, then merge the unseen ones.
            expansions = []
            for existing in productions:
                _head, _body = existing
                if CFG.isExistNullVar(existing, nullable):
                    expansions.extend(CFG.replaceNullVar(existing, nullable))

            for derived in expansions:
                if derived in productions:
                    continue
                _head, body = derived
                if len(body) == 0:
                    body.append("e")
                productions.append(derived)

    return productions, variables, Vars
Esempio n. 4
0
def REMOVE_TERM_PRODUCTION(productions, variables, Vars):
    """Replace every rule that ``CFG.isTermProduction`` flags.

    Each flagged rule is handed to ``CFG.replaceTermProduction``, whose return
    value rebinds ``productions``, ``variables`` and ``Vars``.

    Returns:
        The ``(productions, variables, Vars)`` triple after all replacements.
    """
    # NOTE(review): this snapshot is never used.  The sibling passes iterate
    # over their copy; confirm whether this loop was meant to do the same.
    CopyProductions = productions.copy()
    # NOTE(review): the loop walks the list object that was passed in, even
    # though the name `productions` is rebound inside the body.  If
    # replaceTermProduction edits that list in place, it is modified while
    # being iterated — verify against CFG.replaceTermProduction.
    for rule in productions:
        left, right = rule
        if CFG.isTermProduction(variables, rule):
            productions, variables, Vars = CFG.replaceTermProduction(productions, variables, rule, Vars)
    return productions, variables, Vars
 def Creat_CFG(self):
     """Create ``self.CFG`` and fill it from ``self.NPDA``.

     The start variable is named "(" + start state name + first stack symbol
     + final state name + ")" and is registered with an empty production
     list when the conversion did not create it.
     """
     grammar = CFG()
     self.CFG = grammar
     automaton = self.NPDA
     automaton.Convert_NPDA_to_CFG(grammar)

     # Name of the start variable, assembled from the NPDA description.
     grammar.Start_Variable = (
         "(" + automaton.Start_Variable.Name + automaton.First_Stack_Symbol[0]
         + automaton.Final_State.Name + ")")

     # Make sure the start variable has an entry, even if empty.
     if grammar.Start_Variable not in grammar.Variables:
         grammar.Variables[grammar.Start_Variable] = []
Esempio n. 6
0
def REMOVE_MORE_THAN_2_VARIABLES_PRODUCTION(productions, variables, Vars):
    """Split every rule flagged by ``CFG.isVariablesMoreThan2``.

    ``CFG.replaceTerms`` is applied first; afterwards each flagged rule is
    removed and the rules returned by ``CFG.replaceMoreThan2Var`` are added
    at the end of ``productions``.

    Returns:
        The ``(productions, variables, Vars)`` triple.
    """
    productions, variables, Vars = CFG.replaceTerms(productions, variables, Vars)

    # Iterate over a snapshot: the live list is edited inside the loop.
    for candidate in productions.copy():
        if not CFG.isVariablesMoreThan2(variables, candidate):
            continue
        replacement, variables, Vars = CFG.replaceMoreThan2Var(variables, candidate, Vars)
        productions.remove(candidate)
        productions += replacement

    return productions, variables, Vars
Esempio n. 7
0
def compare(gp1, gp2, lp):
    """Tell whether two grammar files are equal after ``_cfg`` conversion.

    Both grammars are parsed with the lexer read from ``lp``.

    Args:
        gp1: path of the first grammar file.
        gp2: path of the second grammar file.
        lp: path of the lexer file shared by both grammars.

    Returns:
        True if ``_cfg(cfg1) == _cfg(cfg2)``, False otherwise.
    """
    # Context managers make sure the three files are closed again.
    with open(lp, "r") as lexf:
        lex = Lexer.parse(lexf.read())
    with open(gp1, "r") as gramf:
        cfg1 = CFG.parse(lex, gramf.read())
    with open(gp2, "r") as gramf:
        cfg2 = CFG.parse(lex, gramf.read())
    _cfg1 = _cfg(cfg1)
    _cfg2 = _cfg(cfg2)

    return bool(_cfg1 == _cfg2)
Esempio n. 8
0
    def remove_primitive_rules(self):
        """
        Return a grammar without rules of type A → B where A,B ∈ N.

        For every non-terminal A, the new rule set collects the rules of all
        non-terminals in ``self.Nx(A)`` that ``self.isprimitive`` rejects.
        """
        import CFG

        productions = CFG.P()

        for nonterminal in self.N:
            sources = self.Nx(nonterminal)
            productions[nonterminal] = Set(
                candidate
                for source in sources
                for candidate in self.P[source]
                if not self.isprimitive(candidate)
            )

        return CFG(self.N, self.Σ, productions, self.S)
Esempio n. 9
0
    def toCNF(self):
        """
        Build a grammar whose rules follow Chomsky normal form.

        each rule must be of format
        A → BC   (A,B,C ∈ N)
        A → a    (A ∈ N, a ∈ Σ)
        S → ε    if S is not on the right side of any rule

        Rules of length >= 2 are split into a first symbol and a remainder.
        A non-uppercase first symbol is replaced by its name followed by a
        combining macron; a remainder of several symbols becomes a bracketed
        name "<...>" which is then split again in the same way.

        Returns the object produced by ``CFG(N, self.Σ, P, self.S)``.
        """
        import CFG

        # NOTE(review): what toOwn() guarantees is not visible here.
        G = self.toOwn()
        N = G.N.copy()
        P = CFG.P()

        # NOTE(review): the helper names created below (barred symbols and
        # "<...>" groups) only appear in P; nothing visible here adds them to
        # N or gives the barred names their own rules — confirm this is
        # handled elsewhere.
        i = 0
        while i < len(N):
            A = N[i]
            # Rules of A in G; falls back to P[A] when G has no entry for A.
            for rule in G.P.get(A, P[A]):
                # A is rebound while a long rule is unrolled, so it is reset
                # for every rule of the current non-terminal.
                A = N[i]
                if len(rule) >= 2:
                    # change rule A -> abcd... to A -> a<bcd...>
                    B, *C = rule

                    # Uppercase symbols are kept, anything else gets a barred
                    # name.  A remainder of more than one symbol is wrapped
                    # in "<...>".
                    B1 = Rule(B if B.isupper() else f"{B}̄")
                    C1 = Rule((f"<{''.join(C)}>" if len(C) > 1 else
                               C[0] if C[0].isupper() else f"{C[0]}̄"))

                    P[A].add(Rule(f"{B1}{C1}"))

                    # Continue with the remainder as the new left-hand side.
                    A = Rule(f"{C1[0]}")

                    # continue generating <bcd...> -> b<cd...>
                    while len(A[0]) >= 2 and A[0] != "<>":
                        # Peel the first character inside the brackets and
                        # re-wrap what is left.
                        rule = Rule(f"{C1[0][1]}<{C1[0][2:-1]}>")
                        # assumes unpacking a Rule yields a "<...>" group as
                        # one element — TODO confirm against Rule
                        B, *C = rule

                        B1 = Rule(B if B.isupper() else f"{B}̄")
                        C1 = Rule((f"{''.join(C[0])}" if len(C[0]) > 1 else
                                   C[0] if C[0].isupper() else f"{C[0]}̄"))

                        P[A].add(Rule(f"{B1}{C1}"))

                        A = Rule(f"{C1[0]}")

                else:
                    # Rules of length < 2 are copied unchanged.
                    P[A].add(Rule(rule))

            i += 1

        return CFG(N, self.Σ, P, self.S)
Esempio n. 10
0
    def remove_left_recursion(self):
        """Return a grammar in which left-recursive rules have been rewritten.

        Non-terminals are visited in the order of ``self.N``.  For a pair
        (A, B) where B is a possible first symbol of a rule of A:

        * A == B: the rules A → Aα | β become A → β | βA' and
          A' → α | αA'.
        * A != B: every rule A → Bα is replaced by the rules obtained by
          putting each rule of B in place of the leading B.

        Returns the object produced by ``CFG(N, self.Σ.copy(), P, self.S)``.
        """
        def calc_potentials():
            # Map every non-terminal to the set of non-terminals that occur
            # as the first symbol of one of its rules (uses the current N, P).
            potentials = {}
            for A in N:
                potentials.setdefault(A, Set())
                for rule in P[A]:
                    if rule[0] in N:
                        potentials[A].add(rule[0])
            return potentials

        N = self.N.copy()
        # NOTE(review): if copy() is shallow, the rule sets are shared with
        # self.P; the `|=` below would then also affect self.P — confirm.
        P = self.P.copy()

        # NOTE(review): N is rebound inside the loop.  enumerate() keeps
        # walking the original object while N[:i + 1] slices the new one
        # — confirm the indices still line up.
        for i, A in enumerate(N):
            for B in N[:i + 1]:
                # Recomputed on every pass because P changes as we go.
                if not B in calc_potentials()[A]:
                    continue

                if A == B:
                    # Direct left recursion: α = rules starting with A,
                    # β = all other rules.
                    α = [rule for rule in P[A] if rule.startswith(B)]
                    β = [rule for rule in P[A] if not rule.startswith(B)]
                    N = Set(f"{A}'").union(N)
                    # assumes P supplies an empty set for the new key A'
                    # — TODO confirm
                    P[f"{A}'"] |= Set(Rule(rule[1:]) for rule in α) | Set(
                        Rule(rule[1:] + f"{A}'") for rule in α)
                    P[A] = Set(Rule(rule) for rule in β) | Set(
                        Rule(rule + f"{A}'") for rule in β)

                else:
                    # Substitute the rules of B for a leading B.
                    α = [rule for rule in P[A] if rule.startswith(B)]
                    β = [rule for rule in P[A] if not rule.startswith(B)]
                    P[A] = Set(Rule(rule) for rule in β) | Set(
                        Rule(ruleB + rule[1:]) for rule in α for ruleB in P[B])

        return CFG(N, self.Σ.copy(), P, self.S)
Esempio n. 11
0
def get_best_syntax_tree(text, cfg):
    """Return the generated phrase that is most similar to ``text``.

    For pruning depths 0..3 a copy of the grammar is pruned against the
    upper-cased words of ``text``; while the pruned grammar still has an "S"
    production, ten random phrases are generated from it.  Every phrase is
    scored with ``fuzz.ratio`` against the upper-cased text.

    Args:
        text: the input sentence.
        cfg: grammar object exposing ``prod``.

    Returns:
        The dict ``{"S": sentence, "Tree": tree, "Score": score}`` with the
        highest score (the earliest one on ties).

    Raises:
        IndexError: if no phrase could be generated at all.
    """
    blob = TextBlob(text)
    ph_list = []
    # Find the tokens and generate the simplified rules: cut the tree at
    # several depths and generate phrases from each cut.
    for depth in range(4):
        cfg_copy = CFG.CFG(cfg.prod)
        cfg_copy.prune(blob.upper().words, depth)
        if "S" not in cfg_copy.prod:
            break
        # Generate a set of phrases
        for _ in range(10):
            sent, tree = cfg_copy.gen_random_convergent('S')
            ph_list.append({"S": sent, "Tree": tree, "Score": 0})

    if not ph_list:
        # Same exception type the former ph_list[0] lookup raised, but with
        # a message that explains the cause.
        raise IndexError("no candidate phrase could be generated for the text")

    # The reference text is the same for all candidates: build it once.
    reference = blob.upper()
    for ph in ph_list:
        ph["Score"] = fuzz.ratio(ph["S"], reference)

    # max() keeps the first of several equally good candidates.
    return max(ph_list, key=lambda ph: ph["Score"])
Esempio n. 12
0
def main():
    """Connect the chat bot and run its receive/send loop.

    While ``Globals.connected`` is true, a receiver thread is started
    whenever ``Globals.getWorkerCount()`` reports none; otherwise at most one
    queued message per second is handled.  On exit a goodbye message is sent
    and the socket is closed.
    """
    cfg = CFG.chatConnection(Globals.HOST, Globals.PORT, Globals.OAUTH,
                             Globals.USERNAME, Globals.CHANNEL,
                             Globals.CLIENT_ID)
    lastMsg = time.time()
    Globals.tLock = threading.Lock()
    # Bound before the try so that the cleanup below cannot hit an unbound
    # name (and hide the real error) when creating the bot fails.
    botSocket = None
    try:
        # create bot and attempt connection
        botSocket = Bot.bot(cfg)
        Globals.connected = botSocket.connect()

        while Globals.connected:
            if Globals.getWorkerCount() < 1:
                receiver = threading.Thread(name="reciever",
                                            target=botSocket.recv)
                receiver.start()
            elif ((time.time() - lastMsg) >= 1.0
                  and Globals.botQue.qsize() > 0):
                # send a msg every 1 second
                botSocket.handleMsg()
                lastMsg = time.time()
            time.sleep(1)
    finally:
        print("exiting")
        if botSocket is not None:
            try:
                botSocket.sendMsg("Goodbye!!")
            finally:
                # Close the socket even if the goodbye could not be sent.
                botSocket.closeSocket()
Esempio n. 13
0
    def to_CFG(self, cfg, lex):
        """ Build a CFG instance from the rule mapping ``cfg``.

            At present, I have taken an easier approach to create CFG.
            I write the token line and the rules to a temp file and
            read that back. An alternative way (without I/O) would be
            to iterate through the rules and build your CFG instance.

            The temp file is created with ``tempfile.mkstemp`` (no race
            between choosing the name and creating the file) and is removed
            again once its contents have been read.

            Args:
                cfg: mapping of rule name to sequences of symbols; must
                    contain the key 'root'.
                lex: lexer object passed on to ``CFG.parse``.

            Returns:
                The result of ``CFG.parse`` on the generated grammar text.
        """
        import os

        header = ""
        if len(self.sym_toks) > 0:
            header = "%token " + "%s;" % (", ".join(self.sym_toks))

        fd, tp = tempfile.mkstemp()
        try:
            with os.fdopen(fd, 'w') as tf:
                tf.write(('%s\n\n' % header) + "%nodefault\n\n")
                # The root rule is written first; a Set drops duplicate
                # alternatives.
                pp_seqs = Set()
                for seq in cfg['root']:
                    seq_s = " ".join(str(e) for e in seq)
                    pp_seqs.add(seq_s)

                tf.write("%s : %s\n;\n" % ('root', " | ".join(pp_seqs)))

                # The remaining rules follow in sorted name order.
                nt_list = [nt for nt in cfg.keys() if nt != 'root']
                nt_list.sort()
                for k in nt_list:
                    pp_seqs = [" ".join(str(e) for e in seq) for seq in cfg[k]]
                    tf.write("%s : %s\n;\n" % (k, " | ".join(pp_seqs)))

            with open(tp, 'r') as tf:
                grammar_text = tf.read()
        finally:
            os.remove(tp)

        return CFG.parse(lex, grammar_text)
Esempio n. 14
0
def REMOVE_UNIT_PRODUCTIONS(productions, variables, Vars):
    """Replace unit productions, then drop rules reported as unreachable.

    Pass 1: each rule flagged by ``CFG.isUnitProduct`` that is still present
    is removed and the rules from ``CFG.replaceUnitProduct`` are appended
    unless already present.
    Pass 2: each rule flagged by ``CFG.isRuleUnreachable`` is removed.

    Returns:
        The ``(productions, variables, Vars)`` triple.
    """
    # Pass 1 works on a snapshot because the live list is edited.
    for candidate in productions.copy():
        if not (CFG.isUnitProduct(candidate, variables)
                and candidate in productions):
            continue
        substitutes = CFG.replaceUnitProduct(productions, variables, candidate)
        productions.remove(candidate)
        for substitute in substitutes:
            if substitute in productions:
                continue
            productions.append(substitute)

    # Pass 2, again over a snapshot.
    for candidate in productions.copy():
        if CFG.isRuleUnreachable(productions, variables, candidate):
            productions.remove(candidate)

    return productions, variables, Vars
Esempio n. 15
0
    def run(self):
        """Shrink the grammar by dropping one alternative at a time.

        Starting from ``self.mingp`` / ``self.minlp``, every alternative of
        every rule with more than one alternative is removed in turn (rules
        and alternatives in grammar order).  A candidate accepted by
        ``self.valid_cfg`` is pruned, written to ``self._sin.td`` and checked
        with ``self._sin.find_ambiguity``.  The first candidate reported
        ambiguous becomes the new current grammar/lexer and the search
        restarts; the loop ends when a full pass finds none.

        Returns:
            ``(grammar path, lexer path, amb_str)`` of the last accepted
            grammar.
        """
        currgp = self.mingp
        currlp = self.minlp
        currparse = self._sin.ambi_parse
        n = 1
        found = True

        while found:
            found = False
            # Context managers: files are re-read on every pass and must not
            # pile up as open handles.
            with open(currlp, "r") as lexf:
                lex = Lexer.parse(lexf.read())
            with open(currgp, "r") as gramf:
                cfg = CFG.parse(lex, gramf.read())
            # work on rules with no of alts > 1
            keys = [r.name for r in cfg.rules if len(r.seqs) > 1]
            for key in keys:
                seqs = cfg.get_rule(key).seqs
                for i in range(len(seqs)):
                    _cfg = self.cfg_minus_alt(cfg, key, i)
                    if not self.valid_cfg(_cfg):
                        continue
                    # we could minimise lex first before pruning
                    _cfg_p = self.prune_cfg(_cfg, lex)
                    _gf, _lf = "%s.acc" % n, "%s.lex" % n
                    _gp = os.path.join(self._sin.td, "pruned.%s" % _gf)
                    CFG.write(_cfg_p, _gp)
                    n += 1
                    amb, _, ptrees = self._sin.find_ambiguity(
                        _gp, currlp, self._sin.backend, self._sin.mint)
                    if amb:
                        ambi_parse = AmbiParse.parse(currlp, self._sin.lex_ws, ptrees)
                        __gp = os.path.join(self._sin.td, "min.%s" % _gf)
                        __lp = os.path.join(self._sin.td, "min.%s" % _lf)
                        self.write_cfg_lex(ambi_parse, __gp, __lp)
                        self.write_stat(__gp, __lp)
                        found = True
                        currparse = ambi_parse
                        currgp = __gp
                        currlp = __lp
                        break

                # Leave the rule loop too, so the new grammar is re-parsed.
                if found:
                    break

        return currgp, currlp, currparse.amb_str
Esempio n. 16
0
def construct_cfg(bitwidth, filename, mode):
    """Preprocess a C file, parse it and build its CFG.

    The file is run through ``gcc -E``, post-processed with
    ``my_preprocess``, printed, parsed with pycparser and handed to
    ``CFG.construct_CFG``.  The resulting graph is printed to "out/impact"
    and "out/cfg_pred" and checked for consistency.

    Args:
        bitwidth: forwarded to ``CFG.construct_CFG``.
        filename: path of the C source file.
        mode: forwarded to ``CFG.construct_CFG``.

    Returns:
        The constructed CFG object.

    Raises:
        subprocess.CalledProcessError: if gcc exits with a non-zero status.
    """
    # Argument list instead of a shell string: the file name is passed to
    # gcc verbatim, so spaces or shell metacharacters in it are harmless.
    processed = subprocess.check_output(["gcc", "-E", filename]).decode()
    processed = my_preprocess(processed)
    print(processed)
    parser = pycparser.c_parser.CParser()
    x = parser.parse(processed, filename="<none>")
    cfg = CFG.construct_CFG(x, mode, bitwidth)
    cfg.print("out/impact")
    cfg.print("out/cfg_pred", "pred")
    cfg._check_consistency()
    return cfg
Esempio n. 17
0
    def write_stat(self, gp, lp, tag=''):
        """ Append one line with no of rules, alts, symbols to the stats log.

            When ``gp`` is None the placeholder "-,-,-" is written instead.
            Use the tag to mark the final line.

            Args:
                gp: path of the grammar file, or None.
                lp: path of the lexer file (only read when gp is given).
                tag: text written in front of the numbers.
        """
        s = "-,-,-"
        if gp is not None:
            # Read both files through context managers so they get closed.
            with open(lp, 'r') as lexf:
                lex = Lexer.parse(lexf.read())
            with open(gp, 'r') as gramf:
                cfg = CFG.parse(lex, gramf.read())
            rules, alts, syms = cfg.size()
            s = "%s,%s,%s" % (rules, alts, syms)

        with open(self.statslog, "a") as logp:
            logp.write("%s%s\n" % (tag, s))
Esempio n. 18
0
    def run(self):
        """Shrink the grammar by dropping alternatives in random order.

        Starting from ``self.mingp`` / ``self.minlp``, the (rule, alternative)
        pairs from ``self.rule_alts_combs`` are shuffled and tried one by
        one.  A candidate accepted by ``self.valid_cfg`` is pruned, written
        to ``self._sin.td`` and checked with ``self._sin.find_ambiguity``.
        The first candidate reported ambiguous becomes the new current
        grammar/lexer and the search restarts; the loop ends when all pairs
        of a pass have been tried without success.

        Returns:
            ``(grammar path, lexer path, amb_str)`` of the last accepted
            grammar.
        """
        currgp = self.mingp
        currlp = self.minlp
        currparse = self._sin.ambi_parse
        n = 1
        found = True

        while found:
            found = False
            # Context managers: files are re-read on every pass and must not
            # pile up as open handles.
            with open(currlp, 'r') as lexf:
                lex = Lexer.parse(lexf.read())
            with open(currgp, 'r') as gramf:
                cfg = CFG.parse(lex, gramf.read())
            combs = self.rule_alts_combs(cfg)
            random.shuffle(combs)
            while combs:
                key, i = combs.pop()
                _cfg = self.cfg_minus_alt(cfg, key, i)
                if not self.valid_cfg(_cfg):
                    continue
                # we could minimise lex first before pruning
                _cfg_p = self.prune_cfg(_cfg, lex)
                _gf, _lf = "%s.acc" % n, "%s.lex" % n
                _gp = os.path.join(self._sin.td, "pruned.%s" % _gf)
                CFG.write(_cfg_p, _gp)
                n += 1
                amb, _, ptrees = self._sin.find_ambiguity(
                    _gp, currlp, self._sin.backend, self._sin.mint)
                if amb:
                    ambi_parse = AmbiParse.parse(currlp, self._sin.lex_ws, ptrees)
                    __gp = os.path.join(self._sin.td, "min.%s" % _gf)
                    __lp = os.path.join(self._sin.td, "min.%s" % _lf)
                    self.write_cfg_lex(ambi_parse, __gp, __lp)
                    self.write_stat(__gp, __lp)
                    found = True
                    currparse = ambi_parse
                    currgp = __gp
                    currlp = __lp
                    break

        return currgp, currlp, currparse.amb_str
Esempio n. 19
0
    def reduce(self):
        """Return the grammar without the non-terminals outside ``G.Ne``.

        Every non-terminal in ``N - G.Ne`` is deleted together with its
        rules and with each option of another rule that contains it.  The
        result is then passed through ``remove_unreachable``.
        """
        import CFG

        N = self.N.copy()
        Σ = self.Σ.copy()
        # NOTE(review): if copy() is shallow, the option collections are
        # shared with self.P and the removals below also affect the
        # original grammar — confirm.
        P = self.P.copy()

        # G is built on the very objects edited below.
        # NOTE(review): presumably G keeps references to them, so that the
        # edits show up in G — confirm against the CFG constructor.
        G = CFG(N, Σ, P, self.S)

        # remove the non-terminals that are not part of G.Ne
        nonreduced = N - G.Ne
        for A in nonreduced:
            # remove whole rule
            del P[A]
            N.remove(A)

            # remove each option with that rule
            for B, options in P.items():
                # Iterate over a snapshot: removing from a collection while
                # iterating it skips elements or raises.
                for opt in list(options):
                    if A in opt:
                        P[B].remove(opt)

        return G.remove_unreachable()
Esempio n. 20
0
    def remove_unreachable(self):
        """Return a grammar without the symbols that are missing from ``self.V``.

        Symbols of ``N ∪ Σ`` that are not in ``self.V`` are dropped: an
        uppercase symbol is removed from the non-terminals together with its
        rules, any other symbol is removed from the terminals.
        """
        nonterminals = self.N.copy()
        terminals = self.Σ.copy()
        productions = self.P.copy()

        for symbol in nonterminals.union(self.Σ) - self.V:
            if not symbol.isupper():
                # terminal
                terminals.remove(symbol)
                continue

            # non-terminal: drop its rules and the symbol itself
            del productions[symbol]
            nonterminals.remove(symbol)

        return CFG(nonterminals, terminals, productions, self.S)
Esempio n. 21
0
def valid(gf, lf, max_alts_allowed=None, empty_alts_ratio=None):
    """ Tell whether a generated grammar is valid.

        Generated grammar is valid if it:
        a) has no empty rule
        b) number of alternatives/rule < max_alts_allowed
        c) %age of empty alternatives < empty_alts_ratio
        d) has no unreachable rules
        e) doesn't contain a subset which takes no input
        f) is not trivially ambiguous

        Checks b) and c) are skipped when the respective limit is None.
        For d), e) and f) a one-letter marker ("r", "u", "a") is written to
        stdout when the check fails.

        Args:
            gf: path of the grammar file.
            lf: path of the lexer file.
            max_alts_allowed: limit passed to ``has_too_many_alts``.
            empty_alts_ratio: limit passed to ``has_too_many_empty_alts``.

        Returns:
            True if every check passes, False otherwise. """

    # Context managers make sure both files are closed again.
    with open(lf, "r") as lexf:
        lex = Lexer.parse(lexf.read())
    with open(gf, "r") as gramf:
        cfg = CFG.parse(lex, gramf.read())

    # check for empty rules
    if empty_rule(cfg):
        return False

    # check if any of the rule has > N alts
    if max_alts_allowed is not None:
        if has_too_many_alts(cfg, max_alts_allowed):
            return False

    # check if we have too many empty alts
    if empty_alts_ratio is not None:
        if has_too_many_empty_alts(cfg, empty_alts_ratio):
            return False

    # Check if all the rules are reachable from the start rule.
    # The result is computed once and reused for the message.
    unreachable_rules = unreachable(cfg)
    if len(unreachable_rules) > 0:
        print("unreachable:  %s" % (unreachable_rules,))
        sys.stdout.write("r")
        sys.stdout.flush()
        return False

    # Check if the grammar is unproductive
    if unproductive(cfg, lex):
        sys.stdout.write("u")
        sys.stdout.flush()
        return False

    # Check the grammar for trivial ambiguities
    if ambiguous(cfg):
        sys.stdout.write("a")
        sys.stdout.flush()
        return False

    return True
Esempio n. 22
0
    def __init__(self, IRcode, globalVariables=None, stringInit=0, functName=""):
        """Initialise the generator state for one piece of IR code.

        Unless ``stringInit`` is truthy, a CFG is built from ``IRcode``,
        analysed (node info, lines without predecessors, liveness, leaders)
        and ``self.IRcode`` is replaced by the code the CFG returns.
        """
        self.IRcode = IRcode

        # --- generated output ---
        self.tinyCode = ""
        self.declCode = ""

        # --- counters and lookup tables ---
        self.regNum = 0
        self.tempNum = 0
        self.regDict = {}
        self.declDict = {}
        self.stringDict = {}
        self.parameters = 0
        self.functCFG = None
        self.lineNum = 0
        self.totalLineNum = 0
        self.numTempParams = 0
        # Saves mapping of temp to stack in case of spilling
        self.tempsSpilledDict = {}
        # If this code generation is just for the string initialization
        self.stringInit = stringInit
        self.globalVariables = globalVariables

        # --- stack layout ---
        self.stackOffset = 2
        self.localVarOffset = 4
        self.numLocalParams = 0
        self.registersToPush = ["r0", "r1", "r2", "r3"]
        self.functName = functName

        # Add 4 registers
        self.Registers = [RegisterStatus(number) for number in range(4)]

        if stringInit:
            return

        flow_graph = CFG(IRcode, functName=self.functName)
        self.functCFG = flow_graph
        flow_graph.populateNodeInfo()
        flow_graph.removeLinesWithNoPredecessors()
        flow_graph.runLivenessAnalysis(globalVariables)
        flow_graph.setLeaders()
        if DEBUG:
            flow_graph.printGraphWithNodeLists()

        self.IRcode = flow_graph.getCode()
Esempio n. 23
0
def mutate_cfg(gp, lp, type):
    """Return a mutated clone of the grammar stored in ``gp``.

    Args:
        gp: path of the grammar file.
        lp: path of the lexer file.
        type: kind of mutation: 'empty', 'add', 'mutate', 'delete' or
            'switch'.

    Returns:
        The mutated clone; the parsed grammar itself is left untouched.

    Raises:
        ValueError: if ``type`` is not one of the supported kinds.  (The
            former ``assert "<message>"`` could never fail, so unsupported
            kinds silently returned an unmodified clone.)
    """
    supported = ('empty', 'add', 'mutate', 'delete', 'switch')
    # Fail fast, before any file is touched.
    if type not in supported:
        raise ValueError("mutation type '%s' is not supported" % (type,))

    with open(lp, "r") as lexf:
        lex = Lexer.parse(lexf.read())
    with open(gp, "r") as gramf:
        cfg = CFG.parse(lex, gramf.read())
    sym_toks = Utils.sym_tokens(gp)

    _cfg = cfg.clone()

    if type == 'empty':
        empty(_cfg)
    elif type == 'add':
        tok = Utils.randomTok(cfg, lex, sym_toks)
        add(_cfg, tok)
    elif type == 'mutate':
        tok = Utils.randomTok(cfg, lex, sym_toks)
        mutate(_cfg, tok)
    elif type == 'delete':
        delete(_cfg)
    else:  # 'switch'
        switch(_cfg)

    return _cfg
Esempio n. 24
0
    def toGNF(self):
        """
        Build a grammar whose rules follow Greibach normal form.

        each rule must be of format
        A → aB1B2B3...Bn   (a ∈ Σ, B1,B2,B3,...,Bn ∈ N)

        Starts from ``self.remove_left_recursion()``, walks the non-terminals
        in reversed order and substitutes leading non-terminals by their
        rules.  Returns the object produced by ``CFG(N, self.Σ, P, self.S)``.
        """
        import CFG

        G = self.remove_left_recursion()
        N = Set(reversed(G.N.copy()))
        P = G.P.copy()

        def resolve(A, B):
            # Handle every rule of A exactly once: take it from the front
            # and add the rewritten form(s) back.
            # assumes P[A] is ordered and add() appends at the end
            # — TODO confirm against Set
            for _ in range(len(P[A])):
                rule = list(P[A].pop(0))
                if rule[0].islower():
                    # Rule already starts with a lowercase symbol: give every
                    # other lowercase character its barred name.
                    for i, c in enumerate(rule[1:]):
                        if c.islower() and len(c) == 1:
                            rule[i + 1] = f"{c}̄"

                    rule = "".join(rule)
                    P[A].add(Rule(rule))

                elif rule[0] == B:
                    # Rule starts with B: put each rule of B in its place.
                    rules = Set()
                    for rule1 in P[B]:
                        rules.add(Rule(rule1 + "".join(rule[1:])))
                    P[A] |= rules

                else:
                    # NOTE(review): `rule` is a list of characters here,
                    # whereas the other branches pass a string to Rule.
                    P[A].add(Rule(rule))

        for i, A in enumerate(N):
            for B in N[:i + 1]:
                resolve(A, B)

        return CFG(N, self.Σ, P, self.S)
Esempio n. 25
0
def genNonRecGrammar(g, n):
    """
    Return a non-recursive grammar with n unrolls.

    Recursive rules (rules whose name is reachable from itself according to
    ``reachableSymbols``) are emitted as indexed copies ``name0``, ``name1``,
    ...; references to recursive symbols are redirected to indexed names.
    The result is ``CFG.CFG(g.tokens, rules)``.

    Side effect: the module-level name ``rs`` is rebound to the reachability
    relation of ``g``.
    """
    # step 1: generate rules from 0..n-1 unrolls
    global rs
    rs = reachableSymbols(g)
    print "Reachablity relation:"
    print rs
    nextSymbolIndex = {}  # dictionary non-term -> last-index
    lastGenSymbolIndex = {}  # last index of generated rule
    # put recursive symbols in symbolIndex dictionary
    for sym in rs.keys():
        if sym in rs[sym]:
            nextSymbolIndex[sym] = 0
            lastGenSymbolIndex[sym] = 0
    # invariant: symbolIndex[sym] == next index to generate
    print "\nRecursive rules:"
    print nextSymbolIndex
    print "\nGenerating non recursive grammar:"
    rules = []
    generated = set()
    # Loop until unrools() reports that the indices have reached n.
    while not unrools(nextSymbolIndex, n):
        for r in g.rules:
            if r.name in rs[r.name]:
                # the rule is recursive
                i = nextSymbolIndex[r.name]
                newrule = CFG.Rule(r.name + str(i), [])
                # The index is bumped before the sequences are copied, so a
                # self-reference below points at the next copy.
                nextSymbolIndex[r.name] = i + 1
                for seq in r.seqs:
                    newseq = []
                    for sym in seq:
                        if isinstance(sym, CFG.Non_Term_Ref
                                      ) and sym.name in nextSymbolIndex.keys():
                            # sym is recursive: generate indexed symbol
                            j = nextSymbolIndex[sym.name]
                            lastGenSymbolIndex[sym.name] = j
                            newseq.append(CFG.Non_Term_Ref(sym.name + str(j)))
                        else:
                            # sym is not recursive: generate original symbol
                            newseq.append(sym)
                    newrule.seqs.append(newseq)
            else:
                # Non-recursive rules are emitted only once.
                if r.name in generated:
                    continue
                newrule = CFG.Rule(r.name, [])
                for seq in r.seqs:
                    newseq = []
                    for sym in seq:
                        # NOTE(review): this tests membership in rs, while
                        # the recursive branch tests nextSymbolIndex —
                        # confirm that every key of rs has a "<name>0" rule.
                        if isinstance(
                                sym,
                                CFG.Non_Term_Ref) and sym.name in rs.keys():
                            name = sym.name + "0"
                            newseq.append(CFG.Non_Term_Ref(name))
                        else:
                            newseq.append(sym)
                    newrule.seqs.append(newseq)

            generated.add(newrule.name)
            rules.append(newrule)

    # step 2: generate indexed rules with only terminal sequences as rhs

    # step 2.1: Generate only s : rhs width rhs containing only non-rec symbols
    for name in lastGenSymbolIndex.keys():
        if lastGenSymbolIndex[name] == nextSymbolIndex[name]:
            r = g.get_rule(name)
            newrule = CFG.Rule(name + str(lastGenSymbolIndex[name]), [])
            nextSymbolIndex[name] = nextSymbolIndex[name] + 1
            for seq in r.seqs:
                if nonrec(seq):
                    newrule.seqs.append(seq)
                    # NOTE(review): appended once per matching sequence, so
                    # the same rule object can end up in `rules` repeatedly.
                    rules.append(newrule)

    # step 2.2: Generate not yet generated rules
    # NOTE(review): step 2.1 increments nextSymbolIndex for exactly the names
    # that satisfy this condition, so the body below looks unreachable.
    for name in lastGenSymbolIndex.keys():
        if lastGenSymbolIndex[name] == nextSymbolIndex[name]:
            r = g.get_rule(name)
            newrule = CFG.Rule(name + str(lastGenSymbolIndex[name]), [])
            for seq in r.seqs:
                newseq = []
                for sym in seq:
                    if isinstance(sym,
                                  CFG.Non_Term_Ref) and sym.name in rs.keys():
                        # NOTE(review): adds a str to an int index (TypeError
                        # if reached) and rebinds the loop variable `name`.
                        name = sym.name + lastGenSymbolIndex[name]
                        newseq.append(CFG.Non_Term_Ref(name))
                    else:
                        newseq.append(sym)
                # NOTE(review): newseq is never added to newrule.seqs, and
                # the rule is appended once per sequence.
                rules.append(newrule)
            generated.add(newrule.name)

    return CFG.CFG(g.tokens, rules)
Esempio n. 26
0
# Extend the pool of candidate variable names with the series A12 .. Z12.
# NOTE(review): varContainer itself is defined before this point, outside
# the part of the file shown here.
varContainer += ['A12', 'B12', 'C12', 'D12', 'E12', 'F12', 'G12', 'H12', 'I12', 'J12', 'K12', 'L12', 'M12', 'N12', 'O12', 'P12', 'Q12', 'R12', 'S12', 'T12', 'U12', 'V12', 'W12', 'X12', 'Y12', 'Z12']

def readSyntax(Terminals, languages):
    """Turn source text into the token list used for parsing.

    Tabs are dropped, every newline becomes the token "ENDL", the text is
    split on single spaces and empty pieces are discarded.  Every token that
    is not contained in ``Terminals`` is replaced by "NAME".

    Args:
        Terminals: container of the known terminal tokens.
        languages: the text to tokenize.

    Returns:
        The list of tokens.
    """
    pieces = languages.replace("\t", "").replace("\n", " ENDL ").split(" ")
    # One pass filters the empty pieces and maps unknown tokens, replacing
    # the former count()/remove() loop, which was quadratic.
    return [piece if piece in Terminals else "NAME"
            for piece in pieces if piece]


# Load the grammar model and reserve the variable names it does not use yet.
Terminals, V, Productions = CFG.loadModel("model.txt")
varContainer = CFG.getNotUsedVariables(V, varContainer)

# Convert the grammar step by step; every step returns the updated triple.
Productions, V, varContainer = CFG2CNF.START(Productions, V, varContainer)
Productions, V, varContainer = CFG2CNF.REMOVE_NULL_PRODUCTIONS(Productions, V, varContainer)
Productions, V, varContainer = CFG2CNF.REMOVE_UNIT_PRODUCTIONS(Productions, V, varContainer)
Productions, V, varContainer = CFG2CNF.REMOVE_MORE_THAN_2_VARIABLES_PRODUCTION(Productions, V, varContainer)
Productions, V, varContainer = CFG2CNF.REMOVE_TERM_PRODUCTION(Productions, V, varContainer)

# Read the input through a context manager so the file is closed again.
with open("syntax.txt") as syntax_file:
    languages = syntax_file.read()
languages = readSyntax(Terminals, languages)
for x in Productions:
    print(x)
print(languages)
CYK.CYK(Productions, 'S0', languages)
class App:
    """Read an NPDA description, convert it to a CFG and write the results.

    Input file layout, as read by ``Creat_NPDA``:
        line 0: an integer passed to the NPDA constructor
        line 1: comma-separated alphabet
        line 2: comma-separated stack symbols
        line 3: comma-separated first stack symbol(s)
        line 4 onwards: one transition per line with five comma-separated
            fields; fields 0 and 4 name the origin and destination state
            ("q<number>", a "*" marks the final state), fields 1 and 2 form
            the transition key and field 3 is stored with the destination.
    """

    def __init__(self, file_address, output):
        """Remember the input and output paths; nothing is read yet."""
        self.File_Address = file_address
        self.output = output
        self.NPDA = None
        self.CFG = None
        self.Alphabet = None

    @staticmethod
    def _state_index(field):
        """Return the number that follows 'q' in a state field."""
        return int(field.replace('\n', '').split('q')[1])

    def Creat_NPDA(self):
        """Build ``self.NPDA`` from the file at ``self.File_Address``.

        Raises:
            IndexError: if the file has no transition line.
        """
        # read file
        with open(self.File_Address, 'r') as File:
            Lines = File.readlines()
        self.Alphabet = Lines[1].replace('\n', '').split(',')
        # add lambda to alphabet for npda
        NPDA_Alphabet = self.Alphabet + ["_"]
        # split stack symbol and first stack symbol
        Stack_Symbol = Lines[2].replace('\n', '').split(',')
        First_Stack_Symbol = Lines[3].replace('\n', '').split(',')
        # create base npda
        self.NPDA = NPDA(NPDA_Alphabet, int(Lines[0]), Stack_Symbol,
                         First_Stack_Symbol)

        # Blank lines carry no transition and are skipped.
        transitions = [line.split(',') for line in Lines[4:] if line.strip()]
        start_origin = self._state_index(transitions[0][0])
        endpoints = [(self._state_index(info[0]), self._state_index(info[4]))
                     for info in transitions]
        # reduce state numbers to start at zero
        minimum = min(min(pair) for pair in endpoints)

        # set start variable for npda
        self.NPDA.Start_Variable = self.NPDA.States[start_origin - minimum]
        # complete npda
        for info, (origin, destination) in zip(transitions, endpoints):
            origin_state = self.NPDA.States[origin - minimum]
            destination_state = self.NPDA.States[destination - minimum]
            key = (info[1], info[2])
            origin_state.Nueighbor[key] = origin_state.Nueighbor.get(
                key, []) + [(destination_state, info[3])]
            # final states
            if "*" in info[0]:
                self.NPDA.Final_State = origin_state
            if "*" in info[4]:
                self.NPDA.Final_State = destination_state

    def Creat_CFG(self):
        """Create ``self.CFG`` and fill it from ``self.NPDA``."""
        self.CFG = CFG()
        self.NPDA.Convert_NPDA_to_CFG(self.CFG)
        # set start variable
        self.CFG.Start_Variable = (
            "(" + self.NPDA.Start_Variable.Name
            + self.NPDA.First_Stack_Symbol[0]
            + self.NPDA.Final_State.Name + ")")
        # add start variable if it is not in cfg.variables
        if self.CFG.Start_Variable not in self.CFG.Variables:
            self.CFG.Variables[self.CFG.Start_Variable] = []

    def Write_CFG(self):
        """Write the grammar to ``self.output``, one variable per line.

        Each line has the form "<variable>-><p1>|<p2>|...".
        """
        with open(self.output, "w") as File:
            File.write(
                "***my grammar does not lambda productions and remove them for detectin string***"
                + '\n')
            for var, pro in self.CFG.Variables.items():
                # join() puts the separator only between productions, so a
                # "|" inside the variable name is never touched.
                File.write(var + '->' + '|'.join(pro) + '\n')

    def Detection_String(self, String):
        """Append the detection result for ``String`` to ``self.output``.

        Writes "False" when the grammar rejects the string, otherwise "True"
        followed by the derivation steps joined with "=>" and ending with
        the string itself.
        """
        String_Map = self.CFG.Detection_String(String)
        with open(self.output, "a") as File:
            File.write("input :" + String + '\n')
            File.write("output :" + '\n')
            # Equality on purpose: an empty derivation list is a success and
            # must not be mistaken for False.
            if String_Map == False:  # noqa: E712
                File.write("False" + '\n')
            else:
                File.write("True" + '\n')
                # Work on a copy so the list returned by the grammar object
                # is not modified.
                steps = list(String_Map) + [String]
                File.write("=>".join(steps) + '\n')
Esempio n. 28
0
 def set_CFG(self):
     """Show the value returned by ``CFG.CFG()`` as a percentage in the text browser."""
     similarity = str(CFG.CFG())  # res is a str
     message = "".join(("CFG similarity:", similarity, "%"))
     self.textBrowser.setText(message)
Esempio n. 29
0
class TinyGenerator():

    def __init__(self, IRcode, globalVariables=None, stringInit=0, functName=""):
        """Initialise generator state and, unless ``stringInit`` is truthy,
        build the function's control-flow graph.

        Args:
            IRcode: newline-separated IR statements to translate.
            globalVariables: collection handed to the liveness analysis and
                consulted when writing globals back.
            stringInit: truthy when this generator only emits the string
                initialisation code; no CFG is built in that case.
            functName: name of the function being generated.
        """
        # Inputs
        self.IRcode = IRcode
        self.globalVariables = globalVariables
        self.stringInit = stringInit
        self.functName = functName

        # Emitted text
        self.tinyCode = ""
        self.declCode = ""

        # Counters
        self.regNum = 0
        self.tempNum = 0
        self.parameters = 0
        self.lineNum = 0
        self.totalLineNum = 0
        self.numTempParams = 0
        self.numLocalParams = 0

        # Lookup tables
        self.regDict = {}
        self.declDict = {}
        self.stringDict = {}
        self.tempsSpilledDict = {}  # temp name -> stack slot it was spilled to

        # Stack-frame layout
        self.stackOffset = 2
        self.localVarOffset = 4
        self.registersToPush = ["r0", "r1", "r2", "r3"]

        # One status record per machine register r0..r3.
        self.Registers = [RegisterStatus(number) for number in range(4)]

        self.functCFG = None
        if stringInit:
            return

        graph = CFG(IRcode, functName=self.functName)
        self.functCFG = graph
        graph.populateNodeInfo()
        graph.removeLinesWithNoPredecessors()
        graph.runLivenessAnalysis(globalVariables)
        graph.setLeaders()
        if DEBUG:
            graph.printGraphWithNodeLists()

        # Continue with the code as returned by the analysed graph.
        self.IRcode = graph.getCode()

    def generate(self):
        """Generate Tiny code, re-running the pass until the offset is stable.

        After each pass ``updateLocalVarOffset`` recomputes
        ``self.localVarOffset``; while that value differs from the one the pass
        started with, the per-pass state is reset and generation is repeated.

        Returns:
            ``self.tinyCode`` as left by the last pass.
        """
        offsetBeforePass = self.localVarOffset
        self.generateCode()
        self.updateLocalVarOffset()

        while offsetBeforePass != self.localVarOffset:
            offsetBeforePass = self.localVarOffset
            if DEBUG:
                print("; RESTARTING EVERYTHING cause varOffset is {0}\n\n\n\n\n".format(self.localVarOffset))

            # Put every per-pass field back to its initial value.
            self.tinyCode, self.declCode = "", ""
            self.regNum = self.tempNum = 0
            self.regDict, self.declDict, self.stringDict = {}, {}, {}
            self.parameters = 0
            self.lineNum = self.totalLineNum = 0
            self.numTempParams = 0
            self.tempsSpilledDict = {}  # temp name -> stack slot when spilled
            self.stackOffset = 2
            self.numLocalParams = 0

            self.generateCode()
            self.updateLocalVarOffset()

        return self.tinyCode

    def generateCode(self):
        """Translate every statement of ``self.IRcode`` into Tiny code.

        Each line is dispatched on its first space-separated token; unknown
        opcodes go to ``errorFunct``. Unless ``self.stringInit`` is truthy,
        registers are reset before and invalidated after every line whose
        index is in ``self.functCFG.leaders``. Afterwards the ``link``
        instruction is rewritten with the final frame size and redundant
        moves are removed.

        Returns:
            The generated code (also stored in ``self.tinyCode``).
        """
        stmtList = self.IRcode.split("\n")
        switcher = {
            "INCI": self.inci,
            "DECI": self.deci,
            "ADDI": self.addi,
            "ADDF": self.addf,
            "SUBI": self.subi,
            "SUBF": self.subf,
            "MULTI": self.multi,
            "MULTF": self.multf,
            "DIVI": self.divi,
            "DIVF": self.divf,
            "STOREI": self.storei,
            "STOREF": self.storei,
            "STORES": self.stores,
            "GTI": self.comp,
            "GEI": self.comp,
            "LTI": self.comp,
            "LEI": self.comp,
            "NEI": self.comp,
            "EQI": self.comp,
            "GTF": self.comp,
            "GEF": self.comp,
            "LTF": self.comp,
            "LEF": self.comp,
            "NEF": self.comp,
            "EQF": self.comp,
            "JUMP": self.jump,
            "LABEL": self.label,
            "READI": self.readi,
            "READF": self.readf,
            "WRITEI": self.writei,
            "WRITEF": self.writef,
            "WRITES": self.writes,
            "JSR": self.jsr,
            "PUSH": self.push,
            "POP": self.pop,
            "RET": self.ret,
            "LINK": self.link,
        }
        self.totalLineNum = len(stmtList)

        for line in stmtList:
            if not self.stringInit:
                if self.lineNum in self.functCFG.leaders:
                    self.resetRegisters(keepValid=1)
            # Pick the handler for this opcode; unknown opcodes are ignored.
            func = switcher.get(line.split(" ")[0], self.errorFunct)
            if DEBUG:
                print(";", line)
            func(line)
            if not self.stringInit:
                if self.lineNum in self.functCFG.leaders:
                    self.invalidateAllRegisters()
            self.lineNum += 1
            if DEBUG:
                self.printRegs()

        # Patch the frame size now that the number of spilled temporaries is
        # known. The pattern is anchored to the start of a line so that only
        # the `link` instruction is rewritten, not operands that merely
        # contain "link" (e.g. `jsr unlink` or `label linked`).
        frameSize = self.numLocalParams + self.localVarOffset + self.numTempParams
        self.tinyCode = re.sub(
            r'^link\b.*\n',
            "link {0}\n".format(str(frameSize)),
            self.tinyCode,
            flags=re.MULTILINE)
        self.removeUnnecessaryMoves()
        return self.tinyCode

    def updateLocalVarOffset(self):
        """Recompute which registers were used and size the offset to match.

        Sets ``self.registersToPush`` to the names of all registers flagged
        ``usedAtLeastOnce`` and ``self.localVarOffset`` to how many there are.
        """
        usedNames = []
        for status in self.Registers:
            if DEBUG:
                print("; Register used at least Once for {0}: {1}".format(status.regNum, str(status.usedAtLeastOnce)))
            if not status.usedAtLeastOnce:
                continue
            usedNames.append("r{0}".format(status.regNum))

        self.localVarOffset = len(usedNames)
        self.registersToPush = usedNames


    # def countRegsUsed(self):
    #     tinyCodeArray = self.tinyCode.split("\n")
    #     regsUsed = []
    #     for tinyLine in tinyCodeArray:
    #         tinyLineSplit = tinyLine.split()
    #         if tinyLineSplit

    def resetRegisters(self, keepValid=0):
        """Free every valid register (temporaries are kept) and list them.

        Args:
            keepValid: when truthy, each freed register is marked valid again
                after ``freeRegister`` has run.

        Returns:
            Names ("r0".."r3") of the registers that were valid on entry.
        """
        if DEBUG:
            print("; resetting reg allocation")

        freedNames = []
        for index in range(4):
            if not self.Registers[index].valid:
                continue
            name = "r{0}".format(index)
            self.freeRegister(name, keepTemporaries=1)
            freedNames.append(name)
            if keepValid:
                self.Registers[index].valid = 1
        return freedNames

    def invalidateAllRegisters(self):
        for regNum in range(4):
            self.Registers[regNum].valid = 0

    def saveGlobalVariablesBack(self):
        """Free every valid register whose variable is in ``self.globalVariables``."""
        if DEBUG:
            print("; storing globalVariables back")

        for index in range(4):
            status = self.Registers[index]
            if not status.valid:
                continue
            if status.variable not in self.globalVariables:
                continue
            self.freeRegister("r{0}".format(index))


    def removeUnnecessaryMoves(self):
        tinyCodeArray = self.tinyCode.strip().rstrip('\n').split('\n')
        linesToRemove = []
        for tinyLine in tinyCodeArray:
            tinyLine = tinyLine.strip()
            if tinyLine == "":
                continue
            tinyLineSplit = tinyLine.split()
            if tinyLineSplit[0] != "move":
                continue
            if tinyLineSplit[1] == tinyLineSplit[2]:
                linesToRemove.append(tinyLine)

        for lineToRemove in linesToRemove:
            tinyCodeArray.remove(lineToRemove)

        self.tinyCode = "\n".join(tinyCodeArray) + "\n"
        return


    def printRegs(self):
        strToPrint = ""
        for register in self.Registers:
            strToPrint += " r{0} -> ".format(register.regNum)    
            if register.valid:
                strToPrint += register.variable
            else:
                strToPrint += "null"
        print(";", strToPrint)

        return


    def convertIRVarToTinyVar(self, IRVar):
        tinyVar = ""
        if not IRVar.startswith("$"):
            tinyVar = IRVar
            self.declDict[tinyVar] = ""
        elif IRVar.startswith("$L"):
            tinyVar = "$" + str(int(IRVar.replace("L", "-")[1:]) - self.localVarOffset)
        elif IRVar.startswith("$P"):
            tinyVar = "$" + str(-int(IRVar[2:]) + self.stackOffset + self.parameters)
        elif IRVar.startswith("$R"):
            tinyVar = "$" + str(self.stackOffset + self.parameters)

        return tinyVar

    def ensureRegister(self, variable, doneWithLine=0):
        """Return the register holding ``variable``, loading it if necessary.

        When no valid register already holds the variable, one is allocated
        and a ``move`` from the variable's location is emitted. A temporary
        that was spilled is loaded from its spill slot, which is then released.
        """
        if DEBUG:
            print("; ensuring {0}".format(variable))

        holder = next(
            (status for status in self.Registers
             if status.variable == variable and status.valid),
            None)
        if holder is not None:
            return "r{0}".format(holder.regNum)

        register = self.registerAllocate(variable, doneWithLine)
        source = self.convertIRVarToTinyVar(variable)
        if variable in self.tempsSpilledDict:
            # Reload from the spill slot and forget the mapping.
            source = self.tempsSpilledDict.pop(variable)

        loadInstruction = "move {0} {1}\n".format(source, register)
        if DEBUG:
            print(";" + loadInstruction)
        self.tinyCode += loadInstruction
        return register

    def checkVariableLive(self, variable):
        return variable in self.functCFG.CFGNodeList[self.lineNum].outList


    def spillRegister(self, register, keepTemporaries=0):
        regNum = int(register[1])
        tinyVar = self.convertIRVarToTinyVar(self.Registers[regNum].variable)

        if self.Registers[regNum].variable.startswith("$T") and keepTemporaries:
            return

        if self.Registers[regNum].variable.startswith("$T"):  
            tempStackVar = self.numLocalParams + self.localVarOffset + 1
            while True:
                if "$-{0}".format(tempStackVar) in self.tempsSpilledDict.values():
                    tempStackVar += 1
                    continue

                if (tempStackVar - self.numLocalParams - self.localVarOffset) > self.numTempParams:
                    self.numTempParams = (tempStackVar - self.numLocalParams - self.localVarOffset)

                tinyVar = "$-{0}".format(tempStackVar)
                self.tempsSpilledDict[self.Registers[regNum].variable] = tinyVar
                print("; spilling temp ",self.Registers[regNum].variable) if DEBUG else None
                break

        print("; spilling {0} to {1}".format(register, tinyVar)) if DEBUG else None
        print("; move {0} {1}\n".format(register, tinyVar)) if DEBUG else None
        self.tinyCode += "move {0} {1}\n".format(register, tinyVar)
        return


    def freeRegister(self, register, keepTemporaries=0):
        """Release ``register``, spilling its value first when still needed.

        The value is spilled when the register is valid, dirty and its
        variable is live after the current line. With ``keepTemporaries``
        truthy a register holding a ``$T`` temporary keeps its flags;
        otherwise ``valid`` and ``dirty`` are cleared.
        """
        status = self.Registers[int(register[1])]
        if DEBUG:
            print("; freeing {0} with {1}, valid: {2}, dirty: {3}".format(register, status.variable, status.valid, status.dirty))

        mustWriteBack = status.valid and status.dirty and self.checkVariableLive(status.variable)
        if mustWriteBack:
            self.spillRegister(register, keepTemporaries=keepTemporaries)

        if status.variable.startswith("$T") and keepTemporaries:
            return

        status.valid = 0
        status.dirty = 0
        return
    

    def chooseRegToFree(self, doneWithLine=0):
        """Pick the index (0-3) of the register that should be evicted.

        Candidates are narrowed in stages, returning as soon as exactly one
        remains:

        1. unless ``doneWithLine`` is truthy, drop valid registers whose
           variable is in the current line's ``genList``;
        2. drop dirty registers; if that leaves nothing, fall back to all
           registers minus those dropped in stage 1;
        3. walk the following lines, dropping registers whose variable is in
           that line's ``genList``, until the last line index or a leader is
           reached, at which point the first remaining candidate is returned.

        NOTE(review): ``genList`` presumably holds the variables a statement
        reads - confirm against the CFG node definition.
        NOTE(review): stage 3 indexes ``regsToUse[0]`` and
        ``CFGNodeList[lineToCheck]`` without bounds checks; it looks like an
        empty candidate list or a scan starting past the last line would
        raise IndexError - verify that callers cannot reach those states.
        """
        regsToUse = [0,1,2,3]
        regsToRemove = []
        if not doneWithLine:
            # Stage 1: protect registers holding variables in this line's genList.
            for regNum in regsToUse:
                if self.Registers[regNum].valid and self.Registers[regNum].variable in self.functCFG.CFGNodeList[self.lineNum].genList:
                    regsToRemove.append(regNum)
            for regNum in regsToRemove:
                regsToUse.remove(regNum)

            if len(regsToUse) == 1:
                return regsToUse[0]

        # Stage 2: prefer registers that are not dirty.
        tempRegsToRemove = []
        for regNum in regsToUse:
            if self.Registers[regNum].dirty:
                tempRegsToRemove.append(regNum)

        for regNum in tempRegsToRemove:
            regsToUse.remove(regNum)

        if len(regsToUse) == 1:
            return regsToUse[0]

        if len(regsToUse) == 0:
            # Every candidate was dirty: allow dirty registers again, but keep
            # the stage-1 exclusions.
            regsToUse = [0,1,2,3]
            for regNum in regsToRemove:
                regsToUse.remove(regNum)


        # Stage 3: look ahead line by line within the current run of code.
        lineToCheck = self.lineNum + 1
        while True:
            print(";", regsToUse) if DEBUG else None
            # Stop at the last line index or at a leader and take the first
            # candidate that is still available.
            if lineToCheck == (self.totalLineNum - 1):
                return regsToUse[0]
            if lineToCheck in self.functCFG.leaders:
                return regsToUse[0]

            regsToRemove = []
            for regNum in regsToUse:
                if self.Registers[regNum].variable in self.functCFG.CFGNodeList[lineToCheck].genList:
                    regsToRemove.append(regNum)

            for regNum in regsToRemove:
                regsToUse.remove(regNum)
                # Return as soon as a single candidate is left.
                if len(regsToUse) == 1:
                    return regsToUse[0]
            lineToCheck += 1

    def chooseAndFreeRegister(self, doneWithLine=0):
        """Select a register to evict, free it, and return its index."""
        victim = self.chooseRegToFree(doneWithLine)
        victimName = "r{0}".format(victim)
        self.freeRegister(victimName)
        return victim

    def registerAllocate(self, varName, doneWithLine=0, registersToUse=[]):
        """Bind ``varName`` to a register and return the register name.

        Args:
            varName: variable to place in the register.
            doneWithLine: forwarded to the eviction choice when no register
                is free.
            registersToUse: when non-empty, its first entry (e.g. ``"r2"``) is
                used directly; the list itself is never modified.

        The chosen register is marked valid, clean and used-at-least-once.
        """
        if DEBUG:
            print("; starting allocation of {0}".format(varName))

        def claim(status, number):
            # Record the new binding on the status record and report its name.
            status.valid = 1
            status.dirty = 0
            status.variable = varName
            status.usedAtLeastOnce = True
            if DEBUG:
                print("; allocating {0} to r{1}".format(varName, number))
            return "r{0}".format(number)

        if len(registersToUse) != 0:
            regNum = int(registersToUse[0][1])
            return claim(self.Registers[regNum], regNum)

        # Prefer the first register that currently holds nothing valid.
        for status in self.Registers:
            if not status.valid:
                return claim(status, status.regNum)

        # Everything is occupied: evict one register and take it over.
        regNum = self.chooseAndFreeRegister(doneWithLine)
        return claim(self.Registers[regNum], regNum)

    def freeRegistersIfDead(self, variablesToTryFree, keepVariablesLive=[]):
        registersFreed = []
        for regNum in range(4):
            if self.Registers[regNum].valid and self.Registers[regNum].variable in variablesToTryFree and self.Registers[regNum].variable not in keepVariablesLive:
                if (not self.checkVariableLive(self.Registers[regNum].variable)) or (self.Registers[regNum].variable in self.functCFG.CFGNodeList[self.lineNum].killList):
                    print("; freeing cause dead r{0} with {1} -> {2}".format(regNum, self.Registers[regNum].variable, self.functCFG.CFGNodeList[self.lineNum].outList)) if DEBUG else None
                    self.Registers[regNum].valid = 0
                    self.Registers[regNum].dirty = 0
                    registersFreed.append("r{0}".format(regNum))

        return registersFreed


    def temporaryAllocate(self):
        """Create a fresh ``&<n>`` temporary and allocate a register for it.

        The temporary is appended to the current line's ``genList`` before the
        allocation takes place.
        """
        freshName = "&{}".format(self.tempNum)
        currentNode = self.functCFG.CFGNodeList[self.lineNum]
        currentNode.genList.append(freshName)
        self.tempNum += 1
        return self.registerAllocate(freshName)

    def incDecOperandSetup(self, op1):
        """Load ``op1`` into a register, mark it dirty, and return its name."""
        register = self.ensureRegister(op1, 0)
        self.markRegisterDirty(register)
        return register

    def inci(self, IRLine):
        """Emit ``inci`` for an ``INCI <var>`` IR line, then free dead registers."""
        operand = IRLine.split(" ")[1]

        register = self.incDecOperandSetup(operand)
        self.tinyCode += "inci {0}\n".format(register)

        self.freeRegistersIfDead([operand])
        return

    def deci(self, IRLine):
        """Emit ``deci`` for a ``DECI <var>`` IR line, then free dead registers."""
        operand = IRLine.split(" ")[1]

        register = self.incDecOperandSetup(operand)
        self.tinyCode += "deci {0}\n".format(register)

        self.freeRegistersIfDead([operand])
        return

    def mathOperandSetup(self, op1, op2, result, orderMatters):
        """Prepare the operands of a two-operand arithmetic instruction.

        Non-numeric operands are loaded into registers, a destination register
        is allocated for ``result`` (reusing a register freed by a dead
        operand when possible, but never ``op2``'s) and ``move op1 dest`` is
        emitted. ``orderMatters`` is accepted but not used.

        Returns:
            ``(second_operand, destination_register)`` where the second
            operand is either the literal text of ``op2`` or its register.
        """
        def isNumeric(text):
            return text.replace(".", "").replace("-", "").isdigit()

        firstInRegister = not isNumeric(op1)
        firstOperand = self.ensureRegister(op1, 0) if firstInRegister else op1

        secondInRegister = not isNumeric(op2)
        secondOperand = self.ensureRegister(op2, 0) if secondInRegister else op2

        candidates = [
            name
            for name, inRegister in ((op1, firstInRegister), (op2, secondInRegister))
            if inRegister
        ]
        if DEBUG:
            print(";", op1, "-->", firstOperand)
            print(";", op2, "-->", secondOperand)

        # op2 must survive until the arithmetic instruction itself is emitted.
        reusable = self.freeRegistersIfDead(candidates, keepVariablesLive=[op2])
        destination = self.registerAllocate(result, doneWithLine=0, registersToUse=reusable)
        self.markRegisterDirty(destination)
        self.freeRegistersIfDead(candidates)
        if DEBUG:
            print(";", result, "-->", destination)
            print("; opmrl_op1 = {0}, opmrl_op2 = {1}, reg_op2 = {2}".format(firstOperand, secondOperand, destination))
            print(("; move {0} {1}\n".format(firstOperand, destination)))

        self.tinyCode += ("move {0} {1}\n".format(firstOperand, destination))
        return secondOperand, destination

    def addi(self, IRLine):
        """Emit ``addi`` for an ``ADDI op1 op2 result`` IR line."""
        fields = IRLine.split(" ")
        source, destination = self.mathOperandSetup(fields[1], fields[2], fields[3], False)
        self.tinyCode += "addi {0} {1}\n".format(source, destination)
        return

    def addf(self, IRLine):
        """Emit ``addr`` for an ``ADDF op1 op2 result`` IR line."""
        fields = IRLine.split(" ")
        source, destination = self.mathOperandSetup(fields[1], fields[2], fields[3], False)
        self.tinyCode += "addr {0} {1}\n".format(source, destination)
        return

    def subi(self, IRLine):
        """Emit ``subi`` for a ``SUBI op1 op2 result`` IR line."""
        fields = IRLine.split(" ")
        source, destination = self.mathOperandSetup(fields[1], fields[2], fields[3], True)
        self.tinyCode += "subi {0} {1}\n".format(source, destination)
        return

    def subf(self, IRLine):
        """Emit ``subr`` for a ``SUBF op1 op2 result`` IR line."""
        fields = IRLine.split(" ")
        source, destination = self.mathOperandSetup(fields[1], fields[2], fields[3], True)
        self.tinyCode += "subr {0} {1}\n".format(source, destination)
        return

    def multi(self, IRLine):
        """Emit ``muli`` for a ``MULTI op1 op2 result`` IR line."""
        fields = IRLine.split(" ")
        source, destination = self.mathOperandSetup(fields[1], fields[2], fields[3], False)
        self.tinyCode += "muli {0} {1}\n".format(source, destination)
        return

    def multf(self, IRLine):
        """Emit ``mulr`` for a ``MULTF op1 op2 result`` IR line."""
        fields = IRLine.split(" ")
        source, destination = self.mathOperandSetup(fields[1], fields[2], fields[3], False)
        self.tinyCode += "mulr {0} {1}\n".format(source, destination)
        return

    def divi(self, IRLine):
        """Emit ``divi`` for a ``DIVI op1 op2 result`` IR line."""
        fields = IRLine.split(" ")
        source, destination = self.mathOperandSetup(fields[1], fields[2], fields[3], True)
        self.tinyCode += "divi {0} {1}\n".format(source, destination)
        return

    def divf(self, IRLine):
        """Emit ``divr`` for a ``DIVF op1 op2 result`` IR line."""
        fields = IRLine.split(" ")
        source, destination = self.mathOperandSetup(fields[1], fields[2], fields[3], True)
        self.tinyCode += "divr {0} {1}\n".format(source, destination)
        return

    def storei(self, IRLine):
        """Emit the ``move`` for a ``STOREI``/``STOREF`` ``src dest`` IR line.

        A numeric source is used literally; anything else is first loaded into
        a register. A ``$R`` destination is written to the return-value stack
        slot; any other destination gets a register, which is marked dirty.
        """
        fields = IRLine.split(" ")
        op1 = fields[1]
        result = fields[2]

        sourceInRegister = not op1.replace(".", "").replace("-", "").isdigit()
        source = self.ensureRegister(op1, 0) if sourceInRegister else op1

        candidates = [op1] if sourceInRegister else []
        reusable = self.freeRegistersIfDead(candidates)

        if result.startswith("$R"):
            destination = "$" + str(self.stackOffset + self.parameters)
        else:
            destination = self.registerAllocate(result, 1, registersToUse=reusable)
            self.markRegisterDirty(destination)

        instruction = "move {0} {1}".format(source, destination)
        if DEBUG:
            print("; " + instruction)
        self.tinyCode += instruction + "\n"
        return

    def stores(self, IRLine):
        lineSplit = IRLine.split(" ")
        result = " ".join(lineSplit[1:-1])
        op1 = lineSplit[-1]
        code  = []

        if op1.startswith("$L"):
            self.tinyCode += "str {0} {1}\n".format(op1, result)
        else:
            self.tinyCode = "str {0} {1}\n".format(op1, result) + self.tinyCode

        return

    def compOperand(self, op1, op2, dataType):
        """Emit the ``cmpi``/``cmpr`` instruction comparing ``op1`` and ``op2``.

        When only ``op2`` is a numeric literal the operands are swapped so the
        literal comes first and the variable sits in a register. When both are
        literals, ``op2`` is loaded into a fresh temporary register first.

        Args:
            op1, op2: IR operands (variable names or numeric literals).
            dataType: truthy for an integer compare (``cmpi``), falsy for a
                float compare (``cmpr``).

        Returns:
            True when the operands were swapped, so the caller can invert the
            comparison operator; False otherwise.
        """
        def isLiteral(text):
            return text.replace(".", "").replace("-", "").isdigit()

        code = []
        flipped = False

        if (not isLiteral(op1)) and isLiteral(op2):
            flipped = True
            op2, op1 = op1, op2

        regsToTryFree = []
        if isLiteral(op1):
            opmrl_op1 = op1
        else:
            opmrl_op1 = self.ensureRegister(op1, 0)
            regsToTryFree.append(op1)

        if isLiteral(op2):
            # Both operands are literals. The register was previously stored
            # under a misspelled name, leaving the compare without a second
            # operand and the literal never loaded. Mirror writeOperandSetup:
            # put the literal into a temporary register and compare that.
            reg_op2 = self.temporaryAllocate()
            code.append("move {0} {1}".format(op2, reg_op2))
        else:
            reg_op2 = self.ensureRegister(op2, 0)
            regsToTryFree.append(op2)

        self.freeRegistersIfDead(regsToTryFree)

        mnemonic = "cmpi" if dataType else "cmpr"
        code.append("{0} {1} {2}".format(mnemonic, opmrl_op1, reg_op2))

        self.tinyCode += "\n".join(code) + "\n"
        return flipped

    def comp(self, IRLine):
        """Emit compare-and-branch code for an ``<OP> op1 op2 label`` IR line.

        ``<OP>`` is one of LT/GT/EQ/NE/LE/GE followed by ``I`` (integer) or
        ``F`` (float). After the compare is emitted, the matching conditional
        jump to ``label`` follows; the operator is inverted through
        ``COMPOP.inverseTinyOP`` when the operands had to be swapped.
        """
        fields = IRLine.split(" ")
        op = fields[0]
        op1 = fields[1]
        op2 = fields[2]
        label = fields[3]

        # IR opcode -> name of the COMPOP member describing the relation.
        relationNames = {
            "LTI": "LT", "LTF": "LT",
            "GTI": "GT", "GTF": "GT",
            "EQI": "EQ", "EQF": "EQ",
            "NEI": "NE", "NEF": "NE",
            "LEI": "LE", "LEF": "LE",
            "GEI": "GE", "GEF": "GE",
        }
        relation = relationNames.get(op)
        CompOP = getattr(COMPOP, relation) if relation is not None else None

        if self.compOperand(op1, op2, op.endswith("I")):
            CompOP = COMPOP.inverseTinyOP(CompOP)

        branches = (
            (COMPOP.LT, "jlt"),
            (COMPOP.GT, "jgt"),
            (COMPOP.EQ, "jeq"),
            (COMPOP.NE, "jne"),
            (COMPOP.LE, "jle"),
            (COMPOP.GE, "jge"),
        )
        # An unmatched operator still contributes an empty line.
        emitted = ""
        for candidate, mnemonic in branches:
            if CompOP == candidate:
                emitted = "{0} {1}".format(mnemonic, label)
                break

        self.tinyCode += emitted + "\n"
        return

    def jump(self, IRLine):
        lineSplit = IRLine.split(" ")
        label = lineSplit[1]
        code = []

        code.append("jmp {0}".format(label))    
        self.tinyCode += "\n".join(code) + "\n"

        return

    def label(self, IRLine):
        lineSplit = IRLine.split(" ")
        label = lineSplit[1]
        code = []

        code.append("label {0}".format(label))   
        self.tinyCode += "\n".join(code) + "\n"

        return

    def readWriteOperandSetup(self, op2, code):
        opmr_op2 = ""
        if op2.replace(".", "").replace("-", "").isdigit():
            regVar = self.temporaryAllocate()
            code.append("move {0} r{1}".format(op2, self.regDict[regVar])) 
            opmr_op2 = "r" + str(self.regDict[regVar])   
        elif not op2.startswith("$"):
            opmr_op2 = op2
            self.declDict[opmr_op2] = ""
        elif op2.startswith("$L"):
            opmr_op2 = op2.replace("L", "-")
        elif op2.startswith("$P"):
            opmr_op2 = "$" + str(-int(op2[2:]) + self.stackOffset + self.parameters)
        elif op2.startswith("$R"):
            opmr_op2 = "$" + str(self.stackOffset + self.parameters)
        else:
            self.registerAllocate(op2)
            opmr_op2 = "r{0}".format(self.regDict[op2])

        return opmr_op2

    def markRegisterDirty(self, op):
        self.Registers[int(op[1])].dirty = 1
        return


    def readOperandSetup(self, op2, code):
        """Allocate a register for ``op2``, mark it dirty, and return its name.

        ``code`` is accepted for symmetry with ``writeOperandSetup`` but is
        not modified.
        """
        register = self.registerAllocate(op2, 1)
        self.markRegisterDirty(register)
        return register

    def writeOperandSetup(self, op2, code):
        """Return a register holding the value to be written.

        A numeric literal is moved into a fresh temporary register (the
        ``move`` is appended to ``code``); a variable is loaded through
        ``ensureRegister``. The operand's register is released afterwards if
        the variable is dead.
        """
        isLiteral = op2.replace(".", "").replace("-", "").isdigit()
        if isLiteral:
            register = self.temporaryAllocate()
            code.append("move {0} {1}".format(op2, register))
        else:
            register = self.ensureRegister(op2, 0)

        self.freeRegistersIfDead([op2])
        return register


    def readi(self, IRLine):
        """Emit ``sys readi`` for a ``READI <var>`` IR line."""
        target = IRLine.split(" ")[1]
        pending = []

        register = self.readOperandSetup(target, pending)
        pending.append("sys readi {0}".format(register))

        self.tinyCode += "\n".join(pending) + "\n"

    def readf(self, IRLine):
        """Emit ``sys readr`` for a ``READF <var>`` IR line."""
        target = IRLine.split(" ")[1]
        pending = []

        register = self.readOperandSetup(target, pending)
        pending.append("sys readr {0}".format(register))

        self.tinyCode += "\n".join(pending) + "\n"

    def writei(self, IRLine):
        """Emit ``sys writei`` for a ``WRITEI <operand>`` IR line."""
        operand = IRLine.split(" ")[1]
        pending = []

        register = self.writeOperandSetup(operand, pending)
        pending.append("sys writei {0}".format(register))

        self.tinyCode += "\n".join(pending) + "\n"

    def writef(self, IRLine):
        """Emit ``sys writer`` for a ``WRITEF <operand>`` IR line."""
        operand = IRLine.split(" ")[1]
        pending = []

        register = self.writeOperandSetup(operand, pending)
        pending.append("sys writer {0}".format(register))

        self.tinyCode += "\n".join(pending) + "\n"

    def writes(self, IRLine):
        lineSplit = IRLine.split(" ")
        result = lineSplit[1]
        code = []

        code.append("sys writes {0}".format(result)) 
        
        self.tinyCode += "\n".join(code) + "\n"
        pass

    def jsr(self, IRLine):
        """Emit ``jsr <label>`` for a ``JSR <label>`` IR line.

        Registers holding global variables are freed before the call is
        emitted.
        """
        target = IRLine.split(" ")[1]
        self.saveGlobalVariablesBack()
        self.tinyCode += "jsr {0}\n".format(target)
        return

    def push(self, IRLine):
        lineSplit = IRLine.rstrip().split(" ")
        code = []
        value = ""
        isReg1 = False
        if len(lineSplit) == 2:
            op1 = lineSplit[1]
            if op1.replace(".", "").replace("-", "").isdigit():
                value = op1
            else:
                value = self.ensureRegister(op1)
                isReg1 = True

            if isReg1:
                self.freeRegistersIfDead([op1])
            code.append("push {0}".format(value))
        else:
            code.append("push")

        self.tinyCode += "\n".join(code) + "\n"
        return


    def pop(self, IRLine):
        lineSplit = IRLine.rstrip().split(" ")
        code = []
        if len(lineSplit) == 2:
            op1 = lineSplit[1]
            value = self.registerAllocate(op1)
            code.append("pop {0}".format(value))
            self.markRegisterDirty(value)
        else:
            code.append("pop")

        self.tinyCode += "\n".join(code) + "\n"
        return

    def ret(self, IRLine):
        """Emit the function epilogue for a ``RET`` IR line.

        Registers holding globals are freed, then (except in ``main``) the
        registers in ``self.registersToPush`` are popped in reverse order,
        followed by ``unlnk`` and ``ret``.
        """
        self.saveGlobalVariablesBack()

        epilogue = []
        if self.functName != "main":
            epilogue = ["pop {0}".format(name) for name in reversed(self.registersToPush)]
        epilogue += ["unlnk", "ret"]

        self.tinyCode += "\n".join(epilogue) + "\n"

    def link(self, IRLine):
        lineSplit = IRLine.split(" ")
        parameters = lineSplit[2]
        localparam = lineSplit[1]
        code = []
        self.parameters = int(parameters)
        self.numLocalParams = int(localparam)
        code.append("link {0}".format(str(self.numLocalParams + self.localVarOffset)))


        if self.functName != "main":
            for registerToPush in self.registersToPush:
                code.append("push {0}".format(registerToPush))
        self.tinyCode += "\n".join(code) + "\n"

    def errorFunct(self, IRLine):
        pass
Esempio n. 30
0
                if isinstance(e, CFG.Non_Term_Ref):
                    nonterms += 1
                else:
                    terms += 1

    return total, empty, long_alts, longest, nonterms, terms



if __name__ == "__main__":
    # Command-line entry point: parse a grammar and its lexer definition and
    # print one CSV header line plus one CSV row of alternative statistics.

    parser = argparse.ArgumentParser()
    parser.add_argument('gp', help='relative path to grammar file')
    parser.add_argument('lp', help='relative path to lex file')
    parser.add_argument('altlen', help='threshold alternative length')
    args = parser.parse_args()
    gp = args.gp
    lp = args.lp
    altlen = int(args.altlen)

    # Close both input files as soon as they have been read.
    with open(lp, "r") as lex_file:
        lex = Lexer.parse(lex_file.read())
    with open(gp, "r") as grammar_file:
        cfg = CFG.parse(lex, grammar_file.read())

    total, empty, long_alts, longest, nonterms, terms = alts_info(cfg, altlen)
    cycles = ValidGrammar.cyclicInfo(cfg, lex)

    headers =  "rules, alts, empty, empty(%), long alts, long alts(%), longest, nonterms, terms, cycles"
    # Single-argument print calls behave the same on Python 2 and Python 3.
    print(headers)
    print("%s, %s, %s, %s, %s, %s, %s, %s, %s, %s" %
          (len(cfg.rules), total, empty, ((empty * 1.0)/total),
           long_alts, ((long_alts * 1.0)/total) , longest, nonterms, terms, cycles))
    
Esempio n. 31
0
# Labelled training sentences for the classifier, as (text, label) pairs.
# The labels used are 'question', 'command', 'greeting' and 'statement'.
train = [
    ('What time is it?', 'question'),
    ('where is Naples?', 'question'),
    ('How long is it been?', 'question'),
    ('which one is the champion?', 'question'),
    ('do the thing', 'command'),
    ('find the way to Naples', 'command'),
    ("get me the summary", 'command'),
    ("search on internet", 'command'),
    ('tell me the time', 'command'),
    ('hello bot', 'greeting'),
    ('hi there', 'greeting'),
    ('hey bot', 'greeting'),
    ('I do not like my job.', 'statement'),
    ("I feel amazing!", 'statement'),
    ("I feel better", 'statement'),
    ("you think like a bot", 'statement'),
]

# Classifier built from the samples above, using chatbot_extractor as the
# feature extractor.
cl = NaiveBayesClassifier(
    train,
    feature_extractor=chatbot_extractor,
)

# Grammar definitions: each (head, body) pair below is registered with
# grammar.add_prod in the order listed. Body strings are passed through
# unchanged, including their irregular spacing around '|'.
grammar = CFG.CFG()
for _head, _body in (
    ('S', 'AUXV NP MAINV OBJ| WHW BE_VERB OBJ |WHW ABOUT OBJ'),
    ('NP', 'YOU'),
    ('NP', 'I'),
    ('AUXV', 'DO | CAN'),
    ('DET', 'THE | A'),
    ('BE_VERB', 'IS | ARE'),
    ('MAINV', 'BE_VERB | THINK | LIKE |FIND | SEARCH |HAVE | TELL |KNOW'),
    ('WHW', 'WHAT | WHERE | WHEN'),
    ('OBJ', 'CC NN| DET NN | DET NN CC OBJ'),
    # NOTE(review): the leading space in ' OF' is kept as-is; confirm that
    # add_prod strips whitespace, otherwise this may be unintended.
    ('CC', ' OF'),
    ('NN', 'ICECREAM | PEN | BOOK'),
):
    grammar.add_prod(_head, _body)


def get_best_syntax_tree(text, cfg):
Esempio n. 32
0
 def write_cfg_lex(self, ambi_parse, gp, lp):
     """Write out the grammar and lexer held by ``ambi_parse``.

     ``ambi_parse.min_cfg`` is handed to ``CFG.write`` together with ``gp``.
     Then ``ambi_parse.sym_toks``, ``ambi_parse.toks`` and
     ``self._sin.lex_ws`` are handed to ``Lexer.write`` together with ``lp``.
     """
     CFG.write(ambi_parse.min_cfg, gp)
     lexer_args = (
         ambi_parse.sym_toks,
         ambi_parse.toks,
         self._sin.lex_ws,
         lp,
     )
     Lexer.write(*lexer_args)
Esempio n. 33
0
def test_cfg():
    """Run the CFG transformations on a series of hand-written grammars.

    Each section builds a production table with ``CFG.P``, wraps it in a
    ``CFG`` together with its nonterminal set, terminal set and start symbol,
    and calls one or more transformation methods on it. The visible body
    contains no assertions: results are only bound to local names, so this
    checks that the calls complete rather than what they return.
    """
    # --- reduce() on a grammar over terminals a, b, c ---
    P = CFG.P({
        "S": "aA | bB",
        "A": "aAB | aa | AC | AE",
        "B": "bBA | bb | CB | BF",
        "C": "DE",
        "D": "cc | DD",
        "E": "FF | FE",
        "F": "EcE"
    })
    A = CFG(Set("S", "A", "B", "C", "D", "E", "F"), Set("a", "b", "c"), P, "S")

    B = A.reduce()

    ######################################################################

    # --- remove_ε() on a grammar where B and C have explicit ε alternatives ---
    P = CFG.P({
        "S": "ABC",
        "A": "Ab | BC",
        "B": "bB | b | Ab | ε",
        "C": "cD | c | Ac | ε",
        "D": "SSD | cSAc"
    })
    A = CFG(Set("S", "A", "B", "C", "D"), Set("b", "c"), P, "S")
    B = A.remove_ε()

    ######################################################################

    # --- remove_primitive_rules() on a grammar with single-nonterminal
    # alternatives (e.g. S -> X | Y, A -> D, X -> C, C -> S) ---
    P = CFG.P({
        "S": "X | Y",
        "A": "bS | D",
        "D": "bA",
        "B": "Sa | a",
        "X": "aAS | C",
        "C": "aD | S",
        "Y": "SBb"
    })
    A = CFG(Set("S", "A", "B", "C", "D", "X", "Y"), Set("a", "b"), P, "S")
    B = A.remove_primitive_rules()

    ######################################################################

    # --- toOwn() followed by toCNF() on the result of toOwn() ---
    P = CFG.P({
        "S": "SaSbS | aAa | bBb",
        "A": "aA | aaa | B | ε",
        "B": "Bb | bb | b"
    })
    A = CFG(Set("S", "A", "B"), Set("a", "b"), P, "S")
    B = A.toOwn()
    C = B.toCNF()

    ######################################################################

    # --- G1: remove_ε() and then remove_primitive_rules() on its result ---
    P = CFG.P({
        "S": "YZ | aXZa",
        "X": "YX | bY | aYZ",
        "Y": "ε | c | YZ",
        "Z": "a | Xb | ε | c"
    })
    G1 = CFG(Set("S", "X", "Y", "Z"), Set("a", "b", "c"), P, "S")
    G1_1 = G1.remove_ε()
    G1_2 = G1_1.remove_primitive_rules()
    # Disabled debugging output for G1 and its transformed versions.
    # print("---[ G1 ]---")
    # print(G1, end="\n" * 3)
    # print(G1.Nε, end="\n" * 3)
    # print(G1_1, end="\n" * 3)
    # print(G1_1.NS, G1_1.NX, G1_1.NY, G1_1.NZ, end="\n" * 3)
    # print(G1_2, end="\n" * 3)

    # --- G2: toOwn() and toCNF() ---
    P = CFG.P({
        "S": "Aa | a | Eb | abbc | aDD",
        "A": "Aab | b | SEE | baD",
        "B": "DaS | BaaC | a",
        "C": "Da | a | bB | Db | SaD",
        "D": "Da | DBc | bDb | DEaD",
        "E": "Aa | a | bca"
    })
    G2 = CFG(Set("S", "A", "B", "C", "D", "E"), Set("a", "b", "c"), P, "S")
    G2_1 = G2.toOwn()
    # NOTE(review): toCNF() is called on G2 itself here, whereas the earlier
    # section calls it on the toOwn() result - confirm which is intended.
    G2_2 = G2.toCNF()
    # Disabled debugging output for G2 and its transformed versions.
    # print("---[ G2 ]---")
    # print(G2, end="\n" * 3)
    # print("Nε=", G2.Nε, " NA={", G2.NS, G2.NA,
    #       G2.NB, G2.NC, G2.ND, G2.NE, "} V=", G2.V)
    # print(G2_1, end="\n" * 3)
    # print(G2_2, end="\n" * 3)

    # --- remove_left_recursion() followed by toGNF(); note this rebinds the
    # names G1 and G2 used above ---
    P = CFG.P({"S": "Xc | Yd | Yb", "X": "Xb | a", "Y": "SaS | Xa"})
    G = CFG(Set("S", "X", "Y"), Set("a", "b", "c", "d"), P, "S")
    G1 = G.remove_left_recursion()
    G2 = G1.toGNF()

    # Final grammar is only constructed; no method is called on it in the
    # part of the function visible here.
    P = CFG.P({"S": "ε | abSA", "A": "AaB | aB | a", "B": "aSS | bA | aB"})
    G = CFG(Set("S", "A", "B"), Set("a", "b"), P, "S")
Esempio n. 34
0
 def __init__(self, gp, lp, mutype, cnt, gdir):
     """Parse the lexer and grammar files, then immediately start ``run``.

     ``lp`` is read and parsed with ``Lexer.parse``; ``gp`` is read and
     parsed with ``CFG.parse`` against that lexer and stored as
     ``self.cfg``. ``self.header`` is set from ``Utils.cfg_header(gp)``.
     Finally ``self.run(gp, lp, gdir, mutype, cnt)`` is invoked; ``mutype``,
     ``cnt`` and ``gdir`` are only forwarded to it.
     """
     # Context managers guarantee both files are closed even if parsing
     # raises; the original left the handles open.
     with open(lp, "r") as lex_file:
         lex = Lexer.parse(lex_file.read())
     with open(gp, "r") as grammar_file:
         self.cfg = CFG.parse(lex, grammar_file.read())
     self.header = Utils.cfg_header(gp)
     self.run(gp, lp, gdir, mutype, cnt)
 def deployCFG(self):
     s2 = CFG.CFG()
Esempio n. 36
0
 def __call__(self, *args, **kwargs):
     """Forward every positional and keyword argument to ``CFG`` and return its result."""
     built = CFG(*args, **kwargs)
     return built
Esempio n. 37
0
# Interactive driver: read the variables, alphabet, start variable and rules
# from standard input, build a CFG from them and report whether it accepts a
# fixed test string.
rules = set()
v = input('Enter your Varibles:\n').split(',')
alpha = input('Enter your alphebet:\n').split(',')
s = input('Enter your start var:\n')

print('Enter your rules')
while True:
    line = input('')
    # A line reading END (in any letter case) terminates rule entry.
    if line.upper() == 'END':
        break
    rules.add(r.Rule(line))

c1 = c.CFG(v, alpha, rules, s)
b = c1.accept('aaaaaaaaaacccccccc')

# Push the prompts away from the verdict with a run of blank lines.
for _ in range(10):
    print('\n')
# The explicit comparison with True is kept so that only a value equal to
# True prints 'yes', exactly as before.
print('yes' if b == True else 'no')




Esempio n. 38
0
    """
    if deep == 0:
        return []
    result = []
    for seq in rule.seqs:
        genrule = []
        for sym in seq:
            if isinstance(sym, CFG.Term):
                gen = [sym.tok]
            else:
                if isinstance(sym, CFG.Sym_Term):
                    gen = [sym.tok[3:]] # remove TK_ prefix
                else:
                    if isinstance(sym, CFG.Non_Term_Ref):
                        gen = list(be(grammar, grammar.get_rule(sym.name), deep-1))
            genrule = combine(result, gen)
        result = result + genrule
    return set(result)

if __name__ == "__main__":
    # Command-line driver: argv[1] is the grammar file, argv[2] the lex file
    # and argv[3] the integer depth passed to be().
    if len(sys.argv) < 4:
        print("Usage: " + sys.argv[0] + " grammar lex unrool-level")
    else:
        # Open the inputs through context managers so both handles are closed
        # even when int() or a parser raises; the original never closed them.
        with open(sys.argv[2], "r") as lex_file:
            with open(sys.argv[1], "r") as grammar_file:
                n = int(sys.argv[3])
                lex = Lexer.parse(lex_file.read())
                grammar = CFG.parse(lex, grammar_file.read())
        r = be(grammar, grammar.rules[0], n)
        # One pre-formatted argument reproduces the output of the original
        # `print r, ": ", len(r)` and reads the same as a Python 2 statement
        # or a Python 3 function call.
        print("%s :  %s" % (r, len(r)))
Esempio n. 39
0
                return der_list + labels
            else:
                return labels

    def derivable(self, max_steps):
        """Return the result of ``derivable_from`` seeded with only ``self.start``.

        ``max_steps`` is forwarded unchanged as the second argument.
        """
        seed = [self.start]
        return self.derivable_from(seed, max_steps)


# Example CFGs
# Toy English-like grammar with start symbol S. Reading Rule(lhs, rhs) as
# "lhs -> rhs" (confirm against the Rule class), the rules are:
#   S -> NP VP,  NP -> D N | John | Mary,  VP -> V NP,
#   D -> the | a,  N -> cat | dog,  V -> saw | likes
cfg1 = CFG(start=NT("S"),
           rules=[
               Rule(NT("S"), [NT("NP"), NT("VP")]),
               Rule(NT("NP"), [NT("D"), NT("N")]),
               Rule(NT("VP"), [NT("V"), NT("NP")]),
               Rule(NT("NP"), [T("John")]),
               Rule(NT("NP"), [T("Mary")]),
               Rule(NT("D"), [T("the")]),
               Rule(NT("D"), [T("a")]),
               Rule(NT("N"), [T("cat")]),
               Rule(NT("N"), [T("dog")]),
               Rule(NT("V"), [T("saw")]),
               Rule(NT("V"), [T("likes")])
           ])

# Grammar with integer-labelled nonterminals and start symbol 0. Reading
# Rule(lhs, rhs) as "lhs -> rhs" (confirm against the Rule class):
#   0 -> 10 1 | 10 11,  1 -> 0 11,  10 -> 'a',  11 -> 'b'
# Under that reading every right-hand side has exactly two nonterminals or
# one terminal, and the strings derived from 0 are a^n b^n for n >= 1.
cfg_anbn = CFG(start=NT(0),
               rules=[
                   Rule(NT(0), [NT(10), NT(1)]),
                   Rule(NT(1), [NT(0), NT(11)]),
                   Rule(NT(0), [NT(10), NT(11)]),
                   Rule(NT(10), [T('a')]),
                   Rule(NT(11), [T('b')])
               ])