def verify_ambiguity(self, mingp, minlp, minsen, duration=None):
    """Re-check a minimised grammar against the original parser and backend.

    Reloads the original lexer, grammar and Accent parser onto ``self._sin``,
    parses the minimised grammar, and then runs the minimiser backend
    (looked up under the key ``"<backend>m"``) until it reports ``found``.

    Args:
        mingp: path of the minimised grammar file.
        minlp: path of the minimised lexer file.
        minsen: sentence handed to ``Accent.run`` and to the backend.
        duration: forwarded unchanged to ``bend.run``.
    """

    def _read(path):
        # Close every handle deterministically instead of leaving it to GC.
        with open(path, 'r') as handle:
            return handle.read()

    # A single parenthesised argument prints identically on Python 2 and 3.
    print("==> verify grammar %s with minimiser %s \n" %
          (mingp, self._sin.minp))
    self._sin.lex = Lexer.parse(_read(self._sin.lp))
    self._sin.cfg = CFG.parse(self._sin.lex, _read(self._sin.gp))
    self._sin.parser = Accent.compile(self._sin.gp, self._sin.lp)

    minlex = Lexer.parse(_read(minlp))
    mincfg = CFG.parse(minlex, _read(mingp))
    seq = mincfg.get_rule('root').seqs[0]
    # check if the root rule of minimised cfg == root of original cfg
    if (len(seq) == 1) and (str(seq[0]) == self._sin.cfg.start_rulen):
        out = Accent.run(self._sin.parser, minsen)
        if Accent.was_ambiguous(out):
            print("** verified **")

    minbend = "%sm" % self._sin.backend
    if minbend in Backends.BACKENDS:
        bend = Backends.BACKENDS[minbend](self._sin, mincfg, minsen)
    else:
        bend = Backends.WGTBACKENDS[minbend](self._sin, mincfg, minsen)

    # we keep trying until we hit the subseq
    while not bend.found:
        bend.run(self._sin.t_depth, self._sin.wgt, duration)

    print("** verified **")
def remove_ε(self):
    """Return a grammar built from this one with "ε" options rewritten.

    For every non-terminal ``A`` the new option set starts as ``self.P[A]``
    minus ``Set("ε")``. Each option is then stripped of "ε" and of the
    characters of every subset of ``self.Nε``; every non-empty result is
    added as a further option. If some option of the start symbol consists
    only of members of ``self.Nε``, a new start symbol ``"S'"`` with the rule
    ``"ε | <old start>"`` is introduced.

    Returns:
        The value of ``CFG(N, self.Σ, P, S)``.
    """
    import CFG
    import re
    Nε = self.Nε  # presumably the nullable non-terminals — TODO confirm
    # NOTE(review): N aliases self.N (no copy); depending on how Set
    # implements `|=`, the update below may modify self.N in place.
    N = self.N
    S = self.S
    P = CFG.P()
    for A in self.N:
        P[A] = self.P[A] - Set("ε")
        # NOTE(review): P[A] is extended while it is being iterated —
        # confirm the project's Set tolerates this.
        for opt in P[A]:
            for subset in all_subsets(Nε):
                # Regex character class: delete "ε" and every character of
                # the subset's members from the option text.
                # NOTE(review): members are interpolated unescaped.
                new_opt = re.sub(f"[ε{''.join(subset)}]", "", str(opt))
                if new_opt != "":
                    P[A].add(new_opt)
    # Only the first start-symbol option made entirely of Nε members matters.
    for opt in self.P[self.S]:
        if all(X in Nε for X in opt):
            N |= (Set("S'"))
            S = "S'"
            P["S'"] = f"ε | {self.S}"
            break
    G = CFG(N, self.Σ, P, S)
    return G
def REMOVE_NULL_PRODUCTIONS(productions, variables, Vars):
    """Repeatedly eliminate null productions from ``productions`` in place.

    Each pass removes every rule that ``CFG.isNullProduct`` accepts, then, for
    each removed left-hand side, adds the rules returned by
    ``CFG.replaceNullVar`` for rules mentioning it. A generated rule whose
    right-hand side is empty gets the single symbol ``"e"``. Passes repeat
    while ``CFG.isExistNullProduction`` reports remaining null productions.

    Returns:
        The tuple ``(productions, variables, Vars)``.
    """
    while CFG.isExistNullProduction(productions, variables):
        nullable_heads = []
        # Walk a snapshot: null rules are removed from the live list as found.
        for candidate in productions.copy():
            head, _body = candidate
            if CFG.isNullProduct(candidate):
                nullable_heads.append(head)
                productions.remove(candidate)

        for nullable in nullable_heads:
            expanded = []
            for existing in productions:
                if CFG.isExistNullVar(existing, nullable):
                    expanded += CFG.replaceNullVar(existing, nullable)

            for fresh in expanded:
                if fresh in productions:
                    continue
                _head, body = fresh
                if len(body) == 0:
                    body.append("e")
                productions.append(fresh)

    return productions, variables, Vars
def REMOVE_TERM_PRODUCTION(productions, variables, Vars):
    """Replace every rule that ``CFG.isTermProduction`` accepts.

    The rules are taken from a snapshot of ``productions`` so the loop is not
    disturbed when ``CFG.replaceTermProduction`` changes or replaces the live
    list; the original built this snapshot but then iterated the live list.
    A snapshot rule that is no longer in the current productions is skipped.

    Returns:
        The tuple ``(productions, variables, Vars)`` after all replacements.
    """
    for rule in productions.copy():
        # An earlier replacement may already have dropped this rule.
        if rule not in productions:
            continue
        if CFG.isTermProduction(variables, rule):
            productions, variables, Vars = CFG.replaceTermProduction(
                productions, variables, rule, Vars)
    return productions, variables, Vars
def Creat_CFG(self):
    """Create ``self.CFG`` from ``self.NPDA`` and register its start variable.

    The start variable is the text
    ``"(" + start-state name + first stack symbol + final-state name + ")"``.
    It is added to ``self.CFG.Variables`` with an empty list when missing.
    """
    self.CFG = CFG()
    npda = self.NPDA
    npda.Convert_NPDA_to_CFG(self.CFG)

    # set start variable
    start_name = "(" + npda.Start_Variable.Name + npda.First_Stack_Symbol[0]
    start_name = start_name + npda.Final_State.Name + ")"
    self.CFG.Start_Variable = start_name

    # add start variable if it is not in cfg.variables
    known = self.CFG.Variables
    if self.CFG.Start_Variable not in known:
        known[self.CFG.Start_Variable] = []
def REMOVE_MORE_THAN_2_VARIABLES_PRODUCTION(productions, variables, Vars):
    """Split rules that ``CFG.isVariablesMoreThan2`` flags as too long.

    ``CFG.replaceTerms`` is applied first. Each flagged rule is then removed
    and the rules returned by ``CFG.replaceMoreThan2Var`` are appended.

    Returns:
        The tuple ``(productions, variables, Vars)``.
    """
    productions, variables, Vars = CFG.replaceTerms(productions, variables, Vars)
    # Iterate a snapshot because the live list is edited inside the loop.
    for long_rule in productions.copy():
        if not CFG.isVariablesMoreThan2(variables, long_rule):
            continue
        pieces, variables, Vars = CFG.replaceMoreThan2Var(variables, long_rule, Vars)
        productions.remove(long_rule)
        productions += pieces
    return productions, variables, Vars
def compare(gp1, gp2, lp):
    """Tell whether two grammar files yield equal ``_cfg`` values.

    Args:
        gp1: path of the first grammar file.
        gp2: path of the second grammar file.
        lp: path of the lexer file used to parse both grammars.

    Returns:
        True if ``_cfg`` of both parsed grammars compare equal, else False.
    """

    def _read(path):
        # The original never closed these handles.
        with open(path, "r") as handle:
            return handle.read()

    lex = Lexer.parse(_read(lp))
    cfg1 = CFG.parse(lex, _read(gp1))
    cfg2 = CFG.parse(lex, _read(gp2))
    return bool(_cfg(cfg1) == _cfg(cfg2))
def remove_primitive_rules(self):
    """ remove all rules of type A → B where A,B ∈ N

    For each non-terminal the new rule set collects, over every member of
    ``self.Nx(A)``, the rules that ``self.isprimitive`` rejects.
    """
    import CFG
    rules = CFG.P()
    for head in self.N:
        sources = self.Nx(head)
        rules[head] = Set(
            option
            for source in sources
            for option in self.P[source]
            if not self.isprimitive(option)
        )
    return CFG(self.N, self.Σ, rules, self.S)
def toCNF(self):
    """ each rule must be of format
        A → BC (A,B,C ∈ N)
        A → a (A ∈ N, a ∈ Σ)
        S → ε if S is not on the right side of any rule

    Starts from ``self.toOwn()`` and rewrites every rule of length >= 2 into
    a two-symbol rule, generating bracketed helper symbols such as
    ``<bcd>`` for the remainder. Shorter rules are copied unchanged.

    Returns:
        The value of ``CFG(N, self.Σ, P, self.S)``.
    """
    import CFG
    G = self.toOwn()
    N = G.N.copy()
    P = CFG.P()
    i = 0
    # NOTE(review): N is only indexed here, never extended, so the helper
    # symbols created below end up in P but not in N.
    while i < len(N):
        A = N[i]
        # Rules come from G.P; P[A] is evaluated eagerly as the fallback.
        for rule in G.P.get(A, P[A]):
            A = N[i]  # reset: A is rebound to helper symbols further down
            if len(rule) >= 2:
                # change rule A -> abcd... to A -> a<bcd...>
                B, *C = rule
                # Symbols that are not upper-case get a combining macron.
                B1 = Rule(B if B.isupper() else f"{B}̄")
                C1 = Rule((f"<{''.join(C)}>" if len(C) > 1 else C[0] if C[0].isupper() else f"{C[0]}̄"))
                P[A].add(Rule(f"{B1}{C1}"))
                A = Rule(f"{C1[0]}")
                # continue generating <bcd...> -> b<cd...>
                while len(A[0]) >= 2 and A[0] != "<>":
                    # Peel the first character after "<" and re-bracket the rest.
                    rule = Rule(f"{C1[0][1]}<{C1[0][2:-1]}>")
                    B, *C = rule
                    B1 = Rule(B if B.isupper() else f"{B}̄")
                    C1 = Rule((f"{''.join(C[0])}" if len(C[0]) > 1 else C[0] if C[0].isupper() else f"{C[0]}̄"))
                    P[A].add(Rule(f"{B1}{C1}"))
                    A = Rule(f"{C1[0]}")
            else:
                P[A].add(Rule(rule))
        i += 1
    return CFG(N, self.Σ, P, self.S)
def remove_left_recursion(self):
    """Return a grammar in which left-recursive rules have been rewritten.

    For each pair (A, B) with B taken from ``N[:i + 1]`` and B occurring as
    the first symbol of some rule of A: when A == B a primed non-terminal
    ``A'`` is introduced for the recursive tails; otherwise B's rules are
    substituted into the rules of A that start with B.

    Returns:
        The value of ``CFG(N, self.Σ.copy(), P, self.S)``.
    """
    def calc_potentials():
        # Map each non-terminal to the non-terminals that appear as the
        # first symbol of one of its rules. Recomputed on every call.
        potentials = {}
        for A in N:
            potentials.setdefault(A, Set())
            for rule in P[A]:
                if rule[0] in N:
                    potentials[A].add(rule[0])
        return potentials

    N = self.N.copy()
    P = self.P.copy()
    for i, A in enumerate(N):
        # NOTE(review): N may be rebound below, so this slice is taken from
        # the current N while enumerate() still walks the earlier object.
        for B in N[:i + 1]:
            if not B in calc_potentials()[A]:
                continue
            if A == B:
                # α: rules of A starting with B; β: all the others.
                α = [rule for rule in P[A] if rule.startswith(B)]
                β = [rule for rule in P[A] if not rule.startswith(B)]
                N = Set(f"{A}'").union(N)
                # NOTE(review): `|=` on a new key presumes P supplies a
                # default value for missing entries — TODO confirm.
                P[f"{A}'"] |= Set(Rule(rule[1:]) for rule in α) | Set(
                    Rule(rule[1:] + f"{A}'") for rule in α)
                P[A] = Set(Rule(rule) for rule in β) | Set(
                    Rule(rule + f"{A}'") for rule in β)
            else:
                α = [rule for rule in P[A] if rule.startswith(B)]
                β = [rule for rule in P[A] if not rule.startswith(B)]
                # Replace the leading B by each of B's rules.
                P[A] = Set(Rule(rule) for rule in β) | Set(
                    Rule(ruleB + rule[1:]) for rule in α for ruleB in P[B])
    return CFG(N, self.Σ.copy(), P, self.S)
def get_best_syntax_tree(text, cfg):
    """Generate random sentences from ``cfg`` and return the best match.

    For depths 0..3 a copy of the grammar is pruned against the upper-cased
    words of ``text``; while the pruned grammar still has an ``"S"`` entry,
    ten random sentences are generated from it. Every candidate is scored
    with ``fuzz.ratio`` against the upper-cased text.

    Args:
        text: the input sentence.
        cfg: grammar object exposing a ``prod`` attribute.

    Returns:
        The highest-scoring dict with keys ``"S"``, ``"Tree"`` and ``"Score"``
        (the first one on ties), or ``None`` when no candidate could be
        generated. The original raised ``IndexError`` in that case.
    """
    blob = TextBlob(text)
    upper_blob = blob.upper()  # hoisted: previously recomputed per candidate
    words = upper_blob.words
    ph_list = []
    # Find the tokens and generate the simplified rules.
    # Cut the tree at several levels and generate the sentences.
    for depth in range(4):
        cfg_copy = CFG.CFG(cfg.prod)
        cfg_copy.prune(words, depth)
        if "S" not in cfg_copy.prod:
            break
        # Generate a set of sentences.
        for _ in range(10):
            sent, tree = cfg_copy.gen_random_convergent('S')
            ph_list.append({"S": sent, "Tree": tree, "Score": 0})

    if not ph_list:
        return None

    for ph in ph_list:
        ph["Score"] = fuzz.ratio(ph["S"], upper_blob)
    # max() keeps the first of equal scores, like the original strict `>`.
    return max(ph_list, key=lambda ph: ph["Score"])
def main():
    """Connect the chat bot and pump its outgoing queue until disconnected.

    A receiver thread is started whenever ``Globals.getWorkerCount()`` is
    below one; otherwise at most one queued message is handled per second.
    On exit the bot says goodbye and its socket is closed.
    """
    cfg = CFG.chatConnection(Globals.HOST, Globals.PORT, Globals.OAUTH,
                             Globals.USERNAME, Globals.CHANNEL,
                             Globals.CLIENT_ID)
    lastMsg = time.time()
    Globals.tLock = threading.Lock()
    # Bound before the try so the cleanup cannot hit an unbound name when
    # constructing the bot itself fails.
    botSocket = None
    try:
        # create bot and attempt connection
        botSocket = Bot.bot(cfg)
        Globals.connected = botSocket.connect()
        while Globals.connected:
            tCount = Globals.getWorkerCount()
            if tCount < 1:
                t = threading.Thread(name="reciever", target=botSocket.recv)
                t.start()
            else:
                # send a msg every 1second
                if (time.time() - lastMsg) >= 1.0 and Globals.botQue.qsize() > 0:
                    botSocket.handleMsg()
                    lastMsg = time.time()
            # NOTE(review): sleep placement reconstructed from a
            # whitespace-collapsed source — confirm it belongs at loop level.
            time.sleep(1)
    finally:
        print("exiting")
        if botSocket is not None:
            try:
                botSocket.sendMsg("Goodbye!!")
            finally:
                # Close the socket even if the goodbye message fails.
                botSocket.closeSocket()
def to_CFG(self, cfg, lex):
    """Build a CFG instance from a rule mapping by rendering grammar text.

    The token header, the ``%nodefault`` directive, the ``root`` rule and
    then all other rules (sorted by name) are rendered as text and handed to
    ``CFG.parse``. The text is assembled in memory; the previous version
    wrote the same text to a ``tempfile.mktemp()`` path and read it back,
    which was race-prone and left the file behind.

    Args:
        cfg: mapping of rule name to sequences of symbols; must contain
            ``'root'``.
        lex: lexer passed through to ``CFG.parse``.

    Returns:
        The value of ``CFG.parse(lex, <rendered grammar text>)``.
    """
    header = ""
    if len(self.sym_toks) > 0:
        header = "%token " + "%s;" % (", ".join(t for t in self.sym_toks))

    chunks = [('%s\n\n' % header) + "%nodefault\n\n"]

    # Root alternatives go through a Set, as before, so duplicates collapse.
    pp_seqs = Set()
    for seq in cfg['root']:
        pp_seqs.add(" ".join(str(e) for e in seq))
    chunks.append("%s : %s\n;\n" % ('root', " | ".join(pp_seqs)))

    nt_list = [nt for nt in cfg.keys() if nt != 'root']
    nt_list.sort()
    for k in nt_list:
        alternatives = [" ".join(str(e) for e in seq) for seq in cfg[k]]
        chunks.append("%s : %s\n;\n" % (k, " | ".join(alternatives)))

    return CFG.parse(lex, "".join(chunks))
def REMOVE_UNIT_PRODUCTIONS(productions, variables, Vars):
    """Replace unit productions, then drop rules reported as unreachable.

    Both passes edit ``productions`` in place while walking a snapshot.

    Returns:
        The tuple ``(productions, variables, Vars)``.
    """
    for unit in productions.copy():
        # Skip non-unit rules and rules already removed by an earlier step.
        if not (CFG.isUnitProduct(unit, variables) and unit in productions):
            continue
        substitutes = CFG.replaceUnitProduct(productions, variables, unit)
        productions.remove(unit)
        for substitute in substitutes:
            if substitute not in productions:
                productions.append(substitute)

    for candidate in productions.copy():
        if CFG.isRuleUnreachable(productions, variables, candidate):
            productions.remove(candidate)

    return productions, variables, Vars
def run(self):
    """Minimise the grammar by dropping one alternative at a time.

    Each round parses the current grammar and, for every rule with more than
    one alternative, tries the grammar without each alternative in turn. The
    first valid, pruned variant that is still ambiguous becomes the new
    current grammar/lexer and a new round starts. The search stops when a
    full round finds no such variant.

    Returns:
        ``(grammar path, lexer path, ambiguous string)`` of the last
        accepted variant.
    """

    def _read(path):
        # Files are re-read every round; close each handle promptly.
        with open(path, "r") as handle:
            return handle.read()

    currgp = self.mingp
    currlp = self.minlp
    currparse = self._sin.ambi_parse
    n = 1
    found = True

    while found:
        found = False
        lex = Lexer.parse(_read(currlp))
        cfg = CFG.parse(lex, _read(currgp))
        # work on rules with no of alts > 1
        keys = [r.name for r in cfg.rules if len(r.seqs) > 1]
        for key in keys:
            seqs = cfg.get_rule(key).seqs
            for i in range(len(seqs)):
                _cfg = self.cfg_minus_alt(cfg, key, i)
                if not self.valid_cfg(_cfg):
                    continue
                # we could minimise lex first before pruning
                _cfg_p = self.prune_cfg(_cfg, lex)
                _gf, _lf = "%s.acc" % n, "%s.lex" % n
                _gp = os.path.join(self._sin.td, "pruned.%s" % _gf)
                CFG.write(_cfg_p, _gp)
                n += 1

                amb, _, ptrees = self._sin.find_ambiguity(
                    _gp, currlp, self._sin.backend, self._sin.mint)
                if amb:
                    ambi_parse = AmbiParse.parse(currlp, self._sin.lex_ws, ptrees)
                    min_gp = os.path.join(self._sin.td, "min.%s" % _gf)
                    min_lp = os.path.join(self._sin.td, "min.%s" % _lf)
                    self.write_cfg_lex(ambi_parse, min_gp, min_lp)
                    self.write_stat(min_gp, min_lp)
                    found = True
                    currparse = ambi_parse
                    currgp = min_gp
                    currlp = min_lp
                    break

            # Restart from the freshly minimised grammar.
            if found:
                break

    return currgp, currlp, currparse.amb_str
def construct_cfg(bitwidth, filename, mode):
    """Preprocess a C file with gcc, parse it and build its CFG.

    Args:
        bitwidth: forwarded to ``CFG.construct_CFG``.
        filename: path of the C source handed to ``gcc -E``.
        mode: forwarded to ``CFG.construct_CFG``.

    Returns:
        The constructed CFG, after it has been printed to ``out/impact`` and
        ``out/cfg_pred`` and consistency-checked.

    Raises:
        subprocess.CalledProcessError: if gcc exits with a non-zero status.
    """
    # Argument list instead of a shell string: the file name is passed
    # verbatim, so spaces or shell metacharacters cannot alter the command.
    processed = subprocess.check_output(["gcc", "-E", filename]).decode()
    processed = my_preprocess(processed)
    print(processed)
    parser = pycparser.c_parser.CParser()
    x = parser.parse(processed, filename="<none>")
    cfg = CFG.construct_CFG(x, mode, bitwidth)
    cfg.print("out/impact")
    cfg.print("out/cfg_pred", "pred")
    cfg._check_consistency()
    return cfg
def write_stat(self, gp, lp, tag=''):
    """Append one ``rules,alts,symbols`` line to ``self.statslog``.

    Args:
        gp: grammar path, or None to log the placeholder ``-,-,-``.
        lp: lexer path; only read when ``gp`` is not None.
        tag: prefix written before the figures (use it to mark the final
            line).
    """
    s = "-,-,-"
    if gp is not None:
        # Context managers close the inputs; the original leaked both.
        with open(lp, 'r') as lexf:
            lex = Lexer.parse(lexf.read())
        with open(gp, 'r') as grammarf:
            cfg = CFG.parse(lex, grammarf.read())
        rules, alts, syms = cfg.size()
        s = "%s,%s,%s" % (rules, alts, syms)

    with open(self.statslog, "a") as logp:
        logp.write("%s%s\n" % (tag, s))
def run(self):
    """Minimise the grammar by dropping alternatives in random order.

    Each round parses the current grammar, shuffles the ``(rule, index)``
    pairs from ``self.rule_alts_combs`` and tries the grammar without each
    alternative. The first valid, pruned variant that is still ambiguous
    becomes the new current grammar/lexer and a new round starts. The search
    stops when a round exhausts all pairs without success.

    Returns:
        ``(grammar path, lexer path, ambiguous string)`` of the last
        accepted variant.
    """

    def _read(path):
        # Files are re-read every round; close each handle promptly.
        with open(path, 'r') as handle:
            return handle.read()

    currgp = self.mingp
    currlp = self.minlp
    currparse = self._sin.ambi_parse
    n = 1
    found = True

    while found:
        found = False
        lex = Lexer.parse(_read(currlp))
        cfg = CFG.parse(lex, _read(currgp))
        combs = self.rule_alts_combs(cfg)
        random.shuffle(combs)
        while combs:
            key, i = combs.pop()
            _cfg = self.cfg_minus_alt(cfg, key, i)
            if not self.valid_cfg(_cfg):
                continue
            # we could minimise lex first before pruning
            _cfg_p = self.prune_cfg(_cfg, lex)
            _gf, _lf = "%s.acc" % n, "%s.lex" % n
            _gp = os.path.join(self._sin.td, "pruned.%s" % _gf)
            CFG.write(_cfg_p, _gp)
            n += 1

            amb, _, ptrees = self._sin.find_ambiguity(
                _gp, currlp, self._sin.backend, self._sin.mint)
            if amb:
                ambi_parse = AmbiParse.parse(currlp, self._sin.lex_ws, ptrees)
                min_gp = os.path.join(self._sin.td, "min.%s" % _gf)
                min_lp = os.path.join(self._sin.td, "min.%s" % _lf)
                self.write_cfg_lex(ambi_parse, min_gp, min_lp)
                self.write_stat(min_gp, min_lp)
                found = True
                currparse = ambi_parse
                currgp = min_gp
                currlp = min_lp
                break

    return currgp, currlp, currparse.amb_str
def reduce(self):
    """Drop symbols outside ``G.Ne`` and then remove unreachable symbols.

    Every member of ``N - G.Ne`` loses its rule entry and its membership in
    ``N``; every option mentioning it is removed from the remaining rules.

    Returns:
        The value of ``G.remove_unreachable()``.
    """
    import CFG
    N = self.N.copy()
    Σ = self.Σ.copy()
    P = self.P.copy()
    G = CFG(N, Σ, P, self.S)
    # NOTE(review): P.copy() looks shallow, so the option collections edited
    # below may be shared with self.P — confirm against the P type.

    # remove non-reduced non-terminals
    nonreduced = N - G.Ne
    for A in nonreduced:
        # remove whole rule
        del P[A]
        N.remove(A)
        # remove each option with that rule
        for B, options in P.items():
            # Iterate a snapshot: removing from a collection while iterating
            # it skips elements or raises, depending on the container.
            for opt in list(options):
                if A in opt:
                    P[B].remove(opt)
    return G.remove_unreachable()
def remove_unreachable(self):
    """Return a grammar without the symbols that are missing from ``self.V``.

    Upper-case symbols are dropped from the non-terminals and the rules, all
    other symbols from the terminal alphabet.
    """
    nonterminals = self.N.copy()
    alphabet = self.Σ.copy()
    rules = self.P.copy()

    for symbol in nonterminals.union(self.Σ) - self.V:
        if not symbol.isupper():
            # remove terminals
            alphabet.remove(symbol)
            continue
        # remove non-terminals
        del rules[symbol]
        nonterminals.remove(symbol)

    return CFG(nonterminals, alphabet, rules, self.S)
def valid(gf, lf, max_alts_allowed=None, empty_alts_ratio=None):
    """ Generated grammar is valid if it:
        a) has no empty rule
        b) number of alternatives/rule < max_alts_allowed
        c) %age of empty alternatives < empty_alts_ratio
        d) has no unreachable rules
        e) doesn't contain a subset which taken no input
        f) is not trivially ambiguous

    Args:
        gf: path of the grammar file.
        lf: path of the lexer file.
        max_alts_allowed: optional limit for check (b); skipped when None.
        empty_alts_ratio: optional limit for check (c); skipped when None.

    Returns:
        True when every check passes, False otherwise. Failures of checks
        (d), (e) and (f) write "r", "u" and "a" to stdout respectively.
    """
    # Context managers close the inputs; the original leaked both handles.
    with open(lf, "r") as lexf:
        lex = Lexer.parse(lexf.read())
    with open(gf, "r") as grammarf:
        cfg = CFG.parse(lex, grammarf.read())

    # check for empty rules
    if empty_rule(cfg):
        return False

    # check if any of the rule has > N alts
    if max_alts_allowed is not None:
        if has_too_many_alts(cfg, max_alts_allowed):
            return False

    # check if we have too many empty alts
    if empty_alts_ratio is not None:
        if has_too_many_empty_alts(cfg, empty_alts_ratio):
            return False

    # Check if all the rules are reachable from the start rule.
    dead_rules = unreachable(cfg)  # computed once instead of twice
    if len(dead_rules) > 0:
        # Same text as the former `print "unreachable: " , x` statement
        # (two spaces), valid on both Python 2 and 3.
        print("unreachable:  %s" % (dead_rules,))
        sys.stdout.write("r")
        sys.stdout.flush()
        return False

    # Check if the grammar is unproductive
    if unproductive(cfg, lex):
        sys.stdout.write("u")
        sys.stdout.flush()
        return False

    # Check the grammar for trivial ambiguities
    if ambiguous(cfg):
        sys.stdout.write("a")
        sys.stdout.flush()
        return False

    return True
def __init__(self, IRcode, globalVariables=None, stringInit=0, functName=""):
    """Set up generator state and, unless ``stringInit``, analyse the IR.

    Args:
        IRcode: the IR text to translate.
        globalVariables: passed to the liveness analysis.
        stringInit: truthy when this generation is only for the string
            initialization; the CFG analysis is skipped in that case.
        functName: function name handed to the CFG.
    """
    # Inputs
    self.IRcode = IRcode
    self.globalVariables = globalVariables
    self.stringInit = stringInit  # If this code generation is just for the string initialization
    self.functName = functName

    # Output buffers
    self.tinyCode = ""
    self.declCode = ""

    # Counters
    self.regNum = 0
    self.tempNum = 0
    self.parameters = 0
    self.lineNum = 0
    self.totalLineNum = 0
    self.numTempParams = 0
    self.numLocalParams = 0

    # Lookup tables
    self.regDict = {}
    self.declDict = {}
    self.stringDict = {}
    self.tempsSpilledDict = {}  # Saves mapping of temp to stack in case of spilling

    # Stack layout
    self.stackOffset = 2
    self.localVarOffset = 4
    self.registersToPush = ["r0", "r1", "r2", "r3"]

    # Add 4 registers
    self.Registers = [RegisterStatus(number) for number in range(4)]

    self.functCFG = None
    if stringInit:
        return

    graph = CFG(IRcode, functName=self.functName)
    self.functCFG = graph
    graph.populateNodeInfo()
    graph.removeLinesWithNoPredecessors()
    graph.runLivenessAnalysis(globalVariables)
    graph.setLeaders()
    if DEBUG:
        graph.printGraphWithNodeLists()
    self.IRcode = graph.getCode()
def mutate_cfg(gp, lp, type):
    """Return a mutated clone of the grammar stored in ``gp``.

    Args:
        gp: path of the grammar file.
        lp: path of the lexer file.
        type: one of ``'empty'``, ``'add'``, ``'mutate'``, ``'delete'`` or
            ``'switch'``.

    Returns:
        The mutated clone of the parsed grammar.

    Raises:
        ValueError: if ``type`` is not a supported mutation. The original
            used ``assert "<message>"``, which never fails because a
            non-empty string is truthy, so unknown types silently returned
            an unmodified clone.
    """
    supported = ('empty', 'add', 'mutate', 'delete', 'switch')
    # Validate before touching the file system.
    if type not in supported:
        raise ValueError("mutation type '%s' is not supported" % (type,))

    with open(lp, "r") as lexf:
        lex = Lexer.parse(lexf.read())
    with open(gp, "r") as grammarf:
        cfg = CFG.parse(lex, grammarf.read())
    sym_toks = Utils.sym_tokens(gp)
    _cfg = cfg.clone()

    if type == 'empty':
        empty(_cfg)
    elif type == 'add':
        tok = Utils.randomTok(cfg, lex, sym_toks)
        add(_cfg, tok)
    elif type == 'mutate':
        tok = Utils.randomTok(cfg, lex, sym_toks)
        mutate(_cfg, tok)
    elif type == 'delete':
        delete(_cfg)
    else:  # 'switch'
        switch(_cfg)

    return _cfg
def toGNF(self):
    """ each rule must be of format
        A → aB1B2B3...Bn (a ∈ Σ, B1,B2,B3,...,Bn ∈ N)

    Starts from ``self.remove_left_recursion()``, walks the non-terminals in
    reversed order and rewrites the rules of each A against every B in
    ``N[:i + 1]``.

    Returns:
        The value of ``CFG(N, self.Σ, P, self.S)``.
    """
    import CFG
    G = self.remove_left_recursion()
    N = Set(reversed(G.N.copy()))
    P = G.P.copy()

    def resolve(A, B):
        # Process exactly the rules present on entry: each is popped from
        # the front and its rewritten form(s) are added back.
        for _ in range(len(P[A])):
            rule = list(P[A].pop(0))
            if rule[0].islower():
                # Rule already starts with a lower-case symbol: mark every
                # later single lower-case symbol with a combining macron.
                for i, c in enumerate(rule[1:]):
                    if c.islower() and len(c) == 1:
                        rule[i + 1] = f"{c}̄"
                rule = "".join(rule)
                P[A].add(Rule(rule))
            elif rule[0] == B:
                # Rule starts with B: substitute each of B's rules for it.
                rules = Set()
                for rule1 in P[B]:
                    rules.add(Rule(rule1 + "".join(rule[1:])))
                P[A] |= rules
            else:
                # NOTE(review): here `rule` is still a list, unlike the
                # joined string used in the first branch — confirm Rule
                # accepts both.
                P[A].add(Rule(rule))

    for i, A in enumerate(N):
        for B in N[:i + 1]:
            resolve(A, B)
    return CFG(N, self.Σ, P, self.S)
def genNonRecGrammar(g, n):
    """ Return an non recursive grammar with n unrolls

    Args:
        g: grammar exposing ``rules``, ``get_rule`` and ``tokens``.
        n: unroll bound, passed to ``unrools`` together with the index table.

    Returns:
        The value of ``CFG.CFG(g.tokens, rules)``.

    Side effects: rebinds the module-level ``rs`` and prints progress.
    """
    # NOTE(review): block nesting was reconstructed from a
    # whitespace-collapsed source — verify against the original layout.
    # step 1: generate rules from 0..n-1 unrools
    global rs
    rs = reachableSymbols(g)
    print "Reachablity relation:"
    print rs
    nextSymbolIndex = {}  # dictionary non-term -> last-index
    lastGenSymbolIndex = {}  # last index of generated rule
    # put recursive symbols in symbolIndex dictionary
    for sym in rs.keys():
        # A symbol that can reach itself is treated as recursive.
        if sym in rs[sym]:
            nextSymbolIndex[sym] = 0
            lastGenSymbolIndex[sym] = 0
    # invariant: symbolIndex[sym] == next index to generate
    print "\nRecursive rules:"
    print nextSymbolIndex
    print "\nGenerating non recursive grammar:"
    rules = []
    generated = set()
    while not unrools(nextSymbolIndex, n):
        for r in g.rules:
            if r.name in rs[r.name]:  # the rule is recursive
                i = nextSymbolIndex[r.name]
                newrule = CFG.Rule(r.name + str(i), [])
                nextSymbolIndex[r.name] = i + 1
                for seq in r.seqs:
                    newseq = []
                    for sym in seq:
                        if isinstance(sym, CFG.Non_Term_Ref) and sym.name in nextSymbolIndex.keys():
                            # sym is recursive: generate indexed symbol
                            j = nextSymbolIndex[sym.name]
                            lastGenSymbolIndex[sym.name] = j
                            newseq.append(CFG.Non_Term_Ref(sym.name + str(j)))
                        else:
                            # sym is not recursive: generate original symbol
                            newseq.append(sym)
                    newrule.seqs.append(newseq)
            else:
                # Non-recursive rules are emitted only once.
                if r.name in generated:
                    continue
                newrule = CFG.Rule(r.name, [])
                for seq in r.seqs:
                    newseq = []
                    for sym in seq:
                        if isinstance(sym, CFG.Non_Term_Ref) and sym.name in rs.keys():
                            name = sym.name + "0"
                            newseq.append(CFG.Non_Term_Ref(name))
                        else:
                            newseq.append(sym)
                    newrule.seqs.append(newseq)
                generated.add(newrule.name)
            rules.append(newrule)
    # step 2: generate indexed rules with only terminal sequences as rhs
    # step 2.1: Generate only s : rhs width rhs containing only non-rec symbols
    for name in lastGenSymbolIndex.keys():
        if lastGenSymbolIndex[name] == nextSymbolIndex[name]:
            r = g.get_rule(name)
            newrule = CFG.Rule(name + str(lastGenSymbolIndex[name]), [])
            nextSymbolIndex[name] = nextSymbolIndex[name] + 1
            for seq in r.seqs:
                if nonrec(seq):
                    newrule.seqs.append(seq)
            rules.append(newrule)
    # step 2.2: Generate not yet generated rules
    # NOTE(review): step 2.1 increments nextSymbolIndex for every name that
    # satisfied this same condition, so the body below looks unreachable.
    for name in lastGenSymbolIndex.keys():
        if lastGenSymbolIndex[name] == nextSymbolIndex[name]:
            r = g.get_rule(name)
            newrule = CFG.Rule(name + str(lastGenSymbolIndex[name]), [])
            for seq in r.seqs:
                newseq = []
                for sym in seq:
                    if isinstance(sym, CFG.Non_Term_Ref) and sym.name in rs.keys():
                        # NOTE(review): the table holds ints, so this is
                        # str + int; it also rebinds the loop variable `name`.
                        name = sym.name + lastGenSymbolIndex[name]
                        newseq.append(CFG.Non_Term_Ref(name))
                    else:
                        newseq.append(sym)
                # NOTE(review): newseq is never appended to newrule.seqs here.
            rules.append(newrule)
            generated.add(newrule.name)
    return CFG.CFG(g.tokens, rules)
varContainer += ['A12', 'B12', 'C12', 'D12', 'E12', 'F12', 'G12', 'H12',
                 'I12', 'J12', 'K12', 'L12', 'M12', 'N12', 'O12', 'P12',
                 'Q12', 'R12', 'S12', 'T12', 'U12', 'V12', 'W12', 'X12',
                 'Y12', 'Z12']


def readSyntax(Terminals, languages):
    """Tokenise source text for the CYK parser.

    Tabs are dropped, every newline becomes the token ``ENDL``, the text is
    split on single spaces and empty tokens are discarded. Any token that is
    not in ``Terminals`` is replaced by ``"NAME"``.

    Args:
        Terminals: collection of known terminal tokens.
        languages: the raw source text.

    Returns:
        The list of tokens.
    """
    pieces = languages.replace("\t", "").replace("\n", " ENDL ").split(" ")
    # One filtering pass instead of repeated count()/remove() scans.
    return [piece if piece in Terminals else "NAME"
            for piece in pieces if piece != '']


Terminals, V, Productions = CFG.loadModel("model.txt")
varContainer = CFG.getNotUsedVariables(V, varContainer)

# Convert the loaded grammar step by step before parsing.
Productions, V, varContainer = CFG2CNF.START(Productions, V, varContainer)
Productions, V, varContainer = CFG2CNF.REMOVE_NULL_PRODUCTIONS(Productions, V, varContainer)
Productions, V, varContainer = CFG2CNF.REMOVE_UNIT_PRODUCTIONS(Productions, V, varContainer)
Productions, V, varContainer = CFG2CNF.REMOVE_MORE_THAN_2_VARIABLES_PRODUCTION(Productions, V, varContainer)
Productions, V, varContainer = CFG2CNF.REMOVE_TERM_PRODUCTION(Productions, V, varContainer)

# Close the input file once it has been read.
with open("syntax.txt") as syntax_file:
    languages = syntax_file.read()
languages = readSyntax(Terminals, languages)

for x in Productions:
    print(x)
print(languages)

CYK.CYK(Productions, 'S0', languages)
class App:
    """Read an NPDA description, convert it to a CFG and report on strings.

    Attributes are set by ``__init__`` and filled in by ``Creat_NPDA`` and
    ``Creat_CFG``.
    """

    def __init__(self, file_address, output):
        """Remember the input description path and the output report path."""
        self.File_Address = file_address
        self.output = output
        self.NPDA = None
        self.CFG = None
        self.Alphabet = None

    def Creat_NPDA(self):
        """Build ``self.NPDA`` from the description file.

        Layout as read by this method: line 0 is the integer passed as the
        second ``NPDA`` argument, line 1 the alphabet, line 2 the stack
        symbols, line 3 the first stack symbol, and every following line a
        comma-separated transition with five fields. Fields 0 and 4 carry the
        origin and destination states as ``q<number>``; a ``*`` in either
        marks the final state. State numbers are shifted so that the smallest
        one becomes zero.
        """
        # read file (the context manager closes it even if reading fails)
        with open(self.File_Address, 'r') as description:
            Lines = description.readlines()

        self.Alphabet = Lines[1].replace('\n', '').split(',')
        # add lambda to alphabet for npda
        NPDA_Alphabet = self.Alphabet + ["_"]
        # split stack symbol and first stack symbol
        Stack_Symbol = Lines[2].replace('\n', '').split(',')
        First_Stack_Symbol = Lines[3].replace('\n', '').split(',')
        # create base npda
        self.NPDA = NPDA(NPDA_Alphabet, int(Lines[0]), Stack_Symbol,
                         First_Stack_Symbol)

        # Parse every transition once: (fields, origin number, destination number).
        transitions = []
        for raw in Lines[4:]:
            info = raw.split(',')
            origin = int(info[0].split('q')[1])
            destination = int(info[4].replace('\n', '').split('q')[1])
            transitions.append((info, origin, destination))

        # The first transition's origin is the start state; indexing first
        # keeps the IndexError of the original when there are no transitions.
        first_origin = transitions[0][1]
        # reduce state numbers to start at zero
        minimum = min(min(origin, destination)
                      for _, origin, destination in transitions)

        states = self.NPDA.States
        # set start variable for npda
        self.NPDA.Start_Variable = states[first_origin - minimum]

        # complete npda
        for info, origin, destination in transitions:
            source = states[origin - minimum]
            target = states[destination - minimum]
            key = (info[1], info[2])
            source.Nueighbor[key] = source.Nueighbor.get(key, []) + [
                (target, info[3])
            ]
            # final states
            if "*" in info[0]:
                self.NPDA.Final_State = source
            if "*" in info[4]:
                self.NPDA.Final_State = target

    def Creat_CFG(self):
        """Create ``self.CFG`` from ``self.NPDA`` and register its start variable."""
        self.CFG = CFG()
        self.NPDA.Convert_NPDA_to_CFG(self.CFG)
        # set start variable
        self.CFG.Start_Variable = ("(" + self.NPDA.Start_Variable.Name +
                                   self.NPDA.First_Stack_Symbol[0] +
                                   self.NPDA.Final_State.Name + ")")
        # add start variable if it is not in cfg.variables
        if self.CFG.Start_Variable not in self.CFG.Variables:
            self.CFG.Variables[self.CFG.Start_Variable] = []

    def Write_CFG(self):
        """Overwrite the output file with a header and one line per variable.

        Each line has the form ``variable->p1|p2|...``. Joining the
        productions directly keeps a ``|`` that is part of a variable name
        intact; the original removed the first ``|`` of the whole line.
        """
        with open(self.output, "w") as report:
            report.write(
                "***my grammar does not lambda productions and remove them for detectin string***"
                + '\n')
            for var, pro in self.CFG.Variables.items():
                report.write(var + '->' + '|'.join(pro) + '\n')

    def Detection_String(self, String):
        """Append the detection result for ``String`` to the output file.

        Writes the input, then ``False`` or ``True``; on success also the
        derivation steps joined by ``=>`` and ending with ``String`` itself.
        """
        String_Map = self.CFG.Detection_String(String)
        with open(self.output, "a") as report:
            report.write("input :" + String + '\n')
            report.write("output :" + '\n')
            # Deliberately `== False`: an empty derivation list must still
            # count as success, exactly as before.
            if String_Map == False:
                report.write("False" + '\n')
            else:
                report.write("True" + '\n')
                String_Map += [String]
                report.write("=>".join(String_Map) + '\n')
def set_CFG(self):
    """Show the value returned by ``CFG.CFG()`` in the text browser."""
    similarity = CFG.CFG()  # res is a str
    message = "CFG similarity:" + str(similarity) + "%"
    self.textBrowser.setText(message)
class TinyGenerator():
    """Translate three-address IR text into Tiny code using four registers.

    The generator walks the IR one statement per line, dispatches on the
    opcode (first space-separated token) and appends the produced target
    lines to ``self.tinyCode``.  Register contents are tracked in
    ``self.Registers`` (one ``RegisterStatus`` per register r0..r3) and the
    liveness information comes from ``self.functCFG``.

    NOTE(review): this block was reconstructed from whitespace-collapsed
    source; nesting of a few statements (marked below) should be confirmed
    against the original file.
    """

    # Two-letter comparison names, in the order the jump chain tests them.
    _COMP_NAMES = ("LT", "GT", "EQ", "NE", "LE", "GE")

    def __init__(self, IRcode, globalVariables=None, stringInit=0, functName=""):
        """Set up generator state and, unless ``stringInit``, build the CFG.

        IRcode          -- IR text, one statement per line.
        globalVariables -- collection of global names (may be None).
        stringInit      -- truthy when only string initialisation is generated;
                           no control-flow graph is built in that case.
        functName       -- name of the function being generated.
        """
        self.IRcode = IRcode
        self.tinyCode = ""
        self.declCode = ""
        self.regNum = 0
        self.tempNum = 0
        self.regDict = {}
        self.declDict = {}
        self.stringDict = {}
        self.parameters = 0
        self.functCFG = None
        self.lineNum = 0
        self.totalLineNum = 0
        self.numTempParams = 0
        self.tempsSpilledDict = {}  # Saves mapping of temp to stack in case of spilling
        self.stringInit = stringInit  # If this code generation is just for the string initialization
        self.globalVariables = globalVariables
        self.stackOffset = 2
        self.localVarOffset = 4
        self.numLocalParams = 0
        self.registersToPush = ["r0", "r1", "r2", "r3"]
        self.functName = functName

        # Four registers: r0..r3.
        self.Registers = [RegisterStatus(regNum) for regNum in range(4)]

        if not stringInit:
            self.functCFG = CFG(IRcode, functName=self.functName)
            self.functCFG.populateNodeInfo()
            self.functCFG.removeLinesWithNoPredecessors()
            self.functCFG.runLivenessAnalysis(globalVariables)
            self.functCFG.setLeaders()
            if DEBUG:
                self.functCFG.printGraphWithNodeLists()
            # The CFG may have dropped lines; generate from its view of the code.
            self.IRcode = self.functCFG.getCode()

    # ------------------------------------------------------------------
    # small private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _isLiteral(token):
        """Return True when ``token`` is digits once '.' and '-' are removed."""
        return token.replace(".", "").replace("-", "").isdigit()

    def _emitLines(self, code):
        """Append the lines in ``code`` to tinyCode, newline-terminated."""
        self.tinyCode += "\n".join(code) + "\n"

    def _restoreDefaults(self):
        """Reset per-pass state before code generation is run again.

        ``localVarOffset``, ``registersToPush`` and the register table are
        deliberately left alone, as in the original restart sequence.
        """
        self.tinyCode = ""
        self.declCode = ""
        self.regNum = 0
        self.tempNum = 0
        self.regDict = {}
        self.declDict = {}
        self.stringDict = {}
        self.parameters = 0
        self.lineNum = 0
        self.totalLineNum = 0
        self.numTempParams = 0
        self.tempsSpilledDict = {}
        self.stackOffset = 2
        self.numLocalParams = 0

    # ------------------------------------------------------------------
    # driver
    # ------------------------------------------------------------------
    def generate(self):
        """Generate code, repeating until ``localVarOffset`` stops changing.

        ``localVarOffset`` depends on how many registers a pass used, and the
        emitted stack offsets depend on ``localVarOffset``, so generation is
        re-run until the value is stable.  Returns the final tinyCode string.
        """
        oldLocalVarOffset = self.localVarOffset
        self.generateCode()
        self.updateLocalVarOffset()
        while oldLocalVarOffset != self.localVarOffset:
            oldLocalVarOffset = self.localVarOffset
            if DEBUG:
                print("; RESTARTING EVERYTHING cause varOffset is {0}\n\n\n\n\n".format(self.localVarOffset))
            self._restoreDefaults()
            self.generateCode()
            self.updateLocalVarOffset()
        return self.tinyCode

    def generateCode(self):
        """Run one generation pass over ``self.IRcode`` and return tinyCode."""
        stmtList = self.IRcode.split("\n")
        switcher = {
            "INCI": self.inci,
            "DECI": self.deci,
            "ADDI": self.addi,
            "ADDF": self.addf,
            "SUBI": self.subi,
            "SUBF": self.subf,
            "MULTI": self.multi,
            "MULTF": self.multf,
            "DIVI": self.divi,
            "DIVF": self.divf,
            "STOREI": self.storei,
            "STOREF": self.storei,
            "STORES": self.stores,
            "GTI": self.comp,
            "GEI": self.comp,
            "LTI": self.comp,
            "LEI": self.comp,
            "NEI": self.comp,
            "EQI": self.comp,
            "GTF": self.comp,
            "GEF": self.comp,
            "LTF": self.comp,
            "LEF": self.comp,
            "NEF": self.comp,
            "EQF": self.comp,
            "JUMP": self.jump,
            "LABEL": self.label,
            "READI": self.readi,
            "READF": self.readf,
            "WRITEI": self.writei,
            "WRITEF": self.writef,
            "WRITES": self.writes,
            "JSR": self.jsr,
            "PUSH": self.push,
            "POP": self.pop,
            "RET": self.ret,
            "LINK": self.link,
        }
        self.totalLineNum = len(stmtList)

        for line in stmtList:
            isLeader = (not self.stringInit) and (self.lineNum in self.functCFG.leaders)
            if isLeader:
                # Write live dirty values back but keep the registers usable
                # while the leader line itself is generated.
                self.resetRegisters(keepValid=1)

            # Unknown opcodes fall through to errorFunct, which ignores them.
            func = switcher.get(line.split(" ")[0], self.errorFunct)
            if DEBUG:
                print(";", line)
            func(line)

            # NOTE(review): reconstructed as nested under the leader check.
            if (not self.stringInit) and (self.lineNum in self.functCFG.leaders):
                self.invalidateAllRegisters()
            self.lineNum += 1
            if DEBUG:
                self.printRegs()

        # Patch every "link" line with the final frame size, now that the
        # number of spilled temporaries is known.
        self.tinyCode = re.sub(
            r'link.*\n',
            "link {0}\n".format(str(self.numLocalParams + self.localVarOffset + self.numTempParams)),
            self.tinyCode)
        self.removeUnnecessaryMoves()
        return self.tinyCode

    def updateLocalVarOffset(self):
        """Recompute localVarOffset/registersToPush from the registers used."""
        registersUsed = []
        for register in self.Registers:
            if DEBUG:
                print("; Register used at least Once for {0}: {1}".format(register.regNum, str(register.usedAtLeastOnce)))
            if register.usedAtLeastOnce:
                registersUsed.append("r{0}".format(register.regNum))
        self.localVarOffset = len(registersUsed)
        self.registersToPush = registersUsed

    # ------------------------------------------------------------------
    # register bookkeeping
    # ------------------------------------------------------------------
    def resetRegisters(self, keepValid=0):
        """Free every valid register (temporaries kept); return their names.

        With ``keepValid`` the freed registers are flagged valid again.
        """
        if DEBUG:
            print("; resetting reg allocation")
        registersFreed = []
        for regNum in range(4):
            if self.Registers[regNum].valid:
                regName = "r{0}".format(regNum)
                self.freeRegister(regName, keepTemporaries=1)
                registersFreed.append(regName)
                # NOTE(review): reconstructed as nested under the valid check.
                if keepValid:
                    self.Registers[regNum].valid = 1
        return registersFreed

    def invalidateAllRegisters(self):
        """Mark all four registers as holding nothing."""
        for regNum in range(4):
            self.Registers[regNum].valid = 0

    def saveGlobalVariablesBack(self):
        """Free every valid register that currently holds a global variable."""
        if DEBUG:
            print("; storing globalVariables back")
        # globalVariables defaults to None; treat that as "no globals".
        globalVariables = self.globalVariables or ()
        for regNum in range(4):
            if self.Registers[regNum].valid and self.Registers[regNum].variable in globalVariables:
                self.freeRegister("r{0}".format(regNum))

    def removeUnnecessaryMoves(self):
        """Drop ``move X X`` lines from tinyCode.

        Lines are matched after stripping, so indented self-moves are removed
        too, and a ``move`` line with fewer than two operands is left alone
        instead of raising.
        """
        keptLines = []
        for tinyLine in self.tinyCode.strip().rstrip('\n').split('\n'):
            tokens = tinyLine.split()
            isSelfMove = (len(tokens) >= 3 and tokens[0] == "move"
                          and tokens[1] == tokens[2])
            if not isSelfMove:
                keptLines.append(tinyLine)
        self.tinyCode = "\n".join(keptLines) + "\n"
        return

    def printRegs(self):
        """Print one comment line showing which variable each register holds."""
        strToPrint = "".join(
            " r{0} -> ".format(register.regNum)
            + (register.variable if register.valid else "null")
            for register in self.Registers)
        print(";", strToPrint)
        return

    def convertIRVarToTinyVar(self, IRVar):
        """Map an IR operand name to its Tiny operand.

        Plain names are returned unchanged (and recorded in declDict),
        ``$Ln`` becomes a negative frame offset shifted by localVarOffset,
        ``$Pn`` a positive offset past the saved slots, ``$R`` the return
        slot.  Any other ``$`` name (e.g. temporaries) yields "".
        """
        tinyVar = ""
        if not IRVar.startswith("$"):
            tinyVar = IRVar
            self.declDict[tinyVar] = ""
        elif IRVar.startswith("$L"):
            tinyVar = "$" + str(int(IRVar.replace("L", "-")[1:]) - self.localVarOffset)
        elif IRVar.startswith("$P"):
            tinyVar = "$" + str(-int(IRVar[2:]) + self.stackOffset + self.parameters)
        elif IRVar.startswith("$R"):
            tinyVar = "$" + str(self.stackOffset + self.parameters)
        return tinyVar

    def ensureRegister(self, variable, doneWithLine=0):
        """Return the register holding ``variable``, loading it if necessary."""
        if DEBUG:
            print("; ensuring {0}".format(variable))
        for register in self.Registers:
            if register.variable == variable and register.valid:
                return "r{0}".format(register.regNum)

        register = self.registerAllocate(variable, doneWithLine)
        tinyVar = self.convertIRVarToTinyVar(variable)
        # A spilled temporary is reloaded from its stack slot, which is then
        # released for reuse.
        if variable in self.tempsSpilledDict:
            tinyVar = self.tempsSpilledDict.pop(variable)
        if DEBUG:
            print(";move {0} {1}\n".format(tinyVar, register))
        self.tinyCode += "move {0} {1}\n".format(tinyVar, register)
        return register

    def checkVariableLive(self, variable):
        """Return True when ``variable`` is in the current line's out list."""
        return variable in self.functCFG.CFGNodeList[self.lineNum].outList

    def spillRegister(self, register, keepTemporaries=0):
        """Store ``register`` back to the memory home of its variable.

        ``$T`` temporaries get the first stack slot not already used by
        another spilled temporary; with ``keepTemporaries`` they are not
        spilled at all.
        """
        regNum = int(register[1])
        variable = self.Registers[regNum].variable
        tinyVar = self.convertIRVarToTinyVar(variable)
        isTemp = variable.startswith("$T")
        if isTemp and keepTemporaries:
            return
        if isTemp:
            frameBase = self.numLocalParams + self.localVarOffset
            tempStackVar = frameBase + 1
            while "$-{0}".format(tempStackVar) in self.tempsSpilledDict.values():
                tempStackVar += 1
            # Track the high-water mark so "link" reserves enough slots.
            if (tempStackVar - frameBase) > self.numTempParams:
                self.numTempParams = tempStackVar - frameBase
            tinyVar = "$-{0}".format(tempStackVar)
            self.tempsSpilledDict[variable] = tinyVar
            if DEBUG:
                print("; spilling temp ", variable)
        if DEBUG:
            print("; spilling {0} to {1}".format(register, tinyVar))
            print("; move {0} {1}\n".format(register, tinyVar))
        self.tinyCode += "move {0} {1}\n".format(register, tinyVar)
        return

    def freeRegister(self, register, keepTemporaries=0):
        """Release ``register``, spilling it first when dirty and still live."""
        regNum = int(register[1])
        status = self.Registers[regNum]
        if DEBUG:
            print("; freeing {0} with {1}, valid: {2}, dirty: {3}".format(register, status.variable, status.valid, status.dirty))
        if status.valid and status.dirty and self.checkVariableLive(status.variable):
            self.spillRegister(register, keepTemporaries=keepTemporaries)
        # NOTE(review): reconstructed at method level (not nested above).
        if status.variable.startswith("$T") and keepTemporaries:
            return
        status.valid = 0
        status.dirty = 0
        return

    def chooseRegToFree(self, doneWithLine=0):
        """Pick the register number that is cheapest to evict.

        Preference order: not needed by the current line, then not dirty,
        then not referenced by the upcoming lines (scanning forward until
        one candidate is left, a leader is reached, or the code ends).
        """
        allRegs = [0, 1, 2, 3]
        neededNow = []
        if not doneWithLine:
            genList = self.functCFG.CFGNodeList[self.lineNum].genList
            neededNow = [regNum for regNum in allRegs
                         if self.Registers[regNum].valid
                         and self.Registers[regNum].variable in genList]
        regsToUse = [regNum for regNum in allRegs if regNum not in neededNow]
        if len(regsToUse) == 1:
            return regsToUse[0]

        cleanRegs = [regNum for regNum in regsToUse if not self.Registers[regNum].dirty]
        if len(cleanRegs) == 1:
            return cleanRegs[0]
        if cleanRegs:
            # When every candidate is dirty the unfiltered list is kept.
            regsToUse = cleanRegs

        lineToCheck = self.lineNum + 1
        while True:
            if DEBUG:
                print(";", regsToUse)
            # ">=" guards against scanning past the last IR line.
            if lineToCheck >= (self.totalLineNum - 1):
                return regsToUse[0]
            if lineToCheck in self.functCFG.leaders:
                return regsToUse[0]
            upcoming = self.functCFG.CFGNodeList[lineToCheck].genList
            remaining = [regNum for regNum in regsToUse
                         if self.Registers[regNum].variable not in upcoming]
            if not remaining:
                # Every candidate is used on that line; settle for the first
                # one instead of running on with an empty candidate list.
                return regsToUse[0]
            regsToUse = remaining
            if len(regsToUse) == 1:
                return regsToUse[0]
            lineToCheck += 1

    def chooseAndFreeRegister(self, doneWithLine=0):
        """Evict the register chosen by chooseRegToFree; return its number."""
        regNum = self.chooseRegToFree(doneWithLine)
        self.freeRegister("r{0}".format(regNum))
        return regNum

    def registerAllocate(self, varName, doneWithLine=0, registersToUse=None):
        """Bind ``varName`` to a register and return the register name.

        ``registersToUse`` -- optional list of register names just freed by
        the caller; the first one is reused.  Otherwise the first invalid
        register is taken, and failing that one is evicted.
        """
        if DEBUG:
            print("; starting allocation of {0}".format(varName))
        if registersToUse:
            regNum = int(registersToUse[0][1])
        else:
            regNum = None
            for register in self.Registers:
                if not register.valid:
                    regNum = register.regNum
                    break
            if regNum is None:
                regNum = self.chooseAndFreeRegister(doneWithLine)
        status = self.Registers[regNum]
        status.valid = 1
        status.dirty = 0
        status.variable = varName
        status.usedAtLeastOnce = True
        if DEBUG:
            print("; allocating {0} to r{1}".format(varName, regNum))
        return "r{0}".format(regNum)

    def freeRegistersIfDead(self, variablesToTryFree, keepVariablesLive=None):
        """Invalidate registers whose variable is dead or killed on this line.

        Only variables listed in ``variablesToTryFree`` and not in
        ``keepVariablesLive`` are considered.  No spill is emitted.  Returns
        the names of the registers released.
        """
        keepVariablesLive = keepVariablesLive or ()
        node = self.functCFG.CFGNodeList[self.lineNum]
        registersFreed = []
        for regNum in range(4):
            status = self.Registers[regNum]
            if not (status.valid and status.variable in variablesToTryFree
                    and status.variable not in keepVariablesLive):
                continue
            if (not self.checkVariableLive(status.variable)) or (status.variable in node.killList):
                if DEBUG:
                    print("; freeing cause dead r{0} with {1} -> {2}".format(regNum, status.variable, node.outList))
                status.valid = 0
                status.dirty = 0
                registersFreed.append("r{0}".format(regNum))
        return registersFreed

    def temporaryAllocate(self):
        """Allocate a register for a fresh scratch name ``&n``.

        The name is added to the current line's gen list so the register is
        not chosen for eviction while this line is generated.
        """
        tempName = "&{}".format(self.tempNum)
        self.functCFG.CFGNodeList[self.lineNum].genList.append(tempName)
        self.tempNum += 1
        return self.registerAllocate(tempName)

    def markRegisterDirty(self, op):
        """Flag the register named ``op`` (e.g. "r2") as modified."""
        self.Registers[int(op[1])].dirty = 1
        return

    # ------------------------------------------------------------------
    # increment / decrement
    # ------------------------------------------------------------------
    def incDecOperandSetup(self, op1):
        """Load ``op1`` into a register, mark it dirty and return the register."""
        reg_op2 = self.ensureRegister(op1, 0)
        self.markRegisterDirty(reg_op2)
        return reg_op2

    def _emitIncDec(self, IRLine, mnemonic):
        """Emit ``<mnemonic> <reg>`` for the single operand of ``IRLine``."""
        op1 = IRLine.split(" ")[1]
        reg_op2 = self.incDecOperandSetup(op1)
        self._emitLines(["{0} {1}".format(mnemonic, reg_op2)])
        self.freeRegistersIfDead([op1])

    def inci(self, IRLine):
        """INCI x -> inci reg."""
        self._emitIncDec(IRLine, "inci")
        return

    def deci(self, IRLine):
        """DECI x -> deci reg."""
        self._emitIncDec(IRLine, "deci")
        return

    # ------------------------------------------------------------------
    # arithmetic
    # ------------------------------------------------------------------
    def mathOperandSetup(self, op1, op2, result, orderMatters):
        """Prepare operands for ``result = op1 <op> op2``.

        Emits ``move op1 resultReg`` and returns ``(op2 operand, resultReg)``
        so the caller can emit ``<op> op2 resultReg``.  ``orderMatters`` is
        accepted for interface compatibility and is not used.
        """
        isReg1 = not self._isLiteral(op1)
        opmrl_op1 = self.ensureRegister(op1, 0) if isReg1 else op1
        isReg2 = not self._isLiteral(op2)
        opmrl_op2 = self.ensureRegister(op2, 0) if isReg2 else op2

        regsToTryFree = []
        if isReg1:
            regsToTryFree.append(op1)
        if isReg2:
            regsToTryFree.append(op2)
        if DEBUG:
            print(";", op1, "-->", opmrl_op1)
            print(";", op2, "-->", opmrl_op2)

        # op2 must survive until the operation itself, so only op1's register
        # may be recycled for the result here.
        registersFreed = self.freeRegistersIfDead(regsToTryFree, keepVariablesLive=[op2])
        reg_op2 = self.registerAllocate(result, doneWithLine=0, registersToUse=registersFreed)
        self.markRegisterDirty(reg_op2)
        self.freeRegistersIfDead(regsToTryFree)
        if DEBUG:
            print(";", result, "-->", reg_op2)
            print("; opmrl_op1 = {0}, opmrl_op2 = {1}, reg_op2 = {2}".format(opmrl_op1, opmrl_op2, reg_op2))
            print(("; move {0} {1}\n".format(opmrl_op1, reg_op2)))
        self.tinyCode += ("move {0} {1}\n".format(opmrl_op1, reg_op2))
        return opmrl_op2, reg_op2

    def _emitMathOp(self, IRLine, mnemonic, orderMatters):
        """Emit a three-address arithmetic IR line using ``mnemonic``."""
        lineSplit = IRLine.split(" ")
        op1, op2, result = lineSplit[1], lineSplit[2], lineSplit[3]
        opmrl_op1, reg_op2 = self.mathOperandSetup(op1, op2, result, orderMatters)
        self._emitLines(["{0} {1} {2}".format(mnemonic, opmrl_op1, reg_op2)])

    def addi(self, IRLine):
        """ADDI a b r -> addi."""
        self._emitMathOp(IRLine, "addi", False)
        return

    def addf(self, IRLine):
        """ADDF a b r -> addr."""
        self._emitMathOp(IRLine, "addr", False)
        return

    def subi(self, IRLine):
        """SUBI a b r -> subi."""
        self._emitMathOp(IRLine, "subi", True)
        return

    def subf(self, IRLine):
        """SUBF a b r -> subr."""
        self._emitMathOp(IRLine, "subr", True)
        return

    def multi(self, IRLine):
        """MULTI a b r -> muli."""
        self._emitMathOp(IRLine, "muli", False)
        return

    def multf(self, IRLine):
        """MULTF a b r -> mulr."""
        self._emitMathOp(IRLine, "mulr", False)
        return

    def divi(self, IRLine):
        """DIVI a b r -> divi."""
        self._emitMathOp(IRLine, "divi", True)
        return

    def divf(self, IRLine):
        """DIVF a b r -> divr."""
        self._emitMathOp(IRLine, "divr", True)
        return

    # ------------------------------------------------------------------
    # stores
    # ------------------------------------------------------------------
    def storei(self, IRLine):
        """STOREI/STOREF src dst -> move src dst (dst register or return slot)."""
        lineSplit = IRLine.split(" ")
        op1 = lineSplit[1]
        result = lineSplit[2]

        isReg1 = not self._isLiteral(op1)
        opmrl_op1 = self.ensureRegister(op1, 0) if isReg1 else op1
        registersFreed = self.freeRegistersIfDead([op1] if isReg1 else [])

        if result.startswith("$R"):
            # The return value is written straight to its stack slot.
            opmr_op2 = "$" + str(self.stackOffset + self.parameters)
        else:
            opmr_op2 = self.registerAllocate(result, 1, registersToUse=registersFreed)
            self.markRegisterDirty(opmr_op2)
        if DEBUG:
            print("; move {0} {1}".format(opmrl_op1, opmr_op2))
        self._emitLines(["move {0} {1}".format(opmrl_op1, opmr_op2)])
        return

    def stores(self, IRLine):
        """STORES <string...> name -> str name <string...>.

        Declarations for ``$L`` names are appended; all others are placed
        in front of the code generated so far.
        """
        lineSplit = IRLine.split(" ")
        result = " ".join(lineSplit[1:-1])
        op1 = lineSplit[-1]
        declaration = "str {0} {1}\n".format(op1, result)
        if op1.startswith("$L"):
            self.tinyCode += declaration
        else:
            self.tinyCode = declaration + self.tinyCode
        return

    # ------------------------------------------------------------------
    # comparisons and control flow
    # ------------------------------------------------------------------
    def compOperand(self, op1, op2, dataType):
        """Emit the compare for ``op1 ? op2``; return True if operands swapped.

        ``dataType`` truthy selects ``cmpi``, otherwise ``cmpr``.  When only
        the second operand is a literal the operands are swapped so the
        literal comes first.  When both are literals the second one is
        loaded into a scratch register first (the original left that
        operand empty).
        """
        code = []
        flipped = False
        if (not self._isLiteral(op1)) and self._isLiteral(op2):
            flipped = True
            op2, op1 = op1, op2

        isReg1 = not self._isLiteral(op1)
        opmrl_op1 = self.ensureRegister(op1, 0) if isReg1 else op1

        isReg2 = not self._isLiteral(op2)
        if isReg2:
            opmrl_op2 = self.ensureRegister(op2, 0)
        else:
            opmrl_op2 = self.temporaryAllocate()
            code.append("move {0} {1}".format(op2, opmrl_op2))

        regsToTryFree = []
        if isReg1:
            regsToTryFree.append(op1)
        if isReg2:
            regsToTryFree.append(op2)
        self.freeRegistersIfDead(regsToTryFree)

        mnemonic = "cmpi" if dataType else "cmpr"
        code.append("{0} {1} {2}".format(mnemonic, opmrl_op1, opmrl_op2))
        self._emitLines(code)
        return flipped

    def comp(self, IRLine):
        """<CMP>I/<CMP>F a b label -> compare followed by a conditional jump."""
        lineSplit = IRLine.split(" ")
        op = lineSplit[0]
        op1 = lineSplit[1]
        op2 = lineSplit[2]
        label = lineSplit[3]

        CompOP = None
        if op[:2] in self._COMP_NAMES and op[2:] in ("I", "F"):
            CompOP = getattr(COMPOP, op[:2])

        flipped = self.compOperand(op1, op2, op.endswith("I"))
        if flipped:
            # Operands were swapped, so the relation must be mirrored.
            CompOP = COMPOP.inverseTinyOP(CompOP)

        code = []
        for name in self._COMP_NAMES:
            if CompOP == getattr(COMPOP, name):
                code.append("j{0} {1}".format(name.lower(), label))
                break
        self._emitLines(code)
        return

    def jump(self, IRLine):
        """JUMP label -> jmp label."""
        self._emitLines(["jmp {0}".format(IRLine.split(" ")[1])])
        return

    def label(self, IRLine):
        """LABEL name -> label name."""
        self._emitLines(["label {0}".format(IRLine.split(" ")[1])])
        return

    # ------------------------------------------------------------------
    # read / write
    # ------------------------------------------------------------------
    def readWriteOperandSetup(self, op2, code):
        """Return the Tiny operand for ``op2``, appending setup lines to ``code``.

        Literals are moved into a scratch register; named variables and
        ``$L``/``$P``/``$R`` operands are addressed in memory; anything else
        is given a register.  (The original looked registers up in
        ``self.regDict``, which is never filled, and raised KeyError.)
        """
        opmr_op2 = ""
        if self._isLiteral(op2):
            opmr_op2 = self.temporaryAllocate()
            code.append("move {0} {1}".format(op2, opmr_op2))
        elif not op2.startswith("$"):
            opmr_op2 = op2
            self.declDict[opmr_op2] = ""
        elif op2.startswith("$L"):
            opmr_op2 = op2.replace("L", "-")
        elif op2.startswith("$P"):
            opmr_op2 = "$" + str(-int(op2[2:]) + self.stackOffset + self.parameters)
        elif op2.startswith("$R"):
            opmr_op2 = "$" + str(self.stackOffset + self.parameters)
        else:
            opmr_op2 = self.registerAllocate(op2)
        return opmr_op2

    def readOperandSetup(self, op2, code):
        """Allocate a dirty register to receive a value read into ``op2``."""
        reg_op2 = self.registerAllocate(op2, 1)
        self.markRegisterDirty(reg_op2)
        return reg_op2

    def writeOperandSetup(self, op2, code):
        """Return a register holding ``op2`` (literal or variable) for output."""
        if self._isLiteral(op2):
            opmr_op2 = self.temporaryAllocate()
            code.append("move {0} {1}".format(op2, opmr_op2))
        else:
            opmr_op2 = self.ensureRegister(op2, 0)
        self.freeRegistersIfDead([op2])
        return opmr_op2

    def _emitSys(self, IRLine, sysCall, setup):
        """Emit ``sys <sysCall> <operand>`` with the operand built by ``setup``."""
        code = []
        opmr_op2 = setup(IRLine.split(" ")[1], code)
        code.append("sys {0} {1}".format(sysCall, opmr_op2))
        self._emitLines(code)

    def readi(self, IRLine):
        """READI x -> sys readi reg."""
        self._emitSys(IRLine, "readi", self.readOperandSetup)

    def readf(self, IRLine):
        """READF x -> sys readr reg."""
        self._emitSys(IRLine, "readr", self.readOperandSetup)

    def writei(self, IRLine):
        """WRITEI x -> sys writei reg."""
        self._emitSys(IRLine, "writei", self.writeOperandSetup)

    def writef(self, IRLine):
        """WRITEF x -> sys writer reg."""
        self._emitSys(IRLine, "writer", self.writeOperandSetup)

    def writes(self, IRLine):
        """WRITES name -> sys writes name (string operands need no register)."""
        self._emitLines(["sys writes {0}".format(IRLine.split(" ")[1])])

    # ------------------------------------------------------------------
    # calls and stack
    # ------------------------------------------------------------------
    def jsr(self, IRLine):
        """JSR label -> jsr label, after flushing globals held in registers."""
        label = IRLine.split(" ")[1]
        self.saveGlobalVariablesBack()
        self._emitLines(["jsr {0}".format(label)])
        return

    def push(self, IRLine):
        """PUSH [x] -> push [operand]; a bare PUSH reserves a slot."""
        lineSplit = IRLine.rstrip().split(" ")
        code = []
        if len(lineSplit) == 2:
            op1 = lineSplit[1]
            if self._isLiteral(op1):
                value = op1
            else:
                value = self.ensureRegister(op1)
                self.freeRegistersIfDead([op1])
            code.append("push {0}".format(value))
        else:
            code.append("push")
        self._emitLines(code)
        return

    def pop(self, IRLine):
        """POP [x] -> pop [reg]; the receiving register is marked dirty."""
        lineSplit = IRLine.rstrip().split(" ")
        code = []
        if len(lineSplit) == 2:
            value = self.registerAllocate(lineSplit[1])
            code.append("pop {0}".format(value))
            self.markRegisterDirty(value)
        else:
            code.append("pop")
        self._emitLines(code)
        return

    def ret(self, IRLine):
        """RET -> restore saved registers (non-main), then unlnk / ret."""
        code = []
        self.saveGlobalVariablesBack()
        if self.functName != "main":
            # Pops mirror the pushes emitted by link(), in reverse order.
            for registerToPush in reversed(self.registersToPush):
                code.append("pop {0}".format(registerToPush))
        code.append("unlnk")
        code.append("ret")
        self._emitLines(code)

    def link(self, IRLine):
        """LINK nLocals nParams -> link <frame size>, then save registers.

        The frame size written here is provisional; generateCode rewrites
        every ``link`` line once the spill count is known.
        """
        lineSplit = IRLine.split(" ")
        self.parameters = int(lineSplit[2])
        self.numLocalParams = int(lineSplit[1])
        code = ["link {0}".format(str(self.numLocalParams + self.localVarOffset))]
        if self.functName != "main":
            for registerToPush in self.registersToPush:
                code.append("push {0}".format(registerToPush))
        self._emitLines(code)

    def errorFunct(self, IRLine):
        """Fallback for unknown opcodes and blank lines: emit nothing."""
        pass
if isinstance(e, CFG.Non_Term_Ref): nonterms += 1 else: terms += 1 return total, empty, long_alts, longest, nonterms, terms if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('gp', help='relative path to grammar file') parser.add_argument('lp', help='relative path to lex file') parser.add_argument('altlen', help='threshold alternative length') args = parser.parse_args() gp = args.gp lp = args.lp altlen = int(args.altlen) lex = Lexer.parse(open(lp, "r").read()) cfg = CFG.parse(lex, open(gp, "r").read()) total, empty, long_alts, longest, nonterms, terms = alts_info(cfg, altlen) cycles = ValidGrammar.cyclicInfo(cfg, lex) headers = "rules, alts, empty, empty(%), long alts, long alts(%), longest, nonterms, terms, cycles" print headers print "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s" % \ (len(cfg.rules), total, empty, ((empty * 1.0)/total), long_alts, ((long_alts * 1.0)/total) , longest, nonterms, terms, cycles)
#classifier train = [('What time is it?', 'question'), ('where is Naples?', 'question'), ('How long is it been?', 'question'), ('which one is the champion?', 'question'), ('do the thing', 'command'), ('find the way to Naples', 'command'), ("get me the summary", 'command'), ("search on internet", 'command'), ('tell me the time', 'command'), ('hello bot', 'greeting'), ('hi there', 'greeting'), ('hey bot', 'greeting'), ('I do not like my job.', 'statement'), ("I feel amazing!", 'statement'), ("I feel better", 'statement'), ("you think like a bot", 'statement')] #classifier cl = NaiveBayesClassifier(train, feature_extractor=chatbot_extractor) #definizioni delle grammatiche grammar = CFG.CFG() grammar.add_prod('S', 'AUXV NP MAINV OBJ| WHW BE_VERB OBJ |WHW ABOUT OBJ') grammar.add_prod('NP', 'YOU') grammar.add_prod('NP', 'I') grammar.add_prod('AUXV', 'DO | CAN') grammar.add_prod('DET', 'THE | A') grammar.add_prod('BE_VERB', 'IS | ARE') grammar.add_prod('MAINV', 'BE_VERB | THINK | LIKE |FIND | SEARCH |HAVE | TELL |KNOW') grammar.add_prod('WHW', 'WHAT | WHERE | WHEN') grammar.add_prod('OBJ', 'CC NN| DET NN | DET NN CC OBJ') grammar.add_prod('CC', ' OF') grammar.add_prod('NN', 'ICECREAM | PEN | BOOK') def get_best_syntax_tree(text, cfg):
def write_cfg_lex(self, ambi_parse, gp, lp):
    """Write the minimised grammar to ``gp`` and its lexer spec to ``lp``.

    ``ambi_parse`` supplies the grammar (``min_cfg``) and the token tables
    (``sym_toks``, ``toks``); the whitespace setting is taken from
    ``self._sin.lex_ws``.
    """
    minimised_cfg = ambi_parse.min_cfg
    CFG.write(minimised_cfg, gp)

    symbolic_tokens = ambi_parse.sym_toks
    tokens = ambi_parse.toks
    whitespace = self._sin.lex_ws
    Lexer.write(symbolic_tokens, tokens, whitespace, lp)
def test_cfg():
    """Run each CFG transformation once on a hand-written sample grammar.

    Nothing is asserted; the function only checks that the calls complete.
    """
    # --- reduction -----------------------------------------------------
    reduce_rules = CFG.P({
        "S": "aA | bB",
        "A": "aAB | aa | AC | AE",
        "B": "bBA | bb | CB | BF",
        "C": "DE",
        "D": "cc | DD",
        "E": "FF | FE",
        "F": "EcE"
    })
    reduce_grammar = CFG(Set("S", "A", "B", "C", "D", "E", "F"),
                         Set("a", "b", "c"), reduce_rules, "S")
    reduce_grammar.reduce()

    # --- epsilon removal -----------------------------------------------
    epsilon_rules = CFG.P({
        "S": "ABC",
        "A": "Ab | BC",
        "B": "bB | b | Ab | ε",
        "C": "cD | c | Ac | ε",
        "D": "SSD | cSAc"
    })
    epsilon_grammar = CFG(Set("S", "A", "B", "C", "D"), Set("b", "c"),
                          epsilon_rules, "S")
    epsilon_grammar.remove_ε()

    # --- primitive (unit) rule removal -----------------------------------
    unit_rules = CFG.P({
        "S": "X | Y",
        "A": "bS | D",
        "D": "bA",
        "B": "Sa | a",
        "X": "aAS | C",
        "C": "aD | S",
        "Y": "SBb"
    })
    unit_grammar = CFG(Set("S", "A", "B", "C", "D", "X", "Y"),
                       Set("a", "b"), unit_rules, "S")
    unit_grammar.remove_primitive_rules()

    # --- own form followed by CNF ----------------------------------------
    cnf_rules = CFG.P({
        "S": "SaSbS | aAa | bBb",
        "A": "aA | aaa | B | ε",
        "B": "Bb | bb | b"
    })
    cnf_grammar = CFG(Set("S", "A", "B"), Set("a", "b"), cnf_rules, "S")
    cnf_own = cnf_grammar.toOwn()
    cnf_own.toCNF()

    # --- G1: epsilon removal, then primitive rule removal ----------------
    g1_rules = CFG.P({
        "S": "YZ | aXZa",
        "X": "YX | bY | aYZ",
        "Y": "ε | c | YZ",
        "Z": "a | Xb | ε | c"
    })
    g1 = CFG(Set("S", "X", "Y", "Z"), Set("a", "b", "c"), g1_rules, "S")
    g1_without_epsilon = g1.remove_ε()
    g1_without_epsilon.remove_primitive_rules()

    # --- G2: own form and CNF, both taken from the same grammar ----------
    g2_rules = CFG.P({
        "S": "Aa | a | Eb | abbc | aDD",
        "A": "Aab | b | SEE | baD",
        "B": "DaS | BaaC | a",
        "C": "Da | a | bB | Db | SaD",
        "D": "Da | DBc | bDb | DEaD",
        "E": "Aa | a | bca"
    })
    g2 = CFG(Set("S", "A", "B", "C", "D", "E"), Set("a", "b", "c"),
             g2_rules, "S")
    g2.toOwn()
    g2.toCNF()

    # --- left recursion removal, then GNF --------------------------------
    gnf_rules = CFG.P({"S": "Xc | Yd | Yb", "X": "Xb | a", "Y": "SaS | Xa"})
    gnf_grammar = CFG(Set("S", "X", "Y"), Set("a", "b", "c", "d"),
                      gnf_rules, "S")
    without_left_recursion = gnf_grammar.remove_left_recursion()
    without_left_recursion.toGNF()

    # --- last grammar is only constructed --------------------------------
    last_rules = CFG.P({"S": "ε | abSA", "A": "AaB | aB | a", "B": "aSS | bA | aB"})
    CFG(Set("S", "A", "B"), Set("a", "b"), last_rules, "S")
def __init__(self, gp, lp, mutype, cnt, gdir):
    """Parse the grammar/lexer pair and immediately start the run.

    gp     -- path to the grammar file
    lp     -- path to the lexer file
    mutype -- forwarded to ``run``
    cnt    -- forwarded to ``run``
    gdir   -- forwarded to ``run``
    """
    # Context managers close both files promptly, also when parsing raises;
    # the original left the handles to the garbage collector.
    with open(lp, "r") as lex_file:
        lex = Lexer.parse(lex_file.read())
    with open(gp, "r") as grammar_file:
        self.cfg = CFG.parse(lex, grammar_file.read())
    self.header = Utils.cfg_header(gp)
    # Note the argument order expected by run(): gdir comes before mutype.
    self.run(gp, lp, gdir, mutype, cnt)
def deployCFG(self):
    """Call ``CFG.CFG()`` and bind the result to a local name.

    In the code visible here the result is neither stored on ``self`` nor
    returned.
    NOTE(review): this may be a truncated method; confirm against the full file.
    """
    s2 = CFG.CFG()
def __call__(self, *args, **kwargs):
    """Build and return a ``CFG`` from the given arguments, passed through unchanged."""
    grammar = CFG(*args, **kwargs)
    return grammar
# Interactive driver: read a grammar from stdin, build the CFG and report
# whether it accepts one fixed sample string.

# Variables and alphabet are entered as comma-separated lists.
v = input('Enter your Varibles:\n').split(',')
alpha = input('Enter your alphebet:\n').split(',')
s = input('Enter your start var:\n')

# Rules are read one per line until the user types END (any letter case).
print('Enter your rules')
rules = set()
line = input('')
while line.upper() != 'END':
    rules.add(r.Rule(line))
    line = input('')

c1 = c.CFG(v, alpha, rules, s)
b = c1.accept('aaaaaaaaaacccccccc')

# Push the prompts out of view before showing the verdict.
for _ in range(10):
    print('\n')

print('yes' if b == True else 'no')
""" if deep == 0: return [] result = [] for seq in rule.seqs: genrule = [] for sym in seq: if isinstance(sym, CFG.Term): gen = [sym.tok] else: if isinstance(sym, CFG.Sym_Term): gen = [sym.tok[3:]] # remove TK_ prefix else: if isinstance(sym, CFG.Non_Term_Ref): gen = list(be(grammar, grammar.get_rule(sym.name), deep-1)) genrule = combine(result, gen) result = result + genrule return set(result) if __name__ == "__main__": if len(sys.argv) < 4: print "Usage: " + sys.argv[0] + " grammar lex unrool-level" else: l = open(sys.argv[2], "r") g = open(sys.argv[1], "r") n = int(sys.argv[3]) lex = Lexer.parse( l.read() ) grammar = CFG.parse(lex, g.read()) r = be(grammar, grammar.rules[0], n) print r, ": ", len(r)
return der_list + labels else: return labels def derivable(self, max_steps): return self.derivable_from([self.start], max_steps) # Example CFGs cfg1 = CFG(start=NT("S"), rules=[ Rule(NT("S"), [NT("NP"), NT("VP")]), Rule(NT("NP"), [NT("D"), NT("N")]), Rule(NT("VP"), [NT("V"), NT("NP")]), Rule(NT("NP"), [T("John")]), Rule(NT("NP"), [T("Mary")]), Rule(NT("D"), [T("the")]), Rule(NT("D"), [T("a")]), Rule(NT("N"), [T("cat")]), Rule(NT("N"), [T("dog")]), Rule(NT("V"), [T("saw")]), Rule(NT("V"), [T("likes")]) ]) cfg_anbn = CFG(start=NT(0), rules=[ Rule(NT(0), [NT(10), NT(1)]), Rule(NT(1), [NT(0), NT(11)]), Rule(NT(0), [NT(10), NT(11)]), Rule(NT(10), [T('a')]), Rule(NT(11), [T('b')]) ])