def find_simple_subs_for_a_vble(vble, val, vble_list, val_list):
    """Return simple B substitutions "vble := ..." reaching the value val.

    Always includes the constant assignment of val itself (sets are
    rendered in B notation via Bmch); additionally, for every variable in
    vble_list whose current value in val_list equals val, includes an
    assignment from that variable.  Duplicates are removed, first
    occurrence kept.
    """
    if type(val) == type([]):
        rhs = Bmch.convert_python_set_to_b_string(val)
    else:
        rhs = val
    candidates = [vble + " := " + rhs]
    for other_vble, other_val in zip(vble_list, val_list):
        if other_val == val:
            candidates.append(vble + " := " + other_vble)
    # De-duplicate while preserving order of first appearance.
    unique = []
    for s in candidates:
        if s not in unique:
            unique.append(s)
    return unique
def TransListToData(self,TList,SType,OpeList):
    """Convert transitions to (feature, label) pairs and split 80/10/10.

    Each transition p = [pre-state, operation, post-state] becomes the
    concatenation of the two state vectors, labelled with the index of the
    operation's first token in OpeList.  Samples are shuffled, then split
    into training / cross-validation / test subsets.
    """
    samples = []
    for pre, ope, post in TList:
        feats = self.StateToVector(pre, SType) + self.StateToVector(post, SType)
        label = OpeList.index(Bmch.get_first_token(ope))
        samples.append([feats, label])
    random.shuffle(samples)
    total = len(samples)
    cut_train = int(total * 0.8)
    cut_valid = int(total * 0.9)
    return [samples[0:cut_train], samples[cut_train:cut_valid], samples[cut_valid:total]]
def CFG_Substitution_Generator(TL, VL, conffile, wdir):
    """Generate candidate CFG substitutions for the operation in TL.

    TL -- transitions [pre, operation, post] of a single operation.
    VL -- list of variable names.
    Builds a coverage model, model-checks it with ProB, and returns the
    substitution candidates found in the resulting state diagram.
    Side effects: creates wdir and writes CM.mch / D.txt into it.
    """
    L = Bmch.read_config(conffile, "max_CFG_length", "int")
    cmd = "mkdir %s" % wdir
    os.system(cmd)
    op = TL[0][1]
    print "====== VT <-- GetVariableTypes(TL) ======"
    sg = Bgenlib.BStateGraphForNN()
    VT = sg.GetSetTypeFromTransList(TL)
    #for x in VT: print x
    print "====== PPTL <-- ConvertToPreAndPostFormat(TL,VL) ======"
    PPTL = Convert_Transitions_To_Pre_Post_Operations(TL, VL)
    print "====== CFGL <-- GenerateCFGOperations(VT,VL) ======"
    CFGL = GenerateCFGOperations(VT, VL, conffile, wdir)
    print "====== CM <-- CoverageModel(VT,VL,PPTL,CFGL) ======"
    CM = GenerateCoverageModel(VT, VL, PPTL, CFGL)
    fn = wdir + "/CM.mch"
    Bmch.print_mch_to_file(CM, fn)
    mch = CM + []
    # From here on, CM names the machine FILE rather than its text.
    CM = fn
    print "====== D <-- StateDiagram(CM) ======"
    D = "%s/D.txt" % wdir
    #bscope = Bmch.generate_training_set_condition(mch)
    # Model-check the coverage model; limits scale with the model size.
    oscmd = "./../ProB/probcli %s -model_check -df -p MAX_DISPLAY_SET -1 -p MAX_INITIALISATIONS %d -p MAX_OPERATIONS %d -noinv -nodead -spdot %s -c" % (
        CM, len(PPTL) * 10, len(CFGL) * 10, D)
    os.system(oscmd)
    sg = Bgenlib.BStateGraphForNN()
    sg.ReadStateGraph(D)
    D = sg.GetTransList()
    #for x in D: print x
    #print len(D)
    print "====== CSL <-- FindAllSubstitutionCandidates(D,CFGL) ======"
    CSL = FindAllSubstitutionCandidates(D, CFGL)
    return CSL
def Assign_Values_To_Variables(VL, XL):
    """Build a B multiple substitution "v1 := x1 ; v2 := x2 ; ...".

    VL -- variable names; XL -- corresponding values.  Values that are
    Python lists are rendered in B set notation first.
    """
    parts = []
    for i, name in enumerate(VL):
        value = XL[i]
        if type(value) == type([]):
            value = Bmch.convert_python_set_to_b_string(value)
        parts.append(name + " := " + value)
    return " ; ".join(parts)
def initialise_vble_by_examples(vl, exs):
    """Build an ANY/WHERE/THEN B initialisation choosing among examples.

    vl  -- variable names.
    exs -- example states, one value per variable (Python lists are
           rendered in B set notation).
    Returns the substitution as a list of text lines.
    """
    vbles = " , ".join(vl)
    init_vbles = " , ".join([x + "_ivble" for x in vl])
    disjuncts = []
    for ex in exs:
        rendered = []
        for value in ex:
            if type(value) == type([]):
                rendered.append(Bmch.convert_python_set_to_b_string(value))
            else:
                rendered.append(value)
        disjuncts.append("( " + init_vbles + " ) = ( " + " , ".join(rendered) + " )")
    lines = ["ANY " + init_vbles + " WHERE", disjuncts[0]]
    for d in disjuncts[1:]:
        lines.append("or " + d)
    lines.append("THEN " + vbles + " := " + init_vbles + " END")
    return lines
def EnumerateSubsetCFG(S, N):
    """Return a CFG alternation covering all subsets of S of size <= N.

    Each subset (including the empty one) is rendered in B set notation,
    quoted as a grammar terminal, and the terminals are joined by " | ".
    """
    terminals = []
    for size in xrange(N + 1):
        for combo in itertools.combinations(S, size):
            terminals.append(Bmch.convert_python_set_to_b_string(list(combo)))
    return " | ".join(["\'" + str(t) + "\'" for t in terminals])
def FMSDTransListToData(self,TList,SType,OpeList):
    """Convert transitions to (feature, label) pairs without splitting.

    Like TransListToData, but keeps the samples in input order and does
    not shuffle or partition them.
    """
    samples = []
    for pre, ope, post in TList:
        feats = self.StateToVector(pre, SType) + self.StateToVector(post, SType)
        samples.append([feats, OpeList.index(Bmch.get_first_token(ope))])
    return samples
def convert_pexp_to_cond(pexp, VL):
    """Convert a list of pre-states into a B disjunction of conjunctions.

    Each state in pexp becomes "( VL[0] = v0 & VL[1] = v1 & ... )"; the
    per-state conditions are joined with "or".  List values are rendered
    in B set notation, everything else via str().
    """
    disjuncts = []
    for state in pexp:
        conjuncts = []
        for i, value in enumerate(state):
            if type(value) == type([]):
                rhs = Bmch.convert_python_set_to_b_string(value)
            else:
                rhs = str(value)
            conjuncts.append(VL[i] + " = " + rhs)
        disjuncts.append("( " + " & ".join(conjuncts) + " )")
    return " or ".join(disjuncts)
def GetInitList(self):
    """Collect post-states of all INITIALISATION edges in the state graph.

    Scans every edge whose (cleaned) label starts with INITIALISATION or
    INITIALIZATION, resolves its destination node, and records that
    node's parsed label value.  Caches the result in self.InitList.
    """
    res = []
    EdgeList = self.graph.get_edge_list()
    NodeList = self.graph.get_node_list()
    S = []
    for k in xrange(len(EdgeList)):
        # Strip pydot escaping before extracting the operation name.
        Ope = EdgeList[k].get_label()
        Ope = Ope.replace("\\n","")
        Ope = Ope.replace("\"","")
        Ope = Bmch.get_first_token(Ope)
        if Ope != "INITIALISATION" and Ope != "INITIALIZATION":
            continue
        q = EdgeList[k].get_destination()
        # Linear scan to find the destination node by name.
        for i in xrange(len(NodeList)):
            nid = NodeList[i].get_name()
            if nid == q:
                Y = NodeList[i].get_label()
                Y = self.GetLabelValue(Y)
                S.append(Y)
                break
    self.InitList = S
    return S
def GetTransList(self):
    """Extract the sorted, de-duplicated transition list from the graph.

    Each edge becomes [pre-state, operation, post-state]; ROOT/NONE
    sources and INITIALISATION / SETUP_CONSTANTS edges are dropped.
    Caches the result in self.TList.
    NOTE(review): assumes at least one transition survives filtering --
    res = [D[0]] raises IndexError on an empty graph.
    """
    D = []
    EdgeList = self.graph.get_edge_list()
    NodeList = self.graph.get_node_list()
    for k in range(len(EdgeList)):
        Ope = EdgeList[k].get_label()
        #print Ope
        Ope = Ope.replace("\\n", "")
        Ope = Ope.replace("\"", "")
        Ope = Bmch.get_first_token(Ope)
        p = EdgeList[k].get_source()
        q = EdgeList[k].get_destination()
        # Resolve source/destination node indices by name.
        u = "None"
        v = "None"
        for i in range(len(NodeList)):
            nid = NodeList[i].get_name()
            if nid == p:
                u = i
            if nid == q:
                v = i
            if u != "None" and v != "None":
                break
        X = NodeList[u].get_label()
        Y = NodeList[v].get_label()
        #input([X, Y])
        X = self.GetLabelValue(X)
        Y = self.GetLabelValue(Y)
        #X = self.SplitLabel(X)
        #Y = self.SplitLabel(Y)
        #print X,Ope,Y
        if X != "ROOT" and X != "NONE" and Ope != "INITIALISATION" and Ope != "SETUP_CONSTANTS":
            D.append([X, Ope, Y])
    # Sort, then remove adjacent duplicates.
    D.sort()
    res = [D[0]]
    for i in range(1, len(D)):
        if D[i] != D[i - 1]:
            res.append(D[i])
    self.TList = res
    return res
def __init__(self, data, conffile):
    """Train one Bernoulli naive Bayes model per operation label.

    data     -- list of [feature_vector, label] pairs.
    conffile -- config file; "bnb_neg_prop" sets the ratio of synthetic
                negative examples to positives.
    """
    print "Training BNBayes models..."
    self.neg_prop = Bmch.read_config(conffile, "bnb_neg_prop", "float")
    # Collect distinct labels in order of first appearance, then sort.
    self.num_labels = 0
    self.labels = []
    for x in data:
        v = x[1]
        if not (v in self.labels):
            self.labels.append(v)
            self.num_labels = self.num_labels + 1
        else:
            continue
    self.labels.sort()
    print self.num_labels
    print self.labels
    self.BNBs = []
    for i in xrange(self.num_labels):
        # Make positive data.
        sdata = [u[0] for u in data if u[1] == self.labels[i]]
        # Make negative data.
        fd = len(sdata[0])
        fdh = fd / 2  # Python 2 integer division: feature midpoint
        num_data = len(sdata)
        num_iso = len(sdata) * self.neg_prop
        ni = 0
        iflag = 0
        ndata = []
        # Negatives are spliced from halves of two random positives; give
        # up after 10000 consecutive collisions with existing positives.
        while ni < num_iso:
            p = int(random.random() * num_data)
            q = int(random.random() * num_data)
            u = sdata[p][0:fdh] + sdata[q][fdh:fd]
            if not (u in sdata):
                ndata.append(u)
                ni += 1
                iflag = 0
            else:
                iflag += 1
            if iflag > 10000:
                print "Warning: Not able to find Iso Relation."
                break
        feat = sdata + ndata
        tgt = [1] * len(sdata) + [0] * len(ndata)
        # Train BNB
        BNB = BernoulliNB(alpha=1.0, binarize=0.5, class_prior=None, fit_prior=True)
        feat = numpy.clip(feat, 0, 1).astype(int)
        BNB.fit(feat, tgt)
        # Report mean predicted probability on positives vs negatives.
        s1 = 0.0
        s2 = 0.0
        rr = BNB.predict_proba(feat)
        for j in xrange(len(tgt)):
            if j < len(sdata):
                s1 = s1 + rr[j][1]
            else:
                s2 = s2 + rr[j][1]
        s1 = s1 / len(sdata)
        s2 = s2 / len(ndata)
        print "Average probability for label", self.labels[i], "is Pos---%.2lf vs Neg---%.2lf." % (s1, s2)
        self.BNBs.append(BNB)
    print "BNBayes model training done."
    return
# Script fragment: copy the config into the result dir, read the oracle
# file, run Monte-Carlo state sampling, and set up iteration 0's machine.
# NOTE(review): G and s are bound earlier in the (not visible) script.
G = s
s = resdir + "/config"
cmd = "cp %s %s" % (conffile, s)
os.system(cmd)
conffile = s
"""
fn = resdir + "/M_pp.mch"
oscmd = "./../ProB/probcli -pp %s %s"%(fn,M)
os.system(oscmd)
M = fn
"""
print("Reading Oracle File...")
G = Bmch.read_oracle_file(G)
print("====== MCSS <-- MonteCarloStateSampling(M) ======")
mcdir = resdir + "/MCSS/"
MCSS = SemLearnLib.MonteCarloStateSampling(M, W, conffile, mcdir)
print("====== MR <-- M ======")
sdir = resdir + "/0/"
cmd = "mkdir %s" % sdir
os.system(cmd)
MR = sdir + "/MR.mch"
cmd = "cp %s %s" % (M, MR)
os.system(cmd)
def __init__(self, data, conffile):
    """Train one random-forest (CART) regressor per operation label.

    data     -- list of [feature_vector, label] pairs.
    conffile -- config file; reads "skcart_neg_prop" (negative/positive
                ratio), "skcart_num_tree" (forest size) and
                "skcart_min_imp_exp"/"skcart_max_imp_exp" (log-range for
                the randomly drawn min_impurity_decrease).
    """
    print("Training SKCART models...")
    self.neg_prop = Bmch.read_config(conffile, "skcart_neg_prop", "float")
    self.num_tree = Bmch.read_config(conffile, "skcart_num_tree", "int")
    self.min_imp_exp = Bmch.read_config(conffile, "skcart_min_imp_exp", "float")
    self.max_imp_exp = Bmch.read_config(conffile, "skcart_max_imp_exp", "float")
    # Exponent range for sampling min_impurity_decrease = exp(vb + U*vs).
    vb = self.min_imp_exp
    vs = self.max_imp_exp - self.min_imp_exp
    # Collect distinct labels in order of first appearance, then sort.
    self.num_labels = 0
    self.labels = []
    for x in data:
        v = x[1]
        if not (v in self.labels):
            self.labels.append(v)
            self.num_labels = self.num_labels + 1
        else:
            continue
    self.labels.sort()
    print(self.num_labels)
    print(self.labels)
    self.CARTs = []
    for i in xrange(self.num_labels):
        # Make positive data.
        sdata = [u[0] for u in data if u[1] == self.labels[i]]
        # Make negative data.
        fd = len(sdata[0])
        fdh = fd / 2  # Python 2 integer division: feature midpoint
        num_data = len(sdata)
        num_iso = len(sdata) * self.neg_prop
        ni = 0
        iflag = 0
        ndata = []
        # Negatives are spliced from halves of two random positives; give
        # up after 10000 consecutive collisions with existing positives.
        while ni < num_iso:
            p = int(random.random() * num_data)
            q = int(random.random() * num_data)
            u = sdata[p][0:fdh] + sdata[q][fdh:fd]
            if not (u in sdata):
                ndata.append(u)
                ni += 1
                iflag = 0
            else:
                iflag += 1
            if iflag > 10000:
                print("Warning: Not able to find Iso Relation.")
                break
        feat = sdata + ndata
        tgt = [1] * len(sdata) + [0] * len(ndata)
        # Train CART with a randomly sampled impurity threshold.
        p_r = random.random() * vs + vb
        p_var = numpy.exp(p_r)
        CART = RandomForestRegressor(n_estimators=self.num_tree, min_impurity_decrease=p_var)
        feat = numpy.clip(feat, 0, 1).astype(int)
        CART.fit(feat, tgt)
        # Report mean predicted score on positives vs negatives.
        s1 = 0.0
        s2 = 0.0
        rr = CART.predict(feat)
        for j in xrange(len(tgt)):
            if j < len(sdata):
                s1 = s1 + rr[j]
            else:
                s2 = s2 + rr[j]
        s1 = s1 / len(sdata)
        s2 = s2 / len(ndata)
        print("Average probability for label", self.labels[i], "is Pos---%.2lf vs Neg---%.2lf." % (s1, s2))
        self.CARTs.append(CART)
    # Fixed: previously printed "BNBayes model training done." (copy-paste
    # from the BNBayes trainer); this class trains SKCART models.
    print("SKCART model training done.")
    return
# Script fragment: load the machine and faulty-transition files named on
# the command line, then assemble the revision option string from config.
mchfile = sys.argv[1] + "/" + sys.argv[2] + ".mch"
with open(mchfile) as mchf:
    mch = mchf.readlines()
mch = [x.strip() for x in mch]
outfile = sys.argv[1] + "/" + sys.argv[2] + ".rev.mch"
ftfile = sys.argv[1] + "/" + sys.argv[2] + ".ftrans"
with open(ftfile) as ftf:
    ft = ftf.readlines()
ft = [x.strip() for x in ft]
conffile = sys.argv[4]
no_dead = Bmch.read_config(conffile, "no_dead", "bool")
no_ass = Bmch.read_config(conffile, "no_ass", "bool")
# Option string is "No" + enabled flags, e.g. "NoDeadAss"; plain "No"
# (nothing enabled) becomes "Default".
revision_option = "No"
if no_dead == True:
    revision_option = revision_option + "Dead"
if no_ass == True:
    revision_option = revision_option + "Ass"
if revision_option == "No":
    revision_option = "Default"
rev_opt = revision_option
max_cost = Bmch.read_config(conffile, "max_cost", "int")
max_operations = Bmch.read_config(conffile, "max_operations", "int")
max_num_rev = Bmch.read_config(conffile, "max_num_rev", "int")
logtxt = []
dsfile = resfolder + "/data.txt" #nnetfile = sys.argv[5] with open(mchfile) as mchf: mch = mchf.readlines() mch = [x.strip() for x in mch] # Note: the following two functions have been complete, but are not used now. """ sd = Bmch.get_enum_sets(mch) sds = Bmch.convert_enum_sets_to_types(sd) print sds """ additional_sampling = Bmch.read_config(conffile,"additional_sampling","bool") if additional_sampling == True: print "\nUse additional sampling.\n" trsetmch = Bmch.generate_training_set_machine(mch,"") else: print "\nNot use additional sampling.\n" trsetmch = mch bscope = Bmch.generate_training_set_condition(mch) Bmch.print_mch_to_file(trsetmch,outfile) max_num_sampling_states = Bmch.read_config(conffile,"max_num_sampling_states","int") max_operations = Bmch.read_config(conffile,"max_operations","int")
# Script fragment: locate the doubleoctagon (faulty) node in the dot
# graph and the edge matching the last faulty-path label.
# NOTE(review): fpf/spfile come from earlier, not-visible script lines.
fp = fpf.readlines()
fp = [x.strip() for x in fp]
import pydotplus
import pydot
pp = pydotplus.graphviz.graph_from_dot_file(spfile)
#gph = pydotplus.graph_from_dot_data(pp.to_string())
from graphviz import Graph
# Get the faulty operation.
fope = Bmch.get_first_token(fp[-1])
# Process and output..
qq = pp.get_node_list()
for i in xrange(len(qq)):
    if qq[i].get_shape() == "doubleoctagon":
        x = qq[i].get_name()
rr = pp.get_edge_list()
fopelabel = "\"%s\""%fp[-1]
for k in xrange(len(rr)):
    print Bmch.get_label_pretty(rr[k]), fopelabel
    if Bmch.get_label_pretty(rr[k]) != fopelabel:
        continue
    q = rr[k].get_destination()
    # kt remembers the index of the edge entering the faulty node.
    if q == x:
        kt = k
def CFGRepairSimplification(RL, DL, VL, conffile, wdir):
    """Simplify repairs by generalising them into CFG substitutions.

    RL -- repair entries [pre, operation, faulty-post, repaired-post].
    DL -- all transitions; VL -- variable names.
    Returns a list of [operation, condition, substitution] triples.
    Side effect: creates wdir and per-operation sub-directories.
    """
    cmd = "mkdir %s" % wdir
    os.system(cmd)
    # Group repairs by operation name (RLS sorted on field 1).
    RLS = RL + []
    RLS.sort(key=lambda x: x[1])
    RLT = []
    for i in xrange(len(RLS)):
        if i == 0:
            P = []
        elif i > 0 and RLS[i][1] != RLS[i - 1][1]:
            RLT.append(P)
            P = []
        P.append(RLS[i])
        if i == len(RLS) - 1:
            RLT.append(P)
    # PT --- List of each operation's transitions that are not changed
    PLT = []
    for i in xrange(len(RLT)):
        op = RLT[i][0][1]
        # FL --- List of faulty transitions
        FL = []
        for X in RLT[i]:
            FL.append([X[0], X[1], X[2]])
        P = []
        for X in DL:
            if X[1] != op:
                continue
            if X in FL:
                continue
            P.append(X)
        PLT.append(P)
    L = Bmch.read_config(conffile, "max_CFG_length", "int")
    CFGS = []
    # Generate substitutions for each operation.
    for k in xrange(len(RLT)):
        X = RLT[k]
        # TL uses the REPAIRED post-state (field 3) of each entry.
        TL = []
        for U in X:
            TL.append([U[0], U[1], U[3]])
        op = TL[0][1] + ""
        sdir = wdir + "/" + op + "_simp/"
        Y = CFG_Substitution_Generator(TL, VL, conffile, sdir)
        TP = transition_partition(TL, VL, Y)
        PL = PLT[k]
        # SPS --- stable pre-states
        SPS = []
        for P in PL:
            SPS.append(P[0])
        for i in xrange(len(TP)):
            SC = TP[i]
            subs = convert_CFG_subs_to_str(SC[0])
            # Make if condition
            pexp = []
            for x in SC[1]:
                if not (x[0] in pexp):
                    pexp.append(x[0])
            pexp = sorted(pexp)
            cond = convert_pexp_to_cond(pexp, VL)
            CFGS.append([op, cond, subs])
    return CFGS
# Script fragment: count states/transitions of every .mch file in fdir
# via ProB pretty-printing, print a table and start writing RESULT.
fnames.sort()
res = []
for x in fnames:
    if not(".mch" in x):
        continue
    fp = fdir + x
    print "Counting %s..."%fp
    print "Converting mch file to a pretty-printed version."
    mchfile = fp+"_tmpfileforcount"
    xt = "./../ProB/probcli %s -timeout 5000 -pp %s"%(fp,mchfile)
    os.system(xt)
    st,tr,fs = Bmch.CountMchStateAndTrans(mchfile)
    res.append([x,st,tr,fs])
    os.system("rm %s"%mchfile)
print "RESULT:"
print "FILE NUM_ST NUM_TR NUM_FS"
for x in res:
    print x[0], x[1], x[2], x[3]
resfp = fdir + "RESULT"
resfile = open(resfp,"w")
resfile.write("RESULT:\n")
resfile.write("FILE NUM_ST NUM_TR NUM_FS\n")
for x in res:
    # NOTE(review): the write of y happens beyond this view -- the
    # fragment is truncated here.
    y = "%s %s %s %s\n"%(x[0], x[1], x[2], x[3])
def MonteCarloStateSampling(M, W, conffile, sdir):
    """Randomly sample machine states satisfying the revision condition.

    M -- machine file; W -- semantics model carrying SType/VList.
    Builds a sampling machine whose INITIALISATION is the revision
    condition, runs ProB random model checking, and returns the sampled
    initial states.  Side effect: writes files into sdir.
    """
    cmd = "mkdir %s" % sdir
    os.system(cmd)
    s = sdir + "/M.mch"
    cmd = "cp %s %s" % (M, s)
    os.system(cmd)
    M = s
    # Pretty-print the machine so it can be parsed line by line.
    fn = sdir + "/M_pp.mch"
    oscmd = "./../ProB/probcli -pp %s %s" % (fn, M)
    os.system(oscmd)
    M = fn
    with open(M) as mchf:
        mch = mchf.readlines()
    mch = [x.strip() for x in mch]
    rev_cond = Bmch.generate_revision_condition(mch, [], "")
    # If integers exist in the model, then we limit the search space of integers.
    int_flag = False
    for x in rev_cond:
        for y in [
                ": INTEGER", ": NATURAL", ": NATURAL1", ": INT", ": NAT", ": NAT1"
        ]:
            if x[len(x) - len(y):len(x)] == y:
                int_flag = True
                break
    Int_CompX = 1
    if int_flag == True:
        SType = W.SType
        VList = W.VList
        for j in xrange(len(SType)):
            if SType[j][0] != "Int":
                continue
            V = VList[j]
            T = SType[j][1:len(SType[j])]
            Int_CompX = Int_CompX * len(T)
            # Replace each unbounded "<V>_init : INTEGER"-style condition
            # with membership in the finite value set T observed for V.
            for i in xrange(len(rev_cond)):
                x = rev_cond[i]
                for P in [
                        ": INTEGER", ": NATURAL", ": NATURAL1", ": INT", ": NAT", ": NAT1"
                ]:
                    y = V + "_init " + P
                    if x[len(x) - len(y):len(x)] == y:
                        Q = ""
                        for u in T:
                            Q = Q + str(u) + ","
                        Q = Q[0:len(Q) - 1]
                        Q = V + "_init : {" + Q + "}"
                        z = x.replace(y, Q)
                        rev_cond[i] = z
                        break
    all_opes = Bmch.get_all_opes(mch)
    # MS --- machine for sampling
    MS = []
    i = 0
    mchlen = len(mch)
    # Copy the machine header up to (excluding) its INITIALISATION.
    while i < mchlen:
        tt = Bmch.get_first_token(mch[i])
        # Based on the syntax of <The B-book>, p.273.
        if tt == "INITIALIZATION":
            break
        if tt == "INITIALISATION":
            break
        MS.append(mch[i])
        i = i + 1
    MS.append("INITIALISATION")
    MS = MS + rev_cond
    #res.append("OPERATIONS")
    #res = res + all_opes
    MS.append("END")
    fn = sdir + "/sampling.mch"
    Bmch.print_mch_to_file(MS, fn)
    MS = fn
    mcss_max_num_samples = Bmch.read_config(conffile, "mcss_max_num_samples", "int")
    # NOTE(review): no "/" separator -- relies on sdir ending with "/".
    D = sdir + "D.txt"
    #genmode = "-mc %d -mc_mode random -p MAX_INITIALISATIONS %d -p RANDOMISE_ENUMERATION_ORDER TRUE -p MAX_DISPLAY_SET -1"%(mcss_max_num_samples * 100, mcss_max_num_samples * 100)
    genmode = "-mc %d -mc_mode random -p MAX_INITIALISATIONS %d -p RANDOMISE_ENUMERATION_ORDER TRUE -p MAX_DISPLAY_SET -1" % (
        mcss_max_num_samples, mcss_max_num_samples)
    mkgraph = "./../ProB/probcli %s %s -nodead -spdot %s -c" % (MS, genmode, D)
    os.system(mkgraph)
    sg = Bgenlib.BStateGraphForNN()
    sg.ReadStateGraph(D)
    SI = sg.GetInitList()
    """
    random.shuffle(SI)
    if len(SI) > mcss_max_num_samples:
        SI = SI[0:mcss_max_num_samples]
    """
    print "Sample %d times. Get %d samples that satisfies requirements." % (
        mcss_max_num_samples, len(SI))
    return SI
# Script fragment: copy train/test CSVs and config into resdir, read the
# semantics data, and write an 80/20 train/valid split plus the test set.
s = resdir + "/train.csv"
cmd = "cp %s %s" % (training_data, s)
os.system(cmd)
training_data = s
s = resdir + "/test.csv"
cmd = "cp %s %s" % (test_data, s)
os.system(cmd)
test_data = s
s = resdir + "/config"
cmd = "cp %s %s" % (conffile, s)
os.system(cmd)
conffile = s
tmtype = Bmch.read_config(conffile, "tendency_model", "str")
sg = Bgenlib.BStateGraphForNN()
SData = sg.ReadCSVSemanticsData([training_data, test_data])
train_txt = resdir + "/train80.txt"
valid_txt = resdir + "/valid20.txt"
test_txt = resdir + "/test.txt"
# First 80% of the training CSV -> train, remaining 20% -> validation.
sp100 = len(SData[0])
sp80 = int(sp100 * 0.8)
sg.WriteSemanticDataToTxt(SData[0][0:sp80], train_txt)
sg.WriteSemanticDataToTxt(SData[0][sp80:sp100], valid_txt)
sg.WriteSemanticDataToTxt(SData[1], test_txt)
#tmtype = "SKCART"
# Script fragment: pretty-print Machine1/Machine2 and produce their state
# graphs with ProB model checking.
os.system(oscmd)
print "Producing the state graph of Machine 1."
mchpp1 = "%s/Machine1_pp.mch"%resdir
sgfile1 = "%s/StateGraph1.txt"%resdir
oscmd = "./../ProB/probcli -pp %s %s/Machine1.mch"%(mchpp1,resdir)
os.system(oscmd)
with open(mchpp1) as mchf:
    mch1 = mchf.readlines()
mch1 = [x.strip() for x in mch1]
bscope1 = Bmch.generate_training_set_condition(mch1)
oscmd = "./../ProB/probcli %s -model_check -df -p MAX_DISPLAY_SET -1 -p MAX_OPERATIONS 1024 -nodead -scope \"%s\" -spdot %s -c"%(mchpp1,bscope1,sgfile1)
os.system(oscmd)
print "Producing the state graph of Machine 2."
mchpp2 = "%s/Machine2_pp.mch"%resdir
sgfile2 = "%s/StateGraph2.txt"%resdir
oscmd = "./../ProB/probcli -pp %s %s/Machine2.mch"%(mchpp2,resdir)
os.system(oscmd)
with open(mchpp2) as mchf:
    mch2 = mchf.readlines()
mch2 = [x.strip() for x in mch2]
# Script fragment: interactively confirm each repair and apply it to the
# machine.  NOTE(review): this code sits inside a loop whose header is
# outside this view (the "continue" below belongs to it), and the final
# "with open(MSC)..." body is also beyond this view.
for i in xrange(len(subs)):
    # Strip the trailing character (newline/separator) of each line.
    subs[i] = subs[i][0:len(subs[i]) - 1]
cf.close()
if cond == "No Repair.":
    print "No repair applied to \"%s\"." % ope
    x = "a"
    while x != "y":
        x = raw_input("Confirm? (y/n): ")
    continue
else:
    print "Applying \"%s ==> %s\" to \"%s\"." % (cond, subs, ope)
    x = "a"
    while x != "y":
        x = raw_input("Confirm? (y/n): ")
    mch = Bmch.update_consonance(mch, ope, cond, subs)
for x in mch:
    print x
print cond
MSC = resdir + "/MSC.mch"
Bmch.print_mch_to_file(mch, MSC)
#ppp
#MSC = MS
# Phase 2.
print "======= Phase 2: Insertion ======="
with open(MSC) as mchf:
def GenerateCFGOperations(VT, VL, conffile, wdir):
    """Generate candidate substitutions by enumerating a B-expression CFG.

    VT -- variable types ("Int" / "Bool" / "Dist" / "Set" plus value
    domains); VL -- variable names.  Builds a per-type grammar, generates
    sentences up to depth max_CFG_length (capped at 1000 per depth), and
    returns [PRE-operation text, substitution text] pairs.
    """
    MaxCard = Bmch.read_config(conffile, "max_CFG_set_cardinality", "int")
    PPVL = Generate_Pre_And_Post_Variables(VL)
    PreVL = PPVL[0]
    PostVL = PPVL[1]
    #print PreVL
    #print PostVL
    #print VT
    NUM_DS = 0
    ST = []  # ST --- Types of substitutions
    CM = ComputeVariableConnectionMatrix(VT)  # CM --- Connection Matrix
    vble_types = ""
    const_types = ""
    INTZ = []
    NAT1Z = []
    for i in xrange(len(VT)):
        T = VT[i]
        if T[0] == "Int":
            S = "S -> INT | INTC\n"
            ST.append(S)
            # Collect all integer constants (and the positive subset).
            for X in T[1:len(T)]:
                if not (X in INTZ):
                    INTZ.append(X)
                if X > 0 and not (X in NAT1Z):
                    NAT1Z.append(X)
        elif T[0] == "Bool":
            S = "S -> BOOL | 'TRUE' | 'FALSE' | 'bool(' PRED ')'\n"
            ST.append(S)
            S = "BOOL -> \'%s\'\n" % (PreVL[i])
            vble_types = vble_types + S
        elif T[0] == "Dist":
            NUM_DS = NUM_DS + 1
            DN = "DIST" + str(NUM_DS)
            SN = "SET" + str(NUM_DS)
            S = "S -> %s | %sC\n" % (DN, DN)
            ST.append(S)
            # Only same-typed, connected variables may appear as terminals.
            for j in xrange(len(VT)):
                if CM[i][j] == True and VT[i][0] == VT[j][0]:
                    S = "%s -> \'%s\'\n" % (DN, PreVL[j])
                    vble_types = vble_types + S
            Z = ConvertSetToCFG(T[1:len(T)])
            S = "%sC -> %s\n" % (DN, Z)
            const_types = const_types + S
            Z = EnumerateSubsetCFG(T[1:len(T)], MaxCard)
            S = "%sC -> %s\n" % (SN, Z)
            const_types = const_types + S
        elif T[0] == "Set":
            NUM_DS = NUM_DS + 1
            DN = "DIST" + str(NUM_DS)
            SN = "SET" + str(NUM_DS)
            S = "S -> %s | %sC\n" % (SN, SN)
            ST.append(S)
            for j in xrange(len(VT)):
                if CM[i][j] == True and VT[i][0] == VT[j][0]:
                    S = "%s -> \'%s\'\n" % (SN, PreVL[j])
                    vble_types = vble_types + S
            Z = EnumerateSubsetCFG(T[1:len(T)], MaxCard)
            S = "%sC -> %s\n" % (SN, Z)
            const_types = const_types + S
        else:
            # Deliberate crash on unsupported types: "ppp" is undefined,
            # so this raises NameError.
            ppp
    INTZ.sort()
    NAT1Z.sort()
    if len(INTZ) > 0:
        Z = ConvertSetToCFG(INTZ)
        S = "INTC -> %s\n" % (Z)
        const_types = const_types + S
    if len(NAT1Z) > 0:
        Z = ConvertSetToCFG(NAT1Z)
        S = "NAT1C -> %s\n" % (Z)
        const_types = const_types + S
    DS_grammar = ""
    for i in xrange(1, NUM_DS + 1):
        DN = "DIST" + str(i)
        SN = "SET" + str(i)
        S = ProduceDistAndSetCFGRules(DN, SN)
        DS_grammar = DS_grammar + S
    #print vble_types
    #print DS_grammar
    #print ST
    common_grammar = Common_CFG_Grammar()
    #print common_grammar
    MaxL = Bmch.read_config(conffile, "max_CFG_length", "int")
    XS = []
    SFlag = True
    # Enumerate sentences depth by depth until MaxL or the 1000 cap.
    for dep in xrange(1, MaxL + 1):
        if SFlag == False:
            break
        X = []
        for i in xrange(len(ST)):
            subs_type = ST[i]
            vble = PostVL[i]
            b_subs_grammar = subs_type + vble_types + const_types + common_grammar + DS_grammar
            grammar = CFG.fromstring(b_subs_grammar)
            for sentence in generate(grammar, depth=dep, n=1001):
                S = vble + " = " + ' '.join(sentence)
                if S in X:
                    continue
                X.append(S)
                if len(X) > 1000:
                    print "as the number of CFGs at depth %d is greater than 1000, we stop search now." % (
                        dep)
                    SFlag = False
        random.shuffle(X)
        NX = 0
        for Z in X:
            if not (Z in XS):
                XS.append(Z)
                NX = NX + 1
            if NX >= 1000 and dep >= 4:
                print "Note: as the number of candidate substitutions at depth %d is greater than 1000, we reduce it to 1000." % (
                    dep)
                break
    X = XS
    X.sort()
    RedS = Redundant_CFG_Strings(VT)
    res = []
    OPE_ID = -1
    for i in xrange(len(X)):
        cond = X[i]
        # Removing redundant CFGs.
        flag = True
        for rs in RedS:
            if rs in cond:
                flag = False
                break
        if flag == False:
            continue
        OPE_ID = OPE_ID + 1
        S = "SUBS_%d = PRE cfg_status = 1 & %s THEN cfg_status := 2 END ;" % (
            OPE_ID, cond)
        # remove preCFGx_ and postCFGx_ prefix and make a substitution
        subs = cond + ""
        j = 0
        while subs[j:j + 1] != "=":
            j = j + 1
        subs = subs[0:j] + ":=" + subs[j + 1:len(subs)]
        subs = subs.replace("preCFGx_", "")
        subs = subs.replace("postCFGx_", "")
        res.append([S, subs])
    return res
def GenerateCoverageModel(VT, VL, PPTL, CFGL):
    """Assemble the B coverage machine used to test CFG substitutions.

    VT -- variable types; VL -- variable names; PPTL -- pre/post-format
    operations; CFGL -- [operation text, substitution] pairs.
    Returns the machine as a list of text lines.
    """
    PPVL = Generate_Pre_And_Post_Variables(VL)
    PreVL = PPVL[0]
    PostVL = PPVL[1]
    """
    print "VT:"
    for x in VT: print x
    print "VL:"
    for x in VL: print x
    """
    # Collect all distinct elements.
    # The dummy first element keeps UnivSet non-empty.
    UnivSet = ["ThisElementMeansNothing"]
    for T in VT:
        if T[0] != "Dist" and T[0] != "Set":
            continue
        for X in T[1:len(T)]:
            if not (X in UnivSet) and Bmch.CanRepresentInt(X) != True:
                UnivSet.append(X)
    PPVIdxL = ExtractVbleIndexList(PreVL + PostVL)
    for X in PPVIdxL:
        for u in X[1]:
            if Bmch.CanRepresentInt(u) == True:
                continue
            if not (u in UnivSet):
                UnivSet.append(u)
    CM = []
    CM.append("MACHINE CoverageModel")
    if UnivSet != []:
        CM.append("SETS")
        US = "UnivSet = {"
        for X in UnivSet:
            US = US + X + ","
        US = US[0:len(US) - 1] + "}"
        CM.append(US)
    CM.append("VARIABLES")
    VD = "cfg_status"
    for X in PPVIdxL:
        VD = VD + "," + X[0]
    CM.append(VD)
    VIdxL = ExtractVbleIndexList(VL)
    CM.append("INVARIANT")
    CM.append("cfg_status : INTEGER")
    # Build typing invariants for pre/post copies of each variable; mp is
    # a function-domain prefix when the variable is indexed.
    PreVD = []
    PostVD = []
    VDone = []
    for i in xrange(len(VT)):
        VN = VL[i].split("(")[0]
        if VN in VDone:
            continue
        VDone.append(VN)
        VIL = None
        for j in xrange(len(VIdxL)):
            if VIdxL[j][0] == VN:
                VIL = VIdxL[j][1]
                break
        if VIL == None:
            mp = ""
        elif len(VIL) > 0:
            mp = ""
            for u in VIL:
                mp = mp + u + ","
            mp = "{" + mp[0:len(mp) - 1] + "} --> "
        else:
            mp = ""
        PreVN = PreVL[i].split("(")[0]
        PostVN = PostVL[i].split("(")[0]
        if VT[i][0] == "Int":
            PreS = "& %s : %sINTEGER" % (PreVN, mp)
            PostS = "& %s : %sINTEGER" % (PostVN, mp)
        elif VT[i][0] == "Bool":
            PreS = "& %s : %sBOOL" % (PreVN, mp)
            PostS = "& %s : %sBOOL" % (PostVN, mp)
        elif VT[i][0] == "Dist":
            PreS = "& %s : %sUnivSet" % (PreVN, mp)
            PostS = "& %s : %sUnivSet" % (PostVN, mp)
        elif VT[i][0] == "Set":
            if len(VT[i]) == 1:
                # it should be an empty set.
                PreS = "& %s : %sPOW(UnivSet)" % (PreVN, mp)
                PostS = "& %s : %sPOW(UnivSet)" % (PostVN, mp)
            elif Bmch.CanRepresentInt(VT[i][1]) == True:
                PreS = "& %s : %sPOW(INTEGER)" % (PreVN, mp)
                PostS = "& %s : %sPOW(INTEGER)" % (PostVN, mp)
            else:
                PreS = "& %s : %sPOW(UnivSet)" % (PreVN, mp)
                PostS = "& %s : %sPOW(UnivSet)" % (PostVN, mp)
        else:
            # Deliberate crash on unsupported types (undefined name).
            ppp
        PreVD.append(PreS)
        PostVD.append(PostS)
    """
    if VT[i][0] == "Int":
        PreVD.append("& %s : INT"%PreVL[i])
        PostVD.append("& %s : INT"%PostVL[i])
    elif VT[i][0] == "Bool":
        PreVD.append("& %s : BOOL"%PreVL[i])
        PostVD.append("& %s : BOOL"%PostVL[i])
    elif VT[i][0] == "Dist":
        PreVD.append("& %s : UnivSet"%PreVL[i])
        PostVD.append("& %s : UnivSet"%PostVL[i])
    elif VT[i][0] == "Set":
        PreVD.append("& %s : POW(UnivSet)"%PreVL[i])
        PostVD.append("& %s : POW(UnivSet)"%PostVL[i])
    else:
        ppp
    """
    CM = CM + PreVD + PostVD
    CM.append("INITIALISATION")
    CM.append("cfg_status := 0")
    # Default initial values per type for every pre/post variable.
    VDone = []
    for i in xrange(len(VT)):
        VN = VL[i].split("(")[0]
        if VN in VDone:
            continue
        VDone.append(VN)
        VIL = None
        for j in xrange(len(VIdxL)):
            if VIdxL[j][0] == VN:
                VIL = VIdxL[j][1]
                break
        if not (VIL == None) and not (VIL == []):
            PreVN = PreVL[i].split("(")[0]
            PostVN = PostVL[i].split("(")[0]
            CM.append("; %s := {}" % (PreVN))
            CM.append("; %s := {}" % (PostVN))
        elif VT[i][0] == "Int":
            CM.append("; %s := 0" % (PreVL[i]))
            CM.append("; %s := 0" % (PostVL[i]))
        elif VT[i][0] == "Bool":
            CM.append("; %s := FALSE" % (PreVL[i]))
            CM.append("; %s := FALSE" % (PostVL[i]))
        elif VT[i][0] == "Dist":
            CM.append("; %s := %s" % (PreVL[i], UnivSet[0]))
            CM.append("; %s := %s" % (PostVL[i], UnivSet[0]))
        elif VT[i][0] == "Set":
            CM.append("; %s := {}" % (PreVL[i]))
            CM.append("; %s := {}" % (PostVL[i]))
        else:
            ppp
    CM.append("OPERATIONS")
    CM = CM + PPTL
    for X in CFGL:
        CM.append(X[0])
    # OPE_ZERO is unreachable (cfg_status never becomes 100); it only
    # terminates the preceding ";"-separated operation list.
    CM.append("OPE_ZERO = PRE cfg_status = 100 THEN skip END")
    CM.append("END")
    return CM
# Script fragment: write the revision-set machine, model-check it with
# ProB, and extract candidate state revisions.  NOTE(review): this code
# is cut from a larger control structure; the trailing if-branch
# continues beyond this view, and the closing quote of the commented-out
# alternative was reconstructed.
sm = Bmch.generate_revision_set_machine(mch, fstate, max_cost, rev_opt)
for item in sm:
    revsetfile.write("%s\n"%item)
revsetfile.close()
print sm
bth = 2048 #int(sys.argv[3]) + 1
mkgraph = "./../ProB/probcli -model_check -nodead -noinv -noass -p MAX_INITIALISATIONS %d -mc_mode bf -spdot %s.statespace.dot %s"%(bth,revsetfilename,revsetfilename)
os.system(mkgraph)
revset,numrev = Bmch.extract_state_revision_from_file("%s.statespace.dot"%revsetfilename, max_cost)
"""
revset, numrev = Bmch.generate_revision_set(mch, fstate, max_cost, rev_opt, sys.argv[1])
"""
if numrev == 0:
    print "No state revision found."
    if "Dead" in rev_opt:
        while True:
            x = raw_input(
                "Skip the revision of Operation \"%s\" and use isolation? (y/n): "
                % ft[1])
            if x == "y":
                # Isolate the fault by strengthening the precondition.
                iso_cond = "not( %s )" % fcond
                print "Pre-condition \"%s\" is added to Operation \"%s\"." % (
                    iso_cond, fope)
                newmch = Bmch.add_precond_to_mch(mch, fope, iso_cond)
                Bmch.print_mch_to_file(newmch, sys.argv[2])
                print "State isolation done."
def GeneratingTrainingData(M, conf, resdir):
    """Sample a machine's state space and write semantics training data.

    M -- machine file; conf -- configuration file; resdir -- output dir.
    Runs ProB random model checking, converts the resulting transitions
    into Silas CSV data split into train/valid sets, and records the
    variable list and types in datatypes.txt.
    Returns [number of training rows, number of validation rows].
    """
    mchfile = M
    conffile = conf
    resfolder = resdir
    print "Generating Training Data for Semantics Learning..."
    print "Source File:", mchfile
    print "Configuration File:", conffile
    print "Working Folder:", resfolder
    cmd = "mkdir %s" % resfolder
    os.system(cmd)
    # Pretty-print the source machine into the working folder.
    ff = resfolder + "/source.mch"
    cmd = "./../ProB/probcli -pp %s %s" % (ff, mchfile)
    os.system(cmd)
    mchfile = ff
    ff = resfolder + "/config"
    cmd = "cp %s %s" % (conffile, ff)
    os.system(cmd)
    conffile = ff
    outfile = resfolder + "/trset.mch"
    sgfile = resfolder + "/trset.statespace.dot"
    dsfile = resfolder + "/data.txt"
    with open(mchfile) as mchf:
        mch = mchf.readlines()
    mch = [x.strip() for x in mch]
    additional_sampling = Bmch.read_config(conffile, "additional_sampling", "bool")
    if additional_sampling == True:
        print "\nUse additional sampling.\n"
        trsetmch = Bmch.generate_training_set_machine(mch, "")
    else:
        print "\nNot use additional sampling.\n"
        trsetmch = mch
    bscope = Bmch.generate_training_set_condition(mch)
    Bmch.print_mch_to_file(trsetmch, outfile)
    max_num_sampling_states = Bmch.read_config(conffile, "max_num_sampling_states", "int")
    max_operations = Bmch.read_config(conffile, "max_operations", "int")
    print "\nMaximum number of samples is", max_num_sampling_states, ".\n"
    # "-mc 100 and -p MAX_INITIALISATIONS 100" works well. But now I am trying more initialisations.
    genmode = "-mc %d -mc_mode random -p MAX_INITIALISATIONS %d -p RANDOMISE_ENUMERATION_ORDER TRUE -p MAX_OPERATIONS %d -p MAX_DISPLAY_SET -1" % (
        max_num_sampling_states, max_num_sampling_states, max_operations)
    # We still need to carefully examine the performance of ProB-SMT and KODKOD.
    # When search space is small, NO-SMT, ProB-SMT and KODKOD have similar speed.
    #smtmode = "-p KODKOD TRUE -p SMT TRUE -p CLPFD TRUE"
    smtmode = ""
    mkgraph = "./../ProB/probcli %s %s -nodead -scope \"%s\" -spdot %s %s -c" % (
        outfile, genmode, bscope, sgfile, smtmode)
    os.system(mkgraph)
    sg = Bgenlib.BStateGraphForNN()
    sg.ReadStateGraph(sgfile)
    TL = sg.GetTransList()
    TL = sg.SortSetsInTransList(TL)
    # Remove faulty transitions.
    # FS --- Faulty States.
    # FT --- Faulty Transitions.
    FS = sg.GetStatesWithoutOutgoingTransitions(TL)
    FT = sg.GetTransitionsWithPostStates(TL, FS)
    TL = Bmch.list_difference(TL, FT)
    SType = sg.GetSetTypeFromTransList(TL)
    VList = sg.GetVbleList()
    rd_seed = Bmch.read_config(conffile, "rd_seed", "int")
    neg_prop = Bmch.read_config(conffile, "neg_prop", "float")
    cv_prop = Bmch.read_config(conffile, "cv_prop", "float")
    SilasData = sg.SilasTransListToData(TL, SType, VList, neg_prop, rd_seed)
    # First row is the header (VData); the rest are feature rows.
    VData = SilasData[0]
    FData = SilasData[1:len(SilasData)]
    random.seed(rd_seed)
    random.shuffle(FData)
    num_tr = int(len(FData) * (1 - cv_prop))
    TrData = [VData] + FData[0:num_tr]
    CvData = [VData] + FData[num_tr:len(FData)]
    fname = resfolder + "/train.csv"
    Bgenlib.write_list_to_csv(TrData, fname)
    fname = resfolder + "/valid.csv"
    Bgenlib.write_list_to_csv(CvData, fname)
    fname = resfolder + "/datatypes.txt"
    DataTypes = [VList] + SType
    f = open(fname, "w")
    for x in DataTypes:
        f.write(str(x) + "\n")
    f.close()
    # Subtract 1 to exclude the header row from each count.
    Num_Tr = len(TrData) - 1
    Num_Cv = len(CvData) - 1
    return [Num_Tr, Num_Cv]
# Script fragment: validate the .mch extension, recreate the result
# folder, and copy machine + config there under the model's name.
# NOTE(review): the leading exit() belongs to a guard whose condition is
# outside this view.
exit()
if mchfile[-4:len(mchfile)] != ".mch":
    print "Error: the mch file should be \".mch\" file!"
    exit()
cmd = "rm -r " + resfolder
os.system(cmd)
cmd = "mkdir " + resfolder
os.system(cmd)
# Model name = file basename without the ".mch" extension.
p = mchfile.split("/")
mdl_name = p[-1][0:len(p[-1]) - 4]
print "\nModel name is: ", mdl_name
tendency_model = Bmch.read_config(conffile, "tendency_model", "str")
orgmchfile = "%s/%s.mch" % (resfolder, mdl_name)
orgconffile = "%s/%s_%s.config" % (resfolder, mdl_name, tendency_model)
cmd = "cp %s %s" % (mchfile, orgmchfile)
os.system(cmd)
cmd = "cp %s %s" % (conffile, orgconffile)
os.system(cmd)
mchfile = orgmchfile
conffile = orgconffile
num_epoch = Bmch.read_config(conffile, "num_epoch", "int")
max_cost = Bmch.read_config(conffile, "max_cost", "int")
no_dead = Bmch.read_config(conffile, "no_dead", "bool")
no_ass = Bmch.read_config(conffile, "no_ass", "bool")
tendency_model = Bmch.read_config(conffile, "tendency_model", "str")
def TrainingSemanticsModel(M, conf, resdir):
    """Train a semantics (tendency) model for machine M and save it in resdir.

    M --- path to the B machine file.
    conf --- path to the configuration file; its "tendency_model" entry
        selects the model type.
    resdir --- result directory; the training data, a copy of the config
        and the pickled model are written here.

    Returns [Num_Tr, Num_Cv, tmtype, elapsed_time, Acc, AUC] where Num_Tr /
    Num_Cv are the numbers of training / validation examples, tmtype is the
    selected model type, elapsed_time is the wall-clock training time in
    seconds, and Acc / AUC are the classification accuracy and ROC-AUC
    measured on the validation set.

    NOTE(review): the "Logistic" and "ResNet" branches reference an
    undefined name (test_txt) and never assign Acc / AUC (used by the
    return statement), and the "CART" branch is an explicit stub, so only
    the BNB / MLP / LR / SVM / RF / Silas model types can run to
    completion --- confirm before enabling the other branches.
    """
    cmd = "mkdir %s" % resdir
    os.system(cmd)
    # Keep a copy of the configuration in the result directory for provenance.
    conffile = conf
    s = resdir + "/config"
    cmd = "cp %s %s" % (conffile, s)
    os.system(cmd)
    conffile = s
    start_time = time.time()
    # Produce train.csv / valid.csv / datatypes.txt inside resdir.
    N = GeneratingTrainingData(M, conffile, resdir)
    Num_Tr = N[0]
    Num_Cv = N[1]
    training_data = resdir + "/train.csv"
    valid_data = resdir + "/valid.csv"
    datatypes_file = resdir + "/datatypes.txt"
    # Subsequent config reads go back to the original file, not the copy.
    conffile = conf
    # datatypes.txt holds one Python literal per line: the variable list
    # followed by the state types. NOTE(review): f is never closed here,
    # and eval() is applied to the (locally generated) file contents.
    f = open(datatypes_file, "r")
    T = f.readlines()
    DType = []
    for x in T:
        DType.append(eval(x))
    VList = DType[0]
    SType = DType[1:len(DType)]
    print "Training Data:", training_data
    print "Cross Validation Data", valid_data
    tmtype = Bmch.read_config(conffile, "tendency_model", "str")
    # Re-read the CSV data sets and compute semantic feature types.
    sg = Bgenlib.BStateGraphForNN()
    SD = sg.ReadCSVSemanticsDataAndComputeTypes([training_data, valid_data])
    SData = SD[0]
    SemTypes = SD[1]
    # Text versions of the data sets, used by the neural-network trainers.
    train_txt = resdir + "/train.txt"
    valid_txt = resdir + "/valid.txt"
    sg.WriteSemanticDataToTxt(SData[0], train_txt)
    sg.WriteSemanticDataToTxt(SData[1], valid_txt)
    #tmtype = "BNBayes"
    if tmtype == "Logistic":
        # ============== Logistic Model Section ==============
        nnet_idim = len(SData[0][0][0])
        nnet_odim = 2
        logging.basicConfig()
        tr_log = logging.getLogger("mlp.optimisers")
        tr_log.setLevel(logging.DEBUG)
        # Fixed seed so training is reproducible.
        rng = numpy.random.RandomState([2018, 03, 31])
        rng_state = rng.get_state()
        lrate = Bmch.read_config(conffile, "logistic_lrate", "float")
        max_epochs = Bmch.read_config(conffile, "logistic_max_epochs", "int")
        batch_size = Bmch.read_config(conffile, "logistic_minibatch_size", "int")
        #max_epochs = 1000
        #lrate = lrate * 2
        BNNet = BLogistic_Init([nnet_idim, nnet_odim], rng)
        lr_scheduler = LearningRateFixed(learning_rate=lrate, max_epochs=max_epochs)
        #lr_scheduler = LearningRateNewBob(start_rate = lrate, scale_by = 0.5, min_derror_ramp_start = -0.1, min_derror_stop = 0, patience = 100, max_epochs = max_epochs)
        dp_scheduler = None  #DropoutFixed(p_inp_keep=1.0, p_hid_keep=0.9)
        # NOTE(review): test_txt is not defined anywhere in this function,
        # so this call raises a NameError --- confirm the intended file.
        BNNet, Tr_Stat, Cv_Stat, Ev_Stat = BNNet_Semantic_Learning(
            BNNet, lr_scheduler,
            [train_txt, valid_txt, test_txt], dp_scheduler, batch_size=batch_size)
        tmfile = resdir + "/logistic.mdl"
        print "Writing logistic tendency model to %s." % tmfile
        filehandler = open(tmfile, 'w')
        pickle.dump(BNNet, filehandler)
        print "Tendency model has been written to the file."
    elif tmtype == "ResNet":
        # ============== ResNet Net Section ==============
        nnet_idim = len(SData[0][0][0])
        nnet_odim = 2
        logging.basicConfig()
        tr_log = logging.getLogger("mlp.optimisers")
        tr_log.setLevel(logging.DEBUG)
        # Fixed seed so training is reproducible.
        rng = numpy.random.RandomState([2018, 03, 31])
        rng_state = rng.get_state()
        lrate = Bmch.read_config(conffile, "resnet_lrate", "float")
        max_epochs = Bmch.read_config(conffile, "resnet_max_epochs", "int")
        batch_size = Bmch.read_config(conffile, "resnet_minibatch_size", "int")
        num_hid = Bmch.read_config(conffile, "resnet_num_hid", "int")
        num_layers = Bmch.read_config(conffile, "resnet_num_layers", "int")
        #lrate = lrate * 2
        #max_epochs = 200
        BNNet = BResNet_Init([nnet_idim, num_hid, num_layers, nnet_odim], rng, 'Softmax')
        lr_scheduler = LearningRateFixed(learning_rate=lrate, max_epochs=max_epochs)
        #lr_scheduler = LearningRateNewBob(start_rate = lrate, scale_by = 0.5, min_derror_ramp_start = -0.1, min_derror_stop = 0, patience = 100, max_epochs = max_epochs)
        dp_scheduler = None  #DropoutFixed(p_inp_keep=1.0, p_hid_keep=0.9)
        # NOTE(review): same undefined test_txt problem as in the Logistic
        # branch above.
        BNNet, Tr_Stat, Cv_Stat, Ev_Stat = BNNet_Semantic_Learning(
            BNNet, lr_scheduler,
            [train_txt, valid_txt, test_txt], dp_scheduler, batch_size=batch_size)
        tmfile = resdir + "/ResNet.mdl"
        print "Writing ResNet tendency model to %s." % tmfile
        filehandler = open(tmfile, 'w')
        pickle.dump(BNNet, filehandler)
        print "Tendency model has been written to the file."
    elif tmtype == "CART":
        # ============== Classification and Regression Tree Section ==============
        # Deliberate stub: the bare undefined name below raises a NameError
        # before any of the following code runs. NOTE(review): the code
        # after it also references undefined dt / OpeList / tmfile.
        print "Not Implemented Error!"
        Not_Implemented_Error
        tr_data = dt[0] + dt[1] + dt[2]
        num_tree = Bmch.read_config(conffile, "cart_num_tree", "int")
        min_var_exp = Bmch.read_config(conffile, "cart_min_var_exp", "int")
        max_var_exp = Bmch.read_config(conffile, "cart_max_var_exp", "int")
        data_prop = Bmch.read_config(conffile, "cart_data_prop", "float")
        use_mp = Bmch.read_config(conffile, "cart_use_mp", "bool")
        CARTree = RandMultiRegTree(
            data=tr_data, num_tree=num_tree,
            min_var_exp_scale=[min_var_exp, max_var_exp],
            data_prop=data_prop, use_mp=use_mp)
        CARTree.MType = "CART"
        CARTree.SType = SType
        CARTree.OpeList = OpeList
        print "Writing CART tendency model to %s." % tmfile
        filehandler = open(tmfile, 'w')
        pickle.dump(CARTree, filehandler)
        print "Tendency model has been written to the file."
    elif tmtype == "BNB":
        # ============== Bernoulli Naive Bayes Section ==============
        rng = numpy.random.RandomState([2018, 03, 31])
        rng_state = rng.get_state()
        # Split [feature, target] pairs into parallel lists.
        tr_feat = []
        tr_tgt = []
        for x in SData[0]:
            tr_feat.append(x[0])
            tr_tgt.append(x[1])
        tr_data = [tr_feat, tr_tgt]
        cv_feat = []
        cv_tgt = []
        for x in SData[1]:
            cv_feat.append(x[0])
            cv_tgt.append(x[1])
        cv_data = [cv_feat, cv_tgt]
        #num_tree = 256
        #st_time = time.time()
        # Training
        #RF = RandomForestClassifier(n_estimators = num_tree, min_impurity_decrease = 0.0)
        #RF.fit(tr_feat, tr_tgt)
        BNB = BernoulliNB(alpha=1.0, binarize=0.5, class_prior=None, fit_prior=True)
        BNB.fit(tr_feat, tr_tgt)
        # Testing.
        #Acc = RF.score(cv_feat,cv_tgt)
        Acc = BNB.score(cv_feat, cv_tgt)
        print "Accuracy on Cross Validation Set is:", Acc * 100, "%."
        # Probability of the second output class, used for ROC-AUC.
        cv_proba = BNB.predict_proba(cv_feat)[:, 1]
        AUC = roc_auc_score(cv_tgt, cv_proba)
        print "ROC-AUC is:", AUC, "."
        #ed_time = time.time()
        #print ed_time - st_time
        # Attach metadata so the pickled model is self-describing.
        BNB.MdlType = "BNB"
        BNB.VList = VList
        BNB.SType = SType
        BNB.SemTypes = SemTypes
        tmfile = resdir + "/semantics.mdl"
        print "Writing BNB tendency model to %s." % tmfile
        filehandler = open(tmfile, 'w')
        pickle.dump(BNB, filehandler)
        print "Tendency model has been written to the file."
    elif tmtype == "MLP":
        # ============== MLP Section ==============
        rng = numpy.random.RandomState([2018, 03, 31])
        rng_state = rng.get_state()
        # Split [feature, target] pairs into parallel lists.
        tr_feat = []
        tr_tgt = []
        for x in SData[0]:
            tr_feat.append(x[0])
            tr_tgt.append(x[1])
        tr_data = [tr_feat, tr_tgt]
        cv_feat = []
        cv_tgt = []
        for x in SData[1]:
            cv_feat.append(x[0])
            cv_tgt.append(x[1])
        cv_data = [cv_feat, cv_tgt]
        #num_tree = 256
        #st_time = time.time()
        # Training
        # MLP = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(256,5), random_state=1) # This setting is significantly better than the default.
        MLP = MLPClassifier(solver='lbfgs', alpha=1e-5, random_state=1)
        MLP.fit(tr_feat, tr_tgt)
        # Testing.
        #Acc = RF.score(cv_feat,cv_tgt)
        Acc = MLP.score(cv_feat, cv_tgt)
        print "Accuracy on Cross Validation Set is:", Acc * 100, "%."
        cv_proba = MLP.predict_proba(cv_feat)[:, 1]
        AUC = roc_auc_score(cv_tgt, cv_proba)
        print "ROC-AUC is:", AUC, "."
        #ed_time = time.time()
        #print ed_time - st_time
        MLP.MdlType = "MLP"
        MLP.VList = VList
        MLP.SType = SType
        MLP.SemTypes = SemTypes
        tmfile = resdir + "/semantics.mdl"
        # NOTE(review): message says "BNB" but this branch writes the MLP
        # model --- likely a copy/paste slip in the string.
        print "Writing BNB tendency model to %s." % tmfile
        filehandler = open(tmfile, 'w')
        pickle.dump(MLP, filehandler)
        print "Tendency model has been written to the file."
    elif tmtype == "LR":
        # ============== Logistic Regression Section ==============
        rng = numpy.random.RandomState([2018, 03, 31])
        rng_state = rng.get_state()
        # Split [feature, target] pairs into parallel lists.
        tr_feat = []
        tr_tgt = []
        for x in SData[0]:
            tr_feat.append(x[0])
            tr_tgt.append(x[1])
        tr_data = [tr_feat, tr_tgt]
        cv_feat = []
        cv_tgt = []
        for x in SData[1]:
            cv_feat.append(x[0])
            cv_tgt.append(x[1])
        cv_data = [cv_feat, cv_tgt]
        #num_tree = 256
        #st_time = time.time()
        # Training
        LR = LogisticRegression(random_state=0, solver='lbfgs', multi_class='ovr')
        LR.fit(tr_feat, tr_tgt)
        # Testing.
        #Acc = RF.score(cv_feat,cv_tgt)
        Acc = LR.score(cv_feat, cv_tgt)
        print "Accuracy on Cross Validation Set is:", Acc * 100, "%."
        cv_proba = LR.predict_proba(cv_feat)[:, 1]
        AUC = roc_auc_score(cv_tgt, cv_proba)
        print "ROC-AUC is:", AUC, "."
        #ed_time = time.time()
        #print ed_time - st_time
        LR.MdlType = "LR"
        LR.VList = VList
        LR.SType = SType
        LR.SemTypes = SemTypes
        tmfile = resdir + "/semantics.mdl"
        print "Writing LR tendency model to %s." % tmfile
        filehandler = open(tmfile, 'w')
        pickle.dump(LR, filehandler)
        print "Tendency model has been written to the file."
    elif tmtype == "SVM":
        # ============== SVM Section ==============
        rng = numpy.random.RandomState([2018, 03, 31])
        rng_state = rng.get_state()
        # Split [feature, target] pairs into parallel lists.
        tr_feat = []
        tr_tgt = []
        for x in SData[0]:
            tr_feat.append(x[0])
            tr_tgt.append(x[1])
        tr_data = [tr_feat, tr_tgt]
        cv_feat = []
        cv_tgt = []
        for x in SData[1]:
            cv_feat.append(x[0])
            cv_tgt.append(x[1])
        cv_data = [cv_feat, cv_tgt]
        #num_tree = 256
        #st_time = time.time()
        # Training
        #SVM = svm.SVC(kernel='linear')
        # probability=True is required for the predict_proba call below.
        SVM = svm.SVC(kernel='rbf', probability=True)
        SVM.fit(tr_feat, tr_tgt)
        # Testing.
        #Acc = RF.score(cv_feat,cv_tgt)
        Acc = SVM.score(cv_feat, cv_tgt)
        print "Accuracy on Cross Validation Set is:", Acc * 100, "%."
        cv_proba = SVM.predict_proba(cv_feat)[:, 1]
        AUC = roc_auc_score(cv_tgt, cv_proba)
        print "ROC-AUC is:", AUC, "."
        #ed_time = time.time()
        #print ed_time - st_time
        SVM.MdlType = "SVM"
        SVM.VList = VList
        SVM.SType = SType
        SVM.SemTypes = SemTypes
        tmfile = resdir + "/semantics.mdl"
        print "Writing SVM tendency model to %s." % tmfile
        filehandler = open(tmfile, 'w')
        pickle.dump(SVM, filehandler)
        print "Tendency model has been written to the file."
    elif tmtype == "RF":
        # ============== Scikit-learn Random Forests Section ==============
        rng = numpy.random.RandomState([2018, 03, 31])
        rng_state = rng.get_state()
        num_tree = Bmch.read_config(conffile, "rf_num_tree", "int")
        # Split [feature, target] pairs into parallel lists.
        tr_feat = []
        tr_tgt = []
        for x in SData[0]:
            tr_feat.append(x[0])
            tr_tgt.append(x[1])
        tr_data = [tr_feat, tr_tgt]
        cv_feat = []
        cv_tgt = []
        for x in SData[1]:
            cv_feat.append(x[0])
            cv_tgt.append(x[1])
        cv_data = [cv_feat, cv_tgt]
        #num_tree = 256
        #st_time = time.time()
        # Training
        #RF = RandomForestRegressor(n_estimators = num_tree, min_impurity_decrease = 0.0)
        #RF = RandomForestClassifier(n_estimators = num_tree, min_impurity_decrease = 0.0)
        if num_tree <= 0:
            # By default, the number of tree is 10 before scikit-learn version 0.20 and 100 after version 0.22. Here we use 100.
            num_tree = 100
        RF = RandomForestClassifier(n_estimators=num_tree)
        #RF = RandomForestClassifier(min_impurity_decrease = 0.0)
        RF.fit(tr_feat, tr_tgt)
        # Testing.
        Acc = RF.score(cv_feat, cv_tgt)
        print "Accuracy on Cross Validation Set is:", Acc * 100, "%."
        cv_proba = RF.predict_proba(cv_feat)[:, 1]
        AUC = roc_auc_score(cv_tgt, cv_proba)
        print "ROC-AUC is:", AUC, "."
        #ed_time = time.time()
        #print ed_time - st_time
        RF.MdlType = "RF"
        RF.VList = VList
        RF.SType = SType
        RF.SemTypes = SemTypes
        tmfile = resdir + "/semantics.mdl"
        print "Writing RF tendency model (single) to %s." % tmfile
        filehandler = open(tmfile, 'w')
        pickle.dump(RF, filehandler)
        print "Tendency model has been written to the file."
    elif tmtype == "Silas":
        # Train an external Silas random-forest model via its command line.
        silas_dir = resdir + "/silas/"
        cmd = "rm -r %s" % silas_dir
        os.system(cmd)
        cmd = "mkdir %s" % silas_dir
        os.system(cmd)
        cmd = "cp -r src/silas-json-schemata/ json-schemata"
        os.system(cmd)
        cmd = "silas gen-all -o %s %s/train.csv %s/valid.csv" % (
            silas_dir, resdir, resdir)
        os.system(cmd)
        silas_num_tree = Bmch.read_config(conffile, "silas_num_tree", "int")
        silas_feature_proportion = "1.0"
        #silas_num_tree = 3000
        # sf --- Silas settings file, edited in place before training.
        sf = silas_dir + "/settings.json"
        ChangeSilasSetting(sf, "feature_proportion", 0.25, "float")
        ChangeSilasSetting(sf, "max_depth", 32, "int")
        ChangeSilasSetting(sf, "desired_leaf_size", 32, "int")
        #ChangeSilasSetting(sf,"sampling_method","uniform","str")
        # if silas_num_tree < 0, then use default settings.
        if silas_num_tree > 0:
            # ssf --- Silas setting files
            ChangeSilasSetting(sf, "number_of_trees", silas_num_tree, "int")
        """
        ssf = open(f,"r")
        ss = ssf.readlines()
        ssf.close()
        for i in xrange(len(ss)):
            x = ss[i]
            if "number_of_trees" in x:
                y = " \"number_of_trees\": %d,\n"%silas_num_tree
                ss[i] = y
                ssf = open(f,"w")
                for p in ss:
                    ssf.write(p)
                ssf.close()
                break
        """
        # Run training and capture stdout so that Accuracy and ROC-AUC can
        # be scraped from the textual report.
        cmd = "silas learn -o %s/model/ %s/settings.json" % (silas_dir, silas_dir)
        #os.system(cmd)
        P = os.popen(cmd)
        P = P.read()
        print P
        # Get Accuracy.
        i = 0
        x = "Accuracy:"
        while P[i:i + len(x)] != x:
            i = i + 1
        i = i + len(x)
        j = i + 1
        while P[j] != "\n":
            j = j + 1
        Acc = P[i:j]
        Acc = float(Acc)
        # Get ROC-AUC
        i = 0
        x = "ROC-AUC:"
        while P[i:i + len(x)] != x:
            i = i + 1
        i = i + len(x)
        j = i + 1
        while P[j] != "\n":
            j = j + 1
        AUC = P[i:j]
        AUC = float(AUC)
        #cmd = "silas predict -o %s/predictions.csv %s/model %s/valid.csv"%(silas_dir,silas_dir,resdir)
        #os.system(cmd)
        # Wrap directory paths and metadata in a picklable SilasModel.
        SM = SilasModel()
        SM.MdlType = "Silas"
        SM.SilasNumTrees = silas_num_tree
        SM.SilasDir = silas_dir
        SM.Data = []
        SM.Data.append("%s/train.csv" % resdir)
        SM.Data.append("%s/valid.csv" % resdir)
        SM.VList = VList
        SM.SType = SType
        SM.SemTypes = SemTypes
        # Get output labels.
        # smd --- Silas metadata
        f = silas_dir + "/model/metadata.json"
        ssf = open(f, "r")
        ss = ssf.readlines()
        ssf.close()
        # Scan the metadata for the "Available-Transition" collection to
        # determine which output index corresponds to the "Y" label.
        # NOTE(review): if no matching lines exist, label_Y / label_N stay
        # unbound and the assignments below raise a NameError.
        for i in xrange(len(ss) - 1):
            x1 = ss[i]
            x2 = ss[i + 1]
            if "Available-Transition" in x1 and "collection_definition" in x2:
                x3 = ss[i + 2]
                if "N" in x3:
                    label_N = 0
                    label_Y = 1
                elif "Y" in x3:
                    label_Y = 0
                    label_N = 1
                break
        SM.label_Y = label_Y
        SM.label_N = label_N
        tmfile = resdir + "/semantics.mdl"
        print "Writing silas model to %s." % tmfile
        filehandler = open(tmfile, 'w')
        pickle.dump(SM, filehandler)
        print "Tendency model has been written to the file."
    else:
        # Unknown model type: fail loudly via an undefined name (NameError).
        print "Not Implemented Error!"
        Not_Implemented_Error
    end_time = time.time()
    elapsed_time = end_time - start_time
    # Final summary. NOTE(review): Acc and AUC are only assigned in the
    # BNB / MLP / LR / SVM / RF / Silas branches.
    print "Training Finished."
    print "Number of Training Examples:", Num_Tr
    print "Number of Validation Examples:", Num_Cv
    print "Type of Semantics Model:", tmtype
    print "Elapsed Time (s):", elapsed_time
    print "Classification Accuracy:", Acc
    print "ROC-AUC:", AUC
    return [Num_Tr, Num_Cv, tmtype, elapsed_time, Acc, AUC]
fn = resdir + "/M.mch" oscmd = "./../ProB/probcli -pp %s %s" % (fn, M) os.system(oscmd) M = fn print("====== D <-- StateDiagram(M) ======") with open(M) as mchf: mch = mchf.readlines() mch = [x.strip() for x in mch] D = resdir + "/D.txt" max_initialisations = 65536 #Bmch.read_config(conffile,"max_initialisations","int") max_operations = 65536 #Bmch.read_config(conffile,"max_operations","int") bscope = Bmch.generate_training_set_condition(mch) oscmd = "./../ProB/probcli %s -model_check -df -p MAX_DISPLAY_SET -1 -p MAX_INITIALISATIONS %d -p MAX_OPERATIONS %d -nodead -scope \"%s\" -spdot %s -c" % ( M, max_initialisations, max_operations, bscope, D) os.system(oscmd) sg = Bgenlib.BStateGraphForNN() sg.ReadStateGraph(D) D = sg.GetTransList() #for x in D: print x #x = raw_input("ppp") print("====== S <-- AllStates(D) ======") S = RepSimpLib.extract_all_states(D)
cfile.write("%d "%maxlabellength) # Compute NumVble and MaxVbleLength. numvble = 0; maxvblelength = 0; qq = pp.get_node_list() for i in xrange(len(qq)): x = qq[i].get_name() if x == 'root': node_idx = -1 elif x == 'graph': node_idx = -2 else: node_idx = int(x) y = qq[i].get_label() y = Bmch.proc_state_label(y) if len(y)/2 > numvble: numvble = len(y)/2 for j in xrange(len(y)): if j % 2 == 0: if len(y[j]) > maxvblelength: maxvblelength = len(y[j]) cfile.write("%d %d\n"%(numvble,maxvblelength)) qq = pp.get_edge_list() for i in xrange(len(qq)): x = qq[i].get_source() if x == 'root': edge_src = -1