def build_inferred_model(mod, tutor, after_k_iterations):
    tutor.filterlist = clean_filterlist(tutor.filterlist)
    dummod = Domain()
    dummod.concepts = [Concept(i) for i, c in enumerate(mod.concepts)]
    for k, v in tutor.filterlist.items():
        print(k, state_as_str(v))
        con = dummod.concepts[k]
        pixs = [ix for ix, bl in enumerate(v) if bl]  # get ids where state entry is True
        for i in pixs:
            con.predecessors.append(dummod.concepts[i])
    gvrender(dummod, "inferred" + str(after_k_iterations))
    return dummod
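# A minimal sketch of the state_as_str helper used above, assuming a state is a
# tuple of booleans rendered as one '1'/'0' character per concept (consistent with
# the reverse mapping tuple([bool(int(x)) for x in s]) used in the SARSA debug
# output below); the repo's actual helper may differ.
def state_as_str(state):
    """Render a tuple of booleans such as (True, False, True) as '101'."""
    return "".join("1" if known else "0" for known in state)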
def run_episode(self, model, stu, max_steps=-1, update_qvals=True):
    actions = model.concepts
    S = tuple([False] * len(actions))  # reset the tutor's state .. we assume the student knows nothing
    self.extend_Q(S, actions)
    self._new_trace()
    max_steps = float('inf') if max_steps <= 0 else max_steps  # hack up an infinite number of attempts here
    step_cnt = 0
    while step_cnt <= max_steps and (False in S):
        A, exp = self.choose_A(S, actions)
        # if A.id not in self.filterlist:
        #     self.filterlist[A.id] = [True] * len(actions)
        succ = stu.try_learn(A)
        if succ:
            # update_filter(self.filterlist, S, A.id, succ)
            R = -1.0
            new_S = self.get_next_state(S, A)
            self._add_to_trace(S, A, True)
            self.extend_Q(new_S, actions)
            if False not in new_S:
                R = 10000.0
        else:
            R = -1.0
            new_S = S
            self._add_to_trace(S, A, False)
        if update_qvals:
            self.sa_update(S, A, R, new_S, actions)
        S = new_S
        step_cnt += 1
        print(state_as_str(S), step_cnt)
    print("Qutor: Episode over in", step_cnt, "steps")
    return step_cnt
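# A minimal sketch of the get_next_state transition used in the loop above (the
# real version is a method on the tutor), assuming each concept's .id indexes its
# slot in the boolean state tuple, so a successful attempt at A marks it as known.
def get_next_state(S, A):
    """Return a copy of state S with the entry for concept A set to True."""
    new_S = list(S)
    new_S[A.id] = True
    return tuple(new_S)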
def run_episode(self, model, stu, max_steps=-1, update_qvals=True):
    self._new_trace()
    actions = model.concepts
    S = tuple([False] * len(actions))  # reset the tutor's state .. we assume the student knows nothing
    self.extend_Q(S, actions)
    # max_steps = float('inf') if max_steps <= 0 else max_steps  # hack up an infinite number of attempts here
    step_cnt = 0
    A, exp = self.choose_A(S, actions)  # for SARSA we must pick the initial task outside the loop
    As = []
    Ss = []
    Qs = []
    msgs = []
    self.history = []
    # while step_cnt <= max_steps and not self.mission_complete():
    while (max_steps <= 0 or step_cnt <= max_steps) and (False in S):
        self.history = [[S, A]] + self.history
        # if len(self.history) > self.history_limit:
        #     self.history.pop()

        # RECORD KEEPING
        As.append(A.id)
        Ss.append(state_as_str(S))
        Qs.append(self.Q[S][A])

        # INFERENCE
        # if A.id not in self.filterlist:
        #     self.filterlist[A.id] = [True] * len(actions)

        # STATE UPDATE / Q VALS
        msgs.append("Attempt {} {} -> ? Q= {}".format(state_as_str(S), A.id, self.Q[S][A]))
        print(self.name, end=" - ")
        succ = stu.try_learn(A)
        self._add_to_trace(S, A, succ)
        if succ:
            new_S = self.get_next_state(S, A)
            self.extend_Q(new_S, actions)
            R = -1.0
            if False not in new_S:
                R = 100.0  # basically if we've learned everything, get a big treat
            if self.DEBUG:
                print("success learned", A.id, "--> new S=", state_as_str(new_S))
            # update_filter(self.filterlist, S, A.id, succ)  # we use successful transitions as evidence to eliminate invalidly hypothesised dependencies
        else:
            new_S = S
            R = -1.0
        msgs.append("{} {} -> {} Q={} R={} {} {}".format(
            state_as_str(S), A.id, state_as_str(new_S), self.Q[S][A], R,
            "S" if succ else "F", "X" if exp else "-"))
        new_A, exp = self.choose_A(new_S, actions)
        if A == new_A:
            print(" * Will try to learn repeat lesson", A if A is None else A.id, "X" if exp else "-")
        if update_qvals:
            self.sa_update(S, A, R, new_S, new_A)
        S = new_S
        A = new_A
        step_cnt += 1
    if step_cnt == max_steps:
        print("Terminated at step limit!")
    print("SARSA-L: Episode over in", step_cnt, "steps")
    if self.DEBUG:
        for m in msgs:
            print(m)
        # print("States were:", Ss)
        # print("Actions were:", As)
        # print("Action vals were:", Qs)
        print("Q(S,A) values:")
        for s in Ss:
            sb = tuple([bool(int(x)) for x in s])
            print(sb)
            for a in self.Q[sb]:
                print(s, a.id, self.Q[sb][a])
            print("")
    return step_cnt
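# A minimal sketch of the one-step SARSA update implied by the call
# sa_update(S, A, R, new_S, new_A) above; self.alpha and self.gamma are assumed
# hyperparameter names, and a finished state (no False entries) is treated as
# terminal with no bootstrapped future value. The repo's implementation may differ.
def sa_update(self, S, A, R, new_S, new_A):
    """Q(S,A) <- Q(S,A) + alpha * (R + gamma * Q(S',A') - Q(S,A))."""
    target = R
    if False in new_S:  # non-terminal: bootstrap from the action actually chosen next
        target += self.gamma * self.Q[new_S][new_A]
    self.Q[S][A] += self.alpha * (target - self.Q[S][A])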
for batch in batches:
    num_missions = batch['num_missions']
    more_missions = num_missions - last_num
    last_num = num_missions
    # tutor = Qutor(N, alpha, eps)
    log = tutor.train(models, student, more_missions, float("inf"))
    mnum, runlength = log[-1]
    # save_policy(tutor.qs, "policy_file.dat")
    tutor.filterlist = clean_filterlist(tutor.filterlist)
    dummod = Domain()
    dummod.concepts = [Concept(i) for i, c in enumerate(mod.concepts)]
    for k, v in tutor.filterlist.items():
        print(k, state_as_str(v))
        con = dummod.concepts[k]
        pixs = [ix for ix, bl in enumerate(v) if bl]  # get ids where state entry is True
        for i in pixs:
            con.predecessors.append(dummod.concepts[i])
    gvrender(dummod, "inferred" + str(num_missions))
    err = score_domain_similarity(mod, dummod)
    main_log.append((num_missions, runlength, err))

plt.ylabel('Steps to learn {} concepts (BMC{} Domain)'.format(N, branch_factor))
plt.xlabel('Mission #')
num_missions, runlength, err = zip(*main_log)
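# A minimal sketch of the plotting call that would follow the unpacking above,
# assuming plt is matplotlib.pyplot as elsewhere in this script; the original
# figure-drawing code is not shown here.
plt.plot(num_missions, runlength)
plt.show()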
def _add_to_trace(self, S, A, passed=True):
    self.transition_trace[-1].append((state_as_str(S), A.id, passed))
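# A minimal sketch of the companion _new_trace method assumed by run_episode,
# treating transition_trace as a list of per-episode lists that _add_to_trace
# appends into; the repo's actual implementation may differ.
def _new_trace(self):
    """Open a fresh per-episode trace for _add_to_trace to append into."""
    self.transition_trace.append([])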
cnt_a_attempts_kc[a_id][ix] += 1
if succ:
    cnt_a_successs_kc[a_id][ix] += 1
    # cnt_a_successs[a_id] += 1

if succ:
    # print("SBLNS", s_blns)
    if True not in s_blns:
        entries.add(a_id)
    if s_blns.count(False) == 1:
        leaves.add(a_id)

if succ:
    record = fl[a_id]  # lazy initialise
    new = [(s and r) for s, r in zip(s_blns, record)]
    print(a_id, state_as_str(s_blns), state_as_str(record), "->", state_as_str(new))
    fl[a_id] = new
    t_to_a[a_id] += 1
    # here we want the cond probs P(knows(S) | A)
    sids = [i for i, s in enumerate(s_blns) if s]
    for sid in sids:
        # print(a_id, sid)
        # print(t_pred_cnt[a_id])
        t_pred_cnt[a_id][sid] = t_pred_cnt[a_id][sid] + 1

cln = clean_filterlist(fl)  # remove redundant arcs
p, r, F = _score_similarity(model, cln)
ps.append(p)
rs.append(r)
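# A minimal sketch of turning the counts gathered above into the conditional
# probabilities P(knows(sid) | learned a_id) mentioned in the comment, assuming
# t_to_a[a_id] counts successful attempts at a_id and t_pred_cnt[a_id][sid] counts
# how often concept sid was already known on those attempts; this helper and its
# name are illustrative only.
def cond_probs(t_pred_cnt, t_to_a):
    probs = {}
    for a_id, counts in t_pred_cnt.items():
        n = t_to_a[a_id]
        probs[a_id] = [c / n if n > 0 else 0.0 for c in counts]
    return probs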