Example #1
def build_inferred_model(mod, tutor, after_k_iterations):
    """Build a dummy Domain whose prerequisite arcs are read off the tutor's filter list."""
    tutor.filterlist = clean_filterlist(tutor.filterlist)
    dummod = Domain()
    dummod.concepts = [Concept(i) for i, c in enumerate(mod.concepts)]
    for k, v in tutor.filterlist.items():
        print(k, state_as_str(v))
        con = dummod.concepts[k]
        pixs = [ix for ix, bl in enumerate(v) if bl]  # indices where the state entry is True
        for i in pixs:
            con.predecessors.append(dummod.concepts[i])
    gvrender(dummod, "inferred" + str(after_k_iterations))
    return dummod
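These examples lean on helpers that are not shown. As a point of reference only, below is a minimal sketch of state_as_str that is consistent with how Example #3 later decodes its output (a string of 0/1 characters, one per concept); clean_filterlist is assumed elsewhere to prune redundant arcs from the inferred filter list and is not reproduced here.

def state_as_str(state):
    # render a boolean state tuple as a 0/1 string, e.g. (True, False, True) -> "101"
    return "".join("1" if b else "0" for b in state)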
Example #2
    def run_episode(self, model, stu, max_steps=-1, update_qvals=True):
        actions = model.concepts

        S = tuple([False] * len(actions)) #reset the tutor's state .. we assume the student knows nothing
        self.extend_Q(S, actions)
        
        self._new_trace()

        max_steps = float('inf') if max_steps <= 0 else max_steps  # treat non-positive max_steps as an unlimited number of attempts
        step_cnt = 0

        while step_cnt <= max_steps and (False in S):
            A,exp = self.choose_A(S, actions)
            
#             if A.id not in self.filterlist:
#                 self.filterlist[A.id]= [True]*len(actions)
    
            succ = stu.try_learn(A)
            if succ:
#                 update_filter(self.filterlist, S, A.id, succ)
                R=-1.0
                new_S = self.get_next_state(S,A)
                self._add_to_trace(S, A, True)
                self.extend_Q(new_S, actions)
                if (False not in new_S):
                    R=10000.0
            else:
                R=-1.0
                new_S = S
                self._add_to_trace(S, A, False)
            
            if update_qvals:
                self.sa_update(S, A, R, new_S, actions)
            
            S = new_S
            step_cnt+=1
            print(state_as_str(S), step_cnt)
        print("Qutor: Episode over in", step_cnt, "steps")
        return step_cnt
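The episode loop above also assumes two small tutor methods that are not part of the snippet: get_next_state, which marks the attempted concept as known, and extend_Q, which makes sure a newly reached state has action values. The standalone sketch below is an assumption based on how they are called (A.id indexing the state tuple, Q as a dict of dicts), not the project's actual code.

def get_next_state(S, A):
    # copy the state and mark concept A as learned
    new_S = list(S)
    new_S[A.id] = True
    return tuple(new_S)

def extend_Q(Q, S, actions):
    # give an unseen state a zero-initialised action-value entry for every action
    if S not in Q:
        Q[S] = {a: 0.0 for a in actions}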
Example #3
    def run_episode(self, model, stu, max_steps=-1, update_qvals=True):
        self._new_trace()
        actions = model.concepts
        S = tuple(
            [False] * len(actions)
        )  #reset the tutor's state .. we assume the student knows nothing
        self.extend_Q(S, actions)
        #max_steps = float('inf') if max_steps<=0 else max_steps # hack up an infinite number of attempts here
        step_cnt = 0

        A, exp = self.choose_A(
            S, actions
        )  # for SARSA we must pick the initial task outside the loop
        As = []
        Ss = []
        Qs = []
        msgs = []
        self.history = []
        #while step_cnt<=max_steps and not self.mission_complete():
        while (max_steps <= 0 or step_cnt <= max_steps) and (False in S):
            self.history = [[S, A]] + self.history
            #             if self.history > self.history_limit:
            #                 self.history.pop()

            #RECORD KEEPING
            As.append(A.id)
            Ss.append(state_as_str(S))

            Qs.append(self.Q[S][A])

            #INFERENCE
            #             if A.id not in self.filterlist:
            #                 self.filterlist[A.id]= [True]*len(actions)

            #STATE UPDATE/Q VALS
            msgs.append("Attempt {} {} -> ? Q= {}".format(
                state_as_str(S), A.id, self.Q[S][A]))

            print(self.name, end=" - ")
            succ = stu.try_learn(A)
            self._add_to_trace(S, A, succ)
            if succ:

                new_S = self.get_next_state(S, A)
                self.extend_Q(new_S, actions)

                R = -1.0
                if (False not in new_S):
                    R = 100.0  #basically if we've learned everything, get a big treat
                if self.DEBUG:
                    print("success learned", A.id, "--> new S=",
                          state_as_str(new_S))
#                 update_filter(self.filterlist, S, A.id, succ)# we use successful transitions as evidence to eliminate invalidly hypothesised dependencies
            else:
                new_S = S
                R = -1.0

            msgs.append("{} {} -> {} Q={} R={} {} {}".format(
                state_as_str(S), A.id, state_as_str(new_S), self.Q[S][A], R,
                "S" if succ else "F", "X" if exp else "-"))

            new_A, exp = self.choose_A(new_S, actions)
            if A == new_A:
                print("        *        Will try to Learn repeat lesson",
                      A.id if A is not None else A, "X" if exp else "-")

            if update_qvals:
                self.sa_update(S, A, R, new_S, new_A)

            S = new_S
            A = new_A
            step_cnt += 1

        if max_steps > 0 and step_cnt > max_steps:
            print("Terminated at step limit!")
        print("SARSA-L: Episode over in", step_cnt, "steps")
        if self.DEBUG:
            for m in msgs:
                print(m)
#             print("States were:",Ss)
#             print("Actions were:",As)
#             print("Actions vals were:",Qs)
            print("Q(S,A) values:")
            for s in Ss:
                sb = tuple([bool(int(x)) for x in s])
                print(sb)
                for a in self.Q[sb]:
                    print(s, a.id, self.Q[sb][a])
                print("")

        return step_cnt
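Both episode runners call choose_A(S, actions), which returns the chosen concept together with a flag saying whether the choice was exploratory. A plausible epsilon-greedy sketch is shown below; the eps value and the tie-breaking are assumptions (the commented-out constructor in the next example, Qutor(N, alpha, eps), suggests eps is a tutor parameter).

import random

def choose_A(Q, S, actions, eps=0.1):
    # with probability eps pick a random concept (exploration), otherwise the
    # highest-valued action for this state; the second return value flags exploration
    if random.random() < eps:
        return random.choice(actions), True
    return max(actions, key=lambda a: Q[S][a]), False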
Example #4
    for batch in batches:
        num_missions = batch['num_missions']
        more_missions = num_missions - last_num
        last_num = num_missions
        #                 tutor = Qutor(N, alpha, eps)
        log = tutor.train(models, student, more_missions, float("inf"))
        mnum, runlength = log[-1]
        #save_policy(tutor.qs, "policy_file.dat")

        tutor.filterlist = clean_filterlist(tutor.filterlist)

        dummod = Domain()
        dummod.concepts = [Concept(i) for i, c in enumerate(mod.concepts)]

        for k, v in tutor.filterlist.items():
            print(k, state_as_str(v))
            con = dummod.concepts[k]
            pixs = [ix for ix, bl in enumerate(v)
                    if bl]  #get ids where state entry is True
            for i in pixs:
                con.predecessors.append(dummod.concepts[i])
        gvrender(dummod, "inferred" + str(num_missions))

        err = score_domain_similarity(mod, dummod)
        main_log.append((num_missions, runlength, err))

    plt.ylabel('Steps to learn {} concepts (BMC{} Domain)'.format(
        N, branch_factor))
    plt.xlabel('Mission #')
    num_missions, runlength, err = zip(*main_log)
Example #5
    def _add_to_trace(self, S, A, passed=True):
        self.transition_trace[-1].append((state_as_str(S), A.id, passed))
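_add_to_trace appends to the last element of transition_trace, so _new_trace (called at the top of each episode in Examples #2 and #3) presumably starts a fresh per-episode list. The class below is only an illustrative guess at that pairing; the class name is invented.

class TraceKeeperSketch:
    def __init__(self):
        # one sub-list per episode, each holding (state string, concept id, passed) tuples
        self.transition_trace = []

    def _new_trace(self):
        # start a fresh per-episode trace for _add_to_trace to append into
        self.transition_trace.append([])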
Example #6
                cnt_a_attempts_kc[a_id][ix] += 1
                if succ:
                    cnt_a_successs_kc[a_id][ix] += 1
#             cnt_a_successs[a_id]+=1

        if succ:
#             print("SBLNS",s_blns)
            if True not in s_blns:
                entries.add(a_id)  # succeeded from the all-unknown state
            if s_blns.count(False) == 1:
                leaves.add(a_id)  # succeeded when it was the only unknown concept

        if succ:
            record = fl[a_id]  # lazy initialise
            new = [(s and r) for s, r in zip(s_blns, record)]
            print(a_id, state_as_str(s_blns), state_as_str(record), "->", state_as_str(new))
            fl[a_id] = new

            t_to_a[a_id] += 1
            # here we want the cond probs P(knows(S) | A)
            sids = [i for i, s in enumerate(s_blns) if s]
            for sid in sids:
#                 print(a_id, sid)
#                 print(t_pred_cnt[a_id])
                t_pred_cnt[a_id][sid] += 1


        cln = clean_filterlist(fl)  # remove redundant arcs
        p, r, F = _score_similarity(model, cln)
        ps.append(p)
        rs.append(r)
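_score_similarity itself is not shown; given that it returns a precision, a recall and an F value for the cleaned filter list against the true model, one plausible reading is an arc-level comparison like the sketch below, where predicted predecessor flags are matched against the true concepts' predecessors. The function name and the exact arc encoding are assumptions, not the project's implementation.

def score_arcs(model, filterlist):
    # arcs encoded as (concept id, predecessor id) pairs
    true_arcs = {(c.id, p.id) for c in model.concepts for p in c.predecessors}
    pred_arcs = {(cid, ix) for cid, flags in filterlist.items()
                 for ix, flag in enumerate(flags) if flag}
    tp = len(true_arcs & pred_arcs)
    precision = tp / len(pred_arcs) if pred_arcs else 0.0
    recall = tp / len(true_arcs) if true_arcs else 0.0
    f = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    return precision, recall, f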