def test_mountain_car(n_states, qhat_file='Data/mc_qhat.pkl', max_eps=10000):
    """
    Compares an aggregated mountain car with n_states against a simple
    discretised version of the same problem.

    Supplying a precalculated qhat file drastically speeds up computation.
    """
    divs = int(np.floor(np.sqrt(n_states)))
    if qhat_file:
        with open(qhat_file, 'rb') as f:
            qhat = pickle.load(f)
    else:
        qhat = evaluate_MC_qhat()
    agg = Aggregation.generateAggregation(qhat, target_divisions=n_states)
    mc_d = Problems.MountainCar(representation='disc', divisions=divs)
    mc_a = Problems.MountainCar(representation='aggr', aggregation=agg, divisions=100)
    ag_d = Agents.QAgent(mc_d, alpha=1e-3)
    ag_a = Agents.QAgent(mc_a, alpha=1e-3)
    d_eps = ag_d.run_n_episodes(max_eps)
    a_eps = ag_a.run_n_episodes(max_eps)
    # Episode counts at which run_n_episodes reports results (powers of two)
    n_eps = np.array([2**i for i in range(1, int(np.log2(max_eps)) + 1)])
    data = pd.DataFrame()
    data['n_eps'] = n_eps
    data['disc'] = d_eps
    data['aggr'] = a_eps
    return data
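# A minimal usage sketch for test_mountain_car, assuming matplotlib is
# installed; the function name and plot styling are illustrative only and
# not part of the module.
def _plot_mountain_car_comparison(n_states=64):
    import matplotlib.pyplot as plt
    data = test_mountain_car(n_states)
    plt.plot(data['n_eps'], data['disc'], label='discretised')
    plt.plot(data['n_eps'], data['aggr'], label='aggregated')
    plt.xscale('log')
    plt.xlabel('episodes')
    plt.legend()
    plt.show()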
def test_Q(self):
    p_raw, p_agg = setupESA()
    ql_raw = Agents.QAgent(p_raw, 1)
    ql_agg = Agents.QAgent(p_agg, 1)
    ql_raw.episode(timeout=1000)
    ql_agg.episode(timeout=1000)
    # Mean absolute deviation of the learned q-values from the problem's
    # reference q-values
    delta_r = sum(abs(ql_raw.qValues[0] - p_raw.qValues[0])) / 4
    delta_a = sum(abs(ql_agg.qValues[0] - p_agg.qValues[0])) / 2
    print("\nQ learning raw delta = {}, agg delta = {}".format(delta_r, delta_a))
    self.assertLess(delta_r, 1e-1)
    self.assertLess(delta_a, 1e-1)
def compare_raw_agg_ql(param_tuples, timeout=1000, gamma=0.5, aggtype='q',
                       log_prob=False, rep=1, alpha=0.005, decayAlpha=False):
    """
    Generates random problems from the supplied parameter tuples, runs
    q-learning on each, and records the value-function deviations after
    timeout steps.

    If log_prob is True, the generated problems are pickled so that they
    can be retrieved later by their pid.
    """
    dmat = np.zeros((len(param_tuples) * rep, 9))
    if log_prob:
        problem_dict = {}
        d = datetime.today().strftime('%d-%m-%Y--%H_%M_%S')
        filename = 'problems' + d + '.pkl'
    for i, (n, n_agg, b, acts, e_noise) in enumerate(param_tuples):
        for j in range(rep):
            p_r, p_a, _ = Problems.genRandomProblems(n, n_agg, acts, b,
                                                     gamma=gamma, e_noise=e_noise)
            pid = hash(str(p_r.transitions))
            if log_prob:
                problem_dict[pid] = {'raw': p_r, 'agg': p_a}
            agent_r = Agents.QAgent(p_r, alpha=alpha)
            agent_a = Agents.QAgent(p_a, alpha=alpha)
            agent_r.episode(timeout=timeout, decayAlpha=decayAlpha)
            agent_a.episode(timeout=timeout, decayAlpha=decayAlpha)
            delta_r = Evaluation.getDeltas(agent_r, p_r)
            delta_a = Evaluation.getDeltas(agent_a, p_a, agg=p_a.aggregation)
            dtilde = Evaluation.nonMarkovianity(p_a.transitions[0], p_a.aggregation)
            dmat[i * rep + j] = (pid, n, n_agg, b, acts, e_noise, dtilde,
                                 np.average(delta_r), np.average(delta_a))
    if log_prob:
        # 'path' is assumed to be a module-level output directory string
        # (e.g. 'Data/')
        with open(path + filename, 'wb') as f:
            pickle.dump(problem_dict, f)
    data = pd.DataFrame(data=dmat,
                        columns=['pid', 'n', 'n_agg', 'b', 'acts', 'e_noise',
                                 'nonmarkovianity', 'd_r', 'd_a'])
    return data
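# A hedged sketch of building a parameter grid for compare_raw_agg_ql with
# itertools.product; the function name and the specific parameter values are
# illustrative only.
def _run_param_sweep():
    from itertools import product
    ns = [50, 100]          # raw state counts
    n_aggs = [10]           # aggregate state counts
    bs = [5]                # values for the b parameter of genRandomProblems
    acts = [2, 4]           # action counts
    e_noises = [0.0, 0.1]   # e_noise values
    param_tuples = list(product(ns, n_aggs, bs, acts, e_noises))
    return compare_raw_agg_ql(param_tuples, timeout=1000, rep=3, log_prob=True)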
def test_convergence_ql(param_tuple, timeout=100, intervals=100, gamma=0.9, aggtype='q'):
    """
    Generates a random problem from a single parameter tuple, runs
    q-learning, and records the value-function deviation at regular
    intervals (one recording per episode of timeout steps).
    """
    n, n_agg, b, acts, e_noise = param_tuple
    p_r, p_a, _ = Problems.genRandomProblems(n, n_agg, acts, b,
                                             gamma=gamma, e_noise=e_noise)
    agent_r = Agents.QAgent(p_r, alpha=1e-1)
    agent_a = Agents.QAgent(p_a, alpha=1e-1)
    dmat = np.zeros((intervals, 3))
    for i in range(intervals):
        agent_r.episode(timeout=timeout)
        agent_a.episode(timeout=timeout)
        # Mean deviation over states, as in compare_raw_agg_ql
        delta_a = np.average(Evaluation.getDeltas(agent_a, p_a, agg=p_a.aggregation))
        delta_r = np.average(Evaluation.getDeltas(agent_r, p_r))
        dmat[i] = (i * timeout, delta_a, delta_r)
    data = pd.DataFrame(data=dmat, columns=['n', 'd_a', 'd_r'])
    return data
def evaluate_MC_qhat():
    p = Problems.MountainCar(representation='tile')
    a = Agents.QAgent(p, alpha=0.01, tiles=True)
    a.run_n_episodes(35000)
    qv = a.approxQValues(100)
    return qv
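# A hedged sketch of precomputing and caching qhat so that test_mountain_car
# can load it from its default path instead of retraining; the helper name is
# hypothetical and the 'Data/' directory is assumed to exist.
def _cache_MC_qhat(path='Data/mc_qhat.pkl'):
    qhat = evaluate_MC_qhat()
    with open(path, 'wb') as f:
        pickle.dump(qhat, f)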