# Example 1
def test_mountain_car(n_states,qhat_file='Data/mc_qhat.pkl',max_eps = 10000):
    """
    Compare an aggregated mountain car with n_states against a plainly
    discretised version with (approximately) the same number of states.

    Supplying a precomputed qhat pickle file drastically speeds computation;
    otherwise qhat is recomputed from scratch via evaluate_MC_qhat().

    Returns a DataFrame with columns 'n_eps', 'disc' and 'aggr'.
    """
    # Side length of the discretisation grid, so grid_side**2 <= n_states.
    grid_side = int(np.floor(np.sqrt(n_states)))

    # Load the cached action-value estimate if available, else recompute it.
    if qhat_file:
        with open(qhat_file, 'rb') as handle:
            qhat = pickle.load(handle)
    else:
        qhat = evaluate_MC_qhat()

    aggregation = Aggregation.generateAggregation(qhat,
                                                  target_divisions=n_states)

    problem_disc = Problems.MountainCar(representation='disc',
                                        divisions=grid_side)
    problem_aggr = Problems.MountainCar(representation='aggr',
                                        aggregation=aggregation,
                                        divisions=100)

    agent_disc = Agents.QAgent(problem_disc, alpha=1e-3)
    agent_aggr = Agents.QAgent(problem_aggr, alpha=1e-3)

    disc_results = agent_disc.run_n_episodes(max_eps)
    aggr_results = agent_aggr.run_n_episodes(max_eps)

    # Episode counts sampled at powers of two up to max_eps.
    # NOTE(review): assumes run_n_episodes returns one value per power of
    # two so the column lengths match -- confirm against Agents.QAgent.
    episode_counts = 2 ** np.arange(1, int(np.log2(max_eps)) + 1)

    return pd.DataFrame({'n_eps': episode_counts,
                         'disc': disc_results,
                         'aggr': aggr_results})
# Example 2
    def test_Q(self):
        """
        Run one Q-learning episode on the raw and aggregated ESA problems
        and check the learned Q-values stay close to the problems' stored
        reference Q-values.
        """
        p_raw, p_agg = setupESA()

        ql_raw = Agents.QAgent(p_raw, 1)
        ql_agg = Agents.QAgent(p_agg, 1)

        ql_raw.episode(timeout=1000)
        ql_agg.episode(timeout=1000)

        # Mean absolute deviation from the reference Q-values.
        # The divisors (4 and 2) are presumably the respective state
        # counts of the raw and aggregated problems -- TODO confirm.
        delta_r = sum(abs(ql_raw.qValues[0] - p_raw.qValues[0])) / 4
        delta_a = sum(abs(ql_agg.qValues[0] - p_agg.qValues[0])) / 2

        print("\nQ learning raw delta = {}, agg delta = {}".format(
            delta_r, delta_a))

        # assertLess reports both operands on failure, unlike
        # assertTrue(a < b) which only says "False is not true".
        self.assertLess(delta_r, 1e-1)
        self.assertLess(delta_a, 1e-1)
# Example 3
def compare_raw_agg_ql(param_tuples,timeout=1000,gamma=0.5,aggtype='q',log_prob=False,rep=1,alpha=0.005,decayAlpha=False):
    """
    Generate a random raw/aggregated problem pair for every parameter tuple,
    run Q-learning on both for `timeout` steps, and record the value-function
    deviations, repeating each tuple `rep` times.

    If log_prob is True the generated problems are pickled so they can be
    retrieved later.  Returns a DataFrame with one row per (tuple, repeat).

    NOTE(review): `aggtype` is accepted but never used in this body.
    """
    # One row per generated problem instance, 9 recorded quantities each.
    results = np.zeros((len(param_tuples) * rep, 9))

    if log_prob:
        problems_by_id = {}
        stamp = datetime.today().strftime('%d-%m-%Y--%H_%M_%S')
        out_name = 'problems' + stamp + '.pkl'

    for row, (n, n_agg, b, acts, e_noise) in enumerate(param_tuples):
        for repeat in range(rep):
            p_raw, p_agg, _ = Problems.genRandomProblems(
                n, n_agg, acts, b, gamma=gamma, e_noise=e_noise)

            # Identify each problem by a hash of its transition matrix.
            pid = hash(str(p_raw.transitions))

            if log_prob:
                problems_by_id[pid] = {'raw': p_raw, 'agg': p_agg}

            learner_raw = Agents.QAgent(p_raw, alpha=alpha)
            learner_agg = Agents.QAgent(p_agg, alpha=alpha)

            learner_raw.episode(timeout=timeout, decayAlpha=decayAlpha)
            learner_agg.episode(timeout=timeout, decayAlpha=decayAlpha)

            deltas_raw = Evaluation.getDeltas(learner_raw, p_raw)
            deltas_agg = Evaluation.getDeltas(learner_agg, p_agg,
                                              agg=p_agg.aggregation)

            dtilde = Evaluation.nonMarkovianity(p_agg.transitions[0],
                                                p_agg.aggregation)

            results[row * rep + repeat] = (pid, n, n_agg, b, acts, e_noise,
                                           dtilde,
                                           np.average(deltas_raw),
                                           np.average(deltas_agg))

    if log_prob:
        # NOTE(review): `path` is not defined in this function -- presumably
        # a module-level constant; verify before relying on this branch.
        with open(path + out_name, 'wb') as f:
            pickle.dump(problems_by_id, f)

    return pd.DataFrame(
        data=results,
        columns=['pid', 'n', 'n_agg', 'b', 'acts', 'e_noise',
                 'nonmarkovianity', 'd_r', 'd_a'])
# Example 4
def test_convergence_ql(param_tuple,timeout=100,interval=100,gamma=0.9,aggtype='q',intervals=100):
    """
    Generate a random raw/aggregated problem pair from a (single) parameter
    tuple, run Q-learning and record value-function deviations at regular
    intervals.

    Parameters
    ----------
    param_tuple : tuple of (n, n_agg, b, acts, e_noise) problem parameters.
    timeout : Q-learning steps run between consecutive measurements.
    interval : step count used to label the 'n' (x-axis) column.
    gamma : discount factor for the generated problems.
    aggtype : unused here; kept for signature compatibility with
        compare_raw_agg_ql.
    intervals : number of measurements to record.  New keyword with a
        default -- the original body referenced an undefined name
        `intervals`, which raised NameError.

    Returns a DataFrame with columns ['n', 'd_a', 'd_r'].
    """
    n, n_agg, b, acts, e_noise = param_tuple
    # Fixed: the module is named `Problems` (capitalised) everywhere else
    # in this file; lowercase `problems` was an undefined name.
    p_r, p_a, _ = Problems.genRandomProblems(n, n_agg, acts, b,
                                             gamma=gamma, e_noise=e_noise)

    agent_r = Agents.QAgent(p_r, alpha=1e-1)
    agent_a = Agents.QAgent(p_a, alpha=1e-1)

    dmat = np.zeros((intervals, 3))

    for i in range(intervals):
        agent_r.episode(timeout=timeout)
        agent_a.episode(timeout=timeout)

        # Average the per-state deltas into scalars, matching how
        # compare_raw_agg_ql records getDeltas results.
        delta_a = np.average(Evaluation.getDeltas(agent_a, p_a))
        delta_r = np.average(Evaluation.getDeltas(agent_r, p_r))

        dmat[i] = (i * interval, delta_a, delta_r)

    return pd.DataFrame(data=dmat, columns=['n', 'd_a', 'd_r'])
# Example 5
def evaluate_MC_qhat():
    """
    Train a tile-coded Q-learning agent on mountain car and return its
    approximate Q-values evaluated on a 100-division grid.

    Slow: runs 35000 training episodes; callers typically cache the result
    (see test_mountain_car's qhat_file argument).
    """
    problem = Problems.MountainCar(representation='tile')
    agent = Agents.QAgent(problem, alpha=0.01, tiles=True)
    agent.run_n_episodes(35000)
    # Fixed: the original returned `agent.approxQValues(100)` while the
    # local variable was named `a`, raising NameError.
    return agent.approxQValues(100)