def runexp(env, agent, hasP=True):
    f_ext = FeatureTrueState(env.epLen, env.nState, env.nAction, env.nState)

    # Run the experiment
    global seed
    seed += 1
    # returns: cumReward, cumQueryCost, perf, cumRegret
    return run_finite_tabular_experiment(agent, env, f_ext, num_episodes, seed,
                        recFreq=1000, fileFreq=10000, targetPath='')   
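
A minimal usage sketch, assuming an env and agent built as in the later examples; the four-tuple layout is taken from the comment above:

cumReward, cumQueryCost, perf, cumRegret = runexp(env, agent)
print 'perf:', perf, 'regret:', cumRegret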
Example 2
                    if use_real_env:
                        env_sample = env
                    else:
                        sampled_R = initial_agent.sample_mdp()[0]
                        print sampled_R
                        env_sample = gridworld.make_mdp(env.nState, env.nAction, env.epLen, sampled_R, env.P)

                    query_function = query_functions.QueryFirstNVisits(query_cost, n)
                    agent = alg(env.nState, env.nAction, env.epLen,
                                              P_true=None, R_true=None,
                                              query_function=query_function)
                    query_function.setAgent(agent)

                    # Run the experiment
                    # returns:  cumReward, cumQueryCost, perf, cumRegret 
                    result = run_finite_tabular_experiment(agent, env_sample, f_ext, num_episodes, seed,
                                        recFreq=1000, fileFreq=10000, targetPath=save_str, query_function=query_function,
                                        printing=1)
                    if use_real_env:
                        perfs.append(result[2]) 
                        R_priors.append(agent.R_prior)
                        visit_counts.append(query_function.visit_count)
                    else:
                        SQR_perfs.append(result[2]) 
                        SQR_visit_counts.append(query_function.visit_count)
                    
                    print time.time() - t1

        
        all_perfs[n] = perfs
        all_visit_counts[n] = visit_counts
        mean_perfs[n] = np.mean(perfs)
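
For the sampled-environment branch above, a hedged sketch of the pattern, assuming sample_mdp() returns an (R, P) pair as the [0] indexing and naming suggest: the true transitions env.P are kept and only the rewards are swapped for a posterior sample.

sampled_R, sampled_P = initial_agent.sample_mdp()  # posterior sample; P is discarded here
env_sample = gridworld.make_mdp(env.nState, env.nAction, env.epLen,
                                sampled_R, env.P)  # sampled rewards, true transitions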
Example 3
def rollout_performance(agent, env, seed):
    f_ext = FeatureTrueState(env.epLen, env.nState, env.nAction, env.nState)

    # returns: cumReward, cumQueryCost, perf, cumRegret
    return run_finite_tabular_experiment(agent, env, f_ext, env.num_episodes, seed,
                        recFreq=1000, fileFreq=10000, targetPath='')   
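
Since rollout_performance takes the seed explicitly, averaging the perf entry (index 2) over a few seeds is straightforward; a small sketch, given some env and agent, with the seed range chosen arbitrarily:

import numpy as np

perfs = [rollout_performance(agent, env, s)[2] for s in range(5)]
print 'mean perf over 5 seeds:', np.mean(perfs)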
Example 4

    print '******************************************************************'
    print fileName
    print '******************************************************************'

    # Make the environment
    env = environment.make_stochasticChain(args.chainLen)

    # Make the feature extractor
    f_ext = FeatureTrueState(env.epLen, env.nState, env.nAction, env.nState)

    # Make the agent
    alg_dict = {'PSRL': finite_tabular_agents.PSRL,
                'PSRLunif': finite_tabular_agents.PSRLunif,
                'OptimisticPSRL': finite_tabular_agents.OptimisticPSRL,
                'GaussianPSRL': finite_tabular_agents.GaussianPSRL,
                'UCBVI': finite_tabular_agents.UCBVI,
                'BEB': finite_tabular_agents.BEB,
                'BOLT': finite_tabular_agents.BOLT,
                'UCRL2': finite_tabular_agents.UCRL2,
                'UCFH': finite_tabular_agents.UCFH}
    agent_constructor = alg_dict[args.alg]

    agent = agent_constructor(env.nState, env.nAction, env.epLen,
                              alpha0=args.alpha0, tau=args.tau,
                              scaling=args.scaling)

    # Run the experiment
    run_finite_tabular_experiment(agent, env, f_ext, args.nEps, args.seed,
                        recFreq=100, fileFreq=1000, targetPath=targetPath)
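
This script reads its settings from an args object; a plausible argparse setup covering the attributes used here (argument names, defaults, and flag style are assumptions, not the original CLI):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--alg', type=str, default='PSRL')     # key into alg_dict
parser.add_argument('--chainLen', type=int, default=10)
parser.add_argument('--nEps', type=int, default=10000)
parser.add_argument('--seed', type=int, default=1)
parser.add_argument('--alpha0', type=float, default=1.0)   # prior strength
parser.add_argument('--tau', type=float, default=1.0)      # reward precision
parser.add_argument('--scaling', type=float, default=1.0)  # confidence scaling
args = parser.parse_args()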

Example 5
    print '******************************************************************'

    # Make the environment
    env = environment.make_hardBanditMDP(epLen=args.epLen, gap=args.gap,
                                         nAction=2, pSuccess=0.5)

    # Make the feature extractor
    f_ext = FeatureTrueState(env.epLen, env.nState, env.nAction, env.nState)

    # Make the agent
    alg_dict = {'PSRL': finite_tabular_agents.PSRL,
                'PSRLunif': finite_tabular_agents.PSRLunif,
                'OptimisticPSRL': finite_tabular_agents.OptimisticPSRL,
                'GaussianPSRL': finite_tabular_agents.GaussianPSRL,
                'UCBVI': finite_tabular_agents.UCBVI,
                'BEB': finite_tabular_agents.BEB,
                'BOLT': finite_tabular_agents.BOLT,
                'UCRL2': finite_tabular_agents.UCRL2,
                'UCFH': finite_tabular_agents.UCFH,
                'EpsilonGreedy': finite_tabular_agents.EpsilonGreedy}

    agent_constructor = alg_dict[args.alg]

    agent = agent_constructor(env.nState, env.nAction, env.epLen,
                              scaling=args.scaling)

    # Run the experiment
    run_finite_tabular_experiment(agent, env, f_ext, args.nEps, args.seed,
                        recFreq=1000, fileFreq=10000, targetPath=targetPath)
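
The bandit construction above exposes the gap directly, so sweeping it re-uses the same setup; a sketch (the gap values are illustrative, not from the original script):

for gap in [0.01, 0.03, 0.1]:
    env = environment.make_hardBanditMDP(epLen=args.epLen, gap=gap,
                                         nAction=2, pSuccess=0.5)
    f_ext = FeatureTrueState(env.epLen, env.nState, env.nAction, env.nState)
    agent = agent_constructor(env.nState, env.nAction, env.epLen,
                              scaling=args.scaling)
    # note: vary targetPath per gap so runs don't overwrite each other
    run_finite_tabular_experiment(agent, env, f_ext, args.nEps, args.seed,
                        recFreq=1000, fileFreq=10000, targetPath=targetPath)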

Example 6
    # Make the environment
    env = gridworld.make_gridworld(grid_width, epLen, reward_means)
    f_ext = FeatureTrueState(env.epLen, env.nState, env.nAction, env.nState)
    # Make the agent
    alg = finite_tabular_agents.PSRL
    agent = alg(env.nState, env.nAction, env.epLen,
                              scaling=scaling, 
                              P_true=None,
                              R_true=None)
    # Make the query function
    query_function = query_functions.QueryFirstNVisits(query_cost, n)
    query_function.setEnvAgent(env, agent)

    # Run the experiment
    result = run_finite_tabular_experiment(agent, env, f_ext, nEps, seed,
                        recFreq=1000, fileFreq=10000, targetPath=targetPath, query_function=query_function)
    PSRL_results.append(result)
    PSRL_visits.append(query_function.visit_count)
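
The query function caps how often the agent pays to observe a reward. A minimal sketch of the QueryFirstNVisits idea, with the interface guessed from the calls above (a query_cost, a visit cap n, a visit_count, and setEnvAgent) rather than taken from the real class:

from collections import defaultdict

class QueryFirstNVisitsSketch(object):
    """Query the reward at (s, a) only on its first n visits."""
    def __init__(self, query_cost, n):
        self.query_cost = query_cost          # per-query cost, for the experiment's accounting
        self.n = n
        self.visit_count = defaultdict(int)

    def setEnvAgent(self, env, agent):
        self.env, self.agent = env, agent

    def will_query(self, state, action):
        # True while (state, action) is within its first n visits.
        self.visit_count[state, action] += 1
        return self.visit_count[state, action] <= self.n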


eGreedy_results = []
eGreedy_visits = []
for n in max_num_visits:
    print "n=", n

    # Make the environment
    env = gridworld.make_gridworld(grid_width, epLen, reward_means)
    f_ext = FeatureTrueState(env.epLen, env.nState, env.nAction, env.nState)
    # Make the agent
    alg = finite_tabular_agents.EpsilonGreedy
    agent = alg(env.nState, env.nAction, env.epLen,
                              scaling=scaling,
                              P_true=None,
                              R_true=None)

Example 7

seed = 2
numpy_rng = np.random.RandomState(seed)

# Environment
grid_width = 4
epLen = 2 * grid_width - 1 + 8  # horizon: scales with grid size, plus 8 extra steps
num_episodes = 53
reward_sd = 2

env = gridworld.make_gridworld(grid_width, epLen, rewards={(0, 0): 1},
                               reward_noise=reward_sd)

# Agent
query_cost = 1.5
reward_tau = reward_sd ** -2  # precision = 1 / variance

agent = finite_tabular_agents.PSRLLimitedQuery(env.nState, env.nAction, env.epLen,
          scaling=0.1,
          P_true=env.P, R_true=None,  # transitions are known; rewards must be queried
          query_function=QueryFirstNVisits(query_cost, 5),
          tau=reward_tau)

# Reward prior as (mean, precision) pairs: action 0 gets an uncertain prior
# (0, 1) in every state; every other (s, a) defaults to (0, 10e10), i.e. is
# effectively pinned to zero reward by the huge precision.
agent.R_prior = fillPrior(env, {(s, 0): (0, 1) for s in range(env.nState)}, (0, 10e10))

f_ext = FeatureTrueState(env.epLen, env.nState, env.nAction, env.nState)

# returns: cumReward, cumQueryCost, perf, cumRegret
results = run_finite_tabular_experiment(agent, env, f_ext, num_episodes, seed,
                    recFreq=1000, fileFreq=10000, targetPath='')   
print results
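
A small follow-up, unpacking the four-tuple documented in the comment above to net out query costs:

cumReward, cumQueryCost, perf, cumRegret = results
print 'return net of query costs:', cumReward - cumQueryCost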