def better_run_experiment(num_runs, num_episodes):
    """Run Expected Sarsa on MountainCar with alternate tile-coding
    parameters, plot the mean learning curve, and save raw step counts.

    num_runs: number of independent runs to average over.
    num_episodes: episodes per run.
    """
    all_steps = []
    agent = ag.ExpectedSarsaAgent
    env = enviro.MountainEnvironment

    total_reward_per_run = []
    for run in range(num_runs):
        start = time.time()
        if run % 5 == 0:
            print("RUN: {}".format(run))

        # single draw from [-0.001, 0): near-zero pessimistic initial weight
        initial_weights = np.random.uniform(-0.001, 0)
        agent_info = {
            "num_tilings": 32,
            "num_tiles": 4,
            "iht_size": 4096,
            "epsilon": 0.1,
            "gamma": 1,
            "alpha": 0.7 / 32,  # step size scaled down by number of tilings
            "initial_weights": initial_weights,
            "num_actions": 3
        }
        env_info = {
            "min_position": -1.2,
            "max_position": 0.5,
            "min_velocity": -0.07,
            "max_velocity": 0.07,
            "gravity": 0.0025,
            "action_discount": 0.001
        }
        rl_glue = RLGlue(env, agent)
        rl_glue.rl_init(agent_info, env_info)
        steps_per_episode = []

        for episode in range(num_episodes):
            rl_glue.rl_episode(15000)  # 15000 is the max steps of the episode
            steps_per_episode.append(rl_glue.num_steps)
        # total reward assumes -1 reward per step (MountainCar convention)
        total_reward = np.sum(steps_per_episode) * -1
        all_steps.append(np.array(steps_per_episode))
        print("Run time: {}".format(time.time() - start))
        total_reward_per_run.append(total_reward)

    data = np.mean(total_reward_per_run)
    data_std_err = np.std(total_reward_per_run, axis=0) / np.sqrt(num_runs)
    plt.title("Expected Sarsa MountainCar (Alternate Parameters)",
              fontdict={
                  'fontsize': 16,
                  'fontweight': 25
              },
              pad=15.0)
    # BUG FIX: axis-label typo ("Epsiode" -> "Episode")
    plt.xlabel("Episode", labelpad=5.0)
    plt.ylabel("Steps per Episode (averaged over " + str(num_runs) + " runs)",
               labelpad=10.0)
    plt.plot(np.mean(np.array(all_steps), axis=0))
    plt.show()
    np.save("ExpectedSarsa_test", np.array(all_steps))
    print("mean: ", data)
    print("standard error: ", data_std_err)
# Example #2
def testPolicy(policy):
    """Evaluate a fixed policy for one run of up to 1000 steps.

    Returns a list of per-step rewards, summed across runs and then
    averaged over the number of runs.
    """
    test_agent = testAgent(policy)
    environment = Environment()
    glue = RLGlue(environment, test_agent)
    del environment, test_agent
    glue.rl_init()

    n_runs, n_steps = 1, 1000
    # rewards[step] accumulates the reward received at that step across runs
    rewards = [0] * n_steps
    for _ in range(n_runs):
        glue.rl_init()
        # glue.rl_env_message('renderON')
        glue.rl_start()

        done = False
        for step_idx in range(n_steps):
            if done:
                continue
            r, _, _, done = glue.rl_step()
            rewards[step_idx] += r

    # average rewards over runs
    return [total / n_runs for total in rewards]
def main(agent_info, agent_class, steps, filename):
    """Run agent_class on the floating-horsetrack environment for at most
    `steps` total steps, recording the global step index at which each
    episode terminates; results are written to data/<filename>.
    """
    env_class = floating_horsetrack_environment.Environment
    glue = RLGlue(env_class, agent_class)

    total_steps = 0
    terminations = []  # global step count at each episode end
    reward_sum = 0     # cumulative reward (tracked but not saved)

    agent_info.update({"actions": env_class.actions})
    glue.rl_init(agent_info)

    while total_steps < steps:
        glue.rl_start()
        done = False

        while not done and total_steps < steps:
            reward, _, _, done = glue.rl_step()
            reward_sum += reward
            total_steps += 1

        if done:
            terminations.append(total_steps)
        glue.rl_cleanup()

    save_results(terminations, len(terminations), "data/{}".format(filename))
# Example #4
def run_experiment(environment, agent, environment_parameters, agent_parameters, experiment_parameters):
    """Run several seeded runs of the agent, collect the per-episode
    return, save the result matrix, and zip the results folder."""
    glue = RLGlue(environment, agent)

    n_runs = experiment_parameters["num_runs"]
    n_episodes = experiment_parameters["num_episodes"]

    # sum of reward at the end of each episode; one row per run
    agent_sum_reward = np.zeros((n_runs, n_episodes))

    env_info = {}
    agent_info = agent_parameters

    # one agent setting; everything is seeded with the run index
    for run in range(1, n_runs + 1):
        agent_info["seed"] = run
        agent_info["network_config"]["seed"] = run
        env_info["seed"] = run

        glue.rl_init(agent_info, env_info)

        for episode in tqdm(range(1, n_episodes + 1)):
            glue.rl_episode(experiment_parameters["timeout"])
            episode_reward = glue.rl_agent_message("get_sum_reward")
            agent_sum_reward[run - 1, episode - 1] = episode_reward

    save_name = "{}".format(glue.agent.name)
    if not os.path.exists('results'):
        os.makedirs('results')
    np.save("results/sum_reward_{}".format(save_name), agent_sum_reward)
    shutil.make_archive('results', 'zip', 'results')
# Example #5
def main():
    """Train until the 100-episode moving-average reward exceeds 0.78.

    After 1000 episodes the environment is rendered (with a short delay)
    so the learned behaviour can be inspected visually.
    """
    num_eps = 200000

    agent = Agent()
    env = Environment()
    rlglue = RLGlue(env, agent)
    del agent, env
    rlglue.rl_init()
    rewards = []
    for ep in range(num_eps):
        rlglue.rl_start()
        # rlglue.rl_env_message('renderON')
        terminal = False
        reward = 0
        while not terminal:
            reward, state, action, terminal = rlglue.rl_step()
            if ep > 1000:
                # visualize late episodes step by step
                rlglue.rl_env_message('renderON')
                print(state)
                time.sleep(0.1)
        # only the final reward of the episode is recorded
        rewards.append(reward)
        if ep >= 99:
            if np.average(rewards[ep - 99:ep + 1]) > 0.78:
                # BUG FIX: '%' binds tighter than '+', so the original
                # ('solved at episode %d' % ep + 1) raised TypeError
                # (str + int) at the moment of success.
                print('solved at episode %d' % (ep + 1))
                break
def question_4():
    """Run 50 x 200-episode experiments, save per-episode step counts, and
    report the mean total reward with its standard error."""
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    n_episodes = 200
    n_runs = 50
    step_limit = 1000000

    steps = np.zeros([n_runs, n_episodes])
    rewards = []
    for run_idx in range(n_runs):
        print("run number : ", run_idx + 1)
        rlglue.rl_init()
        for ep_idx in range(n_episodes):
            rlglue.rl_episode(step_limit)
            steps[run_idx, ep_idx] = rlglue.num_ep_steps()
        rewards.append(rlglue.total_reward())

    mean = sum(rewards) / len(rewards)
    stder = statistics.stdev(rewards) / math.sqrt(len(rewards))
    print("mean:", mean)
    print("std:", stder)
    np.save('bonus_steps', steps)
    np.save("mean", mean)
    np.save("stder", stder)
# Example #7
def main(agent_info, agent_class, env_info, env_class, steps, param_info):
    """Run up to 5 episodes within a budget of `steps` total steps and
    record the global step at which each episode terminated; unfinished
    episode slots keep the sentinel value `steps`. Results are saved under
    a name derived from param_info."""
    glue = RLGlue(env_class, agent_class)

    episode_cap = 5
    step_count = 0
    episode_count = 0
    # sentinel: episodes that never terminate report the full step budget
    episode_end = np.ones(episode_cap) * steps
    reward_total = 0

    agent_info.update({"actions": env_class.actions})
    glue.rl_init(agent_info, env_info)

    while step_count < steps and episode_count < episode_cap:
        glue.rl_start()
        done = False

        while not done and step_count < steps:
            reward, _, _, done = glue.rl_step()
            reward_total += reward
            step_count += 1

        if done:
            episode_end[episode_count] = step_count
            episode_count += 1
        glue.rl_cleanup()

    save_results(episode_end, "{}".format(param_info))
# Example #8
def run_experiment(env_info,
                   agent_info,
                   num_episodes=5000,
                   experiment_name=None,
                   plot_freq=100,
                   true_values_file=None,
                   value_error_threshold=1e-8):
    """Train a TD agent on the cliff-walk environment, visualizing the
    value estimates every `plot_freq` episodes; when a true-values file is
    supplied, the Manager grades the final estimates against it."""
    rl_glue = RLGlue(CliffWalkEnvironment, TDAgent)
    rl_glue.rl_init(agent_info, env_info)

    manager = Manager(env_info,
                      agent_info,
                      true_values_file=true_values_file,
                      experiment_name=experiment_name)

    for ep in range(1, num_episodes + 1):
        rl_glue.rl_episode(0)  # 0 means no step limit
        if ep % plot_freq == 0:
            manager.visualize(rl_glue.agent.agent_message("get_values"), ep)

    values = rl_glue.agent.agent_message("get_values")
    if true_values_file is not None:
        # Grading: The Manager will check that the values computed using your TD agent match
        # the true values (within some small allowance) across the states. In addition, it also
        # checks whether the root mean squared value error is close to 0.
        manager.run_tests(values, value_error_threshold)

    return values
def main():
    """Train for up to 10 runs x 5000 episodes, ending a run as soon as
    the learned policy first scores 100% under testPolicy; per-episode
    performance curves are plotted and saved to test.png."""
    num_eps = 5000
    num_runs = 10
    random.seed(0)
    np.random.seed(0)
    agent = Agent()
    env = Environment()
    rlglue = RLGlue(env, agent)
    del agent, env

    for _ in range(num_runs):
        rlglue.rl_init()
        performances = []
        for ep in range(num_eps):
            rlglue.rl_start()
            # rlglue.rl_env_message('renderON')
            terminal = False
            while not terminal:
                reward, state, action, terminal = rlglue.rl_step()

            # Find the first policy that performs at 100%
            score = testPolicy(rlglue.rl_agent_message('policy')) * 100
            performances.append(score)
            if score >= 100:
                print('Episode: %d' % (ep + 1))
                break
        plt.plot(performances)
    plt.savefig('test.png')
# Example #10
def run_experiment(env_info,
                   agent_info,
                   num_episodes=5000,
                   experiment_name=None,
                   plot_freq=100,
                   true_values_file=None,
                   value_error_threshold=1e-8):
    """Run TD learning on the cliff-walk environment, visualizing the value
    estimates every `plot_freq` episodes, and return the final estimates."""
    glue = RLGlue(CliffWalkEnvironment, TDAgent)
    glue.rl_init(agent_info, env_info)

    manager = Manager(env_info,
                      agent_info,
                      true_values_file=true_values_file,
                      experiment_name=experiment_name)

    for ep in range(1, num_episodes + 1):
        glue.rl_episode(0)  # 0 means no step limit
        if ep % plot_freq == 0:
            manager.visualize(glue.agent.agent_message("get_values"), ep)

    return glue.agent.agent_message("get_values")
# Example #11
def question_3():
    """Train for 1000 episodes, then evaluate the learned value function on
    a 50x50 (position, velocity) grid, writing the grid both to the text
    file 'value' and to value.npy."""
    # Specify hyper-parameters
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)
    num_episodes = 1000
    num_runs = 1
    max_eps_steps = 1000000
    for _ in range(num_runs):
        rlglue.rl_init()
        i = 0
        for i in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
            print(i)
    fout = open('value', 'w')
    steps = 50  # grid resolution along each state dimension
    # weight vector and index hash table from the trained agent
    w, iht = rlglue.rl_agent_message("ValueFunction")
    Q = np.zeros([steps, steps])
    for i in range(steps):
        for j in range(steps):
            values = []
            for a in range(3):
                value = 0
                # Sum the negated weights of the active tiles for action a at
                # this grid point; coordinates are scaled to tile-coder units
                # (position range 1.7, velocity range 0.14, 8 tilings).
                for index in tiles(iht, 8, [
                        8 * (-1.2 + (i * 1.7 / steps)) / 1.7, 8 *
                    (-0.07 + (j * 0.14 / steps)) / 0.14
                ], [a]):
                    value -= w[index]
                values.append(value)
            # height = negated value of the best action at this grid point
            height = max(values)
            fout.write(repr(height) + ' ')
            Q[j][i] = height
        fout.write('\n')
    fout.close()
    np.save("value", Q)
# Example #12
def main():
    """Run a random agent on the horsetrack environment for 1000 episodes
    and save each episode's step count to RL_EXP_OUT.dat."""
    env_class = horsetrack_environment.Environment
    agent_class = random_agent.Agent
    glue = RLGlue(env_class, agent_class)

    n_episodes = 1000
    step_cap = 100000

    print("\tPrinting one dot for every run: {}".format(n_episodes),
          end=' ')
    print("total runs to complete.")

    # sized by the step cap; only the first n_episodes entries get filled
    steps_taken = [0 for _ in range(step_cap)]

    for ep in range(n_episodes):
        glue.rl_init(agent_info={"actions": env_class.actions})
        glue.rl_start()

        done = False
        while glue.num_steps < step_cap and not done:
            reward, state, action, done = glue.rl_step()

        steps_taken[ep] = glue.num_steps

        glue.rl_cleanup()
        print(".", end='')
        sys.stdout.flush()

    save_results(steps_taken, len(steps_taken), "RL_EXP_OUT.dat")
    print("\nDone")
# Example #13
def question_1():
    """Train with a chosen step size until the average reward over the last
    100 episodes exceeds -110 (the "solved" criterion), rendering the
    episodes listed in see_eps."""
    # Specify hyper-parameters

    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)
    np.random.seed(0)
    num_episodes = 200
    see_eps = [157]  # episodes selected for rendering
    num_runs = 1
    max_eps_steps = 100000

    # test with various stepsizes (alphas) for agent
    stepSizes = np.linspace(0.01, 1, 100)
    # best stepsize so far (comment out to test many)
    stepSizes = [0.559184]

    # seperate run for each stepsize
    for step in stepSizes:

        # initialize agent and software, with chosen stepsize
        rlglue.rl_init()
        rlglue.rl_agent_message('step:' + str(step))

        # keep track of total rewards for each episode
        total_rewards = []

        for ep in range(num_episodes):
            # render only selected episodes
            # NOTE(review): rendering is switched ON during the episode
            # *before* a watched one and OFF when it is reached — confirm
            # this off-by-one is intended.
            if ep in see_eps:
                rlglue.rl_env_message('rOFF')
            if ep + 1 in see_eps:
                rlglue.rl_env_message('rON')
                print("Episode %d" % (ep + 1))

            # initializse for episode
            rlglue.rl_start()
            terminal = False
            total_reward = 0

            # run episode and calculate total reward
            while not terminal:
                reward, state, action, terminal = rlglue.rl_step()
                total_reward += reward
            total_rewards.append(total_reward)

            # calculate average reward of the last 100 episodes
            if ep >= 99:
                total = np.sum(total_rewards[ep - 99:ep + 1])
                avg = total / 100

                # check if results indicate the problem is solved
                if avg > -110:
                    print("Solved at episode %d, avg reward: %f" %
                          (ep + 1, avg))
                    break

    # close environment
    environment.close()
# Example #14
def main(data_output_location="new_data"):
    """Sweep epsilon x alpha for a random agent on the horsetrack
    environment, recording the global step of every episode termination,
    and save one timestamped data file per parameter setting."""
    env_class = horsetrack_environment.Environment
    agent_class = random_agent.Agent

    agent_name = agent_class.__module__[agent_class.__module__.find(".") + 1:]
    environment_name = env_class.__module__[env_class.__module__.find(".") +
                                            1:]

    glue = RLGlue(env_class, agent_class)

    budget = 100_000  # total steps allowed per parameter setting

    for epsilon in [0.0, 0.1]:
        for alpha in [2, 1, 0.5, 0.25, 0.125, 0.0625]:
            print("Running Agent: {} on Environment: {}.".format(
                agent_name, environment_name))
            init_info = {
                "actions": [-1, 1],
                "world_size": 100,
                "epsilon": epsilon,
                "alpha": alpha
            }
            termination_times = []

            glue.rl_init(agent_init_info=init_info)

            steps_used = 0
            while steps_used < budget:
                glue.rl_start()
                done = False

                while steps_used < budget and not done:
                    reward, state, action, done = glue.rl_step()
                    steps_used += 1

                glue.rl_cleanup()
                sys.stdout.flush()

                if done:
                    termination_times.append(steps_used)

            # seconds since the Unix epoch, used to tag the output file
            epoch_datetime = int(
                (datetime.datetime.now() -
                 datetime.datetime.utcfromtimestamp(0)).total_seconds())

            save_results(
                termination_times, len(termination_times),
                "{}/{}_{}__{}__epsilon{}__alpha{}.dat".format(
                    data_output_location, epoch_datetime, agent_name,
                    environment_name, epsilon, alpha))

    print("\nDone")
# Example #15
def run_experiment():
    """Run a BlackJack experiment: up to 1,000,000 episodes per run, with
    progress printed every 10,000 episodes; saves the final policy to
    policy.txt and the averaged per-episode returns to RL_EXP_OUT.dat."""

    #specify hyper-parameters
    num_runs = 1
    max_episodes = 1000000
    max_steps_per_episode = 100
    num_states = 181
    num_actions = 2
    alpha = 0.01
    eps = 0.1
    Q1 = 0  # initial action-value estimate passed to the agent

    results = np.zeros(max_episodes)  # per-episode return summed over runs
    results_run = 0

    agent = RandomAgent(num_states, num_actions, alpha, eps, Q1)
    environment = BlackJack()
    rlglue = RLGlue(environment, agent)

    print(
        "\nPrinting one dot for every run: {0} total runs to complete".format(
            num_runs))

    for run in range(num_runs):
        np.random.seed(run)  # make each run reproducible
        results_run = 0.0

        rlglue.rl_init()
        for e in range(1, max_episodes + 1):
            rlglue.rl_start()
            # episodes are capped at max_steps_per_episode steps
            for s in range(max_steps_per_episode):
                r, _, _, terminal = rlglue.rl_step()
                results_run += r
                results[e - 1] += r

                if terminal:
                    break

            # periodic progress report: running average return and policy
            if e % 10000 == 0:
                print(
                    "\nEpisode {}: average return till episode is {}, and policy is"
                    .format(e, results_run / e))
                print(rlglue.rl_agent_message("printPolicy"))
        print(".")

    print("Average return over experiment: {}".format(
        (results / num_runs).mean()))

    #save final policy to file -- change file name as necessary
    with open("policy.txt", 'w') as f:
        f.write(rlglue.rl_agent_message("printPolicy"))

    #save all the experiment data for analysis -- change file name as necessary
    save_results(results / num_runs, max_episodes, "RL_EXP_OUT.dat")
# Example #16
def question_1():
    """Phase 1: 50 runs x 200 episodes; save step counts and plot.
    Phase 2: a fresh 1 x 1000-episode run, ending with the agent's
    3-D value-function plot."""
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    n_runs, n_episodes = 50, 200
    step_limit = 100000

    steps = np.zeros([n_runs, n_episodes])

    for run_idx in range(n_runs):
        print("run number : ", run_idx)
        rlglue.rl_init()
        for ep_idx in range(n_episodes):
            rlglue.rl_episode(step_limit)
            steps[run_idx, ep_idx] = rlglue.num_ep_steps()
    np.save('steps', steps)
    plotGraph()

    # second experiment: single long run with fresh agent and environment
    del agent, environment, rlglue
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    n_runs, n_episodes = 1, 1000

    steps = np.zeros([n_runs, n_episodes])

    for run_idx in range(n_runs):
        print("run number : ", run_idx)
        rlglue.rl_init()
        for ep_idx in range(n_episodes):
            print("Episode number: " + str(ep_idx))
            rlglue.rl_episode(step_limit)
            steps[run_idx, ep_idx] = rlglue.num_ep_steps()
    rlglue.rl_agent_message("plot3DGraph")
# Example #17
def testPolicy(policy):
    """Run the given policy for 100 episodes and return the fraction of
    episodes whose final reward was positive (the success rate)."""
    environment = Environment()
    policy_agent = testAgent(policy)
    rlglue = RLGlue(environment, policy_agent)
    rlglue.rl_init()
    # rlglue.rl_env_message('renderON')

    wins = 0
    for _ in range(100):
        rlglue.rl_start()
        done = False
        final_reward = None
        while not done:
            final_reward, state, action, done = rlglue.rl_step()
        if final_reward > 0:
            wins += 1

    return wins / 100
def question_3():
    """Train for 1000 episodes, then plot the cost-to-go (negated greedy
    action value) over a 50x50 (position, velocity) grid as a 3-D surface,
    saving it to cost-to-go.png."""
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)
    max_eps_steps = 100000
    num_episodes = 1000
    num_runs = 1
    numActions=3

    rlglue.rl_init()
    for e in range(num_episodes):
        rlglue.rl_episode(max_eps_steps)

    # learned weight vector from the trained agent
    weights = rlglue.rl_agent_message("3D plot of the cast-to-go")

    fout = open('value','w')
    steps = 50  # grid resolution per state dimension
    z = np.zeros((50,50))
    for i in range(steps):
        for j in range(steps):
            values = []
            for a in range(numActions):
                # state scaled to tile-coder units: position spans 1.7,
                # velocity spans 0.14, with 8 tilings
                tile = [8*(-1.2+(i*1.7/steps))/1.7,8*(-0.07+(j*0.14/steps))/0.14]
                inds =  agent.get_index(tile,a)
                values.append(np.sum([weights[i] for i in inds]))
            height = max(values)
            # negate so the surface shows cost-to-go instead of value
            z[j][i]=-height
            fout.write(repr(-height)+' ')
        fout.write('\n')
    fout.close()

    fig = plt.figure()
    ax = fig.add_subplot(111,projection ='3d')
    x = np.arange(-1.2,0.5,1.7/50)
    y = np.arange(-0.07,0.07,0.14/50)
    x,y = np.meshgrid(x,y)
    ax.set_xticks([-1.2, 0.5])
    ax.set_yticks([0.07, -0.07])
    ax.set_ylabel('Velocity')
    ax.set_xlabel('Position')
    ax.set_zlabel('Cost-To-Go')
    ax.plot_surface(x,y,z)
    plt.savefig('cost-to-go.png')
    plt.show()
    # NOTE(review): this saves the integer 50 (grid resolution), since
    # `steps` was re-purposed above — confirm this is what was intended.
    np.save('steps', steps)
# Example #19
def tiling(real_value):
    """Average the per-episode RMS value error of Agent2 on the random
    walk over independent runs and write the averages to tiling_output.txt.

    real_value: array of true state values used to compute the RMS error.
    """
    num_runs = 30
    num_episodes = 2000
    environment = RandomWalkEnvironment()
    agent = Agent2()
    rl = RLGlue(environment, agent)
    error = np.zeros(5000)
    for run in range(num_runs):
        rl.rl_init()
        for episode in range(num_episodes):
            rl.rl_episode(10000)
            # BUG FIX: RLGlue's methods are rl_agent_message/rl_cleanup
            # (as used by every other experiment in this file); the
            # original RL_agent_message/RL_cleanup raised AttributeError.
            estimate = rl.rl_agent_message("ValueFunction")
            error[episode] += np.sqrt(
                np.mean(np.power(real_value - estimate, 2)))

        rl.rl_cleanup()
    # BUG FIX: average over the actual number of runs (the original
    # divided by 10 despite accumulating over 30 runs); use a context
    # manager so the file is always closed.
    with open("tiling_output.txt", "w") as file2:
        for i in range(num_episodes):
            file2.write(format(error[i] / num_runs))
# Example #20
def question_1(num_episodes):
    """Run a single agent for `num_episodes` episodes and return the
    per-episode step counts as a numpy array."""
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    step_limit = 100000

    episode_steps = np.zeros(num_episodes)

    rlglue.rl_init()
    for ep in tqdm(range(num_episodes)):
        rlglue.rl_episode(step_limit)
        episode_steps[ep] = rlglue.num_ep_steps()

    return episode_steps
def question_1():
    """Run 5 x 200-episode experiments and save the per-episode step
    counts to steps.npy."""
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    n_episodes, n_runs = 200, 5
    step_limit = 100000

    steps = np.zeros([n_runs, n_episodes])

    for run_idx in range(n_runs):
        print("run number : ", run_idx)
        rlglue.rl_init()
        for ep_idx in range(n_episodes):
            rlglue.rl_episode(step_limit)
            steps[run_idx, ep_idx] = rlglue.num_ep_steps()
    np.save('steps', steps)
# Example #22
def testPolicy(policy):
    """Render one episode under the given policy and return its total
    reward."""
    policy_agent = testAgent(policy)
    environment = Environment()
    rlglue = RLGlue(environment, policy_agent)
    del environment, policy_agent
    rlglue.rl_init()

    for _ in range(1):
        rlglue.rl_init()
        rlglue.rl_env_message('renderON')
        rlglue.rl_start()

        total_reward = 0
        done = False
        while not done:
            r, s, a, done = rlglue.rl_step()
            total_reward += r

    return total_reward
# Example #23
def run_experiment(env_info, agent_info, 
                   num_episodes=5000,
                   value_error_threshold=1e-8,
                   plot_freq=10):
    """TD learning on the grid environment. Returns [values, steps]: the
    final value estimates and the per-episode step counts."""
    glue = RLGlue(GridEnvironment, TDAgent)
    glue.rl_init(agent_info, env_info)

    steps = []
    for ep in range(1, num_episodes + 1):
        glue.rl_episode(0)  # 0 means no step limit
        steps.append(glue.agent.agent_message("get_steps"))
        if ep % plot_freq == 0:
            values = glue.agent.agent_message("get_values")
            print(glue.environment.env_message("get_grid_state"))
        glue.rl_cleanup()

    return [glue.agent.agent_message("get_values"), steps]
# Example #24
def question_2():
    """Run 1000 episodes, then draw the 3-D surface the agent returns for
    the "plot" message (q3_plot = [X values, Y values, Z grid])."""
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    max_eps_steps = 100000
    num_episodes = 1000

    rlglue.rl_init()
    for _ in tqdm(range(num_episodes)):
        rlglue.rl_episode(max_eps_steps)

    q3_plot = rlglue.rl_agent_message("plot")

    fig = plt.figure()
    # BUG FIX: fig.gca(projection='3d') was removed in matplotlib 3.6;
    # add_subplot is the supported form (and matches the rest of the file).
    ax = fig.add_subplot(111, projection='3d')
    X, Y = np.meshgrid(q3_plot[0], q3_plot[1])
    surf = ax.plot_surface(X, Y, q3_plot[2])
    ax.set_xlim(q3_plot[0][0], q3_plot[0][-1])
    ax.set_ylim(q3_plot[1][0], q3_plot[1][-1])
    plt.show()
# Example #25
def experiment1():
    """Repeat a 1000-step bandit episode over 2000 runs and return, per
    step, the fraction of runs in which the agent chose the action that
    env_message() reports."""
    bandit_agent = RandomAgent()
    environment = Environment1D()
    rlg = RLGlue(environment, bandit_agent)

    max_steps = 1000  # max number of steps in an episode
    num_runs = 2000   # number of repetitions of the experiment
    optimal_action = np.zeros(max_steps)

    for _ in range(num_runs):
        # initialize RL-Glue (env_init + agent_init) and start the episode
        rlg.rl_init()
        rlg.rl_start()
        for step_idx in range(max_steps):
            chosen = rlg.rl_step()[2]
            # env_message() presumably identifies the optimal action —
            # confirm against the Environment1D implementation
            if chosen == environment.env_message():
                optimal_action[step_idx] += 1

    return optimal_action / num_runs
def question_3():
    """Run 1000 episodes (single run) and return the agent's reply to
    message 1 — the value-function data [X, Y, Z] for position, velocity
    and state-value."""
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    n_episodes, n_runs = 1000, 1
    step_limit = 100000

    steps = np.zeros([n_runs, n_episodes])
    # only 1 run
    for run_idx in range(n_runs):
        print("1000 episode run : ", run_idx)
        rlglue.rl_init()
        for ep_idx in range(n_episodes):
            rlglue.rl_episode(step_limit)
            steps[run_idx, ep_idx] = rlglue.num_ep_steps()
        # [X, Y, Z]: position, velocity, state-value from the agent
        result = rlglue.rl_agent_message(1)
    return result
# Example #27
def question_1():
    """Train for 1000 episodes, then sample the learned action values on a
    50x50 (position, velocity) grid and save the negated greedy values to
    neg_q_hat.npy."""
    # Specify hyper-parameters

    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    num_episodes = 1000
    num_runs = 1
    max_eps_steps = 100000

    num_action = 3

    for r in range(num_runs):
        print("run number : ", r)
        #np.random.seed(r)
        rlglue.rl_init()
        for _ in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
        # learned weight vector from the trained agent
        weight = rlglue.rl_agent_message('get weight')

    # algorithm from assignment
    #fout = open('value','w')
    steps = 50  # grid resolution per state dimension
    neg_q_hat = np.zeros((steps, steps))
    for i in range(steps):
        for j in range(steps):
            values = []
            # map grid indices back to (position, velocity) in env units
            position = -1.2 + (i * 1.7 / steps)
            velocity = -0.07 + (j * 0.14 / steps)
            for a in range(num_action):
                # active-tile indices for this state/action pair
                tile_idx = agent.plot_get_feature(position, velocity, a)
                q_hat = np.sum(weight[tile_idx])
                values.append(q_hat)
            height = np.max(values)  # greedy action value
            neg_q_hat[j][i] = -height  # negated for cost-to-go style plots
            #fout.write(repr(-height)+' ')
        #fout.write('\n')
    #fout.close()
    np.save('neg_q_hat', neg_q_hat)
def main():
    """Run the weight-change agent on the (switched) drifter-distractor
    bandit once per agent type, saving the agent's tracked actions for
    each type and printing the per-action selection counts."""
    # NOTE(review): the first assignment is dead — the switched variant
    # immediately replaces it. Confirm which environment is intended.
    env = drifter_distractor_env.Environment
    env = switched_drifter_distractor_env.Environment

    agents = [random_agent.Agent, weight_change_agent.Agent]
    agent_types = ["absolute_error", "squared_error", "weight_change"]

    for agent_type in agent_types:
        # NOTE(review): always picks the weight-change agent; agent_type
        # only reaches the agent through agent_info.
        agent = agents[1]

        agent_info = {
            "num_actions": 4,
            "action_selection": "softmax",
            "agent_type": agent_type
        }
        env_info = {}

        num_runs = 1
        num_steps = 100000

        # per-action selection counts for this agent type
        actions = [0 for _ in range(4)]

        errors = []

        for run in range(num_runs):
            rl_glue = RLGlue(env, agent)
            rl_glue.rl_init(agent_info, env_info)
            rl_glue.rl_start()

            for step in range(num_steps):
                reward, state, action, is_terminal = rl_glue.rl_step()
                actions[action] += 1

        # np.save("data/squared_error", rl_glue.agent.track_actions)
        np.save("data/{}".format(agent_type), rl_glue.agent.track_actions)
        # print(rl_glue.environment.arm_1)
        # print(rl_glue.environment.arm_2)
        # print(rl_glue.environment.arm_3)
        # print(rl_glue.environment.arm_4)
        print(actions)
def part3():
    """Run 1 x 200 episodes, save the step counts, then dump a 50x50 grid
    of negated greedy action values over (position, velocity) to the text
    file 'value' and the last grid height to heights.npy."""
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    num_episodes = 200
    num_runs = 1
    max_eps_steps = 100000

    steps = np.zeros([num_runs, num_episodes])

    for r in range(num_runs):
        print("run number : ", r)
        rlglue.rl_init()
        for e in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
    np.save('steps', steps)

    fout = open('value', 'w')
    grid = 50  # grid resolution per state dimension
    num_of_actions = 3
    for i in range(grid):
        for j in range(grid):
            q = []
            for a in range(num_of_actions):
                # BUG FIX: position must vary with the grid column (the
                # original used the literal 1 instead of i), and the
                # lookups must go through the trained agent instance — the
                # original referenced an undefined `self` (NameError) and
                # the class Agent.F instead of the instance.
                pos = -1.2 + (i * 1.7 / grid)
                vel = -0.07 + (j * 0.14 / grid)
                tile = (pos, vel)
                # assumes Agent.F(state, action) -> active tile indices
                # and agent.weights is the weight vector — TODO confirm
                inds = agent.F(tile, a)
                q.append(np.sum(agent.weights[inds]))
            height = max(q)
            # BUG FIX: values were written with no separator ('' instead
            # of ' '), producing an unparseable file; use the same
            # space-separated format as the other value dumps in this file.
            fout.write(repr(-height) + ' ')
        fout.write('\n')
    fout.close()
    np.save('heights', height)
    # NOTE(review): the original ended with np.save('steps', 50) because
    # the resolution variable shadowed the step-count matrix, clobbering
    # the matrix saved above; that redundant save is dropped.
# Example #30
def question_3():
    """Run 1000 episodes, then plot the negated action values returned by
    the agent as a 3-D surface over (position, velocity)."""
    num_episodes = 1000
    num_runs = 1
    max_eps_steps = 100000

    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    # BUG FIX: start the timer once, before the loop — the original reset
    # it at the top of every run, so only the last run was timed.
    start = time.time()
    for r in range(num_runs):
        print("run number : ", r)
        rlglue.rl_init()
        for e in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
    end = time.time()
    print(str(end - start) + " seconds elapsed")

    action_vals, pos, vel = rlglue.rl_agent_message("return info")
    action_vals = np.multiply(action_vals, -1)  # negate for a cost-to-go view
    fig = plt.figure()
    # BUG FIX: fig.gca(projection='3d') was removed in matplotlib 3.6;
    # add_subplot is the supported form (and matches the rest of the file).
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(pos, vel, action_vals)
    plt.show()