Code example #1
def question_1():
    # Specify hyper-parameters

    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    num_episodes = 200
    num_runs = 50
    max_eps_steps = 100000

    steps = np.zeros([num_runs, num_episodes])
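    # First experiment: 50 runs of 200 episodes each; the per-episode step
    # counts are saved to steps.npy and plotted with plotGraph().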

    for r in range(num_runs):
        print("run number : ", r)
        rlglue.rl_init()
        for e in range(num_episodes):
            #print("Episode number: "+str(e))
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
            #print("Number of steps: "+str(steps))
            # print(steps[r, e])
    np.save('steps', steps)
    plotGraph()
    
    del agent, environment, rlglue
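    # Second experiment: a single run of 1000 episodes, after which the agent
    # is asked (via rl_agent_message) to produce a 3-D plot of the learned
    # value function.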
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    num_episodes = 1000
    num_runs = 1
    max_eps_steps = 100000

    steps = np.zeros([num_runs, num_episodes])

    for r in range(num_runs):
        print("run number : ", r)
        rlglue.rl_init()
        for e in range(num_episodes):
            print("Episode number: "+str(e))
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
            #print("Number of steps: "+str(steps))
            # print(steps[r, e])
    #np.save('steps', steps)
    #plotGraph()
    rlglue.rl_agent_message("plot3DGraph")
Code example #2
File: bonus.py  Project: RAYFC/Reinforcement-learning
def question_4():
    # Specify hyper-parameters

    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    num_episodes = 200
    num_runs = 50
    max_eps_steps = 1000000

    steps = np.zeros([num_runs, num_episodes])
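    # Track the total reward accumulated in each run so that the mean and
    # standard error across runs can be reported at the end.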
    rewards = []
    for r in range(num_runs):
        print("run number : ", r + 1)
        rlglue.rl_init()
        for e in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
        reward = rlglue.total_reward()
        rewards.append(reward)
    mean = sum(rewards) / len(rewards)
    stder = statistics.stdev(rewards) / math.sqrt(len(rewards))
    print("mean:", mean)
    print("std:", stder)
    np.save('bonus_steps', steps)
    np.save("mean", mean)
    np.save("stder", stder)
Code example #3
File: exp_hw6.py  Project: reversedArrow/cmput366
def question_1(num_episodes):
    # Specify hyper-parameters

    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    max_eps_steps = 100000

    steps = np.zeros(num_episodes)
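    # A single run: record how many steps each of the num_episodes episodes takes.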

    rlglue.rl_init()
    for e in tqdm(range(num_episodes)):
        rlglue.rl_episode(max_eps_steps)
        steps[e] = rlglue.num_ep_steps()
        # print(steps[e])

    return steps
Code example #4
def question_1():
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    num_episodes = 200
    num_runs = 5
    max_eps_steps = 100000

    steps = np.zeros([num_runs, num_episodes])
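    # steps[r, e] records how many steps episode e of run r took; the full
    # array is saved to steps.npy at the end.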

    for r in range(num_runs):
        print("run number : ", r)
        rlglue.rl_init()
        for e in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
            # print(steps[r, e])
    np.save('steps', steps)
Code example #5
def question_3():

    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    num_episodes = 1000
    num_runs = 1
    max_eps_steps = 100000

    steps = np.zeros([num_runs, num_episodes])
    # only 1 run
    for r in range(num_runs):
        print("1000 episode run : ", r)
        rlglue.rl_init()
        for e in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
        # Ask the agent for its value-function samples: a list of [X, Y, Z]
        # entries giving (position, velocity, state-value).
        Return = rlglue.rl_agent_message(1)
    return Return
Code example #6
def part3():
    # Specify hyper-parameters

    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    num_episodes = 200
    num_runs = 1
    max_eps_steps = 100000

    steps = np.zeros([num_runs, num_episodes])

    for r in range(num_runs):
        print("run number : ", r)
        rlglue.rl_init()
        for e in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
            # print(steps[r, e])
    np.save('steps', steps)
    # Sweep a 50 x 50 grid over (position, velocity) and write the negated
    # greedy state value at each grid point, one row per position index.
    # This assumes the Agent instance exposes its tile-coding function `F`
    # and its weight vector `weights` (the original referenced them through
    # an undefined `self`).
    fout = open('value', 'w')
    grid_size = 50
    num_of_actions = 3
    heights = np.zeros([grid_size, grid_size])
    for i in range(grid_size):
        for j in range(grid_size):
            pos = -1.2 + (i * 1.7 / grid_size)
            vel = -0.07 + (j * 0.14 / grid_size)
            tile = (pos, vel)
            q = []
            for a in range(num_of_actions):
                inds = agent.F(tile, a)
                q.append(np.sum(agent.weights[inds]))
            heights[i, j] = max(q)
            fout.write(str(-heights[i, j]) + ' ')
        fout.write('\n')
    fout.close()
    np.save('heights', heights)
Code example #7
File: exp_hw6.py  Project: aahmed97/MountCar
def question_1():
    # Specify hyper-parameters

    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    num_episodes = 200
    num_runs = 50
    max_eps_steps = 100000

    steps = np.zeros([num_runs, num_episodes])

    for r in range(num_runs):
        print("run number : ", r)
        st = time.time()
        rlglue.rl_init()
        for e in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
            # print(steps[r, e])
        finish = time.time() - st
        print(str(finish) + " seconds elapsed")
    np.save('steps', steps)
Code example #8
    # Excerpt from a grid-search experiment.  The enclosing loop over
    # hyper-parameter combinations (which provides `combination` and the
    # index `j`), along with `grid_search_params`, `sorted_params_names`,
    # `initial_params`, `episodes_steps`, `Q_t`, `Phi_t`, `test_coord`,
    # `max_episode_steps`, `compute_epsilon`, and `compute_xi`, is defined
    # in the surrounding code and not shown here.
    i = 0
    for key in sorted_params_names:
        msg_to_send = key + ' ' + str(grid_search_params[key][combination[i]])
        print(msg_to_send)
        agent.agent_message(msg_to_send)
        i += 1
    rl_glue = RLGlue(agent_obj=agent, env_obj=env)
    for r in range(int(initial_params['num_runs'])):
        print('run: ' + str(r))
        agent.random_seed = r
        rl_glue.rl_init()
        agent.epsilon = float(initial_params['initial_epsilon'])
        for e in range(int(initial_params['total_episodes'])):
            rl_glue.rl_episode(max_episode_steps)
            agent.epsilon = compute_epsilon(current_epsilon=agent.epsilon)
            episodes_steps[j, r, e] = rl_glue.num_ep_steps()
            Q_t[j, r, e] = agent.Q
            if initial_params['agent'] != 'sarsa0':
                Phi_t[j, r, e] = agent.Phi
            if 'pies' in initial_params['agent']:
                agent.xi = compute_xi(current_xi=agent.xi, decay=agent.decay,
                                      decay_param=agent.decay_param)
        print('path length', len(agent.path))
    j += 1

# finding the best parameter setting for grid search based on AUC
best_param_set_index = np.random.choice(
    np.flatnonzero(
        np.trapz(np.mean(episodes_steps, 1)) == np.trapz(np.mean(episodes_steps, 1)).min()))
best_params_string = ''
for index in range(0, len(tuple(itertools.product(*test_coord))[best_param_set_index])):
Code example #9
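        # Excerpt: the enclosing loop (not shown) iterates over different
        # numbers of planning steps, with `ite` the setting used on this pass.
        # `num_runs`, `num_episodes`, `result`, `Q`, and `output` are defined
        # in the surrounding code.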
        print("training process with {} planning step".format(ite))
        # Create and pass agent and environment objects to RLGlue
        environment = DynaQEnvironment()
        agent = DynaQAgent(ite)
        rlglue = RLGlue(environment, agent)
        del agent, environment  # don't use these anymore

        for run in range(num_runs):
            print("run number: {}\n".format(run))
            # set seed for reproducibility
            np.random.seed(run)

            # initialize RL-Glue
            rlglue.rl_init()

            # loop over episodes
            for episode in range(num_episodes):

                rlglue.rl_episode()

                result[episode] += rlglue.num_ep_steps()
                data = rlglue.rl_agent_message(
                    "Q for all states in the episode")

                Q.append(data)

        result = result / num_runs
        output.append(result)

    np.save("output", output)