Example no. 1
    print("-" * 100)

    # Evaluation
    tot_steps = 0
    all_returns = []
    all_steps = []

    notrl_tot_steps = 0
    notrl_returns = []
    notrl_steps = []

    # create grid-world instance
    canyon = True
    grid = GridWorld(4, canyon)
    grid.make_maps()

    possible_actions = grid.possible_actions
    world = grid.world
    # present the generated maps in reverse order of creation
    grid.list_of_maps.reverse()

    # Direct learning on final grid
    print("Direct learning on final grid")
    sarsa = SARSA(grid.final_grid, possible_actions, world)
    Q, returns, episodes, steps = do_task(sarsa, grid,
                                          len(grid.list_of_maps) - 1)
    notrl_returns.append(returns)
    notrl_steps.append(steps)
    notrl_tot_steps += steps[-1]
    print("-" * 80)
Example no. 2
import os

import matplotlib.pyplot as plt
import numpy as np

# GridWorld, QLearn and do_task are assumed to be importable from the
# surrounding project, as in Example no. 1.


def main(iteration):
    world = 4

    # saving directories
    window = 5  # moving mean window
    main_dir = 'qlearn_plots'
    sub_dir = ['4by4can', '4by4nocan', '9by9']
    sub_sub_dir = ['steps', 'episodes']

    for sub_d in sub_dir:
        for ss_d in sub_sub_dir:
            dir_name = '/'.join([main_dir, sub_d, 'win' + str(window), ss_d])
            if not os.path.exists(dir_name):
                os.makedirs(dir_name)

    # print("-" * 100)

    # Evaluation
    tot_steps = 0
    all_returns = []
    all_steps = []
    all_episodes = []

    notrl_tot_steps = 0
    notrl_returns = []
    notrl_steps = []
    notrl_episodes = []

    # create grid-world instance
    if world == 4:
        canyon = False
        grid = GridWorld(world, canyon)
        if canyon:
            canyon_str = "(CANYON)"
        else:
            canyon_str = "(NO CANYON)"
    elif world == 9:
        canyon_str = ''
        grid = GridWorld(9)
    grid.make_maps()

    possible_actions = grid.possible_actions
    grid.list_of_maps.reverse()

    # Direct learning on final grid
    # print("Direct learning on final grid")
    qlearn = QLearn(grid.final_grid, possible_actions, world)
    Q, returns, episodes, steps = do_task(qlearn, grid,
                                          len(grid.list_of_maps) - 1)
    notrl_returns.append(returns)
    notrl_steps.append(steps)
    notrl_episodes.append(episodes)
    notrl_tot_steps += steps[-1]
    # print("-" * 80)

    # Incremental transfer learning
    # print("Incremental transfer learning", canyon_str)
    Q = None
    for task, current_map in enumerate(grid.list_of_maps):
        # print("-" * 50)
        # creates qlearn instance, warm-started from the previous Q-table
        exploit = False  # exploitation disabled for every task here
        qlearn = QLearn(current_map, possible_actions, world, Q)
        Q, returns, episodes, steps = do_task(qlearn, grid, task, exploit)
        all_returns.append(returns)
        if task == 0:
            all_steps.append(list(steps))
            all_episodes.append(list(episodes))
        else:
            # offset by the previous task's totals so the learning curves
            # share one continuous x-axis across task switches
            step_offset = all_steps[task - 1][-1]
            epi_offset = all_episodes[task - 1][-1]
            all_steps.append([i + step_offset for i in steps])
            all_episodes.append([i + epi_offset for i in episodes])
    # print("-" * 100)

    # print("Incremental Transfer Cumulative total of steps",
    # all_steps[-1][-1] - all_steps[0][-1])
    # print("Direct Cumulative total of steps", notrl_steps[-1][-1])

    # flatten the per-task lists and smooth the returns with a trailing
    # moving mean over at most `window` + 1 points
    flat_episodes = [item for sublist in all_episodes for item in sublist]
    flat_returns = [item for sublist in all_returns for item in sublist]
    flat_steps = [item for sublist in all_steps for item in sublist]
    tmp_array = np.array(flat_returns)
    avg_returns = [
        tmp_array[max(0, t - window):t + 1].mean()
        for t in range(len(flat_returns))
    ]
    notrl_flat_returns = [
        item for sublist in notrl_returns for item in sublist
    ]
    tmp_array_1 = np.array(notrl_flat_returns)
    notrl_avg_returns = [
        tmp_array_1[max(0, t - window):t + 1].mean()
        for t in range(len(notrl_flat_returns))
    ]

    fig = plt.figure()
    a0 = fig.add_subplot(1, 1, 1)
    # mark each task switch with a vertical line; label only the last one
    # so the legend gets a single 'Task Switch' entry
    for j, i in enumerate(all_steps):
        if j == len(all_steps) - 1:
            a0.axvline(x=i[-1],
                       linestyle='--',
                       color='#ccc5c6',
                       label='Task Switch')
        else:
            a0.axvline(x=i[-1], linestyle='--', color='#ccc5c6')
    a0.plot(flat_steps,
            avg_returns,
            label="Task Interpolation",
            color='#d73236',
            linewidth=1,
            linestyle='-')
    # shift the tabula-rasa x-values so its curve starts where the first
    # task of the incremental run ends
    x_steps = [
        i + all_steps[0][-1] - notrl_steps[0][0] for i in notrl_steps[0]
    ]
    a0.plot(x_steps,
            notrl_avg_returns,
            label="Tabula Rasa",
            color='#80bbe5',
            linestyle='-',
            linewidth=1)
    plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
    plt.xlabel("Steps")
    plt.ylabel("Accumulated Reward")
    plt.legend(loc="lower right")
    plt.ylim(-20, 1)
    if world == 4:
        if canyon:
            step_save = '/'.join([main_dir, '4by4can', 'win' + str(window),
                                  'steps', '4by4_canyon_steps'])
            plt_title = '4x4 Maze Canyon'
        else:
            step_save = '/'.join([main_dir, '4by4nocan', 'win' + str(window),
                                  'steps', '4by4_nocanyon_steps'])
            plt_title = '4x4 Maze Non-Canyon'
    elif world == 9:
        step_save = '/'.join([main_dir, '9by9', 'win' + str(window),
                              'steps', '9by9_steps'])
        plt_title = '9x9 Maze'
    plt.title(plt_title)
    plt.savefig(step_save + str(iteration) + '.eps', format='eps', dpi=1000)
    # fig.show()

    fig1 = plt.figure()
    a1 = fig1.add_subplot(1, 1, 1)
    # again, label only the last task-switch line for the legend
    for j, i in enumerate(all_episodes):
        if j == len(all_episodes) - 1:
            a1.axvline(x=i[-1],
                       linestyle='--',
                       color='#ccc5c6',
                       label='Task Switch')
        else:
            a1.axvline(x=i[-1], linestyle='--', color='#ccc5c6')
    a1.plot(flat_episodes,
            avg_returns,
            label="Task Interpolation",
            color='#d73236',
            linewidth=1,
            linestyle='-')
    # shift the tabula-rasa curve to start at the end of the first task
    x_episodes = [
        i + all_episodes[0][-1] - notrl_episodes[0][0]
        for i in notrl_episodes[0]
    ]
    a1.plot(x_episodes,
            notrl_avg_returns,
            label="Tabula Rasa",
            color='#80bbe5',
            linestyle='-',
            linewidth=1)
    plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
    plt.xlabel("Episodes")
    plt.ylabel("Accumulated Reward")
    plt.legend(loc="lower right")
    plt.ylim(-20, 1)
    plt.title(plt_title)
    if world == 4:
        if canyon:
            epi_save = '/'.join([main_dir, '4by4can', 'win' + str(window),
                                 'episodes', '4by4_canyon_episodes'])
        else:
            epi_save = '/'.join([main_dir, '4by4nocan', 'win' + str(window),
                                 'episodes', '4by4_nocanyon_episodes'])
    elif world == 9:
        epi_save = '/'.join([main_dir, '9by9', 'win' + str(window),
                             'episodes', '9by9_episodes'])
    plt.savefig(epi_save + str(iteration) + '.eps', format='eps', dpi=1000)
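
main() appends iteration to the saved file names, so a minimal driver
could pass a run tag; the run count of 3 below is an arbitrary choice,
not taken from the original project.

if __name__ == '__main__':
    # repeat the experiment, tagging each saved figure with its run index
    for run in range(3):
        main('_run' + str(run))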