Example #1
from gw_collect import Gridworld
import pygame as pg
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F

from copy import deepcopy
from vicero.algorithms.qlearning import Qlearning

#env = Gridworld(width=6, height=6, cell_size=32, agent_pos=(0, 3), food_pos=[(0, 0), (3, 3), (4, 5), (2, 0)])
# Two identical 4x4 gridworlds: same agent start and the same food layout.
env_a = Gridworld(width=4,
                  height=4,
                  cell_size=32,
                  agent_pos=(0, 0),
                  food_pos=[(0, 3), (3, 3)])
env_b = Gridworld(width=4,
                  height=4,
                  cell_size=32,
                  agent_pos=(0, 0),
                  food_pos=[(0, 3), (3, 3)])

pg.init()
screen = pg.display.set_mode(
    (env_a.cell_size * env_a.width, env_a.cell_size * env_a.height))
env_a.screen = screen  # attach the display to env_a; env_b stays headless here
clock = pg.time.Clock()
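
The snippet stops right after the window is created, before anything is drawn. A minimal render-loop sketch, assuming the Gridworld.draw(screen) API that appears in Example #5; the loop itself is not part of the original:

running = True
while running:
    for event in pg.event.get():
        if event.type == pg.QUIT:  # close the window cleanly
            running = False
    env_a.draw(screen)             # paint the grid onto the shared surface
    pg.display.flip()              # present the frame
    clock.tick(30)                 # cap the loop at 30 FPS
pg.quit()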

Example #2
    if len(durations_t) >= 100:
        # 100-episode moving average: unfold builds the sliding windows, and
        # the zero padding keeps the curve aligned with the episode index.
        means = durations_t.unfold(0, 100, 1).mean(1).view(-1)
        means = torch.cat((torch.zeros(99), means))
        plt.plot(means.numpy(), c='green')

    plt.pause(0.001)


print('Set A')
histories_a = []

env_list = []

# Ten 4x4 gridworlds with layouts generated from seeds 20 through 29.
for i in range(10):
    env_list.append(Gridworld(width=4, height=4, cell_size=32, seed=(20 + i)))
#for i in [8, 0, 2, 4, 7]:
#    env_list.append(Gridworld(width=4, height=4, cell_size=32, seed=i))

env = MultitaskEnvironment(env_list)  # project helper bundling the seeded tasks

for _ in range(repetitions):
    print('x')
    dqn = DQN(env,
              qnet=NeuralNet(64, 4).double(),
              plotter=plot,
              render=False,
              memory_length=2000,
              gamma=.99,
              alpha=.001,
              epsilon_start=0.1)
    dqn.train(training_iterations, 4, plot=False)
    histories_a.append(dqn.history)
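
Both this example and the next begin partway through the plotting callback passed as plotter=. A hedged reconstruction of the full helper, assuming the history argument is a plain list of episode durations; the green moving-average branch is verbatim from the snippet, while the signature, the raw blue curve, and the axis labels are assumptions:

def plot(history):
    # Hypothetical reconstruction of the callback: raw episode durations
    # plus the 100-episode moving average shown above.
    durations_t = torch.tensor(history, dtype=torch.float)
    plt.clf()
    plt.xlabel('Episode')
    plt.ylabel('Duration')
    plt.plot(durations_t.numpy(), c='blue')

    if len(durations_t) >= 100:
        means = durations_t.unfold(0, 100, 1).mean(1).view(-1)
        means = torch.cat((torch.zeros(99), means))
        plt.plot(means.numpy(), c='green')

    plt.pause(0.001)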
Example #3
    if len(durations_t) >= 100:
        # Same 100-episode moving-average plot as in Example #2.
        means = durations_t.unfold(0, 100, 1).mean(1).view(-1)
        means = torch.cat((torch.zeros(99), means))
        plt.plot(means.numpy(), c='green')

    plt.pause(0.001)


histories_all = []

env_list = []

# One environment per hand-picked seed.
for i in [8, 0, 2, 4, 7, 22, 23, 24, 25, 51]:
    print(i)
    env = Gridworld(width=4, height=4, cell_size=32, seed=i)

    history = []
    for _ in range(repetitions):
        dqn = DQN(env,
                  qnet=LinRegNet(64, 4).double(),
                  plotter=None,
                  render=False,
                  memory_length=2000,
                  gamma=.99,
                  alpha=.001,
                  epsilon_start=0.1,
                  plot_durations=True)
        dqn.train(training_iterations, 4, plot=False)
        history.append(dqn.history)
    histories_all.append(history)
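
A hedged follow-up sketch for the collected runs, reusing the numpy/matplotlib imports from Example #1 and assuming each dqn.history is a list of per-episode durations with the same length across repetitions; the aggregation below is an illustration, not part of the original:

# Hypothetical aggregation: one averaged learning curve per seed.
for env_history in histories_all:
    mean_curve = np.mean(np.array(env_history), axis=0)
    plt.plot(mean_curve)
plt.xlabel('Episode')
plt.ylabel('Mean duration over repetitions')
plt.show()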
Example #4
            state = torch.from_numpy(state)
            # Squared L2 distance between the two agents' network outputs
            # for the same state.
            env_diffs.append(
                torch.sum((rf_a.policy_net(state) -
                           rf_b.policy_net(state))**2).item())

        print('{}/{} mean difference: {:.4f}'.format(ne + 1, iterations,
                                                     np.mean(env_diffs)))
        all_mean_diffs.append(np.mean(env_diffs))

    absolutely_all_diffs.append(all_mean_diffs)
    return all_mean_diffs[-1]


# A pair of environments that differ only in the agent's start position.
envs = [(Gridworld(width=4,
                   height=4,
                   cell_size=32,
                   agent_pos=(2, 0),
                   food_pos=[(1, 3), (3, 3)]),
         Gridworld(width=4,
                   height=4,
                   cell_size=32,
                   agent_pos=(0, 0),
                   food_pos=[(1, 3), (3, 3)]))]

for env_pair in envs:
    print(env_diff(env_pair[0], env_pair[1], 50, 100))

for diff in absolutely_all_diffs:
    plt.plot(diff)

#plt.savefig('test.png')
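
env_diff is only visible from the inside here. The core measurement can be reproduced in isolation; the sketch below uses plain nn.Linear stand-ins for the project's networks and one-hot state encodings matching the 64-input/4-output shape used elsewhere in these examples (both stand-ins are assumptions):

# Stand-ins for rf_a.policy_net / rf_b.policy_net from the fragment above.
net_a = nn.Linear(64, 4).double()
net_b = nn.Linear(64, 4).double()

diffs = []
for s in range(64):
    state = torch.zeros(64, dtype=torch.double)  # hypothetical one-hot state
    state[s] = 1.0
    diffs.append(torch.sum((net_a(state) - net_b(state))**2).item())
print('mean difference: {:.4f}'.format(np.mean(diffs)))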
Example #5
from gw_collect import Gridworld
import pygame as pg

pg.init()  # initialize pygame once, not on every loop iteration

# Draw each of ten differently seeded 4x4 layouts.
for i in range(10):
    env = Gridworld(width=4, height=4, cell_size=32, seed=i)
    env.reset()

    screen = pg.display.set_mode(
        (env.cell_size * env.width, env.cell_size * env.height))
    env.screen = screen
    env.draw(screen)
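    # Hedged addition, not in the original snippet: if Gridworld.draw only
    # paints to the surface without flipping, the frame never reaches the
    # display. Present it explicitly and pause so each layout is visible.
    # (If draw() already flips, these two lines are harmless but redundant.)
    pg.display.flip()
    pg.time.wait(500)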
Example #6

import numpy as np

from gw_collect import Gridworld

# dqn_benchmark, rein_benchmark, and exp come from elsewhere in the project.


def average_environment_duration_dqn(env_set, iterations_per_env):
    all_durations = []
    for env in env_set:
        all_durations.append('env={},mean_duration={}'.format(
            env, np.mean(dqn_benchmark(env, iterations_per_env))))
        print(all_durations[-1])
    return all_durations


def average_environment_duration_rein(env_set, iterations_per_env):
    all_durations = []
    for env in env_set:
        all_durations.append('env={},mean_duration={}'.format(
            env, np.mean(rein_benchmark(env, iterations_per_env))))
        print(all_durations[-1])
    return all_durations


#env = Gridworld(width=4, height=4, cell_size=32, seed=33)
env_set = []
for i in range(10):
    env_set.append(Gridworld(width=4, height=4, cell_size=32, seed=i))

exp.run(average_environment_duration_rein,
        params={
            'env_set': env_set,
            'iterations_per_env': 10
        },
        k=1)
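
The two helpers above are identical except for the benchmark they call. A small refactoring sketch that passes the benchmark in as a parameter; average_environment_duration is a new name introduced here, and dqn_benchmark / rein_benchmark are assumed to keep the signatures used above:

def average_environment_duration(benchmark, env_set, iterations_per_env):
    # Generic form of the two helpers: 'benchmark' is either dqn_benchmark
    # or rein_benchmark.
    all_durations = []
    for env in env_set:
        all_durations.append('env={},mean_duration={}'.format(
            env, np.mean(benchmark(env, iterations_per_env))))
        print(all_durations[-1])
    return all_durations

# Usage, mirroring the run above:
# average_environment_duration(rein_benchmark, env_set, 10)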