import os

def main(arg):
    """Starts the selected training run.

    Args:
        arg: parsed command-line arguments (expects locexp, save_model and seed).
    """
    path = arg.locexp
    # experiment_name = args.experiment_name
    res_path = os.path.join(path, "results")
    if not os.path.exists(res_path):
        os.makedirs(res_path)
    dir_model = os.path.join(path, "pytorch_models")
    if arg.save_model and not os.path.exists(dir_model):
        os.makedirs(dir_model)
    train_agent(arg, arg.seed)
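A minimal launcher for this main could look like the sketch below; the flag names --locexp, --save_model and --seed are assumptions read off the attributes the function uses, not taken from the original project.

# Hypothetical launcher sketch for the main() above.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--locexp", type=str, default="experiments/run1")
    parser.add_argument("--save_model", action="store_true")
    parser.add_argument("--seed", type=int, default=0)
    main(parser.parse_args())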
Example #2
def main(args):
    """Loads hyperparameters from a JSON file and starts training."""
    with open(args.param, "r") as f:
        param = json.load(f)

    print("use the env {} ".format(param["env_name"]))
    print(param)
    print("Start Programm in {}  mode".format(args.mode))
    env = gym.make(param["env_name"])
    if args.mode == "args": 
        param["lr"] = args.lr
        param["fc1_units"] = args.fc1_units
        param["fc2_units"] = args.fc2_units
    env = FrameStack(env, param)
    train_agent(env, param)
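FrameStack above is the project's own wrapper; as a rough illustration of the idea, a frame-stacking wrapper along the following lines is common (the param["history_length"] key and the old gym reset/step API are assumptions, not taken from the original code).

# Sketch of a frame-stacking wrapper, not the project's FrameStack.
from collections import deque
import numpy as np

class SimpleFrameStack:
    def __init__(self, env, param):
        self.env = env
        self.k = param.get("history_length", 4)   # hypothetical key
        self.frames = deque(maxlen=self.k)

    def reset(self):
        obs = self.env.reset()
        for _ in range(self.k):
            self.frames.append(obs)
        return np.stack(self.frames, axis=0)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.frames.append(obs)
        return np.stack(self.frames, axis=0), reward, done, info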
Example #3
cos_ann = True
ann_cyc = 5

schedule = Schedule(t0, t1, e0, e1, decay_fun, cosine_annealing=cos_ann, annealing_cycles=ann_cyc)

# Policy
policy = EpsilonGreedyPolicy(schedule=schedule, value_function=Q)
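# What Schedule computes is project-specific; purely as an illustration of the
# parameters above (t0/t1 interval, e0/e1 start and end epsilon, a cosine wave
# restarted `annealing_cycles` times), a sketch could look like this. The linear
# base decay is an assumption; the original passes its own decay_fun.
import math

def epsilon_at(t, t0, t1, e0, e1, cosine_annealing=False, annealing_cycles=1):
    frac = min(max((t - t0) / float(t1 - t0), 0.0), 1.0)
    eps = e0 + (e1 - e0) * frac                      # base decay from e0 to e1
    if cosine_annealing:
        cycle_pos = (frac * annealing_cycles) % 1.0  # position within the current cycle
        eps = e1 + (eps - e1) * 0.5 * (1.0 + math.cos(math.pi * cycle_pos))
    return eps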

# Reward Function
reward_fun = rf_info2d_pos

# Action pre-/post-processing (maps discrete agent actions to continuous env actions)
act_fun = act_disc2cont

# Agent
lr = 1e-4
gamma = 0.99
doubleQ = True  # Run double DQN: decouple action selection and value bootstrapping between Q and Q_target
rb = False      # whether to use a replay buffer
rb_max_size = 1e6
rb_batch_size = 64
tau = 0.1

agent = DQN(policy, act_fun, Q, Q_target, state_dim, action_dim, gamma, doubleQ, reward_fun=reward_fun,
            replay_buffer=rb, max_buffer_size=rb_max_size, batch_size=rb_batch_size, tau=tau, lr=lr)

# Training
show = False

train_agent(agent, desc, file_name, runs, episodes, time_steps, test_episodes, init_state, init_noise, show=show)
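A side note on the doubleQ flag above: in double DQN the online network picks the next action and the target network evaluates it. A minimal PyTorch-style sketch of that target computation, assuming Q and Q_target map a batch of states to per-action values (an illustration, not the project's DQN class):

# Double-DQN target sketch (illustration only).
import torch

def double_q_target(Q, Q_target, next_states, rewards, dones, gamma=0.99):
    with torch.no_grad():
        next_actions = Q(next_states).argmax(dim=1, keepdim=True)    # select with the online net
        next_values = Q_target(next_states).gather(1, next_actions)  # evaluate with the target net
        return rewards + gamma * (1.0 - dones) * next_values.squeeze(1)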
def train_multiple_agents(model_dir,
                          local_pop_dir,
                          game_path,
                          base_port,
                          num_envs,
                          num_steps,
                          worker_idx,
                          total_workers,
                          reuse_ports=True,
                          level_path=None,
                          time_reward=0.):
    org_stdout = sys.stdout
    org_stderr = sys.stderr
    my_pop = subset_pop(train.load_pop(model_dir), worker_idx, total_workers)
    for i, p in enumerate(my_pop):
        print("Worker",
              worker_idx,
              "is starting training of",
              p,
              "for",
              num_steps,
              "steps",
              flush=True)
        sys.stdout = open(model_dir + p + "/train_log.txt", 'a')
        sys.stderr = sys.stdout
        p_base_port = base_port if reuse_ports else base_port + (num_envs * i * 2)
        j = 0
        last_error = None
        while p_base_port + (j * num_envs * 2) < 60000:
            try:
                train.train_agent(model_dir,
                                  local_pop_dir,
                                  p,
                                  game_path,
                                  p_base_port + (j * num_envs * 2),
                                  num_envs,
                                  num_steps,
                                  level_path=level_path,
                                  time_reward=time_reward)
                break
            except (ConnectionError, ConnectionResetError, EOFError,
                    json.decoder.JSONDecodeError) as e:
                print(type(e).__name__,
                      "detected during training, trying a higher port range")
                j += 1
                last_error = e
        sys.stdout.close()
        sys.stderr.close()
        sys.stdout = org_stdout
        sys.stderr = org_stderr
        if p_base_port + (j * num_envs * 2) >= 60000:
            if last_error:
                raise last_error
            else:
                raise ValueError(
                    "Port range exhausted below 60000 without a recorded error")
        print("Worker",
              worker_idx,
              "has completed training of",
              p,
              "for",
              num_steps,
              "steps",
              flush=True)
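The retry loop above keeps shifting to a higher port block until the game server connects. If you would rather probe for a free block up front instead of retrying on failure, a small standard-library check like the following can help (a sketch, not part of the original code):

# Free-port probe sketch using only the standard library.
import socket

def port_block_is_free(start_port, count):
    """Return True if `count` consecutive ports starting at start_port can be bound."""
    for port in range(start_port, start_port + count):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind(("127.0.0.1", port))
            except OSError:
                return False
    return True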
Example #5
import torch
import argparse
from train import train_agent

parser = argparse.ArgumentParser()
parser.add_argument("--env_id", type=str, default="PongNoFrameskip-v4")
parser.add_argument("--frame_stack", type=int, default=4)
parser.add_argument("--capacity", type=int, default=100000)
parser.add_argument("--batch_size", type=int, default=64)
parser.add_argument("--lr", type=float, default=0.00001)
parser.add_argument("--num_frames_to_train", type=int, default=1500000)
parser.add_argument("--warm_up", type=int, default=10000)
parser.add_argument("--gamma", type=float, default=0.99)
parser.add_argument("--update_target", type=int, default=1000)

args = parser.parse_args()

if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_agent(args, device)
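The --update_target flag in this launcher usually controls how often the target network is synchronised with the online network. A minimal sketch of such a hard sync, assuming train_agent keeps two torch modules (illustration only; the project's implementation may differ):

# Hard target-network sync sketch (illustration only).
def maybe_sync_target(frame_idx, q_net, target_net, update_target):
    if frame_idx % update_target == 0:
        target_net.load_state_dict(q_net.state_dict())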
def example_2():
    """ Runs empowerment maximising agent running in a chosen grid world """
    np.random.seed(1)
    # maze
    n_step = 3
    f = WorldFactory()
    w = f.klyubin_world()  # alternative: f.tunnel_world()
    B = w.compute_transition()
    strategy = VisitCountFast()
    E = strategy.compute(world=w, T=B, n_step=n_step).reshape(-1)

    initpos = [1, 3]  # alternatively: np.random.randint(w.dims[0], size=2)
    s = w._cell_to_index(initpos)

    # for reference
    emptymaze = MazeWorld(w.height, w.width)
    T = emptymaze.compute_transition()
    n_s, n_a, _ = T.shape

    # agent
    agent = EmpMaxAgent(alpha=0.1, gamma=0.9, T=T, n_step=n_step, n_samples=1000, det=1.)
    agent.s = s

    # training loop
    start = time.time()
    D_emp, D_mod, steps, tau, visited = train_agent(B, E, agent, w, n_s, n_a)
    print("elapsed seconds: %0.3f" % (time.time() - start))

    # some plotting
    fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(9, 6))
    #Amap = np.array([list(w.actions.values())[i] for i in agent.action_map])
    #ax[0, 0].quiver(np.arange(w.width) + .5, np.arange(w.height) + .5, Amap[:, 1].reshape(w.height, w.width), Amap[:, 0].reshape(w.height, w.width))

    w.plot(fig, ax[0, 0], colorMap=agent.E.reshape(*w.dims))
    ax[0, 0].set_title('subjective empowerment')
    print(f'min = {np.min(agent.E):.2f}, max = {np.max(agent.E):.2f}')

    w.plot(fig, ax[0, 1], colorMap=visited.reshape(*w.dims))
    ax[0, 1].set_title('visited')

    Vmap = agent.value_map.reshape(*w.dims)
    w.plot(fig, ax[0, 2], colorMap=Vmap)
    ax[0, 2].set_title('value map')
    print(f'min = {np.min(Vmap):.2f}, max = {np.max(Vmap):.2f}')

    ax[1, 1].set_title("tau")
    ax[1, 1].plot(tau)

    ax[1, 0].scatter(agent.E, visited.reshape(n_s))
    ax[1, 0].set_xlabel('true empowerment')
    ax[1, 0].set_ylabel('visit frequency')

    red = 'tab:red'
    ax[1, 2].plot(D_emp, color=red)
    ax[1, 2].set_xlabel('time')
    ax[1, 2].set_ylabel('MSE of empowerment map', color=red)
    ax[1, 2].tick_params(axis='y', labelcolor=red)

    ax2 = ax[1, 2].twinx()
    ax2.set_ylabel('Model disagreement', color='tab:blue')
    ax2.plot(D_mod, color='tab:blue')
    ax2.tick_params(axis='y', labelcolor='tab:blue')

    w.plot(fig, ax[2, 0], colorMap=E.reshape(*w.dims))
    ax[2, 0].set_title('true empowerment')

    plt.show()
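The empowerment values used above come from the project's VisitCountFast strategy. Purely as an illustration of the quantity being plotted, deterministic n-step empowerment can be written as the log of the number of distinct states reachable within n steps; the sketch below assumes a one-hot transition tensor indexed T[s_next, action, s], which is an assumption about the layout returned by compute_transition().

# Deterministic n-step empowerment sketch (illustration only, not VisitCountFast).
import numpy as np
from itertools import product

def deterministic_empowerment(T, n_step):
    n_s, n_a, _ = T.shape
    step = T.argmax(axis=0)                    # step[a, s] -> deterministic next state
    E = np.zeros(n_s)
    for s in range(n_s):
        reachable = set()
        for seq in product(range(n_a), repeat=n_step):
            cur = s
            for a in seq:
                cur = step[a, cur]
            reachable.add(cur)
        E[s] = np.log2(len(reachable))         # empowerment in bits
    return E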