Exemple #1
0
def prepare_experiment(env, args):
    """Create the objects shared between a learner and its explorers.

    Starts a ``SyncManager``, registers ``PrioritizedReplayBuffer`` on it and
    builds a managed replay buffer from the keyword arguments returned by
    ``get_default_rb_dict`` (with ``check_for_update`` forced to ``True``).

    Args:
        env: Environment forwarded to ``get_default_rb_dict``.
        args: Options object; ``replay_buffer_size``, ``n_env`` and
            ``n_explorer`` are read from it.

    Returns:
        A tuple ``(global_rb, queues, is_training_done, lock, trained_steps)``.
    """
    # Manager used to share the prioritized replay buffer between a learner
    # and explorers.
    SyncManager.register('PrioritizedReplayBuffer', PrioritizedReplayBuffer)
    manager = SyncManager()
    manager.start()

    rb_kwargs = get_default_rb_dict(args.replay_buffer_size, env)
    rb_kwargs["check_for_update"] = True
    global_rb = manager.PrioritizedReplayBuffer(**rb_kwargs)

    # Queues used to share network parameters between a learner and
    # explorers: a single one when ``args.n_env > 1``, otherwise one per
    # explorer.
    if args.n_env > 1:
        n_param_queues = 1
    else:
        n_param_queues = args.n_explorer
    n_param_queues += 1  # one more queue for evaluation
    queues = []
    for _ in range(n_param_queues):
        queues.append(manager.Queue())

    # Event used to share the training status; once it is set, all explorers
    # stop sampling transitions.
    is_training_done = Event()

    # Lock created through the manager.
    lock = manager.Lock()

    # Shared integer counter for the number of applied gradients.
    trained_steps = Value('i', 0)

    return global_rb, queues, is_training_done, lock, trained_steps
Exemple #2
0
def main(args_):
    """Launch the explorer processes and one learner process, then wait.

    Args:
        args_: Options object; ``n_explorer``, ``replay_buffer_size``,
            ``local_buffer_size``, ``max_batch`` and ``param_update_freq``
            are read from it. When ``n_explorer`` is ``None`` the number of
            explorers is ``multiprocessing.cpu_count() - 1``.

    Raises:
        AssertionError: If the resolved number of explorers is not positive.
    """
    n_explorer = (args_.n_explorer if args_.n_explorer is not None
                  else multiprocessing.cpu_count() - 1)
    assert n_explorer > 0, "[error] number of explorers must be positive integer"

    env = env_fn()

    # Manager used to share the prioritized replay buffer between a learner
    # and explorers.
    SyncManager.register('PrioritizedReplayBuffer', PrioritizedReplayBuffer)
    manager = SyncManager()
    manager.start()
    rb_kwargs = {
        "obs_shape": env.observation_space.shape,
        "act_dim": env.action_space.low.size,
        "size": args_.replay_buffer_size,
    }
    global_rb = manager.PrioritizedReplayBuffer(**rb_kwargs)

    # One queue per explorer to share network parameters with the learner.
    queues = []
    for _ in range(n_explorer):
        queues.append(Queue())

    # Event used to share the training status; once it is set, all explorers
    # stop sampling transitions.
    is_training_done = Event()

    # Lock created through the manager.
    lock = manager.Lock()

    # Shared integer counters for applied gradients and sampled transitions.
    trained_steps = Value('i', 0)
    n_transition = Value('i', 0)

    # Explorer processes come first, each with its own parameter queue.
    tasks = [
        Process(
            target=explorer,
            args=(global_rb, param_queue, trained_steps, n_transition,
                  is_training_done, lock, env_fn, policy_fn,
                  args_.local_buffer_size))
        for param_queue in queues
    ]

    # The learner receives every explorer queue as trailing positional args.
    learner_args = (global_rb, trained_steps, is_training_done, lock, env_fn,
                    policy_fn, args_.max_batch, args_.param_update_freq,
                    *queues)
    tasks.append(Process(target=learner, args=learner_args))

    for proc in tasks:
        proc.start()
    for proc in tasks:
        proc.join()
Exemple #3
0
    PER_a = 0.6  # P(i) = p(i) ** a / total_priority ** a

    env_dict = {
        "obs": {
            "shape": state_size
        },
        "act": {},
        "rew": {},
        "next_obs": {
            "shape": state_size
        },
        "done": {}
    }
    global_rb = manager.PrioritizedReplayBuffer(memory_size,
                                                env_dict=env_dict,
                                                alpha=PER_a,
                                                default_dtype=np.float16,
                                                check_for_update=True)

    n_explorer = multiprocessing.cpu_count() - 1
    epsilons = [
        pow(0.4, 1 + (i / (n_explorer - 1)) * 7) for i in range(n_explorer)
    ]  # apex paper

    n_queue = n_explorer
    n_queue += 1  # for evaluation
    # n_queue += 1  # for prefetch
    queues = [manager.Queue() for _ in range(n_queue)]

    # Event object to share training status. If the event is set, all explorers stop sampling transitions
    is_training_done = Event()
Exemple #4
0
    PER_e = 0.01  # epsilon -> pi = |delta| + epsilon transitions which have zero error also have chance to be selected
    PER_a = 0.6  # P(i) = p(i) ** a / total_priority ** a

    env_dict = {
        "obs": {
            "shape": (state_size, )
        },
        "act": {},
        "rew": {},
        "next_obs": {
            "shape": (state_size, )
        },
        "done": {}
    }
    global_rb = manager.PrioritizedReplayBuffer(memory_size,
                                                env_dict=env_dict,
                                                alpha=PER_a,
                                                eps=PER_e)

    n_explorer = multiprocessing.cpu_count() - 1

    n_queue = n_explorer
    n_queue += 1  # for evaluation
    queues = [manager.Queue() for _ in range(n_queue)]

    # Event object to share training status. If the event is set, all explorers stop sampling transitions
    is_training_done = Event()

    # Lock
    lock = manager.Lock()

    # Shared memory objects to count number of samples and applied gradients