예제 #1
0
def make_rollout_agent(ob_size, action_size, starting_weights, args):
    """Create a parallel rollout master agent and load its initial weights.

    Args:
        ob_size: Observation size forwarded to the rollout master agent.
        action_size: Action size forwarded to the rollout master agent.
        starting_weights: Policy weights handed to ``set_policy_weights``
            right after construction.
        args: Configuration object passed through as the agent's first
            constructor argument.

    Returns:
        The constructed rollout master agent, with ``starting_weights``
        already applied.
    """
    master = rollout_master_agent.parallel_rollout_master_agent(
        args, ob_size, action_size
    )
    master.set_policy_weights(starting_weights)
    return master
예제 #2
0
        # Queues shared with the optimizer agent: requests are put on
        # `optimizer_tasks`, replies are read back from `optimizer_results`.
        optimizer_tasks = multiprocessing.JoinableQueue()
        optimizer_results = multiprocessing.Queue()
        # The optimizer is sized from the first dimension of the learner
        # environment's observation and action spaces.
        optimizer_agent = optimization_agent.optimization_agent(
            args,
            learner_env.observation_space.shape[0],
            learner_env.action_space.shape[0],
            optimizer_tasks,
            optimizer_results
        )
        optimizer_agent.start()

        # The rollout master agent, built with the same observation/action
        # sizes as the optimizer.
        rollout_agent = rollout_master_agent.parallel_rollout_master_agent(
            args,
            learner_env.observation_space.shape[0],
            learner_env.action_space.shape[0]
        )

        # Start training and rollouts: send the start signal, block in
        # `join()` until every queued task has been marked done, then read
        # the reply, which is used as the initial policy weights.
        optimizer_tasks.put(parallel_util.START_SIGNAL)
        optimizer_tasks.join()
        starting_weights = optimizer_results.get()
        # HACK: fixed 5-second pause before pushing the weights. If the
        # number of threads is too large, some rollout agents might not even
        # have been created by the time the weights are assigned, so this
        # delay is a best-effort workaround rather than a guarantee.
        time.sleep(5)
        rollout_agent.set_policy_weights(starting_weights)
    else:
        # the case where invoking dm_control suite
        pass