def make_rollout_agent(ob_size, action_size, starting_weights, args):
    """Build a parallel rollout master agent and load its initial weights.

    Args:
        ob_size: observation size, forwarded to the master agent constructor.
        action_size: action size, forwarded to the master agent constructor.
        starting_weights: policy weights handed to ``set_policy_weights``.
        args: configuration object, passed as the constructor's first
            argument.

    Returns:
        The newly constructed rollout master agent, after its policy weights
        have been set to ``starting_weights``.
    """
    master = rollout_master_agent.parallel_rollout_master_agent(
        args, ob_size, action_size
    )
    # Seed the policy before the agent is handed back to the caller.
    master.set_policy_weights(starting_weights)
    return master
optimizer_tasks = multiprocessing.JoinableQueue() optimizer_results = multiprocessing.Queue() optimizer_agent = optimization_agent.optimization_agent( args, learner_env.observation_space.shape[0], learner_env.action_space.shape[0], optimizer_tasks, optimizer_results ) optimizer_agent.start() # the rollouts agents rollout_agent = rollout_master_agent.parallel_rollout_master_agent( args, learner_env.observation_space.shape[0], learner_env.action_space.shape[0] ) # start the training and rollouting process optimizer_tasks.put(parallel_util.START_SIGNAL) optimizer_tasks.join() starting_weights = optimizer_results.get() # Note that it is super hacky, if the number of threads is too big, # when the weights are being assigned, some rollout_agents might # not even be created. time.sleep(5) rollout_agent.set_policy_weights(starting_weights) else: # the case where invoking dm_control suite pass