def init_args_and_threads(cpu_count=4,
                          monitor_csv_policy='all',
                          rand_seed=None):
    """
    Perform one-time global init for the CoinRun library. This must be called
    before creating an instance of CoinRunVecEnv. You should not call this
    multiple times from the same process.
    """
    os.environ['COINRUN_RESOURCES_PATH'] = os.path.join(SCRIPT_DIR, 'assets')
    is_high_difficulty = Config.HIGH_DIFFICULTY

    if rand_seed is None:
        rand_seed = random.SystemRandom().randint(0, 1000000000)

        # Ensure different MPI processes get different seeds
        # (just in case the SystemRandom implementation is poor).
        mpi_rank, mpi_size = mpi_util.get_local_rank_size(MPI.COMM_WORLD)
        rand_seed = rand_seed - rand_seed % mpi_size + mpi_rank

    int_args = np.array([int(is_high_difficulty),
                         Config.NUM_LEVELS,
                         int(Config.PAINT_VEL_INFO),
                         Config.USE_DATA_AUGMENTATION,
                         game_versions[Config.GAME_TYPE],
                         Config.SET_SEED,
                         rand_seed,
                         Config.TRAIN_FLAG]).astype(np.int32)

    lib.initialize_args(int_args)
    lib.initialize_set_monitor_dir(
        logger.get_dir().encode('utf-8'),
        {'off': 0, 'first_env': 1, 'all': 2}[monitor_csv_policy])

    global already_inited
    if already_inited:
        return

    lib.init(cpu_count)
    already_inited = True
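
# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of the intended call order: global init exactly once per
# process, then construct the vectorized env. The `make(env_id, num_envs)`
# factory named below is an assumption about this module's API, not something
# confirmed by this file.
def _example_one_time_init():
    init_args_and_threads(cpu_count=8,
                          monitor_csv_policy='first_env',
                          rand_seed=42)
    venv = make('standard', num_envs=16)  # hypothetical CoinRunVecEnv factory
    obs = venv.reset()                    # standard VecEnv reset -> batched obs
    return venv, obs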
def build():
    lrank, _lsize = mpi_util.get_local_rank_size(MPI.COMM_WORLD)
    if lrank == 0:
        dirname = os.path.dirname(__file__)
        if len(dirname):
            make_cmd = "QT_SELECT=5 make -C %s" % dirname
        else:
            make_cmd = "QT_SELECT=5 make"

        r = os.system(make_cmd)
        if r != 0:
            logger.error('coinrun: make failed')
            sys.exit(1)
    MPI.COMM_WORLD.barrier()
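
# --- Pattern sketch (illustrative, not part of the original module) ---
# build() uses a common MPI idiom: only local rank 0 runs the side-effecting
# shell command, and every rank then waits at a barrier so no process touches
# the compiled artifact before it exists. A self-contained version of the same
# idiom with plain mpi4py; the `touch` command is a stand-in for `make`.
def _example_rank0_build_then_barrier():
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    if comm.Get_rank() == 0:
        # Stand-in for the real build step.
        status = os.system("touch /tmp/build_done.marker")
        if status != 0:
            # Abort every rank; a bare sys.exit here would leave the
            # remaining ranks deadlocked at the barrier below.
            comm.Abort(1)
    comm.barrier()  # all ranks proceed only after rank 0 finished building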
def main(mainkwargs):
    predict_only = is_predict_only(**mainkwargs)
    mega_batch_itr = 1
    sys_args = sys.argv
    policy = None
    play_env = None
    max_iteration = 1
    config = {
        "LOAD_MODEL_FOLDER": "20200111-051102",
        "RESTORE_COND": "RESTORE",
        "MODEL": {
            # "use_lstm": True,
            "fcnet_hiddens": [256, 128, 128],
            # "fcnet_activation": "relu",
        },
        # "num_workers": 12,
    }

    if not predict_only:
        if RUN_WITH_RAY:
            import urban_env
            from urban_env.envs.two_way_env import TwoWayEnv
            from urban_env.envs.abstract import AbstractEnv
            register_env('multilane-v0', lambda config: urban_env.envs.MultilaneEnv(config))
            register_env('merge-v0', lambda config: urban_env.envs.MergeEnv(config))
            register_env('roundabout-v0', lambda config: urban_env.envs.RoundaboutEnv(config))
            register_env('two-way-v0', lambda config: urban_env.envs.TwoWayEnv(config))
            register_env('parking-v0', lambda config: urban_env.envs.ParkingEnv(config))
            register_env('parking_2outs-v0', lambda config: urban_env.envs.ParkingEnv_2outs(config))
            register_env('LG-SIM-ENV-v0', lambda config: urban_env.envs.LG_Sim_Env(config))
            register_env('multitask-v0', lambda config: MultiTaskEnv(config))
            from raylibs import ray_train, ray_init
            from ray_rollout import ray_retrieve_agent
            from settings import update_policy
            available_cluster_cpus, available_cluster_gpus = ray_init(**mainkwargs)
            # play_env = gym.make(play_env_id)
            # retrieved_agent = ray_retrieve_agent(play_env_id)
            # retrieved_agent_policy = retrieved_agent.get_policy()
            # update_policy(retrieved_agent_policy)
            save_in_sub_folder = pathname + "/" + ray_folder + "/" + InceptcurrentDT
            print("save_in_sub_folder is ", save_in_sub_folder)
            mainkwargs = {
                **mainkwargs,
                "save_in_sub_folder": save_in_sub_folder,
                "available_cluster_cpus": available_cluster_cpus,
                "available_cluster_gpus": available_cluster_gpus,
            }
            ray_train(config=config, **mainkwargs)
        else:
            while mega_batch_itr <= max_iteration:
                from baselines.common import tf_util, mpi_util
                from baselines.common.vec_env import VecEnv
                import baselines.run as run  # needed below for run.main()

                # Start each batch iteration from a fresh TF graph/session.
                sess = tf_util.get_session()
                sess.close()
                tf.reset_default_graph()

                print(" Batch iteration ", mega_batch_itr)
                print("(rank , size) = ", mpi_util.get_local_rank_size(MPI.COMM_WORLD))

                if len(sys_args) <= 1:
                    save_in_sub_folder = None
                    if max_iteration > 1:
                        save_in_sub_folder = InceptcurrentDT
                    args, args_dict = default_args(save_in_sub_folder=save_in_sub_folder)
                else:
                    # Assumption: with explicit CLI arguments, pass argv
                    # straight through to baselines.run.main().
                    args = sys_args
                policy = run.main(args)
                MPI.COMM_WORLD.barrier()
                # print("policy training args ", args, "\n\n")
                mega_batch_itr += 1

            play_env = gym.make(play_env_id)
            # from tensorboard import main as tb
            # tb.main()
            print(color.BOLD + 'Successfully ended Training!' + color.END)
    else:
        if RUN_WITH_RAY:
            from raylibs import ray_init
            ray_init(**mainkwargs)
            from ray_rollout import ray_retrieve_agent
            from settings import update_policy
            from raylibs import ray_play
            import urban_env
            from urban_env.envs.two_way_env import TwoWayEnv
            from urban_env.envs.abstract import AbstractEnv
            register_env('parking_2outs-v0', lambda config: urban_env.envs.ParkingEnv_2outs(config))
            # play_env = gym.make(play_env_id)
            # config = play_env.config
            retrieved_agent = ray_retrieve_agent(env_id=play_env_id, config=config)
            retrieved_agent_policy = retrieved_agent.get_policy()
            update_policy(retrieved_agent_policy)
            print("entering ray play")
            ray_play(env_id=play_env_id, config=config, agent=retrieved_agent)
        else:
            from baselines.common import tf_util, mpi_util
            from baselines.common.vec_env import VecEnv
            import baselines.run as run  # needed below for run.main()
            from baselinelibs import baselines_play

            play_env = gym.make(play_env_id)
            DFLT_ARGS, _ = default_args()
            # True only if default_args() actually included a load_path argument.
            loaded_file_correctly = any('load_path' in stringarg for stringarg in DFLT_ARGS)
            policy = run.main(DFLT_ARGS)

            # Just try to play (loops indefinitely once a model was loaded).
            while loaded_file_correctly:
                baselines_play(play_env, policy)
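
# --- Usage sketch (illustrative, not part of the original module) ---
# main() branches on is_predict_only(**mainkwargs): it trains (via Ray or
# baselines) when that is False, otherwise it restores an agent and plays it.
# The exact keys that is_predict_only() inspects are not visible in this file,
# so the "predict_only" flag below is a hypothetical example, not the
# confirmed interface.
def _example_driver():
    main({"predict_only": False})  # hypothetical kwarg: run a training pass
    main({"predict_only": True})   # hypothetical kwarg: restore and play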