def unity_run(default_args, share_args, options, max_step, max_episode, save_frequency, name):
    from mlagents.envs import UnityEnvironment
    from utils.sampler import create_sampler_manager

    # resolve the algorithm class, its config, and whether it is a multi-agent ('ma_') variant
    try:
        tf_version, (model, policy_mode, _) = get_model_info(options['--algorithm'])
        algorithm_config = sth.load_config(f'./Algorithms/{tf_version}/config.yaml')[options['--algorithm']]
        ma = options['--algorithm'][:3] == 'ma_'
    except KeyError:
        raise NotImplementedError

    reset_config = default_args['reset_config']
    if options['--unity']:
        # connect to a running Unity Editor instance
        env = UnityEnvironment()
        env_name = 'unity'
    else:
        file_name = default_args['exe_file'] if options['--env'] == 'None' else options['--env']
        if os.path.exists(file_name):
            env = UnityEnvironment(
                file_name=file_name,
                base_port=int(options['--port']),
                no_graphics=False if options['--inference'] else not options['--graphic'])
            env_dir = os.path.split(file_name)[0]
            env_name = os.path.join(*env_dir.replace('\\', '/').replace(r'//', r'/').split('/')[-2:])
            sys.path.append(env_dir)
            # an environment build may ship its own reset config and training loop
            if os.path.exists(env_dir + '/env_config.py'):
                import env_config
                reset_config = env_config.reset_config
                max_step = env_config.max_step
            if os.path.exists(env_dir + '/env_loop.py'):
                from env_loop import Loop
        else:
            raise Exception('cannot find this file.')

    sampler_manager, resampling_interval = create_sampler_manager(
        options['--sampler'], env.reset_parameters)

    # fall back to the generic loop only if the environment did not provide one
    if 'Loop' not in locals():
        if ma:
            from ma_loop import Loop
        else:
            from loop import Loop

    if options['--config-file'] != 'None':
        algorithm_config = update_config(algorithm_config, options['--config-file'])
    _base_dir = os.path.join(share_args['base_dir'], env_name, options['--algorithm'])
    base_dir = os.path.join(_base_dir, name)
    show_config(algorithm_config)

    brain_names = env.external_brain_names
    brains = env.brains
    brain_num = len(brain_names)

    visual_resolutions = {}
    for i in brain_names:
        if brains[i].number_visual_observations:
            visual_resolutions[f'{i}'] = [
                brains[i].camera_resolutions[0]['height'],
                brains[i].camera_resolutions[0]['width'],
                1 if brains[i].camera_resolutions[0]['blackAndWhite'] else 3
            ]
        else:
            visual_resolutions[f'{i}'] = []

    model_params = [{
        's_dim': brains[i].vector_observation_space_size * brains[i].num_stacked_vector_observations,
        'a_dim_or_list': brains[i].vector_action_space_size,
        'action_type': brains[i].vector_action_space_type,
        'max_episode': max_episode,
        'base_dir': os.path.join(base_dir, i),
        'logger2file': share_args['logger2file'],
        'out_graph': share_args['out_graph'],
    } for i in brain_names]

    if ma:
        assert brain_num > 1, 'if using ma* algorithms, the number of brains must be larger than 1'
        data = ExperienceReplay(share_args['ma']['batch_size'], share_args['ma']['capacity'])
        extra_params = {'data': data}
        models = [model(n=brain_num, i=i, **model_params[i], **algorithm_config)
                  for i in range(brain_num)]
    else:
        extra_params = {}
        models = [
            model(visual_sources=brains[i].number_visual_observations,
                  visual_resolution=visual_resolutions[f'{i}'],
                  **model_params[index],
                  **algorithm_config)
            for index, i in enumerate(brain_names)
        ]

    for index, i in enumerate(brain_names):
        models[index].init_or_restore(
            os.path.join(_base_dir, name if options['--load'] == 'None' else options['--load'], i))
    begin_episode = models[0].get_init_episode()

    params = {
        'env': env,
        'brain_names': brain_names,
        'models': models,
        'begin_episode': begin_episode,
        'save_frequency': save_frequency,
        'reset_config': reset_config,
        'max_step': max_step,
        'max_episode': max_episode,
        'sampler_manager': sampler_manager,
        'resampling_interval': resampling_interval,
        'policy_mode': policy_mode
    }
    if 'batch_size' in algorithm_config and options['--fill-in']:
        steps = algorithm_config['batch_size']
    else:
        steps = default_args['no_op_steps']
    no_op_params = {
        'env': env,
        'brain_names': brain_names,
        'models': models,
        'brains': brains,
        'steps': steps,
        'choose': options['--noop-choose']
    }
    params.update(extra_params)
    no_op_params.update(extra_params)

    if options['--inference']:
        Loop.inference(env, brain_names, models,
                       reset_config=reset_config,
                       sampler_manager=sampler_manager,
                       resampling_interval=resampling_interval)
    else:
        try:
            for i in brain_names:
                sth.save_config(os.path.join(base_dir, i, 'config'), algorithm_config)
            Loop.no_op(**no_op_params)
            Loop.train(**params)
        except Exception as e:
            print(e)
        finally:
            try:
                for m in models:
                    m.close()
            except Exception as e:
                print(e)
            finally:
                env.close()
                sys.exit()
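# A minimal invocation sketch (not from the original source): the option keys and
# arg-dict keys mirror the ones read inside unity_run above, but every concrete
# value (paths, port, buffer sizes, step counts) is hypothetical.
options = {
    '--algorithm': 'ppo', '--unity': False, '--env': 'None',
    '--port': '5005', '--inference': False, '--graphic': False,
    '--sampler': 'None', '--config-file': 'None', '--load': 'None',
    '--fill-in': False, '--noop-choose': False,
}
default_args = {'reset_config': {}, 'exe_file': './envs/3DBall/3DBall', 'no_op_steps': 100}
share_args = {'base_dir': './models', 'logger2file': False, 'out_graph': False,
              'ma': {'batch_size': 256, 'capacity': 10000}}
unity_run(default_args, share_args, options,
          max_step=2000, max_episode=5000, save_frequency=10, name='run0')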
def gym_run(default_args, share_args, options, max_step, max_episode, save_frequency, name):
    from gym_loop import Loop
    from gym.spaces import Box, Discrete, Tuple
    from gym_wrapper import gym_envs

    try:
        tf_version, (model, policy_mode, _) = get_model_info(options['--algorithm'])
        algorithm_config = sth.load_config(f'./Algorithms/{tf_version}/config.yaml')[options['--algorithm']]
    except KeyError:
        raise NotImplementedError

    available_type = [Box, Discrete]
    render_episode = int(options['--render-episode']) if options['--render-episode'] != 'None' else default_args['render_episode']

    try:
        env = gym_envs(options['--gym-env'], int(options['--gym-agents']))
        assert type(env.observation_space) in available_type and type(env.action_space) in available_type, \
            'action_space and observation_space must be one of available_type'
    except Exception as e:
        print(e)

    if options['--config-file'] != 'None':
        algorithm_config = update_config(algorithm_config, options['--config-file'])
    _base_dir = os.path.join(share_args['base_dir'], options['--gym-env'], options['--algorithm'])
    base_dir = os.path.join(_base_dir, name)
    show_config(algorithm_config)

    # derive the model's input/output dimensions from the gym spaces
    if type(env.observation_space) == Box:
        s_dim = env.observation_space.shape[0] if len(env.observation_space.shape) == 1 else 0
    else:
        s_dim = int(env.observation_space.n)
    if len(env.observation_space.shape) == 3:
        # 3-D observations are treated as a single camera image
        visual_sources = 1
        visual_resolution = list(env.observation_space.shape)
    else:
        visual_sources = 0
        visual_resolution = []

    if type(env.action_space) == Box:
        assert len(env.action_space.shape) == 1, 'if the action space is continuous, the action shape must be of length 1'
        a_dim_or_list = env.action_space.shape
        action_type = 'continuous'
    elif type(env.action_space) == Tuple:
        assert all(type(i) == Discrete for i in env.action_space), \
            'if the action space is a Tuple, every item in it must be Discrete'
        a_dim_or_list = [i.n for i in env.action_space]
        action_type = 'discrete'
    else:
        a_dim_or_list = [env.action_space.n]
        action_type = 'discrete'

    gym_model = model(s_dim=s_dim,
                      visual_sources=visual_sources,
                      visual_resolution=visual_resolution,
                      a_dim_or_list=a_dim_or_list,
                      action_type=action_type,
                      max_episode=max_episode,
                      base_dir=base_dir,
                      logger2file=share_args['logger2file'],
                      out_graph=share_args['out_graph'],
                      **algorithm_config)
    gym_model.init_or_restore(
        os.path.join(_base_dir, name if options['--load'] == 'None' else options['--load']))
    begin_episode = gym_model.get_init_episode()

    params = {
        'env': env,
        'gym_model': gym_model,
        'action_type': action_type,
        'begin_episode': begin_episode,
        'save_frequency': save_frequency,
        'max_step': max_step,
        'max_episode': max_episode,
        'eval_while_train': default_args['eval_while_train'],  # whether to evaluate while training
        'max_eval_episode': default_args['max_eval_episode'],
        'render': default_args['render'],
        'render_episode': render_episode,
        'policy_mode': policy_mode
    }
    if 'batch_size' in algorithm_config and options['--fill-in']:
        steps = algorithm_config['batch_size']
    else:
        steps = default_args['random_steps']

    if options['--inference']:
        Loop.inference(env, gym_model, action_type)
    else:
        sth.save_config(os.path.join(base_dir, 'config'), algorithm_config)
        try:
            Loop.no_op(env, gym_model, action_type, steps, choose=options['--noop-choose'])
            Loop.train(**params)
        except Exception as e:
            print(e)
        finally:
            try:
                gym_model.close()
            except Exception as e:
                print(e)
            finally:
                env.close()
                sys.exit()
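# Worked example (standard gym classic-control spaces under the pre-0.26 gym API;
# the concrete envs are illustrative, not taken from this repo): how the
# Box/Discrete introspection above maps spaces to model dimensions.
import gym

demo_env = gym.make('CartPole-v1')
print(type(demo_env.observation_space))  # Box with shape (4,) -> s_dim = 4, visual_sources = 0
print(type(demo_env.action_space))       # Discrete(2)         -> a_dim_or_list = [2], action_type = 'discrete'

demo_env = gym.make('Pendulum-v0')
print(demo_env.action_space.shape)       # (1,) -> a_dim_or_list = (1,), action_type = 'continuous'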
def gym_run(default_args, share_args, options, max_step, max_episode, save_frequency, name, seed):
    from gym_loop import Loop
    from gym_wrapper import gym_envs

    model, algorithm_config, policy_mode = get_model_info(options['--algorithm'])
    render_episode = int(options['--render-episode']) if options['--render-episode'] != 'None' else default_args['render_episode']

    try:
        env = gym_envs(gym_env_name=options['--gym-env'],
                       n=int(options['--gym-agents']),
                       seed=int(options['--gym-env-seed']),
                       render_mode=default_args['render_mode'])
    except Exception as e:
        print(e)

    if options['--config-file'] != 'None':
        algorithm_config = update_config(algorithm_config, options['--config-file'])
    _base_dir = os.path.join(share_args['base_dir'], options['--gym-env'], options['--algorithm'])
    base_dir = os.path.join(_base_dir, name)
    show_config(algorithm_config)

    # the wrapper exposes the model dimensions directly, so no space introspection is needed here
    model_params = {
        's_dim': env.s_dim,
        'visual_sources': env.visual_sources,
        'visual_resolution': env.visual_resolution,
        'a_dim_or_list': env.a_dim_or_list,
        'is_continuous': env.is_continuous,
        'max_episode': max_episode,
        'base_dir': base_dir,
        'logger2file': share_args['logger2file'],
        'seed': seed,
    }
    gym_model = model(**model_params, **algorithm_config)
    gym_model.init_or_restore(
        os.path.join(_base_dir, name if options['--load'] == 'None' else options['--load']))
    begin_episode = gym_model.get_init_episode()

    params = {
        'env': env,
        'gym_model': gym_model,
        'begin_episode': begin_episode,
        'save_frequency': save_frequency,
        'max_step': max_step,
        'max_episode': max_episode,
        'eval_while_train': default_args['eval_while_train'],  # whether to evaluate while training
        'max_eval_episode': default_args['max_eval_episode'],
        'render': default_args['render'],
        'render_episode': render_episode,
        'policy_mode': policy_mode
    }
    if 'batch_size' in algorithm_config and options['--fill-in']:
        steps = algorithm_config['batch_size']
    else:
        steps = default_args['random_steps']

    if options['--inference']:
        Loop.inference(env, gym_model)
    else:
        sth.save_config(os.path.join(base_dir, 'config'), algorithm_config)
        try:
            Loop.no_op(env, gym_model, steps, choose=options['--noop-choose'])
            Loop.train(**params)
        except Exception as e:
            print(e)
        finally:
            gym_model.close()
            env.close()
            sys.exit()
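# Sketch of the wrapper contract this newer gym_run relies on (an assumption
# inferred from the attributes read above; the real gym_wrapper.gym_envs may
# differ): instead of inspecting gym spaces inline, the env object itself
# must expose the model dimensions.
class EnvSpecExample:
    s_dim = 4               # flattened vector observation size
    visual_sources = 0      # number of camera observations
    visual_resolution = []  # [height, width, channels] when visual_sources > 0
    a_dim_or_list = [2]     # action dimensions, one entry per action branch
    is_continuous = False   # replaces the older 'action_type' string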
def main():
    if sys.platform.startswith('win'):
        # Add the _win_handler function to the Windows console's handler function list
        win32api.SetConsoleCtrlHandler(_win_handler, True)

    if os.path.exists(os.path.join(config_file.config['config_file'], 'config.yaml')):
        config = sth.load_config(config_file.config['config_file'])
    else:
        config = config_file.config
        print('load config from the default config_file.config.')
    hyper_config = config['hyper parameters']
    train_config = config['train config']
    record_config = config['record config']

    basic_dir = record_config['basic_dir']
    last_name = record_config['project_name'] + '/' + record_config['remark'] + record_config['run_id']
    cp_dir = record_config['checkpoint_basic_dir'] + last_name
    cp_file = cp_dir + '/rb'
    log_dir = record_config['log_basic_dir'] + last_name
    excel_dir = record_config['excel_basic_dir'] + last_name
    config_dir = record_config['config_basic_dir'] + last_name
    sth.check_or_create(basic_dir, 'basic')
    sth.check_or_create(cp_dir, 'checkpoints')
    sth.check_or_create(log_dir, 'logs(summaries)')
    sth.check_or_create(excel_dir, 'excel')
    sth.check_or_create(config_dir, 'config')

    logger = create_logger(
        name='logger',
        console_level=logging.INFO,
        console_format='%(levelname)s : %(message)s',
        logger2file=record_config['logger2file'],
        file_name=log_dir + '/log.txt',
        file_level=logging.WARNING,
        file_format='%(lineno)d - %(asctime)s - %(module)s - %(funcName)s - %(levelname)s - %(message)s')

    if train_config['train']:
        sth.save_config(config_dir, config)

    if train_config['unity_mode']:
        env = UnityEnvironment()
    else:
        env = UnityEnvironment(
            file_name=train_config['unity_file'],
            no_graphics=train_config['train'],
            base_port=train_config['port'])
    brain_name = env.external_brain_names[0]
    brain = env.brains[brain_name]

    # let TensorFlow grow GPU memory usage on demand instead of reserving it all
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    # tf_config.gpu_options.per_process_gpu_memory_fraction = 0.5
    tf.reset_default_graph()
    graph = tf.Graph()
    with graph.as_default() as g:
        with tf.Session(graph=g, config=tf_config) as sess:
            logger.info('Algorithm: {0}'.format(train_config['algorithm'].name))
            if train_config['algorithm'] == config_file.algorithms.ppo_sep_ac:
                from ppo.ppo_base import PPO_SEP
                model = PPO_SEP(sess=sess,
                                s_dim=brain.vector_observation_space_size,
                                a_counts=brain.vector_action_space_size[0],
                                hyper_config=hyper_config)
                logger.info('PPO_SEP initialize success.')
            elif train_config['algorithm'] == config_file.algorithms.ppo_com:
                from ppo.ppo_base import PPO_COM
                model = PPO_COM(sess=sess,
                                s_dim=brain.vector_observation_space_size,
                                a_counts=brain.vector_action_space_size[0],
                                hyper_config=hyper_config)
                logger.info('PPO_COM initialize success.')
            elif train_config['algorithm'] == config_file.algorithms.sac:
                from sac.sac import SAC
                model = SAC(sess=sess,
                            s_dim=brain.vector_observation_space_size,
                            a_counts=brain.vector_action_space_size[0],
                            hyper_config=hyper_config)
                logger.info('SAC initialize success.')
            elif train_config['algorithm'] == config_file.algorithms.sac_no_v:
                from sac.sac_no_v import SAC_NO_V
                model = SAC_NO_V(sess=sess,
                                 s_dim=brain.vector_observation_space_size,
                                 a_counts=brain.vector_action_space_size[0],
                                 hyper_config=hyper_config)
                logger.info('SAC_NO_V initialize success.')
            elif train_config['algorithm'] == config_file.algorithms.ddpg:
                from ddpg.ddpg import DDPG
                model = DDPG(sess=sess,
                             s_dim=brain.vector_observation_space_size,
                             a_counts=brain.vector_action_space_size[0],
                             hyper_config=hyper_config)
                logger.info('DDPG initialize success.')
            elif train_config['algorithm'] == config_file.algorithms.td3:
                from td3.td3 import TD3
                model = TD3(sess=sess,
                            s_dim=brain.vector_observation_space_size,
                            a_counts=brain.vector_action_space_size[0],
                            hyper_config=hyper_config)
                logger.info('TD3 initialize success.')

            recorder = Recorder(log_dir, excel_dir, record_config, logger,
                                max_to_keep=5, pad_step_number=True, graph=g)
            episode = init_or_restore(cp_dir, sess, recorder, cp_file)
            try:
                if train_config['train']:
                    # off-policy algorithms train from a replay buffer; on-policy ones from fresh rollouts
                    train = train_OffPolicy if train_config['use_replay_buffer'] else train_OnPolicy
                    train(sess=sess,
                          env=env,
                          brain_name=brain_name,
                          begin_episode=episode,
                          model=model,
                          recorder=recorder,
                          cp_file=cp_file,
                          hyper_config=hyper_config,
                          train_config=train_config)
                    tf.train.write_graph(g, cp_dir, 'raw_graph_def.pb', as_text=False)
                    export_model(cp_dir, g)
                else:
                    inference(env, brain_name, model, train_config)
            except Exception as e:
                logger.error(e)
            finally:
                env.close()
                recorder.close()
                sys.exit()
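# Hypothetical entry point (not shown in the original source): main() pulls all
# of its settings from config_file.config, so the script would be launched as:
if __name__ == '__main__':
    main()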
def gym_run(options, max_step, save_frequency, name):
    from gym_loop import Loop
    from gym.spaces import Box, Discrete, Tuple
    from gym_wrapper import gym_envs

    available_type = [Box, Discrete]
    # `train_config` is expected to be a module-level dict in this version
    render = train_config['gym_render']
    render_episode = int(options['--render-episode']) if options['--render-episode'] != 'None' else train_config['gym_render_episode']

    try:
        env = gym_envs(options['--gym-env'], int(options['--gym-agents']))
        print('obs: ', env.observation_space)
        print('a: ', env.action_space)
        # check the space types, not the space instances, against the allowed list
        assert type(env.observation_space) in available_type and type(env.action_space) in available_type, \
            'action_space and observation_space must be one of available_type'
    except Exception as e:
        print(e)

    try:
        algorithm_config, model, policy_mode, train_mode = algos[options['--algorithm']]
    except KeyError:
        raise Exception("Don't have this algorithm.")

    if options['--config-file'] != 'None':
        algorithm_config = update_config(algorithm_config, options['--config-file'])
    _base_dir = os.path.join(train_config['base_dir'], options['--gym-env'], options['--algorithm'])
    base_dir = os.path.join(_base_dir, name)
    show_config(algorithm_config)

    if type(env.observation_space) == Box:
        if len(env.observation_space.shape) == 1:
            s_dim = env.observation_space.shape[0]
        else:
            s_dim = 0
    else:
        s_dim = env.observation_space.n
    if len(env.observation_space.shape) == 3:
        visual_sources = 1
        visual_resolution = list(env.observation_space.shape)
    else:
        visual_sources = 0
        visual_resolution = []

    if type(env.action_space) == Box:
        assert len(env.action_space.shape) == 1, 'if the action space is continuous, the action shape must be of length 1'
        a_dim_or_list = env.action_space.shape
        action_type = 'continuous'
    elif type(env.action_space) == Tuple:
        assert all(type(i) == Discrete for i in env.action_space), \
            'if the action space is a Tuple, every item in it must be Discrete'
        a_dim_or_list = [i.n for i in env.action_space]
        action_type = 'discrete'
    else:
        a_dim_or_list = [env.action_space.n]
        action_type = 'discrete'

    gym_model = model(
        s_dim=s_dim,
        visual_sources=visual_sources,
        visual_resolution=visual_resolution,
        a_dim_or_list=a_dim_or_list,
        action_type=action_type,
        base_dir=base_dir,
        logger2file=train_config['logger2file'],
        out_graph=train_config['out_graph'],
        **algorithm_config)
    gym_model.init_or_restore(
        os.path.join(_base_dir, name if options['--load'] == 'None' else options['--load']))
    begin_episode = gym_model.get_init_episode()
    max_episode = gym_model.get_max_episode()

    params = {
        'env': env,
        'gym_model': gym_model,
        'action_type': action_type,
        'begin_episode': begin_episode,
        'save_frequency': save_frequency,
        'max_step': max_step,
        'max_episode': max_episode,
        'render': render,
        'render_episode': render_episode,
        'train_mode': train_mode
    }
    if options['--inference']:
        Loop.inference(env, gym_model, action_type)
    else:
        sth.save_config(os.path.join(base_dir, 'config'), algorithm_config)
        try:
            Loop.no_op(env, gym_model, action_type, 30)
            Loop.train(**params)
        except Exception as e:
            print(e)
        finally:
            try:
                gym_model.close()
            except Exception as e:
                print(e)
            finally:
                env.close()
                sys.exit()
def initialize_env_model(filepath, algo, name, port):
    env = UnityEnvironment(file_name=filepath, base_port=port, no_graphics=True)
    if algo == 'pg':
        algorithm_config = Algorithms.pg_config
        model = Algorithms.PG
        policy_mode = 'ON'
    elif algo == 'ppo':
        algorithm_config = Algorithms.ppo_config
        model = Algorithms.PPO
        policy_mode = 'ON'
    elif algo == 'ddpg':
        algorithm_config = Algorithms.ddpg_config
        model = Algorithms.DDPG
        policy_mode = 'OFF'
    elif algo == 'td3':
        algorithm_config = Algorithms.td3_config
        model = Algorithms.TD3
        policy_mode = 'OFF'
    elif algo == 'sac':
        algorithm_config = Algorithms.sac_config
        model = Algorithms.SAC
        policy_mode = 'OFF'
    elif algo == 'sac_no_v':
        algorithm_config = Algorithms.sac_no_v_config
        model = Algorithms.SAC_NO_V
        policy_mode = 'OFF'
    else:
        raise Exception("Don't have this algorithm.")

    # the environment build directory must provide an env_config module
    env_dir = os.path.split(filepath)[0]
    sys.path.append(env_dir)
    import env_config
    reset_config = env_config.reset_config
    max_step = env_config.max_step
    env_name = os.path.join(*fix_path(env_dir).split('/')[-2:])
    base_dir = os.path.join(
        r'C:/RLData' if platform.system() == 'Windows' else r'/RLData',
        env_name, algo, name)

    brain_names = env.external_brain_names
    brains = env.brains
    models = [
        model(s_dim=brains[i].vector_observation_space_size * brains[i].num_stacked_vector_observations,
              a_counts=brains[i].vector_action_space_size[0],
              action_type=brains[i].vector_action_space_type,
              cp_dir=os.path.join(base_dir, i, 'model'),
              log_dir=os.path.join(base_dir, i, 'log'),
              excel_dir=os.path.join(base_dir, i, 'excel'),
              logger2file=False,
              out_graph=False,
              **algorithm_config)
        for i in brain_names
    ]
    for i in brain_names:
        sth.save_config(os.path.join(base_dir, i, 'config'), algorithm_config)
    begin_episode = models[0].get_init_step()
    max_episode = models[0].get_max_episode()
    return env, brain_names, models, policy_mode, reset_config, max_step
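# Illustrative call (hypothetical build path and run name): the returned tuple
# feeds a training loop elsewhere in the project.
env, brain_names, models, policy_mode, reset_config, max_step = initialize_env_model(
    filepath=r'C:/UnityBuilds/3DBall/3DBall.exe',
    algo='ppo',
    name='run0',
    port=5005)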