Ejemplo n.º 1
0
def unity_run(default_args, share_args, options, max_step, max_episode,
              save_frequency, name):
    """Create a Unity ML-Agents environment and one model per brain, then
    either run inference or train.

    Args:
        default_args: mapping read for 'reset_config', 'exe_file' and
            'no_op_steps'.
        share_args: mapping read for 'base_dir', 'logger2file', 'out_graph'
            and, for ``ma_*`` algorithms, ``share_args['ma']`` with
            'batch_size' and 'capacity'.
        options: command-line option mapping. Keys read: '--algorithm',
            '--unity', '--env', '--port', '--inference', '--graphic',
            '--sampler', '--config-file', '--load', '--fill-in',
            '--noop-choose'.
        max_step: forwarded to ``Loop.train``; replaced by
            ``env_config.max_step`` when an ``env_config.py`` sits next to
            the environment executable.
        max_episode: forwarded to every model and to ``Loop.train``.
        save_frequency: forwarded to ``Loop.train``.
        name: run name; last path component of the output directory
            ``<base_dir>/<env_name>/<algorithm>/<name>``.

    Raises:
        NotImplementedError: a ``KeyError`` occurred while resolving the
            algorithm or its entry in ``config.yaml``.
        Exception: the environment executable path does not exist.
        AssertionError: an ``ma_*`` algorithm is used with fewer than two
            brains.
        SystemExit: always raised at the end of the training path.
    """
    from mlagents.envs import UnityEnvironment
    from utils.sampler import create_sampler_manager

    try:
        tf_version, (model, policy_mode,
                     _) = get_model_info(options['--algorithm'])
        algorithm_config = sth.load_config(
            f'./Algorithms/{tf_version}/config.yaml')[options['--algorithm']]
        ma = options['--algorithm'][:3] == 'ma_'
    except KeyError as err:
        # Keep the original exception type but preserve the missing key as
        # the explicit cause so the failure is diagnosable.
        raise NotImplementedError(
            'unknown or unconfigured algorithm') from err

    # Explicit sentinel: stays None unless the environment directory ships
    # its own env_loop.py (replaces an inspection of locals()).
    Loop = None
    reset_config = default_args['reset_config']
    if options['--unity']:
        # No file name: UnityEnvironment() is created with its defaults.
        env = UnityEnvironment()
        env_name = 'unity'
    else:
        file_name = default_args['exe_file'] if options[
            '--env'] == 'None' else options['--env']
        if not os.path.exists(file_name):
            raise Exception('can not find this file.')
        env = UnityEnvironment(
            file_name=file_name,
            base_port=int(options['--port']),
            # Graphics are always on for inference; otherwise follow
            # the --graphic flag.
            no_graphics=not (options['--inference'] or options['--graphic']))
        env_dir = os.path.split(file_name)[0]
        # Environment name = last two components of the executable's folder.
        env_name = os.path.join(*env_dir.replace('\\', '/').replace(
            r'//', r'/').split('/')[-2:])
        # Make per-environment overrides importable.
        sys.path.append(env_dir)
        if os.path.exists(env_dir + '/env_config.py'):
            import env_config
            reset_config = env_config.reset_config
            max_step = env_config.max_step
        if os.path.exists(env_dir + '/env_loop.py'):
            from env_loop import Loop

    sampler_manager, resampling_interval = create_sampler_manager(
        options['--sampler'], env.reset_parameters)

    if Loop is None:
        if ma:
            from ma_loop import Loop
        else:
            from loop import Loop

    if options['--config-file'] != 'None':
        algorithm_config = update_config(algorithm_config,
                                         options['--config-file'])
    _base_dir = os.path.join(share_args['base_dir'], env_name,
                             options['--algorithm'])
    base_dir = os.path.join(_base_dir, name)
    show_config(algorithm_config)

    brain_names = env.external_brain_names
    brains = env.brains
    brain_num = len(brain_names)

    # [height, width, channels] of the first camera for each brain, or []
    # when the brain has no visual observation.
    visual_resolutions = {}
    for brain_name in brain_names:
        brain = brains[brain_name]
        if brain.number_visual_observations:
            camera = brain.camera_resolutions[0]
            visual_resolutions[f'{brain_name}'] = [
                camera['height'],
                camera['width'],
                1 if camera['blackAndWhite'] else 3,
            ]
        else:
            visual_resolutions[f'{brain_name}'] = []

    model_params = [{
        's_dim': (brains[brain_name].vector_observation_space_size *
                  brains[brain_name].num_stacked_vector_observations),
        'a_dim_or_list': brains[brain_name].vector_action_space_size,
        'action_type': brains[brain_name].vector_action_space_type,
        'max_episode': max_episode,
        'base_dir': os.path.join(base_dir, brain_name),
        'logger2file': share_args['logger2file'],
        'out_graph': share_args['out_graph'],
    } for brain_name in brain_names]

    if ma:
        assert brain_num > 1, 'if using ma* algorithms, number of brains must larger than 1'
        data = ExperienceReplay(share_args['ma']['batch_size'],
                                share_args['ma']['capacity'])
        extra_params = {'data': data}
        models = [
            model(n=brain_num, i=i, **model_params[i], **algorithm_config)
            for i in range(brain_num)
        ]
    else:
        extra_params = {}
        models = [
            model(visual_sources=brains[brain_name].number_visual_observations,
                  visual_resolution=visual_resolutions[f'{brain_name}'],
                  **model_params[index],
                  **algorithm_config)
            for index, brain_name in enumerate(brain_names)
        ]

    # Restore from --load when given, otherwise from this run's directory.
    load_name = name if options['--load'] == 'None' else options['--load']
    for brain_model, brain_name in zip(models, brain_names):
        brain_model.init_or_restore(
            os.path.join(_base_dir, load_name, brain_name))
    begin_episode = models[0].get_init_episode()

    params = {
        'env': env,
        'brain_names': brain_names,
        'models': models,
        'begin_episode': begin_episode,
        'save_frequency': save_frequency,
        'reset_config': reset_config,
        'max_step': max_step,
        'max_episode': max_episode,
        'sampler_manager': sampler_manager,
        'resampling_interval': resampling_interval,
        'policy_mode': policy_mode
    }
    if 'batch_size' in algorithm_config and options['--fill-in']:
        steps = algorithm_config['batch_size']
    else:
        steps = default_args['no_op_steps']
    no_op_params = {
        'env': env,
        'brain_names': brain_names,
        'models': models,
        'brains': brains,
        'steps': steps,
        'choose': options['--noop-choose']
    }
    params.update(extra_params)
    no_op_params.update(extra_params)

    if options['--inference']:
        try:
            Loop.inference(env,
                           brain_names,
                           models,
                           reset_config=reset_config,
                           sampler_manager=sampler_manager,
                           resampling_interval=resampling_interval)
        finally:
            # Previously the environment was left open on this path.
            env.close()
        return

    try:
        for brain_name in brain_names:
            sth.save_config(os.path.join(base_dir, brain_name, 'config'),
                            algorithm_config)
        Loop.no_op(**no_op_params)
        Loop.train(**params)
    except Exception as e:
        print(e)
    finally:
        try:
            for brain_model in models:
                brain_model.close()
        except Exception as e:
            print(e)
        finally:
            env.close()
            sys.exit()
Ejemplo n.º 2
0
def gym_run(default_args, share_args, options, max_step, max_episode,
            save_frequency, name):
    """Create a gym environment and a single model, then either run
    inference or train.

    Args:
        default_args: mapping read for 'render_episode', 'eval_while_train',
            'max_eval_episode', 'render' and 'random_steps'.
        share_args: mapping read for 'base_dir', 'logger2file' and
            'out_graph'.
        options: command-line option mapping. Keys read: '--algorithm',
            '--render-episode', '--gym-env', '--gym-agents', '--config-file',
            '--load', '--fill-in', '--inference', '--noop-choose'.
        max_step: forwarded to ``Loop.train``.
        max_episode: forwarded to the model and to ``Loop.train``.
        save_frequency: forwarded to ``Loop.train``.
        name: run name; last path component of the output directory
            ``<base_dir>/<gym-env>/<algorithm>/<name>``.

    Raises:
        NotImplementedError: a ``KeyError`` occurred while resolving the
            algorithm or its entry in ``config.yaml``.
        ValueError: the observation space is not Box/Discrete, or the action
            space is not Box/Discrete/Tuple.
        AssertionError: a Box action space is not one-dimensional, or a Tuple
            action space contains a non-Discrete member.
        SystemExit: always raised at the end of the training path.

    Any exception raised while creating the environment now propagates;
    it used to be printed and followed by a ``NameError`` on ``env``.
    """
    from gym_loop import Loop
    from gym.spaces import Box, Discrete, Tuple
    from gym_wrapper import gym_envs

    try:
        tf_version, (model, policy_mode,
                     _) = get_model_info(options['--algorithm'])
        algorithm_config = sth.load_config(
            f'./Algorithms/{tf_version}/config.yaml')[options['--algorithm']]
    except KeyError as err:
        raise NotImplementedError(
            'unknown or unconfigured algorithm') from err

    render_episode = int(options['--render-episode']) if options[
        '--render-episode'] != 'None' else default_args['render_episode']

    env = gym_envs(options['--gym-env'], int(options['--gym-agents']))
    observation_space = env.observation_space
    action_space = env.action_space
    # Tuple is accepted for actions because it is handled explicitly below.
    if not (isinstance(observation_space, (Box, Discrete))
            and isinstance(action_space, (Box, Discrete, Tuple))):
        env.close()
        raise ValueError(
            'observation_space must be Box or Discrete and action_space '
            'must be Box, Discrete or Tuple, got '
            f'{type(observation_space).__name__} and '
            f'{type(action_space).__name__}')

    if options['--config-file'] != 'None':
        algorithm_config = update_config(algorithm_config,
                                         options['--config-file'])
    _base_dir = os.path.join(share_args['base_dir'], options['--gym-env'],
                             options['--algorithm'])
    base_dir = os.path.join(_base_dir, name)
    show_config(algorithm_config)

    # Vector input size: only a rank-1 Box contributes a vector dimension;
    # any other Box shape gets s_dim = 0.
    if isinstance(observation_space, Box):
        s_dim = observation_space.shape[0] if len(
            observation_space.shape) == 1 else 0
    else:
        s_dim = int(observation_space.n)

    # A rank-3 observation is passed on as one visual source.
    if len(observation_space.shape) == 3:
        visual_sources = 1
        visual_resolution = list(observation_space.shape)
    else:
        visual_sources = 0
        visual_resolution = []

    if isinstance(action_space, Box):
        assert len(
            action_space.shape
        ) == 1, 'if action space is continuous, the shape length of action must equal to 1'
        a_dim_or_list = action_space.shape
        action_type = 'continuous'
    elif isinstance(action_space, Tuple):
        assert all(
            isinstance(space, Discrete) for space in action_space
        ), 'if action space is Tuple, each item in it must have type Discrete'
        a_dim_or_list = [space.n for space in action_space]
        action_type = 'discrete'
    else:
        a_dim_or_list = [action_space.n]
        action_type = 'discrete'

    gym_model = model(s_dim=s_dim,
                      visual_sources=visual_sources,
                      visual_resolution=visual_resolution,
                      a_dim_or_list=a_dim_or_list,
                      action_type=action_type,
                      max_episode=max_episode,
                      base_dir=base_dir,
                      logger2file=share_args['logger2file'],
                      out_graph=share_args['out_graph'],
                      **algorithm_config)
    # Restore from --load when given, otherwise from this run's directory.
    gym_model.init_or_restore(
        os.path.join(
            _base_dir,
            name if options['--load'] == 'None' else options['--load']))
    begin_episode = gym_model.get_init_episode()
    params = {
        'env': env,
        'gym_model': gym_model,
        'action_type': action_type,
        'begin_episode': begin_episode,
        'save_frequency': save_frequency,
        'max_step': max_step,
        'max_episode': max_episode,
        'eval_while_train':
        default_args['eval_while_train'],  # whether to eval while training.
        'max_eval_episode': default_args['max_eval_episode'],
        'render': default_args['render'],
        'render_episode': render_episode,
        'policy_mode': policy_mode
    }
    if 'batch_size' in algorithm_config and options['--fill-in']:
        steps = algorithm_config['batch_size']
    else:
        steps = default_args['random_steps']

    if options['--inference']:
        try:
            Loop.inference(env, gym_model, action_type)
        finally:
            # Previously the environment was left open on this path.
            env.close()
        return

    sth.save_config(os.path.join(base_dir, 'config'), algorithm_config)
    try:
        Loop.no_op(env,
                   gym_model,
                   action_type,
                   steps,
                   choose=options['--noop-choose'])
        Loop.train(**params)
    except Exception as e:
        print(e)
    finally:
        try:
            gym_model.close()
        except Exception as e:
            print(e)
        finally:
            env.close()
            sys.exit()
Ejemplo n.º 3
0
def gym_run(default_args, share_args, options, max_step, max_episode, save_frequency, name, seed):
    """Create a wrapped gym environment and a single model, then either run
    inference or train.

    Args:
        default_args: mapping read for 'render_episode', 'render_mode',
            'eval_while_train', 'max_eval_episode', 'render' and
            'random_steps'.
        share_args: mapping read for 'base_dir' and 'logger2file'.
        options: command-line option mapping. Keys read: '--algorithm',
            '--render-episode', '--gym-env', '--gym-agents',
            '--gym-env-seed', '--config-file', '--load', '--fill-in',
            '--inference', '--noop-choose'.
        max_step: forwarded to ``Loop.train``.
        max_episode: forwarded to the model and to ``Loop.train``.
        save_frequency: forwarded to ``Loop.train``.
        name: run name; last path component of the output directory
            ``<base_dir>/<gym-env>/<algorithm>/<name>``.
        seed: forwarded to the model as its ``seed`` argument.

    Raises:
        SystemExit: always raised at the end of the training path.

    Any exception raised while creating the environment now propagates;
    it used to be printed and followed by a ``NameError`` on ``env``.
    """
    from gym_loop import Loop
    from gym_wrapper import gym_envs

    model, algorithm_config, policy_mode = get_model_info(options['--algorithm'])
    if options['--render-episode'] != 'None':
        render_episode = int(options['--render-episode'])
    else:
        render_episode = default_args['render_episode']

    env = gym_envs(gym_env_name=options['--gym-env'],
                   n=int(options['--gym-agents']),
                   seed=int(options['--gym-env-seed']),
                   render_mode=default_args['render_mode'])

    if options['--config-file'] != 'None':
        algorithm_config = update_config(algorithm_config, options['--config-file'])
    _base_dir = os.path.join(share_args['base_dir'], options['--gym-env'], options['--algorithm'])
    base_dir = os.path.join(_base_dir, name)
    show_config(algorithm_config)

    # The wrapper exposes the space-derived dimensions directly.
    model_params = {
        's_dim': env.s_dim,
        'visual_sources': env.visual_sources,
        'visual_resolution': env.visual_resolution,
        'a_dim_or_list': env.a_dim_or_list,
        'is_continuous': env.is_continuous,
        'max_episode': max_episode,
        'base_dir': base_dir,
        'logger2file': share_args['logger2file'],
        'seed': seed,
    }
    gym_model = model(
        **model_params,
        **algorithm_config
    )
    # Restore from --load when given, otherwise from this run's directory.
    load_name = name if options['--load'] == 'None' else options['--load']
    gym_model.init_or_restore(os.path.join(_base_dir, load_name))
    begin_episode = gym_model.get_init_episode()
    params = {
        'env': env,
        'gym_model': gym_model,
        'begin_episode': begin_episode,
        'save_frequency': save_frequency,
        'max_step': max_step,
        'max_episode': max_episode,
        'eval_while_train': default_args['eval_while_train'],  # whether to eval while training.
        'max_eval_episode': default_args['max_eval_episode'],
        'render': default_args['render'],
        'render_episode': render_episode,
        'policy_mode': policy_mode
    }
    if 'batch_size' in algorithm_config and options['--fill-in']:
        steps = algorithm_config['batch_size']
    else:
        steps = default_args['random_steps']

    if options['--inference']:
        try:
            Loop.inference(env, gym_model)
        finally:
            # Previously the environment was left open on this path.
            env.close()
        return

    sth.save_config(os.path.join(base_dir, 'config'), algorithm_config)
    try:
        Loop.no_op(env, gym_model, steps, choose=options['--noop-choose'])
        Loop.train(**params)
    except Exception as e:
        print(e)
    finally:
        # Nested so that a failing model close can no longer skip closing
        # the environment and exiting.
        try:
            gym_model.close()
        except Exception as e:
            print(e)
        finally:
            env.close()
            sys.exit()
Ejemplo n.º 4
0
def main():
    """Load the configuration, launch a Unity environment, build the
    configured TensorFlow model and then train it or run inference.

    The configuration comes from ``sth.load_config`` when a ``config.yaml``
    exists under ``config_file.config['config_file']``; otherwise the
    in-module ``config_file.config`` is used. Only the first external brain
    of the environment is used.

    Raises:
        ValueError: ``train_config['algorithm']`` matches none of the
            supported algorithms.
        SystemExit: always raised on normal completion.

    Errors raised by the train/inference call itself are logged through the
    logger and do not propagate.
    """
    if sys.platform.startswith('win'):
        # Add the _win_handler function to the windows console's handler function list
        win32api.SetConsoleCtrlHandler(_win_handler, True)
    if os.path.exists(
            os.path.join(config_file.config['config_file'], 'config.yaml')):
        config = sth.load_config(config_file.config['config_file'])
    else:
        config = config_file.config
        print('load config from config.')

    hyper_config = config['hyper parameters']
    train_config = config['train config']
    record_config = config['record config']

    basic_dir = record_config['basic_dir']
    last_name = record_config['project_name'] + '/' \
        + record_config['remark'] \
        + record_config['run_id']
    cp_dir = record_config['checkpoint_basic_dir'] + last_name
    cp_file = cp_dir + '/rb'
    log_dir = record_config['log_basic_dir'] + last_name
    excel_dir = record_config['excel_basic_dir'] + last_name
    config_dir = record_config['config_basic_dir'] + last_name
    sth.check_or_create(basic_dir, 'basic')
    sth.check_or_create(cp_dir, 'checkpoints')
    sth.check_or_create(log_dir, 'logs(summaries)')
    sth.check_or_create(excel_dir, 'excel')
    sth.check_or_create(config_dir, 'config')

    logger = create_logger(
        name='logger',
        console_level=logging.INFO,
        console_format='%(levelname)s : %(message)s',
        logger2file=record_config['logger2file'],
        # os.path.join replaces the literal '\log.txt', which was an invalid
        # escape sequence and only formed a real path on Windows.
        file_name=os.path.join(log_dir, 'log.txt'),
        file_level=logging.WARNING,
        file_format=
        '%(lineno)d - %(asctime)s - %(module)s - %(funcName)s - %(levelname)s - %(message)s'
    )
    if train_config['train']:
        sth.save_config(config_dir, config)

    if train_config['unity_mode']:
        # No file name: UnityEnvironment() is created with its defaults.
        env = UnityEnvironment()
    else:
        env = UnityEnvironment(
            file_name=train_config['unity_file'],
            no_graphics=bool(train_config['train']),
            base_port=train_config['port'])
    brain_name = env.external_brain_names[0]
    brain = env.brains[brain_name]
    # set the memory use proportion of GPU
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    # tf_config.gpu_options.per_process_gpu_memory_fraction = 0.5
    tf.reset_default_graph()
    graph = tf.Graph()
    with graph.as_default() as g:
        with tf.Session(graph=g, config=tf_config) as sess:
            algorithm = train_config['algorithm']
            logger.info('Algorithm: {0}'.format(algorithm.name))
            try:
                # Every supported model takes the same constructor arguments.
                model_kwargs = dict(
                    sess=sess,
                    s_dim=brain.vector_observation_space_size,
                    a_counts=brain.vector_action_space_size[0],
                    hyper_config=hyper_config)
                # Imports stay inside the branches so only the selected
                # algorithm's module is loaded.
                if algorithm == config_file.algorithms.ppo_sep_ac:
                    from ppo.ppo_base import PPO_SEP
                    model = PPO_SEP(**model_kwargs)
                    logger.info('PPO_SEP initialize success.')
                elif algorithm == config_file.algorithms.ppo_com:
                    from ppo.ppo_base import PPO_COM
                    model = PPO_COM(**model_kwargs)
                    logger.info('PPO_COM initialize success.')
                elif algorithm == config_file.algorithms.sac:
                    from sac.sac import SAC
                    model = SAC(**model_kwargs)
                    logger.info('SAC initialize success.')
                elif algorithm == config_file.algorithms.sac_no_v:
                    from sac.sac_no_v import SAC_NO_V
                    model = SAC_NO_V(**model_kwargs)
                    logger.info('SAC_NO_V initialize success.')
                elif algorithm == config_file.algorithms.ddpg:
                    from ddpg.ddpg import DDPG
                    model = DDPG(**model_kwargs)
                    logger.info('DDPG initialize success.')
                elif algorithm == config_file.algorithms.td3:
                    from td3.td3 import TD3
                    model = TD3(**model_kwargs)
                    logger.info('TD3 initialize success.')
                else:
                    # Previously `model` stayed unbound and failed later
                    # with an unrelated-looking error.
                    raise ValueError(
                        f'unsupported algorithm: {algorithm!r}')
                recorder = Recorder(log_dir,
                                    excel_dir,
                                    record_config,
                                    logger,
                                    max_to_keep=5,
                                    pad_step_number=True,
                                    graph=g)
                episode = init_or_restore(cp_dir, sess, recorder, cp_file)
            except Exception:
                # Setup failed before the run block below took over
                # cleanup: close the environment, then re-raise.
                env.close()
                raise
            try:
                if train_config['train']:
                    # Replay-buffer algorithms use the off-policy trainer.
                    trainer = (train_OffPolicy
                               if train_config['use_replay_buffer'] else
                               train_OnPolicy)
                    trainer(sess=sess,
                            env=env,
                            brain_name=brain_name,
                            begin_episode=episode,
                            model=model,
                            recorder=recorder,
                            cp_file=cp_file,
                            hyper_config=hyper_config,
                            train_config=train_config)
                    tf.train.write_graph(g,
                                         cp_dir,
                                         'raw_graph_def.pb',
                                         as_text=False)
                    export_model(cp_dir, g)
                else:
                    inference(env, brain_name, model, train_config)
            except Exception as e:
                logger.error(e)
            finally:
                env.close()
    recorder.close()
    sys.exit()
Ejemplo n.º 5
0
Archivo: run.py Proyecto: familywei/RLs
def gym_run(options, max_step, save_frequency, name):
    """Create a gym environment and a single model, then either run
    inference or train.

    Reads the module-level ``train_config`` (keys 'gym_render',
    'gym_render_episode', 'base_dir', 'logger2file', 'out_graph') and the
    module-level ``algos`` registry.

    Args:
        options: command-line option mapping. Keys read: '--algorithm',
            '--render-episode', '--gym-env', '--gym-agents', '--config-file',
            '--load', '--inference'.
        max_step: forwarded to ``Loop.train``.
        save_frequency: forwarded to ``Loop.train``.
        name: run name; last path component of the output directory
            ``<base_dir>/<gym-env>/<algorithm>/<name>``.

    Raises:
        Exception: ``options['--algorithm']`` is not a key of ``algos``.
        ValueError: the observation space is not Box/Discrete, or the action
            space is not Box/Discrete/Tuple.
        AssertionError: a Box action space is not one-dimensional, or a Tuple
            action space contains a non-Discrete member.
        SystemExit: always raised at the end of the training path.

    Any exception raised while creating the environment now propagates;
    it used to be printed and followed by a ``NameError`` on ``env``.
    """
    from gym_loop import Loop
    from gym.spaces import Box, Discrete, Tuple
    from gym_wrapper import gym_envs

    render = train_config['gym_render']
    if options['--render-episode'] != 'None':
        render_episode = int(options['--render-episode'])
    else:
        render_episode = train_config['gym_render_episode']

    # Resolve the algorithm before creating the environment so an unknown
    # name does not leave an environment open.
    try:
        algorithm_config, model, policy_mode, train_mode = algos[options['--algorithm']]
    except KeyError:
        raise Exception("Don't have this algorithm.")

    env = gym_envs(options['--gym-env'], int(options['--gym-agents']))
    observation_space = env.observation_space
    action_space = env.action_space
    print('obs: ', observation_space)
    print('a: ', action_space)
    # The old check tested the space *instances* for membership in a list of
    # classes, so it could never pass; check the types instead. Tuple is
    # accepted for actions because it is handled explicitly below.
    if not (isinstance(observation_space, (Box, Discrete))
            and isinstance(action_space, (Box, Discrete, Tuple))):
        env.close()
        raise ValueError(
            'observation_space must be Box or Discrete and action_space '
            'must be Box, Discrete or Tuple, got '
            f'{type(observation_space).__name__} and '
            f'{type(action_space).__name__}')

    if options['--config-file'] != 'None':
        algorithm_config = update_config(algorithm_config, options['--config-file'])
    _base_dir = os.path.join(train_config['base_dir'], options['--gym-env'], options['--algorithm'])
    base_dir = os.path.join(_base_dir, name)
    show_config(algorithm_config)

    # Vector input size: only a rank-1 Box contributes a vector dimension;
    # any other Box shape gets s_dim = 0.
    if isinstance(observation_space, Box):
        s_dim = observation_space.shape[0] if len(observation_space.shape) == 1 else 0
    else:
        s_dim = int(observation_space.n)

    # A rank-3 observation is passed on as one visual source.
    if len(observation_space.shape) == 3:
        visual_sources = 1
        visual_resolution = list(observation_space.shape)
    else:
        visual_sources = 0
        visual_resolution = []

    if isinstance(action_space, Box):
        assert len(action_space.shape) == 1, 'if action space is continuous, the shape length of action must equal to 1'
        a_dim_or_list = action_space.shape
        action_type = 'continuous'
    elif isinstance(action_space, Tuple):
        assert all(isinstance(space, Discrete) for space in action_space), 'if action space is Tuple, each item in it must have type Discrete'
        a_dim_or_list = [space.n for space in action_space]
        action_type = 'discrete'
    else:
        a_dim_or_list = [action_space.n]
        action_type = 'discrete'

    gym_model = model(
        s_dim=s_dim,
        visual_sources=visual_sources,
        visual_resolution=visual_resolution,
        a_dim_or_list=a_dim_or_list,
        action_type=action_type,
        base_dir=base_dir,
        logger2file=train_config['logger2file'],
        out_graph=train_config['out_graph'],
        **algorithm_config
    )
    # Restore from --load when given, otherwise from this run's directory.
    load_name = name if options['--load'] == 'None' else options['--load']
    gym_model.init_or_restore(os.path.join(_base_dir, load_name))
    begin_episode = gym_model.get_init_episode()
    max_episode = gym_model.get_max_episode()
    params = {
        'env': env,
        'gym_model': gym_model,
        'action_type': action_type,
        'begin_episode': begin_episode,
        'save_frequency': save_frequency,
        'max_step': max_step,
        'max_episode': max_episode,
        'render': render,
        'render_episode': render_episode,
        'train_mode': train_mode
    }

    if options['--inference']:
        try:
            Loop.inference(env, gym_model, action_type)
        finally:
            # Previously the environment was left open on this path.
            env.close()
        return

    sth.save_config(os.path.join(base_dir, 'config'), algorithm_config)
    try:
        Loop.no_op(env, gym_model, action_type, 30)
        Loop.train(**params)
    except Exception as e:
        print(e)
    finally:
        try:
            gym_model.close()
        except Exception as e:
            print(e)
        finally:
            env.close()
            sys.exit()
Ejemplo n.º 6
0
def initialize_env_model(filepath, algo, name, port):
    """Launch a headless Unity environment and build one model per brain.

    Args:
        filepath: path of the Unity executable. Its directory is appended to
            ``sys.path`` and must contain an importable ``env_config``
            module providing ``reset_config`` and ``max_step``.
        algo: algorithm key; one of 'pg', 'ppo', 'ddpg', 'td3', 'sac',
            'sac_no_v'.
        name: run name; last path component of the output directory.
        port: base port passed to ``UnityEnvironment``.

    Returns:
        Tuple ``(env, brain_names, models, policy_mode, reset_config,
        max_step)``, where ``policy_mode`` is 'ON' or 'OFF'.

    Raises:
        Exception: ``algo`` is not a supported algorithm key.
    """
    # algo key -> (config attribute, model attribute on `Algorithms`,
    # policy mode). Attributes are resolved by name so only the selected
    # algorithm is touched.
    algo_table = {
        'pg': ('pg_config', 'PG', 'ON'),
        'ppo': ('ppo_config', 'PPO', 'ON'),
        'ddpg': ('ddpg_config', 'DDPG', 'OFF'),
        'td3': ('td3_config', 'TD3', 'OFF'),
        'sac': ('sac_config', 'SAC', 'OFF'),
        'sac_no_v': ('sac_no_v_config', 'SAC_NO_V', 'OFF'),
    }
    # Validate before launching Unity so a bad key cannot leak a process.
    if algo not in algo_table:
        raise Exception("Don't have this algorithm.")
    config_attr, model_attr, policy_mode = algo_table[algo]
    algorithm_config = getattr(Algorithms, config_attr)
    model = getattr(Algorithms, model_attr)

    env_dir = os.path.split(filepath)[0]
    # Make the per-environment env_config module importable.
    sys.path.append(env_dir)
    import env_config
    reset_config = env_config.reset_config
    max_step = env_config.max_step
    # Environment name = last two components of the executable's folder.
    env_name = os.path.join(*fix_path(env_dir).split('/')[-2:])
    base_dir = os.path.join(
        r'C:/RLData' if platform.system() == "Windows" else r'/RLData',
        env_name, algo, name)

    env = UnityEnvironment(file_name=filepath,
                           base_port=port,
                           no_graphics=True)
    try:
        brain_names = env.external_brain_names
        brains = env.brains
        models = [
            model(s_dim=brains[brain_name].vector_observation_space_size *
                  brains[brain_name].num_stacked_vector_observations,
                  a_counts=brains[brain_name].vector_action_space_size[0],
                  action_type=brains[brain_name].vector_action_space_type,
                  cp_dir=os.path.join(base_dir, brain_name, 'model'),
                  log_dir=os.path.join(base_dir, brain_name, 'log'),
                  excel_dir=os.path.join(base_dir, brain_name, 'excel'),
                  logger2file=False,
                  out_graph=False,
                  **algorithm_config) for brain_name in brain_names
        ]
        for brain_name in brain_names:
            sth.save_config(os.path.join(base_dir, brain_name, 'config'),
                            algorithm_config)

        # NOTE(review): the results of these two calls were never used or
        # returned. The calls are kept in case they have side effects on the
        # model; confirm and remove if they are pure getters.
        models[0].get_init_step()
        models[0].get_max_episode()
    except Exception:
        # Do not leave the Unity process running if setup fails.
        env.close()
        raise
    return env, brain_names, models, policy_mode, reset_config, max_step