Exemple #1
0
 def __init__(self, env, env_args):
     """Initialise the wrapper around ``env`` from the ``env_args`` mapping.

     Args:
         env: Environment object forwarded to the parent initialiser.
         env_args: Mapping that must provide the keys ``'reset_config'``,
             ``'train_mode'`` and ``'sampler_path'``.
     """
     super().__init__(env)
     self.reset_config = env_args['reset_config']
     self.train_mode = env_args['train_mode']
     # NOTE(review): relies on ``self.env`` being available once the parent
     # initialiser has run -- presumably it stores ``env`` there; confirm.
     sampler_pair = create_sampler_manager(env_args['sampler_path'],
                                           self.env.reset_parameters)
     # The factory result is unpacked as (manager, resample interval).
     self.sampler_manager, self.resample_interval = sampler_pair
     # Episode counter starts at zero.
     self.episode = 0
Exemple #2
0
    def __init__(self, env, env_args):
        """Store ``env``, cache per-brain metadata and reset the environment.

        Args:
            env: Environment exposing ``brains``, ``external_brain_names``,
                ``reset_parameters`` and ``reset()``.
            env_args: Mapping that must provide the keys ``'reset_config'``,
                ``'train_mode'`` and ``'sampler_path'``.

        All per-brain lists built here are ordered like ``self.brain_names``.
        """
        self.env = env
        self.reset_config = env_args['reset_config']
        self.train_mode = env_args['train_mode']
        sampler_pair = create_sampler_manager(env_args['sampler_path'],
                                              env.reset_parameters)
        # The factory result is unpacked as (manager, resample interval).
        self.sampler_manager, self.resample_interval = sampler_pair
        self.episode = 0

        self.brains = env.brains
        self.brain_names = env.external_brain_names
        self.brain_num = len(self.brain_names)

        # Look each brain up once; every list below iterates in this order.
        specs = [self.brains[name] for name in self.brain_names]

        def _resolution(spec):
            # [height, width, channels] of the first camera, or [] when the
            # brain has no visual observations.
            if not spec.number_visual_observations:
                return []
            camera = spec.camera_resolutions[0]
            channels = 1 if camera['blackAndWhite'] else 3
            return [camera['height'], camera['width'], channels]

        self.visual_resolutions = [_resolution(spec) for spec in specs]
        self.visual_sources = [spec.number_visual_observations
                               for spec in specs]
        # Vector observation size multiplied by the number of stacked frames.
        self.s_dim = [
            spec.vector_observation_space_size *
            spec.num_stacked_vector_observations for spec in specs
        ]
        self.a_dim_or_list = [spec.vector_action_space_size for spec in specs]
        self.is_continuous = [
            spec.vector_action_space_type == 'continuous' for spec in specs
        ]

        # One reset up front, used only to count the agents of each brain.
        initial_obs = self.env.reset()
        self.brain_agents = [
            len(initial_obs[name].agents) for name in self.brain_names
        ]
Exemple #3
0
 def __init__(self, env, env_args):
     """Initialise the wrapper around ``env`` from the ``env_args`` mapping.

     Args:
         env: Environment object forwarded to the parent initialiser.
         env_args: Mapping that must provide the keys ``'reset_config'``
             and ``'sampler_path'``.
     """
     super().__init__(env)
     self.reset_config = env_args['reset_config']
     # The second argument handed to the sampler factory is the literal 0.
     sampler_pair = create_sampler_manager(env_args['sampler_path'], 0)
     # The factory result is unpacked as (manager, resample interval).
     self.sampler_manager, self.resample_interval = sampler_pair
     # Episode counter starts at zero.
     self.episode = 0
Exemple #4
0
def unity_run(default_args, share_args, options, max_step, max_episode,
              save_frequency, name):
    """Create a Unity environment and one model per brain, then run them.

    In inference mode (``options['--inference']`` truthy) this calls
    ``Loop.inference``. Otherwise it saves the algorithm config for every
    brain, runs ``Loop.no_op`` followed by ``Loop.train``, closes the models
    and the environment, and terminates the process with ``sys.exit()``.

    Args:
        default_args: Mapping read for ``'reset_config'``, ``'exe_file'`` and
            ``'no_op_steps'``.
        share_args: Mapping read for ``'base_dir'``, ``'logger2file'``,
            ``'out_graph'`` and, for ``ma_*`` algorithms, ``'ma'`` (with
            ``'batch_size'`` and ``'capacity'``).
        options: Command-line option mapping. Keys read here: ``--algorithm``,
            ``--unity``, ``--env``, ``--port``, ``--inference``, ``--graphic``,
            ``--sampler``, ``--config-file``, ``--load``, ``--fill-in`` and
            ``--noop-choose``. The string ``'None'`` marks an unset value for
            ``--env``, ``--config-file`` and ``--load``.
        max_step: Step limit handed to ``Loop.train``. Replaced by
            ``env_config.max_step`` when an ``env_config.py`` sits next to
            the executable.
        max_episode: Episode limit handed to the models and to ``Loop.train``.
        save_frequency: Forwarded to ``Loop.train``.
        name: Run name; the last component of the output directory.

    Raises:
        NotImplementedError: A ``KeyError`` was raised while resolving the
            algorithm or its config entry.
        FileNotFoundError: The environment executable path does not exist.
        SystemExit: Always raised at the end of a (non-inference) training
            run, after cleanup.
    """
    from mlagents.envs import UnityEnvironment
    from utils.sampler import create_sampler_manager

    try:
        tf_version, (model, policy_mode,
                     _) = get_model_info(options['--algorithm'])
        algorithm_config = sth.load_config(
            f'./Algorithms/{tf_version}/config.yaml')[options['--algorithm']]
    except KeyError as err:
        raise NotImplementedError(
            'algorithm is unknown or has no entry in config.yaml') from err
    # Multi-agent algorithms are identified by their 'ma_' name prefix.
    ma = options['--algorithm'].startswith('ma_')

    # Stays None unless the environment directory ships its own env_loop.py.
    Loop = None
    reset_config = default_args['reset_config']
    if options['--unity']:
        # No file name: UnityEnvironment is constructed with its defaults.
        env = UnityEnvironment()
        env_name = 'unity'
    else:
        file_name = default_args['exe_file'] if options[
            '--env'] == 'None' else options['--env']
        if not os.path.exists(file_name):
            raise FileNotFoundError(f'can not find this file: {file_name}')
        env = UnityEnvironment(file_name=file_name,
                               base_port=int(options['--port']),
                               no_graphics=False if options['--inference']
                               else not options['--graphic'])
        env_dir = os.path.split(file_name)[0]
        # Environment name = last two components of the executable's folder.
        env_name = os.path.join(*env_dir.replace('\\', '/').replace(
            r'//', r'/').split('/')[-2:])
        # Make per-environment override modules importable.
        sys.path.append(env_dir)
        # os.path.join keeps the probe relative when env_dir is empty instead
        # of looking at the filesystem root.
        if os.path.exists(os.path.join(env_dir, 'env_config.py')):
            import env_config
            reset_config = env_config.reset_config
            max_step = env_config.max_step
        if os.path.exists(os.path.join(env_dir, 'env_loop.py')):
            from env_loop import Loop

    # NOTE(review): from here until the training try/finally below, an
    # exception leaves `env` open; the inference branch does not close it
    # either. Confirm whether Loop.inference owns the shutdown before adding
    # a close here.
    sampler_manager, resampling_interval = create_sampler_manager(
        options['--sampler'], env.reset_parameters)

    if Loop is None:
        if ma:
            from ma_loop import Loop
        else:
            from loop import Loop

    if options['--config-file'] != 'None':
        algorithm_config = update_config(algorithm_config,
                                         options['--config-file'])
    _base_dir = os.path.join(share_args['base_dir'], env_name,
                             options['--algorithm'])
    base_dir = os.path.join(_base_dir, name)
    show_config(algorithm_config)

    brain_names = env.external_brain_names
    brains = env.brains
    brain_num = len(brain_names)

    # Per brain: [height, width, channels] of the first camera, or [] when
    # the brain has no visual observations.
    visual_resolutions = {}
    for brain_name in brain_names:
        brain = brains[brain_name]
        if brain.number_visual_observations:
            camera = brain.camera_resolutions[0]
            visual_resolutions[brain_name] = [
                camera['height'],
                camera['width'],
                1 if camera['blackAndWhite'] else 3,
            ]
        else:
            visual_resolutions[brain_name] = []

    model_params = [{
        's_dim':
        brains[brain_name].vector_observation_space_size *
        brains[brain_name].num_stacked_vector_observations,
        'a_dim_or_list':
        brains[brain_name].vector_action_space_size,
        'action_type':
        brains[brain_name].vector_action_space_type,
        'max_episode':
        max_episode,
        'base_dir':
        os.path.join(base_dir, brain_name),
        'logger2file':
        share_args['logger2file'],
        'out_graph':
        share_args['out_graph'],
    } for brain_name in brain_names]

    if ma:
        assert brain_num > 1, 'if using ma* algorithms, number of brains must larger than 1'
        # A single replay buffer is handed to the loop for all agents.
        data = ExperienceReplay(share_args['ma']['batch_size'],
                                share_args['ma']['capacity'])
        extra_params = {'data': data}
        models = [
            model(n=brain_num, i=i, **model_params[i], **algorithm_config)
            for i in range(brain_num)
        ]
    else:
        extra_params = {}
        models = [
            model(visual_sources=brains[brain_name].number_visual_observations,
                  visual_resolution=visual_resolutions[brain_name],
                  **model_params[index],
                  **algorithm_config)
            for index, brain_name in enumerate(brain_names)
        ]

    # Restore from the run named by --load when given, else from this run.
    load_name = name if options['--load'] == 'None' else options['--load']
    for brain_model, brain_name in zip(models, brain_names):
        brain_model.init_or_restore(
            os.path.join(_base_dir, load_name, brain_name))
    begin_episode = models[0].get_init_episode()

    params = {
        'env': env,
        'brain_names': brain_names,
        'models': models,
        'begin_episode': begin_episode,
        'save_frequency': save_frequency,
        'reset_config': reset_config,
        'max_step': max_step,
        'max_episode': max_episode,
        'sampler_manager': sampler_manager,
        'resampling_interval': resampling_interval,
        'policy_mode': policy_mode
    }
    # With --fill-in, the no-op phase runs for batch_size steps when the
    # algorithm config defines one; otherwise the default step count is used.
    if 'batch_size' in algorithm_config and options['--fill-in']:
        steps = algorithm_config['batch_size']
    else:
        steps = default_args['no_op_steps']
    no_op_params = {
        'env': env,
        'brain_names': brain_names,
        'models': models,
        'brains': brains,
        'steps': steps,
        'choose': options['--noop-choose']
    }
    params.update(extra_params)
    no_op_params.update(extra_params)

    if options['--inference']:
        Loop.inference(env,
                       brain_names,
                       models,
                       reset_config=reset_config,
                       sampler_manager=sampler_manager,
                       resampling_interval=resampling_interval)
    else:
        try:
            for brain_name in brain_names:
                sth.save_config(os.path.join(base_dir, brain_name, 'config'),
                                algorithm_config)
            Loop.no_op(**no_op_params)
            Loop.train(**params)
        except Exception as e:
            # Best effort: report the failure and fall through to cleanup.
            print(e)
        finally:
            try:
                for brain_model in models:
                    brain_model.close()
            except Exception as e:
                print(e)
            finally:
                # Always release the environment, then end the process.
                env.close()
                sys.exit()