def _setup(self, variant):
    """Seed all RNGs from the variant and capture the current Keras session.

    Stores the variant for later use and marks the instance as not yet
    built; ``train_generator`` is created later during the build phase.
    """
    self._variant = variant
    set_seed(variant['run_params']['seed'])
    # No building has happened yet; these are populated lazily.
    self._built = False
    self.train_generator = None
    # Reuse whatever session the Keras backend currently holds.
    self._session = tf.keras.backend.get_session()
def _setup(self, variant):
    """Seed all RNGs, record the variant, and install a fresh TF session.

    The session is created with GPU memory growth enabled (so TensorFlow
    allocates GPU memory on demand rather than grabbing it all up front)
    and registered as the Keras backend session.
    """
    set_seed(variant['run_params']['seed'])
    self._variant = variant
    # Build a growth-enabled session and make it the Keras backend session.
    config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
    tf.keras.backend.set_session(tf.Session(config=config))
    self._session = tf.keras.backend.get_session()
    # Populated lazily during the build phase.
    self._built = False
    self.train_generator = None
def _setup(self, variant):
    """Seed RNGs, register per-algorithm save/restore handlers, and set up TF.

    Installs a GPU-memory-growth TensorFlow session as the Keras backend
    session and records dispatch tables mapping an algorithm name to its
    checkpoint save/restore routine.
    """
    set_seed(variant['run_params']['seed'])
    self._variant = variant

    # Dispatch tables: algorithm name -> bound save/restore method.
    self.SAVE_PER_ALGO = {
        'default': self.save_mbpo,
        'MBPO': self.save_mbpo,
        'CMBPO': self.save_cmbpo,
    }
    self.RESTORE_PER_ALGO = {
        'default': self.restore_mbpo,
        'MBPO': self.restore_mbpo,
        'CMBPO': self.restore_cmbpo,
    }

    # Growth-enabled TF session registered as the Keras backend session.
    config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
    tf.keras.backend.set_session(tf.Session(config=config))
    self._session = tf.keras.backend.get_session()

    # Populated lazily during the build phase.
    self._built = False
    self.train_generator = None
def main():
    """Script entry point: parse args, build the variant, and run training.

    Steps:
      1. Parse CLI arguments and derive the variant spec.
      2. Fold command-line overrides (trial resources, video save
         frequency) into the spec.
      3. Seed RNGs and install a GPU-memory-growth TF session as the
         Keras backend session.
      4. Configure the RLA ``tester`` experiment tracker/logger.
      5. Construct environments, replay pool, sampler, Q-functions,
         policies and the algorithm, then exhaust ``algorithm.train()``
         to run the full training loop.
    """
    import sys

    example_args = get_parser().parse_args(sys.argv[1:])
    variant_spec = get_variant_spec(example_args)
    command_line_args = example_args
    # Fixed typo in the log message: 'vriant' -> 'variant'.
    print('variant spec: {}'.format(variant_spec))

    params = variant_spec.get('algorithm_params')
    # NOTE(review): local_dir, resources_per_trial and experiment_id are not
    # used below; kept because _normalize_trial_resources may validate the
    # CLI arguments as a side effect — confirm before removing.
    local_dir = os.path.join(params.get('log_dir'), params.get('domain'))
    resources_per_trial = _normalize_trial_resources(
        command_line_args.resources_per_trial,
        command_line_args.trial_cpus,
        command_line_args.trial_gpus,
        command_line_args.trial_extra_cpus,
        command_line_args.trial_extra_gpus)
    experiment_id = params.get('exp_name')

    #### add pool_load_max_size to experiment_id
    if 'pool_load_max_size' in variant_spec['algorithm_params']['kwargs']:
        max_size = variant_spec['algorithm_params']['kwargs'][
            'pool_load_max_size']
        experiment_id = '{}_{}e3'.format(experiment_id, int(max_size / 1000))
    ####

    variant_spec = add_command_line_args_to_variant_spec(
        variant_spec, command_line_args)

    if command_line_args.video_save_frequency is not None:
        assert 'algorithm_params' in variant_spec
        variant_spec['algorithm_params']['kwargs']['video_save_frequency'] = (
            command_line_args.video_save_frequency)

    variant = variant_spec

    # init: seed everything and install a growth-enabled TF session so GPU
    # memory is allocated on demand rather than all at once.
    set_seed(variant['run_params']['seed'])
    gpu_options = tf.GPUOptions(allow_growth=True)
    session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    tf.keras.backend.set_session(session)

    # build: deep-copy the variant (tester may mutate hyper-params) and
    # configure the RLA experiment tracker before constructing models.
    variant = copy.deepcopy(variant)
    tester.set_hyper_param(**variant)
    tester.add_record_param(['run_params.seed', 'info'])
    tester.configure(
        task_name='policy_learn',
        private_config_path=os.path.join(get_package_path(),
                                         'rla_config.yaml'),
        run_file='main.py',
        log_root=get_package_path())
    tester.log_files_gen()
    tester.print_args()

    environment_params = variant['environment_params']
    training_environment = get_environment_from_params(
        environment_params['training'])
    # Fall back to the training environment when no evaluation env is given.
    evaluation_environment = (
        get_environment_from_params(environment_params['evaluation'](variant))
        if 'evaluation' in environment_params
        else training_environment)

    replay_pool = get_replay_pool_from_variant(variant, training_environment)
    sampler = get_sampler_from_variant(variant)
    Qs = get_Q_function_from_variant(variant, training_environment)
    policy = get_policy_from_variant(variant, training_environment, Qs)
    initial_exploration_policy = get_policy('UniformPolicy',
                                            training_environment)

    #### get termination function
    domain = environment_params['training']['domain']
    static_fns = mopo.static[domain.lower()]
    ####

    print("[ DEBUG ] KWARGS: {}".format(variant['algorithm_params']['kwargs']))
    algorithm = get_algorithm_from_variant(
        variant=variant,
        training_environment=training_environment,
        evaluation_environment=evaluation_environment,
        policy=policy,
        initial_exploration_policy=initial_exploration_policy,
        Qs=Qs,
        pool=replay_pool,
        static_fns=static_fns,
        sampler=sampler,
        session=session)
    print('[ DEBUG ] finish construct model, start training')

    # train: algorithm.train() is a generator; exhausting it runs training.
    list(algorithm.train())