def test_no_crash_cartpole(self): config = self._define_config() with config.unlocked: config.env = 'CartPole-v1' config.max_length = 200 config.steps = 500 config.normalize_ranges = False # The env reports wrong ranges. config.network = networks.feed_forward_categorical for score in train.train(config, env_processes=True): float(score)
def test_pendulum_no_crash(self): nets = networks.feed_forward_gaussian, networks.recurrent_gaussian for network in nets: config = self._define_config() with config.unlocked: config.env = 'Pendulum-v0' config.max_length = 200 config.steps = 500 config.network = network for score in train.train(config, env_processes=True): float(score)
def test_no_crash_variable_duration(self): config = self._define_config() with config.unlocked: config.env = functools.partial( tools.MockEnvironment, observ_shape=(2, 3), action_shape=(3,), min_duration=5, max_duration=25) config.max_length = 25 config.steps = 100 config.network = networks.recurrent_gaussian for score in train.train(config, env_processes=False): float(score)
def test_no_crash_variable_duration(self): config = self._define_config() with config.unlocked: config.env = functools.partial(tools.MockEnvironment, observ_shape=(2, 3), action_shape=(3, ), min_duration=5, max_duration=25) config.max_length = 25 config.steps = 100 config.network = networks.recurrent_gaussian for score in train.train(config, env_processes=False): float(score)
def test_no_crash_observation_shape(self): nets = networks.feed_forward_gaussian, networks.recurrent_gaussian observ_shapes = (1,), (2, 3), (2, 3, 4) for network, observ_shape in itertools.product(nets, observ_shapes): config = self._define_config() with config.unlocked: config.env = functools.partial( tools.MockEnvironment, observ_shape, action_shape=(3,), min_duration=15, max_duration=15) config.max_length = 20 config.steps = 50 config.network = network for score in train.train(config, env_processes=False): float(score)
def test_no_crash_observation_shape(self): nets = networks.feed_forward_gaussian, networks.recurrent_gaussian observ_shapes = (1, ), (2, 3), (2, 3, 4) for network, observ_shape in itertools.product(nets, observ_shapes): config = self._define_config() with config.unlocked: config.env = functools.partial(tools.MockEnvironment, observ_shape, action_shape=(3, ), min_duration=15, max_duration=15) config.max_length = 20 config.steps = 50 config.network = network for score in train.train(config, env_processes=False): float(score)
def main(): parser = argparse.ArgumentParser() parser.add_argument( 'exp_path', type=str, nargs='*', help='Full experiment path (to the dir where the config is stored)') parser.add_argument( '-nep', '--no_env_process', default=False, action='store_true', help='Step environments in separate processes to circumvent the GIL') parser.add_argument('-r', '--render', default=False, action='store_true', help='Whether to render the run') parser.add_argument('-c', '--cpu', default=False, action='store_true', help='Whether to run the training on access1-cp') parser.add_argument('-e', '--edgar', default=False, action='store_true', help='Whether to run the training on edgar') parser.add_argument('-b', '--besteffort', default=False, action='store_true', help='Whether to run in besteffort mode') parser.add_argument('-nc', '--nb_cores', type=int, default=8, help='Number of cores to be used on the cluster') parser.add_argument('-w', '--wallclock', type=int, default=72, help='Job wall clock time to be set on the cluster') parser.add_argument( '-s', '--steps', type=int, default=None, help= 'Number of steps of the experiment (if not None, change the config)') args = parser.parse_args() sys_path_clean = utils.get_sys_path_clean() seed_path, timestamp_dir = os.path.split(os.path.normpath( args.exp_path[0])) exp_path, _ = os.path.split(os.path.normpath(seed_path)) exp_name = os.path.basename(exp_path) rendered_envs_path = '/home/thoth/apashevi/scratch_remote/Cache/Code/{}/rlgrasp/rendered_envs.py'.format( exp_name) if not args.cpu and not args.edgar: # run the job locally utils.change_sys_path(sys_path_clean, exp_path) import agents.scripts.train as trainer from agents.scripts import utility assert len(args.exp_path) == 1 config = utility.load_config(args.exp_path[0]) with config.unlocked: config.num_agents = 4 if args.steps is not None: config.steps = args.steps utils.rewrite_rendered_envs_file(args.render, rendered_envs_path) for score in trainer.train(config, not args.no_env_process): print('Score {}'.format(score)) if args.render: utils.rewrite_rendered_envs_file(False, rendered_envs_path) else: if args.edgar: cluster = 'edgar' else: cluster = 'access1-cp' utils.rewrite_rendered_envs_file(False, rendered_envs_path) job_cluster = utils.get_job(cluster, args.besteffort, args.nb_cores, args.wallclock) timestamp = timestamp_dir.split('-')[0] config = timestamp_dir.split('-')[1] if len(args.exp_path) == 1: send_job(job_cluster, seed_path, timestamp, config, args_steps) else: for exp_path_complete in args.exp_path: seed_path, _ = os.path.split( os.path.normpath(exp_path_complete)) send_job(job_cluster, seed_path, timestamp, config, args.steps)