def main(args, unknown_args):
    """Entry point: resolve the logdir, dump the config and launch training."""
    args, config = parse_args_uargs(args, unknown_args, dump_config=True)
    pprint(args)
    pprint(config)
    set_global_seeds(args.seed)

    # at least one of the two logdir sources must be supplied
    assert args.baselogdir is not None or args.logdir is not None
    if args.logdir is None:
        # derive a run-specific subdirectory name from the model module
        naming_modules = prepare_modules(model_dir=args.model_dir)
        run_subdir = naming_modules["model"].prepare_logdir(config=config)
        args.logdir = str(pathlib2.Path(args.baselogdir) / run_subdir)

    create_if_need(args.logdir)
    save_config(config=config, logdir=args.logdir)

    # re-import the user modules, this time snapshotting them into the logdir
    modules = prepare_modules(model_dir=args.model_dir, dump_dir=args.logdir)
    data_source = modules["data"].DataSource()
    model = modules["model"].prepare_model(config)
    model_runner = modules["model"].ModelRunner(model=model)
    model_runner.train(
        datasource=data_source,
        args=args,
        stages_config=config["stages"],
        verbose=args.verbose)
def main(args, unknown_args):
    """Entry point: import an Experiment/Runner pair from expdir and run it."""
    args, config = parse_args_uargs(args, unknown_args, dump_config=True)
    # fall back to a fixed seed when the config does not provide one
    set_global_seeds(config.get("seed", 42))

    experiment_cls, runner_cls = import_experiment_and_runner(Path(args.expdir))
    experiment = experiment_cls(config)
    runner = runner_cls()

    # snapshot the experiment sources next to the logs for reproducibility
    dump_code(args.expdir, experiment.logdir)
    runner.run_experiment(experiment, check=args.check)
def main(args, unknown_args):
    """Entry point: run inference with the model described by the config."""
    args, config = parse_args_uargs(args, unknown_args)
    pprint(args)
    pprint(config)
    set_global_seeds(args.seed)

    modules = prepare_modules(model_dir=args.model_dir)
    data_source = modules["data"].DataSource()
    loaders = data_source.prepare_loaders(args, **config["data_params"])

    model = modules["model"].prepare_model(config)
    model_runner = modules["model"].ModelRunner(model=model)
    inference_callbacks = model_runner.prepare_callbacks(
        callbacks_params=config["callbacks_params"],
        args=args,
        mode="infer")
    model_runner.infer(
        loaders=loaders,
        callbacks=inference_callbacks,
        verbose=args.verbose)
def reset(self):
    """Reset the environment and return the initial (preprocessed) observation.

    Periodically rebuilds the underlying ProstheticsEnv (every ``ep2reload``
    episodes) and, when ``randomized_start`` is enabled, perturbs the initial
    simbody pose before the episode starts.
    """
    self.time_step = 0
    # recreate the simulator every `ep2reload` episodes with a fresh seed
    if self.episodes % self.ep2reload == 0:
        self.env = ProstheticsEnv(visualize=self.visualize, integrator_accuracy=1e-3)
        seed = random.randrange(SEED_RANGE)
        set_global_seeds(seed)
        self.env.change_model(model=self.model, prosthetic=True, difficulty=1, seed=seed)
    state_desc = self.env.reset(project=False)
    if self.randomized_start:
        # sample a randomized initial pose (legs/knees) around fixed means
        state = get_simbody_state(state_desc)
        amplitude = random.gauss(0.8, 0.05)
        direction = random.choice([-1., 1])
        amplitude_knee = random.gauss(-1.2, 0.05)
        state[4] = 0.8
        state[6] = amplitude * direction  # right leg
        state[9] = amplitude * direction * (-1.)  # left leg
        state[13] = amplitude_knee if direction == 1. else 0  # right knee
        state[14] = amplitude_knee if direction == -1. else 0  # left knee
        # noise = np.random.normal(scale=0.1, size=72)
        # noise[3:6] = 0
        # noise[6] = np.random.uniform(-1., 1., size=1)
        # noise[9] = np.random.uniform(-1., 1., size=1)
        # noise[13] = -np.random.uniform(0., 1., size=1)  # knee_r
        # noise[14] = -np.random.uniform(0., 1., size=1)  # knee_l
        # state = (np.array(state) + noise).tolist()
        # write the perturbed coordinates back into the simulator's state vector
        simbody_state = self.env.osim_model.get_state()
        obj = simbody_state.getY()
        for i in range(72):
            obj[i] = state[i]
        self.env.osim_model.set_state(simbody_state)
    # NOTE(review): the observation is built from `state_desc` captured BEFORE
    # the randomized pose was written back — confirm this is intended.
    observation = preprocess_obs_round2(state_desc)
    if self.observe_time:
        # extra feature: normalized time, starts at -1.0
        observation.append(-1.0)
    return observation
def main(args, unknown_args):
    """Entry point: run inference using a Registry-built model."""
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seeds(args.seed)

    modules = prepare_modules(expdir=args.expdir)

    data_source = modules["data"].DataSource()
    data_params = config.get("data_params", {}) or {}
    loaders = data_source.prepare_loaders(
        mode="infer",
        n_workers=args.workers,
        batch_size=args.batch_size,
        **data_params)

    model = Registry.get_model(**config["model_params"])
    model_runner = modules["model"].ModelRunner(model=model)

    callbacks_params = config.get("callbacks_params", {}) or {}
    inference_callbacks = model_runner.prepare_callbacks(
        mode="infer",
        resume=args.resume,
        out_prefix=args.out_prefix,
        **callbacks_params)
    model_runner.infer(
        loaders=loaders,
        callbacks=inference_callbacks,
        verbose=args.verbose)
def main(args, unknown_args):
    """Entry point: resolve the logdir, dump the config and train in stages."""
    args, config = parse_args_uargs(args, unknown_args, dump_config=True)
    set_global_seeds(args.seed)

    # at least one of the two logdir sources must be supplied
    assert args.baselogdir is not None or args.logdir is not None
    if args.logdir is None:
        # derive a run-specific subdirectory name from the model module
        naming_modules = prepare_modules(expdir=args.expdir)
        run_subdir = naming_modules["model"].prepare_logdir(config=config)
        args.logdir = str(pathlib.Path(args.baselogdir) / run_subdir)

    os.makedirs(args.logdir, exist_ok=True)
    save_config(config=config, logdir=args.logdir)

    # re-import the user modules, this time snapshotting them into the logdir
    modules = prepare_modules(expdir=args.expdir, dump_dir=args.logdir)
    model = Registry.get_model(**config["model_params"])
    data_source = modules["data"].DataSource()
    model_runner = modules["model"].ModelRunner(model=model)
    model_runner.train_stages(
        datasource=data_source,
        args=args,
        stages_config=config["stages"],
        verbose=args.verbose)
def run(self):
    """Main sampling loop.

    Rolls out episodes indefinitely (until ``episode_limit``): acts with the
    current actor (plus optional action/parameter noise), pushes transitions
    into a local buffer, stores finished episodes, logs metrics, tracks "hard"
    seeds, and periodically re-syncs actor weights.
    """
    self.episode_index = 1
    self.load_actor_weights()
    self.buffer = SamplerBuffer(self.buffer_size, self.env.observation_shape, self.env.action_shape)
    # sampler-specific seeding first...
    seed = self._seed + random.randrange(SEED_RANGE)
    set_global_seeds(seed)
    # ...then the per-episode seed (drawn from the fixed list if provided)
    seed = random.randrange(SEED_RANGE) \
        if self.seeds is None \
        else random.choice(self.seeds)
    set_global_seeds(seed)
    self.buffer.init_with_observation(self.env.reset())
    self.random_process.reset_states()
    action_noise = False
    param_noise_d = 0
    noise_action = 0
    action_noise_t = 0
    step_index = 0
    episode_reward = 0
    episode_reward_orig = 0
    start_time = time.time()
    done = False
    while True:
        # ---- one episode -------------------------------------------------
        while not done:
            state = self.buffer.get_state(history_len=self.history_len)
            action = self.act(state)
            # resample exploration noise only every `action_noise_t` steps
            if action_noise \
                    and action_noise_t + self.action_noise_t >= step_index:
                noise_action = self.random_process.sample()
                action_noise_t = step_index
            else:
                # NOTE(review): self-assignment is a no-op — it just keeps the
                # previously sampled noise value; confirm that is the intent
                noise_action = noise_action
            action = action + noise_action
            action = np.clip(action, a_min=self.action_clip[0], a_max=self.action_clip[1])
            next_state, reward, done, info = self.env.step(action)
            episode_reward += reward
            episode_reward_orig += info.get("reward_origin", 0)
            transition = [next_state, action, reward, done, step_index]
            self.buffer.push_transition(transition)
            step_index += 1
        # ---- episode finished --------------------------------------------
        elapsed_time = time.time() - start_time
        # training samplers always store; inference only when forced
        if not self.infer or self.force_store:
            self.store_episode()
        # a seed is "hard" when the episode ended too early or scored too low
        if step_index < self.min_episode_steps \
                or episode_reward < self.min_episode_reward:
            self.hard_seeds.add(seed)
        else:
            self.hard_seeds.discard(seed)
        print(
            f"--- episode {self.episode_index:5d}:\t"
            f"steps: {step_index:5d}\t"
            f"reward: {episode_reward:10.4f}/{episode_reward_orig:10.4f}\t"
            f"seed: {seed}")
        if self.logger is not None:
            self.logger.add_scalar("steps", step_index, self.episode_index)
            self.logger.add_scalar("action noise sigma", self.random_process.current_sigma, self.episode_index)
            self.logger.add_scalar("param noise d", param_noise_d, self.episode_index)
            self.logger.add_scalar("reward", episode_reward, self.episode_index)
            self.logger.add_scalar("reward_origin", episode_reward_orig, self.episode_index)
            self.logger.add_scalar("episode per minute", 1. / elapsed_time * 60, self.episode_index)
            self.logger.add_scalar("steps per second", step_index / elapsed_time, self.episode_index)
            self.logger.add_scalar("episode time (sec)", elapsed_time, self.episode_index)
            self.logger.add_scalar("episode time (min)", elapsed_time / 60, self.episode_index)
            self.logger.add_scalar("step time (sec)", elapsed_time / step_index, self.episode_index)
        self.episode_index += 1
        if self.episode_index >= self.episode_limit:
            return
        # periodically pull fresh actor weights
        if self.episode_index % self.weights_sync_period == 0:
            self.load_actor_weights()
        # ---- choose exploration mode for the next episode ----------------
        noise_prob_ = random.random()
        if noise_prob_ < self.param_noise_prob:
            # perturb actor parameters towards a target action distance
            states = self.buffer.get_states_history(
                history_len=self.history_len)
            states = self.to_tensor(states).detach()
            param_noise_d = set_params_noise(
                actor=self.actor,
                states=states,
                target_d=self.param_noise_d,
                tol=1e-3,
                max_steps=self.param_noise_steps)
            action_noise = False
        elif noise_prob_ < \
                self.param_noise_prob + self.action_noise_prob:
            action_noise = True
            param_noise_d = 0
        else:
            action_noise = False
            param_noise_d = 0
        # ---- reset per-episode state -------------------------------------
        self.buffer = SamplerBuffer(
            capacity=self.buffer_size,
            observation_shape=self.env.observation_shape,
            action_shape=self.env.action_shape)
        seed = self._seed + random.randrange(SEED_RANGE)
        set_global_seeds(seed)
        if self.seeds is None:
            # revisit a known-hard seed with probability 0.5
            hard_seed_prob = random.random()
            if len(self.hard_seeds) > 0 and hard_seed_prob < 0.5:
                seed = random.sample(self.hard_seeds, 1)[0]
            else:
                seed = random.randrange(SEED_RANGE)
        else:
            seed = random.choice(self.seeds)
        set_global_seeds(seed)
        self.buffer.init_with_observation(self.env.reset())
        self.random_process.reset_states()
        noise_action = 0
        action_noise_t = 0
        step_index = 0
        episode_reward = 0
        episode_reward_orig = 0
        start_time = time.time()
        done = False
def __init__(self,
             actor,
             env,
             id,
             logdir=None,
             redis_server=None,
             redis_prefix=None,
             buffer_size=int(1e4),
             history_len=1,
             weights_sync_period=1,
             mode="infer",
             resume=None,
             action_noise_prob=0,
             action_noise_t=1,
             random_process=None,
             param_noise_prob=0,
             param_noise_d=0.2,
             param_noise_steps=1000,
             seeds=None,
             action_clip=(-1, 1),
             episode_limit=None,
             force_store=False,
             min_episode_steps=None,
             min_episode_reward=None):
    """Sampler: rolls out episodes with a copy of the actor.

    Args:
        actor: policy network; a deep copy is moved to the local device.
        env: environment exposing observation_shape/action_shape.
        id: integer sampler id; also offsets the base random seed.
        logdir: when given, a per-sampler TensorBoard directory is created.
        redis_server/redis_prefix: optional episode-storage backend handles.
        buffer_size: capacity of the local transition buffer.
        history_len: number of past observations composing a state.
        weights_sync_period: re-load actor weights every N episodes.
        mode: "infer" disables all exploration noise; anything else trains.
        action_noise_*/param_noise_*: exploration-noise configuration.
        seeds: optional fixed list of environment seeds.
        action_clip: (min, max) bounds applied to emitted actions.
        episode_limit: stop after this many episodes (default: practically unbounded).
        force_store: store episodes even in infer mode.
        min_episode_steps/min_episode_reward: thresholds for "hard seed" tracking.
    """
    # deterministic per-sampler seed
    self._seed = 42 + id
    set_global_seeds(self._seed)
    self._sampler_id = id
    self._device = UtilsFactory.prepare_device()
    # private actor copy on the local device
    self.actor = copy.deepcopy(actor).to(self._device)
    self.env = env
    self.redis_server = redis_server
    self.redis_prefix = redis_prefix or ""
    self.resume = resume
    # default limit is effectively "run forever"
    self.episode_limit = episode_limit or int(2**32 - 2)
    self.force_store = force_store
    self.min_episode_steps = min_episode_steps
    self.min_episode_reward = min_episode_reward
    self.hard_seeds = set()
    # hard-seed thresholds and a fixed seed list are mutually exclusive
    min_episode_flag_ = \
        min_episode_steps is None and min_episode_reward is None
    assert min_episode_flag_ or seeds is None
    # unset thresholds become "never triggered" sentinels
    self.min_episode_steps = self.min_episode_steps or -int(1e6)
    self.min_episode_reward = self.min_episode_reward or -int(1e6)
    self.history_len = history_len
    self.buffer_size = buffer_size
    self.weights_sync_period = weights_sync_period
    self.episode_index = 0
    self.action_clip = action_clip
    self.infer = mode == "infer"
    self.seeds = seeds
    self.action_noise_prob = action_noise_prob
    self.action_noise_t = action_noise_t
    self.random_process = random_process or RandomProcess()
    self.param_noise_prob = param_noise_prob
    self.param_noise_d = param_noise_d
    self.param_noise_steps = param_noise_steps
    if self.infer:
        # inference must be noise-free
        self.action_noise_prob = 0
        self.param_noise_prob = 0
    if logdir is not None:
        # NOTE(review): "%M" (minute) appears twice in this format string —
        # looks like a typo for another field; confirm before changing,
        # since it only affects the directory name.
        current_date = datetime.now().strftime("%y-%m-%d-%H-%M-%S-%M-%f")
        logpath = f"{logdir}/sampler-{mode}-{id}-{current_date}"
        os.makedirs(logpath, exist_ok=True)
        self.logger = SummaryWriter(logpath)
    else:
        self.logger = None
    self.buffer = SamplerBuffer(
        capacity=self.buffer_size,
        observation_shape=self.env.observation_shape,
        action_shape=self.env.action_shape)
import path import os import copy import torch import numpy as np from catalyst.utils.args import parse_args_uargs from catalyst.utils.misc import set_global_seeds, import_module, create_if_need from envs.prosthetics_preprocess import \ preprocess_obs_round2 as preprocess_state from osim.env import ProstheticsEnv import atexit import multiprocessing as mp os.environ["OMP_NUM_THREADS"] = "1" torch.set_num_threads(1) set_global_seeds(42) golden_seeds = np.load( os.path.join(os.path.dirname(os.path.abspath(__file__)), "golden_seeds.npz"))["seeds"] DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") class AlgoWrapper: def __init__(self, actor, critics, history_len, consensus="min"): self.actor = actor.to(DEVICE) self.critics = [x.to(DEVICE) for x in critics] self.history_len = history_len self.consensus = consensus @staticmethod def _act(actor, state):
def __init__(self,
             frame_skip=1,
             visualize=False,
             randomized_start=False,
             max_episode_length=1000,
             reward_scale=0.1,
             death_penalty=0.0,
             living_bonus=0.0,
             crossing_legs_penalty=0.0,
             bending_knees_bonus=0.0,
             left_knee_bonus=0.,
             right_knee_bonus=0.,
             bonus_for_knee_angles_scale=0.,
             bonus_for_knee_angles_angle=0.,
             activations_penalty=0.,
             max_reward=10.0,
             action_fn=None,
             observe_time=False,
             model="3D"):
    """ProstheticsEnv wrapper with frame skipping and reward shaping.

    Args:
        frame_skip: number of simulator steps per agent action.
        visualize: forward to ProstheticsEnv visualization.
        randomized_start: randomize the initial pose on reset.
        max_episode_length: episode cap (adjusted by a frame-skip hotfix).
        reward_scale .. max_reward: reward-shaping coefficients.
        action_fn: optional name of the actor output activation; "tanh"
            rescales actions from [-1, 1] into [0, 1]; None / anything else
            leaves actions unchanged.
        observe_time: append a normalized-time feature to observations.
        model: osim model name (e.g. "3D").
    """
    self.model = model
    self.visualize = visualize
    self.randomized_start = randomized_start
    self.env = ProstheticsEnv(visualize=visualize, integrator_accuracy=1e-3)
    seed = random.randrange(SEED_RANGE)
    set_global_seeds(seed)
    self.env.change_model(model=self.model, prosthetic=True, difficulty=1, seed=seed)
    self.frame_skip = frame_skip
    self.observe_time = observe_time
    # hotfix: even frame skips shorten the usable episode by two extra steps
    hotfix_flag = 1 - frame_skip % 2
    self.max_ep_length = max_episode_length - 2 - hotfix_flag * 2
    self.observation_space = Box(low=self.env.observation_space.low[0],
                                 high=self.env.observation_space.high[0],
                                 shape=(343 + int(observe_time), ))
    self.action_space = Box(low=self.env.action_space.low[0],
                            high=self.env.action_space.high[0],
                            shape=(19, ))
    # reward shaping
    self.reward_scale = reward_scale
    self.death_penalty = np.abs(death_penalty)
    self.living_bonus = living_bonus
    self.cross_legs_coef = crossing_legs_penalty
    self.bending_knees_coef = bending_knees_bonus
    self.max_reward = max_reward
    self.activations_penalty = activations_penalty
    self.left_knee_bonus = left_knee_bonus
    self.right_knee_bonus = right_knee_bonus
    self.bonus_for_knee_angles_scale = bonus_for_knee_angles_scale
    self.knees_angle_bonus = bonus_for_knee_angles_angle
    self.episodes = 1
    self.ep2reload = 5

    # ddpg different output activations support.
    # BUG FIX: action_fn defaults to None, but the original code called
    # action_fn.lower() unconditionally, raising AttributeError whenever the
    # default was used. None now falls through to the identity handler, the
    # same behavior as any non-"tanh" string.
    if action_fn is not None and action_fn.lower() == "tanh":
        action_mean = .5
        action_std = .5
        self.action_handler = lambda x: x * action_std + action_mean
    else:
        self.action_handler = lambda x: x
def run_sampler(*,
                logdir,
                algorithm,
                environment,
                config,
                vis,
                infer,
                action_noise_prob,
                param_noise_prob,
                action_noise=None,
                param_noise=None,
                id=None,
                resume=None,
                redis=True):
    """Build the environment, random process and Sampler, then run it."""
    cfg = copy.deepcopy(config)
    action_noise = action_noise or 0
    param_noise = param_noise or 0

    if redis:
        redis_cfg = cfg.get("redis", {})
        redis_server = StrictRedis(port=redis_cfg.get("port", 12000))
        redis_prefix = redis_cfg.get("prefix", "")
    else:
        redis_server = None
        redis_prefix = None

    id = id or 0
    set_global_seeds(42 + id)

    # never randomize the initial pose while inferring
    if "randomized_start" in cfg["env"]:
        cfg["env"]["randomized_start"] = (
            cfg["env"]["randomized_start"] and not infer)
    env = environment(**cfg["env"], visualize=vis)

    # @TODO: remove this hack
    cfg["shared"]["observation_size"] = env.observation_shape[0]
    cfg["shared"]["action_size"] = env.action_shape[0]

    algo_kwargs = algorithm.prepare_for_sampler(cfg)

    # instantiate the exploration random process from the config
    rp_cfg = cfg.get("random_process", {})
    rp_cls = rp.__dict__[rp_cfg.pop("random_process", "RandomProcess")]
    rp_cfg["sigma"] = action_noise
    rp_cfg["size"] = cfg["shared"]["action_size"]
    random_process = rp_cls(**rp_cfg)

    seeds = cfg.get("seeds", None) if infer else cfg.get("train_seeds", None)

    # hard-seed thresholds apply only when training without fixed seeds
    min_episode_steps = cfg["sampler"].pop("min_episode_steps", None)
    min_episode_reward = cfg["sampler"].pop("min_episode_reward", None)
    if infer or seeds is not None:
        min_episode_steps = None
        min_episode_reward = None

    pprint(cfg["sampler"])
    pprint(algo_kwargs)

    sampler = Sampler(
        **cfg["sampler"],
        **algo_kwargs,
        env=env,
        logdir=logdir,
        id=id,
        redis_server=redis_server,
        redis_prefix=redis_prefix,
        mode="infer" if infer else "train",
        random_process=random_process,
        action_noise_prob=action_noise_prob,
        param_noise_prob=param_noise_prob,
        param_noise_d=param_noise,
        seeds=seeds,
        min_episode_steps=min_episode_steps,
        min_episode_reward=min_episode_reward,
        resume=resume)
    pprint(sampler)
    sampler.run()
def run_sampler(
        *,
        config,
        vis,
        infer,
        action_noise_prob,
        param_noise_prob,
        action_noise=None,
        param_noise=None,
        noise_power=None,  # @TODO: remove
        id=None,
        resume=None,
        debug=False,
        logdir=None):
    """Build the environment, random process and Sampler, then run it.

    Args:
        config: sampler/algorithm configuration dict (deep-copied locally).
        vis: forward environment visualization flag.
        infer: run in inference mode (no noise, no hard-seed tracking).
        action_noise_prob / param_noise_prob: exploration-mode probabilities.
        action_noise / param_noise: noise magnitudes; fall back to noise_power.
        noise_power: legacy combined noise magnitude (scheduled for removal).
        id: sampler id; also offsets the global seed.
        resume: checkpoint to resume from.
        debug: skip the redis connection entirely.
        logdir: directory for sampler logs (new, optional; defaults to None).
    """
    config_ = copy.deepcopy(config)

    if debug:
        redis_server = None
        redis_prefix = None
    else:
        redis_server = StrictRedis(
            port=config_.get("redis", {}).get("port", 12000))
        redis_prefix = config_.get("redis", {}).get("prefix", "")

    id = id or 0
    set_global_seeds(42 + id)

    action_noise = action_noise or noise_power
    param_noise = param_noise or noise_power

    # never randomize the initial pose while inferring
    if "randomized_start" in config_["env"]:
        config_["env"]["randomized_start"] = (
            config_["env"]["randomized_start"] and not infer)
    env = env_module.ENV(**config_["env"], visualize=vis)

    algo_kwargs = algo_module.prepare_for_sampler(config_)

    # instantiate the exploration random process from the config
    rp_params = config_.get("random_process", {})
    random_process = rp.__dict__[
        rp_params.pop("random_process", "RandomProcess")]
    rp_params["sigma"] = action_noise
    random_process = random_process(**rp_params)

    seeds = config_.get("seeds", None) \
        if infer \
        else config_.get("train_seeds", None)

    # hard-seed thresholds apply only when training without fixed seeds
    min_episode_steps = config_["sampler"].pop("min_episode_steps", None)
    min_episode_steps = min_episode_steps if not infer else None
    min_episode_reward = config_["sampler"].pop("min_episode_reward", None)
    min_episode_reward = min_episode_reward if not infer else None
    if seeds is not None:
        min_episode_steps = None
        min_episode_reward = None

    pprint(config_["sampler"])
    pprint(algo_kwargs)

    sampler = Sampler(
        **config_["sampler"],
        **algo_kwargs,
        env=env,
        # BUG FIX: originally passed ``args.logdir`` but no ``args`` exists in
        # this scope (NameError at runtime); ``logdir`` is now an explicit
        # keyword-only parameter with a backward-compatible default of None.
        logdir=logdir,
        id=id,
        redis_server=redis_server,
        redis_prefix=redis_prefix,
        mode="infer" if infer else "train",
        random_process=random_process,
        action_noise_prob=action_noise_prob,
        param_noise_prob=param_noise_prob,
        param_noise_d=param_noise,
        seeds=seeds,
        min_episode_steps=min_episode_steps,
        min_episode_reward=min_episode_reward,
        resume=resume)
    pprint(sampler)
    sampler.run()