Example #1
def main(args, unknown_args):
    args, config = parse_args_uargs(args, unknown_args, dump_config=True)
    pprint(args)
    pprint(config)
    set_global_seeds(args.seed)

    assert args.baselogdir is not None or args.logdir is not None

    if args.logdir is None:
        modules_ = prepare_modules(model_dir=args.model_dir)
        logdir = modules_["model"].prepare_logdir(config=config)
        args.logdir = str(pathlib2.Path(args.baselogdir).joinpath(logdir))

    create_if_need(args.logdir)
    save_config(config=config, logdir=args.logdir)
    modules = prepare_modules(model_dir=args.model_dir, dump_dir=args.logdir)

    datasource = modules["data"].DataSource()
    model = modules["model"].prepare_model(config)

    runner = modules["model"].ModelRunner(model=model)
    runner.train(datasource=datasource,
                 args=args,
                 stages_config=config["stages"],
                 verbose=args.verbose)
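The two arguments of main() suggest the standard argparse parse_known_args flow, which returns the parsed namespace plus the leftover tokens. A minimal sketch of the entry-point wiring; the flag set is inferred from the attributes used above (logdir, baselogdir, model_dir, seed, verbose) and is therefore an assumption, not the project's actual CLI:

import argparse

def build_parser():
    # hypothetical flags, inferred from the `args.*` accesses in main()
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-dir", dest="model_dir", required=True)
    parser.add_argument("--logdir", default=None)
    parser.add_argument("--baselogdir", default=None)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--verbose", action="store_true")
    return parser

if __name__ == "__main__":
    args, unknown_args = build_parser().parse_known_args()
    main(args, unknown_args)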
Example #2
def main(args, unknown_args):
    args, config = parse_args_uargs(args, unknown_args, dump_config=True)
    set_global_seeds(config.get("seed", 42))

    Experiment, Runner = import_experiment_and_runner(Path(args.expdir))

    experiment = Experiment(config)
    runner = Runner()
    dump_code(args.expdir, experiment.logdir)

    runner.run_experiment(experiment, check=args.check)
Example #3
def main(args, unknown_args):
    args, config = parse_args_uargs(args, unknown_args)
    pprint(args)
    pprint(config)
    set_global_seeds(args.seed)

    modules = prepare_modules(model_dir=args.model_dir)

    datasource = modules["data"].DataSource()
    loaders = datasource.prepare_loaders(args, **config["data_params"])
    model = modules["model"].prepare_model(config)

    runner = modules["model"].ModelRunner(model=model)
    callbacks = runner.prepare_callbacks(
        callbacks_params=config["callbacks_params"], args=args, mode="infer")
    runner.infer(loaders=loaders, callbacks=callbacks, verbose=args.verbose)
Example #4
    def reset(self):
        self.time_step = 0

        if self.episodes % self.ep2reload == 0:
            self.env = ProstheticsEnv(visualize=self.visualize,
                                      integrator_accuracy=1e-3)
            seed = random.randrange(SEED_RANGE)
            set_global_seeds(seed)
            self.env.change_model(model=self.model,
                                  prosthetic=True,
                                  difficulty=1,
                                  seed=seed)

        state_desc = self.env.reset(project=False)
        if self.randomized_start:
            state = get_simbody_state(state_desc)

            amplitude = random.gauss(0.8, 0.05)
            direction = random.choice([-1., 1.])
            amplitude_knee = random.gauss(-1.2, 0.05)
            state[4] = 0.8
            state[6] = amplitude * direction  # right leg
            state[9] = amplitude * direction * (-1.)  # left leg
            state[13] = amplitude_knee if direction == 1. else 0  # right knee
            state[14] = amplitude_knee if direction == -1. else 0  # left knee

            # noise = np.random.normal(scale=0.1, size=72)
            # noise[3:6] = 0
            # noise[6] = np.random.uniform(-1., 1., size=1)
            # noise[9] = np.random.uniform(-1., 1., size=1)
            # noise[13] = -np.random.uniform(0., 1., size=1)  # knee_r
            # noise[14] = -np.random.uniform(0., 1., size=1)  # knee_l
            # state = (np.array(state) + noise).tolist()

            simbody_state = self.env.osim_model.get_state()
            obj = simbody_state.getY()
            for i in range(72):
                obj[i] = state[i]
            self.env.osim_model.set_state(simbody_state)

        observation = preprocess_obs_round2(state_desc)
        if self.observe_time:
            observation.append(-1.0)

        return observation
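The write-back near the end of reset() is the generic pattern for forcing a full Simbody state: read the state object, mutate its Y vector in place, and push it back. A small helper capturing just that round trip, assuming only the osim_model calls already used above (get_state, getY, set_state):

def set_simbody_state(env, state, size=72):
    # read-modify-write of the solver's continuous state vector
    simbody_state = env.osim_model.get_state()
    y = simbody_state.getY()
    for i in range(size):
        y[i] = state[i]
    env.osim_model.set_state(simbody_state)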
Example #5
def main(args, unknown_args):
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seeds(args.seed)

    modules = prepare_modules(expdir=args.expdir)

    model = Registry.get_model(**config["model_params"])
    datasource = modules["data"].DataSource()
    data_params = config.get("data_params", {}) or {}
    loaders = datasource.prepare_loaders(mode="infer",
                                         n_workers=args.workers,
                                         batch_size=args.batch_size,
                                         **data_params)

    runner = modules["model"].ModelRunner(model=model)
    callbacks_params = config.get("callbacks_params", {}) or {}
    callbacks = runner.prepare_callbacks(mode="infer",
                                         resume=args.resume,
                                         out_prefix=args.out_prefix,
                                         **callbacks_params)
    runner.infer(loaders=loaders, callbacks=callbacks, verbose=args.verbose)
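The config consumed here only needs three top-level keys, two of which may be empty. A hypothetical minimal config dict matching the keys read above; the contents of each sub-dict are placeholders, since the expected kwargs of Registry.get_model are not shown in this excerpt:

config = {
    "model_params": {},      # forwarded verbatim to Registry.get_model
    "data_params": {},       # forwarded to datasource.prepare_loaders
    "callbacks_params": {},  # forwarded to runner.prepare_callbacks
}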
Example #6
def main(args, unknown_args):
    args, config = parse_args_uargs(args, unknown_args, dump_config=True)
    set_global_seeds(args.seed)

    assert args.baselogdir is not None or args.logdir is not None

    if args.logdir is None:
        modules_ = prepare_modules(expdir=args.expdir)
        logdir = modules_["model"].prepare_logdir(config=config)
        args.logdir = str(pathlib.Path(args.baselogdir).joinpath(logdir))

    os.makedirs(args.logdir, exist_ok=True)
    save_config(config=config, logdir=args.logdir)
    modules = prepare_modules(expdir=args.expdir, dump_dir=args.logdir)

    model = Registry.get_model(**config["model_params"])
    datasource = modules["data"].DataSource()

    runner = modules["model"].ModelRunner(model=model)
    runner.train_stages(datasource=datasource,
                        args=args,
                        stages_config=config["stages"],
                        verbose=args.verbose)
Example #7
    def run(self):
        self.episode_index = 1
        self.load_actor_weights()
        self.buffer = SamplerBuffer(self.buffer_size,
                                    self.env.observation_shape,
                                    self.env.action_shape)

        # reseed per sampler id first, then draw this episode's seed
        seed = self._seed + random.randrange(SEED_RANGE)
        set_global_seeds(seed)
        seed = random.randrange(SEED_RANGE) \
            if self.seeds is None \
            else random.choice(self.seeds)
        set_global_seeds(seed)
        self.buffer.init_with_observation(self.env.reset())
        self.random_process.reset_states()

        action_noise = False
        param_noise_d = 0
        noise_action = 0
        action_noise_t = 0
        step_index = 0
        episode_reward = 0
        episode_reward_orig = 0
        start_time = time.time()
        done = False

        while True:
            while not done:
                state = self.buffer.get_state(history_len=self.history_len)
                action = self.act(state)

                if action_noise \
                        and action_noise_t + self.action_noise_t >= step_index:
                    noise_action = self.random_process.sample()
                    action_noise_t = step_index
                # otherwise keep the previous noise sample

                action = action + noise_action
                action = np.clip(action,
                                 a_min=self.action_clip[0],
                                 a_max=self.action_clip[1])

                next_state, reward, done, info = self.env.step(action)
                episode_reward += reward
                episode_reward_orig += info.get("reward_origin", 0)

                transition = [next_state, action, reward, done, step_index]
                self.buffer.push_transition(transition)
                step_index += 1

            elapsed_time = time.time() - start_time
            if not self.infer or self.force_store:
                self.store_episode()

            if step_index < self.min_episode_steps \
                    or episode_reward < self.min_episode_reward:
                self.hard_seeds.add(seed)
            else:
                self.hard_seeds.discard(seed)

            print(
                f"--- episode {self.episode_index:5d}:\t"
                f"steps: {step_index:5d}\t"
                f"reward: {episode_reward:10.4f}/{episode_reward_orig:10.4f}\t"
                f"seed: {seed}")

            if self.logger is not None:
                self.logger.add_scalar("steps", step_index, self.episode_index)
                self.logger.add_scalar("action noise sigma",
                                       self.random_process.current_sigma,
                                       self.episode_index)
                self.logger.add_scalar("param noise d", param_noise_d,
                                       self.episode_index)
                self.logger.add_scalar("reward", episode_reward,
                                       self.episode_index)
                self.logger.add_scalar("reward_origin", episode_reward_orig,
                                       self.episode_index)
                self.logger.add_scalar("episode per minute",
                                       1. / elapsed_time * 60,
                                       self.episode_index)
                self.logger.add_scalar("steps per second",
                                       step_index / elapsed_time,
                                       self.episode_index)
                self.logger.add_scalar("episode time (sec)", elapsed_time,
                                       self.episode_index)
                self.logger.add_scalar("episode time (min)", elapsed_time / 60,
                                       self.episode_index)
                self.logger.add_scalar("step time (sec)",
                                       elapsed_time / step_index,
                                       self.episode_index)

            self.episode_index += 1

            if self.episode_index >= self.episode_limit:
                return

            if self.episode_index % self.weights_sync_period == 0:
                self.load_actor_weights()

                noise_prob_ = random.random()

                if noise_prob_ < self.param_noise_prob:
                    states = self.buffer.get_states_history(
                        history_len=self.history_len)
                    states = self.to_tensor(states).detach()
                    param_noise_d = set_params_noise(
                        actor=self.actor,
                        states=states,
                        target_d=self.param_noise_d,
                        tol=1e-3,
                        max_steps=self.param_noise_steps)
                    action_noise = False
                elif noise_prob_ < \
                        self.param_noise_prob + self.action_noise_prob:
                    action_noise = True
                    param_noise_d = 0
                else:
                    action_noise = False
                    param_noise_d = 0

            self.buffer = SamplerBuffer(
                capacity=self.buffer_size,
                observation_shape=self.env.observation_shape,
                action_shape=self.env.action_shape)

            seed = self._seed + random.randrange(SEED_RANGE)
            set_global_seeds(seed)
            if self.seeds is None:
                hard_seed_prob = random.random()
                if len(self.hard_seeds) > 0 and hard_seed_prob < 0.5:
                    # sampling from a set is deprecated; draw from a tuple
                    seed = random.choice(tuple(self.hard_seeds))
                else:
                    seed = random.randrange(SEED_RANGE)
            else:
                seed = random.choice(self.seeds)
            set_global_seeds(seed)
            self.buffer.init_with_observation(self.env.reset())
            self.random_process.reset_states()

            noise_action = 0
            action_noise_t = 0
            step_index = 0
            episode_reward = 0
            episode_reward_orig = 0
            start_time = time.time()
            done = False
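The hard-seed bookkeeping above implements a simple curriculum: episodes that end too early or score too low get their seed added to hard_seeds, and on reset there is a 50% chance of replaying one of them. A toy, self-contained version of that resampling policy (SEED_RANGE's value is an assumption; the constant is not defined in this excerpt):

import random

SEED_RANGE = 2**31 - 1  # assumed bound; not shown in the source

def next_episode_seed(hard_seeds, fixed_seeds=None):
    # prefer a previously failed seed half the time, as in run() above
    if fixed_seeds is not None:
        return random.choice(fixed_seeds)
    if hard_seeds and random.random() < 0.5:
        return random.choice(tuple(hard_seeds))
    return random.randrange(SEED_RANGE)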
Example #8
    def __init__(self,
                 actor,
                 env,
                 id,
                 logdir=None,
                 redis_server=None,
                 redis_prefix=None,
                 buffer_size=int(1e4),
                 history_len=1,
                 weights_sync_period=1,
                 mode="infer",
                 resume=None,
                 action_noise_prob=0,
                 action_noise_t=1,
                 random_process=None,
                 param_noise_prob=0,
                 param_noise_d=0.2,
                 param_noise_steps=1000,
                 seeds=None,
                 action_clip=(-1, 1),
                 episode_limit=None,
                 force_store=False,
                 min_episode_steps=None,
                 min_episode_reward=None):

        self._seed = 42 + id
        set_global_seeds(self._seed)

        self._sampler_id = id
        self._device = UtilsFactory.prepare_device()
        self.actor = copy.deepcopy(actor).to(self._device)
        self.env = env
        self.redis_server = redis_server
        self.redis_prefix = redis_prefix or ""
        self.resume = resume
        self.episode_limit = episode_limit or int(2**32 - 2)
        self.force_store = force_store
        self.min_episode_steps = min_episode_steps
        self.min_episode_reward = min_episode_reward
        self.hard_seeds = set()
        min_episode_flag_ = \
            min_episode_steps is None and min_episode_reward is None
        assert min_episode_flag_ or seeds is None

        self.min_episode_steps = self.min_episode_steps or -int(1e6)
        self.min_episode_reward = self.min_episode_reward or -int(1e6)

        self.history_len = history_len
        self.buffer_size = buffer_size
        self.weights_sync_period = weights_sync_period
        self.episode_index = 0
        self.action_clip = action_clip

        self.infer = mode == "infer"
        self.seeds = seeds

        self.action_noise_prob = action_noise_prob
        self.action_noise_t = action_noise_t
        self.random_process = random_process or RandomProcess()

        self.param_noise_prob = param_noise_prob
        self.param_noise_d = param_noise_d
        self.param_noise_steps = param_noise_steps

        if self.infer:
            self.action_noise_prob = 0
            self.param_noise_prob = 0

        if logdir is not None:
            current_date = datetime.now().strftime("%y-%m-%d-%H-%M-%S-%f")
            logpath = f"{logdir}/sampler-{mode}-{id}-{current_date}"
            os.makedirs(logpath, exist_ok=True)
            self.logger = SummaryWriter(logpath)
        else:
            self.logger = None

        self.buffer = SamplerBuffer(
            capacity=self.buffer_size,
            observation_shape=self.env.observation_shape,
            action_shape=self.env.action_shape)
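A hypothetical instantiation, assuming this __init__ belongs to the Sampler class constructed in the later run_sampler examples; all object names here are stand-ins:

sampler = Sampler(
    actor=actor,   # a policy network (stand-in)
    env=env,       # must expose observation_shape and action_shape
    id=0,
    logdir="./logs",
    mode="train",
    action_noise_prob=0.3,
    param_noise_prob=0.3,
)
sampler.run()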
Example #9
import path
import os
import copy
import torch
import numpy as np
from catalyst.utils.args import parse_args_uargs
from catalyst.utils.misc import set_global_seeds, import_module, create_if_need
from envs.prosthetics_preprocess import \
    preprocess_obs_round2 as preprocess_state
from osim.env import ProstheticsEnv
import atexit
import multiprocessing as mp

os.environ["OMP_NUM_THREADS"] = "1"
torch.set_num_threads(1)
set_global_seeds(42)
golden_seeds = np.load(
    os.path.join(os.path.dirname(os.path.abspath(__file__)),
                 "golden_seeds.npz"))["seeds"]
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class AlgoWrapper:
    def __init__(self, actor, critics, history_len, consensus="min"):
        self.actor = actor.to(DEVICE)
        self.critics = [x.to(DEVICE) for x in critics]
        self.history_len = history_len
        self.consensus = consensus

    @staticmethod
    def _act(actor, state):
        ...  # body not shown in this excerpt
Example #10
    def __init__(self,
                 frame_skip=1,
                 visualize=False,
                 randomized_start=False,
                 max_episode_length=1000,
                 reward_scale=0.1,
                 death_penalty=0.0,
                 living_bonus=0.0,
                 crossing_legs_penalty=0.0,
                 bending_knees_bonus=0.0,
                 left_knee_bonus=0.,
                 right_knee_bonus=0.,
                 bonus_for_knee_angles_scale=0.,
                 bonus_for_knee_angles_angle=0.,
                 activations_penalty=0.,
                 max_reward=10.0,
                 action_fn=None,
                 observe_time=False,
                 model="3D"):

        self.model = model
        self.visualize = visualize
        self.randomized_start = randomized_start
        self.env = ProstheticsEnv(visualize=visualize,
                                  integrator_accuracy=1e-3)
        seed = random.randrange(SEED_RANGE)
        set_global_seeds(seed)
        self.env.change_model(model=self.model,
                              prosthetic=True,
                              difficulty=1,
                              seed=seed)

        self.frame_skip = frame_skip
        self.observe_time = observe_time
        hotfix_flag = 1 - frame_skip % 2
        self.max_ep_length = max_episode_length - 2 - hotfix_flag * 2

        self.observation_space = Box(low=self.env.observation_space.low[0],
                                     high=self.env.observation_space.high[0],
                                     shape=(343 + int(observe_time), ))
        self.action_space = Box(low=self.env.action_space.low[0],
                                high=self.env.action_space.high[0],
                                shape=(19, ))

        # reward shaping
        self.reward_scale = reward_scale
        self.death_penalty = np.abs(death_penalty)
        self.living_bonus = living_bonus
        self.cross_legs_coef = crossing_legs_penalty
        self.bending_knees_coef = bending_knees_bonus
        self.max_reward = max_reward
        self.activations_penalty = activations_penalty
        self.left_knee_bonus = left_knee_bonus
        self.right_knee_bonus = right_knee_bonus
        self.bonus_for_knee_angles_scale = bonus_for_knee_angles_scale
        self.knees_angle_bonus = bonus_for_knee_angles_angle

        self.episodes = 1
        self.ep2reload = 5

        # ddpg different output activations support
        # guard against the default action_fn=None before lowercasing
        action_fn = (action_fn or "").lower()
        if action_fn == "tanh":
            action_mean = .5
            action_std = .5
            self.action_handler = lambda x: x * action_std + action_mean
        else:
            self.action_handler = lambda x: x
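The tanh branch works because tanh outputs lie in [-1, 1], so 0.5 * x + 0.5 maps them onto [0, 1], the valid range for muscle activations. A quick numeric check:

import numpy as np

x = np.tanh(np.linspace(-5., 5., 101))
y = 0.5 * x + 0.5
assert y.min() >= 0.0 and y.max() <= 1.0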
Example #11
def run_sampler(*,
                logdir,
                algorithm,
                environment,
                config,
                vis,
                infer,
                action_noise_prob,
                param_noise_prob,
                action_noise=None,
                param_noise=None,
                id=None,
                resume=None,
                redis=True):
    config_ = copy.deepcopy(config)
    action_noise = action_noise or 0
    param_noise = param_noise or 0

    if not redis:
        redis_server = None
        redis_prefix = None
    else:
        redis_server = StrictRedis(
            port=config_.get("redis", {}).get("port", 12000))
        redis_prefix = config_.get("redis", {}).get("prefix", "")

    id = id or 0
    set_global_seeds(42 + id)

    if "randomized_start" in config_["env"]:
        config_["env"]["randomized_start"] = (
            config_["env"]["randomized_start"] and not infer)
    env = environment(**config_["env"], visualize=vis)
    # @TODO: remove this hack
    config_["shared"]["observation_size"] = env.observation_shape[0]
    config_["shared"]["action_size"] = env.action_shape[0]

    algo_kwargs = algorithm.prepare_for_sampler(config_)

    rp_params = config_.get("random_process", {})
    random_process = rp.__dict__[rp_params.pop("random_process",
                                               "RandomProcess")]
    rp_params["sigma"] = action_noise
    rp_params["size"] = config_["shared"]["action_size"]
    random_process = random_process(**rp_params)

    seeds = config_.get("seeds", None) \
        if infer \
        else config_.get("train_seeds", None)
    min_episode_steps = config_["sampler"].pop("min_episode_steps", None)
    min_episode_steps = min_episode_steps if not infer else None
    min_episode_reward = config_["sampler"].pop("min_episode_reward", None)
    min_episode_reward = min_episode_reward if not infer else None

    if seeds is not None:
        min_episode_steps = None
        min_episode_reward = None

    pprint(config_["sampler"])
    pprint(algo_kwargs)

    sampler = Sampler(**config_["sampler"],
                      **algo_kwargs,
                      env=env,
                      logdir=logdir,
                      id=id,
                      redis_server=redis_server,
                      redis_prefix=redis_prefix,
                      mode="infer" if infer else "train",
                      random_process=random_process,
                      action_noise_prob=action_noise_prob,
                      param_noise_prob=param_noise_prob,
                      param_noise_d=param_noise,
                      seeds=seeds,
                      min_episode_steps=min_episode_steps,
                      min_episode_reward=min_episode_reward,
                      resume=resume)

    pprint(sampler)

    sampler.run()
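run_sampler reads a handful of nested config keys. A hypothetical config fragment covering everything the function touches; the values are illustrative defaults, not taken from the project:

config = {
    "redis": {"port": 12000, "prefix": "rl"},
    "env": {"randomized_start": True},
    "shared": {},  # observation/action sizes are filled in at runtime
    "random_process": {"random_process": "RandomProcess"},
    "sampler": {"buffer_size": 10000, "history_len": 1,
                "min_episode_steps": 50, "min_episode_reward": 0.0},
    "seeds": None,
    "train_seeds": None,
}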
Example #12
def run_sampler(
        *,
        config, vis, infer,
        action_noise_prob, param_noise_prob,
        action_noise=None, param_noise=None,
        noise_power=None,  # @TODO: remove
        id=None, resume=None, debug=False):
    config_ = copy.deepcopy(config)

    if debug:
        redis_server = None
        redis_prefix = None
    else:
        redis_server = StrictRedis(
            port=config_.get("redis", {}).get("port", 12000))
        redis_prefix = config_.get("redis", {}).get("prefix", "")

    id = id or 0
    set_global_seeds(42 + id)

    action_noise = action_noise or noise_power
    param_noise = param_noise or noise_power

    if "randomized_start" in config_["env"]:
        config_["env"]["randomized_start"] = (
                config_["env"]["randomized_start"] and not infer)
    env = env_module.ENV(**config_["env"], visualize=vis)
    algo_kwargs = algo_module.prepare_for_sampler(config_)

    rp_params = config_.get("random_process", {})
    random_process = rp.__dict__[
        rp_params.pop("random_process", "RandomProcess")]
    rp_params["sigma"] = action_noise
    random_process = random_process(**rp_params)

    seeds = config_.get("seeds", None) \
        if infer \
        else config_.get("train_seeds", None)
    min_episode_steps = config_["sampler"].pop("min_episode_steps", None)
    min_episode_steps = min_episode_steps if not infer else None
    min_episode_reward = config_["sampler"].pop("min_episode_reward", None)
    min_episode_reward = min_episode_reward if not infer else None

    if seeds is not None:
        min_episode_steps = None
        min_episode_reward = None

    pprint(config_["sampler"])
    pprint(algo_kwargs)

    sampler = Sampler(
        **config_["sampler"],
        **algo_kwargs,
        env=env,
        # `args` is not a parameter of this run_sampler variant; it must
        # exist as a module-level namespace for this call to work
        logdir=args.logdir, id=id,
        redis_server=redis_server,
        redis_prefix=redis_prefix,
        mode="infer" if infer else "train",
        random_process=random_process,
        action_noise_prob=action_noise_prob,
        param_noise_prob=param_noise_prob,
        param_noise_d=param_noise,
        seeds=seeds,
        min_episode_steps=min_episode_steps,
        min_episode_reward=min_episode_reward,
        resume=resume)

    pprint(sampler)

    sampler.run()
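Both run_sampler variants take an integer id that offsets the global seed, which makes them easy to fan out one process per sampler. A hedged sketch using the multiprocessing import seen in Example #9; the keyword set is whatever the chosen run_sampler variant expects:

import multiprocessing as mp

def launch_samplers(n_samplers, **sampler_kwargs):
    # one OS process per sampler id, as hinted by the `mp` import above
    workers = [
        mp.Process(target=run_sampler, kwargs={**sampler_kwargs, "id": i})
        for i in range(n_samplers)
    ]
    for w in workers:
        w.start()
    for w in workers:
        w.join()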