Example #1
def run(args, policy, model_name, cost_fn='L2'):
    """Run an analytic policy, using similar setups as baselines-fork.

    If we have a random seed in the args, we use that instead of the config
    file. That way we can run several instances of the policy in parallel for
    faster data collection.

    model_name and cost_fn only have semantic meaning for vismpc
    """
    with open(args.cfg_file, 'r') as fh:
        cfg = yaml.safe_load(fh)
        if args.seed is not None:
            seed = args.seed
            cfg['seed'] = seed  # Likely redundant (the env is seeded below), but harmless.
        else:
            seed = cfg['seed']
        if seed == 1500 or seed == 1600:
            print('Ideally, avoid using these two seeds.')
            sys.exit()
        if args.policy != 'vismpc':
            model_name = 'NA'
            cost_fn = 'NA'
        stuff = '-seed-{}-{}-model-{}-cost-{}_epis_{}'.format(
            seed, cfg['init']['type'], model_name.replace('/', '_'), cost_fn,
            args.max_episodes)
        result_path = args.result_path.replace('.pkl', '{}.pkl'.format(stuff))
        #assert not cfg['env']['force_grab'], 'Do not need force_grab for analytic'
        print('\nOur result_path:\n\t{}'.format(result_path))
    np.random.seed(seed)

    # Should seed env this way, following gym conventions.  NOTE: we pass in
    # args.cfg_file here, but then it's immediately loaded by ClothEnv. When
    # env.reset() is called, it uses the ALREADY loaded parameters, and does
    # NOT re-query the file again for parameters (that'd be bad!).
    env = ClothEnv(args.cfg_file)
    env.seed(seed)
    env.render(filepath=args.render_path)
    if args.policy == 'vismpc':
        policy.set_env_cfg(env, cfg, model_name, cost_fn)
    else:
        policy.set_env_cfg(env, cfg)

    # Book-keeping.
    num_episodes = 0
    stats_all = []
    coverage = []
    variance_inv = []
    nb_steps = []

    for ep in range(args.max_episodes):
        obs = env.reset()
        # Go through one episode and put information in `stats_ep`.
        # Don't forget the first obs, since we need t _and_ t+1.
        stats_ep = defaultdict(list)
        stats_ep['obs'].append(obs)
        done = False
        num_steps = 0

        while not done:
            action = policy.get_action(obs, t=num_steps)
            obs, rew, done, info = env.step(action)
            stats_ep['obs'].append(obs)
            stats_ep['rew'].append(rew)
            stats_ep['act'].append(action)
            stats_ep['done'].append(done)
            stats_ep['info'].append(info)
            num_steps += 1
        num_episodes += 1
        coverage.append(info['actual_coverage'])
        variance_inv.append(info['variance_inv'])
        nb_steps.append(num_steps)
        stats_all.append(stats_ep)
        print("\nInfo for most recent episode: {}".format(info))
        print("Finished {} episodes.".format(num_episodes))
        print('  {:.3f} +/- {:.3f} (coverage)'.format(np.mean(coverage),
                                                      np.std(coverage)))
        print('  {:.2f} +/- {:.1f} ((inv)variance)'.format(
            np.mean(variance_inv), np.std(variance_inv)))
        print('  {:.2f} +/- {:.2f} (steps per episode)'.format(
            np.mean(nb_steps), np.std(nb_steps)))

        # Just dump here to keep saving and overwriting.
        with open(result_path, 'wb') as fh:
            pickle.dump(stats_all, fh)

    assert len(stats_all) == args.max_episodes, len(stats_all)
    if env.render_proc is not None:
        env.render_proc.terminate()
        env.cloth.stop_render()
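
A minimal sketch of how this `run` function might be invoked from a driver script. The flag names, defaults, and the policy class are assumptions inferred from the attributes the function reads (args.cfg_file, args.seed, args.render_path, args.result_path, args.max_episodes, args.policy); they are not taken from the source.

import argparse

if __name__ == '__main__':
    # Hypothetical parser; only the attributes read by run() above are defined.
    pp = argparse.ArgumentParser()
    pp.add_argument('--policy', type=str, default='oracle')
    pp.add_argument('--cfg_file', type=str, default='cfg/demo_baselines.yaml')
    pp.add_argument('--render_path', type=str, default='render/build')
    pp.add_argument('--result_path', type=str, default='logs/result.pkl')
    pp.add_argument('--max_episodes', type=int, default=10)
    pp.add_argument('--seed', type=int, default=None)
    args = pp.parse_args()

    # Hypothetical policy object: anything exposing set_env_cfg() and get_action().
    policy = OracleCornerPolicy()
    run(args, policy, model_name='NA')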
Example #2
    args = pp.parse_args()
    args.file_path = fp = os.path.dirname(os.path.realpath(__file__))
    args.cfg_file = join(fp, '../cfg/demo_baselines.yaml') # BASELINES!
    args.render_path = join(fp, '../render/build')    # Must be compiled!

    with open(args.cfg_file, 'r') as fh:
        cfg = yaml.safe_load(fh)
        seed = cfg['seed']
        print('\nseed:           {}'.format(seed))
        print('clip_act_space: {}'.format(cfg['env']['clip_act_space']))
        print('delta_actions:  {}'.format(cfg['env']['delta_actions']))
        print('obs_type:       {}'.format(cfg['env']['obs_type']))

    # Should seed env this way, following gym conventions.
    env = ClothEnv(args.cfg_file)
    env.seed(seed)
    env.render(filepath=args.render_path)

    # Just collect a bunch of observations.
    stats = defaultdict(list)
    for nb_obs in range(args.num_obs):
        obs = env.reset()
        env.cloth.stop_render()
        stats['coverage'].append(env._compute_coverage())
        stats['variance'].append(env._compute_variance())
        print("\nNow collected {} observations".format(nb_obs+1))
        assert nb_obs+1 == len(stats['coverage'])
        print('  coverage: {:.2f} +/- {:.1f}'.format(np.mean(stats['coverage']),
                                                     np.std(stats['coverage'])))
        print('  variance: {:.2f} +/- {:.1f}'.format(np.mean(stats['variance']),
                                                     np.std(stats['variance'])))
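
The fragment above begins at `pp.parse_args()`. A minimal sketch of the parser it presumes is shown below; only `--num_obs` is actually read by the snippet (via `args.num_obs`), and the description string is an assumption.

import argparse

# Hypothetical parser preceding the fragment above; only --num_obs is used.
pp = argparse.ArgumentParser(description='Collect observations from random resets.')
pp.add_argument('--num_obs', type=int, default=100)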
Example #3
def run(config_file,
        render_path,
        file_path,
        result_path,
        load_state,
        max_episodes,
        random_pol=False):
    """Run a policy. Use this as the main testbed before running baselines-fork.
    """
    with open(config_file, 'r') as fh:
        cfg = yaml.safe_load(fh)
        seed = cfg['seed']
        stuff = '-clip_a-{}-delta_a-{}-obs-{}'.format(
            cfg['env']['clip_act_space'], cfg['env']['delta_actions'],
            cfg['env']['obs_type'])
        result_path = result_path.replace('.pkl', '{}.pkl'.format(stuff))

    # Save states into local directory, load from nfs diskstation.
    NFS = '/nfs/diskstation/seita/clothsim'
    state_path = join(file_path, "state_init.pkl")
    load_state_path = join(NFS, 'state_init_med_49_coverage.pkl')
    num_episodes = 0
    stats_all = []

    # Should seed env this way, following gym conventions.
    if load_state:
        env = ClothEnv(config_file, start_state_path=load_state_path)
    else:
        env = ClothEnv(config_file)
    env.seed(seed)
    env.render(filepath=render_path)

    for ep in range(max_episodes):
        obs = env.reset()
        env.save_state(state_path)
        # Go through one episode and put information in `stats_ep`.
        # Put the first observation here to start.
        stats_ep = defaultdict(list)
        stats_ep['obs'].append(obs)
        done = False
        num_steps = 0

        while not done:
            if random_pol:
                #action = env.get_random_action(atype='touch_cloth')
                action = env.get_random_action(atype='over_xy_plane')
            else:
                #action = analytic(env, t=num_steps, cfg=cfg)
                action = analytic_corners(env, t=num_steps, cfg=cfg)

            # Apply the action.
            obs, rew, done, info = env.step(action)
            stats_ep['obs'].append(obs)
            stats_ep['rew'].append(rew)
            stats_ep['act'].append(action)
            stats_ep['done'].append(done)
            stats_ep['info'].append(info)
            num_steps += 1

        num_episodes += 1
        print("\nFinished {} episodes: {}\n".format(num_episodes, info))
        stats_all.append(stats_ep)

        # Just dump here to keep saving and overwriting.
        with open(result_path, 'wb') as fh:
            pickle.dump(stats_all, fh)

    assert len(stats_all) == max_episodes, len(stats_all)
    if env.render_proc is not None:
        env.render_proc.terminate()
        env.cloth.stop_render()
Example #4
def make_env(env_id,
             env_type,
             mpi_rank=0,
             subrank=0,
             seed=None,
             reward_scale=1.0,
             gamestate=None,
             flatten_dict_observations=True,
             wrapper_kwargs=None,
             logger_dir=None,
             cloth_cfg_path=None,
             render_path=None,
             start_state_path=None):
    """Daniel: make single instance of env, to be wrapped in VecEnv for parallelism.

    We need to have a special if case for the clothenv, which doesn't actually
    use `gym.make(...)` because we have a custom configuration.
    """
    wrapper_kwargs = wrapper_kwargs or {}

    if env_type == 'cloth':
        print("Env Type is Cloth")
        assert cloth_cfg_path is not None
        from gym_cloth.envs import ClothEnv
        env = ClothEnv(cloth_cfg_path,
                       subrank=subrank,
                       start_state_path=start_state_path)
        print('Created ClothEnv, seed {}, mpi_rank {}, subrank {}.'.format(
            seed, mpi_rank, subrank))
        print('start_state_path: {}'.format(start_state_path))
        # Daniel: render, but currently only works if we have one env, not a vec ...
        if render_path is not None:
            env.render(filepath=render_path)
    elif env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(
            game=env_id,
            max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE,
            state=gamestate)
    else:
        print("USING WRONG COMMAND")
        env = gym.make(env_id)

    if flatten_dict_observations and isinstance(env.observation_space,
                                                gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir
                  and os.path.join(logger_dir,
                                   str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    # Adi: Let's return the actual env for now instead of the wrapped version, for simplicity. Can change this back later.
    env = env.unwrapped

    return env
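
A sketch of how `make_env` might be called for a single (non-vectorized) cloth environment, assuming the signature above; the `env_id` value and the paths are placeholders, not confirmed by the source.

# Hypothetical call; env_id is not used on the 'cloth' branch above, and the
# config / render paths are placeholders.
env = make_env(env_id='Cloth-v0',
               env_type='cloth',
               seed=42,
               subrank=0,
               cloth_cfg_path='cfg/demo_baselines.yaml',
               render_path=None,   # or a compiled render build directory
               start_state_path=None)
obs = env.reset()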
Example #5
def run(args, policy):
    """Run an analytic policy, using similar setups as baselines-fork.

    If we have a random seed in the args, we use that instead of the config
    file. That way we can run several instances of the policy in parallel for
    faster data collection.
    """
    with open(args.cfg_file, 'r') as fh:
        cfg = yaml.safe_load(fh)
        if args.seed is not None:
            seed = args.seed
            cfg['seed'] = seed  # Likely redundant (the env is seeded below), but harmless.
        else:
            seed = cfg['seed']
        if seed == 1500 or seed == 1600:
            print('Ideally, avoid using these two seeds.')
            sys.exit()
        assert cfg['env']['clip_act_space'] and cfg['env']['delta_actions']
        stuff = '-seed-{}-obs-{}-depth-{}-rgbd-{}-{}_epis_{}'.format(
            seed, cfg['env']['obs_type'], cfg['env']['use_depth'],
            cfg['env']['use_rgbd'], cfg['init']['type'], args.max_episodes)
        result_path = args.result_path.replace('.pkl', '{}.pkl'.format(stuff))
        assert not cfg['env']['force_grab'], 'Do not need force_grab for analytic'
        print('\nOur result_path:\n\t{}'.format(result_path))
    np.random.seed(seed)

    # Should seed env this way, following gym conventions.  NOTE: we pass in
    # args.cfg_file here, but then it's immediately loaded by ClothEnv. When
    # env.reset() is called, it uses the ALREADY loaded parameters, and does
    # NOT re-query the file again for parameters (that'd be bad!).
    env = ClothEnv(args.cfg_file)
    env.seed(seed)
    env.render(filepath=args.render_path)
    policy.set_env_cfg(env, cfg)
    # policy = UpActor()
    # Book-keeping.
    num_episodes = 0
    stats_all = []
    coverage = []
    variance_inv = []
    nb_steps = []

    cloth_tier = args.tier

    cloth_obs = []

    for ep in range(args.max_episodes):
        obs, obs_1d = env.reset()
        if cloth_tier == 1:
            cloth_obs.extend(obs_1d)
        # cloth_npy = np.array(cloth_obs)
        # test_time_sequence(obs_1d)
        # plot_2d(obs_1d, 0, cloth_tier)
        # Go through one episode and put information in `stats_ep`.
        # Don't forget the first obs, since we need t _and_ t+1.
        stats_ep = defaultdict(list)
        stats_ep['obs'].append(obs)

        done = False
        num_steps = 0

        while not done:
            action = policy.get_action(obs, t=num_steps)
            obs, rew, done, info = env.step(action)
            stats_ep['obs'].append(obs)
            stats_ep['rew'].append(rew)
            stats_ep['act'].append(action)
            stats_ep['done'].append(done)
            stats_ep['info'].append(info)
            obs_1d = info['obs_1d']
            test_time_sequence(obs_1d)
            num_steps += 1
            cloth_obs.extend(obs_1d)
            plot_2d(obs_1d, num_steps, cloth_tier)
            if num_steps > 80:
                print("Finished")
        num_episodes += 1
        coverage.append(info['actual_coverage'])
        variance_inv.append(info['variance_inv'])
        nb_steps.append(num_steps)
        stats_all.append(stats_ep)
        print("\nInfo for most recent episode: {}".format(info))
        print("Finished {} episodes.".format(num_episodes))
        print('  {:.2f} +/- {:.1f} (coverage)'.format(np.mean(coverage),
                                                      np.std(coverage)))
        print('  {:.2f} +/- {:.1f} ((inv)variance)'.format(
            np.mean(variance_inv), np.std(variance_inv)))
        print('  {:.2f} +/- {:.1f} (steps per episode)'.format(
            np.mean(nb_steps), np.std(nb_steps)))

        # After the first episode, save the collected cloth observations and
        # return early; the pickle dump below is therefore never reached.
        filename = "cloth_action_data_tier_" + str(cloth_tier) + ".hkl"
        cloth_npy = np.array(cloth_obs)
        hkl.dump(cloth_npy, filename, mode='w')
        env.render_proc.terminate()
        # env.cloth.stop_render()
        return 0  # End

        # Just dump here to keep saving and overwriting.
        with open(result_path, 'wb') as fh:
            pickle.dump(stats_all, fh)

    assert len(stats_all) == args.max_episodes, len(stats_all)
    if env.render_proc is not None:
        env.render_proc.terminate()
        env.cloth.stop_render()
Example #6
def run(config_file, render_path, file_path, result_path, load_state, max_episodes):
    """Run a policy.

    Note that there are many possible interpretations of 'random' actions.
    It's faster if we have `load_state=True`, so if there isn't a saved state
    ready to load, run one call to `env.reset()` first to produce one.

    We could also use an analytic policy that grips the highest point. The
    grip itself is trivial, but determining the pull length and direction is
    more complicated; hard-coded rules suffice for that.
    """
    with open(config_file, 'r') as fh:
        cfg = yaml.safe_load(fh)  # safe_load: yaml.load without a Loader is deprecated.
        seed = cfg['seed']

    # Save states into local directory, load from nfs diskstation.
    NFS = '/nfs/diskstation/seita/clothsim'
    state_path = join(file_path, "state_init.pkl")
    load_state_path = join(NFS, 'state_init_med_49_coverage.pkl')
    random_pol = True
    num_episodes = 0
    stats_all = []

    # Should seed env this way, following gym conventions.
    if load_state:
        env = ClothEnv(config_file, start_state_path=load_state_path)
    else:
        env = ClothEnv(config_file)
    env.seed(seed)
    env.render(filepath=render_path)

    # Fix a Pyrender scene, so that we don't keep re-creating.
    pyr_scene, pyr_rend = _create_scene_and_offscreen_render()

    for ep in range(max_episodes):
        # Do one one episode and put information in `stats_ep`. Save starting state.
        obs = env.reset()
        env.save_state(state_path)
        stats_ep = defaultdict(list)
        done = False
        num_steps = 0

        while not done:
            if random_pol:
                #action = env.get_random_action(atype='over_xy_plane')
                action = env.get_random_action(atype='touch_cloth')
            else:
                raise NotImplementedError()
            _save_trimesh(env, pyr_scene, pyr_rend)
            obs, rew, done, info = env.step(action)
            stats_ep['obs'].append(obs)
            stats_ep['rew'].append(rew)
            stats_ep['act'].append(action)
            stats_ep['done'].append(done)
            stats_ep['info'].append(info)
            num_steps += 1

        num_episodes += 1
        print("\nFinished {} episodes: {}\n".format(num_episodes, info))
        stats_all.append(stats_ep)

    assert len(stats_all) == max_episodes, len(stats_all)
    with open(result_path, 'wb') as fh:
        pickle.dump(stats_all, fh)
    if env.render_proc is not None:
        env.render_proc.terminate()
        env.cloth.stop_render()
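
A sketch of how this `run` might be called; the result path is a placeholder. Note that the function saves new states locally as state_init.pkl while loading uses the hard-coded NFS path, so a real bootstrap would need those two paths reconciled.

import os
from os.path import join

# Hypothetical invocation; result_path is a placeholder.
fp = os.path.dirname(os.path.realpath(__file__))
run(config_file=join(fp, '../cfg/demo_baselines.yaml'),
    render_path=join(fp, '../render/build'),
    file_path=fp,
    result_path=join(fp, 'logs/random_policy.pkl'),
    load_state=False,    # set True once a saved start state exists on NFS
    max_episodes=5)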