Example #1
def test_gray_scale_observation(env_id, keep_dim):
    gray_env = AtariPreprocessing(gym.make(env_id),
                                  screen_size=84,
                                  grayscale_obs=True)
    rgb_env = AtariPreprocessing(gym.make(env_id),
                                 screen_size=84,
                                 grayscale_obs=False)
    wrapped_env = GrayScaleObservation(rgb_env, keep_dim=keep_dim)
    assert rgb_env.observation_space.shape[-1] == 3

    seed = 0
    gray_env.seed(seed)
    wrapped_env.seed(seed)

    gray_obs = gray_env.reset()
    wrapped_obs = wrapped_env.reset()

    if keep_dim:
        assert wrapped_env.observation_space.shape[-1] == 1
        assert len(wrapped_obs.shape) == 3
        wrapped_obs = wrapped_obs.squeeze(-1)
    else:
        assert len(wrapped_env.observation_space.shape) == 2
        assert len(wrapped_obs.shape) == 2

    # ALE gray scale is slightly different, but no more than by one shade
    assert np.allclose(gray_obs.astype('int32'),
                       wrapped_obs.astype('int32'),
                       atol=1)
Example #2
def make_atari(env_id, max_episode_steps=None):
    env = gym.make(env_id)
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
    return env
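A minimal usage sketch for the helper above; 'PongNoFrameskip-v4' is just an assumed example id (the assertion requires a NoFrameskip variant):

# hypothetical usage of make_atari, assuming the corresponding Atari ROM is installed
env = make_atari('PongNoFrameskip-v4', max_episode_steps=4000)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())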
Example #3
def test_text_envs():
    env = gym.make('FrozenLake-v0')
    video = VideoRecorder(env)
    try:
        env.reset()
        video.capture_frame()
        video.close()
    finally:
        os.remove(video.path)
Example #4
def test_flatten_observation(env_id):
    env = gym.make(env_id)
    wrapped_env = FlattenObservation(env)

    obs = env.reset()
    wrapped_obs = wrapped_env.reset()

    assert len(obs.shape) == 3
    assert len(wrapped_obs.shape) == 1
    assert wrapped_obs.shape[0] == obs.shape[0] * obs.shape[1] * obs.shape[2]
Example #5
def test_record_simple():
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    with open(rec.path) as f:
        assert os.fstat(f.fileno()).st_size > 100
Example #6
def make_robotics_env(env_id, seed, rank=0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = FlattenDictWrapper(env, ['observation', 'desired_goal'])
    env = Monitor(
        env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
        info_keywords=('is_success',))
    env.seed(seed)
    return env
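A minimal sketch of calling the helper above; 'FetchReach-v1' is an assumed goal-based environment id, and the baselines logger is assumed to be configured (or logger.get_dir() returns None):

# hypothetical usage of make_robotics_env
env = make_robotics_env('FetchReach-v1', seed=0, rank=0)
obs = env.reset()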
Example #7
def test_resize_observation(env_id, shape):
    env = gym.make(env_id)
    env = ResizeObservation(env, shape)


    assert env.observation_space.shape[-1] == 3
    obs = env.reset()
    if isinstance(shape, int):
        assert env.observation_space.shape[:2] == (shape, shape)
        assert obs.shape == (shape, shape, 3)
    else:
        assert env.observation_space.shape[:2] == tuple(shape)
        assert obs.shape == tuple(shape) + (3,)
Example #8
def main(**kwargs):
    exp_dir = os.getcwd() + '/data/' + EXP_NAME + '/' + str(kwargs['seed'])
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='last')
    json.dump(kwargs,
              open(exp_dir + '/params.json', 'w'),
              indent=2,
              sort_keys=True,
              cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get(
        'gpu_frac', 0.95)
    sess = tf.Session(config=config)

    with sess.as_default() as sess:
        folder = './data/policy/' + kwargs['env']
        paths = pickle.load(open(folder + '/paths.pickle', 'rb'))
        niters = paths.get_current_episode_size() // 100
        train_data, test_data = split_data(paths, niters)

        dimo = train_data[0]['o'].shape[-1]

        dims = [dimo]
        env = gym.make(kwargs['env'],
                       obs_type=kwargs['obs_type'],
                       fixed_num_of_contact=kwargs['fixed_num_of_contact'])

        feature_net = FeatureNet(
            dims,
            fixed_num_of_contact=kwargs['fixed_num_of_contact'],
            contact_dim=env.contact_dim,
            sess=sess,
            output=kwargs['prediction'],
            process_type=kwargs['process_type'],
            feature_dim=kwargs['feature_dim'],
            feature_layer=kwargs['feature_layer'])

        sess.run(tf.global_variables_initializer())
        for i in range(niters):
            start = timer.time()
            feature_net.train(train_data[i])
            feature_net.test(test_data[i])
            logger.logkv("iter", i)
            logger.logkv("iter_time", timer.time() - start)
            logger.dumpkvs()
            if i == 0:
                sess.graph.finalize()
Example #9
def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    myseed = seed + 1000 * rank if seed is not None else None
    set_global_seeds(myseed)
    env = gym.make(env_id)
    logger_path = None if logger.get_dir() is None else os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, logger_path, allow_early_resets=True)
    env.seed(seed)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
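A minimal sketch of calling the helper above; 'HalfCheetah-v2' is an assumed MuJoCo environment id, and mpi4py plus the baselines logger are assumed to be importable as in the snippet:

# hypothetical usage of make_mujoco_env
env = make_mujoco_env('HalfCheetah-v2', seed=0, reward_scale=1.0)
obs = env.reset()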
Example #10
def test_transform_reward(env_id):
    # use case #1: scale
    scales = [0.1, 200]
    for scale in scales:
        env = gym.make(env_id)
        wrapped_env = TransformReward(gym.make(env_id), lambda r: scale * r)
        action = env.action_space.sample()

        env.seed(0)
        env.reset()
        wrapped_env.seed(0)
        wrapped_env.reset()

        _, reward, _, _ = env.step(action)
        _, wrapped_reward, _, _ = wrapped_env.step(action)

        assert wrapped_reward == scale * reward
    del env, wrapped_env

    # use case #2: clip
    min_r = -0.0005
    max_r = 0.0002
    env = gym.make(env_id)
    wrapped_env = TransformReward(gym.make(env_id),
                                  lambda r: np.clip(r, min_r, max_r))
    action = env.action_space.sample()

    env.seed(0)
    env.reset()
    wrapped_env.seed(0)
    wrapped_env.reset()

    _, reward, _, _ = env.step(action)
    _, wrapped_reward, _, _ = wrapped_env.step(action)

    assert abs(wrapped_reward) < abs(reward)
    assert wrapped_reward == -0.0005 or wrapped_reward == 0.0002
    del env, wrapped_env

    # use case #3: sign
    env = gym.make(env_id)
    wrapped_env = TransformReward(gym.make(env_id), lambda r: np.sign(r))

    env.seed(0)
    env.reset()
    wrapped_env.seed(0)
    wrapped_env.reset()

    for _ in range(1000):
        action = env.action_space.sample()
        _, wrapped_reward, done, _ = wrapped_env.step(action)
        assert wrapped_reward in [-1.0, 0.0, 1.0]
        if done:
            break
    del env, wrapped_env
Example #11
    def make_env(subrank=None, obs_type = 'original', fixed_num_of_contact = 0):
        env = gym.make(env_name, obs_type = obs_type, fixed_num_of_contact = fixed_num_of_contact)
        if subrank is not None and logger.get_dir() is not None:
            try:
                from mpi4py import MPI
                mpi_rank = MPI.COMM_WORLD.Get_rank()
            except ImportError:
                MPI = None
                mpi_rank = 0
                logger.warn('Running with a single MPI process. This should work, but the results may differ from the ones published in Plappert et al.')

            max_episode_steps = env._max_episode_steps
            env = Monitor(env,
                          os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(subrank)),
                          allow_early_resets=True)
            # hack to re-expose _max_episode_steps (ideally should replace reliance on it downstream)
            env = gym.wrappers.TimeLimit(env, max_episode_steps=max_episode_steps)
        return env
Example #12
def test_frame_stack(env_id, num_stack, lz4_compress):
    env = gym.make(env_id)
    shape = env.observation_space.shape
    env = FrameStack(env, num_stack, lz4_compress)
    assert env.observation_space.shape == (num_stack, ) + shape

    obs = env.reset()
    obs = np.asarray(obs)
    assert obs.shape == (num_stack, ) + shape
    for i in range(1, num_stack):
        assert np.allclose(obs[i - 1], obs[i])

    obs, _, _, _ = env.step(env.action_space.sample())
    obs = np.asarray(obs)
    assert obs.shape == (num_stack, ) + shape
    for i in range(1, num_stack - 1):
        assert np.allclose(obs[i - 1], obs[i])
    assert not np.allclose(obs[-1], obs[-2])
Example #13
def test_clip_action():
    # mountaincar: action-based rewards
    make_env = lambda: gym.make('MountainCarContinuous-v0')
    env = make_env()
    wrapped_env = ClipAction(make_env())

    seed = 0
    env.seed(seed)
    wrapped_env.seed(seed)

    env.reset()
    wrapped_env.reset()

    actions = [[.4], [1.2], [-0.3], [0.0], [-2.5]]
    for action in actions:
        obs1, r1, d1, _ = env.step(
            np.clip(action, env.action_space.low, env.action_space.high))
        obs2, r2, d2, _ = wrapped_env.step(action)
        assert np.allclose(r1, r2)
        assert np.allclose(obs1, obs2)
        assert d1 == d2
Example #14
def main(**kwargs):
    # configure logger, disable logging in child MPI processes (with rank > 0)

    # folder = '../dataset/sequence/HandManipulateEgg-v0/seed'
    # obs = {}
    # acs = []
    # for i in range(2):
    #     # with open(folder + str(i) + '-dict.pickle', 'wb') as pickle_file:
    #     # dict, array, int
    #     o, a, fixed_num_of_contact = pickle.load(open(folder + str(i+1) + '-dict.pickle', 'rb'))
    #     for key in o:
    #         if key in obs:
    #             obs[key] = np.concatenate([obs[key], o[key]], axis = 0)
    #         else:
    #             obs[key] = o[key]
    #     acs.append(a)
    # acs = np.concatenate(acs, axis = 0)
    # folder = './dataset/sequence/HandManipulateEgg-v0/2seeds'
    # with open(folder + '-dict.pickle', 'wb') as pickle_file:
    #     print(folder)
    #     filtered_obs = {}
    #     for key in obs:
    #         if key in ['geom1s', 'geom2s', 'positions', 'force', 'object_position']:
    #             filtered_obs[key] = obs[key]
    #
    #     pickle.dump([filtered_obs, acs, fixed_num_of_contact], pickle_file)
    #
    # # ../sequence/HandManipulateEgg-v09/5seeds-dict.pickle
    folder = '../dataset/sequence/HandManipulateEgg-v0/seed1-dict.pickle'
    o, a, fixed_num_of_contact = pickle.load(open(folder, 'rb'))
    env = gym.make(kwargs['env'],
                   obs_type = kwargs['obs_type'],
                   fixed_num_of_contact = [fixed_num_of_contact, True])

    ngeoms = env.sim.model.ngeom
    obs, object_info = expand_data(o, ngeoms, fixed_num_of_contact)
    folder = './dataset/HandManipulateEgg-v0/50000obs.pickle'
    obs = obs.reshape((-1, *obs.shape[2:]))
    with open(folder, 'wb') as pickle_file:
        pickle.dump(obs, pickle_file)
Example #15
def main(**kwargs):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_list = []
    for key in kwargs.keys():
        arg_list.append('--' + key)
        arg_list.append(str(kwargs[key]))
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(arg_list)
    extra_args = parse_cmdline_kwargs(unknown_args)

    params = args.__dict__

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get(
        'gpu_frac', 0.95)
    sess = tf.Session(config=config)
    env = gym.make(kwargs['env'],
                   obs_type=kwargs['obs_type'],
                   fixed_num_of_contact=kwargs['fixed_num_of_contact'])

    global_largest = 0
    for _ in range(200):
        horizon = 100
        o = env.reset()
        d = False
        t = 0
        largest = 0
        while t < horizon and d is False:
            a = env.action_space.sample()
            num_of_contacts = env.contact_num
            if num_of_contacts > largest:
                largest = num_of_contacts
            if largest > global_largest:
                global_largest = largest
            o, r, d, _ = env.step(a)
            t = t + 1
        print("largest: ", largest)
    print("global_largest: ", global_largest)
    env.close()
Example #16
    def __init__(self, env_name):
        env = gym.make(env_name)
        self.env = env
        self.env_id = env.spec.id
        self.sim = env.env.sim

        self._horizon = env.spec.timestep_limit

        try:
            self._action_dim = self.env.env.action_dim
        except AttributeError:
            self._action_dim = self.env.env.action_space.shape[0]

        self._observation_dim = self.env.env.obs_dim

        try:
            self._num_agents = self.env.env.num_agents
        except AttributeError:
            self._num_agents = 1

        # Specs
        self.spec = EnvSpec(self._observation_dim, self._action_dim, self._horizon, self._num_agents)
Example #17
def main(**kwargs):

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get(
        'gpu_frac', 0.95)
    sess = tf.Session(config=config)

    env = gym.make(kwargs['env'][0],
                   obs_type=kwargs['obs_type'][0],
                   fixed_num_of_contact=kwargs['fixed_num_of_contact'][0])

    num_episodes = 1
    horizon = 100

    feature_net = pickle.load(
        open('./saved/' + str(kwargs['env'][0]) + '-model.pickle', 'rb'))

    for _ in range(num_episodes):
        o = env.reset()
        d = False
        t = 0
        while t < horizon and d is False:
            a = env.action_space.sample()
            o, r, d, _ = env.step(a)
            env.render()
            time.sleep(1)
            prediction = feature_net.predict_single(o['observation'])
            t = t + 1
            env.sim.data.set_joint_qpos('object:joint', prediction[-7:])
            env.sim.forward()
            env.render()
            time.sleep(1)
            env.sim.data.set_joint_qpos('object:joint', o['observation'][-7:])
            env.sim.forward()
        env.close()
Example #18
def main(**kwargs):
    import dill as pickle
    from datetime import datetime
    exp_dir = os.getcwd() + '/data/feature_net/' + kwargs['input_label'][0] + kwargs['output_label'][0] + '/'
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    json.dump(kwargs, open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True, cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)

    mode = kwargs['mode'][0]
    visualize_training_data = kwargs['visualize_training_data'][0]
    visualize_testing_data = kwargs['visualize_testing_data'][0]
    visualize_new_data = kwargs['visualize_new_data'][0]

    if mode == 'restore':
        saver = tf.train.import_meta_graph(exp_dir + '-999.meta')
        saver.restore(sess, tf.train.latest_checkpoint(exp_dir))
        graph = tf.get_default_graph()

    with sess.as_default() as sess:

        # folder = './data/policy/' + kwargs['env'][0]
        # buffer, fixed_num_of_contact = pickle.load(open('../saved/HandManipulateEgg-v0-fix9.pickle', 'rb'))

        buffer = {}
        name = 's1'
        paths, fixed_num_of_contact = pickle.load(open('../saved/soft/' + name + '80-dict.pickle', 'rb'))
        for key in paths:
            buffer[key] = paths[key]

        for name in ['s2', 's4', 's5', 's6', 'soft3']:
            paths, fixed_num_of_contact = pickle.load(open('../saved/soft/' + name + '80-dict.pickle', 'rb'))
            for key in paths:
                buffer[key] = np.concatenate([buffer[key], paths[key]], axis = 0)


        env = gym.make(kwargs['env'][0],
                       obs_type = kwargs['obs_type'][0],
                       fixed_num_of_contact = fixed_num_of_contact)

        for key in buffer:
            buffer[key] = buffer[key][:int(1e6)]


        niters = buffer['positions'].shape[0] // 100
        print("total iteration: ", niters)


        ngeoms = env.sim.model.ngeom
        input_label = kwargs['input_label'][0]
        output_label = kwargs['output_label'][0]
        start = time.time()
        # paths = expand_data(buffer, ngeoms, fixed_num_of_contact, input_label, output_label)
        # print("expand data:", time.time() - start)
        paths = buffer

        start = time.time()
        train_data, test_data, vis_data, vis_data_test = split_data(paths, niters)
        print("split data:", time.time() - start)

        train_data['object_position'] = train_data['object_position'][:, :, :3]
        vis_data['original_object_position'] = vis_data['object_position']
        vis_data_test['original_object_position'] = vis_data_test['object_position']
        test_data['object_position'] = test_data['object_position'][:, :, :3]

        labels_to_dims = {}
        labels_to_dims['contacts'] = 3+6+ngeoms
        labels_to_dims['positions'] = 3
        # labels_to_dims['object_position'] = 7
        labels_to_dims['object_position'] = 3
        labels_to_dims['joint_position'] = 24
        labels_to_dims['object_vel'] = 6
        labels_to_dims['joint_vel'] = 24
        labels_to_dims['geoms'] = ngeoms

        dims = (labels_to_dims[input_label], labels_to_dims[output_label])
        print("preparation done")

        num_episodes = 1
        horizon = 100
        if visualize_training_data:
            visualize_data(vis_data, env, fixed_num_of_contact, feature_net, mode, input_label)
        if visualize_testing_data:
            visualize_data(vis_data_test, env, fixed_num_of_contact, feature_net, mode, input_label)
Example #19
def env_fn():
    env = gym.make('CartPole-v0')
    env.seed(0)
    return env
Example #20
def main(**kwargs):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get(
        'gpu_frac', 0.95)
    sess = tf.Session(config=config)
    exp_dir = os.getcwd() + '/data/feature_net/' + kwargs['input_label'][
        0] + kwargs['output_label'][0] + '/'
    mode = kwargs['mode'][0]

    if mode == 'restore':
        rotation_saver = tf.train.import_meta_graph(exp_dir + '-999.meta')
        rotation_saver.restore(sess, tf.train.latest_checkpoint(exp_dir))
        graph = tf.get_default_graph()

    with sess.as_default() as sess:

        input_label = kwargs['input_label'][0]
        output_label = kwargs['output_label'][0]
        buffer = {}
        name = '1'
        paths, fixed_num_of_contact = pickle.load(
            open(
                '../saved/trained/SoftHandManipulateEgg-v080-' + name +
                '-dict.pickle', 'rb'))
        for key in paths:
            buffer[key] = paths[key]

        for name in [str(i) for i in range(2, 17)]:
            paths, fixed_num_of_contact = pickle.load(
                open(
                    '../saved/trained/SoftHandManipulateEgg-v080-' + name +
                    '-dict.pickle', 'rb'))
            for key in paths:
                buffer[key] = np.concatenate([buffer[key], paths[key]], axis=0)

        env = gym.make(kwargs['env'][0],
                       obs_type=kwargs['obs_type'][0],
                       fixed_num_of_contact=fixed_num_of_contact)
        batch_size = 100
        paths = data_filter(buffer, fixed_num_of_contact, batch_size)
        niters = paths['positions'].shape[0] // batch_size
        print("total iteration: ", niters)
        print("total number of data: ", paths['positions'].shape[0])

        train_data, test_data, _, _ = split_data(paths, niters)
        train_data['object_position'] = train_data['object_position'][:, :, :3]
        test_data['object_position'] = test_data['object_position'][:, :, :3]

        labels_to_dims = {}
        labels_to_dims['positions'] = 3

        rotation_model = RotationModel(
            dims=[labels_to_dims[input_label]],
            sess=sess,
            fixed_num_of_contact=fixed_num_of_contact,
            feature_layers=kwargs['feature_layers'][0],
            output_layers=kwargs['output_layers'][0],
            learning_rate=kwargs['learning_rate'][0])

        if mode == 'train':
            sess.run(tf.global_variables_initializer())
            for i in range(niters):
                input, out = train_data[input_label][i], train_data[
                    output_label][i]
                pred = rotation_model.train(input, out)
                logger.logkv("iter", i)
                logger.dumpkvs()
            rotation_model.save_model(exp_dir, 999)

        if mode == 'restore':
            rotation_model.restore()
            for i in range(1):
                logger.logkv("iter", i)
                _, _ = rotation_model.restore_predict(
                    train_data[input_label][i], train_data[output_label][i])
                logger.dumpkvs()
Example #21
def main(**kwargs):
    z_dim = kwargs['z_dim']
    trans_mode = kwargs['trans_mode']
    epochs = kwargs['epochs']
    include_action = kwargs['include_action']
    label = kwargs['label']

    dataset = kwargs['data_path']
    feature_dims = kwargs['feature_dims']
    mode = kwargs['mode']
    n = kwargs['n']
    k = kwargs['k']
    encoder_lr = kwargs['encoder_lr']
    decoder_lr = kwargs['decoder_lr']
    decoder_feature_dims = kwargs['decoder_feature_dims']
    process_type = kwargs['process_type']

    if kwargs['data_path'] == '../dataset/sequence/HandManipulateEgg-v0/5seeds-dict.pickle':
        kwargs['dataset'] = 'trained_5seeds'
    elif kwargs['data_path'] == '../dataset/untrained/HandManipulateEgg-v0/5seeds-dict.pickle':
        kwargs['dataset'] = 'untrained_5seeds'
    elif kwargs['data_path'] == '../dataset/HandManipulateEgg-v09-dict.pickle':
        kwargs['dataset'] = 'trained_1seed'
    exp_dir = os.getcwd() + '/data/' + EXP_NAME + '/' + str(kwargs['seed'])
    if kwargs['debug']:
        save_dir = '../saved_cpc/' + str(label) + '/' +  str(kwargs['normalize_data']) + '/' + str(process_type)+ '/trained/debug'
        # save_dir = '../saved_cpc/' + str(label) + '/' + str(process_type)+ '/trained/debug'
    else:
        save_dir = '../saved_cpc/' + str(label) + '/' +  str(kwargs['normalize_data']) + '/' + str(process_type)+ '/trained'
        # save_dir = '../saved_cpc/' + str(label) + '/' + str(process_type)+ '/trained'
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    json.dump(kwargs, open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True, cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)

    obs, acts, fixed_num_of_contact = pickle.load(open(dataset, 'rb'))

    env = gym.make(kwargs['env'],
                   obs_type = kwargs['obs_type'],
                   fixed_num_of_contact = [fixed_num_of_contact, True])

    ngeoms = env.sim.model.ngeom
    obs, object_info = expand_data(obs, ngeoms, fixed_num_of_contact)
    if kwargs['normalize_data']:
        obs = normalize_obs(obs)
    next_obs = obs[:, 1:]
    obs = obs[:, :-1]
    N, L, _, contact_point_dim = obs.shape
    N, L, action_dim = acts.shape

    obs_dim = (fixed_num_of_contact, contact_point_dim)
    train_data, test_data = split_data([obs, acts, next_obs, object_info])

    batch_size = 2

    if mode in ['restore', 'store_weights']:
        saver = tf.train.import_meta_graph(save_dir + '-999.meta')
        pur_save_dir = save_dir[:-8]
        saver.restore(sess, tf.train.latest_checkpoint(pur_save_dir))
        graph = tf.get_default_graph()

    with sess.as_default() as sess:
        encoder = Encoder(z_dim,
                          fixed_num_of_contact,
                          contact_point_dim,
                          feature_dims)
        trans = Transition(z_dim, action_dim, mode = trans_mode)
        cpc = CPC(sess,
                  encoder,
                  trans,
                  encoder_lr,
                  fixed_num_of_contact,
                  contact_point_dim,
                  action_dim,
                  include_action = include_action,
                  type = 1*(label=='cpc1') + 2*(label=='cpc2'),
                  n_neg = n,
                  process_type = process_type,
                  mode = mode)

        cpc_epochs, decoder_epochs = epochs
        if mode == 'train':
            sess.run(tf.global_variables_initializer())
            logger.log("training started")
            for epoch in range(cpc_epochs):
                # train_cpc(cpc, epoch, train_data, batch_size, n, k)
                test_cpc(cpc, epoch, test_data, batch_size, n, k)

                logger.logkv("epoch", epoch)
                logger.dumpkvs()
            cpc.save_model(save_dir, 999)

            """decoder"""
            logger.log("Done with cpc training.")

            decoder = Decoder(cpc,
                              sess,
                              z_dim,
                              decoder_feature_dims,
                              fixed_num_of_contact,
                              contact_point_dim,
                              decoder_lr)
            uninit_vars = [var for var in tf.global_variables() if not sess.run(tf.is_variable_initialized(var))]
            sess.run(tf.variables_initializer(uninit_vars))
            for epoch in range(decoder_epochs):
                train_decoder(decoder, epoch, train_data, batch_size, n, k)
                test_decoder(decoder, epoch, test_data, batch_size, n, k)

                logger.logkv("epoch", (epoch + cpc_epochs))
                logger.dumpkvs()
            print("model saved in", save_dir)

        elif mode == 'restore':
            decoder = Decoder(cpc,
                              sess,
                              z_dim,
                              decoder_feature_dims,
                              fixed_num_of_contact,
                              contact_point_dim,
                              decoder_lr)
            uninit_vars = [var for var in tf.global_variables() if not sess.run(tf.is_variable_initialized(var))]
            sess.run(tf.variables_initializer(uninit_vars))
            print("initialized")
            for epoch in range(100):
                train_decoder(decoder, epoch, train_data, batch_size, n, k)
                test_decoder(decoder, epoch, test_data, batch_size, n, k)

                logger.logkv("epoch", epoch)
                logger.dumpkvs()
                print("logging to", exp_dir)

        elif mode == 'store_weights':
            old = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='')
            old = sess.run(old)
            save_dir = './saved_model/' +  str(label) + '/' + str(process_type)+ '/trained/'
            with open(save_dir + 'weights.pickle', 'wb') as pickle_file:
                pickle.dump(old, pickle_file)
            print("weights saved to", save_dir)

            save_dir = '/home/vioichigo/try/tactile-baselines/saved_model/cpc2/trained'
            with open(save_dir + 'params.pickle', 'wb') as pickle_file:
                pickle.dump([z_dim, fixed_num_of_contact, contact_point_dim, action_dim, encoder_lr, feature_dims, trans_mode, label, include_action], pickle_file)

        tf.reset_default_graph()
        print("graph reset successfully")
Example #22
def env_fn():
    return lambda: gym.make('PongNoFrameskip-v4')
Example #23
def make_env(
    env_id,
    env_type,
    mpi_rank=0,
    subrank=0,
    seed=None,
    reward_scale=1.0,
    gamestate=None,
    flatten_dict_observations=True,
    wrapper_kwargs=None,
    env_kwargs=None,
    logger_dir=None,
    initializer=None,
    obs_type='original',
    fixed_num_of_contact=0,
):
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)

    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}
    if ':' in env_id:
        import re
        import importlib
        module_name = re.sub(':.*', '', env_id)
        env_id = re.sub('.*:', '', env_id)
        importlib.import_module(module_name)
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(
            game=env_id,
            max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE,
            state=gamestate)
    else:
        env_kwargs['obs_type'] = obs_type
        env_kwargs['fixed_num_of_contact'] = fixed_num_of_contact
        env = gym.make(env_id, **env_kwargs)

    if flatten_dict_observations and isinstance(env.observation_space,
                                                gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir
                  and os.path.join(logger_dir,
                                   str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
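A minimal sketch of calling the factory above for an Atari environment; the arguments shown are assumptions, and the baselines helpers it references (make_atari, wrap_deepmind, Monitor) must be importable:

# hypothetical usage of make_env
env = make_env('PongNoFrameskip-v4', 'atari', mpi_rank=0, subrank=0, seed=0)
obs = env.reset()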
Example #24
def main(**kwargs):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_list = []
    for key in kwargs.keys():
        arg_list.append('--' + key)
        arg_list.append(str(kwargs[key]))
    arg_parser = common_arg_parser()
    buffer_size = int(kwargs['buffer_size'])
    args, unknown_args = arg_parser.parse_known_args(arg_list)
    extra_args = parse_cmdline_kwargs(unknown_args)

    params = args.__dict__
    import copy
    params = copy.deepcopy(params)

    if args.obs_type == 'object':
        params['label'] = args.obs_type
    elif args.obs_type == 'original':
        params['label'] = 'object+joint'
    elif args.obs_type == 'contact':
        params['label'] = 'object+contact(' + args.process_type + ')'
    elif args.obs_type == 'full_contact':
        params['label'] = 'object+joint+contact(' + args.process_type + ')'

    exp_dir = os.getcwd() + '/data/' + EXP_NAME
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='last')
    json.dump(params,
              open(exp_dir + '/params.json', 'w'),
              indent=2,
              sort_keys=True,
              cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get(
        'gpu_frac', 0.95)
    sess = tf.Session(config=config)

    folder = './data/policy/' + str(args.env)

    obs_type = params['obs_type']
    fixed_num_of_contact = params['fixed_num_of_contact']

    env = gym.make(params['env'],
                   obs_type=obs_type,
                   fixed_num_of_contact=fixed_num_of_contact)

    policy = pickle.load(
        open('./data/policy/' + str(args.env)[4:] + '/policy.pickle', 'rb'))
    T = env._max_episode_steps

    paths = generate_paths(policy,
                           T,
                           obs_type,
                           params['env'],
                           fixed_num_of_contact,
                           build_env(args),
                           contact_dim=env.contact_dim,
                           buffer_size=buffer_size)

    paths = process_episode(paths.all_samples(), env.contact_dim,
                            fixed_num_of_contact)

    folder = '../saved/trained/' + str(args.env) + str(fixed_num_of_contact)
    with open(folder + '-18-dict.pickle', 'wb') as pickle_file:
        pickle.dump([paths, fixed_num_of_contact], pickle_file)
Example #25
def make_fn():
    env = gym.make('PongNoFrameskip-v4')
    return env
Example #26
def main(**kwargs):
    exp_dir = os.getcwd() + '/cpc_model/' + kwargs['process_type'][0] + '/n200-8'
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='last')
    json.dump(kwargs,
              open(exp_dir + '/params.json', 'w'),
              indent=2,
              sort_keys=True,
              cls=ClassEncoder)

    obs, acts, fixed_num_of_contact = pickle.load(
        open('../untrained/HandManipulateEgg-v0/5seeds-dict.pickle', 'rb'))

    include_action = kwargs['include_action'][0]

    env = gym.make(kwargs['env'][0],
                   obs_type=kwargs['obs_type'][0],
                   fixed_num_of_contact=[fixed_num_of_contact, True])

    ngeoms = env.sim.model.ngeom
    obs, object_info = expand_data(obs, ngeoms, fixed_num_of_contact)
    next_obs = obs[:, 1:]
    obs = obs[:, :-1]
    N, L, _, contact_point_dim = obs.shape
    N, L, action_dim = acts.shape

    obs_dim = (fixed_num_of_contact, contact_point_dim)

    z_dim = 8
    lr = 1e-3
    epochs = 100
    batch_size = 2
    n = 200
    k = 1

    encoder = Encoder(z_dim, obs_dim[1], fixed_num_of_contact).cuda()
    if include_action:
        trans = Transition(z_dim, action_dim).cuda()
    else:
        trans = Transition(z_dim, 0).cuda()
    decoder = Decoder(z_dim, 3).cuda()

    optim_cpc = optim.Adam(list(encoder.parameters()) +
                           list(trans.parameters()),
                           lr=lr)
    optim_dec = optim.Adam(decoder.parameters(), lr=lr)
    train_data, test_data = split_data([obs, acts, next_obs])

    for epoch in range(epochs):
        train_cpc(encoder, trans, optim_cpc, epoch, train_data, batch_size, n,
                  k, include_action)
        test_cpc(encoder, trans, epoch, test_data, batch_size, n, k,
                 include_action)

        logger.logkv("epoch", epoch)
        logger.dumpkvs()

    train_data, test_data = split_data([obs, acts, next_obs, object_info])
    for epoch in range(100):
        train_decoder(decoder,
                      encoder,
                      optim_dec,
                      epoch,
                      train_data,
                      batch_size,
                      include_action,
                      n,
                      k=1)
        test_decoder(decoder,
                     encoder,
                     epoch,
                     test_data,
                     batch_size,
                     include_action,
                     n,
                     k=1)
        logger.logkv("epoch", epoch)
        logger.dumpkvs()
Example #27
def main(**kwargs):

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get(
        'gpu_frac', 0.95)
    sess = tf.Session(config=config)
    env = gym.make(
        kwargs['env'][0],
        obs_type=kwargs['obs_type'][0],
        fixed_num_of_contact=[kwargs['fixed_num_of_contact'][0], False])

    num_episodes = 100
    horizon = 100
    horizon = 1
    fixed_num_of_contact = kwargs['fixed_num_of_contact'][0]
    mode = kwargs['mode'][0]

    if mode == 'load':
        epoch = 10
        obs, predictions = pickle.load(
            open('./dataset/supervised-saved/' + str(epoch) + '.pickle', 'rb'))

        contact_num = fixed_num_of_contact
        B, D = obs.shape
        contact_info = obs[:, :env.contact_dim].reshape(
            (B, fixed_num_of_contact, -1))
        object_info = obs[:, env.contact_dim:]

        for contact, o, pred in zip(contact_info, object_info, predictions):
            # env.sim.data.set_joint_qpos('target:joint', o[-7:]+np.ones(7))
            object_position = o[-7:]
            object_vel = o[48:48 + 6]
            joint_position = o[:24]
            joint_vel = o[24:48]
            env.sim.data.set_joint_qpos('object:joint', object_position)
            env.sim.data.set_joint_qvel('object:joint', object_vel)
            for idx in range(len(env.sim.model.joint_names)):
                name = env.sim.model.joint_names[idx]
                if name.startswith('robot'):
                    env.sim.data.set_joint_qpos(name, joint_position[idx])
                    env.sim.data.set_joint_qvel(name, joint_vel[idx])
            pos = object_position[:-4]
            num_points = (np.sum(contact, axis=1) != 0).sum()
            if num_points != 0:
                print(((pos - pred)**2).sum(),
                      np.abs(pos - pred).sum(), num_points)
            else:
                print(((pos - pred)**2).sum(), np.abs(pos - pred).sum(), pos)

            env.render()
            time.sleep(0.2)
            env.sim.data.set_joint_qpos(
                'object:joint',
                np.concatenate((pred, object_position[-4:]), axis=-1))
            env.render()
            time.sleep(1)
        env.close()

        # # # note: 46 is object
        #  for contact_idx in range(contact_num):
        #      site_name = 'contact{}'.format(contact_idx+1)
        #      site_id = env.sim.model.site_name2id(site_name)
        #      env.sim.model.site_pos[site_id] = contact_info[contact_idx][-9:-6]
        #      env.sim.forward()
        #      time.sleep(1)
        #  env.render()
        #  st()

    else:
        for _ in range(num_episodes):
            o = env.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = env.action_space.sample()
                o, r, d, _ = env.step(a)
                env.render()

                t = t + 1
                # contacts = o['observation'][:env.contact_dim].reshape((fixed_num_of_contact, -1))
                # # env.sim.data.set_joint_qpos('target:joint', o['observation'][-7:]+np.ones(7))
                # contact_num = env.contact_num
                # # for idx in range(env.sim.model.ngeom):
                # #     print(idx, env.sim.model.geom_id2name(idx))
                # # note: 46 is object

                # print(contact_num)
                # for contact_idx in range(contact_num):
                #     site_name = 'contact{}'.format(contact_idx+1)
                #     site_id = env.sim.model.site_name2id(site_name)
                #     env.sim.model.site_pos[site_id] = contacts[contact_idx][-9:-6]
                #     env.sim.forward()
                #     env.render()
                #     time.sleep(1)

                # for contact_idx in range(contact_num):
                #     site_name = 'contact{}'.format(contact_idx+1)
                #     site_id = env.sim.model.site_name2id(site_name)
                #     env.sim.model.site_pos[site_id] = np.array([1, 0.9, 0.25])
                #     env.sim.forward()
                env.render()
                time.sleep(0.1)
            env.close()