예제 #1
0
    # else:
    #     sampler_cls = BatchSampler
    created_session = True if (sess is None) else False
    if sess is None:
        sess = tf.Session()
        sess.__enter__()
    n_itr = 1
    sess.run(tf.global_variables_initializer())
    # sampler_cls.start_worker()
    for itr in range(n_itr):
        # rollout(env, policy, animated=True, max_path_length=1000)
        o = env.reset()
        policy.reset()
        d = False
        while not d:
            env.render()
            flat_obs = policy.observation_space.flatten(o)
            mean, log_std = [x[0] for x in policy._f_dist([flat_obs])]
            # rnd = np.random.normal(size=mean.shape)
            # action = rnd * np.exp(log_std) + mean
            action = mean
            next_o, r, d, env_info = env.step(action)
            o = next_o

    # sampler_cls.shutdown_worker()
    if created_session:
        sess.close()

    # done = False
    # obs = env.reset()
    # rewards = []
예제 #2
0
     'xml_file': xml_file,
     'distractors': True
 })
 env = TfEnv(normalize(pusher_env))
 # path = rollout(env, policy, max_path_length=120, speedup=1,
 path = rollout(env,
                policy,
                max_path_length=max_path_length,
                speedup=1,
                noise=noise,
                animated=True,
                always_return_paths=True,
                save_video=False,
                vision=True)
 # close the window after rollout
 env.render(close=True)
 import time
 time.sleep(0.1)
 # if path['observations'][-1,0] > joint_thresh:
 #     num_tries = max_num_tries
 #if path['rewards'].sum() > filter_thresh and path['observations'][-1,0] < joint_thresh:
 if eval_success(
         path
 ):  # and path['observations'][-1,0] < joint_thresh:
     print('xml for distractor is', xml_file1)
     pusher_env = PusherEnvVision2D(**{
         'xml_file': xml_file1,
         'distractors': True
     })
     env = TfEnv(normalize(pusher_env))
     # path = rollout(env, policy, max_path_length=120, speedup=1,
def main():

    import matplotlib.pyplot as plt
    plt.ion()

    parser = argparse.ArgumentParser()
    parser.add_argument('env_name', type=str, help="name of gym env")
    parser.add_argument('model_path', type=str, help="path of trained model")
    parser.add_argument('--cos_forward', action='store_true')
    parser.add_argument('--norm_input', action='store_true')
    parser.add_argument('--mode',
                        type=str,
                        choices=['render', 'record'],
                        default='render')
    parser.add_argument('--data_path', type=str, default='/tmp/data')
    parser.add_argument('--num_sample', type=int, default=100000)

    args = parser.parse_args()

    with tf.Session() as sess:
        data = joblib.load(args.model_path)
        _encoder = data["encoder"]
        _inverse_model = data["inverse_model"]
        _forward_model = data["forward_model"]

        env = TfEnv(
            normalize(env=GymEnv('Box3dReachPixel-v11',
                                 record_video=False,
                                 log_dir='/tmp/gym_test',
                                 record_log=False)))

        s1_ph = tf.placeholder(tf.float32,
                               [None] + list(env.observation_space.shape))
        s2_ph = tf.placeholder(tf.float32,
                               [None] + list(env.observation_space.shape))

        action_ph = tf.placeholder(tf.float32,
                                   [None] + list(env.action_space.shape))

        encoder1 = _encoder.get_weight_tied_copy(observation_input=s1_ph)
        encoder2 = _encoder.get_weight_tied_copy(observation_input=s2_ph)

        inverse_model = _inverse_model.get_weight_tied_copy(
            feature_input1=encoder1.output, feature_input2=encoder2.output)
        forward_model = _forward_model.get_weight_tied_copy(
            feature_input=encoder1.output, action_input=action_ph)
        if args.cos_forward:
            forward_loss = cos_loss(encoder2.output, forward_model.output)
        else:
            forward_loss = tf.reduce_mean(
                tf.square(encoder2.output - forward_model.output))

        inverse_loss = tf.reduce_mean(
            tf.square(action_ph - inverse_model.output))

        # Start running the env
        obs = env.reset()
        next_obs = None
        x = []
        inverse_losses_results = []
        forward_losses_results = []

        if args.mode == 'render':
            f, (ax1, ax2) = plt.subplots(2)
            ax1.set_title("Inverse loss")
            ax2.set_title("Forward loss")
        elif args.mode == 'record':
            images = np.zeros([args.num_sample, 500, 500, 3], dtype='uint8')
            inverse_losses = np.zeros(args.num_sample, dtype='float32')
            forward_losses = np.zeros(args.num_sample, dtype='float32')
            boxes_contacts = np.zeros(args.num_sample, dtype='uint8')
            table_contacts = np.zeros(args.num_sample, dtype='uint8')

        for t in range(args.num_sample):
            if t % LOG_FREQ == 0:
                print("Sample: {}".format(t))
            action = env.action_space.sample()
            next_obs, reward, done, env_info = env.step(action)
            if args.mode == 'render':
                env.render()
            elif args.mode == 'record':
                img = env.wrapped_env._wrapped_env.env.env.render(
                    mode='rgb_array')
                images[t, :, :, :] = img

            inverse_loss_result, forward_loss_result = sess.run(
                [inverse_loss, forward_loss], {
                    s1_ph: [obs / 255.0 - 0.5],
                    s2_ph: [next_obs / 255.0 - 0.5],
                    action_ph: [action]
                })

            if args.mode == 'render':
                x.append(t)
                inverse_losses_results.append(inverse_loss_result)
                forward_losses_results.append(forward_loss_result)
                ax1.plot(x, inverse_losses_results, c="blue")
                ax2.plot(x, forward_losses_results, c="blue")
                plt.pause(0.001)
                plt.show()
            elif args.mode == 'record':
                boxes_contacts[t] = env_info["contact_reward"]
                table_contacts[t] = env_info["table_contact_reward"]
                forward_losses[t] = forward_loss_result
                inverse_losses[t] = inverse_loss_result
            if done:
                obs = env.reset()
            else:
                obs = next_obs

        if args.mode == 'record':
            data_dict = dict(images=images,
                             forward_losses=forward_losses,
                             inverse_losses=inverse_losses,
                             boxes_contacts=boxes_contacts,
                             table_contacts=table_contacts)
            joblib.dump(data_dict, args.data_path)
            print("Saved data to {}".format(args.data_path))