Code example #1
File: mf_trainer.py Project: AliengirlLiv/teachable
 def save_videos(self, step, policy, save_name='sample_video', num_rollouts=2, teacher_dict={}, save_video=False,
                 log_prefix=None, stochastic=True):
     # Evaluate the policy at the current curriculum level.
     policy.eval()
     self.env.set_level_distribution(self.curriculum_step)
     # Upload videos to wandb only when requested and outside debug runs.
     save_wandb = (save_video and not self.is_debug)
     paths, accuracy = rollout(self.env, policy,
                               max_path_length=200,
                               reset_every=self.rollouts_per_meta_task,
                               stochastic=stochastic,
                               batch_size=1, record_teacher=True, teacher_dict=teacher_dict,
                               video_directory=self.exp_name, video_name=save_name + str(self.curriculum_step),
                               num_rollouts=num_rollouts, save_wandb=save_wandb, save_locally=False,
                               obs_preprocessor=self.obs_preprocessor,)
     if log_prefix is not None:
         logger.logkv(log_prefix + "Acc", accuracy)
         logger.logkv(log_prefix + "Reward", np.mean([sum(path['rewards']) for path in paths]))
         logger.logkv(log_prefix + "PathLength",
                      np.mean([path['env_infos'][-1]['episode_length'] for path in paths]))
         logger.logkv(log_prefix + "Success", np.mean([path['env_infos'][-1]['success'] for path in paths]))
     return paths, accuracy
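A hypothetical call site for the method above; the trainer instance, step value, and the 'Eval/' prefix are illustrative assumptions, not names taken from the project:

# Hypothetical usage sketch: trainer is an instance of the class defining save_videos.
paths, accuracy = trainer.save_videos(
    step,
    policy,
    save_name='eval_video',
    num_rollouts=4,
    save_video=True,      # uploads to wandb unless the trainer runs in debug mode
    log_prefix='Eval/',   # logs Eval/Acc, Eval/Reward, Eval/PathLength, Eval/Success
    stochastic=False,     # assumption: deterministic actions for evaluation
)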
Code example #2
    with tf.Session() as sess:
        pkl_path = args.param
        print("Testing policy %s" % pkl_path)
        data = joblib.load(pkl_path)
        policy = data['policy']
        env = normalize(ArmReacherEnv(side='right'))

        real_rewards = np.array([])
        act_rewards = np.array([])
        pos_rewards = np.array([])
        for _ in range(args.num_rollouts):
            path = rollout(env,
                           policy,
                           max_path_length=args.max_path_length,
                           animated=False,
                           speedup=args.speedup,
                           video_filename=args.video_filename,
                           save_video=False,
                           ignore_done=args.ignore_done,
                           stochastic=args.stochastic)

            real_rewards = np.append(real_rewards, np.sum(path[0]['rewards']))
            print("Real Reward Sum", np.sum(path[0]['rewards']))
        print("Real Reward Avg", np.mean(real_rewards))
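The snippet reads several attributes off an 'args' namespace; a minimal argparse sketch that would provide them (flag names are inferred from the code above, defaults are assumptions):

import argparse

# Flags inferred from the snippet above; defaults here are assumptions.
parser = argparse.ArgumentParser()
parser.add_argument('--param', type=str, required=True, help='path to the policy .pkl')
parser.add_argument('--num_rollouts', type=int, default=10)
parser.add_argument('--max_path_length', type=int, default=200)
parser.add_argument('--speedup', type=float, default=1.0)
parser.add_argument('--video_filename', type=str, default=None)
parser.add_argument('--ignore_done', action='store_true')
parser.add_argument('--stochastic', action='store_true')
args = parser.parse_args()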
Code example #3
File: save_videos.py Project: iclavera/meta-mb
    experiment_paths = load_exps_data(args.path,
                                      gap=args.gap_pkl,
                                      max=args.max_pkl)
    for exp_path in experiment_paths:
        max_path_length = (exp_path['json']['max_path_length']
                           if args.max_path_length is None else args.max_path_length)
        if valid_experiment(exp_path['json']):
            for pkl_path in exp_path['pkl']:
                with tf.Session() as sess:
                    print("\n Testing policy %s \n" % pkl_path)
                    data = joblib.load(pkl_path)
                    policy = data['policy']
                    if hasattr(policy, 'switch_to_pre_update'):
                        policy.switch_to_pre_update()
                    env = data['env']
                    video_filename = os.path.splitext(pkl_path)[0] + '.mp4'
                    paths = rollout(env,
                                    policy,
                                    max_path_length=max_path_length,
                                    animated=False,
                                    speedup=args.speedup,
                                    video_filename=video_filename,
                                    save_video=True,
                                    ignore_done=args.ignore_done,
                                    stochastic=args.stochastic,
                                    num_rollouts=args.num_rollouts)
                    print('Average Returns: ',
                          np.mean([sum(path['rewards']) for path in paths]))
                tf.reset_default_graph()
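Opening a fresh session per checkpoint and calling tf.reset_default_graph() afterwards is what lets this loop load many pickled policies in one process: unpickling recreates ops in the default graph, and without the reset, variable names from successive policies would collide. A minimal sketch of the same TF1 pattern (pkl_paths is an assumed list of checkpoint files):

import joblib
import tensorflow as tf

for pkl_path in pkl_paths:
    with tf.Session() as sess:
        data = joblib.load(pkl_path)  # unpickling rebuilds ops in the default graph
        # ... evaluate data['policy'] here ...
    tf.reset_default_graph()          # drop all ops before the next load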
Code example #4
                        env_class = getattr(iclr19_levels, args.class_name)
                        # 'arguments' holds the env kwargs that were previously built
                        # by hand: start_loc, include_holdout_obj, and optionally
                        # room_size / num_dists.
                        e_new = env_class(**arguments)
                        e_new.use_teacher = args.use_teacher
                        if args.use_teacher:
                            # Attach a bot-backed teacher that gives feedback after each action.
                            teacher = PostActionAdvice(Bot, e_new)
                            e_new.teacher = teacher
                            e_new.teacher.set_feedback_type(args.feedback_type)
                        env = rl2env(normalize(e_new))

                    video_filename = os.path.join(args.path, 'saved_video.mp4')
                    paths, accuracy = rollout(env, policy, max_path_length=max_path_length,
                                              animated=args.animated, speedup=args.speedup,
                                              video_filename=video_filename, save_video=True,
                                              ignore_done=args.ignore_done, batch_size=1,
                                              stochastic=args.stochastic, num_rollouts=args.num_rollouts,
                                              reset_every=args.reset_every, record_teacher=True,
                                              reward_predictor=reward_predictor, dense_rewards=args.dense_rewards)
                    print('Average Returns: ', np.mean([sum(path['rewards']) for path in paths]))
                    print('Average Path Length: ', np.mean([path['env_infos'][-1]['episode_length'] for path in paths]))
                    print('Average Success Rate: ', np.mean([path['env_infos'][-1]['success'] for path in paths]))
                tf.reset_default_graph()
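The three averages printed above assume each path dict carries per-step 'rewards' and 'env_infos', with the last env_info summarizing the episode; a small helper capturing that aggregation (the structure is inferred from the calls above, the helper name is hypothetical):

import numpy as np

def summarize(paths):
    # Each path: 'rewards' is a per-step array; env_infos[-1] holds episode summaries.
    returns = [np.sum(p['rewards']) for p in paths]
    lengths = [p['env_infos'][-1]['episode_length'] for p in paths]
    success = [p['env_infos'][-1]['success'] for p in paths]
    return np.mean(returns), np.mean(lengths), np.mean(success)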
Code example #5
        print("Testing policy %s" % pkl_path)
        data = joblib.load(pkl_path)
        policy = data['policy']
        env = normalize(ArmReacherEnv(side='right'))
        goal = data['env'].goal

        real_rewards = np.array([])
        act_rewards = np.array([])
        pos_rewards = np.array([])

        for i in range(args.num_rollouts):
            path = rollout(env,
                           policy,
                           max_path_length=args.max_path_length,
                           animated=False,
                           speedup=args.speedup,
                           video_filename=args.video_filename,
                           save_video=False,
                           ignore_done=args.ignore_done,
                           stochastic=args.stochastic)

            # Build a MuJoCo env that replays the action sequence recorded above.
            mujoco_env_mimic_act = normalize(BlueEnv(actions=env.actions))

            # Save the executed actions; a context manager closes the file handle.
            with open("actions_ppo_0.pkl", "wb") as f:
                pickle.dump(env.actions, f)

            mujoco_env_mimic_act.goal = env.goal
            act_filename = "local_act_maml_" + str(i) + ".mp4"
            path_act = rollout(mujoco_env_mimic_act,
                               policy,
                               max_path_length=args.max_path_length,
                               animated=True,