def simulate_policy(checkpoint_path,
                    num_rollouts,
                    max_path_length,
                    render_kwargs,
                    video_save_path=None,
                    evaluation_environment_params=None):
    """Load a checkpointed policy and roll it out in its environment.

    Args:
        checkpoint_path: Path to a checkpoint directory containing the
            variant/progress/metadata loadable by
            `load_variant_progress_metadata`.
        num_rollouts: Number of evaluation episodes to collect.
        max_path_length: Maximum number of steps per episode.
        render_kwargs: Rendering options merged on top of
            `DEFAULT_RENDER_KWARGS`.
        video_save_path: Directory in which to save one `.mp4` per episode.
            Videos are only written when this is truthy AND the render mode
            is 'rgb_array'. (Fix: previously this argument was ignored and
            videos always went to a hard-coded '/tmp/simulate_policy/'.)
        evaluation_environment_params: Unused; kept for interface
            compatibility with callers that pass it.

    Returns:
        The list of rollout paths.
    """
    checkpoint_path = os.path.abspath(checkpoint_path.rstrip('/'))
    variant, progress, metadata = load_variant_progress_metadata(
        checkpoint_path)
    environment = load_environment(variant)
    policy = load_policy(checkpoint_path, variant, environment)

    render_kwargs = {**DEFAULT_RENDER_KWARGS, **render_kwargs}
    paths = rollouts(num_rollouts,
                     environment,
                     policy,
                     path_length=max_path_length,
                     render_kwargs=render_kwargs)

    if video_save_path and render_kwargs.get('mode') == 'rgb_array':
        # fps is derived from the environment timestep; default ~30 fps.
        fps = 1 // getattr(environment, 'dt', 1 / 30)
        video_save_dir = os.path.expanduser(video_save_path)
        os.makedirs(video_save_dir, exist_ok=True)
        for i, path in enumerate(paths):
            episode_video_path = os.path.join(
                video_save_dir, f'episode_{i}.mp4')
            save_video(path['images'], episode_video_path, fps=fps)

    return paths
def _evaluation_paths(self, policy, evaluation_env):
    """Collect deterministic evaluation rollouts, periodically saving videos.

    Returns an empty tuple when evaluation is disabled
    (`_eval_n_episodes < 1`); otherwise returns the rollout paths, with
    rendered frames popped out of any path that was written to video.
    """
    if self._eval_n_episodes < 1:
        return ()

    with policy.set_deterministic(self._eval_deterministic):
        paths = rollouts(self._eval_n_episodes,
                         evaluation_env,
                         policy,
                         self.sampler._max_path_length,
                         render_kwargs=self._eval_render_kwargs)

    # Save on the first epoch, then every `_video_save_frequency` epochs.
    video_due = self._video_save_frequency > 0 and (
        self._epoch == 0
        or (self._epoch + 1) % self._video_save_frequency == 0)
    if video_due:
        # fps derived from the environment timestep; default ~30 fps.
        fps = 1 // getattr(self._training_environment, 'dt', 1 / 30)
        for index, path in enumerate(paths):
            frames = path.pop('images')
            file_name = f'evaluation_path_{self._epoch}_{index}.mp4'
            save_video(frames,
                       os.path.join(os.getcwd(), 'videos', file_name),
                       fps=fps)

    return paths
def _evaluation_paths(self, policy, evaluation_env):
    """Run evaluation-mode rollouts and periodically write videos to disk.

    Returns an empty tuple when evaluation is disabled; otherwise returns
    the rollout paths, with image frames popped from paths saved to video.
    """
    if self._eval_n_episodes < 1:
        return ()

    # TODO(hartikainen): I don't like this way of handling evaluation mode
    # for the policies. We should instead have two separate policies for
    # training and evaluation.
    with policy.evaluation_mode():
        paths = rollouts(self._eval_n_episodes,
                         evaluation_env,
                         policy,
                         self.sampler._max_path_length,
                         render_kwargs=self._eval_render_kwargs)

    # Save on the first epoch, then every `_video_save_frequency` epochs.
    save_video_now = self._video_save_frequency > 0 and (
        self._epoch == 0
        or (self._epoch + 1) % self._video_save_frequency == 0)
    if save_video_now:
        # fps derived from the environment timestep; default ~30 fps.
        fps = 1 // getattr(self._training_environment, 'dt', 1 / 30)
        for index, path in enumerate(paths):
            frames = path.pop('images')
            file_name = f'evaluation_path_{self._epoch}_{index}.mp4'
            save_video(frames,
                       os.path.join(os.getcwd(), 'videos', file_name),
                       fps=fps)

    return paths
def _evaluation_paths(self, policy, evaluation_env):
    """Run deterministic evaluation rollouts and periodically save videos.

    Returns an empty tuple when evaluation is disabled
    (`_eval_n_episodes < 1`); otherwise returns the rollout paths, with
    rendered frames popped out of any path that was written to video.

    (Cleanup: removed leftover commented-out debug scaffolding — a
    hard-coded HopperEnv environment swap and ipdb breakpoints.)
    """
    if self._eval_n_episodes < 1:
        return ()

    with policy.set_deterministic(self._eval_deterministic):
        paths = rollouts(self._eval_n_episodes,
                         evaluation_env,
                         policy,
                         self.sampler._max_path_length,
                         render_mode=self._eval_render_mode)

    should_save_video = (
        self._video_save_frequency > 0
        and self._epoch % self._video_save_frequency == 0)
    if should_save_video:
        for i, path in enumerate(paths):
            video_frames = path.pop('images')
            video_file_name = f'evaluation_path_{self._epoch}_{i}.avi'
            video_file_path = os.path.join(
                os.getcwd(), 'videos', video_file_name)
            save_video(video_frames, video_file_path)

    return paths
def simulate_policy(args):
    """Load a checkpointed policy and roll it out in its pickled environment.

    Args:
        args: Namespace with `checkpoint_path`, `deterministic`,
            `max_path_length`, `num_rollouts`, and `render_mode`.

    Returns:
        The list of rollout paths.

    (Fix: removed the unconditional `pdb.set_trace()` that fired whenever
    `render_mode != 'human'`, which halted every non-interactive run.)
    """
    session = tf.keras.backend.get_session()
    checkpoint_path = args.checkpoint_path.rstrip('/')
    experiment_path = os.path.dirname(checkpoint_path)

    variant_path = os.path.join(experiment_path, 'params.json')
    with open(variant_path, 'r') as f:
        variant = json.load(f)

    with session.as_default():
        pickle_path = os.path.join(checkpoint_path, 'checkpoint.pkl')
        with open(pickle_path, 'rb') as f:
            picklable = pickle.load(f)

    env = picklable['env']
    policy = get_policy_from_variant(variant, env, Qs=[None])
    policy.set_weights(picklable['policy_weights'])

    with policy.set_deterministic(args.deterministic):
        paths = rollouts(env,
                         policy,
                         path_length=args.max_path_length,
                         n_paths=args.num_rollouts,
                         render_mode=args.render_mode)

    return paths
def _evaluation_paths(self, policy, evaluation_env):
    """Run deterministic evaluation rollouts and periodically save videos.

    Returns an empty tuple when evaluation is disabled
    (`_eval_n_episodes < 1`); otherwise returns the rollout paths, with
    rendered frames popped out of any path that was written to video.

    (Fix: removed the hard-coded `self._eval_n_episodes = 10` override,
    which silently clobbered the configured episode count on every call;
    also removed leftover commented-out debug lines.)
    """
    if self._eval_n_episodes < 1:
        return ()

    with policy.set_deterministic(self._eval_deterministic):
        paths = rollouts(self._eval_n_episodes,
                         evaluation_env,
                         policy,
                         self.sampler._max_path_length,
                         render_mode=self._eval_render_mode)

    should_save_video = (
        self._video_save_frequency > 0
        and self._epoch % self._video_save_frequency == 0)
    if should_save_video:
        for i, path in enumerate(paths):
            video_frames = path.pop('images')
            video_file_name = f'evaluation_path_{self._epoch}_{i}.avi'
            video_file_path = os.path.join(
                os.getcwd(), 'videos', video_file_name)
            save_video(video_frames, video_file_path)

    return paths
def simulate_policy(args):
    """Evaluate a checkpointed policy.

    Records Monitor video when `args.record_video` is set; otherwise writes
    aggregate rollout metrics to `final_eval.csv` in the experiment
    directory. Offscreen ('rgb_array') renders are additionally saved as
    per-episode mp4 files under /tmp/simulate_policy/.

    Returns the list of rollout paths.
    """
    session = tf.keras.backend.get_session()
    checkpoint_path = args.checkpoint_path.rstrip('/')
    experiment_path = os.path.dirname(checkpoint_path)

    with open(os.path.join(experiment_path, 'params.pkl'), 'rb') as f:
        variant = pickle.load(f)

    with session.as_default():
        with open(os.path.join(checkpoint_path, 'checkpoint.pkl'), 'rb') as f:
            picklable = pickle.load(f)

    all_environment_params = variant['environment_params']
    if 'evaluation' in all_environment_params:
        environment_params = all_environment_params['evaluation']
    else:
        environment_params = all_environment_params['training']

    evaluation_environment = get_environment_from_params(environment_params)
    evaluation_environment.seed(variant['run_params']['seed'])

    if args.record_video:
        video_dir = os.path.join(experiment_path, 'test-video')
        evaluation_environment._env = wrappers.Monitor(
            evaluation_environment._env, video_dir, force=True)

    policy = get_policy_from_variant(variant, evaluation_environment)
    policy.set_weights(picklable['policy_weights'])

    render_kwargs = {**DEFAULT_RENDER_KWARGS, **args.render_kwargs}
    with policy.set_deterministic(args.deterministic):
        paths = rollouts(args.num_rollouts,
                         evaluation_environment,
                         policy,
                         path_length=args.max_path_length,
                         render_kwargs=render_kwargs)

    if not args.record_video:
        evaluation_metrics = evaluate_rollouts(paths, evaluation_environment)
        evaluation_file_path = os.path.join(experiment_path, 'final_eval.csv')
        with open(evaluation_file_path, 'w') as f:
            writer = csv.DictWriter(f, evaluation_metrics.keys())
            writer.writeheader()
            writer.writerow(evaluation_metrics)

    if args.render_kwargs.get('mode') == 'rgb_array':
        # fps derived from the environment timestep; default ~30 fps.
        fps = 1 // getattr(evaluation_environment, 'dt', 1 / 30)
        for i, path in enumerate(paths):
            video_save_dir = os.path.expanduser('/tmp/simulate_policy/')
            video_save_path = os.path.join(video_save_dir, f'episode_{i}.mp4')
            save_video(path['images'], video_save_path, fps=fps)

    return paths
def _evaluation_paths(self, policy, evaluation_env):
    """Gather deterministic evaluation rollouts.

    Returns an empty tuple when evaluation is disabled
    (`_eval_n_episodes < 1`); otherwise the collected rollout paths.
    """
    if self._eval_n_episodes < 1:
        return ()

    with policy.set_deterministic(self._eval_deterministic):
        return rollouts(evaluation_env,
                        policy,
                        self.sampler._max_path_length,
                        self._eval_n_episodes,
                        render_mode=self._eval_render_mode)
def simulate_policy(args):
    """Roll out a checkpointed policy and report per-episode returns.

    Args:
        args: Namespace with `checkpoint_path`, `deterministic`,
            `max_path_length`, `num_rollouts`, and `render_mode`.

    Returns:
        The list of rollout paths.

    (Fix: removed the unconditional `pdb.set_trace()` that fired whenever
    `render_mode != 'human'`, which halted every non-interactive run.)
    """
    session = tf.keras.backend.get_session()
    checkpoint_path = args.checkpoint_path.rstrip('/')
    experiment_path = os.path.dirname(checkpoint_path)

    variant_path = os.path.join(experiment_path, 'params.json')
    with open(variant_path, 'r') as f:
        variant = json.load(f)

    with session.as_default():
        pickle_path = os.path.join(checkpoint_path, 'checkpoint.pkl')
        with open(pickle_path, 'rb') as f:
            picklable = pickle.load(f)

    environment_params = (
        variant['environment_params']['evaluation']
        if 'evaluation' in variant['environment_params']
        else variant['environment_params']['training'])
    evaluation_environment = get_environment_from_params(environment_params)

    policy = get_policy_from_variant(
        variant, evaluation_environment, Qs=[None])
    policy.set_weights(picklable['policy_weights'])

    with policy.set_deterministic(args.deterministic):
        paths = rollouts(args.num_rollouts,
                         evaluation_environment,
                         policy,
                         path_length=args.max_path_length,
                         render_mode=args.render_mode)

    # Summarize episode returns for quick inspection.
    rewards = [path['rewards'].sum() for path in paths]
    print('Rewards: {}'.format(rewards))
    print('Mean: {}'.format(np.mean(rewards)))

    return paths
def simulate_policy(args):
    """Replay a checkpointed policy in its evaluation environment.

    When rendering offscreen ('rgb_array'), one `.avi` per episode is
    written under /tmp/simulate_policy/. Returns the list of rollout paths.
    """
    session = tf.keras.backend.get_session()
    checkpoint_path = args.checkpoint_path.rstrip('/')
    experiment_path = os.path.dirname(checkpoint_path)

    with open(os.path.join(experiment_path, 'params.pkl'), 'rb') as f:
        variant = pickle.load(f)

    with session.as_default():
        with open(os.path.join(checkpoint_path, 'checkpoint.pkl'), 'rb') as f:
            picklable = pickle.load(f)

    all_environment_params = variant['environment_params']
    if 'evaluation' in all_environment_params:
        environment_params = all_environment_params['evaluation']
    else:
        environment_params = all_environment_params['training']
    evaluation_environment = get_environment_from_params(environment_params)

    policy = get_policy_from_variant(variant, evaluation_environment)
    policy.set_weights(picklable['policy_weights'])

    render_kwargs = {**DEFAULT_RENDER_KWARGS, **args.render_kwargs}
    with policy.set_deterministic(args.deterministic):
        paths = rollouts(args.num_rollouts,
                         evaluation_environment,
                         policy,
                         path_length=args.max_path_length,
                         render_kwargs=render_kwargs)

    if args.render_kwargs.get('mode') == 'rgb_array':
        for episode_index, path in enumerate(paths):
            video_save_dir = os.path.expanduser('/tmp/simulate_policy/')
            video_save_path = os.path.join(
                video_save_dir, f'episode_{episode_index}.avi')
            save_video(path['images'], video_save_path)

    return paths
def _evaluation_paths(self):
    """Evaluate every per-goal policy; optionally save stitched videos.

    Returns an empty tuple when evaluation is disabled; otherwise a list
    with one entry per goal, each entry being that goal's rollout paths.
    When videos are due this epoch, frames from all goals for the same
    episode index are concatenated into a single mp4.
    """
    if self._eval_n_episodes < 1:
        return ()

    save_video_this_epoch = (
        self._video_save_frequency > 0
        and self._epoch % self._video_save_frequency == 0)

    paths = []
    for goal in range(self._num_goals):
        goal_policy = self._policies[goal]
        with goal_policy.set_deterministic(self._eval_deterministic):
            self._evaluation_environment.set_goal(goal)
            # Only pay the rendering cost when a video will be written.
            render_kwargs = (
                self._eval_render_kwargs if save_video_this_epoch else {})
            paths.append(rollouts(
                self._eval_n_episodes,
                self._evaluation_environment,
                goal_policy,
                self._samplers[goal]._max_path_length,
                render_kwargs=render_kwargs))

    # TODO: interleave videos from different policies
    if save_video_this_epoch:
        # fps derived from the environment timestep; default ~30 fps.
        fps = 1 // getattr(self._evaluation_environment, 'dt', 1 / 30)
        for episode in range(len(paths[0])):
            frames = np.concatenate([
                paths[goal][episode].pop('images')
                for goal in range(self._num_goals)])
            file_name = f'evaluation_path_{self._epoch}_{episode}.mp4'
            save_video(frames,
                       os.path.join(os.getcwd(), 'videos', file_name),
                       fps=fps)

    return paths
def simulate_policy(args):
    """Roll out a pickled environment/policy pair from a checkpoint.

    Args:
        args: Namespace with `checkpoint_path`, `deterministic`,
            `max_path_length`, `num_rollouts`, and `render_mode`.

    Returns:
        The list of rollout paths.

    (Fix: removed the unconditional `pdb.set_trace()` that fired whenever
    `render_mode != 'human'`, which halted every non-interactive run; also
    removed a commented-out Monitor wrapper with a hard-coded user path.)
    """
    # Let TensorFlow grow GPU memory on demand instead of pre-allocating.
    gpu_options = tf.GPUOptions(allow_growth=True)
    session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    tf.keras.backend.set_session(session)
    session = tf.keras.backend.get_session()

    checkpoint_path = args.checkpoint_path.rstrip('/')
    experiment_path = os.path.dirname(checkpoint_path)

    variant_path = os.path.join(experiment_path, 'params.json')
    with open(variant_path, 'r') as f:
        variant = json.load(f)

    with session.as_default():
        pickle_path = os.path.join(checkpoint_path, 'checkpoint.pkl')
        with open(pickle_path, 'rb') as f:
            picklable = pickle.load(f)

    env = picklable['env']
    policy = get_policy_from_variant(variant, env)
    policy.set_weights(picklable['policy_weights'])

    with policy.set_deterministic(args.deterministic):
        paths = rollouts(env=env,
                         policy=policy,
                         path_length=args.max_path_length,
                         n_paths=args.num_rollouts,
                         render_mode=args.render_mode)

    return paths
def simulate_policy(args):
    """Evaluate a checkpoint, optionally routing pixel observations through
    a learned state estimator, and save per-episode videos.

    Args:
        args: Namespace with `checkpoint_path`, `deterministic`,
            `max_path_length`, `num_rollouts`, `render_kwargs`, and
            `use_state_estimator`.

    Returns:
        The list of rollout paths.

    (Fix: removed the unconditional `import ipdb; ipdb.set_trace()` that
    halted every run right after loading the checkpoint; also removed large
    blocks of commented-out experiment-specific dead code.)
    """
    session = tf.keras.backend.get_session()
    checkpoint_path = args.checkpoint_path.rstrip('/')
    experiment_path = os.path.dirname(checkpoint_path)

    variant_path = os.path.join(experiment_path, 'params.pkl')
    with open(variant_path, 'rb') as f:
        variant = pickle.load(f)

    with session.as_default():
        pickle_path = os.path.join(checkpoint_path, 'checkpoint.pkl')
        with open(pickle_path, 'rb') as f:
            picklable = pickle.load(f)

    environment_params = (
        variant['environment_params']['evaluation']
        if 'evaluation' in variant['environment_params']
        else variant['environment_params']['training'])

    if args.use_state_estimator:
        # Feed low-resolution pixels alongside state so the estimator can
        # replace the ground-truth object observations at sampling time.
        environment_params['kwargs'].update({
            'pixel_wrapper_kwargs': {
                'pixels_only': False,
                'normalize': False,
                'render_kwargs': {
                    'width': 32,
                    'height': 32,
                    'camera_id': -1,
                }
            },
            'camera_settings': {
                'azimuth': 180,
                'distance': 0.35,
                'elevation': -55,
                'lookat': (0, 0, 0.03),
            },
        })

    evaluation_environment = get_environment_from_params(environment_params)

    policy = get_policy_from_variant(variant, evaluation_environment)
    policy.set_weights(picklable['policy_weights'])
    # Persist the raw policy weights next to the checkpoint for reuse.
    dump_path = os.path.join(checkpoint_path, 'policy_params.pkl')
    with open(dump_path, 'wb') as f:
        pickle.dump(picklable['policy_weights'], f)

    render_kwargs = {**DEFAULT_RENDER_KWARGS, **args.render_kwargs}

    from softlearning.preprocessors.utils import get_state_estimator_preprocessor
    # TODO: the estimator weights path is machine-specific; make it a CLI
    # argument instead of a hard-coded absolute path.
    state_estimator = get_state_estimator_preprocessor(
        state_estimator_path='/home/justinvyu/dev/softlearning-vice/softlearning/models/state_estimators/state_estimator_fixed_antialias.h5',
        num_hidden_units=256,
        num_hidden_layers=2)
    sampler_kwargs = {
        'state_estimator': state_estimator,
        'replace_state': True,
    }

    with policy.set_deterministic(args.deterministic):
        paths = rollouts(args.num_rollouts,
                         evaluation_environment,
                         policy,
                         path_length=args.max_path_length,
                         render_kwargs=render_kwargs,
                         sampler_kwargs=sampler_kwargs)

    if args.render_kwargs.get('mode') == 'rgb_array':
        fps = 2 // getattr(evaluation_environment, 'dt', 1 / 30)
        for i, path in enumerate(paths):
            video_save_dir = args.checkpoint_path
            video_save_path = os.path.join(video_save_dir, f'episode_{i}.mp4')
            save_video(path['images'], video_save_path, fps=fps)

    return paths
def simulate_policy(args):
    """Evaluate every other checkpoint of an experiment on the hardware rig.

    Iterates checkpoints sorted by index (stride 2), rolls out the policy
    with rig-specific environment overrides, saves per-episode mp4 files
    into each checkpoint directory, and incrementally dumps all collected
    paths to `evaluations/evaluation_paths.pkl`.

    Returns the paths of the last evaluated checkpoint.
    """
    session = tf.keras.backend.get_session()
    checkpoint_path = args.checkpoint_path.rstrip('/')
    experiment_path = os.path.dirname(checkpoint_path)

    variant_path = os.path.join(experiment_path, 'params.pkl')
    with open(variant_path, 'rb') as f:
        variant = pickle.load(f)

    checkpoint_paths = sorted(
        glob.iglob(os.path.join(experiment_path, 'checkpoint_*')),
        key=lambda d: float(d.split("checkpoint_")[1]))

    dump_dir = os.path.join(experiment_path, 'evaluations/')
    if not os.path.exists(dump_dir):
        os.makedirs(dump_dir)

    all_paths = []
    for checkpoint_dir in checkpoint_paths[::2]:
        with session.as_default():
            pickle_path = os.path.join(checkpoint_dir, 'checkpoint.pkl')
            with open(pickle_path, 'rb') as f:
                picklable = pickle.load(f)

        if 'evaluation' in variant['environment_params']:
            environment_params = variant['environment_params']['evaluation']
        else:
            environment_params = variant['environment_params']['training']

        env_kwargs = environment_params['kwargs']
        env_kwargs['device_path'] = '/dev/ttyUSB0'
        env_kwargs['camera_config'] = {
            'topic': '/kinect2_001144463747/qhd/image_color',
            'image_shape': (256, 256, 3)
        }
        # Eight evenly spaced start angles, shifted by -75 degrees and
        # wrapped into [-pi, pi).
        base_angles = np.array([
            0, -np.pi / 4, -np.pi / 2, -3 * np.pi / 4, -np.pi,
            np.pi / 4, np.pi / 2, np.pi * 3 / 4
        ])
        env_kwargs['init_pos_range'] = list(
            (base_angles + (-75 * np.pi / 180)) % (2 * np.pi) - np.pi)
        env_kwargs['target_pos_range'] = [-75 * np.pi / 180]
        env_kwargs['cycle_inits'] = True

        evaluation_environment = get_environment_from_params(
            environment_params)

        policy = get_policy_from_variant(variant, evaluation_environment)
        policy_weights = picklable['policy_weights']
        # Multi-policy algorithms store one weight set per policy; use the
        # first.
        if variant['algorithm_params']['type'] in ('MultiSAC', 'MultiVICEGAN'):
            policy_weights = policy_weights[0]
        policy.set_weights(policy_weights)

        render_kwargs = {**DEFAULT_RENDER_KWARGS, **args.render_kwargs}
        with policy.set_deterministic(args.deterministic):
            paths = rollouts(args.num_rollouts,
                             evaluation_environment,
                             policy,
                             path_length=args.max_path_length,
                             render_kwargs=render_kwargs)

        if render_kwargs.get('mode') == 'rgb_array':
            fps = 2 // getattr(evaluation_environment, 'dt', 1 / 30)
            for i, path in enumerate(paths):
                video_save_path = os.path.join(
                    checkpoint_dir, f'episode_{i}.mp4')
                save_video(path['images'], video_save_path, fps=fps)

        all_paths.append(paths)
        # Rewrite the cumulative dump after each checkpoint so partial
        # results survive an interrupted run.
        with open(os.path.join(dump_dir, 'evaluation_paths.pkl'), 'wb') as f:
            pickle.dump(all_paths, f)

    return paths