def obtain_samples(self, itr, reset_args=None, return_dict=False, log_prefix=''):
    """Collect rollouts for every policy in ``policy_list`` across ``n_envs`` tasks.

    Args:
        itr: Iteration number (unused here; kept for interface compatibility).
        reset_args: Per-env reset arguments; a single value is broadcast to all envs.
        return_dict: If True, return the raw ``{"<policy>_<env>": paths}`` dict;
            otherwise flatten all paths into a single list.
        log_prefix: Prefix for the tabular timing log key.

    Returns:
        Dict of path lists keyed by ``"<policy_idx>_<env_idx>"``, or a flat list
        of paths when ``return_dict`` is False.
    """
    import time

    # Snapshot the pre-update parameters so they can be restored at the end.
    init_policy_params_list = cur_policy_params_list = [
        policy.get_param_values() for policy in self.algo.policy_list
    ]
    if hasattr(self.algo.env, "get_param_values"):
        try:
            cur_env_params = self.algo.env.get_param_values()
        except Exception:  # best-effort: env params are optional
            cur_env_params = None
    else:
        cur_env_params = None

    start = time.time()
    # Broadcast a single reset_arg to every environment.
    if not isinstance(reset_args, (list, np.ndarray)):
        reset_args = [reset_args] * self.n_envs

    if self.algo.policy_list[0].all_param_vals is not None:
        # Post-update (task-adapted) parameters: one parameter set per env.
        cur_policy_params_list = [
            [flatten_tensors(x.values()) for x in policy.all_param_vals]
            for policy in self.algo.policy_list
        ]
    else:
        # Pre-update: reuse the same parameter vector for every env.
        cur_policy_params_list = [
            [cur_policy_params] * self.n_envs
            for cur_policy_params in cur_policy_params_list
        ]

    # Do tasks sequentially and parallelize within rollouts per task.
    paths = {}
    for n in range(len(self.algo.policy_list)):
        for i in range(self.n_envs):
            paths[str(n) + "_" + str(i)] = parallel_sampler.sample_paths(
                policy_params=cur_policy_params_list[n][i],
                env_params=cur_env_params,
                # NOTE(review): true division yields a float on Python 3 —
                # presumably an int budget was intended; confirm downstream.
                max_samples=self.algo.batch_size / self.n_envs,
                max_path_length=self.algo.max_path_length,
                scope=self.algo.scope,
                reset_arg=reset_args[i],
                show_prog_bar=False,
            )
    total_time = time.time() - start
    logger.record_tabular(log_prefix + "TotalExecTime", total_time)

    if not return_dict:
        paths = [path for sublist in paths.values() for path in sublist]

    # Restore the pre-update parameters on every policy.
    for n in range(len(self.algo.policy_list)):
        self.algo.policy_list[n].set_param_values(init_policy_params_list[n])

    # Currently don't support partial paths (add truncation code if desired).
    assert self.algo.whole_paths
    return paths
def obtain_samples(self, itr, reset_args=None, return_dict=False, log_prefix=''):
    """Sample rollouts for each of ``n_envs`` tasks with the current policy.

    Args:
        itr: Iteration number (unused here; kept for interface compatibility).
        reset_args: Per-env reset arguments; a single value is broadcast to all envs.
        return_dict: If True, return the raw ``{env_idx: paths}`` dict;
            otherwise flatten all paths into a single list.
        log_prefix: Prefix for the tabular timing log key.

    Returns:
        Dict of path lists keyed by env index, or a flat list of paths.
    """
    import time

    # Snapshot pre-update parameters so they can be restored at the end.
    init_policy_params = cur_policy_params = self.algo.policy.get_param_values()
    if hasattr(self.algo.env, "get_param_values"):
        try:
            cur_env_params = self.algo.env.get_param_values()
        except Exception:  # best-effort: env params are optional
            cur_env_params = None
    else:
        cur_env_params = None

    start = time.time()
    # Broadcast a single reset_arg to every environment.
    if not isinstance(reset_args, (list, np.ndarray)):
        reset_args = [reset_args] * self.n_envs

    if self.algo.policy.all_param_vals:
        # Post-update (task-adapted) parameters: one parameter set per env.
        cur_policy_params = [flatten_tensors(x.values())
                             for x in self.algo.policy.all_param_vals]
    else:
        cur_policy_params = [cur_policy_params] * self.n_envs

    # assume that n_envs = num parallel
    if self.n_envs == parallel_sampler.singleton_pool.n_parallel:
        # 1 thread per env (multi-task path) — known-broken and disabled.
        raise NotImplementedError('this implementation is buggy.')
        # NOTE(review): everything below the raise is unreachable; kept only
        # as a reference for a future fixed multi-task implementation.
        paths = parallel_sampler.sample_paths(
            policy_params=cur_policy_params,
            env_params=cur_env_params,
            max_samples=self.algo.batch_size,
            max_path_length=self.algo.max_path_length,
            scope=self.algo.scope,
            reset_arg=reset_args,
            show_prog_bar=True,
            multi_task=True,
        )
    else:
        # Do tasks sequentially and parallelize within rollouts per task.
        paths = {}
        for i in range(self.n_envs):
            paths[i] = parallel_sampler.sample_paths(
                policy_params=cur_policy_params[i],
                env_params=cur_env_params,
                # NOTE(review): true division yields a float on Python 3 —
                # presumably an int budget was intended; confirm downstream.
                max_samples=self.algo.batch_size / self.n_envs,
                max_path_length=self.algo.max_path_length,
                scope=self.algo.scope,
                reset_arg=reset_args[i],
                show_prog_bar=False,
            )
    total_time = time.time() - start
    logger.record_tabular(log_prefix + "TotalExecTime", total_time)

    if not return_dict:
        paths = [path for sublist in paths.values() for path in sublist]

    # Restore the pre-update parameters.
    self.algo.policy.set_param_values(init_policy_params)

    # Currently don't support partial paths (if desired, truncate paths).
    assert self.algo.whole_paths
    return paths
def obtain_samples(self, itr, reset_args=None, return_dict=False, log_prefix=''):
    """Sample rollouts for each of ``n_envs`` tasks with the current policy.

    Args:
        itr: Iteration number (unused here; kept for interface compatibility).
        reset_args: Per-env reset arguments; a single value is broadcast to all envs.
        return_dict: If True, return the raw ``{env_idx: paths}`` dict;
            otherwise flatten all paths into a single list.
        log_prefix: Prefix for the tabular timing log key.

    Returns:
        Dict of path lists keyed by env index, or a flat list of paths.
    """
    import time

    # Snapshot pre-update parameters so they can be restored at the end.
    init_policy_params = cur_policy_params = self.algo.policy.get_param_values()
    if hasattr(self.algo.env, "get_param_values"):
        try:
            cur_env_params = self.algo.env.get_param_values()
        except Exception:  # best-effort: env params are optional
            cur_env_params = None
    else:
        cur_env_params = None

    start = time.time()
    # Broadcast a single reset_arg to every environment.
    if not isinstance(reset_args, (list, np.ndarray)):
        reset_args = [reset_args] * self.n_envs

    # This following block is already handled by get_param_values.
    if hasattr(self.algo.policy, 'all_param_vals') and self.algo.policy.all_param_vals:
        # NOTE(review): this produces a length-1 list, so the loop below
        # would IndexError for n_envs > 1 — looks like a latent bug; confirm
        # whether all_param_vals is ever set on this code path.
        cur_policy_params = [flatten_tensors(self.algo.policy.all_param_vals.values())]
    else:
        cur_policy_params = [cur_policy_params] * self.n_envs

    # Do tasks sequentially and parallelize within rollouts per task.
    paths = {}
    for i in range(self.n_envs):
        paths[i] = parallel_sampler.sample_paths(
            policy_params=cur_policy_params[i],
            env_params=cur_env_params,
            # NOTE(review): true division yields a float on Python 3 —
            # presumably an int budget was intended; confirm downstream.
            max_samples=self.algo.batch_size / self.n_envs,
            max_path_length=self.algo.max_path_length,
            scope=self.algo.scope,
            reset_arg=reset_args[i],
            show_prog_bar=False,
        )
    total_time = time.time() - start
    logger.record_tabular(log_prefix + "TotalExecTime", total_time)

    if not return_dict:
        paths = [path for sublist in paths.values() for path in sublist]

    # Restore the pre-update parameters.
    self.algo.policy.set_param_values(init_policy_params)

    # Currently don't support partial paths (add truncation code if desired).
    assert self.algo.whole_paths
    return paths
def obtain_samples(self, itr, reset_args=None, return_dict=False, log_prefix='', extra_input=None, extra_input_dim=None, save_img_obs=False, preupdate=True):
    """Sample rollouts for each of ``n_envs`` tasks with the current policy.

    Args:
        itr: Iteration number (unused here; kept for interface compatibility).
        reset_args: Per-env reset arguments; a single value is broadcast to all envs.
        return_dict: If True, return the raw ``{env_idx: paths}`` dict;
            otherwise flatten all paths into a single list.
        log_prefix: Prefix for the tabular timing log key.
        extra_input: Unsupported in this sampler; must be None.
        extra_input_dim: Unused (paired with ``extra_input``).
        save_img_obs: Unused here; kept for interface compatibility.
        preupdate: Must be True; the post-update path is not implemented.

    Returns:
        Dict of path lists keyed by env index, or a flat list of paths.
    """
    import time

    # Guard the unimplemented feature paths explicitly.
    if extra_input is not None:
        assert False, "not implemented"
    if not preupdate:
        assert False, "not implemented"

    # Snapshot pre-update parameters so they can be restored at the end.
    init_policy_params = cur_policy_params = self.algo.policy.get_param_values()
    if hasattr(self.algo.env, "get_param_values"):
        try:
            cur_env_params = self.algo.env.get_param_values()
        except Exception:  # best-effort: env params are optional
            cur_env_params = None
    else:
        cur_env_params = None

    start = time.time()
    # Broadcast a single reset_arg to every environment.
    if not isinstance(reset_args, (list, np.ndarray)):
        reset_args = [reset_args] * self.n_envs

    # TODO: RK, need to make this less hacky and still work with non-maml policies
    if hasattr(self.algo.policy, 'all_param_vals') and self.algo.policy.all_param_vals:
        # Post-update (task-adapted) parameters: one parameter set per env.
        cur_policy_params = [flatten_tensors(x.values())
                             for x in self.algo.policy.all_param_vals]
    else:
        cur_policy_params = [cur_policy_params] * self.n_envs

    # Do tasks sequentially and parallelize within rollouts per task.
    paths = {}
    for i in range(self.n_envs):
        paths[i] = parallel_sampler.sample_paths(
            policy_params=cur_policy_params[i],
            env_params=cur_env_params,
            # NOTE(review): true division yields a float on Python 3 —
            # presumably an int budget was intended; confirm downstream.
            max_samples=self.algo.batch_size / self.n_envs,
            max_path_length=self.algo.max_path_length,
            scope=self.algo.scope,
            reset_arg=reset_args[i],
            show_prog_bar=False,
        )
    total_time = time.time() - start
    logger.record_tabular(log_prefix + "TotalExecTime", total_time)

    if not return_dict:
        paths = [path for sublist in paths.values() for path in sublist]

    # Restore the pre-update parameters.
    self.algo.policy.set_param_values(init_policy_params)

    # Currently don't support partial paths (add truncation code if desired).
    assert self.algo.whole_paths
    return paths
def obtain_samples(self, itr, reset_args=None, return_dict=False, log_prefix=''):
    """Sample rollouts for each of ``n_envs`` tasks with the current policy.

    Args:
        itr: Iteration number (unused here; kept for interface compatibility).
        reset_args: Per-env reset arguments; a single value is broadcast to all envs.
        return_dict: If True, return the raw ``{env_idx: paths}`` dict;
            otherwise flatten all paths into a single list.
        log_prefix: Prefix for the tabular timing log key.

    Returns:
        Dict of path lists keyed by env index, or a flat list of paths.
    """
    import time

    # Snapshot pre-update parameters so they can be restored at the end.
    init_policy_params = cur_policy_params = self.algo.policy.get_param_values()
    if hasattr(self.algo.env, "get_param_values"):
        try:
            cur_env_params = self.algo.env.get_param_values()
        except Exception:  # best-effort: env params are optional
            cur_env_params = None
    else:
        cur_env_params = None

    start = time.time()
    # Broadcast a single reset_arg to every environment.
    if not isinstance(reset_args, (list, np.ndarray)):
        reset_args = [reset_args] * self.n_envs

    if self.algo.policy.all_param_vals:
        # Post-update (task-adapted) parameters: one parameter set per env.
        cur_policy_params = [flatten_tensors(x.values())
                             for x in self.algo.policy.all_param_vals]
    else:
        cur_policy_params = [cur_policy_params] * self.n_envs

    # Do tasks sequentially and parallelize within rollouts per task.
    paths = {}
    for i in range(self.n_envs):
        paths[i] = parallel_sampler.sample_paths(
            policy_params=cur_policy_params[i],
            env_params=cur_env_params,
            # NOTE(review): true division yields a float on Python 3 —
            # presumably an int budget was intended; confirm downstream.
            max_samples=self.algo.batch_size / self.n_envs,
            max_path_length=self.algo.max_path_length,
            scope=self.algo.scope,
            reset_arg=reset_args[i],
            show_prog_bar=False,
        )
    total_time = time.time() - start
    logger.record_tabular(log_prefix + "TotalExecTime", total_time)

    if not return_dict:
        paths = [path for sublist in paths.values() for path in sublist]

    # Restore the pre-update parameters.
    self.algo.policy.set_param_values(init_policy_params)

    # Currently don't support partial paths (add truncation code if desired).
    assert self.algo.whole_paths
    return paths
def get_param_values(self, all_params=False, **tags):
    """Evaluate the selected parameter tensors and return them as one flat vector.

    ``all_params`` and ``**tags`` are forwarded to ``get_params`` to pick
    which variables are read from the default TF session.
    """
    selected = self.get_params(all_params, **tags)
    return flatten_tensors(tf.get_default_session().run(selected))
def get_param_values(self, **tags):
    """Read the selected parameters via this object's session, flattened into one vector."""
    return flatten_tensors(self._sess.run(self.get_params(**tags)))
def get_param_values(self, **tags):
    """Evaluate the selected parameter tensors in the default TF session.

    Returns a single flat vector produced by ``flatten_tensors``.
    """
    session = tf.get_default_session()
    values = session.run(self.get_params(**tags))
    return flatten_tensors(values)
def eval_loss_grad(params):
    # Gradient callback for an external optimizer: load the flat parameter
    # vector into the policy, evaluate the loss gradient on the enclosing
    # scope's `input` batch, and return it flattened as float64.
    self.policy.set_param_values(params, trainable=True)
    grad = f_loss_grad(*input)  # `f_loss_grad` and `input` come from the enclosing scope
    flattened_grad = tensor_utils.flatten_tensors(list(map(np.asarray, grad)))
    return flattened_grad.astype(np.float64)
def get_param_values(self, **tags):
    """Read each selected Theano shared variable (borrowed, no copy) and flatten.

    Returns a single flat vector produced by ``flatten_tensors``.
    """
    shared_vars = self.get_params(**tags)
    values = [var.get_value(borrow=True) for var in shared_vars]
    return flatten_tensors(values)
def get_param_values(self, **tags):
    """Run the selected parameter tensors in the default TF session and flatten them."""
    tensors = self.get_params(**tags)
    return flatten_tensors(tf.get_default_session().run(tensors))
def get_param_values(self, **tags):
    """Collect each selected Theano shared variable's value (borrowed) into one flat vector."""
    values = []
    for shared_var in self.get_params(**tags):
        values.append(shared_var.get_value(borrow=True))
    return flatten_tensors(values)
def eval_loss_grad(params):
    # Gradient callback for an external optimizer: load the flat parameter
    # vector into the policy, evaluate the loss gradient on the enclosing
    # scope's `input` batch, and return it flattened as float64.
    self.policy.set_param_values(params, trainable=True)
    grad = f_loss_grad(*input)  # `f_loss_grad` and `input` come from the enclosing scope
    flattened_grad = tensor_utils.flatten_tensors(
        list(map(np.asarray, grad)))
    return flattened_grad.astype(np.float64)
def get_param_values(self, **tags):
    """Flatten the current values of the selected TF parameters.

    Drops into the debugger first when the global ``config.TF_NN_SETTRACE``
    flag is enabled.
    """
    if config.TF_NN_SETTRACE:
        ipdb.set_trace()  # opt-in debug hook, controlled by global config
    selected = self.get_params(**tags)
    return flatten_tensors(tf.get_default_session().run(selected))
def obtain_samples(self, itr, reset_args=None, return_dict=False, log_prefix='', extra_input=None, extra_input_dim=None, save_img_obs=False, preupdate=True, numTrajs_perTask=None):
    """Sample rollouts for each of ``n_envs`` tasks, optionally with extra inputs.

    Args:
        itr: Iteration number (unused here; kept for interface compatibility).
        reset_args: Per-env reset arguments; a single value is broadcast to all envs.
        return_dict: If True, return the raw ``{env_idx: paths}`` dict;
            otherwise flatten all paths into a single list.
        log_prefix: Prefix for the tabular timing log key.
        extra_input: Optional exploration-input mode; one of
            "onehot_exploration", "gaussian_exploration", "onehot_hacked".
        extra_input_dim: Dimension paired with ``extra_input``.
        save_img_obs: Unused here; kept for interface compatibility.
        preupdate: Forwarded to the sampler inside ``extra_infos``.
        numTrajs_perTask: If given, keep only the first N trajectories per task.

    Returns:
        Dict of path lists keyed by env index, or a flat list of paths.
    """
    import time

    # if not preupdate:
    #     assert False, "not implemented"

    init_policy_params = cur_policy_params = self.algo.policy.get_param_values()
    if hasattr(self.algo.env, "get_param_values"):
        try:
            cur_env_params = self.algo.env.get_param_values()
        except Exception:  # best-effort: env params are optional
            cur_env_params = None
    else:
        cur_env_params = None

    start = time.time()
    # Broadcast a single reset_arg to every environment.
    if not isinstance(reset_args, (list, np.ndarray)):
        reset_args = [reset_args] * self.n_envs
    cur_policy_params = [cur_policy_params] * self.n_envs

    # Do tasks sequentially and parallelize within rollouts per task.
    paths = {}
    all_param_vals_list = self.algo.policy.all_param_vals
    if extra_input is None:
        extra_infos = None
    else:
        assert extra_input in [
            "onehot_exploration", 'gaussian_exploration', 'onehot_hacked'
        ]
        extra_infos = [extra_input, extra_input_dim, preupdate]

    for i in range(self.n_envs):
        if self.algo.policy.all_param_vals is None:
            # Pre-update: shared parameter vector for every task.
            policy_params = cur_policy_params[i]
        else:
            # Post-update: task-adapted parameters for task i.
            policy_params = flatten_tensors(all_param_vals_list[i].values())
        paths_i = parallel_sampler.sample_paths(
            policy_params=policy_params,
            env_params=cur_env_params,
            # NOTE(review): true division yields a float on Python 3 —
            # presumably an int budget was intended; confirm downstream.
            max_samples=self.algo.batch_size / self.n_envs,
            max_path_length=self.algo.max_path_length,
            scope=self.algo.scope,
            reset_arg=reset_args[i],
            taskIdx=i,
            show_prog_bar=False,
            extra_infos=extra_infos)
        if numTrajs_perTask is not None:
            paths[i] = paths_i[:numTrajs_perTask]
        else:
            paths[i] = paths_i

    total_time = time.time() - start
    logger.record_tabular(log_prefix + "TotalExecTime", total_time)

    if not return_dict:
        paths = [path for sublist in paths.values() for path in sublist]

    # Deliberately NOT restoring init_policy_params here (unlike sibling
    # samplers) — the original left this disabled; confirm before re-enabling.
    # self.algo.policy.set_param_values(init_policy_params)

    # Currently don't support partial paths (add truncation code if desired).
    assert self.algo.whole_paths
    return paths