def allPolicyFiles(log_dir):
    """
    List every saved model checkpoint found under a log directory.

    :param log_dir: (str) path to the experiment's log directory
    :return: (np.ndarray, np.ndarray) episode ids and checkpoint directories,
        sorted by episode number (np.array promotes both columns to strings)
    """
    printYellow(log_dir)
    files = glob.glob(os.path.join(log_dir, 'model_*'))

    files_list = []
    for file in files:
        # The episode number is the suffix of the checkpoint directory name
        eps = int(file.split('_')[-1])
        files_list.append((eps, file + '/'))

    # Sort checkpoints by episode number
    files_list.sort(key=lambda val: val[0])
    res = np.array(files_list)
    return res[:, 0], res[:, 1]
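# Illustrative sketch (not part of the original API): consume the two arrays
# returned by allPolicyFiles to cross-evaluate every checkpoint on one task.
# The helper name `evalAllCheckpoints` is an assumption made for this example;
# note that np.array() promotes the episode ids to strings, hence the int() cast.
def evalAllCheckpoints(log_dir, task, num_timesteps=2000):
    """
    Hypothetical helper: run policyCrossEval on every checkpoint under log_dir
    and stack the per-episode reward rows into one array.
    """
    episodes, model_paths = allPolicyFiles(log_dir)
    rewards = []
    for episode, model_path in zip(episodes, model_paths):
        reward, ok = policyCrossEval(log_dir, task, int(episode), model_path,
                                     num_timesteps=num_timesteps)
        if ok:
            rewards.append(reward)
    return np.array(rewards)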
def policyCrossEval(log_dir, task, episode, model_path, num_timesteps=2000, num_cpu=1, seed=0):
    """
    Cross-evaluate a saved policy on a different task.
    Intended for (near) real-time cross evaluation of checkpoints.

    :param log_dir: (str) path to the experiment's log directory
    :param task: (str) the task to evaluate the policy on
    :param episode: (int) the episode at which the model was saved
    :param model_path: (str) path to the saved model
    :param num_timesteps: (int) how many timesteps to evaluate the policy for
    :param num_cpu: (int) number of parallel environments
    :param seed: (int) random seed for the environment
    :return: (np.ndarray, bool) the evaluation rewards prefixed with the
        episode number, and whether the evaluation ran
    """
    train_args, algo_name, algo_class, srl_model_path, env_kwargs = loadConfigAndSetup(log_dir)
    env_kwargs = EnvsKwargs(task, env_kwargs)

    printGreen(
        "Evaluation from the model saved at: {}, with evaluation time steps: {}".format(model_path, num_timesteps))

    log_dir, environment, algo_args = createEnv(log_dir, train_args, algo_name, algo_class, env_kwargs,
                                                num_cpu=num_cpu, seed=seed)

    reward = policyEval(environment, model_path, log_dir, algo_class, algo_args, num_timesteps, num_cpu)

    # Prepend the episode number so it is stored alongside the rewards
    # (costs one extra slot per row)
    reward = np.append(episode, reward)
    return reward, True
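# Illustrative sketch: the rows produced by policyCrossEval carry the episode
# number in column 0 and the rewards in the remaining columns. The helper name
# `splitEpisodeColumn` is an assumption made for this example.
def splitEpisodeColumn(reward_rows):
    """
    Hypothetical helper: separate the prepended episode numbers from the
    reward values in an array of policyCrossEval results.
    """
    reward_rows = np.asarray(reward_rows)
    return reward_rows[:, 0], reward_rows[:, 1:]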
def newPolicy(episodes, file_path):
    """
    Check whether a checkpoint that has not been evaluated yet is available.

    :param episodes: (list) episode numbers that have already been evaluated
    :param file_path: (str) path to the experiment's log directory
    :return: (int, str, bool) the new episode number, the path to its model,
        and whether a new policy was found
    """
    train_args, algo_name, algo_class, srl_model_path, env_kwargs = loadConfigAndSetup(file_path)
    episode, model_path, OK = latestPolicy(file_path, algo_name)
    if episode in episodes:
        # The latest checkpoint has already been evaluated
        return -1, '', False
    else:
        return episode, model_path, True
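# Illustrative sketch: poll newPolicy until an unevaluated checkpoint appears,
# then cross-evaluate it. The helper name, the poll interval and the local
# `time` import are assumptions made for this example.
def waitForNewPolicy(log_dir, evaluated_episodes, task, poll_seconds=60):
    """
    Hypothetical helper: block until training saves a checkpoint that is not
    in evaluated_episodes, then run policyCrossEval on it.
    """
    import time  # local import so the sketch stays self-contained
    while True:
        episode, model_path, ok = newPolicy(evaluated_episodes, log_dir)
        if ok:
            return policyCrossEval(log_dir, task, episode, model_path)
        time.sleep(poll_seconds)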