def run_single_run(iw_parameters, iw_variants, run_num, Domain, sim_dt, sim_budget, horizon, numberRollouts, runNum, seed):
    """Run one seeded episode of a UCT lookahead agent and pickle its score.

    Args:
        iw_parameters: Sequence of keyword-argument dicts; entry ``run_num``
            is unpacked into ``wizluk.policies.UCT``.
        iw_variants: Dict whose ``'Name'`` list supplies, at index
            ``run_num``, the variant name used in the output file name.
        run_num: Index of the variant to run.
        Domain: Gym environment id passed to ``gym.make``.
        sim_dt: Only used here to build the output file name.
        sim_budget: Only used here to build the output file name.
        horizon: Maximum number of environment steps in the episode; also
            part of the output file name.
        numberRollouts: Only used here to build the output file name.
        runNum: Repetition index; only used to build the output file name.
        seed: Seed applied to ``numpy.random``, the environment and
            ``random``.

    Returns:
        None. The accumulated reward is pickled to a ``.dat`` file under
        ``../results/`` (relative to the current working directory).
    """
    # Everything the function needs is imported locally so the body does not
    # depend on module-level names. `copy`, `pickle` and `traceback` were
    # used but never imported here, which is a NameError unless the module
    # happens to import them.
    import copy
    import pickle
    import random
    import sys
    import traceback
    import warnings

    import gym
    import numpy as np
    import wizluk
    import wizluk.envs
    import wizluk.policies

    def warn_with_traceback(message, category, filename, lineno, file=None, line=None):
        # Print the current stack before the formatted warning so the origin
        # of the warning can be located.
        log = file if hasattr(file, 'write') else sys.stderr
        traceback.print_stack(file=log)
        log.write(warnings.formatwarning(message, category, filename, lineno, line))

    # Replaces the warning hook for the whole process, not just this call.
    warnings.showwarning = warn_with_traceback

    wizluk.setup_logger("iw_gridworld_v1.log")

    env = gym.make(Domain)
    np.random.seed(seed)
    env.seed(seed)
    random.seed(seed)

    policy = wizluk.policies.UCT(**(iw_parameters[run_num]))
    # NOTE(review): the agent's `domain` label is hard-coded and does not
    # follow `Domain` -- confirm this is intended.
    agent = wizluk.agents.LookaheadAgent(env, policy, name='IW_Rollout', domain='GridWorld-16x16-v1')
    evaluation_stats = {}
    agent.init_evaluation_statistics(evaluation_stats)

    # Number of scalar entries in one observation; used to flatten
    # observations to shape [1, flat_size] before they are handed to the agent.
    flat_size = np.prod(env.observation_space.shape)

    x = env.reset()
    x0 = copy.deepcopy(x)
    agent.start_episode()
    score = 0.0
    for step in range(horizon):
        wizluk.logger.debug("action number: {}".format(step))
        x_flat = np.reshape(x, [1, flat_size])
        u = agent.get_action(x)
        x_next, reward, done, _info = env.step(u)
        x_next_flat = np.reshape(x_next, [1, flat_size])
        agent.observe_transition(x_flat, u, reward, x_next_flat, done, False)
        x = x_next
        score += reward
        if done:
            break
    agent.stop_episode()
    agent.collect_evaluation_statistics(evaluation_stats, x0)

    result_path = '../results/{}_{}_simdt_{}_simBud_{}_Horizon_{}_numRoll_{}_runNum_{}.dat'.format(
        iw_variants['Name'][run_num], Domain, sim_dt, sim_budget, horizon, numberRollouts, runNum)
    with open(result_path, 'wb') as output:
        pickle.dump(score, output, pickle.HIGHEST_PROTOCOL)
def run_experiment(Domain, sim_dt, sim_budget, horizon, numberRollouts, N, seeds, seed, includeHeur, includeKuth):
    """Launch ``N`` remote runs for every selected variant and wait for them.

    Args:
        Domain: Gym environment id forwarded to each run.
        sim_dt: Stored as the ``"budget"`` entry of every variant's
            parameters and forwarded to each run.
        sim_budget: Stored as the ``"sim_budget"`` parameter and forwarded.
        horizon: Stored as the ``"horizon"`` parameter and forwarded.
        numberRollouts: Stored as ``"num_rollouts"`` for the Knuth variant
            and forwarded to each run.
        N: Number of repetitions launched per variant.
        seeds: Indexable collection of seeds. It is read at ``seeds[seed]``
            and at ``seedStart + k * seedSkip`` with ``seedStart`` drawn from
            0..50000, ``seedSkip`` from 1..201 and ``k < N``, so it must be
            long enough to cover those indices.
        seed: Index into ``seeds`` of the value that seeds ``random`` before
            the per-variant offsets are drawn.
        includeHeur: When truthy, schedule the ``"OneStep_heur"`` variant.
        includeKuth: When truthy, schedule the ``"OneStep_knuth"`` variant.

    Returns:
        Always ``0``, after every scheduled run has completed.
    """
    # `traceback` was used by the warning hook below without being imported
    # in this scope; import it locally like the function's other
    # dependencies. `ray` is still expected to come from module scope.
    import gc
    import random
    import sys
    import traceback
    import warnings

    import wizluk

    def warn_with_traceback(message, category, filename, lineno, file=None, line=None):
        # Print the current stack before the formatted warning so the origin
        # of the warning can be located.
        log = file if hasattr(file, 'write') else sys.stderr
        traceback.print_stack(file=log)
        log.write(warnings.formatwarning(message, category, filename, lineno, line))

    # Replaces the warning hook for the whole process, not just this call.
    warnings.showwarning = warn_with_traceback

    wizluk.setup_logger("iw_gridworld_v1.log")

    # Entries shared by every variant's parameter dict.
    shared_parameters = {
        "budget": sim_dt,
        "sim_budget": sim_budget,
        "horizon": horizon,
        "atari": "True",
        "caching": "Partial",
    }
    iw_depth_knuth_parameters = dict(
        shared_parameters,
        cost_to_go_est="knuth",
        num_rollouts=numberRollouts,
    )
    iw_depth_heur_parameters = dict(
        shared_parameters,
        cost_to_go_est="heuristic",
    )

    # Variant names and their parameter dicts are kept index-aligned.
    iw_variants = {'Name': []}
    iw_parameters = []
    if includeKuth:
        iw_variants['Name'].append("OneStep_knuth")
        iw_parameters.append(iw_depth_knuth_parameters)
    if includeHeur:
        iw_variants['Name'].append("OneStep_heur")
        iw_parameters.append(iw_depth_heur_parameters)

    listOfRuns = []
    random.seed(seeds[seed])
    for run_num in range(len(iw_parameters)):
        # Each variant walks `seeds` from its own random start with its own
        # random stride. The draw order (start, then stride) is kept so the
        # same seeds are selected as before.
        seedStart = random.randint(0, 50000)
        seedSkip = random.randint(1, 201)
        for k in range(N):
            listOfRuns.append(run_single_run.remote(
                iw_parameters, iw_variants, run_num, Domain, sim_dt, sim_budget,
                horizon, numberRollouts, k, seeds[seedStart + k * seedSkip]))

    # Block until every scheduled run has finished.
    ray.get(listOfRuns)
    gc.collect()
    return 0