def plot_results_from_dump(envName, tmax, tplot):
    """
    Plots regret curves from previously dumped result files.
    Requires result files to be named
    "results/cumRegret_" + envName + "_" + learner.name() + "_" + str(tmax)
    :param envName: name of the environment
    :param tmax: assumes the data have been generated with maximal time horizon tmax.
    :param tplot: the results are plotted only until time horizon tplot.
    :return:
    """
    if envName in bW.registerWorlds:
        regName = (bW.registerWorlds[envName])(0)
        # print("Environment " + envName + " registered as " + regName)
        envName = regName
    envOpt = bW.makeWorld(envName)
    median = []
    quantile1 = []
    quantile2 = []
    learners = []
    names = []
    # The dumps are subsampled every `skip` steps; rebuild the matching
    # time axis and truncate it at tplot.
    skip = max(1, tmax // 1000)
    itimes = [t for t in range(0, tmax, skip)]
    times = [itimes[i] for i in range(len(itimes)) if i * skip < tplot]
    # Declare the list of algorithms (only to recover their names):
    learners.append(
        ucrl.UCRL2(envOpt.observation_space.n, envOpt.action_space.n,
                   delta=0.05))
    learners.append(
        c_ucrl.C_UCRL2(envOpt.observation_space.n, envOpt.action_space.n,
                       envOpt, envOpt.env.classes, delta=0.05))
    for learner in learners:
        names.append(learner.name())
        filename = ("results/cumRegret_" + envName + "_" + learner.name() +
                    "_" + str(tmax))
        with open(filename, 'rb') as file:
            data_j = pickle.load(file)
        # Median and inter-quartile range across replicates, truncated at tplot.
        q = np.quantile(data_j, 0.5, axis=0)
        median.append([q[i] for i in range(len(q)) if i * skip < tplot])
        q = np.quantile(data_j, 0.25, axis=0)
        quantile1.append([q[i] for i in range(len(q)) if i * skip < tplot])
        q = np.quantile(data_j, 0.75, axis=0)
        quantile2.append([q[i] for i in range(len(q)) if i * skip < tplot])
    plotCumulativeRegretsFromDump(names, envName, median, quantile1,
                                  quantile2, times, tplot)
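# Example usage (values are illustrative, not from the experiments): assumes
# result dumps for tmax=100000 already exist under results/, e.g. as produced
# by run_large_exp below.
# plot_results_from_dump("riverSwim", tmax=100000, tplot=50000)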
def demo_animate():
    testName = 'riversail_10'
    envName = (bW.registerWorlds[testName])(0)
    env = bW.makeWorld(envName)
    # -> Choose which learner to use:
    # learner = lr.Random(env)
    # learner = lh.Human(env)
    learner = ucrl.UCRL2(env.observation_space.n, env.action_space.n,
                         delta=0.05)
    # learner = c_ucrl.C_UCRL2(env.observation_space.n, env.action_space.n, env, env.env.classes, delta=0.05)
    animate(env, learner, 50, 'maze')
def multicoreXpsNoRender(envName, learner, nbReplicates, timeHorizon):
    # Run nbReplicates independent experiments in parallel, one fresh
    # environment and one deep copy of the learner per replicate.
    num_cores = multiprocessing.cpu_count()
    envs = []
    learners = []
    timeHorizons = []
    for i in range(nbReplicates):
        envs.append(bW.makeWorld(envName))
        learners.append(copy.deepcopy(learner))
        timeHorizons.append(copy.deepcopy(timeHorizon))
    t0 = time.time()
    cumRewards = Parallel(n_jobs=num_cores)(
        delayed(starOneXp)(i) for i in zip(envs, learners, timeHorizons))
    elapsed = time.time() - t0
    # Wall-clock time of the parallel run, averaged over replicates.
    return cumRewards, elapsed / nbReplicates
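# Example usage (hypothetical values). Assumes `starOneXp`, defined elsewhere
# in this file, unpacks one (env, learner, timeHorizon) tuple into a single
# experiment run, and that "riverSwim" has been registered via bW.registerWorlds.
# env = bW.makeWorld("riverSwim")
# learner = ucrl.UCRL2(env.observation_space.n, env.action_space.n, delta=0.05)
# cumRewards, meanTime = multicoreXpsNoRender("riverSwim", learner,
#                                             nbReplicates=16, timeHorizon=10**4)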
def run_large_exp(envName="riverSwim", timeHorizon=1000, nbReplicates=100):
    if envName in bW.registerWorlds:
        regName = (bW.registerWorlds[envName])(0)
        print("Environment " + envName + " registered as " + regName)
        envName = regName
    envOpt = bW.makeWorld(envName)
    print("Computing an estimate of the optimal policy (for regret)...")
    opti_learner = opt.Opti_controller(envOpt.env, envOpt.observation_space.n,
                                       envOpt.action_space.n)
    print(opti_learner.policy)
    print("*********************************************")
    dump_cumRewardsAlgos = []
    names = []
    meanelapsedtimes = []
    learners = []
    learners.append(
        ucrl.UCRL2(envOpt.observation_space.n, envOpt.action_space.n,
                   delta=0.05))
    learners.append(
        c_ucrl.C_UCRL2(envOpt.observation_space.n, envOpt.action_space.n,
                       envOpt, envOpt.env.classes, delta=0.05))
    for learner in learners:
        names.append(learner.name())
        dump_cumRewards, meanelapsedtime = multicoreXpsNoRenderWithDump(
            envName, learner, nbReplicates, timeHorizon)
        dump_cumRewardsAlgos.append(dump_cumRewards)
        meanelapsedtimes.append(meanelapsedtime)
    # Estimate the optimal average gain on a long run (horizon clipped to
    # [10**4, 10**8]) and dump the corresponding linear cumulative-reward curve.
    opttimeHorizon = min(max(10000, timeHorizon), 10**8)
    cumReward_opti = oneXpNoRender(envOpt, envName, opti_learner,
                                   opttimeHorizon)
    gain = cumReward_opti[-1] / len(cumReward_opti)
    print("Average gain is ", gain)
    opti_reward = [[t * gain for t in range(timeHorizon)]]
    filename = ("results/cumMeans_" + envName + "_" + opti_learner.name() +
                "_" + str(timeHorizon) + "_" + str(time.time()))
    with open(filename, 'wb') as file:
        pickle.dump(opti_reward, file)
    dump_cumRewardsAlgos.append(filename)
    for i in range(len(names)):
        print(str(names[i]), "average runtime is ", meanelapsedtimes[i])
    median, quantile1, quantile2, times = analyzeResults(
        names, dump_cumRewardsAlgos, timeHorizon, envName)
    plotCumulativeRegretsFromDump(names, envName, median, quantile1,
                                  quantile2, times, timeHorizon)
    print("*********************************************")
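# A possible entry point for this script, assuming no __main__ guard exists
# elsewhere in the file; the parameter values below are illustrative only.
if __name__ == "__main__":
    run_large_exp(envName="riverSwim", timeHorizon=10000, nbReplicates=16)
    # plot_results_from_dump("riverSwim", tmax=10000, tplot=10000)
    # demo_animate()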