def main():
    """Compute Laplacian eigen-data for grid worlds 1-4 and train eigen-policies.

    For every environment size the function:
      1. builds the grid world and a ``Laplacian`` model for it,
      2. estimates the Laplacian with a per-size iteration budget,
      3. saves the eigen-data under ``data/dat<num>/`` and loads it back,
      4. maps ``trainLaplace`` over ``range(numMod[num - 1])`` in a
         process pool.

    ``globalNum``, ``env`` and ``laplaceModel`` are published as module
    globals before the pool is created.
    NOTE(review): presumably ``trainLaplace`` reads these globals inside the
    worker processes -- confirm against its definition.
    """
    global globalNum, env, laplaceModel

    iters = [int(50e4), int(50e4), int(10e6), int(10e6)]
    numMod = [5, 5, 10, 10]
    NUM_CORES = 8

    # iterate over 4 envs
    for num in range(1, 5):
        print(num)
        globalNum = num
        env = gridWorld1(size=num)
        laplaceModel = Laplacian(env)
        laplaceModel.getLaplacian(iters=iters[num - 1])

        # The same three files are written and then read back.
        dataDir = "data/dat" + str(num)
        eigenPaths = (
            dataDir + "/eigenval" + str(num) + ".csv",
            dataDir + "/eigenvec" + str(num) + ".csv",
            dataDir + "/eigenMap" + str(num) + ".pkl",
        )
        laplaceModel.saveEigenVec(*eigenPaths)

        # Load laplacian model
        laplaceModel.loadEigenVecs(*eigenPaths)

        # Train policies for eigen-vectors (train in parallel).
        # The pool is created after the globals above are set and is shut
        # down before the next environment, so workers do not pile up.
        procPool = Pool(NUM_CORES)
        try:
            procPool.map(trainLaplace, range(numMod[num - 1]))
        finally:
            procPool.close()
            procPool.join()
def main():
    """Render eigenvector and successor-representation plots for grid worlds 1-4.

    For each environment size this loads the stored eigen-data and plots
    eigenvector indices 0-19, then loads the stored successor matrix and
    plots it for 20 randomly drawn states.
    """
    for num in range(1, 5):
        env = gridWorld1(size=num)
        laplaceModel = Laplacian(env)
        successorModel = Successor(env)
        dataDir = "data/dat{}".format(num)

        print("Laplacian framework")
        laplaceModel.loadEigenVecs(
            "{}/eigenval{}.csv".format(dataDir, num),
            "{}/eigenvec{}.csv".format(dataDir, num),
            "{}/eigenMap{}.pkl".format(dataDir, num))
        for eigIdx in range(20):
            laplaceModel.plotSingleEigen(
                eigIdx,
                "images/eigenvec{}/eigen{}.png".format(num, eigIdx))

        print("Successor framework")
        successorModel.loadSuccessor(
            "{}/successor{}.csv".format(dataDir, num))
        numStates = len(successorModel.keys)
        for plotIdx in range(20):
            # Keep drawing state indices until the corresponding cell of
            # env.room is not 1 (presumably 1 marks a wall -- confirm).
            while True:
                ind = np.random.randint(numStates)
                pos = successorModel.revMap[ind]
                if env.room[pos[0]][pos[1]] != 1:
                    break
            successorModel.plotSuccessorState(
                ind,
                "images/successor{}/sr2d{}.png".format(num, plotIdx),
                "images/successor{}/sr3d{}.png".format(num, plotIdx))
def main():
    """Cluster stored successor representations and build one policy per medoid.

    Command-line options:
      --cluster  int, forwarded as the third argument of
                 ``clusterRepresentation`` (default 2).
      --norm     int, forwarded as the fourth argument of
                 ``clusterRepresentation`` (default 0).

    For each grid world 1-4 the stored successor matrix is loaded and
    clustered into ``numMod[num - 1]`` groups. The labels are saved to
    ``data/dat<num>/successorLabels<num>.pkl``, and ``buildSRPolicy`` is
    mapped over the medoid indices in a process pool.

    ``srModel``, ``env`` and ``globNum`` are published as module globals
    before the pool is created.
    NOTE(review): presumably ``buildSRPolicy`` reads them in the workers --
    confirm against its definition.
    """
    global srModel, env, globNum

    parser = argparse.ArgumentParser(description='Get SR policies')
    parser.add_argument('--cluster', type=int, default=2)
    parser.add_argument('--norm', type=int, default=0)
    args = parser.parse_args()

    numMod = [4, 5, 10, 10]

    for num in range(1, 5):
        print("Env : " + str(num))
        env = gridWorld1(size=num)
        globNum = num
        srModel = Successor(env)

        dataDir = "data/dat" + str(num)
        srModel.loadSuccessor(dataDir + "/successor" + str(num) + ".csv")
        srModel.clusterRepresentation(numMod[num - 1], "images/tmp/sr.png",
                                      args.cluster, args.norm)
        srModel.saveLabels(dataDir + "/successorLabels" + str(num) + ".pkl")

        # One pool per environment, created after the globals are set and
        # shut down before the next environment so workers do not leak.
        procPool = Pool(12)
        try:
            procPool.map(buildSRPolicy, range(len(srModel.medoids)))
        finally:
            procPool.close()
            procPool.join()
def main():
    """Run 200 trials comparing SMDP Q-learning under six option configurations.

    Command-line options:
      --env    int in 1..4, grid-world size (default 3).
      --ratio  float, forwarded to ``train`` as ``rat`` for the
               non-uniform runs (default 50.0).
      --seed   int, seeds both ``random`` and ``numpy.random``
               (default 1000).

    An out-of-range ``--env`` is rejected through ``parser.error`` (exit
    status 2). An ``assert`` would be stripped under ``python -O`` and let
    ``--env 0`` silently index the last entry of the per-environment tables.

    Each trial creates a fresh environment, resets its goal and start, and
    then trains the six learners on that same environment in a fixed order.
    """
    parser = argparse.ArgumentParser(description='Get SR policies')
    parser.add_argument('--env', type=int, default=3)
    parser.add_argument('--ratio', type=float, default=50.0)
    parser.add_argument('--seed', type=int, default=1000)
    args = parser.parse_args()

    if not 1 <= args.env <= 4:
        parser.error("--env must be between 1 and 4 (got %d)" % args.env)

    random.seed(args.seed)
    np.random.seed(args.seed)

    ITERS = [int(5e4), int(5e4), int(5e5), int(5e5)][args.env - 1]
    OPTSIZE = [4, 5, 10, 10][args.env - 1]
    EVALNUM = 100

    # Paths depend only on the chosen environment, so build them once.
    pth = "data/dat" + str(args.env) + "/policies/"
    pklPath = "data/dat" + str(args.env) + \
        "/successorLabels" + str(args.env) + ".pkl"

    srOnly = {"eigen": False}
    nonUniform = {"uniform": False, "rat": args.ratio}
    adaptive = dict(nonUniform, adaptive=True, adaPath=pklPath)

    # (printed label, optSize, extra constructor kwargs, policyPath,
    #  extra train kwargs) -- executed in this order on every trial.
    runs = [
        ("Option0", 0, {}, "images/tmp1", {}),
        ("Option5-Eigen", OPTSIZE, {}, "images/tmp2", {}),
        ("Option5-SR", OPTSIZE, srOnly, "images/tmp3", {}),
        ("Option5-Eigen-NU", OPTSIZE, {}, "images/tmp2", nonUniform),
        ("Option5-SR-NU", OPTSIZE, srOnly, "images/tmp3", nonUniform),
        ("Option5-SR-AE", OPTSIZE, srOnly, "images/tmp3", adaptive),
    ]

    for _ in range(200):
        env = gridWorld1(size=args.env)
        env.resetGoal()
        env.resetStart()
        env.reset()

        for label, optSize, learnerKwargs, policyPath, trainKwargs in runs:
            print(label)
            qlearner = SmdpQlearner(env, optionPath=pth, optSize=optSize,
                                    gamma=0.99, plot=False, **learnerKwargs)
            qlearner.train(iters=ITERS, evalNum=EVALNUM,
                           policyPath=policyPath, **trainKwargs)
def main():
    """Incrementally grow a successor-representation option set on grid world 4.

    Nine rounds are run. Each round rebuilds the successor representation
    (using options from the second round on), clusters it, and trains,
    plots and saves one ``SimpleQLearner`` policy per cluster medoid.
    The last round (index 8) calls ``getRareStateSuccessor(final=True)``
    and trains each policy for ``int(5e4)`` steps instead of the
    per-environment budget.

    NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    source. Clustering is assumed to run in every round (after the
    if/else), and ``optionsAvailable`` is assumed to be set at the end of
    each round -- confirm against the original layout.
    """
    trainIters = [int(10e5), int(5e3), int(5e3), int(5e3)]
    clusterCounts = [10, 4, 7, 5]
    worldId = 4
    assert 1 <= worldId <= 4

    print("Env : " + str(worldId))
    env = gridWorld1(size=worldId, incremental=True)
    print("goal ", env.goal)
    srModel = IncSuccessor(env)
    render = False

    # options are unavailable in first round
    optionsAvailable = False

    # Each incremental iteration
    for roundIdx in range(9):
        isFinalRound = roundIdx >= 8

        # Build successor representation with current set of options
        srModel.getSuccessor(optionsAvailable=optionsAvailable, render=render)
        srModel.getValidSuccessor()
        srModel.getReachedSuccessor()
        srModel.plotSuccessorMagnitudes(
            "images/magnitudes/{}.png".format(roundIdx))

        # Sample 20 cells whose env.room entry is 0 and record their
        # validKeyInd entries. NOTE(review): the list is never read again.
        randomState = []
        for _ in range(20):
            x = np.random.randint(env.height)
            y = np.random.randint(env.width)
            while env.room[x][y] != 0:
                x = np.random.randint(env.height)
                y = np.random.randint(env.width)
            randomState.append(srModel.validKeyInd[(x, y)])

        if isFinalRound:
            # Final iteration: run the full successor representation
            srModel.getRareStateSuccessor(final=True)
        else:
            # Otherwise, cluster SR of states that are not used frequently
            srModel.getRareStateSuccessor()
        srModel.clusterRepresentation(
            clusterCounts[worldId - 1],
            "images/inc-goals/sr{}.png".format(roundIdx),
            2, 0)
        print("size", srModel.successor.shape)

        # Iterate over every cluster center and learn policy
        curMedoids = []
        for j, md in enumerate(srModel.medoids):
            print(j)
            medoidKey = srModel.validKeyInd[srModel.reachedRevMap[md]]
            rew = srModel.validSuccessor[medoidKey]
            Qlearner = SimpleQLearner(env, rew, srModel.keyInd)

            steps = int(5e4) if isFinalRound else trainIters[worldId - 1]
            Qlearner.train(steps)
            Qlearner.plotPolicy(
                "images/policies{}/explorepolicy{}.png".format(worldId, j))
            Qlearner.savePolicy(
                "data/dat{}/policies/explorepolicy{}.csv".format(worldId, j))

            plot_ind = srModel.keyInd[srModel.reachedRevMap[md]]
            srModel.plotSuccessorState(
                plot_ind,
                "images/sr{}/sr2d{}.png".format(worldId, j),
                "images/sr{}/sr3d{}.png".format(worldId, j))
            curMedoids.append(md)

        optionsAvailable = True