def prepareDataContinuousEnvWithReward():
    import continuousEnv
    # Eight unit-step moves: the four axis directions plus the four diagonals.
    actionSpace = [[0, 1], [1, 0], [-1, 0], [0, -1],
                   [1, 1], [-1, -1], [1, -1], [-1, 1]]
    policy = continuousEnv.OptimalPolicy(actionSpace)

    xbound = [0, 180]
    ybound = [0, 180]
    vel = 1
    transitionFunction = continuousEnv.TransitionFunction(xbound, ybound, vel)
    isTerminal = continuousEnv.IsTerminal(vel + .5)
    reset = continuousEnv.Reset(xbound, ybound)

    maxTimeStep = 180
    sampleTraj = SampleTrajectory(maxTimeStep, transitionFunction, isTerminal, reset)

    decay = 0.99
    rewardFunction = lambda state, action: -1  # constant -1 cost per step
    accumulateRewards = AccumulateRewards(decay, rewardFunction)

    trajNum = 160
    path = "./continuous_reward_data.pkl"
    generateData(sampleTraj, accumulateRewards, policy, actionSpace, trajNum, path)
    data = loadData(path)
    print("{} data points in {}".format(len(data), path))
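# SampleTrajectory is used throughout this file but defined elsewhere. A
# minimal sketch of the assumed behavior: roll a policy forward from a reset
# state until the episode terminates or maxTimeStep is reached, recording
# (state, action) pairs. The class name, signature, and trajectory layout
# here are illustrative assumptions, not the repo's actual implementation.
class SampleTrajectorySketch:
    def __init__(self, maxTimeStep, transitionFunction, isTerminal, reset):
        self.maxTimeStep = maxTimeStep
        self.transitionFunction = transitionFunction
        self.isTerminal = isTerminal
        self.reset = reset

    def __call__(self, policy):
        state = self.reset()
        trajectory = []
        for _ in range(self.maxTimeStep):
            if self.isTerminal(state):
                break
            action = policy(state)
            trajectory.append((state, action))
            state = self.transitionFunction(state, action)
        return trajectory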
def prepareDataContinuousEnv():
    import continuousEnv as env
    xbound = [0, 180]
    ybound = [0, 180]
    vel = 1
    transitionFunction = env.TransitionFunction(xbound, ybound, vel)
    isTerminal = env.IsTerminal(vel + .5)
    reset = env.Reset(xbound, ybound)

    maxTimeStep = 10000
    sampleTraj = SampleTrajectory(maxTimeStep, transitionFunction, isTerminal, reset)

    decay = 0.99
    rewardFunction = lambda state, action: -1
    accumulateRewards = AccumulateRewards(decay, rewardFunction)

    policy = env.OptimalPolicy(env.actionSpace)

    trajNum = 2000
    partialTrajSize = 5
    path = "./continuous_data_with_reward.pkl"
    data = generateData(sampleTraj, accumulateRewards, policy, env.actionSpace,
                        trajNum, path, withReward=True, partialTrajSize=partialTrajSize)
    print("{} data points in {}".format(len(data), path))
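# AccumulateRewards is also defined elsewhere. A minimal sketch of what it is
# assumed to compute: the discounted return G_t = r_t + decay * G_{t+1} for
# every step of a trajectory, evaluated by a single backward pass. The class
# name and the (state, action) trajectory layout are assumptions for
# illustration only.
class AccumulateRewardsSketch:
    def __init__(self, decay, rewardFunction):
        self.decay = decay
        self.rewardFunction = rewardFunction

    def __call__(self, trajectory):
        # trajectory: a list of (state, action) pairs.
        rewards = [self.rewardFunction(state, action) for state, action in trajectory]
        accumulatedRewards = []
        runningReturn = 0.0
        for reward in reversed(rewards):
            runningReturn = reward + self.decay * runningReturn
            accumulatedRewards.append(runningReturn)
        accumulatedRewards.reverse()
        return accumulatedRewards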
def prepareSheepEscapingEnvData():
    import sheepEscapingEnv as env
    actionSpace = env.actionSpace
    numActionSpace = env.numActionSpace
    xBoundary = env.xBoundary
    yBoundary = env.yBoundary
    vel = env.vel

    # The wolf's heat-seeking policy is baked into the transition function,
    # so the sheep's MCTS plans against a moving adversary.
    wolfHeatSeekingPolicy = env.WolfHeatSeekingPolicy(actionSpace)
    transition = env.TransitionFunction(xBoundary, yBoundary, vel, wolfHeatSeekingPolicy)
    isTerminal = env.IsTerminal(minDistance=vel + 5)
    reset = env.Reset(xBoundary, yBoundary)

    rewardFunction = lambda state, action: 1  # +1 for every step survived

    # MCTS components: UCB-style child scoring, uniform action priors, and
    # random rollouts for leaf evaluation.
    cInit = 1
    cBase = 1
    calculateScore = CalculateScore(cInit, cBase)
    selectChild = SelectChild(calculateScore)
    getActionPrior = UniformActionPrior(actionSpace)
    initializeChildren = InitializeChildren(actionSpace, transition, getActionPrior)
    expand = Expand(transition, isTerminal, initializeChildren)

    maxRollOutSteps = 10
    numSimulations = 600
    maxTrajLen = 100
    rolloutPolicy = lambda state: actionSpace[np.random.choice(range(numActionSpace))]
    rollout = RollOut(rolloutPolicy, maxRollOutSteps, transition, rewardFunction, isTerminal)
    mcts = MCTSPolicy(numSimulations, selectChild, expand, rollout, backup, getSoftmaxActionDist)

    sampleTraj = SampleTrajectoryWithMCTS(maxTrajLen, isTerminal, reset)
    # Non-MCTS baseline kept for reference:
    # policy = env.SheepNaiveEscapingPolicy(actionSpace)
    # sampleTraj = SampleTrajectory(maxRunningSteps, transition, isTerminal, reset, render=None)

    rewardDecay = 0.99
    accumulateRewards = AccumulateRewards(rewardDecay, rewardFunction)

    trajNum = 500
    partialTrajSize = None  # None keeps every step of each trajectory
    path = "./500trajs_sheepEscapingEnv_data_actionDist.pkl"
    reportInterval = 10
    data = generateData(sampleTraj, accumulateRewards, mcts, actionSpace, trajNum,
                        path, withReward=True, partialTrajSize=partialTrajSize,
                        reportInterval=reportInterval)
    print("{} data points in {}".format(len(data), path))
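# getSoftmaxActionDist, passed to MCTSPolicy above, is likewise defined
# elsewhere. A hedged sketch of the usual construction: turn the searched
# root's child visit counts into a softmax distribution over actions. Taking a
# plain array of visit counts (rather than a node object) and the temperature
# parameter are illustrative assumptions, not the repo's actual API.
import numpy as np

def softmaxActionDistSketch(childVisitCounts, temperature=1.0):
    # childVisitCounts: visit count per action in actionSpace, in order.
    visits = np.asarray(childVisitCounts, dtype=float)
    logits = visits / temperature
    expLogits = np.exp(logits - logits.max())  # shift by max for numerical stability
    return expLogits / expLogits.sum()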
def prepareDataContinuousEnvNoReward():
    # Distinct name: a prepareDataContinuousEnv with a different body is
    # defined above, and a duplicate definition would silently shadow it.
    import continuousEnv
    actionSpace = [[0, 1], [1, 0], [-1, 0], [0, -1],
                   [1, 1], [-1, -1], [1, -1], [-1, 1]]
    policy = continuousEnv.OptimalPolicy(actionSpace)

    xbound = [0, 180]
    ybound = [0, 180]
    vel = 1
    transitionFunction = continuousEnv.TransitionFunction(xbound, ybound, vel)
    isTerminal = continuousEnv.IsTerminal(1.5 * vel)
    reset = continuousEnv.Reset(xbound, ybound)

    maxTimeStep = 180
    sampleTraj = SampleTrajectory(maxTimeStep, transitionFunction, isTerminal, reset)

    trajNum = 1200
    path = "./continuous_data.pkl"
    generateData(sampleTraj, policy, actionSpace, trajNum, path)
    data = loadData(path)
    print("{} data points in {}".format(len(data), path))
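# Assumed entry point: each prepare* function above is self-contained, so any
# one of them can be invoked directly. Which one the original script ran is
# unknown; the choice below is illustrative.
if __name__ == '__main__':
    prepareSheepEscapingEnvData()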