Example 1

import numpy as np

# Project helpers such as SampleTrajectory, SampleTrajectoryWithMCTS, AccumulateRewards,
# generateData, loadData, OptimalPolicy, CalculateScore, SelectChild, UniformActionPrior,
# InitializeChildren, Expand, RollOut, MCTSPolicy, backup and getSoftmaxActionDist are
# assumed to be imported from the project's own modules; they are not defined in this
# snippet.


def prepareDataContinuousEnvWithReward():
    """Generate continuous-environment training data with accumulated rewards,
    using the optimal policy."""
    actionSpace = [[0, 1], [1, 0], [-1, 0], [0, -1], [1, 1], [-1, -1], [1, -1],
                   [-1, 1]]
    policy = OptimalPolicy(actionSpace)

    import continuousEnv
    xbound = [0, 180]
    ybound = [0, 180]
    vel = 1
    transitionFunction = continuousEnv.TransitionFunction(xbound, ybound, vel)
    isTerminal = continuousEnv.IsTerminal(vel + .5)
    reset = continuousEnv.Reset(xbound, ybound)

    maxTimeStep = 180
    sampleTraj = SampleTrajectory(maxTimeStep, transitionFunction, isTerminal,
                                  reset)

    decay = 0.99
    rewardFunction = lambda state, action: -1
    accumulateRewards = AccumulateRewards(decay, rewardFunction)

    trajNum = 160
    path = "./continuous_reward_data.pkl"
    generateData(sampleTraj, accumulateRewards, policy, actionSpace, trajNum,
                 path)

    data = loadData(path)
    print("{} data points in {}".format(len(data), path))


def prepareDataContinuousEnv():
    """Generate continuous-environment training data with accumulated rewards and
    partial trajectories, using the optimal policy."""
    import continuousEnv as env
    xbound = [0, 180]
    ybound = [0, 180]
    vel = 1
    transitionFunction = env.TransitionFunction(xbound, ybound, vel)
    isTerminal = env.IsTerminal(vel + .5)
    reset = env.Reset(xbound, ybound)

    maxTimeStep = 10000
    sampleTraj = SampleTrajectory(maxTimeStep, transitionFunction, isTerminal,
                                  reset)

    decay = 0.99
    rewardFunction = lambda state, action: -1
    accumulateRewards = AccumulateRewards(decay, rewardFunction)

    policy = env.OptimalPolicy(env.actionSpace)
    trajNum = 2000
    partialTrajSize = 5
    path = "./continuous_data_with_reward.pkl"
    data = generateData(sampleTraj,
                        accumulateRewards,
                        policy,
                        env.actionSpace,
                        trajNum,
                        path,
                        withReward=True,
                        partialTrajSize=partialTrajSize)

    print("{} data points in {}".format(len(data), path))


def prepareSheepEscapingEnvData():
    """Generate sheep-escaping trajectories with an MCTS policy and save the
    resulting action-distribution data together with accumulated rewards."""
    import sheepEscapingEnv as env
    actionSpace = env.actionSpace
    numActionSpace = env.numActionSpace
    xBoundary = env.xBoundary
    yBoundary = env.yBoundary
    vel = env.vel
    wolfHeatSeekingPolicy = env.WolfHeatSeekingPolicy(actionSpace)
    transition = env.TransitionFunction(xBoundary, yBoundary, vel,
                                        wolfHeatSeekingPolicy)
    isTerminal = env.IsTerminal(minDistance=vel + 5)
    reset = env.Reset(xBoundary, yBoundary)

    rewardFunction = lambda state, action: 1

    # constants for the MCTS child-selection (exploration) score
    cInit = 1
    cBase = 1
    calculateScore = CalculateScore(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    getActionPrior = UniformActionPrior(actionSpace)
    initializeChildren = InitializeChildren(actionSpace, transition,
                                            getActionPrior)
    expand = Expand(transition, isTerminal, initializeChildren)

    maxRollOutSteps = 10
    numSimulations = 600
    maxTrajLen = 100
    # uniform random rollout policy used for MCTS leaf evaluation
    rolloutPolicy = lambda state: actionSpace[np.random.choice(numActionSpace)]
    rollout = RollOut(rolloutPolicy, maxRollOutSteps, transition,
                      rewardFunction, isTerminal)
    mcts = MCTSPolicy(numSimulations, selectChild, expand, rollout, backup,
                      getSoftmaxActionDist)
    sampleTraj = SampleTrajectoryWithMCTS(maxTrajLen, isTerminal, reset)

    # policy = env.SheepNaiveEscapingPolicy(actionSpace)
    # sampleTraj = SampleTrajectory(maxRunningSteps, transition, isTerminal, reset, render=None)

    rewardDecay = 0.99
    accumulateRewards = AccumulateRewards(rewardDecay, rewardFunction)

    trajNum = 500
    partialTrajSize = None
    path = "./500trajs_sheepEscapingEnv_data_actionDist.pkl"
    reportInterval = 10
    data = generateData(sampleTraj,
                        accumulateRewards,
                        mcts,
                        actionSpace,
                        trajNum,
                        path,
                        withReward=True,
                        partialTrajSize=partialTrajSize,
                        reportInterval=reportInterval)

    print("{} data points in {}".format(len(data), path))


def prepareDataContinuousEnv():
    """Generate continuous-environment training data (states and actions only, no
    accumulated rewards) using the optimal policy.

    NOTE: this redefinition shadows the prepareDataContinuousEnv defined above.
    """
    actionSpace = [[0, 1], [1, 0], [-1, 0], [0, -1], [1, 1], [-1, -1], [1, -1],
                   [-1, 1]]
    policy = OptimalPolicy(actionSpace)

    maxTimeStep = 180
    import continuousEnv
    xbound = [0, 180]
    ybound = [0, 180]
    vel = 1
    transitionFunction = continuousEnv.TransitionFunction(xbound, ybound, vel)
    isTerminal = continuousEnv.IsTerminal(1.5 * vel)
    reset = continuousEnv.Reset(xbound, ybound)
    sampleTraj = SampleTrajectory(maxTimeStep, transitionFunction, isTerminal,
                                  reset)

    trajNum = 1200
    path = "./continuous_data.pkl"
    generateData(sampleTraj, policy, actionSpace, trajNum, path)

    data = loadData(path)
    print("{} data points in {}".format(data, path))