Example #1
import copy

def ExpandPath(path, state, dynaLearner, rewardLearner):
    # Expand every simulated world along the given action sequence,
    # sampling successor states from the learned dynamics models and
    # accumulating the predicted reward per world.
    for actionId in path:
        nextState = SimState()
        if state.path:
            prevAction = state.path[-1]  # reward features condition on the previous action
        else:
            prevAction = -1  # TODO: make it NoTask
        for i, world in enumerate(state.worldList):
            fea = getTestFeature(world, actionId)

            rewardFea = getRewardFeature(world, prevAction)
            # WARNING!! Don't change the order
            # reward, = rewardLearner.getClass(fea)
            reward = rewardLearner.getQ(rewardFea, actionId)

            fea.pop(0)  # drop the leading attribute (presumably the action) -- there is one dynamics learner per action
            distList = dynaLearner[actionId].getClass(fea, orange.GetProbabilities)  # TODO: add randomness here
            # Sample a speed change (ax, ay) and a position correction (dx, dy)
            # from the predicted distributions.
            ax, ay, dx, dy = [GetSample(dist) for dist in distList]
            # ax, ay, dx, dy = [round(value, Precision) for value in sampleValue]
            # ll = [log(pair[1]) for pair in sampleValue]  # log-likelihood

            m = world.mario
            sx = ax + m.sx  # new speed = current speed plus sampled change
            sy = ay + m.sy

            newMario = copy.deepcopy(m)
            newMario.x = m.x + m.sx + dx  # new position = old position plus speed plus sampled correction
            newMario.y = m.y + m.sy + dy
            newMario.sx = sx
            newMario.sy = sy

            newWorld = copy.copy(world)  # with static assumption, everything other than mario stays the same
            newWorld.mario = newMario

            direction, isJump, isSpeed = getActionType(actionId)
            # Prune: a jump cannot increase y-speed once sy >= 0, and a fully
            # motionless prediction needs no further search.
            if (not (newWorld.mario.sy >= 0 and isJump)) and not (
                sx == 0.0 and sy == 0.0 and dx == 0.0 and dy == 0.0
            ):
                nextState.worldList.append(newWorld)
                nextState.rewardList.append(state.rewardList[i] + reward)
                # nextState.probList.append(state.probList[i] + sum(ll))

        nextState.path = state.path + [actionId]
        state = nextState  # advance the simulation by one action
    return state
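
Example #1 relies on a GetSample helper that is not shown on this page: it draws one concrete value from each discrete distribution returned by the Orange classifier. A minimal sketch of such a helper, assuming each distribution is a sequence of (value, probability) pairs summing to 1 (the repository's actual implementation may differ):

import random

def GetSample(dist):
    # Inverse-transform sampling over a sequence of (value, probability)
    # pairs, e.g. [(-1.0, 0.2), (0.0, 0.5), (1.0, 0.3)].
    r = random.random()
    total = 0.0
    for value, prob in dist:
        total += prob
        if r < total:
            return value
    return dist[-1][0]  # guard against floating-point round-off
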
Example #2
import copy

def TestSimPath(path, state, dynaLearner, rewardLearner):
    # Deterministic counterpart of ExpandPath for a single world: predict
    # the most likely next state for each action and dump it for inspection.
    for actionId in path:
        fea = getTestFeature(state, actionId)
        m = state.mario
        sx, sy, dx, dy = dynaLearner.getClass(fea)  # TODO: add randomness here
        reward, = rewardLearner.getClass(fea)
        state.dump()
        print "mario:", m.x, m.y, reward

        newMario = copy.deepcopy(m)

        newMario.x = m.x + m.sx + dx
        newMario.y = m.y + m.sy + dy
        newMario.sx = sx
        newMario.sy = sy

        newState = copy.copy(state)  # with static assumption, everything other than mario stays the same
        newState.mario = newMario
        state = newState
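
For orientation, a hypothetical driver for these two helpers might look as follows; the SimState fields, the learner objects, and the action ids in plan are assumptions inferred from the code above, not part of the original module:

initState = SimState()
initState.worldList = [world]  # hypothetical starting observation
initState.rewardList = [0.0]
initState.path = []

plan = [3, 3, 1]  # hypothetical sequence of action ids
final = ExpandPath(plan, initState, dynaLearner, rewardLearner)
print "expanded", len(final.worldList), "worlds along path", final.path

# Deterministic trace of the same plan on a single world; note that here
# the dynamics learner is a single model rather than one model per action.
TestSimPath(plan, world, singleDynaLearner, rewardLearner)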