Example #1
0
File: Sim.py Project: lono175/Mario
def TestSim(obs):
    MaxY = 16
    MaxX = 22
    state = WorldState(obs)
    print "mario loc ", state.mario.x, " ", state.mario.y

    commonVar = getCommonVar()
    classVarList = getClassVar()
    rewardVar = orange.FloatVariable("reward")
    RewardLearner = Learner(commonVar, [rewardVar], 3000)
    commonVar.pop(0)
    DynamicLearner = Learner(commonVar, classVarList, 3000)

    lastActionId = 9
    modelFea = getModelFeature(state, [2.0, 1.0, 0.0, 0.0])
    rewardFea = getTrainFeature(state, [0.0], lastActionId)  # don't learn the pseudo reward

    DynamicLearner.add([modelFea])
    RewardLearner.add([rewardFea])

    dynaLearner = [DynamicLearner for action in range(12)]
    path = Optimize(state, dynaLearner, RewardLearner, 100, [], ActionRange)
    newState = ExpandPath(path, MakeSimState(state, 10), dynaLearner, RewardLearner)
    print type(newState)
    print "hello"
    for world in newState.worldList:
        print "loc: ", world.mario.x
Example #2
0
 def initLearner(self):
     """Fill ``self.DynamicLearner`` with one ``Learner`` per action, only once.

     When ``self.DynamicLearner`` is anything other than an empty dict the
     method returns immediately and leaves it untouched. Otherwise a
     ``Learner`` is created for every entry of ``self.actionList`` and stored
     under that action as key.
     """
     if self.DynamicLearner != {}:
         return

     shared_vars = getCommonVar()
     class_vars = getClassVar()
     # The first common variable is removed before any learner is built.
     shared_vars.pop(0)

     for act in self.actionList:
         # Action 9 is given a larger limit (10000) than all others (3000).
         max_feature = 10000 if act == 9 else 3000
         self.DynamicLearner[act] = Learner(shared_vars, class_vars, max_feature)