def TestSim(obs):
    """Run a one-shot smoke test of the planning pipeline and print the result.

    Builds a ``WorldState`` from ``obs``, creates a reward learner and a
    dynamics learner, feeds each a single sample taken from that state,
    then calls ``Optimize`` and ``ExpandPath`` and prints Mario's x
    position for every world in the expanded state.

    Args:
        obs: Raw observation; passed straight to ``WorldState``.

    Returns:
        None. All results are written to stdout.
    """
    state = WorldState(obs)
    print("mario loc  %s   %s" % (state.mario.x, state.mario.y))

    commonVar = getCommonVar()
    classVarList = getClassVar()
    rewardVar = orange.FloatVariable("reward")
    RewardLearner = Learner(commonVar, [rewardVar], 3000)

    # The dynamics learner uses every common variable except the first.
    # Build a new list instead of popping in place: ``commonVar`` was just
    # handed to RewardLearner, and mutating it here would also change the
    # reward learner's list if Learner keeps a reference to it.
    dynamicVar = commonVar[1:]
    DynamicLearner = Learner(dynamicVar, classVarList, 3000)

    lastActionId = 9
    modelFea = getModelFeature(state, [2.0, 1.0, 0.0, 0.0])
    rewardFea = getTrainFeature(state, [0.0], lastActionId)  # don't learn the pseudo reward
    DynamicLearner.add([modelFea])
    RewardLearner.add([rewardFea])

    # Every one of the 12 slots refers to the SAME learner instance.
    dynaLearner = [DynamicLearner] * 12

    path = Optimize(state, dynaLearner, RewardLearner, 100, [], ActionRange)
    newState = ExpandPath(path, MakeSimState(state, 10), dynaLearner, RewardLearner)

    print(type(newState))
    print("hello")
    for world in newState.worldList:
        print("loc:  %s" % (world.mario.x,))
def initLearner(self):
    """Fill ``self.DynamicLearner`` with one ``Learner`` per action.

    Does nothing unless ``self.DynamicLearner`` compares equal to an empty
    dict. Otherwise a ``Learner`` is created for every entry of
    ``self.actionList`` and stored under that action as key. Action 9 is
    given a feature limit of 10000; every other action gets 3000.

    Returns:
        None. ``self.DynamicLearner`` is updated in place.
    """
    # Guard: learners already exist, leave them untouched.
    if not (self.DynamicLearner == {}):
        return

    featureVars = getCommonVar()
    targetVars = getClassVar()
    # NOTE: reward-learner construction (orange.FloatVariable("reward") /
    # Learner(commonVar, [rewardVar], 6000)) is disabled here.

    # The dynamics learners are built without the first common variable.
    featureVars.pop(0)

    for action in self.actionList:
        capacity = 10000 if action == 9 else 3000
        self.DynamicLearner[action] = Learner(featureVars, targetVars, capacity)