print "# coin ", numOfCoin print "# Turtle ", numOfTurtle print "isEpisodeEnd ", isEpisodeEnd isTraining = not isEpisodeEnd count = 0 totalReward = 0 rewardList = [] stepCount = 0 while stepCount < maxStep: #randomly choose a sub goal at the beginning of the episode goalDiff = actionList[int(random.random() * len(actionList))] world = env.start(numOfTurtle, numOfCoin, goalDiff) objLoc = tool.getObjLoc(world, gridSize) marioLoc = tool.getMarioLoc(world, gridSize) goal = (marioLoc[0] + goalDiff[0], marioLoc[1] + goalDiff[1]) objLocWithGoal = tool.addGoalLoc(objLoc, goal) ob = (marioLoc, objLocWithGoal) action = controller.start(ob) count += 1 prevStepCount = stepCount episodeReward = 0 while stepCount < maxStep: stepCount = stepCount + 1 clock.tick(frameRate) reward, world, flag = env.step(action, isTraining) totalReward = totalReward + reward episodeReward = episodeReward + reward
def TestRun(controller, discrete_size, monsterMoveProb, objSet, maxStep, isEpisodeEnd, isShow, frameRate): #controller.disableUpdate() #no update after training print "MaxStep: ", maxStep size = 800, 800 gridSize = (discrete_size, discrete_size) delay = 100 interval = 50 pygame.init() pygame.key.set_repeat(delay, interval) clock=pygame.time.Clock() screen = pygame.display.set_mode(size) actionList = ((0, 1), (0, -1), (1, 0), (-1, 0)) env = GridEnv.Grid((discrete_size, discrete_size), size, actionList, monsterMoveProb) isTraining = not isEpisodeEnd #maxStep = 200 numOfTurtle = objSet[0] numOfCoin = objSet[1] print "# coin ", numOfCoin print "# Turtle ", numOfTurtle print "isEpisodeEnd ", isEpisodeEnd count = 0 totalReward = 0 rewardList = [] stepCount = 0 while stepCount < maxStep: #for i in range(0, maxEpisode): #print totalReward #rewardList[i] = totalReward Save(controller, 'smart.db') world = env.start(numOfTurtle, numOfCoin) objLoc = tool.getObjLoc(world, gridSize) marioLoc = tool.getMarioLoc(world, gridSize) goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller) #print "goal: ", goal #print "bestPath: ", bestPath goal = bestPath.pop(0) #print "plan: ", bestPath curPlanCounter = 0 objLocWithGoal = tool.addGoalLoc(objLoc, goal) ob = (marioLoc, objLocWithGoal) action = controller.start(ob) count += 1 prevStepCount = stepCount while stepCount < maxStep: stepCount = stepCount + 1 clock.tick(frameRate) reward, world, flag = env.step(action, isTraining) totalReward = totalReward + reward marioLoc = tool.getMarioLoc(world, gridSize) if marioLoc[0] == goal[0] and marioLoc[1] == goal[1]: reward = reward + 5 #print "reward" else: reward = reward - 6 #print "punish" if flag: controller.end(reward) break objLoc = tool.getObjLoc(world, gridSize) #if len(bestPath) == 0 or curPlanCounter == 0: dummy, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller) #print "plan: ", bestPath #curPlanCounter = curPlanCounter - 1 goal = bestPath.pop(0) #goal, 
bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller) #print "goal: ", goal objLocWithGoal = tool.addGoalLoc(objLoc, goal) ob = (marioLoc, objLocWithGoal) allQ = controller.getAllQ(ob) #print "allQ: ", allQ action = controller.step(reward, ob) #print "action: ", action for event in pygame.event.get(): #action = 0 if event.type == pygame.QUIT: sys.exit() if isShow: screen.blit(env.getScreen(), (0, 0)) pygame.display.flip() #rewardList.append((prevStepCount, stepCount, episodeReward)) print totalReward return rewardList, controller
def TestRun(controller, discrete_size, monsterMoveProb, objSet, maxStep, isEpisodeEnd, isShow, frameRate): controller.disableUpdate() #no update after training print "MaxStep: ", maxStep size = 800, 800 gridSize = (discrete_size, discrete_size) delay = 100 interval = 50 pygame.init() pygame.key.set_repeat(delay, interval) clock = pygame.time.Clock() screen = pygame.display.set_mode(size) actionList = ((0, 1), (0, -1), (1, 0), (-1, 0)) env = GridEnv.Grid((discrete_size, discrete_size), size, actionList, monsterMoveProb) isTraining = not isEpisodeEnd #maxStep = 200 numOfTurtle = objSet[0] numOfCoin = objSet[1] print "# coin ", numOfCoin print "# Turtle ", numOfTurtle print "isEpisodeEnd ", isEpisodeEnd count = 0 totalReward = 0 rewardList = [] stepCount = 0 while stepCount < maxStep: #for i in range(0, maxEpisode): #print totalReward #rewardList[i] = totalReward world = env.start(numOfTurtle, numOfCoin) objLoc = tool.getObjLoc(world, gridSize) marioLoc = tool.getMarioLoc(world, gridSize) goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller) goal = bestPath.pop(0) #print "plan: ", bestPath curPlanCounter = 3 print "goal: ", goal objLocWithGoal = tool.addGoalLoc(objLoc, goal) ob = (marioLoc, objLocWithGoal) action = controller.start(ob) count += 1 prevStepCount = stepCount while stepCount < maxStep: stepCount = stepCount + 1 clock.tick(frameRate) reward, world, flag = env.step(action, isTraining) totalReward = totalReward + reward if flag: controller.end(reward) break objLoc = tool.getObjLoc(world, gridSize) marioLoc = tool.getMarioLoc(world, gridSize) if len(bestPath) == 0 or curPlanCounter == 0: dummy, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller) print "plan: ", bestPath curPlanCounter = curPlanCounter - 1 goal = bestPath.pop(0) #goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller) print "goal: ", goal objLocWithGoal = tool.addGoalLoc(objLoc, goal) ob = (marioLoc, objLocWithGoal) allQ = controller.getAllQ(ob) print 
"allQ: ", allQ action = controller.step(reward, ob) print "action: ", action for event in pygame.event.get(): #action = 0 if event.type == pygame.QUIT: sys.exit() if isShow: screen.blit(env.getScreen(), (0, 0)) pygame.display.flip() #rewardList.append((prevStepCount, stepCount, episodeReward)) print totalReward return rewardList, controller
def TestRun(controller, discrete_size, monsterMoveProb, objSet, maxStep, isEpisodeEnd, isShow, frameRate):
    """Run episodes in the grid world, replanning every step but keeping the
    previous plan when it still scores close to the fresh one.

    The outer loop restarts episodes until maxStep total environment steps have
    been consumed.  env.step in this variant also receives the current sub-goal
    and returns (reward, world, flag, realReward, isSuccess); when the sub-goal
    was reached (isSuccess) the fresh plan's cost is compared against the
    remembered previous plan's cost, and the old plan is kept when
    prevPathCost > 0.9 * curPathCost (GetPlanCost semantics are defined
    elsewhere -- presumably higher is better; TODO confirm).

    NOTE(review): this file defines several functions named TestRun; the last
    definition wins at import time.  Despite the name, learning is not frozen
    here and the controller is saved to 'smart.db' every episode.

    Returns (rewardList, controller); rewardList is never appended to and is
    returned empty.
    """
    print "MaxStep: ", maxStep
    size = 800, 800
    gridSize = (discrete_size, discrete_size)
    # pygame keyboard auto-repeat parameters (ms)
    delay = 100
    interval = 50
    pygame.init()
    pygame.key.set_repeat(delay, interval)
    clock=pygame.time.Clock()
    screen = pygame.display.set_mode(size)
    actionList = ((0, 1), (0, -1), (1, 0), (-1, 0))
    env = GridEnv.Grid((discrete_size, discrete_size), size, actionList, monsterMoveProb)
    isTraining = not isEpisodeEnd
    #maxStep = 200
    numOfTurtle = objSet[0]
    numOfCoin = objSet[1]
    print "# coin ", numOfCoin
    print "# Turtle ", numOfTurtle
    print "isEpisodeEnd ", isEpisodeEnd
    count = 0        # episodes started (not otherwise used)
    totalReward = 0
    rewardList = []  # never filled in this variant
    stepCount = 0
    while stepCount < maxStep:
        #for i in range(0, maxEpisode):
        #print totalReward
        #rewardList[i] = totalReward
        # checkpoint the controller before each episode
        Save(controller, 'smart.db')
        world = env.start(numOfTurtle, numOfCoin)
        objLoc = tool.getObjLoc(world, gridSize)
        marioLoc = tool.getMarioLoc(world, gridSize)
        goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
        #print "plan: ", bestPath
        # drop the path head (presumably the current cell -- verify GetPlan),
        # snapshot the remainder as prevPath, then take the next waypoint as
        # the sub-goal; note prevPath[0] == goal after these three lines
        dummy = bestPath.pop(0)
        prevPath = bestPath[:]
        goal = bestPath.pop(0)
        #curPlanCounter = 3
        #print "goal: ", goal
        objLocWithGoal = tool.addGoalLoc(objLoc, goal)
        ob = (marioLoc, objLocWithGoal)
        action = controller.start(ob)
        count += 1
        prevStepCount = stepCount
        while stepCount < maxStep:
            stepCount = stepCount + 1
            clock.tick(frameRate)
            # env.step also consumes the sub-goal and reports the unshaped
            # reward (realReward) and whether the sub-goal was reached
            reward, world, flag, realReward, isSuccess = env.step(action, isTraining, goal)
            totalReward = totalReward + reward
            if flag:
                # episode terminated by the environment
                controller.end(reward, realReward, isSuccess)
                break
            objLoc = tool.getObjLoc(world, gridSize)
            marioLoc = tool.getMarioLoc(world, gridSize)
            print "-------------mario: ", marioLoc
            #if len(bestPath) == 0 or curPlanCounter == 0:
            # replan unconditionally every step
            dummy, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
            print "plan: ", bestPath
            #curPlanCounter = curPlanCounter - 1
            if isSuccess: #if not, just use the old plan
                # compare the fresh plan with the remembered one and keep the
                # old plan while it stays within 90% of the new plan's cost
                curPathCost = GetPlanCost(bestPath, objLoc, controller, True)
                prevPathCost = GetPlanCost(prevPath, objLoc, controller, True)
                print "cur plan Cost: ", curPathCost
                print "prev plan Cost: ", prevPathCost
                if prevPathCost > 0.9*curPathCost and len(prevPath) > 1: #stay with old plan
                    print "stay with old plan"
                    bestPath = prevPath
            # same head-drop / snapshot / pop pattern as at episode start
            dummy = bestPath.pop(0)
            prevPath = bestPath[:]
            goal = bestPath.pop(0)
            #print "goal: ", goal
            objLocWithGoal = tool.addGoalLoc(objLoc, goal)
            ob = (marioLoc, objLocWithGoal)
            allQ = controller.getAllQ(ob)
            print "allQ: ", allQ
            #print "internalReward: ", reward
            action = controller.step(reward, ob, realReward, isSuccess)
            print "action: ", action
            for event in pygame.event.get():
                #action = 0
                if event.type == pygame.QUIT:
                    sys.exit()
            if isShow:
                screen.blit(env.getScreen(), (0, 0))
                pygame.display.flip()
        #rewardList.append((prevStepCount, stepCount, episodeReward))
    #print totalReward
    return rewardList, controller
def TestRun(controller, discrete_size, monsterMoveProb, objSet, maxStep, isEpisodeEnd, isShow, frameRate):
    """Grid-world run loop with per-step replanning and plan retention.

    Restarts episodes until maxStep total environment steps are used.  Each
    step, env.step(action, isTraining, goal) returns (reward, world, flag,
    realReward, isSuccess); on sub-goal success the freshly planned path is
    compared against the remembered previous path via GetPlanCost, and the old
    path is kept while prevPathCost > 0.9 * curPathCost (GetPlanCost semantics
    are defined elsewhere -- presumably higher is better; TODO confirm).

    NOTE(review): this file defines several near-identical functions named
    TestRun; in Python the last definition shadows the earlier ones.  The
    controller is checkpointed to 'smart.db' at every episode start, and
    disableUpdate is not called, so learning may continue during this run.

    Returns (rewardList, controller); rewardList stays empty in this variant.
    """
    print "MaxStep: ", maxStep
    size = 800, 800
    gridSize = (discrete_size, discrete_size)
    # pygame keyboard auto-repeat parameters (ms)
    delay = 100
    interval = 50
    pygame.init()
    pygame.key.set_repeat(delay, interval)
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode(size)
    actionList = ((0, 1), (0, -1), (1, 0), (-1, 0))
    env = GridEnv.Grid((discrete_size, discrete_size), size, actionList, monsterMoveProb)
    isTraining = not isEpisodeEnd
    #maxStep = 200
    numOfTurtle = objSet[0]
    numOfCoin = objSet[1]
    print "# coin ", numOfCoin
    print "# Turtle ", numOfTurtle
    print "isEpisodeEnd ", isEpisodeEnd
    count = 0        # episodes started (not otherwise used)
    totalReward = 0
    rewardList = []  # never filled in this variant
    stepCount = 0
    while stepCount < maxStep:
        #for i in range(0, maxEpisode):
        #print totalReward
        #rewardList[i] = totalReward
        # checkpoint the controller before each episode
        Save(controller, 'smart.db')
        world = env.start(numOfTurtle, numOfCoin)
        objLoc = tool.getObjLoc(world, gridSize)
        marioLoc = tool.getMarioLoc(world, gridSize)
        goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
        #print "plan: ", bestPath
        # drop the path head (presumably the current cell -- verify GetPlan),
        # snapshot the remainder as prevPath, take the next waypoint as the
        # sub-goal; prevPath[0] == goal afterwards
        dummy = bestPath.pop(0)
        prevPath = bestPath[:]
        goal = bestPath.pop(0)
        #curPlanCounter = 3
        #print "goal: ", goal
        objLocWithGoal = tool.addGoalLoc(objLoc, goal)
        ob = (marioLoc, objLocWithGoal)
        action = controller.start(ob)
        count += 1
        prevStepCount = stepCount
        while stepCount < maxStep:
            stepCount = stepCount + 1
            clock.tick(frameRate)
            # env.step also consumes the sub-goal and reports the unshaped
            # reward (realReward) and whether the sub-goal was reached
            reward, world, flag, realReward, isSuccess = env.step(
                action, isTraining, goal)
            totalReward = totalReward + reward
            if flag:
                # episode terminated by the environment
                controller.end(reward, realReward, isSuccess)
                break
            objLoc = tool.getObjLoc(world, gridSize)
            marioLoc = tool.getMarioLoc(world, gridSize)
            print "-------------mario: ", marioLoc
            #if len(bestPath) == 0 or curPlanCounter == 0:
            # replan unconditionally every step
            dummy, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
            print "plan: ", bestPath
            #curPlanCounter = curPlanCounter - 1
            if isSuccess: #if not, just use the old plan
                # keep the remembered plan while it stays within 90% of the
                # fresh plan's cost
                curPathCost = GetPlanCost(bestPath, objLoc, controller, True)
                prevPathCost = GetPlanCost(prevPath, objLoc, controller, True)
                print "cur plan Cost: ", curPathCost
                print "prev plan Cost: ", prevPathCost
                if prevPathCost > 0.9 * curPathCost and len(prevPath) > 1: #stay with old plan
                    print "stay with old plan"
                    bestPath = prevPath
            # same head-drop / snapshot / pop pattern as at episode start
            dummy = bestPath.pop(0)
            prevPath = bestPath[:]
            goal = bestPath.pop(0)
            #print "goal: ", goal
            objLocWithGoal = tool.addGoalLoc(objLoc, goal)
            ob = (marioLoc, objLocWithGoal)
            allQ = controller.getAllQ(ob)
            print "allQ: ", allQ
            #print "internalReward: ", reward
            action = controller.step(reward, ob, realReward, isSuccess)
            print "action: ", action
            for event in pygame.event.get():
                #action = 0
                if event.type == pygame.QUIT:
                    sys.exit()
            if isShow:
                screen.blit(env.getScreen(), (0, 0))
                pygame.display.flip()
        #rewardList.append((prevStepCount, stepCount, episodeReward))
    #print totalReward
    return rewardList, controller
print "# coin ", numOfCoin print "# Turtle ", numOfTurtle print "isEpisodeEnd ", isEpisodeEnd isTraining = not isEpisodeEnd count = 0 totalReward = 0 rewardList = [] stepCount = 0 while stepCount < maxStep: #randomly choose a sub goal at the beginning of the episode goalDiff = actionList[int(random.random()*len(actionList))] world = env.start(numOfTurtle, numOfCoin, goalDiff) objLoc = tool.getObjLoc(world, gridSize) marioLoc = tool.getMarioLoc(world, gridSize) goal = (marioLoc[0]+goalDiff[0], marioLoc[1]+goalDiff[1]) objLocWithGoal = tool.addGoalLoc(objLoc, goal) ob = (marioLoc, objLocWithGoal) action = controller.start(ob) count += 1 prevStepCount = stepCount episodeReward = 0 while stepCount < maxStep: stepCount = stepCount + 1 clock.tick(frameRate) reward, world, flag, realReward, isSuccess = env.step(action, isTraining) totalReward = totalReward + reward episodeReward = episodeReward + reward