Ejemplo n.º 1
0
    print "# Turtle ", numOfTurtle
    print "isEpisodeEnd ", isEpisodeEnd

    isTraining = not isEpisodeEnd

    count = 0

    totalReward = 0
    rewardList = []
    stepCount = 0
    while stepCount < maxStep:
        #randomly choose a sub goal at the beginning of the episode
        goalDiff = actionList[int(random.random() * len(actionList))]
        world = env.start(numOfTurtle, numOfCoin, goalDiff)
        objLoc = tool.getObjLoc(world, gridSize)
        marioLoc = tool.getMarioLoc(world, gridSize)
        goal = (marioLoc[0] + goalDiff[0], marioLoc[1] + goalDiff[1])
        objLocWithGoal = tool.addGoalLoc(objLoc, goal)
        ob = (marioLoc, objLocWithGoal)
        action = controller.start(ob)

        count += 1
        prevStepCount = stepCount
        episodeReward = 0
        while stepCount < maxStep:
            stepCount = stepCount + 1
            clock.tick(frameRate)
            reward, world, flag = env.step(action, isTraining)
            totalReward = totalReward + reward
            episodeReward = episodeReward + reward
            if flag:
Ejemplo n.º 2
0
def TestRun(controller, discrete_size, monsterMoveProb, objSet, maxStep, isEpisodeEnd, isShow, frameRate):
    #controller.disableUpdate() #no update after training
    print "MaxStep: ", maxStep
    size = 800, 800
    gridSize = (discrete_size, discrete_size)
    delay = 100
    interval = 50
    pygame.init()
    pygame.key.set_repeat(delay, interval)
    clock=pygame.time.Clock()
    screen = pygame.display.set_mode(size)

    actionList = ((0, 1), (0, -1), (1, 0), (-1, 0))
    env = GridEnv.Grid((discrete_size, discrete_size), size, actionList, monsterMoveProb)

    isTraining = not isEpisodeEnd
    #maxStep = 200

    numOfTurtle = objSet[0]
    numOfCoin = objSet[1]

    print "# coin ", numOfCoin
    print "# Turtle ", numOfTurtle
    print "isEpisodeEnd ", isEpisodeEnd

    count = 0
    
    totalReward = 0
    rewardList = []
    stepCount = 0
    while stepCount < maxStep:
    #for i in range(0, maxEpisode):
        #print totalReward
        #rewardList[i] = totalReward

        Save(controller, 'smart.db')
        world = env.start(numOfTurtle, numOfCoin)
        objLoc = tool.getObjLoc(world, gridSize)
        marioLoc = tool.getMarioLoc(world, gridSize)
        goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
        #print "goal: ", goal
        #print "bestPath: ", bestPath
        goal = bestPath.pop(0)
        #print "plan: ", bestPath
        curPlanCounter = 0
        objLocWithGoal = tool.addGoalLoc(objLoc, goal)
        ob = (marioLoc, objLocWithGoal)
        action = controller.start(ob)
            
        count += 1
        prevStepCount = stepCount
        while stepCount < maxStep:
            stepCount = stepCount + 1
            clock.tick(frameRate)
            reward, world, flag = env.step(action, isTraining)
            totalReward = totalReward + reward

            marioLoc = tool.getMarioLoc(world, gridSize)

            if marioLoc[0] == goal[0] and marioLoc[1] == goal[1]:
               reward = reward + 5
               #print "reward"
            else:
               reward = reward - 6
               #print "punish"
            if flag:
                controller.end(reward)
                break
            objLoc = tool.getObjLoc(world, gridSize)

            #if len(bestPath) == 0 or curPlanCounter == 0:
            dummy, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
            #print "plan: ", bestPath
            #curPlanCounter = curPlanCounter - 1
            goal = bestPath.pop(0)
            #goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
            #print "goal: ", goal
            objLocWithGoal = tool.addGoalLoc(objLoc, goal)
            ob = (marioLoc, objLocWithGoal)
            allQ = controller.getAllQ(ob)
            #print "allQ: ", allQ
            action = controller.step(reward, ob)
            #print "action: ", action
            for event in pygame.event.get():
               #action = 0
               if event.type == pygame.QUIT: sys.exit()
            if isShow:
                screen.blit(env.getScreen(), (0, 0))
                pygame.display.flip()
        #rewardList.append((prevStepCount, stepCount, episodeReward))
    print totalReward
    return rewardList, controller
Ejemplo n.º 3
0
def TestRun(controller, discrete_size, monsterMoveProb, objSet, maxStep,
            isEpisodeEnd, isShow, frameRate):
    controller.disableUpdate()  #no update after training
    print "MaxStep: ", maxStep
    size = 800, 800
    gridSize = (discrete_size, discrete_size)
    delay = 100
    interval = 50
    pygame.init()
    pygame.key.set_repeat(delay, interval)
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode(size)

    actionList = ((0, 1), (0, -1), (1, 0), (-1, 0))
    env = GridEnv.Grid((discrete_size, discrete_size), size, actionList,
                       monsterMoveProb)

    isTraining = not isEpisodeEnd
    #maxStep = 200

    numOfTurtle = objSet[0]
    numOfCoin = objSet[1]

    print "# coin ", numOfCoin
    print "# Turtle ", numOfTurtle
    print "isEpisodeEnd ", isEpisodeEnd

    count = 0

    totalReward = 0
    rewardList = []
    stepCount = 0
    while stepCount < maxStep:
        #for i in range(0, maxEpisode):
        #print totalReward
        #rewardList[i] = totalReward

        world = env.start(numOfTurtle, numOfCoin)
        objLoc = tool.getObjLoc(world, gridSize)
        marioLoc = tool.getMarioLoc(world, gridSize)
        goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
        goal = bestPath.pop(0)
        #print "plan: ", bestPath
        curPlanCounter = 3
        print "goal: ", goal
        objLocWithGoal = tool.addGoalLoc(objLoc, goal)
        ob = (marioLoc, objLocWithGoal)
        action = controller.start(ob)

        count += 1
        prevStepCount = stepCount
        while stepCount < maxStep:
            stepCount = stepCount + 1
            clock.tick(frameRate)
            reward, world, flag = env.step(action, isTraining)
            totalReward = totalReward + reward

            if flag:
                controller.end(reward)
                break
            objLoc = tool.getObjLoc(world, gridSize)
            marioLoc = tool.getMarioLoc(world, gridSize)
            if len(bestPath) == 0 or curPlanCounter == 0:
                dummy, bestPath = GetPlan(discrete_size, marioLoc, objLoc,
                                          controller)
                print "plan: ", bestPath
            curPlanCounter = curPlanCounter - 1
            goal = bestPath.pop(0)
            #goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
            print "goal: ", goal
            objLocWithGoal = tool.addGoalLoc(objLoc, goal)
            ob = (marioLoc, objLocWithGoal)
            allQ = controller.getAllQ(ob)
            print "allQ: ", allQ
            action = controller.step(reward, ob)
            print "action: ", action
            for event in pygame.event.get():
                #action = 0
                if event.type == pygame.QUIT: sys.exit()
            if isShow:
                screen.blit(env.getScreen(), (0, 0))
                pygame.display.flip()
        #rewardList.append((prevStepCount, stepCount, episodeReward))
    print totalReward
    return rewardList, controller
Ejemplo n.º 4
0
def TestRun(controller, discrete_size, monsterMoveProb, objSet, maxStep, isEpisodeEnd, isShow, frameRate):
    print "MaxStep: ", maxStep
    size = 800, 800
    gridSize = (discrete_size, discrete_size)
    delay = 100
    interval = 50
    pygame.init()
    pygame.key.set_repeat(delay, interval)
    clock=pygame.time.Clock()
    screen = pygame.display.set_mode(size)

    actionList = ((0, 1), (0, -1), (1, 0), (-1, 0))
    env = GridEnv.Grid((discrete_size, discrete_size), size, actionList, monsterMoveProb)

    isTraining = not isEpisodeEnd
    #maxStep = 200

    numOfTurtle = objSet[0]
    numOfCoin = objSet[1]

    print "# coin ", numOfCoin
    print "# Turtle ", numOfTurtle
    print "isEpisodeEnd ", isEpisodeEnd

    count = 0

    totalReward = 0
    rewardList = []
    stepCount = 0
    while stepCount < maxStep:
        #for i in range(0, maxEpisode):
        #print totalReward
        #rewardList[i] = totalReward

        Save(controller, 'smart.db')
        world = env.start(numOfTurtle, numOfCoin)
        objLoc = tool.getObjLoc(world, gridSize)
        marioLoc = tool.getMarioLoc(world, gridSize)
        goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
        #print "plan: ", bestPath

        dummy = bestPath.pop(0)
        prevPath = bestPath[:]
        goal = bestPath.pop(0)
        #curPlanCounter = 3
        #print "goal: ", goal
        objLocWithGoal = tool.addGoalLoc(objLoc, goal)
        ob = (marioLoc, objLocWithGoal)
        action = controller.start(ob)

        count += 1
        prevStepCount = stepCount
        while stepCount < maxStep:
            stepCount = stepCount + 1
            clock.tick(frameRate)
            reward, world, flag, realReward, isSuccess = env.step(action, isTraining, goal)
            totalReward = totalReward + reward

            if flag:
                controller.end(reward, realReward, isSuccess)
                break
            objLoc = tool.getObjLoc(world, gridSize)
            marioLoc = tool.getMarioLoc(world, gridSize)
            print "-------------mario: ", marioLoc
            #if len(bestPath) == 0 or curPlanCounter == 0:
            dummy, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
            print "plan: ", bestPath
            #curPlanCounter = curPlanCounter - 1

            if isSuccess: #if not, just use the old plan
                curPathCost = GetPlanCost(bestPath, objLoc, controller, True)
                prevPathCost = GetPlanCost(prevPath, objLoc, controller, True)
                print "cur plan Cost: ", curPathCost
                print "prev plan Cost: ", prevPathCost
                if prevPathCost > 0.9*curPathCost and len(prevPath) > 1:
                    #stay with old plan
                    print "stay with old plan"
                    bestPath = prevPath
                
            dummy = bestPath.pop(0)
            prevPath = bestPath[:]
            goal = bestPath.pop(0)
                    
            #print "goal: ", goal
            objLocWithGoal = tool.addGoalLoc(objLoc, goal)
            ob = (marioLoc, objLocWithGoal)
            allQ = controller.getAllQ(ob)
            print "allQ: ", allQ
            #print "internalReward: ", reward
            action = controller.step(reward, ob, realReward, isSuccess)
            print "action: ", action
            for event in pygame.event.get():
                #action = 0
               if event.type == pygame.QUIT: sys.exit()
            if isShow:
                screen.blit(env.getScreen(), (0, 0))
                pygame.display.flip()
        #rewardList.append((prevStepCount, stepCount, episodeReward))
    #print totalReward
    return rewardList, controller
Ejemplo n.º 5
0
def TestRun(controller, discrete_size, monsterMoveProb, objSet, maxStep,
            isEpisodeEnd, isShow, frameRate):
    print "MaxStep: ", maxStep
    size = 800, 800
    gridSize = (discrete_size, discrete_size)
    delay = 100
    interval = 50
    pygame.init()
    pygame.key.set_repeat(delay, interval)
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode(size)

    actionList = ((0, 1), (0, -1), (1, 0), (-1, 0))
    env = GridEnv.Grid((discrete_size, discrete_size), size, actionList,
                       monsterMoveProb)

    isTraining = not isEpisodeEnd
    #maxStep = 200

    numOfTurtle = objSet[0]
    numOfCoin = objSet[1]

    print "# coin ", numOfCoin
    print "# Turtle ", numOfTurtle
    print "isEpisodeEnd ", isEpisodeEnd

    count = 0

    totalReward = 0
    rewardList = []
    stepCount = 0
    while stepCount < maxStep:
        #for i in range(0, maxEpisode):
        #print totalReward
        #rewardList[i] = totalReward

        Save(controller, 'smart.db')
        world = env.start(numOfTurtle, numOfCoin)
        objLoc = tool.getObjLoc(world, gridSize)
        marioLoc = tool.getMarioLoc(world, gridSize)
        goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
        #print "plan: ", bestPath

        dummy = bestPath.pop(0)
        prevPath = bestPath[:]
        goal = bestPath.pop(0)
        #curPlanCounter = 3
        #print "goal: ", goal
        objLocWithGoal = tool.addGoalLoc(objLoc, goal)
        ob = (marioLoc, objLocWithGoal)
        action = controller.start(ob)

        count += 1
        prevStepCount = stepCount
        while stepCount < maxStep:
            stepCount = stepCount + 1
            clock.tick(frameRate)
            reward, world, flag, realReward, isSuccess = env.step(
                action, isTraining, goal)
            totalReward = totalReward + reward

            if flag:
                controller.end(reward, realReward, isSuccess)
                break
            objLoc = tool.getObjLoc(world, gridSize)
            marioLoc = tool.getMarioLoc(world, gridSize)
            print "-------------mario: ", marioLoc
            #if len(bestPath) == 0 or curPlanCounter == 0:
            dummy, bestPath = GetPlan(discrete_size, marioLoc, objLoc,
                                      controller)
            print "plan: ", bestPath
            #curPlanCounter = curPlanCounter - 1

            if isSuccess:  #if not, just use the old plan
                curPathCost = GetPlanCost(bestPath, objLoc, controller, True)
                prevPathCost = GetPlanCost(prevPath, objLoc, controller, True)
                print "cur plan Cost: ", curPathCost
                print "prev plan Cost: ", prevPathCost
                if prevPathCost > 0.9 * curPathCost and len(prevPath) > 1:
                    #stay with old plan
                    print "stay with old plan"
                    bestPath = prevPath

            dummy = bestPath.pop(0)
            prevPath = bestPath[:]
            goal = bestPath.pop(0)

            #print "goal: ", goal
            objLocWithGoal = tool.addGoalLoc(objLoc, goal)
            ob = (marioLoc, objLocWithGoal)
            allQ = controller.getAllQ(ob)
            print "allQ: ", allQ
            #print "internalReward: ", reward
            action = controller.step(reward, ob, realReward, isSuccess)
            print "action: ", action
            for event in pygame.event.get():
                #action = 0
                if event.type == pygame.QUIT: sys.exit()
            if isShow:
                screen.blit(env.getScreen(), (0, 0))
                pygame.display.flip()
        #rewardList.append((prevStepCount, stepCount, episodeReward))
    #print totalReward
    return rewardList, controller
Ejemplo n.º 6
0
    print "# Turtle ", numOfTurtle
    print "isEpisodeEnd ", isEpisodeEnd

    isTraining = not isEpisodeEnd

    count = 0

    totalReward = 0
    rewardList = []
    stepCount = 0
    while stepCount < maxStep:
        #randomly choose a sub goal at the beginning of the episode
        goalDiff =  actionList[int(random.random()*len(actionList))]
        world = env.start(numOfTurtle, numOfCoin, goalDiff)
        objLoc = tool.getObjLoc(world, gridSize)
        marioLoc = tool.getMarioLoc(world, gridSize)
        goal = (marioLoc[0]+goalDiff[0], marioLoc[1]+goalDiff[1])
        objLocWithGoal = tool.addGoalLoc(objLoc, goal)
        ob = (marioLoc, objLocWithGoal)
        action = controller.start(ob)

        count += 1
        prevStepCount = stepCount
        episodeReward = 0
        while stepCount < maxStep:
            stepCount = stepCount + 1
            clock.tick(frameRate)
            reward, world, flag, realReward, isSuccess = env.step(action, isTraining)
            totalReward = totalReward + reward
            episodeReward = episodeReward + reward
            if flag: