Code Example #1
File: Planner.py Project: lono175/Planning
def GetPlanCost(inPath, objLoc, controller, isDump):
    Q = 0
    accProb = 1
    path = inPath[:]
    prev = path.pop(0) #remove the first one
    #print "--------------path: ", path
    step = 1
    for node in path:
        #print "goal: ", node
        objLocWithGoal = tool.addGoalLoc(objLoc, node)
        ob = (prev, objLocWithGoal)
        #print "observation: ", ob
        prob = controller.getLinkProb(ob, controller.prob)

        #temporary solution
        #if prob > 0:
            #prob = 0
        #prob = prob/2

        reward = controller.getLinkReward(ob, controller.realReward)
        failedReward = controller.getLinkReward(ob, controller.failedReward)

        newQ = pow(0.95, step) * accProb * (reward * prob + failedReward * (1 - prob))

        accProb = accProb * prob
        if isDump:
            print "newQ: ", newQ
            print "failedReward: ",  failedReward
            print "reward: ",  reward
            print "prob", prob
        Q = Q + newQ
        prev = node
        step = step + 1
    return Q
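
Taken together, the loop computes the plan's expected discounted return: Q is the sum over steps k of 0.95^k * P(all earlier links succeeded) * (p_k * reward_k + (1 - p_k) * failedReward_k), with accProb carrying the running product of link success probabilities. A standalone sketch of the same recursion with made-up link statistics (the (prob, reward, failedReward) triples below are assumptions, not values from the project):

gamma = 0.95
links = [(0.9, 1.0, -1.0), (0.8, 1.0, -1.0)]  # assumed (prob, reward, failedReward) per link
Q, accProb = 0.0, 1.0
for step, (prob, reward, failed) in enumerate(links, start=1):
    Q += pow(gamma, step) * accProb * (reward * prob + failed * (1 - prob))
    accProb *= prob
print(Q)  # ~1.247: the two-step plan's expected discounted return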
Code Example #2
File: GridEnvSim.py Project: lono175/Maze
    isTraining = not isEpisodeEnd

    count = 0

    totalReward = 0
    rewardList = []
    stepCount = 0
    while stepCount < maxStep:
        #randomly choose a sub goal at the beginning of the episode
        goalDiff = actionList[int(random.random() * len(actionList))]
        world = env.start(numOfTurtle, numOfCoin, goalDiff)
        objLoc = tool.getObjLoc(world, gridSize)
        marioLoc = tool.getMarioLoc(world, gridSize)
        goal = (marioLoc[0] + goalDiff[0], marioLoc[1] + goalDiff[1])
        objLocWithGoal = tool.addGoalLoc(objLoc, goal)
        ob = (marioLoc, objLocWithGoal)
        action = controller.start(ob)

        count += 1
        prevStepCount = stepCount
        episodeReward = 0
        while stepCount < maxStep:
            stepCount = stepCount + 1
            clock.tick(frameRate)
            reward, world, flag = env.step(action, isTraining)
            totalReward = totalReward + reward
            episodeReward = episodeReward + reward
            if flag:
                #print "episodeEnd: ", reward
                controller.end(reward)
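
The excerpt breaks off inside the episode loop. One note on the subgoal draw above: actionList[int(random.random() * len(actionList))] is a uniform pick over the four offsets, for which random.choice is the idiomatic equivalent (standalone sketch; actionList is restated here to keep it runnable):

import random

actionList = ((0, 1), (0, -1), (1, 0), (-1, 0))
goalDiff = random.choice(actionList)  # same uniform distribution as the index trick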
Code Example #3
File: Planner.py Project: lono175/Maze
def GetPlan(gridSize, marioLoc, objLoc, controller):

    diffGoal = ((0, 1), (0, -1), (1, 0), (-1, 0))
    DG = nx.DiGraph()
    #G=nx.path_graph(5)
    #add each node into the graph
    for x in range(0, gridSize):
        for y in range(0, gridSize):
            loc = (x, y)
            for diff in diffGoal:
                goal = (diff[0] + loc[0], diff[1] + loc[1])
                if goal[0] not in range(0, gridSize) or goal[1] not in range(0, gridSize):
                    continue
                objLocWithGoal = tool.addGoalLoc(objLoc, goal)
                ob = (loc, objLocWithGoal)
                #compute the link cost
                maxQ = controller.getMaxQ(ob)
                #print maxQ
                #assert(maxQ > -10) #don't handle monster
                linkCost = 1 / pow(2, (1 + maxQ))
                DG.add_weighted_edges_from([(loc, goal, linkCost)])

    goal = (0, 0)
    #for x in range(0, gridSize):
        #for y in range(0, gridSize):
            #goal = (x, y)
    pathList = nx.shortest_path(DG, source=marioLoc, target=goal, weighted=True)
    #print nx.single_source_dijkstra_path(DG, source = marioLoc)
    #print nx.single_source_dijkstra_path_length(DG,source=marioLoc)
    #print '------------------'
    #print nx.shortest_path(DG, source = marioLoc, target = (4, 4), weighted=True)
    #print nx.shortest_path_length(DG, source = marioLoc, target = (4, 4), weighted=True)

    #follow the path and find the one with maximum reward
    maxQ = -10000
    bestPath = []
    #for goal in pathList:
    Q = 0
    path = pathList
    #print "--------------path: ", path
    prev = path.pop(0) #remove the first one
    for node in path:
        #print "goal: ", node
        objLocWithGoal = tool.addGoalLoc(objLoc, node)
        ob = (prev, objLocWithGoal)
        #print "observation: ", ob
        nodeQ = controller.getMaxQ(ob)
        #print "nodeQ: ", nodeQ
        Q = Q + nodeQ
        prev = node
    #print "Q: ", Q
    if Q > maxQ:
        maxQ = Q
        bestPath = path
    #just return the next node in the best path
    return bestPath[0], bestPath
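
GetPlan reduces plan search to a shortest-path query: every cell is linked to its four in-bounds neighbours, and the edge weight 1/2^(1 + maxQ) shrinks as the learned Q-value grows, so the cheapest path tends to run along high-value links. A toy illustration with invented Q-values (note that weighted=True in the snippet is the old networkx keyword; current releases use weight='weight' as below):

import networkx as nx

# Hypothetical maxQ per directed edge; higher Q means a cheaper edge.
q = {((0, 0), (0, 1)): 2.0, ((0, 0), (1, 0)): -1.0,
     ((0, 1), (1, 1)): 1.0, ((1, 0), (1, 1)): 1.0}
DG = nx.DiGraph()
for (u, v), maxQ in q.items():
    DG.add_weighted_edges_from([(u, v, 1 / pow(2, 1 + maxQ))])
print(nx.shortest_path(DG, source=(0, 0), target=(1, 1), weight='weight'))
# [(0, 0), (0, 1), (1, 1)] -- the high-Q route wins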
Code Example #4
File: Planner.py Project: lono175/Maze
def TestRun(controller, discrete_size, monsterMoveProb, objSet, maxStep, isEpisodeEnd, isShow, frameRate):
    #controller.disableUpdate() #no update after training
    print "MaxStep: ", maxStep
    size = 800, 800
    gridSize = (discrete_size, discrete_size)
    delay = 100
    interval = 50
    pygame.init()
    pygame.key.set_repeat(delay, interval)
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode(size)

    actionList = ((0, 1), (0, -1), (1, 0), (-1, 0))
    env = GridEnv.Grid((discrete_size, discrete_size), size, actionList, monsterMoveProb)

    isTraining = not isEpisodeEnd
    #maxStep = 200

    numOfTurtle = objSet[0]
    numOfCoin = objSet[1]

    print "# coin ", numOfCoin
    print "# Turtle ", numOfTurtle
    print "isEpisodeEnd ", isEpisodeEnd

    count = 0
    
    totalReward = 0
    rewardList = []
    stepCount = 0
    while stepCount < maxStep:
    #for i in range(0, maxEpisode):
        #print totalReward
        #rewardList[i] = totalReward

        Save(controller, 'smart.db')
        world = env.start(numOfTurtle, numOfCoin)
        objLoc = tool.getObjLoc(world, gridSize)
        marioLoc = tool.getMarioLoc(world, gridSize)
        goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
        #print "goal: ", goal
        #print "bestPath: ", bestPath
        goal = bestPath.pop(0)
        #print "plan: ", bestPath
        curPlanCounter = 0
        objLocWithGoal = tool.addGoalLoc(objLoc, goal)
        ob = (marioLoc, objLocWithGoal)
        action = controller.start(ob)
            
        count += 1
        prevStepCount = stepCount
        while stepCount < maxStep:
            stepCount = stepCount + 1
            clock.tick(frameRate)
            reward, world, flag = env.step(action, isTraining)
            totalReward = totalReward + reward

            marioLoc = tool.getMarioLoc(world, gridSize)

            if marioLoc[0] == goal[0] and marioLoc[1] == goal[1]:
                reward = reward + 5
                #print "reward"
            else:
                reward = reward - 6
                #print "punish"
            if flag:
                controller.end(reward)
                break
            objLoc = tool.getObjLoc(world, gridSize)

            #if len(bestPath) == 0 or curPlanCounter == 0:
            dummy, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
            #print "plan: ", bestPath
            #curPlanCounter = curPlanCounter - 1
            goal = bestPath.pop(0)
            #goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
            #print "goal: ", goal
            objLocWithGoal = tool.addGoalLoc(objLoc, goal)
            ob = (marioLoc, objLocWithGoal)
            allQ = controller.getAllQ(ob)
            #print "allQ: ", allQ
            action = controller.step(reward, ob)
            #print "action: ", action
            for event in pygame.event.get():
               #action = 0
               if event.type == pygame.QUIT: sys.exit()
            if isShow:
                screen.blit(env.getScreen(), (0, 0))
                pygame.display.flip()
        #rewardList.append((prevStepCount, stepCount, episodeReward))
    print totalReward
    return rewardList, controller
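
The reward edit in the middle of the loop is subgoal shaping: before reaching controller.step, the environment reward gets +5 when Mario lands on the planner's subgoal and -6 otherwise. Distilled into a standalone helper (the function name is ours; tuple equality replaces the element-wise comparison):

def shape_reward(env_reward, mario_loc, subgoal):
    # +5 on reaching the planner's subgoal, -6 otherwise
    return env_reward + (5 if mario_loc == subgoal else -6)

assert shape_reward(1, (2, 3), (2, 3)) == 6
assert shape_reward(1, (2, 3), (4, 4)) == -5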
Code Example #5
def GetPlan(gridSize, marioLoc, objLoc, controller):

    diffGoal = ((0, 1), (0, -1), (1, 0), (-1, 0))
    DG = nx.DiGraph()
    #G=nx.path_graph(5)
    #add each node into the graph
    for x in range(0, gridSize):
        for y in range(0, gridSize):
            loc = (x, y)
            for diff in diffGoal:
                goal = (diff[0] + loc[0], diff[1] + loc[1])
                if goal[0] not in range(0, gridSize) or goal[1] not in range(
                        0, gridSize):
                    continue
                objLocWithGoal = tool.addGoalLoc(objLoc, goal)
                ob = (loc, objLocWithGoal)
                #compute the link cost
                maxQ = controller.getMaxQ(ob)
                #print maxQ
                assert (maxQ > -10)  #don't handle monster
                linkCost = 1 / (10 + maxQ)
                DG.add_weighted_edges_from([(loc, goal, linkCost)])

    #for x in range(0, gridSize):
    #for y in range(0, gridSize):
    #goal = (x, y)
    pathList = nx.single_source_dijkstra_path(DG, source=marioLoc)
    #print nx.single_source_dijkstra_path(DG, source = marioLoc)
    #print nx.single_source_dijkstra_path_length(DG,source=marioLoc)
    #print '------------------'
    #print nx.shortest_path(DG, source = marioLoc, target = (4, 4), weighted=True)
    #print nx.shortest_path_length(DG, source = marioLoc, target = (4, 4), weighted=True)

    #follow the path and find the one with maximum reward
    maxQ = -10000
    bestPath = []
    for goal in pathList:
        Q = 0
        path = pathList[goal]
        #print "--------------path: ", path
        prev = path.pop(0)  #remove the first one
        for node in path:
            #print "goal: ", node
            objLocWithGoal = tool.addGoalLoc(objLoc, node)
            ob = (prev, objLocWithGoal)
            #print "observation: ", ob
            nodeQ = controller.getMaxQ(ob)
            #print "nodeQ: ", nodeQ
            Q = Q + nodeQ
            prev = node
        #print "Q: ", Q
        if Q > maxQ:
            maxQ = Q
            bestPath = path
    #just return the next node in the best path
    return bestPath[0], bestPath
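
This variant swaps the exponential edge weight for 1 / (10 + maxQ) and enables the assert: the guard is what keeps every weight positive, which Dijkstra requires. A small sketch of the mapping (the sample Q-values are invented):

def link_cost(max_q):
    assert max_q > -10  # mirrors the snippet's guard; keeps the weight positive
    return 1.0 / (10 + max_q)

print(link_cost(0.5))   # ~0.095 -- high-value link, cheap edge
print(link_cost(-9.5))  # 2.0 -- near the -10 bound, expensive edge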
Code Example #6
def TestRun(controller, discrete_size, monsterMoveProb, objSet, maxStep,
            isEpisodeEnd, isShow, frameRate):
    controller.disableUpdate()  #no update after training
    print "MaxStep: ", maxStep
    size = 800, 800
    gridSize = (discrete_size, discrete_size)
    delay = 100
    interval = 50
    pygame.init()
    pygame.key.set_repeat(delay, interval)
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode(size)

    actionList = ((0, 1), (0, -1), (1, 0), (-1, 0))
    env = GridEnv.Grid((discrete_size, discrete_size), size, actionList,
                       monsterMoveProb)

    isTraining = not isEpisodeEnd
    #maxStep = 200

    numOfTurtle = objSet[0]
    numOfCoin = objSet[1]

    print "# coin ", numOfCoin
    print "# Turtle ", numOfTurtle
    print "isEpisodeEnd ", isEpisodeEnd

    count = 0

    totalReward = 0
    rewardList = []
    stepCount = 0
    while stepCount < maxStep:
        #for i in range(0, maxEpisode):
        #print totalReward
        #rewardList[i] = totalReward

        world = env.start(numOfTurtle, numOfCoin)
        objLoc = tool.getObjLoc(world, gridSize)
        marioLoc = tool.getMarioLoc(world, gridSize)
        goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
        goal = bestPath.pop(0)
        #print "plan: ", bestPath
        curPlanCounter = 3
        print "goal: ", goal
        objLocWithGoal = tool.addGoalLoc(objLoc, goal)
        ob = (marioLoc, objLocWithGoal)
        action = controller.start(ob)

        count += 1
        prevStepCount = stepCount
        while stepCount < maxStep:
            stepCount = stepCount + 1
            clock.tick(frameRate)
            reward, world, flag = env.step(action, isTraining)
            totalReward = totalReward + reward

            if flag:
                controller.end(reward)
                break
            objLoc = tool.getObjLoc(world, gridSize)
            marioLoc = tool.getMarioLoc(world, gridSize)
            if len(bestPath) == 0 or curPlanCounter == 0:
                dummy, bestPath = GetPlan(discrete_size, marioLoc, objLoc,
                                          controller)
                print "plan: ", bestPath
            curPlanCounter = curPlanCounter - 1
            goal = bestPath.pop(0)
            #goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
            print "goal: ", goal
            objLocWithGoal = tool.addGoalLoc(objLoc, goal)
            ob = (marioLoc, objLocWithGoal)
            allQ = controller.getAllQ(ob)
            print "allQ: ", allQ
            action = controller.step(reward, ob)
            print "action: ", action
            for event in pygame.event.get():
                #action = 0
                if event.type == pygame.QUIT: sys.exit()
            if isShow:
                screen.blit(env.getScreen(), (0, 0))
                pygame.display.flip()
        #rewardList.append((prevStepCount, stepCount, episodeReward))
    print totalReward
    return rewardList, controller
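
Unlike Example #4, which replans on every step, this version replans only when the current plan is exhausted or curPlanCounter reaches zero. The cadence as a sketch (names are illustrative; the snippet seeds the counter once per episode, whereas resetting it on each replan, as here, makes the cadence periodic):

def next_subgoal(best_path, counter, replan):
    # Replan when the plan is used up or the counter expires; otherwise keep
    # consuming the existing plan. The counter ticks down every step.
    if len(best_path) == 0 or counter == 0:
        best_path = replan()  # stands in for GetPlan(...)
        counter = 3
    return best_path.pop(0), best_path, counter - 1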
Code Example #7
File: Planner.py Project: lono175/Planning
def GetPlan(gridSize, marioLoc, objLoc, controller):

    diffGoal = ((0, 1), (0, -1), (1, 0), (-1, 0))
    DG = nx.DiGraph()
    #G=nx.path_graph(5)
    #add each node into the graph
    for x in range(0, gridSize):
        for y in range(0, gridSize):
            loc = (x, y)

            for diff in diffGoal:
                goal = (diff[0] + loc[0], diff[1] + loc[1])
                if goal[0] not in range(0, gridSize) or goal[1] not in range(0, gridSize):
                    continue
                objLocWithGoal = tool.addGoalLoc(objLoc, goal)
                ob = (loc, objLocWithGoal)
                #compute the link cost
                #linkCost = controller.getLinkCost(ob, controller.prob)

                #temporary solution, log prob is max at 0
                #if linkCost > 0:
                    #linkCost = 0
                #linkCost = -linkCost #need to be positive

                #print maxQ
                #assert(maxQ > -10) #don't handle monster
                linkCost = controller.getLinkCost(ob, controller.agent[0])
                linkCost = 1 / pow(2, linkCost)
                DG.add_weighted_edges_from([(loc, goal, linkCost)])

    #for x in range(0, gridSize):
        #for y in range(0, gridSize):
            #goal = (x, y)
    pathList = nx.single_source_dijkstra_path(DG, source=marioLoc)
    #print nx.single_source_dijkstra_path(DG, source = marioLoc)
    #print nx.single_source_dijkstra_path_length(DG,source=marioLoc)
    #print '------------------'
    #print nx.shortest_path(DG, source = marioLoc, target = (4, 4), weighted=True)
    #print nx.shortest_path_length(DG, source = marioLoc, target = (4, 4), weighted=True)

    #follow the path and find the one with maximum reward

    maxQ = -1000000
    bestPath = []
    for goal in pathList:
        
        #cannot stay in the current state
        if goal[0] == marioLoc[0] and goal[1] == marioLoc[1]:
            continue
        Q = GetPlanCost(pathList[goal], objLoc, controller, False)
        if Q > maxQ:
            maxQ = Q
            bestPath = pathList[goal]
    #just return the next node in the best path
    #print "path cost: ", maxQ
    return bestPath[0], bestPath
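
The commented-out lines above hint that getLinkCost returns a log-probability. If it is log base 2 of the link's success probability p (an assumption; the snippet does not pin the base down), then 1 / 2**cost reduces to 1/p, so unreliable links get steeply larger edge weights:

import math

for p in (0.9, 0.5, 0.1):
    cost = math.log(p, 2)         # what getLinkCost might return
    weight = 1 / pow(2, cost)     # the snippet's transform
    print((p, round(weight, 3)))  # 1/p: 1.111, 2.0, 10.0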
Code Example #8
File: Planner.py Project: lono175/Planning
def TestRun(controller, discrete_size, monsterMoveProb, objSet, maxStep, isEpisodeEnd, isShow, frameRate):
    print "MaxStep: ", maxStep
    size = 800, 800
    gridSize = (discrete_size, discrete_size)
    delay = 100
    interval = 50
    pygame.init()
    pygame.key.set_repeat(delay, interval)
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode(size)

    actionList = ((0, 1), (0, -1), (1, 0), (-1, 0))
    env = GridEnv.Grid((discrete_size, discrete_size), size, actionList, monsterMoveProb)

    isTraining = not isEpisodeEnd
    #maxStep = 200

    numOfTurtle = objSet[0]
    numOfCoin = objSet[1]

    print "# coin ", numOfCoin
    print "# Turtle ", numOfTurtle
    print "isEpisodeEnd ", isEpisodeEnd

    count = 0

    totalReward = 0
    rewardList = []
    stepCount = 0
    while stepCount < maxStep:
        #for i in range(0, maxEpisode):
        #print totalReward
        #rewardList[i] = totalReward

        Save(controller, 'smart.db')
        world = env.start(numOfTurtle, numOfCoin)
        objLoc = tool.getObjLoc(world, gridSize)
        marioLoc = tool.getMarioLoc(world, gridSize)
        goal, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
        #print "plan: ", bestPath

        dummy = bestPath.pop(0)
        prevPath = bestPath[:]
        goal = bestPath.pop(0)
        #curPlanCounter = 3
        #print "goal: ", goal
        objLocWithGoal = tool.addGoalLoc(objLoc, goal)
        ob = (marioLoc, objLocWithGoal)
        action = controller.start(ob)

        count += 1
        prevStepCount = stepCount
        while stepCount < maxStep:
            stepCount = stepCount + 1
            clock.tick(frameRate)
            reward, world, flag, realReward, isSuccess = env.step(action, isTraining, goal)
            totalReward = totalReward + reward

            if flag:
                controller.end(reward, realReward, isSuccess)
                break
            objLoc = tool.getObjLoc(world, gridSize)
            marioLoc = tool.getMarioLoc(world, gridSize)
            print "-------------mario: ", marioLoc
            #if len(bestPath) == 0 or curPlanCounter == 0:
            dummy, bestPath = GetPlan(discrete_size, marioLoc, objLoc, controller)
            print "plan: ", bestPath
            #curPlanCounter = curPlanCounter - 1

            if isSuccess: #if not, just use the old plan
                curPathCost = GetPlanCost(bestPath, objLoc, controller, True)
                prevPathCost = GetPlanCost(prevPath, objLoc, controller, True)
                print "cur plan Cost: ", curPathCost
                print "prev plan Cost: ", prevPathCost
                if prevPathCost > 0.9 * curPathCost and len(prevPath) > 1:
                    #stay with old plan
                    print "stay with old plan"
                    bestPath = prevPath
                
            dummy = bestPath.pop(0)
            prevPath = bestPath[:]
            goal = bestPath.pop(0)
                    
            #print "goal: ", goal
            objLocWithGoal = tool.addGoalLoc(objLoc, goal)
            ob = (marioLoc, objLocWithGoal)
            allQ = controller.getAllQ(ob)
            print "allQ: ", allQ
            #print "internalReward: ", reward
            action = controller.step(reward, ob, realReward, isSuccess)
            print "action: ", action
            for event in pygame.event.get():
                #action = 0
                if event.type == pygame.QUIT: sys.exit()
            if isShow:
                screen.blit(env.getScreen(), (0, 0))
                pygame.display.flip()
        #rewardList.append((prevStepCount, stepCount, episodeReward))
    #print totalReward
    return rewardList, controller
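
The isSuccess branch adds hysteresis: the fresh plan replaces the previous one only when the old plan's value (per GetPlanCost in Example #1, higher is better) drops below 90% of the new plan's. Extracted as a sketch (assumes positive plan values; with negative values the 0.9 factor would cut the other way):

def choose_plan(prev_path, prev_q, cur_path, cur_q):
    # Stick with the old plan while it is worth at least 90% of the new one
    # and still has more than one step left; this damps plan thrash.
    if prev_q > 0.9 * cur_q and len(prev_path) > 1:
        return prev_path
    return cur_path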
Code Example #9
File: GridEnvSim.py Project: lono175/Planning
    isTraining = not isEpisodeEnd

    count = 0

    totalReward = 0
    rewardList = []
    stepCount = 0
    while stepCount < maxStep:
        #randomly choose a sub goal at the beginning of the episode
        goalDiff = actionList[int(random.random() * len(actionList))]
        world = env.start(numOfTurtle, numOfCoin, goalDiff)
        objLoc = tool.getObjLoc(world, gridSize)
        marioLoc = tool.getMarioLoc(world, gridSize)
        goal = (marioLoc[0] + goalDiff[0], marioLoc[1] + goalDiff[1])
        objLocWithGoal = tool.addGoalLoc(objLoc, goal)
        ob = (marioLoc, objLocWithGoal)
        action = controller.start(ob)

        count += 1
        prevStepCount = stepCount
        episodeReward = 0
        while stepCount < maxStep:
            stepCount = stepCount + 1
            clock.tick(frameRate)
            reward, world, flag, realReward, isSuccess = env.step(action, isTraining)
            totalReward = totalReward + reward
            episodeReward = episodeReward + reward
            if flag:
                #print "episodeEnd: ", reward
                controller.end(reward, realReward, isSuccess)
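
This excerpt, like Example #2, breaks off inside the episode loop. Compared with that Maze-side loop, env.step and controller.end here carry two extra fields, realReward and isSuccess; the internalReward comment in Example #8 suggests reward is the controller's internal signal while realReward is the environment's true one. Hypothetical stubs mirroring the widened contract (bodies are placeholders, not the real classes):

class Env(object):
    def step(self, action, isTraining):
        # -> (reward, world, flag, realReward, isSuccess)
        return 0.0, None, False, 0.0, False

class Controller(object):
    def end(self, reward, realReward, isSuccess):
        pass  # episode bookkeeping would go here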