Esempio n. 1
0
def test4():
    env = ConnectFourEnv(display=True)
    simpleAgent1 = SimpleAgent(env, 1, 2)
    simpleAgent2 = SimpleAgent(env, 2, 1)

    state = env.getState()
    while True:
        acttion1 = simpleAgent1.getAction(state)
        state, gameOver, winner = env.act(1, acttion1, True)
        time.sleep(0.3)
        if gameOver:
            break
        acttion2 = simpleAgent2.getAction(state)
        state, gameOver, winner = env.act(2, acttion2, True)
        time.sleep(0.3)
        if gameOver:
            break

    if winner == -1:
        print 'Game draw'
    else:
        print 'Player %s won' % winner
    time.sleep(5)
Esempio n. 2
0
def test3():
    env = ConnectFourEnv(display=True)
    simpleAgent = SimpleAgent(env, 2, 1)

    state, gameOver, winner = env.act(1, 0)
    state, gameOver, winner = env.act(2, 6)
    time.sleep(0.5)

    state, gameOver, winner = env.act(1, 1)
    state, gameOver, winner = env.act(2, 5)
    time.sleep(0.5)

    state, gameOver, winner = env.act(1, 3)
    acttion = simpleAgent.getAction(env.state)
    state, gameOver, winner = env.act(2, acttion)
    time.sleep(5)
Esempio n. 3
0
class MCTS:
    def __init__(self, settings):
        self.settings = settings
        self.totalGameNo = settings['total_game_no']
        self.playedGameNo = 0
        self.simStepNo = settings['sim_step_no']
        self.saveStepNo = settings['save_step_no']
        self.display = settings['display']
        self.env = ConnectFourEnv(self.display)
        self.visited = {}           # (stateStr, turn, action), visited
        self.won = {}              # (stateStr, turn, action), won
        self.DRAW = -1
        self.PLAYER = 1
        self.OPP = 2
        self.simpleAgent = SimpleAgent(self.env, self.OPP, self.PLAYER)
        self.winnerResult = {self.DRAW:0, self.PLAYER:0, self.OPP:0}
        self.greedyEpsilon = 0.1

        self.startTime = time.strftime('%Y%m%d_%H%M%S')
        logFile="output/%s.log" % (self.startTime)            
        util.Logger(logFile)

        self.testMode = False
        self.debugger = DebugInput(self).start()

    def initializeProcesses(self):
        # Multi process jobs
        self.multiCpuNo = self.settings['multi_cpu_no']
        self.queueList = []
        self.processList = []
        self.queueChild2Parent = Queue()
        for i in range(self.multiCpuNo):        
            queueParent2Child = Queue()
            self.queueList.append(queueParent2Child)
            #print 'creating a child process[%s]' % i
            p = Process(target=self.simulateOne, args=(i, self.simStepNo / self.multiCpuNo, 
                                                       queueParent2Child, self.queueChild2Parent))
            p.start()
            self.processList.append(p)

    def __getstate__(self):
        d = dict(self.__dict__)
        del d['queueList']
        del d['processList']
        del d['queueChild2Parent']
        return d

    def printEnv(self):
        print 'Start time: %s' % self.startTime
        print '[ Running Environment ]'
        for key in self.settings.keys():
            print '{} : '.format(key).ljust(30) + '{}'.format(self.settings[key])
        print 'width: %s, height: %s' % (self.env.width, self.env.height)
    
    def getStateStr(self, state):
        #return np.array_str(state)
        return hash(state.tostring())
    
    def simulate(self, orgState):
        time1 = time.time()
        for i in range(self.multiCpuNo):
            self.queueList[i].put((orgState, self.visited, self.won))
            
        finishedChildNo = 0
        for i in range(self.multiCpuNo):
            childID, winnerList, historyList, expandedList = self.queueChild2Parent.get()
            
            for expandedNode in expandedList:
                if expandedNode not in self.visited:
                    self.visited[expandedNode] = 0
                    self.won[expandedNode] = 0
            
            for winner, history in zip(winnerList, historyList):
                self.updateTreeInfo(winner, history)
            
            finishedChildNo += 1
            
            #print 'simulateOne done %s' % childID
            if finishedChildNo == self.multiCpuNo:
                break
        #print 'all simulateOne finished'
        time2 = time.time()
        
        #print 'simulte took %.2f sec' % (time2 - time1)
        

    def simulateOne(self, id, simStepNo, queueParent2Child, queueChild2Parent):
        while True:
            orgState, visited, won = queueParent2Child.get()
            self.visited = visited
            self.won = won
            self.env.reset()
            self.env.setState(orgState)
            
            self.visited['haha'] = 'dj'

            historyList = []
            winnerList = []
            expandedList = []
            state = orgState.copy()
            turn = self.PLAYER
            history = []
            expanded = False
    
            for i in range(simStepNo):
                if turn == self.PLAYER:
                    availableActions = self.env.availableActions(state)
                    stateStr = self.getStateStr(state)
                    totalStateVisited = 0
                    # check every actions are visited before
                    for action in availableActions:
                        stateActionPair = (stateStr, turn, action)
                        if stateActionPair in self.visited:
                            totalStateVisited += self.visited[stateActionPair]
                        else:
                            totalStateVisited = 0
    
                    if totalStateVisited == 0:
                        action = self.getRandomAction(state)
                    else:
                        maxUpperBound = 0            
                        for action in availableActions:
                            stateActionPair = (stateStr, turn, action)
                            won = self.won.get(stateActionPair, 0)
                            visited = max(self.visited.get(stateActionPair, 1), 1)
                            winRatio = float(won) / visited
                            upperBound = winRatio + math.sqrt(2 * math.log(totalStateVisited) / visited)
                            if upperBound >= maxUpperBound:
                                maxUpperBound = upperBound
                                selectedAction = action
                        action = selectedAction
                elif turn == self.OPP:
                    if 'sim_opp_policy' in self.settings and self.settings['sim_opp_policy'] == 'simple':
                        action = self.simpleAgent.getAction(state)
                    else:
                        action = self.getRandomAction(state)
                
                stateStr = self.getStateStr(state)
                stateActionPair = (stateStr, turn, action)
                if expanded == False and stateActionPair not in self.visited:
                    canExpand = True
                    expanded = True
                else:
                    canExpand = False
                    
                state, gameOver, winner = self.doAction(state, action, turn, history, expandedList, canExpand, False)
                              
                if turn == self.PLAYER:
                    turn = self.OPP
                else:
                    turn = self.PLAYER
    
                if gameOver:
                    self.updateTreeInfo(winner, history)
                    historyList.append(history)
                    winnerList.append(winner)
                    
                    # restart sim
                    self.env.reset()
                    self.env.setState(orgState)
                    state = orgState.copy()                
                    turn = self.PLAYER
                    history = []
                    expanded = False
                    continue
    
            queueChild2Parent.put((id, winnerList, historyList, expandedList))

    def getRandomAction(self, state, availableActions=None):
        if availableActions == None:
            availableActions = self.env.availableActions(state)
        actionIndex = random.randint(0, len(availableActions)-1)
        return availableActions[actionIndex]

    def getAction(self, state, turn):
        availableActions = self.env.availableActions(state)
        
        if len(availableActions) == 1:
            return availableActions[0]
        
        maxAction = -1
        maxWinRatio = 0
        availableActions = self.env.availableActions(state)
        stateStr = self.getStateStr(state)
        for action in availableActions:
            stateActionPair = (stateStr, turn, action)
            if stateActionPair not in self.visited:
                continue
            winRatio = float(self.won.get(stateActionPair, 0)) / max(self.visited.get(stateActionPair, 1), 1)
            if winRatio >= maxWinRatio:
                maxWinRatio = winRatio
                maxAction = action

        return maxAction
        
    def doAction(self, state, action, turn, history, expandedList, canExpand, display):
        newState, gameOver, winner = self.env.act(turn, action, display)
        
        stateStr = self.getStateStr(state)
        stateActionPair = (stateStr, turn, action)
        if stateActionPair not in self.visited and canExpand:
            self.visited[stateActionPair] = 0
            self.won[stateActionPair] = 0
            if expandedList != None:
                expandedList.append(stateActionPair)
        history.append(stateActionPair)
        return newState, gameOver, winner
        
    def updateTreeInfo(self, winner, history):
        """ Update win result from the current node to the top node """

        for stateActionPair in history:
            if stateActionPair in self.visited:
                self.visited[stateActionPair] += 1
                _, turn, _ = stateActionPair
                if turn == winner:
                    self.won[stateActionPair] += 1
    
    def printHistory(self, history):
        step = 0
        print '\n[ history ]'
        for stateActionPair in history:
            state, turn, action = stateActionPair
            if stateActionPair in self.visited:
                visited = self.visited[stateActionPair]
                won = self.won[stateActionPair]
            else:
                visited = 0
                won = 0
                
            print 'step[%s] turn=%s, action=%s, visited=%s, won=%s' % \
                    (step, turn, action, visited, won)
            step += 1
        print ''
        
    def printResult(self):
        print 'total states: %s' % len(self.visited)
                    
    def save(self, step):
        if os.path.exists('snapshot') == False:
            os.makedirs('snapshot')
        fileName = 'snapshot/mcts_%s' % step
        with open(fileName + '.pickle', 'wb') as f:
            pickle.dump(self, f)
        
    def gogo(self):
        self.initializeProcesses()

        lastResult = []
        lastResultWin = 0
        for i in range(self.totalGameNo):
            self.env.reset()
            state = self.env.getState()
            history = []
            turn = random.randint(self.PLAYER, self.OPP)
            startTime = time.time()
            
            while True:
                if turn == self.PLAYER:
                    self.simulate(state)
                    if settings['player_action'] == 'egreedy':
                        action = self.getActionEGreedy(state, self.PLAYER)
                    else:
                        action = self.getAction(state, self.PLAYER)
                elif turn == self.OPP:
                    if settings['opponent'] == 'user':
                        action = self.env.getManualAction(state)
                    else:
                        action = self.simpleAgent.getAction(state)
                
                state, gameOver, winner = self.doAction(state, action, turn, history, None, True, True)

                if gameOver:
                    break
                
                if turn == self.PLAYER:
                    turn = self.OPP
                else:
                    turn = self.PLAYER

            elapsed = time.time() - startTime
            
            if settings['opponent'] == 'user':
                self.env.showWinner(winner)
                
            self.playedGameNo += 1
            
            self.winnerResult[winner] += 1
            if winner == -1:
                print 'Game draw'
            else:
                mcts.updateTreeInfo(winner, history)
                if winner == self.PLAYER:
                    lastResultWin += 1
                if len(lastResult) == 100:
                    todel = lastResult.pop(0)
                    if todel == 1:
                        lastResultWin -= 1
                lastResult.append(winner)
                lastRatio = float(lastResultWin) * 100 / len(lastResult)
                #mcts.printResult()
                winRatio = float(self.winnerResult[self.PLAYER]) * 100 \
                                     / (self.winnerResult[self.OPP] + self.winnerResult[self.PLAYER])
                
                if winner == 1:
                    winStr = 'Win'
                else:
                    winStr = 'Lose'
                print 'Game %s : %s, %s, total=%.0f%%, last 100=%.0f%%, %.1fs' % (self.playedGameNo, self.winnerResult, winStr, winRatio, lastRatio, elapsed)
            
            if self.playedGameNo % self.saveStepNo == 0:
                self.save(self.playedGameNo)
            #time.sleep(5)
    
        self.debugger.finish()