Example no. 1
0
 def action(self, state, player=1):
     """Epsilon-greedy move selection for *player*.

     With probability self.epsilon a random legal move is chosen
     (exploration); otherwise the greedy move is taken. The board that
     results from the chosen move is stored in self.previous_state.
     """
     explore = random.random() < self.epsilon
     move = c4.getRandomMove(state) if explore else self.greedy(state)
     # Remember the successor board so later learning steps can refer to it.
     self.previous_state = c4.makeMove(state, player, move)
     return move
Example no. 2
0
 def action(self, state, player=1):
     """Choose a move epsilon-greedily and cache the post-move board.

     Exploits (greedy move) with probability 1 - self.epsilon, otherwise
     explores with a uniformly random legal move.
     """
     if random.random() >= self.epsilon:
         chosen = self.greedy(state)
     else:
         chosen = c4.getRandomMove(state)
     # Store the board as it looks after our move.
     self.previous_state = c4.makeMove(state, player, chosen)
     return chosen
Example no. 3
0
 def action(self, state):
     """Epsilon-greedy action; caches the successor's key and value.

     After choosing, the string form of the resulting board and its
     looked-up score are stored (presumably for a later TD backup —
     TODO confirm against the caller).
     """
     if random.random() < self.epsilon:
         chosen = self.random(state)
     else:
         chosen = self.greedy(state)
     successor = c4.makeMove(state, self.player, chosen)
     self.prevstate = self.state_string(successor)
     self.prevscore = self.lookup(successor)
     return chosen
 def action(self, state):
     """Pick a move via epsilon-greedy exploration.

     Records the chosen successor state (as a string key) and its current
     table value before returning the move.
     """
     exploring = random.random() < self.epsilon
     move = self.random(state) if exploring else self.greedy(state)
     next_board = c4.makeMove(state, self.player, move)
     # Cache state key and score of the position we just created.
     self.prevstate = self.state_string(next_board)
     self.prevscore = self.lookup(next_board)
     return move
Example no. 5
0
 def greedy(self, state):
     """Return the column (0-6) whose successor position scores highest.

     Ties go to the lowest column index. The best value found is backed
     up into the value table via self.backup before returning.
     """
     best_val = float('-inf')
     best_move = None
     for col in range(7):
         if not c4.isValidMove(state, col):
             continue
         score = self.lookup(c4.makeMove(state, self.player, col))
         if score > best_val:
             best_val = score
             best_move = col
     # NOTE(review): on a full board best_move stays None and -inf is
     # backed up — confirm callers never invoke this on a full board.
     self.backup(best_val)
     return best_move
 def greedy(self, state):
     """Greedy policy: choose the valid column maximizing the looked-up
     value of the resulting position (first maximum wins ties), backing
     the best value up into the table."""
     column = 0
     best, choice = float('-inf'), None
     while column < 7:
         if c4.isValidMove(state, column):
             val = self.lookup(c4.makeMove(state, self.player, column))
             if best < val:
                 best, choice = val, column
         column += 1
     self.backup(best)
     return choice
Example no. 7
0
 def greedy(self, state, player=1):
     """Greedy move chosen by the neural value network.

     Scores every valid column's successor with self.net and returns the
     first best-scoring column; the best score is backed up afterwards.
     """
     best_score = float("-inf")
     best_move = None
     # TODO: switch to c4.get_possible_moves once it exists in c4.
     for col in range(7):
         if not c4.isValidMove(state, col):
             continue
         successor = c4.makeMove(state, player, col)
         # Network expects a flat 126-feature vector — presumably an
         # encoding of the 6x7 board; TODO confirm in c4.getNeuralInput.
         score = self.net.predict(c4.getNeuralInput(successor).reshape(1, 126), batch_size=1)
         if score > best_score:
             best_score = score
             best_move = col
     self.backup(best_score)
     return best_move
Example no. 8
0
 def play(self):
     """Play one game: this agent as player 1 vs a uniformly random
     opponent. Returns 1 or -1 for the winner, 0.5 on a draw."""
     board = c4.getNewBoard()
     turn = 1
     while not c4.isBoardFull(board):
         if turn == 1:
             chosen = self.action(board, turn)
         else:
             chosen = c4.getRandomMove(board)
         board = c4.makeMove(board, turn, chosen)
         if c4.isWinner(board, turn):
             return turn
         turn *= -1
     return 0.5  # board filled with no winner: draw
Example no. 9
0
 def play(self):
     """Run a single game of this agent (player 1) against a random
     opponent (player -1).

     Returns the winning player's id (1 or -1), or 0.5 when the board
     fills without a winner.
     """
     state = c4.getNewBoard()
     current = 1
     while not c4.isBoardFull(state):
         move = self.action(state, current) if current == 1 else c4.getRandomMove(state)
         state = c4.makeMove(state, current, move)
         if c4.isWinner(state, current):
             return current
         current *= -1
     return 0.5  # draw
Example no. 10
0
def play(agent1, agent2):
    """Play agent1 (as player 1) against agent2 (as player -1).

    Each iteration the side to move picks an action, the board advances,
    and game_over is consulted. Returns the first non-EMPTY result of
    game_over, or EMPTY if the board fills (or starts full) undecided.
    """
    state = empty_state()
    player = 1
    # Fix: define winner before the loop — the original raised NameError
    # on the final `return winner` if the starting board was already full.
    winner = EMPTY
    while not c4.isBoardFull(state):
        if player > 0:
            move = agent1.action(state)
        else:
            move = agent2.action(state)
        state = c4.makeMove(state, player, move)
        winner = game_over(state)
        player *= -1
        if winner != EMPTY:
            return winner
    return winner
def play(agent1, agent2):
    """Alternate moves between two agents until game_over reports a result.

    agent1 plays as +1, agent2 as -1. Returns the decided winner from
    game_over, or EMPTY when the board fills undecided (including the
    degenerate case of a board that is full from the start).
    """
    state = empty_state()
    player = 1
    # Fix: initialize winner up front; previously a full starting board
    # skipped the loop and `return winner` raised NameError.
    winner = EMPTY
    while not c4.isBoardFull(state):
        move = agent1.action(state) if player > 0 else agent2.action(state)
        state = c4.makeMove(state, player, move)
        winner = game_over(state)
        player *= -1
        if winner != EMPTY:
            break
    return winner
Example no. 12
0
 def greedy(self, state, player=1):
     """Pick the valid column whose successor the value network rates
     highest (first maximum on ties); back the top score up afterwards."""
     top_val, top_move = float("-inf"), None
     # TODO: replace the fixed 0-6 scan with c4.get_possible_moves when available.
     for candidate in range(7):
         if not c4.isValidMove(state, candidate):
             continue
         features = c4.getNeuralInput(c4.makeMove(state, player, candidate))
         # Flatten to the 126-feature row the network was trained on
         # (presumably a 6x7-board encoding — TODO confirm).
         prediction = self.net.predict(features.reshape(1, 126), batch_size=1)
         if prediction > top_val:
             top_val, top_move = prediction, candidate
     self.backup(top_val)
     return top_move