Esempio n. 1
0
def think(state):
    """Pick a move by flat Monte Carlo evaluation with greedy playouts.

    Every legal move is tried ROLLOUTS times: the state is copied, the move
    is applied, and the game is played to a terminal state with
    ``greedo.think`` choosing each subsequent move.  The move with the
    highest mean final score for the player to move is returned, and a
    summary line is printed.

    Args:
        state: Game state to choose a move for.  It is copied before each
            playout; the playouts only mutate the copies.

    Returns:
        The move with the best mean playout score.  If ``state.get_moves()``
        yields nothing, the move suggested by ``greedo.think(state)``.
    """
    moves = state.get_moves()

    # Fallback answer in case there is no move to evaluate below.
    best_move = greedo.think(state)
    best_expectation = float("-inf")

    me = state.get_whose_turn()

    def rollout(sim_state):
        # Play sim_state out in place and score the result for `me`.
        while not sim_state.is_terminal():
            sim_state.apply_move(greedo.think(sim_state))
        return sim_state.get_score(me)

    for move in moves:
        total_score = 0.0

        # Exactly one playout per fresh copy.  A playout leaves its state
        # terminal, so re-running it on the same copy would only re-read the
        # same score and inflate the total relative to the ROLLOUTS divisor.
        for _ in range(ROLLOUTS):
            rollout_state = state.copy()
            rollout_state.apply_move(move)
            total_score += rollout(rollout_state)

        expectation = total_score / ROLLOUTS

        if expectation > best_expectation:
            best_expectation = expectation
            best_move = move

    print("{} rollsout with move {} with expected score {}".format(
        me, best_move, best_expectation))
    return best_move
Esempio n. 2
0
def think(state):
    """Pick a move using depth-limited greedy playouts.

    For each legal move, ROLLOUT playouts are run on a copy of ``state``:
    the move is applied, then up to DEPTH further moves chosen by
    ``greedo.think`` are applied (stopping early at a terminal state).  The
    score of the player to move is summed over the playouts and the move
    with the highest total wins; on equal totals the later move in
    ``state.get_moves()`` order is kept.

    Args:
        state: Game state to choose a move for.  Only copies are mutated.

    Returns:
        The move with the highest summed playout score.

    Note:
        ``moves[0]`` is read unconditionally, so this fails if the state has
        no legal moves.
    """
    me = state.get_whose_turn()
    moves = state.get_moves()
    best_move = moves[0]
    best_score = float('-inf')

    for m in moves:
        total_score = 0.0

        for _ in range(ROLLOUT):
            rollout_state = state.copy()
            rollout_state.apply_move(m)

            # Bounded playout: at most DEPTH greedy follow-up moves.
            for _step in range(DEPTH):
                if rollout_state.is_terminal():
                    break
                rollout_state.apply_move(greedo.think(rollout_state))

            total_score += rollout_state.get_score(me)

        # Totals are comparable across moves because every move gets the
        # same number of playouts, so no division by ROLLOUT is needed.
        if total_score >= best_score:
            best_score = total_score
            best_move = m

    return best_move
Esempio n. 3
0
def think(state):
    """Choose a move with Monte Carlo tree search and greedy playouts.

    Runs 1000 iterations of select / expand / rollout / backpropagate,
    starting each one from a fresh copy of ``state``.  Playouts apply
    ``greedo.think`` moves until ``get_moves()`` is empty.  Every node on
    the visited path is then updated with the final score of the player who
    made the move leading into that node, so each node's statistics are
    kept from the perspective of the player who chose it.

    Args:
        state: Game state to choose a move for; it is copied before each
            simulation.

    Returns:
        The ``move`` of the root child with the most ``visits`` (the last
        such child in ``rootnode.children`` order on ties).

    Note:
        If the root never gains a child (presumably when ``state`` has no
        legal moves), the final lookup fails with an IndexError.
    """
    rootnode = Node(state, None, None)
    # No move leads into the root; score it for the player to move.  Its
    # score does not take part in choosing the returned move below.
    root_player = state.get_whose_turn()

    for _ in range(1000):
        node = rootnode
        test_state = state.copy()
        # (node, player who moved into it) for every node on this path.
        # Recorded per step because whose turn it is is read from the state
        # rather than assumed to alternate.
        path = [(rootnode, root_player)]

        # Select: descend while the node has no untried moves but has
        # children.
        while not node.untried_moves and node.children:
            mover = test_state.get_whose_turn()
            node = node.UCTSelectChild()
            test_state.apply_move(node.move)
            path.append((node, mover))

        # Expand: try one untried move, if the node has any.
        if node.untried_moves:
            m = random.choice(node.untried_moves)
            mover = test_state.get_whose_turn()
            test_state.apply_move(m)
            node = node.addChild(test_state, m, mover)
            path.append((node, mover))

        # Rollout: greedy playout until no moves remain.
        while test_state.get_moves():
            test_state.apply_move(greedo.think(test_state))

        # Backpropagate from the leaf up, scoring each node for the player
        # who moved into it.
        for visited, mover in reversed(path):
            visited.update(test_state.get_score(mover))

    # Most-visited child of the root.
    return sorted(rootnode.children, key=lambda c: c.visits)[-1].move
Esempio n. 4
0
def think(state):
    """Choose a move with Monte Carlo tree search and greedy playouts.

    Performs ITERMAX search iterations from ``state``.  Each iteration walks
    down the tree, expands one randomly chosen untried move if the reached
    node has any, plays the game out with ``greedo.think`` until the state is
    terminal, and then adds the final score for ``node.who`` to every node
    from the leaf back to the root.

    Args:
        state: Game state to choose a move for; each iteration works on a
            copy.

    Returns:
        The ``move`` of the root child with the highest mean reward
        (``reward / times_visited``).
    """
    # The root is tagged with the player to move; presumably Node exposes
    # this value as `who` - confirm against the Node class.
    rootnode = Node(state=state, last_move=state.get_whose_turn())
    rootnode.times_visited = 0.0

    def mean_reward(child):
        # Average playout score accumulated on this child.
        return child.reward / child.times_visited

    for _ in range(ITERMAX):
        current = rootnode
        sim_state = state.copy()

        # Selection: keep descending until a node still has untried moves
        # or has no children.
        while True:
            if current.untried_moves or not current.child_nodes:
                break
            current = current.uct_select_child()
            sim_state.apply_move(current.move)

        # Expansion: add one child for a randomly picked untried move.
        if current.untried_moves:
            picked = random.choice(current.untried_moves)
            mover = sim_state.get_whose_turn()
            sim_state.apply_move(picked)
            current = current.add_child(picked, sim_state, last_move=mover)

        # Rollout: let greedo finish the game.
        while not sim_state.is_terminal():
            sim_state.apply_move(greedo.think(sim_state))

        # Backpropagation: credit each node on the path with the final
        # score of its own `who`.
        while current is not None:
            current.times_visited += 1
            current.reward += sim_state.get_score(current.who)
            current = current.parent

    return max(rootnode.child_nodes, key=mean_reward).move
Esempio n. 5
0
 def rollout(state):
     """Play ``state`` out in place with greedo and return the score for ``me``.

     ``me`` is taken from the enclosing scope.  The given state is mutated
     until it is terminal.
     """
     while True:
         if state.is_terminal():
             return state.get_score(me)
         state.apply_move(greedo.think(state))