Beispiel #1
0
	def envir(pos, X_0):
		"""Forward one move to ``ttt.play`` and report whether it was accepted.

		When ``ttt.play(pos, X_0)`` returns a truthy value, ``ttt.ai_play('0')``
		is called afterwards.

		Returns:
			A ``(reward, valid)`` tuple. The reward is always 0 and ``valid``
			is the value returned by ``ttt.play``.
		"""
		move_ok = ttt.play(pos, X_0)
		if move_ok:
			# Only trigger the follow-up ai_play call after an accepted move.
			ttt.ai_play('0')

		return 0, move_ok
def tourney(output, games=50, depth=0):
    """Tally the results of ``games`` rounds, each consisting of two games.

    Every round calls ``ttt.play`` twice: first with ``nn_ai(output)`` as the
    first argument and ``alpha_beta(depth)`` as the second, then with the two
    swapped. In the first game a ``winner`` of 1 counts as a win and -1 as a
    loss; in the second game the signs are reversed. A ``winner`` of 0 is a
    tie in both. Any other value is not counted.

    Returns:
        A dict with the integer counters ``'wins'``, ``'ties'`` and
        ``'losses'``.
    """
    tally = {'wins': 0, 'ties': 0, 'losses': 0}

    def record(winner, win_value):
        # ``win_value`` is the ``winner`` code that counts as a win for the
        # game being recorded; its negation counts as a loss.
        if winner == win_value:
            tally['wins'] += 1
        elif winner == 0:
            tally['ties'] += 1
        elif winner == -win_value:
            tally['losses'] += 1

    for _ in range(games):
        first_game = ttt.play(nn_ai(output), alpha_beta(depth))
        record(first_game.winner, 1)

        second_game = ttt.play(alpha_beta(depth), nn_ai(output))
        record(second_game.winner, -1)

    return tally
    def get(self):
        """Handle a GET request carrying a required ``board`` query argument.

        The ``board`` string is read from the request's query arguments. If
        ``is_safe_to_play`` accepts it, a board is built with ``create_board``
        and the result of ``play`` on that board is returned.

        Raises:
            BadRequest: when ``is_safe_to_play`` rejects the board string.
        """
        query_parser = reqparse.RequestParser()
        query_parser.add_argument("board", type=str, required=True, location="args")
        board_state = query_parser.parse_args()["board"]

        # Reject unplayable input first so the happy path stays un-nested.
        if not is_safe_to_play(board_state):
            raise BadRequest()

        return play(create_board(board_state))
Beispiel #4
0
    def envir(pos, X_0):
        """Forward one move to ``ttt.play`` and pair its result with a zero reward.

        Returns:
            A ``(0, valid)`` tuple where ``valid`` is the value returned by
            ``ttt.play(pos, X_0)``.
        """
        return 0, ttt.play(pos, X_0)
epoch_objective_list = []

# Original author's note: the plan was to have the network play against a
# random opponent, and possibly against itself later. The loop below plays
# against alpha_beta(minimax_str).

for epoch in range(train_duration):
    param_values = lasagne.layers.get_all_param_values(value_out)

    # Sum of the ``.size`` attribute over every parameter value.
    minimum_data = sum(values.size for values in param_values)
    # NOTE(review): time.clock() was removed in Python 3.8. It is left as-is
    # because the code that consumes t1 is outside this section; switch to
    # time.perf_counter() (or time.process_time()) together with that code.
    t1 = time.clock()
    for _ in range(training_per_step):
        # Move exploration 1% of the way toward a target derived from
        # objective_total, never exceeding exploration_max.
        exploration_target = (
            min(exploration_max - exploration_min, objective_total)
            + exploration_min
        )
        exploration = min(exploration_max,
                          exploration * 0.99 + 0.01 * exploration_target)
        future_discount = future_discount*0.99 + 0.01*(1 - min(1,objective_total))
        # The smoothed value above is overridden right away, so the discount
        # is effectively fixed at 1.
        future_discount = 1
        result_X = ttt.play(
            nn_ai(value_fun, net='value', exploration=exploration),
            alpha_beta(minimax_str),
        )
        board_list = get_inputs(result_X.log)
        game_length = len(result_X.log)
        # Floor division keeps the count an int. With true division this is a
        # float on Python 3 and range() raises TypeError; on Python 2 the
        # result is unchanged.
        first_player_turns = (game_length + 1) // 2
        # Even indices of the log / board list are paired with the odd index
        # that follows them.
        input_list = [board_list[2 * i] for i in range(first_player_turns)]
        output_list = [board_list[2 * i + 1] for i in range(first_player_turns)]
        move_list = [result_X.log[2 * i] for i in range(first_player_turns)]

        # Earlier reward_list experiments, kept for reference (disabled):
        # [monte_carlo_mod(-1 * board_list[2*i+1],results,epsilon =
        # epsilon,duration = monte_carlo_duration)  for i in range((game_length+1)//2)]
        # reward_list = [-1*monte_carlo(-1*board_list[2*i+1],epsilon=epsilon,
        #                               duration = monte_carlo_duration) for i in range((game_length+1)//2)]
        # reward_list = [ttt.winner(board_list[2*i+1]) for i in range((game_length+1)//2)]

 def test_play(self):
     """Check that ``play(self.board)`` yields the expected board string.

     The result must equal ``"oxxo  o  "`` and its length must equal
     ``NUM_BOARD_POSITIONS``.
     """
     expected_board = "oxxo  o  "
     board_after_play = play(self.board)
     self.assertEqual(board_after_play, expected_board)
     self.assertEqual(len(board_after_play), NUM_BOARD_POSITIONS)
Beispiel #7
0
from tic_tac_toe import play

print("Welcome to Tic Tac Toe!")

# Ask each player for a name in turn: print the question, then read one line.
player_names = []
for question in (
    "Player 1, what is your name?",
    "Player 2, what is your name?",
):
    print(question)
    player_names.append(input())

p1, p2 = player_names
play(0, 0, p1, p2)