def envir(pos, X_0):
    """Play one move through ``ttt`` and, if it was accepted, trigger a reply.

    ``pos`` and ``X_0`` are forwarded unchanged to ``ttt.play``. When that
    call returns a truthy value, ``ttt.ai_play('0')`` is invoked afterwards
    (presumably the AI answering as player '0' -- confirm against ``ttt``).

    Returns:
        A ``(reward, valid)`` tuple; ``reward`` is always 0 and ``valid`` is
        the value returned by ``ttt.play``.
    """
    move_ok = ttt.play(pos, X_0)
    if not move_ok:
        # Falsy result from ttt.play: skip the AI reply entirely.
        return (0, move_ok)
    ttt.ai_play('0')
    return (0, move_ok)
def tourney(output, games=50, depth=0):
    """Play a two-sided tournament between ``nn_ai(output)`` and alpha-beta.

    Each of the ``games`` rounds consists of two games: one with the network
    passed as the first argument to ``ttt.play`` and one with it passed as the
    second. The ``winner`` attribute of each result is translated into the
    network's point of view and tallied.

    Args:
        output: forwarded to ``nn_ai`` to build the network player.
        games: number of rounds (two games are played per round).
        depth: forwarded to ``alpha_beta`` to build the opponent.

    Returns:
        A dict with the keys ``'wins'``, ``'ties'`` and ``'losses'``.
    """
    tally = {'wins': 0, 'ties': 0, 'losses': 0}

    # winner value -> tally key, seen from the network's side. The sign is
    # flipped when the network is the second argument to ttt.play.
    network_first = ((1, 'wins'), (0, 'ties'), (-1, 'losses'))
    network_second = ((-1, 'wins'), (0, 'ties'), (1, 'losses'))

    for _ in range(games):
        outcome = ttt.play(nn_ai(output), alpha_beta(depth))
        for winner_value, key in network_first:
            if outcome.winner == winner_value:
                tally[key] += 1

        outcome = ttt.play(alpha_beta(depth), nn_ai(output))
        for winner_value, key in network_second:
            if outcome.winner == winner_value:
                tally[key] += 1

    return tally
def get(self):
    """Handle a GET request that carries the board in the query string.

    The required string argument ``board`` is read from the request's query
    arguments. If ``is_safe_to_play`` accepts it, a board is built with
    ``create_board`` and the result of ``play`` on that board is returned.

    Raises:
        BadRequest: when ``is_safe_to_play`` rejects the board string.
    """
    arg_parser = reqparse.RequestParser()
    arg_parser.add_argument("board", type=str, required=True, location="args")
    board_state = arg_parser.parse_args()["board"]

    # Reject unusable boards up front so the happy path stays flat.
    if not is_safe_to_play(board_state):
        raise BadRequest()

    return play(create_board(board_state))
def envir(pos, X_0):
    """Forward one move to ``ttt.play`` and report its result.

    Returns:
        A ``(reward, valid)`` tuple; the reward is always 0 and ``valid`` is
        whatever ``ttt.play(pos, X_0)`` returned.
    """
    return 0, ttt.play(pos, X_0)
# Training driver (fragment): every epoch plays `training_per_step` games of
# the value network against an alpha-beta opponent and slices each game log
# into per-move lists.
epoch_objective_list = []
# Author's plan: have it play against a random opponent; might make it play
# against itself later.
for epoch in range(train_duration):
    param_values = lasagne.layers.get_all_param_values(value_out)
    # Sum of the `.size` of every parameter array of the network.
    minimum_data = sum(p.size for p in param_values)
    # NOTE(review): time.clock() was removed in Python 3.8. It is kept here
    # because the code that consumes t1 is not visible in this fragment.
    t1 = time.clock()
    for _ in range(training_per_step):
        # Move exploration 1% of the way toward a target derived from
        # objective_total, capped at exploration_max.
        exploration = min(
            exploration_max,
            exploration * 0.99
            + 0.01 * (min(exploration_max - exploration_min, objective_total)
                      + exploration_min),
        )
        future_discount = (future_discount * 0.99
                           + 0.01 * (1 - min(1, objective_total)))
        # NOTE(review): the smoothed value above is overwritten right away,
        # so the discount is effectively pinned to 1 -- presumably a
        # deliberate override; confirm before removing either line.
        future_discount = 1

        result_X = ttt.play(
            nn_ai(value_fun, net='value', exploration=exploration),
            alpha_beta(minimax_str),
        )
        board_list = get_inputs(result_X.log)
        game_length = len(result_X.log)

        # Number of even-indexed log entries (presumably the moves of the
        # first player, i.e. the network -- confirm against ttt.play).
        # Floor division keeps this an int on Python 3, where `/` would
        # produce a float that range() rejects; on Python 2 ints the result
        # is unchanged.
        half_moves = (game_length + 1) // 2
        input_list = [board_list[2 * i] for i in range(half_moves)]
        # NOTE(review): indexes up to 2 * half_moves - 1, so board_list
        # presumably holds at least that many + 1 entries -- TODO confirm.
        output_list = [board_list[2 * i + 1] for i in range(half_moves)]
        move_list = [result_X.log[2 * i] for i in range(half_moves)]

        # Alternative reward definitions tried by the author (left disabled):
        # [monte_carlo_mod(-1 * board_list[2*i+1], results, epsilon=epsilon,
        #                  duration=monte_carlo_duration)
        #  for i in range(half_moves)]
        # reward_list = [-1 * monte_carlo(-1 * board_list[2*i+1],
        #                                 epsilon=epsilon,
        #                                 duration=monte_carlo_duration)
        #                for i in range(half_moves)]
        # reward_list = [ttt.winner(board_list[2*i+1])
        #                for i in range(half_moves)]
def test_play(self):
    """``play`` on the fixture board returns the expected board string."""
    result = play(self.board)

    # NOTE(review): the expected literal below is 7 characters long, so the
    # length check can only pass if NUM_BOARD_POSITIONS is 7 -- confirm the
    # literal's spacing is what the author intended.
    self.assertEqual(result, "oxxo o ")
    self.assertEqual(len(result), NUM_BOARD_POSITIONS)
from tic_tac_toe import play

# Interactive launcher: greet, collect both player names, start the game.
print("Welcome to Tic Tac Toe!")

prompts = (
    "Player 1, what is your name?",
    "Player 2, what is your name?",
)
names = []
for prompt in prompts:
    # The prompt is printed on its own line before reading the answer.
    print(prompt)
    names.append(input())

p1, p2 = names
# The two leading zeros are passed through as-is; their meaning is defined by
# tic_tac_toe.play (presumably starting scores -- confirm there).
play(0, 0, p1, p2)