# NOTE(review): everything in this comment block arrived as ONE physical line
# beginning with '#', so Python treats all of it as a comment and none of it
# executes. It reads like the tail of a per-game loop whose line breaks were
# lost. The segments are listed one per line below; the indentation shown is a
# best-effort reconstruction, not recovered data -- confirm against the
# original script before re-enabling any of it. The print statements use
# Python 2 syntax.
#
# (1) Segments that carried their own '#' marker inside the collapsed line:
#
#     #bot_2.new_game()
#     #Play a game
#     #Make a move until Game ends
#     #world.new_init(initial_stones = initial_stones)
#     #while (world.get_winner() == -1):
#     #    if (world.active_player == 1):
#     #        (x, y) = bot_1.get_action(world)
#     #        world.perform_action(x, y)
#     #        bot_1.evaluate_action(world)
#     #    else:
#     #        (x, y) = bot_2.get_action(world)
#     #        world.perform_action(x, y)
#     #        bot_2.evaluate_action(world)
#     #Evaluate Game
#
# (2) Segments that had no '#' of their own (presumably live code before the
#     line breaks were lost -- confirm). Whether the draw_graph branch was
#     nested inside the log_interval branch, and whether the plt.* calls ran
#     inside or after the loop, cannot be told from the collapsed line:
#
#     winner[int(world.get_winner())] += 1
#     if ((counter % log_interval) == log_interval - 1):
#         print 'W_i min :',bot_RL.mlp.W_i.min(), ' W_i max :', bot_RL.mlp.W_i.max()
#         print 'W_o min :',bot_RL.mlp.W_o.min(), ' W_o max :', bot_RL.mlp.W_o.max()
#     if (draw_graph == True):
#         win[0].append(winner[0])
#         win[1].append(winner[1])
#         win[2].append(winner[2])
#         scale.append(counter)
#     plt.plot(scale, win[0], label='Draw')
#     plt.plot(scale, win[1], label='Win')
#     plt.plot(scale, win[2], label='Lose')
#     plt.legend(loc='lower left')
#     plt.show()
# Set up the two players and play `runs` games between them, tallying the
# result of every game in `winner`.
#
# Relies on names defined outside this fragment: Bot_RL_MLP, Bot_Random,
# world, runs, log_interval, initial_stones, size_x, size_y, rl_beta,
# mlp_hidden, mlp_learning_rate, rl_reward and f.
#
# NOTE(review): this fragment had been collapsed onto a single physical line,
# which made it a SyntaxError and hid everything after the first '#' inside a
# comment. The statement boundaries and nesting below are restored from the
# syntax of the statements themselves.
bot_2 = Bot_RL_MLP(
    size_x,
    size_y,
    rl_beta,
    mlp_hidden,
    mlp_learning_rate,
    rl_reward,
    initial_field=f,
    player_ID=1,
)
bot_1 = Bot_Random.Bot_Random_Static(size_x, size_y)
# bot_1 = Bot_Random.Bot_Random_Dynamic(size_x, size_y)

# Three parallel series plus their shared x-axis; nothing in this fragment
# appends to them (presumably filled by the logging/plotting code -- confirm).
win = [[], [], []]
scale = []
# Per-outcome game counters, indexed by int(world.get_winner()).
winner = [0, 0, 0]

for counter in range(runs):
    bot_1.new_game()
    bot_2.new_game()

    # Play a game
    # Make a move until Game ends
    world.new_init(initial_stones=initial_stones)
    # get_winner() == -1 is used as the "game still running" condition.
    while world.get_winner() == -1:
        # NOTE(review): bot_1 moves when active_player == 1, while bot_2 was
        # constructed with player_ID=1 -- confirm the ID convention.
        if world.active_player == 1:
            (x, y) = bot_1.get_action(world)
            world.perform_action(x, y)
            bot_1.evaluate_action(world)
        else:
            (x, y) = bot_2.get_action(world)
            world.perform_action(x, y)
            bot_2.evaluate_action(world)

    # Evaluate Game
    winner[int(world.get_winner())] += 1

    # True on the last iteration of every block of `log_interval` games.
    if (counter % log_interval) == log_interval - 1:
        # print 'W_i min :',bot_1.mlp.W_i.min(), ' W_i max :', bot_1.mlp.W_i.max()
        # print 'W_o min :',bot_1.mlp.W_o.min(), ' W_o max :', bot_1.mlp.W_o.max()
        # NOTE(review): the rest of this branch lies beyond the visible end of
        # the fragment; `pass` only keeps the suite non-empty so the script
        # parses. Drop it once the real body is in place.
        pass