# Example #1
0
# Snapshot the current sensor readings into a new list. `f` is passed as
# `initial_field` when the RL bot is constructed, so it must be a separate
# list object rather than an alias of whatever get_sensor_info() returned.
# list() makes the same shallow, element-by-element copy as an index loop.
sensor = world.get_sensor_info()
f = list(sensor)

# Choose bots.
# bot_RL is built from the board size, the RL/MLP hyper-parameters and the
# sensor snapshot `f`; bot_train is the bot handed to bot_RL.play_game.
bot_RL = Bot_RL_MLP(
    size_x,
    size_y,
    rl_beta,
    mlp_hidden,
    mlp_learning_rate,
    rl_reward,
    initial_field=f,
    player_ID=1,
)
# Alternative choice for bot_train (disabled):
# bot_train = Bot_Random.Bot_Random_Dynamic(size_x, size_y)
bot_train = Bot_Random.Bot_Random_Static(size_x, size_y)

# Result accumulators; they are only initialised here.
# `win` holds three independent empty lists, `scale` starts empty, and
# `winner` holds three zeroed counters.
# NOTE(review): the meaning of the three slots is not visible in this
# section - confirm against the code that fills them.
win = [[] for _ in range(3)]
scale = []
winner = [0] * 3

# Training loop: call bot_RL.play_game once per iteration, `runs` times in
# total, always passing the same world and the same bot_train.
for counter in range (runs):
    # NOTE(review): the literal 1 is presumably the RL bot's player ID
    # (bot_RL is constructed with player_ID = 1) - confirm against the
    # signature of play_game.
    bot_RL.play_game(world, 1, bot_train)
    # Disabled code, kept for reference: an explicit move-by-move game loop
    # between bot_1 and bot_2. Presumably superseded by play_game above -
    # confirm before deleting.
    #bot_1.new_game()
    #bot_2.new_game()
    #Play a game
    #Make a move until Game ends
    #world.new_init(initial_stones = initial_stones)
    #while (world.get_winner() == -1):
    #    if (world.active_player == 1):
    #        (x, y) = bot_1.get_action(world)
    #        world.perform_action(x, y)
    #        bot_1.evaluate_action(world)
    #    else:
    #        (x, y) = bot_2.get_action(world)
    #        world.perform_action(x, y)
    #        bot_2.evaluate_action(world)