def randwalk():
    g = ludopy.Game([1, 2, 3])
    there_is_a_winner = False

    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
         there_is_a_winner), player_i = g.get_observation()
        print("dice", dice)
        print("move_pieces", move_pieces)
        print("player_pieces", player_pieces)
        print("enemy_pieces", enemy_pieces)
        print("player_is_a_winner", player_is_a_winner)
        print("there_is_a_winner", there_is_a_winner)
        print("player_i", player_i)
        print('determind_state', Q_Learning.determind_state(player_pieces))

        if len(move_pieces):
            piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
        else:
            piece_to_move = -1

        _, _, _, _, _, there_is_a_winner = g.answer_observation(piece_to_move)

        cv2.imshow('test', g.render_environment())
        cv2.waitKey(0)

    print("Saving history to numpy file")
    g.save_hist("game_history.npy")
    print("Saving game video")
    g.save_hist_video("game_video.mp4")
    return True
def objective(self, x):
    # Play the game 1000 times with the given weights;
    # the number of games won is the fitness value.
    times_won = 0
    start = timer()
    for i in range(1000):
        game = ludopy.Game()
        player_is_a_winner = False
        there_is_a_winner = False
        while not there_is_a_winner:
            (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
             there_is_a_winner), player_i = game.get_observation()
            # Only do moves for player 0; all other players move randomly.
            piece_to_move = -1
            if player_i == 0:
                if len(move_pieces):
                    piece_to_move = self.util_func(x, deepcopy(game), dice, move_pieces,
                                                   player_pieces, enemy_pieces)
            else:
                if len(move_pieces):
                    piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
            _, _, _, _, _, there_is_a_winner = game.answer_observation(piece_to_move)
        # Game done; if the first winner was player 0, increment times_won.
        if game.first_winner_was == 0:
            times_won += 1
    end = timer()
    # logging.info('Done playing 1000 games for a single member in the population. '
    #              'Games won: {}, time taken: {}'.format(times_won, end - start))
    return times_won
def play_match(player_0, player_1, player_2, player_3):
    g = ludopy.Game()
    there_is_a_winner = False
    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
         there_is_a_winner), player_i = g.get_observation()
        if len(move_pieces):
            if player_i == 0:
                piece_to_move = player_0.play(dice, move_pieces, player_pieces, enemy_pieces)
            elif player_i == 1:
                piece_to_move = player_1.play(dice, move_pieces, player_pieces, enemy_pieces)
            elif player_i == 2:
                piece_to_move = player_2.play(dice, move_pieces, player_pieces, enemy_pieces)
            elif player_i == 3:
                piece_to_move = player_3.play(dice, move_pieces, player_pieces, enemy_pieces)
            else:
                raise ValueError("No player's turn")
        else:
            piece_to_move = -1
        _, _, _, _, _, there_is_a_winner = g.answer_observation(piece_to_move)
        if there_is_a_winner:
            return player_i
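# Hedged usage sketch (not from the original sources): a plausible round-robin
# driver for play_match above. It assumes four player objects that implement
# play(dice, move_pieces, player_pieces, enemy_pieces); the function name and
# win-tally layout are illustrative only.
def run_tournament(players, n_matches=100):
    wins = [0, 0, 0, 0]
    for _ in range(n_matches):
        winner = play_match(players[0], players[1], players[2], players[3])
        wins[winner] += 1
    return wins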
def evaluate_fitness(self, evaluation_iterations=500):
    self.q_agent = QAgent(self.game, self.q_learner.q_table.q_table)
    # self.q_agent.q_table.evaluating = True
    self.agents[0] = self.q_agent
    self.eval_games_won = []
    for i in range(evaluation_iterations):
        there_is_a_winner = False
        while not there_is_a_winner:
            self.agents[self.game.current_player].move()
            there_is_a_winner = len(self.game.game_winners) > 0
        if self.game.first_winner_was == 0:
            self.eval_games_won.append(1)
        else:
            self.eval_games_won.append(0)
        self.game = ludopy.Game()
        self.q_agent.new_game(self.game)
        for j in range(1, 4):
            self.agents[j].new_game(self.game)
    self.fitness = sum(self.eval_games_won) / len(self.eval_games_won)
    return self.fitness
def evaluate(x, n_games):
    # Play the game n_games times with the given weights;
    # the number of games won is the fitness value.
    times_won = 0
    print('Starting eval of {} games...'.format(n_games))
    print('Weights: {}'.format(x))
    logging.info('Starting eval of {} games...'.format(n_games))
    logging.info('Weights: {}'.format(x))
    start = timer()
    for i in range(n_games):
        game = ludopy.Game()
        player_is_a_winner = False
        there_is_a_winner = False
        while not there_is_a_winner:
            (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
             there_is_a_winner), player_i = game.get_observation()
            # Only do moves for player 0; all other players move randomly.
            piece_to_move = -1
            if player_i == 0:
                if len(move_pieces):
                    piece_to_move = util_func(x, deepcopy(game), dice, move_pieces,
                                              player_pieces, enemy_pieces)
            else:
                if len(move_pieces):
                    piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
            _, _, _, _, _, there_is_a_winner = game.answer_observation(piece_to_move)
        # Game done; if the first winner was player 0, increment times_won.
        if game.first_winner_was == 0:
            times_won += 1
        if i % 10000 == 0:
            logging.info('Done playing {} games. Won so far: {}'.format(i, times_won))
    end = timer()
    print('Done playing {} games. Games won: {}, time taken: {}'.format(n_games, times_won, end - start))
    logging.info('Done playing {} games. Games won: {}, time taken: {}'.format(n_games, times_won, end - start))
    return times_won
def play(players):
    # print("\n\nNew game\n----------------------\n")
    game = ludopy.Game()
    there_is_a_winner = False
    player_i = -1
    while not there_is_a_winner:
        observation, player_i = game.get_observation()
        (dice, movable_pieces, player_pieces, enemy_pieces,
         player_is_a_winner, there_is_a_winner) = observation
        player = players[player_i]
        if len(movable_pieces) > 0:
            # print('player: #' + str(player_i) + '\t' + str(dice) + '\t' + str(movable_pieces.tolist()) + '\t\t' + str(player_pieces.tolist()) + '\t\t' + str(enemy_pieces.tolist()))
            piece_to_move = player.select_piece_to_move(observation)
        else:
            # print('-')
            piece_to_move = -1
        _, _, _, _, _, there_is_a_winner = game.answer_observation(piece_to_move)
    # print("Saving history to numpy file")
    # game.save_hist(f"game_history.npy")
    # print("Saving game video")
    # game.save_hist_video(f"game_video.mp4")
    players[player_i].wincount += 1
    return player_i
def __init__(self, chromo):
    self.g = ludopy.Game()
    self.current_state = []
    self.qtable = {}  # init a dictionary
    # self.learning_rate = 0.25    # 0.5  alpha
    # self.discount_factor = 0.95  # 0.9  gamma
    self.epsilon = 1.0  # 0.10; 1.0 = 100% random, 0 = max val (greedy)
    self.reward = 0  # r
    self.HOME_AREAL_INDEXS = np.array([53, 54, 55, 56, 57, 58])
    self.GLOBUS_INDEXS = np.array([9, 22, 35, 48])
    self.STAR_INDEXS = np.array([5, 12, 18, 25, 31, 38, 44, 51])
    self.GOAL_INDEX = 59
    self.ENEMY_1_GLOB_INDX = 14
    self.ENEMY_2_GLOB_INDX = 27
    self.ENEMY_3_GLOB_INDX = 40
    self.next_position = []
    self.next_qvalue = 0
    self.there_is_a_winner = False
    self.dice = None
    self.player_i = None
    self.move_pieces = []
    self.current_position = []
    self.enemy_pieces = []
    self.current_action = 0
    self.chromosome = chromo
    self.discount_factor = chromo[0]  # 0.9 gamma
    self.learning_rate = chromo[1]    # 0.5 alpha
def randwalk():
    import ludopy
    import numpy as np
    from PIL import Image as pilImg

    g = ludopy.Game()
    there_is_a_winner = False
    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
         there_is_a_winner), player_i = g.get_observation()

        if len(move_pieces):
            piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
            # g.test()
            boardImg = g.render_environment()
            img = pilImg.fromarray(boardImg)
            img.save("test.jpeg")
        else:
            piece_to_move = -1

        _, _, _, _, _, there_is_a_winner = g.answer_observation(piece_to_move)

    print("Saving history to numpy file")
    g.save_hist("game_history.npy")
    print("Saving game video")
    g.save_hist_video("game_video.mp4")
    return True
def __init__(self):
    self.g = ludopy.Game()
    self.there_is_a_winner = False
    self.dice = None
    self.player_i = None
    self.move_pieces = []
    self.current_position = []
    self.enemy_pieces = []
    self.player = [Player(), Player(), Player(), Player()]
def winRate(load_path, episodes, player_num):
    tf.reset_default_graph()
    number_of_players = 2
    number_of_pieces = 4
    reward = -1000
    EPISODES = episodes
    ghost_players = list(reversed(range(0, 4)))[:-number_of_players]
    players = list(reversed(range(0, 4)))[-number_of_players:]
    winner = None
    act = util.Action(number_of_players, number_of_pieces, reward)
    winnerCount = defaultdict(int)
    print(load_path, "---")
    PG = PolicyGradient(
        n_x=(number_of_players * number_of_pieces) + 5,  # input layer size
        n_y=5,  # output layer size
        learning_rate=0.02,
        reward_decay=0.99,
        load_path=load_path,
        save_path=None,
        player_num=player_num)
    preds = list()
    for episode in range(EPISODES):
        g = ludopy.Game(ghost_players=ghost_players,
                        number_of_pieces=number_of_pieces)
        there_is_a_winner = False
        winner = None
        totalMoves, wrongPred = 0, 0
        while True:
            for i in range(number_of_players):
                (dice, move_pieces, player_pieces, enemy_pieces,
                 player_is_a_winner, there_is_a_winner), player_i = g.get_observation()
                if player_i == 1:
                    action, random = act.getAction(PG, enemy_pieces,
                                                   player_pieces, move_pieces, dice)
                    totalMoves += 1
                    if random:
                        wrongPred += 1
                else:
                    action = act.getAction(move_pieces=move_pieces)
                _, _, _, _, _, there_is_a_winner = g.answer_observation(action)
                if there_is_a_winner:
                    if episode % 1000 == 0 and 0:  # 'and 0' intentionally disables this debug branch
                        print("saving the game--", episode)
                    winner = player_i
                    winnerCount[player_i] += 1
                    break
            if there_is_a_winner:
                preds.append([wrongPred, totalMoves])
                break
    return winnerCount, preds
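# Hedged usage sketch (illustrative, not from the original sources): summarize
# the preds list returned by winRate above, where each entry is
# [wrongPred, totalMoves] for one episode. The helper name is hypothetical.
def random_action_fraction(preds):
    wrong = sum(p[0] for p in preds)
    total = sum(p[1] for p in preds)
    return wrong / total if total else 0.0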
def evaluate_qlearning_multiprocessing(i):
    game = ludopy.Game()
    player_is_a_winner = False
    there_is_a_winner = False
    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
         there_is_a_winner), player_i = game.get_observation()
        # Only do moves for player 0; all other players move randomly.
        piece_to_move = -1
        if player_i == 0:
            if len(move_pieces):
                piece_to_move = player.getNextAction(
                    player.getState(player_pieces, enemy_pieces), dice, move_pieces)
        else:
            if len(move_pieces):
                piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
        _, _, _, _, _, there_is_a_winner = game.answer_observation(piece_to_move)
    # Game done; return the first winner's index (0 means the Q-learning player won).
    return game.first_winner_was
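# Hedged usage sketch (illustrative, not from the original sources): a plausible
# multiprocessing driver for the worker above, which would explain why it takes
# an otherwise unused game index `i`. It assumes the module-level `player`
# Q-table agent used by the worker is constructed before the pool is forked;
# the function name and game count are assumptions.
from multiprocessing import Pool

def evaluate_win_rate(n_games=1000):
    with Pool() as pool:
        first_winners = pool.map(evaluate_qlearning_multiprocessing, range(n_games))
    return first_winners.count(0) / n_games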
def run_random_game(save_video=False):
    g = ludopy.Game()
    game_done = False
    game_obs = [[] for _ in range(4)]
    game_events = [[] for _ in range(4)]
    rewards = [[] for _ in range(4)]
    player_end = [False for _ in range(4)]

    while not all(player_end):
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
         game_done), player_i = g.get_observation()
        # enemy_pieces = np.copy(enemy_pieces)

        action = -1
        if len(move_pieces):
            action = random.choice(move_pieces)

        (_, _, player_pieces_after, enemy_pieces_after, player_is_a_winner_after,
         game_done_after) = g.answer_observation(action)

        if action != -1:
            game_event = cal_game_events(player_pieces, enemy_pieces,
                                         player_pieces_after, enemy_pieces_after)
            game_events[player_i].append(list(game_event.values()))
            reward, end_game = cal_reward_and_endgame(game_event)
            rewards[player_i].append(reward)
            player_end[player_i] = end_game
        else:
            reward = 0

        cal_state(player_pieces, enemy_pieces, dice)  # To test that states can be built

        game_obs[player_i].append(
            [dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
             game_done, action, player_pieces_after, enemy_pieces_after,
             player_is_a_winner_after, game_done_after])

    if save_video:
        g.save_hist_video("test.mp4")

    return game_obs, np.array(game_events), list(game_event.keys()), np.array(rewards)
def __init__(self):
    self.there_is_a_winner = False
    self.g = ludopy.Game()
    self.player = None
    self.Q = []
    self.ca = capture_image()
    self.list_winner = []
    self.number_winner_my_player = 0
    self.tr = train_data()
    self.player_last_piece = []
    self.second_player = 0
    self.file_name = ""
    self.file_plyer_hist = ""
    self.type_play = True
    self.winner = 0
    self.my_player_winner = False
    self.gamma_m = 0
    self.alfa_m = 0
    self.percentage = []
    self.epsilon = 0
def evaluate_qlearning_vs_ga_multiprocessing(i):
    weights = [104.0, 118.0, -80.0, 57.0, 94.0, -19.0, 98.0, -58.0, 69.0, 5.0]
    game = ludopy.Game()
    player_is_a_winner = False
    there_is_a_winner = False
    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
         there_is_a_winner), player_i = game.get_observation()
        # Player 0 uses the GA weights, player 2 uses the Q-table;
        # all other players move randomly.
        piece_to_move = -1
        if player_i == 0:
            if len(move_pieces):
                piece_to_move = util_func(weights, deepcopy(game), dice, move_pieces,
                                          player_pieces, enemy_pieces)
        elif player_i == 2:
            if len(move_pieces):
                piece_to_move = player.getNextAction(
                    player.getState(player_pieces, enemy_pieces), dice, move_pieces)
        else:
            if len(move_pieces):
                piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
        _, _, _, _, _, there_is_a_winner = game.answer_observation(piece_to_move)
    # Game done; return the index of the first winner.
    return game.first_winner_was
def randwalk(number_of_players=4, number_of_pieces=4):
    """
    Play a Ludo game with the given number of players and pieces, choosing
    random moves until someone wins, and return the finished Game object.
    Adapted from `test/randomwalk.py` in LUDOpy.

    :param number_of_players: Number of Ludo players.
    :type number_of_players: `int`
    :param number_of_pieces: Number of pieces per player.
    :type number_of_pieces: `int`
    """
    # `ghost_players` is the LUDOpy-specific way to limit the number of
    # players: to get 2 active players, build the list
    #
    #     [3, 2, 1, 0]
    #
    # and slice off the last `number_of_players` entries, leaving the
    # players to disable:
    #
    #     [3, 2, 1, 0][:-2] == [3, 2]
    g = ludopy.Game(ghost_players=list(reversed(range(0, 4)))[:-number_of_players],
                    number_of_pieces=number_of_pieces)
    there_is_a_winner = False
    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
         there_is_a_winner), player_i = g.get_observation()
        if len(move_pieces):
            piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
        else:
            piece_to_move = -1
        _, _, _, _, _, there_is_a_winner = g.answer_observation(piece_to_move)
    return g
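# Hedged usage sketch (illustrative): run a 2-player random game with the
# parameterized randwalk above and save its history, reusing the save_hist
# call that the other randwalk variants in this collection already use.
game = randwalk(number_of_players=2, number_of_pieces=4)
game.save_hist("two_player_history.npy")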
def train(self, training_iterations=10000, win_rate_iterations=100):
    for i in range(training_iterations):
        there_is_a_winner = False
        while not there_is_a_winner:
            self.agents[self.game.current_player].move()
            there_is_a_winner = len(self.game.game_winners) > 0
        self.n_games += 1
        if self.game.first_winner_was == 0:
            self.games_won.append(1)
        else:
            self.games_won.append(0)
        if len(self.games_won) > win_rate_iterations:
            self.games_won = self.games_won[-win_rate_iterations:]
        self.winning_rates.append(sum(self.games_won) / len(self.games_won))
        self.known_state_rates.append(
            sum(self.q_learner.q_table.known_state_encountered) /
            len(self.q_learner.q_table.known_state_encountered))
        if self.n_games % 1000 == 0:
            self.q_learner.dump_q_table(str(self.id) + "/qtable.json")
        self.game = ludopy.Game()
        if i == training_iterations - 1:
            # Last game: act greedily (no exploration).
            self.q_learner.new_game(self.game, epsilon=0)
        else:
            self.q_learner.new_game(self.game)
        for j in range(1, 4):
            self.agents[j].new_game(self.game)
def randwalk():
    import ludopy
    import numpy as np

    g = ludopy.Game()
    there_is_a_winner = False
    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
         there_is_a_winner), player_i = g.get_observation()
        if len(move_pieces):
            piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
        else:
            piece_to_move = -1
        _, _, _, _, _, there_is_a_winner = g.answer_observation(piece_to_move)

    print("Saving history to numpy file")
    g.save_hist("game_history.npy")
    print("Saving game video")
    g.save_hist_video("game_video.mp4")
    return True
def setUp(self):
    np.random.seed(0)
    self.__g = ludopy.Game(number_of_pieces=4)
import ludopy
import cv2
import numpy as np
import matplotlib.pyplot as plt
import bottleneck as bn
from ludopy import player

import qlearning

env = ludopy.Game()

EPISODES = 10000
ACTION_SPACE_SIZE = 4
INNER_STARS = [5, 18, 31, 44]
OUTER_STARS = [12, 25, 38, 51]
avg_window_size = 1000

ep_rewards = []
ep_won = []

rewards_table = {'star': 0.2,
                 'safe': 0.2,
                 'send_another_home': 0.2,
                 'send_self_home': -0.3,
                 'goal': 0.1,
                 'moved_into_goal_area': 0.2,
                 'out_of_start': 0.25,
                 'winner': 1,
                 'not_winner': -1}


def get_reward(moved_piece_previous_location, moved_piece_location, p_pieces,
               e_pieces, n_player_pieces_before, n_enemy_pieces_before):
    reward = 0
    if moved_piece_location in player.STAR_INDEXS:
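# Hedged sketch (illustrative, not from the original sources): a plausible way
# the avg_window_size and the ep_rewards/ep_won lists above could be smoothed
# with the imported bottleneck module for plotting. get_reward itself is
# truncated in the source, so nothing is assumed about its body.
def smooth(values, window=avg_window_size):
    # Moving mean over the last `window` episodes (partial windows allowed).
    return bn.move_mean(np.asarray(values, dtype=float), window=window, min_count=1)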
def play(self, policyPlayers, randomPlayers, load_path, save_path, episodes,
         episodeStart, training, ghost_players, model2keep, n_x=125, n_y=5,
         learning_rate=0.02, reward_decay=0.99, player_num=0,
         number_of_players=2, number_of_pieces=4, reward=-1000,
         rewardType="monte", inputBoardType="fullBoard"):
    totalPlayers = len(policyPlayers) + len(randomPlayers)
    playerPool = policyPlayers + randomPlayers
    data = dict()
    for i in policyPlayers:
        data[i] = StoreTrainingData(n_y)
    act = Action(reward)
    PG = PolicyGradient(
        n_x=n_x,  # input layer size
        n_y=n_y,  # output layer size
        learning_rate=learning_rate,
        reward_decay=reward_decay,
        load_path=load_path,
        save_path=save_path,
        player_num=player_num,
        rewardType=rewardType,
        toKeep=model2keep)
    timeInterval = 50
    winCount = defaultdict(int)
    preds = list()
    startTime = time.time()
    for episode in range(episodeStart + 1, episodeStart + episodes):
        g = ludopy.Game(ghost_players=ghost_players,
                        number_of_pieces=number_of_pieces)
        while True:
            obs, currPlayer = g.get_observation()
            state = State(obs, currPlayer)
            action = None
            if currPlayer in policyPlayers and len(state.actions()) > 0:
                action = act.action(self, state, n_y, playerPool, currPlayer,
                                    data[currPlayer], PG, training)
            elif currPlayer in randomPlayers:
                action = act.action(self, state, n_y)
            _, _, _, _, _, there_is_a_winner = g.answer_observation(action)
            if int(time.time() - startTime) > timeInterval:
                print("episode: {} running for {}".format(
                    episode, time.time() - startTime))
                timeInterval += 50
            if there_is_a_winner:
                winCount[currPlayer] += 1
                if episode % 1000 == 0:
                    print("wincount: {}".format(winCount))
                    print("time taken for this epoch is {}".format(
                        time.time() - startTime))
                    startTime = time.time()
                    timeInterval = 50
                    winCount = defaultdict(int)
                    g.save_hist_video("videos/gameabc{}.avi".format(episode))
                if training:
                    try:
                        self.__train(PG, data, episode, currPlayer)
                    except Exception:
                        g.save_hist_video("error.avi")
                        print("-----------------error------------------------")
                break
    return winCount
        return state  # tail of the preceding getState method

    def getNextAction(self, state, dice, movePieces):
        # Greedy action selection: pick the movable piece with the highest Q-value.
        diceIdx = dice - 1
        bestAction = movePieces[0]
        bestQValue = self.getQValue(state, diceIdx, bestAction)
        for action in movePieces:
            if self.getQValue(state, diceIdx, action) > bestQValue:
                bestAction = action
                bestQValue = self.getQValue(state, diceIdx, action)
        return bestAction


player = QLearningPlayer('BestQTable.npy')  # Give the path to the QTable.
g = ludopy.Game()
gameNumber = 0
winners = []
numOfGames = 5000
while gameNumber < numOfGames:
    g.reset()
    gameNumber += 1
    there_is_a_winner = False
    while not there_is_a_winner:
        (dice, move_pieces, player_pieces, enemy_pieces, player_is_a_winner,
         there_is_a_winner), player_i = g.get_observation()
        if player_i == 0 and len(move_pieces) > 0:  # This is the line that is important
            piece_to_move = player.getNextAction(
                player.getState(player_pieces, enemy_pieces), dice, move_pieces)
        else:
            if len(move_pieces):
#!/usr/bin/python3.6
import ludopy
import numpy as np

from agents.random_agent import RandomAgent
from agents.q_learning.q_learning_agent import QLearningAgent

q_table_filename = None

game = ludopy.Game()
q_learner = QLearningAgent(game, 0,
                           learning_rate=0.5,
                           discount_factor=0.9,
                           epsilon=0.5,
                           win_reward=10.0,
                           lost_reward=-10.0,
                           piece_in_reward=5.0,
                           land_on_globe_reward=1.0,
                           land_on_star_reward=2.0,
                           knock_enemy_home_reward=0.5,
                           got_knocked_home_reward=-1.1,
                           no_move_reward=-0.5,
                           piece_number_scale_reward=0.001,
                           piece_number_init_func_value=5,
                           q_table_filename=q_table_filename)
print(len(q_learner.q_table.q_table))
random_agent_1 = RandomAgent(game, 1)
random_agent_2 = RandomAgent(game, 2)
def run_ludo():
    # Best found: explore rate 0.05, discount rate 0.4 and learning rate 0.1
    learning_rate_vec = [0.1]    # [0.1, 0.2, 0.3, 0.4, 0.5]
    discount_factor_vec = [0.4]  # [0.1, 0.2, 0.3, 0.4, 0.5]
    explore_rate_vec = [0.05]    # [0.05, 0.10, 0.15, 0.2]
    after = 800
    number_of_runs_without_learning = 25
    number_of_runs_with_learning = 1000
    q_player = 0

    size_of_win_rate_vec = (len(explore_rate_vec), len(discount_factor_vec),
                            len(learning_rate_vec), number_of_runs_with_learning)
    win_rate_vec = np.zeros(size_of_win_rate_vec)

    for ER_index, ER_value in enumerate(explore_rate_vec):
        for DF_index, DF_value in enumerate(discount_factor_vec):
            for LR_index, LR_value in enumerate(learning_rate_vec):
                q = Q_Learning.QLearning(q_player)
                q.training = 1
                q.learning_rate = LR_value
                q.discount_factor = DF_value
                q.explore_rate = ER_value
                for k in range(number_of_runs_with_learning):
                    print('Test2: Number of learning games: ', k,
                          ' ER: ', q.explore_rate,
                          ' DF: ', q.discount_factor,
                          ' LR: ', q.learning_rate)
                    g = ludopy.Game()
                    stop_while = False
                    q.training = 1
                    while not stop_while:
                        (dice, move_pieces, player_pieces, enemy_pieces,
                         player_is_a_winner, there_is_a_winner), player_i = g.get_observation()
                        if player_i == q_player:
                            piece_to_move = q.update_q_table(player_pieces, enemy_pieces,
                                                             dice, g, there_is_a_winner)
                            if there_is_a_winner == 1:
                                stop_while = True
                        else:
                            if len(move_pieces):
                                piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
                            else:
                                piece_to_move = -1
                        _, _, _, _, _, there_is_a_winner = g.answer_observation(piece_to_move)
                    q.reset_game()

                    if after < k:
                        # Freeze learning and measure the win rate.
                        wins = [0, 0, 0, 0]
                        q.training = 0
                        number_of_steps = 0
                        for j in range(number_of_runs_without_learning):
                            g = ludopy.Game()
                            stop_while = False
                            while not stop_while:
                                (dice, move_pieces, player_pieces, enemy_pieces,
                                 player_is_a_winner, there_is_a_winner), player_i = g.get_observation()
                                if player_i == q_player:
                                    number_of_steps = number_of_steps + 1
                                    piece_to_move = q.update_q_table(player_pieces, enemy_pieces,
                                                                     dice, g, there_is_a_winner)
                                    if there_is_a_winner == 1:
                                        stop_while = True
                                else:
                                    if len(move_pieces):
                                        piece_to_move = move_pieces[np.random.randint(0, len(move_pieces))]
                                    else:
                                        piece_to_move = -1
                                _, _, _, _, _, there_is_a_winner = g.answer_observation(piece_to_move)
                            q.reset_game()
                            wins[g.first_winner_was] = wins[g.first_winner_was] + 1
                        win_rate_vec[ER_index][DF_index][LR_index][k] = \
                            wins[q_player] / number_of_runs_without_learning
                        print('Win rate: ', wins[q_player] / number_of_runs_without_learning)
                        q.save_Q_table("Best_learning_parameters" + str(k) + ".npy")

    test_name = "Test_run"
    file_name = test_name + "_data.npy"
    file_ext = file_name.split(".")[-1]
    assert file_ext == "npy", "The file extension has to be npy (numpy file)"
    np.save(file_name, win_rate_vec)

    file_name = test_name + "_parameters.npy"
    file_ext = file_name.split(".")[-1]
    assert file_ext == "npy", "The file extension has to be npy (numpy file)"
    np.save(file_name, [explore_rate_vec, discount_factor_vec, learning_rate_vec,
                        number_of_runs_with_learning, number_of_runs_without_learning])
    return True
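# Hedged usage sketch (illustrative, not from the original sources): loading
# the arrays saved by run_ludo above. The file names come from the code;
# allow_pickle is needed because the parameters were saved as a mixed list.
import numpy as np

win_rate_vec = np.load("Test_run_data.npy")
params = np.load("Test_run_parameters.npy", allow_pickle=True)
# win_rate_vec[er][df][lr][k] holds the win rate measured after learning game k.
print(win_rate_vec[0][0][0].max())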
def setUp(self):
    np.random.seed(0)
    self.__g = ludopy.Game(ghost_players=[3, 2], number_of_pieces=4)
def __init__(self, individual_id, discount_factor=None, learning_rate=None,
             epsilon=None, win_reward=None, lost_reward=None,
             piece_in_reward=None, land_on_globe_reward=None,
             land_on_star_reward=None, knock_enemy_home_reward=None,
             got_knocked_home_reward=None, no_move_reward=None,
             piece_number_scale_reward=None, piece_number_init_func_value=None,
             mutation_rate=0):
    self.id = individual_id

    def init_param(value, bounds):
        # Re-sample the parameter uniformly inside its bounds when it is
        # unset, or with probability `mutation_rate` (mutation).
        random_int = 100
        if mutation_rate > 0:
            random_int = np.random.randint(0, high=100)
        if value is None or random_int < 100 * mutation_rate:
            return np.random.uniform(bounds[0], bounds[1])
        return value

    self.discount_factor = init_param(discount_factor, discount_factor_bounds)
    self.learning_rate = init_param(learning_rate, learning_rate_bounds)
    self.epsilon = init_param(epsilon, epsilon_bounds)
    self.win_reward = init_param(win_reward, win_reward_bounds)
    self.lost_reward = init_param(lost_reward, lost_reward_bounds)
    self.piece_in_reward = init_param(piece_in_reward, piece_in_reward_bounds)
    self.land_on_globe_reward = init_param(land_on_globe_reward,
                                           land_on_globe_reward_bounds)
    self.land_on_star_reward = init_param(land_on_star_reward,
                                          land_on_star_reward_bounds)
    self.knock_enemy_home_reward = init_param(knock_enemy_home_reward,
                                              knock_enemy_home_reward_bounds)
    self.got_knocked_home_reward = init_param(got_knocked_home_reward,
                                              got_knocked_home_reward_bounds)
    self.no_move_reward = init_param(no_move_reward, no_move_reward_bounds)
    self.piece_number_scale_reward = init_param(piece_number_scale_reward,
                                                piece_number_scale_reward_bounds)
    self.piece_number_init_func_value = init_param(piece_number_init_func_value,
                                                   piece_number_init_func_value_bounds)

    self.game = ludopy.Game()
    self.q_learner = QLearningAgent(
        self.game, 0, self.discount_factor, self.learning_rate, self.epsilon,
        self.win_reward, self.lost_reward, self.piece_in_reward,
        self.land_on_globe_reward, self.land_on_star_reward,
        self.knock_enemy_home_reward, self.got_knocked_home_reward,
        self.no_move_reward, self.piece_number_scale_reward,
        self.piece_number_init_func_value)
    self.agents = [self.q_learner]
    for i in range(1, 4):
        self.agents.append(RandomAgent(self.game, i))
    self.games_won = []
    self.n_games = 0
    self.winning_rates = []
    self.known_state_rates = []
    self.fitness = None
def train(episode, rewardType=None):
    tf.reset_default_graph()
    number_of_players = 2
    number_of_pieces = 4

    # Load checkpoint
    load_version = 11
    save_version = load_version + 1
    # load_path = "output/weights/ludo/{}/ludo-v2.ckpt".format(load_version)
    load_path = None
    save_path = "/content/drive/My Drive/cse8673_project/output/weights/ludo/{}/ludo-v2.ckpt".format(rewardType)
    PG_dict = {}
    reward = -1000
    act = util.Action(number_of_players, number_of_pieces, reward)
    PG = PolicyGradient(
        n_x=(number_of_players * number_of_pieces) + 5,  # input layer size
        n_y=5,  # output layer size
        learning_rate=0.02,
        reward_decay=0.99,
        load_path=load_path,
        save_path=save_path,
        player_num=0,
        rewardType=rewardType)
    EPISODES = episode
    ghost_players = list(reversed(range(0, 4)))[:-number_of_players]
    players = list(reversed(range(0, 4)))[-number_of_players:]
    winner = None
    winnerCount = defaultdict(int)
    for episode in range(EPISODES):
        if episode % 500 == 0:
            print("episode : ", episode)
        g = ludopy.Game(ghost_players=ghost_players,
                        number_of_pieces=number_of_pieces)
        episode_reward = 0
        there_is_a_winner = False
        winner = None
        count = 0
        while True:
            count += 1
            for i in range(number_of_players):
                if i == 0:
                    (dice, move_pieces, player_pieces, enemy_pieces,
                     player_is_a_winner, there_is_a_winner), player_i = g.get_observation()
                    action, random = act.getAction(PG, enemy_pieces,
                                                   player_pieces, move_pieces, dice)
                    _, _, _, _, _, there_is_a_winner = g.answer_observation(action)
                else:
                    # As in the original source, the observation/answer
                    # round-trip is only done for player 0.
                    action = act.getAction(move_pieces=move_pieces)
                if there_is_a_winner:
                    winner = player_i
                    winnerCount[player_i] += 1
                    break
            # This is where the agents are learning.
            if there_is_a_winner:
                if winner == 0:
                    PG.episode_rewards = [
                        i + 2000 if i == -1000 else i
                        for i in PG.episode_rewards
                    ]
                discounted_episode_rewards_norm = PG.learn(episode, 0, winner)
                break
    return winnerCount, save_path