def randomPlay(numEpisodes, render=False):
    """Run CartPole-v0 episodes with an agent whose ``minEpsilon`` is forced to 1.

    The environment is created with a fixed seed of 0, wrapped in an
    ``nnAgent.NNAgent``, driven through ``runEpisodes`` and the agent is
    shut down with ``kill()`` afterwards.

    Args:
        numEpisodes: passed to ``runEpisodes`` as its first argument.
        render: passed to ``runEpisodes`` as its fifth positional argument.
    """
    cartpole = gym.make('CartPole-v0')
    cartpole.seed(0)

    player = nnAgent.NNAgent(cartpole)
    # presumably an epsilon floor of 1 keeps action selection fully random
    # -- verify against NNAgent
    player.minEpsilon = 1

    # The positional ``True`` and trailing ``None`` are forwarded untouched;
    # their meaning is defined by runEpisodes (not visible here).
    runEpisodes(numEpisodes, cartpole, player, True, render, None)
    player.kill()
def playAndTrain(numEpisodes, render=False, nHidd=(40, 40, 40), solveStop=False, **agentParams):
    """Play LunarLander-v2 episodes with an ``NNAgent`` via ``runEpisodes``.

    The environment is seeded with 0, the agent is built from
    ``agentParams``, ``runEpisodes`` is invoked, and the agent is shut down
    with ``kill()`` afterwards.

    Args:
        numEpisodes: passed to ``runEpisodes`` as its first argument.
        render: passed to ``runEpisodes`` as its fifth positional argument.
        nHidd: accepted for caller compatibility but NOT used by this
            function. The default is an immutable tuple so no mutable
            object is shared between calls.
            NOTE(review): this looks like it was meant to be forwarded to
            the agent -- confirm against NNAgent's signature.
        solveStop: forwarded to ``runEpisodes`` as the ``solveStop`` keyword.
        **agentParams: extra keyword arguments forwarded to
            ``nnAgent.NNAgent``.
    """
    env = gym.make('LunarLander-v2')
    env.seed(0)
    # 8 and 4 are passed positionally; presumably the observation size and
    # action count of LunarLander -- confirm against NNAgent.
    agent = nnAgent.NNAgent(8, 4, **agentParams)
    runEpisodes(numEpisodes, env, agent, True, render, solveStop=solveStop)
    agent.kill()
def testAndExperiment():
    """Scratch routine: inspect CartPole-v0 spaces and poke an ``NNAgent``.

    Prints the shapes of the observation and action spaces, builds an agent
    on the seeded environment, calls ``agent.testNpSeed()`` and prints the
    result of ``agent.action`` on a fixed 4-element observation.
    """
    cartpole = gym.make('CartPole-v0')
    cartpole.seed(0)

    for label, attr_name in (("obs space", "observation_space"),
                             ("act space", "action_space")):
        print(label, getattr(cartpole, attr_name).shape)

    # A disabled experiment used to live here: it printed env.reset() five
    # times, re-seeded the env with 0, then printed five
    # action_space.sample() draws to test that env.seed() fixes the
    # environment's randomness. Author's note from that experiment: spaces
    # are seeded separately and apparently by a fixed seed.

    probe = nnAgent.NNAgent(cartpole)
    # probe.test() is intentionally left disabled, as in the original.
    probe.testNpSeed()
    print(probe.action([1, 2, 3, 4]))
def playAndTrain(numEpisodes, saveFreq=10, render=False, nHidd=None, solveStop=False):
    """Play CartPole-v0 episodes with an ``NNAgent`` via ``runEpisodes``.

    The environment is seeded with 0. The agent is created with
    ``alpha=0.001``, ``epsilonDecay=0.99`` and ``batchSize=16``, driven
    through ``runEpisodes`` and shut down with ``kill()`` afterwards.

    Args:
        numEpisodes: passed to ``runEpisodes`` as its first argument.
        saveFreq: passed to ``runEpisodes`` as its sixth positional argument.
        render: passed to ``runEpisodes`` as its fifth positional argument.
        nHidd: value forwarded to ``NNAgent`` as ``nNeuronsHidLayers``.
            ``None`` (the default) means ``[10, 10, 10]``.
        solveStop: forwarded to ``runEpisodes`` as the ``solveStop`` keyword.
    """
    # Build the default per call: a list literal in the signature would be a
    # single shared object, so any in-place change made by the agent would
    # leak into later calls.
    hidden_layers = [10, 10, 10] if nHidd is None else nHidd

    env = gym.make('CartPole-v0')
    env.seed(0)
    agent = nnAgent.NNAgent(
        env,
        nNeuronsHidLayers=hidden_layers,
        alpha=0.001,
        epsilonDecay=0.99,
        batchSize=16,
    )
    runEpisodes(numEpisodes, env, agent, True, render, saveFreq, solveStop=solveStop)
    agent.kill()
def testAndExperiment():
    """Scratch routine: inspect LunarLander-v2 spaces and poke an ``NNAgent``.

    Prints the shapes of the observation and action spaces, builds
    ``NNAgent(8, 4)``, prints ``agent.action`` for a fixed 8-element
    observation, then prints the result of running the ``"outTargetQ"``
    entry of ``agent.qNetsDict`` in its ``"session"`` with the same
    observation reshaped to ``(1, 8)`` fed to ``"inTargetQ"``.
    """
    lander = gym.make('LunarLander-v2')
    lander.seed(0)

    for label, attr_name in (("obs space", "observation_space"),
                             ("act space", "action_space")):
        print(label, getattr(lander, attr_name).shape)

    # A disabled experiment used to live here: it printed env.reset() five
    # times, re-seeded the env with 0, then printed five
    # action_space.sample() draws to test that env.seed() fixes the
    # environment's randomness. Author's note from that experiment: spaces
    # are seeded separately and apparently by a fixed seed.

    probe = nnAgent.NNAgent(8, 4)
    # probe.test() and probe.testNpSeed() are intentionally left disabled,
    # as in the original.
    print(probe.action([1, 2, 3, 4, 5, 6, 7, 8]))

    session = probe.qNetsDict["session"]
    target_output = probe.qNetsDict["outTargetQ"]
    feed = {
        probe.qNetsDict["inTargetQ"]: np.reshape([1, 2, 3, 4, 5, 6, 7, 8], (1, 8)),
    }
    print(session.run(target_output, feed_dict=feed))
def play():
    """Run the configured number of games of the sell/bid card game.

    Reads its settings from the module-level ``DASHBOARD`` dict, lazily fills
    ``AGENT_WAREHOUSE`` with a "sell" and a "bid" agent for every name in
    ``DASHBOARD["AGENT_NAMES"]``, then plays up to
    ``DASHBOARD["NUM_OF_GAME_PLAY"]`` games of ``total_round`` rounds each.
    Every round has a selling stage (each player puts one card into the
    central series) followed by a bidding stage (players either raise their
    bid or skip and collect a skip reward). Decisions are obtained by calling
    ``handler(agent_input, agent_output)``, which is expected to fill
    ``agent_output`` in place.

    Side effects: prints through ``printm``, optionally records through
    ``game_recorder``, appends to / increments the module-level
    ``WIN_COUNTS`` when win-rate counting is on, and blocks on ``raw_input``
    (Python 2) for unknown agent names and for the replay prompt.

    Returns:
        None.
    """
    PRINT_MODE = DASHBOARD["PRINT_MODE"]  # read but not referenced below
    NUM_OF_GAME_PLAY = DASHBOARD["NUM_OF_GAME_PLAY"]
    AUTO_REPLAY = DASHBOARD["AUTO_REPLAY"]
    WIN_RATE_COUNT = DASHBOARD["WIN_RATE_COUNT"]
    GAME_RECORD = DASHBOARD["GAME_RECORD"]
    NUM_OF_PLAYER = DASHBOARD["NUM_OF_PLAYER"]

    # AGENT_WAREHOUSE Set-up #
    # Each known agent name gets a {"sell": ..., "bid": ...} pair; names that
    # are already present are left untouched.
    for agent_name in DASHBOARD["AGENT_NAMES"]:
        if (agent_name == ""):
            continue
        elif (agent_name not in AGENT_WAREHOUSE):
            AGENT_WAREHOUSE[agent_name] = {}
            if (agent_name == "svm"):
                AGENT_WAREHOUSE[agent_name]["sell"] = agent.SVMAgent(targetType = "sell")
                AGENT_WAREHOUSE[agent_name]["bid"] = agent.SVMAgent(targetType = "bid")
            elif (agent_name == "nn"):
                AGENT_WAREHOUSE[agent_name]["sell"] = agent.NNAgent(targetType = "sell")
                AGENT_WAREHOUSE[agent_name]["bid"] = agent.NNAgent(targetType = "bid")
            elif (agent_name == "nb"):
                AGENT_WAREHOUSE[agent_name]["sell"] = agent.NBAgent(targetType = "sell")
                AGENT_WAREHOUSE[agent_name]["bid"] = agent.NBAgent(targetType = "bid")
            elif (agent_name == "dt"):
                AGENT_WAREHOUSE[agent_name]["sell"] = agent.DTAgent(targetType = "sell")
                AGENT_WAREHOUSE[agent_name]["bid"] = agent.DTAgent(targetType = "bid")
            elif (agent_name == "lr"):
                AGENT_WAREHOUSE[agent_name]["sell"] = agent.LRAgent(targetType = "sell")
                AGENT_WAREHOUSE[agent_name]["bid"] = agent.LRAgent(targetType = "bid")
            elif (agent_name == "rl"):
                # The "rl" entry pairs the RL seller with an SVM bidder.
                AGENT_WAREHOUSE[agent_name]["sell"] = rl_agent.get_agent()
                AGENT_WAREHOUSE[agent_name]["bid"] = agent.SVMAgent(targetType = "bid")
            else:
                # Unknown name: drop the placeholder entry and pause so the
                # operator sees the message; the typed input is discarded.
                del AGENT_WAREHOUSE[agent_name]
                raw_input("ERROR unknown agent name: %s." % agent_name)

    # Game Rules Set-up #
    default_deck_value = {  # dictionary <string, int>
        "+15": 15,
        "+11": 11,
        "+8" : 8,
        "+5" : 5,
        "+3" : 3,
        "0"  : 0,
        "-5" : -5,
        "-8" : -8,
        "DOG": 0,
        "dog": 0
    }
    # dictionary <string, bool>: every card starts out available (True).
    default_deck = dict.fromkeys(default_deck_value, True)
    total_round = 10
    default_score = 0
    default_token = 15
    max_bid = 2
    # Indexed by reward_pointer, which can reach num_of_player - 1 when every
    # player skips in a round, so this needs at least num_of_player entries.
    # NOTE(review): an earlier comment said "length must not exceed
    # num_of_player" -- confirm the intended constraint.
    skip_rewards = [1, 2, 3, 4]
    num_of_player = NUM_OF_PLAYER

    # Player Template Set-up #
    player_template = {
        "deck": {},  # dictionary <string, bool>
        "show_deck_public": [],
        "score": default_score,
        "token": default_token,
        "bid": 0,
        "skipped": False
    }

    # Agent Template Set-up #
    agent_input_template = {
        "my_index": -1,  # index of current player
        "rule_total_round": total_round,
        "rule_default_deck_value": default_deck_value,  # dictionary <string, int>
        "rule_max_bid": max_bid,
        "round": -1,
        "starting_player_index": -1,
        "stage": 0,  # 1 for Selling Stage, 2 for Bidding Stage
        "players_public": [],
        "my_deck": {},
        "central_series_public": [],
        "rule_skip_rewards": skip_rewards,
        "reward_pointer": -1,
        "current_highest_bid": -1
    }
    agent_output_template = {
        "card_to_sell": "INITIAL",
        "bid_to_add": -1,
        "bid_to_exceed": -1
    }
    game_result = {
        "winner": -1,
        "total_scores": []
    }

    # Game Loop #
    if (WIN_RATE_COUNT):
        for _ in range(num_of_player):
            WIN_COUNTS.append(0)

    # Number of games actually completed; stays 0 when NUM_OF_GAME_PLAY is 0
    # so the statistics section below never touches an unbound loop variable.
    total_game_play = 0

    for game_play in range(0, NUM_OF_GAME_PLAY):
        printm("\n###### GAME PLAY %d ######" % game_play, "g")

        # Data Recorder Set-up #
        if (GAME_RECORD):
            game_recorder.set_recording()

        # Game Initiation #
        current_round = 0
        # Any seat may start, whatever the configured player count is.
        starting_player_index = random.randint(0, num_of_player - 1)
        players = []  # array of dictionary
        for _ in range(num_of_player):
            new_player = player_template.copy()
            new_player["deck"] = default_deck.copy()
            players.append(new_player)

        # Game Start #
        printm("\n### GAME START ###", "t")
        while (current_round < total_round):
            current_round += 1

            # prepare agent_input and agent_output
            agent_input = agent_input_template.copy()
            agent_output = agent_output_template.copy()

            printm("\n========", "t")
            printm(" Round %d" % current_round, "t")
            printm("========", "t")
            agent_input["round"] = current_round
            agent_input["starting_player_index"] = starting_player_index

            # Round Initiation #
            central_series = []  # empty the central series
            central_series_revealed_length = 1
            reward_pointer = 0
            current_highest_bid = 0
            current_highest_bidder_index = -1
            # Counts consecutive opening skips: starts at 1, +1 per skip,
            # reset to 0 (disabled) as soon as any player bids.
            fleeing_detector = 1
            # True once the first num_of_player - 1 players to act have all
            # skipped, leaving a single player who has not acted yet.
            fleeing = False
            # True when, during fleeing, that last player skips as well, so
            # nobody wins the round.
            blanking = False
            # Set at the start of the one extra turn granted during fleeing
            # so the bidding loop exits right after it.
            fleeing_loop_terminator = False
            for i in range(num_of_player):
                players[i]["show_deck_public"] = show_deck(players[i]["deck"])
                players[i]["bid"] = 0
                players[i]["skipped"] = False

            # Selling Stage #
            printm("\n-----------------", "t")
            printm(" Selling Stage", "t")
            printm("-----------------", "t")
            agent_input["stage"] = 1
            for turn in range(num_of_player):
                current_player_index = (starting_player_index + turn) % num_of_player
                current_player = players[current_player_index]
                current_deck = current_player["deck"]
                printm("\n<Player %d's Turn>\n" % current_player_index, "t")
                agent_input["my_index"] = current_player_index

                # information (excluding bid) of all players
                agent_input["players_public"] = []
                for j in range(num_of_player):
                    placeholder = FIG_ARROW if (j == current_player_index) else ""
                    printm("Player %d%s\n%s" % (j, placeholder, player_info(players[j])), "i")
                    # Shallow copy without the private "deck" entry.
                    public_view = players[j].copy()
                    del public_view["deck"]
                    agent_input["players_public"].append(public_view)

                printm("\n" + str(show_deck(current_deck)), "i")
                # The live deck dict is handed over, not a copy.
                agent_input["my_deck"] = current_deck
                handler(agent_input, agent_output)
                card_to_sell = agent_output["card_to_sell"]

                # Record the selling decision #
                if (GAME_RECORD):
                    game_recorder.decision_recorder(agent_input, agent_output)

                current_deck[card_to_sell] = False  # card is no longer available
                central_series.append(card_to_sell)

            # Bidding Stage #
            printm("\n-----------------", "t")
            printm("Bidding Stage", "t")
            printm("-----------------", "t")
            agent_input["stage"] = 2
            # Start one seat before the starting player; the loop advances
            # the index before using it.
            current_player_index = starting_player_index - 1
            while ((reward_pointer < num_of_player - 1) or fleeing):
                if (fleeing):
                    fleeing_loop_terminator = True
                current_player_index = (current_player_index + 1) % num_of_player
                current_player = players[current_player_index]
                current_deck = current_player["deck"]
                if (current_player["skipped"]):
                    continue
                printm("\n<Player %d's Turn>\n" % current_player_index, "t")
                agent_input["my_index"] = current_player_index
                agent_input["my_deck"] = current_deck

                # information (including bid) of all players
                agent_input["players_public"] = []
                for j in range(num_of_player):
                    placeholder = FIG_ARROW if (j == current_player_index) else ""
                    printm("Player %d%s\n%s" % (j, placeholder, player_info_bid(players[j])), "i")
                    public_view = players[j].copy()
                    del public_view["deck"]
                    agent_input["players_public"].append(public_view)

                printm("\n" + str(show_series(central_series, central_series_revealed_length)), "i")
                agent_input["central_series_public"] = show_series(central_series, central_series_revealed_length)
                printm("Starting Player: %d" % starting_player_index, "i")
                printm(rewards_info(skip_rewards, reward_pointer), "i")
                agent_input["reward_pointer"] = reward_pointer
                agent_input["current_highest_bid"] = current_highest_bid

                skip = False
                # forced to skip: the player's tokens cannot lift their bid
                # above the current highest bid
                if (current_player["token"] <= current_highest_bid - current_player["bid"]):
                    if (fleeing):
                        blanking = True
                    printm("You are forced to skip since you don't have enough tokens to win the bid.", "o")
                    skip = True
                # not forced to skip
                else:
                    if (fleeing):
                        printm("All other players fleed. Now you are the only bidder.", "o")
                    handler(agent_input, agent_output)
                    bid_to_add = agent_output["bid_to_add"]

                    # Record the bidding decision #
                    if (GAME_RECORD):
                        game_recorder.decision_recorder(agent_input, agent_output)

                    skip = (bid_to_add == 0)  # adding nothing means skipping
                    if (skip):
                        printm("You choose to skip.", "o")
                    else:
                        current_player["token"] -= bid_to_add
                        current_player["bid"] += bid_to_add
                        current_highest_bid = current_player["bid"]
                        current_highest_bidder_index = current_player_index
                        printm("You add your bid to %d." % current_player["bid"], "o")

                if (skip):
                    current_player["skipped"] = True
                    # refund whatever the player had bid so far
                    current_player["token"] += current_player["bid"]
                    current_player["bid"] = 0
                    # receive the reward
                    reward = skip_rewards[reward_pointer]
                    current_player["token"] += reward
                    placeholder = "s" if (reward > 1) else ""
                    printm("You receive a skip reward of %d token%s." % (reward, placeholder), "o")
                    reward_pointer += 1
                    central_series_revealed_length += 1
                    if (fleeing):  # fleeing + last player skip = blanking
                        blanking = True
                    elif (fleeing_detector):  # detect fleeing
                        fleeing_detector += 1
                        if (fleeing_detector == num_of_player):  # fleeing detected
                            fleeing = True
                else:
                    # if a player does not skip, turn off the fleeing detector
                    fleeing_detector = 0

                if (fleeing_loop_terminator):
                    break

            printm("\n" + str(show_series(central_series, central_series_revealed_length)), "i")
            if (blanking):
                printm("All players choose to skip.", "b")
            else:
                if (fleeing):  # fleeing but not blanking
                    bid_winner_index = current_player_index
                else:  # not fleeing
                    bid_winner_index = current_highest_bidder_index
                score = compute_series_score(central_series, default_deck_value)
                bid_winner = players[bid_winner_index]
                cost = bid_winner["bid"]
                bid_winner["bid"] = 0
                bid_winner["score"] += score
                # Pluralise on the amount actually printed (the cost), not on
                # the last skip reward.
                placeholder = "s" if (cost > 1) else ""
                printm("Player %d win the bid of score %d at the cost of %d token%s."
                       % (bid_winner_index, score, cost, placeholder), "b")
                # The round winner opens the next round; after a blanked
                # round the starting player is unchanged.
                starting_player_index = bid_winner_index

        # Game Over #
        printm("\n### GAME OVER ###\n", "t")
        total_scores = []
        for i in range(num_of_player):
            # final score = points won + tokens left
            total_scores.append(players[i]["score"] + players[i]["token"])
            printm("Player %d\n%s" % (i, player_info_game_over(players[i])), "i")
        # On a tie the lowest player index wins (list.index returns the
        # first match).
        winner_index = total_scores.index(max(total_scores))

        # for agent
        game_result["winner"] = winner_index
        game_result["total_scores"] = total_scores

        # Record the Result #
        if (GAME_RECORD):
            game_recorder.result_recorder(game_result)

        printm("\n--------------", "r")
        printm("Total Score", "r")
        printm("--------------", "r")
        for i in range(num_of_player):
            placeholder = FIG_ARROW if (i == winner_index) else ""
            printm("Player %d %d%s" % (i, total_scores[i], placeholder), "r")
        printm("\nThe winner is Player %d!" % winner_index, "r")
        if (WIN_RATE_COUNT):
            WIN_COUNTS[winner_index] += 1

        total_game_play = game_play + 1

        # Replay #
        if (not AUTO_REPLAY):
            replay = raw_input("\nNext game? (y/n) ")
            if (replay != "y"):
                break

    if (WIN_RATE_COUNT):
        printm("\n###### WINNING STATISTICS ######", "g")
        printm("Total game play: %d" % total_game_play, "g")
        for i in range(num_of_player):
            # Report a 0.00 rate instead of dividing by zero when no game
            # was played.
            if (total_game_play):
                win_rate = float(WIN_COUNTS[i]) / float(total_game_play)
            else:
                win_rate = 0.0
            printm("Player %d Winning: %d Winning Rate: %.2f"
                   % (i, WIN_COUNTS[i], win_rate), "g")
def randomPlay(numEpisodes, render=False):
    """Run LunarLander-v2 episodes with an agent built with ``minEpsilon=1.1``.

    The environment is created with a fixed seed of 0, an
    ``nnAgent.NNAgent(8, 4, runName="random", minEpsilon=1.1)`` is driven
    through ``runEpisodes`` with ``validationSteps=False``, and the agent is
    shut down with ``kill()`` afterwards.

    Args:
        numEpisodes: passed to ``runEpisodes`` as its first argument.
        render: passed to ``runEpisodes`` as its fifth positional argument.
    """
    lander = gym.make('LunarLander-v2')
    lander.seed(0)

    # presumably an epsilon floor above 1 keeps action selection fully
    # random -- verify against NNAgent
    agent_options = {"runName": "random", "minEpsilon": 1.1}
    player = nnAgent.NNAgent(8, 4, **agent_options)

    runEpisodes(numEpisodes, lander, player, True, render, validationSteps=False)
    player.kill()