def runModel():
    """Evaluate four pre-trained A2C agents against each other.

    Restores four actor/critic checkpoint pairs from earlier self-play
    experiments, plays ``numRuns`` independent runs of ``numGames`` games
    each (no training), accumulates the per-player win counts and plots
    the totals with ``plotVictoriesTotal``.
    """
    # Participants: four A2C agents. The [False, 1.0] argument is the
    # agent's construction flags — presumably [isTraining, exploration];
    # verify against AgentA2C.
    agent1 = AgentA2C.AgentA2C([False, 1.0])  # training agent
    agent2 = AgentA2C.AgentA2C([False, 1.0])
    agent3 = AgentA2C.AgentA2C([False, 1.0])
    agent4 = AgentA2C.AgentA2C([False, 1.0])

    playersAgents = [agent1, agent2, agent3, agent4]
    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 10    # amount of runs
    numGames = 100  # amount of games per run
    experimentDescriptor = "Testing_NewPlot"

    # Checkpoints of the four competitors, taken from different
    # self-play training experiments.
    A2cActor_1 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch - Second Best - Best - _2020-03-26_19:11:39.863435/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic_1 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch - Second Best - Best - _2020-03-26_19:11:39.863435/Model/critic_iteration_999_Player_0.hd5"

    A2cActor_4 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_4_Training_Best_Agent_1Choice_ Best - Scratch - Scratch - _2020-03-26_19:27:58.464895/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic_4 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_4_Training_Best_Agent_1Choice_ Best - Scratch - Scratch - _2020-03-26_19:27:58.464895/Model/critic_iteration_999_Player_0.hd5"

    # NOTE(review): these two paths point into the DQL experiment tree and
    # both reference *actor* checkpoint files, although they are used as an
    # A2C actor/critic pair — looks like a copy-paste slip; confirm the
    # intended checkpoints before relying on this evaluation.
    A2cActor_9 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_1_Training_Best_Agent_2_2020-03-26_17:33:51.517296/Model/actor_iteration_999_Player_1.hd5"
    A2cCritic_9 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_2_Training_Best_Agent_1_2020-03-26_17:42:24.306637/Model/actor_iteration_999_Player_1.hd5"

    A2cActor_r = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic_r = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/critic_iteration_999_Player_0.hd5"

    loadModelAgent1 = [A2cActor_1, A2cCritic_1]
    loadModelAgent2 = [A2cActor_4, A2cCritic_4]
    loadModelAgent3 = [A2cActor_9, A2cCritic_9]
    loadModelAgent4 = [A2cActor_r, A2cCritic_r]

    # Seat order for the experiment handler (who loads which checkpoint).
    loadModel = [loadModelAgent4, loadModelAgent3, loadModelAgent1,
                 loadModelAgent2]

    # Parameters controlling the experiment.
    isLogging = False      # log the experiment
    isPlotting = True      # plot the experiment
    plotFrequency = 1000   # plot the plots every X games
    createDataset = True   # whether to save the dataset
    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/"  # directory where the experiment will be saved

    winsP1, winsP2, winsP3, winsP4 = [], [], [], []

    for run in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # metrics[2]..metrics[5] are the per-player result sequences;
        # index 0 holds the win count — presumably; verify against
        # ChefsHatExperimentHandler.runExperiment.
        winsP1.append(metrics[2][0])
        winsP2.append(metrics[3][0])
        winsP3.append(metrics[4][0])
        winsP4.append(metrics[5][0])

        print("Metrics:" + str(metrics))

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
def runModel():
    """Play pre-trained DQL, A2C and PPO agents against a random agent.

    Loads one checkpoint per learning agent (agent 4 is a dummy random
    player with no model), runs ``numRuns`` runs of ``numGames`` games
    without training, records per-player win counts plus the last
    Q-value entry of each learning agent, and plots total victories.
    """
    # Participants: one agent of each algorithm plus a random baseline.
    # [False, 1.0] are the agent construction flags — presumably
    # [isTraining, exploration]; verify against the agent classes.
    agent1 = AgentDQL.AgentDQL([False, 1.0])  # training agent
    agent2 = AgentA2C.AgentA2C([False, 1.0])
    agent3 = AgentPPO.AgentPPO([False, 1.0])
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    playersAgents = [agent1, agent2, agent3, agent4]
    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 1     # amount of runs
    numGames = 100  # amount of games per run
    experimentDescriptor = "Testing_NoTraining_newPlot"

    # Saved model checkpoints for the three learning agents.
    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0_2020-03-26_17:26:18.198600/Model/actor_iteration_999_Player_2.hd5"

    A2cActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch - Second Best - Best - _2020-03-26_19:11:39.863435/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch - Second Best - Best - _2020-03-26_19:11:39.863435/Model/critic_iteration_999_Player_0.hd5"

    PPOActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch - Scratch - Scratch - _2020-03-26_22:27:02.430568/Model/actor_iteration_999_Player_3.hd5"
    PPOCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch - Scratch - Scratch - _2020-03-26_22:27:02.430568/Model/critic_iteration_999_Player_3.hd5"

    # DQL needs a single network; A2C/PPO need [actor, critic]; the random
    # agent loads nothing.
    loadModelAgent1 = DQLModel
    loadModelAgent2 = [A2cActor, A2cCritic]
    loadModelAgent3 = [PPOActor, PPOCritic]
    loadModelAgent4 = ""

    loadModel = [loadModelAgent1, loadModelAgent2, loadModelAgent3,
                 loadModelAgent4]  # indicate where the saved model is

    # Parameters controlling the experiment.
    isLogging = False     # log the experiment
    isPlotting = False    # plot the experiment
    plotFrequency = 1     # plot the plots every X games
    createDataset = True  # whether to save the dataset
    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/All/AllQValues_beforetraining/"  # directory where the experiment will be saved

    winsP1, winsP2, winsP3, winsP4 = [], [], [], []

    # One Q-value trace slot per player (the random agent's slot stays
    # empty — see the collection loop below).
    qvalues = [[] for _ in playersAgents]

    for run in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # metrics[2]..metrics[5] are the per-player result sequences;
        # index 0 holds the win count — presumably; verify against
        # ChefsHatExperimentHandler.runExperiment.
        winsP1.append(metrics[2][0])
        winsP2.append(metrics[3][0])
        winsP3.append(metrics[4][0])
        winsP4.append(metrics[5][0])

        # Grab the last entry of each learning agent's metrics as its
        # Q-value record. The `- 1` skips the final (random) player,
        # which has no learned Q-values — presumably intentional; confirm.
        for idx in range(len(playersAgents) - 1):
            qvalues[idx].append(metrics[idx + 2][-1])

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
def runModel():
    """Evaluate one pre-trained A2C agent against three random agents.

    Restores the actor/critic pair of an A2C agent trained against
    random opponents, plays ``numRuns`` runs of ``numGames`` games
    without training, accumulates per-player win counts and plots the
    victory totals.
    """
    # Participants: one A2C agent, three dummy random baselines.
    # [False, 1.0] are the agent construction flags — presumably
    # [isTraining, exploration]; verify against AgentA2C.
    agent1 = AgentA2C.AgentA2C([False, 1.0])  # training agent
    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    playersAgents = [agent1, agent2, agent3, agent4]
    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 1     # amount of runs
    numGames = 100  # amount of games per run
    experimentDescriptor = "Testing_NewPlot_Stack"

    # Checkpoint of the A2C agent (trained vs. random opponents).
    A2CActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/actor_iteration_999_Player_0.hd5"
    A2CCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/critic_iteration_999_Player_0.hd5"

    # Only the A2C agent has a model to restore; random agents load nothing.
    loadModelAgent1 = [A2CActor, A2CCritic]
    loadModelAgent2 = ""
    loadModelAgent3 = ""
    loadModelAgent4 = ""

    loadModel = [loadModelAgent1, loadModelAgent2, loadModelAgent3,
                 loadModelAgent4]  # indicate where the saved model is

    # Parameters controlling the experiment.
    isLogging = False     # log the experiment
    isPlotting = False    # plot the experiment
    plotFrequency = 1     # plot the plots every X games
    createDataset = True  # whether to save the dataset
    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/AllQValues"  # directory where the experiment will be saved

    winsP1, winsP2, winsP3, winsP4 = [], [], [], []

    for run in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # metrics[2]..metrics[5] are the per-player result sequences;
        # index 0 holds the win count — presumably; verify against
        # ChefsHatExperimentHandler.runExperiment.
        winsP1.append(metrics[2][0])
        winsP2.append(metrics[3][0])
        winsP3.append(metrics[4][0])
        winsP4.append(metrics[5][0])

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
def runModel():
    """Evaluate four pre-trained DQL agents against each other.

    Restores four DQL checkpoints from the 50x1000 self-play training
    series (plus one trained vs. random opponents), plays ``numRuns``
    runs of ``numGames`` games each without training, accumulates
    per-player win counts and plots the victory totals.
    """
    # Participants: four DQL agents. [False, 1.0] are the agent
    # construction flags — presumably [isTraining, exploration]; verify
    # against AgentDQL.
    agent1 = AgentDQL.AgentDQL([False, 1.0])  # training agent
    agent2 = AgentDQL.AgentDQL([False, 1.0])
    agent3 = AgentDQL.AgentDQL([False, 1.0])
    agent4 = AgentDQL.AgentDQL([False, 1.0])

    playersAgents = [agent1, agent2, agent3, agent4]
    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 10    # amount of runs
    numGames = 100  # amount of games per run
    experimentDescriptor = "Testing_50x1000"

    # Checkpoints from different stages of the 1000x50 self-play series
    # (experiment numbers 0, 15 and 30) and one trained against random
    # opponents.
    DQLModel0 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000x50/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_SecondBestAgents-SecondBestAgents-Scratch-_2020-03-27_01:55:16.105188/Model/actor_iteration_999_Player_2.hd5"
    DQLModel4 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000x50/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_15_Training_Best_Agent_2Choice_SecondBestAgents-BestAgents-BestAgents-_2020-03-27_04:50:05.205901/Model/actor_iteration_999_Player_2.hd5"
    DQLModel9 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000x50/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_30_Training_Best_Agent_0Choice_BestAgents-BestAgents-BestAgents-_2020-03-27_12:18:40.723555/Model/actor_iteration_999_Player_2.hd5"
    DQLModelr = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:26:46.054899/Model/actor_iteration_999_Player_0.hd5"

    # DQL restores a single network per agent (no critic).
    loadModelAgent1 = DQLModel0
    loadModelAgent2 = DQLModel4
    loadModelAgent3 = DQLModel9
    loadModelAgent4 = DQLModelr

    # Seat order for the experiment handler (who loads which checkpoint).
    loadModel = [loadModelAgent3, loadModelAgent4, loadModelAgent1,
                 loadModelAgent2]  # indicate where the saved model is

    # Parameters controlling the experiment.
    isLogging = False     # log the experiment
    isPlotting = True     # plot the experiment
    plotFrequency = 1000  # plot the plots every X games
    createDataset = True  # whether to save the dataset
    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/"  # directory where the experiment will be saved

    winsP1, winsP2, winsP3, winsP4 = [], [], [], []

    for run in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # metrics[2]..metrics[5] are the per-player result sequences;
        # index 0 holds the win count — presumably; verify against
        # ChefsHatExperimentHandler.runExperiment.
        winsP1.append(metrics[2][0])
        winsP2.append(metrics[3][0])
        winsP3.append(metrics[4][0])
        winsP4.append(metrics[5][0])

        print("Metrics:" + str(metrics))

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)