# Assumed imports for this optimization script: hyperopt, numpy, pickle, and time are
# used below; the Chef's Hat agent and experiment modules come from the project itself,
# and their exact import paths depend on the repository layout.
import pickle
import time

import numpy
import hyperopt
import hyperopt.plotting
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe


def runModel():
    # Parameters for the game
    agent1 = AgentDQL.AgentDQL([True, 1.0, "DQL"])  # training agent
    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # Load agents from
    loadModels = ""
    loadModel = [loadModels, loadModels, loadModels, loadModels]

    # List of agents
    playersAgents = [agent1, agent2, agent3, agent4]

    # Reward function
    reward = RewardOnlyWinning.RewardOnlyWinning()

    # Experimental parameters
    numGames = 1000  # amount of games to be executed
    numMaxEvals = 100  # number of evaluations for the optimization
    experimentDescriptor = "Optmizing_DQL"  # experiment name
    saveTrialsDataset = "folder"  # folder where the optimization trials will be saved
    saveTrialsDataset += experimentDescriptor

    isLogging = False  # create a .txt file with the experiment log
    isPlotting = False  # create plots of the experiment
    createDataset = False  # create a .pkl dataset of the experiment
    saveExperimentsIn = ""  # directory where the experiment will be saved

    # Search space for the agent
    space = hp.choice('a', [(hp.choice("layers", [1, 2, 3, 4]),
                             hp.choice("hiddenUnits", [8, 32, 64, 256]),
                             hp.choice("batchSize", [16, 64, 128, 256, 512]),
                             hp.uniform("tau", 0.01, 0.99))])

    def objective(args):
        agent1 = AgentDQL.AgentDQL([True, 1.0, "DQL"])  # training agent
        agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
        agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
        agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
        playersAgents = [agent1, agent2, agent3, agent4]

        loadModels = ""
        loadModel = [loadModels, loadModels, loadModels, loadModels]
        numGames = 100  # amount of training games

        # Training trial
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames, playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor, isLogging=isLogging,
            isPlotting=isPlotting, createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn, loadModel=loadModel,
            rewardFunction=reward, agentParams=[args])

        # Player 1 - agent under optimization
        p1 = metrics[2]
        p1_model = p1[4]

        # Testing trial: reload the freshly trained model with training disabled
        agent1 = AgentDQL.AgentDQL([False, 1.0, "DQL"])
        playersAgents = [agent1, agent2, agent3, agent4]

        loadModelAgent1 = p1_model
        loadModel = [loadModelAgent1, loadModels, loadModels, loadModels]
        numGames = 100  # amount of testing games

        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames, playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor, isLogging=isLogging,
            isPlotting=isPlotting, createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn, loadModel=loadModel,
            rewardFunction=reward, agentParams=[args])

        rounds = metrics[0]
        startGameFinishingPosition = metrics[1]

        # Player 1 - agent under optimization
        p1 = metrics[2]
        p1_wins = p1[0]
        p1_positions = p1[1]
        p1_rewards = p1[2]
        p1_wrongActions = p1[3]
        p1_model = p1[4]

        p2 = metrics[3]
        p3 = metrics[4]

        wins = numGames - p1_wins  # games the evaluated agent did not win
        averageReward = 1 - numpy.average(p1_rewards)
        averageRounds = rounds

        print("Args: " + str(args) + " Reward: " + str(averageReward))

        return {
            'loss': averageReward,
            'status': STATUS_OK,
            # -- store other results like this
            'eval_time': time.time(),
            'other_stuff': {'wins': wins,
                            'rounds': averageRounds,
                            'wrongMoves': p1_wrongActions},
        }

    trials = Trials()
    best = fmin(objective, space=space, algo=tpe.suggest,
                max_evals=numMaxEvals, trials=trials)

    print("Saving the trials dataset:", saveTrialsDataset)
    pickle.dump(trials, open(saveTrialsDataset, "wb"))

    print("Trials:", trials)
    print("Best: ", hyperopt.space_eval(space, best))
print("Best:" + str(best)) hyperopt.plotting.main_plot_history(trials, title="WinsHistory")
def runModel():
    # Parameters for the game
    agent1 = AgentA2C.AgentA2C([False, 1.0])  # agent under evaluation (training disabled)
    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 1  # amount of runs
    numGames = 100  # amount of games per run
    experimentDescriptor = "Testing_NewPlot_Stack"

    A2CActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/actor_iteration_999_Player_0.hd5"
    A2CCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/critic_iteration_999_Player_0.hd5"

    # Where to load the saved models from; an empty string means no model is loaded
    loadModelAgent1 = [A2CActor, A2CCritic]  # A2C agents load an [actor, critic] pair
    loadModelAgent2 = ""
    loadModelAgent3 = ""
    loadModelAgent4 = ""

    loadModel = [loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4]

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment
    isPlotting = False  # plot the experiment
    plotFrequency = 1  # plot the plots every X games
    createDataset = True  # whether to save the dataset
    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/AllQValues"  # directory where the experiment will be saved

    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []

    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames, playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor, isLogging=isLogging,
            isPlotting=isPlotting, plotFrequency=plotFrequency,
            createDataset=createDataset, saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel, rewardFunction=reward)

        # Per-player metrics: index 0 of each player tuple holds the number of wins
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
def train(train_data, labels, epochs, test_data, true_labels, word_vector_size,
          num_intents, hidden_size):
    model = baseline_model(word_vector_size, num_intents, hidden_size)

    inputs = []
    targets = []
    cum_train_acc_hist = []
    cum_test_acc_hist = []
    abs_train_acc_hist = []
    abs_test_acc_hist = []
    random_wins = []

    # Overpopulating start: oversample minority intents until all intents are balanced
    max_intent = max(np.unique(labels, axis=0, return_counts=True)[1])
    for i in range(num_intents):
        tup = np.unique(labels, axis=0, return_counts=True)
        current_intent = tup[0][i]
        current_count = tup[1][i]
        diff_intent = max_intent - current_count
        indices = [k for k, x in enumerate(labels)
                   if sum(x == current_intent) == num_intents]
        for j in range(diff_intent):
            index = random.choice(indices)
            train_data.append(train_data[index])
            labels = labels.tolist()
            labels.append(labels[index])
            labels = np.array(labels)
    # Overpopulating end

    for e in range(epochs):  # epochs
        envRM = AgentRandom()
        envML = AgentML()

        for i in range(len(train_data)):  # no. of games (sentences) in one epoch
            sentence_embedding = train_data[i]
            rm_intent_list = []
            ml_intent_list = []
            rm_reward_list = []
            ml_reward_list = []

            for j in range(len(sentence_embedding)):  # no. of episodes (words) in one game (sentence)
                rm_reward = -1
                ml_reward = -1
                current_state = sentence_embedding[j].reshape(1, word_vector_size)
                rm_dist = envRM.act(num_intents)
                ml_dist = envML.act(current_state, model)

                if np.argmax(labels[i]) == np.argmax(rm_dist):
                    rm_reward = 1
                if np.argmax(labels[i]) == np.argmax(ml_dist):
                    ml_reward = 1

                rm_intent_list.append(rm_dist)
                ml_intent_list.append(ml_dist)
                rm_reward_list.append(rm_reward)
                ml_reward_list.append(ml_reward)

            envRM.dist_holder(rm_intent_list, rm_reward_list)
            envML.dist_holder(ml_intent_list, ml_reward_list)

        cum_reward_rm = []
        cum_reward_ml = []
        for i in range(len(envRM.reward_dist)):
            cum_reward_rm.append(np.mean(envRM.reward_dist[i]))
        for i in range(len(envML.reward_dist)):
            cum_reward_ml.append(np.mean(envML.reward_dist[i]))

        diff = np.array(cum_reward_rm) - np.array(cum_reward_ml)
        rm_wins = (diff > 0)
        random_wins.append(sum(rm_wins))

        for i in range(len(rm_wins)):
            if rm_wins[i]:  # Agent Random performed better than Agent ML
                for j in range(len(train_data[i])):
                    if envRM.reward_dist[i][j] == 1:
                        if (train_data[i][j]).tolist() in inputs:
                            input_index = inputs.index((train_data[i][j]).tolist())
                            target_index = np.argmax(envRM.intent_dist[i][j])
                            targets[input_index][target_index] += diff[i]
                        else:
                            inputs.append((train_data[i][j]).tolist())
                            index = np.argmax(envRM.intent_dist[i][j])
                            target = (np.zeros((num_intents,))).tolist()
                            target[index] = diff[i]
                            targets.append(target)

        X_generated = copy.deepcopy(inputs)
        y_generated = copy.deepcopy(targets)
        for i in range(len(y_generated)):
            y_generated[i] = [float(j) / max(y_generated[i]) for j in y_generated[i]]

        model = baseline_model(word_vector_size, num_intents, hidden_size)
        model.fit(np.array(X_generated), np.array(y_generated), epochs=500, batch_size=32)

        cum_train_acc, abs_train_acc = test(train_data, labels, model, word_vector_size)
        cum_test_acc, abs_test_acc = test(test_data, true_labels, model, word_vector_size)

        cum_train_acc_hist.append(cum_train_acc)
        abs_train_acc_hist.append(abs_train_acc)
        cum_test_acc_hist.append(cum_test_acc)
        abs_test_acc_hist.append(abs_test_acc)

        print("Epoch " + str(e) + " Train " + str(cum_train_acc) + " , " + str(abs_train_acc))
        print("Epoch " + str(e) + " Test " + str(cum_test_acc) + " , " + str(abs_test_acc))

    return model, cum_train_acc_hist, abs_train_acc_hist, cum_test_acc_hist, abs_test_acc_hist, random_wins
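# A small, self-contained sketch of the "overpopulating" (oversampling) step used in
# train() above: minority intents are duplicated at random until every intent appears
# as often as the most frequent one. The toy labels and embeddings below are
# illustrative only and are not part of the original pipeline.
import random

import numpy as np

num_intents = 3
labels = np.eye(num_intents)[[0, 0, 0, 1, 2]]         # intent 0 appears 3x, the others 1x
train_data = [np.random.rand(4, 10) for _ in labels]  # one word-vector matrix per sentence

max_intent = max(np.unique(labels, axis=0, return_counts=True)[1])
for i in range(num_intents):
    rows, counts = np.unique(labels, axis=0, return_counts=True)
    deficit = max_intent - counts[i]
    # indices of sentences whose label exactly matches the i-th unique label row
    indices = [k for k, x in enumerate(labels) if sum(x == rows[i]) == num_intents]
    for _ in range(deficit):
        index = random.choice(indices)
        train_data.append(train_data[index])
        labels = np.vstack([labels, labels[index]])

print(np.unique(labels, axis=0, return_counts=True)[1])  # every intent now has 3 examples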
def runModel():
    # Plots
    plotsToGenerate = []

    demonstrations = numpy.load(
        "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/NEURIPS2020/AIRL/ExpertObs/Demonstrations_ExpertCollection.npy",
        allow_pickle=True)

    # Parameters for the agents (all candidates are evaluated with training disabled)
    agentDQL = AgentDQL.AgentDQL([False, 1.0, "DQL"])
    agentA2C = AgentA2C.AgentA2C([False, 1.0, "A2C"])
    agentPPO = AgentPPO.AgentPPO([False, 1.0, "PPO"])
    agentAIRL = AIRL.AgentAIRL([False, 1.0, "AIRL", None, demonstrations])

    possibleAgent1 = [agentDQL, agentA2C, agentPPO, agentAIRL]

    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # Load agents from
    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/DQL_vsRandom/actor_iteration_999_Player_0.hd5"
    A2cActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/A2C_vsEveryone/actor_iteration_999_Player_1.hd5"
    A2cCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/A2C_vsEveryone/critic_iteration_999_Player_1.hd5"
    PPOActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/PPO_vsEveryone/actor_iteration_999_Player_2.hd5"
    PPOCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/PPO_vsEveryone/critic_iteration_999_Player_2.hd5"
    AIRLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/NEURIPS2020/AIRL/Training/Player_4_Cards_11_games_5000TrainingAgents_['DQL_AIRL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Train_NegativeReward_notInverted_2020-04-05_16:31:22.781799/Model/actor_iteration_4999_Player_0.hd5"
    AIRLReward = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/NEURIPS2020/AIRL/Training/Player_4_Cards_11_games_5000TrainingAgents_['DQL_AIRL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Train_NegativeReward_notInverted_2020-04-05_16:31:22.781799/Model/reward_iteration_4999_Player_0.hd5"

    possibleLoadModel1 = [DQLModel, [A2cActor, A2cCritic], [PPOActor, PPOCritic],
                          [AIRLModel, AIRLReward]]
    loadModelEmpty = ""

    # Reward function
    reward = RewardOnlyWinning.RewardOnlyWinning()

    # Experimental parameters
    numberOfTrials = 50
    maximumScore = 15  # maximum score to be reached
    experimentDescriptor = "BaselineExperimentsVsRandom"  # experiment name
    isLogging = False  # create a .txt file with the experiment log
    isPlotting = False  # create plots of the experiment
    createDataset = False  # create a .pkl dataset of the experiment
    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/BaselineNumberGames"  # directory where the experiment will be saved

    # Metrics to be saved
    avgTotalGames = []
    avgWonGames = []
    avgPoints = []
    avgWonRounds = []
    for a in range(4):
        avgPoints.append([])
        avgWonGames.append([])
        avgWonRounds.append([])

    columns = ["ExperimentName", "AvgTotalNumberGames", "stdNumberGames",
               "Player0_AvgPoints", "Player0_stdAvgPoints", "Player0_AvgWonGames",
               "Player0_stdAvgWonGames", "Player0_AvgWonRounds", "Player0_stdWonRounds",
               "Player1_AvgPoints", "Player1_stdAvgPoints", "Player1_AvgWonGames",
               "Player1_stdAvgWonGames", "Player1_AvgWonRounds", "Player1_stdWonRounds",
               "Player2_AvgPoints", "Player2_stdAvgPoints", "Player2_AvgWonGames",
               "Player2_stdAvgWonGames", "Player2_AvgWonRounds", "Player2_stdWonRounds",
               "Player3_AvgPoints", "Player3_stdAvgPoints", "Player3_AvgWonGames",
               "Player3_stdAvgWonGames", "Player3_AvgWonRounds", "Player3_stdWonRounds"]

    totalDataFrame = pd.DataFrame(columns=columns)

    for agent in range(4):
        loadModel = [possibleLoadModel1[agent], loadModelEmpty, loadModelEmpty,
                     loadModelEmpty]

        # List of agents and models to load
        playersAgents = [possibleAgent1[agent], agent2, agent3, agent4]

        print("Evaluating agent:" + str(playersAgents[0].name))

        for a in range(numberOfTrials):
            metrics = ChefsHatExperimentHandler.runExperiment(
                maximumScore=maximumScore, playersAgents=playersAgents,
                experimentDescriptor=experimentDescriptor, isLogging=isLogging,
                isPlotting=isPlotting, createDataset=createDataset,
                saveExperimentsIn=saveExperimentsIn, loadModel=loadModel,
                rewardFunction=reward, plots=plotsToGenerate)

            games = metrics[-1]
            score = metrics[-2]
            winner = numpy.argmax(score)

            for i in range(len(playersAgents)):
                playerMetric = metrics[2 + i]
                rounds = playerMetric[5]
                avgPoints[i].append(score[i])
                if winner == i:
                    avgWonGames[i].append(games)
                    avgWonRounds[i].append(numpy.mean(rounds))

            print("Trial:" + str(a) + " - Games: " + str(games) + " - Winner: " + str(winner))
            avgTotalGames.append(games)

        currentDataFrame = []
        currentDataFrame.append(playersAgents[0].name)  # trained agent name
        currentDataFrame.append(numpy.mean(avgTotalGames))  # AvgTotalNumberGames
        currentDataFrame.append(numpy.std(avgTotalGames))  # stdNumberGames

        for i in range(len(playersAgents)):
            points = avgPoints[i]
            wonGamesNumber = avgWonGames[i]
            wonRounds = avgWonRounds[i]
            currentDataFrame.append(numpy.mean(points))  # Player X AvgPoints
            currentDataFrame.append(numpy.std(points))  # Player X stdAvgPoints
            currentDataFrame.append(numpy.mean(wonGamesNumber))  # Player X AvgWonGames
            currentDataFrame.append(numpy.std(wonGamesNumber))  # Player X stdAvgWonGames
            currentDataFrame.append(numpy.mean(wonRounds))  # Player X AvgWonRounds
            currentDataFrame.append(numpy.std(wonRounds))  # Player X stdWonRounds

        totalDataFrame.loc[-1] = currentDataFrame
        totalDataFrame.index = totalDataFrame.index + 1

        totalDataFrame.to_pickle(saveExperimentsIn + "/" + experimentDescriptor)
        totalDataFrame.to_csv(saveExperimentsIn + "/" + experimentDescriptor + ".csv",
                              index=False, header=True)
def runModel():
    # Parameters for the game (all agents are evaluated with training disabled)
    agent1 = AgentDQL.AgentDQL([False, 1.0])
    agent2 = AgentA2C.AgentA2C([False, 1.0])
    agent3 = AgentPPO.AgentPPO([False, 1.0])
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 1  # amount of runs
    numGames = 100  # amount of games per run
    experimentDescriptor = "Testing_NoTraining_newPlot"

    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0_2020-03-26_17:26:18.198600/Model/actor_iteration_999_Player_2.hd5"
    A2cActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch - Second Best - Best - _2020-03-26_19:11:39.863435/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch - Second Best - Best - _2020-03-26_19:11:39.863435/Model/critic_iteration_999_Player_0.hd5"
    PPOActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch - Scratch - Scratch - _2020-03-26_22:27:02.430568/Model/actor_iteration_999_Player_3.hd5"
    PPOCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch - Scratch - Scratch - _2020-03-26_22:27:02.430568/Model/critic_iteration_999_Player_3.hd5"

    # Where to load the saved models from; an empty string means no model is loaded
    loadModelAgent1 = DQLModel
    loadModelAgent2 = [A2cActor, A2cCritic]
    loadModelAgent3 = [PPOActor, PPOCritic]
    loadModelAgent4 = ""

    loadModel = [loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4]

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment
    isPlotting = False  # plot the experiment
    plotFrequency = 1  # plot the plots every X games
    createDataset = True  # whether to save the dataset
    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/All/AllQValues_beforetraining/"  # directory where the experiment will be saved

    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []

    qvalues = []
    for a in range(len(playersAgents)):
        qvalues.append([])

    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames, playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor, isLogging=isLogging,
            isPlotting=isPlotting, plotFrequency=plotFrequency,
            createDataset=createDataset, saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel, rewardFunction=reward)

        # Per-player metrics: index 0 of each player tuple holds the number of wins
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])

        # Collect the per-agent values stored last in each player tuple (the final,
        # random agent is skipped here)
        for a in range(len(playersAgents) - 1):
            qvalues[a].append(metrics[a + 2][-1])

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
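# The experiment scripts above are meant to be executed directly; a minimal entry
# point, assuming the file is run as a script, could look like this.
if __name__ == '__main__':
    runModel()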