Example #1
    def objective(args):
        agent1 = AgentDQL.AgentDQL([True, 1.0, "DQL"])  #training agent
        agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
        agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
        agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
        playersAgents = [agent1, agent2, agent3, agent4]

        loadModels = ""

        loadModel = [loadModels, loadModels, loadModels, loadModels]

        numGames = 100

        #training trial
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward,
            agentParams=[args])

        # Player1 - agent
        p1 = metrics[2]
        p1_model = p1[4]

        #testing trial
        agent1 = AgentDQL.AgentDQL([False, 1.0, "DQL"])  # evaluation agent (training disabled)
        playersAgents = [agent1, agent2, agent3, agent4]

        loadModelAgent1 = p1_model
        loadModel = [loadModelAgent1, loadModels, loadModels, loadModels]
        numGames = 100  # amount of evaluation games

        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward,
            agentParams=[args])

        rounds = metrics[0]
        startGameFinishingPosition = metrics[1]

        # Player1 - agent
        p1 = metrics[2]
        p1_wins = p1[0]
        p1_positions = p1[1]
        p1_rewards = p1[2]
        p1_wrongActions = p1[3]
        p1_model = p1[4]

        p2 = metrics[3]
        p3 = metrics[4]

        wins = numGames - p1_wins  # number of games player 1 did NOT win
        averageReward = 1 - numpy.average(p1_rewards)  # loss to minimize: 1 - mean reward of player 1
        averageRounds = rounds

        # return wins
        print("Args: " + str(args) + "Reward:" + str(averageReward))
        return {
            'loss': averageReward,
            'status': STATUS_OK,
            # -- store other results like this
            'eval_time': time.time(),
            'other_stuff': {
                'wins': wins,
                "rounds": averageRounds,
                'wrongMoves': p1_wrongActions
            },
        }
Example #2
def runModel():
    # Parameters for the game
    agent1 = AgentDQL.AgentDQL([True, 1.0, "DQL"])  #training agent
    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # Load agents from

    loadModels = ""

    loadModel = [loadModels, loadModels, loadModels, loadModels]

    # List of agents
    playersAgents = [agent1, agent2, agent3, agent4]

    # Reward function
    reward = RewardOnlyWinning.RewardOnlyWinning()

    # Experimental parameters

    numGames = 1000  # amount of games to be executed
    numMaxEvals = 100  # number of evaluations for the optimization
    experimentDescriptor = "Optmizing_DQL"  # Experiment name

    saveTrialsDataset = "folder"  # Folder where the optimization trials will be saved
    saveTrialsDataset += experimentDescriptor

    isLogging = False  # create a .txt file with the experiment log

    isPlotting = False  # Create plots of the experiment

    createDataset = False  # Create a .pkl dataset of the experiment

    saveExperimentsIn = ""  # Directory where the experiment will be saved

    #Search space for the agent
    space = hp.choice('a', [(hp.choice("layers", [1, 2, 3, 4]),
                             hp.choice("hiddenUnits", [8, 32, 64, 256]),
                             hp.choice("batchSize", [16, 64, 128, 256, 512]),
                             hp.uniform("tau", 0.01, 0.99))])

    def objective(args):
        agent1 = AgentDQL.AgentDQL([True, 1.0, "DQL"])  #training agent
        agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
        agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
        agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
        playersAgents = [agent1, agent2, agent3, agent4]

        loadModels = ""

        loadModel = [loadModels, loadModels, loadModels, loadModels]

        numGames = 100

        #training trial
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward,
            agentParams=[args])

        # Player1 - agent
        p1 = metrics[2]
        p1_model = p1[4]

        #testing trial
        agent1 = AgentDQL.AgentDQL([False, 1.0, "DQL"])  # evaluation agent (training disabled)
        playersAgents = [agent1, agent2, agent3, agent4]

        loadModelAgent1 = p1_model
        loadModel = [loadModelAgent1, loadModels, loadModels, loadModels]
        numGames = 100  # amount of evaluation games

        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward,
            agentParams=[args])

        rounds = metrics[0]
        startGameFinishingPosition = metrics[1]

        # Player1 - agent
        p1 = metrics[2]
        p1_wins = p1[0]
        p1_positions = p1[1]
        p1_rewards = p1[2]
        p1_wrongActions = p1[3]
        p1_model = p1[4]

        p2 = metrics[3]
        p3 = metrics[4]

        wins = numGames - p1_wins  # number of games player 1 did NOT win
        averageReward = 1 - numpy.average(p1_rewards)  # loss to minimize: 1 - mean reward of player 1
        averageRounds = rounds

        # return wins
        print("Args: " + str(args) + "Reward:" + str(averageReward))
        return {
            'loss': averageReward,
            'status': STATUS_OK,
            # -- store other results like this
            'eval_time': time.time(),
            'other_stuff': {
                'wins': wins,
                "rounds": averageRounds,
                'wrongMoves': p1_wrongActions
            },
        }

    trials = Trials()
    best = fmin(objective,
                space=space,
                algo=tpe.suggest,
                max_evals=numMaxEvals,
                trials=trials)

    print("Saving the trials dataset:", saveTrialsDataset)

    pickle.dump(trials, open(saveTrialsDataset, "wb"))

    print("Trials:", trials)
    print("BEst: ", hyperopt.space_eval(space, best))
    print("Best:" + str(best))

    hyperopt.plotting.main_plot_history(trials, title="WinsHistory")
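The Trials object pickled above can be reloaded later to inspect the optimization results; a minimal sketch, assuming saveTrialsDataset points at the file written above and the same space definition is in scope:

# Reload the saved hyperopt trials and print a short summary (sketch).
trials = pickle.load(open(saveTrialsDataset, "rb"))
print("Completed trials:", len(trials.trials))
print("Best loss:", trials.best_trial["result"]["loss"])
bestVals = {k: v[0] for k, v in trials.best_trial["misc"]["vals"].items()}
print("Best parameters:", hyperopt.space_eval(space, bestVals))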
Example #3
def runModel():
    #Parameters for the game
    agent1 = AgentA2C.AgentA2C([False, 1.0])  # evaluation agent (training disabled)
    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 1  #Amount of runs
    numGames = 100  # amount of games per run
    experimentDescriptor = "Testing_NewPlot_Stack"

    A2CActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/actor_iteration_999_Player_0.hd5"
    A2CCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/critic_iteration_999_Player_0.hd5"

    loadModelAgent1 = [A2CActor, A2CCritic]  # actor/critic pair for the trained A2C agent

    loadModelAgent2 = ""  # empty string = no pre-trained model

    loadModelAgent3 = ""
    loadModelAgent4 = ""

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # indicate where the saved models are

    # Parameters for controlling the experiment
    isLogging = False  # Log the experiment

    isPlotting = False  #plot the experiment

    plotFrequency = 1  #plot the plots every X games

    createDataset = True  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/AllQValues"  # Directory where the experiment will be saved

    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []
    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Player1 - agent
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])

        # print ("Metrics:" + str(metrics))

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
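In the loop above, metrics[2 + i][0] holds the number of games won by player i across the run. A small hypothetical helper (summarizeWins is not part of ChefsHatExperimentHandler) that collects those counts in one call:

# Hypothetical helper: per-player win counts from the metrics list returned by runExperiment,
# using the same indexing as the loop above (players occupy metrics[2] .. metrics[5]).
def summarizeWins(metrics, numPlayers=4):
    return [metrics[2 + i][0] for i in range(numPlayers)]

With it, the four per-player appends above reduce to a single call per run.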
Example #4
def train(train_data, labels, epochs, test_data, true_labels, word_vector_size, num_intents, hidden_size):

    model = baseline_model(word_vector_size, num_intents, hidden_size)

    inputs = []
    targets = []

    cum_train_acc_hist = []
    cum_test_acc_hist = []
    abs_train_acc_hist = []
    abs_test_acc_hist = []
    random_wins = []
    
    # Oversampling start: duplicate random examples of under-represented intents until every intent matches the majority count
    max_intent = max(np.unique(labels, axis=0, return_counts=True)[1])
    
    for i in range(num_intents):
            
        tup = np.unique(labels, axis=0, return_counts=True)
        current_intent = tup[0][i]
        current_count = tup[1][i]
        diff_intent = max_intent - current_count
        
        indices = [k for k, x in enumerate(labels) if sum(x == current_intent) == num_intents]
            
        for j in range(diff_intent):
            
            index = random.choice(indices)
            train_data.append(train_data[index])
            labels = labels.tolist()
            labels.append(labels[index])
            labels = np.array(labels)
    # Oversampling end

    for e in range(epochs):  # epochs

        envRM = AgentRandom()
        envML = AgentML()

        for i in range(len(train_data)):  # no. of games(sentences) in one epoch

            sentence_embedding = train_data[i]

            rm_intent_list = []
            ml_intent_list = []

            rm_reward_list = []
            ml_reward_list = []

            for j in range(len(sentence_embedding)):  # no. of episodes(words) in one game(sentence)

                rm_reward = -1
                ml_reward = -1

                current_state = sentence_embedding[j].reshape(1, word_vector_size)

                rm_dist = envRM.act(num_intents)
                ml_dist = envML.act(current_state, model)

                if np.argmax(labels[i]) == np.argmax(rm_dist):
                    rm_reward = 1

                if np.argmax(labels[i]) == np.argmax(ml_dist):
                    ml_reward = 1

                rm_intent_list.append(rm_dist)
                ml_intent_list.append(ml_dist)

                rm_reward_list.append(rm_reward)
                ml_reward_list.append(ml_reward)

            envRM.dist_holder(rm_intent_list, rm_reward_list)
            envML.dist_holder(ml_intent_list, ml_reward_list)

        cum_reward_rm = []
        cum_reward_ml = []

        for i in range(len(envRM.reward_dist)):
            cum_reward_rm.append(np.mean(envRM.reward_dist[i]))

        for i in range(len(envML.reward_dist)):
            cum_reward_ml.append(np.mean(envML.reward_dist[i]))

        diff = np.array(cum_reward_rm) - np.array(cum_reward_ml)
        rm_wins = (diff > 0)
        random_wins.append(sum(rm_wins))

        for i in range(len(rm_wins)):

            if rm_wins[i]:  # Agent Random performed better than Agent ML

                for j in range(len(train_data[i])):

                    if envRM.reward_dist[i][j] == 1:

                        if (train_data[i][j]).tolist() in inputs:
    
                            input_index = inputs.index((train_data[i][j]).tolist())
                            target_index = np.argmax(envRM.intent_dist[i][j])
                            targets[input_index][target_index] += diff[i]
    
                        else:
    
                            inputs.append((train_data[i][j]).tolist())
                            index = np.argmax(envRM.intent_dist[i][j])
                            target = (np.zeros((num_intents,))).tolist()
                            target[index] = diff[i]
                            targets.append(target)

        X_generated = copy.deepcopy(inputs)
        y_generated = copy.deepcopy(targets)

        for i in range(len(y_generated)):
            y_generated[i] = [float(j) / max(y_generated[i]) for j in y_generated[i]]

        model = baseline_model(word_vector_size, num_intents, hidden_size)
        model.fit(np.array(X_generated), np.array(y_generated), epochs=500, batch_size=32)

        cum_train_acc, abs_train_acc = test(train_data, labels, model, word_vector_size)
        cum_test_acc, abs_test_acc = test(test_data, true_labels, model, word_vector_size)

        cum_train_acc_hist.append(cum_train_acc)
        abs_train_acc_hist.append(abs_train_acc)
        cum_test_acc_hist.append(cum_test_acc)
        abs_test_acc_hist.append(abs_test_acc)

        print("Epoch " + str(e) + " Train " + str(cum_train_acc) + " , " + str(abs_train_acc))
        print("Epoch " + str(e) + " Test " + str(cum_test_acc) + " , " + str(abs_test_acc))
            
    
    return model, cum_train_acc_hist, abs_train_acc_hist, cum_test_acc_hist, abs_test_acc_hist, random_wins
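The helper baseline_model is not shown in this example; judging from how it is used (fit on word-vector inputs against per-intent target vectors, with argmax taken over its predictions), a plausible Keras definition might look like the sketch below. The layer sizes, activations, optimizer, and loss here are assumptions, not the original code:

# Assumed sketch of baseline_model; the original definition is not included in this example.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def baseline_model(word_vector_size, num_intents, hidden_size):
    model = Sequential()
    model.add(Dense(hidden_size, activation="relu", input_shape=(word_vector_size,)))
    model.add(Dense(num_intents, activation="softmax"))
    model.compile(optimizer="adam", loss="categorical_crossentropy")
    return model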
Example #5
def runModel():

    #Plots
    plotsToGenerate = []

    demonstrations = numpy.load("/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/NEURIPS2020/AIRL/ExpertObs/Demonstrations_ExpertCollection.npy", allow_pickle=True)

    #Parameters for the agents
    agentDQL = AgentDQL.AgentDQL([False, 1.0, "DQL"])  # evaluation agent (training disabled)
    agentA2C = AgentA2C.AgentA2C([False, 1.0, "A2C"])  # evaluation agent (training disabled)
    agentPPO = AgentPPO.AgentPPO([False, 1.0, "PPO"])  # evaluation agent (training disabled)
    agentAIRL = AIRL.AgentAIRL([False, 1.0, "AIRL", None, demonstrations])  # evaluation agent (training disabled)

    possibleAgent1 = [agentDQL,agentA2C, agentPPO, agentAIRL ]


    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)  # random opponent
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)  # random opponent
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)  # random opponent

    #Load agents from
    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/DQL_vsRandom/actor_iteration_999_Player_0.hd5"

    A2cActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/A2C_vsEveryone/actor_iteration_999_Player_1.hd5"
    A2cCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/A2C_vsEveryone/critic_iteration_999_Player_1.hd5"

    PPOActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/PPO_vsEveryone/actor_iteration_999_Player_2.hd5"
    PPOCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/PPO_vsEveryone/critic_iteration_999_Player_2.hd5"

    AIRLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/NEURIPS2020/AIRL/Training/Player_4_Cards_11_games_5000TrainingAgents_['DQL_AIRL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Train_NegativeReward_notInverted_2020-04-05_16:31:22.781799/Model/actor_iteration_4999_Player_0.hd5"
    AIRLReward = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/NEURIPS2020/AIRL/Training/Player_4_Cards_11_games_5000TrainingAgents_['DQL_AIRL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Train_NegativeReward_notInverted_2020-04-05_16:31:22.781799/Model/reward_iteration_4999_Player_0.hd5"

    possibleLoadModel1 = [DQLModel, [A2cActor, A2cCritic], [PPOActor,PPOCritic], [AIRLModel,AIRLReward]]
    loadModelEmpty = ""

    #Reward function
    reward = RewardOnlyWinning.RewardOnlyWinning()

    #Experimental parameters
    numberOfTrials = 50
    maximumScore = 15 # maximumScore to be reached
    experimentDescriptor = "BaselineExperimentsVsRandom" #Experiment name

    isLogging = False # create a .txt file with the experiment log

    isPlotting = False #Create plots of the experiment

    createDataset = False # Create a .pkl dataset of the experiment

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/BaselineNumberGames"  # Directory where the experiment will be saved

    #Metrics to be saved
    avgTotalGames = []

    avgWonGames = []
    avgPoints = []
    avgWonRounds = []

    for a in range(4):
        avgPoints.append([])
        avgWonGames.append([])
        avgWonRounds.append([])


    columns = ["ExperimentName", "AvgTotalNumberGames", "stdNumberGames",
               "Player0_AvgPoints", "Player0_stdAvgPoints",
               "Player0_AvgWonGames", "Player0_stdAvgWonGames",
               "Player0_AvgWonRounds","Player0_stdWonRounds",
               "Player1_AvgPoints", "Player1_stdAvgPoints",
               "Player1_AvgWonGames", "Player1_stdAvgWonGames",
               "Player1_AvgWonRounds", "Player1_stdWonRounds",
               "Player2_AvgPoints", "Player2_stdAvgPoints",
               "Player2_AvgWonGames", "Player2_stdAvgWonGames",
               "Player2_AvgWonRounds", "Player2_stdWonRounds",
               "Player3_AvgPoints", "Player3_stdAvgPoints",
               "Player3_AvgWonGames", "Player3_stdAvgWonGames",
               "Player3_AvgWonRounds", "Player3_stdWonRounds",
                     ]

    totalDataFame = pd.DataFrame(columns = columns)


    for agent in range(4):

        # Reset the per-agent accumulators so each dataframe row reflects only this agent's trials
        avgTotalGames = []
        avgPoints = [[] for _ in range(4)]
        avgWonGames = [[] for _ in range(4)]
        avgWonRounds = [[] for _ in range(4)]

        loadModel = [possibleLoadModel1[agent], loadModelEmpty, loadModelEmpty, loadModelEmpty]

        # List of agents and Models to Load
        playersAgents = [possibleAgent1[agent], agent2, agent3, agent4]
        print ("Evaluating agent:" + str(playersAgents[0].name))
        for a in range(numberOfTrials):

            metrics = ChefsHatExperimentHandler.runExperiment(maximumScore=maximumScore, playersAgents=playersAgents,experimentDescriptor=experimentDescriptor,isLogging=isLogging,isPlotting=isPlotting, createDataset=createDataset,saveExperimentsIn=saveExperimentsIn, loadModel=loadModel, rewardFunction=reward, plots=plotsToGenerate)
            games = metrics[-1]
            score = metrics[-2]
            winner = numpy.argmax(score)

            for i in range(len(playersAgents)):
                playerMetric = metrics[2+i]
                rounds = playerMetric[5]
                avgPoints[i].append(score[i])
                if winner == i:
                    avgWonGames[i].append(games)
                    avgWonRounds[i].append(numpy.mean(rounds))

            print("Trial:" + str(a) + "- Games" + str(games) + " - Winner: " + str(winner))
            avgTotalGames.append(games)

        currentDataFrame = []
        currentDataFrame.append(playersAgents[0].name) #Trained Agent Name
        currentDataFrame.append(numpy.mean(avgTotalGames)) # AvgTotalNumberGames
        currentDataFrame.append(numpy.std(avgTotalGames))# AvgSTDTotalNumberGames


        for i in range(len(playersAgents)):
            points = avgPoints[i]
            wongamesNumber = avgWonGames[i]
            wonRounds = avgWonRounds[i]

            currentDataFrame.append(numpy.mean(points)) # Player X AvgPoints
            currentDataFrame.append(numpy.std(points))  # Player X StdPoints

            currentDataFrame.append(numpy.mean(wongamesNumber)) # Player X AvgWonGames
            currentDataFrame.append(numpy.std(wongamesNumber))  # Player X StdWonGames

            currentDataFrame.append(numpy.mean(wonRounds)) # Player X AvgRounds
            currentDataFrame.append(numpy.std(wonRounds))  # Player X StdRounds
            # print ("Player - " + str(i))
            # print (" -- Average points:" + str(numpy.mean(points)) + "("+str(numpy.std(points))+")")
            # print(" -- Average Num Games  When Win:" + str(numpy.mean(wongamesNumber)) + "(" + str(numpy.std(wongamesNumber)) + ")")
            # print(" -- Average Num Rounds  When Win:" + str(numpy.mean(roundsWin)) + "(" + str(
            #     numpy.std(roundsWin)) + ")")


        totalDataFame.loc[-1] = currentDataFrame
        totalDataFame.index = totalDataFame.index + 1

        totalDataFame.to_pickle(saveExperimentsIn+"/"+experimentDescriptor)
        totalDataFame.to_csv(saveExperimentsIn+"/"+experimentDescriptor + ".csv", index=False, header=True)
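The summary table written above can be read back for further analysis; a minimal sketch, assuming the same saveExperimentsIn and experimentDescriptor values are in scope:

# Read back the per-agent summary written by the baseline experiment above (sketch).
import pandas as pd

summary = pd.read_csv(saveExperimentsIn + "/" + experimentDescriptor + ".csv")
print(summary[["ExperimentName", "AvgTotalNumberGames", "Player0_AvgWonGames"]])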
Example #6
def runModel():
    #Parameters for the game
    agent1 = AgentDQL.AgentDQL([False, 1.0])  # evaluation agent (training disabled)
    agent2 = AgentA2C.AgentA2C([False, 1.0])
    agent3 = AgentPPO.AgentPPO([False, 1.0])
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 1  #Amount of runs
    numGames = 100  # amount of games per run

    experimentDescriptor = "Testing_NoTraining_newPlot"

    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0_2020-03-26_17:26:18.198600/Model/actor_iteration_999_Player_2.hd5"

    A2cActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best -  Best - _2020-03-26_19:11:39.863435/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best -  Best - _2020-03-26_19:11:39.863435/Model/critic_iteration_999_Player_0.hd5"

    PPOActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch -  Scratch -  Scratch - _2020-03-26_22:27:02.430568/Model/actor_iteration_999_Player_3.hd5"
    PPOCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch -  Scratch -  Scratch - _2020-03-26_22:27:02.430568/Model/critic_iteration_999_Player_3.hd5"

    loadModelAgent1 = DQLModel  # pre-trained DQL model for agent 1

    loadModelAgent2 = [A2cActor, A2cCritic]  # actor/critic pair for agent 2

    loadModelAgent3 = [PPOActor, PPOCritic]  # actor/critic pair for agent 3
    loadModelAgent4 = ""  # no pre-trained model for the random agent

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # indicate where the saved models are

    # Parameters for controlling the experiment
    isLogging = False  # Log the experiment

    isPlotting = False  # plot the experiment

    plotFrequency = 1  # plot the plots every X games

    createDataset = True  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/All/AllQValues_beforetraining/"  # Directory where the experiment will be saved

    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []
    qvalues = []

    for a in range(len(playersAgents)):
        qvalues.append([])

    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Player1 - agent
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])

        for a in range(len(playersAgents) - 1):  # skip the last (random) agent, which has no Q-values
            qvalues[a].append(metrics[a + 2][-1])

        # print ("Metrics:" + str(metrics))

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
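Each of these examples defines a runModel driver intended to be executed directly; a minimal entry point, assuming one of the definitions above is in the module:

# Minimal entry point for the runModel drivers above (sketch).
if __name__ == "__main__":
    runModel()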