Example #1
def runModel():
    # Parameters for the game
    agent1 = AgentA2C.AgentA2C([False, 1.0])  # training agent
    agent2 = AgentA2C.AgentA2C([False, 1.0])
    agent3 = AgentA2C.AgentA2C([False, 1.0])
    agent4 = AgentA2C.AgentA2C([False, 1.0])

    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 10  # number of runs
    numGames = 100  # number of games per run

    experimentDescriptor = "Testing_NewPlot"

    A2cActor_1 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best -  Best - _2020-03-26_19:11:39.863435/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic_1 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best -  Best - _2020-03-26_19:11:39.863435/Model/critic_iteration_999_Player_0.hd5"

    A2cActor_4 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_4_Training_Best_Agent_1Choice_ Best -  Scratch -  Scratch - _2020-03-26_19:27:58.464895/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic_4 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_4_Training_Best_Agent_1Choice_ Best -  Scratch -  Scratch - _2020-03-26_19:27:58.464895/Model/critic_iteration_999_Player_0.hd5"

    # NOTE: despite the A2C-style variable names, these two paths point into the
    # DQL experiment folders, and both reference actor checkpoints; double-check
    # that they are the checkpoints you intend to load.
    A2cActor_9 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_1_Training_Best_Agent_2_2020-03-26_17:33:51.517296/Model/actor_iteration_999_Player_1.hd5"
    A2cCritic_9 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_2_Training_Best_Agent_1_2020-03-26_17:42:24.306637/Model/actor_iteration_999_Player_1.hd5"

    A2cActor_r = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic_r = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/critic_iteration_999_Player_0.hd5"

    loadModelAgent1 = [A2cActor_1, A2cCritic_1]  # [actor, critic] checkpoints

    loadModelAgent2 = [A2cActor_4, A2cCritic_4]  # [actor, critic] checkpoints

    loadModelAgent3 = [A2cActor_9, A2cCritic_9]  # [actor, critic] checkpoints
    loadModelAgent4 = [A2cActor_r, A2cCritic_r]  # [actor, critic] checkpoints

    loadModel = [
        loadModelAgent4, loadModelAgent3, loadModelAgent1, loadModelAgent2
    ]  # where the saved models are loaded from (use "" to start an agent from scratch)

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment

    isPlotting = True  # plot the experiment

    plotFrequency = 1000  # generate the plots every X games

    createDataset = True  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/"  # Directory where the experiment will be saved

    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []
    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Player1 - agent
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])

        print("Metrics:" + str(metrics))

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
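
The wins lists collected above only feed plotVictoriesTotal; if a quick textual summary per player is also wanted, a small standalone helper along these lines could be appended to the script (summarizeWins is hypothetical and not part of the ChefsHat experiment handler; it only assumes the per-run win counts and numGames already defined above):

from statistics import mean, stdev

def summarizeWins(winsPerPlayer, numGames):
    # winsPerPlayer: one list of per-run win counts per player,
    # e.g. [winsP1, winsP2, winsP3, winsP4]; numGames: games per run.
    # Hypothetical helper, not part of ChefsHatExperimentHandler.
    for idx, wins in enumerate(winsPerPlayer, start=1):
        avg = mean(wins)
        spread = stdev(wins) if len(wins) > 1 else 0.0
        print("Player %d: %.1f +/- %.1f wins per run (%.1f%% win rate)"
              % (idx, avg, spread, 100.0 * avg / numGames))

# usage (inside runModel, after the loop):
# summarizeWins([winsP1, winsP2, winsP3, winsP4], numGames)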

Example #2
def runModel():
    # Parameters for the game
    agent1 = AgentDQL.AgentDQL([False, 1.0])  # training agent
    agent2 = AgentA2C.AgentA2C([False, 1.0])
    agent3 = AgentPPO.AgentPPO([False, 1.0])
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 1  # number of runs
    numGames = 100  # number of games per run

    experimentDescriptor = "Testing_NoTraining_newPlot"

    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0_2020-03-26_17:26:18.198600/Model/actor_iteration_999_Player_2.hd5"

    A2cActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best -  Best - _2020-03-26_19:11:39.863435/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best -  Best - _2020-03-26_19:11:39.863435/Model/critic_iteration_999_Player_0.hd5"

    PPOActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch -  Scratch -  Scratch - _2020-03-26_22:27:02.430568/Model/actor_iteration_999_Player_3.hd5"
    PPOCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch -  Scratch -  Scratch - _2020-03-26_22:27:02.430568/Model/critic_iteration_999_Player_3.hd5"

    loadModelAgent1 = DQLModel  # DQL loads a single model file

    loadModelAgent2 = [A2cActor, A2cCritic]  # [actor, critic] checkpoints

    loadModelAgent3 = [PPOActor, PPOCritic]  # [actor, critic] checkpoints
    loadModelAgent4 = ""  # the random agent loads no model

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # where the saved models are loaded from (use "" to start an agent from scratch)

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment

    isPlotting = False  # plot the experiment

    plotFrequency = 1  # generate the plots every X games

    createDataset = True  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/All/AllQValues_beforetraining/"  # Directory where the experiment will be saved

    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []
    qvalues = []

    for a in range(len(playersAgents)):
        qvalues.append([])

    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Player1 - agent
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])

        # keep the last metric entry for each of the first three agents
        # (the random agent in the last seat is skipped)
        for a in range(len(playersAgents) - 1):
            qvalues[a].append(metrics[a + 2][-1])

        # print ("Metrics:" + str(metrics))

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
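
The qvalues lists filled in the loop above are never written out in this example; a minimal sketch for persisting them next to the experiment output is shown below (saveQValues is hypothetical, and it assumes each entry taken from metrics[a + 2][-1] is something numpy can pickle, which depends on what runExperiment actually returns):

import os
import numpy as np

def saveQValues(qvalues, saveExperimentsIn, experimentDescriptor):
    # Hypothetical helper: dumps each agent's collected trace to a .npy file.
    # Assumes the entries taken from metrics[a + 2][-1] are picklable.
    outDir = os.path.join(saveExperimentsIn, "QValues_" + experimentDescriptor)
    os.makedirs(outDir, exist_ok=True)
    for a, trace in enumerate(qvalues):
        np.save(os.path.join(outDir, "agent_%d_qvalues.npy" % a),
                np.asarray(trace, dtype=object), allow_pickle=True)

# usage (inside runModel, after the loop):
# saveQValues(qvalues, saveExperimentsIn, experimentDescriptor)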

Example #3
def runModel():
    # Parameters for the game
    agent1 = AgentA2C.AgentA2C([False, 1.0])  # training agent
    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 1  # number of runs
    numGames = 100  # number of games per run

    experimentDescriptor = "Testing_NewPlot_Stack"

    A2CActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/actor_iteration_999_Player_0.hd5"
    A2CCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/critic_iteration_999_Player_0.hd5"

    loadModelAgent1 = [A2CActor, A2CCritic]  # [actor, critic] checkpoints

    loadModelAgent2 = ""  # the random agents load no model

    loadModelAgent3 = ""
    loadModelAgent4 = ""

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # where the saved models are loaded from (use "" to start an agent from scratch)

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment

    isPlotting = False  # plot the experiment

    plotFrequency = 1  # generate the plots every X games

    createDataset = True  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/AllQValues"  # Directory where the experiment will be saved

    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []
    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Player1 - agent
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])

        # print ("Metrics:" + str(metrics))

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
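
All of these examples hard-code long absolute checkpoint paths; a small path builder can make them easier to maintain (checkpointPath is hypothetical, but the '<prefix>_iteration_<n>_Player_<p>.hd5' file-name pattern is taken directly from the paths used above):

import os

def checkpointPath(modelDir, prefix, iteration, player):
    # Build '<prefix>_iteration_<iteration>_Player_<player>.hd5' under modelDir
    # and fail early if the file is missing. Hypothetical convenience helper.
    path = os.path.join(modelDir,
                        "%s_iteration_%d_Player_%d.hd5" % (prefix, iteration, player))
    if not os.path.isfile(path):
        raise FileNotFoundError("Checkpoint not found: " + path)
    return path

# usage (modelDir is the '.../<experiment folder>/Model' directory):
# loadModelAgent1 = [checkpointPath(modelDir, "actor", 999, 0),
#                    checkpointPath(modelDir, "critic", 999, 0)]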

Example #4
def runModel():
    # Parameters for the game
    agent1 = AgentDQL.AgentDQL([False, 1.0])  # training agent
    agent2 = AgentDQL.AgentDQL([False, 1.0])
    agent3 = AgentDQL.AgentDQL([False, 1.0])
    agent4 = AgentDQL.AgentDQL([False, 1.0])

    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 10  # number of runs
    numGames = 100  # number of games per run

    experimentDescriptor = "Testing_50x1000"

    # DQLModel1 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_1_Training_Best_Agent_2_2020-03-26_17:33:51.517296/Model/actor_iteration_999_Player_1.hd5"
    # DQLModel2 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_2_Training_Best_Agent_1_2020-03-26_17:42:24.306637/Model/actor_iteration_999_Player_1.hd5"
    # DQLModel3 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/Old/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_3_Training_Best_Agent_2_2020-03-26_00:13:55.479460/Model/actor_iteration_999_Player_1.hd5"
    # DQLModelr = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Random/Player_4_Cards_11_games_3000TrainingAgents_['DQL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-25_18:15:32.076987/Model/actor_iteration_2999_Player_0.hd5"
    # DQLModel0 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0_2020-03-26_17:26:18.198600/Model/actor_iteration_999_Player_2.hd5"
    # DQLModel4 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_4_Training_Best_Agent_3_2020-03-26_18:03:32.220659/Model/actor_iteration_999_Player_2.hd5"
    # DQLModel9 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/Player_4_Cards_11_games_3000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best Agents -  Second Best Agents - _2020-03-26_20:28:12.574082/Model/actor_iteration_2999_Player_0.hd5"
    # DQLModelr = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Random/Player_4_Cards_11_games_3000TrainingAgents_['DQL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-25_18:15:32.076987/Model/actor_iteration_2999_Player_0.hd5"


    DQLModel0 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000x50/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_SecondBestAgents-SecondBestAgents-Scratch-_2020-03-27_01:55:16.105188/Model/actor_iteration_999_Player_2.hd5"
    DQLModel4 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000x50/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_15_Training_Best_Agent_2Choice_SecondBestAgents-BestAgents-BestAgents-_2020-03-27_04:50:05.205901/Model/actor_iteration_999_Player_2.hd5"
    DQLModel9 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000x50/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_30_Training_Best_Agent_0Choice_BestAgents-BestAgents-BestAgents-_2020-03-27_12:18:40.723555/Model/actor_iteration_999_Player_2.hd5"
    DQLModelr = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:26:46.054899/Model/actor_iteration_999_Player_0.hd5"

    loadModelAgent1 = DQLModel0  # DQL loads a single model file

    loadModelAgent2 = DQLModel4

    loadModelAgent3 = DQLModel9
    loadModelAgent4 = DQLModelr

    loadModel = [loadModelAgent3, loadModelAgent4, loadModelAgent1,
                 loadModelAgent2]  # where the saved models are loaded from (use "" to start an agent from scratch)

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment

    isPlotting = True  # plot the experiment

    plotFrequency = 1000  # generate the plots every X games

    createDataset = True  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/"  # Directory where the experiment will be saved

    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []
    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Player1 - agent
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])

        print("Metrics:" + str(metrics))

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
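
Finally, since this last example runs ten batches of games, the per-run win counts are worth keeping for offline analysis; a minimal CSV dump, assuming only the lists already collected above, might look like this (writeWinsCsv is not part of the original code):

import csv
import os

def writeWinsCsv(winsP1, winsP2, winsP3, winsP4, saveExperimentsIn, experimentDescriptor):
    # Hypothetical helper: one row per run with each player's win count,
    # written next to the experiment output directory.
    outFile = os.path.join(saveExperimentsIn, "wins_" + experimentDescriptor + ".csv")
    with open(outFile, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["run", "winsP1", "winsP2", "winsP3", "winsP4"])
        for run, row in enumerate(zip(winsP1, winsP2, winsP3, winsP4)):
            writer.writerow([run] + list(row))

# usage (inside runModel, after the loop):
# writeWinsCsv(winsP1, winsP2, winsP3, winsP4, saveExperimentsIn, experimentDescriptor)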