# All examples below assume imports along these lines; the exact module paths
# are an assumption based on the Chef's Hat experiment framework and may differ:
# import random
# import numpy
# import pandas as pd
# from ExperimentHandler import ChefsHatExperimentHandler
# from Agents import AgentRandom, AgentDQL, AgentA2C, AgentPPO
# from Rewards import RewardOnlyWinning

def runModel():
    #Parameters for the game
    agent1 = AgentA2C.AgentA2C([True, 1.0])  #training agent
    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numGames = 1000  # number of training games
    experimentDescriptor = "Training_NewPlot"

    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Player_4_Cards_11_games_3000TrainingAgents_['DQL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-25_18:15:32.076987/Model/actor_iteration_2999_Player_0.hd5"

    loadModelAgent1 = ""  #[actorModelDDPG,criticModelDDPG]#DQLModel#""# #"" #""#""#DQLModel#""#[actorModelDDPG,criticModelDDPG] ##""#[actorModelA2C,criticModelA2C] #[actorModelA2C,criticModelA2C] #DQLModel #[actorModelA2C,criticModelA2c] #[actorModelDDPG,criticModelDDPG]

    loadModelAgent2 = ""  #DQLModel #"" #DQLModel

    loadModelAgent3 = ""  #[actorModelDDPG,criticModelDDPG]
    loadModelAgent4 = ""

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # indicate where each saved model is

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment

    isPlotting = True  # plot the experiment

    plotFrequency = 1000  # generate the plots every X games

    createDataset = True  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/NewQPlot/"  # Directory where the experiment will be saved

    metrics = ChefsHatExperimentHandler.runExperiment(
        numGames=numGames,
        playersAgents=playersAgents,
        experimentDescriptor=experimentDescriptor,
        isLogging=isLogging,
        isPlotting=isPlotting,
        plotFrequency=plotFrequency,
        createDataset=createDataset,
        saveExperimentsIn=saveExperimentsIn,
        loadModel=loadModel,
        rewardFunction=reward)

    print("Metrics:" + str(metrics))
Example #2

def runModel():

    # Plots: the full list is commented out here because the code below
    # immediately overrides it with an empty list, so no plots are generated.
    # plotsToGenerate = [plots["Experiment_Rounds"], plots["Experiment_FinishingPosition"],
    #                    plots["Experiment_ActionsBehavior"], plots["Experiment_Reward"],
    #                    plots["Experiment_QValues"], plots["Experiment_Mood"],
    #                    plots["Experiment_MoodNeurons"], plots["Experiment_SelfProbabilitySuccess"]]
    plotsToGenerate = []

    # Parameters for the agents (training flag False: evaluation mode)
    agent1 = AgentDQL.AgentDQL([False, 1.0, "DQL"])
    agent2 = AgentPPO.AgentPPO([False, 1.0, "PPO"])
    agent3 = AgentA2C.AgentA2C([False, 1.0, "A2C"])
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
Example #3
def runModel():
    #Parameters for the game
    agent1 = AgentDQL.AgentDQL([True, 1.0])  #training agent
    agent2 = AgentA2C.AgentA2C([True, 1.0])
    agent3 = AgentPPO.AgentPPO([True, 1.0])
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numGames = 1000  # number of training games

    experimentDescriptor = "Training"

    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0_2020-03-26_17:26:18.198600/Model/actor_iteration_999_Player_2.hd5"

    A2cActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best -  Best - _2020-03-26_19:11:39.863435/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best -  Best - _2020-03-26_19:11:39.863435/Model/critic_iteration_999_Player_0.hd5"

    PPOActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch -  Scratch -  Scratch - _2020-03-26_22:27:02.430568/Model/actor_iteration_999_Player_3.hd5"
    PPOCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch -  Scratch -  Scratch - _2020-03-26_22:27:02.430568/Model/critic_iteration_999_Player_3.hd5"

    loadModelAgent1 = DQLModel

    loadModelAgent2 = [A2cActor, A2cCritic]

    loadModelAgent3 = [PPOActor, PPOCritic]
    loadModelAgent4 = ""  # the random agent loads no model

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # indicate where each saved model is

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment

    isPlotting = True  # plot the experiment

    plotFrequency = 1000  # generate the plots every X games

    createDataset = True  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/All/QValuePlot"  # Directory where the experiment will be saved

    metrics = ChefsHatExperimentHandler.runExperiment(
        numGames=numGames,
        playersAgents=playersAgents,
        experimentDescriptor=experimentDescriptor,
        isLogging=isLogging,
        isPlotting=isPlotting,
        plotFrequency=plotFrequency,
        createDataset=createDataset,
        saveExperimentsIn=saveExperimentsIn,
        loadModel=loadModel,
        rewardFunction=reward)

    print("Metrics:" + str(metrics))
Example #4
def runModel():
    # Parameters for the game (agents in evaluation mode)
    agent1 = AgentA2C.AgentA2C([False, 1.0])
    agent2 = AgentA2C.AgentA2C([False, 1.0])
    agent3 = AgentA2C.AgentA2C([False, 1.0])
    agent4 = AgentA2C.AgentA2C([False, 1.0])

    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 10  # number of runs
    numGames = 100  # number of games per run

    experimentDescriptor = "Testing_NewPlot"

    A2cActor_1 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best -  Best - _2020-03-26_19:11:39.863435/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic_1 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best -  Best - _2020-03-26_19:11:39.863435/Model/critic_iteration_999_Player_0.hd5"

    A2cActor_4 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_4_Training_Best_Agent_1Choice_ Best -  Scratch -  Scratch - _2020-03-26_19:27:58.464895/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic_4 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_4_Training_Best_Agent_1Choice_ Best -  Scratch -  Scratch - _2020-03-26_19:27:58.464895/Model/critic_iteration_999_Player_0.hd5"

    A2cActor_9 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_1_Training_Best_Agent_2_2020-03-26_17:33:51.517296/Model/actor_iteration_999_Player_1.hd5"
    A2cCritic_9 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_2_Training_Best_Agent_1_2020-03-26_17:42:24.306637/Model/actor_iteration_999_Player_1.hd5"

    A2cActor_r = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic_r = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/critic_iteration_999_Player_0.hd5"

    loadModelAgent1 = [A2cActor_1, A2cCritic_1]

    loadModelAgent2 = [A2cActor_4, A2cCritic_4]

    loadModelAgent3 = [A2cActor_9, A2cCritic_9]
    loadModelAgent4 = [A2cActor_r, A2cCritic_r]

    loadModel = [
        loadModelAgent4, loadModelAgent3, loadModelAgent1, loadModelAgent2
    ]  # indicate where each saved model is (agents seated in this order)

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment

    isPlotting = True  # plot the experiment

    plotFrequency = 1000  # generate the plots every X games

    createDataset = True  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/"  # Directory where the experiment will be saved

    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []
    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Player1 - agent
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])

        print("Metrics:" + str(metrics))

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
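# winsP1..winsP4 above accumulate one win count per run (p1[0] is assumed to be
# the number of games the player won in that run). A short sketch of how the
# per-run counts could be summarized; summarizeWins is illustrative only:
import numpy

def summarizeWins(wins, numGames):
    """Return the mean and standard deviation of the win rate over all runs."""
    rates = numpy.array(wins, dtype=float) / numGames
    return rates.mean(), rates.std()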
Example #5
def runModel():
    #Parameters for the game
    agent1 = AgentA2C.AgentA2C([True, 1.0])  #training agent
    agent2 = AgentA2C.AgentA2C([True, 1.0])
    agent3 = AgentA2C.AgentA2C([True, 1.0])
    agent4 = AgentA2C.AgentA2C([True, 1.0])

    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numExperiments = 10  # number of experiments; at the end of each, the best players are copied and play against each other
    numGames = 1000  # number of training games

    experimentDescriptor = "Training"

    loadModelAgent1 = ""  #""#""  #DQLModel #[actorModelA2C,criticModelA2c] #[actorModelDDPG,criticModelDDPG]

    loadModelAgent2 = ""  #""# ""#[actorModel,criticModel]

    loadModelAgent3 = ""  #""# ""
    loadModelAgent4 = ""  #""# ""

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # indicate where each saved model is

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment

    isPlotting = True  # plot the experiment

    plotFrequency = 1000  # generate the plots every X games

    createDataset = False  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/"  # Directory where the experiment will be saved

    # #Initial Run
    # metrics = ChefsHatExperimentHandler.runExperiment(numGames=numGames, playersAgents=playersAgents,
    #                                                   experimentDescriptor=experimentDescriptor, isLogging=isLogging,
    #                                                   isPlotting=isPlotting, plotFrequency=plotFrequency,
    #                                                   createDataset=createDataset, saveExperimentsIn=saveExperimentsIn,
    #                                                   loadModel=loadModel, rewardFunction=reward)

    bestAgent = 0
    description = experimentDescriptor
    epsilon = 1.0

    bestAgentsList = []
    secondBestList = []
    lastBestAgent = ""

    for i in range(numExperiments):

        agents = []
        agentsChoice = ""
        for agentNumber in range(3):
            probNumber = numpy.random.rand()

            if probNumber <= 0.33:  #Pull from the BestAgentList
                if len(bestAgentsList) == 0:
                    agents.append("")
                else:
                    random.shuffle(bestAgentsList)
                    agents.append(bestAgentsList[0])
                agentsChoice = agentsChoice + " Best - "

            elif probNumber <= 0.66:  # pull from the secondBestList
                if len(secondBestList) == 0:
                    agents.append("")
                else:
                    random.shuffle(secondBestList)
                    agents.append(secondBestList[0])
                agentsChoice = agentsChoice + " Second Best - "
            else:  # start a new agent from scratch
                agents.append("")
                agentsChoice = agentsChoice + " Scratch - "
        agents.append(lastBestAgent)

        loadModel = agents
        # Train the best scored one
        agent1 = AgentA2C.AgentA2C([True, epsilon])  # training agent
        agent2 = AgentA2C.AgentA2C([True, epsilon])
        agent3 = AgentA2C.AgentA2C([True, epsilon])
        agent4 = AgentA2C.AgentA2C([True, epsilon])
        epsilon = max(epsilon * 0.7, 0.1)  # decay exploration with a floor of 0.1
        # if training specific agents
        playersAgents = [agent1, agent2, agent3, agent4]


        numGames = 3000
        plotFrequency = 1000  # plot the plots every X games
        print("Choices: " + str(agentsChoice))
        print("Best agent: " + str(bestAgent) + " - Loading:" + str(loadModel))

        # input("here")
        # experimentDescriptor = description + "_GameExperimentNumber_" + str(i) + "_Best_Agent_" + str(bestAgent)
        experimentDescriptor = description + "_GameExperimentNumber_" + str(
            i) + "_Training_Best_Agent_" + str(bestAgent) + "Choice_" + str(
                agentsChoice)
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Evaluate them without training them

        # print("Train metrics:" + str(metrics))
        # Get Trained Agents
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        loadModelAgent1 = p1[4]
        loadModelAgent2 = p2[4]
        loadModelAgent3 = p3[4]
        loadModelAgent4 = p4[4]

        loadModel = [
            loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
        ]

        #Initialize evaluation agents
        agent1 = AgentA2C.AgentA2C([False, 0.1])
        agent2 = AgentA2C.AgentA2C([False, 0.1])
        agent3 = AgentA2C.AgentA2C([False, 0.1])
        agent4 = AgentA2C.AgentA2C([False, 0.1])
        playersAgents = [agent1, agent2, agent3, agent4]

        print("Testing - loading: " + str(loadModel))
        # input("here")
        experimentDescriptor = description + "_GameExperimentNumber_" + str(
            i) + "_Test"

        numGames = 100
        plotFrequency = 100  # plot the plots every X games
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Player1 - agent
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        wins = (numpy.average(p1[2]), numpy.average(p2[2]),
                numpy.average(p3[2]), numpy.average(p4[2]))  # average reward per player

        bestAgent = 0
        secondBestAgent = 0
        bestWin = -5000
        secondBestWin = -5000
        for a in range(4):
            if wins[a] >= bestWin:
                # the previous best becomes the new second best
                secondBestWin = bestWin
                secondBestAgent = bestAgent
                bestWin = wins[a]
                bestAgent = a
            elif wins[a] >= secondBestWin:
                secondBestWin = wins[a]
                secondBestAgent = a

        bestAgentsList.append(loadModel[bestAgent])
        lastBestAgent = loadModel[bestAgent]
        secondBestList.append(loadModel[secondBestAgent])

        # loadModel = [loadModel[bestAgent], loadModel[bestAgent], loadModel[bestAgent], loadModel[bestAgent]]

        print("Best Agent: " + str(bestAgent))
        print("Rewards: " + str(wins))
        # input("Here")

    print("Metrics:" + str(metrics))
Example #6
import cv2
import numpy

# Experiment control variables
dataSetLocation = "dataset.pkl"  # location of the dataset .pkl file

saveMoodDataset = ""  # location where the Mood dataset will be saved
saveMoodPlot = ""  # location where the Mood plots will be saved

gameToGenerateMood = 0  # game from which to generate the mood

# Agents (evaluation mode)
agent1 = AgentDQL.AgentDQL([False, 1.0, "DQL"])
agent2 = AgentPPO.AgentPPO([False, 1.0, "PPO"])
agent3 = AgentA2C.AgentA2C([False, 1.0, "A2C"])
agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

agents = [agent1, agent2, agent3, agent4]

DQLModel = "dql.dh5" # Location of the trained DQL model

A2cActor = "a2cActor.dh5" # Location of the trained A2C Actor model
A2cCritic = "a2cCritic.dh5" # Location of the trained A2C Critic model

PPOActor = "ppoActor.dh5" # Location of the trained PPO Actor model
PPOCritic = "ppoCritic.dh5" # Location of the trained PPO Critic model

loadModelAgent1 = DQLModel
loadModelAgent2 = [PPOActor, PPOCritic]
loadModelAgent3 = [A2cActor, A2cCritic]

Example #7
def runModel():
    # Parameters for the game (agent in evaluation mode)
    agent1 = AgentA2C.AgentA2C([False, 1.0])
    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 1  # number of runs
    numGames = 100  # number of games per run
    experimentDescriptor = "Testing_NewPlot_Stack"

    A2CActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/actor_iteration_999_Player_0.hd5"
    A2CCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:14:16.796731/Model/critic_iteration_999_Player_0.hd5"

    loadModelAgent1 = [A2CActor, A2CCritic]

    loadModelAgent2 = ""

    loadModelAgent3 = ""
    loadModelAgent4 = ""

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # indicate where each saved model is

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment

    isPlotting = False  # plot the experiment

    plotFrequency = 1  # generate the plots every X games

    createDataset = True  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Random/1000/AllQValues"  # Directory where the experiment will be saved

    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []
    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Player1 - agent
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])

        # print ("Metrics:" + str(metrics))

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
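# plotVictoriesTotal comes from the experiment framework and is not shown in
# this listing. A rough matplotlib sketch of such a summary plot, assuming each
# winsP list holds one win count per run; plotVictoriesSketch is hypothetical:
import numpy
import matplotlib.pyplot as plt

def plotVictoriesSketch(winsP1, winsP2, winsP3, winsP4, numGames, descriptor, saveIn):
    """Bar plot of the average number of victories per player."""
    labels = ["Player 1", "Player 2", "Player 3", "Player 4"]
    means = [numpy.mean(w) for w in (winsP1, winsP2, winsP3, winsP4)]
    plt.figure()
    plt.bar(labels, means)
    plt.ylabel("Average wins per " + str(numGames) + " games")
    plt.title(descriptor)
    plt.savefig(saveIn + "/Victories_" + descriptor + ".png")
    plt.close()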
Example #8
def runModel():

    #Plots
    plotsToGenerate = []

    demonstrations = numpy.load("/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/NEURIPS2020/AIRL/ExpertObs/Demonstrations_ExpertCollection.npy", allow_pickle=True)

    # Parameters for the agents (evaluation mode)
    agentDQL = AgentDQL.AgentDQL([False, 1.0, "DQL"])
    agentA2C = AgentA2C.AgentA2C([False, 1.0, "A2C"])
    agentPPO = AgentPPO.AgentPPO([False, 1.0, "PPO"])
    agentAIRL = AIRL.AgentAIRL([False, 1.0, "AIRL", None, demonstrations])

    possibleAgent1 = [agentDQL, agentA2C, agentPPO, agentAIRL]


    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # Locations of the trained models to load
    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/DQL_vsRandom/actor_iteration_999_Player_0.hd5"

    A2cActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/A2C_vsEveryone/actor_iteration_999_Player_1.hd5"
    A2cCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/A2C_vsEveryone/critic_iteration_999_Player_1.hd5"

    PPOActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/PPO_vsEveryone/actor_iteration_999_Player_2.hd5"
    PPOCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/PPO_vsEveryone/critic_iteration_999_Player_2.hd5"

    AIRLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/NEURIPS2020/AIRL/Training/Player_4_Cards_11_games_5000TrainingAgents_['DQL_AIRL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Train_NegativeReward_notInverted_2020-04-05_16:31:22.781799/Model/actor_iteration_4999_Player_0.hd5"
    AIRLReward = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/NEURIPS2020/AIRL/Training/Player_4_Cards_11_games_5000TrainingAgents_['DQL_AIRL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Train_NegativeReward_notInverted_2020-04-05_16:31:22.781799/Model/reward_iteration_4999_Player_0.hd5"

    possibleLoadModel1 = [DQLModel, [A2cActor, A2cCritic], [PPOActor,PPOCritic], [AIRLModel,AIRLReward]]
    loadModelEmpty = ""

    #Reward function
    reward = RewardOnlyWinning.RewardOnlyWinning()

    #Experimental parameters
    numberOfTrials = 50
    maximumScore = 15 # maximumScore to be reached
    experimentDescriptor = "BaselineExperimentsVsRandom" #Experiment name

    isLogging = False # create a .txt file with the experiment log

    isPlotting = False #Create plots of the experiment

    createDataset = False  # create a .pkl dataset of the experiment

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/BaselineNumberGames"  # Directory where the experiment will be saved

    #Metrics to be saved
    avgTotalGames = []

    avgWonGames = []
    avgPoints = []
    avgWonRounds = []

    for a in range(4):
        avgPoints.append([])
        avgWonGames.append([])
        avgWonRounds.append([])


    columns = ["ExperimentName", "AvgTotalNumberGames", "stdNumberGames",
               "Player0_AvgPoints", "Player0_stdAvgPoints",
               "Player0_AvgWonGames", "Player0_stdAvgWonGames",
               "Player0_AvgWonRounds","Player0_stdWonRounds",
               "Player1_AvgPoints", "Player1_stdAvgPoints",
               "Player1_AvgWonGames", "Player1_stdAvgWonGames",
               "Player1_AvgWonRounds", "Player1_stdWonRounds",
               "Player2_AvgPoints", "Player2_stdAvgPoints",
               "Player2_AvgWonGames", "Player2_stdAvgWonGames",
               "Player2_AvgWonRounds", "Player2_stdWonRounds",
               "Player3_AvgPoints", "Player3_stdAvgPoints",
               "Player3_AvgWonGames", "Player3_stdAvgWonGames",
               "Player3_AvgWonRounds", "Player3_stdWonRounds",
                     ]

    totalDataFrame = pd.DataFrame(columns=columns)


    for agent in range(4):

        # Reset the metrics so each evaluated agent gets independent statistics
        avgTotalGames = []
        avgPoints = [[] for _ in range(4)]
        avgWonGames = [[] for _ in range(4)]
        avgWonRounds = [[] for _ in range(4)]

        loadModel = [possibleLoadModel1[agent], loadModelEmpty, loadModelEmpty, loadModelEmpty]

        # List of agents and models to load
        playersAgents = [possibleAgent1[agent], agent2, agent3, agent4]
        print("Evaluating agent: " + str(playersAgents[0].name))
        for a in range(numberOfTrials):

            metrics = ChefsHatExperimentHandler.runExperiment(
                maximumScore=maximumScore,
                playersAgents=playersAgents,
                experimentDescriptor=experimentDescriptor,
                isLogging=isLogging,
                isPlotting=isPlotting,
                createDataset=createDataset,
                saveExperimentsIn=saveExperimentsIn,
                loadModel=loadModel,
                rewardFunction=reward,
                plots=plotsToGenerate)
            games = metrics[-1]
            score = metrics[-2]
            winner = numpy.argmax(score)

            for i in range(len(playersAgents)):
                playerMetric = metrics[2+i]
                rounds = playerMetric[5]
                avgPoints[i].append(score[i])
                if winner == i:
                    avgWonGames[i].append(games)
                    avgWonRounds[i].append(numpy.mean(rounds))

            print("Trial:" + str(a) + "- Games" + str(games) + " - Winner: " + str(winner))
            avgTotalGames.append(games)

        currentDataFrame = []
        currentDataFrame.append(playersAgents[0].name)  # ExperimentName
        currentDataFrame.append(numpy.mean(avgTotalGames))  # AvgTotalNumberGames
        currentDataFrame.append(numpy.std(avgTotalGames))  # stdNumberGames


        for i in range(len(playersAgents)):
            points = avgPoints[i]
            wongamesNumber = avgWonGames[i]
            wonRounds = avgWonRounds[i]

            currentDataFrame.append(numpy.mean(points)) # Player X AvgPoints
            currentDataFrame.append(numpy.std(points))  # Player X StdPoints

            currentDataFrame.append(numpy.mean(wongamesNumber)) # Player X AvgWonGames
            currentDataFrame.append(numpy.std(wongamesNumber))  # Player X StdWonGames

            currentDataFrame.append(numpy.mean(wonRounds)) # Player X AvgRounds
            currentDataFrame.append(numpy.std(wonRounds))  # Player X StdRounds
            # print ("Player - " + str(i))
            # print (" -- Average points:" + str(numpy.mean(points)) + "("+str(numpy.std(points))+")")
            # print(" -- Average Num Games  When Win:" + str(numpy.mean(wongamesNumber)) + "(" + str(numpy.std(wongamesNumber)) + ")")
            # print(" -- Average Num Rounds  When Win:" + str(numpy.mean(roundsWin)) + "(" + str(
            #     numpy.std(roundsWin)) + ")")


        totalDataFrame.loc[-1] = currentDataFrame
        totalDataFrame.index = totalDataFrame.index + 1

        totalDataFrame.to_pickle(saveExperimentsIn + "/" + experimentDescriptor)
        totalDataFrame.to_csv(saveExperimentsIn + "/" + experimentDescriptor + ".csv", index=False, header=True)
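# The totalDataFrame.loc[-1] assignment followed by the index shift above is a
# common pandas idiom for prepending a row. A tiny self-contained demonstration
# of the same pattern, with shortened column names for illustration:
import pandas as pd

df = pd.DataFrame(columns=["ExperimentName", "AvgTotalNumberGames"])
df.loc[-1] = ["DQL", 42.0]  # the new row lands at index -1
df.index = df.index + 1     # shift so the new row becomes index 0
# df now contains a single row: ("DQL", 42.0)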
Example #9

def runModel():
    # Parameters for the game (agents in evaluation mode)
    agent1 = AgentDQL.AgentDQL([False, 1.0])
    agent2 = AgentA2C.AgentA2C([False, 1.0])
    agent3 = AgentPPO.AgentPPO([False, 1.0])
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # if training specific agents
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 1  # number of runs
    numGames = 100  # number of games per run

    experimentDescriptor = "Testing_NoTraining_newPlot"

    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0_2020-03-26_17:26:18.198600/Model/actor_iteration_999_Player_2.hd5"

    A2cActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best -  Best - _2020-03-26_19:11:39.863435/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best -  Best - _2020-03-26_19:11:39.863435/Model/critic_iteration_999_Player_0.hd5"

    PPOActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch -  Scratch -  Scratch - _2020-03-26_22:27:02.430568/Model/actor_iteration_999_Player_3.hd5"
    PPOCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch -  Scratch -  Scratch - _2020-03-26_22:27:02.430568/Model/critic_iteration_999_Player_3.hd5"

    loadModelAgent1 = DQLModel

    loadModelAgent2 = [A2cActor, A2cCritic]

    loadModelAgent3 = [PPOActor, PPOCritic]
    loadModelAgent4 = ""  # the random agent loads no model

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # indicate where each saved model is

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment

    isPlotting = False  # plot the experiment

    plotFrequency = 1  # generate the plots every X games

    createDataset = True  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/All/AllQValues_beforetraining/"  # Directory where the experiment will be saved

    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []
    qvalues = []

    for a in range(len(playersAgents)):
        qvalues.append([])

    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Player1 - agent
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])

        for a in range(len(playersAgents) - 1):  # the last (random) agent has no Q-values
            qvalues[a].append(metrics[a + 2][-1])

        # print ("Metrics:" + str(metrics))

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
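# qvalues[a] above collects metrics[a + 2][-1] for each learning agent and run.
# Assuming each entry is a numeric sequence of Q-values, a brief sketch of
# averaging them per agent; averageQValues is illustrative only:
import numpy

def averageQValues(qvalues):
    """Return the mean Q-value per agent across all runs (nan if none)."""
    averages = []
    for runs in qvalues:
        if len(runs) == 0:
            averages.append(float("nan"))
        else:
            flat = numpy.concatenate([numpy.ravel(q) for q in runs])
            averages.append(float(flat.mean()))
    return averages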