def runModel():

    # Plots to generate during the experiment. The full set is kept here for
    # reference but is currently disabled; restore the list below to enable it.
    # plotsToGenerate = [plots["Experiment_Rounds"], plots["Experiment_FinishingPosition"],
    #                    plots["Experiment_ActionsBehavior"], plots["Experiment_Reward"],
    #                    plots["Experiment_QValues"], plots["Experiment_Mood"],
    #                    plots["Experiment_MoodNeurons"], plots["Experiment_SelfProbabilitySuccess"]]
    plotsToGenerate = []

    #Parameters for the agents
    agent1 = AgentDQL.AgentDQL([False, 1.0, "DQL"])  # evaluation agent (training disabled)
    agent2 = AgentPPO.AgentPPO([False, 1.0, "PPO"])  # evaluation agent (training disabled)
    agent3 = AgentA2C.AgentA2C([False, 1.0, "A2C"])  # evaluation agent (training disabled)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)  # random baseline agent

# Example #2
def runModel():
    # Parameters of the agents
    agent1 = AgentDQL.AgentDQL([True, 1.0])  # training agent
    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # The four players in the game
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numGames = 100  # number of training games
    experimentDescriptor = "TrainingAgent"

    loadModelAgent1 = ""
    loadModelAgent2 = ""
    loadModelAgent3 = ""
    loadModelAgent4 = ""

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # paths of the saved models, one per agent ("" starts from scratch)

    # Parameters for controlling the experiment

    isLogging = False  # log the experiment

    isPlotting = True  # plot the experiment

    plotFrequency = 1000  # generate the plots every X games

    createDataset = False  # whether to save the dataset

    saveExperimentsIn = "saveExperiment"  # directory where the experiment will be saved

    # Run simulation
    metrics = ChefsHatExperimentHandler.runExperiment(
        numGames=numGames,
        playersAgents=playersAgents,
        experimentDescriptor=experimentDescriptor,
        isLogging=isLogging,
        isPlotting=isPlotting,
        plotFrequency=plotFrequency,
        createDataset=createDataset,
        saveExperimentsIn=saveExperimentsIn,
        loadModel=loadModel,
        rewardFunction=reward)

    print("Metrics:" + str(metrics))
def runModel():
    #Parameters for the game
    agent1 = AgentDQL.AgentDQL([True, 1.0])  #training agent
    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # The four players in the game
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardIROSPaper.RewardIROSPaper()

    numGames = 250
    experimentDescriptor = "IROSExperiments"

    loadModelAgent1 = ""  #If loading an existing agent, indicate here.
    loadModelAgent2 = ""  #If loading an existing agent, indicate here.

    loadModelAgent3 = ""  #If loading an existing agent, indicate here.
    loadModelAgent4 = ""  #If loading an existing agent, indicate here.

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # paths of the saved models, one per agent ("" starts from scratch)

    isLogging = True  # log the experiment

    isPlotting = True  # plot the experiment

    plotFrequency = 250  # generate the plots every X games

    createDataset = False  # whether to save the dataset

    saveExperimentsIn = "SaveExperiment_IROS2020/"  # directory where the experiment will be saved

    metrics = ChefsHatExperimentHandler.runExperiment(
        numGames=numGames,
        playersAgents=playersAgents,
        experimentDescriptor=experimentDescriptor,
        isLogging=isLogging,
        isPlotting=isPlotting,
        plotFrequency=plotFrequency,
        createDataset=createDataset,
        saveExperimentsIn=saveExperimentsIn,
        loadModel=loadModel,
        rewardFunction=reward)

    print("Metrics:" + str(metrics))
def runModel():
    #Parameters for the game
    agent1 = AgentDQL.AgentDQL([True, 1.0]) #training agent
    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # The four players in the game
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numGames = 1000  # number of training games
    experimentDescriptor = "Training_NewPlots"


    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Player_4_Cards_11_games_3000TrainingAgents_['DQL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-25_18:15:32.076987/Model/actor_iteration_2999_Player_0.hd5"

    loadModelAgent1 = ""#[actorModelDDPG,criticModelDDPG]#DQLModel#""# #"" #""#""#DQLModel#""#[actorModelDDPG,criticModelDDPG] ##""#[actorModelA2C,criticModelA2C] #[actorModelA2C,criticModelA2C] #DQLModel #[actorModelA2C,criticModelA2c] #[actorModelDDPG,criticModelDDPG]

    loadModelAgent2 = ""#DQLModel #"" #DQLModel

    loadModelAgent3 = "" #[actorModelDDPG,criticModelDDPG]
    loadModelAgent4 = ""

    #
    # loadModel = [loadModelAgent1,loadModelAgent2, loadModelAgent3, loadModelAgent4] #indicate where the saved model is
    loadModel = [loadModelAgent1,loadModelAgent2, loadModelAgent3, loadModelAgent4] #indicate where the saved model is
    #
    # loadModel = "" #indicate where the saved model is

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment

    isPlotting = True  # plot the experiment

    plotFrequency = 1000  # generate the plots every X games

    createDataset = True  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Random/1000/NewQPlot"  # directory where the experiment will be saved

    metrics = ChefsHatExperimentHandler.runExperiment(
        numGames=numGames,
        playersAgents=playersAgents,
        experimentDescriptor=experimentDescriptor,
        isLogging=isLogging,
        isPlotting=isPlotting,
        plotFrequency=plotFrequency,
        createDataset=createDataset,
        saveExperimentsIn=saveExperimentsIn,
        loadModel=loadModel,
        rewardFunction=reward)

    print("Metrics:" + str(metrics))
def runModel():
    #Parameters for the game
    agent1 = AgentDQL.AgentDQL([True, 1.0])  #training agent
    agent2 = AgentDQL.AgentDQL([True, 1.0])
    agent3 = AgentDQL.AgentDQL([True, 1.0])
    agent4 = AgentDQL.AgentDQL([True, 1.0])

    # The four players in the game
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numExperiments = 50  # number of experiments. At the end of each experiment, we copy the best player and make them play against each other.
    numGames = 1000  # amount of training games

    experimentDescriptor = "Training"

    loadModelAgent1 = ""  #""#""  #DQLModel #[actorModelA2C,criticModelA2c] #[actorModelDDPG,criticModelDDPG]

    loadModelAgent2 = ""  #""# ""#[actorModel,criticModel]

    loadModelAgent3 = ""  #""# ""
    loadModelAgent4 = ""  #""# ""

    #
    # loadModel = [loadModelAgent1,loadModelAgent2, loadModelAgent3, loadModelAgent4] #indicate where the saved model is
    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  #indicate where the saved model is
    #
    # loadModel = "" #indicate where the saved model is

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment

    isPlotting = True  # plot the experiment

    plotFrequency = 1000  # generate the plots every X games

    createDataset = False  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000x50/"  # Directory where the experiment will be saved

    bestAgent = 0
    description = experimentDescriptor
    epsilon = 1.0

    bestAgentsList = []
    secondBestList = []
    lastBestAgent = ""

    for i in range(numExperiments):

        agents = []
        agentsChoice = ""
        for agentNumber in range(3):
            probNumber = numpy.random.rand()

            if probNumber <= 0.33:  #Pull from the BestAgentList
                if len(bestAgentsList) == 0:
                    agents.append("")
                else:
                    random.shuffle(bestAgentsList)
                    agents.append(bestAgentsList[0])
                agentsChoice = agentsChoice + "BestAgents-"

            elif probNumber <= 0.66:  # Pull from the secondBestList
                if len(secondBestList) == 0:
                    agents.append("")
                else:
                    random.shuffle(secondBestList)
                    agents.append(secondBestList[0])
                agentsChoice = agentsChoice + "SecondBestAgents-"
            else:  # Start a new agent from scratch
                agents.append("")
                agentsChoice = agentsChoice + "Scratch-"
        agents.append(lastBestAgent)

        loadModel = agents
        # Train the best scored one
        agent1 = AgentDQL.AgentDQL([True, epsilon])  # training agent
        agent2 = AgentDQL.AgentDQL([True, epsilon])
        agent3 = AgentDQL.AgentDQL([True, epsilon])
        agent4 = AgentDQL.AgentDQL([True, epsilon])
        # epsilon = epsilon * 0.7
        # if epsilon < 0.1:
        #     epsilon = 0.1
        # The four players in the game
        playersAgents = [agent1, agent2, agent3, agent4]

        numGames = 1000
        plotFrequency = 1000  # generate the plots every X games
        print("Choices: " + str(agentsChoice))
        print("Best agent: " + str(bestAgent) + " - Loading:" + str(loadModel))

        # input("here")
        # experimentDescriptor = description + "_GameExperimentNumber_" + str(i) + "_Best_Agent_" + str(bestAgent)
        experimentDescriptor = description + "_GameExperimentNumber_" + str(
            i) + "_Training_Best_Agent_" + str(bestAgent) + "Choice_" + str(
                agentsChoice)
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Evaluate the trained agents without further training them.
        # Retrieve the models each trained agent saved.
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        loadModelAgent1 = p1[4]
        loadModelAgent2 = p2[4]
        loadModelAgent3 = p3[4]
        loadModelAgent4 = p4[4]

        loadModel = [
            loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
        ]

        #Initialize evaluation agents
        agent1 = AgentDQL.AgentDQL([False, 0.1])
        agent2 = AgentDQL.AgentDQL([False, 0.1])
        agent3 = AgentDQL.AgentDQL([False, 0.1])
        agent4 = AgentDQL.AgentDQL([False, 0.1])
        playersAgents = [agent1, agent2, agent3, agent4]

        print("Testing - loading: " + str(loadModel))
        # input("here")
        experimentDescriptor = description + "_GameExperimentNumber_" + str(
            i) + "_Test"

        numGames = 100
        plotFrequency = 100  # generate the plots every X games
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Per-player metrics
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        wins = (numpy.average(p1[2]), numpy.average(p2[2]),
                numpy.average(p3[2]), numpy.average(p4[2]))  # average reward per player
        # Alternatively, the agents could be ranked by their win counts (p1[0], p2[0], ...).

        bestAgent = 0
        secondBestAgent = 0
        bestWin = -5000
        secondBestWin = -5000
        for a in range(4):
            if wins[a] >= bestWin:
                # The previous best becomes the new second best
                secondBestWin = bestWin
                secondBestAgent = bestAgent
                bestWin = wins[a]
                bestAgent = a
            elif wins[a] >= secondBestWin:
                secondBestWin = wins[a]
                secondBestAgent = a

        bestAgentsList.append(loadModel[bestAgent])
        lastBestAgent = loadModel[bestAgent]
        secondBestList.append(loadModel[secondBestAgent])

        print("Best Agent: " + str(bestAgent))
        print("Rewards: " + str(wins))
        # input("Here")

    print("Metrics:" + str(metrics))

# Example #6
def runModel():
    #Parameters for the game
    agent1 = AgentDQL.AgentDQL([True, 1.0])  #training agent
    agent2 = AgentA2C.AgentA2C([True, 1.0])
    agent3 = AgentPPO.AgentPPO([True, 1.0])
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # The four players in the game
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numGames = 1000  # number of training games

    experimentDescriptor = "Training"

    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0_2020-03-26_17:26:18.198600/Model/actor_iteration_999_Player_2.hd5"

    A2cActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best -  Best - _2020-03-26_19:11:39.863435/Model/actor_iteration_999_Player_0.hd5"
    A2cCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/A2C/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best -  Best - _2020-03-26_19:11:39.863435/Model/critic_iteration_999_Player_0.hd5"

    PPOActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch -  Scratch -  Scratch - _2020-03-26_22:27:02.430568/Model/actor_iteration_999_Player_3.hd5"
    PPOCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/PPO/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['A2C', 'A2C', 'A2C', 'A2C']_Reward_OnlyWinning_Training_GameExperimentNumber_9_Training_Best_Agent_3Choice_ Scratch -  Scratch -  Scratch - _2020-03-26_22:27:02.430568/Model/critic_iteration_999_Player_3.hd5"

    loadModelAgent1 = DQLModel  # DQL takes a single saved model

    loadModelAgent2 = [A2cActor, A2cCritic]  # A2C takes an [actor, critic] pair

    loadModelAgent3 = [PPOActor, PPOCritic]  # PPO takes an [actor, critic] pair
    loadModelAgent4 = ""  # the random agent has nothing to load

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # paths of the saved models, one per agent

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment

    isPlotting = True  # plot the experiment

    plotFrequency = 1000  # generate the plots every X games

    createDataset = True  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/All/QValuePlot"  # Directory where the experiment will be saved

    metrics = ChefsHatExperimentHandler.runExperiment(
        numGames=numGames,
        playersAgents=playersAgents,
        experimentDescriptor=experimentDescriptor,
        isLogging=isLogging,
        isPlotting=isPlotting,
        plotFrequency=plotFrequency,
        createDataset=createDataset,
        saveExperimentsIn=saveExperimentsIn,
        loadModel=loadModel,
        rewardFunction=reward)

    print("Metrics:" + str(metrics))

# Example #7
from Agents import AgentRandom, AgentDQL, AgentA2C, AgentPPO

import cv2
import numpy

#Experiment control variables
dataSetLocation = "dataset.pkl"  # location of the dataset .pkl file

saveMoodDataset = "" #Location where the Mood dataset will be saved
saveMoodPlot = "" #Location where the Mood Plots will be saved

gameToGenerateMood = 0 # Game from which to generate the mood.

#Agents
agent1 = AgentDQL.AgentDQL([False, 1.0, "DQL"])  # evaluation agent (training disabled)
agent2 = AgentPPO.AgentPPO([False, 1.0, "PPO"])  # evaluation agent (training disabled)
agent3 = AgentA2C.AgentA2C([False, 1.0, "A2C"])  # evaluation agent (training disabled)
agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)  # random baseline agent

agents = [agent1, agent2, agent3, agent4]

DQLModel = "dql.dh5" # Location of the trained DQL model

A2cActor = "a2cActor.dh5" # Location of the trained A2C Actor model
A2cCritic = "a2cCritic.dh5" # Location of the trained A2C Critic model

PPOActor = "ppoActor.dh5" # Location of the trained PPO Actor model
PPOCritic = "ppoCritic.dh5" # Location of the trained PPO Critic model

loadModelAgent1 = DQLModel

# Imports assumed by this example (the ChefsHat agent, handler, and reward
# modules are imported as in the other examples); hyperopt drives the
# hyperparameter search below.
import pickle
import time

import hyperopt
import hyperopt.plotting
import numpy
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe


def runModel():
    # Parameters for the game
    agent1 = AgentDQL.AgentDQL([True, 1.0, "DQL"])  #training agent
    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # Models to load ("" trains every agent from scratch)
    loadModels = ""

    loadModel = [loadModels, loadModels, loadModels, loadModels]

    # List of agents
    playersAgents = [agent1, agent2, agent3, agent4]

    # Reward function
    reward = RewardOnlyWinning.RewardOnlyWinning()

    # Experimental parameters

    numGames = 1000  # number of games to be executed
    numMaxEvals = 100  # number of evaluations for the optimization
    experimentDescriptor = "Optmizing_DQL"  # experiment name

    saveTrialsDataset = "folder"  # folder where the optimization trials will be saved
    saveTrialsDataset += experimentDescriptor

    isLogging = False  # create a .txt file with the experiment log

    isPlotting = False  # create plots of the experiment

    createDataset = False  # create a .pkl dataset of the experiment

    saveExperimentsIn = ""  # Directory where the experiment will be saved

    # Search space for the agent: (layers, hiddenUnits, batchSize, tau)
    space = hp.choice('a', [
        (hp.choice("layers", [1, 2, 3, 4]),
         hp.choice("hiddenUnits", [8, 32, 64, 256]),
         hp.choice("batchSize", [16, 64, 128, 256, 512]),
         hp.uniform("tau", 0.01, 0.99))
    ])

    def objective(args):
        agent1 = AgentDQL.AgentDQL([True, 1.0, "DQL"])  #training agent
        agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
        agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
        agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)
        playersAgents = [agent1, agent2, agent3, agent4]

        loadModels = ""

        loadModel = [loadModels, loadModels, loadModels, loadModels]

        numGames = 100

        #training trial
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward,
            agentParams=[args])

        # Player 1 is the learning agent; retrieve its trained model
        p1 = metrics[2]
        p1_model = p1[4]

        # Testing trial: evaluate the trained model without further training
        agent1 = AgentDQL.AgentDQL([False, 1.0, "DQL"])  # evaluation agent
        playersAgents = [agent1, agent2, agent3, agent4]

        loadModelAgent1 = p1_model
        loadModel = [loadModelAgent1, loadModels, loadModels, loadModels]
        numGames = 100  # number of evaluation games

        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward,
            agentParams=[args])

        rounds = metrics[0]
        startGameFinishingPosition = metrics[1]

        # Player 1 - the learning agent
        p1 = metrics[2]
        p1_wins = p1[0]
        p1_positions = p1[1]
        p1_rewards = p1[2]
        p1_wrongActions = p1[3]
        p1_model = p1[4]

        p2 = metrics[3]
        p3 = metrics[4]

        wins = numGames - p1_wins  # games player 1 did not win (lower is better)
        averageReward = 1 - numpy.average(p1_rewards)  # the loss to minimize
        averageRounds = rounds

        print("Args: " + str(args) + " - Reward: " + str(averageReward))
        return {
            'loss': averageReward,
            'status': STATUS_OK,
            # -- store other results like this
            'eval_time': time.time(),
            'other_stuff': {
                'wins': wins,
                "rounds": averageRounds,
                'wrongMoves': p1_wrongActions
            },
        }

    trials = Trials()
    best = fmin(objective,
                space=space,
                algo=tpe.suggest,
                max_evals=numMaxEvals,
                trials=trials)

    print("Saving the trials dataset:", saveTrialsDataset)

    pickle.dump(trials, open(saveTrialsDataset, "wb"))

    print("Trials:", trials)
    print("BEst: ", hyperopt.space_eval(space, best))
    print("Best:" + str(best))

    hyperopt.plotting.main_plot_history(trials, title="WinsHistory")
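
To sanity-check the search space before a long optimization run, a random configuration can be drawn from it (placed right after space is defined); a minimal sketch, assuming hyperopt is installed:

from hyperopt.pyll import stochastic

# Draws one random configuration from the space defined above; the result is a
# one-element tuple holding (layers, hiddenUnits, batchSize, tau).
print(stochastic.sample(space))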
def runModel():
    #Parameters for the game
    agent1 = AgentDQL.AgentDQL([False, 1.0])  # evaluation agent (training disabled)
    agent2 = AgentA2C.AgentA2C([False, 1.0])
    agent3 = AgentPPO.AgentPPO([False, 1.0])
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)

    # The four players in the game
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 1  # number of runs
    numGames = 100  # number of games per run

    experimentDescriptor = "Testing_Training_NewPlot"

    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/All/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'A2C', 'PPO', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_15:45:52.432279/Model/actor_iteration_999_Player_0.hd5"

    A2cActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/All/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'A2C', 'PPO', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_15:45:52.432279/Model/actor_iteration_999_Player_1.hd5"
    A2cCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/All/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'A2C', 'PPO', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_15:45:52.432279/Model/critic_iteration_999_Player_1.hd5"

    PPOActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/All/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'A2C', 'PPO', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_15:45:52.432279/Model/actor_iteration_999_Player_2.hd5"
    PPOCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/All/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'A2C', 'PPO', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_15:45:52.432279/Model/critic_iteration_999_Player_2.hd5"

    loadModelAgent1 = DQLModel  # DQL takes a single saved model

    loadModelAgent2 = [A2cActor, A2cCritic]  # A2C takes an [actor, critic] pair

    loadModelAgent3 = [PPOActor, PPOCritic]  # PPO takes an [actor, critic] pair
    loadModelAgent4 = ""  # the random agent has nothing to load

    loadModel = [
        loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4
    ]  # paths of the saved models, one per agent

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment

    isPlotting = False  # plot the experiment

    plotFrequency = 1  # generate the plots every X games

    createDataset = True  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/All/AllQValues_aftertraining/"  # Directory where the experiment will be saved

    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []

    qvalues = []

    for a in range(len(playersAgents)):
        qvalues.append([])

    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Per-player metrics
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])

        # Collect the last metrics entry of each learning agent; the random
        # agent, seated last, is skipped.
        for a in range(len(playersAgents) - 1):
            qvalues[a].append(metrics[a + 2][-1])

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)
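
plotVictoriesTotal is not defined in these snippets. A minimal matplotlib sketch of a plausible implementation, assuming it plots each player's victories per run (hypothetical, not the original helper):

import matplotlib
matplotlib.use("Agg")  # render to file without a display
import matplotlib.pyplot as plt

def plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveDirectory):
    # One line per player: victories obtained in each run.
    for wins, label in zip([winsP1, winsP2, winsP3, winsP4],
                           ["Player 1", "Player 2", "Player 3", "Player 4"]):
        plt.plot(range(len(wins)), wins, label=label)
    plt.xlabel("Run")
    plt.ylabel("Victories (out of " + str(numGames) + " games)")
    plt.title(experimentDescriptor)
    plt.legend()
    plt.savefig(saveDirectory + "/" + experimentDescriptor + "_victories.png")
    plt.close()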

# Example #11
def runModel():

    #Plots
    plotsToGenerate = []

    demonstrations = numpy.load("/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/NEURIPS2020/AIRL/ExpertObs/Demonstrations_ExpertCollection.npy", allow_pickle=True)

    #Parameters for the agents
    agentDQL = AgentDQL.AgentDQL([False, 1.0, "DQL"])  # evaluation agent (training disabled)
    agentA2C = AgentA2C.AgentA2C([False, 1.0, "A2C"])  # evaluation agent (training disabled)
    agentPPO = AgentPPO.AgentPPO([False, 1.0, "PPO"])  # evaluation agent (training disabled)
    agentAIRL = AIRL.AgentAIRL([False, 1.0, "AIRL", None, demonstrations])  # evaluation agent (training disabled)

    possibleAgent1 = [agentDQL, agentA2C, agentPPO, agentAIRL]

    agent2 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)  # random opponent
    agent3 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)  # random opponent
    agent4 = AgentRandom.AgentRandom(AgentRandom.DUMMY_RANDOM)  # random opponent

    #Load agents from
    DQLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/DQL_vsRandom/actor_iteration_999_Player_0.hd5"

    A2cActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/A2C_vsEveryone/actor_iteration_999_Player_1.hd5"
    A2cCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/A2C_vsEveryone/critic_iteration_999_Player_1.hd5"

    PPOActor = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/PPO_vsEveryone/actor_iteration_999_Player_2.hd5"
    PPOCritic = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/TrainedModels/PPO_vsEveryone/critic_iteration_999_Player_2.hd5"

    AIRLModel = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/NEURIPS2020/AIRL/Training/Player_4_Cards_11_games_5000TrainingAgents_['DQL_AIRL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Train_NegativeReward_notInverted_2020-04-05_16:31:22.781799/Model/actor_iteration_4999_Player_0.hd5"
    AIRLReward = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/NEURIPS2020/AIRL/Training/Player_4_Cards_11_games_5000TrainingAgents_['DQL_AIRL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Train_NegativeReward_notInverted_2020-04-05_16:31:22.781799/Model/reward_iteration_4999_Player_0.hd5"

    possibleLoadModel1 = [DQLModel, [A2cActor, A2cCritic], [PPOActor, PPOCritic], [AIRLModel, AIRLReward]]
    loadModelEmpty = ""

    #Reward function
    reward = RewardOnlyWinning.RewardOnlyWinning()

    # Experimental parameters
    numberOfTrials = 50
    maximumScore = 15  # the match runs until a player reaches this score
    experimentDescriptor = "BaselineExperimentsVsRandom"  # experiment name

    isLogging = False  # create a .txt file with the experiment log

    isPlotting = False  # create plots of the experiment

    createDataset = False  # create a .pkl dataset of the experiment

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/BaselineNumberGames"  # directory where the experiment will be saved

    #Metrics to be saved
    avgTotalGames = []

    avgWonGames = []
    avgPoints = []
    avgWonRounds = []

    for a in range(4):
        avgPoints.append([])
        avgWonGames.append([])
        avgWonRounds.append([])


    columns = ["ExperimentName", "AvgTotalNumberGames", "stdNumberGames",
               "Player0_AvgPoints", "Player0_stdAvgPoints",
               "Player0_AvgWonGames", "Player0_stdAvgWonGames",
               "Player0_AvgWonRounds","Player0_stdWonRounds",
               "Player1_AvgPoints", "Player1_stdAvgPoints",
               "Player1_AvgWonGames", "Player1_stdAvgWonGames",
               "Player1_AvgWonRounds", "Player1_stdWonRounds",
               "Player2_AvgPoints", "Player2_stdAvgPoints",
               "Player2_AvgWonGames", "Player2_stdAvgWonGames",
               "Player2_AvgWonRounds", "Player2_stdWonRounds",
               "Player3_AvgPoints", "Player3_stdAvgPoints",
               "Player3_AvgWonGames", "Player3_stdAvgWonGames",
               "Player3_AvgWonRounds", "Player3_stdWonRounds",
               ]

    totalDataFrame = pd.DataFrame(columns=columns)


    for agent in range(4):

        # Reset the metrics so trials do not bleed across evaluated agents
        avgTotalGames = []
        avgPoints = [[] for _ in range(4)]
        avgWonGames = [[] for _ in range(4)]
        avgWonRounds = [[] for _ in range(4)]

        # List of agents and the models to load
        loadModel = [possibleLoadModel1[agent], loadModelEmpty, loadModelEmpty, loadModelEmpty]
        playersAgents = [possibleAgent1[agent], agent2, agent3, agent4]
        print("Evaluating agent: " + str(playersAgents[0].name))
        for a in range(numberOfTrials):

            metrics = ChefsHatExperimentHandler.runExperiment(
                maximumScore=maximumScore,
                playersAgents=playersAgents,
                experimentDescriptor=experimentDescriptor,
                isLogging=isLogging,
                isPlotting=isPlotting,
                createDataset=createDataset,
                saveExperimentsIn=saveExperimentsIn,
                loadModel=loadModel,
                rewardFunction=reward,
                plots=plotsToGenerate)
            games = metrics[-1]
            score = metrics[-2]
            winner = numpy.argmax(score)

            for i in range(len(playersAgents)):
                playerMetric = metrics[2+i]
                rounds = playerMetric[5]
                avgPoints[i].append(score[i])
                if winner == i:
                    avgWonGames[i].append(games)
                    avgWonRounds[i].append(numpy.mean(rounds))

            print("Trial:" + str(a) + "- Games" + str(games) + " - Winner: " + str(winner))
            avgTotalGames.append(games)

        currentDataFrame = []
        currentDataFrame.append(playersAgents[0].name)  # ExperimentName (trained agent)
        currentDataFrame.append(numpy.mean(avgTotalGames))  # AvgTotalNumberGames
        currentDataFrame.append(numpy.std(avgTotalGames))  # stdNumberGames


        for i in range(len(playersAgents)):
            points = avgPoints[i]
            wonGamesNumber = avgWonGames[i]
            wonRounds = avgWonRounds[i]

            currentDataFrame.append(numpy.mean(points))  # Player i AvgPoints
            currentDataFrame.append(numpy.std(points))  # Player i stdAvgPoints

            currentDataFrame.append(numpy.mean(wonGamesNumber))  # Player i AvgWonGames
            currentDataFrame.append(numpy.std(wonGamesNumber))  # Player i stdAvgWonGames

            currentDataFrame.append(numpy.mean(wonRounds))  # Player i AvgWonRounds
            currentDataFrame.append(numpy.std(wonRounds))  # Player i stdWonRounds


        # Append this agent's row and persist the results after every agent
        totalDataFrame.loc[-1] = currentDataFrame
        totalDataFrame.index = totalDataFrame.index + 1

        totalDataFrame.to_pickle(saveExperimentsIn + "/" + experimentDescriptor)
        totalDataFrame.to_csv(saveExperimentsIn + "/" + experimentDescriptor + ".csv", index=False, header=True)
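
The persisted results can be inspected afterwards; a minimal sketch, assuming pandas is available (the path below mirrors the saveExperimentsIn and experimentDescriptor values above):

import pandas as pd

results = pd.read_pickle(
    "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/BaselineNumberGames/BaselineExperimentsVsRandom")
print(results[["ExperimentName", "AvgTotalNumberGames", "stdNumberGames"]])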
def runModel():
    #Parameters for the game
    agent1 = AgentDQL.AgentDQL([False, 1.0])  # evaluation agent (training disabled)
    agent2 = AgentDQL.AgentDQL([False, 1.0])
    agent3 = AgentDQL.AgentDQL([False, 1.0])
    agent4 = AgentDQL.AgentDQL([False, 1.0])

    # The four players in the game
    playersAgents = [agent1, agent2, agent3, agent4]

    reward = RewardOnlyWinning.RewardOnlyWinning()

    numRuns = 10  # number of runs
    numGames = 100  # number of games per run

    experimentDescriptor = "Testing_50x1000"

    # DQLModel1 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_1_Training_Best_Agent_2_2020-03-26_17:33:51.517296/Model/actor_iteration_999_Player_1.hd5"
    # DQLModel2 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_2_Training_Best_Agent_1_2020-03-26_17:42:24.306637/Model/actor_iteration_999_Player_1.hd5"
    # DQLModel3 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/Old/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_3_Training_Best_Agent_2_2020-03-26_00:13:55.479460/Model/actor_iteration_999_Player_1.hd5"
    # DQLModelr = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Random/Player_4_Cards_11_games_3000TrainingAgents_['DQL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-25_18:15:32.076987/Model/actor_iteration_2999_Player_0.hd5"
    #
    #
    #
    # DQLModel0 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0_2020-03-26_17:26:18.198600/Model/actor_iteration_999_Player_2.hd5"
    # DQLModel4 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_4_Training_Best_Agent_3_2020-03-26_18:03:32.220659/Model/actor_iteration_999_Player_2.hd5"
    # DQLModel9 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/Player_4_Cards_11_games_3000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_ Scratch -  Second Best Agents -  Second Best Agents - _2020-03-26_20:28:12.574082/Model/actor_iteration_2999_Player_0.hd5"
    # DQLModelr = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Random/Player_4_Cards_11_games_3000TrainingAgents_['DQL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-25_18:15:32.076987/Model/actor_iteration_2999_Player_0.hd5"


    DQLModel0 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000x50/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_0_Training_Best_Agent_0Choice_SecondBestAgents-SecondBestAgents-Scratch-_2020-03-27_01:55:16.105188/Model/actor_iteration_999_Player_2.hd5"
    DQLModel4 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000x50/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_15_Training_Best_Agent_2Choice_SecondBestAgents-BestAgents-BestAgents-_2020-03-27_04:50:05.205901/Model/actor_iteration_999_Player_2.hd5"
    DQLModel9 = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000x50/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DQL', 'DQL', 'DQL']_Reward_OnlyWinning_Training_GameExperimentNumber_30_Training_Best_Agent_0Choice_BestAgents-BestAgents-BestAgents-_2020-03-27_12:18:40.723555/Model/actor_iteration_999_Player_2.hd5"
    DQLModelr = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Random/1000/Player_4_Cards_11_games_1000TrainingAgents_['DQL', 'DUMMY_RANDOM', 'DUMMY_RANDOM', 'DUMMY_RANDOM']_Reward_OnlyWinning_Training_2020-03-27_14:26:46.054899/Model/actor_iteration_999_Player_0.hd5"

    loadModelAgent1 = DQLModel0
    loadModelAgent2 = DQLModel4
    loadModelAgent3 = DQLModel9
    loadModelAgent4 = DQLModelr

    loadModel = [loadModelAgent3, loadModelAgent4, loadModelAgent1,
                 loadModelAgent2]  # paths of the saved models, one per seat

    # Parameters for controlling the experiment
    isLogging = False  # log the experiment

    isPlotting = True  # plot the experiment

    plotFrequency = 1000  # generate the plots every X games

    createDataset = True  # whether to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Self/1000/"  # Directory where the experiment will be saved

    winsP1 = []
    winsP2 = []
    winsP3 = []
    winsP4 = []
    for i in range(numRuns):
        metrics = ChefsHatExperimentHandler.runExperiment(
            numGames=numGames,
            playersAgents=playersAgents,
            experimentDescriptor=experimentDescriptor,
            isLogging=isLogging,
            isPlotting=isPlotting,
            plotFrequency=plotFrequency,
            createDataset=createDataset,
            saveExperimentsIn=saveExperimentsIn,
            loadModel=loadModel,
            rewardFunction=reward)

        # Per-player metrics
        p1 = metrics[2]
        p2 = metrics[3]
        p3 = metrics[4]
        p4 = metrics[5]

        winsP1.append(p1[0])
        winsP2.append(p2[0])
        winsP3.append(p3[0])
        winsP4.append(p4[0])

        print("Metrics:" + str(metrics))

    plotVictoriesTotal(winsP1, winsP2, winsP3, winsP4, numGames,
                       experimentDescriptor, saveExperimentsIn)