# Shared dependencies for the examples below; ssg and the project-local
# helpers (getBestResponseAction, ReplayMemory, AttackerOracle,
# sampleMinibatch) are assumed to be importable from the surrounding module.
import numpy as np
import torch.nn as nn
import torch.optim as optim


def getDefenderBaseline(aIds, aMap, aMix, game, dPool):
    """Expected defender utility of playing best responses against the
    attacker mixed strategy, filtering the mix for observation consistency
    after every timestep."""
    expectedUtility = 0
    dOb, aOb = game.getEmptyObservations()
    undoClone = ssg.cloneGame(game)
    clone = ssg.cloneGame(game)
    editableAMix = aMix.copy()
    savedAction = None
    # Calculate the expected utility of attacker myopic play
    for agentIndex in range(len(aMap)):
        if aMix[agentIndex] > 0:
            aAgent = aMap[agentIndex]
            for timestep in range(game.timesteps):
                if timestep == 0:
                    # Calculate the best response against the entire mixed strategy and save it
                    savedAction = getBestResponseAction(ssg.DEFENDER, game, aMap, editableAMix, dPool, dOb, aOb)
                    # Play the saved defender action and this agent's action
                    # to obtain the first pair of observations
                    attackerAction = aAgent.getAction(game, aOb)
                    dOb, aOb, _, _ = game.performActions(savedAction, attackerAction, dOb, aOb)
                else:
                    # For each agent:
                    for i in range(len(editableAMix)):
                        if editableAMix[i] > 0:
                            #   Play the agent's action on clone with the saved action
                            attackerAction = aMap[i].getAction(clone, game.previousAttackerObservation)
                            dTestOb, _, _, _ = clone.performActions(savedAction, attackerAction, game.previousDefenderObservation, game.previousAttackerObservation)
                            #   Compare the resulting defender observation; if it
                            #   doesn't match the one actually seen, zero this
                            #   agent out of the editable mix
                            if not np.array_equal(dTestOb,dOb):
                                editableAMix[i] = 0
                            #   Reset the clone back to its pre-action state
                            ssg.cloneGameState(clone, undoClone)
                    # Renormalize the filtered mix, then compute and save the
                    # best response against it
                    total = sum(editableAMix)
                    editableAMix = [p / total for p in editableAMix]
                    savedAction = getBestResponseAction(ssg.DEFENDER, game, aMap, editableAMix, dPool, dOb, aOb)
                    # Set clone and undo clone to the current game
                    ssg.cloneGameState(clone, game)
                    ssg.cloneGameState(undoClone, game)
                    # Perform the best response and this agent's action on the real game
                    attackerAction = aAgent.getAction(game, aOb)
                    dOb, aOb, _, _ = game.performActions(savedAction, attackerAction, dOb, aOb)
            print(game.defenderUtility)
            expectedUtility += game.defenderUtility * aMix[agentIndex]
            editableAMix = aMix.copy()
            game.restartGame()
            ssg.cloneGameState(clone, game)
            ssg.cloneGameState(undoClone, game)
    return expectedUtility
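
The filtering step above is essentially a belief update: any attacker agent whose predicted observation disagrees with the one actually produced is ruled out, and the surviving probabilities are renormalized. A minimal self-contained sketch of that update in plain numpy (filterMix and its toy data are hypothetical, not part of the ssg API):

import numpy as np

def filterMix(mix, predictedObs, realizedOb):
    # Zero out agents whose predicted observation disagrees with the one
    # actually seen, then renormalize the survivors.
    mix = np.asarray(mix, dtype=float)
    consistent = np.array([np.array_equal(ob, realizedOb) for ob in predictedObs])
    mix = np.where(consistent, mix, 0.0)
    return mix / mix.sum()

# Three candidate agents; the second predicted an observation we did not see.
print(filterMix([0.5, 0.3, 0.2],
                [np.array([1, 0]), np.array([0, 1]), np.array([1, 0])],
                np.array([1, 0])))  # -> [0.714..., 0.   , 0.285...]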
Example #2
def updatePayoutMatrix(newDefenderId, newAttackerId, payoutMatrix, dIds, aIds,
                       dMap, aMap, game, newDOracle, newAOracle):
    """Extend the payoff matrix in place with the two new oracles, register
    them in the id lists and maps, and return the next free ids."""
    # New defender's row: evaluate it against every existing attacker
    for aId in aIds:
        value = ssg.expectedPureVPure(newDOracle, aMap[aId],
                                      ssg.cloneGame(game))
        payoutMatrix[newDefenderId, aId] = value
    # New attacker's column: evaluate every existing defender against it
    for dId in dIds:
        value = ssg.expectedPureVPure(dMap[dId], newAOracle,
                                      ssg.cloneGame(game))
        payoutMatrix[dId, newAttackerId] = value
    # Corner cell: the two new oracles against each other
    value = ssg.expectedPureVPure(newDOracle, newAOracle, ssg.cloneGame(game))
    payoutMatrix[newDefenderId, newAttackerId] = value
    # Register the new oracles and advance the id counters
    aIds.append(newAttackerId)
    dIds.append(newDefenderId)
    aMap[newAttackerId] = newAOracle
    dMap[newDefenderId] = newDOracle
    newDefenderId += 1
    newAttackerId += 1
    return newDefenderId, newAttackerId, payoutMatrix
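
updatePayoutMatrix avoids recomputing the whole matrix: it fills only the new defender's row, the new attacker's column, and the corner cell where the two new oracles meet. A self-contained sketch of that bookkeeping with a toy payoff function (extendPayout and valueFn are hypothetical stand-ins for ssg.expectedPureVPure):

def extendPayout(payout, dIds, aIds, newD, newA, valueFn):
    for aId in aIds:
        payout[newD, aId] = valueFn(newD, aId)   # new row
    for dId in dIds:
        payout[dId, newA] = valueFn(dId, newA)   # new column
    payout[newD, newA] = valueFn(newD, newA)     # corner cell
    dIds.append(newD)
    aIds.append(newA)

payout, dIds, aIds = {(0, 0): 0.0}, [0], [0]
extendPayout(payout, dIds, aIds, 1, 1, lambda d, a: float(d - a))
print(sorted(payout))  # [(0, 0), (0, 1), (1, 0), (1, 1)]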
Example #3
def calculatePayoutMatrix(dIds, aIds, dMap, aMap, game):
    """Compute the full payoff matrix: the expected value of every pure
    defender strategy against every pure attacker strategy."""
    payoutMatrix = {}
    for attackerId in aIds:
        pureAttacker = aMap[attackerId]
        for defenderId in dIds:
            pureDefender = dMap[defenderId]
            value = ssg.expectedPureVPure(pureDefender, pureAttacker,
                                          ssg.cloneGame(game))
            payoutMatrix[defenderId, attackerId] = value
            game.restartGame()
    return payoutMatrix
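
The full matrix built here is what a double-oracle loop hands to its core-game solver. For a zero-sum game, the defender's maximin mix can be recovered from such a matrix with a small linear program; below is a self-contained sketch using scipy.optimize.linprog (an assumption for illustration; the project may use a different solver):

import numpy as np
from scipy.optimize import linprog

def maximinMix(M):
    # Row player's maximin mix: maximize v subject to (M^T x)_j >= v for
    # every column j, sum(x) = 1, x >= 0. Variables are [x_1..x_n, v].
    nRows, nCols = M.shape
    c = np.zeros(nRows + 1)
    c[-1] = -1.0                                    # minimize -v
    A_ub = np.hstack([-M.T, np.ones((nCols, 1))])   # v - (M^T x)_j <= 0
    b_ub = np.zeros(nCols)
    A_eq = np.hstack([np.ones((1, nRows)), [[0.0]]])
    res = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=[1.0],
                  bounds=[(0, None)] * nRows + [(None, None)])
    return res.x[:-1], res.x[-1]

mix, value = maximinMix(np.array([[1.0, -1.0], [-1.0, 1.0]]))  # matching pennies
print(mix, value)  # ~[0.5 0.5], ~0.0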
def attackerTrain(oracleToTrain,
                  dIds,
                  dMap,
                  dMix,
                  game,
                  aPool,
                  N=300,
                  batchSize=30,
                  C=50,
                  epochs=100,
                  optimizer=None,
                  lossFunction=nn.MSELoss(),
                  showOutput=False,
                  trainingTest=False,
                  writer=None):
    """Train the attacker oracle with DQN-style updates against the defender
    mixed strategy (dMap, dMix); returns its expected score against that mix,
    or the score/loss history when trainingTest is set."""
    scheduler = None
    if optimizer is None:
        optimizer = optim.Adam(oracleToTrain.parameters(), lr=0.00001)
        # Keep a reference to the scheduler so it can be stepped; a plateau
        # scheduler only adjusts the rate when given a monitored loss.
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer)
    gameClone = ssg.cloneGame(game)

    if trainingTest:
        history = []
        lossHistory = []

    # Initialize the replay memory with limited capacity N
    replayMemory = ReplayMemory(N)
    # Initialize target network with weights equal to the oracle to train
    targetNetwork = AttackerOracle(oracleToTrain.targetNum)
    targetNetwork.setState(oracleToTrain.getState())

    # An epoch is one iteration over all of the training data; in our case,
    # that is the single game we are learning on.
    step = 0
    for epoch in range(epochs):
        print(f"epoch {epoch} of {epochs}")
        # initialize the starting values for the game
        dOb, aOb = game.getEmptyObservations()
        defenderAgent = dMap[np.random.choice(dIds, 1, p=dMix)[0]]

        for timestep in range(game.timesteps):  # Play a full game
            # Choose an action based off of Q network (oracle to train)
            dAction = defenderAgent.getAction(game, dOb)
            aAction = oracleToTrain.getAction(game, aOb)

            if trainingTest:
                writer.writerow([
                    f"{(timestep+1)+(game.timesteps*(epoch))}",
                    f"{game.getValidActions(ssg.ATTACKER)}",
                    f"{[oracleToTrain.forward(game.previousAttackerObservation, aOb, game.previousAttackerAction, x).item() for x in game.getValidActions(ssg.ATTACKER)]}",
                    f"{aAction}", f"{dAction}"
                ])

            # Execute that action and store the result in replay memory
            ob0 = game.previousAttackerObservation
            action0 = game.previousAttackerAction
            ob1 = aOb
            action1 = aAction
            dOb, aOb, dScore, aScore = game.performActions(
                dAction, aAction, dOb, aOb)
            replayMemory.push(ob0, action0, ob1, action1, aScore, dOb,
                              game.getValidActions(ssg.ATTACKER))

            # Sample a random minibatch of transitions from replay memory
            avgLoss = sampleMinibatch(replayMemory,
                                      game,
                                      targetNetwork,
                                      oracleToTrain,
                                      lossFunction,
                                      optimizer,
                                      timestep,
                                      batchSize=batchSize)

            if trainingTest:
                oracleScore = ssg.expectedPureVMix(ssg.ATTACKER, oracleToTrain,
                                                   dMap, dMix, gameClone)
                history.append(oracleScore)
                lossHistory.append(avgLoss / batchSize)
            # Every C steps, set Q^ = Q
            step += 1
            if step == C:
                targetNetwork.setState(oracleToTrain.getState())
                step = 0

        game.restartGame()
        # Feed the most recent minibatch loss to the plateau scheduler
        if scheduler is not None:
            scheduler.step(avgLoss)
    if trainingTest:
        return history, lossHistory
    return ssg.expectedPureVMix(ssg.ATTACKER, oracleToTrain, dMap, dMix,
                                gameClone)
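
attackerTrain follows the standard DQN recipe: act with the online Q network, push transitions into a bounded replay memory, fit on random minibatches, and copy the online weights into a frozen target network every C steps. A minimal self-contained sketch of that skeleton in PyTorch (toy network and fake transitions, not the project's AttackerOracle):

import random
from collections import deque
import torch
import torch.nn as nn

online = nn.Linear(4, 2)                     # stand-in Q network
target = nn.Linear(4, 2)                     # frozen copy, Q^
target.load_state_dict(online.state_dict())  # start in sync

memory = deque(maxlen=300)                   # replay memory with capacity N
C, step, batchSize = 50, 0, 30
for t in range(200):                         # stand-in for the timestep loop
    memory.append((torch.randn(4), torch.randn(2)))  # fake (ob, targetQ) pair
    if len(memory) >= batchSize:
        batch = random.sample(memory, batchSize)     # random minibatch
        # (the real loop fits the online network on this batch)
    step += 1
    if step == C:                            # every C steps, set Q^ = Q
        target.load_state_dict(online.state_dict())
        step = 0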