def get_payoff_mixed(model, attack_profile, defense_profile, attack_strategy,
                     defense_strategy):
    """
    Estimate the defender's payoff when both players use mixed strategies.

    MAX_EPISODES episodes are simulated. For each episode one attack policy
    and one defense policy are drawn according to the given mixed strategies,
    the model is stepped MAX_STEPS times, and the per-step reward (the negated
    change in the state's ``U``) is accumulated with discount factor GAMMA.

    :param model: Model of the alert prioritization problem (i.e., Model object).
    :param attack_profile: List of attack policies.
    :param defense_profile: List of defense policies.
    :param attack_strategy: List of probabilities of choosing each policy from the attack profile.
    :param defense_strategy: List of probabilities of choosing each policy from the defense profile.
    :return: The discounted reward averaged over MAX_EPISODES episodes.
    """
    # Sample policy *indices* instead of the policies themselves:
    # np.random.choice needs a 1-D array-like, and a list of sequence-like
    # policy objects could be coerced into a multi-dimensional array and fail.
    # The attacker is sampled first, then the defender, as before.
    attack_indices = np.random.choice(len(attack_profile),
                                      MAX_EPISODES,
                                      p=attack_strategy)
    defense_indices = np.random.choice(len(defense_profile),
                                       MAX_EPISODES,
                                       p=defense_strategy)

    # NOTE(review): every episode restarts from this same State object, which
    # assumes model.next_state returns a new state rather than mutating its
    # argument -- confirm against Model.
    initial_state = Model.State(model)

    total_discount_reward = 0
    for attack_index, defense_index in zip(attack_indices, defense_indices):
        attack_policy = attack_profile[attack_index]
        defense_policy = defense_profile[defense_index]

        state = initial_state
        episode_reward = 0.0
        for step in range(MAX_STEPS):
            next_state = model.next_state('old', state, defense_policy,
                                          attack_policy)
            # Reward for this step is the negated increase in U.
            step_reward = -1.0 * (next_state.U - state.U)
            episode_reward += GAMMA**step * step_reward
            state = next_state
        total_discount_reward += episode_reward

    return total_discount_reward / MAX_EPISODES
# Example #2
# 0
        if model.adv_budget == 3:
            alpha = [1, 0, 1]
    elif model.def_budget == 30:
        if model.adv_budget == 2:
            alpha = [0, 0, 1]
        if model.adv_budget == 3:
            alpha = [0, 0.33, 1]
    return alpha


def test_attack_ids(model, state):
    #alpha = [0, 1, 0.54, 1, 0, 0, 0]
    alpha = [0, 0, 1, 1, 0, 0, 0.419]
    return alpha


if __name__ == "__main__":
    # Smoke run: step the fraud test model ten times, printing the state and
    # the attacker's output before each transition.
    # NOTE(review): the arguments (10, 2) are presumably the defender and
    # adversary budgets -- confirm against test_model_fraud.
    model = test_model_fraud(10, 2)
    state = Model.State(model)
    for i in range(10):
        print('#############################')
        print(i)
        print('state:', state)
        print('attacker:', test_attack_action(model, state))
        # The policy functions themselves (not their outputs) are handed to
        # the model, which advances the state.
        state = model.next_state('old', state, test_defense_action,
                                 test_attack_action)