Python load_action_class Examples

Programming Language: Python

Namespace/Package Name: baselines.deepq.load_action

Method/Function: load_action_class

Examples at hotexamples.com: 6

Python load_action_class - 6 examples found. These are the top-rated real-world Python examples of baselines.deepq.load_action.load_action_class, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: ne_search.py Project: wyz2368/reward_shaping

def load_policies(game, child_partition, identity, path_dict=None):
    """Load one player's saved strategies for every child game.

    Args:
        game: Game object, forwarded unchanged to ``load_action_class``.
        child_partition: Mapping from a child-game key (a string, it is used
            as a file-name prefix) to a strategy count. For a count ``n`` the
            files with epoch suffixes ``2 .. n + 1`` are loaded.
        identity: ``0`` selects the defender, ``1`` the attacker. The value is
            also passed to ``load_action_class`` as ``training_flag``.
        path_dict: Optional nested mapping ``path_dict[identity][key]`` giving
            the path prefix for each child game. When ``None``, the
            module-level ``str_path_def`` / ``str_path_att`` prefix is used.

    Returns:
        When ``path_dict`` is ``None``: a flat dict
        ``{file_name: (act, sess, graph)}``. Otherwise a nested dict
        ``{key: {file_name: (act, sess, graph)}}``. Entries loaded through
        ``fp.load_pkl`` carry ``None`` for the session and the graph.

    Raises:
        ValueError: If ``identity`` is neither 0 nor 1.
    """
    use_default_path = path_dict is None

    if identity == 0:
        # Defender strategies.
        mid_name = '_def_str_epoch'
        if use_default_path:
            path = str_path_def
    elif identity == 1:
        # Attacker strategies.
        mid_name = '_att_str_epoch'
        if use_default_path:
            path = str_path_att
    else:
        raise ValueError("identity is not correct")

    str_dict = {}
    for key in child_partition:
        # Flat result without path_dict, one sub-dict per key with it.
        if use_default_path:
            target = str_dict
        else:
            target = str_dict[key] = {}

        for i in np.arange(1, child_partition[key] + 1):
            nn = key + mid_name + str(i + 1) + '.pkl'
            if not use_default_path:
                path = path_dict[identity][key]
            load_path = path + nn

            # Each strategy is stored as (parameters, session, graph).
            if "epoch1.pkl" in nn:
                # Loaded as a plain pickle: no session or graph attached.
                target[nn] = (fp.load_pkl(load_path), None, None)
            else:
                scope = scope_finder(path)
                nn_act, sess, graph = load_action_class(
                    load_path, scope, game, training_flag=identity)
                target[nn] = (nn_act, sess, graph)

    return str_dict

Example #2

Show file

File: utils_combined.py Project: wyz2368/reward_shaping

def load_policies(game, child_partition, identity):
    """Load one player's strategies for every child game of the combined game.

    Args:
        game: Game object, forwarded unchanged to ``load_action_class``.
        child_partition: Mapping from a child-game key (a string, it is used
            as a path prefix) to a strategy count. For a count ``n`` the
            files with epoch suffixes ``2 .. n + 1`` are loaded.
        identity: ``0`` selects the defender, ``1`` the attacker. The value is
            also passed to ``load_action_class`` as ``training_flag``.

    Returns:
        Dict ``{relative_file_name: (act, sess, graph)}``. Entries loaded
        through ``fp.load_pkl`` carry ``None`` for the session and the graph.

    Raises:
        ValueError: If ``identity`` is neither 0 nor 1.
    """
    if identity not in (0, 1):
        raise ValueError("identity is not correct")

    if identity == 0:
        # Defender strategies.
        name = def_str_abs_path + 'def_str_epoch'
    else:
        # Attacker strategies.
        name = att_str_abs_path + 'att_str_epoch'

    policies = {}
    base_dir = os.getcwd() + '/combined_game/'

    for key in child_partition:
        for i in np.arange(1, child_partition[key] + 1):
            # e.g. nn = "RS/attacker_strategies/def_str_epoch2.pkl"
            nn = key + name + str(i + 1) + '.pkl'
            load_path = base_dir + nn

            # Each strategy is stored as (parameters, session, graph).
            if "epoch1.pkl" in nn:
                # Loaded as a plain pickle: no session or graph attached.
                policies[nn] = (fp.load_pkl(load_path), None, None)
                continue

            scope = scope_finder(load_path)
            nn_act, sess, graph = load_action_class(
                load_path, scope, game, training_flag=identity)
            policies[nn] = (nn_act, sess, graph)

    return policies

Example #3

Show file

File: simulation.py Project: wyz2368/transfer_RL

def series_sim(env, game, nn_att, nn_def, num_episodes):
    """Play ``num_episodes`` episodes on ``env`` and average both rewards.

    Args:
        env: Environment exposing ``T``, ``reset_everything()``, ``G``,
            ``attacker`` and ``defender``.
        game: Object providing ``att_str`` / ``def_str`` (sampled from when
            the matching strategy argument is an array). It is also passed to
            ``load_action_class``.
        nn_att: Attacker strategy. Either a strategy file name, or an
            ``np.ndarray`` used as the probability vector ``p`` of
            ``np.random.choice`` over ``game.att_str``.
        nn_def: Defender counterpart of ``nn_att`` (over ``game.def_str``).
        num_episodes: Number of episodes to simulate.

    Returns:
        Tuple ``(attacker_mean, defender_mean)`` of the per-episode reward
        means, each rounded to two decimals.
    """
    att_returns = np.array([])
    def_returns = np.array([])
    att_spec = copy.copy(nn_att)
    def_spec = copy.copy(nn_def)

    T = env.T

    # A strategy given by name is always "single". An array is single only
    # when exactly one of its entries exceeds 0.95.
    single_str_att = not (
        isinstance(nn_att, np.ndarray)
        and len(np.where(nn_att > 0.95)[0]) != 1
    )
    single_str_def = not (
        isinstance(nn_def, np.ndarray)
        and len(np.where(nn_def > 0.95)[0]) != 1
    )

    for episode in range(num_episodes):

        env.reset_everything()
        G = env.G
        attacker = env.attacker
        defender = env.defender

        aReward = 0
        dReward = 0

        # Load the attacker policy on the first episode, and again on every
        # episode when the strategy is not a single one.
        if episode == 0 or not single_str_att:
            att_name = copy.copy(att_spec)
            if isinstance(att_name, np.ndarray):
                att_name = np.random.choice(game.att_str, p=att_name)

            att_uniform_flag = "epoch1.pkl" in att_name

            att_path = os.getcwd() + "/attacker_strategies/" + att_name
            if att_uniform_flag:
                nn_att_act = fp.load_pkl(att_path)
            else:
                att_scope = 'att_str_epoch' + str(1) + '.pkl'
                nn_att_act, sess1, graph1 = load_action_class(
                    att_path, att_scope, game, 1)

        # Same reload rule for the defender policy.
        if episode == 0 or not single_str_def:
            def_name = copy.copy(def_spec)
            if isinstance(def_name, np.ndarray):
                def_name = np.random.choice(game.def_str, p=def_name)

            def_uniform_flag = "epoch1.pkl" in def_name

            def_path = os.getcwd() + "/defender_strategies/" + def_name
            if def_uniform_flag:
                nn_def_act = fp.load_pkl(def_path)
            else:
                def_scope = "def_str_epoch" + str(0) + '.pkl'
                nn_def_act, sess2, graph2 = load_action_class(
                    def_path, def_scope, game, 0)

        for t in range(T):
            timeleft = T - t

            # Policies from load_action_class run inside their own
            # graph/session; pickled ones are called directly.
            if att_uniform_flag:
                attacker.att_greedy_action_builder_single(
                    G, timeleft, nn_att_act)
            else:
                with graph1.as_default(), sess1.as_default():
                    attacker.att_greedy_action_builder_single(
                        G, timeleft, nn_att_act)

            if def_uniform_flag:
                defender.def_greedy_action_builder_single(
                    G, timeleft, nn_def_act)
            else:
                with graph2.as_default(), sess2.as_default():
                    defender.def_greedy_action_builder_single(
                        G, timeleft, nn_def_act)

            chosen_attacks = attacker.attact
            chosen_defenses = defender.defact

            # Attacker's actions: tuples address edges, anything else a node.
            for attack in chosen_attacks:
                if isinstance(attack, tuple):
                    # check OR node
                    edge_attrs = G.edges[attack]
                    aReward += edge_attrs['cost']
                    if random.uniform(0, 1) <= edge_attrs['actProb']:
                        G.nodes[attack[-1]]['state'] = 1
                else:
                    # check AND node
                    node_attrs = G.nodes[attack]
                    aReward += node_attrs['aCost']
                    if random.uniform(0, 1) <= node_attrs['actProb']:
                        node_attrs['state'] = 1

            # Defender's actions: reset the node state and pay its cost.
            for node in chosen_defenses:
                node_attrs = G.nodes[node]
                node_attrs['state'] = 0
                dReward += node_attrs['dCost']

            # Target nodes whose state is 1 add to both players' rewards.
            _, targetset = get_Targets(G)
            for node in targetset:
                node_attrs = G.nodes[node]
                if node_attrs['state'] == 1:
                    aReward += node_attrs['aReward']
                    dReward += node_attrs['dPenalty']

            # Update the defender's observation, then the attacker's.
            defender.update_obs(defender.get_def_hadAlert(G))
            defender.save_defact2prev()
            defender.defact.clear()
            attacker.update_obs(attacker.get_att_isActive(G))
            attacker.attact.clear()

        att_returns = np.append(att_returns, aReward)
        def_returns = np.append(def_returns, dReward)

    att_mean = np.round(np.mean(att_returns), 2)
    def_mean = np.round(np.mean(def_returns), 2)
    return att_mean, def_mean

Example #4

Show file

File: simulation.py Project: wyz2368/transfer_RL

def series_sim_retrain(env, game, nn_att, nn_def, num_episodes):
    """Play ``num_episodes`` episodes for retraining and average both rewards.

    A strategy given as an ``np.ndarray`` is treated as mixed: a name is
    sampled from ``game.att_str`` / ``game.def_str`` and loaded from
    ``attacker_strategies`` / ``defender_strategies``. A strategy given by
    name is loaded from ``retrain_att`` / ``retrain_def`` instead.

    Args:
        env: Environment exposing ``T``, ``reset_everything()``, ``G``,
            ``attacker`` and ``defender``.
        game: Object providing ``att_str`` / ``def_str``. It is also passed
            to ``load_action_class``.
        nn_att: Attacker strategy name, or probability array over
            ``game.att_str``.
        nn_def: Defender strategy name, or probability array over
            ``game.def_str``.
        num_episodes: Number of episodes to simulate.

    Returns:
        Tuple ``(attacker_mean, defender_mean)`` of the per-episode reward
        means (not rounded).
    """
    att_returns = np.array([])
    def_returns = np.array([])

    att_spec = copy.copy(nn_att)
    def_spec = copy.copy(nn_def)

    T = env.T

    # A strategy given by name is always "single". An array is single only
    # when exactly one of its entries exceeds 0.95.
    single_str_att = not (
        isinstance(nn_att, np.ndarray)
        and len(np.where(nn_att > 0.95)[0]) != 1
    )
    single_str_def = not (
        isinstance(nn_def, np.ndarray)
        and len(np.where(nn_def > 0.95)[0]) != 1
    )

    for episode in range(num_episodes):  # can be run in parallel

        env.reset_everything()
        G = env.G
        attacker = env.attacker
        defender = env.defender

        aReward = 0
        dReward = 0

        # Load the attacker policy on the first episode, and again on every
        # episode when the strategy is not a single one.
        if episode == 0 or not single_str_att:
            att_name = copy.copy(att_spec)
            att_mixed_flag = isinstance(att_name, np.ndarray)
            if att_mixed_flag:
                att_name = np.random.choice(game.att_str, p=att_name)

            att_uniform_flag = "epoch1.pkl" in att_name

            if att_mixed_flag:
                att_path = os.getcwd() + "/attacker_strategies/" + att_name
                scope_att = att_name
            else:
                att_path = os.getcwd() + "/retrain_att/" + att_name
                scope_att = 'att_str_retrain' + str(0) + '.pkl'

            if att_uniform_flag:
                nn_att_act = fp.load_pkl(att_path)
            else:
                nn_att_act, sess1, graph1 = load_action_class(
                    att_path, scope_att, game, 1)

        # Same reload rule for the defender policy.
        if episode == 0 or not single_str_def:
            def_name = copy.copy(def_spec)
            def_mixed_flag = isinstance(def_name, np.ndarray)
            if def_mixed_flag:
                def_name = np.random.choice(game.def_str, p=def_name)

            def_uniform_flag = "epoch1.pkl" in def_name

            if def_mixed_flag:
                def_path = os.getcwd() + "/defender_strategies/" + def_name
                scope_def = def_name
            else:
                def_path = os.getcwd() + "/retrain_def/" + def_name
                scope_def = 'def_str_retrain' + str(0) + '.pkl'

            if def_uniform_flag:
                nn_def_act = fp.load_pkl(def_path)
            else:
                nn_def_act, sess2, graph2 = load_action_class(
                    def_path, scope_def, game, 0)

        for t in range(T):
            timeleft = T - t

            # Policies from load_action_class run inside their own
            # graph/session; pickled ones are called directly.
            if att_uniform_flag:
                attacker.att_greedy_action_builder_single(
                    G, timeleft, nn_att_act)
            else:
                with graph1.as_default(), sess1.as_default():
                    attacker.att_greedy_action_builder_single(
                        G, timeleft, nn_att_act)

            if def_uniform_flag:
                defender.def_greedy_action_builder_single(
                    G, timeleft, nn_def_act)
            else:
                with graph2.as_default(), sess2.as_default():
                    defender.def_greedy_action_builder_single(
                        G, timeleft, nn_def_act)

            chosen_attacks = attacker.attact
            chosen_defenses = defender.defact

            # Attacker's actions: tuples address edges, anything else a node.
            for attack in chosen_attacks:
                if isinstance(attack, tuple):
                    # check OR node
                    edge_attrs = G.edges[attack]
                    aReward += edge_attrs['cost']
                    if random.uniform(0, 1) <= edge_attrs['actProb']:
                        G.nodes[attack[-1]]['state'] = 1
                else:
                    # check AND node
                    node_attrs = G.nodes[attack]
                    aReward += node_attrs['aCost']
                    if random.uniform(0, 1) <= node_attrs['actProb']:
                        node_attrs['state'] = 1

            # Defender's actions: reset the node state and pay its cost.
            for node in chosen_defenses:
                node_attrs = G.nodes[node]
                node_attrs['state'] = 0
                dReward += node_attrs['dCost']

            # Target nodes whose state is 1 add to both players' rewards.
            _, targetset = get_Targets(G)
            for node in targetset:
                node_attrs = G.nodes[node]
                if node_attrs['state'] == 1:
                    aReward += node_attrs['aReward']
                    dReward += node_attrs['dPenalty']

        att_returns = np.append(att_returns, aReward)
        def_returns = np.append(def_returns, dReward)

    return np.mean(att_returns), np.mean(def_returns)

Example #5

Show file

def series_sim(env, game, nn_att, nn_def, size):
    """Simulate retraining episodes and return both players' mean rewards.

    A strategy given as an ``np.ndarray`` is a probability vector over
    ``game.att_str`` / ``game.def_str`` from which a strategy name is
    sampled. A ``str`` is the name of a strategy. A player flagged as mixed
    is loaded from ``attacker_strategies`` / ``defender_strategies``; the
    other player is loaded from ``retrain_att`` / ``retrain_def``.

    Args:
        env: Environment exposing ``G_reserved``, ``attacker``, ``defender``
            (all deep-copied per episode) and ``T``.
        game: Object providing ``att_str`` / ``def_str``. It is also passed
            to ``load_action_class``.
        nn_att: Attacker strategy name or probability array.
        nn_def: Defender strategy name or probability array.
        size: Compared against 10 and 20 to derive an episode count.

    Returns:
        Tuple ``(attacker_mean, defender_mean)`` of the per-episode reward
        means.

    Raises:
        ValueError: If neither player ends up flagged as playing a mixed
            strategy.
    """
    att_returns = np.array([])
    def_returns = np.array([])

    att_spec = copy.copy(nn_att)
    def_spec = copy.copy(nn_def)

    # NOTE(review): num_epi is derived from size but never used; the loop
    # below always runs 2 episodes. Confirm which one is intended.
    if size > 20:
        num_epi = 10
    elif 10 < size <= 20:
        num_epi = 20
    else:
        num_epi = 30

    for _ in range(2):

        G = copy.deepcopy(env.G_reserved)
        attacker = copy.deepcopy(env.attacker)
        defender = copy.deepcopy(env.defender)
        T = env.T

        aReward = 0
        dReward = 0

        att_mixed_flag = False
        def_mixed_flag = False

        att_name = copy.copy(att_spec)
        def_name = copy.copy(def_spec)

        att_is_array = isinstance(att_name, np.ndarray)
        def_is_array = isinstance(def_name, np.ndarray)

        if att_is_array and isinstance(def_name, str):
            att_mixed_flag = True
            att_name = np.random.choice(game.att_str, p=att_name)
        elif isinstance(att_name, str) and def_is_array:
            def_mixed_flag = True
            def_name = np.random.choice(game.def_str, p=def_name)
        elif att_is_array and def_is_array:
            # NOTE(review): neither mixed flag is set on this path, so the
            # check below raises when both players pass arrays. Confirm
            # that this is intended.
            att_name = np.random.choice(game.att_str, p=att_name)
            def_name = np.random.choice(game.def_str, p=def_name)

        if not (att_mixed_flag or def_mixed_flag):
            raise ValueError(
                "One player should play mixed strategy in retraining simulation."
            )

        att_uniform_flag = "epoch1" in att_name
        def_uniform_flag = "epoch1" in def_name

        # Attacker policy.
        if att_mixed_flag:
            att_path = os.getcwd() + "/attacker_strategies/" + att_name
            scope_att = att_name
        else:
            att_path = os.getcwd() + "/retrain_att/" + att_name
            scope_att = 'att_str_retrain' + str(0) + '.pkl'

        if att_uniform_flag:
            nn_att_act = fp.load_pkl(att_path)
        else:
            nn_att_act, sess1, graph1 = load_action_class(
                att_path, scope_att, game, 1)

        # Defender policy.
        if def_mixed_flag:
            def_path = os.getcwd() + "/defender_strategies/" + def_name
            scope_def = def_name
        else:
            def_path = os.getcwd() + "/retrain_def/" + def_name
            scope_def = 'def_str_retrain' + str(0) + '.pkl'

        if def_uniform_flag:
            nn_def_act = fp.load_pkl(def_path)
        else:
            nn_def_act, sess2, graph2 = load_action_class(
                def_path, scope_def, game, 0)

        for t in range(T):
            timeleft = T - t

            # Policies from load_action_class run inside their own
            # graph/session; pickled ones are called directly.
            if att_uniform_flag:
                attacker.att_greedy_action_builder_single(
                    G, timeleft, nn_att_act)
            else:
                with graph1.as_default(), sess1.as_default():
                    attacker.att_greedy_action_builder_single(
                        G, timeleft, nn_att_act)

            if def_uniform_flag:
                defender.def_greedy_action_builder_single(
                    G, timeleft, nn_def_act)
            else:
                with graph2.as_default(), sess2.as_default():
                    defender.def_greedy_action_builder_single(
                        G, timeleft, nn_def_act)

            chosen_attacks = attacker.attact
            chosen_defenses = defender.defact

            # Attacker's actions: tuples address edges, anything else a node.
            for attack in chosen_attacks:
                if isinstance(attack, tuple):
                    # check OR node
                    edge_attrs = G.edges[attack]
                    aReward += edge_attrs['cost']
                    if random.uniform(0, 1) <= edge_attrs['actProb']:
                        G.nodes[attack[-1]]['state'] = 1
                else:
                    # check AND node
                    node_attrs = G.nodes[attack]
                    aReward += node_attrs['aCost']
                    if random.uniform(0, 1) <= node_attrs['actProb']:
                        node_attrs['state'] = 1

            # Defender's actions: reset the node state and pay its cost.
            for node in chosen_defenses:
                node_attrs = G.nodes[node]
                node_attrs['state'] = 0
                dReward += node_attrs['dCost']

            # Target nodes whose state is 1 add to both players' rewards.
            _, targetset = get_Targets(G)
            for node in targetset:
                node_attrs = G.nodes[node]
                if node_attrs['state'] == 1:
                    aReward += node_attrs['aReward']
                    dReward += node_attrs['dPenalty']

        att_returns = np.append(att_returns, aReward)
        def_returns = np.append(def_returns, dReward)

    return np.mean(att_returns), np.mean(def_returns)

Example #6

Show file

def single_sim(param):  # single for single episode.
    """Simulate one episode and return the accumulated rewards.

    Args:
        param: 7-tuple ``(G, game, attacker, nn_att, defender, nn_def, T)``.
            ``nn_att`` / ``nn_def`` are either an ``np.ndarray`` (a mixed
            strategy, used as the probability vector for sampling from
            ``game.att_str`` / ``game.def_str``) or a ``str`` naming a
            strategy file. ``T`` is the number of time steps.

    Returns:
        Tuple ``(aReward, dReward)`` accumulated over the ``T`` time steps.
    """
    # TODO: Dealing with uniform str
    G, game, attacker, nn_att, defender, nn_def, T = param

    aReward = 0
    dReward = 0

    att_is_array = isinstance(nn_att, np.ndarray)
    def_is_array = isinstance(nn_def, np.ndarray)

    # Replace any mixed strategy by one sampled strategy name.
    if att_is_array and isinstance(nn_def, str):
        nn_att = np.random.choice(game.att_str, p=nn_att)
    elif isinstance(nn_att, str) and def_is_array:
        nn_def = np.random.choice(game.def_str, p=nn_def)
    elif att_is_array and def_is_array:
        nn_att = np.random.choice(game.att_str, p=nn_att)
        nn_def = np.random.choice(game.def_str, p=nn_def)

    att_uniform_flag = "epoch1" in nn_att
    def_uniform_flag = "epoch1" in nn_def

    # NOTE(review): load_action_class is called with three arguments in this
    # function; confirm that this matches the signature in use.
    att_path = os.getcwd() + "/attacker_strategies/" + nn_att
    if att_uniform_flag:
        nn_att = fp.load_pkl(att_path)
    else:
        nn_att, sess1, graph1 = load_action_class(att_path, game, 1)

    def_path = os.getcwd() + "/defender_strategies/" + nn_def
    if def_uniform_flag:
        nn_def = fp.load_pkl(def_path)
    else:
        nn_def, sess2, graph2 = load_action_class(def_path, game, 0)

    for t in range(T):
        timeleft = T - t

        # Policies from load_action_class run inside their own
        # graph/session; pickled ones are called directly.
        if att_uniform_flag:
            attacker.att_greedy_action_builder_single(G, timeleft, nn_att)
        else:
            with graph1.as_default(), sess1.as_default():
                attacker.att_greedy_action_builder_single(
                    G, timeleft, nn_att)

        if def_uniform_flag:
            defender.def_greedy_action_builder_single(G, timeleft, nn_def)
        else:
            with graph2.as_default(), sess2.as_default():
                defender.def_greedy_action_builder_single(
                    G, timeleft, nn_def)

        chosen_attacks = attacker.attact
        chosen_defenses = defender.defact

        # Attacker's actions: tuples address edges, anything else a node.
        for attack in chosen_attacks:
            if isinstance(attack, tuple):
                # check OR node
                edge_attrs = G.edges[attack]
                aReward += edge_attrs['cost']
                if random.uniform(0, 1) <= edge_attrs['actProb']:
                    G.nodes[attack[-1]]['state'] = 1
            else:
                # check AND node
                node_attrs = G.nodes[attack]
                aReward += node_attrs['aCost']
                if random.uniform(0, 1) <= node_attrs['actProb']:
                    node_attrs['state'] = 1

        # Defender's actions: reset the node state and pay its cost.
        for node in chosen_defenses:
            node_attrs = G.nodes[node]
            node_attrs['state'] = 0
            dReward += node_attrs['dCost']

        # Target nodes whose state is 1 add to both players' rewards.
        _, targetset = get_Targets(G)
        for node in targetset:
            node_attrs = G.nodes[node]
            if node_attrs['state'] == 1:
                aReward += node_attrs['aReward']
                dReward += node_attrs['dPenalty']

    return aReward, dReward