    def offline_attack(self):
        for setting in self.settings_to_run:
            R_c, success_prob = setting[0], setting[1]
            M_in = get_M(self.M_0[0], self.M_0[1], R_c, success_prob)
            env_in = environment.Environment(M_in)
            pool = teacher.generate_pool(M_in[0], M_in[1], M_in[2], M_in[3], self.target_pi)
            for tchr in self.teachers_to_run:
                target_pi = tchr[2]["target_pi"]
                p = tchr[1]
                epsilon = tchr[2]["epsilon"]
                epsilon_p = tchr[2]["epsilon_p"]
                teacher_type = tchr[0]
                cost_p = tchr[2]["cost_p"]
                teacher_obj = teacher.teacher(env=env_in, target_pi=target_pi, p=p, epsilon=epsilon, epsilon_p=epsilon_p, teacher_type=teacher_type, pool=pool)  # pool is precomputed once per setting and shared across teachers

                print("==================================================")
                try:
                    M_out, feasible = teacher_obj.get_target_M(M_in)
                except Exception as e:

                    print("--teacher_type={}--R_c={}--P_success={}".format(teacher_type, R_c, success_prob))

                if not feasible:
                    print("--teacher_type={}--R_c={}--P_success={}".format(teacher_type, R_c, success_prob))
                    cost = self.max_cost_value_if_non_feasible(cost_p)
                    self.append_cost_to_accumulator(cost, teacher_type, p, cost_p, success_prob, R_c)
                    continue
                else:
                    print("--teacher_type={}--R_c={}--P_success={}".format(teacher_type, R_c, success_prob))

                env_out = environment.Environment(M_out)
                # Optimal policy of the poisoned MDP (computed for inspection; not used below).
                _, pi_T, _ = MDPSolver.averaged_valueIteration(env_out, env_out.reward)
                cost = teacher_obj.cost(M_in, M_out, cost_p)
                self.append_cost_to_accumulator(cost, teacher_type, p, cost_p, success_prob, R_c)
        return self.accumulator
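
From the indexing in the loops above, each entry of settings_to_run is an (R_c, success_prob) pair, and each entry of teachers_to_run is a (teacher_type, p, params) triple whose params dict carries target_pi, epsilon, epsilon_p, and cost_p. A minimal sketch of such inputs follows; every concrete value, including the placeholder policy, is hypothetical rather than taken from the original experiments.

# Hypothetical input shapes inferred from the loop above; all values are placeholders.
target_pi = [0, 1, 1]                       # placeholder deterministic policy (one action index per state)
settings_to_run = [
    (0.1, 0.9),                             # (R_c, success_prob)
    (0.2, 0.8),
]
teachers_to_run = [
    ("general_attack_on_dynamics", 1, {     # (teacher_type, p, params)
        "target_pi": target_pi,
        "epsilon": 0.1,
        "epsilon_p": 0.01,
        "cost_p": 2,                        # norm used for the attacker's cost
    }),
]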
    def offline_attack(self):
        # Maps "time_R_c=..._Teacher_type=..._P=..." keys to runtimes ("NF" if non-feasible).
        runtimes = {}
        for setting in self.settings_to_run:
            for tchr in self.teachers_to_run:
                R_c, success_prob = setting[0], setting[1]
                M_in = get_M(self.M_0[0], self.M_0[1], R_c, success_prob)
                env_in = environment.Environment(M_in)
                target_pi = tchr[2]["target_pi"]
                p = tchr[1]
                epsilon = tchr[2]["epsilon"]
                epsilon_p = tchr[2]["epsilon_p"]
                teacher_type = tchr[0]
                teacher_obj = teacher.teacher(env=env_in, target_pi=target_pi, p=p, epsilon=epsilon, epsilon_p=epsilon_p, teacher_type=teacher_type, pool=None)  # pool is attached below, once the teacher type is known

                # print("==================================================")
                try:
                    ##########
                    time_start = time.time()
                    ##########
                    # print(time_start)
                    if "general_attack_on_dynamics" == tchr[0]:
                        pool = teacher.generate_pool(M_in[0], M_in[1], M_in[2], M_in[3], self.target_pi)
                    else:
                        pool = None
                    teacher_obj.pool = pool
                    # print("time after pool=", time.time())
                    M_out, feasible = teacher_obj.get_target_M(M_in)
                    #######
                    end_time = time.time()-time_start
                    #######
                    if feasible:
                        dict["time_R_c={}_Teacher_type={}_P={}".format(R_c, teacher_type, p)] = end_time
                    else:
                        dict["time_R_c={}_Teacher_type={}_P={}".format(R_c, teacher_type, p)] = "NF"
                    # print(end_time)
                    # print("R_c={}, Teacher_type={} --- Runtime = {}".format(R_c, teacher_type, end_time))
                except Exception as e:
                    # Log the failing configuration and keep benchmarking the rest.
                    print(e)
                    print("time_R_c={}_Teacher_type={}_P={}".format(R_c, teacher_type, p))
        return runtimes
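
Since this variant of offline_attack exists purely to benchmark runtimes, time.perf_counter() is a more reliable clock than time.time() for elapsed-time measurement (it is monotonic and has higher resolution). Below is a small self-contained sketch of the same timing pattern; the stopwatch helper and the stand-in workload are illustrative, not part of the original code.

import time
from contextlib import contextmanager

@contextmanager
def stopwatch(label, results):
    # Store the elapsed wall-clock time of the wrapped block under `label`.
    start = time.perf_counter()
    try:
        yield
    finally:
        results[label] = time.perf_counter() - start

def solve():
    sum(range(10**6))  # stand-in workload for teacher_obj.get_target_M(M_in)

runtimes = {}
with stopwatch("time_R_c=0.1_Teacher_type=general_attack_on_dynamics_P=1", runtimes):
    solve()
print(runtimes)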
Example #3
    def __init__(self, env, target_pi, epsilon, p, epsilon_p, T_UCRL,
                 alpha_UCRL, attackers_cost_p, teacher_type):
        self.env = env
        # Baseline: the reward and optimal policy of the unattacked MDP.
        self.reward_no_attack = env.reward
        _, self.pi_no_attack, _ = MDPSolver.averaged_valueIteration(
            env, env.reward)
        self.target_pi = target_pi
        self.epsilon = epsilon
        self.p = p
        self.epsilon_p = epsilon_p
        self.T_UCRL = T_UCRL
        self.attackers_cost_p = attackers_cost_p
        self.alpha_UCRL = alpha_UCRL
        self.teacher_type = teacher_type

        self.M_0 = env.get_M_0()
        # The candidate pool and the teacher are built once from the original MDP M_0.
        self.pool = teacher.generate_pool(self.M_0[0], self.M_0[1],
                                          self.M_0[2], self.M_0[3], target_pi)
        self.teacher = teacher.teacher(env, target_pi, epsilon, p, epsilon_p,
                                       teacher_type, self.pool)
        self.learner = None
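
A usage sketch for this constructor follows. The enclosing class name OnlineAttackExperiment, the env and target_pi objects, and all numeric values are assumptions, since the snippet does not show them.

# Hypothetical instantiation; OnlineAttackExperiment is an assumed name for the
# enclosing class, and env / target_pi come from the surrounding project.
experiment = OnlineAttackExperiment(
    env=env,
    target_pi=target_pi,
    epsilon=0.1,                 # attack margin parameters (semantics per the teacher module)
    p=1,
    epsilon_p=0.01,
    T_UCRL=10**5,                # number of steps the UCRL learner runs
    alpha_UCRL=1.0,
    attackers_cost_p=2,          # norm for the attacker's cost
    teacher_type="general_attack_on_dynamics",
)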