Exemple #1
0
    def post_milp(self, x, nn, output_flag, t, template):
        """Compute the one-step successor regions of ``x`` for both actions.

        For each action index in ``range(2)`` a fresh Gurobi model is built:
        the state variables are restricted to the region ``x`` through
        ``Experiment.generate_input_region``, a 2-dimensional observation is
        tied to differences of state components within ``input_epsilon / 2``,
        and the observation is restricted to the observable region stored for
        that action. When that model solves to optimality the dynamics are
        applied on top of it and the resulting region is collected.

        Args:
            x: Region forwarded to ``Experiment.generate_input_region``
                together with ``template``.
            nn: Not used by this method.
            output_flag: Value assigned to Gurobi's ``OutputFlag`` parameter.
            t: Not used by this method.
            template: Template forwarded to ``generate_input_region`` and
                ``h_repr_to_plot``.

        Returns:
            A list holding one tuple for every action for which
            ``h_repr_to_plot`` reported a successor.
        """
        successors = []
        # Each row: (observation index, minuend state index,
        #            subtrahend state index, upper-bound constraint name,
        #            lower-bound constraint name).
        observation_links = (
            (1, 0, 1, "obs_constr21", "obs_constr22"),
            (0, 2, 3, "obs_constr11", "obs_constr12"),
        )
        for action in range(2):
            action_template = self.observable_templates[action]
            action_region = self.observable_results[action]

            model = grb.Model()
            model.setParam('OutputFlag', output_flag)
            model.setParam('Threads', 2)
            state = Experiment.generate_input_region(model, template, x,
                                                     self.env_input_size)
            observation = model.addMVar(shape=(2, ),
                                        lb=float("-inf"),
                                        ub=float("inf"),
                                        name="input")

            # The observation equals a difference of two state components,
            # up to a tolerance of half the input epsilon on either side.
            half_eps = self.input_epsilon / 2
            for obs_idx, left, right, upper_name, lower_name in observation_links:
                gap = state[left] - state[right]
                model.addConstr(observation[obs_idx] <= gap + half_eps,
                                name=upper_name)
                model.addConstr(observation[obs_idx] >= gap - half_eps,
                                name=lower_name)

            # Keep only the part of the region whose observation lies in the
            # observable region recorded for this action.
            Experiment.generate_region_constraints(model, action_template,
                                                   observation, action_region,
                                                   2)
            model.optimize()
            if model.status != 2:  # 2 is Gurobi's OPTIMAL status code
                continue

            # The action is possible from this region: apply the dynamics.
            next_state = StoppingCarExperiment.apply_dynamic(
                state,
                model,
                action=action,
                env_input_size=self.env_input_size)
            model.update()
            model.optimize()
            found_successor, next_region = self.h_repr_to_plot(
                model, template, next_state)
            if found_successor:
                successors.append(tuple(next_region))
        return successors
    def post_milp(self, x, nn, output_flag, t, template):
        """Compute the successor regions of ``x`` by enumerating six branches.

        Every branch starts from a fresh Gurobi model in which ``x`` is
        encoded with ``generate_input_region`` and pushed through
        ``apply_dynamic``. ``generate_guard`` then decides whether the branch
        applies. Branches that depend on the action additionally restrict the
        pre-dynamics region to the observable region stored for that action
        and require the model to solve to optimality. For a surviving branch
        the post-dynamics region is bounded with ``optimise``, re-encoded in a
        second model, pushed through ``apply_dynamic2`` and collected when
        ``h_repr_to_plot`` reports a successor.

        Branches, in evaluation order:
            * guard 0 - bounce, action irrelevant (dynamics case 0)
            * guard 1 - ball going down and hit (dynamics case 1)
            * guard 2 - ball going up and hit (dynamics case 2)
            * guard 1 - ball going down and no hit (dynamics case 3)
            * guard 2 - ball going up and no hit (dynamics case 3)
            * guard 3 - ball out of reach and no bounce (dynamics case 3)

        Args:
            x: Region forwarded to ``generate_input_region`` together with
                ``template``.
            nn: Not used by this method.
            output_flag: Value assigned to Gurobi's ``OutputFlag`` parameter.
            t: Not used by this method.
            template: Template forwarded to ``generate_input_region``,
                ``optimise`` and ``h_repr_to_plot``.

        Returns:
            A list of tuples, one per branch that produced a successor.
        """
        successors = []
        # Observable regions are looked up once, before any model is built.
        # Index 1 is used by the "hit" branches, index 0 by the "no hit" ones.
        hit_region = (self.observable_templates[1], self.observable_results[1])
        no_hit_region = (self.observable_templates[0],
                         self.observable_results[0])

        def new_model():
            """Create an empty Gurobi model with the requested verbosity."""
            fresh = grb.Model()
            fresh.setParam('OutputFlag', output_flag)
            return fresh

        def first_step():
            """Encode ``x`` in a new model and apply the first dynamics."""
            first_model = new_model()
            start_region = self.generate_input_region(first_model, template, x,
                                                      self.env_input_size)
            flowed = self.apply_dynamic(start_region, first_model,
                                        self.env_input_size)
            return first_model, start_region, flowed

        def second_step(first_model, flowed, dynamic_case):
            """Bound the flowed region, apply the second dynamics, collect."""
            flowed_bounds = self.optimise(template, first_model, flowed)
            second_model = new_model()
            restart_region = self.generate_input_region(
                second_model, template, flowed_bounds, self.env_input_size)
            image = self.apply_dynamic2(restart_region,
                                        second_model,
                                        case=dynamic_case,
                                        env_input_size=self.env_input_size)
            found_successor, image_bounds = self.h_repr_to_plot(
                second_model, template, image)
            if found_successor:
                successors.append(tuple(image_bounds))

        # Each row: (case passed to generate_guard,
        #            observable region restricting the action or None when the
        #            action is irrelevant,
        #            case passed to apply_dynamic2).
        branches = (
            (0, None, 0),
            (1, hit_region, 1),
            (2, hit_region, 2),
            (1, no_hit_region, 3),
            (2, no_hit_region, 3),
            (3, None, 3),
        )
        for guard_case, action_region, dynamic_case in branches:
            model, start_region, flowed = first_step()
            if not self.generate_guard(model, flowed, case=guard_case):
                continue
            if action_region is not None:
                region_template, region_result = action_region
                # The action is checked over the pre-dynamics region, not
                # over the flowed one.
                Experiment.generate_region_constraints(model, region_template,
                                                       start_region,
                                                       region_result, 2)
                model.optimize()
                if model.status != 2:  # 2 is Gurobi's OPTIMAL status code
                    continue
            second_step(model, flowed, dynamic_case)

        return successors
    def post_milp(self, x, nn, output_flag, t, template):
        """Compute the one-step successor regions of ``x`` for both actions.

        For each action index in ``range(2)`` the method checks whether the
        action is possible from ``x`` and, if so, bounds the angle variables,
        builds a sin/cos table, encodes the accelerations as a MILP, applies
        the dynamics and collects the resulting region. Two back ends are
        supported, selected by the module-level ``USE_GUROBI`` flag:

        * Gurobi: the action is possible when the model restricted to the
          observable region stored for that action solves to optimality.
        * Pyomo: the action is possible when
          ``ORACartpoleExperiment.generate_nn_guard_pyo`` says so for ``nn``.

        Args:
            x: Region forwarded to the input-region builders together with
                ``template``.
            nn: Network forwarded to ``generate_nn_guard_pyo``; only used by
                the Pyomo branch.
            output_flag: Value assigned to Gurobi's ``OutputFlag`` parameter;
                only used by the Gurobi branch.
            t: Not used by this method.
            template: Template forwarded to the region builders and to the
                optimisation helpers.

        Returns:
            A list holding one tuple for every action that produced a
            successor region.

        Raises:
            AssertionError: In the Pyomo branch, when the intermediate solve
                does not terminate with an optimal solution.
        """
        post = []
        for chosen_action in range(2):
            observable_template = self.observable_templates[chosen_action]
            observable_result = self.observable_results[chosen_action]
            if USE_GUROBI:
                gurobi_model = grb.Model()
                gurobi_model.setParam('OutputFlag', output_flag)
                gurobi_model.setParam('Threads', 2)
                input_region = Experiment.generate_input_region(
                    gurobi_model, template, x, self.env_input_size)
                # Keep only the part of x lying in the observable region
                # recorded for this action.
                Experiment.generate_region_constraints(
                    gurobi_model,
                    observable_template,
                    input_region,
                    observable_result,
                    env_input_size=self.env_input_size)
                gurobi_model.optimize()
                # 2 is Gurobi's OPTIMAL status code.
                feasible_action = gurobi_model.status == 2
                if feasible_action:
                    max_theta, min_theta, max_theta_dot, min_theta_dot = self.get_theta_bounds(
                        gurobi_model, input_region)
                    sin_cos_table = self.get_sin_cos_table(
                        max_theta,
                        min_theta,
                        max_theta_dot,
                        min_theta_dot,
                        action=chosen_action,
                        step_thetaacc=100)
                    x_prime_results = self.optimise(
                        template, gurobi_model,
                        input_region)  # h representation
                    x_prime = Experiment.generate_input_region(
                        gurobi_model, template, x_prime_results,
                        self.env_input_size)
                    thetaacc, xacc = CartpoleExperiment.generate_angle_milp(
                        gurobi_model, x_prime, sin_cos_table)
                    # apply dynamic
                    x_second = self.apply_dynamic(
                        x_prime,
                        gurobi_model,
                        thetaacc=thetaacc,
                        xacc=xacc,
                        env_input_size=self.env_input_size)
                    gurobi_model.update()
                    gurobi_model.optimize()
                    found_successor, x_second_results = self.h_repr_to_plot(
                        gurobi_model, template, x_second)
                    if found_successor:
                        post.append(tuple(x_second_results))
            else:
                model = pyo.ConcreteModel()
                input_region = Experiment.generate_input_region_pyo(
                    model, template, x, self.env_input_size)
                feasible_action = ORACartpoleExperiment.generate_nn_guard_pyo(
                    model, input_region, nn, action_ego=chosen_action, M=1e04)
                if feasible_action:
                    max_theta, min_theta, max_theta_dot, min_theta_dot = self.get_theta_bounds_pyo(
                        model, input_region)
                    sin_cos_table = self.get_sin_cos_table(
                        max_theta,
                        min_theta,
                        max_theta_dot,
                        min_theta_dot,
                        action=chosen_action,
                        step_thetaacc=100)
                    x_prime_results = self.optimise_pyo(
                        template, model, input_region)
                    x_prime = Experiment.generate_input_region_pyo(
                        model,
                        template,
                        x_prime_results,
                        self.env_input_size,
                        name="x_prime_input")
                    thetaacc, xacc = ORACartpoleExperiment.generate_angle_milp_pyo(
                        model, x_prime, sin_cos_table)

                    # Replace the current objective and solve once, to check
                    # that the model extended with the angle encoding is
                    # still solvable.
                    model.del_component(model.obj)
                    model.obj = pyo.Objective(expr=thetaacc,
                                              sense=pyo.maximize)
                    result = Experiment.solve(model,
                                              solver=Experiment.use_solver)
                    assert (result.solver.status == SolverStatus.ok) and (
                        result.solver.termination_condition
                        == TerminationCondition.optimal
                    ), f"LP wasn't optimally solved {x}"
                    # apply dynamic
                    x_second = self.apply_dynamic_pyo(
                        x_prime,
                        model,
                        thetaacc=thetaacc,
                        xacc=xacc,
                        env_input_size=self.env_input_size,
                        action=chosen_action)
                    x_second_results = self.optimise_pyo(
                        template, model, x_second)
                    # The successor is the post-dynamics result, so that is
                    # what must be present. The earlier x_prime_results has
                    # already been consumed above and says nothing about
                    # whether this last optimisation produced a region.
                    found_successor = x_second_results is not None
                    if found_successor:
                        post.append(tuple(x_second_results))
        return post