Ejemplo n.º 1
0
    def predict(self, X):
        """
        Assign every sample to a class by comparing its linear score with the
        learned bin thresholds.

        Parameters
        ----------
        X : numpy.ndarray
            Combined data; it is passed through ``split_dataset`` together with
            ``self.lupi_features`` and only the first returned part is scored
            (presumably the non-privileged columns -- confirm in
            ``split_dataset``).

        Returns
        -------
        Entries of ``self.classes_`` selected by the computed bin index.
        """
        features, _ = split_dataset(X, self.lupi_features)
        state = self.model_state
        weights, boundaries = state["w"], state["b_s"]

        # Transposed scores broadcast against the row of thresholds below.
        projected = np.dot(features, weights.T)[np.newaxis].T
        # The trailing +inf is never reached by a finite score, so the count
        # below cannot exceed the number of learned thresholds.
        upper_bounds = np.append(boundaries, np.inf)

        # A sample's bin index equals the number of thresholds that its score
        # has reached or passed.
        passed = (projected - upper_bounds) >= 0
        bin_index = np.sum(passed, axis=-1)
        return self.classes_[bin_index]
Ejemplo n.º 2
0
    def predict(self, X):
        """
        Evaluate the fitted linear function ``X @ w + b``.

        The input is passed through ``split_dataset`` with
        ``self.lupi_features``; the second returned part (the privileged
        features, judging by its name) is discarded and does not influence
        the result.

        Parameters
        ----------
        X : numpy.ndarray
            Combined data handed to ``split_dataset``.

        Returns
        -------
        The raw linear scores computed from ``model_state["w"]`` and
        ``model_state["b"]``; no thresholding is applied here.
        """
        features, _ = split_dataset(X, self.lupi_features)
        state = self.model_state
        weights = state["w"]
        bias = state["b"]

        return np.dot(features, weights) + bias
Ejemplo n.º 3
0
    def fit(self, X_combined, y, lupi_features=None):
        """
        Fit an L1-regularised linear model with privileged information by
        solving a convex program with cvxpy.

        Two linear functions of the privileged features (one per residual
        sign) act as sample-specific slack on top of the ``epsilon`` tube;
        both are constrained to be non-negative and their sum is penalised.

        Parameters
        ----------
        X_combined : numpy.ndarray
            Data containing normal and privileged features; it is split with
            ``split_dataset(X_combined, lupi_features)``.
        y : numpy.ndarray
            Regression targets, one per row of ``X_combined``.
        lupi_features : int
            Number of features in dataset which are considered privileged information (PI).
            PI features are expected to be the last features in the dataset.
            When omitted, ``self.lupi_features`` is used if it is set.

        Returns
        -------
        self
            With ``model_state`` (solution values) and ``constraints``
            (loss and L1 norms of the solution) populated.

        Raises
        ------
        ValueError
            If ``lupi_features`` is neither passed nor available on the
            instance.
        """
        if lupi_features is None:
            # Fall back to a value stored on the estimator, if there is one.
            lupi_features = getattr(self, "lupi_features", None)
            if lupi_features is None:
                raise ValueError("No amount of lupi features given.")
        # Remember the value actually used so that the split below, the shapes
        # of the privileged variables and later predict() calls all agree.
        self.lupi_features = lupi_features
        X, X_priv = split_dataset(X_combined, lupi_features)
        (n, d) = X.shape

        # Get parameters from CV model without any feature constraints
        params = self.get_params()
        C = params["C"]
        epsilon = params["epsilon"]
        scaling_lupi_w = params["scaling_lupi_w"]
        scaling_lupi_loss = params["scaling_lupi_loss"]

        # Initialize variables in cvxpy
        w = cvx.Variable(shape=(d), name="w")
        b = cvx.Variable(name="bias")
        w_priv_pos = cvx.Variable(lupi_features, name="w_priv_pos")
        b_priv_pos = cvx.Variable(name="bias_priv_pos")
        w_priv_neg = cvx.Variable(lupi_features, name="w_priv_neg")
        b_priv_neg = cvx.Variable(name="bias_priv_neg")
        slack = cvx.Variable(shape=(n), name="slack")

        # Define functions for better readability
        priv_function_pos = X_priv @ w_priv_pos + b_priv_pos
        priv_function_neg = X_priv @ w_priv_neg + b_priv_neg

        # Combined loss of lupi function and normal slacks, scaled by two constants
        priv_loss_pos = cvx.sum(priv_function_pos)
        priv_loss_neg = cvx.sum(priv_function_neg)
        priv_loss = priv_loss_pos + priv_loss_neg
        slack_loss = cvx.sum(slack)
        loss = scaling_lupi_loss * priv_loss + slack_loss

        # L1 norm regularization of both functions with 1 scaling constant
        weight_regularization = 0.5 * (
            cvx.norm(w, 1)
            + scaling_lupi_w
            * (0.5 * cvx.norm(w_priv_pos, 1) + 0.5 * cvx.norm(w_priv_neg, 1))
        )

        constraints = [
            # Residuals in either direction may exceed epsilon only by the
            # privileged function of that direction plus the ordinary slack.
            y - X @ w - b <= epsilon + priv_function_pos + slack,
            X @ w + b - y <= epsilon + priv_function_neg + slack,
            priv_function_pos >= 0,
            priv_function_neg >= 0,
            slack >= 0,
        ]
        objective = cvx.Minimize(C * loss + weight_regularization)

        # Solve problem.
        problem = cvx.Problem(objective, constraints)
        problem.solve(**self.SOLVER_PARAMS)

        self.model_state = {
            "signs_pos": priv_function_pos.value > 0,
            "signs_neg": priv_function_neg.value > 0,
            "w": w.value,
            "w_priv_pos": w_priv_pos.value,
            "w_priv_neg": w_priv_neg.value,
            "b": b.value,
            "b_priv_pos": b_priv_pos.value,
            "b_priv_neg": b_priv_neg.value,
            "lupi_features": lupi_features,  # Number of lupi features in the dataset TODO: Move this somewhere else,
        }
        w_l1 = np.linalg.norm(w.value, ord=1)
        w_priv_pos_l1 = np.linalg.norm(w_priv_pos.value, ord=1)
        w_priv_neg_l1 = np.linalg.norm(w_priv_neg.value, ord=1)
        # The L1 norms of both privileged submodels are summed into a single
        # value describing the privileged weights.
        w_priv_l1 = w_priv_pos_l1 + w_priv_neg_l1
        self.constraints = {
            "priv_loss": priv_loss.value,
            "scaling_lupi_loss": scaling_lupi_loss,
            "loss": loss.value,
            "w_l1": w_l1,
            "w_priv_l1": w_priv_l1,
            "w_priv_pos_l1": w_priv_pos_l1,
            "w_priv_neg_l1": w_priv_neg_l1,
        }
        return self
Ejemplo n.º 4
0
    def fit(self, X_combined, y, lupi_features=None):
        """
        Fit an L1-regularised ordinal model with privileged information by
        solving a convex program with cvxpy.

        One weight vector ``w`` is shared by all bins and ``n_bins - 1``
        ordered thresholds ``b_s`` separate neighbouring bins. Two linear
        functions of the privileged features (column 0 for the left-side
        constraints, column 1 for the right-side ones) relax the margin of
        each sample; they are constrained to be non-negative and their sum is
        penalised.

        Parameters
        ----------
        X_combined : numpy.ndarray
            Data containing normal and privileged features; it is split with
            ``split_dataset(X_combined, lupi_features)``.
        y : numpy.ndarray
            Ordinal labels, one per row of ``X_combined``. Its unique values
            are stored in ``self.classes_``.
        lupi_features : int
            Number of features in dataset which are considered privileged information (PI).
            PI features are expected to be the last features in the dataset.

        Returns
        -------
        self
            With ``model_state`` (solution values) and ``constraints``
            (loss and L1 norms of the solution) populated.

        Raises
        ------
        ValueError
            If ``lupi_features`` is not given.
        """
        if lupi_features is None:
            raise ValueError("No lupi_features argument given.")
        self.lupi_features = lupi_features
        X, X_priv = split_dataset(X_combined, lupi_features)
        (n, d) = X.shape
        self.classes_ = np.unique(y)

        # Get parameters from CV model without any feature constraints
        C = self.hyperparam["C"]
        scaling_lupi_w = self.hyperparam["scaling_lupi_w"]
        scaling_lupi_loss = self.hyperparam["scaling_lupi_loss"]

        # NOTE(review): get_bin_mapping presumably maps consecutive bin
        # indices to the original label values -- confirm in its definition.
        get_original_bin_name, n_bins = get_bin_mapping(y)
        n_boundaries = n_bins - 1

        # Initialize variables in cvxpy
        w = cvx.Variable(shape=(d), name="w")
        b_s = cvx.Variable(shape=(n_boundaries), name="bias")

        w_priv = cvx.Variable(shape=(self.lupi_features, 2), name="w_priv")
        d_priv = cvx.Variable(shape=(2), name="bias_priv")

        slack_left = cvx.Variable(shape=(n), name="slack_left")
        slack_right = cvx.Variable(shape=(n), name="slack_right")

        def bin_members(bin_index):
            # Positions of all samples whose label belongs to the given bin.
            return np.where(y == get_original_bin_name[bin_index])

        def priv_function(members, sign):
            # Privileged linear function for one side (0 = left, 1 = right),
            # evaluated on the selected samples. `@` is the matrix product;
            # `*` is deprecated for that purpose in cvxpy.
            return X_priv[members] @ w_priv[:, sign] + d_priv[sign]

        # L1 norm regularization of both functions with 1 scaling constant
        priv_l1_1 = cvx.norm(w_priv[:, 0], 1)
        priv_l1_2 = cvx.norm(w_priv[:, 1], 1)
        w_priv_l1 = priv_l1_1 + priv_l1_2
        w_l1 = cvx.norm(w, 1)
        weight_regularization = 0.5 * (w_l1 + scaling_lupi_w * w_priv_l1)

        constraints = []
        loss = 0

        # Samples of every bin except the last must lie left of the threshold
        # on their right, up to the ordinary and the privileged slack.
        for left_bin in range(0, n_bins - 1):
            indices = bin_members(left_bin)
            priv_slack = priv_function(indices, 0)
            constraints.append(
                X[indices] @ w - b_s[left_bin] - slack_left[indices]
                <= -1 + priv_slack)
            constraints.append(priv_slack >= 0)
            loss += cvx.sum(priv_slack)

        # Add constraints for slack into right neighboring bins
        for right_bin in range(1, n_bins):
            indices = bin_members(right_bin)
            priv_slack = priv_function(indices, 1)
            constraints.append(
                X[indices] @ w - b_s[right_bin - 1] - slack_right[indices]
                >= +1 - priv_slack)
            constraints.append(priv_slack >= 0)
            loss += cvx.sum(priv_slack)

        # Keep the thresholds in non-decreasing order.
        for i_boundary in range(0, n_boundaries - 1):
            constraints.append(b_s[i_boundary] <= b_s[i_boundary + 1])

        constraints.append(slack_left >= 0)
        constraints.append(slack_right >= 0)
        loss = scaling_lupi_loss * loss + cvx.sum(slack_left + slack_right)

        objective = cvx.Minimize(C * loss + weight_regularization)

        # Solve problem.
        solver_params = self.solver_params
        problem = cvx.Problem(objective, constraints)
        problem.solve(**solver_params)

        self.model_state = {
            "w": w.value,
            "b_s": b_s.value,
            "w_priv": w_priv.value,
            "d_priv": d_priv.value,
            "lupi_features":
            lupi_features,  # Number of lupi features in the dataset TODO: Move this somewhere else
            "bin_boundaries": n_boundaries,
        }

        self.constraints = {
            "loss": loss.value,
            "w_l1": w_l1.value,
            "w_priv_l1": w_priv_l1.value,
            "priv_l1_1": priv_l1_1.value,
            "priv_l1_2": priv_l1_2.value,
        }
        return self