def predict(self, X):
    """
    Assign every sample in X to one of the classes stored in ``self.classes_``.

    The input is passed through ``split_dataset`` together with
    ``self.lupi_features``; only the first returned part is used, the
    privileged part is discarded.

    Parameters
    ----------
    X : numpy.ndarray
        Data to classify (assumed to hold the privileged columns as well,
        since it is split before use).

    Returns
    -------
    Entries of ``self.classes_`` selected by the computed bin index.
    """
    features, _ = split_dataset(X, self.lupi_features)
    weights = self.model_state["w"]
    boundaries = self.model_state["b_s"]

    projected = np.dot(features, weights.T)[np.newaxis]

    # A trailing +inf threshold can never be reached, so the number of
    # thresholds a score reaches is at most the number of learned boundaries.
    thresholds = np.append(boundaries, np.inf)

    # For each score, count the thresholds that are less than or equal to it;
    # that count is used as the position of the predicted class.
    reached = (projected.T - thresholds) >= 0
    bin_index = np.sum(reached, -1)
    return self.classes_[bin_index]
def predict(self, X):
    """
    Evaluate the fitted linear function ``X w + b`` on the given data.

    The input is split with ``split_dataset`` using ``self.lupi_features``;
    only the first returned part enters the prediction, the privileged part
    is not used here.

    Parameters
    ----------
    X : numpy.ndarray
        Data to predict on.

    Returns
    -------
    The value of ``np.dot(X, w) + b`` with ``w`` and ``b`` taken from
    ``self.model_state``.
    """
    features, _ = split_dataset(X, self.lupi_features)
    weights = self.model_state["w"]
    bias = self.model_state["b"]
    return np.dot(features, weights) + bias
def fit(self, X_combined, y, lupi_features=None):
    """
    Fit the linear model with privileged information by solving a convex
    program with cvxpy.

    Parameters
    ----------
    X_combined : numpy.ndarray
        Data holding both the normal and the privileged features; it is
        separated with ``split_dataset``.
    y : numpy.ndarray
        Target values (assumed to have one entry per row of X_combined).
    lupi_features : int, optional
        Number of features in dataset which are considered privileged
        information (PI). PI features are expected to be the last features
        in the dataset. When omitted, ``self.lupi_features`` is used.

    Returns
    -------
    self
        The estimator, with ``model_state`` and ``constraints`` populated.

    Raises
    ------
    ValueError
        If the number of privileged features is neither passed nor available
        on the instance.
    """
    if lupi_features is None:
        lupi_features = getattr(self, "lupi_features", None)
        if lupi_features is None:
            raise ValueError("No amount of lupi features given.")
    # Remember the count that was actually used: predict() splits its input
    # with self.lupi_features and has to agree with the split done here.
    self.lupi_features = lupi_features

    X, X_priv = split_dataset(X_combined, lupi_features)
    n, d = X.shape

    # Get parameters from CV model without any feature constraints
    params = self.get_params()
    C = params["C"]
    epsilon = params["epsilon"]
    scaling_lupi_w = params["scaling_lupi_w"]
    scaling_lupi_loss = params["scaling_lupi_loss"]

    # Initialize variables in cvxpy
    w = cvx.Variable(shape=d, name="w")
    b = cvx.Variable(name="bias")
    w_priv_pos = cvx.Variable(lupi_features, name="w_priv_pos")
    b_priv_pos = cvx.Variable(name="bias_priv_pos")
    w_priv_neg = cvx.Variable(lupi_features, name="w_priv_neg")
    b_priv_neg = cvx.Variable(name="bias_priv_neg")
    slack = cvx.Variable(shape=n, name="slack")

    # Linear functions of the privileged features; they enter the
    # constraints below as additional, sample-specific tolerance.
    priv_function_pos = X_priv @ w_priv_pos + b_priv_pos
    priv_function_neg = X_priv @ w_priv_neg + b_priv_neg

    # Combined loss of lupi function and normal slacks, scaled by two constants
    priv_loss_pos = cvx.sum(priv_function_pos)
    priv_loss_neg = cvx.sum(priv_function_neg)
    priv_loss = priv_loss_pos + priv_loss_neg
    slack_loss = cvx.sum(slack)
    loss = scaling_lupi_loss * priv_loss + slack_loss

    # L1 norm regularization of both functions with 1 scaling constant
    weight_regularization = 0.5 * (
        cvx.norm(w, 1)
        + scaling_lupi_w
        * (0.5 * cvx.norm(w_priv_pos, 1) + 0.5 * cvx.norm(w_priv_neg, 1))
    )

    constraints = [
        # Residuals in either direction may exceed epsilon only by the
        # privileged function plus the ordinary slack.
        y - X @ w - b <= epsilon + priv_function_pos + slack,
        X @ w + b - y <= epsilon + priv_function_neg + slack,
        priv_function_pos >= 0,
        priv_function_neg >= 0,
        slack >= 0,
    ]
    objective = cvx.Minimize(C * loss + weight_regularization)

    # Solve problem.
    problem = cvx.Problem(objective, constraints)
    problem.solve(**self.SOLVER_PARAMS)

    self.model_state = {
        "signs_pos": priv_function_pos.value > 0,
        "signs_neg": priv_function_neg.value > 0,
        "w": w.value,
        "w_priv_pos": w_priv_pos.value,
        "w_priv_neg": w_priv_neg.value,
        "b": b.value,
        "b_priv_pos": b_priv_pos.value,
        "b_priv_neg": b_priv_neg.value,
        # Number of lupi features in the dataset
        # TODO: Move this somewhere else
        "lupi_features": lupi_features,
    }

    w_l1 = np.linalg.norm(w.value, ord=1)
    w_priv_pos_l1 = np.linalg.norm(w_priv_pos.value, ord=1)
    w_priv_neg_l1 = np.linalg.norm(w_priv_neg.value, ord=1)
    # The two privileged sub-models are combined into a single value by
    # summing their L1 norms.
    w_priv_l1 = w_priv_pos_l1 + w_priv_neg_l1

    self.constraints = {
        "priv_loss": priv_loss.value,
        "scaling_lupi_loss": scaling_lupi_loss,
        "loss": loss.value,
        "w_l1": w_l1,
        "w_priv_l1": w_priv_l1,
        "w_priv_pos_l1": w_priv_pos_l1,
        "w_priv_neg_l1": w_priv_neg_l1,
    }
    return self
def fit(self, X_combined, y, lupi_features=None):
    """
    Fit the ordinal model with privileged information by solving a convex
    program with cvxpy.

    One shared weight vector and one threshold per pair of neighbouring bins
    are learned. Samples of a bin are constrained to lie below the threshold
    to their right and above the threshold to their left, each with a margin
    of 1 that may be relaxed by a function of the privileged features and by
    an ordinary slack variable.

    Parameters
    ----------
    X_combined : numpy.ndarray
        Data holding both the normal and the privileged features; it is
        separated with ``split_dataset``.
    y : numpy.ndarray
        Class labels (assumed to have one entry per row of X_combined).
    lupi_features : int
        Number of features in dataset which are considered privileged
        information (PI). PI features are expected to be the last features
        in the dataset.

    Returns
    -------
    self
        The estimator, with ``classes_``, ``model_state`` and
        ``constraints`` populated.

    Raises
    ------
    ValueError
        If ``lupi_features`` is not given.
    """
    if lupi_features is None:
        raise ValueError("No lupi_features argument given.")
    self.lupi_features = lupi_features

    X, X_priv = split_dataset(X_combined, lupi_features)
    n, d = X.shape
    self.classes_ = np.unique(y)

    # Get parameters from CV model without any feature constraints
    C = self.hyperparam["C"]
    scaling_lupi_w = self.hyperparam["scaling_lupi_w"]
    scaling_lupi_loss = self.hyperparam["scaling_lupi_loss"]

    get_original_bin_name, n_bins = get_bin_mapping(y)
    n_boundaries = n_bins - 1

    # Initialize variables in cvxpy
    w = cvx.Variable(shape=d, name="w")
    b_s = cvx.Variable(shape=n_boundaries, name="bias")
    w_priv = cvx.Variable(shape=(lupi_features, 2), name="w_priv")
    d_priv = cvx.Variable(shape=2, name="bias_priv")
    slack_left = cvx.Variable(shape=n, name="slack_left")
    slack_right = cvx.Variable(shape=n, name="slack_right")

    def bin_members(bin_index):
        # Index tuple (as returned by np.where) of the samples in this bin.
        return np.where(y == get_original_bin_name[bin_index])

    def priv_function(members, sign):
        # Privileged linear function for the given samples; ``sign`` picks
        # the column of w_priv / entry of d_priv (0 for the left-hand
        # constraints, 1 for the right-hand constraints).
        return X_priv[members] @ w_priv[:, sign] + d_priv[sign]

    # L1 norm regularization of both functions with 1 scaling constant
    priv_l1_1 = cvx.norm(w_priv[:, 0], 1)
    priv_l1_2 = cvx.norm(w_priv[:, 1], 1)
    w_priv_l1 = priv_l1_1 + priv_l1_2
    w_l1 = cvx.norm(w, 1)
    weight_regularization = 0.5 * (w_l1 + scaling_lupi_w * w_priv_l1)

    constraints = []
    loss = 0

    # Every bin except the last has to stay below the threshold on its right.
    for left_bin in range(n_bins - 1):
        members = bin_members(left_bin)
        priv_tolerance = priv_function(members, 0)
        constraints.append(
            X[members] @ w - b_s[left_bin] - slack_left[members]
            <= -1 + priv_tolerance
        )
        constraints.append(priv_tolerance >= 0)
        loss += cvx.sum(priv_tolerance)

    # Every bin except the first has to stay above the threshold on its left.
    for right_bin in range(1, n_bins):
        members = bin_members(right_bin)
        priv_tolerance = priv_function(members, 1)
        # The slack is added here so that it relaxes the ">=" constraint,
        # mirroring the subtraction used for the "<=" constraint above.
        constraints.append(
            X[members] @ w - b_s[right_bin - 1] + slack_right[members]
            >= +1 - priv_tolerance
        )
        constraints.append(priv_tolerance >= 0)
        loss += cvx.sum(priv_tolerance)

    # Keep the thresholds in non-decreasing order.
    for i_boundary in range(n_boundaries - 1):
        constraints.append(b_s[i_boundary] <= b_s[i_boundary + 1])

    constraints.append(slack_left >= 0)
    constraints.append(slack_right >= 0)

    loss = scaling_lupi_loss * loss + cvx.sum(slack_left + slack_right)
    objective = cvx.Minimize(C * loss + weight_regularization)

    # Solve problem.
    problem = cvx.Problem(objective, constraints)
    problem.solve(**self.solver_params)

    self.model_state = {
        "w": w.value,
        "b_s": b_s.value,
        "w_priv": w_priv.value,
        "d_priv": d_priv.value,
        # Number of lupi features in the dataset
        # TODO: Move this somewhere else
        "lupi_features": lupi_features,
        "bin_boundaries": n_boundaries,
    }

    self.constraints = {
        "loss": loss.value,
        "w_l1": w_l1.value,
        "w_priv_l1": w_priv_l1.value,
        "priv_l1_1": priv_l1_1.value,
        "priv_l1_2": priv_l1_2.value,
    }
    return self