def _check_optimality_conditions(self,
                                     model_params,
                                     lambdas,
                                     opt_thres=1e-2):
        # Sanity-check that cvxpy solved to good enough accuracy: the
        # (sub)gradient of the penalized objective should be ~zero at the
        # fitted parameters. Can also be used to check that the implicit
        # derivative assumptions hold.
        #
        # model_params: dict with "alpha", "beta", "gamma" entries.
        # lambdas: must be an exploded lambda matrix (indexed 0..4 below:
        #   0 = nuclear norm, 1/2 = alpha lasso/ridge, 3/4 = beta lasso/ridge).
        # opt_thres: tolerance on the gradient norms used by the asserts.
        print "check_optimality_conditions!"

        alpha = model_params["alpha"]
        beta = model_params["beta"]
        gamma = model_params["gamma"]

        # SVD of the low-rank component; used to form the nuclear-norm
        # subgradient terms below.
        u_hat, sigma_hat, v_hat = self._get_svd_mini(gamma)

        # d(square loss)/d(fitted matrix): column-major-flattened residuals,
        # masked to the training entries via self.train_vec.
        d_square_loss = -1.0 / self.num_train * np.multiply(
            self.train_vec,
            make_column_major_flat(self.data.observed_matrix -
                                   get_matrix_completion_fitted_values(
                                       self.data.row_features, self.data.
                                       col_features, alpha, beta, gamma)))

        # Stationarity wrt gamma, checked from the left (U^T * ...) and the
        # right (... * V); lambdas[0] * sign(sigma_hat) is the nuclear-norm
        # subgradient contribution.
        # NOTE(review): assumes sigma_hat has no zero diagonal entries here —
        # confirm _get_svd_mini drops zero singular values.
        left_grad_at_opt_gamma = (u_hat.T * make_column_major_reshape(
            d_square_loss, (self.data.num_rows, self.data.num_cols)) +
                                  lambdas[0] * np.sign(sigma_hat) * v_hat.T)
        right_grad_at_opt_gamma = (make_column_major_reshape(
            d_square_loss, (self.data.num_rows, self.data.num_cols)) * v_hat +
                                   lambdas[0] * u_hat * np.sign(sigma_hat))
        left_grad_norm = np.linalg.norm(left_grad_at_opt_gamma)
        right_grad_norm = np.linalg.norm(right_grad_at_opt_gamma)
        print "grad_at_opt wrt gamma (should be zero)", left_grad_norm, right_grad_norm
        assert (left_grad_norm < opt_thres)
        assert (right_grad_norm < opt_thres)

        # Stationarity wrt alpha: only checked for coordinates away from zero
        # (|alpha_i| > zero_thres), where the lasso subgradient is exactly
        # lambdas[1] * sign(alpha_i); lambdas[2] * alpha_i is the ridge term.
        print "alpha", alpha
        grad_at_opt_alpha = []
        for i in range(alpha.size):
            if np.abs(alpha[i]) > self.zero_thres:
                alpha_sign = np.sign(alpha[i])
                grad_at_opt_alpha.append(
                    (d_square_loss.T * make_column_major_flat(
                        self.data.row_features[:, i] * self.onesT_row) +
                     lambdas[1] * alpha_sign + lambdas[2] * alpha[i])[0, 0])
        print "grad_at_opt wrt alpha (should be zero)", grad_at_opt_alpha
        assert (np.all(np.abs(grad_at_opt_alpha) < opt_thres))

        # Stationarity wrt beta, symmetric to alpha but using col_features
        # (note the transpose) and lambdas[3] (lasso) / lambdas[4] (ridge).
        print "beta", beta
        grad_at_opt_beta = []
        for i in range(beta.size):
            if np.abs(beta[i]) > self.zero_thres:
                beta_sign = np.sign(beta[i])
                grad_at_opt_beta.append(
                    (d_square_loss.T * make_column_major_flat(
                        (self.data.col_features[:, i] * self.onesT_col).T) +
                     lambdas[3] * beta_sign + lambdas[4] * beta[i])[0, 0])
        print "grad_at_opt wrt beta (should be zero)", grad_at_opt_beta
        assert (np.all(np.abs(grad_at_opt_beta) < opt_thres))
# ---- Esempio n. 2 (scraper separator; commented out so the module parses) ----
    def get_value(self, alphas, betas, gamma, gamma_nuc_norm=None):
        # Penalized training objective: squared loss on the training entries
        # plus a nuclear-norm penalty on gamma and one group-lasso penalty per
        # row/column feature group.
        # If gamma_nuc_norm is supplied it is used directly instead of
        # recomputing np.linalg.norm(gamma, ord="nuc").
        residual_vec = make_column_major_flat(
            self.observed_matrix - get_matrix_completion_groups_fitted_values(
                self.row_features,
                self.col_features,
                alphas,
                betas,
                gamma,
            ))

        # Average squared error over the training entries only.
        square_loss = 0.5 / self.num_train * get_norm2(
            residual_vec[self.train_idx],
            power=2,
        )

        # Nuclear-norm penalty on the low-rank interaction matrix.
        if gamma_nuc_norm is None:
            nuc_norm = self.lambdas[0] * np.linalg.norm(gamma, ord="nuc")
        else:
            nuc_norm = self.lambdas[0] * gamma_nuc_norm

        # Group lasso penalties: lambdas[1:1+num_row_groups] for the alpha
        # groups, the following ones for the beta groups.
        alpha_pen = sum(
            self.lambdas[1 + i] * get_norm2(a, power=1)
            for i, a in enumerate(alphas))
        beta_pen = sum(
            self.lambdas[1 + self.num_row_groups + i] * get_norm2(b, power=1)
            for i, b in enumerate(betas))
        return square_loss + nuc_norm + alpha_pen + beta_pen
 def _create_sigma_mask(self, sigma_hat):
     # Build a 0/1 matrix the shape of sigma_hat: zero on the diagonal
     # exactly where sigma_hat's diagonal is zero, one everywhere else.
     # Return it as a diagonal matrix over the column-major flattening.
     mask = np.ones(sigma_hat.shape)
     zero_positions = [
         j for j in range(sigma_hat.shape[0]) if sigma_hat[j, j] == 0
     ]
     for j in zero_positions:
         mask[j, j] = 0
     flat_mask = make_column_major_flat(mask)
     return np.diag(flat_mask.flatten())
 def _get_d_square_loss(self, alphas, betas, gamma, row_features,
                        col_features):
     # First derivative of the square loss wrt the fitted matrix
     # X = gamma + row/column feature effects, as a column-major-flattened
     # vector masked to the training entries via self.train_vec.
     fitted = get_matrix_completion_groups_fitted_values(
         row_features, col_features, alphas, betas, gamma)
     flat_residuals = make_column_major_flat(
         self.data.observed_matrix - fitted)
     return -1.0 / self.num_train * np.multiply(self.train_vec,
                                                flat_residuals)
    def _get_val_gradient(self, grad_dict, alphas, betas, gamma, row_features,
                          col_features):
        # Gradient of the validation loss wrt lambda, given the gradients of
        # the model parameters wrt lambda in grad_dict (keys:
        # "dgamma_dlambda", "dalphas_dlambda", "dbetas_dlambda").
        #
        # BUG FIX: the original did `model_grad = grad_dict[...]` followed by
        # `model_grad += ...`, which for numpy arrays/matrices mutates the
        # caller's grad_dict entry in place. Copy first and accumulate
        # non-destructively.
        model_grad = grad_dict["dgamma_dlambda"].copy()
        for da_dlambda, row_f in zip(grad_dict["dalphas_dlambda"],
                                     row_features):
            model_grad = model_grad + row_f * da_dlambda * self.onesT_row
        for db_dlambda, col_f in zip(grad_dict["dbetas_dlambda"],
                                     col_features):
            model_grad = model_grad + (col_f * db_dlambda * self.onesT_col).T

        # Chain rule: d(val loss)/d(lambda) is the inner product of the
        # validation-set residuals with d(fitted values)/d(lambda).
        dval_dlambda = -1.0 / self.num_val * (make_column_major_flat(
            self.data.observed_matrix -
            get_matrix_completion_groups_fitted_values(
                row_features, col_features, alphas, betas, gamma)))[
                    self.data.validate_idx].T * make_column_major_flat(
                        model_grad)[self.data.validate_idx]
        return dval_dlambda
# ---- Esempio n. 6 (scraper separator; commented out so the module parses) ----
 def _get_dsquare_loss(self):
     # First derivative of the square loss wrt the fitted matrix at the
     # current iterates (alpha_curr, beta_curr, gamma_curr), as a
     # column-major-flattened vector with non-training entries zeroed.
     #
     # BUG FIX: the original signature omitted `self`, so calling this as a
     # method raised TypeError — every attribute access below requires it.
     d_square_loss = -1.0 / self.num_train * make_column_major_flat(
         self.observed_matrix - get_matrix_completion_fitted_values(
             self.row_features,
             self.col_features,
             self.alpha_curr,
             self.beta_curr,
             self.gamma_curr,
         ))
     # Only the training entries contribute to the loss.
     d_square_loss[self.non_train_mask_vec] = 0
     return d_square_loss
    def _get_val_gradient(self, grad_dict, alpha, beta, gamma, row_features,
                          col_features):
        # Gradient of the validation loss wrt lambda, given the gradients of
        # the model parameters wrt lambda in grad_dict (keys:
        # "dgamma_dlambda", "dalpha_dlambda", "dbeta_dlambda").
        #
        # BUG FIX: the original did `model_grad = grad_dict[...]` followed by
        # `model_grad += ...`, which for numpy arrays/matrices mutates the
        # caller's grad_dict entry in place. Copy first and accumulate
        # non-destructively.
        model_grad = grad_dict["dgamma_dlambda"].copy()
        if alpha.size > 0:
            model_grad = model_grad + (row_features *
                                       grad_dict["dalpha_dlambda"] *
                                       self.onesT_row)
        if beta.size > 0:
            model_grad = model_grad + (col_features *
                                       grad_dict["dbeta_dlambda"] *
                                       self.onesT_col).T

        # Chain rule: inner product of the validation residuals with
        # d(fitted values)/d(lambda).
        dval_dlambda = -1.0 / self.num_val * make_column_major_flat(
            self.data.observed_matrix - get_matrix_completion_fitted_values(
                row_features,
                col_features,
                alpha,
                beta,
                gamma,
            ))[self.data.validate_idx].T * make_column_major_flat(model_grad)[
                self.data.validate_idx]
        return dval_dlambda
# ---- Esempio n. 8 (scraper separator; commented out so the module parses) ----
 def get_value(self, alpha, beta, gamma, given_nuc_norm=None):
     # Penalized objective: training squared loss plus a nuclear-norm
     # penalty on gamma and elastic-net (lasso + ridge) penalties on alpha
     # and beta. If given_nuc_norm is supplied it is used instead of
     # recomputing the nuclear norm of gamma.
     residual_vec = make_column_major_flat(
         self.observed_matrix - get_matrix_completion_fitted_values(
             self.row_features,
             self.col_features,
             alpha,
             beta,
             gamma,
         ))
     # Average squared error over the training entries only.
     square_loss = 0.5 / self.num_train * get_norm2(
         residual_vec[self.train_idx],
         power=2,
     )
     if given_nuc_norm is not None:
         nuc_norm = self.lambdas[0] * given_nuc_norm
     else:
         nuc_norm = self.lambdas[0] * np.linalg.norm(gamma, ord="nuc")
     # Elastic-net terms: lambdas[1]/lambdas[3] lasso, lambdas[2]/lambdas[4]
     # ridge, for alpha and beta respectively.
     l1_alpha = self.lambdas[1] * np.linalg.norm(alpha, ord=1)
     l2_alpha = 0.5 * self.lambdas[2] * get_norm2(alpha, power=2)
     l1_beta = self.lambdas[3] * np.linalg.norm(beta, ord=1)
     l2_beta = 0.5 * self.lambdas[4] * get_norm2(beta, power=2)
     return square_loss + nuc_norm + l1_alpha + l2_alpha + l1_beta + l2_beta
# ---- Esempio n. 9 (scraper separator; commented out so the module parses) ----
 def _get_masked(self, obs_matrix):
     # Zero out every entry of obs_matrix that is not in the training set,
     # working on the column-major flattened vector, then reshape back to a
     # (num_rows, num_cols) matrix.
     flat_obs = make_column_major_flat(obs_matrix)
     flat_obs[self.non_train_mask_vec] = 0
     return make_column_major_reshape(flat_obs,
                                      (self.num_rows, self.num_cols))
    def _double_check_derivative_indepth_lambda0(self, model1, model2, model0,
                                                 eps):
        # Numerically cross-check the implicit derivatives wrt lambda0 using
        # central finite differences: model1/model2 were fit at lambda0 +/- eps
        # and model0 at lambda0. Prints diagnostics; everything labeled
        # "should be zero" is expected near zero, though not everything will
        # be zero if the objective is not differentiable at that point.
        dalpha_dlambda = (model1["alpha"] - model2["alpha"]) / (eps * 2)
        dbeta_dlambda = (model1["beta"] - model2["beta"]) / (eps * 2)

        # Finite-difference derivatives of the SVD factors of gamma.
        gamma1 = model1["gamma"]
        u1, s1, v1 = self._get_svd_mini(gamma1)
        gamma2 = model2["gamma"]
        u2, s2, v2 = self._get_svd_mini(gamma2)
        gamma0 = model0["gamma"]
        u_hat, sigma_hat, v_hat = self._get_svd_mini(gamma0)
        dU_dlambda = (u1 - u2) / (eps * 2)
        dV_dlambda = (v1 - v2) / (eps * 2)
        dSigma_dlambda = (s1 - s2) / (eps * 2)
        dgamma_dlambda = (gamma1 - gamma2) / (eps * 2)

        print "dalpha_dlambda0, %s" % (dalpha_dlambda)
        print "dBeta_dlambda0, %s" % (dbeta_dlambda)
        print "dU_dlambda0", dU_dlambda
        print "ds_dlambda0, %s" % (dSigma_dlambda)
        print "dgamma_dlambda0, %s" % (dgamma_dlambda)

        # Product rule applied to gamma = U * Sigma * V^T; should agree with
        # the direct finite difference dgamma_dlambda.
        split_dgamma_dlambda = dU_dlambda * sigma_hat * v_hat.T + u_hat * dSigma_dlambda * v_hat.T + u_hat * sigma_hat * dV_dlambda.T

        # print "alpha1", model1["alpha"]
        # print 'alpha2', model2["alpha"]
        # print "eps", eps
        # print "u_hat", u_hat
        # print "u1", u1
        # print "u2", u2
        # print "v_hat", v_hat
        # print "v1", v1
        # print "v2", v2
        # print "sigma_hat", sigma_hat
        # print "s1", s1
        # print "s2", s2

        # Orthogonality constraints U^T U = I and V^T V = I imply these
        # symmetrized derivative products vanish.
        print "should be zero? dU_dlambda * u.T", u_hat.T * dU_dlambda + dU_dlambda.T * u_hat
        print "should be zero? dv_dlambda * v.T", dV_dlambda.T * v_hat + v_hat.T * dV_dlambda

        print "should be zero? dgamma_dlambda - dgamma_dlambda", split_dgamma_dlambda - dgamma_dlambda

        # Derivative of the square-loss gradient wrt lambda: the training-mask
        # diagonal times the flattened derivative of the fitted values.
        d_square_loss = 1.0 / self.num_train * self.train_vec_diag * make_column_major_flat(
            dgamma_dlambda +
            self.data.row_features * dalpha_dlambda * self.onesT_row +
            (self.data.col_features * dbeta_dlambda * self.onesT_col).T)

        # Plug the numerical derivatives into the implicit-derivative
        # equation for alpha; each entry should be near zero.
        dalpha_dlambda_imp = []
        for i in range(dalpha_dlambda.size):
            dalpha_dlambda_imp.append(
                (d_square_loss.T * make_column_major_flat(
                    self.data.row_features[:, i] * self.onesT_row) +
                 self.fmodel.current_lambdas[1] * dalpha_dlambda[i])[0, 0])
        print "should be zero? numerical plugin to the imp deriv eqn, dalpha_dlambda", dalpha_dlambda_imp

        # Same check for beta.
        # NOTE(review): uses current_lambdas[1] here as well — confirm beta's
        # penalty is not supposed to use a different lambda index.
        db_dlambda_imp = []
        for i in range(dbeta_dlambda.size):
            db_dlambda_imp.append(
                (d_square_loss.T * make_column_major_flat(
                    (self.data.col_features[:, i] * self.onesT_col).T) +
                 self.fmodel.current_lambdas[1] * dbeta_dlambda[i])[0, 0])
        print "should be zero? numerical plugin to the imp deriv eqn, dbeta_dlambda_imp", db_dlambda_imp

        # Same check for gamma, using the nuclear-norm subgradient terms.
        print "should be zero? numerical plugin to the imp deriv eqn, dgamma_dlambda", (
            u_hat.T * make_column_major_reshape(
                d_square_loss,
                (self.data.num_rows, self.data.num_cols)) * v_hat +
            np.sign(sigma_hat) + u_hat.T * self.fmodel.current_lambdas[0] *
            dU_dlambda * np.sign(sigma_hat) + self.fmodel.current_lambdas[0] *
            np.sign(sigma_hat) * dV_dlambda.T * v_hat)
    def _check_optimality_conditions(self,
                                     model_params,
                                     lambdas,
                                     opt_thres=1e-2):
        # Sanity-check (group-lasso variant) that cvxpy solved to good enough
        # accuracy: the (sub)gradient of the penalized objective should be
        # ~zero at the fitted parameters. Also used to check that the
        # implicit-derivative assumptions hold. Asserts are commented out;
        # this version only prints the gradient norms.
        #
        # model_params: dict with "alphas", "betas", "gamma" (lists of group
        #   coefficient vectors plus the low-rank matrix).
        # lambdas: must be an exploded lambda matrix (0 = nuclear norm, then
        #   one entry per row group followed by one per column group).
        # opt_thres: tolerance intended for the (disabled) asserts.
        print "check_optimality_conditions!"

        alphas = model_params["alphas"]
        betas = model_params["betas"]
        gamma = model_params["gamma"]

        # SVD of the low-rank component, for the nuclear-norm subgradient.
        u_hat, sigma_hat, v_hat = self._get_svd_mini(gamma)
        # NOTE(review): `a` is computed but never used below — presumably
        # leftover debugging; the residual is recomputed inside
        # _get_d_square_loss.
        a = self.data.observed_matrix - get_matrix_completion_groups_fitted_values(
            self.data.row_features, self.data.col_features, alphas, betas,
            gamma)

        # d(square loss)/d(fitted matrix), flattened column-major and masked
        # to the training entries.
        d_square_loss = self._get_d_square_loss(
            alphas,
            betas,
            gamma,
            self.data.row_features,
            self.data.col_features,
        )

        # Stationarity wrt gamma from the right (... * V) and the left
        # (U^T * ...); lambdas[0] * sign(sigma_hat) is the nuclear-norm
        # subgradient term.
        left_grad_at_opt_gamma = (make_column_major_reshape(
            d_square_loss, (self.data.num_rows, self.data.num_cols)) * v_hat +
                                  lambdas[0] * u_hat * np.sign(sigma_hat))
        right_grad_at_opt_gamma = (u_hat.T * make_column_major_reshape(
            d_square_loss, (self.data.num_rows, self.data.num_cols)) +
                                   lambdas[0] * np.sign(sigma_hat) * v_hat.T)
        print "left grad_at_opt wrt gamma (should be zero)", get_norm2(
            left_grad_at_opt_gamma)
        print "right grad_at_opt wrt gamma (should be zero)", get_norm2(
            right_grad_at_opt_gamma)
        # assert(get_norm2(left_grad_at_opt_gamma) < opt_thres)
        # assert(get_norm2(right_grad_at_opt_gamma) < opt_thres)

        # Stationarity wrt each alpha group: only checked for groups away
        # from zero, where the group-lasso subgradient is
        # lambdas[1 + i] * alpha / ||alpha||_2.
        for i, a_f_tuple in enumerate(zip(alphas, self.data.row_features)):
            alpha, row_f = a_f_tuple
            if np.linalg.norm(alpha) > 1e-5:
                grad_at_opt_alpha = []
                for j in range(alpha.size):
                    grad_at_opt_alpha.append(
                        (d_square_loss.T *
                         make_column_major_flat(row_f[:, j] * self.onesT_row) +
                         lambdas[1 + i] * alpha[j] /
                         np.linalg.norm(alpha, ord=None))[0, 0])
                print "grad_at_opt wrt alpha (should be zero)", get_norm2(
                    grad_at_opt_alpha)
                # assert(np.linalg.norm(grad_at_opt_alpha) < opt_thres)

        # Stationarity wrt each beta group, symmetric to alpha but with
        # column features (note the transpose) and the beta lambdas offset by
        # the number of row groups.
        for i, b_f_tuple in enumerate(zip(betas, self.data.col_features)):
            beta, col_f = b_f_tuple
            if np.linalg.norm(beta) > 1e-5:
                grad_at_opt_beta = []
                for j in range(beta.size):
                    grad_at_opt_beta.append(
                        (d_square_loss.T * make_column_major_flat(
                            (col_f[:, j] * self.onesT_col).T) +
                         lambdas[1 + self.settings.num_row_groups + i] *
                         beta[j] / np.linalg.norm(beta, ord=None))[0, 0])
                print "grad_at_opt wrt beta (should be zero)", get_norm2(
                    grad_at_opt_beta)