Ejemplo n.º 1
0
    def get_value(self, alphas, betas, gamma, gamma_nuc_norm=None):
        """Return the penalized training objective for the grouped model.

        The value is the sum of four terms: a scaled squared-residual term
        over the training entries, a nuclear-norm penalty on ``gamma``
        weighted by ``self.lambdas[0]``, and one group-lasso penalty per
        entry of ``alphas`` and of ``betas``.

        Args:
            alphas: iterable of coefficient groups; group ``i`` is weighted
                by ``self.lambdas[1 + i]``.
            betas: iterable of coefficient groups; group ``i`` is weighted
                by ``self.lambdas[1 + self.num_row_groups + i]``.
            gamma: matrix handed to the fitted-value helper and to the
                nuclear norm.
            gamma_nuc_norm: optional precomputed nuclear norm of ``gamma``.
                When ``None`` it is computed with
                ``np.linalg.norm(gamma, ord="nuc")``.

        Returns:
            ``square_loss + nuc_norm + alpha_pen + beta_pen``.
        """
        fitted = get_matrix_completion_groups_fitted_values(
            self.row_features,
            self.col_features,
            alphas,
            betas,
            gamma,
        )
        residual_flat = make_column_major_flat(self.observed_matrix - fitted)

        # Only the entries selected by train_idx contribute to the loss.
        # get_norm2(..., power=2) is presumably the squared Euclidean norm
        # -- confirm against the helper's definition.
        square_loss = 0.5 / self.num_train * get_norm2(
            residual_flat[self.train_idx],
            power=2,
        )

        # Reuse the caller's nuclear norm when given (avoids recomputing it).
        if gamma_nuc_norm is None:
            nuc_value = np.linalg.norm(gamma, ord="nuc")
        else:
            nuc_value = gamma_nuc_norm
        nuc_norm = self.lambdas[0] * nuc_value

        # Group lasso penalties: one weighted (unsquared) norm per group.
        alpha_pen = sum(
            self.lambdas[1 + k] * get_norm2(group, power=1)
            for k, group in enumerate(alphas))
        beta_pen = sum(
            self.lambdas[1 + self.num_row_groups + k] * get_norm2(
                group, power=1)
            for k, group in enumerate(betas))

        return square_loss + nuc_norm + alpha_pen + beta_pen
        def _get_block_diag_component(idx):
            """Return the scaled ``beta * beta.T`` block for group ``idx``.

            Reads ``beta_minis`` and ``self`` from the enclosing scope.
            An empty group yields an empty 0x0 matrix. Otherwise the result is
            ``-lambda_idx / get_norm2(beta, power=3) * (beta * beta.T)``.
            """
            group_beta = beta_minis[idx]
            if group_beta.size == 0:
                return np.matrix(np.zeros((0, 0))).T

            # assumes group_beta is a column np.matrix so that `*` is a
            # matrix product (i.e. an outer product) -- TODO confirm
            outer = group_beta * group_beta.T
            scale = -1 * self.fmodel.current_lambdas[idx] / get_norm2(
                group_beta, power=3)
            return scale * outer
 def _get_dbeta_dlambda1(beta_minis, matrix_to_invert):
     """Solve ``matrix_to_invert * x = -(stacked normalized betas)`` via LSMR.

     Args:
         beta_minis: list of coefficient groups; each is divided by
             ``get_norm2`` of itself before stacking.
         matrix_to_invert: system matrix passed to
             ``sp.sparse.linalg.lsmr``.

     Returns:
         The LSMR solution as a column ``np.matrix``. When every group is
         empty, a zero ``ndarray`` of shape
         ``(matrix_to_invert.shape[0], 1)`` is returned instead.
     """
     if np.concatenate(beta_minis).size != 0:
         unit_betas = np.concatenate(
             [group / get_norm2(group) for group in beta_minis])
         # `.A1` flattens the stacked matrix to the 1-D right-hand side that
         # lsmr expects; element 0 of lsmr's result tuple is the solution.
         lsmr_result = sp.sparse.linalg.lsmr(matrix_to_invert,
                                             -1 * unit_betas.A1)
         return np.matrix(lsmr_result[0]).T

     # NOTE(review): this branch returns an ndarray while the other returns
     # an np.matrix; callers presumably tolerate both.
     return np.zeros((matrix_to_invert.shape[0], 1))
Ejemplo n.º 4
0
        def _get_block_diag_component(idx):
            """Return the scaled ``diagflat(beta) * tiled(beta)`` block for ``idx``.

            Reads ``beta_minis`` and ``self`` from the enclosing scope.
            An empty group yields an empty 0x0 matrix.
            """
            group_beta = beta_minis[idx]
            if group_beta.size == 0:
                return np.matrix(np.zeros((0,0))).T

            # Tile the group `size` times horizontally, then transpose.
            # assumes group_beta is a column np.matrix, in which case the
            # product below equals beta * beta.T -- TODO confirm
            tiled_rows = np.tile(group_beta, (1, group_beta.size)).T
            scale = -1 * self.fmodel.current_lambdas[idx] / get_norm2(group_beta, power=3)
            return scale * np.diagflat(group_beta) * tiled_rows
        def _get_block_diag_component(idx):
            """Build the block for group ``idx`` from ``beta * beta.T``.

            ``beta_minis`` and ``self`` come from the enclosing scope. The
            product is scaled by
            ``-current_lambdas[idx] / get_norm2(beta, power=3)``; an empty
            group produces an empty 0x0 matrix.
            """
            coef = beta_minis[idx]
            if coef.size != 0:
                # presumably coef is a column np.matrix, making this an
                # outer product -- verify against the caller
                coef_outer = coef * coef.T
                weight = -1 * self.fmodel.current_lambdas[idx] / get_norm2(
                    coef, power=3)
                return weight * coef_outer

            return np.matrix(np.zeros((0, 0))).T
 def _get_dbeta_dlambda1(beta_minis, matrix_to_invert):
     """Return the LSMR solution of ``matrix_to_invert * x = -normed betas``.

     Each group in ``beta_minis`` is divided by ``get_norm2`` of itself, the
     groups are stacked, and the negated flattened stack is used as the
     right-hand side. If all groups are empty, a zero ``ndarray`` with
     ``matrix_to_invert.shape[0]`` rows and one column is returned.
     """
     all_empty = np.concatenate(beta_minis).size == 0
     if all_empty:
         return np.zeros((matrix_to_invert.shape[0], 1))

     stacked = np.concatenate(
         [coef / get_norm2(coef) for coef in beta_minis])
     rhs = -1 * stacked.A1  # 1-D right-hand side for lsmr
     solution = sp.sparse.linalg.lsmr(matrix_to_invert, rhs)[0]
     return np.matrix(solution).T
Ejemplo n.º 7
0
        def _get_dbeta_dlambda1(beta, matrix_to_invert, num_features_before):
            """Solve for the LSMR solution with ``beta`` zero-padded into place.

            ``beta`` is divided by ``get_norm2(beta)`` and embedded in a column
            of length ``total_features`` (read from the enclosing scope),
            with ``num_features_before`` zeros ahead of it and zeros after.
            The negated column is the right-hand side of the LSMR solve.

            Returns a column ``np.matrix`` holding the solution, or a zero
            ``ndarray`` of shape ``(matrix_to_invert.shape[0], 1)`` when
            ``beta`` is empty.
            """
            if beta.size == 0:
                return np.zeros((matrix_to_invert.shape[0], 1))

            unit_beta = beta / get_norm2(beta)
            num_features_after = total_features - unit_beta.size - num_features_before
            leading_zeros = np.matrix(np.zeros(num_features_before)).T
            trailing_zeros = np.matrix(np.zeros(num_features_after)).T
            padded = np.concatenate([leading_zeros, unit_beta, trailing_zeros])

            # Element 0 of lsmr's result tuple is the solution vector.
            solution = sp.sparse.linalg.lsmr(matrix_to_invert, -1 * padded.A1)[0]
            return np.matrix(solution).T
Ejemplo n.º 8
0
 def get_value(self, alpha, beta, gamma, given_nuc_norm=None):
     """Return the penalized training objective for a single alpha/beta pair.

     The value adds up: the scaled squared residual over ``self.train_idx``,
     ``self.lambdas[0]`` times the nuclear norm of ``gamma``, and for each
     of ``alpha`` and ``beta`` an ``ord=1`` norm term plus a 0.5-scaled
     ``get_norm2(..., power=2)`` term (weights ``self.lambdas[1..4]``).

     Args:
         alpha: coefficients penalized with ``lambdas[1]`` and ``lambdas[2]``.
         beta: coefficients penalized with ``lambdas[3]`` and ``lambdas[4]``.
         gamma: matrix handed to the fitted-value helper and the nuclear norm.
         given_nuc_norm: optional precomputed nuclear norm of ``gamma``;
             computed with ``np.linalg.norm(gamma, ord="nuc")`` when ``None``.

     Returns:
         The sum of the six terms described above.
     """
     fitted = get_matrix_completion_fitted_values(
         self.row_features,
         self.col_features,
         alpha,
         beta,
         gamma,
     )
     residual_flat = make_column_major_flat(self.observed_matrix - fitted)
     square_loss = 0.5 / self.num_train * get_norm2(
         residual_flat[self.train_idx],
         power=2,
     )

     # Prefer the caller-supplied nuclear norm to avoid recomputing it.
     if given_nuc_norm is not None:
         nuc_value = given_nuc_norm
     else:
         nuc_value = np.linalg.norm(gamma, ord="nuc")
     nuc_norm = self.lambdas[0] * nuc_value

     # NOTE(review): for 2-D input, np.linalg.norm(..., ord=1) is the max
     # column sum; this matches the vector 1-norm only for a single column.
     alpha_norm1 = self.lambdas[1] * np.linalg.norm(alpha, ord=1)
     beta_norm1 = self.lambdas[3] * np.linalg.norm(beta, ord=1)
     alpha_norm2 = 0.5 * self.lambdas[2] * get_norm2(alpha, power=2)
     beta_norm2 = 0.5 * self.lambdas[4] * get_norm2(beta, power=2)

     return square_loss + nuc_norm + alpha_norm1 + alpha_norm2 + beta_norm1 + beta_norm2
        def _get_dbeta_dlambda1(beta, matrix_to_invert, num_features_before):
            """Return the LSMR solution for a zero-padded, normalized ``beta``.

            The normalized ``beta`` (``beta / get_norm2(beta)``) is placed
            after ``num_features_before`` zeros and followed by enough zeros
            to reach ``total_features`` rows (``total_features`` is read from
            the enclosing scope). The negated result is the right-hand side
            for ``sp.sparse.linalg.lsmr``. An empty ``beta`` short-circuits
            to a zero ``ndarray`` column.
            """
            if beta.size != 0:
                unit_beta = beta / get_norm2(beta)
                pad_before = np.matrix(np.zeros(num_features_before)).T
                pad_after = np.matrix(
                    np.zeros(total_features - unit_beta.size -
                             num_features_before)).T
                rhs_column = np.concatenate([pad_before, unit_beta, pad_after])

                # `.A1` flattens to 1-D; lsmr returns the solution first.
                lsmr_solution = sp.sparse.linalg.lsmr(
                    matrix_to_invert, -1 * rhs_column.A1)[0]
                return np.matrix(lsmr_solution).T

            return np.zeros((matrix_to_invert.shape[0], 1))
Ejemplo n.º 10
0
 def get_prox_l2(self, x_vector, scale_factor):
     """Shrink ``x_vector`` toward zero by a norm-dependent factor.

     Returns ``max(1 - scale_factor / norm, 0) * x_vector`` where ``norm`` is
     ``get_norm2(x_vector, power=1)``. This has the form of the proximal
     operator of a scaled Euclidean norm (block soft-thresholding),
     presuming ``get_norm2`` returns the Euclidean norm -- confirm against
     its definition.

     Args:
         x_vector: vector to threshold.
         scale_factor: threshold amount subtracted (relative to the norm).

     Returns:
         The thresholded vector, same shape as ``x_vector``. A zero-norm
         input is returned as all zeros.
     """
     x_norm = get_norm2(x_vector, power=1)
     if x_norm == 0:
         # A zero vector stays zero. Handling it here avoids dividing by
         # zero (ZeroDivisionError for Python floats, inf/nan plus a
         # RuntimeWarning for numpy scalars).
         return 0 * x_vector
     return max(1 - scale_factor / x_norm, 0) * x_vector
 def _get_diagmatrix_component(idx):
     """Return a scaled identity block for group ``idx``.

     ``beta_minis`` and ``self`` are read from the enclosing scope. The
     block is ``current_lambdas[idx] / get_norm2(beta)`` times an identity
     of size ``beta.size``; an empty group yields an empty 0x0 matrix.
     """
     group_beta = beta_minis[idx]
     if group_beta.size == 0:
         return np.matrix(np.zeros((0, 0))).T

     ratio = self.fmodel.current_lambdas[idx] / get_norm2(group_beta)
     return ratio * np.identity(group_beta.size)
    def _get_dmodel_dlambda(
        self,
        lambda_idx,
        imp_derivs,
        alphas,
        betas,
        gamma,
        row_features,
        col_features,
        u_hat,
        sigma_hat,
        v_hat,
        lambdas,
    ):
        """Build a sum-of-squares objective from the differentiated
        optimality conditions and return ``imp_derivs.solve(obj)``.

        The objective ``obj`` accumulates ``sum_squares`` of:
          * the left- and right-multiplied gamma conditions (only when
            ``sigma_hat.size > 0``), and
          * one expression per coordinate of every alpha group and every
            beta group.

        Args:
            lambda_idx: index of the penalty parameter being differentiated.
                Index 0 adds extra terms to the gamma conditions; index 1
                adds extra terms to the alpha and beta conditions.
            imp_derivs: object exposing ``dU_dlambda``, ``dV_dlambda``,
                ``dalphas_dlambda``, ``dbetas_dlambda`` and ``solve``
                (presumably the unknown implicit derivatives -- confirm).
            alphas, betas: per-group coefficient vectors, zipped with
                ``row_features`` / ``col_features`` respectively.
            gamma: passed through to ``self._get_d_square_loss``.
            row_features, col_features: per-group feature matrices.
            u_hat, sigma_hat, v_hat: decomposition factors of gamma
                (presumably a reduced SVD -- confirm against caller).
            lambdas: penalty parameters; only ``lambdas[0]`` and
                ``lambdas[1]`` are read here.

        Returns:
            Whatever ``imp_derivs.solve(obj)`` returns.
        """
        # this fcn accepts mini-fied model parameters - alpha, beta, and u/sigma/v
        # returns the gradient of the model parameters wrt lambda
        # NOTE(review): num_alphas, sigma_mask and lambda_offset are assigned
        # below but never read again in this function.
        num_alphas = len(alphas)
        dd_square_loss_mini = self._get_dd_square_loss_mini(
            imp_derivs, row_features, col_features)
        sigma_mask = self._create_sigma_mask(sigma_hat)
        obj = 0
        lambda_offset = 1 if sigma_hat.size > 0 else 0

        # Constraint from implicit differentiation of the optimality conditions
        # that were defined by taking the gradient of the training objective wrt gamma
        if sigma_hat.size > 0:
            d_square_loss = self._get_d_square_loss(alphas, betas, gamma,
                                                    row_features, col_features)
            d_square_loss_reshape = make_column_major_reshape(
                d_square_loss, (self.data.num_rows, self.data.num_cols))

            dd_square_loss = self._get_dd_square_loss(imp_derivs, row_features,
                                                      col_features)
            # `reshape` here is a different helper from
            # make_column_major_reshape (presumably a symbolic reshape --
            # confirm against the imports).
            dd_square_loss_reshape = reshape(
                dd_square_loss,
                self.data.num_rows,
                self.data.num_cols,
            )

            # left multiply U^T and implicit derivative
            dgamma_left_imp_deriv_dlambda = (
                imp_derivs.dU_dlambda.T * d_square_loss_reshape +
                u_hat.T * dd_square_loss_reshape +
                lambdas[0] * np.sign(sigma_hat) * imp_derivs.dV_dlambda.T)

            # right multiply V and implicit derivative
            dgamma_right_imp_deriv_dlambda = (
                d_square_loss_reshape * imp_derivs.dV_dlambda +
                dd_square_loss_reshape * v_hat +
                lambdas[0] * imp_derivs.dU_dlambda * np.sign(sigma_hat))
            # Extra terms that appear only when differentiating wrt
            # lambdas[0], the weight used on the gamma terms above.
            if lambda_idx == 0:
                dgamma_left_imp_deriv_dlambda += np.sign(sigma_hat) * v_hat.T
                dgamma_right_imp_deriv_dlambda += u_hat * np.sign(sigma_hat)

            obj += sum_squares(dgamma_left_imp_deriv_dlambda) + sum_squares(
                dgamma_right_imp_deriv_dlambda)

        # Constraint from implicit differentiation of the optimality conditions
        # that were defined by taking the gradient of the training objective wrt
        # alpha and beta, respectively

        # NOTE(review): the group index `i` is unused in both loops below;
        # every group is weighted by lambdas[1] and the extra term is added
        # only when lambda_idx == 1. _check_optimality_conditions indexes
        # the weights per group (lambdas[1 + i]) -- confirm whether a single
        # shared weight is intended here.
        for i, a_tuple in enumerate(
                zip(row_features, alphas, imp_derivs.dalphas_dlambda)):
            row_f, alpha, da_dlambda = a_tuple
            for j in range(alpha.size):
                dalpha_imp_deriv_dlambda = (
                    dd_square_loss_mini.T *
                    vec(row_f[:, j] * self.onesT_row)[self.data.train_idx] +
                    lambdas[1] * (da_dlambda[j] / get_norm2(alpha, power=1) -
                                  alpha[j] / get_norm2(alpha, power=3) *
                                  (alpha.T * da_dlambda)))
                if lambda_idx == 1:
                    dalpha_imp_deriv_dlambda += alpha[j] / get_norm2(alpha,
                                                                     power=1)
                obj += sum_squares(dalpha_imp_deriv_dlambda)

        for i, b_tuple in enumerate(
                zip(col_features, betas, imp_derivs.dbetas_dlambda)):
            col_f, beta, db_dlambda = b_tuple
            for j in range(beta.size):
                dbeta_imp_deriv_dlambda = (
                    dd_square_loss_mini.T * vec(
                        (col_f[:, j] * self.onesT_col).T)[self.data.train_idx]
                    + lambdas[1] * (db_dlambda[j] / get_norm2(beta, power=1) -
                                    beta[j] / get_norm2(beta, power=3) *
                                    (beta.T * db_dlambda)))
                if lambda_idx == 1:
                    dbeta_imp_deriv_dlambda += beta[j] / get_norm2(beta,
                                                                   power=1)
                obj += sum_squares(dbeta_imp_deriv_dlambda)

        return imp_derivs.solve(obj)
    def _check_optimality_conditions(self,
                                     model_params,
                                     lambdas,
                                     opt_thres=1e-2):
        """Print gradient norms of the training objective at the fitted model.

        Sanity check that the solver reached a good enough accuracy: each
        printed norm should be close to zero. It can also be used to check
        that the implicit-derivative assumptions hold.

        Args:
            model_params: mapping with keys ``"alphas"``, ``"betas"`` and
                ``"gamma"``.
            lambdas: exploded lambda matrix; index 0 weights the gamma
                terms, ``1 + i`` weights alpha group ``i`` and
                ``1 + self.settings.num_row_groups + i`` weights beta
                group ``i``.
            opt_thres: intended tolerance on the gradient norms. Currently
                unused: the norms are only printed, never asserted.

        Returns:
            None. Results are written to stdout.
        """
        # Single-argument print(...) calls produce the same output under
        # both Python 2 and Python 3.
        print("check_optimality_conditions!")

        alphas = model_params["alphas"]
        betas = model_params["betas"]
        gamma = model_params["gamma"]

        u_hat, sigma_hat, v_hat = self._get_svd_mini(gamma)

        d_square_loss = self._get_d_square_loss(
            alphas,
            betas,
            gamma,
            self.data.row_features,
            self.data.col_features,
        )
        # Reshaped once and shared by the left and right gamma conditions.
        d_square_loss_reshape = make_column_major_reshape(
            d_square_loss, (self.data.num_rows, self.data.num_cols))

        left_grad_at_opt_gamma = (
            d_square_loss_reshape * v_hat +
            lambdas[0] * u_hat * np.sign(sigma_hat))
        right_grad_at_opt_gamma = (
            u_hat.T * d_square_loss_reshape +
            lambdas[0] * np.sign(sigma_hat) * v_hat.T)
        print("left grad_at_opt wrt gamma (should be zero) %s" %
              (get_norm2(left_grad_at_opt_gamma),))
        print("right grad_at_opt wrt gamma (should be zero) %s" %
              (get_norm2(right_grad_at_opt_gamma),))

        for i, (alpha, row_f) in enumerate(
                zip(alphas, self.data.row_features)):
            alpha_norm = np.linalg.norm(alpha)
            # Groups with (numerically) zero norm are skipped: the penalty
            # term below divides by the norm.
            if alpha_norm > 1e-5:
                grad_at_opt_alpha = [
                    (d_square_loss.T *
                     make_column_major_flat(row_f[:, j] * self.onesT_row) +
                     lambdas[1 + i] * alpha[j] / alpha_norm)[0, 0]
                    for j in range(alpha.size)
                ]
                print("grad_at_opt wrt alpha (should be zero) %s" %
                      (get_norm2(grad_at_opt_alpha),))

        for i, (beta, col_f) in enumerate(
                zip(betas, self.data.col_features)):
            beta_norm = np.linalg.norm(beta)
            if beta_norm > 1e-5:
                grad_at_opt_beta = [
                    (d_square_loss.T * make_column_major_flat(
                        (col_f[:, j] * self.onesT_col).T) +
                     lambdas[1 + self.settings.num_row_groups + i] *
                     beta[j] / beta_norm)[0, 0]
                    for j in range(beta.size)
                ]
                print("grad_at_opt wrt beta (should be zero) %s" %
                      (get_norm2(grad_at_opt_beta),))
 def _get_diagmatrix_component(idx):
     """Build the diagonal block for group ``idx``.

     Uses ``beta_minis`` and ``self`` from the enclosing scope. A non-empty
     group gives ``current_lambdas[idx] / get_norm2(beta)`` times the
     identity of size ``beta.size``; an empty group gives a 0x0 matrix.
     """
     coef = beta_minis[idx]
     if coef.size != 0:
         weight = self.fmodel.current_lambdas[idx] / get_norm2(coef)
         return weight * np.identity(coef.size)
     return np.matrix(np.zeros((0, 0))).T