Example #1
    def __init__(self,
                 map,
                 generative_mean,
                 coef=1.,
                 offset=None,
                 quadratic=None):

        self.map = map
        self.q = map.p - map.nactive
        self.r = map.p + map.nactive
        self.p = map.p

        rr.smooth_atom.__init__(self, (2 * self.p, ),
                                offset=offset,
                                quadratic=quadratic,
                                initial=self.map.feasible_point,
                                coef=coef)

        self.coefs[:] = self.map.feasible_point

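        # Boolean masks over the 2 * p coordinates: entries [p:] are the
        # optimization variables (the first nactive active, the remaining q
        # inactive); the complementary entries [:p] are selected as the response.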
        opt_vars_0 = np.zeros(self.r, bool)
        opt_vars_0[self.p:] = 1
        opt_vars = np.append(opt_vars_0, np.ones(self.q, bool))

        opt_vars_active = np.append(opt_vars_0, np.zeros(self.q, bool))
        opt_vars_inactive = np.zeros(2 * self.p, bool)
        opt_vars_inactive[self.r:] = 1

        self._response_selector = rr.selector(~opt_vars, (2 * self.p, ))
        self._opt_selector_active = rr.selector(opt_vars_active,
                                                (2 * self.p, ))
        self._opt_selector_inactive = rr.selector(opt_vars_inactive,
                                                  (2 * self.p, ))

        nonnegative = nonnegative_softmax_scaled(self.map.nactive)
        self.nonnegative_barrier = nonnegative.linear(
            self._opt_selector_active)

        cube_objective = smooth_cube_barrier(self.map.inactive_lagrange)
        self.cube_barrier = rr.affine_smooth(cube_objective,
                                             self._opt_selector_inactive)

        linear_map = np.hstack(
            [self.map._score_linear_term, self.map._opt_linear_term])
        randomization_loss = log_likelihood(np.zeros(self.p),
                                            self.map.randomization_cov, self.p)
        self.randomization_loss = rr.affine_smooth(
            randomization_loss,
            rr.affine_transform(linear_map, self.map._opt_affine_term))

        likelihood_loss = log_likelihood(generative_mean, self.map.score_cov,
                                         self.p)

        self.likelihood_loss = rr.affine_smooth(likelihood_loss,
                                                self._response_selector)

        self.total_loss = rr.smooth_sum([
            self.randomization_loss, self.likelihood_loss,
            self.nonnegative_barrier, self.cube_barrier
        ])
Example #2
def test_group_lasso_separable():
    """
    This test verifies that the specification of a separable
    penalty yields the same results as having two linear_atoms
    with selector matrices. The penalty here is a group lasso, i.e. an l2
    penalty.
    """

    X = np.random.standard_normal((100,20))
    Y = np.random.standard_normal((100,)) + np.dot(X, np.random.standard_normal(20))

    penalty1 = rr.l2norm(10, lagrange=.2)
    penalty2 = rr.l2norm(10, lagrange=.2)
    penalty = rr.separable((20,), [penalty1, penalty2], [slice(0,10), slice(10,20)])

    # solve using separable
    
    loss = rr.quadratic_loss.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs

    # solve using the selectors

    penalty_s = [rr.linear_atom(p, rr.selector(g, (20,))) for p, g in
                 zip(penalty.atoms, penalty.groups)]
    problem_s = rr.container(loss, *penalty_s)
    solver_s = rr.FISTA(problem_s)
    solver_s.fit(min_its=200, tol=1.0e-12)
    coefs_s = solver_s.composite.coefs

    np.testing.assert_almost_equal(coefs, coefs_s)
Example #3
def test_group_lasso_separable():
    """
    This test verifies that the specification of a separable
    penalty yields the same results as having two linear_atoms
    with selector matrices. The penalty here is a group lasso, i.e. an l2
    penalty.
    """

    X = np.random.standard_normal((100,20))
    Y = np.random.standard_normal((100,)) + np.dot(X, np.random.standard_normal(20))

    penalty1 = rr.l2norm(10, lagrange=.2)
    penalty2 = rr.l2norm(10, lagrange=.2)
    penalty = rr.separable((20,), [penalty1, penalty2], [slice(0,10), slice(10,20)])

    # solve using separable
    
    loss = rr.quadratic.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs

    # solve using the selectors

    penalty_s = [rr.linear_atom(p, rr.selector(g, (20,))) for p, g in
                 zip(penalty.atoms, penalty.groups)]
    problem_s = rr.container(loss, *penalty_s)
    solver_s = rr.FISTA(problem_s)
    solver_s.fit(min_its=200, tol=1.0e-12)
    coefs_s = solver_s.composite.coefs

    np.testing.assert_almost_equal(coefs, coefs_s)
Example #4
def test_lasso_separable():
    """
    This test verifies that the specification of a separable
    penalty yields the same results as having two linear_atoms
    with selector matrices. The penalty here is a lasso, i.e. an l1
    penalty.
    """

    X = np.random.standard_normal((100,20))
    Y = np.random.standard_normal((100,)) + np.dot(X, np.random.standard_normal(20))

    penalty1 = rr.l1norm(10, lagrange=1.2)
    penalty2 = rr.l1norm(10, lagrange=1.2)
    penalty = rr.separable((20,), [penalty1, penalty2], [slice(0,10), slice(10,20)], test_for_overlap=True)

    # ensure code is tested

    print(penalty1.latexify())

    print(penalty.latexify())
    print(penalty.conjugate)
    print(penalty.dual)
    print(penalty.seminorm(np.ones(penalty.shape)))
    print(penalty.constraint(np.ones(penalty.shape), bound=2.))

    pencopy = copy(penalty)
    pencopy.set_quadratic(rr.identity_quadratic(1,0,0,0))
    pencopy.conjugate

    # solve using separable
    
    loss = rr.quadratic.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs

    # solve using the usual composite

    penalty_all = rr.l1norm(20, lagrange=1.2)
    problem_all = rr.container(loss, penalty_all)
    solver_all = rr.FISTA(problem_all)
    solver_all.fit(min_its=100, tol=1.0e-12)

    coefs_all = solver_all.composite.coefs

    # solve using the selectors

    penalty_s = [rr.linear_atom(p, rr.selector(g, (20,))) for p, g in
                 zip(penalty.atoms, penalty.groups)]
    problem_s = rr.container(loss, *penalty_s)
    solver_s = rr.FISTA(problem_s)
    solver_s.fit(min_its=500, tol=1.0e-12)
    coefs_s = solver_s.composite.coefs

    np.testing.assert_almost_equal(coefs, coefs_all)
    np.testing.assert_almost_equal(coefs, coefs_s)
Example #5
def test_lasso_separable():
    """
    This test verifies that the specification of a separable
    penalty yields the same results as having two linear_atoms
    with selector matrices. The penalty here is a lasso, i.e. an l1
    penalty.
    """

    X = np.random.standard_normal((100,20))
    Y = np.random.standard_normal((100,)) + np.dot(X, np.random.standard_normal(20))

    penalty1 = rr.l1norm(10, lagrange=1.2)
    penalty2 = rr.l1norm(10, lagrange=1.2)
    penalty = rr.separable((20,), [penalty1, penalty2], [slice(0,10), slice(10,20)], test_for_overlap=True)

    # ensure code is tested

    print(penalty1.latexify())

    print(penalty.latexify())
    print(penalty.conjugate)
    print(penalty.dual)
    print(penalty.seminorm(np.ones(penalty.shape)))
    print(penalty.constraint(np.ones(penalty.shape), bound=2.))

    pencopy = copy(penalty)
    pencopy.set_quadratic(rr.identity_quadratic(1,0,0,0))
    pencopy.conjugate

    # solve using separable
    
    loss = rr.quadratic_loss.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs

    # solve using the usual composite

    penalty_all = rr.l1norm(20, lagrange=1.2)
    problem_all = rr.container(loss, penalty_all)
    solver_all = rr.FISTA(problem_all)
    solver_all.fit(min_its=100, tol=1.0e-12)

    coefs_all = solver_all.composite.coefs

    # solve using the selectors

    penalty_s = [rr.linear_atom(p, rr.selector(g, (20,))) for p, g in
                 zip(penalty.atoms, penalty.groups)]
    problem_s = rr.container(loss, *penalty_s)
    solver_s = rr.FISTA(problem_s)
    solver_s.fit(min_its=500, tol=1.0e-12)
    coefs_s = solver_s.composite.coefs

    np.testing.assert_almost_equal(coefs, coefs_all)
    np.testing.assert_almost_equal(coefs, coefs_s)
Example #6
    def __init__(
            self,
            X,
            feasible_point,
            active,  # the active set chosen by randomized lasso
            active_sign,  # the set of signs of active coordinates chosen by lasso
            lagrange,  # in R^p
            mean_parameter,  # in R^n
            noise_variance,  # noise level in data
            randomizer,  # specified randomization
            epsilon,  # ridge penalty for randomized lasso
            coef=1.,
            offset=None,
            quadratic=None,
            nstep=10):

        n, p = X.shape

        self._X = X

        E = active.sum()
        self.q = p - E

        self.active = active
        self.noise_variance = noise_variance
        self.randomization = randomizer
        self.inactive_conjugate = self.active_conjugate = randomizer.CGF_conjugate
        if self.active_conjugate is None:
            raise ValueError(
                'randomization must know its CGF_conjugate -- currently only isotropic_gaussian and laplace are implemented and are assumed to be randomization with IID coordinates'
            )

        initial = np.zeros(n + E, )
        initial[n:] = feasible_point
        self.n = n

        rr.smooth_atom.__init__(self, (n + E, ),
                                offset=offset,
                                quadratic=quadratic,
                                initial=initial,
                                coef=coef)

        self.coefs[:] = initial

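        # the last E coordinates of the (n + E)-dimensional argument are the
        # optimization variables; the first n are selected as the response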
        opt_vars = np.zeros(n + E, bool)
        opt_vars[n:] = 1

        nonnegative = nonnegative_softmax_scaled(E)

        self._opt_selector = rr.selector(opt_vars, (n + E, ))
        self.nonnegative_barrier = nonnegative.linear(self._opt_selector)
        self._response_selector = rr.selector(~opt_vars, (n + E, ))

        self.set_parameter(mean_parameter, noise_variance)

        X_E = X[:, active]
        B = X.T.dot(X_E)

        B_E = B[active]
        B_mE = B[~active]

        self.A_active = np.hstack([
            -X[:, active].T,
            (B_E + epsilon * np.identity(E)) * active_sign[None, :]
        ])

        self.A_inactive = np.hstack(
            [-X[:, ~active].T, (B_mE * active_sign[None, :])])

        self.offset_active = active_sign * lagrange[active]

        self.offset_inactive = np.zeros(p - E)

        self.active_conj_loss = rr.affine_smooth(
            self.active_conjugate,
            rr.affine_transform(self.A_active, self.offset_active))

        cube_obj = neg_log_cube_probability(self.q,
                                            lagrange[~active],
                                            randomization_scale=1.)

        self.cube_loss = rr.affine_smooth(cube_obj, self.A_inactive)

        self.total_loss = rr.smooth_sum([
            self.active_conj_loss, self.cube_loss, self.likelihood_loss,
            self.nonnegative_barrier
        ])
Example #7
    def __init__(self,
                 X,
                 feasible_point,
                 active,  # the active set chosen by randomized marginal screening
                 active_signs,  # the set of signs of active coordinates chosen by ms
                 threshold,  # in R^p
                 mean_parameter,
                 noise_variance,
                 randomizer,
                 coef=1.,
                 offset=None,
                 quadratic=None,
                 nstep=10):

        n, p = X.shape
        self._X = X

        E = active.sum()
        self.q = p - E
        sigma = np.sqrt(noise_variance)

        self.active = active

        self.noise_variance = noise_variance
        self.randomization = randomizer
        self.inactive_conjugate = self.active_conjugate = randomizer.CGF_conjugate
        if self.active_conjugate is None:
            raise ValueError(
                'randomization must know its CGF_conjugate -- currently only isotropic_gaussian and laplace are implemented and are assumed to be randomization with IID coordinates')

        initial = np.zeros(n + E, )
        initial[n:] = feasible_point
        self.n = n

        rr.smooth_atom.__init__(self,
                                (n + E,),
                                offset=offset,
                                quadratic=quadratic,
                                initial=initial,
                                coef=coef)

        self.coefs[:] = initial
        nonnegative = nonnegative_softmax_scaled(E)

        opt_vars = np.zeros(n + E, bool)
        opt_vars[n:] = 1

        self._opt_selector = rr.selector(opt_vars, (n + E,))
        self.nonnegative_barrier = nonnegative.linear(self._opt_selector)
        self._response_selector = rr.selector(~opt_vars, (n + E,))

        self.set_parameter(mean_parameter, noise_variance)

        self.A_active = np.hstack([np.true_divide(-X[:, active].T, sigma), np.identity(E) * active_signs[None, :]])

        self.A_inactive = np.hstack([np.true_divide(-X[:, ~active].T, sigma), np.zeros((p - E, E))])

        self.offset_active = active_signs * threshold[active]
        self.offset_inactive = np.zeros(p - E)

        self.active_conj_loss = rr.affine_smooth(self.active_conjugate,
                                                 rr.affine_transform(self.A_active, self.offset_active))

        cube_obj = neg_log_cube_probability(self.q, threshold[~active], randomization_scale=1.)

        self.cube_loss = rr.affine_smooth(cube_obj, rr.affine_transform(self.A_inactive, self.offset_inactive))

        self.total_loss = rr.smooth_sum([self.active_conj_loss,
                                         self.cube_loss,
                                         self.likelihood_loss,
                                         self.nonnegative_barrier])
Example #8
    def __init__(
            self,
            X,
            feasible_point,  # in R^{|E|_1 + |E|_2}
            active_1,  # the active set chosen by randomized marginal screening
            active_2,  # the active set chosen by randomized lasso
            active_signs_1,  # the set of signs of active coordinates chosen by ms
            active_signs_2,  # the set of signs of active coordinates chosen by lasso
            lagrange,  # in R^p
            threshold,  # in R^p
            mean_parameter,  # in R^n
            noise_variance,
            randomizer,
            epsilon,  # ridge penalty for randomized lasso
            coef=1.,
            offset=None,
            quadratic=None,
            nstep=10):

        n, p = X.shape
        self._X = X

        E_1 = active_1.sum()
        E_2 = active_2.sum()

        sigma = np.sqrt(noise_variance)

        self.active_1 = active_1
        self.active_2 = active_2
        self.noise_variance = noise_variance
        self.randomization = randomizer
        self.inactive_conjugate = self.active_conjugate = randomizer.CGF_conjugate
        if self.active_conjugate is None:
            raise ValueError(
                'randomization must know its CGF_conjugate -- currently only isotropic_gaussian and laplace are implemented and are assumed to be randomization with IID coordinates'
            )

        initial = np.zeros(n + E_1 + E_2, )
        initial[n:] = feasible_point
        self.n = n

        rr.smooth_atom.__init__(self, (n + E_1 + E_2, ),
                                offset=offset,
                                quadratic=quadratic,
                                initial=initial,
                                coef=coef)

        self.coefs[:] = initial
        nonnegative = nonnegative_softmax_scaled(E_1 + E_2)
        opt_vars = np.zeros(n + E_1 + E_2, bool)
        opt_vars[n:] = 1

        self._opt_selector = rr.selector(opt_vars, (n + E_1 + E_2, ))
        self.nonnegative_barrier = nonnegative.linear(self._opt_selector)
        self._response_selector = rr.selector(~opt_vars, (n + E_1 + E_2, ))

        self.set_parameter(mean_parameter, noise_variance)

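        # arg_ms selects the response together with the E_1 marginal-screening
        # optimization variables; arg_lasso selects the response together with
        # the E_2 lasso optimization variables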
        arg_ms = np.zeros(self.n + E_1 + E_2, bool)
        arg_ms[:self.n + E_1] = 1
        arg_lasso = np.zeros(self.n + E_1, bool)
        arg_lasso[:self.n] = 1
        arg_lasso = np.append(arg_lasso, np.ones(E_2, bool))

        self.A_active_1 = np.hstack([
            np.true_divide(-X[:, active_1].T, sigma),
            np.identity(E_1) * active_signs_1[None, :]
        ])

        self.A_inactive_1 = np.hstack([
            np.true_divide(-X[:, ~active_1].T, sigma),
            np.zeros((p - E_1, E_1))
        ])

        self.offset_active_1 = active_signs_1 * threshold[active_1]
        self.offset_inactive_1 = np.zeros(p - E_1)

        self._active_ms = rr.selector(
            arg_ms, (self.n + E_1 + E_2, ),
            rr.affine_transform(self.A_active_1, self.offset_active_1))

        self._inactive_ms = rr.selector(
            arg_ms, (self.n + E_1 + E_2, ),
            rr.affine_transform(self.A_inactive_1, self.offset_inactive_1))

        self.active_conj_loss_1 = rr.affine_smooth(self.active_conjugate,
                                                   self._active_ms)

        self.q_1 = p - E_1

        cube_obj_1 = neg_log_cube_probability(self.q_1,
                                              threshold[~active_1],
                                              randomization_scale=1.)

        self.cube_loss_1 = rr.affine_smooth(cube_obj_1, self._inactive_ms)

        X_step2 = X[:, active_1]
        X_E_2 = X_step2[:, active_2]
        B = X_step2.T.dot(X_E_2)

        B_E = B[active_2]
        B_mE = B[~active_2]

        self.A_active_2 = np.hstack([
            -X_step2[:, active_2].T,
            (B_E + epsilon * np.identity(E_2)) * active_signs_2[None, :]
        ])
        self.A_inactive_2 = np.hstack(
            [-X_step2[:, ~active_2].T, (B_mE * active_signs_2[None, :])])

        self.offset_active_2 = active_signs_2 * lagrange[active_2]

        self.offset_inactive_2 = np.zeros(E_1 - E_2)

        self._active_lasso = rr.selector(
            arg_lasso, (self.n + E_1 + E_2, ),
            rr.affine_transform(self.A_active_2, self.offset_active_2))

        self._inactive_lasso = rr.selector(
            arg_lasso, (self.n + E_1 + E_2, ),
            rr.affine_transform(self.A_inactive_2, self.offset_inactive_2))

        self.active_conj_loss_2 = rr.affine_smooth(self.active_conjugate,
                                                   self._active_lasso)

        self.q_2 = E_1 - E_2

        cube_obj_2 = neg_log_cube_probability(self.q_2,
                                              lagrange[~active_2],
                                              randomization_scale=1.)

        self.cube_loss_2 = rr.affine_smooth(cube_obj_2, self._inactive_lasso)

        self.total_loss = rr.smooth_sum([
            self.active_conj_loss_1, self.active_conj_loss_2, self.cube_loss_1,
            self.cube_loss_2, self.likelihood_loss, self.nonnegative_barrier
        ])
Example #9
    def __init__(self,
                 map,
                 generative_mean,
                 coef=1.,
                 offset=None,
                 quadratic=None):

        self.map = map
        self.q = map.p - map.nactive
        self.r = map.p + map.nactive
        self.p = map.p

        self.inactive_conjugate = self.active_conjugate = map.randomization.CGF_conjugate

        if self.active_conjugate is None:
            raise ValueError(
                'randomization must know its CGF_conjugate -- currently only isotropic_gaussian and laplace are implemented and are assumed to be randomization with IID coordinates')

        self.inactive_lagrange = self.map.inactive_lagrange

        rr.smooth_atom.__init__(self,
                                (self.r,),
                                offset=offset,
                                quadratic=quadratic,
                                initial=self.map.feasible_point,
                                coef=coef)

        self.coefs[:] = self.map.feasible_point

        nonnegative = nonnegative_softmax_scaled(self.map.nactive)

        opt_vars = np.zeros(self.r, bool)
        opt_vars[map.p:] = 1

        self._opt_selector = rr.selector(opt_vars, (self.r,))
        self._response_selector = rr.selector(~opt_vars, (self.r,))

        self.nonnegative_barrier = nonnegative.linear(self._opt_selector)

        self.active_conj_loss = rr.affine_smooth(self.active_conjugate,
                                                 rr.affine_transform(np.hstack([self.map.A_active, self.map.B_active]),
                                                                     self.map.offset_active))

        cube_obj = neg_log_cube_probability(self.q, self.inactive_lagrange, randomization_scale=1.)
        self.cube_loss = rr.affine_smooth(cube_obj, np.hstack([self.map.A_inactive, self.map.B_inactive]))

        # w_1, v_1 = np.linalg.eig(self.map.score_cov)
        # self.score_cov_inv_half = (v_1.T.dot(np.diag(np.power(w_1, -0.5)))).dot(v_1)
        # likelihood_loss = rr.signal_approximator(np.squeeze(np.zeros(self.p)), coef=1.)
        # scaled_response_selector = rr.selector(~opt_vars, (self.r,), rr.affine_transform(self.score_cov_inv_half,
        #                                                                                  self.score_cov_inv_half.
        #                                                                                  dot(np.squeeze(generative_mean))))
        #print("cov", self.map.score_cov.shape )
        likelihood_loss = log_likelihood(generative_mean, self.map.score_cov, self.p)

        self.likelihood_loss = rr.affine_smooth(likelihood_loss, self._response_selector)

        self.total_loss = rr.smooth_sum([self.active_conj_loss,
                                         self.likelihood_loss,
                                         self.nonnegative_barrier,
                                         self.cube_loss])
Example #10
P = 200

Y = 2 * np.random.binomial(1, 0.5, size=(N, )) - 1.
X = np.random.standard_normal((N, P))
X[Y == 1] += np.array([30, -20] + (P - 2) * [0])[np.newaxis, :]
X -= X.mean(0)[np.newaxis, :]

X_1 = np.hstack([X, np.ones((N, 1))])
transform = rr.affine_transform(-Y[:, np.newaxis] * X_1, np.ones(N))
C = 0.2
hinge = rr.positive_part(N, lagrange=C)
hinge_loss = rr.linear_atom(hinge, transform)
epsilon = 0.04
smoothed_hinge_loss = rr.smoothed_atom(hinge_loss, epsilon=epsilon)

s = rr.selector(slice(0, P), (P + 1, ))
sparsity = rr.l1norm.linear(s, lagrange=3.)
quadratic = rr.quadratic.linear(s, coef=0.5)

from regreg.affine import power_L
ltransform = rr.linear_transform(X_1)
singular_value_sq = power_L(X_1)
# the other smooth piece is a quadratic with identity
# for quadratic form, so its lipschitz constant is 1

lipschitz = 1.05 * singular_value_sq / epsilon + 1.1

problem = rr.container(quadratic, smoothed_hinge_loss, sparsity)
solver = rr.FISTA(problem)
solver.composite.lipschitz = lipschitz
solver.debug = True
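The snippet above stops after configuring the solver and assumes that N, numpy (as np) and regreg.api (as rr) are already set up, as in Example #12 below. A minimal, assumed continuation showing how one might run the fit and read off the coefficients, using only calls that appear in the other examples on this page:

# hypothetical continuation: run FISTA with the precomputed Lipschitz constant
solver.fit(tol=1.0e-10)
smoothed_coefs = solver.composite.coefs  # length P + 1: weights plus intercept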
Example #11
    def __init__(
            self,
            X,
            feasible_point,
            active,  # the active set chosen by randomized lasso
            active_sign,  # the set of signs of active coordinates chosen by lasso
            lagrange,  # in R^p
            mean_parameter,  # in R^n
            noise_variance,  # noise_level in data
            randomizer,  # specified randomization
            epsilon,  # ridge penalty for randomized lasso
            coef=1.,
            offset=None,
            quadratic=None,
            nstep=10):

        n, p = X.shape
        E = active.sum()
        self._X = X
        self.active = active
        self.noise_variance = noise_variance
        self.randomization = randomizer

        self.CGF_randomization = randomizer.CGF

        if self.CGF_randomization is None:
            raise ValueError(
                'randomization must know its cgf -- currently only isotropic_gaussian and laplace are implemented and are assumed to be randomization with IID coordinates'
            )

        self.inactive_lagrange = lagrange[~active]

        initial = feasible_point

        self.feasible_point = feasible_point

        rr.smooth_atom.__init__(self, (p, ),
                                offset=offset,
                                quadratic=quadratic,
                                initial=initial,
                                coef=coef)

        self.coefs[:] = feasible_point

        mean_parameter = np.squeeze(mean_parameter)

        self.active = active

        X_E = self.X_E = X[:, active]
        self.X_permute = np.hstack([self.X_E, self._X[:, ~active]])
        B = X.T.dot(X_E)

        B_E = B[active]
        B_mE = B[~active]

        self.active_slice = np.zeros_like(active, bool)
        self.active_slice[:active.sum()] = True

        self.B_active = np.hstack([
            (B_E + epsilon * np.identity(E)) * active_sign[None, :],
            np.zeros((E, p - E))
        ])
        self.B_inactive = np.hstack(
            [B_mE * active_sign[None, :],
             np.identity((p - E))])
        self.B_p = np.vstack((self.B_active, self.B_inactive))

        self.B_p_inv = np.linalg.inv(self.B_p.T)

        self.offset_active = active_sign * lagrange[active]
        self.inactive_subgrad = np.zeros(p - E)

        self.cube_bool = np.zeros(p, bool)

        self.cube_bool[E:] = 1

        self.dual_arg = self.B_p_inv.dot(
            np.append(self.offset_active, self.inactive_subgrad))

        self._opt_selector = rr.selector(~self.cube_bool, (p, ))

        self.set_parameter(mean_parameter, noise_variance)

        _barrier_star = barrier_conjugate_softmax_scaled_rr(
            self.cube_bool, self.inactive_lagrange)

        self.conjugate_barrier = rr.affine_smooth(_barrier_star,
                                                  np.identity(p))

        self.CGF_randomizer = rr.affine_smooth(self.CGF_randomization,
                                               -self.B_p_inv)

        self.constant = np.true_divide(mean_parameter.dot(mean_parameter),
                                       2 * noise_variance)

        self.linear_term = rr.identity_quadratic(0, 0, self.dual_arg,
                                                 -self.constant)

        self.total_loss = rr.smooth_sum([
            self.conjugate_barrier, self.CGF_randomizer, self.likelihood_loss
        ])

        self.total_loss.quadratic = self.linear_term
Example #12
import numpy as np
import regreg.api as rr

N = 500
P = 2

Y = 2 * np.random.binomial(1, 0.5, size=(N,)) - 1.
X = np.random.standard_normal((N,P))
X[Y==1] += np.array([3,-2])[np.newaxis,:]

X_1 = np.hstack([X, np.ones((N,1))])
X_1_signs = -Y[:,np.newaxis] * X_1
transform = rr.affine_transform(X_1_signs, np.ones(N))
C = 0.2
hinge = rr.positive_part(N, lagrange=C)
hinge_loss = rr.linear_atom(hinge, transform)

quadratic = rr.quadratic.linear(rr.selector(slice(0,P), (P+1,)), coef=0.5)
problem = rr.container(quadratic, hinge_loss)
solver = rr.FISTA(problem)
solver.fit()

import pylab
pylab.clf()
pylab.scatter(X[Y==1,0],X[Y==1,1], facecolor='red')
pylab.scatter(X[Y==-1,0],X[Y==-1,1], facecolor='blue')

fits = np.dot(X_1, problem.coefs)
labels = 2 * (fits > 0) - 1

pointX = [X[:,0].min(), X[:,0].max()]
pointY = [-(pointX[0]*problem.coefs[0]+problem.coefs[2])/problem.coefs[1],
          -(pointX[1]*problem.coefs[0]+problem.coefs[2])/problem.coefs[1]]
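The example computes the two endpoints of the fitted separating line but stops before drawing it. One possible follow-up, not part of the original snippet, using the pylab interface already imported above:

# hypothetical follow-up: draw the decision boundary through the two endpoints
pylab.plot(pointX, pointY, 'k--', linewidth=2)
pylab.show()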
Example #13
P = 200

Y = 2 * np.random.binomial(1, 0.5, size=(N,)) - 1.
X = np.random.standard_normal((N,P))
X[Y==1] += np.array([30,-20] + (P-2)*[0])[np.newaxis,:]
X -= X.mean(0)[np.newaxis, :]

X_1 = np.hstack([X, np.ones((N,1))])
transform = rr.affine_transform(-Y[:,np.newaxis] * X_1, np.ones(N))
C = 0.2
hinge = rr.positive_part(N, lagrange=C)
hinge_loss = rr.linear_atom(hinge, transform)
epsilon = 0.04
smoothed_hinge_loss = rr.smoothed_atom(hinge_loss, epsilon=epsilon)

s = rr.selector(slice(0,P), (P+1,))
sparsity = rr.l1norm.linear(s, lagrange=3.)
quadratic = rr.quadratic.linear(s, coef=0.5)


from regreg.affine import power_L
ltransform = rr.linear_transform(X_1)
singular_value_sq = power_L(X_1)
# the other smooth piece is a quadratic with identity
# for quadratic form, so its lipschitz constant is 1

lipschitz = 1.05 * singular_value_sq / epsilon + 1.1


problem = rr.container(quadratic, 
                       smoothed_hinge_loss, sparsity)
Example #14
    def __init__(self,
                 X,
                 feasible_point,
                 active,
                 active_sign,
                 mean_parameter,  # in R^n
                 noise_variance,
                 randomizer,
                 coef=1.,
                 offset=None,
                 quadratic=None,
                 nstep=10):


        self.n, p = X.shape
        E = 1
        self.q = p-1
        self._X = X
        self.active = active
        self.noise_variance = noise_variance
        self.randomization = randomizer

        self.inactive_conjugate = self.active_conjugate = randomizer.CGF_conjugate
        if self.active_conjugate is None:
            raise ValueError(
                'randomization must know its CGF_conjugate -- currently only isotropic_gaussian and laplace are implemented and are assumed to be randomization with IID coordinates')

        initial = np.zeros(self.n + E, )
        initial[self.n:] = feasible_point

        rr.smooth_atom.__init__(self,
                                (self.n + E,),
                                offset=offset,
                                quadratic=quadratic,
                                initial=initial,
                                coef=coef)

        self.coefs[:] = initial

        nonnegative = nonnegative_softmax_scaled(E)

        opt_vars = np.zeros(self.n + E, bool)
        opt_vars[self.n:] = 1

        self._opt_selector = rr.selector(opt_vars, (self.n + E,))
        self._response_selector = rr.selector(~opt_vars, (self.n + E,))

        self.nonnegative_barrier = nonnegative.linear(self._opt_selector)

        sign = np.zeros((1, 1))
        sign[0:, :] = active_sign
        self.A_active = np.hstack([-X[:, active].T, sign])
        self.active_conj_loss = rr.affine_smooth(self.active_conjugate, self.A_active)

        self.A_in_1 = np.hstack([-X[:, ~active].T, np.zeros((p - 1, 1))])
        self.A_in_2 = np.hstack([np.zeros((self.n, 1)).T, np.ones((1, 1))])
        self.A_inactive = np.vstack([self.A_in_1, self.A_in_2])

        cube_loss = neg_log_cube_probability_fs(self.q, p)
        self.cube_loss = rr.affine_smooth(cube_loss, self.A_inactive)

        self.set_parameter(mean_parameter, noise_variance)

        self.total_loss = rr.smooth_sum([self.active_conj_loss,
                                         self.cube_loss,
                                         self.likelihood_loss,
                                         self.nonnegative_barrier])