def setup_sampler(self,
                      score_mean,
                      scaling=1,
                      solve_args={
                          'min_its': 20,
                          'tol': 1.e-10
                      }):
        """Bootstrap the score covariance and record the reference value.

        The score function returned by ``pairs_bootstrap_glm`` (active set
        ``self._overall``, inactive set its complement) is passed to
        ``bootstrap_cov`` together with a sampler that draws row indices
        with replacement.  The result is stored in ``self.score_cov`` and
        its inverse in ``self.score_cov_inv``.

        Parameters
        ----------
        score_mean :
            Stored unchanged as ``self.reference``.
        scaling, solve_args :
            Accepted for interface compatibility; not read in this body.
        """
        design, _ = self.loss.data
        n_obs, _ = design.shape

        def resample_rows():
            # one bootstrap draw: n_obs row indices, sampled with replacement
            return np.random.choice(n_obs, size=(n_obs, ), replace=True)

        bootstrap_result = pairs_bootstrap_glm(self.loss,
                                               self._overall,
                                               beta_full=self._beta_full,
                                               inactive=~self._overall)

        self.score_cov = bootstrap_cov(resample_rows, bootstrap_result[0])
        self.score_cov_inv = np.linalg.inv(self.score_cov)

        self.reference = score_mean
예제 #2
0
    def solve_approx(self):
        """Solve the problem and cache the reordered quantities it produces.

        After ``self.solve()`` the rows of the optimization and score
        transforms are permuted so that the coordinates flagged in
        ``self._overall`` come first and the remaining ones follow.  The
        score covariance is then built according to ``self.estimation``:

        * ``'parametric'`` -- block-diagonal matrix computed from the design
          matrix found in ``self.loss.data``;
        * ``'bootstrap'`` -- ``bootstrap_cov`` applied to the score function
          returned by ``pairs_bootstrap_glm``.

        Results are stored on ``self`` (``score_cov``, ``score_cov_inv``,
        ``target_cov``, ``A``/``B`` and their active/inactive sub-blocks,
        ``offset_active``, ...); nothing is returned.

        Raises
        ------
        ValueError
            If ``self.estimation`` is neither ``'parametric'`` nor
            ``'bootstrap'``.
        """
        self.solve()

        active = self._overall
        inactive = ~active

        # Put the active rows first in the optimization transform.
        opt_linear, opt_affine = self.opt_transform
        self._opt_linear_term = np.concatenate(
            (opt_linear[active, :], opt_linear[inactive, :]), 0)
        self._opt_affine_term = np.concatenate(
            (opt_affine[active], opt_affine[inactive]), 0)
        self.opt_transform = (self._opt_linear_term, self._opt_affine_term)

        # Same row permutation for the score transform; its offset is reset
        # to zeros.
        score_linear, _ = self.score_transform
        self._score_linear_term = np.concatenate(
            (score_linear[active, :], score_linear[inactive, :]), 0)
        self.score_transform = (self._score_linear_term,
                                np.zeros(self._score_linear_term.shape[0]))

        self.feasible_point = np.append(self.observed_score_state,
                                        np.abs(self.initial_soln[active]))

        lagrange = self.penalty._weight_array
        self.inactive_lagrange = lagrange[inactive]

        X, _ = self.loss.data
        n, p = X.shape
        self.p = p
        nactive = active.sum()
        self.nactive = nactive

        self.target_observed = self.observed_score_state[:nactive]

        if self.estimation == 'parametric':
            X_active = X[:, active]
            X_inactive = X[:, inactive]
            # The inverse Gram matrix is needed twice; compute it once.
            inv_X_active = np.linalg.inv(X_active.T.dot(X_active))
            projection_X_active = X_active.dot(inv_X_active).dot(X_active.T)

            score_cov = np.zeros((p, p))
            score_cov[:nactive, :nactive] = inv_X_active
            score_cov[nactive:, nactive:] = X_inactive.T.dot(
                np.identity(n) - projection_X_active).dot(X_inactive)

        elif self.estimation == 'bootstrap':
            bootstrap_score = pairs_bootstrap_glm(self.loss,
                                                  active,
                                                  beta_full=self._beta_full,
                                                  inactive=inactive)[0]
            score_cov = bootstrap_cov(
                lambda: np.random.choice(n, size=(n, ), replace=True),
                bootstrap_score)

        else:
            # Previously this fell through and crashed later with an
            # UnboundLocalError on ``score_cov``.
            raise ValueError(
                "estimation must be 'parametric' or 'bootstrap', got %r"
                % (self.estimation, ))

        self.score_cov = score_cov
        self.target_cov = score_cov[:nactive, :nactive]
        self.score_cov_inv = np.linalg.inv(self.score_cov)

        self.B = self._opt_linear_term
        self.A = self._score_linear_term

        self.B_active = self.B[:nactive, :nactive]
        self.B_inactive = self.B[nactive:, :nactive]

        self.A_active = self._score_linear_term[:nactive, :]
        self.A_inactive = self._score_linear_term[nactive:, :]

        self.offset_active = self._opt_affine_term[:nactive]
예제 #3
0
    def solve_approx(self):
        """Solve, set up the sampler and cache bootstrap-based covariances.

        Runs ``self.solve()`` and ``self.setup_sampler()``, builds the boolean
        mask ``self._overall`` from ``self.selection_variable['variables']``,
        reorders the rows of the optimization linear term so that masked rows
        come first, and stores the target covariance together with its cross
        covariance with the score, both obtained from ``bootstrap_cov``.
        Nothing is returned; all results are attributes of ``self``.
        """
        self.solve()
        self.setup_sampler()

        n_coords = self.inactive.sum()
        self.feasible_point = self.observed_scaling

        # Boolean mask with the selected variables switched on.
        self._overall = np.zeros(n_coords, dtype=bool)
        self._overall[self.selection_variable['variables']] = 1

        self.observed_opt_state = np.hstack(
            [self.observed_scaling, self.observed_subgradients])

        # First column: the maximizing subgradient; the remaining columns
        # come from ``losing_padding_map``.
        subgrad_column = np.atleast_2d(self.maximizing_subgrad).T
        stacked_linear = np.concatenate(
            (subgrad_column, self.losing_padding_map), 1)
        masked_rows = stacked_linear[self._overall, :]
        other_rows = stacked_linear[~self._overall, :]
        self._opt_linear_term = np.concatenate((masked_rows, other_rows), 0)

        self.opt_transform = (self._opt_linear_term, np.zeros(n_coords))

        (self._score_linear_term, _) = self.score_transform

        self.inactive_lagrange = (self.observed_scaling *
                                  self.penalty.weights[0] *
                                  np.ones(n_coords - 1))

        design, _ = self.loss.data
        n_obs, n_features = design.shape
        self.p = n_features

        score_result = pairs_bootstrap_glm(self.loss,
                                           self.active,
                                           inactive=~self.active)
        bootstrap_score = score_result[0]

        bootstrap_target, target_observed = pairs_bootstrap_glm(
            self.loss, self._overall, beta_full=None, inactive=None)

        def sampler():
            # one bootstrap draw: row indices sampled with replacement
            return np.random.choice(n_obs, size=(n_obs, ), replace=True)

        self.target_cov, target_score_cov = bootstrap_cov(
            sampler, bootstrap_target, cross_terms=(bootstrap_score, ))
        self.score_target_cov = np.atleast_2d(target_score_cov).T
        self.target_observed = target_observed

        n_masked = self._overall.sum()
        self.nactive = n_masked

        self.B_active = self._opt_linear_term[:n_masked, :n_masked]
        self.B_inactive = self._opt_linear_term[n_masked:, :n_masked]
    def solve_approx(self):
        """Solve, set up the sampler and cache boundary-first quantities.

        Rows of the optimization and score transforms are permuted so that
        the ``self.boundary`` rows precede the ``self.interior`` rows.  The
        score covariance is estimated with ``bootstrap_cov`` and sliced into
        ``self.score_target_cov`` and ``self.target_cov``.  The shapes of the
        two row groups are printed.  Nothing is returned.
        """
        self.solve()
        self.setup_sampler()
        self.observed_score_state = self.observed_internal_state

        self.feasible_point = np.ones(self.boundary.sum())

        (opt_linear, opt_offset) = self.opt_transform
        boundary_rows = opt_linear[self.boundary, :]
        interior_rows = opt_linear[self.interior, :]
        print("shapes", boundary_rows.shape, interior_rows.shape)

        self._opt_linear_term = np.concatenate(
            (boundary_rows, interior_rows), 0)
        self._opt_affine_term = np.concatenate(
            (opt_offset[self.boundary], opt_offset[self.interior]), 0)
        self.opt_transform = (self._opt_linear_term, self._opt_affine_term)

        # Same permutation for the score transform; its offset becomes zeros.
        (score_linear, _) = self.score_transform
        self._score_linear_term = np.concatenate(
            (score_linear[self.boundary, :],
             score_linear[self.interior, :]), 0)
        zero_offset = np.zeros(self._score_linear_term.shape[0])
        self.score_transform = (self._score_linear_term, zero_offset)

        self._overall = self.boundary
        n_off_boundary = np.sum(~self.boundary)
        self.inactive_lagrange = self.threshold[0] * np.ones(n_off_boundary)

        design, _ = self.loss.data
        n_obs, n_features = design.shape
        self.p = n_features

        bootstrap_score = pairs_bootstrap_glm(self.loss,
                                              self._overall,
                                              beta_full=self._beta_full,
                                              inactive=~self._overall)[0]

        def resample_rows():
            # one bootstrap draw: row indices sampled with replacement
            return np.random.choice(n_obs, size=(n_obs, ), replace=True)

        score_cov = bootstrap_cov(resample_rows, bootstrap_score)

        n_boundary = self._overall.sum()
        self.score_target_cov = score_cov[:, :n_boundary]
        self.target_cov = score_cov[:n_boundary, :n_boundary]
        self.target_observed = self.observed_score_state[:n_boundary]
        self.nactive = n_boundary

        self.B_active = self._opt_linear_term[:n_boundary, :n_boundary]
        self.B_inactive = self._opt_linear_term[n_boundary:, :n_boundary]
예제 #5
0
    def solve_approx(self):
        """Solve the problem and cache the reordered transforms and score covariance.

        After ``self.solve()`` the rows of the optimization and score
        transforms are permuted so that coordinates flagged in
        ``self._overall`` come first.  ``self.setup_sampler()`` is expected to
        return a pair, stored as ``self.bootstrap_score`` and
        ``self.randomization_cov``.  The score covariance then depends on
        ``self.estimation``:

        * ``'parametric'`` -- block-diagonal matrix computed from the design
          matrix found in ``self.loss.data``;
        * ``'bootstrap'`` -- ``bootstrap_cov`` applied to
          ``self.bootstrap_score``.

        Nothing is returned; results are attributes of ``self``.

        Raises
        ------
        ValueError
            If ``self.estimation`` is neither ``'parametric'`` nor
            ``'bootstrap'``.
        """
        self.solve()

        active = self._overall
        inactive = ~active

        self.nactive = active.sum()
        X, _ = self.loss.data
        n, p = X.shape
        self.p = p
        self.target_observed = self.observed_score_state[:self.nactive]

        # Score state, then |active optimization variables|, then the
        # remaining optimization variables as observed.
        self.feasible_point = np.concatenate(
            [self.observed_score_state,
             np.fabs(self.observed_opt_state[:self.nactive]),
             self.observed_opt_state[self.nactive:]], axis=0)

        opt_linear, opt_affine = self.opt_transform
        self._opt_linear_term = np.concatenate(
            (opt_linear[active, :], opt_linear[inactive, :]), 0)
        self._opt_affine_term = np.concatenate(
            (opt_affine[active], opt_affine[inactive]), 0)
        self.opt_transform = (self._opt_linear_term, self._opt_affine_term)

        score_linear, _ = self.score_transform
        self._score_linear_term = np.concatenate(
            (score_linear[active, :], score_linear[inactive, :]), 0)
        self.score_transform = (self._score_linear_term,
                                np.zeros(self._score_linear_term.shape[0]))

        lagrange = self.penalty._weight_array
        self.inactive_lagrange = lagrange[inactive]

        self.bootstrap_score, self.randomization_cov = self.setup_sampler()

        if self.estimation == 'parametric':
            X_active = X[:, active]
            X_inactive = X[:, inactive]
            # The inverse Gram matrix is needed twice; compute it once.
            inv_X_active = np.linalg.inv(X_active.T.dot(X_active))
            projection_X_active = X_active.dot(inv_X_active).dot(X_active.T)

            score_cov = np.zeros((p, p))
            score_cov[:self.nactive, :self.nactive] = inv_X_active
            score_cov[self.nactive:, self.nactive:] = X_inactive.T.dot(
                np.identity(n) - projection_X_active).dot(X_inactive)

        elif self.estimation == 'bootstrap':
            score_cov = bootstrap_cov(
                lambda: np.random.choice(n, size=(n, ), replace=True),
                self.bootstrap_score)

        else:
            # Previously this fell through and crashed later with an
            # UnboundLocalError on ``score_cov``.
            raise ValueError(
                "estimation must be 'parametric' or 'bootstrap', got %r"
                % (self.estimation, ))

        self.score_cov = score_cov
        self.score_cov_inv = np.linalg.inv(self.score_cov)
예제 #6
0
    def solve_approx(self):
        """Solve the problem and cache reordered transforms and bootstrap covariances.

        After ``self.solve()`` the rows of the optimization and score
        transforms are permuted so that coordinates flagged in
        ``self._overall`` come first.  The score covariance is estimated with
        ``bootstrap_cov`` from the score function returned by
        ``pairs_bootstrap_glm`` and sliced into ``self.score_target_cov`` and
        ``self.target_cov``.  Nothing is returned.
        """
        self.solve()

        active = self._overall
        inactive = ~active

        (opt_linear, opt_affine) = self.opt_transform
        self._opt_linear_term = np.concatenate(
            (opt_linear[active, :], opt_linear[inactive, :]), 0)
        self._opt_affine_term = np.concatenate(
            (opt_affine[active], opt_affine[inactive]), 0)
        self.opt_transform = (self._opt_linear_term, self._opt_affine_term)

        # Same permutation for the score transform; its offset becomes zeros.
        (score_linear, _) = self.score_transform
        self._score_linear_term = np.concatenate(
            (score_linear[active, :], score_linear[inactive, :]), 0)
        self.score_transform = (self._score_linear_term,
                                np.zeros(self._score_linear_term.shape[0]))

        self.feasible_point = np.abs(self.initial_soln[active])

        # ``items()`` works on both Python 2 and 3 (``iteritems()`` is
        # Python 2 only) and yields the weights in the mapping's own
        # iteration order, exactly as before.
        lagrange = np.asarray(
            [weight for _, weight in self.penalty.weights.items()])
        self.inactive_lagrange = lagrange[inactive]

        X, _ = self.loss.data
        n, p = X.shape
        self.p = p
        bootstrap_score = pairs_bootstrap_glm(self.loss,
                                              active,
                                              beta_full=self._beta_full,
                                              inactive=inactive)[0]

        score_cov = bootstrap_cov(
            lambda: np.random.choice(n, size=(n, ), replace=True),
            bootstrap_score)
        nactive = active.sum()
        self.score_target_cov = score_cov[:, :nactive]
        self.target_cov = score_cov[:nactive, :nactive]
        self.target_observed = self.observed_score_state[:nactive]
        self.nactive = nactive

        self.B_active = self._opt_linear_term[:nactive, :nactive]
        self.B_inactive = self._opt_linear_term[nactive:, :nactive]
예제 #7
0
def test_overall_null_two_queries():
    """Run two randomized group-lasso queries and test the selected nulls jointly.

    A logistic instance (n=200, p=20, s=5) is fit twice with
    ``glm_group_lasso``.  When the union of the two selected sets contains
    every truly nonzero coefficient, a projected Langevin sampler is run over
    the selected null targets plus both optimization states, and a p-value
    for the norm of the observed target is read off the sampled norms.

    Returns ``(pval, False)``; returns ``None`` when the union misses a true
    signal or when no null coefficient was selected.
    """
    s, n, p = 5, 200, 20

    randomizer = randomization.laplace((p, ), scale=0.5)
    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0.1, snr=14)

    nonzero = np.where(beta)[0]
    lam_frac = 1.

    loss = rr.glm.logistic(X, y)
    epsilon = 1. / np.sqrt(n)

    coin_flips = np.random.binomial(1, 1. / 2, (n, 10000))
    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, coin_flips)).max(0))
    W = np.ones(p) * lam
    W[0] = 0  # use at least some unpenalized
    group_weights = dict(zip(np.arange(p), W))
    penalty = rr.group_lasso(np.arange(p), weights=group_weights, lagrange=1.)

    # first randomization
    M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)
    M_est1.solve()
    bootstrap_score1 = M_est1.setup_sampler(scaling=2.)

    # second randomization
    M_est2 = glm_group_lasso(loss, epsilon, penalty, randomizer)
    M_est2.solve()
    bootstrap_score2 = M_est2.setup_sampler(scaling=2.)

    # the target is the union of the two active sets
    active = (M_est1.selection_variable['variables'] +
              M_est2.selection_variable['variables'])

    if not set(nonzero).issubset(np.nonzero(active)[0]):
        return None

    boot_target, target_observed = pairs_bootstrap_glm(loss, active)

    # target are all true null coefficients selected
    sampler = lambda: np.random.choice(n, size=(n, ), replace=True)
    target_cov, cov1, cov2 = bootstrap_cov(
        sampler, boot_target,
        cross_terms=(bootstrap_score1, bootstrap_score2))

    active_set = np.nonzero(active)[0]
    # positions (within the active set) of selected true nulls
    I = [
        i for i in np.arange(active_set.shape[0])
        if active_set[i] not in nonzero
    ]

    # is it enough only to bootstrap the inactive ones?
    # seems so...
    if not I:
        return None

    null_cov = target_cov[I][:, I]
    A1, b1 = M_est1.linear_decomposition(cov1[I], null_cov,
                                         target_observed[I])
    A2, b2 = M_est2.linear_decomposition(cov2[I], target_cov[I][:, I],
                                         target_observed[I])

    target_inv_cov = np.linalg.inv(target_cov[I][:, I])

    initial_state = np.hstack([
        target_observed[I],
        M_est1.observed_opt_state,
        M_est2.observed_opt_state,
    ])

    # layout of the joint state: [target | opt state 1 | opt state 2]
    ntarget = len(I)
    target_slice = slice(0, ntarget)
    opt_slice1 = slice(ntarget, p + ntarget)
    opt_slice2 = slice(p + ntarget, 2 * p + ntarget)

    def target_gradient(state):
        # with many samplers, we will add up the `target_slice` component
        # many target_grads
        # and only once do the Gaussian addition of full_grad
        target = state[target_slice]
        grad1 = M_est1.randomization_gradient(target, (A1, b1),
                                              state[opt_slice1])
        grad2 = M_est2.randomization_gradient(target, (A2, b2),
                                              state[opt_slice2])

        full_grad = np.zeros_like(state)
        full_grad[opt_slice1] = -grad1[1]
        full_grad[opt_slice2] = -grad2[1]
        full_grad[target_slice] -= grad1[0] + grad2[0]
        full_grad[target_slice] -= target_inv_cov.dot(target)

        return full_grad

    def target_projection(state):
        # project each optimization block in place, first query first
        for estimator, block in ((M_est1, opt_slice1),
                                 (M_est2, opt_slice2)):
            state[block] = estimator.projection(state[block])
        return state

    target_langevin = projected_langevin(initial_state, target_gradient,
                                         target_projection,
                                         .5 / (2 * p + 1))

    Langevin_steps = 10000
    burning = 2000
    samples = []
    # burn-in: advance the chain without recording
    for _ in range(burning):
        target_langevin.next()
    # sampling: record the target part of the state after every step
    for _ in range(burning, Langevin_steps):
        target_langevin.next()
        samples.append(target_langevin.state[target_slice].copy())

    test_stat = lambda x: np.linalg.norm(x)
    observed = test_stat(target_observed[I])
    sample_test_stat = np.array([test_stat(draw) for draw in samples])

    family = discrete_family(sample_test_stat,
                             np.ones_like(sample_test_stat))
    pval = family.ccdf(0, observed)
    return pval, False
예제 #8
0
def test_one_inactive_coordinate_handcoded():
    """Test a single selected null coordinate with a hand-coded Langevin sampler.

    A logistic instance (n=200, p=20, s=5) is fit once with
    ``glm_group_lasso``.  When the selected set contains every truly nonzero
    coefficient, the first selected null coordinate is taken as the target, a
    projected Langevin sampler is run over the target plus the optimization
    state, and a two-sided p-value is computed from the samples alongside a
    naive normal p-value.

    Returns ``(pval, naive_pval, False)``; returns ``None`` when the selected
    set misses a true signal or when no null coefficient was selected.
    """
    s, n, p = 5, 200, 20

    randomizer = randomization.laplace((p, ), scale=1.)
    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0.1, snr=14)

    nonzero = np.where(beta)[0]
    lam_frac = 1.

    loss = rr.glm.logistic(X, y)
    epsilon = 1.

    coin_flips = np.random.binomial(1, 1. / 2, (n, 10000))
    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, coin_flips)).max(0))
    W = np.ones(p) * lam
    W += lam * np.arange(p) / 200
    W[0] = 0
    group_weights = dict(zip(np.arange(p), W))
    penalty = rr.group_lasso(np.arange(p), weights=group_weights, lagrange=1.)

    print(lam)

    # our randomization
    M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)
    M_est1.solve()
    bootstrap_score1 = M_est1.setup_sampler()

    active = M_est1.selection_variable['variables']
    if not set(nonzero).issubset(np.nonzero(active)[0]):
        return None

    boot_target, target_observed = pairs_bootstrap_glm(loss, active)

    # target are all true null coefficients selected
    sampler = lambda: np.random.choice(n, size=(n, ), replace=True)
    target_cov, cov1 = bootstrap_cov(sampler,
                                     boot_target,
                                     cross_terms=(bootstrap_score1, ))

    # have checked that covariance up to here agrees with other test_glm_langevin example

    active_set = np.nonzero(active)[0]
    # positions (within the active set) of selected true nulls
    I = [
        i for i in np.arange(active_set.shape[0])
        if active_set[i] not in nonzero
    ]

    # is it enough only to bootstrap the inactive ones?
    # seems so...
    if not I:
        return None

    # take the first inactive one
    I = I[:1]
    A1, b1 = M_est1.linear_decomposition(cov1[I], target_cov[I][:, I],
                                         target_observed[I])

    print(I, 'I', target_observed[I])
    target_inv_cov = np.linalg.inv(target_cov[I][:, I])

    initial_state = np.hstack(
        [target_observed[I], M_est1.observed_opt_state])

    # layout of the joint state: [target | opt state]
    ntarget = len(I)
    target_slice = slice(0, ntarget)
    opt_slice1 = slice(ntarget, p + ntarget)

    def target_gradient(state):
        # with many samplers, we will add up the `target_slice` component
        # many target_grads
        # and only once do the Gaussian addition of full_grad
        target = state[target_slice]
        grad1 = M_est1.randomization_gradient(target, (A1, b1),
                                              state[opt_slice1])

        full_grad = np.zeros_like(state)
        full_grad[opt_slice1] = -grad1[1]
        full_grad[target_slice] -= grad1[0]
        full_grad[target_slice] -= target_inv_cov.dot(target)

        return full_grad

    def target_projection(state):
        # project the optimization block in place
        state[opt_slice1] = M_est1.projection(state[opt_slice1])
        return state

    target_langevin = projected_langevin(initial_state, target_gradient,
                                         target_projection, 1. / p)

    Langevin_steps = 10000
    burning = 2000
    samples = []
    # burn-in: steps 0..burning inclusive are not recorded
    for _ in range(burning + 1):
        target_langevin.next()
    # sampling: record the target part of the state after every step
    for _ in range(burning + 1, Langevin_steps + burning):
        target_langevin.next()
        samples.append(target_langevin.state[target_slice].copy())

    test_stat = lambda x: x
    observed = test_stat(target_observed[I])
    sample_test_stat = np.array([test_stat(draw) for draw in samples])

    family = discrete_family(sample_test_stat,
                             np.ones_like(sample_test_stat))
    pval = family.ccdf(0, observed)
    pval = 2 * min(pval, 1 - pval)

    # naive comparison: plain z-score of the same coordinate
    _i = I[0]
    naive_Z = target_observed[_i] / np.sqrt(target_cov[_i, _i])
    naive_pval = ndist.sf(np.fabs(naive_Z))
    naive_pval = 2 * min(naive_pval, 1 - naive_pval)
    print('naive Z', naive_Z, naive_pval)
    return pval, naive_pval, False
    def solve_approx(self):
        """Solve the problem and cache the reordered quantities it produces.

        After ``self.solve()`` the rows of the optimization and score
        transforms are permuted so that the coordinates flagged in
        ``self._overall`` come first and the remaining ones follow.  The
        score covariance is then built according to ``self.estimation``:

        * ``'parametric'`` -- block-diagonal matrix computed from the design
          matrix in ``self.loss.data`` with observation weights
          ``exp(eta) / (1 + exp(eta))**2``, where ``eta`` is the active design
          times ``self.target_observed``;
        * ``'bootstrap'`` -- ``bootstrap_cov`` applied to the score function
          returned by ``pairs_bootstrap_glm``.

        Results are stored on ``self`` (``score_cov``, ``score_cov_inv``,
        ``target_cov``, ``A``/``B`` and their active/inactive sub-blocks,
        ``offset_active``, ...); nothing is returned.

        Raises
        ------
        ValueError
            If ``self.estimation`` is neither ``'parametric'`` nor
            ``'bootstrap'``.
        """
        self.solve()

        active = self._overall
        inactive = ~active

        # Put the active rows first in the optimization transform.
        (opt_linear, opt_affine) = self.opt_transform
        self._opt_linear_term = np.concatenate(
            (opt_linear[active, :], opt_linear[inactive, :]), 0)
        self._opt_affine_term = np.concatenate(
            (opt_affine[active], opt_affine[inactive]), 0)
        self.opt_transform = (self._opt_linear_term, self._opt_affine_term)

        # Same permutation for the score transform; its offset becomes zeros.
        (score_linear, _) = self.score_transform
        self._score_linear_term = np.concatenate(
            (score_linear[active, :], score_linear[inactive, :]), 0)
        self.score_transform = (self._score_linear_term,
                                np.zeros(self._score_linear_term.shape[0]))

        self.feasible_point = np.append(self.observed_score_state,
                                        np.abs(self.initial_soln[active]))

        # ``items()`` works on both Python 2 and 3 (``iteritems()`` is
        # Python 2 only) and yields the weights in the mapping's own
        # iteration order, exactly as before.
        lagrange = np.asarray(
            [weight for _, weight in self.penalty.weights.items()])
        self.inactive_lagrange = lagrange[inactive]

        X, _ = self.loss.data
        n, p = X.shape
        self.p = p
        nactive = active.sum()
        self.nactive = nactive

        self.target_observed = self.observed_score_state[:nactive]

        if self.estimation == 'parametric':
            X_active = X[:, active]
            X_inactive = X[:, inactive]

            # Per-observation weights exp(eta) / (1 + exp(eta))^2.
            vec = np.exp(X_active.dot(self.target_observed))
            pi = np.true_divide(vec, np.power(1. + vec, 2))
            weights = np.diag(pi)

            Q_active = X_active.T.dot(weights).dot(X_active)
            Q_active_inv = np.linalg.inv(Q_active)

            P_inactive = X_inactive.T.dot(
                np.identity(n) -
                weights.dot(X_active.dot(Q_active_inv).dot(X_active.T)))

            score_cov = np.zeros((p, p))
            score_cov[:nactive, :nactive] = Q_active_inv
            score_cov[nactive:, nactive:] = P_inactive.dot(weights).dot(
                P_inactive.T)

        elif self.estimation == 'bootstrap':
            bootstrap_score = pairs_bootstrap_glm(self.loss,
                                                  active,
                                                  beta_full=self._beta_full,
                                                  inactive=inactive)[0]
            score_cov = bootstrap_cov(
                lambda: np.random.choice(n, size=(n, ), replace=True),
                bootstrap_score)

        else:
            # Previously this fell through and crashed later with an
            # UnboundLocalError on ``score_cov``.
            raise ValueError(
                "estimation must be 'parametric' or 'bootstrap', got %r"
                % (self.estimation, ))

        self.score_cov = score_cov
        self.target_cov = score_cov[:nactive, :nactive]
        self.score_cov_inv = np.linalg.inv(self.score_cov)

        self.B = self._opt_linear_term
        self.A = self._score_linear_term

        self.B_active = self.B[:nactive, :nactive]
        self.B_inactive = self.B[nactive:, :nactive]

        self.A_active = self._score_linear_term[:nactive, :]
        self.A_inactive = self._score_linear_term[nactive:, :]

        self.offset_active = self._opt_affine_term[:nactive]