예제 #1
0
파일: irwls.py 프로젝트: xuqiang9042/mtag
    def irwls(cls, x, y, update_func, n_blocks, w, slow=False, separators=None):
        '''
        Iteratively re-weighted least squares (IRWLS).

        Parameters
        ----------
        x : np.matrix with shape (n, p)
            Independent variable.
        y : np.matrix with shape (n, 1)
            Dependent variable.
        update_func: function
            Transforms output of np.linalg.lstsq to new weights.
        n_blocks : int
            Number of jackknife blocks (for estimating SE via block jackknife).
        w : np.matrix with shape (n, 1)
            Initial regression weights. Their element-wise square root is
            what gets passed to cls.wls and cls._weight.
        slow : bool
            Use slow block jackknife? (Mostly for testing)
        separators : list or None
            Block jackknife block boundaries (optional).

        Returns
        -------
        jknife : jk.LstsqJackknifeFast (or jk.LstsqJackknifeSlow if slow)
            Block jackknife regression with the final IRWLS weights.

        Raises
        ------
        ValueError
            If y or w does not have shape (n, 1), or if update_func returns
            weights whose shape differs from that of w.

        '''
        # Unpacking also rejects an x that is not two-dimensional.
        n, _ = x.shape
        if y.shape != (n, 1):
            raise ValueError(
                'y has shape {S}. y must have shape ({N}, 1).'.format(S=y.shape, N=n))
        if w.shape != (n, 1):
            raise ValueError(
                'w has shape {S}. w must have shape ({N}, 1).'.format(S=w.shape, N=n))

        w = np.sqrt(w)
        for _ in range(2):  # fixed number of re-weighting passes; update this later
            new_w = np.sqrt(update_func(cls.wls(x, y, w)))
            if new_w.shape != w.shape:
                # Report both shapes in the exception itself rather than on
                # stdout, so the diagnostic travels with the error.
                raise ValueError(
                    'New weights must have same shape. '
                    'IRWLS update: {NEW} {OLD}'.format(NEW=new_w.shape, OLD=w.shape))
            w = new_w

        # Apply the final weights to both sides before the jackknife fit.
        x = cls._weight(x, w)
        y = cls._weight(y, w)
        if slow:
            jknife = jk.LstsqJackknifeSlow(
                x, y, n_blocks, separators=separators)
        else:
            jknife = jk.LstsqJackknifeFast(
                x, y, n_blocks, separators=separators)

        return jknife
예제 #2
0
    def __init__(self, y, x, w, N, M, n_blocks, intercept=None, slow=False, step1_ii=None, old_weights=False):
        '''
        Fit the regression and store the jackknife-derived estimates on self.

        Parameters
        ----------
        y, w, N : 2D arrays with shape (n_snp, 1)
            Response, regression weights and N, one row per SNP.
        x : 2D array with shape (n_snp, n_annot)
            Regressors, one column per annotation.
        M : 2D array with shape (1, n_annot)
        n_blocks : int
            Number of jackknife blocks, forwarded to the jackknife/IRWLS.
        intercept : number or None
            If not None the intercept is constrained: it is subtracted from y
            and no intercept column is appended.
        slow : bool
            Forwarded to IRWLS.
        step1_ii : array or None
            Row selector for step 1 of the two-step estimator.
        old_weights : bool
            If True, skip IRWLS and run a single jackknife fit using the
            initial weights.

        Raises
        ------
        TypeError
            If an array argument has no ``shape`` or is not 2D.
        ValueError
            On shape mismatches, or if step1_ii is combined with a constrained
            intercept or with more than one annotation.
        '''
        # Each array argument must expose a two-dimensional ``shape``.
        for arg in (y, x, w, M, N):
            try:
                n_dims = len(arg.shape)
            except AttributeError:
                raise TypeError('Arguments must be arrays.')
            if n_dims != 2:
                raise TypeError('Arguments must be 2D arrays.')

        n_snp, self.n_annot = x.shape
        column_shape = (n_snp, 1)
        if any(arg.shape != column_shape for arg in (y, w, N)):
            raise ValueError(
                'N, weights and response (z1z2 or chisq) must have shape (n_snp, 1).')
        if M.shape != (1, self.n_annot):
            raise ValueError('M must have shape (1, n_annot).')

        M_tot = float(np.sum(M))
        x_tot = np.sum(x, axis=1).reshape((n_snp, 1))
        self.constrain_intercept = intercept is not None
        self.intercept = intercept
        self.n_blocks = n_blocks
        tot_agg = self.aggregate(y, x_tot, N, M_tot, intercept)
        initial_w = self._update_weights(
            x_tot, w, N, M_tot, tot_agg, intercept)
        Nbar = np.mean(N)  # keep condition number low
        x = np.multiply(N, x) / Nbar
        if self.constrain_intercept:
            # Fixed intercept: remove it from the response instead of fitting it.
            yp = y - intercept
            self.intercept_se = 'NA'
        else:
            x = append_intercept(x)
            x_tot = append_intercept(x_tot)
            yp = y
        del y
        self.twostep_filtered = None

        twostep = step1_ii is not None
        if twostep and self.constrain_intercept:
            raise ValueError(
                'twostep is not compatible with constrain_intercept.')
        if twostep and self.n_annot > 1:
            raise ValueError(
                'twostep not compatible with partitioned LD Score yet.')

        if twostep:
            n1 = np.sum(step1_ii)
            self.twostep_filtered = n_snp - n1
            x1 = x[np.squeeze(step1_ii), :]

            def step1_rows(arr):
                # Rows selected for step 1, reshaped back to a column.
                return arr[step1_ii].reshape((n1, 1))

            yp1 = step1_rows(yp)
            w1 = step1_rows(w)
            N1 = step1_rows(N)
            initial_w1 = step1_rows(initial_w)

            def reweight_step1(fit):
                return self._update_func(
                    fit, x1, w1, N1, M_tot, Nbar, ii=step1_ii)

            step1_jknife = IRWLS(
                x1, yp1, reweight_step1, n_blocks, slow=slow, w=initial_w1)
            step1_int, _ = self._intercept(step1_jknife)

            # Step 2: intercept fixed at the step-1 estimate, all SNPs used.
            yp = yp - step1_int
            x = remove_intercept(x)
            x_tot = remove_intercept(x_tot)

            def reweight_step2(fit):
                return self._update_func(
                    fit, x_tot, w, N, M_tot, Nbar, step1_int)

            step2_separators = update_separators(
                step1_jknife.separators, step1_ii)
            step2_jknife = IRWLS(
                x, yp, reweight_step2, n_blocks, slow=slow, w=initial_w,
                separators=step2_separators)
            weighted_x_sum = np.sum(np.multiply(initial_w, x))
            weighted_xsq_sum = np.sum(np.multiply(initial_w, np.square(x)))
            c = weighted_x_sum / weighted_xsq_sum
            jknife = self._combine_twostep_jknives(
                step1_jknife, step2_jknife, M_tot, c, Nbar)
        elif old_weights:
            # Single weighted fit with the initial weights; no re-weighting.
            sqrt_w = np.sqrt(initial_w)
            weighted_x = IRWLS._weight(x, sqrt_w)
            weighted_y = IRWLS._weight(yp, sqrt_w)
            jknife = jk.LstsqJackknifeFast(weighted_x, weighted_y, n_blocks)
        else:
            def reweight(fit):
                return self._update_func(
                    fit, x_tot, w, N, M_tot, Nbar, intercept)

            jknife = IRWLS(
                x, yp, reweight, n_blocks, slow=slow, w=initial_w)

        coef, coef_cov, coef_se = self._coef(jknife, Nbar)
        self.coef, self.coef_cov, self.coef_se = coef, coef_cov, coef_se

        cat, cat_cov, cat_se = self._cat(
            jknife, M, Nbar, self.coef, self.coef_cov)
        self.cat, self.cat_cov, self.cat_se = cat, cat_cov, cat_se

        tot, tot_cov, tot_se = self._tot(self.cat, self.cat_cov)
        self.tot, self.tot_cov, self.tot_se = tot, tot_cov, tot_se

        prop, prop_cov, prop_se = self._prop(
            jknife, M, Nbar, self.cat, self.tot)
        self.prop, self.prop_cov, self.prop_se = prop, prop_cov, prop_se

        enrichment, M_prop = self._enrichment(M, M_tot, self.cat, self.tot)
        self.enrichment, self.M_prop = enrichment, M_prop
        if not self.constrain_intercept:
            self.intercept, self.intercept_se = self._intercept(jknife)

        self.jknife = jknife
        self.tot_delete_values = self._delete_vals_tot(jknife, Nbar, M)
        if not self.constrain_intercept:
            # The intercept is the column right after the annotation columns.
            self.intercept_delete_values = jknife.delete_values[
                :, self.n_annot]

        self.M = M
예제 #3
0
    def __init__(self, y, x, w, N, M, n_blocks, intercept=None, slow=False, step1_ii=None, 
                old_weights=False,
                chr=None,
                verbose=True,
                ridge=False, ridge_lambda=None, standardize_ridge=True, approx_ridge=False,
                nonneg_constraints=None):
        '''
        Fit the regression (optionally two-step or ridge) and store the
        jackknife-derived estimates on self.

        Parameters
        ----------
        y, w, N : 2D arrays with shape (n_snp, 1)
            Response, regression weights and N, one row per SNP.
        x : 2D array with shape (n_snp, n_annot)
            Regressors, one column per annotation.
        M : 2D array with shape (1, n_annot)
        n_blocks : int
            Number of jackknife blocks, forwarded to the jackknife/IRWLS.
        intercept : number or None
            If not None the intercept is constrained: it is subtracted from y
            and no intercept column is appended.
        slow : bool
            Forwarded to IRWLS.
        step1_ii : array or None
            Row selector for step 1 of the two-step estimator.
        old_weights : bool
            If True, skip IRWLS and run a single jackknife fit using the
            initial weights. Required when ridge is True.
        chr :
            Forwarded to the jackknife in the old_weights branch.
        verbose, ridge_lambda, standardize_ridge, approx_ridge, nonneg_constraints :
            Forwarded to jk.Jackknife_Ridge when ridge is True.
        ridge : bool
            Use jk.Jackknife_Ridge instead of jk.LstsqJackknifeFast.

        Raises
        ------
        TypeError
            If an array argument has no ``shape`` or is not 2D.
        ValueError
            On shape mismatches, if step1_ii is combined with ridge, with a
            constrained intercept, or with more than one annotation.
        AssertionError
            If ridge is True but neither step1_ii nor old_weights is set.
        '''
        # Each array argument must expose a two-dimensional ``shape``.
        for i in [y, x, w, M, N]:
            try:
                if len(i.shape) != 2:
                    raise TypeError('Arguments must be 2D arrays.')
            except AttributeError:
                raise TypeError('Arguments must be arrays.')

        n_snp, self.n_annot = x.shape
        if any(i.shape != (n_snp, 1) for i in [y, w, N]):
            raise ValueError(
                'N, weights and response (z1z2 or chisq) must have shape (n_snp, 1).')
        if M.shape != (1, self.n_annot):
            raise ValueError('M must have shape (1, n_annot).')
            
        # Hard-coded off; only forwarded to self._prop below.
        use_bootstrap = False
        
        M_tot = float(np.sum(M))
        x_tot = np.sum(x, axis=1).reshape((n_snp, 1))
        self.constrain_intercept = intercept is not None
        self.intercept = intercept
        self.n_blocks = n_blocks
        tot_agg = self.aggregate(y, x_tot, N, M_tot, intercept)
        initial_w = self._update_weights(
            x_tot, w, N, M_tot, tot_agg, intercept)
        Nbar = np.mean(N)  # keep condition number low
        self.Nbar = Nbar
        x = np.multiply(N, x) / Nbar
        if not self.constrain_intercept:
            x, x_tot = append_intercept(x), append_intercept(x_tot)
            yp = y
        else:
            # Fixed intercept: remove it from the response instead of fitting it.
            yp = y - intercept
            self.intercept_se = 'NA'
        del y
        self.twostep_filtered = None
        
        if ridge and step1_ii is not None:
            raise ValueError(
                'Ridge regression cannot be used with a two-step estimator')
        
        if step1_ii is not None and self.constrain_intercept:
            raise ValueError(
                'twostep is not compatible with constrain_intercept.')
        elif step1_ii is not None and self.n_annot > 1:
            raise ValueError(
                'twostep not compatible with partitioned LD Score yet.')
        elif step1_ii is not None:
            # Two-step estimator. Step 1 fits on the rows selected by step1_ii.
            n1 = np.sum(step1_ii)
            self.twostep_filtered = n_snp - n1
            x1 = x[np.squeeze(step1_ii), :]
            yp1, w1, N1, initial_w1 = map(
                lambda a: a[step1_ii].reshape((n1, 1)), (yp, w, N, initial_w))
            update_func1 = lambda a: self._update_func(
                a, x1, w1, N1, M_tot, Nbar, ii=step1_ii)
            step1_jknife = IRWLS(
                x1, yp1, update_func1, n_blocks, slow=slow, w=initial_w1)
            step1_int, _ = self._intercept(step1_jknife)
            # Step 2: subtract the step-1 intercept and refit on all rows
            # without an intercept column.
            yp = yp - step1_int
            x = remove_intercept(x)
            x_tot = remove_intercept(x_tot)
            update_func2 = lambda a: self._update_func(
                a, x_tot, w, N, M_tot, Nbar, step1_int)
            s = update_separators(step1_jknife.separators, step1_ii)
            step2_jknife = IRWLS(
                x, yp, update_func2, n_blocks, slow=slow, w=initial_w, separators=s)
            c = np.sum(np.multiply(initial_w, x)) / \
                np.sum(np.multiply(initial_w, np.square(x)))
            jknife = self._combine_twostep_jknives(
                step1_jknife, step2_jknife, M_tot, c, Nbar)

        elif old_weights:
        
            # Keep the unweighted design matrix: the ridge path below
            # mean-centers it before applying the weights.
            if ridge and (not self.constrain_intercept):
                x_orig = x.copy()
        
            initial_w = np.sqrt(initial_w)
            x = IRWLS._weight(x, initial_w)
            y = IRWLS._weight(yp, initial_w)
            
            if not ridge:
                jknife = jk.LstsqJackknifeFast(x, y, n_blocks, chr=chr)            
                    
            else:            
                # Mean-center x if an intercept exists (for ridge regression numerical stability).
                # Note this should only affect the intercept estimate (which we don't care about anyhow).
                if self.constrain_intercept or not standardize_ridge:
                    xc = x
                else:                
                    x_mean = x_orig.mean(axis=0)
                    # Leave the last column uncentered -- presumably the
                    # intercept column added by append_intercept; TODO confirm.
                    x_mean[-1]=0.0
                    xc = x_orig - x_mean
                    xc = IRWLS._weight(xc, initial_w)
                
                jknife = jk.Jackknife_Ridge(xc, y, n_blocks,
                    chr=chr, verbose=verbose, ridge_lambda=ridge_lambda,
                    has_intercept=(not self.constrain_intercept),
                    standardize=standardize_ridge, approx_ridge=approx_ridge,
                    nonneg_constraints=nonneg_constraints)
            
            # NOTE(review): read for both the ridge and non-ridge jackknife, so
            # this assumes jk.LstsqJackknifeFast also exposes ooc_score --
            # confirm. self.ooc_score is not set by the other branches.
            self.ooc_score = jknife.ooc_score
            
        else:
            # NOTE(review): assert is stripped under -O, so ridge without
            # old_weights would then silently fall through to plain IRWLS.
            assert not ridge
        
            update_func = lambda a: self._update_func(
                a, x_tot, w, N, M_tot, Nbar, intercept)
            jknife = IRWLS(
                x, yp, update_func, n_blocks, slow=slow, w=initial_w)

        
        # Derive every reported quantity from the fitted jackknife.
        self.coef, self.coef_cov, self.coef_se = self._coef(jknife, Nbar)
        self.cat, self.cat_cov, self.cat_se =\
            self._cat(jknife, M, Nbar, self.coef, self.coef_cov)
            
        self.tot, self.tot_cov, self.tot_se = self._tot(self.cat, self.cat_cov)
        self.prop, self.prop_cov, self.prop_se =\
            self._prop(jknife, M, Nbar, self.cat, self.tot, use_bootstrap=use_bootstrap)

        self.enrichment, self.M_prop = self._enrichment(
            M, M_tot, self.cat, self.tot)
        if not self.constrain_intercept:
            self.intercept, self.intercept_se = self._intercept(jknife)

        self.jknife = jknife
        self.tot_delete_values = self._delete_vals_tot(jknife, Nbar, M)
        self.part_delete_values = self._delete_vals_part(jknife, Nbar, M)
        if not self.constrain_intercept:
            # The intercept is the column right after the annotation columns.
            self.intercept_delete_values = jknife.delete_values[
                :, self.n_annot]

        self.M = M
예제 #4
0
    def __init__(self,
                 y,
                 x,
                 g_ld,
                 w,
                 N,
                 M,
                 G,
                 ave_h2_cis,
                 n_blocks,
                 intercept=None,
                 slow=False):
        '''
        Fit a single weighted jackknife regression on the annotation columns
        of x plus the extra columns of g_ld, and store the estimates on self.

        Estimates are stored three ways: for all columns combined, for the
        x columns only (``*_dir``) and for the g_ld columns only (``*_g``).

        Parameters
        ----------
        y, w, N : 2D arrays with shape (n_snp, 1)
            Response, regression weights and N, one row per SNP.
        x : 2D array with shape (n_snp, n_only_annot)
        g_ld : 2D array whose columns are stacked to the right of x
        M : 2D array with shape (1, n_only_annot)
        G, ave_h2_cis : 2D arrays
            Multiplied element-wise and reshaped to (1, n_g_annot).
        n_blocks : int
            Number of jackknife blocks.
        intercept : number or None
            Passed to self.aggregate and self._update_weights. An intercept
            column is always appended and self.intercept is overwritten with
            the fitted value.
        slow : bool
            Not used by this method.

        Raises
        ------
        TypeError
            If an array argument has no ``shape`` or is not 2D.
        ValueError
            If y, w or N is not (n_snp, 1), or M is not (1, n_only_annot).
        '''
        # Each array argument must expose a two-dimensional ``shape``.
        for arg in (y, x, g_ld, w, M, N, G, ave_h2_cis):
            try:
                n_dims = len(arg.shape)
            except AttributeError:
                raise TypeError('Arguments must be arrays.')
            if n_dims != 2:
                raise TypeError('Arguments must be 2D arrays.')

        n_snp, n_dir = x.shape
        self.n_only_annot = n_dir
        self.n_g_annot = g_ld.shape[1]
        self.n_annot = self.n_only_annot + self.n_g_annot
        column_shape = (n_snp, 1)
        if any(arg.shape != column_shape for arg in (y, w, N)):
            raise ValueError(
                'N, weights and response (z1z2 or chisq) must have shape (n_snp, 1).'
            )
        if M.shape != (1, self.n_only_annot):
            raise ValueError('M must have shape (1, n_annot).')

        M_tot = float(np.sum(M))
        G_tot = float(np.sum(G))
        G_mult = np.multiply(G, ave_h2_cis).reshape((1, self.n_g_annot))
        M_combined = np.hstack((M, G_mult)).reshape((1, self.n_annot))

        x_tot = np.sum(x, axis=1).reshape((n_snp, 1))
        self.constrain_intercept = intercept is not None
        self.intercept = intercept
        self.n_blocks = n_blocks
        tot_agg = self.aggregate(y, x_tot, N, M_tot, intercept)
        initial_w = self._update_weights(x_tot, w, N, M_tot, tot_agg,
                                         intercept)

        # Design matrix: x columns, then g_ld columns, then the intercept.
        stacked = np.hstack((x, g_ld))
        Nbar = np.mean(N)  # keep condition number low
        design = np.multiply(N, stacked) / Nbar
        design = append_intercept(design)
        x_tot = append_intercept(x_tot)

        # Single weighted fit with the initial weights; no re-weighting.
        sqrt_w = np.sqrt(initial_w)
        weighted_x = IRWLS._weight(design, sqrt_w)
        weighted_y = IRWLS._weight(y, sqrt_w)
        jknife = jk.LstsqJackknifeFast(weighted_x, weighted_y, n_blocks)

        # Coefficients: combined, then split into x-only and g_ld-only parts.
        self.coef, self.coef_cov, self.coef_se = self._coef(jknife, Nbar)
        coef_dir = self.coef[:n_dir]
        coef_dir_cov = self.coef_cov[:n_dir, :n_dir]
        coef_dir_se = self.coef_se[:n_dir]
        self.coef_dir = coef_dir
        self.coef_dir_cov = coef_dir_cov
        self.coef_dir_se = coef_dir_se
        coef_g = self.coef[n_dir:]
        coef_g_cov = self.coef_cov[n_dir:, n_dir:]
        coef_g_se = self.coef_se[n_dir:]
        self.coef_g = coef_g
        self.coef_g_cov = coef_g_cov
        self.coef_g_se = coef_g_se

        # Per-category estimates, split the same way.
        self.cat, self.cat_cov, self.cat_se = self._cat(
            jknife, M_combined, Nbar, self.coef, self.coef_cov)
        cat_dir = self.cat[:, :n_dir]
        cat_dir_cov = self.cat_cov[:n_dir, :n_dir]
        cat_dir_se = self.cat_se[:n_dir]
        self.cat_dir = cat_dir
        self.cat_dir_cov = cat_dir_cov
        self.cat_dir_se = cat_dir_se
        cat_g = self.cat[:, n_dir:]
        cat_g_cov = self.cat_cov[n_dir:, n_dir:]
        cat_g_se = self.cat_se[n_dir:]
        self.cat_g = cat_g
        self.cat_g_cov = cat_g_cov
        self.cat_g_se = cat_g_se

        self.tot, self.tot_cov, self.tot_se = self._tot(self.cat, self.cat_cov)
        self.tot_dir, self.tot_dir_cov, self.tot_dir_se = self._tot(
            self.cat_dir, self.cat_dir_cov)
        self.tot_g, self.tot_g_cov, self.tot_g_se = self._tot(
            self.cat_g, self.cat_g_cov)

        self.prop, self.prop_cov, self.prop_se = self._prop(
            jknife, M_combined, Nbar, self.cat, self.tot, self.n_annot)
        self.prop_dir, self.prop_dir_cov, self.prop_dir_se = self._prop(
            jknife, M, Nbar, self.cat_dir, self.tot_dir, self.n_only_annot)
        self.prop_g, self.prop_g_cov, self.prop_g_se = self._prop(
            jknife, G_mult, Nbar, self.cat_g, self.tot_g, self.n_only_annot,
            g_annot=True)

        prop_head = self.prop[:, :n_dir]
        prop_cov_head = self.prop_cov[:n_dir, :n_dir]
        self.tot_dir_prop, self.tot_dir_prop_cov, self.tot_dir_prop_se = \
            self._tot(prop_head, prop_cov_head)
        prop_tail = self.prop[:, n_dir:]
        prop_cov_tail = self.prop_cov[n_dir:, n_dir:]
        self.tot_g_prop, self.tot_g_prop_cov, self.tot_g_prop_se = \
            self._tot(prop_tail, prop_cov_tail)

        self.enrichment_dir, self.enrichment_dir_se, self.M_prop_dir = \
            self._enrichment(M, M_tot, self.prop_dir, self.prop_dir_se)
        self.enrichment_g_herit, self.enrichment_g_herit_se, _ = \
            self._enrichment(G, G_tot, self.prop_g, self.prop_g_se)

        self.intercept, self.intercept_se = self._intercept(jknife)

        self.jknife = jknife
        self.tot_delete_values = self._delete_vals_tot(
            jknife, Nbar, M_combined)
        self.part_delete_values = self._delete_vals_part(
            jknife, Nbar, M_combined)
        # The intercept is the column right after all annotation columns.
        self.intercept_delete_values = jknife.delete_values[:, self.n_annot]

        self.M = M