예제 #1
0
 def compute_statistic(self, alphahat, R, RA, N, Nref, memoize=False):
     """Return the debiased quadratic-form estimate betahat' RA betahat.

     R is ridge-regularized before solving; the bias (trace term / N) is
     recomputed unless memoize=True and a cached value already exists.
     """
     # TODO: should we regularize RA?
     print("regularizing R...")
     regularized = R.add_ridge(self.params.Lambda, renormalize=True)
     needs_bias = (not memoize) or (not hasattr(self, "bias"))
     if needs_bias:
         print("done.computing bias...")
         self.bias = BlockDiag.solve(regularized, RA).trace() / N
         print("bias =", self.bias)
     beta_estimate = BlockDiag.solve(regularized, alphahat)
     return beta_estimate.dot(RA.dot(beta_estimate)) - self.bias
예제 #2
0
 def compute_statistic(self, alphahat, R, RA, N, Nref, memoize=False):
     """Estimate the statistic from marginal effect estimates alphahat,
     subtracting a (possibly memoized) bias term.
     """
     #TODO: should we regularize RA?
     print('regularizing R...')
     ridged = R.add_ridge(self.params.Lambda, renormalize=True)
     # recompute the bias unless memoization is on and a cached value exists
     if not (memoize and hasattr(self, 'bias')):
         print('done.computing bias...')
         self.bias = BlockDiag.solve(ridged, RA).trace() / N
         print('bias =', self.bias)
     bhat = BlockDiag.solve(ridged, alphahat)
     return bhat.dot(RA.dot(bhat)) - self.bias
예제 #3
0
 def compute_statistic(self, alphahat, R, RA, N, Nref, memoize=False):
     """Compute the debiased estimate betahat' RA betahat for one set of
     summary statistics, returning 0 if R turns out to be singular.

     The bias term is recomputed unless memoize=True and a cached
     self.bias already exists.
     """
     try:
         if not memoize or not hasattr(self, "bias"):
             print("computing bias")
             self.bias = BlockDiag.solve(R, RA).trace() / N
             print("bias =", self.bias)
         betahat = BlockDiag.solve(R, alphahat)
         return betahat.dot(RA.dot(betahat)) - self.bias
     # BUGFIX: np.linalg.linalg was an undocumented alias module removed in
     # NumPy 2.0; the public exception is np.linalg.LinAlgError.
     except np.linalg.LinAlgError:
         print("R was singular. Its shape was", R.shape(), "and Nref=", Nref)
         return 0
예제 #4
0
 def compute_statistic(self, alphahat, R, RA, N, Nref, memoize=False):
     """Compute the debiased estimate betahat' RA betahat; fall back to 0
     when R is singular.

     The bias is recomputed unless memoize=True and self.bias is cached.
     """
     try:
         if not memoize or not hasattr(self, 'bias'):
             print('computing bias')
             self.bias = BlockDiag.solve(R, RA).trace() / N
             print('bias =', self.bias)
         betahat = BlockDiag.solve(R, alphahat)
         return betahat.dot(RA.dot(betahat)) - self.bias
     # BUGFIX: use the public np.linalg.LinAlgError; np.linalg.linalg was a
     # deprecated alias module removed in NumPy 2.0.
     except np.linalg.LinAlgError:
         print('R was singular. Its shape was', R.shape(), 'and Nref=',
               Nref)
         return 0
예제 #5
0
 def run(self, beta_num, sim):
     """Compute the ground-truth statistic beta' RA beta for one replicate."""
     RA = pickle.load(self.RA_file())
     raw_beta = pickle.load(sim.beta_file(beta_num))
     blocked_beta = BlockDiag.from_big1darray(raw_beta, RA.ranges())
     truth = blocked_beta.dot(RA.dot(blocked_beta))
     print(truth)
     return [truth]
예제 #6
0
    def run(self, beta_num, sim):
        """Estimate the statistic and its variance for every set of summary
        statistics in this replicate; returns a 2-column array of
        (estimate, variance) rows.
        """
        if not hasattr(self, 'R'):
            print('loading matrices')
            self.init()
        self.beta = pickle.load(sim.beta_file(beta_num))

        print('computing bias')
        self.biases = self.compute_biases(sim.sample_size)
        print('biases are', self.biases)
        self.scalings = self.get_scalings()

        # compute the results
        estimates, their_variances = [], []
        sumstats = sim.sumstats_aligned_to_refpanel(beta_num, self.refpanel)
        for raw_alphahat in sumstats:
            alphahat = BlockDiag.from_big1darray(raw_alphahat, self.R.ranges())
            est = self.compute_statistic(alphahat)
            var = self.compute_variance(alphahat, est, sim.sample_size)
            estimates.append(est)
            their_variances.append(var)
            print(len(estimates), est, var)

        print('empirical var of results:', np.var(estimates))

        return np.concatenate(
            [np.array([estimates]).T, np.array([their_variances]).T], axis=1)
예제 #7
0
파일: truth.py 프로젝트: yakirr/statgen_y1
 def run(self, beta_num, sim):
     """Return [beta' RA beta], the true value of the target statistic."""
     RA = pickle.load(self.RA_file())
     beta_vec = pickle.load(sim.beta_file(beta_num))
     beta_vec = BlockDiag.from_big1darray(beta_vec, RA.ranges())
     results = [beta_vec.dot(RA.dot(beta_vec))]
     print(results[-1])
     return results
예제 #8
0
    def run(self, beta_num, sim):
        """Run the estimator on one replicate, returning an array whose two
        columns are the point estimates and their variance estimates.
        """
        if not hasattr(self, 'R'):
            print('loading matrices')
            self.init()
        self.beta = pickle.load(sim.beta_file(beta_num))

        print('computing bias')
        self.biases = self.compute_biases(sim.sample_size)
        print('biases are', self.biases)
        self.scalings = self.get_scalings()

        # compute the results
        results = []
        variances = []
        for ahat in sim.sumstats_aligned_to_refpanel(beta_num, self.refpanel):
            blocked = BlockDiag.from_big1darray(ahat, self.R.ranges())
            results.append(self.compute_statistic(blocked))
            variances.append(
                self.compute_variance(blocked, results[-1], sim.sample_size))
            print(len(results), results[-1], variances[-1])

        print('empirical var of results:', np.var(results))

        columns = [np.array([results]).T, np.array([variances]).T]
        return np.concatenate(columns, axis=1)
예제 #9
0
 def preprocess(self):
     """Precompute the banded LD matrix R over a window around the region
     and its annotation-restricted version RA; pickle both to disk.
     """
     matplotlib.use("Agg")
     region = GenomicSubset(self.params.region)
     region_snps = SnpSubset(self.refpanel, bedtool=region.bedtool)
     window = region_snps.expanded_by(self.params.ld_window / 1000.0)
     R = BlockDiag.ld_matrix(
         self.refpanel, window.irs.ranges(), 300, band_units="SNPs")
     pickle.dump(R, self.R_file(mode="wb"), 2)
     # R.plot(A.irs, filename=self.R_plotfilename())
     RA = R.zero_outside_irs(region_snps.irs)
     pickle.dump(RA, self.RA_file(mode="wb"), 2)
예제 #10
0
파일: truth.py 프로젝트: yakirr/statgen_y1
 def preprocess(self):
     """Compute the banded LD matrix RA over the region's SNPs and pickle it.

     Plotting (currently commented out) is wrapped in a best-effort
     try/except so a plotting failure cannot prevent the pickle dump.
     """
     matplotlib.use('Agg')
     gs = GenomicSubset(self.params.region)
     ss = SnpSubset(self.refpanel, bedtool=gs.bedtool)
     RA = BlockDiag.ld_matrix(self.refpanel, ss.irs.ranges(), self.params.ld_bandwidth / 1000.)
     try: # if the plotting has some error we don't want to not save the stuff
         # RA.plot(ss.irs, filename=self.RA_plotfilename())
         pass
     # BUGFIX: a bare `except:` also swallows SystemExit/KeyboardInterrupt;
     # catch Exception to keep only the intended best-effort behavior.
     except Exception:
         pass
     pickle.dump(RA, self.RA_file(mode='wb'), 2)
예제 #11
0
 def preprocess(self):
     """Compute the banded LD matrix RA over the region's SNPs and pickle it.

     The (commented-out) plotting step is best-effort: its failure must not
     stop the pickle dump below.
     """
     matplotlib.use('Agg')
     gs = GenomicSubset(self.params.region)
     ss = SnpSubset(self.refpanel, bedtool=gs.bedtool)
     RA = BlockDiag.ld_matrix(self.refpanel, ss.irs.ranges(),
                              self.params.ld_bandwidth / 1000.)
     try:  # if the plotting has some error we don't want to not save the stuff
         # RA.plot(ss.irs, filename=self.RA_plotfilename())
         pass
     # BUGFIX: narrow the bare `except:` to Exception so SystemExit and
     # KeyboardInterrupt are no longer silently swallowed.
     except Exception:
         pass
     pickle.dump(RA, self.RA_file(mode='wb'), 2)
예제 #12
0
    def run(self, beta_num, sim):
        """Apply the estimator to every sumstats vector of this replicate,
        memoizing the bias term across iterations.
        """
        R = pickle.load(self.R_file())
        RA = pickle.load(self.RA_file())

        # compute the results
        estimates = []
        for raw in sim.sumstats_aligned_to_refpanel(beta_num, self.refpanel):
            alphahat = BlockDiag.from_big1darray(raw, R.ranges())
            stat = self.compute_statistic(
                alphahat, R, RA, sim.sample_size, self.refpanel.N, memoize=True)
            estimates.append(stat)
            print(len(estimates), estimates[-1])

        return estimates
예제 #13
0
 def preprocess(self):
     """Build the windowed, banded LD matrix and its annotation-restricted
     counterpart, then pickle both to disk.
     """
     matplotlib.use('Agg')
     region_subset = GenomicSubset(self.params.region)
     annot_snps = SnpSubset(self.refpanel, bedtool=region_subset.bedtool)
     win = annot_snps.expanded_by(self.params.ld_window / 1000.)
     ld = BlockDiag.ld_matrix(self.refpanel, win.irs.ranges(), 300,
                              band_units='SNPs')
     pickle.dump(ld, self.R_file(mode='wb'), 2)
     # R.plot(A.irs, filename=self.R_plotfilename())
     restricted = ld.zero_outside_irs(annot_snps.irs)
     pickle.dump(restricted, self.RA_file(mode='wb'), 2)
예제 #14
0
 def preprocess(self):
     """Build the (effectively unbanded) LD matrix over the window around
     the region's SNPs, pickle it, then pickle its restriction RA.

     The (commented-out) plotting step is best-effort only.
     """
     matplotlib.use("Agg")
     gs = GenomicSubset(self.params.region)
     A = SnpSubset(self.refpanel, bedtool=gs.bedtool)
     W = self.window(A)
     R = BlockDiag.ld_matrix(self.refpanel, W.irs.ranges(), 1000000)  # bandwidth=infty
     pickle.dump(R, self.R_file(mode="wb"), 2)
     try:  # if the plotting has some error we don't want to not save the stuff
         # R.plot(A.irs, filename=self.R_plotfilename())
         pass
     # BUGFIX: a bare `except:` also catches SystemExit/KeyboardInterrupt;
     # Exception keeps the intended best-effort semantics only.
     except Exception:
         pass
     RA = R.zero_outside_irs(A.irs)
     pickle.dump(RA, self.RA_file(mode="wb"), 2)
예제 #15
0
 def preprocess(self):
     """Build the (effectively unbanded) LD matrix over the window around
     the region's SNPs, pickle it, then pickle its restriction RA.

     Plotting (commented out) is wrapped so its failure cannot prevent the
     RA computation and dump below.
     """
     matplotlib.use('Agg')
     gs = GenomicSubset(self.params.region)
     A = SnpSubset(self.refpanel, bedtool=gs.bedtool)
     W = self.window(A)
     R = BlockDiag.ld_matrix(self.refpanel, W.irs.ranges(),
                             1000000)  # bandwidth=infty
     pickle.dump(R, self.R_file(mode='wb'), 2)
     try:  # if the plotting has some error we don't want to not save the stuff
         # R.plot(A.irs, filename=self.R_plotfilename())
         pass
     # BUGFIX: narrow the bare `except:` to Exception so SystemExit and
     # KeyboardInterrupt propagate as they should.
     except Exception:
         pass
     RA = R.zero_outside_irs(A.irs)
     pickle.dump(RA, self.RA_file(mode='wb'), 2)
예제 #16
0
    def run(self, beta_num, sim):
        """Estimate the statistic for each sumstats replicate, optionally
        removing the params.prune_regions LD blocks with the largest
        approximate variance contribution first.
        """
        R = pickle.load(self.R_file())
        RA = pickle.load(self.RA_file())

        if self.params.prune_regions > 0:

            def var(L, LA, h2A, N):
                # approximate variance contribution of one LD block
                LinvLA = np.linalg.solve(L, LA)
                trace_sq = np.einsum('ij,ji', LinvLA, LinvLA)
                trace_mix = np.einsum('ij,ji', LA, LinvLA)
                return (2 * trace_sq / float(N)**2 +
                        2 * trace_mix * h2A / (float(N) * float(750)))

            print('computing variances')
            variances = {
                r: var(R.ranges_to_arrays[r], RA.ranges_to_arrays[r], 0.05,
                       sim.sample_size)
                for r in R.ranges()
            }
            print('total variance:', sum(variances.values()))

            # sort in place (as the original did) and drop the worst blocks
            by_variance = R.ranges()
            by_variance.sort(key=lambda r: variances[r])
            for r in by_variance[-self.params.prune_regions:]:
                print('removing', r)
                del R.ranges_to_arrays[r]
                del RA.ranges_to_arrays[r]
            print('new variance:', sum([variances[r] for r in R.ranges()]))

        print(len(R.ranges()))
        print(len(RA.ranges()))
        # compute the results
        results = []
        for raw in sim.sumstats_aligned_to_refpanel(beta_num, self.refpanel):
            alphahat = BlockDiag.from_big1darray(raw, R.ranges())
            results.append(self.compute_statistic(
                alphahat, R, RA, sim.sample_size, self.refpanel.N,
                memoize=True))
            print(len(results), results[-1])

        return results
예제 #17
0
    def run(self, beta_num, sim):
        """Compute the debiased statistic for every simulated sumstats
        vector, reusing the memoized bias after the first iteration.
        """
        R = pickle.load(self.R_file())
        RA = pickle.load(self.RA_file())

        # compute the results
        results = []
        sumstats_iter = sim.sumstats_aligned_to_refpanel(beta_num,
                                                         self.refpanel)
        for big_alphahat in sumstats_iter:
            alphahat = BlockDiag.from_big1darray(big_alphahat, R.ranges())
            stat = self.compute_statistic(
                alphahat, R, RA, sim.sample_size, self.refpanel.N,
                memoize=True)
            results.append(stat)
            print(len(results), results[-1])

        return results
예제 #18
0
    def run(self, beta_num, sim):
        """Run the estimator over all sumstats vectors, first pruning the
        params.prune_regions LD blocks with the largest variance estimate.
        """
        R = pickle.load(self.R_file())
        RA = pickle.load(self.RA_file())

        if self.params.prune_regions > 0:

            def var(L, LA, h2A, N):
                # approximate per-block variance (750 is a hard-coded constant)
                LinvLA = np.linalg.solve(L, LA)
                t1 = np.einsum("ij,ji", LinvLA, LinvLA)
                t2 = np.einsum("ij,ji", LA, LinvLA)
                return 2 * t1 / float(N) ** 2 + 2 * t2 * h2A / (float(N) * float(750))

            print("computing variances")
            variances = {}
            for block in R.ranges():
                variances[block] = var(
                    R.ranges_to_arrays[block], RA.ranges_to_arrays[block],
                    0.05, sim.sample_size)
            print("total variance:", sum(variances.values()))

            # in-place sort, as in the original, then drop the worst blocks
            ordered = R.ranges()
            ordered.sort(key=lambda block: variances[block])
            for block in ordered[-self.params.prune_regions:]:
                print("removing", block)
                del R.ranges_to_arrays[block]
                del RA.ranges_to_arrays[block]
            print("new variance:", sum([variances[b] for b in R.ranges()]))

        print(len(R.ranges()))
        print(len(RA.ranges()))
        # compute the results
        results = []
        for ahat in sim.sumstats_aligned_to_refpanel(beta_num, self.refpanel):
            blocked = BlockDiag.from_big1darray(ahat, R.ranges())
            results.append(self.compute_statistic(
                blocked, R, RA, sim.sample_size, self.refpanel.N,
                memoize=True))
            print(len(results), results[-1])

        return results
예제 #19
0
 def compute_covariance(self):
     """Build the block-diagonal LD matrix over this chunk's LD blocks."""
     breakpoints = BedTool(paths.reference + self.params.breakpointsfile)
     partition = SnpPartition(self.refpanel, breakpoints, remove_mhc=True)
     chunk_ranges = self.ranges_in_chunk(partition.ranges())
     print('working on', len(chunk_ranges), 'ld blocks')
     return BlockDiag.ld_matrix_blocks(self.refpanel, chunk_ranges)
예제 #20
0
 def compute_covariance(self):
     """Return the LD covariance, block-diagonal over this chunk's blocks."""
     bed = BedTool(paths.reference + self.params.breakpointsfile)
     blocks = SnpPartition(self.refpanel, bed, remove_mhc=True)
     mine = self.ranges_in_chunk(blocks.ranges())
     print('working on', len(mine), 'ld blocks')
     return BlockDiag.ld_matrix_blocks(self.refpanel, mine)