def compute_statistic(self, alphahat, R, RA, N, Nref, memoize=False):
    # TODO: should we regularize RA?
    print('regularizing R...')
    Rreg = R.add_ridge(self.params.Lambda, renormalize=True)
    if not memoize or not hasattr(self, 'bias'):
        print('done. computing bias...')
        # bias term subtracted from the quadratic form below: tr(Rreg^{-1} RA) / N
        self.bias = BlockDiag.solve(Rreg, RA).trace() / N
        print('bias =', self.bias)
    betahat = BlockDiag.solve(Rreg, alphahat)
    return betahat.dot(RA.dot(betahat)) - self.bias
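# A minimal dense-numpy sketch of the same bias-corrected statistic, for reference.
# It assumes add_ridge(Lambda) amounts to adding Lambda to the diagonal (the
# renormalize step is not modeled) and that BlockDiag.solve is a per-block linear
# solve; the function name and the lam argument here are illustrative only.
def _dense_compute_statistic_sketch(alphahat, R, RA, N, lam=0.1):
    import numpy as np
    Rreg = R + lam * np.eye(R.shape[0])              # stand-in for R.add_ridge(lam)
    bias = np.trace(np.linalg.solve(Rreg, RA)) / N   # tr(Rreg^{-1} RA) / N
    betahat = np.linalg.solve(Rreg, alphahat)        # Rreg^{-1} alphahat
    return betahat.dot(RA.dot(betahat)) - bias       # betahat' RA betahat - bias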
def compute_statistic(self, alphahat, R, RA, N, Nref, memoize=False):
    try:
        if not memoize or not hasattr(self, 'bias'):
            print('computing bias')
            self.bias = BlockDiag.solve(R, RA).trace() / N
            print('bias =', self.bias)
        betahat = BlockDiag.solve(R, alphahat)
        return betahat.dot(RA.dot(betahat)) - self.bias
    except np.linalg.LinAlgError:
        print('R was singular. Its shape was', R.shape(), 'and Nref=', Nref)
        return 0
def run(self, beta_num, sim):
    RA = pickle.load(self.RA_file())
    beta = pickle.load(sim.beta_file(beta_num))
    beta = BlockDiag.from_big1darray(beta, RA.ranges())
    results = [beta.dot(RA.dot(beta))]
    print(results[-1])
    return results
def run(self, beta_num, sim):
    if not hasattr(self, 'R'):
        print('loading matrices')
        self.init()
    self.beta = pickle.load(sim.beta_file(beta_num))
    print('computing bias')
    self.biases = self.compute_biases(sim.sample_size)
    print('biases are', self.biases)
    self.scalings = self.get_scalings()

    # compute the results
    results = []
    variances = []
    for alphahat in sim.sumstats_aligned_to_refpanel(beta_num, self.refpanel):
        alphahat = BlockDiag.from_big1darray(alphahat, self.R.ranges())
        results.append(self.compute_statistic(alphahat))
        variances.append(self.compute_variance(alphahat, results[-1], sim.sample_size))
        print(len(results), results[-1], variances[-1])
    print('empirical var of results:', np.var(results))
    return np.concatenate([np.array([results]).T, np.array([variances]).T], axis=1)
def preprocess(self): matplotlib.use("Agg") gs = GenomicSubset(self.params.region) A = SnpSubset(self.refpanel, bedtool=gs.bedtool) W = A.expanded_by(self.params.ld_window / 1000.0) R = BlockDiag.ld_matrix(self.refpanel, W.irs.ranges(), 300, band_units="SNPs") pickle.dump(R, self.R_file(mode="wb"), 2) # R.plot(A.irs, filename=self.R_plotfilename()) RA = R.zero_outside_irs(A.irs) pickle.dump(RA, self.RA_file(mode="wb"), 2)
def preprocess(self):
    matplotlib.use('Agg')
    gs = GenomicSubset(self.params.region)
    ss = SnpSubset(self.refpanel, bedtool=gs.bedtool)
    RA = BlockDiag.ld_matrix(self.refpanel, ss.irs.ranges(), self.params.ld_bandwidth / 1000.)
    try:
        # if plotting fails, we still want to save the matrices below
        # RA.plot(ss.irs, filename=self.RA_plotfilename())
        pass
    except Exception:
        pass
    pickle.dump(RA, self.RA_file(mode='wb'), 2)
def run(self, beta_num, sim):
    R = pickle.load(self.R_file())
    RA = pickle.load(self.RA_file())

    # compute the results
    results = []
    for alphahat in sim.sumstats_aligned_to_refpanel(beta_num, self.refpanel):
        alphahat = BlockDiag.from_big1darray(alphahat, R.ranges())
        results.append(self.compute_statistic(
            alphahat, R, RA, sim.sample_size, self.refpanel.N, memoize=True))
        print(len(results), results[-1])
    return results
def preprocess(self):
    matplotlib.use('Agg')
    gs = GenomicSubset(self.params.region)
    A = SnpSubset(self.refpanel, bedtool=gs.bedtool)
    W = A.expanded_by(self.params.ld_window / 1000.)
    R = BlockDiag.ld_matrix(self.refpanel, W.irs.ranges(), 300, band_units='SNPs')
    pickle.dump(R, self.R_file(mode='wb'), 2)
    # R.plot(A.irs, filename=self.R_plotfilename())
    RA = R.zero_outside_irs(A.irs)
    pickle.dump(RA, self.RA_file(mode='wb'), 2)
def preprocess(self): matplotlib.use("Agg") gs = GenomicSubset(self.params.region) A = SnpSubset(self.refpanel, bedtool=gs.bedtool) W = self.window(A) R = BlockDiag.ld_matrix(self.refpanel, W.irs.ranges(), 1000000) # bandwidth=infty pickle.dump(R, self.R_file(mode="wb"), 2) try: # if the plotting has some error we don't want to not save the stuff # R.plot(A.irs, filename=self.R_plotfilename()) pass except: pass RA = R.zero_outside_irs(A.irs) pickle.dump(RA, self.RA_file(mode="wb"), 2)
def preprocess(self):
    matplotlib.use('Agg')
    gs = GenomicSubset(self.params.region)
    A = SnpSubset(self.refpanel, bedtool=gs.bedtool)
    W = self.window(A)
    R = BlockDiag.ld_matrix(self.refpanel, W.irs.ranges(), 1000000)  # bandwidth=infty
    pickle.dump(R, self.R_file(mode='wb'), 2)
    try:
        # if plotting fails, we still want to save the matrices below
        # R.plot(A.irs, filename=self.R_plotfilename())
        pass
    except Exception:
        pass
    RA = R.zero_outside_irs(A.irs)
    pickle.dump(RA, self.RA_file(mode='wb'), 2)
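# Illustrative dense-numpy sketch of what the preprocessing above is assumed to
# produce: an LD correlation matrix R for the windowed SNP set and a copy RA that
# is zeroed outside the annotation's indices. The helper name, and the reading of
# zero_outside_irs as "keep only the annotation-by-annotation block", are
# assumptions for illustration, not the BlockDiag implementation.
def _dense_preprocess_sketch(X, annot_idx):
    import numpy as np
    X = (X - X.mean(axis=0)) / X.std(axis=0)   # standardize genotypes (Nref x M)
    R = X.T.dot(X) / X.shape[0]                # dense LD (correlation) matrix, M x M
    RA = np.zeros_like(R)
    ix = np.ix_(annot_idx, annot_idx)
    RA[ix] = R[ix]                             # zero everything outside the annotation
    return R, RA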
def run(self, beta_num, sim):
    R = pickle.load(self.R_file())
    RA = pickle.load(self.RA_file())

    if self.params.prune_regions > 0:
        def var(L, LA, h2A, N):
            LinvLA = np.linalg.solve(L, LA)
            tr1 = np.einsum('ij,ji', LinvLA, LinvLA)
            tr2 = np.einsum('ij,ji', LA, LinvLA)
            return 2 * tr1 / float(N)**2 + 2 * tr2 * h2A / (float(N) * float(750))
        print('computing variances')
        variances = {}
        for r in R.ranges():
            variances[r] = var(R.ranges_to_arrays[r], RA.ranges_to_arrays[r],
                               0.05, sim.sample_size)
        print('total variance:', sum(variances.values()))
        sortedrs = R.ranges()
        sortedrs.sort(key=lambda r: variances[r])
        worstrs = sortedrs[-self.params.prune_regions:]
        for r in worstrs:
            print('removing', r)
            del R.ranges_to_arrays[r]
            del RA.ranges_to_arrays[r]
        print('new variance:', sum([variances[r] for r in R.ranges()]))
        print(len(R.ranges()))
        print(len(RA.ranges()))

    # compute the results
    results = []
    for alphahat in sim.sumstats_aligned_to_refpanel(beta_num, self.refpanel):
        alphahat = BlockDiag.from_big1darray(alphahat, R.ranges())
        results.append(self.compute_statistic(
            alphahat, R, RA, sim.sample_size, self.refpanel.N, memoize=True))
        print(len(results), results[-1])
    return results
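# Illustrative sketch of the per-block variance proxy used for pruning above,
# written for plain dense arrays. It makes explicit that the einsum('ij,ji', ...)
# calls are traces of matrix products: tr((L^{-1} LA)^2) and tr(LA L^{-1} LA).
# The function name is hypothetical; the h2A value 0.05 and the constant 750 are
# taken as given from the code above.
def _dense_block_variance_sketch(L, LA, h2A, N):
    import numpy as np
    LinvLA = np.linalg.solve(L, LA)
    tr1 = np.trace(LinvLA.dot(LinvLA))   # tr((L^{-1} LA)^2)
    tr2 = np.trace(LA.dot(LinvLA))       # tr(LA L^{-1} LA)
    return 2 * tr1 / float(N)**2 + 2 * tr2 * h2A / (float(N) * 750)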
def compute_covariance(self):
    breakpoints = BedTool(paths.reference + self.params.breakpointsfile)
    blocks = SnpPartition(self.refpanel, breakpoints, remove_mhc=True)
    myranges = self.ranges_in_chunk(blocks.ranges())
    print('working on', len(myranges), 'ld blocks')
    return BlockDiag.ld_matrix_blocks(self.refpanel, myranges)