def preprocess(self):
    """Compute the LD matrices for the configured region and pickle them.

    Steps, in order:

    1. Build ``A``, the ``SnpSubset`` of ``self.refpanel`` selected by the
       bedtool of ``GenomicSubset(self.params.region)``.
    2. Build ``W`` by expanding ``A`` by ``self.params.ld_window / 1000.0``.
    3. Compute ``R = BlockDiag.ld_matrix(...)`` over ``W.irs.ranges()`` and
       pickle it to ``self.R_file(mode="wb")``.
    4. Compute ``RA = R.zero_outside_irs(A.irs)`` and pickle it to
       ``self.RA_file(mode="wb")``.

    Both dumps use pickle protocol 2. Each output handle is closed as soon
    as its dump completes instead of being left for garbage collection.
    """
    # Select the Agg backend (non-interactive per the matplotlib docs).
    matplotlib.use("Agg")

    gs = GenomicSubset(self.params.region)
    A = SnpSubset(self.refpanel, bedtool=gs.bedtool)
    # NOTE(review): the division by 1000 looks like a unit conversion for
    # ld_window -- confirm the units expanded_by expects.
    W = A.expanded_by(self.params.ld_window / 1000.0)

    # NOTE(review): 300 together with band_units="SNPs" is presumably the band
    # width measured in SNPs -- confirm against BlockDiag.ld_matrix.
    R = BlockDiag.ld_matrix(self.refpanel, W.irs.ranges(), 300, band_units="SNPs")
    # Assumes R_file/RA_file return file-like objects usable as context
    # managers (e.g. the result of open()) -- verify against their definitions.
    with self.R_file(mode="wb") as r_out:
        pickle.dump(R, r_out, 2)
    # Plotting is currently disabled:
    # R.plot(A.irs, filename=self.R_plotfilename())

    RA = R.zero_outside_irs(A.irs)
    with self.RA_file(mode="wb") as ra_out:
        pickle.dump(RA, ra_out, 2)
def preprocess(self):
    """Build and cache the LD matrix ``R`` and its restricted form ``RA``.

    The SNP subset ``A`` is taken from ``self.refpanel`` using the bedtool of
    ``GenomicSubset(self.params.region)``; ``W`` is ``A`` expanded by
    ``self.params.ld_window / 1000.``. ``R`` is computed by
    ``BlockDiag.ld_matrix`` over ``W.irs.ranges()`` and written to
    ``self.R_file(mode='wb')``; ``RA = R.zero_outside_irs(A.irs)`` is then
    written to ``self.RA_file(mode='wb')``.

    Both objects are pickled with protocol 2, and each output handle is
    closed immediately after its dump rather than being leaked.
    """
    # Select the Agg backend (non-interactive per the matplotlib docs).
    matplotlib.use('Agg')

    gs = GenomicSubset(self.params.region)
    A = SnpSubset(self.refpanel, bedtool=gs.bedtool)
    # NOTE(review): dividing ld_window by 1000 looks like a unit conversion --
    # confirm the units expanded_by expects.
    W = A.expanded_by(self.params.ld_window / 1000.)

    # NOTE(review): 300 with band_units='SNPs' is presumably a band width in
    # SNPs -- confirm against BlockDiag.ld_matrix.
    R = BlockDiag.ld_matrix(self.refpanel, W.irs.ranges(), 300, band_units='SNPs')
    # Assumes R_file/RA_file return file-like context managers (e.g. open()
    # results) -- verify against their definitions.
    with self.R_file(mode='wb') as r_out:
        pickle.dump(R, r_out, 2)
    # Plotting is currently disabled:
    # R.plot(A.irs, filename=self.R_plotfilename())

    RA = R.zero_outside_irs(A.irs)
    with self.RA_file(mode='wb') as ra_out:
        pickle.dump(RA, ra_out, 2)
if __name__ == '__main__':
    # Ad-hoc command-line check of BlockDiag on a small slice of a dataset.
    # Usage: pass --M (number of SNPs to use); -check_dense is an optional flag.
    from primitives import Dataset, GenomicSubset, SnpSubset
    import copy
    from time import time
    import argparse

    # Fixed seed so the random draws below are reproducible between runs.
    np.random.seed(0)

    parser = argparse.ArgumentParser()
    parser.add_argument('--M', type=int, required=True,
                        help='the number of SNPs to use')
    parser.add_argument('-check_dense', action='store_true', default=False)
    args = parser.parse_args()

    d = Dataset('GERA', forced_M=args.M)
    indivs = d.random_indivs(200)
    tiny_gs = GenomicSubset('50')
    tiny_ss = SnpSubset(d, bedtool=tiny_gs.bedtool)
    tiny_buffered_ss = tiny_ss.expanded_by(0.01)

    t0 = time()
    R = BlockDiag.ld_matrix(d, tiny_buffered_ss.irs.ranges(), 0.01,
                            indivs=indivs)  # 1 cM bandwidth
    R = R.add_ridge(0.05, renormalize=True)
    # The final printed value is an exact equality check; per the message the
    # trace is only expected to be *close* to M, so it is purely diagnostic.
    print('trace of renormalized R should be close to M (with noise due to sample vs pop LD',
          R.trace(), tiny_buffered_ss.num_snps(),
          R.trace() == tiny_buffered_ss.num_snps())
    print('computing R took', time() - t0)
    print('shape of R is:', R.shape())

    # Keep the value returned by zero_outside_irs, matching how preprocess()
    # uses it (RA = R.zero_outside_irs(A.irs)); the original call discarded it.
    # Working on a copy leaves R itself untouched either way.
    RA = R.copy()
    RA = RA.zero_outside_irs(tiny_ss.irs)
    b = BlockDiag.from_big1darray(np.random.randn(d.M), R.ranges())

    # check inverse computation