Example #1
0
    def create_baseline_model(self):
        """Write the baseline annotation files and submit their LD-score jobs.

        Runs in two passes over ``self.refpanel.chromosomes()``:

        1. For every chromosome, build one ``SnpSubset`` per region listed in
           ``LDSC.baseline_model_regions`` and write them all to
           ``self.baseline_filename(chrnum)`` via ``SnpSubset.print_subsets``.
        2. For every chromosome, assemble an ``ldsc.py --l2`` command line
           that reads that annotation file and submit it through
           ``bsub.submit``; job output goes to
           ``self.baseline_l2_filestem(chrnum) + '.bsub_out'``.

        All annotation files are written before the first job is submitted.
        """
        genomic_subsets = []
        for region_name in LDSC.baseline_model_regions:
            genomic_subsets.append(GenomicSubset(region_name))

        # Pass 1: one annotation file per chromosome.
        for chrom in self.refpanel.chromosomes():
            print('creating baseline annot file for chr', chrom)
            refpanel_data = Dataset(self.params.refpanel, chrnum=chrom)
            chrom_subsets = []
            for subset in genomic_subsets:
                chrom_bedtool = subset.restricted_to_chrom_bedtool(chrom)
                chrom_subsets.append(SnpSubset(refpanel_data, chrom_bedtool))
            SnpSubset.print_subsets(
                self.baseline_filename(chrom),
                chrom_subsets,
                LDSC.baseline_model_regions)

        # Pass 2: one LD-score job per chromosome.
        for chrom in self.refpanel.chromosomes():
            refpanel_data = Dataset(self.params.refpanel, chrnum=chrom)
            command = ['python', '-u', paths.foreign + 'ldsc/ldsc.py', '--l2']
            # The window handed to ldsc is params.ld_window divided by 1000.
            command += ['--ld-wind-cm', str(self.params.ld_window / 1000.)]
            command += ['--bfile', refpanel_data.genotypes_bedfile.filename]
            command += ['--annot', self.baseline_filename(chrom)]
            command += ['--out', self.baseline_l2_filestem(chrom)]
            print(' '.join(command))
            log_path = self.baseline_l2_filestem(chrom) + '.bsub_out'
            bsub.submit(
                command,
                log_path,
                jobname='baseline,chr=' + str(chrom))
Example #2
0
    def preprocess(self):
        """Write annotation files for ``params.region`` and submit LD-score jobs.

        If ``self.params.baseline`` is truthy and
        ``self.baseline_preprocessing_in_progress()`` is falsy, the baseline
        model is first declared in progress and created.

        Afterwards, for every chromosome of ``self.refpanel``:

        * the SNPs of ``params.region`` on that chromosome are written to
          ``self.annotation_filename(chrnum)`` (with ``add_other=True``);
        * once all annotation files exist, an ``ldsc.py --l2`` command is
          submitted through ``bsub.submit`` with its output directed to
          ``self.annotation_l2_filestem(chrnum) + '.bsub_out'``.
        """
        if self.params.baseline:
            if not self.baseline_preprocessing_in_progress():
                print('baseline model not found. creating...')
                self.declare_baseline_preprocessing_in_progress()
                self.create_baseline_model()

        print('submitting ld score jobs for annotation of interest')
        region_subset = GenomicSubset(self.params.region)

        # Pass 1: one annotation file per chromosome.
        for chrom in self.refpanel.chromosomes():
            refpanel_data = Dataset(self.params.refpanel, chrnum=chrom)
            chrom_bedtool = region_subset.restricted_to_chrom_bedtool(chrom)
            region_snps = SnpSubset(refpanel_data, chrom_bedtool)
            SnpSubset.print_subsets(
                self.annotation_filename(chrom),
                [region_snps],
                [self.params.region],
                add_other=True)

        # Pass 2: one LD-score job per chromosome.
        for chrom in self.refpanel.chromosomes():
            refpanel_data = Dataset(self.params.refpanel, chrnum=chrom)
            command = ['python', '-u', paths.foreign + 'ldsc/ldsc.py', '--l2']
            # The window handed to ldsc is params.ld_window divided by 1000.
            command += ['--ld-wind-cm', str(self.params.ld_window / 1000.)]
            command += ['--bfile', refpanel_data.genotypes_bedfile.filename]
            command += ['--annot', self.annotation_filename(chrom)]
            command += ['--out', self.annotation_l2_filestem(chrom)]
            print(' '.join(command))
            log_path = self.annotation_l2_filestem(chrom) + '.bsub_out'
            job_name = self.preprocessing_foldername() + ',chr=' + str(chrom)
            bsub.submit(command, log_path, jobname=job_name)
Example #3
0
 def preprocess(self):
     """Compute and pickle the LD matrix ``R`` and its restriction ``RA``.

     Steps:

     1. Build the SNP subset ``A`` for ``self.params.region`` on
        ``self.refpanel`` and expand it by ``self.params.ld_window / 1000.0``.
     2. Compute ``R = BlockDiag.ld_matrix(...)`` over the ranges of the
        expanded subset, with a band of 300 in units of ``"SNPs"``.
     3. Pickle ``R`` (protocol 2) to the handle from ``self.R_file``.
     4. Compute ``RA = R.zero_outside_irs(A.irs)`` and pickle it (protocol 2)
        to the handle from ``self.RA_file``.

     The output handles are closed explicitly so the pickles are flushed to
     disk before this method returns, instead of relying on garbage
     collection. This assumes ``R_file`` / ``RA_file`` return objects that
     support the ``with`` statement, as regular file objects do.
     """
     matplotlib.use("Agg")
     gs = GenomicSubset(self.params.region)
     A = SnpSubset(self.refpanel, bedtool=gs.bedtool)
     W = A.expanded_by(self.params.ld_window / 1000.0)
     R = BlockDiag.ld_matrix(self.refpanel, W.irs.ranges(), 300, band_units="SNPs")
     with self.R_file(mode="wb") as r_out:
         pickle.dump(R, r_out, 2)
     # Plotting is currently disabled:
     # R.plot(A.irs, filename=self.R_plotfilename())
     RA = R.zero_outside_irs(A.irs)
     with self.RA_file(mode="wb") as ra_out:
         pickle.dump(RA, ra_out, 2)
Example #4
0
def create_annot(args):
    """Write one ``.annot.gz`` file per chromosome (22 down to 1) for a bed file.

    Args:
        args: object with two string attributes:
            ``bedfile`` -- path of the bed file; a trailing ``.bed`` is
            stripped to obtain the annotation name.
            ``refpanel`` -- prefix of the reference panel; the dataset for a
            chromosome is opened as ``refpanel + '.' + str(chrnum)``.

    Output files are named ``<dir>/<name>.<chrnum>.annot.gz`` and are placed
    next to the bed file.
    """
    # Split on the last '/' only. If there is no '/', the bed file is in the
    # current directory. The previous `'/'.join(parts[:-1]) + '/'` produced
    # '/' in that case, which pointed at the filesystem root.
    directory, sep, filename = args.bedfile.rpartition('/')
    path = directory + '/' if sep else './'
    name = filename[:-4] if filename.endswith('.bed') else filename

    gs = GenomicSubset(name, path=path)
    for chrnum in range(22, 0, -1):
        print('creating annot file for chr', chrnum)
        d = Dataset(args.refpanel + '.' + str(chrnum))
        sss = [SnpSubset(d, gs.restricted_to_chrom_bedtool(chrnum))]
        SnpSubset.print_subsets('{}{}.{}.annot.gz'.format(path, name, chrnum),
                sss, [name])
Example #5
0
    def compute_statistic(self, alphahat, R, RA, N, Nref, memoize=False):
        """Return ``scaling * (betahat . RA . betahat - bias)``.

        ``betahat`` is ``self.Radjreginv.dot(alphahat)``, where
        ``self.Radjreginv`` is the inverse of the ridge-regularized (and
        optionally adjusted) ``R``.

        Args:
            alphahat: vector multiplied by the regularized inverse.
            R: matrix to regularize and invert.
            RA: matrix used in the quadratic form. When ``params.RAreg`` is
                set it is first ridge-regularized with ``params.Lambda``.
            N: divisor of the first bias term (``tr / N``).
            Nref: passed to ``R.adjusted_before_inversion`` when
                ``params.Radjust == "before"``, or to ``inv`` as
                ``Nadjust_after`` when ``params.Radjust == "after"``.
            memoize: when true and ``self.bias`` already exists, reuse the
                stored ``self.Radjreginv``, ``self.bias`` and ``self.scaling``
                instead of recomputing them.

        Side effects:
            Sets ``self.Radjreginv``, ``self.scaling`` and ``self.bias``
            whenever they are (re)computed.
        """
        radjust = self.params.Radjust
        # Nref is forwarded to inv() only in "after" mode.
        Nadjust_after = Nref if radjust == "after" else None
        if radjust == "before":
            Radj = R.adjusted_before_inversion(Nref)
        else:
            Radj = R

        if self.params.RAreg:
            print("regularizing RA")
            RA = RA.add_ridge(self.params.Lambda, renormalize=True)
            gs = GenomicSubset(self.params.region)
            A = SnpSubset(self.refpanel, bedtool=gs.bedtool)
            # NOTE(review): the return value is discarded, so this only has
            # an effect if zero_outside_irs mutates RA in place -- confirm.
            RA.zero_outside_irs(A.irs)

        if not memoize or not hasattr(self, "bias"):
            print("adding lambda")
            Radjreg = Radj.add_ridge(self.params.Lambda, renormalize=True)
            print("computing inverse")
            self.Radjreginv = Radjreg.inv(Nadjust_after=Nadjust_after)

            print("done.computing bias...")
            A = SnpSubset(self.refpanel, bedtool=GenomicSubset(self.params.region).bedtool)
            # NOTE(review): W is only referenced by the disabled bias formula
            # below; the call is kept in case window() has side effects.
            W = self.window(A)
            if not self.params.avgunbiased:
                tr = self.Radjreginv.dot(RA).trace()
                self.scaling = 1
            else:
                tr = RA.dot(self.Radjreginv).dot(R).dot(self.Radjreginv).trace()
                Q = R.dot(self.Radjreginv).dot(RA).dot(self.Radjreginv).dot(R)
                # NOTE(review): same in-place assumption as for RA above.
                Q.zero_outside_irs(A.irs)
                self.scaling = A.num_snps() / Q.trace()
            # Disabled alternative that weights the second term by the
            # fraction of SNPs outside the window:
            # self.bias = tr / N + \
            #         float(self.refpanel.M-len(W.irs))/self.refpanel.M * \
            #             self.params.sigma2g * tr / self.params.pop_size
            self.bias = tr / N + self.params.sigma2g * tr / self.params.pop_size
            print("\nbias =", self.bias)
            print("scaling =", self.scaling)

        betahat = self.Radjreginv.dot(alphahat)

        return self.scaling * (betahat.dot(RA.dot(betahat)) - self.bias)
Example #6
0
 def init(self):
     """Load the pickled matrices and derive the variance matrices.

     Populates, in this order:

     * ``self.Rri``, ``self.R``, ``self.RA`` -- unpickled from the handles
       returned by ``self.Rri_file()``, ``self.R_file()``, ``self.RA_file()``;
     * ``self.A`` -- the ``SnpSubset`` of ``self.params.region`` on
       ``self.refpanel``;
     * ``self.ZR`` -- unpickled from ``self.biasmatrix_file()``;
     * ``self.Q``, ``self.Z``, ``self.QZ``, ``self.QZR`` -- the four values
       returned by ``self.get_variance_matrices()``.

     Each file handle is closed as soon as its payload has been read,
     instead of being left for the garbage collector. This assumes the
     ``*_file()`` accessors return objects that support the ``with``
     statement, as regular file objects do.

     NOTE: ``pickle.load`` can execute arbitrary code contained in the file;
     only point these accessors at files produced by this pipeline.
     """
     def _unpickle(open_file):
         # Open, read one pickled object, and close the handle.
         with open_file() as handle:
             return pickle.load(handle)

     self.Rri = _unpickle(self.Rri_file)
     self.R = _unpickle(self.R_file)
     self.RA = _unpickle(self.RA_file)
     self.A = SnpSubset(self.refpanel,
                        GenomicSubset(self.params.region).bedtool)
     self.ZR = _unpickle(self.biasmatrix_file)
     self.Q, self.Z, self.QZ, self.QZR = self.get_variance_matrices()