Esempio n. 1
0
    def create_baseline_model(self):
        """Build the baseline annotation files and submit their LD-score jobs.

        For every chromosome of the reference panel, one annotation file
        covering all regions in ``LDSC.baseline_model_regions`` is produced via
        ``SnpSubset.print_subsets``. After all annotation files have been
        produced, one ``ldsc.py --l2`` job per chromosome is handed to
        ``bsub.submit``.
        """
        baseline_regions = [
            GenomicSubset(region) for region in LDSC.baseline_model_regions]

        # create the annotation file
        for chrnum in self.refpanel.chromosomes():
            print('creating baseline annot file for chr', chrnum)
            dataset = Dataset(self.params.refpanel, chrnum=chrnum)
            snp_subsets = []
            for region in baseline_regions:
                # restrict each baseline region to the current chromosome
                chrom_bedtool = region.restricted_to_chrom_bedtool(chrnum)
                snp_subsets.append(SnpSubset(dataset, chrom_bedtool))
            SnpSubset.print_subsets(
                self.baseline_filename(chrnum),
                snp_subsets,
                LDSC.baseline_model_regions)

        # create the ldscores file
        for chrnum in self.refpanel.chromosomes():
            dataset = Dataset(self.params.refpanel, chrnum=chrnum)
            # assemble the ldsc.py command line one flag/value pair at a time
            ldscores_command = [
                'python', '-u', paths.foreign + 'ldsc/ldsc.py', '--l2']
            ldscores_command += [
                '--ld-wind-cm', str(self.params.ld_window / 1000.)]
            ldscores_command += [
                '--bfile', dataset.genotypes_bedfile.filename]
            ldscores_command += ['--annot', self.baseline_filename(chrnum)]
            ldscores_command += ['--out', self.baseline_l2_filestem(chrnum)]
            print(' '.join(ldscores_command))

            # path passed to bsub alongside the command
            bsub_log = self.baseline_l2_filestem(chrnum) + '.bsub_out'
            job_label = 'baseline,chr=' + str(chrnum)
            bsub.submit(ldscores_command, bsub_log, jobname=job_label)
Esempio n. 2
0
    def preprocess(self):
        """Submit LD-score jobs for the annotation of interest.

        When the baseline model is requested (``self.params.baseline``) and its
        preprocessing is not reported as in progress, the preprocessing is
        declared and the baseline model is created first. Afterwards an
        annotation file for ``self.params.region`` is produced for every
        chromosome of the reference panel, and then one ``ldsc.py --l2`` job
        per chromosome is handed to ``bsub.submit``.
        """
        # short-circuit: the in-progress check only runs if baseline is set
        baseline_missing = (
            self.params.baseline
            and not self.baseline_preprocessing_in_progress())
        if baseline_missing:
            print('baseline model not found. creating...')
            self.declare_baseline_preprocessing_in_progress()
            self.create_baseline_model()

        print('submitting ld score jobs for annotation of interest')
        region_of_interest = GenomicSubset(self.params.region)

        # create the annotation file
        for chrnum in self.refpanel.chromosomes():
            dataset = Dataset(self.params.refpanel, chrnum=chrnum)
            chrom_bedtool = region_of_interest.restricted_to_chrom_bedtool(
                chrnum)
            snp_subset = SnpSubset(dataset, chrom_bedtool)
            SnpSubset.print_subsets(
                self.annotation_filename(chrnum),
                [snp_subset],
                [self.params.region],
                add_other=True)

        # create the ldscores file
        for chrnum in self.refpanel.chromosomes():
            dataset = Dataset(self.params.refpanel, chrnum=chrnum)
            # assemble the ldsc.py command line one flag/value pair at a time
            ldscores_command = [
                'python', '-u', paths.foreign + 'ldsc/ldsc.py', '--l2']
            ldscores_command += [
                '--ld-wind-cm', str(self.params.ld_window / 1000.)]
            ldscores_command += [
                '--bfile', dataset.genotypes_bedfile.filename]
            ldscores_command += ['--annot', self.annotation_filename(chrnum)]
            ldscores_command += ['--out', self.annotation_l2_filestem(chrnum)]
            print(' '.join(ldscores_command))

            # path passed to bsub alongside the command
            bsub_log = self.annotation_l2_filestem(chrnum) + '.bsub_out'
            job_label = self.preprocessing_foldername() + ',chr=' + str(chrnum)
            bsub.submit(ldscores_command, bsub_log, jobname=job_label)
Esempio n. 3
0
    def preprocess(self):
        """Prepare annotation files and queue LD-score jobs for the region.

        If ``self.params.baseline`` is set and baseline preprocessing is not
        reported as in progress, it is declared in progress and the baseline
        model is created. Then, per chromosome of the reference panel, an
        annotation file for ``self.params.region`` is produced; once all of
        them are done, an ``ldsc.py --l2`` command is built and submitted via
        ``bsub.submit`` for each chromosome.
        """
        if self.params.baseline:
            if not self.baseline_preprocessing_in_progress():
                print('baseline model not found. creating...')
                self.declare_baseline_preprocessing_in_progress()
                self.create_baseline_model()

        print('submitting ld score jobs for annotation of interest')
        target = GenomicSubset(self.params.region)

        # create the annotation file
        for chrnum in self.refpanel.chromosomes():
            panel = Dataset(self.params.refpanel, chrnum=chrnum)
            subsets = [SnpSubset(panel,
                                 target.restricted_to_chrom_bedtool(chrnum))]
            labels = [self.params.region]
            SnpSubset.print_subsets(self.annotation_filename(chrnum),
                                    subsets,
                                    labels,
                                    add_other=True)

        # create the ldscores file
        for chrnum in self.refpanel.chromosomes():
            panel = Dataset(self.params.refpanel, chrnum=chrnum)
            ldscores_command = [
                'python', '-u', paths.foreign + 'ldsc/ldsc.py', '--l2'
            ]
            # flag/value pairs, appended in command-line order
            flag_values = (
                ('--ld-wind-cm', str(self.params.ld_window / 1000.)),
                ('--bfile', panel.genotypes_bedfile.filename),
                ('--annot', self.annotation_filename(chrnum)),
                ('--out', self.annotation_l2_filestem(chrnum)),
            )
            for flag, value in flag_values:
                ldscores_command.extend((flag, value))
            print(' '.join(ldscores_command))

            outfilepath = self.annotation_l2_filestem(chrnum) + '.bsub_out'
            jobname = self.preprocessing_foldername() + ',chr=' + str(chrnum)
            bsub.submit(ldscores_command, outfilepath, jobname=jobname)
Esempio n. 4
0
    def create_baseline_model(self):
        """Produce baseline annotation files, then queue their LD-score jobs.

        A ``GenomicSubset`` is built for each entry of
        ``LDSC.baseline_model_regions``. For each chromosome of the reference
        panel these are restricted to that chromosome and passed to
        ``SnpSubset.print_subsets`` together with the baseline file name. In a
        second pass, an ``ldsc.py --l2`` command is built per chromosome and
        submitted via ``bsub.submit``.
        """
        region_names = LDSC.baseline_model_regions
        subsets_by_region = [GenomicSubset(name) for name in region_names]

        # create the annotation file
        for chrnum in self.refpanel.chromosomes():
            print('creating baseline annot file for chr', chrnum)
            panel = Dataset(self.params.refpanel, chrnum=chrnum)
            per_chrom = [
                SnpSubset(panel, subset.restricted_to_chrom_bedtool(chrnum))
                for subset in subsets_by_region
            ]
            annot_path = self.baseline_filename(chrnum)
            SnpSubset.print_subsets(annot_path, per_chrom,
                                    LDSC.baseline_model_regions)

        # create the ldscores file
        for chrnum in self.refpanel.chromosomes():
            panel = Dataset(self.params.refpanel, chrnum=chrnum)
            # name each command-line value before assembling the command
            ldsc_script = paths.foreign + 'ldsc/ldsc.py'
            window_arg = str(self.params.ld_window / 1000.)
            bfile_arg = panel.genotypes_bedfile.filename
            annot_arg = self.baseline_filename(chrnum)
            out_arg = self.baseline_l2_filestem(chrnum)
            ldscores_command = [
                'python', '-u', ldsc_script, '--l2',
                '--ld-wind-cm', window_arg,
                '--bfile', bfile_arg,
                '--annot', annot_arg,
                '--out', out_arg,
            ]
            print(' '.join(ldscores_command))

            outfilepath = self.baseline_l2_filestem(chrnum) + '.bsub_out'
            jobname = 'baseline,chr=' + str(chrnum)
            bsub.submit(ldscores_command, outfilepath, jobname=jobname)
Esempio n. 5
0
def create_annot(args):
    """Create one ``.annot.gz`` file per chromosome (22 down to 1) for a BED file.

    The annotation name is the BED file's base name with a trailing ``.bed``
    removed. Output files are named ``<name>.<chrnum>.annot.gz`` and use the
    same directory prefix as the BED file.

    Args:
        args: object with a ``bedfile`` attribute (path to the BED file, using
            ``/`` as separator) and a ``refpanel`` attribute (prefix to which
            ``'.<chrnum>'`` is appended to identify each chromosome's dataset).
    """
    directory, slash, filename = args.bedfile.rpartition('/')
    # A bare filename has no directory part. Use the current directory in that
    # case; blindly appending '/' would yield '/', i.e. the filesystem root.
    path = directory + '/' if slash else './'
    name = filename[:-4] if filename.endswith('.bed') else filename

    gs = GenomicSubset(name, path=path)
    # chromosomes are processed in descending order: 22, 21, ..., 1
    for chrnum in range(22, 0, -1):
        print('creating annot file for chr', chrnum)
        d = Dataset(args.refpanel + '.' + str(chrnum))
        sss = [SnpSubset(d, gs.restricted_to_chrom_bedtool(chrnum))]
        SnpSubset.print_subsets('{}{}.{}.annot.gz'.format(path, name, chrnum),
                                sss, [name])
Esempio n. 6
0
def create_annot(args):
    """Write per-chromosome ``.annot.gz`` files for the regions of a BED file.

    ``args.bedfile`` is split into a directory prefix and a file name; the
    annotation name is the file name without a trailing ``.bed``. For each
    chromosome from 22 down to 1 a dataset named
    ``args.refpanel + '.' + str(chrnum)`` is opened and the result is passed to
    ``SnpSubset.print_subsets`` with the output path
    ``<prefix><name>.<chrnum>.annot.gz``.

    Args:
        args: object exposing ``bedfile`` (BED file path with ``/``
            separators) and ``refpanel`` (per-chromosome dataset prefix).
    """
    if '/' in args.bedfile:
        directory, _, filename = args.bedfile.rpartition('/')
        path = directory + '/'
    else:
        # No directory component: stay in the current directory. Appending
        # '/' to an empty directory would resolve to the filesystem root.
        path = './'
        filename = args.bedfile

    if filename.endswith('.bed'):
        name = filename[:-len('.bed')]
    else:
        name = filename

    gs = GenomicSubset(name, path=path)
    # iterate chromosomes in descending order (22 first, 1 last)
    for chrnum in reversed(range(1, 23)):
        print('creating annot file for chr', chrnum)
        d = Dataset(args.refpanel + '.' + str(chrnum))
        sss = [SnpSubset(d, gs.restricted_to_chrom_bedtool(chrnum))]
        annot_path = '{}{}.{}.annot.gz'.format(path, name, chrnum)
        SnpSubset.print_subsets(annot_path, sss, [name])