def create_baseline_model(self):
    """Create the baseline-model annotation files and submit their LD-score jobs.

    Two separate passes are made over ``self.refpanel.chromosomes()``:

    1. For each chromosome, one SnpSubset per entry of
       ``LDSC.baseline_model_regions`` is built and handed to
       ``SnpSubset.print_subsets`` together with ``self.baseline_filename``.
    2. For each chromosome, an ``ldsc.py --l2`` command line is assembled,
       printed, and submitted through ``bsub.submit``.
    """
    region_names = LDSC.baseline_model_regions
    genomic_subsets = [GenomicSubset(name) for name in region_names]

    # Pass 1: create the annotation file for every chromosome.
    for chrom in self.refpanel.chromosomes():
        print('creating baseline annot file for chr', chrom)
        dataset = Dataset(self.params.refpanel, chrnum=chrom)
        snp_subsets = []
        for subset in genomic_subsets:
            snp_subsets.append(
                SnpSubset(dataset, subset.restricted_to_chrom_bedtool(chrom)))
        SnpSubset.print_subsets(
            self.baseline_filename(chrom), snp_subsets, region_names)

    # Pass 2: create the ldscores file for every chromosome.
    for chrom in self.refpanel.chromosomes():
        dataset = Dataset(self.params.refpanel, chrnum=chrom)
        command = ['python', '-u', paths.foreign + 'ldsc/ldsc.py', '--l2']
        # ld_window is divided by 1000 before being passed as --ld-wind-cm
        # (presumably a unit conversion -- confirm against the params docs).
        command.extend(['--ld-wind-cm', str(self.params.ld_window / 1000.)])
        command.extend(['--bfile', dataset.genotypes_bedfile.filename])
        command.extend(['--annot', self.baseline_filename(chrom)])
        command.extend(['--out', self.baseline_l2_filestem(chrom)])
        print(' '.join(command))

        # The bsub output goes next to the ldsc output stem.
        log_path = self.baseline_l2_filestem(chrom) + '.bsub_out'
        job_label = 'baseline,chr=' + str(chrom)
        bsub.submit(command, log_path, jobname=job_label)
def preprocess(self):
    """Prepare annotation files and submit LD-score jobs for ``params.region``.

    When ``self.params.baseline`` is set and baseline preprocessing has not
    been declared in progress, the baseline model is created first. Then,
    in two separate passes over ``self.refpanel.chromosomes()``, an
    annotation file is produced per chromosome and an ``ldsc.py --l2`` job
    is submitted per chromosome through ``bsub.submit``.
    """
    baseline_missing = (self.params.baseline
                        and not self.baseline_preprocessing_in_progress())
    if baseline_missing:
        print('baseline model not found. creating...')
        # Flag the work as in progress before starting it.
        self.declare_baseline_preprocessing_in_progress()
        self.create_baseline_model()

    print('submitting ld score jobs for annotation of interest')
    region_subset = GenomicSubset(self.params.region)

    # Pass 1: create the annotation file for every chromosome.
    for chrom in self.refpanel.chromosomes():
        dataset = Dataset(self.params.refpanel, chrnum=chrom)
        snp_subset = SnpSubset(
            dataset, region_subset.restricted_to_chrom_bedtool(chrom))
        SnpSubset.print_subsets(
            self.annotation_filename(chrom),
            [snp_subset],
            [self.params.region],
            add_other=True)

    # Pass 2: create the ldscores file for every chromosome.
    for chrom in self.refpanel.chromosomes():
        dataset = Dataset(self.params.refpanel, chrnum=chrom)
        command = ['python', '-u', paths.foreign + 'ldsc/ldsc.py', '--l2']
        # ld_window is divided by 1000 before being passed as --ld-wind-cm
        # (presumably a unit conversion -- confirm against the params docs).
        command.extend(['--ld-wind-cm', str(self.params.ld_window / 1000.)])
        command.extend(['--bfile', dataset.genotypes_bedfile.filename])
        command.extend(['--annot', self.annotation_filename(chrom)])
        command.extend(['--out', self.annotation_l2_filestem(chrom)])
        print(' '.join(command))

        # The bsub output goes next to the ldsc output stem.
        log_path = self.annotation_l2_filestem(chrom) + '.bsub_out'
        job_label = self.preprocessing_foldername() + ',chr=' + str(chrom)
        bsub.submit(command, log_path, jobname=job_label)
def preprocess(self):
    """Create annotation files for the region of interest and queue LD-score jobs.

    If ``self.params.baseline`` is truthy and
    ``self.baseline_preprocessing_in_progress()`` is falsy, the baseline
    model is declared in progress and created before anything else. The
    per-chromosome annotation files are all produced before any LD-score
    job is submitted.
    """
    if self.params.baseline:
        if not self.baseline_preprocessing_in_progress():
            print('baseline model not found. creating...')
            self.declare_baseline_preprocessing_in_progress()
            self.create_baseline_model()

    print('submitting ld score jobs for annotation of interest')
    subset_of_interest = GenomicSubset(self.params.region)

    # create the annotation file
    for chrom in self.refpanel.chromosomes():
        refpanel_data = Dataset(self.params.refpanel, chrnum=chrom)
        chrom_bedtool = subset_of_interest.restricted_to_chrom_bedtool(chrom)
        chrom_snps = SnpSubset(refpanel_data, chrom_bedtool)
        SnpSubset.print_subsets(
            self.annotation_filename(chrom),
            [chrom_snps],
            [self.params.region],
            add_other=True)

    # create the ldscores file
    for chrom in self.refpanel.chromosomes():
        refpanel_data = Dataset(self.params.refpanel, chrnum=chrom)
        ldsc_args = [
            'python', '-u', paths.foreign + 'ldsc/ldsc.py',
            '--l2',
            # ld_window is scaled down by 1000 for --ld-wind-cm
            # (presumably a unit conversion -- TODO confirm).
            '--ld-wind-cm', str(self.params.ld_window / 1000.),
            '--bfile', refpanel_data.genotypes_bedfile.filename,
            '--annot', self.annotation_filename(chrom),
            '--out', self.annotation_l2_filestem(chrom),
        ]
        print(' '.join(ldsc_args))
        bsub_log = self.annotation_l2_filestem(chrom) + '.bsub_out'
        bsub.submit(
            ldsc_args,
            bsub_log,
            jobname=self.preprocessing_foldername() + ',chr=' + str(chrom))
def create_baseline_model(self):
    """Produce baseline annotation files, then queue one LD-score job per chromosome.

    One GenomicSubset is built per entry of ``LDSC.baseline_model_regions``.
    All per-chromosome annotation files are produced (via
    ``SnpSubset.print_subsets``) before any ``ldsc.py --l2`` job is handed
    to ``bsub.submit``.
    """
    baseline_subsets = []
    for region_name in LDSC.baseline_model_regions:
        baseline_subsets.append(GenomicSubset(region_name))

    # create the annotation file
    for chrom in self.refpanel.chromosomes():
        print('creating baseline annot file for chr', chrom)
        refpanel_data = Dataset(self.params.refpanel, chrnum=chrom)
        chrom_snp_subsets = [
            SnpSubset(refpanel_data, region.restricted_to_chrom_bedtool(chrom))
            for region in baseline_subsets
        ]
        SnpSubset.print_subsets(
            self.baseline_filename(chrom),
            chrom_snp_subsets,
            LDSC.baseline_model_regions)

    # create the ldscores file
    for chrom in self.refpanel.chromosomes():
        refpanel_data = Dataset(self.params.refpanel, chrnum=chrom)
        ldsc_args = [
            'python', '-u', paths.foreign + 'ldsc/ldsc.py',
            '--l2',
            # ld_window is scaled down by 1000 for --ld-wind-cm
            # (presumably a unit conversion -- TODO confirm).
            '--ld-wind-cm', str(self.params.ld_window / 1000.),
            '--bfile', refpanel_data.genotypes_bedfile.filename,
            '--annot', self.baseline_filename(chrom),
            '--out', self.baseline_l2_filestem(chrom),
        ]
        print(' '.join(ldsc_args))
        bsub_log = self.baseline_l2_filestem(chrom) + '.bsub_out'
        bsub.submit(
            ldsc_args,
            bsub_log,
            jobname='baseline,chr=' + str(chrom))
def create_annot(args):
    """Create per-chromosome annotation files for a single BED file.

    ``args`` must provide:

    * ``bedfile``  -- path to the BED file, using '/' as the separator. A
      trailing '.bed' is stripped to obtain the annotation name.
    * ``refpanel`` -- prefix to which '.<chrnum>' is appended to build the
      per-chromosome Dataset.

    For chromosomes 22 down to 1, a SnpSubset is built from the BED regions
    restricted to that chromosome and passed to ``SnpSubset.print_subsets``
    with the output name '<dir>/<name>.<chrnum>.annot.gz', where <dir> is
    the directory of ``bedfile``.
    """
    dirname, sep, filename = args.bedfile.rpartition('/')
    # A bare filename ('foo.bed') has no directory component. Fall back to
    # './' so that the BED lookup and the output files resolve relative to
    # the current directory instead of the filesystem root '/'.
    path = dirname + sep if sep else './'
    name = filename[:-4] if filename.endswith('.bed') else filename

    gs = GenomicSubset(name, path=path)
    # Chromosomes are processed in descending order, 22 first.
    for chrnum in range(22, 0, -1):
        print('creating annot file for chr', chrnum)
        d = Dataset(args.refpanel + '.' + str(chrnum))
        sss = [SnpSubset(d, gs.restricted_to_chrom_bedtool(chrnum))]
        SnpSubset.print_subsets(
            '{}{}.{}.annot.gz'.format(path, name, chrnum), sss, [name])
def create_annot(args):
    """Create per-chromosome annotation files for a single BED file.

    ``args`` must provide:

    * ``bedfile``  -- path to the BED file, using '/' as the separator. A
      trailing '.bed' is stripped to obtain the annotation name.
    * ``refpanel`` -- prefix to which '.<chrnum>' is appended to build the
      per-chromosome Dataset.

    For chromosomes 22 down to 1, a SnpSubset is built from the BED regions
    restricted to that chromosome and passed to ``SnpSubset.print_subsets``
    with the output name '<dir>/<name>.<chrnum>.annot.gz', where <dir> is
    the directory of ``bedfile``.
    """
    dirname, sep, filename = args.bedfile.rpartition('/')
    # A bare filename ('foo.bed') has no directory component. Fall back to
    # './' so that the BED lookup and the output files resolve relative to
    # the current directory instead of the filesystem root '/'.
    path = dirname + sep if sep else './'
    name = filename[:-4] if filename.endswith('.bed') else filename

    gs = GenomicSubset(name, path=path)
    # Chromosomes are processed in descending order, 22 first.
    for chrnum in range(22, 0, -1):
        print('creating annot file for chr', chrnum)
        d = Dataset(args.refpanel + '.' + str(chrnum))
        sss = [SnpSubset(d, gs.restricted_to_chrom_bedtool(chrnum))]
        SnpSubset.print_subsets(
            '{}{}.{}.annot.gz'.format(path, name, chrnum), sss, [name])