def preprocess(self, s):
    """Submit one 'conv' job per annotation and chromosome whose conv file is missing.

    Annotations come from the comma-separated ``self.params.annot_chr``;
    chromosomes come from ``s.chromosomes``. A pair is skipped when
    ``conv_filename`` for it already exists on disk.

    Args:
        s: simulation-like object providing ``name`` and ``chromosomes``.
    """
    print('Acor is preprocessing', s.name, 'with refpanel=', self.params.refpanel)
    print(self.params)

    # Build every annotation up front, before any job is submitted.
    annotations = []
    for annot_name in self.params.annot_chr.split(','):
        annotations.append(pa.Annotation(paths.annotations + annot_name))

    for annot in annotations:
        print('preprocessing', annot.filestem())
        for chrom in s.chromosomes:
            if os.path.exists(annot.conv_filename(chrom, full=self.params.fullconv)):
                continue  # conv output already present for this chromosome

            command = ['python', '-u', paths.code + 'acor/acor.py',
                       '--annot-chr', annot.stem_chr,
                       '--bfile-chr', self.refpanel.bfile_chr]
            if self.params.fullconv:
                command.append('-fullconv')
            command.extend(['conv', '--chroms', str(chrom)])
            print(' '.join(command))

            # Log file name carries a 'full' marker only in fullconv mode.
            log_marker = 'full' if self.params.fullconv else ''
            log_path = annot.filestem(chrom) + '.' + log_marker + 'convbsub_out'
            # The job name uses the whole annot_chr parameter, not just this annotation.
            job_label = self.params.annot_chr.replace('/', '_') + ',conv,chr=' + str(chrom)
            bsub.submit(command, log_path, jobname=job_label)
def preprocess(self, s):
    """Queue the Acor 'conv' step for each annotation on each chromosome of ``s``.

    For every annotation named in ``self.params.annot_chr`` (comma-separated)
    and every chromosome in ``s.chromosomes``, a job running ``acor/acor.py``
    is handed to ``bsub.submit`` unless the conv file is already on disk.

    Args:
        s: object exposing ``name`` and ``chromosomes``.
    """
    print('Acor is preprocessing', s.name, 'with refpanel=', self.params.refpanel)
    print(self.params)

    names = self.params.annot_chr.split(',')
    annotations = [pa.Annotation(paths.annotations + name) for name in names]

    for annotation in annotations:
        print('preprocessing', annotation.filestem())
        for chrom in s.chromosomes:
            conv_file = annotation.conv_filename(chrom, full=self.params.fullconv)
            if not os.path.exists(conv_file):
                head = ['python', '-u', paths.code + 'acor/acor.py',
                        '--annot-chr', annotation.stem_chr,
                        '--bfile-chr', self.refpanel.bfile_chr]
                if self.params.fullconv:
                    mode_flag = ['-fullconv']
                else:
                    mode_flag = []
                tail = ['conv', '--chroms', str(chrom)]
                job_command = head + mode_flag + tail
                print(' '.join(job_command))

                if self.params.fullconv:
                    suffix = 'full' + 'convbsub_out'
                else:
                    suffix = '' + 'convbsub_out'
                log_file = annotation.filestem(chrom) + '.' + suffix

                name_prefix = self.params.annot_chr.replace('/', '_')
                bsub.submit(job_command, log_file,
                            jobname=name_prefix + ',conv,chr=' + str(chrom))
def create_baseline_model(self):
    """Write the baseline annotation files, then submit one LD-score job per chromosome.

    The baseline regions are ``LDSC.baseline_model_regions``. Both passes
    iterate over ``self.refpanel.chromosomes()``.
    """
    baseline_subsets = []
    for region in LDSC.baseline_model_regions:
        baseline_subsets.append(GenomicSubset(region))

    # Pass 1: create the annotation file for each chromosome.
    for chrnum in self.refpanel.chromosomes():
        print('creating baseline annot file for chr', chrnum)
        dataset = Dataset(self.params.refpanel, chrnum=chrnum)
        snp_subsets = []
        for subset in baseline_subsets:
            snp_subsets.append(
                SnpSubset(dataset, subset.restricted_to_chrom_bedtool(chrnum)))
        SnpSubset.print_subsets(self.baseline_filename(chrnum),
                                snp_subsets,
                                LDSC.baseline_model_regions)

    # Pass 2: create the ldscores file for each chromosome via ldsc.py.
    for chrnum in self.refpanel.chromosomes():
        dataset = Dataset(self.params.refpanel, chrnum=chrnum)
        command = ['python', '-u', paths.foreign + 'ldsc/ldsc.py', '--l2']
        # ld_window is divided by 1000 before being passed as --ld-wind-cm.
        command += ['--ld-wind-cm', str(self.params.ld_window / 1000.)]
        command += ['--bfile', dataset.genotypes_bedfile.filename]
        command += ['--annot', self.baseline_filename(chrnum)]
        command += ['--out', self.baseline_l2_filestem(chrnum)]
        print(' '.join(command))

        log_path = self.baseline_l2_filestem(chrnum) + '.bsub_out'
        bsub.submit(command, log_path, jobname='baseline,chr=' + str(chrnum))
def preprocess(self):
    """Create annotation files for ``self.params.region`` and submit LD-score jobs.

    When ``self.params.baseline`` is set and baseline preprocessing has not
    been declared in progress, the baseline model is created first.
    """
    if self.params.baseline:
        if not self.baseline_preprocessing_in_progress():
            print('baseline model not found. creating...')
            # Declare before creating, matching the original statement order.
            self.declare_baseline_preprocessing_in_progress()
            self.create_baseline_model()

    print('submitting ld score jobs for annotation of interest')
    region_subset = GenomicSubset(self.params.region)

    # Pass 1: create the annotation file for each chromosome.
    for chrnum in self.refpanel.chromosomes():
        dataset = Dataset(self.params.refpanel, chrnum=chrnum)
        snp_subset = SnpSubset(dataset,
                               region_subset.restricted_to_chrom_bedtool(chrnum))
        SnpSubset.print_subsets(self.annotation_filename(chrnum),
                                [snp_subset],
                                [self.params.region],
                                add_other=True)

    # Pass 2: create the ldscores file for each chromosome via ldsc.py.
    for chrnum in self.refpanel.chromosomes():
        dataset = Dataset(self.params.refpanel, chrnum=chrnum)
        command = ['python', '-u', paths.foreign + 'ldsc/ldsc.py', '--l2']
        command.extend(['--ld-wind-cm', str(self.params.ld_window / 1000.)])
        command.extend(['--bfile', dataset.genotypes_bedfile.filename])
        command.extend(['--annot', self.annotation_filename(chrnum)])
        command.extend(['--out', self.annotation_l2_filestem(chrnum)])
        print(' '.join(command))

        log_path = self.annotation_l2_filestem(chrnum) + '.bsub_out'
        job_label = self.preprocessing_foldername() + ',chr=' + str(chrnum)
        bsub.submit(command, log_path, jobname=job_label)
def preprocess(self):
    """Prepare LD scores for the annotation defined by ``self.params.region``.

    Steps, in order:
      1. If ``self.params.baseline`` is set and baseline preprocessing is not
         already declared in progress, declare it and create the baseline model.
      2. Write one annotation file per reference-panel chromosome.
      3. Submit one ``ldsc/ldsc.py --l2`` job per chromosome via ``bsub.submit``.
    """
    needs_baseline = (self.params.baseline
                      and not self.baseline_preprocessing_in_progress())
    if needs_baseline:
        print('baseline model not found. creating...')
        self.declare_baseline_preprocessing_in_progress()
        self.create_baseline_model()

    print('submitting ld score jobs for annotation of interest')
    subset_of_interest = GenomicSubset(self.params.region)

    # Step 2: annotation files.
    for chrom in self.refpanel.chromosomes():
        data = Dataset(self.params.refpanel, chrnum=chrom)
        restricted = subset_of_interest.restricted_to_chrom_bedtool(chrom)
        SnpSubset.print_subsets(
            self.annotation_filename(chrom),
            [SnpSubset(data, restricted)],
            [self.params.region],
            add_other=True)

    # Step 3: LD-score jobs.
    for chrom in self.refpanel.chromosomes():
        data = Dataset(self.params.refpanel, chrnum=chrom)
        script = paths.foreign + 'ldsc/ldsc.py'
        window = str(self.params.ld_window / 1000.)
        bedfile = data.genotypes_bedfile.filename
        annot_file = self.annotation_filename(chrom)
        out_stem = self.annotation_l2_filestem(chrom)
        job_command = ['python', '-u', script,
                       '--l2',
                       '--ld-wind-cm', window,
                       '--bfile', bedfile,
                       '--annot', annot_file,
                       '--out', out_stem]
        print(' '.join(job_command))

        log_file = self.annotation_l2_filestem(chrom) + '.bsub_out'
        bsub.submit(job_command, log_file,
                    jobname=self.preprocessing_foldername() + ',chr=' + str(chrom))
def submit_runs(self, s, overwrite=False, debug=False):
    """Submit the batched 'run' jobs of this estimator for simulation ``s``.

    Nothing is submitted when no results are missing (unless ``overwrite``
    is true) or when ``dependencies_satisfied(s)`` is false.

    Args:
        s: simulation object providing ``name`` and ``root_folder()``.
        overwrite: submit even when no results are missing.
        debug: forwarded unchanged to ``bsub.submit``.
    """
    if not (self.missing_results(s) or overwrite):
        print('submission unnecessary for', str(self), 'on', s.name)
        return

    if not self.dependencies_satisfied(s):
        print('\nERROR:', str(self), 'cannot submit', s.name,
              '. It needs preprocessing')
        return

    print('\n' + str(self), 'submitting', s.name)
    run_args = ['--method-name', self.method,
                '--sim-name', s.name,
                'run', '--batch-num', '$LSB_JOBINDEX']
    run_args = run_args + self.command_line_params()

    # Logs go under <root>/logs/; the directory is created before submission.
    log_dir = s.root_folder() + 'logs/'
    fs.makedir(log_dir)
    log_path = log_dir + '{}-batch{}.out'.format(self.fsid(), '%I')

    command = ['python', '-u', paths.code + 'sim/methods/estimator_manager.py']
    command = command + run_args
    # Array job with one element per batch.
    array_name = 'run-{}-{}[1-{}]'.format(self.fsid(), s.name, self.num_batches(s))
    bsub.submit(command, log_path,
                jobname=array_name,
                memory_GB=8,
                debug=debug)
def create_baseline_model(self):
    """Build the baseline model: annotation files first, then LD-score jobs.

    One annotation file is written per chromosome of ``self.refpanel``
    covering all of ``LDSC.baseline_model_regions``. After that, one
    ``ldsc/ldsc.py --l2`` job per chromosome is passed to ``bsub.submit``.
    """
    regions = LDSC.baseline_model_regions
    genomic_subsets = [GenomicSubset(r) for r in regions]

    # Annotation files.
    for chrom in self.refpanel.chromosomes():
        print('creating baseline annot file for chr', chrom)
        data = Dataset(self.params.refpanel, chrnum=chrom)
        per_chrom = [SnpSubset(data, g.restricted_to_chrom_bedtool(chrom))
                     for g in genomic_subsets]
        SnpSubset.print_subsets(
            self.baseline_filename(chrom), per_chrom, LDSC.baseline_model_regions)

    # LD-score jobs.
    for chrom in self.refpanel.chromosomes():
        data = Dataset(self.params.refpanel, chrnum=chrom)
        script = paths.foreign + 'ldsc/ldsc.py'
        window = str(self.params.ld_window / 1000.)
        bedfile = data.genotypes_bedfile.filename
        annot_file = self.baseline_filename(chrom)
        out_stem = self.baseline_l2_filestem(chrom)
        job_command = ['python', '-u', script, '--l2',
                       '--ld-wind-cm', window,
                       '--bfile', bedfile,
                       '--annot', annot_file,
                       '--out', out_stem]
        print(' '.join(job_command))

        log_file = self.baseline_l2_filestem(chrom) + '.bsub_out'
        job_label = 'baseline,chr=' + str(chrom)
        bsub.submit(job_command, log_file, jobname=job_label)
def submit(args):
    """Submit the ``real/preprocess.py`` job array named 'preprocess[1-22]'.

    ``args`` is accepted for interface compatibility and is not read here.
    The '$LSB_JOBINDEX' and '%I' placeholders are passed through literally
    (presumably expanded per array element by the scheduler -- confirm).
    """
    dataset = Dataset('UK10Khg19.22')
    log_path = d_aux = dataset.auxfiles_path
    log_path = d_aux + '../' + '%I/.preprocess.out'

    command = ['python', '-u', paths.code + 'real/preprocess.py']
    command.extend(['main', '--chrom', '$LSB_JOBINDEX'])
    bsub.submit(command, log_path,
                jobname='preprocess[1-22]',
                memory_GB=16)
def submit(args):
    """Submit the ``sim/sim_betas.py`` job array for simulation ``args.sim_name``.

    The array has one element per beta (``1`` .. ``sim.num_betas``), and each
    element receives '$LSB_JOBINDEX' as its ``--beta_num`` argument.
    """
    simulation = SumstatSimulation(args.sim_name)
    script_args = ['--sim_name', args.sim_name]
    script_args += ['main', '--beta_num', '$LSB_JOBINDEX']
    log_path = simulation.path() + '.sim_betas.%I.out'

    command = ['python', '-u', paths.code + 'sim/sim_betas.py'] + script_args
    array_name = 'simbetas[1-' + str(simulation.num_betas) + ']'
    bsub.submit(command, log_path, jobname=array_name, memory_GB=5)
def submit(args):
    """Hand the 22-element ``real/preprocess.py`` job array to ``bsub.submit``.

    The log path is built relative to the ``auxfiles_path`` of the
    'UK10Khg19.22' dataset. ``args`` is not used by this function.
    """
    script = paths.code + 'real/preprocess.py'
    script_args = ['main', '--chrom', '$LSB_JOBINDEX']

    reference = Dataset('UK10Khg19.22')
    log_file = reference.auxfiles_path + '../' + '%I/.preprocess.out'

    bsub.submit(['python', '-u', script] + script_args,
                log_file,
                jobname='preprocess[1-22]',
                memory_GB=16)
def submit(args):
    """Resubmit this script (``__file__``) as the job array 'selectrare[1-22]'.

    Args:
        args: object providing ``annot_stem`` and ``refpanel``; both are
            forwarded on the command line, and ``annot_stem`` also prefixes
            the log file name.
    """
    forwarded = ['--annot_stem', args.annot_stem]
    forwarded += ['--refpanel', args.refpanel]
    forwarded += ['main', '--chrnum', '$LSB_JOBINDEX']

    log_path = args.annot_stem + '.maf1p.%I.creation.log'
    command = ['python', '-u', __file__] + forwarded
    bsub.submit(command, log_path, jobname='selectrare[1-22]')
def submit(args):
    """Submit ``munge/randomize_annot_signs.py`` as the job array 'randomize[1-22]'.

    Args:
        args: object providing ``annot_stem`` (forwarded and used as the log
            prefix) and ``debug`` (passed through to ``bsub.submit``).
    """
    script = paths.code + 'munge/randomize_annot_signs.py'
    command = ['python', '-u', script,
               '--annot_stem', args.annot_stem,
               'main', '--chrnum', '$LSB_JOBINDEX']
    log_path = args.annot_stem + '.random.%I.creation.log'
    bsub.submit(command, log_path,
                jobname='randomize[1-22]',
                debug=args.debug)
def submit(args):
    """Queue one ``sim/sim_betas.py`` array element per beta of the simulation.

    Args:
        args: object providing ``sim_name``, used both to load the
            ``SumstatSimulation`` and as the ``--sim_name`` argument.
    """
    sumstat_sim = SumstatSimulation(args.sim_name)

    command = ['python', '-u', paths.code + 'sim/sim_betas.py',
               '--sim_name', args.sim_name,
               'main',
               '--beta_num', '$LSB_JOBINDEX']
    log_file = sumstat_sim.path() + '.sim_betas.%I.out'
    # Array bounds run from 1 to the simulation's number of betas.
    job_label = 'simbetas[1-' + str(sumstat_sim.num_betas) + ']'

    bsub.submit(command, log_file, jobname=job_label, memory_GB=5)
def submit(args):
    """Submit ``real/signed_preprocess.py`` as the job array 'preprocess[1-22]'.

    Args:
        args: object providing ``annot_stem`` and ``refpanel``; both are
            forwarded, and ``annot_stem`` prefixes the log file name.
    """
    script = paths.code + 'real/signed_preprocess.py'
    forwarded = ['--annot_stem', args.annot_stem,
                 '--refpanel', args.refpanel]
    subcommand = ['main', '--chrnum', '$LSB_JOBINDEX']

    log_path = args.annot_stem + '.%I.cannot.log'
    bsub.submit(['python', '-u', script] + forwarded + subcommand,
                log_path,
                jobname='preprocess[1-22]')
def submit(args):
    """Launch the 'selectrare[1-22]' job array, each element re-running this file.

    The command forwards ``args.annot_stem`` and ``args.refpanel`` and passes
    '$LSB_JOBINDEX' as ``--chrnum`` to the ``main`` subcommand.
    """
    log_file = args.annot_stem + '.maf1p.%I.creation.log'

    job_command = ['python', '-u', __file__]
    job_command.extend(['--annot_stem', args.annot_stem])
    job_command.extend(['--refpanel', args.refpanel])
    job_command.extend(['main', '--chrnum', '$LSB_JOBINDEX'])

    bsub.submit(job_command, log_file, jobname='selectrare[1-22]')
def submit(args):
    """Launch the 'preprocess[1-22]' job array running ``real/signed_preprocess.py``.

    The command forwards ``args.annot_stem`` and ``args.refpanel``; logs are
    written next to the annotation stem with a '.%I.cannot.log' suffix.
    """
    log_file = args.annot_stem + '.%I.cannot.log'

    job_command = ['python', '-u', paths.code + 'real/signed_preprocess.py']
    job_command.extend(['--annot_stem', args.annot_stem])
    job_command.extend(['--refpanel', args.refpanel])
    job_command.extend(['main', '--chrnum', '$LSB_JOBINDEX'])

    bsub.submit(job_command, log_file, jobname='preprocess[1-22]')
def submit(args):
    """Launch the 'randomize[1-22]' job array for ``munge/randomize_annot_signs.py``.

    Args:
        args: object providing ``annot_stem`` and ``debug``. ``debug`` is
            handed to ``bsub.submit`` unchanged.
    """
    log_file = args.annot_stem + '.random.%I.creation.log'

    forwarded = ['--annot_stem', args.annot_stem]
    subcommand = ['main', '--chrnum', '$LSB_JOBINDEX']
    interpreter = ['python', '-u', paths.code + 'munge/randomize_annot_signs.py']

    bsub.submit(interpreter + forwarded + subcommand,
                log_file,
                jobname='randomize[1-22]',
                debug=args.debug)
def submit(args):
    """Resubmit this script (``__file__``) as the job array 'run[1-22]'.

    Args:
        args: object providing ``annot_stems`` (an iterable of strings),
            ``sumstats_stem``, ``refpanel`` and ``debug``.
    """
    # All annotation stems travel as ONE space-joined command-line argument.
    joined_stems = ' '.join(args.annot_stems)
    forwarded = ['--annot_stems', joined_stems,
                 '--sumstats_stem', args.sumstats_stem,
                 '--refpanel', args.refpanel,
                 'main', '--chrnum', '$LSB_JOBINDEX']

    results_stem = results_filename(args.annot_stems, args.sumstats_stem,
                                    chrnum='%I')
    log_path = results_stem + '.log'

    bsub.submit(['python', '-u', __file__] + forwarded,
                log_path,
                jobname='run[1-22]',
                debug=args.debug)
def submit(args):
    """Launch the 'run[1-22]' job array, each element re-running this file.

    The log path is ``results_filename(...)`` evaluated with ``chrnum='%I'``
    plus a '.log' suffix. ``args.debug`` is passed on to ``bsub.submit``.
    """
    job_command = ['python', '-u', __file__]
    # The stems are joined with spaces into a single argument value.
    job_command += ['--annot_stems', ' '.join(args.annot_stems)]
    job_command += ['--sumstats_stem', args.sumstats_stem]
    job_command += ['--refpanel', args.refpanel]
    job_command += ['main', '--chrnum', '$LSB_JOBINDEX']

    log_file = results_filename(
        args.annot_stems, args.sumstats_stem, chrnum='%I') + '.log'

    bsub.submit(job_command, log_file, jobname='run[1-22]', debug=args.debug)
def submit_preprocess(self):
    """Submit this estimator's preprocessing job unless it was already submitted.

    After a submission, ``declare_preprocess_submitted()`` is called so that
    later calls take the 'unnecessary' path.
    """
    if self.preprocess_submitted():
        print(str(self), ': pre-processing unnecessary')
        return

    print(str(self), 'pre-processing')
    script_args = ['--method_name', self.method(), 'preprocess']
    script_args = script_args + self.command_line_params()
    log_path = self.path_to_preprocessed_data() + '.preprocessing.out'

    command = ['python', '-u', paths.code + 'methods/estimator_manager.py']
    bsub.submit(command + script_args,
                log_path,
                jobname=self.preprocess_job_name(),
                memory_GB=self.preprocess_memoryreq_GB())
    self.declare_preprocess_submitted()
def submit_beta(beta_num):
    """Submit the ``sim/sim_sumstats.py`` job array for one beta.

    Reads ``args`` and ``sim`` from the enclosing scope. The array has one
    element per sample (``1`` .. ``sim.num_samples_per_beta``).

    Args:
        beta_num: index of the beta; used in the arguments, log path and job name.
    """
    script_args = ['--sim_name', args.sim_name, 'main']
    script_args += ['--beta_num', str(beta_num)]
    script_args += ['--sample_num', '$LSB_JOBINDEX']

    log_path = sim.path_to_beta(beta_num) + '.sim_sumstats.%I.out'

    command = ['python', '-u', paths.code + 'sim/sim_sumstats.py'] + script_args
    array_bounds = '[1-' + str(sim.num_samples_per_beta) + ']'
    # Alternative noted by the original author: memory_GB=10.5
    bsub.submit(command, log_path,
                jobname='simsumstats' + str(beta_num) + array_bounds,
                memory_GB=13)
def submit_beta(beta_num):
    """Queue all sample jobs of beta ``beta_num`` as a single job array.

    ``args`` and ``sim`` are taken from the surrounding scope. Each array
    element runs ``sim/sim_sumstats.py`` with '$LSB_JOBINDEX' as its
    ``--sample_num``.
    """
    beta_label = str(beta_num)
    job_command = ['python', '-u', paths.code + 'sim/sim_sumstats.py',
                   '--sim_name', args.sim_name,
                   'main',
                   '--beta_num', beta_label,
                   '--sample_num', '$LSB_JOBINDEX']
    log_file = sim.path_to_beta(beta_num) + '.sim_sumstats.%I.out'
    job_label = ('simsumstats' + str(beta_num)
                 + '[1-' + str(sim.num_samples_per_beta) + ']')
    # The original source also lists memory_GB=10.5 as a commented-out option.
    bsub.submit(job_command, log_file, jobname=job_label, memory_GB=13)
def submit(args):
    """Submit one job array per simulation of the experiment ``args.exp_name``.

    Simulations that have an ``ignore`` attribute are skipped. Each array
    re-runs this script (``__file__``) with one element per beta.

    Args:
        args: object providing ``exp_name`` and ``debug``.
    """
    experiment = sm.Experiment(args.exp_name)
    for simulation in experiment.sims.values():
        if not hasattr(simulation, 'ignore'):
            print('submitting', simulation.name)
            forwarded = ['--exp-name', args.exp_name,
                         'main', '--sim-name', simulation.name,
                         '--beta-num', '$LSB_JOBINDEX']
            log_path = simulation.root_folder(create=True) + '.sim_sumstats.%I'
            array_name = (simulation.name
                          + '.simsumstats[1-{}]'.format(simulation.num_betas))
            # Alternative noted by the original author:
            #   queue='medium', time_in_hours=40
            bsub.submit(['python', '-u', __file__] + forwarded,
                        log_path,
                        queue='short', time_in_hours=12,
                        jobname=array_name,
                        debug=args.debug)
        else:
            print('ignoring', simulation.name)
def submit_preprocess(self, s, debug=False):
    """Submit the preprocessing job for simulation ``s`` if its dependencies are unmet.

    Args:
        s: simulation object providing ``name``.
        debug: forwarded to ``bsub.submit``; when true, the submission is
            NOT recorded through ``declare_preprocess_submitted``.
    """
    if self.dependencies_satisfied(s):
        print(str(self), ': pre-processing unnecessary')
        return

    print(str(self), 'pre-processing')
    script_args = ['--method-name', self.method,
                   '--sim-name', s.name,
                   'preprocess']
    script_args = script_args + self.command_line_params()

    log_path = (self.refpanel.path + '.' + s.name + '.' + self.fsid()
                + '.preprocessing.out')
    command = ['python', '-u', paths.code + 'sim/methods/estimator_manager.py']
    job_label = 'preprocess-' + self.fsid() + '-' + s.name

    # NOTE(review): preprocess_memoryreq_GB is passed without being called --
    # confirm it is an attribute/property here rather than a method.
    bsub.submit(command + script_args,
                log_path,
                jobname=job_label,
                memory_GB=self.preprocess_memoryreq_GB,
                debug=debug)
    if not debug:
        self.declare_preprocess_submitted(s)
def submit(args):
    """Walk the experiment's simulations and submit a sim_sumstats array for each.

    A simulation carrying an ``ignore`` attribute is reported and skipped.
    For the rest, this file is resubmitted through ``bsub.submit`` on the
    'short' queue with a 12-hour limit and one array element per beta.
    """
    exp = sm.Experiment(args.exp_name)
    for sim_obj in exp.sims.values():
        if hasattr(sim_obj, 'ignore'):
            print('ignoring', sim_obj.name)
            continue

        print('submitting', sim_obj.name)
        job_command = ['python', '-u', __file__]
        job_command += ['--exp-name', args.exp_name]
        job_command += ['main', '--sim-name', sim_obj.name]
        job_command += ['--beta-num', '$LSB_JOBINDEX']

        log_file = sim_obj.root_folder(create=True) + '.sim_sumstats.%I'
        job_label = sim_obj.name + '.simsumstats[1-{}]'.format(sim_obj.num_betas)

        # The original source also lists queue='medium', time_in_hours=40
        # as a commented-out alternative.
        bsub.submit(job_command, log_file,
                    queue='short',
                    time_in_hours=12,
                    jobname=job_label,
                    debug=args.debug)
def preprocess(self, s):
    """Submit full-conv jobs for ``self.annotation`` on the chromosomes of ``s``.

    A chromosome is skipped when ``conv_filename(c, full=True)`` already
    exists. Every submitted command includes the '-fullconv' flag.

    Args:
        s: object exposing ``name`` and ``chromosomes``.
    """
    print('TruthRE is preprocessing', s.name, 'with refpanel=', self.params.refpanel)
    print(self.params)
    print('preprocessing', self.annotation.filestem())

    for chrom in s.chromosomes:
        if os.path.exists(self.annotation.conv_filename(chrom, full=True)):
            continue  # already convolved

        command = ['python', '-u', paths.code + 'acor/acor.py']
        command += ['--annot-chr', self.annotation.stem_chr]
        command += ['--bfile-chr', self.refpanel.bfile_chr]
        command += ['-fullconv', 'conv', '--chroms', str(chrom)]
        print(' '.join(command))

        log_path = self.annotation.filestem(chrom) + '.convbsub_out'
        job_label = self.params.annot_chr.replace('/', '_') + ',conv,chr=' + str(chrom)
        bsub.submit(command, log_path, jobname=job_label)
def submit_runs(self, sim, overwrite=False, debug=False):
    """Submit this estimator's batched 'run' jobs for ``sim`` when results are missing.

    Submission is skipped only if a results path exists for every beta
    (1 .. ``sim.num_betas``) and ``overwrite`` is false.

    Args:
        sim: simulation object providing ``name`` and ``num_betas``.
        overwrite: submit even when every results path already exists.
        debug: forwarded unchanged to ``bsub.submit``.
    """
    # TODO: check whether there are more betas or more samples per beta, then
    # decide whether to submit the betas in parallel or in groups. It is
    # probably easy to let the estimator manager accept groups of betas.
    print('\n' + str(self), 'submitting', sim.name)
    run_args = ['--method_name', self.method(), 'run',
                '--sim_name', sim.name,
                '--batch_num', '$LSB_JOBINDEX']
    run_args = run_args + self.command_line_params()
    log_path = self.outfile_path(sim, '%I')

    # Stop at the first beta whose results path is missing.
    all_present = True
    for beta_num in range(1, sim.num_betas + 1):
        if not os.path.exists(self.results_path_stem(sim, beta_num)):
            all_present = False
            break

    if all_present and not overwrite:
        print('submission unnecessary for', str(self))
        return

    command = ['python', '-u', paths.code + 'methods/estimator_manager.py']
    bsub.submit(command + run_args,
                log_path,
                jobname=self.run_job_name(sim),
                memory_GB=4,
                debug=debug)
def preprocess(self, s):
    """Submit LD-score jobs for each annotation on chromosomes 1 through 22.

    Annotations come from the comma-separated ``self.params.annot_chr``.
    A chromosome is skipped when the annotation's ``ldscores_filename``
    already exists on disk.

    Args:
        s: object exposing ``name`` (only used in the progress message).
    """
    print('LDSC is preprocessing', s.name, 'with refpanel=', self.params.refpanel)
    print(self.params)

    for annot_name in self.params.annot_chr.split(','):
        annot = pa.Annotation(paths.annotations + annot_name)
        # The chromosome range is fixed at 1..22 here.
        for chrom in range(1, 23):
            if os.path.exists(annot.ldscores_filename(chrom)):
                continue

            command = ['python', '-u', paths.foreign + 'ldsc/ldsc.py', '--l2']
            command += ['--ld-wind-cm', str(self.params.ld_window)]
            command += ['--bfile', self.refpanel.bfile(chrom)]
            command += ['--annot', annot.annot_filename(chrom)]
            command += ['--out', annot.filestem(chrom)]
            print(' '.join(command))

            log_path = annot.filestem(chrom) + '.ldscoresbsub_out'
            # NOTE(review): 'ldcores' spelling is kept byte-for-byte; the job
            # name may be matched elsewhere -- confirm before correcting it.
            job_label = (self.params.annot_chr.replace('/', '_')
                         + ',ldcores,chr=' + str(chrom))
            bsub.submit(command, log_path, jobname=job_label)