Beispiel #1
0
    def preprocess(self, s):
        """Submit a 'conv' job for every annotation/chromosome pair whose
        convolution file does not exist yet.

        Annotations come from the comma-separated ``self.params.annot_chr``;
        chromosomes come from ``s.chromosomes``. Returns None.
        """
        print('Acor is preprocessing', s.name, 'with refpanel=',
              self.params.refpanel)
        print(self.params)

        # All Annotation objects are built before the first job is submitted.
        annots = []
        for aname in self.params.annot_chr.split(','):
            annots.append(pa.Annotation(paths.annotations + aname))

        for a in annots:
            print('preprocessing', a.filestem())
            for c in s.chromosomes:
                if os.path.exists(
                        a.conv_filename(c, full=self.params.fullconv)):
                    # Output for this chromosome is already on disk.
                    continue

                conv_command = [
                    'python', '-u', paths.code + 'acor/acor.py',
                    '--annot-chr', a.stem_chr,
                    '--bfile-chr', self.refpanel.bfile_chr,
                ]
                if self.params.fullconv:
                    conv_command.append('-fullconv')
                conv_command.extend(['conv', '--chroms', str(c)])
                print(' '.join(conv_command))

                # Log name is '<stem>.convbsub_out' or
                # '<stem>.fullconvbsub_out' depending on fullconv.
                stem_with_dot = a.filestem(c) + '.'
                mode_tag = 'full' if self.params.fullconv else ''
                outfilepath = stem_with_dot + mode_tag + 'convbsub_out'

                # Slashes in annot_chr are replaced before use in the job name.
                job_label = self.params.annot_chr.replace('/', '_') + \
                    ',conv,chr=' + str(c)
                bsub.submit(conv_command, outfilepath, jobname=job_label)
Beispiel #2
0
    def preprocess(self, s):
        """Queue convolution jobs for annotations that still lack them.

        For each annotation named in ``self.params.annot_chr`` (comma
        separated) and each chromosome in ``s.chromosomes``, a job is
        submitted only when ``conv_filename`` does not exist on disk.
        """
        print('Acor is preprocessing', s.name,
              'with refpanel=', self.params.refpanel)
        print(self.params)

        # Construct every annotation first; submission happens afterwards.
        annots = []
        for aname in self.params.annot_chr.split(','):
            annots.append(pa.Annotation(paths.annotations + aname))

        for a in annots:
            print('preprocessing', a.filestem())
            for c in s.chromosomes:
                already_done = os.path.exists(
                    a.conv_filename(c, full=self.params.fullconv))
                if already_done:
                    continue

                conv_command = ['python', '-u', paths.code + 'acor/acor.py']
                conv_command += ['--annot-chr', a.stem_chr]
                conv_command += ['--bfile-chr', self.refpanel.bfile_chr]
                if self.params.fullconv:
                    conv_command += ['-fullconv']
                conv_command += ['conv', '--chroms', str(c)]
                print(' '.join(conv_command))

                outfilepath = a.filestem(c) + '.'
                if self.params.fullconv:
                    outfilepath = outfilepath + 'full'
                else:
                    outfilepath = outfilepath + ''
                outfilepath = outfilepath + 'convbsub_out'

                name_prefix = self.params.annot_chr.replace('/', '_')
                bsub.submit(
                    conv_command,
                    outfilepath,
                    jobname=name_prefix + ',conv,chr=' + str(c))
Beispiel #3
0
    def create_baseline_model(self):
        """Write baseline annotation files, then submit LD-score jobs.

        Pass 1 calls ``SnpSubset.print_subsets`` once per chromosome of the
        reference panel with one SnpSubset per baseline region. Pass 2
        submits one ``ldsc.py --l2`` job per chromosome.
        """
        gss = []
        for region in LDSC.baseline_model_regions:
            gss.append(GenomicSubset(region))

        # Pass 1: annotation files.
        for chrnum in self.refpanel.chromosomes():
            print('creating baseline annot file for chr', chrnum)
            d = Dataset(self.params.refpanel, chrnum=chrnum)
            sss = []
            for gs in gss:
                sss.append(
                    SnpSubset(d, gs.restricted_to_chrom_bedtool(chrnum)))
            SnpSubset.print_subsets(
                self.baseline_filename(chrnum),
                sss,
                LDSC.baseline_model_regions)

        # Pass 2: LD-score jobs.
        for chrnum in self.refpanel.chromosomes():
            d = Dataset(self.params.refpanel, chrnum=chrnum)
            ldscores_command = [
                'python', '-u', paths.foreign + 'ldsc/ldsc.py', '--l2']
            # ld_window is divided by 1000. before being passed on.
            ldscores_command += [
                '--ld-wind-cm', str(self.params.ld_window / 1000.)]
            ldscores_command += ['--bfile', d.genotypes_bedfile.filename]
            ldscores_command += ['--annot', self.baseline_filename(chrnum)]
            ldscores_command += ['--out', self.baseline_l2_filestem(chrnum)]
            print(' '.join(ldscores_command))

            outfilepath = self.baseline_l2_filestem(chrnum) + '.bsub_out'
            job_label = 'baseline,chr=' + str(chrnum)
            bsub.submit(ldscores_command, outfilepath, jobname=job_label)
Beispiel #4
0
    def preprocess(self):
        """Create annotation files for ``self.params.region`` and submit one
        LD-score job per reference-panel chromosome.

        When ``self.params.baseline`` is set and baseline preprocessing is
        not already flagged as in progress, the baseline model is created
        first.
        """
        needs_baseline = (self.params.baseline
                          and not self.baseline_preprocessing_in_progress())
        if needs_baseline:
            print('baseline model not found. creating...')
            self.declare_baseline_preprocessing_in_progress()
            self.create_baseline_model()

        print('submitting ld score jobs for annotation of interest')
        gs = GenomicSubset(self.params.region)

        # Pass 1: annotation file per chromosome.
        for chrnum in self.refpanel.chromosomes():
            d = Dataset(self.params.refpanel, chrnum=chrnum)
            ss = SnpSubset(d, gs.restricted_to_chrom_bedtool(chrnum))
            SnpSubset.print_subsets(
                self.annotation_filename(chrnum),
                [ss],
                [self.params.region],
                add_other=True)

        # Pass 2: one ldsc.py job per chromosome.
        for chrnum in self.refpanel.chromosomes():
            d = Dataset(self.params.refpanel, chrnum=chrnum)
            ldscores_command = [
                'python', '-u', paths.foreign + 'ldsc/ldsc.py', '--l2']
            ldscores_command += [
                '--ld-wind-cm', str(self.params.ld_window / 1000.)]
            ldscores_command += ['--bfile', d.genotypes_bedfile.filename]
            ldscores_command += ['--annot', self.annotation_filename(chrnum)]
            ldscores_command += [
                '--out', self.annotation_l2_filestem(chrnum)]
            print(' '.join(ldscores_command))

            outfilepath = self.annotation_l2_filestem(chrnum) + '.bsub_out'
            job_label = self.preprocessing_foldername() + ',chr=' + \
                str(chrnum)
            bsub.submit(ldscores_command, outfilepath, jobname=job_label)
Beispiel #5
0
    def preprocess(self):
        """Prepare LD scores for the annotation given by ``self.params.region``.

        Optionally triggers baseline-model creation, writes one annotation
        file per chromosome, then submits one ``ldsc.py --l2`` job per
        chromosome.
        """
        if self.params.baseline:
            if not self.baseline_preprocessing_in_progress():
                print('baseline model not found. creating...')
                self.declare_baseline_preprocessing_in_progress()
                self.create_baseline_model()

        print('submitting ld score jobs for annotation of interest')
        gs = GenomicSubset(self.params.region)

        # Annotation files.
        for chrnum in self.refpanel.chromosomes():
            d = Dataset(self.params.refpanel, chrnum=chrnum)
            ss = SnpSubset(d, gs.restricted_to_chrom_bedtool(chrnum))
            annot_path = self.annotation_filename(chrnum)
            SnpSubset.print_subsets(
                annot_path, [ss], [self.params.region], add_other=True)

        # LD-score jobs.
        for chrnum in self.refpanel.chromosomes():
            d = Dataset(self.params.refpanel, chrnum=chrnum)
            ldscores_command = [
                'python', '-u', paths.foreign + 'ldsc/ldsc.py', '--l2']
            flag_values = [
                ('--ld-wind-cm', str(self.params.ld_window / 1000.)),
                ('--bfile', d.genotypes_bedfile.filename),
                ('--annot', self.annotation_filename(chrnum)),
                ('--out', self.annotation_l2_filestem(chrnum)),
            ]
            for flag, value in flag_values:
                ldscores_command.extend([flag, value])
            print(' '.join(ldscores_command))

            outfilepath = self.annotation_l2_filestem(chrnum) + '.bsub_out'
            bsub.submit(
                ldscores_command,
                outfilepath,
                jobname=self.preprocessing_foldername() + ',chr=' +
                str(chrnum))
Beispiel #6
0
    def submit_runs(self, s, overwrite=False, debug=False):
        """Submit the estimator runs for simulation ``s`` as one job array.

        Nothing is submitted when no results are missing (unless
        ``overwrite`` is true) or when ``dependencies_satisfied(s)`` is
        false. ``debug`` is forwarded to ``bsub.submit``.
        """
        if not (self.missing_results(s) or overwrite):
            print('submission unnecessary for', str(self), 'on', s.name)
            return
        if not self.dependencies_satisfied(s):
            print('\nERROR:', str(self), 'cannot submit', s.name, '. It needs preprocessing')
            return

        print('\n' + str(self), 'submitting', s.name)
        fixed_args = [
            '--method-name', self.method,
            '--sim-name', s.name,
            'run',
            '--batch-num', '$LSB_JOBINDEX',
        ]
        my_args = fixed_args + self.command_line_params()

        # The log directory is created before the job is submitted; '%I' is
        # left as a literal placeholder in the log file name.
        log_dir = s.root_folder() + 'logs/'
        fs.makedir(log_dir)
        outfilepath = log_dir + '{}-batch{}.out'.format(self.fsid(), '%I')

        command = ['python', '-u',
                   paths.code + 'sim/methods/estimator_manager.py'] + my_args
        # Job-array name spans batch indices 1..num_batches(s).
        job_label = 'run-{}-{}[1-{}]'.format(
            self.fsid(), s.name, self.num_batches(s))
        bsub.submit(
            command,
            outfilepath,
            jobname=job_label,
            memory_GB=8,
            debug=debug)
Beispiel #7
0
    def create_baseline_model(self):
        """Build the baseline model: annotation files first, then LD scores.

        One annotation file is written per reference-panel chromosome, and
        afterwards one ``ldsc.py --l2`` job is submitted per chromosome.
        """
        gss = []
        for region in LDSC.baseline_model_regions:
            gss.append(GenomicSubset(region))

        # Annotation files, one per chromosome.
        for chrnum in self.refpanel.chromosomes():
            print('creating baseline annot file for chr', chrnum)
            d = Dataset(self.params.refpanel, chrnum=chrnum)
            sss = []
            for gs in gss:
                bedtool = gs.restricted_to_chrom_bedtool(chrnum)
                sss.append(SnpSubset(d, bedtool))
            annot_path = self.baseline_filename(chrnum)
            SnpSubset.print_subsets(
                annot_path, sss, LDSC.baseline_model_regions)

        # LD-score jobs, one per chromosome.
        for chrnum in self.refpanel.chromosomes():
            d = Dataset(self.params.refpanel, chrnum=chrnum)
            ldscores_command = [
                'python', '-u', paths.foreign + 'ldsc/ldsc.py', '--l2']
            flag_values = [
                ('--ld-wind-cm', str(self.params.ld_window / 1000.)),
                ('--bfile', d.genotypes_bedfile.filename),
                ('--annot', self.baseline_filename(chrnum)),
                ('--out', self.baseline_l2_filestem(chrnum)),
            ]
            for flag, value in flag_values:
                ldscores_command.extend([flag, value])
            print(' '.join(ldscores_command))

            outfilepath = self.baseline_l2_filestem(chrnum) + '.bsub_out'
            bsub.submit(
                ldscores_command,
                outfilepath,
                jobname='baseline,chr=' + str(chrnum))
def submit(args):
    """Submit ``real/preprocess.py`` as a job array named 'preprocess[1-22]'.

    ``args`` is accepted but not read by this function.
    """
    d = Dataset('UK10Khg19.22')
    # '%I' and '$LSB_JOBINDEX' are passed through literally; presumably the
    # scheduler substitutes the array index -- confirm against bsub.submit.
    outfilepath = d.auxfiles_path + '../' + '%I/.preprocess.out'
    command = [
        'python', '-u', paths.code + 'real/preprocess.py',
        'main', '--chrom', '$LSB_JOBINDEX',
    ]
    bsub.submit(
        command,
        outfilepath,
        jobname='preprocess[1-22]',
        memory_GB=16)
Beispiel #9
0
def submit(args):
    """Submit ``sim/sim_betas.py`` as a job array over the simulation's betas.

    The array runs from 1 to ``sim.num_betas`` for the simulation named by
    ``args.sim_name``.
    """
    sim = SumstatSimulation(args.sim_name)
    command = ['python', '-u', paths.code + 'sim/sim_betas.py']
    command += ['--sim_name', args.sim_name]
    command += ['main', '--beta_num', '$LSB_JOBINDEX']
    log_path = sim.path() + '.sim_betas.%I.out'
    array_name = 'simbetas[1-' + str(sim.num_betas) + ']'
    bsub.submit(command, log_path, jobname=array_name, memory_GB=5)
def submit(args):
    """Launch the 22-element preprocessing job array.

    ``args`` is part of the interface but is unused here.
    """
    d = Dataset('UK10Khg19.22')
    log_path = d.auxfiles_path + '../' + '%I/.preprocess.out'
    script = paths.code + 'real/preprocess.py'
    # The array index is handed to the script via '$LSB_JOBINDEX'.
    command = ['python', '-u', script, 'main', '--chrom', '$LSB_JOBINDEX']
    bsub.submit(command, log_path, jobname='preprocess[1-22]', memory_GB=16)
Beispiel #11
0
def submit(args):
    """Re-run this file's 'main' subcommand as the 'selectrare[1-22]' array.

    Reads ``args.annot_stem`` and ``args.refpanel``.
    """
    command = ['python', '-u', __file__]
    command += ['--annot_stem', args.annot_stem]
    command += ['--refpanel', args.refpanel]
    command += ['main', '--chrnum', '$LSB_JOBINDEX']
    # Log files sit next to the annotation stem, one per array index ('%I').
    log_path = args.annot_stem + '.maf1p.%I.creation.log'
    bsub.submit(command, log_path, jobname='selectrare[1-22]')
def submit(args):
    """Submit ``munge/randomize_annot_signs.py`` as 'randomize[1-22]'.

    Reads ``args.annot_stem``; ``args.debug`` is forwarded to ``bsub.submit``.
    """
    script_args = [
        '--annot_stem', args.annot_stem,
        'main',
        '--chrnum', '$LSB_JOBINDEX',
    ]
    log_path = args.annot_stem + '.random.%I.creation.log'
    script = paths.code + 'munge/randomize_annot_signs.py'
    bsub.submit(
        ['python', '-u', script] + script_args,
        log_path,
        jobname='randomize[1-22]',
        debug=args.debug)
Beispiel #13
0
def submit(args):
    """Queue one ``sim/sim_betas.py`` array element per beta of the simulation.

    The simulation is looked up by ``args.sim_name``.
    """
    sim = SumstatSimulation(args.sim_name)
    script_args = [
        '--sim_name', args.sim_name,
        'main',
        '--beta_num', '$LSB_JOBINDEX',
    ]
    log_path = sim.path() + '.sim_betas.%I.out'
    script = paths.code + 'sim/sim_betas.py'
    bsub.submit(
        ['python', '-u', script] + script_args,
        log_path,
        jobname='simbetas[1-' + str(sim.num_betas) + ']',
        memory_GB=5)
Beispiel #14
0
def submit(args):
    """Submit ``real/signed_preprocess.py`` as the 'preprocess[1-22]' array.

    Reads ``args.annot_stem`` and ``args.refpanel``.
    """
    command = ['python', '-u', paths.code + 'real/signed_preprocess.py']
    command += ['--annot_stem', args.annot_stem]
    command += ['--refpanel', args.refpanel]
    command += ['main', '--chrnum', '$LSB_JOBINDEX']
    log_path = args.annot_stem + '.%I.cannot.log'
    bsub.submit(command, log_path, jobname='preprocess[1-22]')
Beispiel #15
0
def submit(args):
    """Submit this script's 'main' subcommand as a 22-element job array.

    Uses ``args.annot_stem`` for both the script argument and the log path,
    and ``args.refpanel`` as a script argument.
    """
    script_args = [
        '--annot_stem', args.annot_stem,
        '--refpanel', args.refpanel,
        'main',
        '--chrnum', '$LSB_JOBINDEX',
    ]
    log_path = args.annot_stem + '.maf1p.%I.creation.log'
    command = ['python', '-u', __file__] + script_args
    bsub.submit(command, log_path, jobname='selectrare[1-22]')
def submit(args):
    """Queue the signed-preprocessing job array ('preprocess[1-22]').

    Reads ``args.annot_stem`` and ``args.refpanel``.
    """
    script_args = [
        '--annot_stem', args.annot_stem,
        '--refpanel', args.refpanel,
        'main',
        '--chrnum', '$LSB_JOBINDEX',
    ]
    log_path = args.annot_stem + '.%I.cannot.log'
    script = paths.code + 'real/signed_preprocess.py'
    bsub.submit(
        ['python', '-u', script] + script_args,
        log_path,
        jobname='preprocess[1-22]')
Beispiel #17
0
def submit(args):
    """Queue the sign-randomisation job array ('randomize[1-22]').

    Reads ``args.annot_stem`` and forwards ``args.debug`` to ``bsub.submit``.
    """
    command = ['python', '-u', paths.code + 'munge/randomize_annot_signs.py']
    command += ['--annot_stem', args.annot_stem]
    command += ['main', '--chrnum', '$LSB_JOBINDEX']
    log_path = args.annot_stem + '.random.%I.creation.log'
    bsub.submit(
        command,
        log_path,
        jobname='randomize[1-22]',
        debug=args.debug)
Beispiel #18
0
def submit(args):
    """Submit this script's 'main' subcommand as the 'run[1-22]' job array.

    ``args.annot_stems`` is joined with spaces into a single argument.
    ``args.debug`` is forwarded to ``bsub.submit``.
    """
    joined_stems = ' '.join(args.annot_stems)
    script_args = [
        '--annot_stems', joined_stems,
        '--sumstats_stem', args.sumstats_stem,
        '--refpanel', args.refpanel,
        'main',
        '--chrnum', '$LSB_JOBINDEX',
    ]
    # The log path is the results filename for chrnum='%I' plus '.log'.
    results_stem = results_filename(
        args.annot_stems, args.sumstats_stem, chrnum='%I')
    log_path = results_stem + '.log'
    bsub.submit(
        ['python', '-u', __file__] + script_args,
        log_path,
        jobname='run[1-22]',
        debug=args.debug)
Beispiel #19
0
def submit(args):
    """Queue a 22-element job array that re-invokes this file with 'main'.

    Reads ``args.annot_stems``, ``args.sumstats_stem``, ``args.refpanel`` and
    ``args.debug``.
    """
    command = ['python', '-u', __file__]
    command += ['--annot_stems', ' '.join(args.annot_stems)]
    command += ['--sumstats_stem', args.sumstats_stem]
    command += ['--refpanel', args.refpanel]
    command += ['main', '--chrnum', '$LSB_JOBINDEX']

    log_path = results_filename(
        args.annot_stems, args.sumstats_stem, chrnum='%I') + '.log'

    bsub.submit(command, log_path, jobname='run[1-22]', debug=args.debug)
Beispiel #20
0
 def submit_preprocess(self):
     """Submit the preprocessing job unless it was already submitted.

     After submitting, the submission is recorded through
     ``declare_preprocess_submitted``.
     """
     if self.preprocess_submitted():
         print(str(self), ': pre-processing unnecessary')
         return

     print(str(self), 'pre-processing')
     fixed_args = ['--method_name', self.method(), 'preprocess']
     my_args = fixed_args + self.command_line_params()
     outfilepath = self.path_to_preprocessed_data() + '.preprocessing.out'
     command = ['python', '-u',
                paths.code + 'methods/estimator_manager.py'] + my_args
     bsub.submit(
         command,
         outfilepath,
         jobname=self.preprocess_job_name(),
         memory_GB=self.preprocess_memoryreq_GB())
     self.declare_preprocess_submitted()
Beispiel #21
0
    def submit_beta(beta_num):
        """Submit the ``sim/sim_sumstats.py`` job array for one beta.

        Uses ``args`` and ``sim`` from the enclosing scope. The array index
        runs from 1 to ``sim.num_samples_per_beta``.
        """
        script_args = [
            '--sim_name', args.sim_name,
            'main',
            '--beta_num', str(beta_num),
            '--sample_num', '$LSB_JOBINDEX',
        ]
        log_path = sim.path_to_beta(beta_num) + '.sim_sumstats.%I.out'
        command = ['python', '-u',
                   paths.code + 'sim/sim_sumstats.py'] + script_args
        array_name = 'simsumstats' + str(beta_num) + \
            '[1-' + str(sim.num_samples_per_beta) + ']'

        # A memory request of 10.5 was used previously (see history).
        bsub.submit(command, log_path, jobname=array_name, memory_GB=13)
Beispiel #22
0
    def submit_beta(beta_num):
        """Queue the summary-statistics simulations for beta ``beta_num``.

        ``args`` and ``sim`` are taken from the enclosing scope. One array
        element is requested per sample of this beta.
        """
        command = ['python', '-u', paths.code + 'sim/sim_sumstats.py']
        command += ['--sim_name', args.sim_name]
        command += ['main', '--beta_num', str(beta_num)]
        command += ['--sample_num', '$LSB_JOBINDEX']

        log_path = sim.path_to_beta(beta_num) + '.sim_sumstats.%I.out'
        array_range = '[1-' + str(sim.num_samples_per_beta) + ']'

        bsub.submit(
            command,
            log_path,
            jobname='simsumstats' + str(beta_num) + array_range,
            # memory request was 10.5 before being raised to 13
            memory_GB=13)
Beispiel #23
0
def submit(args):
    """Submit one sumstats job array per simulation of an experiment.

    Simulations that have an ``ignore`` attribute are skipped. Each array
    runs from 1 to the simulation's ``num_betas``.
    """
    e = sm.Experiment(args.exp_name)
    for s in e.sims.values():
        if hasattr(s, 'ignore'):
            print('ignoring', s.name)
        else:
            print('submitting', s.name)
            script_args = [
                '--exp-name', args.exp_name,
                'main',
                '--sim-name', s.name,
                '--beta-num', '$LSB_JOBINDEX',
            ]
            # root_folder is asked to create the folder if needed.
            outfilename = s.root_folder(create=True) + '.sim_sumstats.%I'
            command = ['python', '-u', __file__] + script_args
            # An alternative setting was queue='medium', time_in_hours=40.
            bsub.submit(
                command,
                outfilename,
                queue='short',
                time_in_hours=12,
                jobname=s.name + '.simsumstats[1-{}]'.format(s.num_betas),
                debug=args.debug)
Beispiel #24
0
 def submit_preprocess(self, s, debug=False):
     """Submit preprocessing for simulation ``s`` if its dependencies are
     not yet satisfied.

     ``debug`` is forwarded to ``bsub.submit``. The submission is recorded
     only when ``debug`` is false.
     """
     if self.dependencies_satisfied(s):
         print(str(self), ': pre-processing unnecessary')
         return

     print(str(self), 'pre-processing')
     fixed_args = [
         '--method-name', self.method,
         '--sim-name', s.name,
         'preprocess',
     ]
     my_args = fixed_args + self.command_line_params()
     log_stem = self.refpanel.path + '.' + s.name + '.' + self.fsid()
     outfilepath = log_stem + '.preprocessing.out'
     command = ['python', '-u',
                paths.code + 'sim/methods/estimator_manager.py'] + my_args
     bsub.submit(
         command,
         outfilepath,
         jobname='preprocess-' + self.fsid() + '-' + s.name,
         memory_GB=self.preprocess_memoryreq_GB,
         debug=debug)
     if debug:
         return
     self.declare_preprocess_submitted(s)
Beispiel #25
0
def submit(args):
    """Queue a sumstats job array for each non-ignored simulation.

    The experiment is loaded from ``args.exp_name``. ``args.debug`` is
    forwarded to ``bsub.submit``.
    """
    e = sm.Experiment(args.exp_name)
    for s in e.sims.values():
        if hasattr(s, 'ignore'):
            print('ignoring', s.name)
            continue

        print('submitting', s.name)
        command_tail = ['--exp-name', args.exp_name]
        command_tail += ['main', '--sim-name', s.name]
        command_tail += ['--beta-num', '$LSB_JOBINDEX']
        outfilename = s.root_folder(create=True) + '.sim_sumstats.%I'

        # Previously considered: queue='medium', time_in_hours=40.
        submit_options = dict(
            queue='short',
            time_in_hours=12,
            jobname=s.name + '.simsumstats[1-{}]'.format(s.num_betas),
            debug=args.debug)
        bsub.submit(
            ['python', '-u', __file__] + command_tail,
            outfilename,
            **submit_options)
Beispiel #26
0
    def preprocess(self, s):
        """Submit a full-convolution job for every chromosome of ``s`` whose
        full conv file for ``self.annotation`` does not exist yet.
        """
        print('TruthRE is preprocessing', s.name,
              'with refpanel=', self.params.refpanel)
        print(self.params)

        print('preprocessing', self.annotation.filestem())
        for c in s.chromosomes:
            if os.path.exists(self.annotation.conv_filename(c, full=True)):
                # Already computed for this chromosome.
                continue

            conv_command = ['python', '-u', paths.code + 'acor/acor.py']
            conv_command += ['--annot-chr', self.annotation.stem_chr]
            conv_command += ['--bfile-chr', self.refpanel.bfile_chr]
            conv_command += ['-fullconv', 'conv', '--chroms', str(c)]
            print(' '.join(conv_command))

            outfilepath = self.annotation.filestem(c) + '.convbsub_out'
            job_label = self.params.annot_chr.replace('/', '_') + \
                ',conv,chr=' + str(c)
            bsub.submit(conv_command, outfilepath, jobname=job_label)
Beispiel #27
0
 def submit_runs(self, sim, overwrite=False, debug=False):
     """Submit the estimator runs for ``sim`` as a job array.

     Submission is skipped when a results path exists for every beta
     (1..sim.num_betas) and ``overwrite`` is false.
     """
     # TODO: check whether there are more betas or more samples per beta and
     # decide whether to submit the betas in parallel or in groups; it is
     # probably easy to let the estimator manager accept groups of betas.
     print('\n' + str(self), 'submitting', sim.name)
     fixed_args = [
         '--method_name', self.method(),
         'run',
         '--sim_name', sim.name,
         '--batch_num', '$LSB_JOBINDEX',
     ]
     my_args = fixed_args + self.command_line_params()
     outfilepath = self.outfile_path(sim, '%I')

     # Stops at the first beta whose results path is missing.
     all_results_present = all(
         os.path.exists(self.results_path_stem(sim, beta_num))
         for beta_num in range(1, sim.num_betas + 1))
     if all_results_present and not overwrite:
         print('submission unnecessary for', str(self))
         return

     command = ['python', '-u',
                paths.code + 'methods/estimator_manager.py'] + my_args
     bsub.submit(
         command,
         outfilepath,
         jobname=self.run_job_name(sim),
         memory_GB=4,
         debug=debug)
Beispiel #28
0
    def preprocess(self, s):
        """Submit LD-score jobs for annotations that do not have them yet.

        Iterates the comma-separated ``self.params.annot_chr`` and
        chromosomes 1..22. A job is submitted only when the annotation's
        ``ldscores_filename`` is missing. ``s`` is only used for the log line.
        """
        print('LDSC is preprocessing', s.name, 'with refpanel=',
              self.params.refpanel)
        print(self.params)

        for annot_chr in self.params.annot_chr.split(','):
            a = pa.Annotation(paths.annotations + annot_chr)
            for c in range(1, 23):
                if os.path.exists(a.ldscores_filename(c)):
                    continue

                ldscores_command = [
                    'python', '-u', paths.foreign + 'ldsc/ldsc.py', '--l2']
                ldscores_command += [
                    '--ld-wind-cm', str(self.params.ld_window)]
                ldscores_command += ['--bfile', self.refpanel.bfile(c)]
                ldscores_command += ['--annot', a.annot_filename(c)]
                ldscores_command += ['--out', a.filestem(c)]
                print(' '.join(ldscores_command))

                outfilepath = a.filestem(c) + '.ldscoresbsub_out'
                # NOTE(review): 'ldcores' looks like a typo for 'ldscores';
                # kept as-is in case job names are matched elsewhere.
                job_label = self.params.annot_chr.replace('/', '_') + \
                    ',ldcores,chr=' + str(c)
                bsub.submit(ldscores_command, outfilepath, jobname=job_label)