Esempio n. 1
0
    cluster.runCmd(job_name=job, cmd=cmd, logfile=log_file)

# Build the configuration for the association test from a private copy of the
# base config, so the entries added below never leak back into configdict.
data_prefix = configdict["data_prefix"]
config = deepcopy(configdict)
config["assoc_type"] = assoc_type
config["null_model_file"] = data_prefix + "_null_model.RData"
# Only aggregate tests get a variant-list entry; the blank after "chr" is part
# of the file name (presumably a per-chromosome placeholder -- confirm).
if assoc_type == "aggregate":
    config["aggregate_variant_file"] = data_prefix + "_aggregate_list_chr .RData"
config["out_prefix"] = data_prefix + "_" + assocScript
config["segment_file"] = segment_file

# The config file is named after the association script.
configfile = configdict["config_prefix"] + "_" + assocScript + ".config"
TopmedPipeline.writeConfig(config, configfile)

# Look up the segments of every requested chromosome and key them by
# chromosome name.
chrom_list = TopmedPipeline.parseChromosomes(chromosomes).split(" ")
segment_list = TopmedPipeline.getChromSegments(segment_file, chrom_list)
# Each entry of segment_list is rendered as its items joined by "-".
segment_str = ["-".join(map(str, bounds)) for bounds in segment_list]
segments = dict(zip(chrom_list, segment_str))

# run association tests
# Submits one job per chromosome in chrom_list.
holdids_combine = []
for chromosome in chrom_list:
    # job name is the association script name suffixed with the chromosome
    job_assoc = assocScript + "_chr" + chromosome
    # path of the R script for this association type; identical on every pass
    rscript = os.path.join(pipeline, "R", assocScript + ".R")
    # NOTE(review): the flag and its value travel as ONE list element
    # ("--chromosome <chr>"), not two -- confirm the driver expects that.
    args = ["-s", rscript, configfile, "--chromosome " + chromosome]
    # no email for jobs by segment
    jobid = cluster.submitJob(job_name=job_assoc,
                              cmd=driver,
                              args=args,
                              holdid=holdids,
Esempio n. 2
0
# Write the config prepared for the current step, then submit the step over
# the requested chromosomes (array_range). holdid carries the id returned by
# the previous submission.
TopmedPipeline.writeConfig(config, configfile)

driver_args = ["-c", rscript, configfile, version]
jobid = cluster.submitJob(
    job_name=job,
    cmd=driver,
    args=driver_args,
    holdid=[jobid],
    array_range=chromosomes,
    email=email,
    print_only=print_only,
)

# Step "combine_variants": reads the "_pruned_variants_chr .RData" input name
# and writes a single "_pruned_variants.RData" output.
job = "combine_variants"
rscript = os.path.join(pipeline, "R", job + ".R")

# This step uses a fresh config rather than a copy of configdict.
config = {"chromosomes": TopmedPipeline.parseChromosomes(chromosomes)}
data_prefix = configdict["data_prefix"]
config["in_file"] = data_prefix + "_pruned_variants_chr .RData"
config["out_file"] = data_prefix + "_pruned_variants.RData"

configfile = configdict["config_prefix"] + "_" + job + ".config"
TopmedPipeline.writeConfig(config, configfile)

# No array_range here: a single job, with holdid carrying the id returned by
# the previous submission.
jobid = cluster.submitJob(
    job_name=job,
    cmd=driver,
    args=[rscript, configfile, version],
    holdid=[jobid],
    email=email,
    print_only=print_only,
)

# Step "pca_byrel": the R script for a step is <pipeline>/R/<job>.R.
job = "pca_byrel"
rscript_name = job + ".R"
rscript = os.path.join(pipeline, "R", rscript_name)
Esempio n. 3
0
    # Configure the current step from a copy of the base config: point
    # sample_include_file at the "_unrelated.RData" file and send output to
    # the "_pruned_variants_chr .RData" file name.
    data_prefix = configdict["data_prefix"]
    config = deepcopy(configdict)
    config["sample_include_file"] = data_prefix + "_unrelated.RData"
    config["out_file"] = data_prefix + "_pruned_variants_chr .RData"
    configfile = configdict["config_prefix"] + "_" + job + ".config"
    TopmedPipeline.writeConfig(config, configfile)

    # Submit over the requested chromosomes (array_range); holdid carries the
    # id returned by the previous submission.
    jobid = cluster.submitJob(
        job_name=job,
        cmd=driver,
        args=["-c", rscript, configfile, version],
        holdid=[jobid],
        array_range=chromosomes,
        email=email,
        print_only=print_only,
    )


    # Step "combine_variants": reads the "_pruned_variants_chr .RData" input
    # name and writes a single "_pruned_variants.RData" output.
    job = "combine_variants"
    rscript = os.path.join(pipeline, "R", job + ".R")

    # This step uses a fresh config rather than a copy of configdict.
    config = {"chromosomes": TopmedPipeline.parseChromosomes(chromosomes)}
    data_prefix = configdict["data_prefix"]
    config["in_file"] = data_prefix + "_pruned_variants_chr .RData"
    config["out_file"] = data_prefix + "_pruned_variants.RData"

    configfile = configdict["config_prefix"] + "_" + job + ".config"
    TopmedPipeline.writeConfig(config, configfile)

    # Single job (no array_range); holdid carries the id returned by the
    # previous submission.
    jobid = cluster.submitJob(
        job_name=job,
        cmd=driver,
        args=[rscript, configfile, version],
        holdid=[jobid],
        email=email,
        print_only=print_only,
    )


# Step "pca_byrel": locate its R script and start its config from a copy of
# the base config, adding the related / unrelated sample file names.
job = "pca_byrel"
rscript = os.path.join(pipeline, "R", job + ".R")

data_prefix = configdict["data_prefix"]
config = deepcopy(configdict)
config["related_file"] = data_prefix + "_related.RData"
config["unrelated_file"] = data_prefix + "_unrelated.RData"
Esempio n. 4
0
# Complete the association-test config (config itself is created earlier).
data_prefix = configdict["data_prefix"]

# Null-model and phenotype file names are only filled in when run_null_model
# is set.
if run_null_model:
    config["null_model_file"] = data_prefix + "_null_model.RData"
    config["phenotype_file"] = data_prefix + "_phenotypes.RData"

# Aggregate tests additionally get a variant-list file name; the blank after
# "chr" is part of the name (presumably a chromosome placeholder -- confirm).
if assoc_type == "aggregate":
    config["aggregate_variant_file"] = data_prefix + "_aggregate_list_chr .RData"

# Output prefix and config file name both derive from the script name.
assocScript = "assoc_" + assoc_type
config["out_prefix"] = data_prefix + "_" + assocScript
config["segment_file"] = segment_file
configfile = configdict["config_prefix"] + "_" + assocScript + ".config"
TopmedPipeline.writeConfig(config, configfile)


# Look up the segments of every requested chromosome and key them by
# chromosome name.
chrom_list = TopmedPipeline.parseChromosomes(chromosomes).split(" ")
segment_list = TopmedPipeline.getChromSegments(segment_file, chrom_list)
# Each entry of segment_list is rendered as its items joined by "-".
segment_str = ["-".join(map(str, bounds)) for bounds in segment_list]
segments = dict(zip(chrom_list, segment_str))


# Submit the association test once per chromosome, as an array job whose range
# is that chromosome's entry in segments.
hold_combine = []
for chromosome in chrom_list:
    job_assoc = assocScript + "_chr" + chromosome
    rscript = os.path.join(pipeline, "R", assocScript + ".R")
    # the chromosome flag and its value are passed as one list element
    chromosome_flag = "--chromosome " + chromosome
    args = ["-s", rscript, configfile, chromosome_flag, version]
    # jobs by segment are submitted without an email argument
    submitID = cluster.submitJob(
        job_name=job_assoc,
        cmd=driver,
        args=args,
        holdid=hold_null_agg,
        array_range=segments[chromosome],
        print_only=print_only,
    )

    combScript = "assoc_combine"
Esempio n. 5
0
# Make sure the "config" and "log" working directories exist below the
# configured output location.
#
# The directory is created directly and an "already exists" failure is
# tolerated. This avoids the race between a separate exists-check and mkdir,
# and os.makedirs also creates a missing parent directory. The try/except form
# is used instead of exist_ok=True so it behaves the same on Python 2 and 3.
for subdir in ['config', 'log']:
    subdir_path = os.path.join(configdict['output_file'], subdir)
    try:
        os.makedirs(subdir_path)
    except OSError:
        # Only swallow the error when the directory is really there; anything
        # else (permissions, a regular file in the way, ...) is re-raised.
        if not os.path.isdir(subdir_path):
            raise

#configdict = TopmedPipeline.directorySetup(configdict, subdirs=["config", "log"])

# Step "vcf2gds": the R script for a step is <pipeline>/R/<job>.R.
job = "vcf2gds"
rscript = os.path.join(pipeline, "R", job + ".R")

# bcf input is parsed by streaming bcftools output, which cannot run in
# parallel, so the core request is cleared for that file type
vcf_ext = os.path.splitext(configdict["vcf_file"])[1]
if vcf_ext == ".bcf":
    ncores = None

# Space-separated chromosome string, plus the same chromosomes as a list.
chrom_string = TopmedPipeline.parseChromosomes(chromosomes)
chrom_list = chrom_string.split(' ')

# Submit one bsub job per chromosome whose GDS file does not exist yet, so a
# re-run only resubmits the chromosomes that are still missing.
for chrom in chrom_list:
    # 'chr ' in the configured gds_file name is replaced by 'chr<chrom>' to get
    # the per-chromosome file name.
    gds_path = configdict['gds_file'].replace('chr ', 'chr' + chrom)
    if os.path.isfile(gds_path):
        continue

    # NOTE(review): config_dir is defined outside this block -- confirm it is
    # the config argument the R script expects. The command is a plain shell
    # string, so paths containing spaces would be split by the shell.
    cmd = " ".join([
        'bsub -q big -n 4', "-R 'rusage[mem=45000]'", 'Rscript', rscript,
        config_dir, '--chromosome ' + chrom
    ])
    print(cmd)

    # os.system returns the command's exit status; report a failed submission
    # instead of silently dropping it. The loop still continues so the
    # remaining chromosomes are submitted.
    status = os.system(cmd)
    if status != 0:
        print("WARNING: submission for chromosome " + chrom +
              " returned non-zero status " + str(status))
#jobid = cluster.submitJob(job_name=job, cmd=driver, args=["-c", rscript, configfile], array_range=chromosomes, request_cores=ncores)

job = "merge_gds"