# Locuszoom driver: parse command-line options captured by argparse.
cluster_file = args.cluster_file
cluster_type = args.cluster_type
email = args.email
print_only = args.print_only
verbose = args.verbose

# Version flag forwarded to the R driver script.
version = "--version " + TopmedPipeline.__version__

cluster = TopmedPipeline.ClusterFactory.createCluster(cluster_type, cluster_file, verbose)

pipeline = cluster.getPipelinePath()
submitPath = cluster.getSubmitPath()
driver = os.path.join(submitPath, "runRscript.sh")

configdict = TopmedPipeline.readConfig(configfile)
configdict = TopmedPipeline.directorySetup(configdict, subdirs=["log", "plots"])

# analysis init
cluster.analysisInit(print_only=print_only)

job = "locuszoom"

rscript = os.path.join(pipeline, "R", job + ".R")

# find number of jobs to submit by counting lines in file
n = TopmedPipeline.countLines(configdict["locus_file"])
# NOTE(review): n - 1 presumably skips a header line in locus_file — confirm.
# "range" shadows the builtin and "args" rebinds the argparse namespace;
# both appear to be consumed by a submitJob call beyond this chunk, so they
# cannot be renamed safely here.
range = "1-" + str(n - 1)

args = ["-s", rscript, configfile, version]
# Association-test driver: parse command-line options captured by argparse.
chromosomes = args.chromosomes
segment_length = args.segment_length
n_segments = args.n_segments
cluster_file = args.cluster_file
cluster_type = args.cluster_type
email = args.email
print_only = args.print_only
verbose = args.verbose

cluster = TopmedPipeline.ClusterFactory.createCluster(cluster_type, cluster_file, verbose)

# Pipeline directory is resolved relative to this script's own location.
pipeline = os.path.dirname(os.path.abspath(sys.argv[0]))
driver = os.path.join(pipeline, "runRscript.sh")

configdict = TopmedPipeline.readConfig(configfile)
configdict = TopmedPipeline.directorySetup(
    configdict, subdirs=["config", "data", "log", "plots", "report"])

# check type of association test - single-variant unrelated is handled differently
no_pcrel = "pcrelate_file" not in configdict or configdict[
    "pcrelate_file"] == "NA"
no_grm = "grm_file" not in configdict or configdict["grm_file"] == "NA"
single_unrel = assoc_type == "single" and no_pcrel and no_grm

# Job IDs accumulated here are presumably used as hold dependencies for
# later submissions (consumed beyond this chunk).
holdids = []

# null model
if not single_unrel:
    job = "null_model"
# Null-model driver: parse command-line options captured by argparse.
cluster_type = args.cluster_type
ncores = args.ncores
email = args.email
print_only = args.print_only
verbose = args.verbose

# Version flag forwarded to the R driver script.
version = "--version " + TopmedPipeline.__version__

cluster = TopmedPipeline.ClusterFactory.createCluster(cluster_type, cluster_file, verbose)

pipeline = cluster.getPipelinePath()
submitPath = cluster.getSubmitPath()
driver = os.path.join(submitPath, "runRscript.sh")

configdict = TopmedPipeline.readConfig(configfile)
configdict = TopmedPipeline.directorySetup(
    configdict, subdirs=["config", "data", "log", "report"])

# analysis init
cluster.analysisInit(print_only=print_only)

# null model
job = "null_model"

rscript = os.path.join(pipeline, "R", job + ".R")

# Per-job copy of the config with output paths for the R script; note that
# "configfile" is rebound here from the input config to the generated one.
config = deepcopy(configdict)
config["out_prefix"] = configdict["data_prefix"] + "_null_model"
config["out_phenotype_file"] = configdict["data_prefix"] + "_phenotypes.RData"
configfile = configdict["config_prefix"] + "_" + job + ".config"
# Association driver: parse command-line options captured by argparse.
n_segments = args.n_segments
cluster_file = args.cluster_file
cluster_type = args.cluster_type
ncores = args.ncores
email = args.email
print_only = args.print_only
verbose = args.verbose

# Version flag forwarded to the R driver script.
version = "--version " + TopmedPipeline.__version__

cluster = TopmedPipeline.ClusterFactory.createCluster(cluster_type, cluster_file, verbose)

pipeline = cluster.getPipelinePath()
driver = os.path.join(pipeline, "runRscript.sh")

configdict = TopmedPipeline.readConfig(configfile)
configdict = TopmedPipeline.directorySetup(configdict, subdirs=["config", "data", "log", "plots", "report"])

# hold is a list of submit IDs. A submit ID is a dict:
# {jobname: [jobids]}
hold_null_agg = []

# null model
job = "null_model"
# if a null model file is given in the config, skip this step
run_null_model = "null_model_file" not in configdict
if run_null_model:
    rscript = os.path.join(pipeline, "R", job + ".R")
# Find-unrelated driver: parse command-line options captured by argparse.
cluster_file = args.cluster_file
cluster_type = args.cluster_type
ncores = args.ncores
email = args.email
print_only = args.print_only
verbose = args.verbose

# Version flag forwarded to the R driver script.
version = "--version " + TopmedPipeline.__version__

cluster = TopmedPipeline.ClusterFactory.createCluster(cluster_type, cluster_file, verbose)

pipeline = cluster.getPipelinePath()
driver = os.path.join(pipeline, "runRscript.sh")

configdict = TopmedPipeline.readConfig(configfile)
configdict = TopmedPipeline.directorySetup(
    configdict, subdirs=["config", "data", "log", "plots"])

job = "find_unrelated"

rscript = os.path.join(pipeline, "R", job + ".R")

# Per-job config with output file paths; "configfile" is rebound to the
# generated job config before submission.
config = deepcopy(configdict)
config["out_related_file"] = configdict["data_prefix"] + "_related.RData"
config["out_unrelated_file"] = configdict["data_prefix"] + "_unrelated.RData"
configfile = configdict["config_prefix"] + "_" + job + ".config"
TopmedPipeline.writeConfig(config, configfile)

# NOTE(review): this call is truncated here; its remaining arguments
# continue beyond this chunk.
jobid = cluster.submitJob(job_name=job, cmd=driver,
# GRM driver: parse command-line options captured by argparse.
# Cluster-submission options are commented out in this variant; the script
# appears to run without a cluster object.
chromosomes = args.chromosomes
#cluster_file = args.cluster_file
#cluster_type = args.cluster_type
ncores = args.ncores
#email = args.email
#print_only = args.print_only
verbose = args.verbose

#version = "--version " + TopmedPipeline.__version__

#cluster = TopmedPipeline.ClusterFactory.createCluster(cluster_type, cluster_file, verbose)

# Pipeline directory is resolved relative to this script's own location.
pipeline = os.path.dirname(os.path.abspath(sys.argv[0]))
#driver = os.path.join(pipeline, "runRscript.sh")

configdict = TopmedPipeline.readConfig(configfile)
# Create output subdirectories up front. makedirs(exist_ok=True) is
# idempotent and avoids the check-then-create race of the previous
# os.path.exists + os.mkdir pattern.
for subdir in ["config", "data", "log", "plots"]:
    os.makedirs(os.path.join(configdict['output_file'], subdir), exist_ok=True)
#configdict = TopmedPipeline.directorySetup(configdict, subdirs=["config", "data", "log"])

chrom_string = TopmedPipeline.parseChromosomes(chromosomes)
chrom_list = chrom_string.split(' ')

job = "grm"

rscript = os.path.join(pipeline, "R", job + ".R")

config = deepcopy(configdict)
# NOTE(review): the space in "_grm_chr .gds" is presumably a per-chromosome
# placeholder filled in downstream — confirm before changing.
config["out_file"] = configdict['output_file'] + '/data/' + configdict[
    "data_prefix"] + "_grm_chr .gds"
# vcf2gds driver: parse command-line options captured by argparse.
cluster_type = args.cluster_type
ncores = args.ncores
email = args.email
print_only = args.print_only
verbose = args.verbose

# Version flag forwarded to the R driver script.
version = "--version " + TopmedPipeline.__version__

cluster = TopmedPipeline.ClusterFactory.createCluster(cluster_type, cluster_file, verbose)

# NOTE(review): "pipeline" is resolved from sys.argv[0] here while "driver"
# uses the cluster's submit path — mixed conventions relative to the sibling
# scripts; confirm both resolve to the intended locations.
pipeline = os.path.dirname(os.path.abspath(sys.argv[0]))
submitPath = cluster.getSubmitPath()
driver = os.path.join(submitPath, "runRscript.sh")

configdict = TopmedPipeline.readConfig(configfile)
configdict = TopmedPipeline.directorySetup(configdict, subdirs=["config", "log"])

# analysis init
cluster.analysisInit(print_only=print_only)

job = "vcf2gds"

rscript = os.path.join(pipeline, "R", job + ".R")

# parsing bcf files relies on streaming bcftools output, so can't run in parallel
if os.path.splitext(configdict["vcf_file"])[1] == ".bcf":
    ncores = None

# NOTE(review): this call is truncated here; its remaining arguments
# continue beyond this chunk.
jobid = cluster.submitJob(job_name=job,
# Find-unrelated driver: parse command-line options captured by argparse.
chromosomes = args.chromosomes
cluster_file = args.cluster_file
cluster_type = args.cluster_type
ncores = args.ncores
email = args.email
print_only = args.print_only
verbose = args.verbose

# Version flag forwarded to the R driver script.
version = "--version " + TopmedPipeline.__version__

cluster = TopmedPipeline.ClusterFactory.createCluster(cluster_type, cluster_file, verbose)

pipeline = cluster.getPipelinePath()
driver = os.path.join(pipeline, "runRscript.sh")

configdict = TopmedPipeline.readConfig(configfile)
configdict = TopmedPipeline.directorySetup(configdict, subdirs=["config", "data", "log", "plots"])

job = "find_unrelated"

rscript = os.path.join(pipeline, "R", job + ".R")

# Per-job config with output file paths; "configfile" is rebound to the
# generated job config before submission.
config = deepcopy(configdict)
config["out_related_file"] = configdict["data_prefix"] + "_related.RData"
config["out_unrelated_file"] = configdict["data_prefix"] + "_unrelated.RData"
configfile = configdict["config_prefix"] + "_" + job + ".config"
TopmedPipeline.writeConfig(config, configfile)

# Submit the job; when print_only is set the cluster implementation
# presumably prints the command instead of submitting — confirm.
jobid = cluster.submitJob(job_name=job, cmd=driver,
                          args=[rscript, configfile, version],
                          email=email, print_only=print_only)
# PC-Relate driver: parse command-line options captured by argparse.
configfile = args.config_file
cluster_file = args.cluster_file
cluster_type = args.cluster_type
email = args.email
print_only = args.print_only
verbose = args.verbose

# Version flag forwarded to the R driver script.
version = "--version " + TopmedPipeline.__version__

cluster = TopmedPipeline.ClusterFactory.createCluster(cluster_type, cluster_file, verbose)

pipeline = cluster.getPipelinePath()
driver = os.path.join(pipeline, "runRscript.sh")

configdict = TopmedPipeline.readConfig(configfile)
configdict = TopmedPipeline.directorySetup(configdict, subdirs=["config", "data", "log", "plots"])

job = "pcrelate"

rscript = os.path.join(pipeline, "R", job + ".R")

# Per-job config; "configfile" is rebound to the generated job config
# before submission.
config = deepcopy(configdict)
config["out_prefix"] = configdict["data_prefix"]
configfile = configdict["config_prefix"] + "_" + job + ".config"
TopmedPipeline.writeConfig(config, configfile)

jobid = cluster.submitJob(job_name=job, cmd=driver,
                          args=[rscript, configfile, version],
                          email=email, print_only=print_only)
# help="show the version number and exit") args = parser.parse_args() assoc_type = args.assoc_type configfile = args.config_file chromosomes = args.chromosomes segment_length = args.segment_length n_segments = args.n_segments #email = args.email #print_only = args.print_only verbose = args.verbose #version = "--version " + TopmedPipeline.__version__ pipeline = os.path.dirname(os.path.abspath(sys.argv[0])) configdict = TopmedPipeline.readConfig(configfile) for subdir in ["config","data", "results", "log", "plots", "report"]: if not os.path.exists(configdict['output_file'] + '/' + subdir): os.mkdir(configdict['output_file'] + '/' + subdir) # check type of association test - single-variant unrelated is handled differently no_pcrel = "pcrelate_file" not in configdict or configdict["pcrelate_file"] == "NA" no_grm = "grm_file" not in configdict or configdict["grm_file"] == "NA" single_unrel = assoc_type == "single" and no_pcrel and no_grm #######job: null model############# if not single_unrel: job = "null_model" assocScript = "assoc_" + assoc_type if "test_type" in configdict:
# Locuszoom driver: parse command-line options captured by argparse.
configfile = args.config_file
cluster_file = args.cluster_file
cluster_type = args.cluster_type
email = args.email
print_only = args.print_only
verbose = args.verbose

# Version flag forwarded to the R driver script.
version = "--version " + TopmedPipeline.__version__

cluster = TopmedPipeline.ClusterFactory.createCluster(cluster_type, cluster_file, verbose)

pipeline = cluster.getPipelinePath()
driver = os.path.join(pipeline, "runRscript.sh")

configdict = TopmedPipeline.readConfig(configfile)
configdict = TopmedPipeline.directorySetup(configdict, subdirs=["log", "plots"])

job = "locuszoom"

rscript = os.path.join(pipeline, "R", job + ".R")

# find number of jobs to submit by counting lines in file
# (n - 1 presumably skips a header line in locus_file — confirm)
n = TopmedPipeline.countLines(configdict["locus_file"])
# Renamed from "range"/"args": the originals shadowed the builtin range()
# and clobbered the argparse namespace. All uses are local to this block.
array_range = "1-" + str(n - 1)

driver_args = ["-s", rscript, configfile, version]

jobid = cluster.submitJob(job_name=job, cmd=driver, args=driver_args,
                          array_range=array_range,
                          email=email, print_only=print_only)
# vcf2gds driver: parse command-line options captured by argparse.
chromosomes = args.chromosomes
cluster_file = args.cluster_file
cluster_type = args.cluster_type
ncores = args.ncores
email = args.email
print_only = args.print_only
verbose = args.verbose

# Version flag forwarded to the R driver script.
version = "--version " + TopmedPipeline.__version__

cluster = TopmedPipeline.ClusterFactory.createCluster(cluster_type, cluster_file, verbose)

# Pipeline directory is resolved relative to this script's own location.
pipeline = os.path.dirname(os.path.abspath(sys.argv[0]))
driver = os.path.join(pipeline, "runRscript.sh")

configdict = TopmedPipeline.readConfig(configfile)
configdict = TopmedPipeline.directorySetup(configdict, subdirs=["config", "log"])

job = "vcf2gds"

rscript = os.path.join(pipeline, "R", job + ".R")

# parsing bcf files relies on streaming bcftools output, so can't run in parallel
if os.path.splitext(configdict["vcf_file"])[1] == ".bcf":
    ncores = None

# Submit one array task per chromosome; "-c" presumably tells the driver
# to substitute the task's chromosome into the config — confirm.
jobid = cluster.submitJob(job_name=job, cmd=driver,
                          args=["-c", rscript, configfile, version],
                          array_range=chromosomes, request_cores=ncores,
                          email=email, print_only=print_only)

job = "check_gds"