def main(argv):
    ''''''
    parser = argparse.ArgumentParser(description='Generates a Slurm script for running fastqc on various sequence data files')
    parser.add_argument("-j", "--cores", nargs=1, metavar='N', type=int, default=[1],
                        help="The maximum number of cores to use, 0=exclusive. [Default: 1]")
    parser.add_argument("-p", "--partition", nargs=1, metavar="partition", default=["8hour"],
                        choices=['bigmem', '8hour', 'compute'],
                        help="The partition (or queue) to submit job to")
    parser.add_argument("rawfile", nargs="+",
                        help="Files or directory of raw fastq/a sequences to process. If directory, -f filter is used to select files within.")
    parser.add_argument("-f", "--dir-filter", nargs=1, metavar='filter', default=["*.f*q"],
                        help="A filter to match files when searching a directory. [Default: \"*.f*q\"]")
    #parser.add_argument("-k", "--kraken-file", nargs="+", help="Files or directory of kraken_classified fastq/a sequences. If directory, -K filter is used to select files within.")
    #parser.add_argument("-K", "--kraken-dir-filter", nargs=1, metavar='filter', default=["*_classified.f*q"], help="A filter to match files when searching a kraken result directory. [Default: \"*_classified.f*q\"]")
    args = parser.parse_args(argv[1:])
    common.writecmd(argv)
    #print args

    # expand files
    rawfiles = common.expandFiles(args.rawfile, args.dir_filter[0])
    #krakenfiles = common.expandFiles(args.kraken_file, args.kraken_dir_filter[0])
    error = False
    if len(rawfiles) == 0:
        sys.stderr.write("No RAW files found: '%s'\n" % (" ".join(args.rawfile)))
        error = True
    #if len(krakenfiles) == 0:
    #    sys.stderr.write("No KRAKEN files found: '%s'\n" % (" ".join(args.kraken_file)))
    #    error = True
    if error:
        return 1

    ## make the variable parts of script
    vars = {}
    vars["rawfiles"] = " ".join(rawfiles)
    #vars["krakenfiles"] = " ".join(krakenfiles)
    if args.cores[0] == 0:
        # 0 cores means request a whole node exclusively
        vars["slurmheader"] = common.makeExclusiveHeader(partition=args.partition[0])
        vars["cores"] = "16"
    else:
        vars["slurmheader"] = common.makeHeader(partition=args.partition[0], ntasks=args.cores[0])
        vars["cores"] = args.cores[0]
    #vars["slurmheader"] = common.makeHeader(partition=args.partition[0], cores=1)
    vars["fastqcversion"] = subprocess.check_output(["rad-pipeline_module_version", "fastqc"]).rstrip()
    vars["parallelversion"] = subprocess.check_output(["rad-pipeline_module_version", "parallel"]).rstrip()
    vars["radpipelineversion"] = subprocess.check_output(["rad-pipeline_module_version", "rad-pipeline"]).rstrip()
    vars["CMD"] = " ".join(argv)

    jobscript = common.loadTemplate("fastqc.slurm")
    #print jobscript
    print jobscript.format(**vars)
    return 0
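# NOTE (sketch): common.expandFiles() lives elsewhere in the pipeline and is not
# part of this excerpt.  The helper below is a guess at its behaviour, inferred
# from how it is called above and from the equivalent inline loops in the pear
# and denovo generators: directories become "dir/filter" glob patterns, existing
# files and glob patterns are passed through, and missing paths are dropped with
# a warning.  Treat it as an illustration, not the actual implementation.
def _expandFiles_sketch(paths, dir_filter):
    import os, sys                      # local imports so the sketch stands alone
    expanded = []
    for p in paths:
        if '*' in p or '?' in p or os.path.exists(p):
            if os.path.isdir(p):
                # defer the actual globbing to the generated shell script
                expanded.append("%s/%s" % (p, dir_filter))
            else:
                expanded.append(p)
        else:
            sys.stderr.write("Warning: file '%s' does not exist and will be ignored\n" % p)
    return expanded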
def main(argv):
    ''''''
    parser = argparse.ArgumentParser(description='Generates a slurm script for running pear on a selection of files')
    parser.add_argument("-j", "--cores", nargs=1, metavar='N', type=int, default=[8],
                        help="The number of cores to use, 0=exclusive. [Default: 8]")
    parser.add_argument("-p", "--partition", nargs=1, metavar="partition", default=["8hour"],
                        choices=['bigmem', '8hour', 'compute'],
                        help="The partition (or queue) to submit job to [Default: 8hour]")
    parser.add_argument("file", nargs="+",
                        help="Files or directory to process. If directory, -f filter is used to select files within.")
    parser.add_argument("-f", "--dir-filter", nargs=1, metavar='filter', default=["*_R1*.f*q*"],
                        help="A filter to match files when searching a directory. [Default: \"*_R1*.f*q*\"]")
    parser.add_argument("-t", "--time", nargs=1, metavar='time', default=["01:00:00"],
                        help="Job max runtime. [Default: 01:00:00]")
    parser.add_argument("-T", "--filename-trim", nargs=1, metavar='trim', default=[".f*q"],
                        help="Bash REGEX to trim extension from end of filename. [Default: '.f*q']")
    args = parser.parse_args(argv[1:])
    common.writecmd(argv)
    #print args

    ## make the variable parts of script
    vars = {}
    if args.cores[0] == 0:
        vars["cores"] = "16"
    else:
        vars["cores"] = args.cores[0]
    # move jobs asking for more than 8 cores off the 8hour partition
    if int(vars["cores"]) > 8 and args.partition[0] == "8hour":
        args.partition[0] = "compute"
    if args.cores[0] == 0:
        vars["slurmheader"] = common.makeExclusiveHeader(partition=args.partition[0], time=args.time[0])
    else:
        vars["slurmheader"] = common.makeHeader(partition=args.partition[0], ntasks=args.cores[0], time=args.time[0])

    files = []
    for f in args.file:
        if '*' in f or '?' in f or os.path.exists(f):
            if os.path.isdir(f):
                files.append("%s/%s" % (f, args.dir_filter[0]))
            else:
                files.append(f)
        else:
            sys.stderr.write("Warning: file '%s' does not exist and will be ignored\n" % f)
    vars["files"] = " ".join(files)

    vars["pearversion"] = subprocess.check_output(["carlaseq_module_version", "pear-gcc"]).rstrip()
    vars["trim"] = args.filename_trim[0]
    vars["CMD"] = " ".join(argv)

    jobscript = common.loadTemplate("pear.slurm")
    print jobscript.format(**vars)
    return 0
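# Example usage (the script and output file names here are illustrative, not
# taken from the repository).  The generator only prints the job script to
# stdout, so submitting it with sbatch is a separate step:
#
#   python generate_pear.py -j 8 -p compute run1/ > pear.slurm
#   sbatch pear.slurm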
def main(argv):
    ''''''
    parser = argparse.ArgumentParser(description='Generates a Slurm script for running blast against the probes on various fasta files')
    parser.add_argument("-j", "--cores", nargs=1, metavar='N', type=int, default=[1],
                        help="The maximum number of cores to use, 0=exclusive. [Default: 1]")
    parser.add_argument("-p", "--partition", nargs=1, metavar="partition", default=["8hour"],
                        choices=['bigmem', '8hour', 'compute', 'long'],
                        help="The partition (or queue) to submit job to")
    parser.add_argument("rawfile", nargs="+",
                        help="Files or directory of raw fastq/a sequences to process. If directory, -f filter is used to select files within.")
    parser.add_argument("-f", "--dir-filter", nargs=1, metavar='filter', default=["*.f*a"],
                        help="A filter to match files when searching a directory. [Default: \"*.f*a\"]")
    parser.add_argument("-t", "--filename-trim", nargs=1, metavar='trim', default=[".*"],
                        help="Bash REGEX to trim extension from end of filename. [Default: \".*\"]")
    parser.add_argument("-d", "--probe-database", nargs=1, metavar='database', default=["probes.fasta"],
                        help="Filename of probes database fasta file [Default: probes.fasta]")
    args = parser.parse_args(argv[1:])
    common.writecmd(argv)

    # expand files
    rawfiles = common.expandFiles(args.rawfile, args.dir_filter[0])
    error = False
    if len(rawfiles) == 0:
        sys.stderr.write("No RAW files found: '%s'\n" % (" ".join(args.rawfile)))
        error = True
    if error:
        return 1

    ## make the variable parts of script
    vars = {}
    vars["rawfiles"] = " ".join(rawfiles)
    if args.cores[0] == 0:
        vars["slurmheader"] = common.makeExclusiveHeader(partition=args.partition[0])
        vars["cores"] = "16"
    else:
        vars["slurmheader"] = common.makeHeader(partition=args.partition[0], ntasks=args.cores[0])
        vars["cores"] = args.cores[0]
    vars["blastversion"] = subprocess.check_output(["carlaseq_module_version", "blast+"]).rstrip()
    vars["biostreamtoolsversion"] = subprocess.check_output(["carlaseq_module_version", "biostreamtools-gcc"]).rstrip()
    vars["trim"] = args.filename_trim[0]
    vars["probes"] = args.probe_database[0]
    vars["CMD"] = " ".join(argv)

    jobscript = common.loadTemplate("blastprobes.slurm")
    print jobscript.format(**vars)
    return 0
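# NOTE (sketch): common.writecmd(argv) is called at the top of every generator
# but is not shown in this excerpt.  Judging by its name and by the separate
# vars["CMD"] = " ".join(argv) substitution, it most likely records the exact
# command line for provenance.  A minimal stand-in could look like this (the
# log file name is an assumption):
def _writecmd_sketch(argv, logname="command.log"):
    with open(logname, "a") as log:            # append, keeping a history of runs
        log.write(" ".join(argv) + "\n")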
def main(argv):
    ''''''
    parser = argparse.ArgumentParser(description='Generates a slurm script for running kraken on a selection of files')
    parser.add_argument("-j", "--cores", nargs=1, metavar='N', type=int, default=[0],
                        help="The number of cores to use, 0=exclusive. [Default: 0]")
    parser.add_argument("-p", "--partition", nargs=1, metavar="partition", default=["bigmem"],
                        choices=['bigmem', '8hour', 'compute'],
                        help="The partition (or queue) to submit job to")
    parser.add_argument("file", nargs="+",
                        help="Files or directory to process. If directory, -f filter is used to select files within.")
    parser.add_argument("-f", "--dir-filter", nargs=1, metavar='filter', default=["*_R?.f*q*"],
                        help="A filter to match files when searching a directory. [Default: '*_R?.f*q*']")
    args = parser.parse_args(argv[1:])
    common.writecmd(argv)

    # expand files
    rawfiles = common.expandFiles(args.file, args.dir_filter[0])
    #print args

    ## make the variable parts of script
    vars = {}
    if args.cores[0] == 0:
        vars["slurmheader"] = common.makeExclusiveHeader(partition=args.partition[0])
    else:
        vars["slurmheader"] = common.makeHeader(partition=args.partition[0], ntasks=args.cores[0], mem="16000")

    # files = []
    # for f in args.file:
    #     if '*' in f or '?' in f or os.path.exists(f):
    #         if os.path.isdir(f):
    #             files.append("%s/%s" % (f, args.dir_filter[0]))
    #         else:
    #             files.append(f)
    #     else:
    #         sys.stderr.write("Warning: file '%s' does not exist and will be ignored\n")
    vars["files"] = " ".join(rawfiles)

    vars["krakenversion"] = subprocess.check_output(["carlaseq_module_version", "kraken"]).rstrip()
    vars["CMD"] = " ".join(argv)

    jobscript = common.loadTemplate("kraken.slurm")
    print jobscript.format(**vars)
    return 0
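# NOTE (sketch): common.makeHeader() / common.makeExclusiveHeader() are not
# shown in this excerpt.  From the keyword arguments used across these
# generators (partition, ntasks, mem, time) they presumably emit the #SBATCH
# preamble of the job script.  A minimal sketch using standard Slurm
# directives; the real helpers may add more:
def _makeHeader_sketch(partition, ntasks=1, mem=None, time=None, exclusive=False):
    lines = ["#!/bin/bash",
             "#SBATCH --partition=%s" % partition,
             "#SBATCH --ntasks=%d" % ntasks]
    if mem is not None:
        lines.append("#SBATCH --mem=%s" % mem)       # memory in MB, e.g. "16000"
    if time is not None:
        lines.append("#SBATCH --time=%s" % time)     # e.g. "01:00:00"
    if exclusive:
        lines.append("#SBATCH --exclusive")          # request the whole node
    return "\n".join(lines)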
def main(argv):
    ''''''
    parser = argparse.ArgumentParser(description='Generates a Slurm script for running fastqc on various sequence data files')
    parser.add_argument("-j", "--cores", nargs=1, metavar='N', type=int, default=[1],
                        help="The maximum number of cores to use, 0=exclusive. [Default: 1]")
    parser.add_argument("-p", "--partition", nargs=1, metavar="partition", default=["8hour"],
                        choices=['bigmem', '8hour', 'compute'],
                        help="The partition (or queue) to submit job to")
    parser.add_argument("rawfile", nargs="+",
                        help="Files or directory of raw fastq/a sequences to process. If directory, -f filter is used to select files within.")
    parser.add_argument("-f", "--dir-filter", nargs=1, metavar='filter', default=["*.f*q*"],
                        help="A filter to match files when searching a directory. [Default: '*.f*q*']")
    parser.add_argument("-t", "--filename-trim", nargs=1, metavar='trim', default=[".f*q*"],
                        help="Bash REGEX to trim extension from end of filename. [Default: '.f*q*']")
    args = parser.parse_args(argv[1:])
    common.writecmd(argv)

    # expand files
    rawfiles = common.expandFiles(args.rawfile, args.dir_filter[0])
    if len(rawfiles) == 0:
        sys.stderr.write("No RAW files found: '%s'\n" % (" ".join(args.rawfile)))
        return 1

    ## make the variable parts of script
    vars = {}
    vars["rawfiles"] = " ".join(rawfiles)
    if args.cores[0] == 0:
        vars["slurmheader"] = common.makeExclusiveHeader(partition=args.partition[0])
        vars["cores"] = "16"
    else:
        vars["slurmheader"] = common.makeHeader(partition=args.partition[0], ntasks=args.cores[0])
        vars["cores"] = args.cores[0]
    vars["trim"] = args.filename_trim[0]
    vars["CMD"] = " ".join(argv)

    jobscript = common.loadTemplate("fastqc.slurm")
    print jobscript.format(**vars)
    return 0
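# NOTE (sketch): common.loadTemplate(name) returns the text of a .slurm
# template whose {placeholders} match the keys assembled in vars above
# (slurmheader, cores, rawfiles, trim, CMD, ...); the denovo generator below
# treats an empty string as "template not found".  A stand-in might be (the
# template directory name is an assumption):
def _loadTemplate_sketch(name, templatedir="templates"):
    import os
    path = os.path.join(templatedir, name)
    if not os.path.exists(path):
        return ""                    # callers check for "" when the template is missing
    with open(path) as fh:
        return fh.read()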
def main(argv):
    ''''''
    parser = argparse.ArgumentParser(description='Generates a slurm script for running pear on a selection of files')
    parser.add_argument("-j", "--cores", nargs=1, metavar='N', type=int, default=[8],
                        help="The number of cores to use, 0=exclusive. [Default: 8]")
    parser.add_argument("-p", "--partition", nargs=1, metavar="partition", default=["8hour"],
                        choices=['bigmem', '8hour', 'compute'],
                        help="The partition (or queue) to submit job to [Default: 8hour]")
    parser.add_argument("file", nargs="+",
                        help="Files or directory to process. If directory, -f filter is used to select files within.")
    parser.add_argument("-f", "--dir-filter", nargs=1, metavar='filter', default=["*_R1_*.f*q*"],
                        help="A filter to match files when searching a directory. [Default: \"*_R1_*.f*q*\"]")
    parser.add_argument("-t", "--time", nargs=1, metavar='time', default=["01:00:00"],
                        help="Job max runtime. [Default: 01:00:00]")
    args = parser.parse_args(argv[1:])
    common.writecmd(argv)
    #print args

    ## make the variable parts of script
    vars = {}
    if args.cores[0] == 0:
        vars["cores"] = "16"
    else:
        vars["cores"] = args.cores[0]
    # move jobs asking for more than 8 cores off the 8hour partition
    if int(vars["cores"]) > 8 and args.partition[0] == "8hour":
        args.partition[0] = "compute"
    if args.cores[0] == 0:
        vars["slurmheader"] = common.makeExclusiveHeader(partition=args.partition[0], time=args.time[0])
    else:
        vars["slurmheader"] = common.makeHeader(partition=args.partition[0], ntasks=args.cores[0], time=args.time[0])

    files = []
    for f in args.file:
        if '*' in f or '?' in f or os.path.exists(f):
            if os.path.isdir(f):
                files.append("%s/%s" % (f, args.dir_filter[0]))
            else:
                files.append(f)
        else:
            sys.stderr.write("Warning: file '%s' does not exist and will be ignored\n" % f)
    vars["files"] = " ".join(files)

    vars["pearversion"] = subprocess.check_output(["rad-pipeline_module_version", "pear-gcc"]).rstrip()
    vars["radpipelineversion"] = subprocess.check_output(["rad-pipeline_module_version", "rad-pipeline"]).rstrip()
    vars["CMD"] = " ".join(argv)

    jobscript = common.loadTemplate("pear.slurm")
    print jobscript.format(**vars)
    return 0
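# NOTE (hypothetical helper, not part of the original code): every generator
# shells out to rad-pipeline_module_version / carlaseq_module_version to record
# tool versions, and an uncaught exception kills the generator if that helper
# is missing.  A defensive wrapper such as this would degrade to "unknown":
def _module_version_sketch(helper, module):
    import subprocess
    try:
        return subprocess.check_output([helper, module]).rstrip()
    except (OSError, subprocess.CalledProcessError):
        return "unknown"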
def main(argv):
    ''''''
    parser = argparse.ArgumentParser(description='Generates a Slurm script for optimising stacks denovo_map.pl')
    parser.add_argument("-j", "--cores", nargs=1, metavar='N', type=int, default=[0],
                        help="Total number of cores to use, 0=exclusive. [Default: 0]")
    parser.add_argument("--cores-task", nargs=1, metavar='T', type=int, default=[2],
                        help="Number of cores each task (trial) uses. [Default: 2]")
    parser.add_argument("-p", "--partition", nargs=1, metavar="partition", default=["compute"],
                        choices=['bigmem', '8hour', 'compute'],
                        help="The partition (or queue) to submit job to")
    parser.add_argument("-m", "--m-target", nargs=1, metavar='N', type=int,
                        help="The target (i.e. centre) value to use for -m option.")
    parser.add_argument("--m-count", nargs=1, metavar='N', type=int, default=[2],
                        help="The number of values to use either side of -m option. [Default: 2]")
    parser.add_argument("-n", "--n-target", nargs=1, metavar='N', type=int,
                        help="The target (i.e. centre) value to use for -n option.")
    parser.add_argument("--n-count", nargs=1, metavar='N', type=int, default=[2],
                        help="The number of values to use either side of -n option. [Default: 2]")
    parser.add_argument("-M", "--M-target", nargs=1, metavar='N', type=int,
                        help="The target (i.e. centre) value to use for -M option.")
    parser.add_argument("--M-count", nargs=1, metavar='N', type=int, default=[2],
                        help="The number of values to use either side of -M option. [Default: 2]")
    parser.add_argument("--denovo-opts", nargs=1, metavar='OPTS', default=["-S -t"],
                        help="Other command line options to pass to denovo_map.pl. [Default: -S -t]")
    parser.add_argument("--keep-denovo-log", action="store_true",
                        help="Keep the denovo_map.pl log file from each trial. [Default: False]")
    parser.add_argument("--batch-id", nargs=1, metavar='N', type=int, default=[2],
                        help="The batch id to use for denovo_map.pl [Default: 2]")
    parser.add_argument("file", nargs="+",
                        help="Files or directory to process. If directory, -f filter is used to select files within.")
    parser.add_argument("-f", "--dir-filter", nargs=1, metavar='filter', default=["*.f*q"],
                        help="A filter to match files when searching a directory. [Default: *.f*q]")
    args = parser.parse_args(argv[1:])
    common.writecmd(argv)

    errors = False
    if args.m_target is None:
        sys.stderr.write("Error: option -m (--m-target) is required\n")
        errors = True
    if args.n_target is None:
        sys.stderr.write("Error: option -n (--n-target) is required\n")
        errors = True
    if args.M_target is None:
        sys.stderr.write("Error: option -M (--M-target) is required\n")
        errors = True
    if errors:
        return 1

    ## make the variable parts of script
    subs = {}
    if args.cores[0] == 0:
        subs['slurmheader'] = common.makeExclusiveHeader(partition=args.partition[0], mem="64000")
        args.cores[0] = 16
    else:
        subs['slurmheader'] = common.makeHeader(partition=args.partition[0], ntasks=args.cores[0])

    files = []
    for f in args.file:
        if '*' in f or '?' in f or os.path.exists(f):
            if os.path.isdir(f):
                files.append("%s/%s" % (f, args.dir_filter[0]))
            else:
                files.append(f)
        else:
            sys.stderr.write("Warning: file '%s' does not exist and will be ignored\n" % f)
    subs['files'] = " ".join(files)

    subs['stacksversion'] = subprocess.check_output(["rad-pipeline_module_version", "stacks-gcc"]).rstrip()
    subs['parallelversion'] = subprocess.check_output(["rad-pipeline_module_version", "parallel"]).rstrip()
    subs['radpipelineversion'] = subprocess.check_output(["rad-pipeline_module_version", "rad-pipeline"]).rstrip()

    # build the ranges of -m/-n/-M values to trial, centred on the targets
    subs['mvalues'] = " ".join(map(str, range(args.m_target[0] - args.m_count[0],
                                              args.m_target[0] + args.m_count[0] + 1)))
    subs['nvalues'] = " ".join(map(str, range(args.n_target[0] - args.n_count[0],
                                              args.n_target[0] + args.n_count[0] + 1)))
    subs['Mvalues'] = " ".join(map(str, range(args.M_target[0] - args.M_count[0],
                                              args.M_target[0] + args.M_count[0] + 1)))
    subs['CMD'] = " ".join(argv)

    if args.keep_denovo_log:
        subs['nocpdenovo'] = ""
    else:
        subs['nocpdenovo'] = "#"     # "#" disables the corresponding template line
    subs['corestask'] = args.cores_task[0]
    subs['paralleljobs'] = str(int(args.cores[0] / args.cores_task[0]))
    subs['denovoopts'] = args.denovo_opts[0]
    subs['batchid'] = args.batch_id[0]

    ## validate inputs ##
    filecount = 0
    if len(files) > 0:
        cmd = ["bash", "-c", 'ls -1 %s' % (" ".join(files))]
        #cmd.extend(files)
        filelist = subprocess.check_output(cmd).rstrip()
        #print "'%s'"%filelist
        filecount = len(filelist.split("\n"))
    if filecount < 2 or filecount > 6:
        sys.stderr.write("Warning: suboptimal number of samples (%s). You should use 2 to 6 representative samples.\n" % filecount)

    jobscript = common.loadTemplate("denovo_opt2.slurm")
    if jobscript != "":
        print jobscript.format(**subs)
    else:
        sys.stderr.write("Error: failed to find template 'denovo_opt2.slurm'\n")
    return 0
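# Worked example of the -m/-n/-M grids built above: with --m-target 3 and
# --m-count 2 the expression range(3 - 2, 3 + 2 + 1) yields 1..5, so
# subs['mvalues'] becomes "1 2 3 4 5"; the -n and -M grids are expanded the
# same way, each containing 2*count + 1 values centred on the target.
#
#   >>> " ".join(map(str, range(3 - 2, 3 + 2 + 1)))
#   '1 2 3 4 5'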