def _sample_name(filename):
    """Return the base name of *filename* without a trailing ".fastq.gz".

    ``str.strip(".fastq.gz")`` treats its argument as a *set of characters*
    and removes any of them from both ends of the string, so it also eats
    leading/trailing letters of the sample name itself (for example
    "stool_a.fastq.gz" would become "ool_"). Only the literal suffix is
    removed here.
    """
    suffix = ".fastq.gz"
    name = os.path.basename(filename)
    if name.endswith(suffix):
        name = name[:-len(suffix)]
    return name


# Queue one mapping task per (input file, reference) pair.
for f in in_files:
    for ref in refs:
        # Make sure the per-reference output directory exists.
        ref_dir = "{}/{}".format(args.output, ref)
        if not os.path.isdir(ref_dir):
            os.makedirs(ref_dir)

        fq_path = os.path.join(args.input, f)
        if args.filesfile:
            if fq_path not in files:
                # Log inputs that are missing from the supplied file list.
                with open("{}/skippedfiles.txt".format(ref_dir), "a") as skipped:
                    skipped.write("{}\n".format(f))
                # NOTE(review): the file is recorded as skipped but a task is
                # still queued for it below - confirm whether a `continue`
                # is intended here.
        # NOTE(review): the original indentation was lost; this `elif` is
        # assumed to pair with `if args.filesfile:` - confirm.
        elif not os.path.isfile(fq_path):
            continue

        # NOTE(review): the target name does not include `ref`, so several
        # references would share one target per input file - confirm.
        workflow.add_task_gridable(
            cmd,
            reference=ref,
            depend=fq_path,
            target="{}{}_panphlan_map.csv.bz2".format(
                args.output, _sample_name(f)),
            folder=args.output,
            threads=args.threads,
            cores=args.threads,
            db=args.dbfolder,
            time=4 * 60,    # presumably minutes (4 h) - confirm scheduler units
            mem=8 * 1000)   # presumably MB (~8 GB) - confirm scheduler units

# Hand the queued tasks to the workflow for execution.
workflow.go()
# Optional file listing several references, one per line.
workflow.add_argument(
    "refs", default=None,
    desc="file with list of references (relative to dbfolder)")
args = workflow.parse_args()

# Command template: "{0}" is filled in per reference when each task is queued
# below. "[target]" is left as-is here; presumably the workflow substitutes
# the task target for it - confirm.
cmd = "panphlan_profile.py -c {0} -i {0}/ --o_dna [target] --add_strains"
if args.dbfolder:
    cmd += " --i_bowtie2_indexes {}".format(args.dbfolder)

# A single reference given directly takes precedence over a references file.
if args.ref:
    refs = [args.ref]
elif args.refs:
    # The context manager closes the file even if reading fails.
    with open(args.refs, "r") as refs_file:
        refs = [line.strip() for line in refs_file]
# NOTE(review): when neither option is supplied `refs` is not assigned in this
# section; unless it is defined earlier in the file, the loop below raises
# NameError - confirm and add an explicit error or default if needed.

# Queue one profiling task per reference that exists as a directory.
for ref in refs:
    if not os.path.isdir(ref):
        continue
    # NOTE(review): the template only uses "{0}", so the second format
    # argument (the path under args.input) is never substituted - confirm
    # whether "-i {1}/" was intended.
    workflow.add_task_gridable(
        cmd.format(ref, os.path.join(args.input, ref)),
        target="{}profiles/{}_pa.tsv".format(args.output, ref),
        cores=1,
        time=30,
        mem=1000)

# Hand the queued tasks to the workflow for execution.
workflow.go()