def run(self):
    """Run the CFSAN SNP pipeline unless its SNP matrix already exists.

    Reads ``self.output_dir``, ``self.reference_dir``, ``self.reference_file``
    and ``self.config["parameters"]["cfsan-snp-pipeline"]`` (keys ``image``,
    ``tag`` and ``params["mirrored_input"]``).

    Returns:
        None. When ``snpma.fasta`` is already present nothing is executed.
    """
    # The pipeline is told to write to /dataout/cfsan_snp_output and /dataout
    # is mapped to self.output_dir, so that directory is where a previous
    # run's SNP matrix will be found.
    snp_matrix_fasta = os.path.join(
        self.output_dir, "cfsan_snp_output", "snpma.fasta")
    if os.path.isfile(snp_matrix_fasta):
        return

    # host path -> container path
    cfsan_snp_mounting = {
        self.output_dir: '/dataout',
        self.reference_dir: '/reference',
    }

    # command for running the cfsan-snp pipeline
    cfsan_snp_configuration = self.config["parameters"]["cfsan-snp-pipeline"]
    cfsan_snp_parameters = cfsan_snp_configuration["params"]
    cfsan_snp_command = (
        f"bash -c 'run_snp_pipeline.sh -m {cfsan_snp_parameters['mirrored_input']} "
        f"-o /dataout/cfsan_snp_output --samples_dir /dataout/input_reads/ "
        f"/reference/{self.reference_file}'"
    )

    # create the cfsan-snp run object and execute it
    cfsan_snp = sb_programs.Run(command=cfsan_snp_command,
                                path=cfsan_snp_mounting,
                                image=cfsan_snp_configuration["image"],
                                tag=cfsan_snp_configuration["tag"])
    cfsan_snp.run()
def gas_emmtype(output_dir, raw_read_file_path, id, fwd, rev, tredegar_config, logger):
    """Return the emm type reported by emm-typing-tool for one isolate.

    The tool is only executed when
    ``<output_dir>/emmtyper_output/<id>/<id>_1.results.xml`` is missing.

    Args:
        output_dir: Directory under which ``emmtyper_output`` is created.
        raw_read_file_path: Host directory holding the reads (mapped to /datain).
        id: Isolate identifier used in output paths and the log message.
        fwd: Forward read file name, relative to ``raw_read_file_path``.
        rev: Reverse read file name, relative to ``raw_read_file_path``.
        tredegar_config: Config mapping; ``["parameters"]["emm-typing-tool"]``
            supplies ``image``, ``tag`` and ``params["database"]``.
        logger: Logger used to announce the run.

    Returns:
        The ``value`` of the last ``Final_EMM_type`` result with everything
        from the first "." onward removed, or "" when no such result exists.
    """
    result_xml = f"{output_dir}/emmtyper_output/{id}/{id}_1.results.xml"

    if not os.path.isfile(result_xml):
        # make sure the shared emm-typing-tool output directory exists
        out_root = os.path.join(output_dir, "emmtyper_output")
        pathlib.Path(out_root).mkdir(parents=True, exist_ok=True)

        # host path -> container path
        mounts = {raw_read_file_path: '/datain', out_root: '/dataout'}

        tool_cfg = tredegar_config["parameters"]["emm-typing-tool"]
        tool_params = tool_cfg["params"]
        command = (
            f"emm_typing.py -1 /datain/{fwd} -2 /datain/{rev} "
            f"-m {tool_params['database']} -o /dataout/{id}/"
        )

        runner = sb_programs.Run(command=command,
                                 path=mounts,
                                 image=tool_cfg["image"],
                                 tag=tool_cfg["tag"])
        logger.info(f"Isolate {id} identified as identified as Streptococcus_pyogenes. Running emm-typing-tool for emm-type prediction")
        runner.run()

    # The <result> elements are read from the second child of the XML root;
    # when several carry type "Final_EMM_type" the last one wins.
    result_nodes = ET.parse(result_xml).getroot()[1].findall("result")
    final_values = [
        node.attrib['value']
        for node in result_nodes
        if node.attrib['type'] == 'Final_EMM_type'
    ]
    emm_type = final_values[-1] if final_values else ""
    return emm_type.split(".")[0]
def salmonella_serotype(output_dir, raw_read_file_path, all_reads, id, tredegar_config, logger):
    """Return the serotype SeqSero predicts for one isolate.

    SeqSero is only executed when
    ``<output_dir>/seqsero_output/<id>/Seqsero_result.txt`` is missing.

    Args:
        output_dir: Directory under which ``seqsero_output`` is created.
        raw_read_file_path: Host directory holding the reads (mapped to /datain).
        all_reads: Read file argument passed to ``SeqSero.py -i`` under /datain.
        id: Isolate identifier used in output paths and the log message.
        tredegar_config: Config mapping; ``["parameters"]["seqsero"]`` supplies
            ``image``, ``tag`` and ``params`` (appended verbatim to the command).
        logger: Logger used to announce the run.

    Returns:
        The second column of the last row whose first column contains
        "Predicted serotype", or "" when no such row exists.
    """
    # path to seqsero results; if it doesn't exist run seqsero
    seqsero_out = f"{output_dir}/seqsero_output/{id}/Seqsero_result.txt"
    if not os.path.isfile(seqsero_out):
        # seqsero output path
        seqsero_output_path = os.path.join(output_dir, "seqsero_output")
        pathlib.Path(seqsero_output_path).mkdir(parents=True, exist_ok=True)

        # container mounting dictionary (host path -> container path)
        seqsero_mounting = {raw_read_file_path: '/datain',
                            seqsero_output_path: '/dataout'}

        # container command
        seqsero_configuration = tredegar_config["parameters"]["seqsero"]
        seqsero_params = seqsero_configuration["params"]
        seqsero_command = f"bash -c 'SeqSero.py -m2 -i /datain/{all_reads} -d /dataout/{id} {seqsero_params}'"

        # generate seqsero object
        seqsero_obj = sb_programs.Run(command=seqsero_command,
                                      path=seqsero_mounting,
                                      image=seqsero_configuration["image"],
                                      tag=seqsero_configuration["tag"])
        logger.info(f"Isolate {id} identified as identified as S.enterica. Running SeqSero for serotype prediction. . .")
        seqsero_obj.run()

    # read the result file and return the serotype
    serotype = ""
    with open(seqsero_out, newline="") as tsv_file:
        for row in csv.reader(tsv_file, delimiter="\t"):
            # Blank rows and rows without a value column carry no serotype;
            # skip them explicitly instead of swallowing every exception.
            if len(row) > 1 and "Predicted serotype" in row[0]:
                serotype = row[1]
    return serotype
def calc_mash_dist(self, id, mash_mounting, sketch_name, mash_result):
    """Compute mash distances for one isolate unless the result file exists.

    Args:
        id: Isolate identifier; names the per-isolate directory.
        mash_mounting: Host-to-container path mapping handed to the runner.
        sketch_name: File name of the isolate's sketch under /dataout/<id>.
        mash_result: File name the distance table is redirected into.

    Returns:
        None.
    """
    existing_result = os.path.join(
        self.output_dir, "mash_species", id, mash_result)
    if os.path.isfile(existing_result):
        return

    mash_cfg = self.config["parameters"]["mash"]

    # compare against the RefSeq sketch referenced at /db and redirect the
    # table into the per-isolate output directory
    dist_command = (
        f"bash -c 'mash dist /db/RefSeqSketchesDefaults.msh "
        f"/dataout/{id}/{sketch_name} > /dataout/{id}/{mash_result}'"
    )

    sb_programs.Run(command=dist_command,
                    path=mash_mounting,
                    image=mash_cfg["image"],
                    tag=mash_cfg["tag"]).run()
def create_mash_sketch(self, id, mash_mounting, fwd_read, rev_read, sketch_name):
    """Sketch an isolate's paired reads with mash unless the sketch exists.

    Args:
        id: Isolate identifier; names the per-isolate directory.
        mash_mounting: Host-to-container path mapping handed to the runner.
        fwd_read: Forward read file name under /datain.
        rev_read: Reverse read file name under /datain.
        sketch_name: File name checked for under
            ``<output_dir>/mash_species/<id>`` and passed to ``mash sketch -o``.

    Returns:
        None.
    """
    existing_sketch = os.path.join(
        self.output_dir, "mash_species", id, sketch_name)
    if os.path.isfile(existing_sketch):
        return

    mash_cfg = self.config["parameters"]["mash"]

    # the per-isolate directory is created inside the container first
    sketch_command = (
        f"bash -c 'mkdir -p /dataout/{id} && "
        f"mash sketch -r -m 2 -o /dataout/{id}/{sketch_name} "
        f"/datain/{fwd_read} /datain/{rev_read}'"
    )

    sb_programs.Run(command=sketch_command,
                    path=mash_mounting,
                    image=mash_cfg["image"],
                    tag=mash_cfg["tag"]).run()
def assembly_metrics(id, output_dir, assembly, quast_out_file, isolate_qual, tredegar_config, logger):
    """Record genome length and contig count for an isolate from QUAST output.

    QUAST is only executed when ``quast_out_file`` is missing. If the assembly
    file itself is missing, both metrics are set to "ASSEMBLY_FAILED" and the
    function returns without running or parsing anything.

    Args:
        id: Isolate identifier; key into ``isolate_qual``.
        output_dir: Directory under which ``quast_output`` is created.
        assembly: Path to the assembly file handed to QUAST.
        quast_out_file: Path of the tab-separated QUAST report to parse.
        isolate_qual: Mapping of isolate id to a metrics dict; updated in place
            with ``est_genome_length`` and ``number_contigs``.
        tredegar_config: Config mapping; ``["parameters"]["quast"]`` supplies
            ``image``, ``tag`` and ``params`` (appended verbatim to the command).
        logger: Logger used for progress and error messages.

    Raises:
        ValueError: If the report has no usable "Total length" or "# contigs"
            row.
    """
    # create and run quast object if results don't already exist
    if not os.path.isfile(quast_out_file):
        # generate the path for quast output
        quast_output_path = os.path.join(output_dir, "quast_output")
        pathlib.Path(quast_output_path).mkdir(parents=True, exist_ok=True)

        # quast mounting dictionary paths (host path -> container path)
        quast_mounting = {os.path.dirname(assembly): '/datain',
                          quast_output_path: '/dataout'}

        # ensure an assembly was generated
        if not os.path.isfile(assembly):
            isolate_qual[id]["est_genome_length"] = "ASSEMBLY_FAILED"
            isolate_qual[id]["number_contigs"] = "ASSEMBLY_FAILED"
            return

        # create the quast command
        assembly_file_name = os.path.basename(assembly)
        quast_configuration = tredegar_config["parameters"]["quast"]
        quast_params = quast_configuration["params"]
        quast_command = f"bash -c 'quast.py /datain/{assembly_file_name} -o /dataout/{id} {quast_params}'"

        # create the quast object
        quast_obj = sb_programs.Run(command=quast_command,
                                    path=quast_mounting,
                                    image=quast_configuration["image"],
                                    tag=quast_configuration["tag"])
        logger.info(f"Gathering {id} assembly quality metrics with Quast. . .")
        quast_obj.run()

    # Start from "not found" so a report without the expected rows reaches
    # the ValueError below rather than failing on an unbound local.
    genome_length = None
    number_contigs = None

    # open the quast results to capture relevant metrics
    with open(quast_out_file, newline="") as tsv_file:
        for row in csv.reader(tsv_file, delimiter="\t"):
            # blank rows and rows without a value column carry no metric
            if len(row) < 2:
                continue
            # substring match: when several rows match, the last one wins
            if "Total length" in row[0]:
                genome_length = row[1]
                isolate_qual[id]["est_genome_length"] = genome_length
            if "# contigs" in row[0]:
                number_contigs = row[1]
                isolate_qual[id]["number_contigs"] = number_contigs

    if not genome_length:
        logger.error(f"ERROR: No genome length predicted for isolate {id}")
        raise ValueError(f"Unable to predict genome length for isolate {id}")
    if not number_contigs:
        logger.error("No number of contigs predicted")
        raise ValueError(f"ERROR: Unable to predict number of contigs for isolate {id}")
def ref_free_snp(output_dir, group, ksnp3_matrix, foushee_config, logger):
    """Run kSNP3 for a group of isolates unless its matrix already exists.

    Args:
        output_dir: Output directory; its absolute path is mapped to /datain
            and ``<output_dir>/ksnp3_output/<group>`` to /dataout.
        group: Group name used in paths and the log message.
        ksnp3_matrix: Path whose presence means kSNP3 has already been run.
        foushee_config: Config mapping; ``["parameters"]["ksnp3"]`` supplies
            ``image``, ``tag``, ``params["kmer_length"]`` and
            ``params["core_snps_only"]``.
        logger: Logger used to announce the run.

    Returns:
        None.
    """
    if os.path.isfile(ksnp3_matrix):
        return

    logger.info(f"Performing SNP analysis for isolates identified as {group}")

    # host path -> container path
    abs_output_dir = os.path.abspath(output_dir)
    mounts = {
        abs_output_dir: '/datain',
        os.path.join(os.path.abspath(output_dir), "ksnp3_output", group): '/dataout',
    }

    # build the kSNP3 command for this group (note the trailing space)
    tool_cfg = foushee_config["parameters"]["ksnp3"]
    tool_params = tool_cfg["params"]
    command = (
        f"kSNP3 -in /datain/ksnp3_output/{group}/{group}_assemblies.txt "
        f"-outdir /dataout/ -k {tool_params['kmer_length']} "
        f"{tool_params['core_snps_only']} "
    )

    sb_programs.Run(command=command,
                    path=mounts,
                    image=tool_cfg["image"],
                    tag=tool_cfg["tag"]).run()
def snp_matrix(output_dir, group, snp_dists_output_dir, snp_dists_result, foushee_config, logger):
    """Build a pairwise SNP distance matrix with snp-dists unless it exists.

    Args:
        output_dir: Output directory; ``<output_dir>/ksnp3_output/<group>``
            (absolute) is mapped to /datain.
        group: Group name used in paths, the output file name and the log.
        snp_dists_output_dir: Directory created if needed and mapped to /dataout.
        snp_dists_result: Path whose presence means snp-dists has already run.
        foushee_config: Config mapping; ``["parameters"]["snp-dists"]`` supplies
            ``image``, ``tag`` and ``params`` (appended verbatim to the command).
        logger: Logger used to announce the run.

    Returns:
        None.
    """
    if os.path.isfile(snp_dists_result):
        return

    logger.info(f"Performing SNP analysis for isolates identified as {group}")

    # create snp-dists output dir
    pathlib.Path(snp_dists_output_dir).mkdir(parents=True, exist_ok=True)

    # host path -> container path
    ksnp3_group_dir = os.path.join(
        os.path.abspath(output_dir), "ksnp3_output", group)
    mounts = {ksnp3_group_dir: '/datain', snp_dists_output_dir: '/dataout'}

    # build the snp-dists command; its output is redirected to a TSV
    tool_cfg = foushee_config["parameters"]["snp-dists"]
    extra_params = tool_cfg["params"]
    command = (
        f"bash -c 'snp-dists /datain/core_SNPs_matrix.fasta > "
        f"/dataout/{group}_pairwise_snp_distance_matrix.tsv {extra_params}'"
    )

    sb_programs.Run(command=command,
                    path=mounts,
                    image=tool_cfg["image"],
                    tag=tool_cfg["tag"]).run()
def assemble_contigs(id, output_dir, clean_read_file_path, fwd_read_clean, rev_read_clean, memory, cpus, assembly, tredegar_config, logger):
    """Assemble an isolate's cleaned reads with shovill unless already done.

    Args:
        id: Isolate identifier; names the per-isolate output directory.
        output_dir: Directory under which ``shovill_output`` is created.
        clean_read_file_path: Host directory with cleaned reads (mapped to /datain).
        fwd_read_clean: Forward cleaned read file name under /datain.
        rev_read_clean: Reverse cleaned read file name under /datain.
        memory: Value passed to ``shovill --ram``.
        cpus: Value passed to ``shovill --cpus``.
        assembly: Path whose presence means the assembly already exists.
        tredegar_config: Config mapping; ``["parameters"]["shovill"]`` supplies
            ``image``, ``tag`` and ``params`` (appended verbatim to the command).
        logger: Logger used to announce the run.

    Returns:
        None.
    """
    if os.path.isfile(assembly):
        return

    # create shovill_output directory
    shovill_out_dir = os.path.join(output_dir, "shovill_output")
    pathlib.Path(shovill_out_dir).mkdir(parents=True, exist_ok=True)

    # host path -> container path
    mounts = {clean_read_file_path: '/datain', shovill_out_dir: '/dataout'}

    # build the shovill command for this isolate
    tool_cfg = tredegar_config["parameters"]["shovill"]
    extra_params = tool_cfg["params"]
    command = (
        f"bash -c 'shovill --outdir /dataout/{id}/ "
        f"-R1 /datain/{fwd_read_clean} -R2 /datain/{rev_read_clean} "
        f"--ram {memory} --cpus {cpus} --force {extra_params}'"
    )

    runner = sb_programs.Run(command=command,
                             path=mounts,
                             image=tool_cfg["image"],
                             tag=tool_cfg["tag"])
    logger.info(f"Assemblying {id} with shovill. . .")
    runner.run()
def clean_reads(id, output_dir, raw_read_file_path, fwd_read, rev_read, fwd_read_clean, tredegar_config, logger):
    """Clean an isolate's raw reads with seqyclean unless already done.

    The run is skipped when
    ``<output_dir>/seqyclean_output/<id>/<fwd_read_clean>`` already exists.

    Args:
        id: Isolate identifier; names the per-isolate output directory/prefix.
        output_dir: Directory under which ``seqyclean_output`` is created.
        raw_read_file_path: Host directory with raw reads (mapped to /datain).
        fwd_read: Forward raw read file name under /datain.
        rev_read: Reverse raw read file name under /datain.
        fwd_read_clean: File name of the cleaned forward reads to look for.
        tredegar_config: Config mapping; ``["parameters"]["seqyclean"]``
            supplies ``image``, ``tag`` and the ``params`` keys
            ``minimum_read_length``, ``contaminants`` and ``quality_trimming``.
        logger: Logger used to announce the run.

    Returns:
        None.
    """
    seqyclean_out_dir = os.path.join(output_dir, "seqyclean_output")
    cleaned_fwd_path = os.path.join(
        output_dir, "seqyclean_output", id, fwd_read_clean)
    if os.path.isfile(cleaned_fwd_path):
        return

    # create seqyclean output directory
    pathlib.Path(seqyclean_out_dir).mkdir(parents=True, exist_ok=True)

    # host path -> container path
    mounts = {raw_read_file_path: '/datain', seqyclean_out_dir: '/dataout'}

    # build the seqyclean command for this isolate
    tool_cfg = tredegar_config["parameters"]["seqyclean"]
    tool_params = tool_cfg["params"]
    command = (
        f"bash -c 'seqyclean -1 /datain/{fwd_read} -2 /datain/{rev_read} "
        f"-o /dataout/{id}/{id}_clean "
        f"-minlen {tool_params['minimum_read_length']} "
        f"-c {tool_params['contaminants']} "
        f"{tool_params['quality_trimming']}'"
    )

    runner = sb_programs.Run(command=command,
                             path=mounts,
                             image=tool_cfg["image"],
                             tag=tool_cfg["tag"])
    logger.info(f"Cleaning {id} read data with seqyclean. . .")
    runner.run()
def read_metrics(id, output_dir, raw_read_file_path, all_reads, isolate_qual, cgp_out, tredegar_config, logger):
    """Record read quality and estimated coverage for an isolate.

    ``run_assembly_readMetrics.pl`` is only executed when ``cgp_out`` is
    missing. The report is then parsed and ``isolate_qual[id]`` is updated in
    place with ``r1_q``, ``r2_q`` and ``est_cvg``.

    Args:
        id: Isolate identifier; key into ``isolate_qual`` and prefix of the
            report file name written by the run.
        output_dir: Directory under which ``cg_pipeline_output`` is created.
        raw_read_file_path: Host directory with raw reads (mapped to /datain).
        all_reads: Read file argument passed to the script under /datain.
        isolate_qual: Mapping of isolate id to a metrics dict;
            ``est_genome_length`` is read from it when the script must be run.
        cgp_out: Path of the tab-separated report to parse; it must have
            ``File``, ``avgQuality`` and ``coverage`` columns.
        tredegar_config: Config mapping; ``["parameters"]["cg_pipeline"]``
            supplies ``image``, ``tag`` and ``params["subsample"]``.
        logger: Logger used to announce the run.

    Returns:
        None.
    """
    # check for cg_pipeline output file; if it does not exist run cg_pipeline
    if not os.path.isfile(cgp_out):
        # set genome length
        genome_length = isolate_qual[id]["est_genome_length"]

        # create cg_pipeline output path
        cg_pipeline_output_path = os.path.join(output_dir, "cg_pipeline_output")
        pathlib.Path(cg_pipeline_output_path).mkdir(parents=True, exist_ok=True)

        # generate path mounting for container (host path -> container path)
        cg_mounting = {raw_read_file_path: '/datain',
                       cg_pipeline_output_path: '/dataout'}

        # generate command for cg_pipeline
        cgp_configuration = tredegar_config["parameters"]["cg_pipeline"]
        cgp_params = cgp_configuration["params"]
        cgp_result_file = id + "_readMetrics.tsv"
        cg_command = f"bash -c 'run_assembly_readMetrics.pl {cgp_params['subsample']} /datain/{all_reads} -e {genome_length} > /dataout/{cgp_result_file}'"

        # generate the cg_pipeline object
        cg_obj = sb_programs.Run(command=cg_command,
                                 path=cg_mounting,
                                 image=cgp_configuration["image"],
                                 tag=cgp_configuration["tag"])
        logger.info(f"Getting {id} sequencing quality metrics with CG Pipeline. . .")
        cg_obj.run()

    fwd_formats = ("_1.fastq", "_R1.fastq", "_1P.fq.gz")
    rev_formats = ("_2.fastq", "_R2.fastq", "_2P.fq.gz")

    # Accumulate coverage locally so the total does not depend on whether the
    # forward or the reverse file is listed first in the report.
    total_coverage = 0.0
    found_reads = False

    # open cg_pipeline results and capture relevant metrics
    with open(cgp_out, newline="") as tsv_file:
        for row in csv.DictReader(tsv_file, delimiter="\t"):
            file_name = row["File"]
            if any(fwd_format in file_name for fwd_format in fwd_formats):
                isolate_qual[id]["r1_q"] = row["avgQuality"]
                total_coverage += float(row["coverage"])
                found_reads = True
            if any(rev_format in file_name for rev_format in rev_formats):
                isolate_qual[id]["r2_q"] = row["avgQuality"]
                total_coverage += float(row["coverage"])
                found_reads = True

    # leave est_cvg untouched when the report lists no recognised read file
    if found_reads:
        isolate_qual[id]["est_cvg"] = total_coverage
def main():
    """Command-line entry point for the ``staphb-tk`` wrapper.

    Parses the toolkit-level options, handles the housekeeping flags
    (``--update``, ``--auto_update``, ``--get_docker_config``, ``--list``),
    loads the docker configuration file, and then either runs the custom
    ``mash_species`` application or builds the command line for the selected
    wrapped program and runs it through ``sb_prog.Run`` with the image and tag
    read from that configuration.

    Exits through ``sys.exit`` for the housekeeping flags, on argument errors,
    when no application is named, and when a bundled application is invoked
    without naming one of its tools.
    """

    # argparser that displays the full help text on any argument error
    class MyParser(argparse.ArgumentParser):
        def error(self, message):
            self.print_help()
            sys.stderr.write('\nerror: %s\n' % message)
            sys.exit(1)

    docker_config_path = os.path.abspath(
        os.path.dirname(__file__) + '/' + 'core/docker_config.json')

    parser = MyParser(
        description=f"StaPH-B ToolKit Programs v{autoupdate.version}",
        usage="staphb-tk [optional arguments] <application> [application arguments]",
        add_help=True)
    subparsers = parser.add_subparsers(title='custom toolkit applications',
                                       metavar='',
                                       dest="subparser_name",
                                       parser_class=MyParser)
    parser.add_argument(
        "--docker_config",
        "-c",
        default=docker_config_path,
        metavar="<path>",
        help="Configuration file for container images and tags; if none provided, default container versions will be used.")
    parser.add_argument(
        "--get_docker_config",
        default=False,
        action="store_true",
        help="Get the default docker container configureation file.")
    parser.add_argument(
        "--list",
        "-l",
        default=False,
        action="store_true",
        help="List all of the software available in the toolkit.")
    parser.add_argument("--update",
                        default=False,
                        action="store_true",
                        help="Check for and install a ToolKit update.")
    parser.add_argument(
        "--auto_update",
        default=False,
        action="store_true",
        help="Toggle automatic ToolKit updates. Default is off.")

    # custom apps
    # Mash Species
    parser_mash_species = subparsers.add_parser(
        'mash_species',
        help='MASH_species uses a pre-sketched RefSeq database to identify the isolate species from paired-end read data.',
        usage="sb_mash_species <input> [options]")
    parser_mash_species.add_argument(
        "input",
        type=str,
        nargs='?',
        help="path to dir containing paire-end read files")
    parser_mash_species.add_argument("-o",
                                     metavar='path',
                                     default="",
                                     type=str,
                                     help="Path for output directory",
                                     required=False)

    # Wrapped applications, in registration order.
    # application name -> (command prefix,
    #                      arguments used when the user supplies none,
    #                      key under config["parameters"])
    # A default of None marks a bundled image for which the user must name
    # the tool to run; see missing_tool_messages below.
    program_table = {
        'abricate': ("abricate ", "-h", "abricate"),
        'augur': ("augur ", "-h", "augur"),
        'bbtools': (" ", None, "bbtools"),
        'bwa': ("bwa ", "", "bwa"),
        'canu-racon': (" ", None, "canu-racon"),
        'centroid': ("centroid.py ", "-h", "centroid"),
        'cfsan-snp': ("cfsan_snp_pipeline ", "-h", "cfsan-snp-pipeline"),
        'circlator': ("circlator ", "-h", "circlator"),
        'clustalo': ("clustalo ", "-h", "clustalo"),
        'emm-typing-tool': ("emm_typing.py ", "-h", "emm-typing-tool"),
        'fastani': ("fastANI ", "-h", "fastani"),
        'fastqc': ("fastqc ", "-h", "fastqc"),
        'fasttree': ("FastTree ", "-h", "fasttree"),
        'filtlong': ("filtlong ", "-h", "filtlong"),
        'flye': ("flye ", "-h", "flye"),
        'iqtree': ("iqtree ", "-h", "iqtree"),
        'ivar': ("ivar ", " ", "ivar"),
        'ivar-SC2': ("ivar ", " ", "ivar-SC2"),
        'kma': ("kma ", "-h", "kma"),
        'kraken': ("kraken ", "-h", "kraken"),
        'kraken-build': ("kraken-build ", "-h", "kraken"),
        'kraken2': ("kraken2 ", "-h", "kraken2"),
        'kraken2-build': ("kraken2-build ", "-h", "kraken2"),
        'ksnp3': ("kSNP3 ", "", "ksnp3"),
        'legsta': ("legsta ", "-h", "legsta"),
        'lyveset': ("", None, "lyveset"),
        'mafft': ("mafft ", "-h", "mafft"),
        'mash': ("mash ", "-h", "mash"),
        'mashtree': ("mashtree ", "-h", "mashtree"),
        'medaka': ("medaka ", "-h", "medaka"),
        'minimap2': ("minimap2 ", "-h", "minimap2"),
        'mlst': ("mlst ", "-h", "mlst"),
        'mugsy': ("mugsy ", "-h", "mugsy"),
        'multiqc': ("multiqc ", "-h", "multiqc"),
        'nanoplot': ("NanoPlot ", "-h", "nanoplot"),
        'ncbi-amrfinder-plus': ("amrfinder ", "--help", "ncbi-amrfinder-plus"),
        'orthofinder': ("orthofinder ", "-h", "orthofinder"),
        'pangolin': ("pangolin ", "--help", "pangolin"),
        'pilon': ("pilon ", "--help", "pilon"),
        'plasmidseeker': ("plasmidseeker.pl ", "-h", "plasmidseeker"),
        'prokka': ("prokka ", "-h", "prokka"),
        'quast': ("quast.py ", "-h", "quast"),
        'rasusa': ("rasusa ", "-h", "rasusa"),
        'raxml': ("raxmlHPC ", "-h", "raxml"),
        'roary': ("roary ", "-h", "roary"),
        'salmid': ("SalmID.py ", "-h", "salmid"),
        'samtools': ("samtools ", "", "samtools"),
        'seqsero': ("SeqSero.py ", "-h", "seqsero"),
        'seqsero2': ("SeqSero2_package.py ", "-h", "seqsero2"),
        'seqyclean': ("seqyclean ", "-h", "seqyclean"),
        'seroba': ("seroba ", "-h", "seroba"),
        'serotypefinder': ("serotypefinder.pl ", "-h", "serotypefinder"),
        'shovill': ("shovill ", "-h", "shovill"),
        'sistr': ("sistr ", "-h", "sistr"),
        'skesa': ("skesa ", "-h", "skesa"),
        'snippy': ("snippy ", "-h", "snippy"),
        'snp-dists': ("snp-dists ", "-h", "snp-dists"),
        'snp-sites': ("snp-sites ", "-h", "snp-sites"),
        'spades': ("spades.py ", "-h", "spades"),
        'sra-toolkit': (" ", None, "sra-toolkit"),
        'staramr': ("staramr ", "-h", "staramr"),
        'tiptoft': ("tiptoft ", "-h", "tiptoft"),
        'trimmomatic': ("trimmomatic ", "-h", "trimmomatic"),
        'unicycler': ("unicycler ", "-h", "unicycler"),
        'wtdbg2': ("wtdbg2 ", "-h", "wtdbg2"),
    }

    # Usage hints printed when a bundled application is called without a tool.
    missing_tool_messages = {
        'sra-toolkit': (
            "SRA toolkit tool must be specified, e.g. staphb-tk sra-toolkit fasterq-dump, staphb-tk sra-toolkit sra-pileup, etc. "
            "\n\nMore info on SRA Toolkit usage at: https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=toolkit_doc."
        ),
        'canu-racon': (
            "This is a bundled application that requires a specific commands to be used (i.e. staphb-tk canu-racon canu -h) "
            "please see the documentation for Canu, Minimap2 and Racon to use."
        ),
        'bbtools': (
            "BBTools shell script must be specified, e.g. staphb-tk bbtools bbmap.sh, staphb-tk bbtools bbduk.sh, etc. "
            "\n\nMore info on BBTools at https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/."
        ),
        'lyveset': (
            "Lyev-SET perl script must be specified, e.g. staphb-tk lyveset launch_set.pl, staphb-tk lyveset set_manage.pl, staphb-tk lyveset run_assembly_readMeterics.pl. "
            "\n\nMore info on Lyve-SET usage at: github.com/lskatz/lyve-SET."
        ),
    }

    # Register a pass-through subparser per wrapped application; add_help is
    # off so that -h/--help reach the wrapped program instead of argparse.
    for program_name in program_table:
        subparsers.add_parser(program_name, add_help=False)

    def print_prog_list():
        # NOTE(review): ``progs`` is not defined in this function; it is
        # presumably a module-level name -> description mapping. Confirm.
        print("Available programs:")
        header = ["Command", "Description", "-------", "-----------"]
        print(f"{header[0]:<25}{header[1]:^10}")
        print(f"{header[2]:<25}{header[3]:^10}")
        for key in progs:
            print(f"{key:<25}{progs[key]:^10}")

    # handle the arguments; anything argparse does not recognise belongs to
    # the wrapped application
    known_args, args = parser.parse_known_args()
    program = known_args.subparser_name

    # check for updates
    if known_args.update:
        autoupdate.check_for_updates()
        sys.exit(0)

    if known_args.auto_update:
        # flip the current auto-update setting
        update_status = autoupdate.check_update_status()
        autoupdate.toggle_updater(not update_status)

    if autoupdate.check_update_status():
        autoupdate.check_for_updates()

    # give user docker config if asked
    if known_args.get_docker_config:
        copy(
            docker_config_path,
            os.path.join(
                os.getcwd(),
                date.today().strftime("%y-%m-%d") + "_docker_config.json"))
        sys.exit(0)

    # display list of programs if needed
    if known_args.list:
        print_prog_list()
        sys.exit(0)

    if program is None:
        parser.print_help()
        sys.exit(1)

    # Run autopathing
    arg_string, path_map = path_replacer(args, os.getcwd())

    # set the configuration file
    if known_args.docker_config == "/core/docker_config.json":
        # use default
        config_file_path = os.path.abspath(
            os.path.dirname(
                os.path.realpath(__file__))) + known_args.docker_config
    else:
        config_file_path = os.path.abspath(known_args.docker_config)

    with open(config_file_path, 'r') as config_file:
        config = json.load(config_file)

    # Custom program specific execution code
    # -----------------------------------------
    if program == 'mash_species':
        # get output dir if supplied, if not set it to cwd
        if not known_args.o:
            output_dir = os.getcwd()
        else:
            try:
                output_dir = os.path.abspath(known_args.o)
            except (AttributeError, TypeError):
                print("Please enter a valid output path.")
                sys.exit(1)

        # get input path, if not supplied print help
        try:
            path = os.path.abspath(known_args.input)
        except (AttributeError, TypeError):
            parser_mash_species.print_help()
            print("Please enter a valid input path.")
            sys.exit(1)

        # create and run the mash species object
        mash_species_obj = MashSpecies(path=path, output_dir=output_dir)
        mash_species_obj.run()

        # mash_species is complete at this point; the generic runner below
        # has no command or configuration for it.
        return

    # Program specific execution code
    # -----------------------------------------
    command_prefix, default_args, config_key = program_table[program]
    if not re.search('[a-zA-Z]', arg_string):
        if default_args is None:
            print(missing_tool_messages[program])
            sys.exit()
        arg_string = default_args
    command = command_prefix + arg_string
    program_configuration = config["parameters"][config_key]

    # Run the program
    # -----------------------------------------
    program_object = sb_prog.Run(command=command,
                                 path=path_map,
                                 image=program_configuration["image"],
                                 tag=program_configuration["tag"])
    program_object.run()
def _read_serotypefinder_alleles(results_file):
    """Return the ``(wzx, wzy, wzm, h_type)`` calls found in a results table.

    The first column of each row is matched against the gene names and the
    sixth column is taken as the call. When a gene appears more than once the
    last row wins. Genes that are absent are reported as empty strings.
    """
    o_alleles = {"wzx": "", "wzy": "", "wzm": ""}
    h_type = ""
    # newline="" lets the csv module do its own line-ending handling
    with open(results_file, newline="") as tsv_file:
        for row in csv.reader(tsv_file, delimiter="\t"):
            # blank or truncated rows carry no call; indexing them would
            # raise IndexError, so skip them
            if len(row) < 6:
                continue
            gene, call = row[0], row[5]
            if "fl" in gene:
                h_type = call
            if gene in o_alleles:
                o_alleles[gene] = call
    return o_alleles["wzx"], o_alleles["wzy"], o_alleles["wzm"], h_type


def ecoli_serotype(output_dir, assembly, id, tredegar_config, logger):
    """Predict an E. coli serotype from SerotypeFinder results.

    If ``{output_dir}/serotypefinder_output/{id}/results_tab.txt`` does not
    exist, SerotypeFinder is run through ``sb_programs.Run`` on ``assembly``
    to create it. The results table is then reduced to a single serotype
    string.

    Args:
        output_dir: Directory in which ``serotypefinder_output`` is created.
        assembly: Path to the assembly file; its directory is mounted as
            ``/datain``. Only used when the results file is missing.
        id: Isolate identifier, used for the results sub-directory and in the
            log message. (Shadows the builtin; kept for caller compatibility.)
        tredegar_config: Mapping providing
            ``["parameters"]["serotypefinder"]`` with ``image``, ``tag`` and
            ``params`` (``database``, ``species``, ``nucleotide_agreement``,
            ``percent_coverage``). Only read when SerotypeFinder has to run.
        logger: Object with an ``info`` method. Only used when SerotypeFinder
            has to run.

    Returns:
        ``"<O-type>:<H-type>"``, ``"NA"`` when neither an O-type nor an H-type
        was identified, or ``None`` when the results file is missing and
        ``assembly`` is not an existing file.
    """
    # ambiguous allele calls
    matched_wzx = ["O2", "O50", "O17", "O77", "O118", "O151", "O169", "O141ab", "O141ac"]
    matched_wzy = ["O13", "O135", "O17", "O44", "O123", "O186"]

    # path to serotypefinder results file, if it doesn't exist run the serotypefinder
    stf_out = f"{output_dir}/serotypefinder_output/{id}/results_tab.txt"
    if not os.path.isfile(stf_out):
        # output path for serotypefinder
        serotypefinder_output_path = os.path.join(output_dir, "serotypefinder_output")
        pathlib.Path(serotypefinder_output_path).mkdir(parents=True, exist_ok=True)

        # nothing to analyse without an assembly
        if not os.path.isfile(assembly):
            return None

        # setup container mounting
        assembly_path, assembly_name = os.path.split(assembly)
        stf_mounting = {assembly_path: '/datain', serotypefinder_output_path: '/dataout'}

        # generate serotypefinder command
        stf_configuration = tredegar_config["parameters"]["serotypefinder"]
        stf_params = stf_configuration["params"]
        stf_command = (
            f"serotypefinder.pl -d {stf_params['database']} "
            f"-i /datain/{assembly_name} -b /blast-2.2.26/ -o /dataout/{id} "
            f"-s {stf_params['species']} -k {stf_params['nucleotide_agreement']} "
            f"-l {stf_params['percent_coverage']}"
        )

        # create serotypefinder object
        stf_obj = sb_programs.Run(command=stf_command, path=stf_mounting,
                                  image=stf_configuration["image"],
                                  tag=stf_configuration["tag"])
        logger.info(f"Isolate {id} identified as E.coli. Running SerotypeFinder for serotype prediction")
        stf_obj.run()

    # process the results of serotypefinder as per literature guidelines
    # (Joensen, et al. 2015, DOI: 10.1128/JCM.00008-15)
    wzx_allele, wzy_allele, wzm_allele, h_type = _read_serotypefinder_alleles(stf_out)

    # prefer the wzx call, then wzy, then wzm
    o_type = wzx_allele or wzy_allele or wzm_allele
    # an ambiguous call from one gene is replaced by the other gene's call
    if o_type in matched_wzx:
        o_type = wzy_allele
    if o_type in matched_wzy:
        o_type = wzx_allele

    serotype = f"{o_type}:{h_type}"
    # NA if no o-type or h-type identified
    if serotype == ":":
        serotype = "NA"
    return serotype