예제 #1
0
    def run(self):
        """Run the CFSAN SNP pipeline in a container unless its SNP matrix exists.

        Reads ``self.output_dir``, ``self.reference_dir``, ``self.reference_file``
        and ``self.config["parameters"]["cfsan-snp-pipeline"]`` (keys ``image``,
        ``tag`` and ``params["mirrored_input"]``).

        Returns:
            None. The work is done by ``sb_programs.Run(...).run()``.
        """
        # The command below writes to /dataout/cfsan_snp_output, i.e.
        # <output_dir>/cfsan_snp_output, so that is where a finished run leaves
        # snpma.fasta.  The original check only looked in "cfsan_output", which
        # the command never writes to; that location is still honoured so any
        # externally staged result keeps short-circuiting the run.
        result_dirs = ("cfsan_snp_output", "cfsan_output")
        if any(
                os.path.isfile(
                    os.path.join(self.output_dir, result_dir, "snpma.fasta"))
                for result_dir in result_dirs):
            return

        # host directory -> mount point inside the container
        cfsan_snp_mounting = {
            self.output_dir: '/dataout',
            self.reference_dir: '/reference'
        }

        # command for running the cfsan-snp pipeline
        cfsan_snp_configuration = self.config["parameters"][
            "cfsan-snp-pipeline"]
        cfsan_snp_parameters = cfsan_snp_configuration["params"]
        cfsan_snp_command = f"bash -c 'run_snp_pipeline.sh -m {cfsan_snp_parameters['mirrored_input']} -o /dataout/cfsan_snp_output --samples_dir /dataout/input_reads/ /reference/{self.reference_file}'"

        # create the container run object and execute it
        cfsan_snp = sb_programs.Run(command=cfsan_snp_command,
                                    path=cfsan_snp_mounting,
                                    image=cfsan_snp_configuration["image"],
                                    tag=cfsan_snp_configuration["tag"])
        cfsan_snp.run()
예제 #2
0
def gas_emmtype(output_dir, raw_read_file_path, id, fwd, rev, tredegar_config, logger):
    """Return the emm-type reported by emm-typing-tool for one isolate.

    If ``<output_dir>/emmtyper_output/<id>/<id>_1.results.xml`` does not exist
    yet, emm_typing.py is run in a container first (via ``sb_programs.Run``)
    with ``raw_read_file_path`` mounted at /datain.

    Args:
        output_dir: Directory under which ``emmtyper_output`` is created.
        raw_read_file_path: Host directory holding the ``fwd`` / ``rev`` reads.
        id: Isolate identifier; used in the result path and container command.
        fwd: Forward read file name, relative to ``raw_read_file_path``.
        rev: Reverse read file name, relative to ``raw_read_file_path``.
        tredegar_config: Mapping read at ``["parameters"]["emm-typing-tool"]``
            for ``image``, ``tag`` and ``params["database"]``.
        logger: Object with an ``info`` method; only used when the tool runs.

    Returns:
        The ``value`` of the last ``Final_EMM_type`` result with everything
        from the first "." removed, or "" when no such result is present.
    """
    # path to emm-typing-tool results; the tool is run only if they are missing
    emmtyper_out = f"{output_dir}/emmtyper_output/{id}/{id}_1.results.xml"
    if not os.path.isfile(emmtyper_out):
        # emm-typing-tool output path
        emmtyper_output_path = os.path.join(output_dir, "emmtyper_output")
        pathlib.Path(emmtyper_output_path).mkdir(parents=True, exist_ok=True)
        # container mounting dictionary: host directory -> container path
        emmtyper_mounting = {raw_read_file_path: '/datain', emmtyper_output_path: '/dataout'}

        # container command
        emmtyper_configuration = tredegar_config["parameters"]["emm-typing-tool"]
        emmtyper_params = emmtyper_configuration["params"]
        emmtyper_command = f"emm_typing.py -1 /datain/{fwd} -2 /datain/{rev} -m {emmtyper_params['database']} -o /dataout/{id}/"

        # generate the emm-typing-tool run object
        emmtyper_obj = sb_programs.Run(command=emmtyper_command, path=emmtyper_mounting, image=emmtyper_configuration["image"], tag=emmtyper_configuration["tag"])

        logger.info(f"Isolate {id} identified as Streptococcus_pyogenes. Running emm-typing-tool for emm-type prediction")
        emmtyper_obj.run()

    # read the result file and return the emm-type
    emm_type = ""
    root = ET.parse(emmtyper_out).getroot()
    # NOTE(review): the results are read from the second child of the root
    # element; confirm against the tool's XML layout.
    for result in root[1].findall("result"):
        # .get() so a <result> without a "type" attribute is skipped, not fatal
        if result.get('type') == 'Final_EMM_type':
            emm_type = result.attrib['value']
    return emm_type.split(".")[0]
예제 #3
0
def salmonella_serotype(output_dir, raw_read_file_path, all_reads, id, tredegar_config, logger):
    """Return the serotype SeqSero predicted for one isolate.

    If ``<output_dir>/seqsero_output/<id>/Seqsero_result.txt`` does not exist
    yet, SeqSero.py is run in a container first (via ``sb_programs.Run``) with
    ``raw_read_file_path`` mounted at /datain.

    Args:
        output_dir: Directory under which ``seqsero_output`` is created.
        raw_read_file_path: Host directory mounted at /datain.
        all_reads: Read file argument passed to ``SeqSero.py -i``, relative to
            /datain.
        id: Isolate identifier; used in the result path and container command.
        tredegar_config: Mapping read at ``["parameters"]["seqsero"]`` for
            ``image``, ``tag`` and ``params`` (appended verbatim to the command).
        logger: Object with an ``info`` method; only used when SeqSero runs.

    Returns:
        The second tab-separated field of the last row whose first field
        contains "Predicted serotype", or "" when there is no such row.
    """
    # path to seqsero results, if it doesn't exist run the seqsero object
    seqsero_out = f"{output_dir}/seqsero_output/{id}/Seqsero_result.txt"
    if not os.path.isfile(seqsero_out):
        # seqsero output path
        seqsero_output_path = os.path.join(output_dir, "seqsero_output")
        pathlib.Path(seqsero_output_path).mkdir(parents=True, exist_ok=True)

        # container mounting dictionary: host directory -> container path
        seqsero_mounting = {raw_read_file_path: '/datain', seqsero_output_path: '/dataout'}

        # container command
        seqsero_configuration = tredegar_config["parameters"]["seqsero"]
        seqsero_params = seqsero_configuration["params"]
        seqsero_command = f"bash -c 'SeqSero.py -m2 -i /datain/{all_reads} -d /dataout/{id} {seqsero_params}'"

        # generate seqsero object
        seqsero_obj = sb_programs.Run(command=seqsero_command, path=seqsero_mounting, image=seqsero_configuration["image"], tag=seqsero_configuration["tag"])

        logger.info(f"Isolate {id} identified as S.enterica. Running SeqSero for serotype prediction. . .")
        seqsero_obj.run()

    # read the result file and return the serotype
    serotype = ""
    with open(seqsero_out) as tsv_file:
        for line in csv.reader(tsv_file, delimiter="\t"):
            # Blank rows and label-only rows carry no value.  Skipping them
            # explicitly replaces the former bare ``except: pass``, which hid
            # every other error as well.
            if len(line) > 1 and "Predicted serotype" in line[0]:
                serotype = line[1]
    return serotype
예제 #4
0
    def calc_mash_dist(self, id, mash_mounting, sketch_name, mash_result):
        """Run ``mash dist`` for one isolate unless its result file exists.

        The result is looked for at
        ``<self.output_dir>/mash_species/<id>/<mash_result>``.  When it is
        missing, the isolate's sketch is compared against
        /db/RefSeqSketchesDefaults.msh inside a container started through
        ``sb_programs.Run`` with the supplied ``mash_mounting``.
        """
        existing_result = os.path.join(self.output_dir, "mash_species", id,
                                       mash_result)
        if os.path.isfile(existing_result):
            # nothing to do: distances were already calculated
            return

        # image/tag for the mash container come from the loaded configuration
        mash_settings = self.config["parameters"]["mash"]
        dist_command = f"bash -c 'mash dist /db/RefSeqSketchesDefaults.msh /dataout/{id}/{sketch_name} > /dataout/{id}/{mash_result}'"

        # build the container job and execute it
        dist_job = sb_programs.Run(
            command=dist_command,
            path=mash_mounting,
            image=mash_settings["image"],
            tag=mash_settings["tag"],
        )
        dist_job.run()
예제 #5
0
    def create_mash_sketch(self, id, mash_mounting, fwd_read, rev_read,
                           sketch_name):
        """Run ``mash sketch`` on an isolate's read pair unless the sketch exists.

        The sketch is looked for at
        ``<self.output_dir>/mash_species/<id>/<sketch_name>``.  When it is
        missing, a container started through ``sb_programs.Run`` creates
        /dataout/<id> and sketches the two read files found under /datain.
        """
        existing_sketch = os.path.join(self.output_dir, "mash_species", id,
                                       sketch_name)
        if os.path.isfile(existing_sketch):
            # nothing to do: the sketch was already created
            return

        # image/tag for the mash container come from the loaded configuration
        mash_settings = self.config["parameters"]["mash"]
        sketch_command = f"bash -c 'mkdir -p /dataout/{id} && mash sketch -r -m 2 -o /dataout/{id}/{sketch_name} /datain/{fwd_read} /datain/{rev_read}'"

        # build the container job and execute it
        sketch_job = sb_programs.Run(
            command=sketch_command,
            path=mash_mounting,
            image=mash_settings["image"],
            tag=mash_settings["tag"],
        )
        sketch_job.run()
예제 #6
0
def assembly_metrics(id, output_dir, assembly, quast_out_file, isolate_qual, tredegar_config, logger):
    """Record genome length and contig count for one isolate from a Quast report.

    If ``quast_out_file`` does not exist, Quast is run on ``assembly`` in a
    container first (via ``sb_programs.Run``).  If the assembly itself is
    missing, both metrics are set to "ASSEMBLY_FAILED" and the function returns.

    Args:
        id: Isolate identifier; key into ``isolate_qual``.
        output_dir: Directory under which ``quast_output`` is created.
        assembly: Path to the assembly file; its directory is mounted at /datain.
        quast_out_file: Path to the tab-separated Quast report to read.
        isolate_qual: Mapping updated in place at
            ``isolate_qual[id]["est_genome_length"]`` and
            ``isolate_qual[id]["number_contigs"]``.
        tredegar_config: Mapping read at ``["parameters"]["quast"]`` for
            ``image``, ``tag`` and ``params`` (appended verbatim to the command).
        logger: Object with ``info`` and ``error`` methods.

    Raises:
        ValueError: If the report yields no genome length or no contig count.
    """
    # create and run quast object if results don't already exist
    if not os.path.isfile(quast_out_file):

        # generate the path for quast output
        quast_output_path = os.path.join(output_dir, "quast_output")
        pathlib.Path(quast_output_path).mkdir(parents=True, exist_ok=True)

        # quast mounting dictionary paths: host directory -> container path
        quast_mounting = {os.path.dirname(assembly): '/datain', quast_output_path: '/dataout'}

        # ensure an assembly was generated
        if not os.path.isfile(assembly):
            isolate_qual[id]["est_genome_length"] = "ASSEMBLY_FAILED"
            isolate_qual[id]["number_contigs"] = "ASSEMBLY_FAILED"
            return

        # create the quast command
        assembly_file_name = os.path.basename(assembly)
        quast_configuration = tredegar_config["parameters"]["quast"]
        quast_params = quast_configuration["params"]
        quast_command = f"bash -c 'quast.py /datain/{assembly_file_name} -o /dataout/{id} {quast_params}'"

        # create the quast object
        quast_obj = sb_programs.Run(command=quast_command, path=quast_mounting, image=quast_configuration["image"], tag=quast_configuration["tag"])

        logger.info(f"Gathering {id} assembly quality metrics with Quast. . .")
        quast_obj.run()

    # Start unset so a report without these rows reaches the ValueError checks
    # below instead of failing with UnboundLocalError.
    genome_length = None
    number_contigs = None

    # open the quast results to capture relevant metrics
    with open(quast_out_file) as tsv_file:
        for line in csv.reader(tsv_file, delimiter="\t"):
            # blank or label-only rows hold no metric value
            if len(line) < 2:
                continue
            # substring match: when several rows match, the last one wins
            if "Total length" in line[0]:
                genome_length = line[1]
                isolate_qual[id]["est_genome_length"] = genome_length
            if "# contigs" in line[0]:
                number_contigs = line[1]
                isolate_qual[id]["number_contigs"] = number_contigs

    if not genome_length:
        logger.error(f"ERROR: No genome length predicted for isolate {id}")
        raise ValueError(f"Unable to predict genome length for isolate {id}")
    if not number_contigs:
        logger.error("No number of contigs predicted")
        raise ValueError(f"ERROR: Unable to predict number of contigs for isolate {id}")
예제 #7
0
def ref_free_snp(output_dir, group, ksnp3_matrix, foushee_config, logger):
    """Run kSNP3 for a group of isolates unless ``ksnp3_matrix`` already exists.

    ``output_dir`` (made absolute) is mounted at /datain and its
    ``ksnp3_output/<group>`` sub-directory at /dataout.  The image, tag,
    k-mer length and core-SNP option are read from
    ``foushee_config["parameters"]["ksnp3"]``.
    """
    if os.path.isfile(ksnp3_matrix):
        # the SNP analysis output is already present
        return

    logger.info(f"Performing SNP analysis for isolates identified as {group}")

    # host directory -> container path
    host_root = os.path.abspath(output_dir)
    group_dir = os.path.join(os.path.abspath(output_dir), "ksnp3_output", group)
    mounts = {host_root: '/datain', group_dir: '/dataout'}

    # build the kSNP3 command from the configured parameters
    ksnp3_settings = foushee_config["parameters"]["ksnp3"]
    ksnp3_options = ksnp3_settings["params"]
    command = f"kSNP3 -in /datain/ksnp3_output/{group}/{group}_assemblies.txt -outdir /dataout/ -k {ksnp3_options['kmer_length']} {ksnp3_options['core_snps_only']} "

    # create the container job and run it
    job = sb_programs.Run(
        command=command,
        path=mounts,
        image=ksnp3_settings["image"],
        tag=ksnp3_settings["tag"],
    )
    job.run()
예제 #8
0
def snp_matrix(output_dir, group, snp_dists_output_dir, snp_dists_result, foushee_config, logger):
    """Run snp-dists for a group of isolates unless ``snp_dists_result`` exists.

    ``<output_dir>/ksnp3_output/<group>`` (made absolute) is mounted at /datain
    and ``snp_dists_output_dir`` at /dataout.  The container reads
    /datain/core_SNPs_matrix.fasta and redirects the pairwise distance matrix
    to /dataout/<group>_pairwise_snp_distance_matrix.tsv.
    """
    if os.path.isfile(snp_dists_result):
        # the distance matrix is already present
        return

    logger.info(f"Performing SNP analysis for isolates identified as {group}")

    # make sure the snp-dists output directory exists
    results_dir = pathlib.Path(os.path.join(snp_dists_output_dir))
    results_dir.mkdir(parents=True, exist_ok=True)

    # host directory -> container path
    ksnp3_group_dir = os.path.join(os.path.abspath(output_dir), "ksnp3_output", group)
    mounts = {ksnp3_group_dir: '/datain', snp_dists_output_dir: '/dataout'}

    # build the snp-dists command; configured params are appended verbatim
    snp_dists_settings = foushee_config["parameters"]["snp-dists"]
    snp_dists_params = snp_dists_settings["params"]
    command = f"bash -c 'snp-dists /datain/core_SNPs_matrix.fasta > /dataout/{group}_pairwise_snp_distance_matrix.tsv {snp_dists_params}'"

    # create the container job and run it
    job = sb_programs.Run(
        command=command,
        path=mounts,
        image=snp_dists_settings["image"],
        tag=snp_dists_settings["tag"],
    )
    job.run()
예제 #9
0
def assemble_contigs(id, output_dir, clean_read_file_path, fwd_read_clean, rev_read_clean, memory, cpus, assembly, tredegar_config, logger):
    """Assemble one isolate's cleaned reads with shovill unless ``assembly`` exists.

    ``clean_read_file_path`` is mounted at /datain and
    ``<output_dir>/shovill_output`` at /dataout; shovill writes to
    /dataout/<id>/.  ``memory`` and ``cpus`` are passed as ``--ram`` and
    ``--cpus``; extra options come from
    ``tredegar_config["parameters"]["shovill"]["params"]``.
    """
    if os.path.isfile(assembly):
        # an assembly is already present for this isolate
        return

    # make sure the shovill output directory exists
    shovill_dir = os.path.join(output_dir, "shovill_output")
    pathlib.Path(shovill_dir).mkdir(parents=True, exist_ok=True)

    # host directory -> container path
    mounts = {clean_read_file_path: '/datain', shovill_dir: '/dataout'}

    # build the shovill command; configured params are appended verbatim
    shovill_settings = tredegar_config["parameters"]["shovill"]
    shovill_params = shovill_settings["params"]
    command = f"bash -c 'shovill --outdir /dataout/{id}/ -R1 /datain/{fwd_read_clean} -R2 /datain/{rev_read_clean} --ram {memory} --cpus {cpus} --force {shovill_params}'"

    # create the container job, announce it, then run it
    job = sb_programs.Run(
        command=command,
        path=mounts,
        image=shovill_settings["image"],
        tag=shovill_settings["tag"],
    )
    logger.info(f"Assemblying {id} with shovill. . .")
    job.run()
예제 #10
0
def clean_reads(id, output_dir, raw_read_file_path, fwd_read, rev_read, fwd_read_clean, tredegar_config, logger):
    """Clean one isolate's read pair with seqyclean unless the output exists.

    The run is skipped when
    ``<output_dir>/seqyclean_output/<id>/<fwd_read_clean>`` is already a file.
    Otherwise ``raw_read_file_path`` is mounted at /datain and
    ``<output_dir>/seqyclean_output`` at /dataout, and seqyclean writes to the
    /dataout/<id>/<id>_clean prefix.  Minimum read length, contaminants and the
    quality-trimming option come from
    ``tredegar_config["parameters"]["seqyclean"]["params"]``.
    """
    # path of the cleaned forward read that marks a finished run
    cleaned_fwd = os.path.join(output_dir, "seqyclean_output", id, fwd_read_clean)
    if os.path.isfile(cleaned_fwd):
        return

    # make sure the seqyclean output directory exists
    seqyclean_dir = os.path.join(output_dir, "seqyclean_output")
    pathlib.Path(seqyclean_dir).mkdir(parents=True, exist_ok=True)

    # host directory -> container path
    mounts = {raw_read_file_path: '/datain', seqyclean_dir: '/dataout'}

    # build the seqyclean command from the configured parameters
    seqyclean_settings = tredegar_config["parameters"]["seqyclean"]
    seqyclean_params = seqyclean_settings["params"]
    command = f"bash -c 'seqyclean -1 /datain/{fwd_read} -2 /datain/{rev_read} -o /dataout/{id}/{id}_clean -minlen {seqyclean_params['minimum_read_length']} -c {seqyclean_params['contaminants']} {seqyclean_params['quality_trimming']}'"

    # create the container job, announce it, then run it
    job = sb_programs.Run(
        command=command,
        path=mounts,
        image=seqyclean_settings["image"],
        tag=seqyclean_settings["tag"],
    )
    logger.info(f"Cleaning {id} read data with seqyclean. . .")
    job.run()
예제 #11
0
def read_metrics(id, output_dir, raw_read_file_path, all_reads, isolate_qual, cgp_out, tredegar_config, logger):
    """Record read quality and estimated coverage for one isolate.

    If ``cgp_out`` does not exist, run_assembly_readMetrics.pl is run in a
    container first (via ``sb_programs.Run``), using
    ``isolate_qual[id]["est_genome_length"]`` as the expected genome size.

    Args:
        id: Isolate identifier; key into ``isolate_qual``.
        output_dir: Directory under which ``cg_pipeline_output`` is created.
        raw_read_file_path: Host directory mounted at /datain.
        all_reads: Read file argument for the script, relative to /datain.
        isolate_qual: Mapping updated in place with ``r1_q``, ``r2_q`` and
            ``est_cvg`` under ``isolate_qual[id]``.
        cgp_out: Path to the tab-separated metrics file; must have ``File``,
            ``avgQuality`` and ``coverage`` columns.
        tredegar_config: Mapping read at ``["parameters"]["cg_pipeline"]`` for
            ``image``, ``tag`` and ``params["subsample"]``.
        logger: Object with an ``info`` method; only used when the tool runs.

    Returns:
        None.  ``est_cvg`` is the summed coverage of all forward- and
        reverse-read rows and is left untouched when no row matches.
    """
    # check for cg_pipeline output file if not exists run the cg_pipeline object
    if not os.path.isfile(cgp_out):
        # set genome length
        genome_length = isolate_qual[id]["est_genome_length"]

        # create cg_pipeline output path
        cg_pipeline_output_path = os.path.join(output_dir, "cg_pipeline_output")
        pathlib.Path(cg_pipeline_output_path).mkdir(parents=True, exist_ok=True)

        # generate path mounting for container: host directory -> container path
        cg_mounting = {raw_read_file_path: '/datain', cg_pipeline_output_path: '/dataout'}

        # generate command for cg_pipeline
        cgp_configuration = tredegar_config["parameters"]["cg_pipeline"]
        cgp_params = cgp_configuration["params"]
        cgp_result_file = id + "_readMetrics.tsv"
        cg_command = f"bash -c 'run_assembly_readMetrics.pl {cgp_params['subsample']} /datain/{all_reads} -e {genome_length} > /dataout/{cgp_result_file}'"

        # generate the cg_pipeline object
        cg_obj = sb_programs.Run(command=cg_command, path=cg_mounting, image=cgp_configuration["image"], tag=cgp_configuration["tag"])

        logger.info(f"Getting {id} sequencing quality metrics with CG Pipeline. . .")
        cg_obj.run()

    fwd_formats = ("_1.fastq", "_R1.fastq", "_1P.fq.gz")
    rev_formats = ("_2.fastq", "_R2.fastq", "_2P.fq.gz")

    # Coverage is summed locally and stored once.  Adding the reverse-read
    # coverage straight onto isolate_qual[id]["est_cvg"] raised KeyError when
    # the reverse row came first, or added onto a stale value from an earlier
    # run that the forward row then overwrote.
    total_coverage = 0.0
    matched_any = False

    # open cg_pipeline results and capture relevant metrics
    with open(cgp_out) as tsv_file:
        for line in csv.DictReader(tsv_file, delimiter="\t"):
            file_name = line["File"]
            if any(fwd_format in file_name for fwd_format in fwd_formats):
                isolate_qual[id]["r1_q"] = line["avgQuality"]
                total_coverage += float(line["coverage"])
                matched_any = True
            if any(rev_format in file_name for rev_format in rev_formats):
                isolate_qual[id]["r2_q"] = line["avgQuality"]
                total_coverage += float(line["coverage"])
                matched_any = True

    if matched_any:
        isolate_qual[id]["est_cvg"] = total_coverage
예제 #12
0
def main():

    #setup argparser to display help if no arguments
    class MyParser(argparse.ArgumentParser):
        def error(self, message):
            self.print_help()
            sys.stderr.write('\nerror: %s\n' % message)
            sys.exit(1)

    docker_config_path = os.path.abspath(
        os.path.dirname(__file__) + '/' + 'core/docker_config.json')

    parser = MyParser(
        description=f"StaPH-B ToolKit Programs v{autoupdate.version}",
        usage=
        "staphb-tk [optional arguments] <application> [application arguments]",
        add_help=True)
    subparsers = parser.add_subparsers(title='custom toolkit applications',
                                       metavar='',
                                       dest="subparser_name",
                                       parser_class=MyParser)
    parser.add_argument(
        "--docker_config",
        "-c",
        default=docker_config_path,
        metavar="<path>",
        help=
        "Configuration file for container images and tags; if none provided, default container versions will be used."
    )
    parser.add_argument(
        "--get_docker_config",
        default=False,
        action="store_true",
        help="Get the default docker container configureation file.")
    parser.add_argument(
        "--list",
        "-l",
        default=False,
        action="store_true",
        help="List all of the software available in the toolkit.")
    parser.add_argument("--update",
                        default=False,
                        action="store_true",
                        help="Check for and install a ToolKit update.")
    parser.add_argument(
        "--auto_update",
        default=False,
        action="store_true",
        help="Toggle automatic ToolKit updates. Default is off.")
    ###custom apps
    ## Mash Species
    parser_mash_species = subparsers.add_parser(
        'mash_species',
        help=
        'MASH_species uses a pre-sketched RefSeq database to identify the isolate species from paired-end read data.',
        usage="sb_mash_species <input> [options]")
    parser_mash_species.add_argument(
        "input",
        type=str,
        nargs='?',
        help="path to dir containing paire-end read files")
    parser_mash_species.add_argument("-o",
                                     metavar='path',
                                     default="",
                                     type=str,
                                     help="Path for output directory",
                                     required=False)

    #parser for applications
    #-----------------------------------------
    parser_abricate = subparsers.add_parser('abricate', add_help=False)
    parser_augur = subparsers.add_parser('augur', add_help=False)
    parser_bbtools = subparsers.add_parser('bbtools', add_help=False)
    parser_bwa = subparsers.add_parser('bwa', add_help=False)
    parser_canuracon = subparsers.add_parser('canu-racon', add_help=False)
    parser_centroid = subparsers.add_parser('centroid', add_help=False)
    parser_cfsansnp = subparsers.add_parser('cfsan-snp', add_help=False)
    parser_circlator = subparsers.add_parser('circlator', add_help=False)
    parser_clustalo = subparsers.add_parser('clustalo', add_help=False)
    parser_emmtypingtool = subparsers.add_parser('emm-typing-tool',
                                                 add_help=False)
    parser_fastani = subparsers.add_parser('fastani', add_help=False)
    parser_fastqc = subparsers.add_parser('fastqc', add_help=False)
    parser_fasttree = subparsers.add_parser('fasttree', add_help=False)
    parser_filtong = subparsers.add_parser('filtlong', add_help=False)
    parser_flye = subparsers.add_parser('flye', add_help=False)
    parser_iqtree = subparsers.add_parser('iqtree', add_help=False)
    parser_ivar = subparsers.add_parser('ivar', add_help=False)
    parser_ivar_SC2 = subparsers.add_parser('ivar-SC2', add_help=False)
    parser_kma = subparsers.add_parser('kma', add_help=False)
    parser_kraken = subparsers.add_parser('kraken', add_help=False)
    parser_krakenbuild = subparsers.add_parser('kraken-build', add_help=False)
    parser_kraken2 = subparsers.add_parser('kraken2', add_help=False)
    parser_kraken2build = subparsers.add_parser('kraken2-build',
                                                add_help=False)
    parser_ksnp3 = subparsers.add_parser('ksnp3', add_help=False)
    parser_legsta = subparsers.add_parser('legsta', add_help=False)
    parser_lyveset = subparsers.add_parser('lyveset', add_help=False)
    parser_mafft = subparsers.add_parser('mafft', add_help=False)
    parser_mash = subparsers.add_parser('mash', add_help=False)
    parser_mashtree = subparsers.add_parser('mashtree', add_help=False)
    parser_medaka = subparsers.add_parser('medaka', add_help=False)
    parser_minimap2 = subparsers.add_parser('minimap2', add_help=False)
    parser_mlst = subparsers.add_parser('mlst', add_help=False)
    parser_mugsy = subparsers.add_parser('mugsy', add_help=False)
    parser_multiqc = subparsers.add_parser('multiqc', add_help=False)
    parser_nanoplot = subparsers.add_parser('nanoplot', add_help=False)
    parser_ncbiamrfinder_plus = subparsers.add_parser('ncbi-amrfinder-plus',
                                                      add_help=False)
    parser_orthofinder = subparsers.add_parser('orthofinder', add_help=False)
    parser_pangolin = subparsers.add_parser('pangolin', add_help=False)
    parser_pilon = subparsers.add_parser('pilon', add_help=False)
    parser_plasmidseeker = subparsers.add_parser('plasmidseeker',
                                                 add_help=False)
    parser_prokka = subparsers.add_parser('prokka', add_help=False)
    parser_quast = subparsers.add_parser('quast', add_help=False)
    parser_rasusa = subparsers.add_parser('rasusa', add_help=False)
    parser_raxml = subparsers.add_parser('raxml', add_help=False)
    parser_roary = subparsers.add_parser('roary', add_help=False)
    parser_salmid = subparsers.add_parser('salmid', add_help=False)
    parser_samtools = subparsers.add_parser('samtools', add_help=False)
    parser_seqsero = subparsers.add_parser('seqsero', add_help=False)
    parser_seqsero2 = subparsers.add_parser('seqsero2', add_help=False)
    parser_seqyclean = subparsers.add_parser('seqyclean', add_help=False)
    parser_seroba = subparsers.add_parser('seroba', add_help=False)
    parser_serotypefinder = subparsers.add_parser('serotypefinder',
                                                  add_help=False)
    parser_shovill = subparsers.add_parser('shovill', add_help=False)
    parser_sistr = subparsers.add_parser('sistr', add_help=False)
    parser_skesa = subparsers.add_parser('skesa', add_help=False)
    parser_snippy = subparsers.add_parser('snippy', add_help=False)
    parser_snpdists = subparsers.add_parser('snp-dists', add_help=False)
    parser_snpsites = subparsers.add_parser('snp-sites', add_help=False)
    parser_spades = subparsers.add_parser('spades', add_help=False)
    parser_sratoolkit = subparsers.add_parser('sra-toolkit', add_help=False)
    parser_staramr = subparsers.add_parser('staramr', add_help=False)
    parser_tiptoft = subparsers.add_parser('tiptoft', add_help=False)
    parser_trimmomatic = subparsers.add_parser('trimmomatic', add_help=False)
    parser_unicycler = subparsers.add_parser('unicycler', add_help=False)
    parser_wtdbg2 = subparsers.add_parser('wtdbg2', add_help=False)

    #-----------------------------------------

    def print_prog_list():
        print("Available programs:")
        header = ["Command", "Description", "-------", "-----------"]
        print(f"{header[0]:<25}{header[1]:^10}")
        print(f"{header[2]:<25}{header[3]:^10}")
        for key in progs:
            print(f"{key:<25}{progs[key]:^10}")
        return

    #handle the arguments and perform automatic path replacement
    parser_args = parser.parse_known_args()
    program = parser_args[0].subparser_name
    args = parser_args[1]

    #check for updates
    if parser_args[0].update:
        autoupdate.check_for_updates()
        sys.exit(0)

    if parser_args[0].auto_update:
        #get current status
        update_status = autoupdate.check_update_status()
        if update_status:
            autoupdate.toggle_updater(False)
        else:
            autoupdate.toggle_updater(True)

    if autoupdate.check_update_status():
        autoupdate.check_for_updates()

    #give user docker config if asked
    if parser_args[0].get_docker_config:
        cwd = os.getcwd()
        copy(
            docker_config_path,
            os.path.join(
                os.getcwd(),
                date.today().strftime("%y-%m-%d") + "_docker_config.json"))
        sys.exit(0)

    #display list of programs if needed
    if parser_args[0].list:
        print_prog_list()
        sys.exit(0)

    if program == None:
        parser.print_help()
        sys.exit(1)

    #Run autopathing
    arg_string, path_map = path_replacer(args, os.getcwd())

    # set the configuration file
    if parser_args[0].docker_config == "/core/docker_config.json":
        # use default
        config_file_path = os.path.abspath(
            os.path.dirname(
                os.path.realpath(__file__))) + parser_args[0].docker_config
    else:
        config_file_path = os.path.abspath(parser_args[0].docker_config)

    with open(config_file_path, 'r') as config_file:
        config = json.load(config_file)

    #Custom program specific execution code
    #-----------------------------------------
    if program == 'mash_species':
        #get output dir if supplied, if not set it to cwd
        output_dir = None
        if not parser_args[0].o:
            output_dir = os.getcwd()
        else:
            try:
                output_dir = os.path.abspath(parser_args[0].o)
            except (AttributeError, TypeError):
                print("Please enter a valid output path.")
                sys.exit(1)

        #get input path, if not supplied print help
        try:
            path = os.path.abspath(parser_args[0].input)
        except (AttributeError, TypeError) as e:
            parser_mash_species.print_help()
            print("Please enter a valid input path.")
            sys.exit(1)

        #create and run the mash species object
        mash_species_obj = MashSpecies(path=path, output_dir=output_dir)
        mash_species_obj.run()

    #Program specific execution code
    #-----------------------------------------
    if program == 'ivar-SC2':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = " "
        command = "ivar " + arg_string
        program_configuration = config["parameters"]["ivar-SC2"]

    if program == 'ivar':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = " "
        command = "ivar " + arg_string
        program_configuration = config["parameters"]["ivar"]

    if program == 'wtdbg2':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "wtdbg2 " + arg_string
        program_configuration = config["parameters"]["wtdbg2"]

    if program == 'trimmomatic':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "trimmomatic " + arg_string
        program_configuration = config["parameters"]["trimmomatic"]

    if program == 'tiptoft':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "tiptoft " + arg_string
        program_configuration = config["parameters"]["tiptoft"]

    if program == 'staramr':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "staramr " + arg_string
        program_configuration = config["parameters"]["staramr"]

    if program == 'sra-toolkit':
        if not re.search('[a-zA-Z]', arg_string):
            print(
                "SRA toolkit tool must be specified, e.g. staphb-tk sra-toolkit fasterq-dump, staphb-tk sra-toolkit sra-pileup, etc. \n\nMore info on SRA Toolkit usage at: https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=toolkit_doc."
            )
            sys.exit()
        command = " " + arg_string
        program_configuration = config["parameters"]["sra-toolkit"]

    if program == 'snp-dists':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "snp-dists " + arg_string
        program_configuration = config["parameters"]["snp-dists"]

    if program == 'snp-sites':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "snp-sites " + arg_string
        program_configuration = config["parameters"]["snp-sites"]

    if program == 'snippy':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "snippy " + arg_string
        program_configuration = config["parameters"]["snippy"]

    if program == 'skesa':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "skesa " + arg_string
        program_configuration = config["parameters"]["skesa"]

    if program == 'sistr':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "sistr " + arg_string
        program_configuration = config["parameters"]["sistr"]

    if program == 'seroba':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "seroba " + arg_string
        program_configuration = config["parameters"]["seroba"]

    if program == 'seqsero2':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "SeqSero2_package.py " + arg_string
        program_configuration = config["parameters"]["seqsero2"]

    if program == 'salmid':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "SalmID.py " + arg_string
        program_configuration = config["parameters"]["salmid"]

    if program == 'rasusa':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "rasusa " + arg_string
        program_configuration = config["parameters"]["rasusa"]

    if program == 'plasmidseeker':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "plasmidseeker.pl " + arg_string
        program_configuration = config["parameters"]["plasmidseeker"]

    if program == 'pangolin':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "--help"
        command = "pangolin " + arg_string
        program_configuration = config["parameters"]["pangolin"]

    if program == 'pilon':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "--help"
        command = "pilon " + arg_string
        program_configuration = config["parameters"]["pilon"]

    if program == 'orthofinder':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "orthofinder " + arg_string
        program_configuration = config["parameters"]["orthofinder"]

    if program == 'ncbi-amrfinder-plus':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "--help"
        command = "amrfinder " + arg_string
        program_configuration = config["parameters"]["ncbi-amrfinder-plus"]

    if program == 'nanoplot':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "NanoPlot " + arg_string
        program_configuration = config["parameters"]["nanoplot"]

    if program == 'multiqc':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "multiqc " + arg_string
        program_configuration = config["parameters"]["multiqc"]

    if program == 'mugsy':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "mugsy " + arg_string
        program_configuration = config["parameters"]["mugsy"]

    if program == 'mlst':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "mlst " + arg_string
        program_configuration = config["parameters"]["mlst"]

    if program == 'medaka':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "medaka " + arg_string
        program_configuration = config["parameters"]["medaka"]

    if program == 'mashtree':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "mashtree " + arg_string
        program_configuration = config["parameters"]["mashtree"]

    if program == 'legsta':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "legsta " + arg_string
        program_configuration = config["parameters"]["legsta"]

    if program == 'ksnp3':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = ""
        command = "kSNP3 " + arg_string
        program_configuration = config["parameters"]["ksnp3"]

    if program == 'kraken2-build':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "kraken2-build " + arg_string
        program_configuration = config["parameters"]["kraken2"]

    if program == 'kraken2':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "kraken2 " + arg_string
        program_configuration = config["parameters"]["kraken2"]

    if program == 'kraken-build':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "kraken-build " + arg_string
        program_configuration = config["parameters"]["kraken"]

    if program == 'kraken':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "kraken " + arg_string
        program_configuration = config["parameters"]["kraken"]

    if program == 'kma':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "kma " + arg_string
        program_configuration = config["parameters"]["kma"]

    if program == 'flye':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "flye " + arg_string
        program_configuration = config["parameters"]["flye"]

    if program == 'filtlong':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "filtlong " + arg_string
        program_configuration = config["parameters"]["filtlong"]

    if program == 'fastqc':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "fastqc " + arg_string
        program_configuration = config["parameters"]["fastqc"]

    if program == 'fasttree':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "FastTree " + arg_string
        program_configuration = config["parameters"]["fasttree"]

    if program == 'fastani':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "fastANI " + arg_string
        program_configuration = config["parameters"]["fastani"]

    if program == 'emm-typing-tool':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "emm_typing.py " + arg_string
        program_configuration = config["parameters"]["emm-typing-tool"]

    if program == 'circlator':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "circlator " + arg_string
        program_configuration = config["parameters"]["circlator"]

    if program == 'cfsan-snp':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "cfsan_snp_pipeline " + arg_string
        program_configuration = config["parameters"]["cfsan-snp-pipeline"]

    if program == 'canu-racon':
        if not re.search('[a-zA-Z]', arg_string):
            print(
                "This is a bundled application that requires a specific commands to be used (i.e. staphb-tk canu-racon canu -h) please see the documentation for Canu, Minimap2 and Racon to use."
            )
            sys.exit()
        command = " " + arg_string
        program_configuration = config["parameters"]["canu-racon"]

    if program == 'bbtools':
        if not re.search('[a-zA-Z]', arg_string):
            print(
                "BBTools shell script must be specified, e.g. staphb-tk bbtools bbmap.sh, staphb-tk bbtools bbduk.sh, etc. \n\nMore info on BBTools at https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/."
            )
            sys.exit()
        command = " " + arg_string
        program_configuration = config["parameters"]["bbtools"]

    if program == 'raxml':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "raxmlHPC " + arg_string
        program_configuration = config["parameters"]["raxml"]

    if program == 'spades':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "spades.py " + arg_string
        program_configuration = config["parameters"]["spades"]

    if program == 'mash':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "mash " + arg_string
        program_configuration = config["parameters"]["mash"]

    if program == 'seqyclean':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "seqyclean " + arg_string
        program_configuration = config["parameters"]["seqyclean"]

    if program == 'shovill':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "shovill " + arg_string
        program_configuration = config["parameters"]["shovill"]

    if program == 'prokka':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "prokka " + arg_string
        program_configuration = config["parameters"]["prokka"]

    if program == 'clustalo':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "clustalo " + arg_string
        program_configuration = config["parameters"]["clustalo"]

    if program == 'abricate':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "abricate " + arg_string
        program_configuration = config["parameters"]["abricate"]

    if program == 'augur':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "augur " + arg_string
        program_configuration = config["parameters"]["augur"]

    if program == 'iqtree':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "iqtree " + arg_string
        program_configuration = config["parameters"]["iqtree"]

    if program == 'lyveset':
        if not re.search('[a-zA-Z]', arg_string):
            print(
                "Lyev-SET perl script must be specified, e.g. staphb-tk lyveset launch_set.pl, staphb-tk lyveset set_manage.pl, staphb-tk lyveset run_assembly_readMeterics.pl. \n\nMore info on Lyve-SET usage at: github.com/lskatz/lyve-SET."
            )
            sys.exit()
        command = "" + arg_string
        program_configuration = config["parameters"]["lyveset"]

    if program == 'quast':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "quast.py " + arg_string
        program_configuration = config["parameters"]["quast"]

    if program == 'roary':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "roary " + arg_string
        program_configuration = config["parameters"]["roary"]

    if program == 'seqsero':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "SeqSero.py " + arg_string
        program_configuration = config["parameters"]["seqsero"]

    if program == 'samtools':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = ""
        command = "samtools " + arg_string
        program_configuration = config["parameters"]["samtools"]

    if program == 'serotypefinder':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "serotypefinder.pl " + arg_string
        program_configuration = config["parameters"]["serotypefinder"]

    if program == 'bwa':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = ""
        command = "bwa " + arg_string
        program_configuration = config["parameters"]["bwa"]

    if program == 'minimap2':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "minimap2 " + arg_string
        program_configuration = config["parameters"]["minimap2"]

    if program == 'centroid':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "centroid.py " + arg_string
        program_configuration = config["parameters"]["centroid"]

    if program == 'unicycler':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "unicycler " + arg_string
        program_configuration = config["parameters"]["unicycler"]

    if program == 'mafft':
        if not re.search('[a-zA-Z]', arg_string):
            arg_string = "-h"
        command = "mafft " + arg_string
        program_configuration = config["parameters"]["mafft"]

    #Run the program
    #-----------------------------------------
    program_object = sb_prog.Run(command=command,
                                 path=path_map,
                                 image=program_configuration["image"],
                                 tag=program_configuration["tag"])
    program_object.run()
예제 #13
0
def ecoli_serotype(output_dir, assembly, id, tredegar_config, logger):
    """Return the predicted E. coli serotype of an isolate as ``"<O>:<H>"``.

    If ``<output_dir>/serotypefinder_output/<id>/results_tab.txt`` does not
    exist yet, SerotypeFinder is run in a container on ``assembly`` to produce
    it; an existing results file is reused and the container is not run.
    The tab-separated results are then reduced to a single O-type and H-type.

    Args:
        output_dir: Directory under which ``serotypefinder_output`` is created.
        assembly: Path to the assembly file given to SerotypeFinder. Only used
            when the results file does not already exist.
        id: Isolate identifier; names the per-isolate results sub-directory.
        tredegar_config: Configuration mapping; the entry
            ``["parameters"]["serotypefinder"]`` supplies ``image``, ``tag``
            and ``params`` (``database``, ``species``,
            ``nucleotide_agreement``, ``percent_coverage``).
        logger: Logger used to announce the SerotypeFinder run.

    Returns:
        The serotype string ``"<O>:<H>"`` (either side may be empty), ``"NA"``
        when neither an O-type nor an H-type was found, or ``None`` when the
        results file is missing and ``assembly`` is not an existing file.

    Raises:
        FileNotFoundError: If the results file is still missing after
            SerotypeFinder has been run.
    """
    # ambiguous allele calls: an O-type found in one of these lists is
    # replaced further down by the call made from the other gene
    matched_wzx = ["O2", "O50", "O17", "O77", "O118", "O151", "O169", "O141ab", "O141ac"]
    matched_wzy = ["O13", "O135", "O17", "O44", "O123", "O186"]

    # path to serotypefinder results file, if it doesn't exist run the serotypefinder
    stf_out = f"{output_dir}/serotypefinder_output/{id}/results_tab.txt"
    if not os.path.isfile(stf_out):
        # output path for serotypefinder
        serotypefinder_output_path = os.path.join(output_dir, "serotypefinder_output")
        pathlib.Path(serotypefinder_output_path).mkdir(parents=True, exist_ok=True)

        # setup container mounting; nothing can be run without an assembly
        if not os.path.isfile(assembly):
            return
        assembly_path = os.path.dirname(assembly)
        stf_mounting = {assembly_path: '/datain', serotypefinder_output_path: '/dataout'}

        # generate serotypefinder command
        assembly_name = os.path.basename(assembly)
        stf_configuration = tredegar_config["parameters"]["serotypefinder"]
        stf_params = stf_configuration["params"]
        stf_command = f"serotypefinder.pl -d {stf_params['database']} -i /datain/{assembly_name} -b /blast-2.2.26/ -o /dataout/{id} -s {stf_params['species']} -k {stf_params['nucleotide_agreement']} -l {stf_params['percent_coverage']}"

        # create serotypefinder object
        stf_obj = sb_programs.Run(command=stf_command, path=stf_mounting, image=stf_configuration["image"], tag=stf_configuration["tag"])
        logger.info(f"Isolate {id} identified as E.coli. Running SerotypeFinder for serotype prediction")
        stf_obj.run()

    # Column 0 is matched against the gene name and column 5 is taken as the
    # call for that gene.
    gene_column = 0
    call_column = 5

    wzx_allele = ""
    wzy_allele = ""
    wzm_allele = ""
    h_type = ""

    # process the results of serotypefinder as per literature guidelines (Joensen, et al. 2015, DOI: 10.1128/JCM.00008-15)
    with open(stf_out) as tsv_file:
        for line in csv.reader(tsv_file, delimiter="\t"):
            # csv.reader yields [] for blank lines; such rows (and truncated
            # ones) carry no call and would otherwise raise IndexError.
            if len(line) <= call_column:
                continue

            gene = line[gene_column]
            call = line[call_column]

            # any gene name containing "fl" supplies the H-type
            if "fl" in gene:
                h_type = call

            # when a gene appears on several rows the last row wins
            if gene == "wzx":
                wzx_allele = call
            if gene == "wzy":
                wzy_allele = call
            if gene == "wzm":
                wzm_allele = call

    # prefer the wzx call, then wzy, then wzm
    o_type = wzx_allele or wzy_allele or wzm_allele

    # resolve ambiguous calls using the other gene; the two checks run in
    # sequence, so the second one sees the result of the first
    if o_type in matched_wzx:
        o_type = wzy_allele
    if o_type in matched_wzy:
        o_type = wzx_allele
    serotype = f"{o_type}:{h_type}"

    # NA if no o-type or h-type identified
    if serotype == ":":
        serotype = "NA"

    return serotype