Exemple #1
0
    def read_metrics(self):
        """Generate CG-Pipeline read metrics for every sample lacking them.

        For each entry of ``self.runfiles.reads`` the method looks for
        ``<id>_readMetrics.tsv`` inside ``self.cg_out_dir``.  When that file
        is missing it runs ``run_assembly_readMetrics.pl`` through
        ``calldocker.call`` with ``self.path`` mounted at ``/datain`` and
        ``self.cg_out_dir`` mounted at ``/dataout``.

        The expected genome size handed to the script comes from the FastANI
        reference name of the sample: 3 Mbp when it contains ``LMP18``,
        5 Mbp when it contains ``SAP18``; otherwise the user is prompted
        until a number is entered.

        Side effects: may create ``self.cg_out_dir``, may rebind
        ``self.path`` to ``self.output_dir``, prints progress messages.
        """
        cg_out_dir = self.cg_out_dir
        if not os.path.isdir(cg_out_dir):
            os.makedirs(cg_out_dir)
            print("Directory for CG Pipeline output made: ", cg_out_dir)

        # FastANI references keyed by sample id.  Looked up at most once, and
        # only if some sample actually needs metrics, instead of once per
        # sample as before.
        fastani_references = None

        for read in self.runfiles.reads:
            sample = self.runfiles.reads[read]
            sample_id = sample.id
            cgp_result = sample_id + "_readMetrics.tsv"
            # Join with a separator: the container writes to
            # <cg_out_dir>/<cgp_result>, so the existence check must look at
            # that same location even when cg_out_dir has no trailing slash.
            result_path = os.path.join(cg_out_dir, cgp_result)

            if not os.path.isfile(result_path):

                # change self.path to local dir if path is a basemounted dir
                if os.path.isdir(self.path + "/AppResults"):
                    self.path = self.output_dir

                # path of the forward reads relative to the mounted input dir
                fwd = os.path.abspath(sample.fwd).replace(self.path, "")

                # Turn the mate marker into a shell wildcard.
                # NOTE(review): presumably so the glob matches both mates of
                # a pair - confirm against the file naming scheme.
                if "_R1" in fwd:
                    reads = fwd.replace("_R1", "*")
                else:
                    reads = fwd.replace("_1", "*")

                # create paths for data
                mounting = {self.path: '/datain', cg_out_dir: '/dataout'}
                out_dir = '/dataout'
                in_dir = '/datain'

                if fastani_references is None:
                    fastani_obj = run_fastani.FastANI(
                        path=self.path, output_dir=self.output_dir)
                    fastani_references = fastani_obj.fastani()[1]
                fastani_reference = fastani_references[sample_id]

                if "LMP18" in fastani_reference:
                    genome_mbp = 3.0
                elif "SAP18" in fastani_reference:
                    genome_mbp = 5.0
                else:
                    # Keep asking until the answer parses as a number; the
                    # previous code warned and then crashed on float().
                    while True:
                        answer = input(
                            "In Mbp, what is the expected genome size of %s?"
                            % (sample_id))
                        try:
                            genome_mbp = float(answer)
                        except ValueError:
                            print("A number was not entered")
                        else:
                            break

                # Mbp -> bp
                genome_length = genome_mbp * 1000000
                print("Estimated genome length for isolate %s: " % sample_id
                      + str(int(genome_length)))

                # build command for running run_assembly_readMetrics.pl
                command = "bash -c 'run_assembly_readMetrics.pl {in_dir}/{reads} -e {genome_length} > " \
                          "{out_dir}/{cgp_result}'".format(in_dir=in_dir, out_dir=out_dir, reads=reads,
                                                           genome_length=genome_length, cgp_result=cgp_result)

                # call the docker process
                print("Getting read metrics for isolate %s" % (sample_id))
                calldocker.call("staphb/lyveset", command, '/dataout', mounting)

            print("CG Pipeline results for isolate %s saved to: %s"
                  % (sample_id, result_path))
Exemple #2
0
    def spades(self):
        """Assemble every paired-end sample with SPAdes via Docker.

        For each entry of ``self.runfiles.reads`` a per-sample directory
        ``<spades_out_dir>/<id>/`` is created.  If it does not yet contain
        ``contigs.fasta``, ``spades.py`` is run through ``calldocker.call``
        with ``self.path`` mounted at ``/datain`` and the per-sample
        directory mounted at ``/dataout``, using ``self.threads`` and
        ``self.extra_params``.

        Raises:
            NotImplementedError: if a sample that still needs assembling is
                not paired-end.  Previously this case either crashed on an
                unbound ``command`` or silently re-ran the previous sample's
                command against this sample's output mount.

        Side effects: may create output directories, may rebind
        ``self.path`` to ``self.output_dir``, prints progress messages.
        """
        # create output directory
        spades_out_dir = self.spades_out_dir
        if not os.path.isdir(spades_out_dir):
            os.makedirs(spades_out_dir)
            print("Directory for spades output made: ", spades_out_dir)

        for read in self.runfiles.reads:
            sample = self.runfiles.reads[read]
            sample_id = sample.id
            spades_results = "%s/%s/" % (spades_out_dir, sample_id)
            if not os.path.isdir(spades_results):
                os.makedirs(spades_results)

            # nothing to do when the assembly is already there
            if os.path.isfile(os.path.join(spades_results, "contigs.fasta")):
                continue

            if not sample.paired:
                # TODO add single-end support
                raise NotImplementedError(
                    "SPAdes assembly of single-end sample %s is not supported"
                    % sample_id)

            # change self.path to local dir if path is a basemounted dir
            if os.path.isdir(self.path + "/AppResults"):
                self.path = self.output_dir

            # paths of the fastq files relative to the mounted input dir
            fwd = os.path.abspath(sample.fwd).replace(self.path, "")
            rev = os.path.abspath(sample.rev).replace(self.path, "")

            # create paths for data
            mounting = {self.path: '/datain', spades_results: '/dataout'}
            out_dir = '/dataout'
            in_dir = '/datain'

            # build command for the paired-end SPAdes assembly
            command = "bash -c 'spades.py -1 {in_dir}/{fwd} -2 {in_dir}/{rev} -o " \
                      "{out_dir}/ -t {threads} {extra_params}'".format(in_dir=in_dir,
                                                                       out_dir=out_dir,
                                                                       threads=self.threads,
                                                                       extra_params=self.extra_params,
                                                                       fwd=fwd, rev=rev)

            # call the docker process
            print("Generating SPAdes assembly for sample " + sample_id)
            calldocker.call("staphb/spades", command, '/dataout', mounting)

            print("SPAdes assembly for isolate %s saved to: %s" %
                  (sample_id, spades_results))
Exemple #3
0
    def quast(self):
        """Run QUAST on each sample's SPAdes contigs against its reference.

        For each entry of ``self.runfiles.reads`` the FastANI reference of
        the sample is used as ``-r`` (resolved under ``/db``, where
        ``self.db`` is mounted) and ``spades_output/<id>/contigs.fasta``
        under ``/datain`` (where ``self.path`` is mounted) is the assembly.
        Results go to ``<quast_out_dir>/<id>/``, mounted at ``/dataout``.
        QUAST is invoked through ``calldocker.call`` for every sample on
        every call; there is no skip for existing results.

        Side effects: may create output directories and may rebind
        ``self.path`` to ``self.output_dir``.
        """
        # create output directory
        quast_out_dir = self.quast_out_dir

        if not os.path.isdir(quast_out_dir):
            os.makedirs(quast_out_dir)
            print("Directory for Quast output made: ", quast_out_dir)

        # FastANI references keyed by sample id.  Looked up once on first use
        # rather than once per sample; left unset when there are no samples.
        reference_genomes = None

        for read in self.runfiles.reads:
            sample_id = self.runfiles.reads[read].id
            # change self.path to local dir if path is a basemounted dir
            if os.path.isdir(self.path + "/AppResults"):
                self.path = self.output_dir

            if reference_genomes is None:
                fastani_obj = run_fastani.FastANI(path=self.path,
                                                  output_dir=self.output_dir)
                reference_genomes = fastani_obj.fastani()[1]
            reference_genome = "/%s" % reference_genomes[sample_id]

            assembly = "/spades_output/%s/contigs.fasta" % sample_id
            quast_results = "%s/%s/" % (quast_out_dir, sample_id)
            if not os.path.isdir(quast_results):
                os.makedirs(quast_results)

            # create paths for data
            mounting = {
                self.path: '/datain',
                quast_results: '/dataout',
                self.db: '/db'
            }
            out_dir = '/dataout/'
            in_dir = '/datain/'
            db = '/db/'

            command = "bash -c 'quast.py {in_dir}{assembly} -r {db}{reference_genome} -o {out_dir}'".format(
                assembly=assembly,
                out_dir=out_dir,
                reference_genome=reference_genome,
                in_dir=in_dir,
                db=db)

            # call the docker process
            calldocker.call("staphb/quast:5.0.0", command, '/dataout/',
                            mounting)
Exemple #4
0
    def fastani(self):
        """Identify each sample's closest reference genome with FastANI.

        For each entry of ``self.runfiles.reads``:

        * if ``spades_output/<id>/contigs.fasta`` is missing under
          ``self.path`` (the directory mounted at ``/datain``), the SPAdes
          step is run first;
        * if ``fastani_<id>.out`` is missing in ``self.fastani_out_dir``,
          ``fastANI`` is run through ``calldocker.call`` against
          ``reference_list.txt`` under ``/db`` (where ``self.db`` is
          mounted);
        * the reference in the second column of the report's first row is
          matched against the known proficiency-test isolate names.

        Returns:
            list: ``[taxons, reference_genomes]``, two dicts keyed by sample
            id holding the predicted taxon and the reference genome file
            name.

        Raises:
            ValueError: if the report has no usable first row, or if its
                reference is not one of the known isolates.
        """
        # create output directory
        fastani_out_dir = self.fastani_out_dir

        if not os.path.isdir(fastani_out_dir):
            os.makedirs(fastani_out_dir)
            print("Directory for fastani output made: ", fastani_out_dir)

        # (marker in reference file name, taxon), checked in this order.
        # NOTE(review): "serover" looks like a misspelling of "serovar"; it
        # is kept as-is because the string is part of the returned data.
        known_references = (
            ("SAP18-0432", "Salmonella enterica subsp. enterica serover Enteritidis"),
            ("SAP18-H9654", "Salmonella enterica subsp. enterica serover Enteritidis"),
            ("SAP18-6199", "Salmonella enterica subsp. enterica serover Typhimurium"),
            ("SAP18-8729", "Salmonella enterica subsp. enterica serover Newport"),
            ("LMP18-H2446", "Listeria monocytogenes"),
            ("LMP18-H8393", "Listeria monocytogenes"),
        )

        taxons = {}
        reference_genomes = {}

        for read in self.runfiles.reads:
            sample_id = self.runfiles.reads[read].id
            fastani_result = "fastani_%s.out" % sample_id
            result_path = os.path.join(fastani_out_dir, fastani_result)
            # change self.path to local dir if path is a basemounted dir
            if os.path.isdir(self.path + "/AppResults"):
                self.path = self.output_dir
            # assembly location relative to the mounted input directory
            assembly = "spades_output/%s/contigs.fasta" % sample_id

            # Check the assembly on the host, i.e. below the directory that
            # is mounted at /datain.  Testing the bare container-relative
            # path never matched, so SPAdes was re-invoked every time.
            if not os.path.isfile(os.path.join(self.path, assembly)):
                spades_obj = run_spades.Spades(path=self.path,
                                               output_dir=self.output_dir)
                spades_obj.spades()

            # call the docker process
            if not os.path.isfile(result_path):
                # create paths for data
                mounting = {
                    self.path: '/datain',
                    fastani_out_dir: '/dataout',
                    self.db: '/db'
                }
                ref_list = "reference_list.txt"
                out_dir = '/dataout/'
                in_dir = '/datain/'
                db = '/db/'

                command = "bash -c 'fastANI -q {in_dir}{assembly} --rl {db}{ref_list} -o " \
                          "{out_dir}{fastani_result}'".format(assembly=assembly, out_dir=out_dir, db=db,
                                                              in_dir=in_dir, ref_list=ref_list,
                                                              fastani_result=fastani_result)

                print("Generating FastANI report for sample " + sample_id)
                calldocker.call("staphb/fastani", command, '/dataout',
                                mounting)

            # Only the first row of the report is consulted.
            with open(result_path) as report:
                tsv_reader = csv.reader(report, delimiter="\t", quotechar='"')
                first_hit = next(tsv_reader, None)

            if first_hit is None or len(first_hit) < 2:
                raise ValueError(
                    "FastANI report %s has no usable hit for sample %s"
                    % (result_path, sample_id))

            reference_genome = str(os.path.basename(first_hit[1]))
            for marker, taxon in known_references:
                if marker in reference_genome:
                    predicted_taxon = taxon
                    break
            else:
                raise ValueError(
                    "Sample %s not identified as a 2018 PT isolate" % sample_id)

            taxons[sample_id] = predicted_taxon
            reference_genomes[sample_id] = reference_genome

        return [taxons, reference_genomes]
Exemple #5
0
    def cfsansnp(self):
        """Run the CFSAN SNP pipeline for every sample lacking a result.

        For each entry of ``self.runfiles.reads`` whose
        ``<cfsansnp_out_dir>/<id>/snpma.fasta`` does not exist yet:

        * the sample's FastANI reference is looked up;
        * symlinks to the forward and reverse reads are staged under
          ``<cfsansnp_out_dir>/cfsan-reads/<id>/``.  Their targets are
          container paths (``/datain/raw_reads/<file>``), so they are only
          expected to resolve inside the container.
          NOTE(review): this assumes the reads live in ``raw_reads`` below
          ``self.path`` - confirm;
        * ``run_snp_pipeline.sh`` is run through ``calldocker.call`` with
          ``self.path``, ``self.cfsansnp_out_dir`` and ``self.db`` mounted
          at ``/datain``, ``/dataout`` and ``/db``;
        * the staging directory is removed again, even if the run fails.

        Samples that already have a result are skipped before any FastANI
        lookup or staging happens.

        Side effects: may create output directories, may rebind
        ``self.path`` to ``self.output_dir``, prints progress messages.
        """
        # create output directory
        cfsansnp_out_dir = self.cfsansnp_out_dir
        cfsan_read_dir = cfsansnp_out_dir + "/cfsan-reads/"

        if not os.path.isdir(cfsansnp_out_dir):
            os.makedirs(cfsansnp_out_dir)
            print("Directory for cfsansnp output made: ", cfsansnp_out_dir)

        # FastANI references keyed by sample id, looked up once on first use.
        reference_genomes = None

        for read in self.runfiles.reads:
            sample = self.runfiles.reads[read]
            sample_id = sample.id
            # Built with os.path.join so a relative output directory is not
            # turned into an absolute path by a hard-coded leading slash.
            cfsansnp_result = os.path.join(cfsansnp_out_dir, sample_id,
                                           "snpma.fasta")
            if os.path.isfile(cfsansnp_result):
                continue

            # change self.path to local dir if path is a basemounted dir
            if os.path.isdir(self.path + "/AppResults"):
                self.path = self.output_dir

            if reference_genomes is None:
                fastani_obj = run_fastani.FastANI(path=self.path,
                                                  output_dir=self.output_dir)
                reference_genomes = fastani_obj.fastani()[1]
            reference_genome = "/%s" % reference_genomes[sample_id]

            # create paths for data
            mounting = {
                self.path: '/datain',
                cfsansnp_out_dir: '/dataout',
                self.db: '/db'
            }
            out_dir = '/dataout/'
            in_dir = '/datain/'
            db = '/db/'

            read_names = (os.path.basename(sample.fwd),
                          os.path.basename(sample.rev))
            sample_read_dir = cfsan_read_dir + sample_id

            try:
                if not os.path.isdir(cfsan_read_dir):
                    os.makedirs(cfsan_read_dir)
                    print("Directory for cfsansnp read dir made: ",
                          cfsan_read_dir)
                if not os.path.isdir(sample_read_dir):
                    os.makedirs(sample_read_dir)

                # stage one symlink per mate, pointing at the container path
                for read_name in read_names:
                    link = sample_read_dir + "/" + read_name
                    if not os.path.islink(link):
                        os.symlink(in_dir + "raw_reads/" + read_name, link)

                command = "bash -c 'run_snp_pipeline.sh -m soft -o {out_dir}{id} -s {out_dir}cfsan-reads " \
                          "{db}{reference_genome}'".format(out_dir=out_dir, db=db, id=sample_id,
                                                           reference_genome=reference_genome)

                # call the docker process
                print("Generating cfsansnp output for sample " + sample_id)
                calldocker.call("staphb/cfsan-snp-pipeline:2.0.2", command,
                                '/dataout', mounting)
            finally:
                # the staged reads are per-sample scratch data
                shutil.rmtree(cfsan_read_dir, ignore_errors=True)