def read_metrics(self):
    """Run CG Pipeline read metrics for every sample lacking a result file.

    For each entry in ``self.runfiles.reads`` whose
    ``<id>_readMetrics.tsv`` is not yet present in ``self.cg_out_dir``,
    the expected genome size is chosen from the sample's FastANI reference
    name (``LMP18`` -> 3.0 Mbp, ``SAP18`` -> 5.0 Mbp, otherwise the user is
    prompted until a number is entered) and
    ``run_assembly_readMetrics.pl`` is run in the ``staphb/lyveset``
    container, with ``self.path`` mounted at ``/datain`` and
    ``self.cg_out_dir`` at ``/dataout``.

    Side effects: creates ``self.cg_out_dir`` if it is missing, and
    repoints ``self.path`` at ``self.output_dir`` when ``self.path``
    contains an ``AppResults`` directory.
    """
    cg_out_dir = self.cg_out_dir
    if not os.path.isdir(cg_out_dir):
        os.makedirs(cg_out_dir)
        print("Directory for CG Pipeline output made: ", cg_out_dir)

    # Container-side mount points.
    in_dir = '/datain'
    out_dir = '/dataout'

    # Reference genome per sample id. fastani() covers every sample in one
    # call, so it is looked up lazily and at most once.
    reference_genomes = None

    for read in self.runfiles.reads:
        fastq = self.runfiles.reads[read]
        sample_id = fastq.id
        cgp_result = sample_id + "_readMetrics.tsv"

        # os.path.join works whether or not cg_out_dir ends with a
        # separator; plain concatenation only matched in the former case.
        result_path = os.path.join(cg_out_dir, cgp_result)
        if os.path.isfile(result_path):
            continue

        # change self.path to local dir if path is a basemounted dir
        if os.path.isdir(self.path + "/AppResults"):
            self.path = self.output_dir

        # Path of the forward reads relative to the mounted input dir,
        # turned into a glob pattern for the read files.
        fwd = os.path.abspath(fastq.fwd).replace(self.path, "")
        if "_R1" in fwd:
            reads = fwd.replace("_R1", "*")
        else:
            reads = fwd.replace("_1", "*")

        mounting = {self.path: '/datain', cg_out_dir: '/dataout'}

        if reference_genomes is None:
            fastani_obj = run_fastani.FastANI(
                path=self.path, output_dir=self.output_dir)
            reference_genomes = fastani_obj.fastani()[1]
        fastani_reference = reference_genomes[sample_id]

        if "LMP18" in fastani_reference:
            genome_length = 3.0
        elif "SAP18" in fastani_reference:
            genome_length = 5.0
        else:
            # Keep asking until the answer parses as a number instead of
            # reporting the problem and then failing on the conversion.
            while True:
                answer = input(
                    "In Mbp, what is the expected genome size of %s?"
                    % (sample_id))
                try:
                    genome_length = float(answer)
                except ValueError:
                    print("A number was not entered")
                else:
                    break

        # Mbp -> bp
        genome_length = float(genome_length) * 1000000
        print("Estimated genome length for isolate %s: " % sample_id
              + str(int(genome_length)))

        # build command for running run_assembly_readMetrics.pl
        command = "bash -c 'run_assembly_readMetrics.pl {in_dir}/{reads} -e {genome_length} > " \
                  "{out_dir}/{cgp_result}'".format(
                      in_dir=in_dir, out_dir=out_dir, reads=reads,
                      genome_length=genome_length, cgp_result=cgp_result)

        # call the docker process
        print("Getting read metrics for isolate %s" % (sample_id))
        calldocker.call("staphb/lyveset", command, '/dataout', mounting)
        print("CG Pipeline results for isolate %s saved to: %s"
              % (sample_id, result_path))
def spades(self):
    """Assemble every paired-end sample with SPAdes, skipping finished ones.

    For each entry in ``self.runfiles.reads`` a per-sample directory
    ``<self.spades_out_dir>/<id>/`` is created. If it does not already
    hold ``contigs.fasta``, ``spades.py`` is run in the ``staphb/spades``
    container with ``self.path`` mounted at ``/datain`` and the sample
    directory at ``/dataout``, using ``self.threads`` and
    ``self.extra_params``.

    Samples whose ``paired`` flag is false are skipped with a message:
    only a paired-end command is built here, and running without one
    would either fail or reuse the previous sample's command.

    Side effects: creates output directories, and repoints ``self.path``
    at ``self.output_dir`` when ``self.path`` contains an ``AppResults``
    directory.
    """
    # create output directory
    spades_out_dir = self.spades_out_dir
    if not os.path.isdir(spades_out_dir):
        os.makedirs(spades_out_dir)
        print("Directory for spades output made: ", spades_out_dir)

    # Container-side mount points.
    in_dir = '/datain'
    out_dir = '/dataout'

    for read in self.runfiles.reads:
        fastq = self.runfiles.reads[read]
        sample_id = fastq.id

        spades_results = "%s/%s/" % (spades_out_dir, sample_id)
        if not os.path.isdir(spades_results):
            os.makedirs(spades_results)

        # Nothing to do when the assembly is already there.
        if os.path.isfile(os.path.join(spades_results, "contigs.fasta")):
            continue

        # change self.path to local dir if path is a basemounted dir
        if os.path.isdir(self.path + "/AppResults"):
            self.path = self.output_dir

        # TODO: build a single-end command for unpaired data
        if not fastq.paired:
            print("Skipping SPAdes assembly for sample %s: "
                  "only paired-end reads are supported" % sample_id)
            continue

        # Read paths relative to the mounted input directory.
        fwd = os.path.abspath(fastq.fwd).replace(self.path, "")
        rev = os.path.abspath(fastq.rev).replace(self.path, "")

        mounting = {self.path: '/datain', spades_results: '/dataout'}

        command = "bash -c 'spades.py -1 {in_dir}/{fwd} -2 {in_dir}/{rev} -o " \
                  "{out_dir}/ -t {threads} {extra_params}'".format(
                      in_dir=in_dir, out_dir=out_dir, threads=self.threads,
                      extra_params=self.extra_params, fwd=fwd, rev=rev)

        # call the docker process
        print("Generating SPAdes assembly for sample " + sample_id)
        calldocker.call("staphb/spades", command, '/dataout', mounting)
        print("SPAdes assembly for isolate %s saved to: %s"
              % (sample_id, spades_results))
def quast(self):
    """Run QUAST on each sample's SPAdes contigs against its FastANI reference.

    For every entry in ``self.runfiles.reads`` this creates
    ``<self.quast_out_dir>/<id>/`` and runs ``quast.py`` in the
    ``staphb/quast:5.0.0`` container on
    ``/datain//spades_output/<id>/contigs.fasta``, using the reference
    genome that ``run_fastani.FastANI(...).fastani()`` reports for that
    sample id. ``self.path`` is mounted at ``/datain``, the sample output
    directory at ``/dataout`` and ``self.db`` at ``/db``.

    Side effects: creates output directories, and repoints ``self.path``
    at ``self.output_dir`` when ``self.path`` contains an ``AppResults``
    directory.
    """
    # create output directory
    quast_out_dir = self.quast_out_dir
    if not os.path.isdir(quast_out_dir):
        os.makedirs(quast_out_dir)
        print("Directory for Quast output made: ", quast_out_dir)

    # Container-side mount points.
    out_dir = '/dataout/'
    in_dir = '/datain/'
    db = '/db/'

    # Reference genome per sample id. fastani() covers every sample in one
    # call, so it is run lazily once rather than once per sample.
    reference_genomes = None

    for read in self.runfiles.reads:
        sample_id = self.runfiles.reads[read].id

        # change self.path to local dir if path is a basemounted dir
        if os.path.isdir(self.path + "/AppResults"):
            self.path = self.output_dir

        if reference_genomes is None:
            fastani_obj = run_fastani.FastANI(
                path=self.path, output_dir=self.output_dir)
            reference_genomes = fastani_obj.fastani()[1]

        reference_genome = "/%s" % reference_genomes[sample_id]
        assembly = "/spades_output/%s/contigs.fasta" % sample_id

        quast_results = "%s/%s/" % (quast_out_dir, sample_id)
        if not os.path.isdir(quast_results):
            os.makedirs(quast_results)

        # create paths for data
        mounting = {
            self.path: '/datain',
            quast_results: '/dataout',
            self.db: '/db'
        }

        command = "bash -c 'quast.py {in_dir}{assembly} -r {db}{reference_genome} -o {out_dir}'".format(
            assembly=assembly, out_dir=out_dir,
            reference_genome=reference_genome, in_dir=in_dir, db=db)

        # call the docker process
        calldocker.call("staphb/quast:5.0.0", command, '/dataout/', mounting)
def fastani(self):
    """Identify each sample's closest reference genome with FastANI.

    For every entry in ``self.runfiles.reads``:

    * if the sample's ``spades_output/<id>/contigs.fasta`` is missing
      under ``self.path``, ``run_spades.Spades(...).spades()`` is invoked;
    * if ``fastani_<id>.out`` is missing from ``self.fastani_out_dir``,
      ``fastANI`` is run in the ``staphb/fastani`` container against
      ``/db//reference_list.txt``;
    * the reference file name in the first row of that output is mapped
      to a taxon by its isolate marker.

    Returns:
        ``[taxons, reference_genomes]``: two dicts keyed by sample id,
        holding the predicted taxon string and the reference genome file
        name respectively.

    Raises:
        ValueError: if the FastANI output has no usable first row, or its
            reference does not contain one of the known isolate markers.

    Side effects: creates ``self.fastani_out_dir``, and repoints
    ``self.path`` at ``self.output_dir`` when ``self.path`` contains an
    ``AppResults`` directory.
    """
    # create output directory
    fastani_out_dir = self.fastani_out_dir
    if not os.path.isdir(fastani_out_dir):
        os.makedirs(fastani_out_dir)
        print("Directory for fastani output made: ", fastani_out_dir)

    # Isolate marker -> predicted taxon, checked in this order.
    known_references = (
        ("SAP18-0432",
         "Salmonella enterica subsp. enterica serover Enteritidis"),
        ("SAP18-H9654",
         "Salmonella enterica subsp. enterica serover Enteritidis"),
        ("SAP18-6199",
         "Salmonella enterica subsp. enterica serover Typhimurium"),
        ("SAP18-8729",
         "Salmonella enterica subsp. enterica serover Newport"),
        ("LMP18-H2446", "Listeria monocytogenes"),
        ("LMP18-H8393", "Listeria monocytogenes"),
    )

    # Container-side paths.
    ref_list = "/reference_list.txt"
    out_dir = '/dataout/'
    in_dir = '/datain/'
    db = '/db/'

    taxons = {}
    reference_genomes = {}

    for read in self.runfiles.reads:
        sample_id = self.runfiles.reads[read].id
        fastani_result = "/fastani_%s.out" % sample_id
        result_path = "%s/%s" % (fastani_out_dir, fastani_result)

        # change self.path to local dir if path is a basemounted dir
        if os.path.isdir(self.path + "/AppResults"):
            self.path = self.output_dir

        # `assembly` is relative to self.path (mounted at /datain), so its
        # existence has to be tested under self.path on the host, not at
        # the filesystem root.
        assembly = "/spades_output/%s/contigs.fasta" % sample_id
        if not os.path.isfile(self.path + assembly):
            spades_obj = run_spades.Spades(
                path=self.path, output_dir=self.output_dir)
            spades_obj.spades()

        if not os.path.isfile(result_path):
            # create paths for data
            mounting = {
                self.path: '/datain',
                fastani_out_dir: '/dataout',
                self.db: '/db'
            }
            command = "bash -c 'fastANI -q {in_dir}{assembly} --rl {db}{ref_list} -o " \
                      "{out_dir}/{fastani_result}'".format(
                          assembly=assembly, out_dir=out_dir, db=db,
                          in_dir=in_dir, ref_list=ref_list,
                          fastani_result=fastani_result)

            # call the docker process
            print("Generating FastANI report for sample " + sample_id)
            calldocker.call("staphb/fastani", command, '/dataout', mounting)

        # Only the first row is used; its second column is the reference.
        with open(result_path) as handle:
            tsv_reader = csv.reader(handle, delimiter="\t", quotechar='"')
            first_hit = next(tsv_reader, None)

        if not first_hit or len(first_hit) < 2:
            raise ValueError(
                "FastANI reported no reference hit for sample %s" % sample_id)

        reference_genome = str(os.path.basename(first_hit[1]))

        for marker, taxon in known_references:
            if marker in reference_genome:
                predicted_taxon = taxon
                break
        else:
            raise ValueError(
                "Sample %s not identified as a 2018 PT isolate" % sample_id)

        taxons[sample_id] = predicted_taxon
        reference_genomes[sample_id] = reference_genome

    return [taxons, reference_genomes]
def cfsansnp(self):
    """Run the CFSAN SNP pipeline for every sample lacking ``snpma.fasta``.

    For each entry in ``self.runfiles.reads`` whose
    ``<self.cfsansnp_out_dir>/<id>/snpma.fasta`` does not exist, the
    sample's forward and reverse reads are staged as symlinks under
    ``<self.cfsansnp_out_dir>/cfsan-reads/<id>/`` and
    ``run_snp_pipeline.sh`` is run in the
    ``staphb/cfsan-snp-pipeline:2.0.2`` container against the reference
    genome that ``run_fastani.FastANI(...).fastani()`` reports for the
    sample. The symlinks point at ``/datain/raw_reads/<file name>`` inside
    the container. ``self.path`` is mounted at ``/datain``, the output
    directory at ``/dataout`` and ``self.db`` at ``/db``.

    The staging directory is removed after each run, even if the run
    fails, so links from one sample are not picked up by the next run.
    NOTE(review): this assumes the pipeline is meant to see one sample per
    run; confirm against the intended placement of the cleanup.

    Side effects: creates output directories, and repoints ``self.path``
    at ``self.output_dir`` when ``self.path`` contains an ``AppResults``
    directory.
    """
    # create output directory
    cfsansnp_out_dir = self.cfsansnp_out_dir
    cfsan_read_dir = cfsansnp_out_dir + "/cfsan-reads/"
    if not os.path.isdir(cfsansnp_out_dir):
        os.makedirs(cfsansnp_out_dir)
        print("Directory for cfsansnp output made: ", cfsansnp_out_dir)

    # Container-side mount points.
    out_dir = '/dataout/'
    in_dir = '/datain/'
    db = '/db/'

    # Reference genome per sample id. fastani() covers every sample in one
    # call, so it is looked up lazily and at most once.
    reference_genomes = None

    for read in self.runfiles.reads:
        fastq = self.runfiles.reads[read]
        sample_id = fastq.id

        # Built with os.path.join so a relative output dir is not turned
        # into an absolute path by a hard-coded leading "/".
        cfsansnp_result = os.path.join(
            cfsansnp_out_dir, sample_id, "snpma.fasta")
        if os.path.isfile(cfsansnp_result):
            continue

        # change self.path to local dir if path is a basemounted dir
        if os.path.isdir(self.path + "/AppResults"):
            self.path = self.output_dir

        if reference_genomes is None:
            fastani_obj = run_fastani.FastANI(
                path=self.path, output_dir=self.output_dir)
            reference_genomes = fastani_obj.fastani()[1]
        reference_genome = "/%s" % reference_genomes[sample_id]

        # create paths for data
        mounting = {
            self.path: '/datain',
            cfsansnp_out_dir: '/dataout',
            self.db: '/db'
        }

        command = "bash -c 'run_snp_pipeline.sh -m soft -o {out_dir}{id} -s {out_dir}cfsan-reads " \
                  "{db}{reference_genome}'".format(
                      out_dir=out_dir, db=db, id=sample_id,
                      reference_genome=reference_genome)

        sample_read_dir = cfsan_read_dir + sample_id
        try:
            if not os.path.isdir(cfsan_read_dir):
                os.makedirs(cfsan_read_dir)
                print("Directory for cfsansnp read dir made: ", cfsan_read_dir)
            if not os.path.isdir(sample_read_dir):
                os.makedirs(sample_read_dir)

            # The link targets are container paths; they only resolve once
            # self.path is mounted at /datain.
            for fastq_path in (fastq.fwd, fastq.rev):
                file_name = os.path.basename(fastq_path)
                link_path = sample_read_dir + "/" + file_name
                if not os.path.islink(link_path):
                    os.symlink("/%s/raw_reads/" % in_dir + file_name,
                               link_path)

            # call the docker process
            print("Generating cfsansnp output for sample " + sample_id)
            calldocker.call("staphb/cfsan-snp-pipeline:2.0.2", command,
                            '/dataout', mounting)
        finally:
            shutil.rmtree(cfsan_read_dir, ignore_errors=True)