Example #1
0
 def objectprep(self):
     """
     Build the objects that hold each sample's data and metadata, then
     perform whatever file manipulation the input type requires before
     CLARK analysis.
     """
     # Sort the input files into subfolders and wrap them in objects
     self.runmetadata = createobject.ObjectCreation(self)
     if self.runmetadata.extension != 'fastq':
         # .fasta inputs need no preparation; use the first file as-is
         printtime('Using .fasta files for CLARK analysis', self.start)
         for sample in self.runmetadata.samples:
             sample.general.combined = sample.general.fastqfiles[0]
     else:
         # CLARK runs on a single uncompressed file per sample, so
         # decompress and merge .gz / paired-end reads as required
         printtime(
             'Decompressing and combining .fastq files for CLARK analysis',
             self.start)
         fileprep.Fileprep(self)
Example #2
0
 def objectprep(self):
     """
     Prepare sample objects: build them from the data path when one is
     supplied, otherwise alias the pre-existing abundance/classification
     results onto the attributes the downstream code expects. Finally,
     write the metadata to file and parse the CSV results.
     """
     from spadespipeline import createobject
     if self.datapath:
         # A data path was provided - locate the files and build objects
         self.runmetadata = createobject.ObjectCreation(self)
     else:
         # No data path: samples already carry results from a prior run
         for sample in self.runmetadata.samples:
             general = sample.general
             general.abundancefile = general.abundance
             general.assignmentfile = general.classification
             general.fastqfiles = [general.combined]
     # Write the metadata to file
     metadataprinter.MetadataPrinter(self)
     # Load the results from the csv files into dictionaries
     self.taxids()
Example #3
0
 def quality(self):
     """
     Create sequence objects and run the quality/typing pipeline on the
     supplied sequences: CLARK typing (when enough RAM is available),
     gene prediction (prodigal), rMLST, and assembly metrics (quast),
     printing the metadata to file after each stage.
     """
     from spadespipeline import mMLST
     from spadespipeline import quaster
     from spadespipeline import prodigal
     from spadespipeline import createobject
     import shutil
     # Build the sample objects
     self.runmetadata = createobject.ObjectCreation(self)
     # Query the amount of physical memory installed in the system
     from psutil import virtual_memory
     total_memory = virtual_memory().total
     # CLARK is memory-hungry: only run it with at least 100GB of RAM
     # (this threshold could probably be lowered)
     if total_memory >= 100000000000:
         # Run CLARK typing on the .fastq and .fasta files
         from metagenomefilter import automateCLARK
         automateCLARK.PipelineInit(self, 'typing')
     else:
         printtime('Not enough RAM to run CLARK!', self.start)
     # Each fasta file gets its own folder named after the file, e.g.
     # 2014-SEQ-0276.fasta is placed in a folder named 2014-SEQ-0276
     for fasta_file in glob.glob(self.path + "*.fasta"):
         # The folder name is the file name with its extension stripped
         target_dir = os.path.splitext(fasta_file)[0]
         make_path(target_dir)
         # Copy the file (keeping its name) into the new folder
         base_name = os.path.split(fasta_file)[-1]
         shutil.copy(fasta_file, os.path.join(target_dir, base_name))
     # Quality assessments; print the metadata to file after each method
     # Gene predictions
     prodigal.Prodigal(self)
     metadataprinter.MetadataPrinter(self)
     # rMLST typing
     mMLST.PipelineInit(self, 'rmlst')
     metadataprinter.MetadataPrinter(self)
     # Quast metrics are calculated on the best assembly of each sample
     for sample in self.runmetadata.samples:
         sample.general.filteredfile = sample.general.bestassemblyfile
     quaster.Quast(self)
     # Depth of coverage and other quality metrics (Qualimap)
     metadataprinter.MetadataPrinter(self)
Example #4
0
 def populate(self):
     """
     Create sample objects (unless running inside a pipeline, where they
     already exist) and attach a populated analysis-type GenObject to
     every sample: alleles, allele names, profile, allele directory, and
     per-sample report directory.
     """
     from spadespipeline import createobject
     # Outside a pipeline run, move files to subfolders and build objects
     if not self.pipeline:
         self.metadata = createobject.ObjectCreation(self)
     analysistype = self.analysistype
     for sample in self.metadata.samples:
         # Attach a fresh GenObject for this analysis type
         setattr(sample, analysistype, GenObject())
         analysis = sample[analysistype]
         analysis.alleles = self.genes
         # Allele names are the gene file basenames without extensions
         analysis.allelenames = [
             os.path.split(gene)[1].split('.')[0] for gene in self.genes
         ]
         analysis.profile = self.profile
         analysis.alleledir = self.coregenelocation
         analysis.reportdir = os.path.join(
             sample.general.outputdirectory, analysistype)
Example #5
0
 def annotatethreads(self):
     """
     Perform multi-threaded prokka annotations of each strain.

     Creates objects for the input files, fixes their FASTA headers,
     spawns self.cpus daemon worker threads running self.annotate, then
     queues each sample (with its prokka output directory and
     docker/prokka system call populated) and waits for the queue to
     drain before building the core genome.
     """
     import spadespipeline.createobject as createobject
     # Move the files to subfolders and create objects
     self.runmetadata = createobject.ObjectCreation(self)
     # Fix headers
     self.headerthreads()
     printtime('Performing prokka analyses', self.start)
     # Create and start the worker threads
     for _ in range(self.cpus):
         # Send the threads to the appropriate destination function
         thread = Thread(target=self.annotate, args=())
         # Daemonise the workers so they do not block interpreter exit.
         # Thread.setDaemon() is deprecated (Python 3.10+); assign the
         # attribute directly instead
         thread.daemon = True
         # Start the threading
         thread.start()
     for sample in self.runmetadata.samples:
         # Create the prokka attribute in the metadata object
         setattr(sample, 'prokka', GenObject())
         # Example of the resulting system call:
         # docker run -v /path/to/sequences:/path/to/sequences coreGenome
         # prokka 2014-SEQ-0275.fasta --force --genus Escherichia --species coli --usegenus --addgenes
         # --prefix 2014-SEQ-0275 --locustag EC0275 --outputdir /path/to/sequences/2014-SEQ-0275/prokka
         sample.prokka.outputdir = os.path.join(
             sample.general.outputdirectory, 'prokka')
         # TODO Incorporate MASH/rMLST/user inputted genus, species results in the system call
         # Create the system call
         sample.prokka.command = 'docker run -v {}:{} {} ' \
                                 'prokka {} ' \
                                 '--force ' \
                                 '--genus {} ' \
                                 '--species {} ' \
                                 '--usegenus ' \
                                 '--addgenes ' \
                                 '--prefix {} ' \
                                 '--locustag {} ' \
                                 '--outdir {}' \
             .format(self.sequencepath, self.sequencepath, self.dockerimage, sample.general.fixedheaders,
                     self.genus, self.species, sample.name, sample.name, sample.prokka.outputdir)
         # sample.name.split('-')[-1]
         self.queue.put(sample)
     self.queue.join()
     # Create the core genome
     self.codingthreads()