Beispiel #1
0
 def quality_features(self, analysis):
     """
     Extract assembly features (e.g. total genome size, longest contig, and N50), then write the metadata to file

     :param analysis: value handed unchanged to quality.QualityFeatures
     """
     # Build the feature extractor and run it in a single step
     quality.QualityFeatures(self, analysis).main()
     # Record the updated metadata
     metadataprinter.MetadataPrinter(self)
Beispiel #2
0
 def helper(self):
     """
     Populate self.runmetadata, either with a Basic object (when self.basicassembly is set) or by parsing the
     run files, extracting PhiX information, and moving/linking the FASTQ files. The metadata are written to file
     in both cases
     """
     if not self.basicassembly:
         # Full mode: the metadata object is populated from SampleSheet.csv, GenerateFASTQRunStatistics.xml, and
         # RunInfo.xml
         self.runinfo = os.path.join(self.path, 'RunInfo.xml')
         self.runmetadata = runMetadata.Metadata(self)
         # Pull the flowcell ID and the instrument name from RunInfo.xml (if it was provided)
         self.runmetadata.parseruninfo()
         # Gather the PhiX mapping information for the run
         phix_mapper = phix.PhiX(self)
         phix_mapper.main()
         # No bcl or nohup calls are made here, so record placeholders for both in the metadata
         for sample in self.runmetadata.samples:
             sample.commands = GenObject()
             for call_name in ('nohupcall', 'bclcall'):
                 setattr(sample.commands, call_name, 'NA')
         # Place the FASTQ files in strain-specific working directories
         fastqmover.FastqMover(self)
     else:
         # Basic mode: assembly without accessory files (SampleSheet.csv, etc.)
         self.runmetadata = Basic(self)
     # Write the metadata to file
     metadataprinter.MetadataPrinter(self)
Beispiel #3
0
 def qualimap(self):
     """
     Use Qualimap to determine the depth of coverage and other quality metrics, then write the metadata to file
     """
     # Create the QualiMap object and run it immediately
     depth.QualiMap(self).main()
     metadataprinter.MetadataPrinter(self)
Beispiel #4
0
 def assemble_genomes(self):
     """
     Assemble the genomes with skesa, then write the metadata to file
     """
     # Create the Skesa object and run it immediately
     skesa.Skesa(self).main()
     metadataprinter.MetadataPrinter(self)
Beispiel #5
0
 def quality(self):
     """
     Creates quality objects and runs quality assessments and quality processes on the
     supplied sequences. The steps are run in a fixed order: FASTQ validation, FastQC on the raw reads,
     quality trimming, FastQC on the trimmed reads, error correction, contamination detection, and FastQC on the
     trimmed and corrected reads. The metadata are then written to file

     :raises SystemExit: when self.preprocess is truthy, i.e. only pre-processing of the data was requested
     """
     # Validate that the FASTQ files are in the proper format, and that there are no issues e.g. different numbers
     # of forward and reverse reads, read length longer than quality score length, proper extension
     self.fastq_validate()
     # Run FastQC on the unprocessed fastq files
     self.fastqc_raw()
     # Perform quality trimming
     self.quality_trim()
     # Run FastQC on the trimmed files
     self.fastqc_trimmed()
     # Perform error correcting on the reads
     self.error_correct()
     # Detect contamination in the reads
     self.contamination_detection()
     # Run FastQC on the processed fastq files
     self.fastqc_trimmedcorrected()
     # Write the metadata to file before (potentially) exiting
     metadataprinter.MetadataPrinter(self)
     # Exit if only pre-processing of data is requested
     if self.preprocess:
         printtime('Pre-processing complete', self.starttime)
         # quit() is injected by the site module for interactive sessions and is not guaranteed to exist
         # (e.g. under `python -S`); raising SystemExit is the always-available equivalent
         raise SystemExit
Beispiel #6
0
 def genome_qaml(self):
     """
     Assess the quality of the assemblies with GenomeQAML, then write the metadata to file
     """
     # Create the GenomeQAML object and run it immediately
     quality.GenomeQAML(self).main()
     metadataprinter.MetadataPrinter(self)
Beispiel #7
0
 def sixteens(self):
     """
     Perform the 16S analyses by creating a SixteensFull object, then write the metadata to file
     """
     SixteensFull(
         self,
         self.commit,
         self.starttime,
         self.homepath,
         'sixteens_full',
         0.95,
     )
     metadataprinter.MetadataPrinter(self)
Beispiel #8
0
 def mlst(self):
     """
     Perform the MLST analyses by creating an MLSTSippr object, then write the metadata to file
     """
     MLSTSippr(
         self,
         self.commit,
         self.starttime,
         self.homepath,
         'MLST',
         1.0,
         True,
     )
     metadataprinter.MetadataPrinter(self)
Beispiel #9
0
 def vtyper(self):
     """
     Perform virulence typing with the vtyper PrimerFinder, then write the metadata to file
     """
     # Create the PrimerFinder object and run it immediately
     vtyper.PrimerFinder(self, 'vtyper').main()
     metadataprinter.MetadataPrinter(self)
Beispiel #10
0
 def univec(self):
     """
     Search for Univec contamination, then write the metadata to file
     """
     # The PipelineInit object is only needed as the input to Univec, so it is created in place
     Univec(univec.PipelineInit(self, 'univec', False, 80, True))
     metadataprinter.MetadataPrinter(self)
Beispiel #11
0
 def serosippr(self):
     """
     Perform the serotyping analyses by creating a Serotype object, then write the metadata to file
     """
     Serotype(
         self,
         self.commit,
         self.starttime,
         self.homepath,
         'serosippr',
         0.95,
         True,
     )
     metadataprinter.MetadataPrinter(self)
Beispiel #12
0
 def plasmids(self):
     """
     Perform plasmid finding by creating a Plasmids object, then write the metadata to file
     """
     Plasmids(
         self,
         self.commit,
         self.starttime,
         self.homepath,
         'plasmidfinder',
         0.8,
         False,
         True,
     )
     metadataprinter.MetadataPrinter(self)
Beispiel #13
0
 def plasmid_extractor(self):
     """
     Extract and type plasmid sequences with PlasmidExtractor, then write the metadata to file
     """
     # Create the PlasmidExtractor object and run it immediately
     PlasmidExtractor(self).main()
     metadataprinter.MetadataPrinter(self)
Beispiel #14
0
 def genesippr(self):
     """
     Search for genes of interest by creating a GeneSippr object, then write the metadata to file
     """
     GeneSippr(
         self,
         self.commit,
         self.starttime,
         self.homepath,
         'genesippr',
         0.95,
         False,
         False,
     )
     metadataprinter.MetadataPrinter(self)
Beispiel #15
0
 def ressippr(self):
     """
     Perform resistance finding on the raw reads with a Resistance object, then write the metadata to file
     """
     # Create the Resistance object and run it immediately
     Resistance(
         self,
         self.commit,
         self.starttime,
         self.homepath,
         'resfinder',
         0.8,
         False,
         True,
     ).main()
     metadataprinter.MetadataPrinter(self)
Beispiel #16
0
 def run_gdcs(self):
     """
     Detect genomically-dispersed conserved sequences (GDCS) for Escherichia, Listeria, and Salmonella strains,
     then write the metadata to file
     """
     # The analysis is performed by creating the GDCS object
     GDCS(self)
     # Record the updated metadata
     metadataprinter.MetadataPrinter(self)
Beispiel #17
0
 def virulence(self):
     """
     Perform virulence gene detection with a Virulence object, call its reporter, then write the metadata to file
     """
     # Create the Virulence object and call its reporter immediately
     Virulence(
         self,
         self.commit,
         self.starttime,
         self.homepath,
         'virulence',
         0.95,
         False,
         True,
     ).reporter()
     metadataprinter.MetadataPrinter(self)
Beispiel #18
0
 def prophages(self, cutoff=90):
     """
     Perform prophage detection, then write the metadata to file

     :param cutoff: cutoff value to be used in the analyses; passed to GeneSeekrMethod.PipelineInit
     """
     # The PipelineInit object is only needed as the input to Prophages, so it is created in place
     Prophages(GeneSeekrMethod.PipelineInit(self, 'prophages', False, cutoff, True))
     metadataprinter.MetadataPrinter(self)
Beispiel #19
0
 def coregenome(self):
     """
     Perform the core genome calculations (CoreGenome followed by AnnotatedCore), then write the metadata to file
     """
     # The PipelineInit object is only needed as the input to CoreGenome, so it is created in place
     core.CoreGenome(GeneSeekrMethod.PipelineInit(self, 'coregenome', True, 70, False))
     # AnnotatedCore works from this object rather than the PipelineInit object
     core.AnnotatedCore(self)
     metadataprinter.MetadataPrinter(self)
Beispiel #20
0
    def clark(self):
        """
        Run the CLARK metagenome analyses on the assemblies and the raw reads. When the total physical memory
        reported by virtual_memory() is below the threshold, light=True is supplied to both analyses
        """
        # Threshold of total physical memory (100GB - this could probably be lowered)
        memory_threshold = 100000000000
        # With enough memory no extra keyword arguments are supplied, so the PipelineInit defaults apply;
        # otherwise light=True is added to both calls
        extra_options = dict() if virtual_memory().total >= memory_threshold else {'light': True}
        # Run CLARK typing with the default file type first, and then on the 'fastq' files
        automateCLARK.PipelineInit(self, **extra_options)
        automateCLARK.PipelineInit(self, 'fastq', **extra_options)
        metadataprinter.MetadataPrinter(self)
Beispiel #21
0
 def __init__(self):
     """
     Parse the command line arguments, initialise the run attributes, run the Vtyper analyses, create the report,
     and write the metadata to file
     """
     from argparse import ArgumentParser
     from time import time
     # Describe the command line interface
     cli = ArgumentParser(
         description='Performs ePCR using a supplied primer file. The primers must be in the format: '
                     '<name>\t<forward primer>\t<reverse primer>\t<max size allowed between primers>\n.'
                     'Sequence files must be stored in <path>/sequences'
     )
     cli.add_argument(
         'path',
         help='Specify path in which reports are to be stored')
     cli.add_argument(
         '-s', '--sequencepath',
         required=True,
         help='Path to assembly files')
     cli.add_argument(
         '-f', '--primerfile',
         required=True,
         help='The name and path of the file containing the primers')
     # Parse the supplied arguments
     args = cli.parse_args()
     # The timer starts once the arguments have been parsed
     self.starttime = time()
     # Joining with an empty string adds a trailing separator, so the path variables are consistently formatted
     self.path = os.path.join(args.path, '')
     self.sequencepath = os.path.join(args.sequencepath, '')
     self.primerfile = args.primerfile
     # Attributes used by the analyses
     self.runmetadata = MetadataObject()
     self.reffilepath = False
     self.analysistype = 'ePCR'
     # Ensure that the report folder exists
     self.reportpath = os.path.join(self.path, 'reports')
     make_path(self.reportpath)
     # Populate the sample metadata
     self.runmetadata.samples = self.setup()
     self.logfile = os.path.join(self.path, 'vtyper_logfile.txt')
     # Perform the analyses
     Vtyper(self, self.analysistype)
     # Summarise the results in a report
     self.reporter()
     # Write the metadata to file
     printtime('Printing metadata to file', self.starttime)
     metadataprinter.MetadataPrinter(self)
     # Finish with a bold, green statement of the elapsed time
     elapsed = time() - self.starttime
     print(u'\033[92m' + u'\033[1m' + u'\nElapsed Time: %0.2f seconds' % elapsed + u'\033[0m')
Beispiel #22
0
 def main(self):
     """
     Drive the pipeline: run each stage in order, create the report, compress or remove the temporary files, and
     write the metadata to file
     """
     # Stages, in execution order: start the assembly, create the quality object, run the quality analyses,
     # assemble, perform genus-agnostic typing, and perform typing
     stages = (
         'helper',
         'create_quality_object',
         'quality',
         'assemble',
         'agnostictyping',
         'typing',
     )
     for stage in stages:
         # Look each method up only when it is about to be run
         getattr(self, stage)()
     # Create a report
     reporter.Reporter(self)
     # Compress or remove all large, temporary files created by the pipeline
     compress.Compress(self)
     metadataprinter.MetadataPrinter(self)
Beispiel #23
0
 def __init__(self, inputobject):
     """
     Copy the run settings from inputobject, create the output folders and the empty containers, run the
     annotation threads, and write the metadata to file

     :param inputobject: object supplying the path, databasesequencepath, start, cpus, genus, species,
     dockerimage, and logfile attributes
     """
     from queue import Queue
     # Settings taken from the supplied object
     self.path = inputobject.path
     self.sequencepath = inputobject.databasesequencepath
     self.start = inputobject.start
     self.cpus = inputobject.cpus
     self.genus = inputobject.genus
     self.species = inputobject.species
     self.runmetadata = MetadataObject()
     self.dockerimage = inputobject.dockerimage
     # Genus-specific output folders
     self.coregenelocation = os.path.join(self.path, 'coregenes', self.genus)
     self.profilelocation = os.path.join(self.path, 'profile', self.genus)
     for folder in (self.profilelocation, self.coregenelocation):
         make_path(folder)
     # Empty containers to be filled by the analyses
     self.genes = {}
     self.genenames = {}
     self.genesequence = {}
     self.cdsset = {}
     self.coresequence = {}
     self.geneset = set()
     self.corealleles = {}
     self.coreset = set()
     self.profiles = {}
     # Queues
     self.queue, self.corequeue, self.codingqueue, self.headerqueue = Queue(), Queue(), Queue(), Queue()
     self.logfile = inputobject.logfile
     # Perform the analyses
     self.annotatethreads()
     # Write the metadata to file
     metadataprinter.MetadataPrinter(self)
Beispiel #24
0
    def validate_fastq(self):
        """
        Runs reformat.sh on the FASTQ files. If a CalledProcessError arises, do not proceed with the assembly of
        these files

        For every sample in self.metadata:
        - if the first FASTQ file is smaller than 1000000 bytes, the sample is flagged with 'files_too_small'
        - otherwise the reads are validated with bbtools.validate_reads; on a CalledProcessError, a repair is
          attempted with bbtools.reformat_reads (and bbtools.repair_reads when there are two FASTQ files)
        - samples for which the repair raises a CalledProcessError and cannot be recovered are flagged with
          'fastq_error'

        The metadata are written to file, and self.metadata is then replaced by the list of samples that passed
        """
        printtime('Validating FASTQ files', self.start)
        # Samples that pass validation (either directly, or following a repair)
        validated_reads = list()
        for sample in self.metadata:
            # Tiny files can pass the validation tests - ensure that they don't
            size = os.path.getsize(sample.general.fastqfiles[0])
            if size >= 1000000:
                # Try to run reformat.sh on the reads - on any errors try to run repair.sh
                try:
                    # NOTE(review): only the forward file is supplied to the bbtools calls in this method -
                    # presumably the helper locates the reverse file itself; confirm against bbtools
                    out, err, cmd = bbtools.validate_reads(
                        forward_in=sample.general.fastqfiles[0],
                        returncmd=True)
                    write_to_logfile(out, err, self.logfile,
                                     sample.general.logout,
                                     sample.general.logerr, None, None)
                    # Add the sample to the list of samples with FASTQ files that pass this validation step
                    validated_reads.append(sample)
                except CalledProcessError:
                    # Set the file names for the reformatted and repaired files
                    outputfile1 = os.path.join(
                        sample.general.outputdirectory,
                        '{}_reformatted_R1.fastq.gz'.format(sample.name))
                    repair_file1 = os.path.join(
                        sample.general.outputdirectory,
                        '{}_repaired_R1.fastq.gz'.format(sample.name))
                    if len(sample.general.fastqfiles) == 2:
                        outputfile2 = os.path.join(
                            sample.general.outputdirectory,
                            '{}_reformatted_R2.fastq.gz'.format(sample.name))
                        repair_file2 = os.path.join(
                            sample.general.outputdirectory,
                            '{}_repaired_R2.fastq.gz'.format(sample.name))
                    else:
                        # Empty strings mark the absence of a second file; they are used as falsy flags below
                        outputfile2 = str()
                        repair_file2 = str()
                    # Try to use reformat.sh to repair the reads - if this fails, discard the sample from the analyses
                    try:
                        printtime(
                            'Errors detected in FASTQ files for sample {sample}. Please check the following files'
                            ' for details {log} {logout} {logerr}. Using reformat.sh to attempt to repair issues'
                            .format(sample=sample.name,
                                    log=self.logfile,
                                    logout=sample.general.logout,
                                    logerr=sample.general.logerr), self.start)
                        # The reformatting (and repair) is skipped if the reformatted forward file already exists
                        if not os.path.isfile(outputfile1):
                            # Run reformat.sh
                            out, err, cmd = bbtools.reformat_reads(
                                forward_in=sample.general.fastqfiles[0],
                                forward_out=outputfile1,
                                returncmd=True)
                            write_to_logfile(out, err, self.logfile,
                                             sample.general.logout,
                                             sample.general.logerr, None, None)
                            # Run repair.sh (if necessary)
                            if outputfile2:
                                out, err, cmd = bbtools.repair_reads(
                                    forward_in=outputfile1,
                                    forward_out=repair_file1,
                                    returncmd=True)
                                write_to_logfile(out, err, self.logfile,
                                                 sample.general.logout,
                                                 sample.general.logerr, None,
                                                 None)
                        # Ensure that the output file(s) exist before declaring this a success
                        # NOTE(review): if no exception was raised, but the reformatted file is missing, the sample
                        # is left out of validated_reads without self.error being called - confirm this is intended
                        if os.path.isfile(outputfile1):
                            # Update the fastqfiles attribute to point to the repaired files
                            sample.general.fastqfiles = [
                                repair_file1, repair_file2
                            ] if repair_file2 else [outputfile1]
                            # Add the sample object to the list of samples passing the FASTQ validation step
                            validated_reads.append(sample)
                    except CalledProcessError:
                        # The file(s) can be created even if there is STDERR from reformat.sh
                        # This recovery is only attempted when a second (reverse) output file name was set
                        if os.path.isfile(outputfile1) and outputfile2:
                            try:
                                out, err, cmd = bbtools.repair_reads(
                                    forward_in=outputfile1,
                                    forward_out=repair_file1,
                                    returncmd=True)
                                write_to_logfile(out, err, self.logfile,
                                                 sample.general.logout,
                                                 sample.general.logerr, None,
                                                 None)
                                # Update the fastqfiles attribute to point to the repaired files
                                sample.general.fastqfiles = [repair_file1, repair_file2] if repair_file2 else \
                                    [repair_file1]
                                # Add the sample object to the list of samples passing the FASTQ validation step
                                validated_reads.append(sample)
                            except CalledProcessError:
                                # Write in the logs that there was an error detected in the FASTQ files
                                # (the same message is supplied as both the out and the err strings)
                                write_to_logfile(
                                    'An error was detected in the FASTQ files for sample {}. '
                                    'These files will not be processed further'
                                    .format(sample.name),
                                    'An error was detected in the FASTQ files for sample {}. '
                                    'These files will not be processed further'
                                    .format(sample.name), self.logfile,
                                    sample.general.logout,
                                    sample.general.logerr, None, None)
                                # Update metadata objects with error
                                self.error(sample, 'fastq_error')
                        else:
                            # Write in the logs that there was an error detected in the FASTQ files
                            # (the same message is supplied as both the out and the err strings)
                            write_to_logfile(
                                'An error was detected in the FASTQ files for sample {}. '
                                'These files will not be processed further'.
                                format(sample.name),
                                'An error was detected in the FASTQ files for sample {}. '
                                'These files will not be processed further'.
                                format(sample.name), self.logfile,
                                sample.general.logout, sample.general.logerr,
                                None, None)

                            # Update metadata objects with error
                            self.error(sample, 'fastq_error')
            else:
                # Update metadata objects with error
                self.error(sample, 'files_too_small')
        # Print the metadata to file
        # This happens while self.metadata still holds every sample, including those that failed validation
        metadataprinter.MetadataPrinter(self)
        # Overwrite self.metadata with objects that do not fail the validation
        self.metadata = validated_reads
Beispiel #25
0
 def fastqc_trimmedcorrected(self):
     """
     Run FastQC on the processed ('trimmedcorrected') fastq files, then write the metadata to file
     """
     # Level of processing supplied to the FastQC threader
     processing_level = 'trimmedcorrected'
     self.qualityobject.fastqcthreader(processing_level)
     metadataprinter.MetadataPrinter(self)
Beispiel #26
0
 def prodigal(self):
     """
     Detect open reading frames in the assemblies with prodigal, then write the metadata to file
     """
     # The analysis is performed by creating the Prodigal object
     prodigal.Prodigal(self)
     # Record the updated metadata
     metadataprinter.MetadataPrinter(self)
Beispiel #27
0
 def sistr(self):
     """
     Run the Sistr analysis, then write the metadata to file
     """
     # The analysis is performed by creating the Sistr object
     sistr.Sistr(self, 'sistr')
     # Record the updated metadata
     metadataprinter.MetadataPrinter(self)
Beispiel #28
0
 def mash(self):
     """
     Determine the closest refseq genome with mash, then write the metadata to file
     """
     # The analysis is performed by creating the Mash object
     mash.Mash(self, 'mash')
     # Record the updated metadata
     metadataprinter.MetadataPrinter(self)
Beispiel #29
0
 def contamination_detection(self):
     """
     Determine the levels of contamination in the reads with the quality object's contamination finder, then
     write the metadata to file
     """
     # Delegate the analysis to the quality object
     self.qualityobject.contamination_finder()
     # Record the updated metadata
     metadataprinter.MetadataPrinter(self)
Beispiel #30
0
 def resfinder(self):
     """
     Perform resistance finding on the assemblies with ResFinder, then write the metadata to file
     """
     # The analysis is performed by creating the ResFinder object
     ResFinder(self)
     # Record the updated metadata
     metadataprinter.MetadataPrinter(self)