def assemble_genomes(self):
     """
     Assemble the genomes with skesa, then write the metadata to file
     """
     # Build the skesa wrapper around this object and run it immediately
     skesa.Skesa(inputobject=self).main()
     metadataprinter.MetadataPrinter(inputobject=self)
 def evaluate_assemblies(self):
     """
     Run the assembly evaluation (Quast), then write the metadata to file
     """
     # Build the evaluation object around this object and run it immediately
     evaluate.AssemblyEvaluation(inputobject=self).main()
     metadataprinter.MetadataPrinter(inputobject=self)
 def quality_features(self, analysis):
     """
     Extract assembly features (e.g. total genome size, longest contig, and N50)
     :param analysis: value forwarded unchanged to quality.QualityFeatures as its analysis argument
     """
     extractor = quality.QualityFeatures(inputobject=self,
                                         analysis=analysis)
     extractor.main()
     # Write the metadata to file
     metadataprinter.MetadataPrinter(self)
 def contamination_detection(self):
     """
     Determine the levels of contamination in the reads, then write the metadata to file
     """
     # The contamination finder lives on the previously created quality object
     find_contamination = self.qualityobject.contamination_finder
     find_contamination(report_path=self.reportpath, debug=self.debug)
     metadataprinter.MetadataPrinter(inputobject=self)
 def quality(self):
     """
     Creates quality objects and runs quality assessments and quality processes on the
     supplied sequences. If self.preprocess is set, the program exits once these steps are complete

     :raises SystemExit: when self.preprocess is truthy, after the metadata have been written to file
     """
     # Validate that the FASTQ files are in the proper format, and that there are no issues e.g. different numbers
     # of forward and reverse reads, read length longer than quality score length, proper extension.
     # Validation is skipped when running in debug mode
     if not self.debug:
         self.fastq_validate()
     # Run FastQC on the unprocessed fastq files
     self.fastqc_raw()
     # Perform quality trimming
     self.quality_trim()
     # Run FastQC on the trimmed files
     self.fastqc_trimmed()
     # Perform error correcting on the reads
     self.error_correct()
     # Detect contamination in the reads
     self.contamination_detection()
     # Run FastQC on the processed fastq files
     self.fastqc_trimmedcorrected()
     # Print the metadata to file
     metadataprinter.MetadataPrinter(inputobject=self)
     # Exit if only pre-processing of data is requested
     if self.preprocess:
         logging.info('Pre-processing complete')
         # quit() is a helper injected by the site module for interactive sessions and is not guaranteed to exist
         # (e.g. python -S, frozen or embedded interpreters). Raising SystemExit directly yields the same clean
         # exit without depending on it
         raise SystemExit
 def sixteens(self):
     """
     Perform the full-length 16S BLAST analyses, then write the metadata to file
     """
     # analysistype and cutoff are passed straight through to the BLAST wrapper
     BLAST(args=self,
           analysistype='sixteens_full',
           cutoff=95).seekr()
     metadataprinter.MetadataPrinter(inputobject=self)
 def virulence(self):
     """
     Detect virulence genes with the BLAST wrapper, then write the metadata to file
     """
     BLAST(args=self,
           analysistype='virulence').seekr()
     metadataprinter.MetadataPrinter(inputobject=self)
 def resfinder(self):
     """
     Find resistance genes in the assemblies, then write the metadata to file
     """
     BLAST(args=self,
           analysistype='resfinder_assembled').seekr()
     metadataprinter.MetadataPrinter(inputobject=self)
 def seqsero(self):
     """
     Run the SeqSero2 analyses (for Salmonella samples), then write the metadata to file
     """
     # The SeqSero wrapper takes this object as its only (positional) argument
     SeqSero(self).main()
     metadataprinter.MetadataPrinter(inputobject=self)
 def sistr(self):
     """
     Run the Sistr analyses, then write the metadata to file
     """
     # Inside this method, the name sistr resolves to the module-level sistr, not to this method
     sistr.Sistr(inputobject=self,
                 analysistype='sistr').main()
     metadataprinter.MetadataPrinter(inputobject=self)
 def geneseekr(self):
     """
     Search for the genes of interest with the BLAST wrapper, then write the metadata to file
     """
     BLAST(args=self,
           analysistype='genesippr',
           cutoff=95).seekr()
     metadataprinter.MetadataPrinter(inputobject=self)
 def helper(self):
     """
     Helper function for file creation (if desired), manipulation, quality assessment,
     and trimming as well as the assembly. Populates self.runmetadata, and writes the metadata to file
     """
     if not self.basicassembly:
         # Full mode: the runmetadata object is populated by parsing the SampleSheet.csv,
         # GenerateFASTQRunStatistics.xml, and RunInfo.xml files
         self.runinfo = os.path.join(self.path, 'RunInfo.xml')
         self.runmetadata = runMetadata.Metadata(passed=self)
         # If the RunInfo.xml file was provided, extract the flowcell ID and the instrument name
         self.runmetadata.parseruninfo()
         # Pull the PhiX mapping information from the run
         phix.PhiX(inputobject=self).main()
         # No bcl or nohup calls were made, so record both as 'NA' in the metadata sheet
         for strain in self.runmetadata.samples:
             strain.commands = GenObject()
             strain.commands.nohupcall = 'NA'
             strain.commands.bclcall = 'NA'
         # Move/link the FASTQ files to the strain-specific working directories
         fastqmover.FastqMover(inputobject=self)
     else:
         # Basic mode: simple assembly that does not require accessory files (SampleSheet.csv, etc).
         self.runmetadata = Basic(inputobject=self)
     # Write the metadata to file
     metadataprinter.MetadataPrinter(inputobject=self)
 def mash(self):
     """
     Determine the closest refseq genome with mash, then write the metadata to file
     """
     logging.info('Running MASH analyses')
     # Only the Mash object is instantiated here; no further method is called on it.
     # Inside this method, the name mash resolves to the module-level mash, not to this method
     mash.Mash(inputobject=self,
               analysistype='mash')
     metadataprinter.MetadataPrinter(inputobject=self)
 def run_gdcs(self):
     """
     Run the GDCS analysis: presence of genomically-dispersed conserved sequences (genes from MLST, rMLST,
     and cgMLST analyses). The metadata are written to file afterwards
     """
     GDCS(inputobject=self).main()
     metadataprinter.MetadataPrinter(inputobject=self)
 def legacy_vtyper(self):
     """
     Run the legacy (ePCR-based) vtyper, then write the metadata to file
     """
     # Two mismatches are passed to the legacy vtyper
     typer = LegacyVtyper(inputobject=self, analysistype='legacy_vtyper', mismatches=2)
     typer.vtyper()
     metadataprinter.MetadataPrinter(inputobject=self)
 def prophages(self):
     """
     Detect prophages, then write the metadata to file
     """
     # Fixed cutoff of 90, with unique enabled
     Prophages(args=self, analysistype='prophages', cutoff=90, unique=True).seekr()
     metadataprinter.MetadataPrinter(inputobject=self)
 def univec(self):
     """
     Search for Univec contamination, then write the metadata to file
     """
     # Fixed cutoff of 80, with unique enabled
     Univec(args=self, analysistype='univec', cutoff=80, unique=True).seekr()
     metadataprinter.MetadataPrinter(inputobject=self)
 def coregenome(self):
     """
     Calculate the core genome, then write the metadata to file
     """
     # Genus-specific core genome search
     core.CoreGenome(args=self, analysistype='coregenome', genus_specific=True).seekr()
     # Instantiate the annotated core object once the search has finished
     core.AnnotatedCore(inputobject=self)
     metadataprinter.MetadataPrinter(inputobject=self)
 def rmlst_assembled(self):
     """
     rMLST analyses on assemblies. An existing rmlst.csv report in the report path is parsed instead of
     re-running the BLAST analyses
     """
     rmlst_report = os.path.join(self.reportpath, 'rmlst.csv')
     if os.path.isfile(rmlst_report):
         # The report is already present: parse it
         ReportParse(args=self, analysistype='rmlst').report_parse()
     else:
         # No report yet: run the analyses
         BLAST(args=self, analysistype='rmlst', cutoff=100).seekr()
     metadataprinter.MetadataPrinter(inputobject=self)
 def ec_typer(self):
     """
     Serotyping of assemblies with ECTyper, then write the metadata to file
     """
     typer = ECTyper(
         metadata=self.runmetadata,
         report_path=self.reportpath,
         # The assemblies are read from the raw_assemblies folder within the run path
         assembly_path=os.path.join(self.path, 'raw_assemblies'),
         threads=self.cpus,
         logfile=self.logfile,
     )
     typer.main()
     metadataprinter.MetadataPrinter(inputobject=self)
 def univec(self):
     """
     Search for Univec contamination unless the univec.csv report already exists in the report path
     """
     univec_report = os.path.join(self.reportpath, 'univec.csv')
     # Only run the search if the report has not been created yet
     if not os.path.isfile(univec_report):
         Univec(args=self, analysistype='univec', cutoff=80, unique=True).seekr()
     metadataprinter.MetadataPrinter(inputobject=self)
 def assembly_stats(self):
     """
     Run basic quality analyses on the assemblies: assembly metrics, ORF detection, and CLARK
     """
     # Assembly metrics
     # NOTE(review): an earlier comment here referred to raw assemblies, but the value passed is 'polished' -
     # confirm which is intended
     self.quality_features('polished')
     # Detect ORFs
     self.prodigal()
     # Run the CLARK analyses
     self.clark()
     metadataprinter.MetadataPrinter(inputobject=self)
 def sixteens(self):
     """
     Perform the 16S analyses with SixteensFull, then write the metadata to file
     """
     # Collect the keyword arguments for the SixteensFull object; instantiating it is the only call made here
     settings = {
         'args': self,
         'pipelinecommit': self.commit,
         'startingtime': self.starttime,
         'scriptpath': self.homepath,
         'analysistype': 'sixteens_full',
         'cutoff': 0.95,
     }
     SixteensFull(**settings)
     metadataprinter.MetadataPrinter(inputobject=self)
 def serosippr(self):
     """
     Perform the serotyping analyses with Serotype, then write the metadata to file
     """
     # Collect the keyword arguments for the Serotype object; instantiating it is the only call made here
     settings = {
         'args': self,
         'pipelinecommit': self.commit,
         'startingtime': self.starttime,
         'scriptpath': self.homepath,
         'analysistype': 'serosippr',
         'cutoff': 0.90,
         'pipeline': True,
     }
     Serotype(**settings)
     metadataprinter.MetadataPrinter(inputobject=self)
 def prophages(self, cutoff=90):
     """
     Detect prophages unless the prophages.csv report already exists in the report path
     :param cutoff: cutoff value to be used in the analyses
     """
     # The Prophages object is always created, even if the search itself ends up being skipped
     finder = Prophages(args=self, analysistype='prophages', cutoff=cutoff, unique=True)
     prophage_report = os.path.join(self.reportpath, 'prophages.csv')
     if not os.path.isfile(prophage_report):
         finder.seekr()
     metadataprinter.MetadataPrinter(inputobject=self)
 def serosippr(self):
     """
     Perform the serotyping analyses with the BLAST wrapper, then write the metadata to file
     """
     # Genus-specific search with a cutoff of 90 and unique enabled
     BLAST(args=self, analysistype='serosippr', cutoff=90, genus_specific=True, unique=True).seekr()
     metadataprinter.MetadataPrinter(inputobject=self)
    def mob_suite(self):
        """

        """
        mob = MobRecon(metadata=self.runmetadata.samples,
                       analysistype='mobrecon',
                       databasepath=self.reffilepath,
                       threads=self.cpus,
                       logfile=self.logfile,
                       reportpath=self.reportpath)
        mob.mob_recon()
        metadataprinter.MetadataPrinter(inputobject=self)
 def genesippr(self):
     """
     Search for the genes of interest with GeneSippr, then write the metadata to file
     """
     # Collect the keyword arguments for the GeneSippr object; instantiating it is the only call made here
     settings = {
         'args': self,
         'pipelinecommit': self.commit,
         'startingtime': self.starttime,
         'scriptpath': self.homepath,
         'analysistype': 'genesippr',
         'cutoff': 0.95,
         'pipeline': False,
         'revbait': False,
     }
     GeneSippr(**settings)
     metadataprinter.MetadataPrinter(inputobject=self)
# Example #29
# 0
    def objectprep(self):

        # Only find the data files if a datapath is provided
        if self.datapath:
            self.runmetadata = createobject.ObjectCreation(self)
        else:
            for sample in self.runmetadata.samples:
                sample.general.abundancefile = sample.general.abundance
                sample.general.assignmentfile = sample.general.classification
                sample.general.fastqfiles = [sample.general.combined]
        # Print the metadata to file
        metadataprinter.MetadataPrinter(self)
        # Load the results in the csv files into dictionaries
        self.taxids()
 def ressippr(self):
     """
     Find resistance genes in the raw reads, then write the metadata to file
     """
     # Collect the keyword arguments for the Resistance object
     settings = {
         'args': self,
         'pipelinecommit': self.commit,
         'startingtime': self.starttime,
         'scriptpath': self.homepath,
         'analysistype': 'resfinder',
         'cutoff': 0.7,
         'pipeline': False,
         'revbait': True,
     }
     Resistance(**settings).main()
     metadataprinter.MetadataPrinter(inputobject=self)