def runBayesHammer(fastq_sin, fastq_frw, fastq_rev, sample_name, parent_dir):
    """Run the error-correction step on single-end or paired-end FASTQ input.

    A single-end file takes precedence when it exists; otherwise the
    forward/reverse pair is used.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value.
        fastq_frw: Path to the forward FASTQ file of a pair, or a falsy value.
        fastq_rev: Path to the reverse FASTQ file of a pair, or a falsy value.
        sample_name: Sample name handed to the Fastq/FastqPaired object.
        parent_dir: Directory in which the "BayesHammer" workspace and the
            BAYESHAMMER.txt completion file are created. Paths are built by
            plain string concatenation, so this presumably ends with a path
            separator -- confirm against callers.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was supplied.
    """
    # Explicit checks rather than ``assert``: asserts are stripped under -O,
    # which would silently disable this validation.
    single_ok = bool(fastq_sin and os.path.isfile(fastq_sin))
    paired_ok = bool(fastq_frw and fastq_rev
                     and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))
    if not (single_ok or paired_ok):
        sys.stderr.write("ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n")
        raise RuntimeError("FASTQ inputs were not provided.")

    # set up directory structure
    workspace_name = "BayesHammer"
    workspace = uF.setupDirectory(parent_dir, workspace_name)

    # create logging object
    log_file = workspace + 'BayesHammer.log'
    logObject = uF.createLoggerObject(log_file)

    if single_ok:
        # single-end error correction
        FastqObj = Fastq(fastq_sin, sample_name, logObject)
        FastqObj.error_correction(workspace)
    else:
        # paired-end error correction (validation guarantees the pair exists)
        FastqPairedObj = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)
        FastqPairedObj.error_correction(workspace)

    # create successful completion file if steps completed!
    with open(parent_dir + "BAYESHAMMER.txt", 'w') as conf_file:
        conf_file.write("BayesHammer: Module Completed Succesfully!")
def runStrainGST(fastq_sin, fastq_frw, fastq_rev, db, sample_name, parent_dir, options_kmerize, options_straingst):
    """Kmerize FASTQ input and run StrainGST on the resulting k-mer file.

    A single-end file takes precedence when it exists; otherwise the
    forward/reverse pair is used.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value.
        fastq_frw: Path to the forward FASTQ file of a pair, or a falsy value.
        fastq_rev: Path to the reverse FASTQ file of a pair, or a falsy value.
        db: Path to the StrainGST pangenome database file; must exist.
        sample_name: Sample name handed to the Fastq/FastqPaired/Kmer objects.
        parent_dir: Directory in which the "StrainGST" workspace and the
            STRAINGST.txt completion file are created. Paths are built by
            plain string concatenation, so this presumably ends with a path
            separator -- confirm against callers.
        options_kmerize: Passed as ``options`` to the ``kmerize`` call.
        options_straingst: Passed as ``options`` to ``run_straingst``.

    Raises:
        RuntimeError: If no usable FASTQ input was supplied, or if ``db`` is
            not an existing file.
    """
    # Explicit checks rather than ``assert``: asserts are stripped under -O,
    # which would silently disable this validation.
    single_ok = bool(fastq_sin and os.path.isfile(fastq_sin))
    paired_ok = bool(fastq_frw and fastq_rev
                     and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))
    if not (single_ok or paired_ok):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided.")

    if not (db and os.path.isfile(db)):
        sys.stderr.write(
            "ERROR: StrainGST pangenome database file is not available.")
        raise RuntimeError("StrainGST pangenome database file is not available.")

    # set up directory structure
    workspace_name = "StrainGST"
    workspace = uF.setupDirectory(parent_dir, workspace_name)

    # create logging object
    log_file = workspace + 'StrainGST.log'
    logObject = uF.createLoggerObject(log_file)

    # Both read types are kmerized through the same call; only the wrapper
    # object differs.
    if single_ok:
        FastqObj = Fastq(fastq_sin, sample_name, logObject)
    else:
        FastqObj = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)

    # run strainge kmerize
    kmer_file = FastqObj.kmerize(workspace, options=options_kmerize)

    # create Kmer object and run straingst against the database
    KmerObj = Kmer(kmer_file, sample_name, logObject)
    KmerObj.run_straingst(workspace, db, options=options_straingst)

    # create successful completion file if steps completed!
    with open(parent_dir + "STRAINGST.txt", 'w') as conf_file:
        conf_file.write("StrainGST: Module Completed Succesfully!")
def runFastQC(fastq_sin, fastq_frw, fastq_rev, sample_name, parent_dir, cores):
    """Validate FASTQ input and run the QC step on it.

    A single-end file takes precedence when it exists; otherwise the
    forward/reverse pair is used. The process exits with status 1 when the
    wrapper object's ``validate()`` reports the input as invalid.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value.
        fastq_frw: Path to the forward FASTQ file of a pair, or a falsy value.
        fastq_rev: Path to the reverse FASTQ file of a pair, or a falsy value.
        sample_name: Sample name handed to the Fastq/FastqPaired object.
        parent_dir: Directory in which the "FastQC" workspace and the
            FASTQC.txt completion file are created. Paths are built by plain
            string concatenation, so this presumably ends with a path
            separator -- confirm against callers.
        cores: Passed as ``cores`` to the ``run_qc`` call.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was supplied.
        SystemExit: If ``validate()`` returns a falsy value.
    """
    # Explicit checks rather than ``assert``: asserts are stripped under -O,
    # which would silently disable this validation.
    single_ok = bool(fastq_sin and os.path.isfile(fastq_sin))
    paired_ok = bool(fastq_frw and fastq_rev
                     and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))
    if not (single_ok or paired_ok):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided.")

    # set up directory structure
    workspace_name = "FastQC"
    workspace = uF.setupDirectory(parent_dir, workspace_name)

    # create logging object
    log_file = workspace + 'FastQC.log'
    logObject = uF.createLoggerObject(log_file)

    if single_ok:
        # single-end QC
        FastqObj = Fastq(fastq_sin, sample_name, logObject)

        # validate FASTQ file is indeed a FASTQ file
        if not FastqObj.validate():
            sys.stderr.write(
                "ERROR: FASTQ file %s seems to be in invalid format. Exiting now...\n"
                % FastqObj.fastq)
            sys.exit(1)

        # The return value of run_qc was never used, so it is not kept.
        FastqObj.run_qc(workspace, cores=cores)
    else:
        # paired-end QC (validation guarantees the pair exists)
        FastqPairedObj = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)

        # validate both FASTQ files
        if not FastqPairedObj.validate():
            sys.stderr.write(
                "ERROR: At least one of the FASTQ files seems to be in an invalid format. Exiting now ...\n"
            )
            sys.exit(1)

        FastqPairedObj.run_qc(workspace, cores=cores)

    # create successful completion file if steps completed!
    with open(parent_dir + "FASTQC.txt", 'w') as conf_file:
        conf_file.write("FastQC: Module Completed Succesfully!")
def runRefAlignment(fastq_sin, fastq_frw, fastq_rev, reference_fasta, sample_name, parent_dir, bwa_options, cores):
    """Align reads to a reference and post-process the resulting alignment.

    A single-end file takes precedence when it exists; otherwise the
    forward/reverse pair is used. After alignment the SAM output is
    compressed to BAM, sorted, indexed, duplicate-marked and indexed again.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value.
        fastq_frw: Path to the forward FASTQ file of a pair, or a falsy value.
        fastq_rev: Path to the reverse FASTQ file of a pair, or a falsy value.
        reference_fasta: Path to the reference FASTA file; must exist.
        sample_name: Sample name handed to the wrapper objects.
        parent_dir: Directory in which the "ReferenceAlignment" workspace and
            the REFALIGNMENT.txt completion file are created. Paths are built
            by plain string concatenation, so this presumably ends with a
            path separator -- confirm against callers.
        bwa_options: Passed as ``options`` to ``align_to_reference``.
        cores: Passed as ``cores`` to ``align_to_reference``.

    Raises:
        RuntimeError: If no usable FASTQ input was supplied, or if
            ``reference_fasta`` is not an existing file.
    """
    # Each path is guarded for falsiness before os.path.isfile: calling
    # isfile(None) raises TypeError, which previously turned a valid
    # paired-end run (fastq_sin=None) into a spurious "inputs not provided"
    # failure. Explicit checks also survive python -O, unlike ``assert``.
    single_ok = bool(fastq_sin and os.path.isfile(fastq_sin))
    paired_ok = bool(fastq_frw and fastq_rev
                     and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))
    if not (single_ok or paired_ok):
        sys.stderr.write("ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n")
        raise RuntimeError("FASTQ inputs were not provided.")

    if not (reference_fasta and os.path.isfile(reference_fasta)):
        sys.stderr.write("ERROR: Reference FASTA file does not have the correct format.\n")
        raise RuntimeError("Reference FASTA file is not available.")

    # set up directory structure
    workspace_name = "ReferenceAlignment"
    workspace = uF.setupDirectory(parent_dir, workspace_name)

    # create logging object
    log_file = workspace + 'ReferenceAlignment.log'
    logObject = uF.createLoggerObject(log_file)

    if single_ok:
        # single-end alignment
        FastqObj = Fastq(fastq_sin, sample_name, logObject)
        sam_file = FastqObj.align_to_reference(workspace, reference_fasta, options=bwa_options, cores=cores)
    else:
        # paired-end alignment (validation guarantees the pair exists)
        FastqPairedObj = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)
        sam_file = FastqPairedObj.align_to_reference(workspace, reference_fasta, options=bwa_options, cores=cores)

    # create Alignment object
    AlignmentObj = Alignment(sam_file, sample_name, logObject)

    # compress SAM to BAM
    AlignmentObj.compress_sam(workspace, clean=True)

    # sort BAM file
    AlignmentObj.sort_bam(workspace, clean=True)

    # index the sorted BAM file
    AlignmentObj.index_bam(workspace)

    # mark duplicates
    AlignmentObj.mark_dups(workspace, clean=True)

    # index again after duplicate marking
    AlignmentObj.index_bam(workspace)

    # create successful completion file if steps completed!
    with open(parent_dir + "REFALIGNMENT.txt", 'w') as conf_file:
        conf_file.write("Reference Alignment: Module Completed Succesfully!")
def runNanoCanu(nanopore_fastq, sample_dir, sample_name, canu_options, memory, cores):
    """Run a Canu assembly on a Nanopore FASTQ file.

    When the input path ends in ``.gz`` a new FASTQ instance is first created
    in the workspace via ``create_new_instance(compress=False,
    change_reference=True)`` and Canu is run on ``FastqObj.fastq``; that file
    is removed afterwards. (Presumably this is an uncompressed copy --
    confirm against the Fastq class.)

    Args:
        nanopore_fastq: Path to the Nanopore FASTQ file; must exist.
        sample_dir: Directory in which the "Canu_Assembly" workspace and the
            CANU_ASSEMBLY.txt completion file are created. Paths are built by
            plain string concatenation, so this presumably ends with a path
            separator -- confirm against callers.
        sample_name: Sample name handed to the wrapper objects.
        canu_options: Option string for Canu; surrounding double quotes are
            stripped before it is passed on.
        memory: Passed as ``memory`` to ``run_canu``.
        cores: Passed as ``cores`` to ``run_canu``.

    Raises:
        RuntimeError: If ``nanopore_fastq`` is falsy or not an existing file.
    """
    # Explicit check rather than ``assert``: asserts are stripped under -O.
    if not (nanopore_fastq and os.path.isfile(nanopore_fastq)):
        raise RuntimeError(
            "ERROR: FASTQ input(s) were not provided properly. Please fix. Raising exception\n"
        )

    # The stripped value used to be stored in an unused variable, so the
    # quoted string reached Canu unchanged; strip in place instead.
    canu_options = canu_options.strip('"')

    # set up directory structure
    workspace_name = "Canu_Assembly"
    workspace = uF.setupDirectory(sample_dir, workspace_name)

    # create logging object
    log_file = workspace + 'Canu_Assembly.log'
    logObject = uF.createLoggerObject(log_file)

    reads_path = nanopore_fastq
    temp_fastq = None
    if nanopore_fastq.endswith('.gz'):
        FastqObj = Fastq(nanopore_fastq, sample_name, logObject)
        FastqObj.create_new_instance(workspace, compress=False, change_reference=True)
        reads_path = FastqObj.fastq
        # Never schedule the caller's own input file for deletion.
        if reads_path != nanopore_fastq:
            temp_fastq = reads_path

    try:
        # Initialize Nanopore Object and run Canu for assembly
        NanoporeObj = Nanopore(reads_path, sample_name, logObject)
        NanoporeObj.run_canu(workspace, options=canu_options, memory=memory, cores=cores)
    finally:
        # Clean up the temporary FASTQ instance even when Canu fails.
        # os.remove avoids building a shell command from a file path;
        # errors are ignored to keep the best-effort semantics of `rm -f`.
        if temp_fastq:
            try:
                os.remove(temp_fastq)
            except OSError:
                pass

    with open(sample_dir + "CANU_ASSEMBLY.txt", 'w') as conf_file:
        conf_file.write("Canu Assembly: Module Completed Succesfully!")
def runSortMeRNA(fastq_sin, fastq_frw, fastq_rev, database_dir, sample_name, parent_dir, cores, no_gzip):
    """Split FASTQ input into ribosomal and non-ribosomal RNA reads.

    A single-end file takes precedence when it exists; otherwise the
    forward/reverse pair is used.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value.
        fastq_frw: Path to the forward FASTQ file of a pair, or a falsy value.
        fastq_rev: Path to the reverse FASTQ file of a pair, or a falsy value.
        database_dir: Passed positionally to ``filter_ribo_rna``.
        sample_name: Sample name handed to the Fastq/FastqPaired object.
        parent_dir: Directory in which the "SortMeRNA" workspace and the
            SORTMERNA.txt completion file are created. Paths are built by
            plain string concatenation, so this presumably ends with a path
            separator -- confirm against callers.
        cores: Passed as ``cores`` to ``filter_ribo_rna``.
        no_gzip: When truthy, ``compress=False`` is passed on; otherwise
            ``compress=True``.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was supplied.
    """
    # Explicit checks rather than ``assert``: asserts are stripped under -O,
    # which would silently disable this validation.
    single_ok = bool(fastq_sin and os.path.isfile(fastq_sin))
    paired_ok = bool(fastq_frw and fastq_rev
                     and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))
    if not (single_ok or paired_ok):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided.")

    # set up directory structure
    workspace_name = "SortMeRNA"
    workspace = uF.setupDirectory(parent_dir, workspace_name)

    # create logging object
    log_file = workspace + 'SortMeRNA.log'
    logObject = uF.createLoggerObject(log_file)

    if single_ok:
        # single-end rRNA filtering
        FastqObj = Fastq(fastq_sin, sample_name, logObject)
        FastqObj.filter_ribo_rna(workspace, database_dir, cores=cores, compress=(not no_gzip))
    else:
        # paired-end rRNA filtering (validation guarantees the pair exists)
        FastqPairedObj = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)
        FastqPairedObj.filter_ribo_rna(workspace, database_dir, cores=cores, compress=(not no_gzip))

    # create successful completion file if steps completed!
    with open(parent_dir + "SORTMERNA.txt", 'w') as conf_file:
        conf_file.write("SortMeRNA: Module Completed Succesfully!")
def runKneadData(fastq_sin, fastq_frw, fastq_rev, kneaddata_options, sample_name, parent_dir, cores, no_gzip):
    """Run the KneadData step on single-end or paired-end FASTQ input.

    A single-end file takes precedence when it exists; otherwise the
    forward/reverse pair is used.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value.
        fastq_frw: Path to the forward FASTQ file of a pair, or a falsy value.
        fastq_rev: Path to the reverse FASTQ file of a pair, or a falsy value.
        kneaddata_options: Passed as ``options`` to ``run_kneaddata``.
        sample_name: Sample name handed to the Fastq/FastqPaired object.
        parent_dir: Directory in which the "KneadData" workspace and the
            KNEADDATA.txt completion file are created. Paths are built by
            plain string concatenation, so this presumably ends with a path
            separator -- confirm against callers.
        cores: Passed as ``cores`` to ``run_kneaddata``.
        no_gzip: When truthy, ``compress=False`` is passed on; otherwise
            ``compress=True``.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was supplied.
    """
    # Explicit checks rather than ``assert``: asserts are stripped under -O,
    # which would silently disable this validation.
    single_ok = bool(fastq_sin and os.path.isfile(fastq_sin))
    paired_ok = bool(fastq_frw and fastq_rev
                     and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))
    if not (single_ok or paired_ok):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided.")

    # set up directory structure
    workspace_name = "KneadData"
    workspace = uF.setupDirectory(parent_dir, workspace_name)

    # create logging object
    log_file = workspace + 'KneadData.log'
    logObject = uF.createLoggerObject(log_file)

    if single_ok:
        # single-end KneadData run
        FastqObj = Fastq(fastq_sin, sample_name, logObject)
        FastqObj.run_kneaddata(workspace, options=kneaddata_options, cores=cores, compress=(not no_gzip))
    else:
        # paired-end KneadData run (validation guarantees the pair exists)
        FastqPairedObj = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)
        FastqPairedObj.run_kneaddata(workspace, options=kneaddata_options, cores=cores, compress=(not no_gzip))

    # create successful completion file if steps completed!
    with open(parent_dir + "KNEADDATA.txt", 'w') as conf_file:
        conf_file.write("KneadData: Module Completed Succesfully!")
def runCentrifuge(fastq_sin, fastq_frw, fastq_rev, centrifuge_index, sample_name, parent_dir, cores):
    """Bin reads taxonomically for single-end or paired-end FASTQ input.

    A single-end file takes precedence when it exists; otherwise the
    forward/reverse pair is used.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value.
        fastq_frw: Path to the forward FASTQ file of a pair, or a falsy value.
        fastq_rev: Path to the reverse FASTQ file of a pair, or a falsy value.
        centrifuge_index: Passed positionally to ``bin_taxonomically``.
        sample_name: Sample name handed to the Fastq/FastqPaired object.
        parent_dir: Directory in which the "Centrifuge" workspace and the
            CENTRIFUGE.txt completion file are created. Paths are built by
            plain string concatenation, so this presumably ends with a path
            separator -- confirm against callers.
        cores: Passed as ``cores`` to ``bin_taxonomically``.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was supplied.
    """
    # Explicit checks rather than ``assert``: asserts are stripped under -O,
    # which would silently disable this validation.
    single_ok = bool(fastq_sin and os.path.isfile(fastq_sin))
    paired_ok = bool(fastq_frw and fastq_rev
                     and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))
    if not (single_ok or paired_ok):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided.")

    # set up directory structure
    workspace_name = "Centrifuge"
    workspace = uF.setupDirectory(parent_dir, workspace_name)

    # create logging object
    log_file = workspace + 'Centrifuge.log'
    logObject = uF.createLoggerObject(log_file)

    if single_ok:
        # single-end taxonomic binning
        FastqObj = Fastq(fastq_sin, sample_name, logObject)
        FastqObj.bin_taxonomically(workspace, centrifuge_index, cores=cores)
    else:
        # paired-end taxonomic binning (validation guarantees the pair exists)
        FastqPairedObj = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)
        FastqPairedObj.bin_taxonomically(workspace, centrifuge_index, cores=cores)

    # create successful completion file if steps completed!
    with open(parent_dir + "CENTRIFUGE.txt", 'w') as conf_file:
        conf_file.write("Centrifuge: Module Completed Succesfully!")
def runSymlinkInput(fastq_sin, fastq_frw, fastq_rev, parent_dir, sample_name):
    """Create symlinks to the input FASTQ file(s) inside a workspace.

    A single-end file takes precedence when it exists; otherwise the
    forward/reverse pair is used.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value.
        fastq_frw: Path to the forward FASTQ file of a pair, or a falsy value.
        fastq_rev: Path to the reverse FASTQ file of a pair, or a falsy value.
        parent_dir: Directory in which the "Symlink_Input" workspace and the
            SYMLINK_INPUT.txt completion file are created. Paths are built by
            plain string concatenation, so this presumably ends with a path
            separator -- confirm against callers.
        sample_name: Sample name handed to the Fastq/FastqPaired object.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was supplied.
    """
    # Explicit checks rather than ``assert``: asserts are stripped under -O,
    # which would silently disable this validation.
    single_ok = bool(fastq_sin and os.path.isfile(fastq_sin))
    paired_ok = bool(fastq_frw and fastq_rev
                     and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))
    if not (single_ok or paired_ok):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided.")

    # set up directory structure
    workspace_name = "Symlink_Input"
    workspace = uF.setupDirectory(parent_dir, workspace_name)

    # create logging object
    log_file = workspace + 'Symlink.log'
    logObject = uF.createLoggerObject(log_file)

    if single_ok:
        # symlink the single-end file
        FastqObj = Fastq(fastq_sin, sample_name, logObject)
        FastqObj.create_symlink(workspace)
    else:
        # symlink the pair (validation guarantees the pair exists)
        FastqPairedObj = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)
        FastqPairedObj.create_symlink(workspace)

    with open(parent_dir + "SYMLINK_INPUT.txt", 'w') as conf_file:
        conf_file.write("SymlinkInput: Module Completed Succesfully!")
def runTrimmomatic(fastq_sin, fastq_frw, fastq_rev, sample_name, parent_dir, trimmomatic_options, cores, no_gzip):
    """Quality-trim single-end or paired-end FASTQ input.

    A single-end file takes precedence when it exists; otherwise the
    forward/reverse pair is used.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value.
        fastq_frw: Path to the forward FASTQ file of a pair, or a falsy value.
        fastq_rev: Path to the reverse FASTQ file of a pair, or a falsy value.
        sample_name: Sample name handed to the Fastq/FastqPaired object.
        parent_dir: Directory in which the "QualityTrim" workspace and the
            QUALITYTRIM.txt completion file are created. Paths are built by
            plain string concatenation, so this presumably ends with a path
            separator -- confirm against callers.
        trimmomatic_options: Option string; surrounding double quotes are
            stripped before it is passed as ``options`` to ``quality_trim``.
        cores: Passed as ``cores`` to ``quality_trim``.
        no_gzip: When truthy, ``compress=False`` is passed on; otherwise
            ``compress=True``.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was supplied.
    """
    # Explicit checks rather than ``assert``: asserts are stripped under -O,
    # which would silently disable this validation.
    single_ok = bool(fastq_sin and os.path.isfile(fastq_sin))
    paired_ok = bool(fastq_frw and fastq_rev
                     and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))
    if not (single_ok or paired_ok):
        sys.stderr.write("ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n")
        raise RuntimeError("FASTQ inputs were not provided.")

    trimmomatic_options = trimmomatic_options.strip('"')

    # set up directory structure
    workspace_name = "QualityTrim"
    workspace = uF.setupDirectory(parent_dir, workspace_name)

    # create logging object
    log_file = workspace + 'QualityTrim.log'
    logObject = uF.createLoggerObject(log_file)

    if single_ok:
        # single-end quality trimming
        FastqObj = Fastq(fastq_sin, sample_name, logObject)
        FastqObj.quality_trim(workspace, options=trimmomatic_options, cores=cores, compress=(not no_gzip))
    else:
        # paired-end quality trimming (validation guarantees the pair exists)
        FastqPairedObj = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)
        FastqPairedObj.quality_trim(workspace, options=trimmomatic_options, cores=cores, compress=(not no_gzip))

    # create successful completion file if steps completed!
    with open(parent_dir + "QUALITYTRIM.txt", 'w') as conf_file:
        conf_file.write("QualityTrim: Module Completed Succesfully!")
def runAdapterTrim(fastq_sin, fastq_frw, fastq_rev, sample_name, parent_dir, trimgalore_options, cutadapt_options):
    """Trim adapters from single-end or paired-end FASTQ input.

    A single-end file takes precedence when it exists; otherwise the
    forward/reverse pair is used. ``cutadapt_adapter_trim`` is called when
    ``cutadapt_options`` is non-empty after quote stripping; otherwise
    ``trim_galore_adapter_trim`` is called with ``trimgalore_options``.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value.
        fastq_frw: Path to the forward FASTQ file of a pair, or a falsy value.
        fastq_rev: Path to the reverse FASTQ file of a pair, or a falsy value.
        sample_name: Sample name handed to the Fastq/FastqPaired object.
        parent_dir: Directory in which the "AdapterTrim" workspace and the
            ADAPTERTRIM.txt completion file are created. Paths are built by
            plain string concatenation, so this presumably ends with a path
            separator -- confirm against callers.
        trimgalore_options: Option string for the trim-galore path;
            surrounding double quotes are stripped.
        cutadapt_options: Option string for the cutadapt path; surrounding
            double quotes are stripped.

    Raises:
        RuntimeError: If no usable FASTQ input was supplied, or if both
            option strings are non-empty after stripping.
    """
    # Explicit checks rather than ``assert``: asserts are stripped under -O,
    # which would silently disable this validation.
    single_ok = bool(fastq_sin and os.path.isfile(fastq_sin))
    paired_ok = bool(fastq_frw and fastq_rev
                     and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))
    if not (single_ok or paired_ok):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided.")

    trimgalore_options = trimgalore_options.strip('"')
    cutadapt_options = cutadapt_options.strip('"')

    # Only one adapter trimmer may be configured per run.
    if trimgalore_options and cutadapt_options:
        sys.stderr.write(
            "ERROR: Both filtering options with cutadapt and trim galore provided. Can only use one adapter trimmer. Exiting now ...\n"
        )
        raise RuntimeError("Both cutadapt and trim galore options provided.")

    # set up directory structure
    workspace_name = "AdapterTrim"
    workspace = uF.setupDirectory(parent_dir, workspace_name)

    # create logging object
    log_file = workspace + 'AdapterTrim.log'
    logObject = uF.createLoggerObject(log_file)

    if single_ok:
        # single-end adapter trimming
        FastqObj = Fastq(fastq_sin, sample_name, logObject)
        if cutadapt_options:
            FastqObj.cutadapt_adapter_trim(workspace, options=cutadapt_options)
        else:
            FastqObj.trim_galore_adapter_trim(workspace, options=trimgalore_options)
    else:
        # paired-end adapter trimming (validation guarantees the pair exists)
        FastqPairedObj = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)
        if cutadapt_options:
            FastqPairedObj.cutadapt_adapter_trim(workspace, options=cutadapt_options)
        else:
            FastqPairedObj.trim_galore_adapter_trim(workspace, options=trimgalore_options)

    # create successful completion file if steps completed!
    with open(parent_dir + "ADAPTERTRIM.txt", 'w') as conf_file:
        conf_file.write("AdapterTrim: Module Completed Succesfully!")
def runSubsample(fastq_sin, fastq_frw, fastq_rev, sample_name, parent_dir, reads, bases, no_gzip):
    """Subsample single-end or paired-end FASTQ input by reads or by bases.

    A single-end file takes precedence when it exists; otherwise the
    forward/reverse pair is used. ``reads`` takes priority over ``bases``;
    when neither is given, 100000 reads are subsampled.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value.
        fastq_frw: Path to the forward FASTQ file of a pair, or a falsy value.
        fastq_rev: Path to the reverse FASTQ file of a pair, or a falsy value.
        sample_name: Sample name handed to the Fastq/FastqPaired object.
        parent_dir: Directory in which the "Subsample" workspace and the
            SUBSAMPLE.txt completion file are created. Paths are built by
            plain string concatenation, so this presumably ends with a path
            separator -- confirm against callers.
        reads: When truthy, passed as ``reads`` to ``subsample``.
        bases: When truthy (and ``reads`` is falsy), passed as ``bases`` to
            ``downsample``.
        no_gzip: When truthy, ``compress=False`` is passed on; otherwise
            ``compress=True``.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was supplied.
    """
    # Explicit checks rather than ``assert``: asserts are stripped under -O,
    # which would silently disable this validation.
    single_ok = bool(fastq_sin and os.path.isfile(fastq_sin))
    paired_ok = bool(fastq_frw and fastq_rev
                     and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))
    if not (single_ok or paired_ok):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided.")

    # Fallback announced by the log message below. The falsy ``reads`` value
    # used to be forwarded unchanged, so the promised default never applied.
    default_reads = 100000

    # set up directory structure
    workspace_name = "Subsample"
    workspace = uF.setupDirectory(parent_dir, workspace_name)

    # create logging object
    log_file = workspace + 'Subsample.log'
    logObject = uF.createLoggerObject(log_file)

    # Both wrapper types are driven through the same subsample/downsample
    # calls, so only the object construction differs.
    if single_ok:
        ReadsObj = Fastq(fastq_sin, sample_name, logObject)
    else:
        ReadsObj = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)

    compress = not no_gzip
    if reads:
        ReadsObj.subsample(workspace, reads=reads, compress=compress)
    elif bases:
        ReadsObj.downsample(workspace, bases=bases, compress=compress)
    else:
        logObject.error(
            "No subsampling quantity specified defaulting to 100K reads being subsampled!"
        )
        ReadsObj.subsample(workspace, reads=default_reads, compress=compress)

    # create successful completion file if steps completed!
    with open(parent_dir + "SUBSAMPLE.txt", 'w') as conf_file:
        conf_file.write("Subsample: Module Completed Succesfully!")