def cli():
    """
    Command line entry point that chains allele finding and allele profiling.

    The argument parser returned by allele_finder.cli() is used as the parent of this script's parser. The parsed
    arguments drive an allele_finder.AlleleFinder run, and the records it exposes are handed to ProfileAlleles.
    """
    # Build this script's parser on top of the one defined in allele_finder.py, then parse the command line
    base_parser = allele_finder.cli()
    cli_parser = ArgumentParser(parents=[base_parser])
    arguments = cli_parser.parse_args()
    SetupLogging(debug=arguments.verbose)
    # Stage one: allele finding
    allele_pipeline = allele_finder.AlleleFinder(
        path=arguments.path,
        targetfile=arguments.targetfile,
        analysis_type=arguments.blast,
        fasta_path=arguments.fasta_path,
        genesippr=arguments.genesippr,
        metadata_file=arguments.metadatafile,
        cutoff=arguments.cutoff,
        target_alleles=arguments.no_target_alleles,
        allele_hashing=arguments.allele_hashing,
        amino_acid=arguments.amino_acid,
        one_based=arguments.one_based,
    )
    allele_pipeline.main()
    # Keep hold of the records from the allele finder; the profiler consumes them
    allele_records = allele_pipeline.records
    logging.info('Allele finding complete')
    # Stage two: allele profiling
    allele_profiler = ProfileAlleles(
        path=arguments.path,
        fasta_path=arguments.fasta_path,
        records=allele_records,
        amino_acid=arguments.amino_acid,
    )
    allele_profiler.main()
    logging.info('Allele Profiling complete')
def cli():
    """
    Command line entry point for translating nucleotide allele files to amino acid.

    Parses the command line, calls SetupLogging with debug=True, and runs Translate with the supplied options.
    """
    parser = ArgumentParser(
        description='Translate allele files in nucleotide format to amino acid. Remove duplicates. Keep notes.')
    # Each entry holds the option strings and the keyword arguments of one add_argument call
    option_table = (
        (('-p', '--path'),
         dict(required=True,
              help='Specify path containing allele files.')),
        (('--profile',),
         dict(action='store_true',
              help='Optionally parse the nucleic acid profile, and create the corresponding reduced amino '
                   'acid profile')),
        (('-o', '--one_based'),
         dict(action='store_true',
              help='Use 1-based indexing rather than the default 0-based')),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
    options = parser.parse_args()
    # This script does not expose a verbosity switch: SetupLogging is always called with debug=True
    SetupLogging(debug=True)
    translator = Translate(path=options.path,
                           profile=options.profile,
                           one_based=options.one_based)
    translator.main()
    logging.info('Allele translation complete!')
def supremacy(args):
    """
    Run PrimerFinder with the 'ePCR' analysis type using the parsed command line arguments.

    :param args: Parsed arguments object. The debug, sequencepath, primerfile, mismatches, kmerlength and cpus
    attributes are read
    """
    SetupLogging(debug=args.debug)
    # Collect the PrimerFinder settings from the arguments object
    settings = {
        'sequence_path': args.sequencepath,
        'primer_file': args.primerfile,
        'mismatches': args.mismatches,
        'kmer_length': args.kmerlength,
        'cpus': args.cpus,
        'analysistype': 'ePCR',
    }
    # Create the object, and run the analyses
    primer_finder = PrimerFinder(**settings)
    primer_finder.main()
def ultimatum(args):
    """
    Run the Ultimatum analysis using the parsed command line arguments.

    A MetadataObject is attached to the arguments as args.runmetadata, and its samples attribute is populated with
    the return value of Filer.filer(args) before Ultimatum is created and run.

    :param args: Parsed arguments object. The debug, sequencepath, primerfile, primer_format, mismatches and
    export_amplicons attributes are read; runmetadata is set
    """
    SetupLogging(debug=args.debug)
    # Create metadata objects for the samples
    args.runmetadata = MetadataObject()
    args.runmetadata.samples = Filer.filer(args)
    # Reports are written to the 'reports' sub-folder of the sequence path
    report_dir = os.path.join(args.sequencepath, 'reports')
    analysis = Ultimatum(
        metadataobject=args.runmetadata.samples,
        sequencepath=args.sequencepath,
        reportpath=report_dir,
        primerfile=args.primerfile,
        primer_format=args.primer_format,
        mismatches=args.mismatches,
        export_amplicons=args.export_amplicons,
    )
    analysis.main()
def cli():
    """
    Command line entry point for downloading FASTA assemblies from the NCBI FTP.

    Parses the command line, sets up logging, and runs AssemblyDownload with the supplied options.
    """
    # Parser for arguments
    parser = ArgumentParser(
        description='Downloads and decompresses FASTA assemblies from the NCBI FTP')
    parser.add_argument(
        '-p', '--path',
        required=True,
        help='Path to folder containing necessary tables')
    parser.add_argument(
        '-o', '--outputpath',
        help='Path in which files are to be downloaded. Default is "path/downloads"')
    # argparse applies %-formatting to help strings, so the literal percent signs in the example URL must be
    # doubled. Unescaped, requesting the help message raises a ValueError instead of printing the usage
    parser.add_argument(
        '-a', '--accessiontable',
        default='pathogens.csv',
        help='Name of metadata table from NCBI (must be in the supplied path). Generate the table '
             'from NCBI pathogens '
             'e.g. https://www.ncbi.nlm.nih.gov/pathogens/isolates/#/search/taxgroup_name:%%22Salmonella'
             '%%20enterica%%22 '
             'Select Download: -> Data type: Metadata -> Download. Default name is pathogens.csv')
    parser.add_argument(
        '-n', '--numthreads',
        default=3,
        type=int,
        choices=[1, 2, 3, 4, 5, 6],
        help='Number of concurrent downloads to perform. Default is 3')
    parser.add_argument(
        '-s', '--sleeptime',
        default=0,
        type=int,
        help='Amount of time in seconds you would like the script to sleep until it starts the '
             'download. Default is 0. NOTE: There are 3600 seconds in an hour.')
    # Get the arguments into an object
    arguments = parser.parse_args()
    SetupLogging()
    # Run the download
    download = AssemblyDownload(path=arguments.path,
                                outputpath=arguments.outputpath,
                                accessiontable=arguments.accessiontable,
                                threads=arguments.numthreads,
                                sleeptime=arguments.sleeptime)
    download.main()
    logging.info('NCBI assembly download complete!')
def __init__(self, spectra_path, filename, start_time, outputpath, classic, extensions):
    """
    Resolve the supplied paths, locate the Excel file, and create the output folder.

    :param spectra_path: Path to .spa/.spc files
    :param filename: Path to .xls(x) file with renaming information. If the file is not found at the supplied
    location, the spectra folder and then its parent folder are searched
    :param start_time: Time the analyses started
    :param outputpath: Path to folder in which the renamed files are to be stored
    :param classic: BOOL whether to use the "classic" method of file renaming.
    :param extensions: Extensions of the files to rename (stored unchanged as self.extensions)
    :raises AssertionError: if the spectra path is not a directory, or the Excel file cannot be found
    """
    SetupLogging()
    # Define variables based on supplied arguments. expanduser returns paths that do not start with '~'
    # unchanged, so a single expression covers both the tilde and the plain case
    self.spectra_path = os.path.abspath(os.path.expanduser(spectra_path))
    assert os.path.isdir(self.spectra_path), 'Supplied sequence path is not a valid directory {0!r:s}'\
        .format(self.spectra_path)
    self.file = os.path.abspath(os.path.expanduser(filename))
    # If the path to the file wasn't provided, check the spectra folder
    if not os.path.isfile(self.file):
        self.file = os.path.join(self.spectra_path, filename)
    # If the file still can't be found, check the parental folder of the spectra folder
    if not os.path.isfile(self.file):
        self.file = os.path.join(os.path.dirname(self.spectra_path), filename)
    self.start = start_time
    assert os.path.isfile(self.file), 'Cannot find the supplied Excel file ({0!r:s}) with the file information. ' \
                                      'Please ensure that this file is in the path, and there\'s no spelling ' \
                                      'mistakes'.format(self.file)
    # Set the output path
    self.outputpath = os.path.join(outputpath)
    # Create the output path as required
    make_path(self.outputpath)
    # Determine the naming scheme
    self.classic = classic
    # Variable for extensions of files to rename
    self.extensions = extensions
    # Create class variable
    self.metadata = list()
def main():
    """
    Command line entry point: parse the database path and the debug flag, then run VirusTypeDB.
    """
    cli_parser = ArgumentParser(description='Perform virus typing')
    cli_parser.add_argument(
        '-db', '--dbpath',
        required=True,
        help='Path of folder containing .gb database files to process.')
    cli_parser.add_argument(
        '-d', '--debug',
        action='store_true',
        help='Allow debug-level logging to be printed to the terminal')
    options = cli_parser.parse_args()
    # The debug flag from the command line is forwarded to the logging setup
    SetupLogging(debug=options.debug)
    database = VirusTypeDB(db_path=options.dbpath)
    database.main()
def cli():
    """
    Command line entry point for the allele updating pipeline.

    Parses the command line, calls SetupLogging with debug=True, and runs Updater on the supplied path.
    """
    description = (
        'Determines profiles of strains against previously calculated allele database '
        'and profile. Creates and/or updates both the database of allele definitions '
        'and the profile based on novel alleles and/or profiles discovered'
    )
    path_help = (
        'Specify path. Note that due to code reuse, the query sequence files must be in the '
        '"query" sub-folder, the alleles must be in the "alleles" sub-folder'
    )
    cli_parser = ArgumentParser(description=description)
    cli_parser.add_argument('-p', '--path', required=True, help=path_help)
    cli_parser.add_argument('-aa', '--amino_acid', action='store_true', help='The query sequences are protein.')
    options = cli_parser.parse_args()
    # This script does not expose a verbosity switch: SetupLogging is always called with debug=True
    SetupLogging(debug=True)
    # Run the updating pipeline
    allele_updater = Updater(path=options.path, amino_acid=options.amino_acid)
    allele_updater.main()
    logging.info('Allele Updating complete')
def cli():
    """
    Command line entry point for reducing a profile file to a set of genes of interest.

    Parses the command line, calls SetupLogging with debug=True, and runs ProfileReduce with the supplied profile
    and gene names files.
    """
    cli_parser = ArgumentParser(description='Extract the genes of interest from a profile file')
    cli_parser.add_argument(
        '-p', '--profile',
        required=True,
        help='Name and path of profile file.')
    cli_parser.add_argument(
        '-n', '--names',
        required=True,
        help='Name and path to a file containing the gene names (one per line) to be extracted '
             'from the profile')
    options = cli_parser.parse_args()
    # This script does not expose a verbosity switch: SetupLogging is always called with debug=True
    SetupLogging(debug=True)
    profile_reducer = ProfileReduce(profile=options.profile, names=options.names)
    profile_reducer.main()
    logging.info('Profile reduction complete!')
def cli():
    """
    Command line entry point for downloading FASTQ files from SRA.

    Parses the command line, sets up logging, and runs SRAdownload with the supplied options.
    """
    # Parser for arguments
    parser = ArgumentParser(
        description='Downloads and compresses FASTQ files from SRA')
    parser.add_argument(
        '-p', '--path',
        required=True,
        help='Path to folder containing necessary tables')
    parser.add_argument(
        '-r', '--runinfotable',
        default='SraRunInfo.csv',
        help='Name of SRA accession table from NCBI (must be in the supplied path). Generate the table '
             'from NCBI SRA '
             'e.g. https://www.ncbi.nlm.nih.gov/sra?LinkName=bioproject_sra_all&from_uid=309770 '
             'Select Send to: -> File -> RunInfo. Default is SraRunInfo.csv')
    parser.add_argument(
        '-n', '--name',
        choices=['Run', 'LibraryName', 'Sample', 'BioSample', 'SampleName'],
        default='SampleName',
        help='Column name to use for the final naming of the FASTQ files. Default is SampleName')
    # type=int keeps a user-supplied thread count the same type as the integer default. Without it, the value
    # is a string whenever the option is given on the command line, and non-numeric input is not rejected
    parser.add_argument(
        '-t', '--threads',
        default=multiprocessing.cpu_count() - 1,
        type=int,
        help='Number of threads. Default is the number of cores in the system minus one')
    # Get the arguments into an object
    arguments = parser.parse_args()
    SetupLogging()
    # Run the download
    download = SRAdownload(path=arguments.path,
                           runinfotable=arguments.runinfotable,
                           column_name=arguments.name,
                           threads=arguments.threads)
    download.main()
    logging.info('SRA download complete!')
def identity(args):
    """
    Run either the vtyper or the custom ePCR analysis, depending on args.analysistype.

    A MetadataObject is attached to the arguments as args.runmetadata, and its samples attribute is populated with
    the return value of Filer.filer(args). VtyperIP is used when the analysis type is 'vtyper'; CustomIP is used
    for every other analysis type.

    :param args: Parsed arguments object
    """
    SetupLogging(debug=args.debug)
    # Create metadata objects for the samples
    args.runmetadata = MetadataObject()
    args.runmetadata.samples = Filer.filer(args)
    sample_metadata = args.runmetadata.samples
    # Both analyses write their reports to the 'reports' sub-folder of the sequence path
    report_dir = os.path.join(args.sequencepath, 'reports')
    if args.analysistype == 'vtyper':
        vtyper_analysis = VtyperIP(metadataobject=sample_metadata,
                                   analysistype=args.analysistype,
                                   reportpath=report_dir)
        vtyper_analysis.vtyper()
        return
    custom_analysis = CustomIP(
        metadataobject=sample_metadata,
        sequencepath=args.sequencepath,
        reportpath=report_dir,
        primerfile=args.primerfile,
        min_amplicon_size=args.minampliconsize,
        max_amplicon_size=args.maxampliconsize,
        primer_format=args.primer_format,
        mismatches=args.mismatches,
        export_amplicons=args.export_amplicons,
        contigbreaks=args.contigbreaks,
    )
    custom_analysis.main()
def legacy(args):
    """
    Run the legacy vtyper or the legacy custom ePCR analysis, depending on args.analysistype.

    The arguments object is extended with reportpath and runmetadata attributes before it is passed, as a whole,
    to the legacy classes.

    :param args: Parsed arguments object
    """
    SetupLogging(debug=args.debug)
    # Prep the args object to be used in the legacy script
    args.reportpath = os.path.join(args.sequencepath, 'reports')
    args.runmetadata = MetadataObject()
    # Create metadata objects for the samples
    args.runmetadata.samples = Filer.filer(args)
    if args.analysistype != 'vtyper':
        # Custom ePCR with the user-supplied primer file
        custom_analysis = Custom(
            inputobject=args,
            analysistype='custom_epcr',
            primerfile=args.primerfile,
            ampliconsize=args.maxampliconsize,
            mismatches=args.mismatches,
            primer_format=args.primer_format,
            export_amplicons=args.export_amplicons,
        )
        custom_analysis.main()
        return
    # Perform vtx typing
    vtx_typing = Vtyper(inputobject=args,
                        analysistype='vtyper_legacy',
                        mismatches=args.mismatches)
    vtx_typing.vtyper()
def main():
    """
    Command line entry point: parse the sequence path, report path and debug flag, then run VirusTyping.
    """
    cli_parser = ArgumentParser(description='Perform virus typing')
    cli_parser.add_argument(
        '-s', '--sequencepath',
        required=True,
        help='Path of folder containing .ab1 files to process.')
    cli_parser.add_argument(
        '-r', '--reportpath',
        required=True,
        help='Path in which reports are to be created')
    cli_parser.add_argument(
        '-d', '--debug',
        action='store_true',
        help='Allow debug-level logging to be printed to the terminal')
    options = cli_parser.parse_args()
    # The debug flag from the command line is forwarded to the logging setup
    SetupLogging(debug=options.debug)
    typing_run = VirusTyping(sequencepath=options.sequencepath,
                             reportpath=options.reportpath)
    typing_run.main()
def __init__(self, start, sequencepath, referencefilepath, scriptpath, debug):
    """
    Set up logging, and initialise the attributes used by the typing pipeline.

    :param start: Start time of the analyses (stored as both self.starttime and self.start)
    :param sequencepath: Path of the sequence folder. Must be an existing directory; the report path and the
    logfile are placed inside it
    :param referencefilepath: Path of the reference file folder. Must be an existing directory
    :param scriptpath: Stored as self.homepath
    :param debug: Passed to SetupLogging
    """
    self.debug = debug
    SetupLogging(self.debug)
    welcome = 'Welcome to the CFIA bacterial typing pipeline {}'.format(__version__)
    logging.info(welcome)
    # The sequence and reference paths are each stored under two attribute names
    self.sequencepath = os.path.join(sequencepath)
    self.path = self.sequencepath
    self.targetpath = os.path.join(referencefilepath)
    self.reffilepath = self.targetpath
    # The start time is likewise stored under two attribute names
    self.starttime = start
    self.start = self.starttime
    # Use all but one of the cpus in the system
    total_cpus = multiprocessing.cpu_count()
    self.cpus = total_cpus - 1
    # Ensure that the supplied sequence path is valid before deriving the report path from it
    sequence_path_exists = os.path.isdir(self.sequencepath)
    assert sequence_path_exists, 'Supplied path location is not a valid directory {0!r:s}'\
        .format(self.sequencepath)
    self.reportpath = os.path.join(self.sequencepath, 'reports')
    # Ensure that the supplied reference file path is valid
    reference_path_exists = os.path.isdir(self.targetpath)
    assert reference_path_exists, 'Reference file path is not a valid directory {0!r:s}'\
        .format(self.targetpath)
    # Fixed settings for this pipeline
    self.commit = __version__
    self.homepath = scriptpath
    self.analysistype = 'assembly_typing'
    self.genus_specific = False
    self.logfile = os.path.join(self.sequencepath, 'logfile')
    self.pipeline = True
    # Initialise the metadata containers
    self.metadata = list()
    self.runmetadata = MetadataObject()
def cli():
    """
    Command line entry point for attributing target sequences to allele files.

    Builds the argument parser, sets up logging, parses the command line, and runs Attribute with the supplied
    allele, target and report folders for the requested gene.
    """
    cli_parser = ArgumentParser(
        description='Finds the target sequences in allele files. Useful if you have an allele '
                    'database, and want to attribute subtypes e.g. STEC subtyping to your newly '
                    'expanded alleles')
    # The three folder options are all required, and differ only in their names and help text
    folder_options = (
        ('-a', '--allelepath', 'Name and path of folder containing generated allele files'),
        ('-t', '--targetpath', 'Name and path of folder containing sequencing target sequences'),
        ('-r', '--reportpath', 'Name and path of folder in which reports are to be created'),
    )
    for short_flag, long_flag, help_text in folder_options:
        cli_parser.add_argument(short_flag, long_flag, required=True, help=help_text)
    cli_parser.add_argument(
        '-g', '--gene',
        required=True,
        choices=['stx1A', 'stx1B', 'stx2A', 'stx2B'],
        help='Name of gene being profiled')
    # Logging is set up before the command line is parsed
    SetupLogging()
    options = cli_parser.parse_args()
    # Run the pipeline
    allele_attributer = Attribute(allelepath=options.allelepath,
                                  targetpath=options.targetpath,
                                  reportpath=options.reportpath,
                                  gene=options.gene)
    allele_attributer.main()
    logging.info('Allele Attribution complete!')
def cli():
    """
    Command line entry point for the allele finding pipeline.

    Defines the arguments on a help-less parent parser (so that other scripts can re-use them via the parents
    argument of ArgumentParser), parses the command line with a child parser, runs AlleleFinder, and returns the
    parent parser.

    NOTE(review): the command line is parsed and the pipeline is run before the parent parser is returned, so a
    script that calls this function only to obtain the parser also triggers both - consider splitting parser
    construction from execution.

    :return: The parent ArgumentParser (created with add_help=False) holding the argument definitions
    """
    # Parser for arguments
    parser = ArgumentParser(add_help=False)
    parser.add_argument(
        '-p', '--path',
        required=True,
        help='Specify path.')
    # A space was missing between 'gene.' and 'This' where the adjacent string literals are joined
    parser.add_argument(
        '-t', '--targetfile',
        required=True,
        help='Name of file containing probe sequence to search. The file can be a multi-FASTA. The '
             'header for each sequence must be unique, as it will be used as the name of the gene. '
             'This file must be located in the supplied path folder.')
    parser.add_argument(
        '-f', '--fasta_path',
        help='Path to folder containing local files to BLAST.')
    parser.add_argument(
        '-g', '--genesippr',
        action='store_true',
        help='Enable mode to specifically create alleles for the defined set of genes used in '
             'the GeneSippr analysis')
    parser.add_argument(
        '-m', '--metadatafile',
        help='Name of combined metadata file used to parse the genus of each local assembly. This '
             'file must be located in the supplied path folder. NOTE: This is only required if '
             'performing the "GeneSippr-specific" analysis')
    parser.add_argument(
        '-b', '--blast',
        choices=['local', 'remote', 'both'],
        default='local',
        help='Choose whether to run either local or remote BLAST, or both. Default is local')
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Enable verbose mode')
    parser.add_argument(
        '-c', '--cutoff',
        default=80,
        type=int,
        help='Percent identity cutoff to use when parsing BLAST outputs')
    # store_false: the attribute is True unless the flag is supplied, and is passed on as target_alleles
    parser.add_argument(
        '-n', '--no_target_alleles',
        action='store_false',
        help='Do not include the target alleles in the output allele. If the alleles are stored, they '
             'will be the first allele in the multi-FASTA file (allele_0 or '
             'allele_COMPUTED_HASH - see below)')
    parser.add_argument(
        '-a', '--allele_hashing',
        action='store_true',
        help='Use the first eight digits of the computed hash of the allele sequence as the allele '
             'identifier (e.g. _503e35061a) rather than the arbitrary _0, _1, etc.')
    parser.add_argument(
        '-aa', '--amino_acid',
        choices=['targets_nt', 'targets_aa'],
        help='Find the amino acid sequence of alleles. The target alleles supplied can either be '
             'nucleotide or amino acid. Default is nucleotide')
    parser.add_argument(
        '-o', '--one_based',
        action='store_true',
        help='Use 1-based indexing rather than the default 0-based')
    # The child parser adds the -h/--help option that the parent parser deliberately lacks
    arg_parser = ArgumentParser(parents=[parser])
    # Get the arguments into an object
    arguments = arg_parser.parse_args()
    SetupLogging(debug=arguments.verbose)
    # Run the pipeline
    pipeline = AlleleFinder(path=arguments.path,
                            targetfile=arguments.targetfile,
                            analysis_type=arguments.blast,
                            fasta_path=arguments.fasta_path,
                            genesippr=arguments.genesippr,
                            metadata_file=arguments.metadatafile,
                            cutoff=arguments.cutoff,
                            target_alleles=arguments.no_target_alleles,
                            allele_hashing=arguments.allele_hashing,
                            amino_acid=arguments.amino_acid,
                            one_based=arguments.one_based)
    pipeline.main()
    logging.info('Allele finding complete')
    return parser
.format(tf=self.test_folder)
        self.assembly_typer = assembly_typer
        self.validate_pass = False


# Script entry point: parse the reference and test folders from the command line, run ValidateCowbat on them,
# and log whether the validation passed
if __name__ == '__main__':
    # Parser for arguments
    parser = ArgumentParser(description='Run integration tests on COWBAT pipeline')
    parser.add_argument('-r', '--reference_folder',
                        required=True,
                        help='Path to reference folder with CSV reports with expected results.')
    parser.add_argument('-t', '--test_folder',
                        required=True,
                        help='Path to test folder with CSV reports with observed results .')
    # Flag forwarded to ValidateCowbat as assembly_typer
    parser.add_argument('-a', '--assembly',
                        action='store_true',
                        help='The assembly typing pipeline was used to process the run, rather than full COWBAT')
    # Get the arguments into an object
    args = parser.parse_args()
    # Pretty logging!
    SetupLogging()
    # Test the reports.
    validate_outputs = ValidateCowbat(reference_folder=args.reference_folder,
                                      test_folder=args.test_folder,
                                      assembly_typer=args.assembly)
    validate_outputs.validate_cowbat()
    # validate_pass is read after validate_cowbat() has run, and selects the message to log
    # NOTE(review): a failed validation is only logged here; nothing in this block sets a non-zero exit status
    if validate_outputs.validate_pass:
        logging.info('COWBAT successfully validated! :D')
    else:
        logging.error('COWBAT not successfully validated.')
def cli():
    """
    Command line entry point for the probe finding pipeline.

    Extends the argument parser returned by allele_finder.cli() with the probe-specific options, optionally runs
    allele_finder.AlleleFinder (when --runblast is supplied), and then runs Probes.
    """
    # Import the argument parser from allele_finder.py
    # NOTE(review): if allele_finder.cli() parses the command line and runs its own pipeline before returning
    # the parser, that work is also performed by this call - confirm against allele_finder.py
    parent_parser = allele_finder.cli()
    # -c/--cutoff, -aa/--amino_acid and -o/--one_based are (re)defined below. With the default conflict handler,
    # argparse raises an ArgumentError if the parent parser already defines the same option strings; 'resolve'
    # lets the definitions in this function take precedence instead
    parser = ArgumentParser(parents=[parent_parser],
                            conflict_handler='resolve')
    parser.add_argument(
        '-min', '--min',
        default=20,
        type=int,
        help='Minimum size of probe to create')
    parser.add_argument(
        '-max', '--max',
        default=50,
        type=int,
        help='Maximum size of probe to create')
    # type=int keeps a user-supplied cutoff the same type as the integer default, matching the other numeric
    # options of this parser
    parser.add_argument(
        '-c', '--cutoff',
        default=70,
        type=int,
        help='Cutoff percent identity of a nucleotide location to use')
    parser.add_argument(
        '-gc', '--percentgc',
        default=50,
        type=int,
        help='Desired percent GC of the probe')
    parser.add_argument(
        '-r', '--runblast',
        action='store_true',
        help='Run BLAST analyses on the supplied target file. If not enabled, then the program assumes '
             'that the supplied file includes all the desired alleles to use to create the probe')
    parser.add_argument(
        '-aa', '--amino_acid',
        choices=['targets_nt', 'targets_aa'],
        help='Find the amino acid sequence of alleles. The target alleles supplied can either be '
             'nucleotide or amino acid. Default is nucleotide')
    parser.add_argument(
        '-o', '--one_based',
        action='store_true',
        help='Use 1-based indexing rather than the default 0-based')
    # Get the arguments into an object
    arguments = parser.parse_args()
    SetupLogging(debug=arguments.verbose)
    if arguments.runblast:
        # Run the allele-finding pipeline
        finder = allele_finder.AlleleFinder(path=arguments.path,
                                            targetfile=arguments.targetfile,
                                            analysis_type=arguments.blast,
                                            fasta_path=arguments.fasta_path,
                                            genesippr=arguments.genesippr,
                                            metadata_file=arguments.metadatafile,
                                            cutoff=arguments.cutoff,
                                            amino_acid=arguments.amino_acid,
                                            one_based=arguments.one_based)
        finder.main()
    # Run the pipeline
    probes = Probes(path=arguments.path,
                    targetfile=arguments.targetfile,
                    min_length=arguments.min,
                    max_length=arguments.max,
                    cutoff=arguments.cutoff,
                    perc_gc=arguments.percentgc,
                    blast=arguments.runblast,
                    one_based=arguments.one_based)
    probes.main()
    logging.info('Probe finding complete')
def __init__(self, args):
    """
    Initialises the variables required for this class

    :param args: Arguments object passed to the script. The debug, sequencepath, referencefilepath, numreads,
    preprocess, startingtime, customsamplesheet, basicassembly, threads and homepath attributes are read
    :raises AssertionError: if the custom sample sheet is supplied but is not a file, or if the sequence path or
    the reference file path is not a directory
    """
    self.debug = args.debug
    SetupLogging(self.debug)
    logging.info(
        'Welcome to the CFIA OLC Workflow for Bacterial Assembly and Typing (COWBAT) version {version}'
        .format(version=__version__))
    # Define variables from the arguments - there may be a more streamlined way to do this
    self.args = args
    # The user directory must be expanded BEFORE the path is made absolute: abspath prefixes a leading '~' with
    # the current working directory, after which expanduser can no longer recognise it. expanduser returns
    # paths that do not start with '~' unchanged, so no branching is required
    self.path = os.path.abspath(os.path.expanduser(args.sequencepath))
    self.sequencepath = self.path
    self.reffilepath = os.path.abspath(os.path.expanduser(args.referencefilepath))
    self.numreads = args.numreads
    self.preprocess = args.preprocess
    # Define the start time
    self.starttime = args.startingtime
    # Resolve the custom sample sheet from the arguments (self.customsamplesheet does not exist before this
    # point, so it cannot be used as the source of the path). A falsy value is stored unchanged
    if args.customsamplesheet:
        self.customsamplesheet = os.path.abspath(os.path.expanduser(args.customsamplesheet))
    else:
        self.customsamplesheet = args.customsamplesheet
    if self.customsamplesheet:
        assert os.path.isfile(self.customsamplesheet), 'Cannot find custom sample sheet as specified {css}' \
            .format(css=self.customsamplesheet)
    self.basicassembly = args.basicassembly
    # Without any sample sheet, fall back to a basic assembly
    if not self.customsamplesheet and not os.path.isfile(os.path.join(self.path, 'SampleSheet.csv')):
        self.basicassembly = True
        logging.warning('Could not find a sample sheet. Performing basic assembly (no run metadata captured)')
    # Use the argument for the number of threads to use, or default to the number of cpus in the system
    self.cpus = args.threads if args.threads else multiprocessing.cpu_count() - 1
    # Assertions to ensure that the provided variables are valid
    make_path(self.path)
    assert os.path.isdir(self.path), 'Supplied path location is not a valid directory {0!r:s}'.format(self.path)
    self.reportpath = os.path.join(self.path, 'reports')
    make_path(self.reportpath)
    assert os.path.isdir(self.reffilepath), 'Reference file path is not a valid directory {0!r:s}' \
        .format(self.reffilepath)
    self.commit = __version__
    self.homepath = args.homepath
    self.logfile = os.path.join(self.path, 'logfile')
    self.runinfo = str()
    self.pipeline = True
    self.qualityobject = MetadataObject()
    # Initialise the metadata object
    self.runmetadata = MetadataObject()