def check_sub_prereqs(_options: ConfigType) -> List[str]:
    """ Report every missing executable or data file needed for the
        subcluster comparison.

        Arguments:
            _options: not used by this check

        Returns:
            a list of error messages, one for each missing prerequisite
    """
    # every entry is (name, optional); optional entries never produce a message
    executables = [
        ('blastp', False),
        ('makeblastdb', False),
    ]
    datafiles = [
        ('subclusterprots.fasta', False),
        ('subclusterprots.fasta.phr', False),
        ('subclusterprots.fasta.pin', False),
        ('subclusterprots.fasta.psq', False),
        ('subclusters.txt', False),
    ]

    problems = []

    for name, is_optional in executables:
        located = path.locate_executable(name)
        if located is None and not is_optional:
            problems.append("Failed to locate file: %r" % name)

    for name, is_optional in datafiles:
        located = path.locate_file(_get_datafile_path(name))
        if located is None and not is_optional:
            problems.append("Failed to locate file: %r" % name)

    return problems
def check_prereqs() -> List[str]:
    """ Check that hmmscan and hmmpress can be found and that each bundled
        profile file exists, running hmmpress on a profile when one of its
        binary index files is missing.

        Returns:
            a list of error messages, one for each failed check
    """
    problems = []

    for name, is_optional in [('hmmscan', False), ('hmmpress', False)]:
        located = path.locate_executable(name)
        if located is None and not is_optional:
            problems.append("Failed to locate executable for %r" % name)

    profile_names = ['abmotifs.hmm', 'dockingdomains.hmm',
                     'ksdomains.hmm', 'nrpspksdomains.hmm']
    profile_paths = [path.get_full_path(__file__, 'data', filename)
                     for filename in profile_names]
    index_suffixes = ['.h3f', '.h3i', '.h3m', '.h3p']

    for profile in profile_paths:
        if path.locate_file(profile) is None:
            problems.append("Failed to locate file %r" % profile)
            continue

        for suffix in index_suffixes:
            index_file = "{}{}".format(profile, suffix)
            if path.locate_file(index_file) is not None:
                continue
            # hmmpress is attempted once per profile; the remaining
            # suffixes are not inspected afterwards
            pressed = subprocessing.run_hmmpress(profile)
            if not pressed.successful():
                problems.append('Failed to hmmpress {!r}: {}'.format(profile, pressed.stderr))
            break

    return problems
def check_prereqs() -> List[str]:
    """ Check that the comparison executables and the data files in the
        'clusterblast' directory of the configured database directory are
        present, then run the known-cluster and subcluster checks as well.

        Returns:
            a list of error messages, one for each missing prerequisite
    """
    options = get_config()

    # every entry is (name, optional); optional entries never produce a message
    executables = [
        ('blastp', False),
        ('makeblastdb', False),
        ('diamond', False),
    ]
    datafiles = [
        ('geneclusterprots.dmnd', False),
        ('geneclusterprots.fasta', False),
        ('geneclusters.txt', False),
    ]

    clusterblastdir = os.path.join(options.database_dir, "clusterblast")

    problems = []

    for name, is_optional in executables:
        located = path.locate_executable(name)
        if located is None and not is_optional:
            problems.append("Failed to locate file: %r" % name)

    for name, is_optional in datafiles:
        located = path.locate_file(os.path.join(clusterblastdir, name))
        if located is None and not is_optional:
            problems.append("Failed to locate file: %r" % name)

    problems.extend(check_known_prereqs(options))
    problems.extend(check_sub_prereqs(options))
    return problems
def check_prereqs():
    """ Check that every required external program can be located.

        Returns:
            a list of error messages, one for each missing executable
    """
    return ["Failed to locate executable for %r" % name
            for name in ['hmmpfam2']
            if not path.locate_executable(name)]
def check_prereqs() -> List[str]:
    """ Check that the muscle and fasttree executables can be located.

        Returns:
            a list of error messages, one for each missing executable
    """
    return ["Failed to locate file: %r" % name
            for name in ['muscle', 'fasttree']
            if path.locate_executable(name) is None]
def check_prereqs() -> List[str]:
    """ Checks if all required files and applications are around.

        Looks up the hmmpfam2, hmmscan and hmmpress executables and each
        profile file in this module's 'data' directory. For profiles that do
        not end in '.hmm2', the hmmpress binary files are generated when
        missing and regenerated when not newer than the profile.

        Returns:
            a list of error messages, one for each failed check
    """
    # suffixes of the binary files expected next to a pressed profile
    _binary_extensions = ['.h3f', '.h3i', '.h3m', '.h3p']

    failure_messages = []

    for binary_name in ['hmmpfam2', 'hmmscan', 'hmmpress']:
        if not path.locate_executable(binary_name):
            failure_messages.append("Failed to locate file: %r" % binary_name)

    # The profile names are listed explicitly here.
    # NOTE(review): the previous comment mentioned an XML file, but no XML is
    # read in this function - confirm whether the list should be derived.
    for profile in ["PKSI-KR.hmm2", "PKSI-KS_N.hmm2", "PKSI-KS_C.hmm2", "PKSI-AT.hmm2",
                    "PKSI-ACP.hmm2", "PKSI-DH.hmm2", "Thioesterase.hmm2", "PKSI-ER.hmm2",
                    "aa-activating.aroundLys.hmm2", "p450.hmm2"]:
        full_hmm_path = path.get_full_path(__file__, "data", profile)

        if path.locate_file(full_hmm_path) is None:
            failure_messages.append("Failed to locate file: %s" % profile)
            continue

        # '.hmm2' profiles only need to exist; every name in the list above
        # currently has that suffix, so the handling below is skipped for them
        if profile.endswith(".hmm2"):
            continue

        for ext in _binary_extensions:
            binary = "{hmm}{ext}".format(hmm=full_hmm_path, ext=ext)
            if not path.locate_file(binary):
                result = subprocessing.run_hmmpress(full_hmm_path)
                if not result.successful():
                    failure_messages.append("Failed to hmmpress {!r}: {!r}".format(profile, result.stderr))

                # hmmpress generates _all_ binary files in one go, so stop the loop
                break

            # compare modification times to detect binaries older than the profile
            binary_mtime = os.path.getmtime(binary)
            hmm_mtime = os.path.getmtime(full_hmm_path)
            if hmm_mtime < binary_mtime:
                # generated file younger than hmm profile, do nothing
                continue
            # the binary is not newer than the profile: remove every '.h3?'
            # file of this profile before pressing again
            try:
                for filename in glob.glob("{}.h3?".format(full_hmm_path)):
                    logging.debug("removing outdated file %r", filename)
                    os.remove(filename)
            except OSError as err:
                failure_messages.append("Failed to remove outdated binary file for %s: %s" %
                                        (profile, err))
                break
            result = subprocessing.run_hmmpress(full_hmm_path)
            if not result.successful():
                # report both the hmmpress failure and the timestamps that triggered it
                failure_messages.append("Failed to hmmpress %r: %r" % (profile, result.stderr))
                failure_messages.append("HMM binary files outdated. %s (changed: %s) vs %s (changed: %s)" %
                                        (profile, datetime.datetime.fromtimestamp(hmm_mtime),
                                         binary, datetime.datetime.fromtimestamp(binary_mtime)))
            # hmmpress generates _all_ binary files in one go, so stop the loop
            break
    return failure_messages
def check_prereqs() -> List[str]:
    """ Check that the meme and fimo executables can be located.

        Returns:
            a list of error messages, one for each missing executable
    """
    # the second element of each pair is not used yet
    # (presumably the expected version, see the TODO below)
    required = [("meme", "4.11.1"), ("fimo", "4.11.1")]

    problems = []
    for name, _ in required:
        if path.locate_executable(name) is not None:
            continue
        problems.append("Failed to locate executable for {!r}".format(name))
    # TODO: Check binary version here
    return problems
def check_prereqs() -> List[str]:
    """ Check that the required executables can be located.

        hmmpfam2: used to find extra HMM hits not in hmm_detection

        Returns:
            a list of error messages, one for each missing executable
    """
    return ["Failed to locate executable for %r" % name
            for name in ['hmmpfam2']
            if path.locate_executable(name) is None]
def check_prereqs() -> List[str]:
    """ Check that the required executables can be located.

        java: NRPSPredictor, sandpuma
        muscle: sandpuma, at_analysis, kr_analysis, minowa, orderfinder
        hmmsearch: minowa

        Returns:
            a list of error messages, one for each missing executable
    """
    required = ["hmmsearch", "muscle", "java"]
    return ["Failed to locate executable for %r" % name
            for name in required
            if path.locate_executable(name) is None]
def check_prereqs() -> List[str]:
    """ Check that the external programs are available and record in the
        config whether fimo was found.

        Only a missing hmmpfam2 produces an error message; fimo is optional.

        Returns:
            a list of error messages, one for each missing required executable
    """
    problems = []
    for name, is_optional in [('hmmpfam2', False), ('fimo', True)]:
        available = path.locate_executable(name) is not None
        if not available and not is_optional:
            problems.append("Failed to locate executable for %r" % name)
        if name == "fimo":
            # store the result on the config object
            get_config().fimo_present = available
    return problems
def check_known_prereqs(options: ConfigType) -> List[str]:
    """ Check that blastp, makeblastdb and the configured diamond executable
        can be located.

        Arguments:
            options: the config object, read for its cb_diamond_executable value

        Returns:
            a list of error messages, one for each missing executable
    """
    required = ['blastp', 'makeblastdb', options.cb_diamond_executable]
    return ["Failed to locate file: %r" % name
            for name in required
            if path.locate_executable(name) is None]
def check_prereqs() -> List[str]:
    """ Check that hmmsearch and hmmpress can be located and, if so, that the
        data check reports no problems.

        hmmpress is only required if the databases have not yet been generated.

        Returns:
            a list of error messages, one for each failed check
    """
    problems = ["Failed to locate executable for %r" % name
                for name in ["hmmsearch", "hmmpress"]
                if not path.locate_executable(name)]

    # no point checking the data if we can't use it
    if not problems:
        problems.extend(prepare_data(logging_only=True))
    return problems
def check_prereqs() -> List[str]:
    """ Check that the executable for the configured gene finding tool can be
        located.

        Nothing is checked when the tool is 'none'. For 'prodigal' and
        'prodigal-m' the prodigal executable is required; for any other tool
        glimmerhmm is required only when the taxon is 'fungi'.

        Returns:
            a list of error messages, one for each missing executable
    """
    problems = []
    options = get_config()

    if options.genefinding_tool in ['none']:
        return problems

    if options.genefinding_tool in ['prodigal', 'prodigal-m']:
        required = ['prodigal']
    elif options.taxon == 'fungi':
        required = ['glimmerhmm']
    else:
        required = []

    for name in required:
        if locate_executable(name):
            continue
        problems.append("Failed to locate executable for %r" % name)

    return problems
def check_prereqs() -> List[str]:
    """ Check that hmmscan can be located and that the latest pfam database
        under the configured database directory passes its check.

        Returns:
            a list of error messages, one for each failed check
    """
    problems = ["Failed to locate executable: %r" % name
                for name in ['hmmscan']
                if not path.locate_executable(name)]

    database_root = get_config().database_dir
    try:
        latest = pfamdb.find_latest_database_version(database_root)
    except ValueError as err:
        # no version could be determined, so there is no database to check
        problems.append(str(err))
        return problems

    problems.extend(pfamdb.check_db(os.path.join(database_root, "pfam", latest)))
    return problems
def check_prereqs() -> List[str]:
    """ Check that hmmscan and hmmpress can be located, that the Resfam
        database file exists in the configured database directory, and that
        the data check reports no problems.

        Returns:
            a list of error messages, one for each failed check
    """
    problems = ["Failed to locate file: %r" % name
                for name in ['hmmscan', 'hmmpress']
                if path.locate_executable(name) is None]

    resfam_path = os.path.join(get_config().database_dir, 'resfam', 'Resfams.hmm')
    if path.locate_file(resfam_path) is None:
        problems.append('Failed to locate Resfam database in %s' % resfam_path)

    # the data check runs regardless of the earlier results
    problems.extend(prepare_data(logging_only=True))
    return problems
def check_prereqs() -> List[str]:
    """ Check that prereqs are satisfied. hmmpress is only required if the
        databases have not yet been generated.

        The combined profile file 'bgc_seeds.hmm' in this module's 'data'
        directory is created from the individual signature profiles when it
        cannot be located, and hmmpress is run on it when one of its binary
        index files is missing.

        Returns:
            a list of error messages, one for each failed check
    """
    failure_messages = []
    for binary_name, optional in [('hmmsearch', False), ('hmmpress', False)]:
        if path.locate_executable(binary_name) is None and not optional:
            failure_messages.append("Failed to locate executable for %r" % binary_name)

    # Check that hmmdetails.txt is readable and well-formatted
    try:
        profiles = get_signature_profiles()
    except ValueError as err:
        # without the profiles nothing below can succeed; returning here also
        # avoids iterating over a missing profile list, which would raise a
        # TypeError instead of reporting the problem
        failure_messages.append(str(err))
        return failure_messages

    # the path to the markov model
    hmm = path.get_full_path(__file__, 'data', 'bgc_seeds.hmm')
    hmm_files = [os.path.join("data", sig.hmm_file) for sig in profiles]
    if path.locate_file(hmm) is None:
        # try to generate file from all specified profiles in hmmdetails
        try:
            with open(hmm, 'w') as all_hmms_handle:
                for hmm_file in hmm_files:
                    with open(path.get_full_path(__file__, hmm_file), 'r') as handle:
                        all_hmms_handle.write(handle.read())
        except OSError:
            failure_messages.append('Failed to generate file {!r}'.format(hmm))

    # if previous steps have failed, the remainder will too, so don't try
    if failure_messages:
        return failure_messages

    binary_extensions = ['.h3f', '.h3i', '.h3m', '.h3p']
    for ext in binary_extensions:
        binary = "{}{}".format(hmm, ext)
        if path.locate_file(binary) is None:
            # hmmpress is attempted once; the remaining extensions are not
            # inspected afterwards
            result = run_hmmpress(hmm)
            if not result.successful():
                failure_messages.append('Failed to hmmpress {!r}: {}'.format(
                    hmm, result.stderr))
            break

    return failure_messages
def check_prereqs() -> List[str]:
    """ Checks the prereqs for the lanthipeptide module.

        fimo is optional, having it available increases accuracy in the
        RODEO subsection. For each executable, a '<name>_present' attribute
        on the config is updated with the lookup result when the config
        already has such an attribute.

        Returns:
            a list of error messages, one for each missing required executable
    """
    problems = []
    for name, is_optional in [('hmmpfam2', False), ('fimo', True)]:
        available = path.locate_executable(name) is not None
        if not available and not is_optional:
            problems.append("Failed to locate executable for %r" % name)

        attribute = '{}_present'.format(name)
        config = get_config()
        # only existing attributes are updated, none are created
        if hasattr(config, attribute):
            setattr(config, attribute, available)
    return problems
def check_prereqs() -> List[str]:
    """ Check that blastp, makeblastdb and the configured diamond executable
        can be located, then run the data check and the known-cluster and
        subcluster checks.

        Returns:
            a list of error messages, one for each failed check
    """
    options = get_config()

    required = [
        'blastp',
        'makeblastdb',
        options.cb_diamond_executable,
    ]

    problems = ["Failed to locate file: %r" % name
                for name in required
                if path.locate_executable(name) is None]

    problems.extend(prepare_data(logging_only=True))
    problems.extend(check_known_prereqs(options))
    problems.extend(check_sub_prereqs(options))
    return problems
def check_prereqs() -> List[str]:
    """ Check that the hmmpfam2, hmmscan and hmmpress executables and each
        profile file in this module's 'data' directory can be located.

        Returns:
            a list of error messages, one for each missing prerequisite
    """
    problems = ["Failed to locate file: %r" % name
                for name in ['hmmpfam2', 'hmmscan', 'hmmpress']
                if not path.locate_executable(name)]

    profile_names = [
        "PKSI-KR.hmm2", "PKSI-KS_N.hmm2", "PKSI-KS_C.hmm2", "PKSI-AT.hmm2",
        "PKSI-ACP.hmm2", "PKSI-DH.hmm2", "Thioesterase.hmm2", "PKSI-ER.hmm2",
        "p450.hmm2",
    ]
    for profile in profile_names:
        profile_path = path.get_full_path(__file__, "data", profile)
        if path.locate_file(profile_path) is None:
            problems.append("Failed to locate file: %s" % profile)

    return problems
def check_prereqs() -> List[str]:
    """ Check that the required executables and the 'smcogs.hmm' profile can
        be located, running hmmpress on the profile when one of its binary
        index files is missing.

        Returns:
            a list of error messages, one for each failed check
    """
    required = ['muscle', 'hmmscan', 'hmmpress', 'fasttree', 'java']
    problems = ["Failed to locate file: %r" % name
                for name in required
                if path.locate_executable(name) is None]

    for profile_name in ['smcogs.hmm']:
        profile = path.get_full_path(__file__, 'data', profile_name)
        if path.locate_file(profile) is None:
            problems.append("Failed to locate file %r" % profile)
            continue

        for suffix in ['.h3f', '.h3i', '.h3m', '.h3p']:
            index_file = "%s%s" % (profile, suffix)
            if path.locate_file(index_file) is not None:
                continue
            # regenerate the binary files; hmmpress is attempted once and the
            # remaining suffixes are not inspected afterwards
            pressed = subprocessing.run_hmmpress(profile)
            if not pressed.successful():
                problems.append("Failed to hmmpress %s: %s" % (profile, pressed.stderr.rstrip()))
            break

    return problems
def check_prereqs() -> List[str]:
    """ Check the prerequisites.

        hmmscan: domain detection
        blastp: CLF and starter unit analysis
        HMMs: t2pks.hmm

        Returns:
            a list of strings describing any errors, if they occurred
    """
    problems = ["Failed to locate file: %r" % name
                for name in ['hmmscan', "hmmpress", 'blastp']
                if path.locate_executable(name) is None]

    # every blast database needs its fasta file plus the three index files
    database_names = ['KSIII', 'AT', 'LIG']
    suffixes = ['.fasta', '.phr', '.pin', '.psq']
    for database in database_names:
        for suffix in suffixes:
            database_file = path.get_full_path(__file__, 'data', database + suffix)
            if path.locate_file(database_file) is not None:
                continue
            problems.append("Failed to locate file %r" % database_file)

    problems.extend(prepare_data(logging_only=True))
    return problems
def check_known_prereqs(_options: ConfigType) -> List[str]:
    """ Report every missing executable or data file needed for the
        known-cluster comparison.

        Arguments:
            _options: not used by this check

        Returns:
            a list of error messages, one for each failing prerequisite check
    """
    # every entry is (name, optional); optional entries never produce a message
    executables = [('blastp', False), ('makeblastdb', False), ('diamond', False)]
    datafiles = [
        ('knownclusterprots.fasta', False),
        ('knownclusterprots.dmnd', False),
        ('knownclusters.txt', False),
    ]

    problems = []

    for name, is_optional in executables:
        located = path.locate_executable(name)
        if located is None and not is_optional:
            problems.append("Failed to locate file: %r" % name)

    for name, is_optional in datafiles:
        located = path.locate_file(_get_datafile_path(name))
        if located is None and not is_optional:
            problems.append("Failed to locate file: %r" % name)

    return problems
def check_prereqs() -> List[str]:
    """ Check the prerequisites.

        hmmscan: domain detection
        blastp: CLF and starter unit analysis
        HMMs: t2pks.hmm

        Returns:
            a list of strings describing any errors, if they occurred
    """
    problems = ["Failed to locate file: %r" % name
                for name in ['hmmscan', 'blastp']
                if path.locate_executable(name) is None]

    for profile_name in ['t2pks.hmm']:
        profile = path.get_full_path(__file__, 'data', profile_name)
        if path.locate_file(profile) is None:
            problems.append("Failed to locate file %r" % profile)
            continue

        for suffix in ['.h3f', '.h3i', '.h3m', '.h3p']:
            index_file = "%s%s" % (profile, suffix)
            if path.locate_file(index_file) is not None:
                continue
            # regenerate the binary files; hmmpress is attempted once and the
            # remaining suffixes are not inspected afterwards
            pressed = subprocessing.run_hmmpress(profile)
            if not pressed.successful():
                problems.append("Failed to hmmpress %s: %s" % (profile, pressed.stderr.rstrip()))
            break

    # every blast database needs its fasta file plus the three index files
    database_names = ['KSIII', 'AT', 'LIG']
    suffixes = ['.fasta', '.phr', '.pin', '.psq']
    for database in database_names:
        for suffix in suffixes:
            database_file = path.get_full_path(__file__, 'data', database + suffix)
            if path.locate_file(database_file) is None:
                problems.append("Failed to locate file %r" % database_file)

    return problems