Exemple #1
0
def check_prereqs() -> List[str]:
    """ Checks that the required executables and HMM profiles can be found.

        For any profile that doesn't end in '.hmm2', the hmmpress-generated
        binary files are also checked, and hmmpress is run if one of them is
        missing or is not newer than the profile itself.

        Returns:
            a list of strings describing any problems found
    """
    problems = []

    for executable in ('hmmpfam2', 'hmmscan', 'hmmpress'):
        if not path.locate_executable(executable):
            problems.append("Failed to locate file: %r" % executable)

    profile_names = (
        "PKSI-KR.hmm2", "PKSI-KS_N.hmm2", "PKSI-KS_C.hmm2", "PKSI-AT.hmm2",
        "PKSI-ACP.hmm2", "PKSI-DH.hmm2", "Thioesterase.hmm2", "PKSI-ER.hmm2",
        "aa-activating.aroundLys.hmm2", "p450.hmm2",
    )
    pressed_suffixes = ('.h3f', '.h3i', '.h3m', '.h3p')

    for profile in profile_names:
        profile_path = path.get_full_path(__file__, "data", profile)

        if path.locate_file(profile_path) is None:
            problems.append("Failed to locate file: %s" % profile)
            continue

        # profiles with a '.hmm2' suffix get no pressed-file checks at all
        if profile.endswith(".hmm2"):
            continue

        for suffix in pressed_suffixes:
            pressed = "%s%s" % (profile_path, suffix)

            if not path.locate_file(pressed):
                # a missing binary file means the profile has to be pressed
                outcome = subprocessing.run_hmmpress(profile_path)
                if not outcome.successful():
                    problems.append("Failed to hmmpress {!r}: {!r}".format(profile, outcome.stderr))
                # one hmmpress run creates every binary file, so stop checking
                break

            pressed_mtime = os.path.getmtime(pressed)
            profile_mtime = os.path.getmtime(profile_path)
            if pressed_mtime > profile_mtime:
                # binary file was modified after the profile, leave it alone
                continue

            # the binary file is stale: remove all pressed files, then rebuild
            try:
                for stale in glob.glob("{}.h3?".format(profile_path)):
                    logging.debug("removing outdated file %r", stale)
                    os.remove(stale)
            except OSError as err:
                problems.append("Failed to remove outdated binary file for %s: %s" % (profile, err))
                break

            outcome = subprocessing.run_hmmpress(profile_path)
            if not outcome.successful():
                problems.append("Failed to hmmpress %r: %r" % (profile, outcome.stderr))
                problems.append("HMM binary files outdated. %s (changed: %s) vs %s (changed: %s)" % (
                    profile, datetime.datetime.fromtimestamp(profile_mtime),
                    pressed, datetime.datetime.fromtimestamp(pressed_mtime)))
            # one hmmpress run creates every binary file, so stop checking
            break

    return problems
Exemple #2
0
def check_prereqs() -> List[str]:
    """ Checks that the required executables and HMM databases are available,
        running hmmpress on any database that is missing a binary file.

        Returns:
            a list of strings describing any problems found
    """
    problems = []

    for executable, optional in (('hmmscan', False), ('hmmpress', False)):
        if path.locate_executable(executable) is not None or optional:
            continue
        problems.append("Failed to locate executable for %r" % executable)

    database_names = ('abmotifs.hmm', 'dockingdomains.hmm',
                      'ksdomains.hmm', 'nrpspksdomains.hmm')
    databases = [path.get_full_path(__file__, 'data', name) for name in database_names]
    pressed_suffixes = ('.h3f', '.h3i', '.h3m', '.h3p')

    for database in databases:
        if path.locate_file(database) is None:
            problems.append("Failed to locate file %r" % database)
            continue

        # any() stops looking at the first missing binary file, and only one
        # hmmpress run is made per database regardless of how many are missing
        needs_press = any(path.locate_file("%s%s" % (database, suffix)) is None
                          for suffix in pressed_suffixes)
        if not needs_press:
            continue

        outcome = subprocessing.run_hmmpress(database)
        if not outcome.successful():
            problems.append('Failed to hmmpress {!r}: {}'.format(database, outcome.stderr))

    return problems
Exemple #3
0
def ensure_database_pressed(filepath: str,
                            return_not_raise: bool = False) -> List[str]:
    """ Runs hmmpress on the given HMMer database whenever the files that
        hmmpress generates for it are reported as outdated.

        Arguments:
            filepath: the path to the HMMer database
            return_not_raise: if True, a hmmpress failure is returned as a message
                              instead of being raised

        Returns:
            a list of error messages, only ever non-empty if return_not_raise is True

        Raises:
            RuntimeError: if hmmpress fails and return_not_raise is False
    """
    pressed_files = ["%s%s" % (filepath, suffix)
                     for suffix in ('.h3f', '.h3i', '.h3m', '.h3p')]

    # nothing to do while the pressed files are considered current
    if not path.is_outdated(pressed_files, filepath):
        return []

    logging.info("%s components missing or obsolete, re-pressing database",
                 filepath)
    outcome = subprocessing.run_hmmpress(filepath)
    if outcome.successful():
        return []

    message = "Failed to hmmpress {!r}: {}".format(filepath, outcome.stderr)
    if return_not_raise:
        return [message]
    raise RuntimeError(message)
Exemple #4
0
def check_prereqs() -> List[str]:
    """ Check that prereqs are satisfied: the hmmsearch and hmmpress
        executables (both treated as required), the signature profile details,
        the combined HMM database (generated from the individual profiles
        if it is missing), and the hmmpress-generated files for that database.

        Returns:
            a list of strings describing any errors, empty if none occurred
    """
    failure_messages = []
    for binary_name, optional in [('hmmsearch', False), ('hmmpress', False)]:
        if path.locate_executable(binary_name) is None and not optional:
            failure_messages.append("Failed to locate executable for %r" %
                                    binary_name)

    # Check that the signature profile details are readable and well-formatted
    # (the original comment names hmmdetails.txt as the source - TODO confirm)
    try:
        profiles = get_signature_profiles()
    except ValueError as err:
        failure_messages.append(str(err))
        # without the profiles nothing below can be generated or checked, and
        # trying to iterate over the missing profiles would raise a TypeError
        return failure_messages

    # the path to the markov model
    hmm = path.get_full_path(__file__, 'data', 'bgc_seeds.hmm')
    if path.locate_file(hmm) is None:
        # try to generate file from all specified profiles
        hmm_files = [os.path.join("data", sig.hmm_file) for sig in profiles]
        try:
            # read every profile before creating the combined file, so that an
            # unreadable profile can't leave a truncated database behind that
            # would be mistaken for a complete one on the next run
            contents = []
            for hmm_file in hmm_files:
                with open(path.get_full_path(__file__, hmm_file),
                          'r') as handle:
                    contents.append(handle.read())
            with open(hmm, 'w') as all_hmms_handle:
                all_hmms_handle.writelines(contents)
        except OSError:
            failure_messages.append('Failed to generate file {!r}'.format(hmm))

    # if previous steps have failed, the remainder will too, so don't try
    if failure_messages:
        return failure_messages

    binary_extensions = ['.h3f', '.h3i', '.h3m', '.h3p']
    for ext in binary_extensions:
        binary = "{}{}".format(hmm, ext)
        if path.locate_file(binary) is None:
            result = run_hmmpress(hmm)
            if not result.successful():
                failure_messages.append('Failed to hmmpress {!r}: {}'.format(
                    hmm, result.stderr))
            # only one hmmpress run is made, however many files are missing
            break

    return failure_messages
Exemple #5
0
def ensure_database_pressed(filepath: str,
                            return_not_raise: bool = False) -> List[str]:
    """ Makes sure the given HMMer database exists and runs hmmpress on it if
        any hmmpress-generated file is missing or older than the database.

        Arguments:
            filepath: the path to the HMMer database
            return_not_raise: if True, errors are returned as messages
                              instead of being raised

        Returns:
            a list of error messages, only ever non-empty if return_not_raise is True

        Raises:
            FileNotFoundError: if the database is missing and return_not_raise is False
            RuntimeError: if hmmpress fails and return_not_raise is False
    """
    try:
        database_mtime = os.path.getmtime(filepath)
    except FileNotFoundError as err:
        if return_not_raise:
            return [str(err)]
        raise

    for suffix in ('.h3f', '.h3i', '.h3m', '.h3p'):
        component = "%s%s" % (filepath, suffix)
        missing = not path.locate_file(component)
        if missing or os.path.getmtime(component) < database_mtime:
            logging.info("%s does not exist or is out of date, hmmpressing %s",
                         component, filepath)
            break
    else:
        # every component exists and is at least as new as the database
        return []

    outcome = subprocessing.run_hmmpress(filepath)
    if outcome.successful():
        return []

    message = "Failed to hmmpress {!r}: {}".format(filepath, outcome.stderr)
    if return_not_raise:
        return [message]
    raise RuntimeError(message)
Exemple #6
0
def check_prereqs() -> List[str]:
    """ Checks that the required executables and the HMM database are
        available, running hmmpress on the database if it is missing any
        of its binary files.

        Returns:
            a list of strings describing any problems found
    """
    problems = []

    for executable in ('muscle', 'hmmscan', 'hmmpress', 'fasttree', 'java'):
        if path.locate_executable(executable) is None:
            problems.append("Failed to locate file: %r" % executable)

    pressed_suffixes = ('.h3f', '.h3i', '.h3m', '.h3p')
    for database_name in ('smcogs.hmm',):
        database = path.get_full_path(__file__, 'data', database_name)
        if path.locate_file(database) is None:
            problems.append("Failed to locate file %r" % database)
            continue

        # any() stops looking at the first missing binary file, and only one
        # hmmpress run is made regardless of how many files are missing
        needs_press = any(path.locate_file("{}{}".format(database, suffix)) is None
                          for suffix in pressed_suffixes)
        if not needs_press:
            continue

        outcome = subprocessing.run_hmmpress(database)
        if not outcome.successful():
            problems.append("Failed to hmmpress %s: %s" %
                            (database, outcome.stderr.rstrip()))

    return problems
Exemple #7
0
def check_prereqs() -> List[str]:
    """ Check the prerequisites.
            hmmscan: domain detection
            blastp: CLF and starter unit analysis
            HMMs: t2pks.hmm
            BLAST databases: KSIII, AT, and LIG, each with its
                             .fasta, .phr, .pin, and .psq files

        hmmpress is run on the HMM database if any of its binary files
        is missing.

        Returns:
            a list of strings describing any errors, if they occurred
    """
    problems = []

    for executable in ('hmmscan', 'blastp'):
        if path.locate_executable(executable) is None:
            problems.append("Failed to locate file: %r" % executable)

    pressed_suffixes = ('.h3f', '.h3i', '.h3m', '.h3p')
    for database_name in ('t2pks.hmm',):
        database = path.get_full_path(__file__, 'data', database_name)
        if path.locate_file(database) is None:
            problems.append("Failed to locate file %r" % database)
            continue

        # any() stops looking at the first missing binary file, and only one
        # hmmpress run is made regardless of how many files are missing
        needs_press = any(path.locate_file("{}{}".format(database, suffix)) is None
                          for suffix in pressed_suffixes)
        if not needs_press:
            continue

        outcome = subprocessing.run_hmmpress(database)
        if not outcome.successful():
            problems.append("Failed to hmmpress %s: %s" %
                            (database, outcome.stderr.rstrip()))

    # every file of every BLAST database is reported individually if missing
    for database_name in ('KSIII', 'AT', 'LIG'):
        for suffix in ('.fasta', '.phr', '.pin', '.psq'):
            blast_file = path.get_full_path(__file__, 'data', database_name + suffix)
            if path.locate_file(blast_file) is None:
                problems.append("Failed to locate file %r" % blast_file)

    return problems