コード例 #1
0
def scan_for_functions(cds_features: List[CDSFeature], database: str,
                       hmmscan_opts: Optional[List[str]] = None) -> Dict[str, HMMResult]:
    """ Scans the provided genes against an HMM database and keeps a single
        hit for each gene that has any.

        Arguments:
            cds_features: a list of CDSFeatures to classify
            database: the path to the HMM database to scan against
            hmmscan_opts: a list of extra options to hand on to hmmscan

        Returns:
            a dictionary mapping CDS name to the first HMMResult of the refined
                results for that CDS (CDSs without hits are left out)
    """
    query = fasta.get_fasta_from_features(cds_features)
    raw_results = subprocessing.run_hmmscan(database, query, hmmscan_opts)
    profile_lengths = utils.get_hmm_lengths(database)
    refined = refine_hmmscan_results(raw_results, profile_lengths)

    first_hits = {}  # type: Dict[str, HMMResult]
    for name in (cds.get_name() for cds in cds_features):
        candidates = refined.get(name)
        # a missing or empty hit list means there is nothing to report for this CDS
        if candidates:
            first_hits[name] = candidates[0]
    return first_hits
コード例 #2
0
ファイル: hmmer.py プロジェクト: SBGlab/antismash
def run_hmmer(record: Record, features: Iterable[CDSFeature], max_evalue: float,
              min_score: float, database: str, tool: str, filter_overlapping: bool = True
              ) -> HmmerResults:
    """ Runs hmmscan (with the "--cut_tc" option) over the given features and
        wraps the resulting hits in a HmmerResults instance.

        Arguments:
            record: the Record instance the features belong to
            features: the CDSFeatures to run over specifically
            max_evalue: the maximum evalue allowed for hits
            min_score: the minimum score allowed for hits
            database: the path of the database to search for hits within
            tool: the name of the specific tool calling into this module
            filter_overlapping: if True, hits are grouped by locus tag and each
                                group is passed through remove_overlapping

        Returns:
            a HmmerResults instance holding the (optionally filtered) hits

        Raises:
            ValueError: if the given database path does not exist
    """
    if not os.path.exists(database):
        raise ValueError("Given database does not exist: %s" % database)

    query = fasta.get_fasta_from_features(features)
    raw_results = subprocessing.run_hmmscan(database, query, opts=["--cut_tc"])
    all_hits = build_hits(record, raw_results, min_score, max_evalue, database)

    if not filter_overlapping:
        return HmmerResults(record.id, max_evalue, min_score, database, tool, all_hits)

    # overlap removal is applied separately to the hits of each locus
    hits_by_locus = defaultdict(list)
    for hit in all_hits:
        hits_by_locus[hit.locus_tag].append(hit)

    cutoffs = pfamdb.get_pfam_cutoffs(database)
    kept = []
    for group in hits_by_locus.values():
        kept.extend(remove_overlapping(group, cutoffs))
    return HmmerResults(record.id, max_evalue, min_score, database, tool, kept)
コード例 #3
0
ファイル: hmmer.py プロジェクト: eburgoswisc/antismash
def run_hmmer(record: Record, features: List[CDSFeature], max_evalue: float,
              min_score: float, database: str, tool: str) -> HmmerResults:
    """ Runs hmmscan over the given features and wraps the resulting hits in
        a HmmerResults instance.

        Arguments:
            record: the Record instance the features belong to
            features: the list of CDSFeatures to run over specifically
            max_evalue: the maximum evalue allowed for hits
            min_score: the minimum score allowed for hits
            database: the path of the database to search for hits within
            tool: the name of the specific tool calling into this module

        Returns:
            a HmmerResults instance holding the hits that were built

        Raises:
            ValueError: if the given database path does not exist
    """
    database_found = os.path.exists(database)
    if not database_found:
        raise ValueError("Given database does not exist: %s" % database)

    query = fasta.get_fasta_from_features(features)
    raw_results = subprocessing.run_hmmscan(database, query)
    found = build_hits(record, raw_results, min_score, max_evalue, database)
    return HmmerResults(record.id, max_evalue, min_score, database, tool, found)
コード例 #4
0
def find_ks_domains(fasta: str) -> Dict[str, List[HMMResult]]:
    """ Scans the given sequences against the KS domain profiles in
        data/ksdomains.hmm, using hmmscan's "--cut_tc" option.

        Arguments:
            fasta: a group of features in fasta format

        Returns:
            a dictionary mapping feature name to a list of KS domain results for that feature
    """
    profile_path = path.get_full_path(__file__, "data", "ksdomains.hmm")
    profile_lengths = utils.get_hmm_lengths(profile_path)
    raw_hits = subprocessing.run_hmmscan(profile_path, fasta, ["--cut_tc"])
    # refinement is run with neighbour_mode enabled
    return refine_hmmscan_results(raw_hits, profile_lengths, neighbour_mode=True)
コード例 #5
0
def find_ab_motifs(fasta: str) -> Dict[str, List[HMMResult]]:
    """ Scans the given sequences against the motif profiles in
        data/abmotifs.hmm, passing "-E 0.25" to hmmscan.

        Arguments:
            fasta: a group of features in fasta format

        Returns:
            a dictionary mapping feature name to a list of motif results for that feature
    """
    profile_path = path.get_full_path(__file__, "data", "abmotifs.hmm")
    raw_hits = subprocessing.run_hmmscan(profile_path, fasta, ["-E", "0.25"])
    profile_lengths = utils.get_hmm_lengths(profile_path)
    # refinement is run with neighbour_mode enabled
    return refine_hmmscan_results(raw_hits, profile_lengths, neighbour_mode=True)
コード例 #6
0
def run_t2pks_hmmscan(cluster: Cluster) -> Dict[str, List[HMMResult]]:
    """ Scans the coding sequences of a cluster against the type II PKS
        profiles in data/t2pks.hmm, using hmmscan's "--cut_tc" option.

        Arguments:
            cluster: the Cluster whose cds_children will be scanned

        Returns:
            a dictionary mapping each CDS to a list of HMMResults, as produced
                by refining the hmmscan results for the cluster
    """
    query = fasta.get_fasta_from_features(cluster.cds_children)
    profile_path = path.get_full_path(__file__, "data", "t2pks.hmm")
    raw_hits = subprocessing.run_hmmscan(profile_path, query, opts=['--cut_tc'])
    profile_lengths = get_hmm_lengths(profile_path)
    return refine_hmmscan_results(raw_hits, profile_lengths)
コード例 #7
0
def find_domains(fasta: str, record: Record) -> Dict[str, List[HMMResult]]:
    """ Scans the given sequences against the NRPS/PKS domain profiles in
        data/nrpspksdomains.hmm (C/A/PCP/E/KS/AT/ATd/DH/KR/ER/ACP/TE/TD/COM/Docking/MT/CAL
        domains), using hmmscan's "--cut_tc" option.

        Arguments:
            fasta: a group of features in fasta format
            record: the Record that contains all the features

        Returns:
            a dictionary mapping feature name to a list of domain results for that
                feature, after filter_nonterminal_docking_domains has been applied
    """
    profile_path = path.get_full_path(__file__, "data", "nrpspksdomains.hmm")
    raw_hits = subprocessing.run_hmmscan(profile_path, fasta, ["--cut_tc"])
    profile_lengths = utils.get_hmm_lengths(profile_path)
    # refinement is run with neighbour_mode enabled
    refined = refine_hmmscan_results(raw_hits, profile_lengths, neighbour_mode=True)
    return filter_nonterminal_docking_domains(record, refined)
コード例 #8
0
def run_t2pks_hmmscan(
        cds_features: Iterable[CDSFeature]) -> Dict[str, List[HMMResult]]:
    """ Scans the given CDSFeatures against the type II PKS profiles in
        data/t2pks.hmm, using hmmscan's "--cut_tc" option.

        Arguments:
            cds_features: the CDSFeatures on which to run the type II PKS hmmscan

        Returns:
            a dictionary mapping each CDS to a list of HMMResults, as produced
                by refining the hmmscan results for the given features
    """
    query = fasta.get_fasta_from_features(cds_features)
    profile_path = path.get_full_path(__file__, "data", "t2pks.hmm")
    raw_hits = subprocessing.run_hmmscan(profile_path, query, opts=['--cut_tc'])
    profile_lengths = get_hmm_lengths(profile_path)
    return refine_hmmscan_results(raw_hits, profile_lengths)
コード例 #9
0
ファイル: hmmer.py プロジェクト: setubazie/antismash
def run_hmmer(record: Record, features: Iterable[CDSFeature], max_evalue: float,
              min_score: float, database: str, tool: str) -> HmmerResults:
    """ Runs hmmscan (with the "--cut_tc" option) over the given features and
        wraps the resulting hits in a HmmerResults instance.

        Arguments:
            record: the Record instance the features belong to
            features: the CDSFeatures to run over specifically
            max_evalue: the maximum evalue allowed for hits
            min_score: the minimum score allowed for hits
            database: the path of the database to search for hits within
            tool: the name of the specific tool calling into this module

        Returns:
            a HmmerResults instance holding the hits that were built

        Raises:
            ValueError: if the given database path does not exist
    """
    database_found = os.path.exists(database)
    if not database_found:
        raise ValueError("Given database does not exist: %s" % database)

    query = fasta.get_fasta_from_features(features)
    raw_results = subprocessing.run_hmmscan(database, query, opts=["--cut_tc"])
    found = build_hits(record, raw_results, min_score, max_evalue, database)
    return HmmerResults(record.id, max_evalue, min_score, database, tool, found)
コード例 #10
0
def classify_genes(
        cds_features: List[CDSFeature]) -> Dict[str, List[HMMResult]]:
    """ Scans the provided genes against the profiles in data/smcogs.hmm,
        passing "-E 1E-6" to hmmscan, to find possible classifications.

        Arguments:
            cds_features: a list of CDSFeatures to classify

        Returns:
            a dictionary mapping CDS name to a list of HMMResult instances of
                classifications
    """
    query = fasta.get_fasta_from_features(cds_features)
    profile_path = path.get_full_path(__file__, "data", "smcogs.hmm")
    raw_hits = subprocessing.run_hmmscan(profile_path, query, ["-E", "1E-6"])
    profile_lengths = utils.get_hmm_lengths(profile_path)
    return refine_hmmscan_results(raw_hits, profile_lengths)