コード例 #1
0
def scan_for_functions(cds_features: List[CDSFeature], database: str,
                       hmmscan_opts: Optional[List[str]] = None) -> Dict[str, HMMResult]:
    """ Runs hmmscan for the given CDS features against a database and keeps
        a single hit per feature.

        Arguments:
            cds_features: the CDSFeatures to search with
            database: the path of the HMM database handed to hmmscan
            hmmscan_opts: extra options to pass through to hmmscan, if any

        Returns:
            a dictionary mapping CDS name to the first HMMResult of the refined
            hits for that CDS, features without any hit are left out
    """
    query_fasta = fasta.get_fasta_from_features(cds_features)
    raw_hits = subprocessing.run_hmmscan(database, query_fasta, hmmscan_opts)
    profile_lengths = utils.get_hmm_lengths(database)
    refined = refine_hmmscan_results(raw_hits, profile_lengths)

    first_hits = {}  # type: Dict[str, HMMResult]

    for name in (feature.get_name() for feature in cds_features):
        candidates = refined.get(name)
        # a missing key and an empty hit list are treated alike: no entry
        if candidates:
            # NOTE(review): presumably the refined hits are ordered best-first,
            # confirm against refine_hmmscan_results
            first_hits[name] = candidates[0]

    return first_hits
コード例 #2
0
def find_ks_domains(fasta: str) -> Dict[str, List[HMMResult]]:
    """ Searches the given sequences against the bundled KS domain profiles
        ("data/ksdomains.hmm", located relative to this module).

        Arguments:
            fasta: a group of features in fasta format

        Returns:
            a dictionary mapping feature name to a list of KS domain results
            for that feature
    """
    hmm_path = path.get_full_path(__file__, "data", "ksdomains.hmm")
    profile_lengths = utils.get_hmm_lengths(hmm_path)
    # "--cut_tc" asks hmmscan to use the cutoffs stored in the profiles
    raw_hits = subprocessing.run_hmmscan(hmm_path, fasta, ["--cut_tc"])
    return refine_hmmscan_results(raw_hits, profile_lengths, neighbour_mode=True)
コード例 #3
0
def find_ab_motifs(fasta: str) -> Dict[str, List[HMMResult]]:
    """ Searches the given sequences against the bundled abMotif profiles
        ("data/abmotifs.hmm", located relative to this module).

        Arguments:
            fasta: a group of features in fasta format

        Returns:
            a dictionary mapping feature name to a list of motif results
            for that feature
    """
    hmm_path = path.get_full_path(__file__, "data", "abmotifs.hmm")
    # "-E 0.25" is handed to hmmscan as its E-value reporting threshold
    raw_hits = subprocessing.run_hmmscan(hmm_path, fasta, ["-E", "0.25"])
    return refine_hmmscan_results(raw_hits, utils.get_hmm_lengths(hmm_path),
                                  neighbour_mode=True)
コード例 #4
0
def run_t2pks_hmmscan(cluster: Cluster) -> Dict[str, List[HMMResult]]:
    """ Runs hmmscan with the bundled type II PKS profiles ("data/t2pks.hmm")
        on the coding sequences of a cluster

        Arguments:
            cluster: the Cluster whose cds_children will be searched

        Returns:
            a dictionary mapping CDS name to a list of HMMResults for that CDS,
            as produced by refine_hmmscan_results
    """
    query_fasta = fasta.get_fasta_from_features(cluster.cds_children)
    profiles = path.get_full_path(__file__, "data", "t2pks.hmm")
    # '--cut_tc' asks hmmscan to use the cutoffs stored in the profiles
    raw_hits = subprocessing.run_hmmscan(profiles, query_fasta, opts=['--cut_tc'])
    return refine_hmmscan_results(raw_hits, get_hmm_lengths(profiles))
コード例 #5
0
def find_domains(fasta: str, record: Record) -> Dict[str, List[HMMResult]]:
    """ Searches the given sequences against the bundled NRPS/PKS domain
        profiles ("data/nrpspksdomains.hmm"), i.e. for
        C/A/PCP/E/KS/AT/ATd/DH/KR/ER/ACP/TE/TD/COM/Docking/MT/CAL domains

        Arguments:
            fasta: a group of features in fasta format
            record: the Record that contains all the features

        Returns:
            a dictionary mapping feature name to a list of domain results for
            that feature, after filter_nonterminal_docking_domains has been
            applied to the refined hits
    """
    hmm_path = path.get_full_path(__file__, "data", "nrpspksdomains.hmm")
    # "--cut_tc" asks hmmscan to use the cutoffs stored in the profiles
    raw_hits = subprocessing.run_hmmscan(hmm_path, fasta, ["--cut_tc"])
    refined = refine_hmmscan_results(raw_hits, utils.get_hmm_lengths(hmm_path),
                                     neighbour_mode=True)
    return filter_nonterminal_docking_domains(record, refined)
コード例 #6
0
def run_t2pks_hmmscan(
        cds_features: Iterable[CDSFeature]) -> Dict[str, List[HMMResult]]:
    """ Runs hmmscan with the bundled type II PKS profiles ("data/t2pks.hmm")
        on the given CDSFeatures

        Arguments:
            cds_features: the CDSFeatures to search with

        Returns:
            a dictionary mapping CDS name to a list of HMMResults for that CDS,
            as produced by refine_hmmscan_results
    """
    query_fasta = fasta.get_fasta_from_features(cds_features)
    profiles = path.get_full_path(__file__, "data", "t2pks.hmm")
    # '--cut_tc' asks hmmscan to use the cutoffs stored in the profiles
    raw_hits = subprocessing.run_hmmscan(profiles, query_fasta, opts=['--cut_tc'])
    return refine_hmmscan_results(raw_hits, get_hmm_lengths(profiles))
コード例 #7
0
def classify_genes(
        cds_features: List[CDSFeature]) -> Dict[str, List[HMMResult]]:
    """ Searches the provided genes against the bundled smCOG profiles
        ("data/smcogs.hmm") to find possible classifications.

        Arguments:
            cds_features: a list of CDSFeatures to classify

        Returns:
            a dictionary mapping CDS name to a list of HMMResult instances of
            classifications
    """
    query_fasta = fasta.get_fasta_from_features(cds_features)
    profiles = path.get_full_path(__file__, "data", "smcogs.hmm")
    # "-E 1E-6" is handed to hmmscan as its E-value reporting threshold
    raw_hits = subprocessing.run_hmmscan(profiles, query_fasta, ["-E", "1E-6"])
    return refine_hmmscan_results(raw_hits, utils.get_hmm_lengths(profiles))