コード例 #1
0
ファイル: test_region.py プロジェクト: stogqy/antismash
 def test_unique_clusters(self):
     """ A protocluster shared by several candidates is only reported once """
     protos = [create_protocluster(index, 10, product=product)
               for index, product in enumerate("ABC")]
     # the middle protocluster is deliberately a member of both candidates
     leading = CandidateCluster(CandidateCluster.kinds.INTERLEAVED, protos[:2])
     trailing = CandidateCluster(CandidateCluster.kinds.INTERLEAVED, protos[1:])
     for candidate in (leading, trailing):
         assert protos[1] in candidate.protoclusters

     region = Region(candidate_clusters=[leading, trailing])
     found = region.get_unique_protoclusters()
     # had the shared protocluster been repeated, there would be a fourth entry
     assert len(found) == 3
     assert found == protos
コード例 #2
0
def score_against_protoclusters(label: str, region: Region,
                                hits_by_reference: HitsByReference,
                                query_components: Dict[CDSCollection,
                                                       Components],
                                mode: Mode) -> VariantResults:
    """ Performs a protocluster vs protocluster comparison

        Each unique protocluster of the query region is scored against each
        protocluster of each reference region present in the hits once they
        have been filtered to the query region. A reference region's total
        score is the sum, over the query protoclusters, of the best score
        found for that query protocluster against any of the reference
        region's protoclusters. Reference regions are ranked by that total,
        highest first.

        Arguments:
            label: the name to attach to the results
            region: the query Region
            hits_by_reference: a dictionary mapping ReferenceRecord to
                                a dictionary mapping reference CDS name to Hit
            query_components: a dictionary mapping the region and each contained protocluster to
                                a Components instance with the relevant data
            mode: the Mode in which to run the analysis

        Returns:
            a VariantResults instance
    """
    # query protocluster number -> reference region -> reference protocluster -> scorer
    score_matrix: Dict[int, Dict[ReferenceRegion, Dict[
        ReferenceProtocluster,
        ReferenceScorer]]] = defaultdict(lambda: defaultdict(dict))
    # query protocluster -> reference region -> best score seen so far
    reference_best_scores: Dict[Protocluster, Dict[
        ReferenceRegion, float]] = defaultdict(lambda: defaultdict(float))
    local_hits = filter_by_query_area(region, hits_by_reference)
    # the query protoclusters are the same for every reference, so gather
    # them once instead of once per reference protocluster
    query_protoclusters = region.get_unique_protoclusters()
    for ref_region in local_hits:
        # restrict the hits to the single reference region being compared
        hits_for_ref_region = {ref_region: local_hits[ref_region]}
        for ref_protocluster in ref_region.protoclusters:
            hits = filter_by_reference_protocluster(ref_protocluster,
                                                    hits_for_ref_region)
            for protocluster in query_protoclusters:
                for scorer in score_query_area(protocluster, hits,
                                               query_components[protocluster],
                                               mode):
                    # only the best reference protocluster counts towards
                    # the total of the reference region
                    score = max(
                        scorer.final_score,
                        reference_best_scores[protocluster][ref_region])
                    reference_best_scores[protocluster][ref_region] = score
                    score_matrix[protocluster.get_protocluster_number(
                    )][ref_region][ref_protocluster] = scorer

    # sum the per-protocluster best scores into one total per reference region
    reference_total_scores: Dict[ReferenceRegion, float] = defaultdict(float)
    for ref_region_to_score in reference_best_scores.values():
        for ref_region, score in ref_region_to_score.items():
            reference_total_scores[ref_region] += score

    region_ranking = sorted(reference_total_scores.items(),
                            key=lambda x: x[1],
                            reverse=True)

    region_ranking, score_matrix, best_hits = apply_limits_to_rankings(
        region_ranking, score_matrix, local_hits)
    return VariantResults(label, region_ranking,
                          ProtoToProtoScores(score_matrix), best_hits)
コード例 #3
0
def score_as_protoclusters(label: str, region: Region,
                           hits_by_reference: HitsByReference,
                           query_components: Dict[CDSCollection, Components],
                           mode: Mode) -> VariantResults:
    """ Compares each protocluster of the query region to whole reference regions

        Arguments:
            label: the name to attach to the results
            region: the query Region
            hits_by_reference: a dictionary mapping ReferenceRecord to
                                a dictionary mapping reference CDS name to Hit
            query_components: a dictionary mapping the region and each contained protocluster to
                                a Components instance with the relevant data
            mode: the Mode in which to run the analysis

        Returns:
            a VariantResults instance
    """
    hits_in_region = filter_by_query_area(region, hits_by_reference)

    # query protocluster number -> reference -> scorer
    scorers_by_number: Dict[int, Dict[ReferenceRegion,
                                      ReferenceScorer]] = defaultdict(dict)
    # reference -> accumulated ranking value over all query protoclusters
    summed_by_reference: Dict[ReferenceRegion, float] = defaultdict(float)

    for query in region.get_unique_protoclusters():
        components = query_components[query]
        for scorer in score_query_area(query, hits_in_region, components, mode):
            reference = scorer.reference
            summed_by_reference[reference] += calculate_protocluster_ranking(
                scorer)
            scorers_by_number[query.get_protocluster_number()][
                scorer.reference] = scorer

    # highest accumulated value first
    ordered = sorted(summed_by_reference.items(),
                     key=lambda pair: pair[1],
                     reverse=True)
    limited = apply_limits_to_rankings(ordered, scorers_by_number,
                                       hits_in_region)
    ordered, scorers_by_number, best_hits = limited
    return VariantResults(label, ordered,
                          ProtoToRegionScores(scorers_by_number), best_hits)