예제 #1
0
    def test_overlaps_but_not_contains(self):
        """Check rule hits and protocluster formation for the overlap case.

        Two rules are parsed: "Overlap" (modelB and modelF) and
        "OverlapImpossible" (modelA and modelF). Only "Overlap" is expected
        to be satisfied, via GENE_2 (modelB) and GENE_3 (modelF), and exactly
        one protocluster is expected to be built from those two genes.
        """
        # one rule per line; the comma between the two literals matters,
        # without it Python concatenates them into a single list element
        # and the join has nothing to join
        rule_text = "\n".join([
            "RULE Overlap CUTOFF 25 NEIGHBOURHOOD 5 CONDITIONS modelB and modelF ",
            "RULE OverlapImpossible CUTOFF 25 NEIGHBOURHOOD 5 CONDITIONS modelA and modelF",
        ])
        rules = rule_parser.Parser(rule_text, self.test_names).rules

        # should get gene2 and gene3
        detected_types, cluster_type_hits = hmm_detection.apply_cluster_rules(
            self.record, self.results_by_id, rules)
        assert detected_types == {
            "GENE_2": {
                "Overlap": {"modelB"}
            },
            "GENE_3": {
                "Overlap": {"modelF"}
            }
        }

        # "OverlapImpossible" must not appear at all
        assert cluster_type_hits == {"Overlap": {"GENE_2", "GENE_3"}}

        # only 1 cluster should be found, since it requires both genes
        # if forming clusters by .is_contained_by(), 2 clusters will be formed
        # if finding rule hits uses .is_contained_by(), no clusters will be formed
        rules_by_name = {rule.name: rule for rule in rules}
        clusters = hmm_detection.find_protoclusters(self.record,
                                                    cluster_type_hits,
                                                    rules_by_name)
        assert len(clusters) == 1
        assert clusters[0].product == "Overlap"
        assert clusters[0].core_location.start == 30000
        assert clusters[0].core_location.end == 90000
예제 #2
0
    def test_apply_cluster_rules(self):
        """Check the per-gene and per-rule hits returned by apply_cluster_rules.

        Runs the fixture record, HMM results and rules through
        hmm_detection.apply_cluster_rules() and compares both returned
        mappings against hard-coded expectations.
        """
        hits_by_gene, genes_by_rule = hmm_detection.apply_cluster_rules(
            self.record, self.results_by_id, self.rules)

        # normalise every gene's value to a plain set so it can be compared
        # with set literals (set() over a mapping keeps only its keys)
        for gene_id, rule_hits in hits_by_gene.items():
            hits_by_gene[gene_id] = set(rule_hits)

        expected_by_gene = {
            "GENE_1": {"MetaboliteA", "MetaboliteB", "MetaboliteC", "MetaboliteD"},
            "GENE_2": {"MetaboliteC", "MetaboliteD"},
            "GENE_3": {"Metabolite0"},
            "GENE_4": {"MetaboliteA"},
            "GENE_5": {"Metabolite1", "MetaboliteA"},
        }
        assert hits_by_gene == expected_by_gene

        # the second return value is the inverse view: rule name -> gene names
        expected_by_rule = {
            "MetaboliteA": {"GENE_1", "GENE_4", "GENE_5"},
            "MetaboliteB": {"GENE_1"},
            "MetaboliteC": {"GENE_1", "GENE_2"},
            "MetaboliteD": {"GENE_1", "GENE_2"},
            "Metabolite0": {"GENE_3"},
            "Metabolite1": {"GENE_5"},
        }
        assert genes_by_rule == expected_by_rule