def run_detection(record: Record, options: ConfigType,
                  module_results: Dict[str, Union[ModuleResults, Dict[str, Any]]]) -> Dict[str, float]:
    """ Detect different secondary metabolite clusters, PFAMs, and domains.

        Runs in three phases:
            1. whole-genome detection modules (e.g. full_hmmer)
            2. cluster prediction modules, whose protoclusters/subregions are
               added to the record before candidate clusters and regions are built
            3. modules restricted to genes within the detected clusters

        If no regions are found after phase 2, the record is marked as skipped
        and phase 3 is not run.

        Arguments:
            record: the Record to run detection over
            options: antiSMASH config
            module_results: a dictionary mapping a module's name to results from
                            a previous run on this module, as a ModuleResults
                            subclass or in JSON form

        Returns:
            the time taken by each detection module as a dictionary
    """
    timings: Dict[str, float] = {}

    # run full genome detections
    for module in [full_hmmer]:
        _run_module_and_add_results(record, module, options, module_results, timings)

    # generate cluster predictions
    logging.info("Detecting secondary metabolite clusters")
    for module in [hmm_detection, cassis, clusterfinder_probabilistic]:
        run_module(record, cast(AntismashModule, module), options, module_results, timings)
        results = module_results.get(module.__name__)
        if results:
            # cluster prediction modules must provide protocluster/subregion accessors
            assert isinstance(results, DetectionResults)
            for protocluster in results.get_predicted_protoclusters():
                record.add_protocluster(protocluster)
            for region in results.get_predicted_subregions():
                record.add_subregion(region)

    logging.debug("%d protoclusters found", len(record.get_protoclusters()))
    logging.debug("%d subregions found", len(record.get_subregions()))

    record.create_candidate_clusters()
    record.create_regions()

    # nothing to analyse further without regions, so mark the record and stop early
    if not record.get_regions():
        logging.info("No regions detected, skipping record")
        record.skip = "No regions detected"
        return timings

    logging.info("%d region(s) detected in record", len(record.get_regions()))

    # finally, run any detection limited to genes in clusters
    for module in [nrps_pks_domains, cluster_hmmer, genefunctions]:
        _run_module_and_add_results(record, module, options, module_results, timings)

    return timings


def _run_module_and_add_results(record: Record, module: Any, options: ConfigType,
                                module_results: Dict[str, Union[ModuleResults, Dict[str, Any]]],
                                timings: Dict[str, float]) -> None:
    """ Run a single detection module and add any results it produced to the record.

        Shared by the whole-genome and cluster-limited phases of run_detection,
        which previously duplicated this logic.

        Arguments:
            record: the Record being analysed
            module: the detection module to run
            options: antiSMASH config
            module_results: mapping of module name to prior results, updated in place
            timings: mapping of module name to runtime, updated in place

        Returns:
            None
    """
    run_module(record, cast(AntismashModule, module), options, module_results, timings)
    results = module_results.get(module.__name__)
    if results:
        assert isinstance(results, ModuleResults)
        logging.debug("Adding detection results from %s to record", module.__name__)
        results.add_to_record(record)
def test_genbank(self):
    """ Round-trip a region through genbank output and check the regenerated
        region matches the original (with coordinates shifted to be
        relative to the region start).
    """
    record = Record(Seq("A" * 100, generic_dna))
    # build two protoclusters, a subregion, and a candidate covering the clusters
    protoclusters = [create_protocluster(3, 20, "prodA"),
                     create_protocluster(25, 41, "prodB")]
    for protocluster in protoclusters:
        record.add_protocluster(protocluster)
    subregion = SubRegion(FeatureLocation(35, 71), "test", 0.7)
    record.add_subregion(subregion)
    candidate = CandidateCluster(CandidateCluster.kinds.NEIGHBOURING, protoclusters)
    record.add_candidate_cluster(candidate)
    region = Region(candidate_clusters=[candidate], subregions=[subregion])
    record.add_region(region)

    # write the region out and parse it back in
    with NamedTemporaryFile(suffix=".gbk") as output:
        region.write_to_genbank(output.name)
        parsed = list(seqio.parse(output.name))
    assert len(parsed) == 1
    print(parsed[0].features)
    regenerated_record = Record.from_biopython(parsed[0], taxon="bacteria")

    assert len(regenerated_record.get_regions()) == 1
    regenerated = regenerated_record.get_region(0)
    # coordinates in the written file are relative to the region's own start
    assert regenerated.location.start == 3 - region.location.start
    assert regenerated.location.end == 71 - region.location.start
    assert regenerated.products == region.products
    assert regenerated.probabilities == region.probabilities
class HmmDetectionTest(unittest.TestCase):
    """ Tests for rule-based HMM cluster detection: rule application,
        protocluster formation, and HSP filtering helpers.
    """
    def setUp(self):
        self.config = build_config([])
        self.rules_file = path.get_full_path(__file__, "..", "cluster_rules", "strict.txt")
        self.signature_file = path.get_full_path(__file__, "..", "data", "hmmdetails.txt")
        self.signature_names = {sig.name for sig in core.get_signature_profiles()}
        self.filter_file = path.get_full_path(__file__, "..", "filterhmmdetails.txt")
        # fake profile hits keyed by gene name
        self.results_by_id = {
            "GENE_1": [
                FakeHSPHit("modelA", "GENE_1", 0, 10, 50, 0),
                FakeHSPHit("modelB", "GENE_1", 0, 10, 50, 0)
            ],
            "GENE_2": [
                FakeHSPHit("modelC", "GENE_2", 0, 10, 50, 0),
                FakeHSPHit("modelB", "GENE_2", 0, 10, 50, 0)
            ],
            "GENE_3": [
                FakeHSPHit("modelC", "GENE_3", 0, 10, 50, 0),
                FakeHSPHit("modelF", "GENE_3", 0, 10, 50, 0)
            ],
            "GENE_4": [
                FakeHSPHit("modelA", "GENE_4", 0, 10, 50, 0),
                FakeHSPHit("modelE", "GENE_4", 0, 10, 50, 0)
            ],
            "GENE_5": [
                FakeHSPHit("modelA", "GENE_5", 0, 10, 50, 0),
                FakeHSPHit("modelG", "GENE_5", 0, 10, 50, 0)
            ]
        }
        self.feature_by_id = {
            "GENE_1": DummyCDS(0, 30000, locus_tag="GENE_1"),
            "GENE_2": DummyCDS(30000, 50000, locus_tag="GENE_2"),
            "GENE_3": DummyCDS(70000, 90000, locus_tag="GENE_3"),
            "GENE_X": DummyCDS(95000, 100000, locus_tag="GENE_X"),  # no hits
            "GENE_4": DummyCDS(125000, 140000, locus_tag="GENE_4"),
            "GENE_5": DummyCDS(130000, 150000, locus_tag="GENE_5")
        }
        self.test_names = {"modelA", "modelB", "modelC", "modelF", "modelG",
                           "a", "b", "c", "d"}
        self.rules = rule_parser.Parser("\n".join([
            "RULE MetaboliteA CUTOFF 10 NEIGHBOURHOOD 5 CONDITIONS modelA",
            "RULE MetaboliteB CUTOFF 10 NEIGHBOURHOOD 5 CONDITIONS cds(modelA and modelB)",
            "RULE MetaboliteC CUTOFF 10 NEIGHBOURHOOD 5 CONDITIONS (modelA and modelB)",
            "RULE MetaboliteD CUTOFF 20 NEIGHBOURHOOD 5 CONDITIONS minimum(2,[modelC,modelB]) and modelA",
            "RULE Metabolite0 CUTOFF 1 NEIGHBOURHOOD 3 CONDITIONS modelF",
            "RULE Metabolite1 CUTOFF 1 NEIGHBOURHOOD 3 CONDITIONS modelG"
        ]), self.test_names).rules
        self.features = list(self.feature_by_id.values())
        self.features.sort(key=lambda x: x.location.start)  # vital for py3 < 3.5
        self.record = Record()
        self.record._record.seq = Seq("A" * 150000)
        for feature in self.features:
            self.record.add_cds_feature(feature)

    def tearDown(self):
        # clear out any leftover config adjustments
        destroy_config()

    def test_overlaps_but_not_contains(self):
        # should get gene2 and gene3
        # fix: the two rule strings were missing a separating comma, so implicit
        # string concatenation merged them into a single malformed rule line
        rules = rule_parser.Parser("\n".join([
            "RULE Overlap CUTOFF 25 NEIGHBOURHOOD 5 CONDITIONS modelB and modelF ",
            "RULE OverlapImpossible CUTOFF 25 NEIGHBOURHOOD 5 CONDITIONS modelA and modelF"
        ]), self.test_names).rules
        detected_types, cluster_type_hits = hmm_detection.apply_cluster_rules(
            self.record, self.results_by_id, rules)
        assert detected_types == {"GENE_2": {"Overlap": {"modelB"}},
                                  "GENE_3": {"Overlap": {"modelF"}}}
        assert cluster_type_hits == {"Overlap": {"GENE_2", "GENE_3"}}
        # only 1 cluster should be found, since it requires both genes
        # if forming clusters by .is_contained_by(), 2 clusters will be formed
        # if finding rule hits uses .is_contained_by(), no clusters will be formed
        rules_by_name = {rule.name: rule for rule in rules}
        clusters = hmm_detection.find_protoclusters(self.record, cluster_type_hits, rules_by_name)
        assert len(clusters) == 1
        assert clusters[0].product == "Overlap"
        assert clusters[0].core_location.start == 30000
        assert clusters[0].core_location.end == 90000

    def test_core(self):
        # should be no failing prerequisites
        assert core.check_prereqs(self.config) == []
        # always runs
        assert core.is_enabled(None)

    def test_apply_cluster_rules(self):
        detected_types, cluster_type_hits = hmm_detection.apply_cluster_rules(
            self.record, self.results_by_id, self.rules)
        for gid in detected_types:
            detected_types[gid] = set(detected_types[gid])
        expected_types = {
            "GENE_1": set(["MetaboliteA", "MetaboliteB", "MetaboliteC", "MetaboliteD"]),
            "GENE_2": set(["MetaboliteC", "MetaboliteD"]),
            "GENE_3": set(["Metabolite0"]),
            "GENE_4": set(["MetaboliteA"]),
            "GENE_5": set(["Metabolite1", "MetaboliteA"])
        }
        assert detected_types == expected_types
        assert cluster_type_hits == {"MetaboliteA": {"GENE_1", "GENE_4", "GENE_5"},
                                     "MetaboliteB": {"GENE_1"},
                                     "MetaboliteC": {"GENE_1", "GENE_2"},
                                     'MetaboliteD': {'GENE_1', 'GENE_2'},
                                     'Metabolite0': {'GENE_3'},
                                     'Metabolite1': {'GENE_5'}}

    def test_find_protoclusters(self):
        cds_features_by_type = {"MetaboliteA": {"GENE_1", "GENE_4", "GENE_5"},
                                "MetaboliteB": {"GENE_1"},
                                "MetaboliteC": {"GENE_1", "GENE_2"},
                                'MetaboliteD': {'GENE_1', 'GENE_2'},
                                'Metabolite0': {'GENE_3'},
                                'Metabolite1': {'GENE_5'}}
        rules = {rule.name: rule for rule in self.rules}
        for cluster in hmm_detection.find_protoclusters(self.record, cds_features_by_type, rules):
            self.record.add_protocluster(cluster)
        assert len(self.record.get_protoclusters()) == 7
        cluster_products = sorted([cluster.product for cluster in self.record.get_protoclusters()])
        assert cluster_products == sorted(["Metabolite%s" % i for i in "01AABCD"])
        self.record.create_candidate_clusters()
        assert len(self.record.get_candidate_clusters()) == 3
        self.record.create_regions()
        assert len(self.record.get_regions()) == 3
        result_regions = []
        for region in self.record.get_regions():
            result_regions.append(sorted(cds.get_name() for cds in region.cds_children))
        expected_regions = [["GENE_1", "GENE_2"], ["GENE_3"], ["GENE_4", "GENE_5"]]
        assert result_regions == expected_regions

    def test_create_rules(self):
        rules = hmm_detection.create_rules(self.rules_file, self.signature_names)
        # fix: close the rules file handle instead of leaking it
        with open(self.rules_file) as handle:
            assert len(rules) == handle.read().count("\nRULE")
        t1pks_rules = [rule for rule in rules if rule.name == "T1PKS"]
        assert len(t1pks_rules) == 1
        rule = t1pks_rules[0]
        assert rule.cutoff == 20000
        assert rule.neighbourhood == 20000

    def test_profiles_parsing(self):
        profiles = signatures.get_signature_profiles()
        assert len(profiles) == 250  # ensures we don't delete any by accident

    def test_filter(self):
        # fake HSPs all in one CDS with overlap > 20 and query_ids from the same equivalence group
        # not overlapping by > 20
        first = FakeHSPHit("AMP-binding", "A", 50, 90, 0.1, None)
        second = FakeHSPHit("A-OX", "A", 70, 100, 0.5, None)
        new, by_id = hmm_detection.filter_results([first, second], {"A": [first, second]},
                                                  self.filter_file, self.signature_names)
        assert new == [first, second]
        assert by_id == {"A": [first, second]}

        # overlapping, in same group
        first.hit_end = 91
        assert hmm_detection.hsp_overlap_size(first, second) == 21
        new, by_id = hmm_detection.filter_results([first, second], {"A": [first, second]},
                                                  self.filter_file, self.signature_names)
        assert new == [second]
        assert by_id == {"A": [second]}

        # overlapping, not in same group
        second.query_id = "none"
        new, by_id = hmm_detection.filter_results([first, second], {"A": [first, second]},
                                                  self.filter_file, self.signature_names)
        assert new == [first, second]
        assert by_id == {"A": [first, second]}

        # not in the same CDS, but in the same group
        second.hit_id = "B"
        second.query_id = "A-OX"
        new, by_id = hmm_detection.filter_results([first, second], {"A": [first], "B": [second]},
                                                  self.filter_file, self.signature_names)
        assert new == [first, second]
        assert by_id == {"A": [first], "B": [second]}

    def test_filter_multiple(self):
        # all in one CDS no overlap and the same query_ids -> cull all but the best score
        # not overlapping, not same query_id
        first = FakeHSPHit("AMP-binding", "A", 50, 60, 0.1, None)
        second = FakeHSPHit("A-OX", "A", 70, 100, 0.5, None)
        both = [first, second]
        by_id = {"A": [first, second]}
        new, by_id = hmm_detection.filter_result_multiple(list(both), dict(by_id))
        assert new == [first, second]
        assert by_id == {"A": [first, second]}

        # not overlapping, same query_id
        first.query_id = "A-OX"
        new, by_id = hmm_detection.filter_result_multiple(list(both), dict(by_id))
        assert new == [second]
        assert by_id == {"A": [second]}

        # not in same CDS, same query_id
        second.hit_id = "B"
        by_id = {"A": [first], "B": [second]}
        new, by_id = hmm_detection.filter_result_multiple(list(both), dict(by_id))
        assert new == [first, second]
        assert by_id == {"A": [first], "B": [second]}

    def test_equivalence_groups(self):
        group_file = path.get_full_path(os.path.dirname(__file__), "filterhmmdetails.txt")
        sets = []
        with open(group_file) as group_lines:
            sets = [set(line.strip().split(',')) for line in group_lines]
        # ensure they have at least two elements
        assert all(len(s) > 1 for s in sets)
        # ensure that the groups are disjoint
        for i, group in enumerate(sets):
            for other in sets[i + 1:]:
                assert group.isdisjoint(other)

    def test_hsp_overlap_size(self):
        overlap_size = hmm_detection.hsp_overlap_size
        first = FakeHSPHit("A", "A", 50, 60, 0., None)
        second = FakeHSPHit("B", "B", 70, 100, 0., None)
        # no overlap
        assert overlap_size(first, second) == 0
        first.hit_end = 70
        # still no overlap, end isn't inclusive
        assert overlap_size(first, second) == 0
        # a mix of second starting inside first
        for i in range(1, 30):
            first.hit_end += 1
            assert overlap_size(first, second) == i
        # second wholly contained
        first.hit_end = 110
        assert overlap_size(first, second) == 30
        # first inside second
        first.hit_start = 75
        assert overlap_size(first, second) == 25
        # first inside second, but direction reversed
        first.hit_end = 50
        with self.assertRaises(AssertionError):
            overlap_size(first, second)

    def test_hmm_files_and_details_match(self):
        data_dir = path.get_full_path(os.path.dirname(__file__), "data", "")
        details_files = {prof.path for prof in signatures.get_signature_profiles()}
        details_files = {filepath.replace(data_dir, "") for filepath in details_files}
        data_dir_contents = set(glob.glob(data_dir + "*.hmm"))
        data_dir_contents = {filepath.replace(data_dir, "") for filepath in data_dir_contents}
        # ignore bgc_seeds.hmm for the sake of comparison, it's a generated aggregate
        data_dir_contents.discard("bgc_seeds.hmm")
        missing_files = details_files - data_dir_contents
        assert not missing_files
        extra_files = data_dir_contents - details_files
        assert not extra_files
        # finally, just to be sure
        assert data_dir_contents == details_files
class TestCandidateCluster(unittest.TestCase):
    # Tests CandidateCluster construction, record linkage, and conversion
    # to and from biopython features (with a record supplied directly).
    def setUp(self):
        # a 100-base record with a single zero-width-neighbourhood protocluster
        self.record = Record(Seq("A" * 100))
        clusters = [create_cluster(0, 0, 10, 10)]
        for cluster in clusters:
            self.record.add_protocluster(cluster)

    def test_kinds_attachment(self):
        # the kind enum must be reachable via the class itself
        assert CandidateCluster.kinds == CandidateClusterKind

    def test_record_linkage(self):
        cluster = CandidateCluster(CandidateCluster.kinds.INTERLEAVED,
                                   self.record.get_protoclusters())
        # numbering requires the candidate to have been added to a record first
        with self.assertRaisesRegex(ValueError, "CandidateCluster not contained in record"):
            cluster.get_candidate_cluster_number()
        self.record.add_candidate_cluster(cluster)
        assert cluster.get_candidate_cluster_number() == 1

    def test_bad_kind(self):
        # kind must be a CandidateClusterKind, not an arbitrary string
        with self.assertRaisesRegex(TypeError, "should be CandidateClusterKind"):
            CandidateCluster("berf", self.record.get_protoclusters())

    def test_no_clusters(self):
        # a candidate cluster cannot be empty
        with self.assertRaisesRegex(ValueError, "cannot exist without at least one"):
            CandidateCluster(CandidateCluster.kinds.INTERLEAVED, [])

    def test_rules(self):
        cluster = CandidateCluster(CandidateCluster.kinds.INTERLEAVED,
                                   self.record.get_protoclusters())
        # NOTE(review): the comprehension variable shadows the outer `cluster`;
        # harmless in py3 but confusing to read
        assert cluster.detection_rules == [cluster.detection_rule
                                           for cluster in self.record.get_protoclusters()]

    def test_smiles_and_polymer(self):
        # both default to None when not supplied at construction
        cluster = CandidateCluster(CandidateCluster.kinds.INTERLEAVED,
                                   self.record.get_protoclusters())
        assert cluster.smiles_structure is None
        assert cluster.polymer is None

    def test_conversion(self):
        # round-trip: CandidateCluster -> biopython feature -> CandidateCluster
        kind = CandidateClusterKind.INTERLEAVED
        original = CandidateCluster(kind, self.record.get_protoclusters(),
                                    smiles="dummy smiles", polymer="dummy polymer")
        self.record.add_candidate_cluster(original)
        assert original.products == ["a"]
        assert len(original.protoclusters) == 1
        bios = original.to_biopython()
        assert len(bios) == 1
        bio = bios[0]
        # all relevant attributes must survive as feature qualifiers
        assert bio.qualifiers["product"] == ["a"]
        assert bio.qualifiers["kind"] == [str(kind)]
        assert bio.qualifiers["candidate_cluster_number"] == [str(original.get_candidate_cluster_number())]
        assert bio.qualifiers["SMILES"] == ["dummy smiles"]
        assert bio.qualifiers["polymer"] == ["dummy polymer"]
        assert bio.qualifiers["contig_edge"] == ["True"]
        real = CandidateCluster.from_biopython(bio, record=self.record)
        assert isinstance(real, CandidateCluster)
        # the regenerated candidate must reference the record's own protoclusters
        assert len(real.protoclusters) == len(self.record.get_protoclusters())
        for reference, record_cluster in zip(real.protoclusters,
                                             self.record.get_protoclusters()):
            assert reference is record_cluster
        # attempt a conversion with a record missing the cluster
        self.record.clear_protoclusters()
        with self.assertRaisesRegex(ValueError, "record does not contain all expected protoclusters"):
            CandidateCluster.from_biopython(bio, record=self.record)
        # and with no record
        with self.assertRaisesRegex(ValueError, "record instance required"):
            CandidateCluster.from_biopython(bio)

    def test_core(self):
        # core_location spans from the first core start to the last core end
        protos = [create_cluster(5, 10, 20, 25, "a"),
                  create_cluster(30, 40, 50, 60, "b")]
        cluster = CandidateCluster(CandidateClusterKind.NEIGHBOURING, protos,
                                   smiles="dummy", polymer="dummy")
        assert cluster.core_location == FeatureLocation(10, 50)

    def test_comparison(self):
        # ordering: earlier start sorts first, longer sorts before shorter at same start
        candidate = CandidateCluster(CandidateClusterKind.NEIGHBOURING,
                                     [create_cluster(5, 10, 20, 25, "a")])
        longer = CandidateCluster(CandidateClusterKind.NEIGHBOURING,
                                  [create_cluster(5, 10, 40, 45, "a")])
        after = CandidateCluster(CandidateClusterKind.NEIGHBOURING,
                                 [create_cluster(10, 20, 40, 45, "a")])

        def check(first, second):
            # comparisons must work against both candidates and raw locations
            assert first < second
            assert first < second.location
            assert sorted([second, first]) == [first, second]

        check(candidate, after)
        check(longer, candidate)
        check(longer, after)
        assert sorted([after, candidate, longer]) == [longer, candidate, after]
class TestCandidateCluster(unittest.TestCase):
    # NOTE(review): this redefines TestCandidateCluster, shadowing an earlier
    # class of the same name in this file — the earlier version's extra tests
    # (including record-based from_biopython conversion) will never run.
    # Looks like two versions of the suite were merged; confirm which
    # test_conversion variant (direct vs TemporaryCandidateCluster) is intended.
    def setUp(self):
        # a 100-base record with a single zero-width-neighbourhood protocluster
        self.record = Record(Seq("A" * 100))
        clusters = [create_cluster(0, 0, 10, 10)]
        for cluster in clusters:
            self.record.add_protocluster(cluster)

    def test_kinds_attachment(self):
        # the kind enum must be reachable via the class itself
        assert CandidateCluster.kinds == CandidateClusterKind

    def test_record_linkage(self):
        cluster = CandidateCluster(CandidateCluster.kinds.INTERLEAVED,
                                   self.record.get_protoclusters())
        # numbering requires the candidate to have been added to a record first
        with self.assertRaisesRegex(ValueError, "CandidateCluster not contained in record"):
            cluster.get_candidate_cluster_number()
        self.record.add_candidate_cluster(cluster)
        assert cluster.get_candidate_cluster_number() == 1

    def test_bad_kind(self):
        # kind must be a CandidateClusterKind, not an arbitrary string
        with self.assertRaisesRegex(TypeError, "should be CandidateClusterKind"):
            CandidateCluster("berf", self.record.get_protoclusters())

    def test_no_clusters(self):
        # a candidate cluster cannot be empty
        with self.assertRaisesRegex(ValueError, "cannot exist without at least one"):
            CandidateCluster(CandidateCluster.kinds.INTERLEAVED, [])

    def test_rules(self):
        cluster = CandidateCluster(CandidateCluster.kinds.INTERLEAVED,
                                   self.record.get_protoclusters())
        # NOTE(review): the comprehension variable shadows the outer `cluster`;
        # harmless in py3 but confusing to read
        assert cluster.detection_rules == [cluster.detection_rule
                                           for cluster in self.record.get_protoclusters()]

    def test_smiles_and_polymer(self):
        # both default to None when not supplied at construction
        cluster = CandidateCluster(CandidateCluster.kinds.INTERLEAVED,
                                   self.record.get_protoclusters())
        assert cluster.smiles_structure is None
        assert cluster.polymer is None

    def test_conversion(self):
        # round-trip: CandidateCluster -> biopython feature -> temporary
        # placeholder -> real CandidateCluster resolved against the record
        kind = CandidateClusterKind.INTERLEAVED
        original = CandidateCluster(kind, self.record.get_protoclusters(),
                                    smiles="dummy smiles", polymer="dummy polymer")
        self.record.add_candidate_cluster(original)
        assert original.products == ["a"]
        assert len(original.protoclusters) == 1
        bios = original.to_biopython()
        assert len(bios) == 1
        bio = bios[0]
        # all relevant attributes must survive as feature qualifiers
        assert bio.qualifiers["product"] == ["a"]
        assert bio.qualifiers["kind"] == [str(kind)]
        assert bio.qualifiers["candidate_cluster_number"] == [str(original.get_candidate_cluster_number())]
        assert bio.qualifiers["SMILES"] == ["dummy smiles"]
        assert bio.qualifiers["polymer"] == ["dummy polymer"]
        assert bio.qualifiers["contig_edge"] == ["True"]
        # without a record, from_biopython yields a temporary placeholder that
        # stores protocluster numbers rather than the protoclusters themselves
        regenerated = CandidateCluster.from_biopython(bio)
        assert isinstance(regenerated, TemporaryCandidateCluster)
        assert regenerated.products == original.products
        assert regenerated.location == original.location
        assert regenerated.smiles_structure == original.smiles_structure
        assert regenerated.polymer == original.polymer
        proto_numbers = [cluster.get_protocluster_number()
                         for cluster in self.record.get_protoclusters()]
        assert regenerated.protoclusters == proto_numbers
        assert regenerated.kind == original.kind
        # resolving against the record gives back a real CandidateCluster
        real = regenerated.convert_to_real_feature(self.record)
        assert isinstance(real, CandidateCluster)
        assert len(real.protoclusters) == len(self.record.get_protoclusters())
        for reference, record_cluster in zip(real.protoclusters,
                                             self.record.get_protoclusters()):
            assert reference is record_cluster
        # attempt a conversion with a record missing the cluster
        self.record.clear_protoclusters()
        with self.assertRaisesRegex(ValueError, "Not all referenced clusters are present"):
            regenerated.convert_to_real_feature(self.record)