def create_cluster(self, rule_name, start, end):
    """Build a Protocluster for the named rule with its core at start..end.

    The surrounding location extends the core by the rule's neighbourhood
    on both sides, with the lower bound clamped so it never goes below 0.
    """
    matched_rule = self.rules_by_name[rule_name]
    core_location = FeatureLocation(start, end)
    reach = matched_rule.neighbourhood
    outer_location = FeatureLocation(max(0, start - reach), end + reach)
    return Protocluster(
        core_location,
        outer_location,
        tool="testing",
        cutoff=matched_rule.cutoff,
        neighbourhood_range=reach,
        product=rule_name,
        detection_rule="rule text",
    )
def test_bad_pfam_domain(self):
    # PFAMDomain construction must fail for a non-string description, a
    # non-string domain, and each of the malformed identifiers below.
    protein_location = FeatureLocation(5, 10)
    shared = {
        "protein_location": protein_location,
        "tool": "test",
        "locus_tag": "dummy",
    }

    with self.assertRaisesRegex(TypeError, "PFAMDomain description must be a string"):
        PFAMDomain(FeatureLocation(2, 5), description=None,
                   identifier="PF00002", **shared)

    with self.assertRaisesRegex(TypeError, "Domain must be given domain as a string"):
        PFAMDomain(FeatureLocation(2, 5), description="desc",
                   identifier="PF00002", domain=5, **shared)

    bad_identifiers = ("PF0002", "FAKE003", "PF", "PF000003", "PF00003.a")
    for ident in bad_identifiers:
        with self.assertRaisesRegex(ValueError, "invalid"):
            PFAMDomain(FeatureLocation(2, 5), description="desc",
                       identifier=ident, **shared)
def test_pfam_domain(self):
    # Round-trip a populated PFAMDomain through its biopython form and
    # check that each compared attribute is equal on both sides.
    original = PFAMDomain(
        FeatureLocation(2, 5),
        description="test",
        protein_location=FeatureLocation(5, 10),
        identifier="PF00002.17",
        domain="p450",
        tool="toolname",
        locus_tag="dummyCDS",
    )
    # values assigned after construction, in this order
    extras = {
        "domain_id": "domain_id",
        "database": "db",
        "detection": "someprogram",
        "evalue": 1e-5,
        "score": 5.,
        "locus_tag": "locus",
        "label": "somelabel",
        "translation": "ARNDCQ",
    }
    for name, value in extras.items():
        setattr(original, name, value)
    original.gene_ontologies = GOQualifier({
        'GO:0004871': 'signal transducer activity',
        'GO:0007165': 'signal transduction',
        'GO:0016020': 'membrane',
    })

    bio_feature = original.to_biopython()[0]
    new = PFAMDomain.from_biopython(bio_feature)

    compared_slots = (
        "tool", "domain_id", "database", "detection", "evalue", "score",
        "locus_tag", "label", "translation", "domain", "protein_location",
        "identifier", "version",
    )
    for slot in compared_slots:
        assert getattr(original, slot) == getattr(new, slot)
    assert original.gene_ontologies.go_entries == new.gene_ontologies.go_entries
    assert original.full_identifier == new.full_identifier
def test_conversion(self):
    # Convert an AntismashDomain to biopython and back, checking the
    # qualifiers written out and the attributes read back in.
    protein_location = FeatureLocation(0, 1)
    domain = AntismashDomain(
        FeatureLocation(1, 3, 1),
        locus_tag="locus",
        tool="test",
        protein_location=protein_location,
    )
    domain.domain_subtype = "subtest"
    domain.specificity = ["a", "c", "f"]
    for hit in ("first", "second"):
        domain.asf.add(hit)

    # state before conversion
    assert domain.tool == "test"
    assert domain.created_by_antismash
    assert domain.locus_tag == "locus"

    converted = domain.to_biopython()
    assert len(converted) == 1
    feature = converted[0]
    assert feature.qualifiers["aSTool"] == ["test"]
    assert feature.qualifiers["tool"] == ["antismash"]

    # state after rebuilding from the biopython feature
    rebuilt = AntismashDomain.from_biopython(feature)
    assert rebuilt.domain_subtype == domain.domain_subtype == "subtest"
    assert rebuilt.specificity == domain.specificity == ["a", "c", "f"]
    assert rebuilt.asf.hits == domain.asf.hits
    assert rebuilt.asf.hits == ["first", "second"]
    assert rebuilt.tool == domain.tool == "test"
    assert rebuilt.created_by_antismash
    assert rebuilt.locus_tag == "locus"
    assert rebuilt.protein_location == protein_location
def test_parent_linkage(self):
    # A collection has no parent until another collection is built with it
    # as a child; after that its parent is that enclosing collection.
    inner = CDSCollection(
        FeatureLocation(20, 40),
        feature_type="test",
        child_collections=[],
    )
    assert inner.parent is None

    outer = CDSCollection(
        FeatureLocation(10, 50),
        feature_type="test",
        child_collections=[inner],
    )
    assert inner.parent is outer
def setUp(self):
    # Fixture: the constructor inputs are kept on self alongside the RRE
    # feature built from them.
    self.location = FeatureLocation(6, 10)
    self.protein_location = FeatureLocation(1, 5)
    self.description = 'This is a test RRE'
    self.identifier = 'RREFam001'
    self.tool = 'rrefinder_test'
    self.locus_tag = 'locus_tag_a'
    self.domain = 'RRE_type_a'
    self.rre = RRE(
        self.location,
        self.description,
        self.protein_location,
        self.identifier,
        self.tool,
        self.locus_tag,
        self.domain,
    )
def test_translation_outside_record(self):
    # For each open-ended location on a 10-base record, building a CDS
    # with a 5-residue translation must be rejected.
    rec = DummyRecord(seq="A" * 10)
    open_ended_forward = FeatureLocation(0, AfterPosition(6), strand=1)
    open_ended_reverse = FeatureLocation(BeforePosition(4), 10, strand=-1)
    for location in (open_ended_forward, open_ended_reverse):
        bio = SeqFeature(location, type="CDS")
        bio.qualifiers["translation"] = ["M" * 5]
        with self.assertRaisesRegex(SecmetInvalidInputError,
                                    "translation extends out of record"):
            CDSFeature.from_biopython(bio, record=rec)
def test_simple(self):
    # Exercise self.run with an open-ended forward location, then with an
    # open-ended reverse location, at the same three size offsets each.
    size = 9

    forward = FeatureLocation(0, AfterPosition(3), 1)
    assert not self.run(size, forward, size - 3)
    assert self.run(size, forward, size - 2)  # single ambiguous amino
    assert self.run(size, forward, size)

    reverse = FeatureLocation(BeforePosition(3), 9, -1)
    assert not self.run(size + 3, reverse, size)
    assert self.run(size + 2, reverse, size)  # single ambiguous amino
    assert self.run(size, reverse, size)
def setUp(self):
    # Fixture: an RREDomain built from a versioned identifier, with its
    # domain_id assigned explicitly afterwards.
    self.location = FeatureLocation(6, 10)
    self.protein_location = FeatureLocation(1, 5)
    self.description = 'This is a test RRE'
    self.locus_tag = 'locus_tag_a'
    self.domain = 'RRE_type_a'

    # identifier and version are joined with a dot to form full_identifier
    self.identifier = 'RREFam001'
    self.version = 1
    self.full_identifier = '%s.%d' % (self.identifier, self.version)

    self.rre = RREDomain(
        self.location,
        self.description,
        self.protein_location,
        self.full_identifier,
        self.locus_tag,
        self.domain,
    )
    self.rre.domain_id = f"{self.locus_tag}_{self.identifier}_1"
def setUp(self):
    # Fixture: a three-part forward-strand compound location and a CDS on
    # it. The parts cover the non-"x" stretches of magic_split, which
    # joined together spell the same bases as magic.
    self.translation = "MAGIC"
    self.magic = Seq("ATGGCAGGTATTTGT")
    self.magic_split = Seq("ATGGCAxxxxxxGGTxxxxxxATTTGT")

    part_bounds = ((0, 6), (12, 15), (21, 27))
    self.sub_locations = [
        FeatureLocation(begin, finish, strand=1)
        for begin, finish in part_bounds
    ]
    self.location = CompoundLocation(self.sub_locations)
    self.cds = CDSFeature(self.location, locus_tag="compound", translation="A")
def test_required_identifiers(self):
    # With none of the identifying names given, construction must fail ...
    with self.assertRaisesRegex(
            ValueError,
            "requires at least one of: gene, protein_id, locus_tag"):
        CDSFeature(FeatureLocation(1, 5, 1), translation="A")

    # ... while any single one of them is enough.
    for identifier_name in ("locus_tag", "protein_id", "gene"):
        single_identifier = {identifier_name: "foo"}
        assert CDSFeature(FeatureLocation(1, 5, 1), translation="A",
                          **single_identifier)
def test_root(self):
    # get_root() should report the outermost enclosing collection as each
    # further level of nesting is added.
    leaf = CDSCollection(
        FeatureLocation(20, 40),
        feature_type="test",
        child_collections=[],
    )
    assert leaf.get_root() is leaf

    middle = CDSCollection(
        FeatureLocation(10, 50),
        feature_type="test",
        child_collections=[leaf],
    )
    assert leaf.get_root() is middle

    top = CDSCollection(
        FeatureLocation(0, 60),
        feature_type="test",
        child_collections=[middle],
    )
    for collection in (leaf, middle, top):
        assert collection.get_root() is top
def test_bad_child(self):
    # CDSCollection must raise AssertionError for the two bad children
    # below.
    #
    # The fixtures are built outside the assertRaises blocks so that only
    # the construction under test may raise. An AssertionError coming from
    # fixture setup then fails the test instead of being mistaken for the
    # expected rejection.

    # a child collection spanning more than the collection given it
    oversized_child = CDSCollection(
        FeatureLocation(10, 50),
        feature_type="test",
        child_collections=[],
    )
    with self.assertRaises(AssertionError):
        CDSCollection(
            FeatureLocation(20, 40),
            feature_type="test",
            child_collections=[oversized_child],
        )

    # a DummyCDS passed where child collections are expected
    cds = DummyCDS(25, 35)
    with self.assertRaises(AssertionError):
        CDSCollection(
            FeatureLocation(20, 40),
            feature_type="test",
            child_collections=[cds],
        )
def convert_hits_to_features(self) -> None:
    '''Build an RRE feature for every stored hit and append it to self.features

    Hits are read from self.hit_info, which maps a locus tag to that
    locus's hits. Each feature also receives the hit's score, evalue,
    label and translation, this object's database and detection values,
    and a numbered domain_id.
    '''
    copied_attributes = ('score', 'evalue', 'label', 'translation')
    seen_per_domain = defaultdict(int)  # type: Dict[str, int]

    for locus_tag, locus_hits in self.hit_info.items():
        for hit in locus_hits:
            location = location_from_string(hit['location'])
            protein_location = FeatureLocation(hit['protein_start'],
                                               hit['protein_end'])
            feature = RRE(
                location,
                hit['description'],
                protein_location,
                tool=self.tool,
                identifier=hit['identifier'],
                locus_tag=locus_tag,
                domain=hit['domain'],
            )

            # Set additional properties
            for name in copied_attributes:
                setattr(feature, name, hit[name])
            feature.database = self.database
            feature.detection = self.detection

            # 1-indexed, so increment before use
            domain_name = hit['domain']
            seen_per_domain[domain_name] += 1
            # NOTE(review): the counter is keyed by domain only, while the
            # id contains only the tool, locus tag and count; two different
            # domains in the same locus could therefore receive the same
            # domain_id - confirm this is intended.
            feature.domain_id = "{}_{}_{:04d}".format(
                self.tool, feature.locus_tag, seen_per_domain[domain_name])

            self.features.append(feature)
def setUp(self):
    # Fixture: a clusterfinder config plus a 2000-base record populated
    # with one CDS and one PFAM domain per row of the table below.
    cli_args = [
        "--cf-create-clusters",
        "--cf-mean-threshold", "0.6",
        "--cf-min-cds", "5",
        "--cf-min-pfams", "5",
    ]
    self.config = build_config(cli_args, modules=[clusterfinder], isolated=True)
    update_config({"enabled_cluster_types": []})

    self.record = DummyRecord(seq=Seq("A" * 2000))

    # (start, end, probability, pfam identifier)
    pfam_rows = [
        (10, 20, 0.1, 'PF77777'),
        (30, 40, 0.3, 'PF00106'),
        (50, 60, 0.4, 'PF00107'),
        (60, 70, 0.7, 'PF00109'),
        (70, 80, 0.98, 'PF08484'),
        (90, 100, 0.8, 'PF02401'),
        (100, 110, 0.32, 'PF04369'),
        (110, 120, 1.0, 'PF00128'),
        (130, 140, 0.2, 'PF77776'),
        (500, 505, None, 'PF77775'),
        (1010, 1020, 0.1, 'PF77774'),
        (1030, 1040, 0.3, 'PF00106'),
        (1050, 1060, 0.4, 'PF00107'),
        (1060, 1070, 0.7, 'PF00109'),
        (1070, 1080, 0.98, 'PF08484'),
        (1090, 1100, 0.8, 'PF02401'),
        (1100, 1110, 0.32, 'PF04369'),
        (1110, 1120, 1.0, 'PF00128'),
    ]
    for start, end, probability, pfam_id in pfam_rows:
        location = FeatureLocation(start, end, strand=1)
        cds = CDSFeature(location, locus_tag=str(start), translation="A")
        self.record.add_cds_feature(cds)

        pfam = PFAMDomain(
            location,
            "dummy_description",
            protein_start=start + 1,
            protein_end=end - 1,
            identifier=pfam_id,
            tool="test",
        )
        pfam.domain_id = "pfam_%d" % start
        pfam.probability = probability
        self.record.add_pfam_domain(pfam)
def test_angstrom(self):
    # The domain's translation is the stored "query" alignment with its
    # gap characters removed; the 34-residue signature is then extracted.
    query_domain = AntismashDomain(FeatureLocation(1, 2), "test")
    query_domain.domain_id = "query"
    aligned = self.aligns[query_domain.domain_id]
    query_domain.translation = aligned.replace("-", "")

    signature = extract_sig.get_34_aa_signature(query_domain)
    assert signature == "L--SFDASLFEMYLLTGGDRNMYGPTEATMCATW"
def test_mixed_strand(self):
    # A compound location whose parts disagree on strand must be rejected
    # when converting from biopython.
    bio = self.cds.to_biopython()[0]

    opposite_strands = CompoundLocation([
        FeatureLocation(1, 5, strand=-1),
        FeatureLocation(8, 10, strand=1),
    ])
    one_strand_unset = CompoundLocation([
        FeatureLocation(1, 5, strand=1),
        FeatureLocation(8, 10, strand=None),
    ])

    for location in (opposite_strands, one_strand_unset):
        bio.location = location
        with self.assertRaisesRegex(ValueError,
                                    "compound locations with mixed strands"):
            CDSFeature.from_biopython(bio)
def test_bad_translation(self):
    # Each of these translations must make CDSFeature raise a ValueError
    # matching one of the two expected messages.
    loc = FeatureLocation(1, 5, 1)
    expected_message = "valid translation required|invalid translation characters"
    for trans in (None, "A?", "A!", ""):
        with self.assertRaisesRegex(ValueError, expected_message):
            CDSFeature(loc, locus_tag="test", translation=trans)
def test_extends_past_before(self):
    # Replace the first part with one that opens with BeforePosition(2) on
    # the reverse strand, then check the start of the sub-location found
    # for protein coordinates 0..7.
    self.reverse_strand()
    first_part = self.sub_locations[0]
    self.sub_locations[0] = FeatureLocation(BeforePosition(2), first_part.end,
                                            strand=-1)
    self.cds.location = CompoundLocation(self.sub_locations[::-1])

    sub_location = self.cds.get_sub_location_from_protein_coordinates(0, 7)
    assert sub_location.start == 3
def test_extends_past_after(self):
    # Replace the last part with one that closes with AfterPosition(29),
    # then check the end of the sub-location found for protein
    # coordinates 0..7.
    open_ended_last = FeatureLocation(21, AfterPosition(29), strand=1)
    self.sub_locations[-1] = open_ended_last
    self.cds.location = CompoundLocation(self.sub_locations)

    sub_location = self.cds.get_sub_location_from_protein_coordinates(0, 7)
    assert sub_location.end == 27
def test_simple_location_forward_complete(self):
    # For a single-part forward CDS over bases 0..15, protein coordinates
    # 0..5 should extract the whole magic sequence and its translation.
    whole_location = FeatureLocation(0, 15, 1)
    cds = CDSFeature(whole_location, locus_tag="simple", translation="A")

    sub_location = cds.get_sub_location_from_protein_coordinates(0, 5)
    extracted = sub_location.extract(self.magic)

    assert extracted == self.magic
    assert extracted.translate() == self.translation
def setUp(self):
    # Fixture: the PFAMDomain stored as self.domain.
    domain_location = FeatureLocation(1, 6)
    self.domain = PFAMDomain(
        domain_location,
        "description",
        protein_start=3,
        protein_end=5,
        domain="p450",
        identifier="PF00001",
        tool="test",
    )
def test_conversion(self):
    # Convert a CDSMotif to biopython and back, checking tool, locus tag,
    # protein location and the created_by_antismash flag on both sides.
    prot_loc = FeatureLocation(1, 2)
    original = CDSMotif(
        FeatureLocation(2, 5),
        tool="test",
        locus_tag="locus",
        protein_location=prot_loc,
    )
    assert original.tool == "test"
    assert original.created_by_antismash
    assert original.locus_tag == "locus"
    assert original.protein_location == prot_loc

    converted = original.to_biopython()
    assert len(converted) == 1

    rebuilt = CDSMotif.from_biopython(converted[0])
    assert rebuilt.tool == original.tool == "test"
    assert rebuilt.locus_tag == original.locus_tag == "locus"
    assert rebuilt.protein_location == prot_loc
    assert rebuilt.created_by_antismash
def test_invalid_qualifier(self):
    # Assigning any of these values to sec_met must raise a TypeError.
    cds = CDSFeature(FeatureLocation(1, 5, 1), locus_tag="test", translation="A")
    expected_message = "can only be set to an instance of SecMetQualifier"
    for invalid_value in ("bad", ["stuff"], {}, 1):
        with self.assertRaisesRegex(TypeError, expected_message):
            cds.sec_met = invalid_value
def test_frameshifted_location(self):
    # The two parts overlap by one base (3..9 and 8..14); check the raw
    # extraction first, then the sub-location for protein coords 1..3.
    first_part = FeatureLocation(3, 9, 1)
    second_part = FeatureLocation(8, 14, 1)
    location = CompoundLocation([first_part, second_part])
    assert len(location) == 12

    seq = Seq("ATGATGAGCCCTCGTCTAGACTACAATGA")
    extracted = location.extract(seq)
    assert extracted == "ATGAGCCCCTCG"
    assert len(extracted) == len(location)

    translation = extracted.translate()
    assert translation == "MSPS"

    cds = CDSFeature(location, locus_tag="test", translation=translation)
    sub_location = cds.get_sub_location_from_protein_coordinates(1, 3)
    assert isinstance(sub_location, CompoundLocation)
    assert len(sub_location.parts) == 2
    assert sub_location.start == 6
    assert sub_location.end == 11
def setUp(self):
    # Fixture: an Alignment built around a freshly made PFAMDomain.
    pfam = PFAMDomain(
        FeatureLocation(1, 6),
        "description",
        protein_start=3,
        protein_end=5,
        domain="p450",
        identifier="PF00001",
        tool="test",
    )
    self.alignment = Alignment(pfam, "WLAD-QGAR", "WLae.rGAR", 10, 19)
def generate_domains(self):
    """Build the dummy domains used as input by the tests.

    One "PKS_KS" domain is created per translation read from the
    'PKS_KS.input' FASTA file in the data directory next to this file,
    followed by a single extra domain labelled "PKS_KR".

    Returns:
        the list of created domains, with the "PKS_KR" domain last
    """
    inputs = fasta.read_fasta(
        path.get_full_path(__file__, 'data', 'PKS_KS.input'))
    domains = []
    # NOTE(review): last_end is never updated below, so every location
    # starts at 10 - confirm whether it was meant to advance per domain.
    last_end = 0
    for translation in inputs.values():
        location = FeatureLocation(last_end + 10,
                                   last_end + len(translation) * 3 + 16)
        domain = DummyAntismashDomain(location=location)
        domain.translation = translation
        domains.append(domain)
        domain.domain = "PKS_KS"
    # one trailing domain of a different type, sized from the last KS domain
    location = FeatureLocation(
        last_end + 10, last_end + len(domains[-1].translation) * 3 + 16)
    domains.append(DummyAntismashDomain(location=location))
    domains[-1].domain = "PKS_KR"
    return domains
def test_non_antismash_motif(self):
    # An ExternalCDSMotif should come back from biopython as an
    # ExternalCDSMotif, with the same tool and still not flagged as
    # created by antismash.
    original = ExternalCDSMotif(FeatureLocation(7, 10), {})
    assert not original.created_by_antismash

    bio_features = original.to_biopython()
    assert len(bio_features) == 1, bio_features

    rebuilt = CDSMotif.from_biopython(bio_features[0])
    assert isinstance(rebuilt, ExternalCDSMotif)
    assert rebuilt.tool == original.tool
    assert not rebuilt.created_by_antismash
def test_tool_conversion(self):
    # The tool name and the created_by_antismash flag must be the same
    # after a round trip through biopython.
    original = CDSMotif(FeatureLocation(2, 5), tool="test")
    assert original.tool == "test"
    assert original.created_by_antismash

    converted = original.to_biopython()
    assert len(converted) == 1

    rebuilt = CDSMotif.from_biopython(converted[0])
    assert rebuilt.tool == original.tool == "test"
    assert rebuilt.created_by_antismash
def test_non_antismash_motif(self):
    # A CDSMotif built without a tool has tool None and is not flagged as
    # created by antismash, both before and after a biopython round trip.
    original = CDSMotif(FeatureLocation(7, 10))
    assert original.tool is None
    assert not original.created_by_antismash

    converted = original.to_biopython()
    assert len(converted) == 1

    rebuilt = CDSMotif.from_biopython(converted[0])
    assert rebuilt.tool is None
    assert not rebuilt.created_by_antismash