def create_cluster(self, rule_name, start, end):
     """Build a test Protocluster for the rule registered under rule_name.

     The core location runs from start to end; the surrounding location is
     widened by the rule's neighbourhood on both sides, with the lower
     bound clamped at zero. The upper bound is not clamped.
     """
     rule = self.rules_by_name[rule_name]
     reach = rule.neighbourhood
     core = FeatureLocation(start, end)
     surrounds = FeatureLocation(max(0, start - reach), end + reach)
     return Protocluster(
         core,
         surrounds,
         tool="testing",
         cutoff=rule.cutoff,
         neighbourhood_range=reach,
         product=rule_name,
         detection_rule="rule text",
     )
Esempio n. 2
0
 def test_bad_pfam_domain(self):
     """Check that PFAMDomain construction fails on bad arguments.

     Expected failures: a TypeError for a ``None`` description, a TypeError
     for a non-string ``domain``, and a ValueError for each malformed
     identifier in the list below.
     """
     shared = {
         "protein_location": FeatureLocation(5, 10),
         "tool": "test",
         "locus_tag": "dummy",
     }

     def build(**overrides):
         # every attempt gets its own feature location but the same
         # protein location, tool and locus tag
         return PFAMDomain(FeatureLocation(2, 5), **shared, **overrides)

     with self.assertRaisesRegex(TypeError,
                                 "PFAMDomain description must be a string"):
         build(description=None, identifier="PF00002")

     with self.assertRaisesRegex(TypeError,
                                 "Domain must be given domain as a string"):
         build(description="desc", identifier="PF00002", domain=5)

     bad_identifiers = ("PF0002", "FAKE003", "PF", "PF000003", "PF00003.a")
     for ident in bad_identifiers:
         with self.assertRaisesRegex(ValueError, "invalid"):
             build(description="desc", identifier=ident)
Esempio n. 3
0
 def test_pfam_domain(self):
     """Round-trip a fully populated PFAMDomain through biopython.

     The domain is converted with ``to_biopython`` and rebuilt with
     ``from_biopython``; the listed attributes, the gene ontology entries
     and the full identifier must all compare equal afterwards.
     """
     original = PFAMDomain(FeatureLocation(2, 5),
                           description="test",
                           protein_location=FeatureLocation(5, 10),
                           identifier="PF00002.17",
                           domain="p450",
                           tool="toolname",
                           locus_tag="dummyCDS")

     # attributes assigned after construction, in this order
     extras = {
         "domain_id": "domain_id",
         "database": "db",
         "detection": "someprogram",
         "evalue": 1e-5,
         "score": 5.,
         "locus_tag": "locus",
         "label": "somelabel",
         "translation": "ARNDCQ",
     }
     for name, value in extras.items():
         setattr(original, name, value)

     original.gene_ontologies = GOQualifier({
         'GO:0004871': 'signal transducer activity',
         'GO:0007165': 'signal transduction',
         'GO:0016020': 'membrane'
     })

     bio_features = original.to_biopython()
     new = PFAMDomain.from_biopython(bio_features[0])

     compared = (
         "tool", "domain_id", "database", "detection", "evalue",
         "score", "locus_tag", "label", "translation", "domain",
         "protein_location", "identifier", "version",
     )
     for slot in compared:
         assert getattr(original, slot) == getattr(new, slot)
     assert original.gene_ontologies.go_entries == new.gene_ontologies.go_entries
     assert original.full_identifier == new.full_identifier
    def test_conversion(self):
        """Round-trip an AntismashDomain through biopython.

        Checks the qualifiers written by ``to_biopython`` and that the
        subtype, specificity, active site hits, tool, locus tag and protein
        location are all present on the rebuilt domain.
        """
        protein_location = FeatureLocation(0, 1)
        original = AntismashDomain(FeatureLocation(1, 3, 1),
                                   locus_tag="locus",
                                   tool="test",
                                   protein_location=protein_location)
        original.domain_subtype = "subtest"
        original.specificity = ["a", "c", "f"]
        for hit in ("first", "second"):
            original.asf.add(hit)
        assert original.tool == "test"
        assert original.created_by_antismash
        assert original.locus_tag == "locus"

        bio_features = original.to_biopython()
        assert len(bio_features) == 1
        qualifiers = bio_features[0].qualifiers
        assert qualifiers["aSTool"] == ["test"]
        assert qualifiers["tool"] == ["antismash"]

        rebuilt = AntismashDomain.from_biopython(bio_features[0])
        assert rebuilt.domain_subtype == original.domain_subtype == "subtest"
        assert rebuilt.specificity == original.specificity == ["a", "c", "f"]
        assert rebuilt.asf.hits == original.asf.hits
        assert rebuilt.asf.hits == ["first", "second"]
        assert rebuilt.tool == original.tool == "test"
        assert rebuilt.created_by_antismash
        assert rebuilt.locus_tag == "locus"
        assert rebuilt.protein_location == protein_location
Esempio n. 5
0
 def test_parent_linkage(self):
     """Check that a collection's ``parent`` is set once it is used as a child.

     A freshly built collection has no parent; after being passed in the
     ``child_collections`` of another collection, its parent is that
     collection.
     """
     inner = CDSCollection(FeatureLocation(20, 40), feature_type="test",
                           child_collections=[])
     assert inner.parent is None

     outer = CDSCollection(FeatureLocation(10, 50), feature_type="test",
                           child_collections=[inner])
     assert inner.parent is outer
Esempio n. 6
0
 def setUp(self):
     """Store the RRE constructor arguments on the test case and build ``self.rre``."""
     # locations
     self.location = FeatureLocation(6, 10)
     self.protein_location = FeatureLocation(1, 5)
     # descriptive values
     self.description = 'This is a test RRE'
     self.identifier = 'RREFam001'
     self.tool = 'rrefinder_test'
     self.locus_tag = 'locus_tag_a'
     self.domain = 'RRE_type_a'

     # passed positionally, in the order used by the original fixture
     constructor_args = (
         self.location,
         self.description,
         self.protein_location,
         self.identifier,
         self.tool,
         self.locus_tag,
         self.domain,
     )
     self.rre = RRE(*constructor_args)
Esempio n. 7
0
 def test_translation_outside_record(self):
     """Check that an over-long translation on an open-ended CDS is rejected.

     For a ten-base record, a CDS with an ``AfterPosition`` end on the
     forward strand and one with a ``BeforePosition`` start on the reverse
     strand are each given a five-residue translation; building a
     CDSFeature from either must raise SecmetInvalidInputError.
     """
     record = DummyRecord(seq="A" * 10)
     open_ended = (
         FeatureLocation(0, AfterPosition(6), strand=1),
         FeatureLocation(BeforePosition(4), 10, strand=-1),
     )
     for location in open_ended:
         feature = SeqFeature(location, type="CDS")
         feature.qualifiers["translation"] = ["M" * 5]
         with self.assertRaisesRegex(SecmetInvalidInputError,
                                     "translation extends out of record"):
             CDSFeature.from_biopython(feature, record=record)
Esempio n. 8
0
    def test_simple(self):
        """Exercise ``self.run`` with one open-ended location per strand.

        Each case pairs the varying argument with whether the result is
        expected to be truthy.
        """
        size = 9

        forward = FeatureLocation(0, AfterPosition(3), 1)
        forward_cases = [
            (size - 3, False),
            (size - 2, True),  # single ambiguous amino
            (size, True),
        ]
        for last_arg, expected in forward_cases:
            assert bool(self.run(size, forward, last_arg)) is expected

        reverse = FeatureLocation(BeforePosition(3), 9, -1)
        reverse_cases = [
            (size + 3, False),
            (size + 2, True),  # single ambiguous amino
            (size, True),
        ]
        for first_arg, expected in reverse_cases:
            assert bool(self.run(first_arg, reverse, size)) is expected
Esempio n. 9
0
 def setUp(self):
     """Store the RREDomain constructor arguments and build ``self.rre``.

     The full identifier is the identifier and version joined by a dot,
     and the domain id is derived from the locus tag and identifier.
     """
     # locations
     self.location = FeatureLocation(6, 10)
     self.protein_location = FeatureLocation(1, 5)
     # descriptive values
     self.description = 'This is a test RRE'
     self.domain = 'RRE_type_a'
     self.locus_tag = 'locus_tag_a'
     # identifier with and without its version suffix
     self.identifier = 'RREFam001'
     self.version = 1
     self.full_identifier = '%s.%d' % (self.identifier, self.version)

     rre = RREDomain(self.location, self.description,
                     self.protein_location, self.full_identifier,
                     self.locus_tag, self.domain)
     rre.domain_id = f"{self.locus_tag}_{self.identifier}_1"
     self.rre = rre
Esempio n. 10
0
 def setUp(self):
     """Prepare a three-part compound CDS and matching sequences.

     ``magic_split`` holds the bases of ``magic`` with runs of ``x`` between
     them; the sub-locations cover exactly the spans of ``magic_split``
     that are not ``x``.
     """
     self.magic_split = Seq("ATGGCAxxxxxxGGTxxxxxxATTTGT")
     self.magic = Seq("ATGGCAGGTATTTGT")
     self.translation = "MAGIC"

     spans = [(0, 6), (12, 15), (21, 27)]
     self.sub_locations = [FeatureLocation(start, end, strand=1)
                           for start, end in spans]
     self.location = CompoundLocation(self.sub_locations)
     self.cds = CDSFeature(self.location, locus_tag="compound",
                           translation="A")
Esempio n. 11
0
 def test_required_identifiers(self):
     """Check that a CDSFeature needs a gene, protein_id or locus_tag.

     Construction without any of the three must raise ValueError; supplying
     any single one of them must succeed and give a truthy feature.
     """
     coordinates = (1, 5, 1)

     with self.assertRaisesRegex(
             ValueError,
             "requires at least one of: gene, protein_id, locus_tag"):
         CDSFeature(FeatureLocation(*coordinates), translation="A")

     for identifier in ("locus_tag", "protein_id", "gene"):
         assert CDSFeature(FeatureLocation(*coordinates),
                           translation="A",
                           **{identifier: "foo"})
Esempio n. 12
0
 def test_root(self):
     """Check that ``get_root`` follows parents up to the outermost collection.

     The root is re-checked after each additional level of nesting is
     built around the innermost collection.
     """
     innermost = CDSCollection(FeatureLocation(20, 40), feature_type="test",
                               child_collections=[])
     assert innermost.get_root() is innermost

     middle = CDSCollection(FeatureLocation(10, 50), feature_type="test",
                            child_collections=[innermost])
     assert innermost.get_root() is middle

     outermost = CDSCollection(FeatureLocation(0, 60), feature_type="test",
                               child_collections=[middle])
     nested = (innermost, middle, outermost)
     assert all(collection.get_root() is outermost for collection in nested)
Esempio n. 13
0
    def test_bad_child(self):
        """Check that CDSCollection rejects unsuitable child collections.

        Two constructions are expected to raise AssertionError:
        a collection at 20-40 given a child collection located at 10-50,
        and a collection given a DummyCDS in ``child_collections``.
        """
        # Fixtures are built outside the assertRaises contexts so that an
        # AssertionError raised while constructing them cannot be mistaken
        # for the rejection being tested.
        oversized_child = CDSCollection(FeatureLocation(10, 50),
                                        feature_type="test",
                                        child_collections=[])
        with self.assertRaises(AssertionError):
            CDSCollection(FeatureLocation(20, 40),
                          feature_type="test",
                          child_collections=[oversized_child])

        cds = DummyCDS(25, 35)
        with self.assertRaises(AssertionError):
            CDSCollection(FeatureLocation(20, 40),
                          feature_type="test",
                          child_collections=[cds])
Esempio n. 14
0
    def convert_hits_to_features(self) -> None:
        """Create an RRE feature for every hit in ``self.hit_info``.

        Each hit's location string and protein coordinates are turned into
        locations, the hit's score, evalue, label and translation are copied
        onto the feature, and the feature is appended to ``self.features``.
        """
        # NOTE(review): counts are kept per domain name, but the domain name
        # is not part of the generated id, so two different domains on the
        # same locus could receive the same domain_id - confirm whether
        # that can occur in practice.
        domain_counts = defaultdict(int)  # type: Dict[str, int]
        for locus_tag, hits in self.hit_info.items():
            for hit in hits:
                domain = hit['domain']
                feature = RRE(
                    location_from_string(hit['location']),
                    hit['description'],
                    FeatureLocation(hit['protein_start'], hit['protein_end']),
                    tool=self.tool,
                    identifier=hit['identifier'],
                    locus_tag=locus_tag,
                    domain=domain,
                )

                # values copied straight from the hit
                feature.score = hit['score']
                feature.evalue = hit['evalue']
                feature.label = hit['label']
                feature.translation = hit['translation']

                # values shared by every feature from this run
                feature.database = self.database
                feature.detection = self.detection

                # ids are 1-indexed, so the counter is bumped before use
                domain_counts[domain] += 1
                feature.domain_id = "{}_{}_{:04d}".format(
                    self.tool, feature.locus_tag, domain_counts[domain])

                self.features.append(feature)
    def setUp(self):
        """Build the clusterfinder config and a record populated with PFAM hits.

        The record has a 2000-base sequence; every table row below adds one
        forward-strand CDS and one PFAM domain at the same location, with
        the row's probability stored on the domain. One row has no
        probability (``None``).
        """
        options = ["--cf-create-clusters",
                   "--cf-mean-threshold", "0.6",
                   "--cf-min-cds", "5",
                   "--cf-min-pfams", "5"]
        self.config = build_config(options, modules=[clusterfinder],
                                   isolated=True)
        update_config({"enabled_cluster_types": []})

        self.record = DummyRecord(seq=Seq("A" * 2000))

        # (start, end, probability, pfam identifier)
        pfam_table = [
            (10, 20, 0.1, 'PF77777'),
            (30, 40, 0.3, 'PF00106'),
            (50, 60, 0.4, 'PF00107'),
            (60, 70, 0.7, 'PF00109'),
            (70, 80, 0.98, 'PF08484'),
            (90, 100, 0.8, 'PF02401'),
            (100, 110, 0.32, 'PF04369'),
            (110, 120, 1.0, 'PF00128'),
            (130, 140, 0.2, 'PF77776'),
            (500, 505, None, 'PF77775'),
            (1010, 1020, 0.1, 'PF77774'),
            (1030, 1040, 0.3, 'PF00106'),
            (1050, 1060, 0.4, 'PF00107'),
            (1060, 1070, 0.7, 'PF00109'),
            (1070, 1080, 0.98, 'PF08484'),
            (1090, 1100, 0.8, 'PF02401'),
            (1100, 1110, 0.32, 'PF04369'),
            (1110, 1120, 1.0, 'PF00128'),
        ]
        for start, end, probability, pfam_id in pfam_table:
            location = FeatureLocation(start, end, strand=1)
            cds = CDSFeature(location, locus_tag=str(start), translation="A")
            self.record.add_cds_feature(cds)

            pfam = PFAMDomain(location, "dummy_description",
                              protein_start=start + 1, protein_end=end-1,
                              identifier=pfam_id, tool="test")
            pfam.domain_id = "pfam_%d" % start
            pfam.probability = probability
            self.record.add_pfam_domain(pfam)
    def test_angstrom(self):
        """Check the 34-residue signature extracted for the "query" domain.

        The domain's translation is the "query" entry of ``self.aligns``
        with the ``-`` characters removed.
        """
        domain = AntismashDomain(FeatureLocation(1, 2), "test")
        domain.domain_id = "query"
        aligned = self.aligns[domain.domain_id]
        domain.translation = aligned.replace("-", "")

        signature = extract_sig.get_34_aa_signature(domain)
        assert signature == "L--SFDASLFEMYLLTGGDRNMYGPTEATMCATW"
Esempio n. 17
0
 def test_mixed_strand(self):
     """Check that compound locations with differing part strands are rejected.

     Two strand combinations are tried on the biopython form of
     ``self.cds``: reverse with forward, and forward with no strand.
     Rebuilding a CDSFeature from either must raise ValueError.
     """
     bio = self.cds.to_biopython()[0]

     strand_pairs = [(-1, 1), (1, None)]
     locations = [
         CompoundLocation([
             FeatureLocation(1, 5, strand=first),
             FeatureLocation(8, 10, strand=second),
         ])
         for first, second in strand_pairs
     ]

     expected_message = "compound locations with mixed strands"
     for location in locations:
         bio.location = location
         with self.assertRaisesRegex(ValueError, expected_message):
             CDSFeature.from_biopython(bio)
Esempio n. 18
0
 def test_bad_translation(self):
     """Check that missing, empty or malformed translations raise ValueError.

     Either of the two error messages in the pattern is accepted for any
     of the bad values.
     """
     location = FeatureLocation(1, 5, 1)
     expected = "valid translation required|invalid translation characters"
     bad_translations = (None, "A?", "A!", "")
     for translation in bad_translations:
         with self.assertRaisesRegex(ValueError, expected):
             CDSFeature(location, locus_tag="test", translation=translation)
Esempio n. 19
0
 def test_extends_past_before(self):
     """Check the sub-location start when the first part has a BeforePosition start.

     After ``self.reverse_strand()``, the first sub-location is replaced
     by a reverse-strand one starting at ``BeforePosition(2)``, and the
     CDS location is rebuilt from the parts in reversed order.
     """
     self.reverse_strand()
     first_part = self.sub_locations[0]
     self.sub_locations[0] = FeatureLocation(BeforePosition(2),
                                             first_part.end,
                                             strand=-1)
     reversed_parts = self.sub_locations[::-1]
     self.cds.location = CompoundLocation(reversed_parts)

     sub_location = self.cds.get_sub_location_from_protein_coordinates(0, 7)
     assert sub_location.start == 3
Esempio n. 20
0
    def test_extends_past_after(self):
        """Check the sub-location end when the last part has an AfterPosition end.

        The last sub-location is replaced by one ending at
        ``AfterPosition(29)`` before the CDS location is rebuilt.
        """
        open_ended = FeatureLocation(21, AfterPosition(29), strand=1)
        self.sub_locations[-1] = open_ended
        self.cds.location = CompoundLocation(self.sub_locations)

        sub_location = self.cds.get_sub_location_from_protein_coordinates(0, 7)
        assert sub_location.end == 27
Esempio n. 21
0
 def test_simple_location_forward_complete(self):
     """Check a full-length protein range on a simple forward-strand CDS.

     The sub-location for protein coordinates 0-5 must extract the whole
     of ``self.magic``, which translates to ``self.translation``.
     """
     location = FeatureLocation(0, 15, 1)
     cds = CDSFeature(location, locus_tag="simple", translation="A")

     sub_location = cds.get_sub_location_from_protein_coordinates(0, 5)
     extracted = sub_location.extract(self.magic)

     assert extracted == self.magic
     assert extracted.translate() == self.translation
Esempio n. 22
0
 def setUp(self):
     """Create the PFAMDomain stored as ``self.domain`` for the tests."""
     location = FeatureLocation(1, 6)
     details = {
         "protein_start": 3,
         "protein_end": 5,
         "domain": "p450",
         "identifier": "PF00001",
         "tool": "test",
     }
     self.domain = PFAMDomain(location, "description", **details)
Esempio n. 23
0
    def test_conversion(self):
        """Round-trip a CDSMotif through biopython.

        The tool, locus tag, protein location and antiSMASH origin are
        checked on the original and on the rebuilt motif.
        """
        protein_location = FeatureLocation(1, 2)
        motif = CDSMotif(FeatureLocation(2, 5), tool="test",
                         locus_tag="locus",
                         protein_location=protein_location)
        assert motif.tool == "test"
        assert motif.created_by_antismash
        assert motif.locus_tag == "locus"
        assert motif.protein_location == protein_location

        converted = motif.to_biopython()
        assert len(converted) == 1

        rebuilt = CDSMotif.from_biopython(converted[0])
        assert rebuilt.tool == motif.tool == "test"
        assert rebuilt.locus_tag == motif.locus_tag == "locus"
        assert rebuilt.protein_location == protein_location
        assert rebuilt.created_by_antismash
Esempio n. 24
0
 def test_invalid_qualifier(self):
     """Check that assigning a non-SecMetQualifier value to ``sec_met`` raises TypeError."""
     cds = CDSFeature(FeatureLocation(1, 5, 1), locus_tag="test",
                      translation="A")
     expected = "can only be set to an instance of SecMetQualifier"
     invalid_values = ("bad", ["stuff"], {}, 1)
     for value in invalid_values:
         with self.assertRaisesRegex(TypeError, expected):
             cds.sec_met = value
Esempio n. 25
0
    def test_frameshifted_location(self):
        """Check protein-coordinate lookups on a CDS whose parts overlap.

        The two parts (3-9 and 8-14) share position 8, so the compound
        location is twelve bases long. The extracted sequence and its
        translation are verified before the sub-location is checked.
        """
        parts = [FeatureLocation(3, 9, 1), FeatureLocation(8, 14, 1)]
        location = CompoundLocation(parts)
        assert len(location) == 12

        sequence = Seq("ATGATGAGCCCTCGTCTAGACTACAATGA")
        extracted = location.extract(sequence)
        assert extracted == "ATGAGCCCCTCG"
        assert len(extracted) == len(location)

        translation = extracted.translate()
        assert translation == "MSPS"

        cds = CDSFeature(location, locus_tag="test", translation=translation)
        sub_location = cds.get_sub_location_from_protein_coordinates(1, 3)
        assert isinstance(sub_location, CompoundLocation)
        assert len(sub_location.parts) == 2
        assert sub_location.start == 6
        assert sub_location.end == 11
Esempio n. 26
0
 def setUp(self):
     """Create ``self.alignment`` from a PFAMDomain and two aligned strings."""
     location = FeatureLocation(1, 6)
     pfam = PFAMDomain(location,
                       "description",
                       protein_start=3,
                       protein_end=5,
                       domain="p450",
                       identifier="PF00001",
                       tool="test")
     alignment_args = ("WLAD-QGAR", "WLae.rGAR", 10, 19)
     self.alignment = Alignment(pfam, *alignment_args)
Esempio n. 27
0
    def generate_domains(self):
        """Build dummy domains from the translations in ``data/PKS_KS.input``.

        One "PKS_KS" domain is created per translation in the fasta file,
        followed by a single "PKS_KR" domain that has no translation set
        here and reuses the coordinates of the last "PKS_KS" domain.

        Returns:
            the list of domains, with the "PKS_KR" domain last
        """
        fasta_path = path.get_full_path(__file__, 'data', 'PKS_KS.input')
        translations = fasta.read_fasta(fasta_path).values()

        # NOTE(review): this offset is never advanced, so every domain
        # starts at the same position - confirm whether that is intended.
        last_end = 0

        domains = []
        for translation in translations:
            ks_location = FeatureLocation(last_end + 10,
                                          last_end + len(translation) * 3 + 16)
            ks_domain = DummyAntismashDomain(location=ks_location)
            ks_domain.translation = translation
            ks_domain.domain = "PKS_KS"
            domains.append(ks_domain)

        # the trailing domain is sized from the last translation read
        final_length = len(domains[-1].translation)
        kr_location = FeatureLocation(last_end + 10,
                                      last_end + final_length * 3 + 16)
        kr_domain = DummyAntismashDomain(location=kr_location)
        kr_domain.domain = "PKS_KR"
        domains.append(kr_domain)
        return domains
Esempio n. 28
0
    def test_non_antismash_motif(self):
        """Round-trip an ExternalCDSMotif through biopython.

        The rebuilt feature must again be an ExternalCDSMotif with the same
        tool, and neither motif may be marked as created by antiSMASH.
        """
        external = ExternalCDSMotif(FeatureLocation(7, 10), {})
        assert not external.created_by_antismash

        converted = external.to_biopython()
        assert len(converted) == 1, converted

        rebuilt = CDSMotif.from_biopython(converted[0])
        assert isinstance(rebuilt, ExternalCDSMotif)
        assert rebuilt.tool == external.tool
        assert not rebuilt.created_by_antismash
Esempio n. 29
0
    def test_tool_conversion(self):
        """Check that a CDSMotif's tool is kept across a biopython round trip."""
        motif = CDSMotif(FeatureLocation(2, 5), tool="test")
        assert motif.tool == "test"
        assert motif.created_by_antismash

        converted = motif.to_biopython()
        assert len(converted) == 1

        rebuilt = CDSMotif.from_biopython(converted[0])
        assert rebuilt.tool == motif.tool == "test"
        assert rebuilt.created_by_antismash
Esempio n. 30
0
    def test_non_antismash_motif(self):
        """Check that a CDSMotif without a tool stays that way after a round trip.

        Both the original and the rebuilt motif must have ``tool`` of
        ``None`` and must not be marked as created by antiSMASH.
        """
        def check_untooled(candidate):
            assert candidate.tool is None
            assert not candidate.created_by_antismash

        motif = CDSMotif(FeatureLocation(7, 10))
        check_untooled(motif)

        converted = motif.to_biopython()
        assert len(converted) == 1

        check_untooled(CDSMotif.from_biopython(converted[0]))