Ejemplo n.º 1
0
    def setUp(self):
        """Prepare an isolated clusterfinder config and a record full of PFAMs.

        Each (start, end, probability, pfam_id) entry listed below is turned
        into one CDS feature and one PFAM domain that share a single location.
        """
        cli_args = ["--cf-create-clusters",
                    "--cf-mean-threshold", "0.6",
                    "--cf-min-cds", "5",
                    "--cf-min-pfams", "5"]
        self.config = build_config(cli_args, modules=[clusterfinder], isolated=True)
        update_config({"enabled_cluster_types": []})

        self.record = DummyRecord(seq=Seq("A" * 2000))

        # the entries from 1010 onwards repeat the first eight entries shifted
        # by 1000 (only the first identifier differs); the entry at 500 is the
        # only one without a probability
        domain_specs = [
            (10, 20, 0.1, 'PF77777'),
            (30, 40, 0.3, 'PF00106'),
            (50, 60, 0.4, 'PF00107'),
            (60, 70, 0.7, 'PF00109'),
            (70, 80, 0.98, 'PF08484'),
            (90, 100, 0.8, 'PF02401'),
            (100, 110, 0.32, 'PF04369'),
            (110, 120, 1.0, 'PF00128'),
            (130, 140, 0.2, 'PF77776'),
            (500, 505, None, 'PF77775'),
            (1010, 1020, 0.1, 'PF77774'),
            (1030, 1040, 0.3, 'PF00106'),
            (1050, 1060, 0.4, 'PF00107'),
            (1060, 1070, 0.7, 'PF00109'),
            (1070, 1080, 0.98, 'PF08484'),
            (1090, 1100, 0.8, 'PF02401'),
            (1100, 1110, 0.32, 'PF04369'),
            (1110, 1120, 1.0, 'PF00128'),
        ]
        for start, end, probability, pfam_id in domain_specs:
            location = FeatureLocation(start, end, strand=1)
            cds = CDSFeature(location, locus_tag=str(start), translation="A")
            self.record.add_cds_feature(cds)

            # the domain reuses the CDS location object
            pfam = DummyPFAMDomain(location=location, protein_start=start + 1,
                                   protein_end=end - 1, identifier=pfam_id)
            pfam.domain_id = "pfam_%d" % start
            pfam.probability = probability
            self.record.add_pfam_domain(pfam)
Ejemplo n.º 2
0
    def test_blank_records(self):
        """Records without PFAMs, or with only the PF00000 PFAM, give no GO results."""
        record_without_pfams = DummyRecord()

        record_with_dummy_pfam = Record(Seq("ATGTTATGAGGGTCATAACAT", generic_dna))
        record_with_dummy_pfam.add_pfam_domain(DummyPFAMDomain(identifier="PF00000"))

        # both lookups are expected to come back falsy
        for record in (record_without_pfams, record_with_dummy_pfam):
            assert not pfam2go.get_gos_for_pfams(record)
Ejemplo n.º 3
0
 def setUp(self):
     """Fill a fresh record with rebuilt antiSMASH domains and p450 PFAM domains."""
     self.record = secmet.Record()

     # except for Thioesterase, all domains were found in BN001301.1
     # TE domains were found in Y16952
     domain_inputs = [
         ("PKS_KS.input", "PKS_KS"),
         ("AT.input", "PKS_AT"),
         ("ACP.input", "ACP"),
         ("DH.input", "PKS_DH"),
         ("KR.input", "PKS_KR"),
         ("TE.input", "Thioesterase"),
         ("ER.input", "PKS_ER"),
     ]
     for filename, domain_type in domain_inputs:
         for rebuilt in rebuild_domains(filename, domain_type):
             self.record.add_antismash_domain(rebuilt)

     # these PFAMs found in BN001301.1 with clusterhmmer, one was excluded
     # to avoid a Biopython SearchIO bug
     fasta_path = path.get_full_path(__file__, 'data', "p450.input")
     translations = fasta.read_fasta(fasta_path)
     for name, translation in translations.items():
         p450 = DummyPFAMDomain(domain="p450", domain_id="PFAM_p450_" + name)
         p450.translation = translation
         self.record.add_pfam_domain(p450)
Ejemplo n.º 4
0
 def test_domains_of_interest(self):
     """Only candidates matching the analysis' target domain are of interest."""
     p450_domain = DummyPFAMDomain(domain="p450")
     candidates = (p450_domain, )

     # a target that doesn't match the candidate's domain selects nothing
     mismatched = ActiveSiteAnalysis("not-p450", candidates, "PKSI-KR.hmm2",
                                     [5, 6], ["C", "S"])
     assert mismatched.domains_of_interest == []

     # a matching target selects the candidate
     matched = ActiveSiteAnalysis("p450", candidates, "PKSI-KR.hmm2",
                                  [5, 6], ["C", "S"])
     assert matched.domains_of_interest == [p450_domain]
Ejemplo n.º 5
0
def set_dummy_with_pfams(pfam_ids: Dict[str, FeatureLocation]) -> DummyRecord:
    """Build a DummyRecord holding one dummy PFAM domain per given identifier.

    Arguments:
        pfam_ids: a dictionary mapping PFAM identifier to the location to
                  give that PFAM's domain

    Returns:
        a DummyRecord constructed with the created domains as its features
    """
    # each domain id has the form "<identifier>.<start>.<end>"
    domains = [
        DummyPFAMDomain(
            location=location,
            protein_start=0,
            protein_end=5,
            identifier=identifier,
            domain_id='%s.%d.%d' % (identifier, location.start, location.end),
        )
        for identifier, location in pfam_ids.items()
    ]
    return DummyRecord(features=domains)
Ejemplo n.º 6
0
 def test_add_results_to_record(self):
     """GO terms added to a record's PFAMs must match those held by the results."""
     locations_by_pfam = {
         'PF00015.2': FeatureLocation(0, 3),
         'PF00351.1': FeatureLocation(0, 3),
         'PF00015.27': FeatureLocation(3, 6)
     }
     record = set_dummy_with_pfams(locations_by_pfam)

     # a second PFAM carrying an identifier already present in the record
     duplicate = DummyPFAMDomain(identifier="PF00015.2")
     record.add_pfam_domain(duplicate)
     assert duplicate in record.get_pfam_domains()

     gos_by_pfam = pfam2go.get_gos_for_pfams(record)
     results = pfam2go.Pfam2GoResults(record.id, gos_by_pfam)
     results.add_to_record(record)
     assert duplicate.full_identifier == 'PF00015.2'

     for pfam in record.get_pfam_domains():
         expected_ids = sorted(results.get_all_gos(pfam))
         assert sorted(pfam.gene_ontologies.ids) == expected_ids
         if pfam.identifier != "PF00015":
             continue
         # make sure identical pfams (with different version numbers) all have the same gene ontologies
         assert pfam.version in [2, 27]
         duplicate_ids = sorted(results.get_all_gos(duplicate))
         assert sorted(pfam.gene_ontologies.ids) == duplicate_ids
Ejemplo n.º 7
0
 def setUp(self):
     """Create a p450 dummy domain and an Alignment built on top of it."""
     p450_domain = DummyPFAMDomain(domain="p450")
     self.domain = p450_domain
     # positional arguments after the domain are kept exactly as given:
     # two 9-character strings followed by the integers 10 and 19
     self.alignment = Alignment(
         p450_domain,
         "WLAD-QGAR",
         "WLaer.rGA",
         10,
         19,
     )