def setUp(self):
    """ Builds an isolated clusterfinder config and a 2000-base dummy record
        holding one CDS feature and one PFAM domain per entry of the table
        below.
    """
    options = ["--cf-create-clusters",
               "--cf-mean-threshold", "0.6",
               "--cf-min-cds", "5",
               "--cf-min-pfams", "5"]
    self.config = build_config(options, modules=[clusterfinder], isolated=True)
    update_config({"enabled_cluster_types": []})

    # each entry is (start, end, probability, PFAM identifier);
    # the entries from 1010 onwards reuse the probabilities of the first
    # eight entries with coordinates shifted by 1000, and the entry at 500
    # has no probability at all
    domain_specs = (
        (10, 20, 0.1, 'PF77777'),
        (30, 40, 0.3, 'PF00106'),
        (50, 60, 0.4, 'PF00107'),
        (60, 70, 0.7, 'PF00109'),
        (70, 80, 0.98, 'PF08484'),
        (90, 100, 0.8, 'PF02401'),
        (100, 110, 0.32, 'PF04369'),
        (110, 120, 1.0, 'PF00128'),
        (130, 140, 0.2, 'PF77776'),
        (500, 505, None, 'PF77775'),
        (1010, 1020, 0.1, 'PF77774'),
        (1030, 1040, 0.3, 'PF00106'),
        (1050, 1060, 0.4, 'PF00107'),
        (1060, 1070, 0.7, 'PF00109'),
        (1070, 1080, 0.98, 'PF08484'),
        (1090, 1100, 0.8, 'PF02401'),
        (1100, 1110, 0.32, 'PF04369'),
        (1110, 1120, 1.0, 'PF00128'),
    )

    self.record = DummyRecord(seq=Seq("A" * 2000))
    for start, end, probability, pfam_id in domain_specs:
        location = FeatureLocation(start, end, strand=1)
        # the CDS and the domain share the same location, the CDS is named by its start
        cds = CDSFeature(location, locus_tag=str(start), translation="A")
        self.record.add_cds_feature(cds)
        domain = PFAMDomain(location, "dummy_description",
                            protein_start=start + 1, protein_end=end-1,
                            identifier=pfam_id, tool="test")
        domain.domain_id = "pfam_%d" % start
        domain.probability = probability
        self.record.add_pfam_domain(domain)
Ejemplo n.º 2
0
 def test_add_results_to_record(self):
     """ Checks that adding pfam2go results to a record annotates every PFAM
         domain with the gene ontologies held by the results, including
         domains sharing an identifier but differing in version.
     """
     locations_by_id = {
         'PF00015.2': FeatureLocation(0, 3),
         'PF00351.1': FeatureLocation(0, 3),
         'PF00015.27': FeatureLocation(3, 6),
     }
     record = set_dummy_with_pfams(locations_by_id)

     # a second domain with exactly the same versioned identifier as an existing one
     duplicate = PFAMDomain(location=FeatureLocation(6, 9), description='DUPLICATE',
                            protein_start=0, protein_end=5,
                            identifier="PF00015.2", tool="test")
     duplicate.domain_id = 'DUPLICATE'
     record.add_pfam_domain(duplicate)
     assert duplicate in record.get_pfam_domains()

     gos_by_pfam = pfam2go.get_gos_for_pfams(record)
     results = pfam2go.Pfam2GoResults(record.id, gos_by_pfam)
     results.add_to_record(record)
     assert duplicate.full_identifier == 'PF00015.2'

     for domain in record.get_pfam_domains():
         assert sorted(domain.gene_ontologies.ids) == sorted(results.get_all_gos(domain))
         if domain.identifier != "PF00015":
             continue
         # identical pfams (with different version numbers) must all have
         # the same gene ontologies
         assert domain.version in [2, 27]
         assert sorted(domain.gene_ontologies.ids) == sorted(results.get_all_gos(duplicate))
Ejemplo n.º 3
0
 def test_bad_pfam_domain(self):
     """ Checks that PFAMDomain construction rejects a non-string description,
         a non-string domain and malformed identifiers.
     """
     protein_location = FeatureLocation(5, 10)

     def build(**overrides):
         """ Constructs a PFAMDomain from valid defaults, replaced by any overrides """
         arguments = {
             "description": "desc",
             "protein_location": protein_location,
             "identifier": "PF00002",
             "tool": "test",
             "locus_tag": "dummy",
         }
         arguments.update(overrides)
         return PFAMDomain(FeatureLocation(2, 5), **arguments)

     with self.assertRaisesRegex(TypeError, "PFAMDomain description must be a string"):
         build(description=None)

     with self.assertRaisesRegex(TypeError, "Domain must be given domain as a string"):
         build(domain=5)

     bad_identifiers = ("PF0002", "FAKE003", "PF", "PF000003", "PF00003.a")
     for ident in bad_identifiers:
         with self.assertRaisesRegex(ValueError, "invalid"):
             build(identifier=ident)
Ejemplo n.º 4
0
def set_dummy_with_pfams(pfam_ids: Dict[str, FeatureLocation]) -> DummyRecord:
    """ Creates a DummyRecord containing one fake PFAMDomain per given identifier.

        Arguments:
            pfam_ids: a dictionary mapping PFAM identifier to the location
                      to use for that domain

        Returns:
            a DummyRecord with the created domains as its features
    """
    def make_domain(identifier: str, location: FeatureLocation) -> PFAMDomain:
        """ Builds a single fake domain, with an id formed from identifier and coordinates """
        domain = PFAMDomain(location=location, description='FAKE',
                            protein_start=0, protein_end=5,
                            identifier=identifier, tool="test")
        domain.domain_id = '%s.%d.%d' % (identifier, location.start, location.end)
        return domain

    domains = [make_domain(identifier, location)
               for identifier, location in pfam_ids.items()]
    return DummyRecord(features=domains)
Ejemplo n.º 5
0
 def add_to_record(self, record: Record) -> None:
     """ Adds each hit (a dictionary of hit details) to the given record as a
         PFAMDomain feature.
     """
     db_version = pfamdb.get_db_version_from_path(self.database)
     # values copied verbatim from the hit onto the new feature
     copied_keys = ("label", "locus_tag", "domain", "evalue", "score", "translation")
     # domain ids are numbered from 1, in hit order
     for number, hit in enumerate(self.hits, 1):
         protein_location = FeatureLocation(hit["protein_start"], hit["protein_end"])
         feature = PFAMDomain(
             location_from_string(hit["location"]),
             description=hit["description"],
             protein_location=protein_location,
             identifier=hit["identifier"],
             tool=self.tool,
             locus_tag=hit["locus_tag"],
         )
         for key in copied_keys:
             setattr(feature, key, hit[key])
         feature.database = db_version
         feature.detection = "hmmscan"
         feature.domain_id = "{}_{}_{:04d}".format(self.tool, feature.locus_tag, number)
         record.add_pfam_domain(feature)
Ejemplo n.º 6
0
    def test_blank_records(self):
        """ Checks that no gene ontologies are reported for a record without
            PFAM domains, nor for a record whose only domain uses the
            identifier PF00000.
        """
        record_without_pfams = DummyRecord()
        record_without_gos = Record(Seq("ATGTTATGAGGGTCATAACAT", generic_dna))

        domain = PFAMDomain(location=FeatureLocation(0, 12), description='MCPsignal',
                            protein_start=0, protein_end=5,
                            identifier="PF00000", tool="test")
        domain.domain_id = 'BLANK'
        record_without_gos.add_pfam_domain(domain)

        for record in (record_without_pfams, record_without_gos):
            assert not pfam2go.get_gos_for_pfams(record)
Ejemplo n.º 7
0
 def add_to_record(self, record: Record) -> None:
     """ Adds the hits as PFAMDomains to the given record """
     db_version = pfamdb.get_db_version_from_path(self.database)
     # attributes copied verbatim from the hit onto the new feature
     copied_attributes = ("label", "locus_tag", "domain", "evalue", "score", "translation")
     # domain ids are numbered from 1, in hit order
     for number, hit in enumerate(self.hits, 1):
         protein_location = FeatureLocation(hit.protein_start, hit.protein_end)
         feature = PFAMDomain(
             location_from_string(hit.location),
             description=hit.description,
             protein_location=protein_location,
             identifier=hit.identifier,
             tool=self.tool,
             locus_tag=hit.locus_tag,
         )
         for attribute in copied_attributes:
             setattr(feature, attribute, getattr(hit, attribute))
         feature.database = db_version
         feature.detection = "hmmscan"
         feature.domain_id = "{}_{}_{:04d}".format(self.tool, feature.locus_tag, number)
         record.add_pfam_domain(feature)
Ejemplo n.º 8
0
 def setUp(self):
     """ Creates the PFAMDomain stored as self.domain for the tests """
     location = FeatureLocation(1, 6)
     self.domain = PFAMDomain(location, "description",
                              protein_start=3, protein_end=5,
                              domain="p450", identifier="PF00001", tool="test")
Ejemplo n.º 9
0
 def setUp(self):
     """ Creates the Alignment stored as self.alignment, built on a fresh PFAMDomain """
     location = FeatureLocation(1, 6)
     pfam_domain = PFAMDomain(location, "description",
                              protein_start=3, protein_end=5,
                              domain="p450", identifier="PF00001", tool="test")
     self.alignment = Alignment(pfam_domain, "WLAD-QGAR", "WLae.rGAR", 10, 19)
Ejemplo n.º 10
0
 def test_bad_pfam_domain(self):
     """ Checks that PFAMDomain construction rejects bad arguments: a
         non-string description, a non-string domain, a protein end before
         the protein start, a non-numeric protein end, and malformed
         identifiers.
     """
     with self.assertRaisesRegex(TypeError,
                                 "PFAMDomain description must be a string"):
         PFAMDomain(FeatureLocation(2, 5),
                    description=None,
                    protein_start=5,
                    protein_end=10,
                    identifier="PF00002",
                    tool="test")
     with self.assertRaisesRegex(TypeError,
                                 "Domain must be given domain as a string"):
         PFAMDomain(FeatureLocation(2, 5),
                    description="desc",
                    protein_start=5,
                    protein_end=10,
                    identifier="PF00002",
                    domain=5,
                    tool="test")
     with self.assertRaisesRegex(
             ValueError,
             "A PFAMDomain protein location cannot end before it starts"):
         PFAMDomain(FeatureLocation(2, 5),
                    description="desc",
                    protein_start=10,
                    protein_end=5,
                    identifier="PF00002",
                    tool="test")
     # the parentheses are escaped so they match literally, rather than
     # forming an empty regex group
     with self.assertRaisesRegex(ValueError, r"invalid literal for int\(\)"):
         PFAMDomain(FeatureLocation(2, 5),
                    description="desc",
                    protein_start=10,
                    protein_end="nope",
                    identifier="PF00002",
                    tool="test")
     # the protein coordinates must be valid here, otherwise an
     # "invalid literal for int()" error from a bad coordinate would also
     # satisfy the "invalid" pattern and the identifier would never be tested
     for ident in ["PF0002", "FAKE003", "PF", "PF000003", "PF00003.a"]:
         with self.assertRaisesRegex(ValueError, "invalid"):
             PFAMDomain(FeatureLocation(2, 5),
                        description="desc",
                        protein_start=5,
                        protein_end=10,
                        identifier=ident,
                        tool="test")
Ejemplo n.º 11
0
 def test_pfam_domain(self):
     """ Checks that a fully populated PFAMDomain keeps its attributes and
         gene ontologies after conversion to a biopython feature and back.
     """
     original = PFAMDomain(FeatureLocation(2, 5), description="test",
                           protein_start=5, protein_end=10,
                           identifier="PF00002.17", domain="p450",
                           tool="toolname")
     # populate the attributes that aren't constructor arguments
     extra_values = (
         ("domain_id", "domain_id"),
         ("database", "db"),
         ("detection", "someprogram"),
         ("evalue", 1e-5),
         ("score", 5.),
         ("locus_tag", "locus"),
         ("label", "somelabel"),
         ("translation", "ARNDCQ"),
     )
     for attribute, value in extra_values:
         setattr(original, attribute, value)
     original.gene_ontologies = GOQualifier({
         'GO:0004871': 'signal transducer activity',
         'GO:0007165': 'signal transduction',
         'GO:0016020': 'membrane',
     })

     # round trip through the first biopython feature generated
     bio_features = original.to_biopython()
     new = PFAMDomain.from_biopython(bio_features[0])

     compared = ("tool", "domain_id", "database", "detection", "evalue",
                 "score", "locus_tag", "label", "translation", "domain",
                 "protein_start", "protein_end", "identifier", "version")
     for attribute in compared:
         assert getattr(original, attribute) == getattr(new, attribute)
     assert original.gene_ontologies.go_entries == new.gene_ontologies.go_entries
     assert original.full_identifier == new.full_identifier