Ejemplo n.º 1
0
 def test_parse_broken_qualifier(self):
     # the second entry separates the GO ID from its description with ';'
     # rather than ':', so parsing is expected to fail with a ValueError
     valid_entry = "GO:0004871: signal transducer activity"
     malformed_entry = "GO:0007165; signal transduction"
     with self.assertRaisesRegex(ValueError, "Cannot parse qualifier"):
         GOQualifier.from_biopython([valid_entry, malformed_entry])
Ejemplo n.º 2
0
 def test_biopython_to_and_from(self):
     # a qualifier converted to its biopython form and parsed back again
     # has to carry exactly the same GO entries
     entries = {
         'GO:0004871': 'signal transducer activity',
         'GO:0007165': 'signal transduction',
         'GO:0016020': 'membrane',
     }
     source = GOQualifier(entries)
     as_biopython = source.to_biopython()
     rebuilt = GOQualifier.from_biopython(as_biopython)
     assert source.go_entries == rebuilt.go_entries
Ejemplo n.º 3
0
    def from_biopython(bio_feature: SeqFeature, feature: "PFAMDomain" = None,  # type: ignore
                       leftovers: Dict[str, List[str]] = None) -> "PFAMDomain":
        """Build a PFAMDomain from a biopython SeqFeature.

        Arguments:
            bio_feature: the SeqFeature to convert
            feature: never read, a new PFAMDomain is always constructed here
            leftovers: qualifiers not yet consumed; when None, a copy is
                       made with Feature.make_qualifiers_copy(bio_feature)

        Returns:
            the PFAMDomain as returned by the parent class's from_biopython

        Raises:
            KeyError: if one of the qualifiers "description",
                      "protein_start", "protein_end" or "aSTool" is absent
            SecmetInvalidInputError: if no "db_xref" value starts with "PF"
        """
        if leftovers is None:
            leftovers = Feature.make_qualifiers_copy(bio_feature)
        # grab mandatory qualifiers and create the class
        description = leftovers.pop("description")[0]
        p_start = int(leftovers.pop("protein_start")[0])
        p_end = int(leftovers.pop("protein_end")[0])
        # the list is fetched, not popped, so that only the PFAM reference is
        # removed and every other db_xref stays in leftovers
        xref = leftovers.get("db_xref", [])
        name = None
        for i, ref in enumerate(xref):
            if ref.startswith("PF"):
                name = ref
                # removing while iterating is safe, the loop ends right away
                xref.pop(i)
                break
        if name is None:
            raise SecmetInvalidInputError("PFAMDomain missing identifier")
        tool = leftovers.pop("aSTool")[0]

        feature = PFAMDomain(bio_feature.location, description, p_start, p_end,
                             identifier=name, tool=tool)

        # grab optional qualifiers
        feature.gene_ontologies = GOQualifier.from_biopython(leftovers.pop("gene_ontologies", []))
        # NOTE(review): "probability" is read without being popped, so it is
        # still in leftovers when the parent runs - confirm this is intended
        if "probability" in leftovers:
            feature.probability = float(leftovers["probability"][0])

        # grab parent optional qualifiers
        updated = super(PFAMDomain, feature).from_biopython(bio_feature, feature=feature, leftovers=leftovers)
        assert isinstance(updated, PFAMDomain)
        return updated
Ejemplo n.º 4
0
 def test_pfam_domain(self):
     # populate a domain with every attribute of interest, convert it to its
     # biopython form and back, then compare attribute by attribute
     source = PFAMDomain(
         FeatureLocation(2, 5),
         description="test",
         protein_start=5,
         protein_end=10,
         identifier="PF00002.17",
         domain="p450",
         tool="toolname",
     )
     extra_values = (
         ("domain_id", "domain_id"),
         ("database", "db"),
         ("detection", "someprogram"),
         ("evalue", 1e-5),
         ("score", 5.),
         ("locus_tag", "locus"),
         ("label", "somelabel"),
         ("translation", "ARNDCQ"),
     )
     for attribute, value in extra_values:
         setattr(source, attribute, value)
     source.gene_ontologies = GOQualifier({
         'GO:0004871': 'signal transducer activity',
         'GO:0007165': 'signal transduction',
         'GO:0016020': 'membrane',
     })

     bio_features = source.to_biopython()
     rebuilt = PFAMDomain.from_biopython(bio_features[0])

     compared_attributes = (
         "tool", "domain_id", "database", "detection", "evalue",
         "score", "locus_tag", "label", "translation", "domain",
         "protein_start", "protein_end", "identifier", "version",
     )
     for attribute in compared_attributes:
         assert getattr(source, attribute) == getattr(rebuilt, attribute)
     assert source.gene_ontologies.go_entries == rebuilt.gene_ontologies.go_entries
     assert source.full_identifier == rebuilt.full_identifier
Ejemplo n.º 5
0
 def test_go_ids(self):
     # the ids exposed by the qualifier have to be exactly the keys of the
     # mapping it was constructed from
     entries = {
         'GO:0004871': 'signal transducer activity',
         'GO:0007165': 'signal transduction',
         'GO:0016020': 'membrane',
     }
     qualifier = GOQualifier(entries)
     found_ids = set(qualifier.ids)
     expected_ids = set(entries.keys())
     assert found_ids == expected_ids
Ejemplo n.º 6
0
 def add_to_record(self, record: Record) -> None:
     """Attach a GOQualifier to every PFAM domain that has gene ontologies.

     Arguments:
         record: the Record the results are stored in, its id has to
                 equal the record_id held by this object

     Raises:
         ValueError: if the ids of the given record and this object differ
     """
     ids_differ = record.id != self.record_id
     if ids_differ:
         raise ValueError("Record to store in and record analysed don't match")
     for pfam in self.pfam_domains_with_gos:
         ontologies = self.get_all_gos(pfam)
         pfam.gene_ontologies = GOQualifier(ontologies)
Ejemplo n.º 7
0
    def from_biopython(cls: Type[T],
                       bio_feature: SeqFeature,
                       feature: T = None,
                       leftovers: Dict[str, List[str]] = None,
                       record: Any = None) -> T:
        """Build an instance of cls from a biopython SeqFeature.

        Arguments:
            bio_feature: the SeqFeature to convert
            feature: never read, a new instance of cls is always constructed
            leftovers: qualifiers not yet consumed; when None, a copy is
                       made with Feature.make_qualifiers_copy(bio_feature)
            record: passed through unchanged to the parent's from_biopython

        Returns:
            the feature as returned by the parent class's from_biopython

        Raises:
            KeyError: if one of the qualifiers "description",
                      "protein_start", "protein_end" or "aSTool" is absent
            SecmetInvalidInputError: if no "db_xref" value starts with "PF"
        """
        if leftovers is None:
            leftovers = Feature.make_qualifiers_copy(bio_feature)
        # grab mandatory qualifiers and create the class
        description = leftovers.pop("description")[0]
        p_start = int(leftovers.pop("protein_start")[0])
        p_end = int(leftovers.pop("protein_end")[0])
        # the list is fetched, not popped, so that only the PFAM reference is
        # removed and every other db_xref stays in leftovers
        xref = leftovers.get("db_xref", [])
        name = None
        for i, ref in enumerate(xref):
            if ref.startswith("PF"):
                name = ref
                # removing while iterating is safe, the loop ends right away
                xref.pop(i)
                break
        if name is None:
            raise SecmetInvalidInputError("PFAMDomain missing identifier")
        tool = leftovers.pop("aSTool")[0]
        # a missing locus tag falls back to the "(unknown)" placeholder
        locus_tag = leftovers.pop("locus_tag", ["(unknown)"])[0]

        feature = cls(bio_feature.location,
                      description,
                      FeatureLocation(p_start, p_end),
                      identifier=name,
                      tool=tool,
                      locus_tag=locus_tag)

        # grab optional qualifiers
        feature.gene_ontologies = GOQualifier.from_biopython(
            leftovers.pop("gene_ontologies", []))

        # grab parent optional qualifiers
        updated = super().from_biopython(bio_feature,
                                         feature=feature,
                                         leftovers=leftovers,
                                         record=record)
        assert isinstance(updated, PFAMDomain)
        return updated