def test_parse_broken_qualifier(self):
    """Check that a malformed GO qualifier string is rejected.

    The second entry separates the GO ID from its description with a
    semicolon instead of a colon, which must raise a ValueError whose
    message matches "Cannot parse qualifier".
    """
    well_formed = "GO:0004871: signal transducer activity"
    malformed = "GO:0007165; signal transduction"  # semicolon instead of colon
    expectation = self.assertRaisesRegex(ValueError, "Cannot parse qualifier")
    with expectation:
        GOQualifier.from_biopython([well_formed, malformed])
def test_biopython_to_and_from(self):
    """Round-trip a GOQualifier through its biopython form and compare entries."""
    entries = {
        'GO:0004871': 'signal transducer activity',
        'GO:0007165': 'signal transduction',
        'GO:0016020': 'membrane',
    }
    original = GOQualifier(entries)
    # convert to the biopython representation, then rebuild from it
    as_biopython = original.to_biopython()
    rebuilt = GOQualifier.from_biopython(as_biopython)
    assert original.go_entries == rebuilt.go_entries
def from_biopython(bio_feature: SeqFeature, feature: "PFAMDomain" = None,  # type: ignore
                   leftovers: Dict[str, List[str]] = None) -> "PFAMDomain":
    """Construct a PFAMDomain from a biopython SeqFeature.

    NOTE(review): there is no self/cls parameter, so this is presumably
    wrapped as a static method by the enclosing class - confirm.

    Arguments:
        bio_feature: the SeqFeature to convert
        feature: never read here, a new PFAMDomain is always constructed
        leftovers: the qualifiers not yet consumed, if None a copy is made
                   with Feature.make_qualifiers_copy(bio_feature)

    Returns:
        the PFAMDomain as returned by the parent class conversion

    Raises:
        SecmetInvalidInputError: if no "db_xref" value starts with "PF"
    """
    if leftovers is None:
        leftovers = Feature.make_qualifiers_copy(bio_feature)

    # mandatory qualifiers, each is consumed from the leftovers as it is read
    description = leftovers.pop("description")[0]
    protein_start = int(leftovers.pop("protein_start")[0])
    protein_end = int(leftovers.pop("protein_end")[0])

    # only the first PFAM reference is removed, any other references remain
    references = leftovers.get("db_xref", [])
    pfam_index = next((index for index, reference in enumerate(references)
                       if reference.startswith("PF")), None)
    if pfam_index is None:
        raise SecmetInvalidInputError("PFAMDomain missing identifier")
    identifier = references.pop(pfam_index)

    tool = leftovers.pop("aSTool")[0]
    domain = PFAMDomain(bio_feature.location, description, protein_start, protein_end,
                        identifier=identifier, tool=tool)

    # optional qualifiers
    raw_ontologies = leftovers.pop("gene_ontologies", [])
    domain.gene_ontologies = GOQualifier.from_biopython(raw_ontologies)
    # NOTE(review): "probability" is read but not removed, so it is still
    # present in the leftovers handed to the parent - confirm this is intended
    if "probability" in leftovers:
        domain.probability = float(leftovers["probability"][0])

    # the parent class handles whatever optional qualifiers are left
    updated = super(PFAMDomain, domain).from_biopython(bio_feature, feature=domain,
                                                       leftovers=leftovers)
    assert isinstance(updated, PFAMDomain)
    return updated
def test_pfam_domain(self):
    """Check that a populated PFAMDomain survives a biopython round trip."""
    original = PFAMDomain(FeatureLocation(2, 5), description="test",
                          protein_start=5, protein_end=10,
                          identifier="PF00002.17", domain="p450",
                          tool="toolname")
    # attributes that are set after construction, in assignment order
    extra_values = (
        ("domain_id", "domain_id"),
        ("database", "db"),
        ("detection", "someprogram"),
        ("evalue", 1e-5),
        ("score", 5.),
        ("locus_tag", "locus"),
        ("label", "somelabel"),
        ("translation", "ARNDCQ"),
    )
    for attribute, value in extra_values:
        setattr(original, attribute, value)
    original.gene_ontologies = GOQualifier({
        'GO:0004871': 'signal transducer activity',
        'GO:0007165': 'signal transduction',
        'GO:0016020': 'membrane',
    })

    # only the first feature of the biopython conversion is converted back
    bio_features = original.to_biopython()
    rebuilt = PFAMDomain.from_biopython(bio_features[0])

    compared_slots = (
        "tool", "domain_id", "database", "detection", "evalue", "score",
        "locus_tag", "label", "translation", "domain", "protein_start",
        "protein_end", "identifier", "version",
    )
    for slot in compared_slots:
        assert getattr(original, slot) == getattr(rebuilt, slot)
    assert original.gene_ontologies.go_entries == rebuilt.gene_ontologies.go_entries
    assert original.full_identifier == rebuilt.full_identifier
def test_go_ids(self):
    """Check that the ids of a GOQualifier match the keys it was built from."""
    entries = {
        'GO:0004871': 'signal transducer activity',
        'GO:0007165': 'signal transduction',
        'GO:0016020': 'membrane',
    }
    qualifier = GOQualifier(entries)
    # order is irrelevant, so both sides are compared as sets
    found_ids = set(qualifier.ids)
    assert found_ids == set(entries)
def add_to_record(self, record: Record) -> None:
    """Set a GOQualifier on every PFAM domain in self.pfam_domains_with_gos.

    Arguments:
        record: the Record in which the gene ontologies are being stored,
                its id must be the same as self.record_id

    Raises:
        ValueError: if the id of the record differs from self.record_id
    """
    mismatched = record.id != self.record_id
    if mismatched:
        raise ValueError("Record to store in and record analysed don't match")

    for pfam in self.pfam_domains_with_gos:
        ontologies = self.get_all_gos(pfam)
        pfam.gene_ontologies = GOQualifier(ontologies)
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                   leftovers: Dict[str, List[str]] = None, record: Any = None) -> T:
    """Construct an instance of cls from a biopython SeqFeature.

    NOTE(review): the first parameter is cls, so this is presumably wrapped
    as a class method by the enclosing class - confirm.

    Arguments:
        bio_feature: the SeqFeature to convert
        feature: never read here, a new instance of cls is always constructed
        leftovers: the qualifiers not yet consumed, if None a copy is made
                   with Feature.make_qualifiers_copy(bio_feature)
        record: passed through unchanged to the parent class conversion

    Returns:
        the feature as returned by the parent class conversion

    Raises:
        SecmetInvalidInputError: if no "db_xref" value starts with "PF"
    """
    if leftovers is None:
        leftovers = Feature.make_qualifiers_copy(bio_feature)

    # mandatory qualifiers, each is consumed from the leftovers as it is read
    description = leftovers.pop("description")[0]
    protein_start = int(leftovers.pop("protein_start")[0])
    protein_end = int(leftovers.pop("protein_end")[0])

    # only the first PFAM reference is removed, any other references remain
    references = leftovers.get("db_xref", [])
    pfam_index = next((index for index, reference in enumerate(references)
                       if reference.startswith("PF")), None)
    if pfam_index is None:
        raise SecmetInvalidInputError("PFAMDomain missing identifier")
    identifier = references.pop(pfam_index)

    tool = leftovers.pop("aSTool")[0]
    # falls back to a placeholder when the feature has no locus tag qualifier
    locus_tag = leftovers.pop("locus_tag", ["(unknown)"])[0]
    protein_location = FeatureLocation(protein_start, protein_end)
    domain = cls(bio_feature.location, description, protein_location,
                 identifier=identifier, tool=tool, locus_tag=locus_tag)

    # optional qualifiers
    raw_ontologies = leftovers.pop("gene_ontologies", [])
    domain.gene_ontologies = GOQualifier.from_biopython(raw_ontologies)

    # the parent class handles whatever optional qualifiers are left
    updated = super().from_biopython(bio_feature, feature=domain,
                                     leftovers=leftovers, record=record)
    assert isinstance(updated, PFAMDomain)
    return updated