def convert_hits_to_features(self) -> None:
    """Convert every hit in ``self.hit_info`` into an RRE feature.

    ``self.hit_info`` maps a locus tag to a list of hit mappings; each hit is
    read for the keys ``location``, ``protein_start``, ``protein_end``,
    ``description``, ``identifier``, ``domain``, ``score``, ``evalue``,
    ``label`` and ``translation``.  The created features are appended to
    ``self.features`` in iteration order.

    Domain ids are numbered with one running, 1-indexed counter shared by all
    hits.  Counting per domain name (while leaving the domain name out of the
    id) gave the same id to two hits of different domains on the same locus,
    since both could be the n-th hit of their own domain.  A single counter
    keeps the id format and yields exactly the old numbering whenever only one
    domain type is present.
    """
    hit_number = 0
    for locus_tag, hits in self.hit_info.items():
        for hit in hits:
            location = location_from_string(hit['location'])
            protein_location = FeatureLocation(hit['protein_start'], hit['protein_end'])
            rre_feature = RRE(location, hit['description'], protein_location, tool=self.tool,
                              identifier=hit['identifier'], locus_tag=locus_tag, domain=hit['domain'])

            # Set additional properties
            for attr in ['score', 'evalue', 'label', 'translation']:
                setattr(rre_feature, attr, hit[attr])
            rre_feature.database = self.database
            rre_feature.detection = self.detection

            hit_number += 1  # 1-indexed, so increment before use
            rre_feature.domain_id = "{}_{}_{:04d}".format(self.tool, rre_feature.locus_tag, hit_number)
            self.features.append(rre_feature)
def from_json(cls, accession: str, data: Dict[str, Any], cdses: Dict[str, ReferenceCDS],
              cds_mapping: Dict[str, str]) -> "ReferenceProtocluster":
    """ Builds a ReferenceProtocluster from its JSON representation.

        The names listed under "core_cdses" are looked up in `cdses`, and the
        start/end handed to the constructor come from parsing the "location"
        string of the data.
    """
    core_cdses = []
    for core_name in data["core_cdses"]:
        core_cdses.append(cdses[core_name])
    area = location_from_string(data["location"])
    return cls(accession, area.start, area.end, cds_mapping, cdses, core_cdses, data["product"])
def feature_from_json(data: Union[str, Dict]) -> SeqFeature:
    """ Builds a SeqFeature from a JSON representation of a feature.

        A string argument is decoded as JSON first (pairs are loaded into an
        OrderedDict). The resulting value must be a dict, from which the keys
        "location", "type", "id" and "qualifiers" are read.
    """
    parsed = json.loads(data, object_pairs_hook=OrderedDict) if isinstance(data, str) else data
    assert isinstance(parsed, dict)
    return SeqFeature(
        location=location_from_string(parsed["location"]),
        type=parsed["type"],
        id=parsed["id"],
        qualifiers=parsed["qualifiers"],
    )
def rebuild_references(annotations: Dict) -> Dict[str, List[Reference]]:
    """ Restores the 'references' annotation of a SeqRecord from its JSON form.

        The "references" entry of `annotations` is replaced in place by a list
        of Reference objects and the same `annotations` dict is returned.
    """
    rebuilt = []
    for raw in annotations["references"]:
        reference = Reference()
        # the JSON dict itself becomes the attribute storage of the Reference,
        # so the location assignment below is also written into `raw`
        reference.__dict__ = raw
        reference.location = [location_from_string(text) for text in raw["location"]]
        rebuilt.append(reference)
    annotations["references"] = rebuilt
    return annotations
def convert(self, location, expected_type=FeatureLocation):
    """ Round-trips a location through its string form.

        Asserts that `location` is an instance of `expected_type`, that two
        str() conversions of it agree, and that parsing the text with
        location_from_string gives an `expected_type` instance again.
        Returns the re-parsed location.
    """
    assert isinstance(location, expected_type)
    first_text = str(location)
    print(first_text)  # just for help when debugging a failing test
    second_text = str(location)
    assert isinstance(second_text, str)
    assert first_text == second_text
    rebuilt = location_from_string(second_text)
    assert isinstance(rebuilt, expected_type)
    return rebuilt
def from_json(json: Dict[str, Any], record: Record) -> Optional["LanthiResults"]:
    """ Rebuilds a LanthiResults instance from its JSON representation.

        Returns None, after logging a warning, when the "schema_version" entry
        does not match LanthiResults.schema_version.
    """
    if json.get("schema_version") != LanthiResults.schema_version:
        logging.warning("Discarding Lanthipeptide results, schema version mismatch")
        return None

    rebuilt = LanthiResults(json["record_id"])

    # motifs are stored per locus as JSON-serialised prepeptides
    for locus, raw_motifs in json["motifs"].items():
        for raw_motif in raw_motifs:
            rebuilt.motifs_by_locus[locus].append(Prepeptide.from_json(raw_motif))

    # keys come back from JSON as strings, values as lists
    clusters = {}
    for key, members in json["protoclusters"].items():
        clusters[int(key)] = set(members)
    rebuilt.clusters = clusters

    # extra CDS features are stored as (location string, name) pairs
    for location, name in json["new_cds_features"]:
        parsed_location = location_from_string(location)
        cds = all_orfs.create_feature_from_location(record, parsed_location, label=name)
        rebuilt.new_cds_features.add(cds)
    return rebuilt
def add_to_record(self, record: Record) -> None:
    """ Creates a PFAMDomain for every stored hit and adds it to the record.

        Hits are mappings here; the database version recorded on each domain
        is obtained from pfamdb.get_db_version_from_path(self.database).
    """
    copied_keys = ["label", "locus_tag", "domain", "evalue", "score", "translation"]
    db_version = pfamdb.get_db_version_from_path(self.database)
    for number, hit in enumerate(self.hits, start=1):
        protein_location = FeatureLocation(hit["protein_start"], hit["protein_end"])
        domain = PFAMDomain(
            location_from_string(hit["location"]),
            description=hit["description"],
            protein_location=protein_location,
            identifier=hit["identifier"],
            tool=self.tool,
            locus_tag=hit["locus_tag"],
        )
        for key in copied_keys:
            setattr(domain, key, hit[key])
        domain.database = db_version
        domain.detection = "hmmscan"
        # ids are numbered from 1 in hit order
        domain.domain_id = "{}_{}_{:04d}".format(self.tool, domain.locus_tag, number)
        record.add_pfam_domain(domain)
def from_json(json: Dict[str, Any], record: Record) -> Optional["ClusterFinderResults"]:
    """ Rebuilds a ClusterFinderResults instance from its JSON representation.

        Returns None, after logging a warning, when the "schema" entry does
        not match ClusterFinderResults.schema_version. Each entry of "areas"
        supplies a location string at index 0 and a probability at index 1.
    """
    if json.get("schema") != ClusterFinderResults.schema_version:
        logging.warning("Dropping ClusterFinder probabilistic results, schema version has changed")
        return None

    areas = [
        SubRegion(location_from_string(area[0]), tool="clusterfinder", probability=area[1])
        for area in json["areas"]
    ]
    return ClusterFinderResults(record.id, areas, create=json["created"])
def from_json(json: Dict, record: secmet.Record) -> Optional["ThioResults"]:
    """ Builds a results object from JSON

        Returns None, after logging a warning, when the "schema_version" entry
        does not match ThioResults.schema_version.
    """
    if json.get("schema_version") != ThioResults.schema_version:
        logging.warning("Discarding Thiopeptide results, schema version mismatch")
        return None

    rebuilt = ThioResults(json["record_id"])

    for raw_motif in json["motifs"]:
        rebuilt.motifs.append(secmet.Prepeptide.from_json(raw_motif))

    # stored values are resolved back to protoclusters through the record
    for protocluster_ref in json["protoclusters with motifs"]:
        rebuilt.clusters_with_motifs.add(record.get_protocluster(protocluster_ref))

    # CDS features are stored per cluster as (location string, name) pairs
    for cluster, named_locations in json["cds_features"].items():
        for location, name in named_locations:
            parsed_location = location_from_string(location)
            cds = all_orfs.create_feature_from_location(record, parsed_location, label=name)
            rebuilt.cds_features[cluster].append(cds)
    return rebuilt
def add_to_record(self, record: Record) -> None:
    """ Adds the hits as PFAMDomains to the given record

        The database version recorded on each domain is obtained from
        pfamdb.get_db_version_from_path(self.database).
    """
    copied_attributes = ["label", "locus_tag", "domain", "evalue", "score", "translation"]
    db_version = pfamdb.get_db_version_from_path(self.database)
    for number, hit in enumerate(self.hits, start=1):
        protein_location = FeatureLocation(hit.protein_start, hit.protein_end)
        domain = PFAMDomain(
            location_from_string(hit.location),
            description=hit.description,
            protein_location=protein_location,
            identifier=hit.identifier,
            tool=self.tool,
            locus_tag=hit.locus_tag,
        )
        for name in copied_attributes:
            setattr(domain, name, getattr(hit, name))
        domain.database = db_version
        domain.detection = "hmmscan"
        # ids are numbered from 1 in hit order
        domain.domain_id = "{}_{}_{:04d}".format(self.tool, domain.locus_tag, number)
        record.add_pfam_domain(domain)
def add_to_record(self, record: Record) -> None:
    """ Adds the hits as TIGRDomains to the given record

        Raises:
            ValueError: if the id of the given record differs from the
                        record id stored on these results
    """
    if self.record_id != record.id:
        raise ValueError("Record to store in and record analysed don't match")

    copied_attributes = ["label", "locus_tag", "domain", "evalue", "score", "translation"]
    for number, hit in enumerate(self.hits, start=1):
        protein_location = FeatureLocation(hit.protein_start, hit.protein_end)
        domain = TIGRDomain(
            location_from_string(hit.location),
            description=hit.description,
            protein_location=protein_location,
            identifier=hit.identifier,
            locus_tag=hit.locus_tag,
        )
        for name in copied_attributes:
            setattr(domain, name, getattr(hit, name))
        domain.detection = "hmmscan"
        # ids are numbered from 1 in hit order
        domain.domain_id = "{}_{}_{:04d}".format(self.tool, domain.locus_tag, number)
        record.add_feature(domain)
def convert_hits_to_features(self) -> None:
    """ Turns every hit in `self.hits_by_cds` into an RREDomain feature and
        appends it to `self.features`.

        Hits are counted per domain name separately for each locus; that
        1-indexed count forms the last part of the feature's domain id.
    """
    copied_attributes = ("score", "evalue", "label", "translation")
    for locus_tag, hits in self.hits_by_cds.items():
        seen_per_domain: Dict[str, int] = defaultdict(int)
        for hit in hits:
            dna_location = location_from_string(hit.location)
            protein_location = FeatureLocation(hit.protein_start, hit.protein_end)
            feature = RREDomain(dna_location, hit.description, protein_location,
                                identifier=hit.identifier, locus_tag=locus_tag, domain=hit.domain)

            # Set additional properties
            for name in copied_attributes:
                setattr(feature, name, getattr(hit, name))
            feature.database = self.database
            feature.detection = self.detection

            # 1-indexed, so increment before use
            seen_per_domain[hit.domain] += 1
            feature.domain_id = f"{self.tool}_{locus_tag}_{hit.domain}.{seen_per_domain[hit.domain]}"
            self.features.append(feature)
def from_json(cls, name: str, data: Dict[str, Any]) -> "ReferenceCDS":
    """ Builds a ReferenceCDS from its JSON representation.

        Reads the "function", "components" and "location" entries of `data`;
        the location string is parsed before being passed to the constructor.
    """
    function = data["function"]
    components = data["components"]
    location = location_from_string(data["location"])
    return cls(name, function, components, location)