예제 #1
0
    def convert_hits_to_features(self) -> None:
        """ Convert every stored hit into an RRE feature appended to self.features.

            Hits are read from self.hit_info, which is iterated as a mapping of
            locus tag to the hits for that locus. Each hit is indexed by the
            keys 'location', 'protein_start', 'protein_end', 'description',
            'identifier', 'domain', 'score', 'evalue', 'label' and
            'translation'.

            Domain ids have the form "<tool>_<locus tag>_<NNNN>", where NNNN is
            the 1-based position of the hit within the hits of its locus.
        """
        for locus_tag, hits in self.hit_info.items():
            # Number the hits per locus. The id embeds the locus tag but not
            # the domain name, so a counter kept per domain name would give
            # the same id to hits of different domains within one locus.
            for index, hit in enumerate(hits, start=1):
                location = location_from_string(hit['location'])
                protein_location = FeatureLocation(hit['protein_start'],
                                                   hit['protein_end'])
                rre_feature = RRE(location,
                                  hit['description'],
                                  protein_location,
                                  tool=self.tool,
                                  identifier=hit['identifier'],
                                  locus_tag=locus_tag,
                                  domain=hit['domain'])

                # Set additional properties
                for attr in ['score', 'evalue', 'label', 'translation']:
                    setattr(rre_feature, attr, hit[attr])

                rre_feature.database = self.database
                rre_feature.detection = self.detection

                rre_feature.domain_id = "{}_{}_{:04d}".format(
                    self.tool, rre_feature.locus_tag, index)

                self.features.append(rre_feature)
예제 #2
0
 def from_json(cls, accession: str, data: Dict[str, Any],
               cdses: Dict[str, ReferenceCDS],
               cds_mapping: Dict[str, str]) -> "ReferenceProtocluster":
     """ Builds a ReferenceProtocluster from its JSON form.

         Arguments:
             accession: handed to the constructor unchanged
             data: the JSON dict, read for "core_cdses", "location" and "product"
             cdses: looked up by each name in data["core_cdses"], also handed
                    to the constructor unchanged
             cds_mapping: handed to the constructor unchanged

         Returns:
             the newly constructed instance
     """
     # resolve the core names first, then parse the location string
     core_cdses = [cdses[name] for name in data["core_cdses"]]
     span = location_from_string(data["location"])
     start, end = span.start, span.end
     return cls(accession, start, end, cds_mapping, cdses, core_cdses, data["product"])
예제 #3
0
def feature_from_json(data: Union[str, Dict]) -> SeqFeature:
    """ Builds a SeqFeature from a JSON representation of a feature.

        Arguments:
            data: either the JSON text (parsed here, keeping key order via
                  OrderedDict) or an already parsed dict; the keys "location",
                  "type", "id" and "qualifiers" are read

        Returns:
            a SeqFeature with the location parsed from its string form
    """
    parsed = json.loads(data, object_pairs_hook=OrderedDict) if isinstance(data, str) else data
    assert isinstance(parsed, dict)

    location = location_from_string(parsed["location"])
    feature_type = parsed["type"]
    feature_id = parsed["id"]
    qualifiers = parsed["qualifiers"]
    return SeqFeature(location=location, type=feature_type, id=feature_id, qualifiers=qualifiers)
예제 #4
0
 def rebuild_references(annotations: Dict) -> Dict[str, List[Reference]]:
     """ Rebuilds the 'references' annotation from its JSON form.

         Each entry of annotations["references"] becomes a Reference whose
         attribute dict *is* that entry (not a copy), with its "location"
         strings replaced by parsed locations.

         Arguments:
             annotations: the annotations dict, modified in place

         Returns:
             the same annotations dict, with "references" replaced by the
             list of rebuilt Reference objects
     """
     rebuilt = []
     for raw in annotations["references"]:
         reference = Reference()
         # adopt the JSON dict as the instance's attributes
         reference.__dict__ = raw
         reference.location = list(map(location_from_string, raw["location"]))
         rebuilt.append(reference)
     annotations["references"] = rebuilt
     return annotations
예제 #5
0
    def convert(self, location, expected_type=FeatureLocation):
        """ Round-trips a location through its string form.

            Asserts that the location is of the expected type, that converting
            it to a string twice gives the same text, and that the location
            rebuilt from that text is again of the expected type.

            Returns:
                the location rebuilt from the string
        """
        assert isinstance(location, expected_type)

        first_text = str(location)
        # printed only to help when debugging a failing test
        print(first_text)
        second_text = str(location)
        assert isinstance(second_text, str)
        assert first_text == second_text

        rebuilt = location_from_string(second_text)
        assert isinstance(rebuilt, expected_type)
        return rebuilt
 def from_json(json: Dict[str, Any], record: Record) -> Optional["LanthiResults"]:
     """ Rebuilds a LanthiResults instance from its JSON form.

         Arguments:
             json: the JSON dict, read for "schema_version", "record_id",
                   "motifs", "protoclusters" and "new_cds_features"
             record: the record passed along when creating the CDS features

         Returns:
             the rebuilt results, or None (after logging a warning) if the
             stored schema version differs from LanthiResults.schema_version
     """
     if json.get("schema_version") != LanthiResults.schema_version:
         logging.warning("Discarding Lanthipeptide results, schema version mismatch")
         return None

     rebuilt = LanthiResults(json["record_id"])

     for locus, motifs in json["motifs"].items():
         for motif in motifs:
             prepeptide = Prepeptide.from_json(motif)
             rebuilt.motifs_by_locus[locus].append(prepeptide)

     # JSON keys are strings, so convert them back to ints
     clusters = {}
     for key, members in json["protoclusters"].items():
         clusters[int(key)] = set(members)
     rebuilt.clusters = clusters

     for location, name in json["new_cds_features"]:
         parsed_location = location_from_string(location)
         feature = all_orfs.create_feature_from_location(record, parsed_location, label=name)
         rebuilt.new_cds_features.add(feature)

     return rebuilt
예제 #7
0
 def add_to_record(self, record: Record) -> None:
     """ Builds a PFAMDomain for every hit in self.hits and adds it to the record.

         Domain ids have the form "<tool>_<locus tag>_<NNNN>", where NNNN is
         the 1-based position of the hit in self.hits.

         Arguments:
             record: the record the domains are added to
     """
     db_version = pfamdb.get_db_version_from_path(self.database)
     copied_keys = ("label", "locus_tag", "domain", "evalue", "score", "translation")
     for number, hit in enumerate(self.hits, start=1):
         protein_span = FeatureLocation(hit["protein_start"], hit["protein_end"])
         domain = PFAMDomain(
             location_from_string(hit["location"]),
             description=hit["description"],
             protein_location=protein_span,
             identifier=hit["identifier"],
             tool=self.tool,
             locus_tag=hit["locus_tag"],
         )
         # copy the remaining hit values straight onto the feature
         for key in copied_keys:
             setattr(domain, key, hit[key])
         domain.database = db_version
         domain.detection = "hmmscan"
         domain.domain_id = "{}_{}_{:04d}".format(self.tool, domain.locus_tag, number)
         record.add_pfam_domain(domain)
예제 #8
0
 def from_json(json: Dict[str, Any],
               record: Record) -> Optional["ClusterFinderResults"]:
     """ Rebuilds a ClusterFinderResults instance from its JSON form.

         Arguments:
             json: the JSON dict, read for "schema", "areas" and "created";
                   each area is indexed as [location string, probability]
             record: the record whose id is given to the new results

         Returns:
             the rebuilt results, or None (after logging a warning) if the
             stored "schema" value differs from
             ClusterFinderResults.schema_version
     """
     if json.get("schema") != ClusterFinderResults.schema_version:
         logging.warning(
             "Dropping ClusterFinder probabilistic results, schema version has changed"
         )
         return None

     subregions = [
         SubRegion(location_from_string(entry[0]), tool="clusterfinder", probability=entry[1])
         for entry in json["areas"]
     ]
     return ClusterFinderResults(record.id, subregions, create=json["created"])
예제 #9
0
 def from_json(json: Dict, record: secmet.Record) -> Optional["ThioResults"]:
     """ Rebuilds a ThioResults instance from its JSON form.

         Arguments:
             json: the JSON dict, read for "schema_version", "record_id",
                   "motifs", "protoclusters with motifs" and "cds_features"
             record: the record used to look up protoclusters and to create
                     the CDS features

         Returns:
             the rebuilt results, or None (after logging a warning) if the
             stored schema version differs from ThioResults.schema_version
     """
     if json.get("schema_version") != ThioResults.schema_version:
         logging.warning("Discarding Thiopeptide results, schema version mismatch")
         return None

     rebuilt = ThioResults(json["record_id"])

     for raw_motif in json["motifs"]:
         rebuilt.motifs.append(secmet.Prepeptide.from_json(raw_motif))

     for cluster_number in json["protoclusters with motifs"]:
         protocluster = record.get_protocluster(cluster_number)
         rebuilt.clusters_with_motifs.add(protocluster)

     for cluster, features in json["cds_features"].items():
         for location, name in features:
             parsed_location = location_from_string(location)
             feature = all_orfs.create_feature_from_location(record, parsed_location, label=name)
             rebuilt.cds_features[cluster].append(feature)

     return rebuilt
예제 #10
0
파일: hmmer.py 프로젝트: SBGlab/antismash
 def add_to_record(self, record: Record) -> None:
     """ Adds each hit in self.hits to the given record as a PFAMDomain.

         Domain ids have the form "<tool>_<locus tag>_<NNNN>", where NNNN is
         the 1-based position of the hit in self.hits.

         Arguments:
             record: the record the domains are added to
     """
     db_version = pfamdb.get_db_version_from_path(self.database)
     copied_attributes = ("label", "locus_tag", "domain", "evalue", "score", "translation")
     for number, hit in enumerate(self.hits, start=1):
         protein_span = FeatureLocation(hit.protein_start, hit.protein_end)
         domain = PFAMDomain(
             location_from_string(hit.location),
             description=hit.description,
             protein_location=protein_span,
             identifier=hit.identifier,
             tool=self.tool,
             locus_tag=hit.locus_tag,
         )
         # mirror the remaining hit attributes onto the feature
         for attribute in copied_attributes:
             setattr(domain, attribute, getattr(hit, attribute))
         domain.database = db_version
         domain.detection = "hmmscan"
         domain.domain_id = "{}_{}_{:04d}".format(self.tool, domain.locus_tag, number)
         record.add_pfam_domain(domain)
예제 #11
0
    def add_to_record(self, record: Record) -> None:
        """ Adds each hit in self.hits to the given record as a TIGRDomain.

            Domain ids have the form "<tool>_<locus tag>_<NNNN>", where NNNN is
            the 1-based position of the hit in self.hits.

            Arguments:
                record: the record the domains are added to

            Raises:
                ValueError: if the record's id differs from self.record_id
        """
        if record.id != self.record_id:
            raise ValueError("Record to store in and record analysed don't match")

        copied_attributes = ("label", "locus_tag", "domain", "evalue", "score", "translation")
        for number, hit in enumerate(self.hits, start=1):
            protein_span = FeatureLocation(hit.protein_start, hit.protein_end)
            domain = TIGRDomain(
                location_from_string(hit.location),
                description=hit.description,
                protein_location=protein_span,
                identifier=hit.identifier,
                locus_tag=hit.locus_tag,
            )
            # mirror the remaining hit attributes onto the feature
            for attribute in copied_attributes:
                setattr(domain, attribute, getattr(hit, attribute))
            domain.detection = "hmmscan"
            domain.domain_id = "{}_{}_{:04d}".format(self.tool, domain.locus_tag, number)
            record.add_feature(domain)
예제 #12
0
    def convert_hits_to_features(self) -> None:
        """ Builds an RREDomain for every hit in self.hits_by_cds and appends
            it to self.features.

            Domain ids have the form "<tool>_<locus tag>_<domain>.<N>", where N
            counts, from 1, the hits of that domain seen so far within the
            same locus tag.
        """
        copied_attributes = ("score", "evalue", "label", "translation")
        for locus_tag, hits in self.hits_by_cds.items():
            # the counts start afresh for every locus tag
            domain_counts: Dict[str, int] = defaultdict(int)
            for hit in hits:
                protein_location = FeatureLocation(hit.protein_start, hit.protein_end)
                rre_feature = RREDomain(
                    location_from_string(hit.location),
                    hit.description,
                    protein_location,
                    identifier=hit.identifier,
                    locus_tag=locus_tag,
                    domain=hit.domain,
                )

                # copy the hit's remaining values onto the feature
                for attribute in copied_attributes:
                    setattr(rre_feature, attribute, getattr(hit, attribute))

                rre_feature.database = self.database
                rre_feature.detection = self.detection

                # ids are 1-indexed, so the count is bumped before it is used
                domain_counts[hit.domain] += 1
                rre_feature.domain_id = f"{self.tool}_{locus_tag}_{hit.domain}.{domain_counts[hit.domain]}"

                self.features.append(rre_feature)
예제 #13
0
 def from_json(cls, name: str, data: Dict[str, Any]) -> "ReferenceCDS":
     """ Builds a ReferenceCDS from its JSON form.

         Arguments:
             name: handed to the constructor unchanged
             data: the JSON dict, read for "function", "components" and "location"

         Returns:
             the newly constructed instance, with the location parsed from
             its string form
     """
     function = data["function"]
     components = data["components"]
     location = location_from_string(data["location"])
     return cls(name, function, components, location)