def test_simple_simple(self):
    """ Checks overlap detection between pairs of simple locations """
    reference = FeatureLocation(10, 15, strand=1)

    # entirely before the reference
    before = FeatureLocation(1, 5, strand=1)
    assert not locations_overlap(before, reference)

    # fully covering the reference
    covering = FeatureLocation(1, 25, strand=1)
    assert locations_overlap(covering, reference)

    # running into the start of the reference
    partial = FeatureLocation(1, 12, strand=1)
    assert locations_overlap(partial, reference)

    # opposite strands still count as overlapping
    opposite = FeatureLocation(12, 22, strand=-1)
    assert locations_overlap(opposite, reference)

    # the first ends exactly where the second starts: no overlap
    touching = FeatureLocation(10, 12, strand=1)
    assert not locations_overlap(opposite, touching)
def test_single_compound(self):
    """ A single compound location given alone should come back equal to
        the input, for both strands
    """
    for strand in (1, -1):
        parts = [
            FeatureLocation(6, 9, strand),
            FeatureLocation(12, 16, strand),
        ]
        original = CompoundLocation(parts)
        rebuilt = build_location_from_others([original])
        assert rebuilt == original
def test_all_merged(self):
    """ Three end-to-end locations should collapse into a single simple
        location covering 6 to 16, for both strands
    """
    boundaries = [(6, 9), (9, 12), (12, 16)]
    for strand in (1, -1):
        adjacent = [FeatureLocation(start, end, strand)
                    for start, end in boundaries]
        merged = build_location_from_others(adjacent)
        # must be a plain FeatureLocation, not a compound one
        assert isinstance(merged, FeatureLocation)
        assert not isinstance(merged, CompoundLocation)
        assert merged == FeatureLocation(6, 16, strand)
def test_position_conversion_nonzero_start(self):
    """ Checks self.func against a simple location that does not start at
        zero, on each strand
    """
    # NOTE(review): self.func is defined outside this view; the expected
    # tuples look like nucleotide coordinates in steps of 3 - confirm
    forward = FeatureLocation(6, 21, strand=1)
    assert len(forward) == 15
    assert self.func(0, 2, forward) == (6, 12)
    assert self.func(1, 4, forward) == (9, 18)

    reverse = FeatureLocation(6, 21, strand=-1)
    assert len(reverse) == 15
    # on the reverse strand the first positions map to the far end
    assert self.func(0, 2, reverse) == (15, 21)
    assert self.func(1, 4, reverse) == (9, 18)
def setUp(self):
    """ Creates a 21-long record marked as circular, along with a CDS and
        a gene feature sharing a two-part location (12-21 then 0-9)
    """
    record = SeqRecord(UnknownSeq(21))
    parts = [
        FeatureLocation(12, 21, strand=1),
        FeatureLocation(0, 9, strand=1),
    ]
    joined = CompoundLocation(parts, operator="join")

    self.seqrec = record
    self.seqcds = SeqFeature(joined, type="CDS")
    self.seqgene = SeqFeature(joined, type="gene")
    record.annotations["topology"] = "circular"
def test_mixed(self):
    """ Checks overlap detection between a simple and a compound location,
        with the arguments in both orders
    """
    compound = build_compound([(0, 10), (20, 30), (40, 50)], strand=1)

    # sits in the gap between the first two parts
    in_gap = FeatureLocation(15, 17)
    assert not locations_overlap(in_gap, compound)
    assert not locations_overlap(compound, in_gap)

    # sits within the second part
    in_part = FeatureLocation(22, 25)
    assert locations_overlap(in_part, compound)
    assert locations_overlap(compound, in_part)

    # starts in a gap and runs into the third part
    gap_into_part = FeatureLocation(35, 45)
    assert locations_overlap(gap_into_part, compound)
    assert locations_overlap(compound, gap_into_part)
def setUp(self):
    """ Creates a 21-base DNA record marked as circular, along with a CDS
        and a gene feature sharing a four-part location
        (12-15, 18-21, 0-3, 6-9, in that order)
    """
    record = SeqRecord(Seq("A" * 21))
    spans = [(12, 15), (18, 21), (0, 3), (6, 9)]
    parts = [FeatureLocation(start, end, strand=1) for start, end in spans]
    joined = CompoundLocation(parts, operator="join")

    self.seqrec = record
    self.seqcds = SeqFeature(joined, type="CDS")
    self.seqgene = SeqFeature(joined, type="gene")
    record.annotations["topology"] = "circular"
    record.annotations["molecule_type"] = "DNA"
def test_simple_in_compound(self):
    """ Checks whether a compound location contains various simple ones """
    compound = build_compound([(1, 4), (12, 20)], strand=1)

    # entirely within the gap between the parts
    in_gap = FeatureLocation(5, 10)
    assert not location_contains_other(compound, in_gap)

    # spans the whole compound, including the gap
    spanning = FeatureLocation(1, 20)
    assert not location_contains_other(compound, spanning)

    # entirely within the second part
    within = FeatureLocation(15, 18)
    assert location_contains_other(compound, within)

    # each of the compound's own parts is contained
    for section in compound.parts:
        assert location_contains_other(compound, section)
def test_position_conversion_compound_reverse(self):
    """ Checks self.func against reverse strand compound locations of
        two and three parts
    """
    two_parts = CompoundLocation([
        FeatureLocation(0, 6, strand=-1),
        FeatureLocation(9, 18, strand=-1),
    ])
    assert len(two_parts) == 15
    assert self.func(0, 4, two_parts) == (3, 18)
    assert self.func(1, 5, two_parts) == (0, 15)

    three_parts = CompoundLocation([
        FeatureLocation(0, 6, strand=-1),
        FeatureLocation(12, 15, strand=-1),
        FeatureLocation(21, 27, strand=-1),
    ])
    assert len(three_parts) == 15
    assert self.func(0, 4, three_parts) == (3, 27)
    assert self.func(1, 5, three_parts) == (0, 24)
    # exactly matches the middle part
    assert self.func(2, 3, three_parts) == (12, 15)
def test_position_conversion_nonzero_compound(self):
    """ Checks self.func against two-part compound locations that do not
        start at zero, one per strand
    """
    forward = CompoundLocation([
        FeatureLocation(6, 18, strand=1),
        FeatureLocation(24, 27, strand=1),
    ])
    assert len(forward) == 15
    assert self.func(0, 2, forward) == (6, 12)
    assert self.func(1, 4, forward) == (9, 18)
    # crosses the gap between the two parts
    assert self.func(3, 5, forward) == (15, 27)

    reverse = CompoundLocation([
        FeatureLocation(6, 15, strand=-1),
        FeatureLocation(21, 27, strand=-1),
    ])
    assert len(reverse) == 15
    assert self.func(0, 2, reverse) == (21, 27)
    # crosses the gap between the two parts
    assert self.func(1, 4, reverse) == (9, 24)
    assert self.func(3, 5, reverse) == (6, 12)
def test_compound(self):
    """ Checks that a two-part joined location keeps its bounds, membership
        and operator after being passed through self.convert
    """
    first = FeatureLocation(1, 6, strand=1)
    second = FeatureLocation(10, 16, strand=1)
    location = CompoundLocation([first, second], operator="join")
    assert 5 in location
    assert 7 not in location  # 7 lies in the gap between the parts
    assert 15 in location

    new_location = self.convert(location, expected_type=CompoundLocation)
    # the bounds must be checked on the converted location, otherwise
    # the conversion result itself is never verified
    assert new_location.start == 1
    assert 5 in new_location
    assert 7 not in new_location
    assert 15 in new_location
    assert new_location.end == 16
    assert new_location.operator == "join"

    # the input location must be unaffected by the conversion
    assert location.start == 1
    assert location.end == 16
def test_compound_in_simple(self):
    """ Checks whether a simple location contains various compound ones """
    container = FeatureLocation(10, 40)

    # both parts lie within the container, touching both of its edges
    inside = build_compound([(10, 20), (20, 40)], strand=1)
    assert location_contains_other(container, inside)

    # the extra third part lies beyond the end of the container
    overhanging = build_compound([(10, 20), (20, 40), (50, 60)], strand=1)
    assert not location_contains_other(container, overhanging)
def test_single(self):
    """ A single simple location given alone should come back as an equal,
        non-compound location, for both strands
    """
    for strand in (1, -1):
        original = FeatureLocation(6, 9, strand)
        rebuilt = build_location_from_others([original])
        # must be a plain FeatureLocation, not a compound one
        assert isinstance(rebuilt, FeatureLocation)
        assert not isinstance(rebuilt, CompoundLocation)
        assert rebuilt == original
def test_multi_cds_protein_location(self):
    """ Checks protein location access on a module built from domains
        with two different locus tags
    """
    domains = []
    for offset, tag in enumerate("AB"):
        domains.append(DummyAntismashDomain(locus_tag=tag,
                                            protein_start=offset,
                                            protein_end=offset + 5))
    module = create_module(domains=domains)
    assert module.is_multigene_module()

    # a single protein location cannot be given for the whole module
    with self.assertRaisesRegex(ValueError,
                                "cannot generate protein location for multi"):
        _ = module.protein_location

    # but it is available per parent locus tag
    location_of_a = module.get_parent_protein_location("A")
    assert location_of_a == FeatureLocation(0, 5)
    location_of_b = module.get_parent_protein_location("B")
    assert location_of_b == FeatureLocation(1, 6)

    # a locus tag not used by any domain is an error
    with self.assertRaisesRegex(ValueError, "has no parent"):
        module.get_parent_protein_location("C")
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                   leftovers: Dict[str, List[str]] = None,
                   record: Any = None) -> T:
    """ Builds an instance from a biopython SeqFeature and its qualifiers

        Arguments:
            bio_feature: the SeqFeature to convert
            feature: ignored on input, always replaced by a new instance
            leftovers: qualifiers not yet consumed, if not provided a copy
                       is made from the given SeqFeature
            record: passed through to the parent class conversion unchanged

        Returns:
            the instance as updated by the parent class conversion

        Raises:
            SecmetInvalidInputError: if no db_xref value starts with "PF"
            KeyError: if the description, protein_start, protein_end or
                      aSTool qualifier is missing
    """
    if leftovers is None:
        leftovers = Feature.make_qualifiers_copy(bio_feature)

    # mandatory qualifiers, consumed in a fixed order
    description = leftovers.pop("description")[0]
    protein_start = int(leftovers.pop("protein_start")[0])
    protein_end = int(leftovers.pop("protein_end")[0])

    # take only the first PF reference, leaving other references in place
    references = leftovers.get("db_xref", [])
    identifier = None
    for index, reference in enumerate(references):
        if not reference.startswith("PF"):
            continue
        identifier = reference
        references.pop(index)
        break
    if identifier is None:
        raise SecmetInvalidInputError("PFAMDomain missing identifier")

    tool = leftovers.pop("aSTool")[0]
    locus_tag = leftovers.pop("locus_tag", ["(unknown)"])[0]

    protein_location = FeatureLocation(protein_start, protein_end)
    feature = cls(bio_feature.location, description, protein_location,
                  identifier=identifier, tool=tool, locus_tag=locus_tag)

    # optional qualifiers
    ontologies = leftovers.pop("gene_ontologies", [])
    feature.gene_ontologies = GOQualifier.from_biopython(ontologies)
    # probability is read but not removed, so the parent still receives it
    if "probability" in leftovers:
        feature.probability = float(leftovers["probability"][0])

    # let the parent class handle whatever qualifiers remain
    updated = super().from_biopython(bio_feature, feature=feature,
                                     leftovers=leftovers, record=record)
    assert isinstance(updated, PFAMDomain)
    return updated
def generate_motif_features(feature: CDSFeature,
                            motifs: List[HMMResult]) -> List[CDSMotif]:
    """ Builds a CDSMotif feature for each provided HMMResult

        Arguments:
            feature: the CDSFeature the motifs were found in
            motifs: a list of HMMResults found in the CDSFeature

        Returns:
            a list of CDSMotif features, in the same order as the results
    """
    # prefer the locus tag over the generic name when one is set
    fallback_name = feature.get_name()
    locus_tag = feature.locus_tag if feature.locus_tag else fallback_name

    created = []
    # numbering is user facing, so 1-indexed
    for number, hit in enumerate(motifs, 1):
        start = hit.query_start
        end = hit.query_end
        location = feature.get_sub_location_from_protein_coordinates(start, end)
        protein_location = FeatureLocation(hit.query_start, hit.query_end)

        motif = CDSMotif(location, feature.get_name(), protein_location,
                         tool="nrps_pks_domains")
        motif.label = hit.hit_id
        motif.domain_id = 'nrpspksmotif_{}_{:04d}'.format(locus_tag, number)
        motif.evalue = hit.evalue
        motif.score = hit.bitscore
        motif.detection = "hmmscan"
        motif.database = "abmotifs"
        motif.locus_tag = locus_tag
        motif.translation = feature.translation[hit.query_start:hit.query_end]
        created.append(motif)
    return created
def test_simple_in_simple(self):
    """ Checks containment between two simple locations: strictly inside,
        sharing one edge, and sharing both edges
    """
    inner = FeatureLocation(5, 10)
    outer = FeatureLocation(1, 20)
    # clear contains
    assert location_contains_other(outer, inner)
    assert not location_contains_other(inner, outer)
    # on one edge
    outer = FeatureLocation(5, 20)
    assert location_contains_other(outer, inner)
    assert not location_contains_other(inner, outer)
    # on both edges: the locations are identical, so the two calls are
    # equivalent and each location must contain the other
    outer = FeatureLocation(5, 10)
    assert location_contains_other(outer, inner)
    assert location_contains_other(inner, outer)
def test_unknown_position(self):
    """ An unknown end position should keep its type through self.convert """
    start = ExactPosition(1)
    end = UnknownPosition()
    converted = self.convert(FeatureLocation(start, end, strand=1))

    assert isinstance(converted.start, ExactPosition)
    assert converted.start == 1
    assert isinstance(converted.end, UnknownPosition)
def test_after_position(self):
    """ An AfterPosition end should keep its type and value through
        self.convert
    """
    start = ExactPosition(1)
    end = AfterPosition(6)
    converted = self.convert(FeatureLocation(start, end, strand=1))

    assert isinstance(converted.start, ExactPosition)
    assert converted.start == 1
    assert isinstance(converted.end, AfterPosition)
    assert converted.end == 6
def test_before_position(self):
    """ A BeforePosition start should keep its type and value through
        self.convert, here on the reverse strand
    """
    start = BeforePosition(1)
    end = ExactPosition(6)
    converted = self.convert(FeatureLocation(start, end, strand=-1))

    assert isinstance(converted.start, BeforePosition)
    assert converted.start == 1
    assert isinstance(converted.end, ExactPosition)
    assert converted.end == 6
def test_some_merged(self):
    """ Only the two end-to-end locations (6-9 and 9-12) should be merged,
        the separated ones stay as distinct parts, for both strands
    """
    input_spans = [(1, 4), (6, 9), (9, 12), (15, 18)]
    merged_spans = [(1, 4), (6, 12), (15, 18)]
    for strand in (1, -1):
        originals = [FeatureLocation(start, end, strand)
                     for start, end in input_spans]
        combined = build_location_from_others(originals)
        assert isinstance(combined, CompoundLocation)

        expected = CompoundLocation([FeatureLocation(start, end, strand)
                                     for start, end in merged_spans])
        assert combined == expected
def test_compound_location(self):
    """ Checks the leader/core/tail locations produced from a prepeptide
        with a two-part location, then rebuilds it from the core feature
    """
    parts = [FeatureLocation(10, 50, 1), FeatureLocation(130, 180, 1)]
    split_location = CompoundLocation(parts, operator="join")
    original = Prepeptide(split_location, peptide_class="test_class",
                          core="coreseq...", locus_tag="loc",
                          tool="test tool", leader="10chleader",
                          tail="10chartail")

    leader, core, tail = original.to_biopython()

    leader_bounds = (leader.location.start, leader.location.end)
    assert leader_bounds[0] == 10
    assert leader_bounds[1] == 40

    # the core crosses the gap between the two parts, so stays compound
    assert isinstance(core.location, CompoundLocation)
    assert core.location.start == 40
    assert core.location.end == 150

    assert tail.location.start == 150
    assert tail.location.end == 180

    rebuilt = Prepeptide.from_biopython(core)
    assert str(rebuilt.location) == str(original.location)
def test_other(self):
    """ Checks self.func against a three-part compound location whose
        parts are listed in descending order, once per strand
    """
    spans = [(5922, 6190), (5741, 5877), (4952, 5682)]
    expectations = [
        (1, (5243, 6064)),
        (-1, (5078, 5854)),
    ]
    for strand, expected in expectations:
        location = CompoundLocation([FeatureLocation(start, end, strand=strand)
                                     for start, end in spans])
        assert self.func(97, 336, location) == expected
def test_basic_conversion(self):
    """ Converts a prepeptide to its leader/core/tail biopython features
        and checks that it can be rebuilt, but only from the core feature
    """
    original = Prepeptide(FeatureLocation(5, 95), peptide_class="test_class",
                          core="coreseq...", locus_tag="loc",
                          tool="test tool", peptide_subclass="test_subclass",
                          score=20.4, monoisotopic_mass=6.7,
                          molecular_weight=0.5,
                          alternative_weights=[5.2, 6.7, 20.5],
                          leader="leaderseq.", tail="tailseq...")

    leader, core, tail = original.to_biopython()

    assert leader.location.start == 5
    assert leader.location.end == 35
    assert leader.qualifiers["prepeptide"] == ["leader"]

    assert core.location.start == 35
    assert core.location.end == 65
    assert core.qualifiers["prepeptide"] == ["core"]

    assert tail.location.start == 65
    assert tail.location.end == 95
    assert tail.qualifiers["prepeptide"] == ["tail"]

    # neither of the flanking features can be used for reconstruction
    for flank in (leader, tail):
        with self.assertRaisesRegex(
                ValueError, "can only be reconstructed from core feature"):
            Prepeptide.from_biopython(flank)

    rebuilt = Prepeptide.from_biopython(core)
    assert isinstance(rebuilt, Prepeptide)
    assert str(rebuilt.location) == str(original.location)

    # everything else must survive the round trip unchanged
    preserved = [
        "peptide_class",
        "core",
        "locus_tag",
        "peptide_subclass",
        "score",
        "monoisotopic_mass",
        "molecular_weight",
        "alternative_weights",
        "leader",
        "tail",
    ]
    for attribute in preserved:
        assert getattr(rebuilt, attribute) == getattr(original, attribute)
def generate_domain_features(
        gene: CDSFeature,
        domains: List[HMMResult]) -> Dict[HMMResult, AntismashDomain]:
    """ Builds an AntismashDomain feature for each provided HMMResult

        Arguments:
            gene: the CDSFeature the domains were found in
            domains: a list of HMMResults found in the CDSFeature

        Returns:
            a dictionary mapping each HMMResult to the AntismashDomain
            created from it
    """
    features_by_result = {}
    seen = defaultdict(int)  # type: Dict[str, int]
    for result in domains:
        start = result.query_start
        end = result.query_end
        location = gene.get_sub_location_from_protein_coordinates(start, end)
        protein_location = FeatureLocation(result.query_start,
                                           result.query_end)

        # the core of the new feature
        created = AntismashDomain(location, tool="nrps_pks_domains",
                                  protein_location=protein_location,
                                  locus_tag=gene.get_name())
        created.domain = result.hit_id
        created.locus_tag = gene.locus_tag or gene.get_name()
        created.detection = "hmmscan"
        created.database = "nrpspksdomains.hmm"
        created.evalue = result.evalue
        created.score = result.bitscore
        created.translation = gene.translation[result.query_start:result.query_end]

        # numbering per hit id is 1-indexed, so count before naming
        seen[result.hit_id] += 1
        name = "{}_{}.{}".format(gene.get_name(), result.hit_id,
                                 seen[result.hit_id])
        created.domain_id = "nrpspksdomains_" + name
        created.label = name

        features_by_result[result] = created
    return features_by_result
def to_json(self) -> Dict[str, Any]:
    """ Builds a dictionary holding the names of the core CDS features,
        the product, and the start-to-end location in string form
    """
    core_names = [cds.name for cds in self.cores]
    product = self.product
    span = FeatureLocation(self.start, self.end)
    return {
        "core_cdses": core_names,
        "product": product,
        "location": str(span),
    }
def test_overlapping_exons(self, _patched_overlap):
    """ self.check should reject a feature reported as having
        overlapping exons
    """
    # NOTE(review): _patched_overlap is unused here and presumably comes
    # from a patch decorator outside this view - confirm
    feature = SeqFeature(FeatureLocation(5, 8, 1))
    with self.assertRaisesRegex(ValueError, "contains overlapping exons"):
        self.check([feature])
def test_outside_seq(self):
    """ self.check should reject a feature located at 50 to 140 as being
        outside the record sequence
    """
    # NOTE(review): the record length is set outside this view - confirm
    feature = SeqFeature(FeatureLocation(50, 140, 1))
    with self.assertRaisesRegex(ValueError,
                                "feature outside record sequence"):
        self.check([feature])
def test_bad_types(self):
    """ overlapping_exons should raise a TypeError for anything that is
        not a location, including a list that holds a location
    """
    invalid_inputs = [None, "loc", [FeatureLocation(10, 40)], 5]
    for invalid in invalid_inputs:
        with self.assertRaises(TypeError):
            overlapping_exons(invalid)
def test_non_compound(self):
    """ A simple location should never be reported as having
        overlapping exons
    """
    simple = FeatureLocation(10, 40)
    assert not overlapping_exons(simple)