Esempio n. 1
0
    def test_simple_simple(self):
        """ Checks locations_overlap for pairs of simple locations, on the
            same strand and on opposite strands
        """
        cases = [
            # same strand: disjoint, enclosing, partially overlapping
            (FeatureLocation(1, 5, strand=1), FeatureLocation(10, 15, strand=1), False),
            (FeatureLocation(1, 25, strand=1), FeatureLocation(10, 15, strand=1), True),
            (FeatureLocation(1, 12, strand=1), FeatureLocation(10, 15, strand=1), True),
            # opposite strands: overlapping, then merely adjacent
            (FeatureLocation(12, 22, strand=-1), FeatureLocation(10, 15, strand=1), True),
            (FeatureLocation(12, 22, strand=-1), FeatureLocation(10, 12, strand=1), False),
        ]
        for first, second, expected in cases:
            assert bool(locations_overlap(first, second)) == expected
Esempio n. 2
0
 def test_single_compound(self):
     """ A single compound location given to build_location_from_others
         must come back as an equal location, for either strand
     """
     for strand in (1, -1):
         parts = [FeatureLocation(6, 9, strand), FeatureLocation(12, 16, strand)]
         original = CompoundLocation(parts)
         rebuilt = build_location_from_others([original])
         assert rebuilt == original
Esempio n. 3
0
 def test_all_merged(self):
     """ Three abutting locations must be combined into one simple location """
     spans = [(6, 9), (9, 12), (12, 16)]
     for strand in (1, -1):
         locations = [FeatureLocation(start, end, strand) for start, end in spans]
         merged = build_location_from_others(locations)
         # the result must be a plain FeatureLocation, not a compound one
         assert isinstance(merged, FeatureLocation)
         assert not isinstance(merged, CompoundLocation)
         assert merged == FeatureLocation(6, 16, strand)
Esempio n. 4
0
    def test_position_conversion_nonzero_start(self):
        """ Checks the positions self.func returns for a simple location
            that does not start at zero, on both strands
        """
        scenarios = [
            (1, [((0, 2), (6, 12)), ((1, 4), (9, 18))]),
            (-1, [((0, 2), (15, 21)), ((1, 4), (9, 18))]),
        ]
        for strand, conversions in scenarios:
            location = FeatureLocation(6, 21, strand=strand)
            assert len(location) == 15
            for (start, end), expected in conversions:
                assert self.func(start, end, location) == expected
 def setUp(self):
     """ Creates a 21-long record marked as circular, plus a CDS and a gene
         feature sharing one joined location whose parts are listed as
         12-21 followed by 0-9
     """
     parts = [FeatureLocation(12, 21, strand=1), FeatureLocation(0, 9, strand=1)]
     joined = CompoundLocation(parts, operator="join")

     record = SeqRecord(UnknownSeq(21))
     record.annotations["topology"] = "circular"

     self.seqrec = record
     # both features deliberately share the same location object
     self.seqcds = SeqFeature(joined, type="CDS")
     self.seqgene = SeqFeature(joined, type="gene")
Esempio n. 6
0
    def test_mixed(self):
        """ Checks locations_overlap between a simple and a compound location,
            with the arguments in both orders
        """
        compound = build_compound([(0, 10), (20, 30), (40, 50)], strand=1)
        cases = [
            ((15, 17), False),  # between the first and second parts
            ((22, 25), True),   # inside the second part
            ((35, 45), True),   # runs into the third part
        ]
        for (start, end), expected in cases:
            simple = FeatureLocation(start, end)
            assert bool(locations_overlap(simple, compound)) == expected
            assert bool(locations_overlap(compound, simple)) == expected
Esempio n. 7
0
 def setUp(self):
     """ Creates a 21-base DNA record marked as circular, plus a CDS and a
         gene feature sharing one joined four-part location whose parts are
         listed as 12-15, 18-21, 0-3 and 6-9
     """
     spans = [(12, 15), (18, 21), (0, 3), (6, 9)]
     parts = [FeatureLocation(start, end, strand=1) for start, end in spans]
     joined = CompoundLocation(parts, operator="join")

     record = SeqRecord(Seq("A" * 21))
     record.annotations["topology"] = "circular"
     record.annotations["molecule_type"] = "DNA"

     self.seqrec = record
     # both features deliberately share the same location object
     self.seqcds = SeqFeature(joined, type="CDS")
     self.seqgene = SeqFeature(joined, type="gene")
Esempio n. 8
0
    def test_simple_in_compound(self):
        """ Checks location_contains_other with a compound outer location
            and simple inner locations
        """
        compound = build_compound([(1, 4), (12, 20)], strand=1)

        # sits entirely in the gap between the two parts
        in_gap = FeatureLocation(5, 10)
        assert not location_contains_other(compound, in_gap)

        # spans both parts and the gap between them
        spanning = FeatureLocation(1, 20)
        assert not location_contains_other(compound, spanning)

        # sits entirely inside the second part
        within_part = FeatureLocation(15, 18)
        assert location_contains_other(compound, within_part)

        # every part of the compound is contained by the compound itself
        assert all(location_contains_other(compound, part) for part in compound.parts)
Esempio n. 9
0
    def test_position_conversion_compound_reverse(self):
        """ Checks the positions self.func returns for reverse-strand
            compound locations of two and three parts
        """
        two_part = CompoundLocation([FeatureLocation(0, 6, strand=-1),
                                     FeatureLocation(9, 18, strand=-1)])
        three_part = CompoundLocation([FeatureLocation(0, 6, strand=-1),
                                       FeatureLocation(12, 15, strand=-1),
                                       FeatureLocation(21, 27, strand=-1)])
        scenarios = [
            (two_part, [((0, 4), (3, 18)), ((1, 5), (0, 15))]),
            (three_part, [((0, 4), (3, 27)), ((1, 5), (0, 24)), ((2, 3), (12, 15))]),
        ]
        for location, conversions in scenarios:
            assert len(location) == 15
            for (start, end), expected in conversions:
                assert self.func(start, end, location) == expected
Esempio n. 10
0
    def test_position_conversion_nonzero_compound(self):
        """ Checks the positions self.func returns for compound locations
            that do not start at zero, on both strands
        """
        forward = CompoundLocation([FeatureLocation(6, 18, strand=1),
                                    FeatureLocation(24, 27, strand=1)])
        reverse = CompoundLocation([FeatureLocation(6, 15, strand=-1),
                                    FeatureLocation(21, 27, strand=-1)])
        scenarios = [
            (forward, [((0, 2), (6, 12)), ((1, 4), (9, 18)), ((3, 5), (15, 27))]),
            (reverse, [((0, 2), (21, 27)), ((1, 4), (9, 24)), ((3, 5), (6, 12))]),
        ]
        for location, conversions in scenarios:
            assert len(location) == 15
            for (start, end), expected in conversions:
                assert self.func(start, end, location) == expected
Esempio n. 11
0
    def test_compound(self):
        """ Checks that converting a joined compound location with
            self.convert gives a compound location covering the same
            positions, with the same boundaries and operator
        """
        first = FeatureLocation(1, 6, strand=1)
        second = FeatureLocation(10, 16, strand=1)
        location = CompoundLocation([first, second], operator="join")
        assert 5 in location
        assert 7 not in location
        assert 15 in location

        new_location = self.convert(location, expected_type=CompoundLocation)

        # the input location must be left as it was
        assert location.start == 1
        assert location.end == 16

        # the converted location must have the same boundaries; membership
        # checks alone would not notice a shifted start or end
        assert new_location.start == 1
        assert 5 in new_location
        assert 7 not in new_location
        assert 15 in new_location
        assert new_location.end == 16
        assert new_location.operator == "join"
Esempio n. 12
0
    def test_compound_in_simple(self):
        """ Checks location_contains_other with a simple outer location
            and compound inner locations
        """
        simple = FeatureLocation(10, 40)

        # both parts lie within the simple location
        enclosed = build_compound([(10, 20), (20, 40)], strand=1)
        assert location_contains_other(simple, enclosed)

        # the final part lies beyond the end of the simple location
        overhanging = build_compound([(10, 20), (20, 40), (50, 60)], strand=1)
        assert not location_contains_other(simple, overhanging)
Esempio n. 13
0
 def test_single(self):
     """ A single simple location given to build_location_from_others must
         come back as an equal, non-compound location, for either strand
     """
     for strand in (1, -1):
         original = FeatureLocation(6, 9, strand)
         rebuilt = build_location_from_others([original])
         assert isinstance(rebuilt, FeatureLocation)
         assert not isinstance(rebuilt, CompoundLocation)
         assert rebuilt == original
Esempio n. 14
0
    def test_multi_cds_protein_location(self):
        """ Checks protein location handling of a module built from domains
            with two different locus tags
        """
        domains = []
        for offset, tag in enumerate("AB"):
            domains.append(DummyAntismashDomain(locus_tag=tag,
                                                protein_start=offset,
                                                protein_end=offset + 5))
        module = create_module(domains=domains)
        assert module.is_multigene_module()

        # a single protein location cannot be given for a multigene module
        with self.assertRaisesRegex(ValueError, "cannot generate protein location for multi"):
            _ = module.protein_location

        # but each parent's own protein location is available by name
        expected_by_parent = [("A", FeatureLocation(0, 5)), ("B", FeatureLocation(1, 6))]
        for parent, expected in expected_by_parent:
            assert module.get_parent_protein_location(parent) == expected

        # and an unknown parent name is rejected
        with self.assertRaisesRegex(ValueError, "has no parent"):
            module.get_parent_protein_location("C")
Esempio n. 15
0
    def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                       leftovers: Dict[str, List[str]] = None, record: Any = None) -> T:
        """ Builds an instance of this class from a biopython SeqFeature

            Arguments:
                bio_feature: the SeqFeature to convert
                feature: not read here, a new instance is always constructed
                         and passed on to the parent implementation
                leftovers: the qualifiers still to be consumed, if not given
                           a copy is made with Feature.make_qualifiers_copy
                record: passed through unchanged to the parent implementation

            Returns:
                the feature returned by the parent implementation

            Raises:
                SecmetInvalidInputError: if no "db_xref" qualifier value
                                         starts with "PF"
        """
        if leftovers is None:
            leftovers = Feature.make_qualifiers_copy(bio_feature)
        # grab mandatory qualifiers and create the class
        description = leftovers.pop("description")[0]
        p_start = int(leftovers.pop("protein_start")[0])
        p_end = int(leftovers.pop("protein_end")[0])
        xref = leftovers.get("db_xref", [])  # only remove the interesting part
        name = None
        for i, ref in enumerate(xref):
            if ref.startswith("PF"):
                name = ref
                # remove only the matching reference, leaving every other
                # db_xref in place, and stop looking once it has been found
                xref.pop(i)
                break
        if name is None:
            raise SecmetInvalidInputError("PFAMDomain missing identifier")
        tool = leftovers.pop("aSTool")[0]
        locus_tag = leftovers.pop("locus_tag", ["(unknown)"])[0]

        feature = cls(bio_feature.location, description, FeatureLocation(p_start, p_end),
                      identifier=name, tool=tool, locus_tag=locus_tag)

        # grab optional qualifiers
        feature.gene_ontologies = GOQualifier.from_biopython(leftovers.pop("gene_ontologies", []))
        if "probability" in leftovers:
            feature.probability = float(leftovers["probability"][0])

        # grab parent optional qualifiers
        updated = super().from_biopython(bio_feature, feature=feature, leftovers=leftovers, record=record)
        assert isinstance(updated, PFAMDomain)
        return updated
Esempio n. 16
0
def generate_motif_features(feature: CDSFeature,
                            motifs: List[HMMResult]) -> List[CDSMotif]:
    """ Builds a CDSMotif feature for each of the provided HMMResults

        Arguments:
            feature: the CDSFeature the motifs were found in
            motifs: the HMMResults to convert

        Returns:
            a list of CDSMotif features, one per HMMResult, in the same order
    """
    # the locus tag is preferred, the feature's name is only a fallback
    fallback_name = feature.get_name()
    locus_tag = feature.locus_tag if feature.locus_tag else fallback_name

    converted = []
    # numbering is user facing, so counting starts at 1
    for number, hit in enumerate(motifs, start=1):
        dna_location = feature.get_sub_location_from_protein_coordinates(
            hit.query_start, hit.query_end)
        protein_location = FeatureLocation(hit.query_start, hit.query_end)
        cds_motif = CDSMotif(dna_location, feature.get_name(), protein_location,
                             tool="nrps_pks_domains")
        cds_motif.label = hit.hit_id
        cds_motif.domain_id = 'nrpspksmotif_{}_{:04d}'.format(locus_tag, number)
        cds_motif.evalue = hit.evalue
        cds_motif.score = hit.bitscore
        cds_motif.detection = "hmmscan"
        cds_motif.database = "abmotifs"
        cds_motif.locus_tag = locus_tag

        # the slice of the translation covered by the hit
        cds_motif.translation = feature.translation[hit.query_start:hit.query_end]

        converted.append(cds_motif)
    return converted
Esempio n. 17
0
    def test_simple_in_simple(self):
        """ Checks location_contains_other for pairs of simple locations:
            strictly inside, sharing one edge, and sharing both edges
        """
        inner = FeatureLocation(5, 10)
        outer = FeatureLocation(1, 20)

        # clear contains
        assert location_contains_other(outer, inner)
        assert not location_contains_other(inner, outer)

        # on one edge
        outer = FeatureLocation(5, 20)
        assert location_contains_other(outer, inner)
        assert not location_contains_other(inner, outer)

        # on both edges: the locations cover the same span, so each one
        # contains the other
        outer = FeatureLocation(5, 10)
        assert location_contains_other(outer, inner)
        assert location_contains_other(inner, outer)
Esempio n. 18
0
    def test_unknown_position(self):
        """ Checks that self.convert keeps an exact start and an unknown end """
        original = FeatureLocation(ExactPosition(1), UnknownPosition(), strand=1)
        converted = self.convert(original)

        start = converted.start
        assert isinstance(start, ExactPosition)
        assert start == 1

        assert isinstance(converted.end, UnknownPosition)
Esempio n. 19
0
    def test_after_position(self):
        """ Checks that self.convert keeps an exact start and an
            AfterPosition end, including their values
        """
        original = FeatureLocation(ExactPosition(1), AfterPosition(6), strand=1)
        converted = self.convert(original)

        start = converted.start
        assert isinstance(start, ExactPosition)
        assert start == 1

        end = converted.end
        assert isinstance(end, AfterPosition)
        assert end == 6
Esempio n. 20
0
    def test_before_position(self):
        """ Checks that self.convert keeps a BeforePosition start and an
            exact end, including their values, on the reverse strand
        """
        original = FeatureLocation(BeforePosition(1), ExactPosition(6), strand=-1)
        converted = self.convert(original)

        start = converted.start
        assert isinstance(start, BeforePosition)
        assert start == 1

        end = converted.end
        assert isinstance(end, ExactPosition)
        assert end == 6
Esempio n. 21
0
 def test_some_merged(self):
     """ Only the abutting pair of locations is combined, the result stays
         a compound location, for either strand
     """
     given_spans = [(1, 4), (6, 9), (9, 12), (15, 18)]
     expected_spans = [(1, 4), (6, 12), (15, 18)]
     for strand in (1, -1):
         locations = [FeatureLocation(start, end, strand) for start, end in given_spans]
         merged = build_location_from_others(locations)
         assert isinstance(merged, CompoundLocation)
         expected = CompoundLocation([FeatureLocation(start, end, strand)
                                      for start, end in expected_spans])
         assert merged == expected
Esempio n. 22
0
    def test_compound_location(self):
        """ Checks the leader, core and tail locations generated for a
            Prepeptide with a two-part location, and that the location
            survives reconstruction from the core feature
        """
        location = CompoundLocation([FeatureLocation(10, 50, 1),
                                     FeatureLocation(130, 180, 1)],
                                    operator="join")
        old = Prepeptide(location, peptide_class="test_class", core="coreseq...",
                         locus_tag="loc", tool="test tool",
                         leader="10chleader", tail="10chartail")

        leader, core, tail = old.to_biopython()

        leader_loc = leader.location
        assert leader_loc.start == 10
        assert leader_loc.end == 40

        # the core covers the gap between the two parts, so must be compound
        core_loc = core.location
        assert isinstance(core_loc, CompoundLocation)
        assert core_loc.start == 40
        assert core_loc.end == 150

        tail_loc = tail.location
        assert tail_loc.start == 150
        assert tail_loc.end == 180

        rebuilt = Prepeptide.from_biopython(core)
        assert str(rebuilt.location) == str(old.location)
Esempio n. 23
0
    def test_other(self):
        """ Checks the positions self.func returns for a three-part compound
            location whose parts are listed in descending order, on both strands
        """
        spans = [(5922, 6190), (5741, 5877), (4952, 5682)]
        expected_by_strand = [(1, (5243, 6064)), (-1, (5078, 5854))]
        for strand, expected in expected_by_strand:
            location = CompoundLocation([FeatureLocation(start, end, strand=strand)
                                         for start, end in spans])
            assert self.func(97, 336, location) == expected
Esempio n. 24
0
    def test_basic_conversion(self):
        """ Checks conversion of a Prepeptide to biopython features and its
            reconstruction from the core feature
        """
        old = Prepeptide(FeatureLocation(5, 95), peptide_class="test_class",
                         core="coreseq...", locus_tag="loc", tool="test tool",
                         peptide_subclass="test_subclass", score=20.4,
                         monoisotopic_mass=6.7, molecular_weight=0.5,
                         alternative_weights=[5.2, 6.7, 20.5],
                         leader="leaderseq.", tail="tailseq...")
        leader, core, tail = old.to_biopython()

        # each section gets its own feature, location and qualifier
        sections = [
            (leader, "leader", 5, 35),
            (core, "core", 35, 65),
            (tail, "tail", 65, 95),
        ]
        for section, label, start, end in sections:
            assert section.location.start == start
            assert section.location.end == end
            assert section.qualifiers["prepeptide"] == [label]

        # only the core feature can be used for reconstruction
        for non_core in (leader, tail):
            with self.assertRaisesRegex(ValueError, "can only be reconstructed from core feature"):
                Prepeptide.from_biopython(non_core)

        new = Prepeptide.from_biopython(core)
        assert isinstance(new, Prepeptide)
        assert str(new.location) == str(old.location)
        preserved = ("peptide_class", "core", "locus_tag", "peptide_subclass",
                     "score", "monoisotopic_mass", "molecular_weight",
                     "alternative_weights", "leader", "tail")
        for attribute in preserved:
            assert getattr(new, attribute) == getattr(old, attribute)
Esempio n. 25
0
def generate_domain_features(
        gene: CDSFeature,
        domains: List[HMMResult]) -> Dict[HMMResult, AntismashDomain]:
    """ Builds an AntismashDomain feature for each of the provided HMMResults

        Arguments:
            gene: the CDSFeature the domains were found in
            domains: a list of HMMResults found in the CDSFeature

        Returns:
            a dictionary mapping each HMMResult to the AntismashDomain
            created for it
    """
    features_by_result = {}
    seen_per_hit = defaultdict(int)  # type: Dict[str, int]
    for result in domains:
        start = result.query_start
        end = result.query_end
        dna_location = gene.get_sub_location_from_protein_coordinates(start, end)
        protein_location = FeatureLocation(start, end)

        # set up new feature
        domain_feature = AntismashDomain(dna_location, tool="nrps_pks_domains",
                                         protein_location=protein_location,
                                         locus_tag=gene.get_name())
        domain_feature.domain = result.hit_id
        domain_feature.locus_tag = gene.locus_tag or gene.get_name()
        domain_feature.detection = "hmmscan"
        domain_feature.database = "nrpspksdomains.hmm"
        domain_feature.evalue = result.evalue
        domain_feature.score = result.bitscore

        # the slice of the translation covered by the hit
        domain_feature.translation = gene.translation[start:end]

        # counts are 1-indexed, so increment before building the name
        seen_per_hit[result.hit_id] += 1
        domain_name = "{}_{}.{}".format(gene.get_name(), result.hit_id,
                                        seen_per_hit[result.hit_id])

        domain_feature.domain_id = "nrpspksdomains_" + domain_name
        domain_feature.label = domain_name

        features_by_result[result] = domain_feature
    return features_by_result
Esempio n. 26
0
 def to_json(self) -> Dict[str, Any]:
     """ Returns a dictionary holding the names of the core CDSes, the
         product, and the string form of a location from start to end
     """
     core_names = [cds.name for cds in self.cores]
     product = self.product
     location = FeatureLocation(self.start, self.end)
     return {
         "core_cdses": core_names,
         "product": product,
         "location": str(location),
     }
Esempio n. 27
0
 def test_overlapping_exons(self, _patched_overlap):
     """ self.check must raise a ValueError about overlapping exons

         NOTE(review): _patched_overlap is unused in the body, presumably it
         is supplied by a patch decorator outside this view - confirm
     """
     exon = SeqFeature(FeatureLocation(5, 8, 1))
     with self.assertRaisesRegex(ValueError, "contains overlapping exons"):
         self.check([exon])
Esempio n. 28
0
 def test_outside_seq(self):
     """ self.check must raise a ValueError for a feature at 50-140 """
     outside = SeqFeature(FeatureLocation(50, 140, 1))
     with self.assertRaisesRegex(ValueError, "feature outside record sequence"):
         self.check([outside])
Esempio n. 29
0
 def test_bad_types(self):
     """ overlapping_exons must raise a TypeError for each of these inputs,
         including a list containing a location
     """
     invalid_inputs = (None, "loc", [FeatureLocation(10, 40)], 5)
     for invalid in invalid_inputs:
         with self.assertRaises(TypeError):
             overlapping_exons(invalid)
Esempio n. 30
0
 def test_non_compound(self):
     """ A simple location must not be reported as having overlapping exons """
     simple = FeatureLocation(10, 40)
     assert not overlapping_exons(simple)