Python Record 예제들, antismash.common.secmet.record.Record Python 예제들

예제 #1

0

파일 보기

파일: domain_identification.py 프로젝트: emzodls/antismash

    def annotate_domains(self, record: Record, cds: CDSFeature) -> None:
        """ Annotates the given CDS with this result's domains and motifs.

            Without domain HMMs nothing is changed. Otherwise the CDS' NRPS/PKS
            type is set and each generated domain feature is added to the
            record and registered in the CDS' NRPS/PKS qualifier. If motif HMMs
            are present, the generated motif features are added to the record
            and appended to the motifs of the CDS.

            Arguments:
                record: the record to add the new features to
                cds: the CDS feature to annotate
        """
        if not self.domain_hmms:
            return

        cds.nrps_pks.type = self.type

        # build the domain features, registering each with record and CDS
        generated = generate_domain_features(record, cds, self.domain_hmms)
        for hmm_hit, feature in generated.items():
            record.add_antismash_domain(feature)
            # keep the NRPS_PKS qualifier of the CDS in step with the record
            cds.nrps_pks.add_domain(hmm_hit, feature.get_name())

        # motifs are only handled once domains have been handled
        if self.motif_hmms:
            cds_motifs = generate_motif_features(record, cds, self.motif_hmms)
            for cds_motif in cds_motifs:
                record.add_cds_motif(cds_motif)
            cds.motifs.extend(cds_motifs)

예제 #2

0

파일 보기

 def add_to_record(self, record: Record) -> None:
     """ Builds a ModuleFeature for every module held in the CDS results and
         adds it to the record. A module that shows up in the results of
         more than one CDS is only added the first time it is seen.

         Arguments:
             record: the record to add the module features to
     """
     seen = set()
     for cds, cds_result in self.cds_results.items():
         for module in cds_result.modules:
             if module in seen:
                 continue
             seen.add(module)

             domains: List[AntismashDomain] = []
             for component in module:
                 # components located on another CDS have their domain
                 # feature stored in the results of that other CDS
                 owner = cds_result
                 if component.locus != cds.get_name():
                     other_cds = record.get_cds_by_name(component.locus)
                     owner = self.cds_results[other_cds]
                 domains.append(owner.domain_features[component.domain])

             if module.is_nrps():
                 kind = ModuleFeature.types.NRPS
             elif module.is_pks():
                 kind = ModuleFeature.types.PKS
             else:
                 kind = ModuleFeature.types.UNKNOWN

             record.add_module(ModuleFeature(
                 domains,
                 kind,
                 complete=module.is_complete(),
                 starter=module.is_starter_module(),
                 final=module.is_termination_module(),
                 iterative=module.is_iterative(),
             ))

예제 #3

0

파일 보기

파일: test_secmet.py 프로젝트: emzodls/antismash

 def test_cluster_numbering(self):
     """ Clusters added out of order must be numbered from 1 upwards,
         following their sorted order.
     """
     record = Record(Seq("A"*1000))
     locations = [(50, 100), (10, 40), (700, 1000), (0, 9)]
     for start, end in locations:
         record.add_cluster(helpers.DummyCluster(start, end))
     ordered = sorted(record.get_clusters())
     for expected_number, cluster in enumerate(ordered, 1):
         assert cluster.get_cluster_number() == expected_number

예제 #4

0

파일 보기

파일: domain_identification.py 프로젝트: SBGlab/antismash

    def annotate_domains(self, record: Record, cds: CDSFeature) -> None:
        """ Annotates the given CDS with this result's domains and motifs.

            Without domain HMMs nothing is changed. Otherwise the CDS' NRPS/PKS
            type is set, the generated domain features are stored on this
            result, added to the record and registered in the CDS' NRPS/PKS
            qualifier. Each "PKS_KS" domain consumes the next stored KS
            subtype, all other domains get an empty subtype. If motif HMMs are
            present, the generated motif features are added to the record and
            appended to the motifs of the CDS.

            Arguments:
                record: the record to add the new features to
                cds: the CDS feature to annotate
        """
        if not self.domain_hmms:
            return

        cds.nrps_pks.type = self.type

        self.domain_features = generate_domain_features(cds, self.domain_hmms)
        # subtypes are consumed in order, one for each KS domain encountered
        remaining_subtypes = iter(self.ks_subtypes)
        for hmm_hit, feature in self.domain_features.items():
            subtype = next(remaining_subtypes) if hmm_hit.hit_id == "PKS_KS" else ""
            record.add_antismash_domain(feature)
            # keep the NRPS_PKS qualifier of the CDS in step with the record
            cds.nrps_pks.add_domain(hmm_hit, feature.get_name(), subtype)

        # motifs are only handled once domains have been handled
        if self.motif_hmms:
            cds_motifs = generate_motif_features(cds, self.motif_hmms)
            for cds_motif in cds_motifs:
                record.add_cds_motif(cds_motif)
            cds.motifs.extend(cds_motifs)

예제 #5

0

파일 보기

    def test_blank_records(self):
        """ Neither a record without Pfam domains nor a record holding only a
            dummy Pfam domain should yield any gene ontologies.
        """
        without_pfams = DummyRecord()

        with_dummy_pfam = Record(Seq("ATGTTATGAGGGTCATAACAT", generic_dna))
        with_dummy_pfam.add_pfam_domain(DummyPFAMDomain(identifier="PF00000"))

        for record in (without_pfams, with_dummy_pfam):
            assert not pfam2go.get_gos_for_pfams(record)

예제 #6

0

파일 보기

파일: test_secmet.py 프로젝트: emzodls/antismash

 def setUp(self):
     """ Creates a record holding a single cluster spanning 100 to 900 and
         verifies that the cluster location matches those coordinates.
     """
     self.record = Record(Seq("A" * 1000))
     self.start, self.end = 100, 900
     self.cluster = self.create_cluster(self.start, self.end)
     self.record.add_cluster(self.cluster)
     # sanity check of the fixture itself
     location = self.cluster.location
     assert location.start == self.start
     assert location.end == self.end

예제 #7

0

파일 보기

 def test_bridge_in_linear_record(self):
     """ Converting a record marked as linear must fail with a ValueError
         about bridging features, for both the CDS and the gene fixture.
     """
     def expect_rejection():
         with self.assertRaisesRegex(ValueError, "Features that bridge"):
             Record.from_biopython(self.seqrec, taxon='bacteria')

     self.seqrec.annotations["topology"] = "linear"

     # first with the CDS fixture appended to the features
     self.seqrec.features.append(self.seqcds)
     expect_rejection()

     # then with the first feature replaced by the gene fixture
     self.seqrec.features[0] = self.seqgene
     expect_rejection()

예제 #8

0

파일 보기

파일: test_secmet.py 프로젝트: emzodls/antismash

    def test_orphaned_cluster_number(self):
        """ Asking for the number of a cluster that was never added to a
            record must raise a ValueError, whether asked via the record or
            via the cluster itself.
        """
        record = Record(Seq("A" * 1000))
        cluster = helpers.DummyCluster(0, 1000)
        message = "Cluster not contained in record"

        lookups = (
            lambda: record.get_cluster_number(cluster),
            cluster.get_cluster_number,
        )
        for lookup in lookups:
            with self.assertRaisesRegex(ValueError, message):
                print(lookup())

예제 #9

0

파일 보기

파일: test_circular_conversion.py 프로젝트: mibig-secmet/antismash-mibig

 def test_bridge_in_linear_record(self):
     """ Converting a record marked as linear must fail with a
         SecmetInvalidInputError about exon ordering, for both the CDS and
         the gene fixture.
     """
     def expect_rejection():
         with self.assertRaisesRegex(SecmetInvalidInputError,
                                     "cannot determine correct exon ordering"):
             Record.from_biopython(self.seqrec, taxon='fungi')

     self.seqrec.annotations["topology"] = "linear"

     # first with the CDS fixture appended to the features
     self.seqrec.features.append(self.seqcds)
     expect_rejection()

     # then with the first feature replaced by the gene fixture
     self.seqrec.features[0] = self.seqgene
     expect_rejection()

예제 #10

0

파일 보기

 def add_to_record(self, record: Record) -> None:
     """ Builds a ModuleFeature for every module of every CDS result and
         adds it to the record.

         Arguments:
             record: the record to add the module features to
     """
     for cds_result in self.cds_results.values():
         for module in cds_result.modules:
             domains = []
             for component in module:
                 domains.append(cds_result.domain_features[component.domain])

             if module.is_nrps():
                 kind = ModuleFeature.types.NRPS
             elif module.is_pks():
                 kind = ModuleFeature.types.PKS
             else:
                 kind = ModuleFeature.types.UNKNOWN

             record.add_module(ModuleFeature(
                 domains,
                 kind,
                 complete=module.is_complete(),
                 starter=module.is_starter_module(),
                 final=module.is_termination_module(),
                 iterative=module.is_iterative(),
             ))

예제 #11

0

파일 보기

파일: test_secmet.py 프로젝트: emzodls/antismash

 def test_overlapping_clusters(self):
     """ Adding a cluster that overlaps an existing one raises a ValueError,
         while a cluster that merely touches the existing one is accepted.
     """
     def build(start, end):
         return Cluster(FeatureLocation(start, end), 0, 0, [])

     record = Record(seq="A"*40)
     record.add_cluster(build(10, 40))

     # a single position of overlap is enough to be rejected
     with self.assertRaises(ValueError):
         record.add_cluster(build(0, 11))

     # ok, since ends aren't inclusive
     record.add_cluster(build(0, 10))

예제 #12

0

파일 보기

파일: test_pfam2go.py 프로젝트: zachcp/antismash

    def test_blank_records(self):
        """ Neither a record without Pfam domains nor a record holding only a
            placeholder Pfam domain should yield any gene ontologies.
        """
        without_pfams = DummyRecord()

        placeholder = PFAMDomain(location=FeatureLocation(0, 12),
                                 description='MCPsignal',
                                 protein_start=0,
                                 protein_end=5,
                                 identifier="PF00000",
                                 tool="test")
        placeholder.domain_id = 'BLANK'
        with_placeholder = Record(Seq("ATGTTATGAGGGTCATAACAT", generic_dna))
        with_placeholder.add_pfam_domain(placeholder)

        for record in (without_pfams, with_placeholder):
            assert not pfam2go.get_gos_for_pfams(record)

예제 #13

0

파일 보기

def generate_domains(record: Record) -> NRPSPKSDomains:
    """ Finds NRPS/PKS domains and motifs for all CDS features within
        clusters of the record and annotates those CDS features with them.

        CDS features with neither domains nor motifs are left out of the
        results. The returned results are flagged as already added.

        Arguments:
            record: the secmet.Record of which to annotate CDS features

        Returns:
            a NRPSPKSDomains instance holding a CDSResult for each CDS that
            had at least one domain or motif
    """
    results = NRPSPKSDomains(record.id)

    cds_features = record.get_cds_features_within_clusters()
    assert cds_features  # because every cluster should have genes

    fasta = get_fasta_from_features(cds_features)
    domains_by_name = find_domains(fasta, record)
    motifs_by_name = find_ab_motifs(fasta)

    for cds in cds_features:
        name = cds.get_name()
        domains = domains_by_name.get(name, [])
        motifs = motifs_by_name.get(name, [])
        if not domains and not motifs:
            continue
        hit_ids = [hit.hit_id for hit in domains]
        results.cds_results[cds] = CDSResult(domains, motifs,
                                             classify_cds(hit_ids))

    # annotation only starts once every CDS result has been gathered
    for cds, cds_result in results.cds_results.items():
        cds_result.annotate_domains(record, cds)
    results.added = True
    return results

예제 #14

0

파일 보기

    def from_json(json: Dict[str, Any],
                  record: Record) -> Optional["Pfam2GoResults"]:
        """ Rebuilds a Pfam2GoResults instance from its JSON representation
            and the record that was originally analysed.

            Arguments:
                json: JSON representation of Pfam2GoResults
                record: Record analysed

            Returns:
                A Pfam2GoResults instance constructed from the record and the
                JSON, or None if the schema version of the JSON doesn't match
        """
        if json["schema_version"] != Pfam2GoResults.schema_version:
            logging.warning(
                "Schema version mismatch, discarding Pfam2GO results")
            return None

        ontologies_by_domain = defaultdict(
            list)  # type: Dict[PFAMDomain, List[GeneOntologies]]
        for domain in record.get_pfam_domains():
            for versioned_id in domain.db_xref:
                # the JSON is keyed by Pfam ids without a version suffix
                pfam_id = versioned_id.split('.', 1)[0]
                if pfam_id not in json["pfams"]:
                    continue
                terms = []
                for go_id, go_description in json["pfams"][pfam_id].items():
                    terms.append(GeneOntology(go_id, go_description))
                ontologies_by_domain[domain].append(
                    GeneOntologies(pfam_id, terms))

        return Pfam2GoResults(record.id, ontologies_by_domain)

예제 #15

0

파일 보기

def get_gos_for_pfams(
        record: Record) -> Dict[PFAMDomain, List[GeneOntologies]]:
    """ Look up the Gene Ontology terms for each Pfam domain of a record.

    Pfam ids are taken from the db_xref of each domain, with any version
    suffix removed before the lookup.

    Arguments:
        record: Record instance to annotate with Gene Ontology information

    Returns:
        A dictionary mapping each PFAMDomain instance with at least one
        match to a list of the GeneOntologies found for its Pfam ids.

    Raises:
        ValueError: if an id, after removing the version, is not 'PF'
                    followed by five decimal digits
    """
    gos_by_domain = defaultdict(
        list)  # type: Dict[PFAMDomain, List[GeneOntologies]]
    pfams = record.get_pfam_domains()
    mapping_file = path.get_full_path(__file__, 'data',
                                      'pfam2go-march-2018.txt')
    go_mapping = construct_mapping(mapping_file)

    if not pfams:
        logging.debug(
            'No Pfam domains found in record, cannot create Pfam to Gene Ontology mapping'
        )

    for pfam in pfams:
        xrefs = pfam.db_xref
        if not xrefs:
            logging.debug(
                'No Pfam ids found in Pfam domain %s, cannot create Pfam to Gene Ontology mapping',
                pfam)
        for xref in xrefs:
            pfam_id = xref.partition('.')[0]  # strip out version number
            well_formed = (len(pfam_id) == 7
                           and pfam_id.startswith('PF')
                           and pfam_id[2:].isdecimal())
            if not well_formed:
                raise ValueError(
                    'Pfam id {} is not a valid Pfam id'.format(pfam_id))
            ontologies = go_mapping.get(pfam_id)
            if not ontologies:
                continue
            gos_by_domain[pfam].append(ontologies)
    return gos_by_domain

예제 #16

0

파일 보기

파일: test_secmet.py 프로젝트: emzodls/antismash

    def test_record_conversion_from_biopython(self):
        """ Round-trips the nisin genbank record through a secmet Record and
            checks that the features survive, that the original record is
            left untouched and that the per-type feature counts agree with
            the accessors of the Record.
        """
        parsed = Bio.SeqIO.parse(helpers.get_path_to_nisin_genbank(), "genbank")
        before = list(parsed)[0]

        type_counts = defaultdict(int)
        for feature in before.features:
            # sort notes, because direct comparisons otherwise are awful
            if "note" in feature.qualifiers:
                feature.qualifiers["note"] = sorted(feature.qualifiers["note"])
            type_counts[feature.type] += 1
        before_features = sorted(map(str, before.features))

        record = Record.from_biopython(before, taxon="bacteria")
        after = record.to_biopython()

        # ensure new features are correct
        assert len(before_features) == len(after.features)
        after_features = sorted(map(str, after.features))
        for expected, converted in zip(before_features, after_features):
            assert expected == converted

        # ensure we haven't changed the original record or feature list
        assert before is not after
        assert before.features is not after.features
        for index, original in enumerate(before.features):
            assert original is not after.features[index]
        unchanged = sorted(map(str, before.features))
        for expected, current in zip(before_features, unchanged):
            assert expected == current

        # ensure that the counts of each match
        assert type_counts["CDS"] == len(record.get_cds_features())
        assert type_counts["PFAM_domain"] == len(record.get_pfam_domains())
        assert type_counts["cluster"] == len(record.get_clusters())
        assert type_counts["aSDomain"] == len(record.get_antismash_domains())

예제 #17

0

파일 보기

def create_cluster_borders(anchor: str, clusters: List[ClusterPrediction],
                           record: Record) -> List[ClusterBorder]:
    """ Create the predicted ClusterBorders

        Arguments:
            anchor: the name of the anchor gene the predictions were made for
            clusters: the predictions to convert, the first of which is noted
                      as the best prediction and all others as alternatives
            record: the record in which to find the genes bordering each
                    prediction

        Returns:
            a list of ClusterBorders, one for each prediction and in the same
            order as the predictions

        Raises:
            ValueError: if the first or last gene of a prediction cannot be
                        found in the record
    """
    if not clusters:
        return []
    borders = []
    # the genes of the record don't change between predictions
    genes = record.get_genes()
    for i, cluster in enumerate(clusters):
        # cluster borders returned by hmmdetect are based on CDS features
        # in contrast, cluster borders returned by cassis are based on gene features
        # --> hmmdetect derived clusters have exact loctions, like the CDSs have
        # --> cassis derived clusters may have fuzzy locations, like the genes have
        left_name = cluster.start.gene
        right_name = cluster.end.gene
        left = None
        right = None
        for gene in genes:
            if gene.get_name() == left_name:
                left = gene
            if gene.get_name() == right_name:
                right = gene
            if left and right:
                break

        # fail with a clear message instead of an AttributeError on None
        missing = [str(name) for name, gene in ((left_name, left),
                                                (right_name, right))
                   if gene is None]
        if missing:
            raise ValueError(
                "cannot create cluster border for anchor gene {}, "
                "gene(s) not found in record: {}".format(anchor,
                                                         ", ".join(missing)))

        new_feature = SeqFeature(FeatureLocation(left.location.start,
                                                 right.location.end),
                                 type="cluster_border")
        new_feature.qualifiers = {
            "aStool": ["cassis"],
            "anchor": [anchor],
            "abundance": [cluster.start.abundance + cluster.end.abundance],
            "motif_score":
            ["{:.1e}".format(cluster.start.score + cluster.end.score)],
            "gene_left": [cluster.start.gene],
            "promoter_left": [cluster.start.promoter],
            "abundance_left": [cluster.start.abundance],
            "motif_left": [cluster.start.pairing_string],
            "motif_score_left": ["{:.1e}".format(cluster.start.score)],
            "gene_right": [cluster.end.gene],
            "promoter_right": [cluster.end.promoter],
            "abundance_right": [cluster.end.abundance],
            "motif_right": [cluster.end.pairing_string],
            "motif_score_right": ["{:.1e}".format(cluster.end.score)],
            "genes": [cluster.genes],
            "promoters": [cluster.promoters],
        }

        if i == 0:
            new_feature.qualifiers["note"] = [
                "best prediction (most abundant) for anchor gene {}".format(
                    anchor)
            ]
        else:
            new_feature.qualifiers["note"] = [
                "alternative prediction ({}) for anchor gene {}".format(
                    i, anchor)
            ]

        new_feature = ClusterBorder.from_biopython(new_feature)
        borders.append(new_feature)
    return borders

예제 #18

0

파일 보기

def get_gos_for_pfams(
        record: Record) -> Dict[PFAMDomain, List[GeneOntologies]]:
    """ Look up the Gene Ontology terms for each Pfam domain of a record.

    The lookup uses the identifier of each Pfam domain.

    Arguments:
        record: Record instance to annotate with Gene Ontology information

    Returns:
        A dictionary mapping each PFAMDomain instance with a match to a list
        holding the GeneOntologies found for its identifier.
    """
    gos_by_domain = defaultdict(
        list)  # type: Dict[PFAMDomain, List[GeneOntologies]]
    pfams = record.get_pfam_domains()
    mapping_file = path.get_full_path(__file__, 'data',
                                      'pfam2go-march-2018.txt')
    go_mapping = construct_mapping(mapping_file)

    if not pfams:
        logging.debug(
            'No Pfam domains found in record, cannot create Pfam to Gene Ontology mapping'
        )

    for pfam in pfams:
        ontologies = go_mapping.get(pfam.identifier)
        if not ontologies:
            continue
        gos_by_domain[pfam].append(ontologies)
    return gos_by_domain

예제 #19

0

파일 보기

 def test_blank_records(self):
     """ Records without Pfam domains, or with a Pfam domain lacking ids,
         yield no gene ontologies and log a debug message saying why.
     """
     without_pfams = DummyRecord()

     placeholder = PFAMDomain(location=FeatureLocation(0, 12),
                              description='MCPsignal',
                              protein_start=0,
                              protein_end=5)
     placeholder.domain_id = 'BLANK'
     without_ids = Record(Seq("ATGTTATGAGGGTCATAACAT", generic_dna))
     without_ids.add_pfam_domain(placeholder)

     # both lookups share a single log capture
     with self.assertLogs(level='DEBUG') as captured:
         found = pfam2go.get_gos_for_pfams(without_pfams)
         assert 'No Pfam domains found' in str(captured.output)
         assert not found

         found = pfam2go.get_gos_for_pfams(without_ids)
         assert 'No Pfam ids found' in str(captured.output)
         assert not found

예제 #20

0

파일 보기

파일: test_secmet.py 프로젝트: emzodls/antismash

 def test_cds_cluster_linkage(self):
     """ Each of the tested cluster locations must end up linked to exactly
         two of the CDS features, both of which overlap the cluster.
     """
     cds_locations = [(50, 100), (10, 90), (0, 9), (150, 200)]
     cluster_locations = [(10, 120), (5, 110), (10, 160), (45, 200)]

     record = Record("A"*200)
     for cds_start, cds_end in cds_locations:
         record.add_cds_feature(helpers.DummyCDS(cds_start, cds_end))

     for cluster_start, cluster_end in cluster_locations:
         # only one cluster at a time is present in the record
         record.clear_clusters()
         cluster = helpers.DummyCluster(cluster_start, cluster_end)
         record.add_cluster(cluster)
         assert len(cluster.cds_children) == 2
         for child in cluster.cds_children:
             assert child.overlaps_with(cluster)

예제 #21

0

파일 보기

파일: test_circular_conversion.py 프로젝트: mibig-secmet/antismash-mibig

    def test_cds_with_no_id(self):
        """ The CDS fixture must be converted into two CDS features, named
            "bridge_LOWER" and "bridge_UPPER", at the expected locations.
        """
        self.seqrec.features.append(self.seqcds)
        rec = Record.from_biopython(self.seqrec, taxon="bacteria")
        cdses = rec.get_cds_features()
        assert len(cdses) == 2

        lower, upper = cdses

        assert lower.location.start == 0
        assert lower.location.end == 9
        assert lower.get_name() == "bridge_LOWER"

        assert upper.location.start == 12
        assert upper.location.end == 21
        assert upper.get_name() == "bridge_UPPER"

예제 #22

0

파일 보기

def store_promoters(promoters: Iterable[Promoter], record: Record) -> None:
    """ Adds a "promoter" feature to the record for each given promoter.

        Each feature carries the gene names and sequence of its promoter,
        with combined promoters also noted as bidirectional. All features
        are marked as created by antiSMASH.

        Arguments:
            promoters: the promoters to store
            record: the record to add the features to
    """
    logging.critical("adding promoters based on biopython features")
    for promoter in promoters:
        # remember to account for 0-indexed start location
        start = max(0, promoter.start - 1)
        biopython_feature = SeqFeature(FeatureLocation(start, promoter.end),
                                       type="promoter")

        qualifiers = {
            # already a list with one or two elements
            "locus_tag": promoter.get_gene_names(),
            "seq": [str(promoter.seq)],  # TODO save string or Seq object?
        }
        if isinstance(promoter, CombinedPromoter):
            qualifiers["note"] = ["bidirectional promoter"]
        biopython_feature.qualifiers = qualifiers

        feature = Feature.from_biopython(biopython_feature)
        feature.created_by_antismash = True
        record.add_feature(feature)

예제 #23

0

파일 보기

    def setUp(self):
        """ Creates a record with one CDS on each strand, both inside a
            single cluster, and verifies the fixture before storing it.
        """
        # locations:            VVV         VVV
        record = Record(Seq("ATGTTATGAGGGTCATAACAT", generic_dna))

        for start, end, strand in [(0, 9, 1), (12, 21, -1)]:
            record.add_cds_feature(DummyCDS(start, end, strand=strand))

        cluster = Cluster(FeatureLocation(0, 21), 0, 0, [])
        record.add_cluster(cluster)

        # if these aren't correct, the tests will fail
        assert len(cluster.cds_children) == 2
        for cds in record.get_cds_features():
            location = str(cds.location)
            assert cds.overlaps_with(cluster)
            assert cds.cluster == cluster, location
            assert cds.extract(record.seq) == "ATGTTATGA", location

        self.record = record

예제 #24

0

파일 보기

    def from_json(json: Dict[str, Any], record: Record) -> Optional["NRPSPKSDomains"]:
        """ Rebuilds NRPSPKSDomains from JSON, annotating the CDS features of
            the record with the rebuilt results as it goes.

            Arguments:
                json: the JSON representation of the results
                record: the record the results belong to

            Returns:
                the rebuilt NRPSPKSDomains instance, or None if the schema
                version or the record id of the JSON doesn't match
        """
        if NRPSPKSDomains.schema_version != json.get("schema_version"):
            logging.warning("Schema version mismatch, discarding NRPS PKS domain results")
            return None
        if record.id != json.get("record_id"):
            logging.warning("Record identifier mismatch, discarding NRPS PKS domain results")
            return None

        rebuilt = {}
        for name, raw_result in json["cds_results"].items():
            cds = record.get_cds_by_name(name)
            result = CDSResult.from_json(raw_result)
            # the features held by the record have to be recreated as well
            result.annotate_domains(record, cds)
            rebuilt[cds] = result

        return NRPSPKSDomains(record.id, rebuilt)

예제 #25

0

파일 보기

def generate_domains(record: Record) -> NRPSPKSDomains:
    """ Finds NRPS/PKS domains, KS subtypes and motifs for all CDS features
        within regions of the record, builds modules from the domains and
        annotates those CDS features with the results.

        CDS features with neither domains nor motifs are left out of the
        results.

        Arguments:
            record: the secmet.Record of which to annotate CDS features

        Returns:
            a NRPSPKSDomains instance holding a CDSResult for each CDS that
            had at least one domain or motif
    """
    results = NRPSPKSDomains(record.id)

    cds_features = record.get_cds_features_within_regions()
    assert cds_features  # because every cluster should have genes

    fasta = get_fasta_from_features(cds_features)
    domains_by_name = find_domains(fasta, record)
    ks_subtypes_by_name = find_ks_domains(fasta)
    motifs_by_name = find_ab_motifs(fasta)

    previous: Optional[CDSModuleInfo] = None
    for cds in cds_features:
        name = cds.get_name()
        domains = domains_by_name.get(name, [])
        motifs = motifs_by_name.get(name, [])
        if not domains and not motifs:
            continue

        subtype_names = match_subtypes_to_ks_domains(
            domains, ks_subtypes_by_name.get(name, []))
        hit_ids = [hit.hit_id for hit in domains]
        domain_type = classify_cds(hit_ids, subtype_names)
        modules = build_modules_for_cds(domains, subtype_names, name)
        results.cds_results[cds] = CDSResult(domains, motifs, domain_type,
                                             modules, subtype_names)

        # combine modules that cross CDS boundaries, if possible and relevant
        current = CDSModuleInfo(cds, modules)
        if previous and previous.modules and current.modules:
            # modifies the lists of modules linked in each CDSResult
            combine_modules(current, previous)
        previous = current

    # annotation only starts once every CDS result has been gathered
    for cds, cds_result in results.cds_results.items():
        cds_result.annotate_domains(record, cds)
    return results

예제 #26

0

파일 보기

파일: test_circular_conversion.py 프로젝트: mibig-secmet/antismash-mibig

    def test_cds_split(self):
        """ The CDS fixture must be converted into two CDS features whose
            identifier gains a "_LOWER" or "_UPPER" suffix, whether the
            identifier comes from the locus tag or from the gene qualifier.
        """
        self.seqrec.features.append(self.seqcds)
        print(self.seqcds)
        expected_parts = [(0, 9, "test_LOWER"), (12, 21, "test_UPPER")]
        for id_name in ["locus_tag", "gene"]:
            self.seqcds.qualifiers[id_name] = ["test"]
            rec = Record.from_biopython(self.seqrec, taxon="bacteria")
            cdses = rec.get_cds_features()
            assert len(cdses) == 2

            for cds, (start, end, name) in zip(cdses, expected_parts):
                assert cds.location.start == start
                assert cds.location.end == end
                assert getattr(cds, id_name) == name
                assert cds.get_name() == name

            # leave the fixture clean for the next identifier
            self.seqcds.qualifiers.pop(id_name)

예제 #27

0

파일 보기

def get_anchor_gene_names(record: Record) -> List[str]:
    """ Collects the names of all CDS features of the record that have a
        core gene function.

        Requires that a CDS.get_name() returns the same name of its parent
        Gene.get_name()

        Arguments:
            record: the record to search

        Returns:
            a list of names, in the order the CDS features are provided by
            the record
    """
    return [cds.get_name() for cds in record.get_cds_features()
            if cds.gene_function == GeneFunction.CORE]

예제 #28

0

파일 보기

def filter_nonterminal_docking_domains(record: Record, cds_domains: Dict[str, List[HMMResult]]
                                       ) -> Dict[str, List[HMMResult]]:
    """ Drops docking domain hits that lie within neither the first 50 nor
        the last 50 positions of the translation of their CDS. Hits of all
        other kinds are always kept.

        Arguments:
            record: the record containing the CDS features named in the hits
            cds_domains: a dictionary mapping CDS name to the hits of that CDS

        Returns:
            a new dictionary mapping CDS name to the hits kept for that CDS,
            without entries for any CDS that has no hits left
    """
    docking_ids = {'NRPS-COM_Nterm', 'NRPS-COM_Cterm',
                   'PKS_Docking_Cterm', 'PKS_Docking_Nterm'}
    cds_by_name = record.get_cds_name_mapping()

    filtered = {}
    for cds_name, hits in cds_domains.items():
        protein_length = len(cds_by_name[cds_name].translation)
        kept = [
            hit for hit in hits
            if hit.hit_id not in docking_ids
            or protein_length - max(hit.query_start, hit.query_end) < 50
            or min(hit.query_start, hit.query_end) < 50
        ]
        if kept:
            filtered[cds_name] = kept
    return filtered

예제 #29

0

파일 보기

파일: test_circular_conversion.py 프로젝트: mibig-secmet/antismash-mibig

    def test_gene_split(self):
        """ The gene fixture must be converted into two genes whose
            identifier gains an "_UPPER" or "_LOWER" suffix, whether the
            identifier comes from the locus tag or from the gene qualifier.
        """
        self.seqrec.features.append(self.seqgene)
        for qualifier in ["locus_tag", "gene"]:
            expected = qualifier + "_test"
            self.seqgene.qualifiers[qualifier] = [expected]
            rec = Record.from_biopython(self.seqrec, taxon="bacteria")
            self.seqgene.qualifiers.pop(qualifier)
            genes = rec.get_genes()
            assert len(genes) == 2

            # since a Gene doesn't have a gene member
            attribute = "gene_name" if qualifier == "gene" else qualifier

            upper, lower = genes

            assert upper.location.start == 12
            assert upper.location.end == 21
            assert getattr(upper, attribute) == expected + "_UPPER"
            assert upper.get_name() == expected + "_UPPER"

            assert lower.location.start == 0
            assert lower.location.end == 9
            assert getattr(lower, attribute) == expected + "_LOWER"
            assert lower.get_name() == expected + "_LOWER"

예제 #30

0

파일 보기

파일: test_tta.py 프로젝트: SBGlab/antismash

    def setUp(self):
        """ Creates a record with one CDS on each strand, both inside a
            single protocluster, builds candidate clusters and regions from
            it and verifies the fixture before storing it.
        """
        # locations:            VVV         VVV
        record = Record(Seq("ATGTTATGAGGGTCATAACAT"))

        for start, end, strand in [(0, 9, 1), (12, 21, -1)]:
            record.add_cds_feature(DummyCDS(start, end, strand=strand))

        protocluster = DummyProtocluster(start=0, end=21)
        record.add_protocluster(protocluster)
        record.create_candidate_clusters()
        record.create_regions()

        # if these aren't correct, the tests will fail
        assert len(protocluster.cds_children) == 2
        assert len(record.get_regions()) == 1
        for cds in record.get_cds_features():
            assert cds.is_contained_by(protocluster)
            assert cds.extract(record.seq) == "ATGTTATGA", str(cds.location)

        self.record = record