def run_both_dirs(self, expected, seq):
    """ Checks ORF detection on the given sequence and on its reverse complement

        Arguments:
            expected: the locations expected from find_all_orfs for the
                      sequence as given, in the order they should be reported
            seq: the sequence to build the dummy record from

        The expectations for the reverse complement are derived from
        `expected` by reversing its order and flipping each location
        around the sequence length.
    """
    record = DummyRecord(seq=seq)
    forward_locations = [orf.location for orf in find_all_orfs(record)]
    assert expected == forward_locations

    # same record, opposite strand
    record.seq = record.seq.reverse_complement()
    seq_length = len(seq)
    expected = [location._flip(seq_length) for location in reversed(expected)]
    reverse_locations = [orf.location for orf in find_all_orfs(record)]
    assert expected == reverse_locations
def test_multi_start_single_stop(self):
    """ Two in-frame start codons (positions 0 and 6) share a single stop
        codon; exactly one ORF, spanning the full 72 bases, is expected on
        the forward strand and again on the reverse strand once the sequence
        is reverse complemented.
    """
    forward_seq = "ATGNNNATG" + "N" * 60 + "TAG"
    expected = [
        FeatureLocation(ExactPosition(0), ExactPosition(72), strand=1),
    ]
    forward_record = DummyRecord(seq=forward_seq)
    found = [orf.location for orf in find_all_orfs(forward_record)]
    assert expected == found

    # same coordinates, but the ORF now sits on the opposite strand
    reverse_seq = str(DummyRecord(seq=forward_seq).seq.reverse_complement())
    expected[0].strand = -1
    reverse_record = DummyRecord(seq=reverse_seq)
    found = [orf.location for orf in find_all_orfs(reverse_record)]
    assert expected == found
def specific_analysis(record: secmet.Record) -> ThioResults:
    """ Runs thiopeptide prediction over every CDS in each thiopeptide
        protocluster, plus any extra ORFs found by all_orfs within that
        protocluster.

        Arguments:
            record: the Record to analyse

        Returns:
            a ThioResults instance holding the motifs found, the
            protoclusters containing them, and any extra ORFs that
            produced a motif
    """
    results = ThioResults(record.id)

    # lazily restrict the iteration to thiopeptide protoclusters
    thiopeptide_clusters = (
        protocluster for protocluster in record.get_protoclusters()
        if protocluster.product == "thiopeptide"
    )
    for cluster in thiopeptide_clusters:
        # ORFs in the protocluster that aren't annotated yet
        extra_orfs = all_orfs.find_all_orfs(record, cluster)
        candidates = [*cluster.cds_children, *extra_orfs]

        domains = get_detected_domains(cluster)
        thio_type = predict_type_from_cluster(domains)
        amidation = predict_amidation(domains)

        for candidate in candidates:
            prediction = run_thiopred(candidate, thio_type, domains)
            if prediction is None:
                continue

            if amidation:
                prediction.amidation = True

            motif = result_vec_to_feature(candidate, prediction)
            # only extra ORFs need tracking as new CDS features
            if candidate in extra_orfs:
                protocluster_number = cluster.get_protocluster_number()
                results.cds_features[protocluster_number].append(candidate)
            results.motifs.append(motif)
            results.clusters_with_motifs.add(cluster)

    logging.debug("Thiopeptides marked %d motifs", len(results.motifs))
    return results
def specific_analysis(record: Record) -> LassoResults:
    """ Runs the lassopeptide analysis over each lassopeptide cluster
        in the given record

        Arguments:
            record: the Record instance to analyse

        Returns:
            a LassoResults object populated with the motifs found per locus,
            the loci with motifs per cluster, and any candidates with motifs
            that have no region set
    """
    results = LassoResults(record.id)
    motif_count = 0

    lasso_clusters = (
        cluster for cluster in record.get_clusters()
        if cluster.product == 'lassopeptide'
    )
    for cluster in lasso_clusters:
        # existing CDS features first, then ORFs that are not yet annotated
        candidates = [
            *cluster.cds_children,
            *all_orfs.find_all_orfs(record, cluster),
        ]

        for candidate in candidates:
            motif = run_lassopred(record, cluster, candidate)
            if motif is None:
                continue

            motif_count += 1
            results.motifs_by_locus[candidate.get_name()].append(motif)
            results.clusters[cluster.get_cluster_number()].add(candidate.get_name())
            # a candidate without a region is tracked as a new CDS feature
            # (presumably one produced by all_orfs rather than the record)
            if candidate.region is None:
                results.new_cds_features.add(candidate)

    logging.debug("Lassopeptide module marked %d motifs", motif_count)
    return results
def specific_analysis(record: secmet.Record, options: ConfigType) -> SactiResults:
    """ Searches every sactipeptide protocluster for precursor peptides,
        considering both the existing CDS features and unannotated ORFs,
        then hands the protocluster over to RREfinder.

        Arguments:
            record: the Record to analyse
            options: the configuration object, passed through to RRE_main

        Returns:
            a SactiResults instance holding all found precursors and new ORFs
    """
    results = SactiResults(record.id)
    motif_count = 0
    new_feature_hits = 0
    clusters_seen = 0

    for cluster in record.get_protoclusters():
        if cluster.product != 'sactipeptide':
            continue

        # unannotated ORFs are annotated with any non-biosynthetic pHMM hits
        extra_orfs = all_orfs.find_all_orfs(record, cluster)
        orf_fasta = fasta.get_fasta_from_features(extra_orfs)
        hmm_results = run_non_biosynthetic_phmms(orf_fasta)
        annotate_orfs(extra_orfs, hmm_results)

        # every CDS, existing or new, is a potential precursor
        candidates = list(cluster.cds_children) + extra_orfs
        domains = get_detected_domains(cluster)

        for candidate in candidates:
            motif = run_sactipred(cluster, candidate, domains)
            if motif is None:
                continue

            motif_count += 1
            results.motifs_by_locus[candidate.get_name()].append(motif)
            protocluster_number = cluster.get_protocluster_number()
            results.clusters[protocluster_number].add(candidate.get_name())

            # a candidate without a region is tracked as a new CDS feature
            # (presumably one produced by all_orfs rather than the record)
            if candidate.region is None:
                results.new_cds_features.add(candidate)
                new_feature_hits += 1

        # RREfinder gets a name unique to this protocluster within the record
        clusters_seen += 1
        name = '%s_%s_%s' % (record.id, cluster.product, clusters_seen)
        RRE_main(cluster, results, name, options)

    if motif_count:
        if new_feature_hits == 1:
            verb = "is"
        else:
            verb = "are"
        logging.debug(
            "Found %d sactipeptide motif(s) in %d feature(s), %d of which %s new",
            motif_count, len(results.motifs_by_locus), new_feature_hits, verb)
    else:
        logging.debug("Found no sactipeptide motifs")
    return results
def run_specific_analysis(record: Record, options: ConfigType) -> LanthiResults:
    """ Runs the full lanthipeptide analysis over the given record

        Arguments:
            record: the Record instance to analyse
            options: the configuration object, passed through to RRE_main

        Returns:
            A populated LanthiResults object
    """
    results = LanthiResults(record.id)
    # domains marking a core biosynthetic enzyme; constant for every cluster
    core_domain_names = {
        'Lant_dehydr_N', 'Lant_dehydr_C', 'DUF4135', 'Pkinase'
    }
    counter = 0

    for cluster in record.get_protoclusters():
        if cluster.product != 'lanthipeptide':
            continue

        # find core biosynthetic enzyme locations
        core_genes = []
        for gene in cluster.cds_children:
            if not gene.sec_met:
                continue
            domain_ids = gene.sec_met.domain_ids
            # We seem to hit Lant_dehydr_C on some O-Methyltranferases that also hit PCMT
            if 'PCMT' in domain_ids:
                continue
            if not core_domain_names.isdisjoint(domain_ids):
                core_genes.append(gene)

        precursor_candidates = find_lan_a_features(cluster)
        # Find candidate ORFs that are not yet annotated,
        # keeping only those with a translation shorter than 80
        extra_orfs = all_orfs.find_all_orfs(record, cluster)
        precursor_candidates.extend(
            orf for orf in extra_orfs if len(orf.translation) < 80
        )

        for gene in core_genes:
            neighbours = find_neighbours_in_range(gene, precursor_candidates)
            if not neighbours:
                continue
            run_lanthi_on_genes(record, gene, cluster, neighbours, results)

        # Analyze the cluster with RREfinder, using a name unique
        # to this protocluster within the record
        counter += 1
        name = '%s_%s_%s' % (record.id, cluster.product, counter)
        RRE_main(cluster, results, name, options)

    # Count the motifs themselves: motifs_by_locus is assumed to map a locus
    # name to its motifs, so iterating it directly would instead sum the
    # lengths of the locus names.
    motif_count = sum(len(motifs) for motifs in results.motifs_by_locus.values())
    logging.debug("Lanthipeptide module marked %d motifs", motif_count)
    return results
def run_specific_analysis(record: Record) -> LanthiResults:
    """ Runs the full lanthipeptide analysis over the given record

        Arguments:
            record: the Record instance to analyse

        Returns:
            A populated LanthiResults object
    """
    results = LanthiResults(record.id)
    # domains marking a core biosynthetic enzyme; constant for every cluster
    core_domain_names = {
        'Lant_dehyd_N', 'Lant_dehyd_C', 'DUF4135', 'Pkinase'
    }

    for cluster in record.get_clusters():
        if 'lanthipeptide' not in cluster.products:
            continue

        # find core biosynthetic enzyme locations
        core_genes = [
            gene for gene in cluster.cds_children
            if gene.sec_met
            and not core_domain_names.isdisjoint(gene.sec_met.domain_ids)
        ]

        precursor_candidates = find_lan_a_features(cluster)
        # Find candidate ORFs that are not yet annotated,
        # keeping only those with a translation shorter than 80
        extra_orfs = all_orfs.find_all_orfs(record, cluster)
        precursor_candidates.extend(
            orf for orf in extra_orfs if len(orf.translation) < 80
        )

        for gene in core_genes:
            neighbours = find_neighbours_in_range(gene, precursor_candidates)
            run_lanthi_on_genes(record, gene, neighbours, results)

    # Count the motifs themselves: motifs_by_locus is assumed to map a locus
    # name to its motifs, so iterating it directly would instead sum the
    # lengths of the locus names.
    motif_count = sum(len(motifs) for motifs in results.motifs_by_locus.values())
    logging.debug("Lanthipeptide module marked %d motifs", motif_count)
    return results