def generate_html(region_layer: RegionLayer, _results: ModuleResults,
                  _record_layer: RecordLayer, options_layer: OptionsLayer) -> HTMLSections:
    """ Build the NRPS/PKS domain details section of the main HTML output

        Arguments:
            region_layer: the region wrapper to render domain details for
            _results: not used by this function
            _record_layer: not used by this function
            options_layer: the options wrapper, supplies the documentation base URL

        Returns:
            an HTMLSections instance, with no section added if
            has_domain_details() is falsy for the region
    """
    template_path = path.get_full_path(__file__, 'templates', 'details.html')
    details_template = FileTemplate(template_path)
    sections = HTMLSections("nrps_pks")

    if has_domain_details(region_layer):
        # when no domain has a prediction (e.g. in a minimal run),
        # the lids start out hidden
        lids_hidden = not domains_have_predictions(region_layer)
        # the has_domain_details function itself is handed to the template
        rendered = details_template.render(has_domain_details=has_domain_details,
                                           region=region_layer,
                                           docs_url=options_layer.urls.docs_baseurl,
                                           hide_lids=lids_hidden)
        sections.add_detail_section("NRPS/PKS domains", rendered)

    return sections
def generate_sidepanel(cluster_layer: ClusterLayer, results: SactiResults,
                       record_layer: RecordLayer, options_layer: OptionsLayer) -> str:
    """ Render the sidepanel HTML holding the results for the given cluster

        Arguments:
            cluster_layer: the cluster wrapper, supplies the cluster feature and number
            results: the results to pull motifs from, via clusters and motifs_by_locus
            record_layer: the record wrapper passed on to the template
            options_layer: the options wrapper passed on to the template

        Returns:
            the rendered 'sidepanel.html' template as a string
    """
    template_dir = path.get_full_path(__file__, 'templates')
    jinja_env = Environment(loader=FileSystemLoader(template_dir),
                            autoescape=True, undefined=StrictUndefined)
    sidepanel_template = jinja_env.get_template('sidepanel.html')
    sacti_layer = SactipeptideLayer(record_layer, cluster_layer.cluster_feature)

    # clusters without an entry in the results simply get no motifs
    cluster_loci = results.clusters.get(cluster_layer.get_cluster_number(), [])
    motifs_in_cluster = {locus: results.motifs_by_locus[locus] for locus in cluster_loci}

    return sidepanel_template.render(record=record_layer,
                                     cluster=sacti_layer,
                                     options=options_layer,
                                     results=motifs_in_cluster)
def generate_html_table(outfile_name: str, mibig_entries: List[MibigEntry]) -> None:
    """ Generates an HTML page containing a table for MiBIG hits for CDSes

        Arguments:
            outfile_name: the path to write the HTML page to, any missing
                          parent directories are created
            mibig_entries: a list of clusterblast MibigEntry hits

        Returns:
            None
    """
    # a bare filename has an empty dirname and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one
    parent_dir = os.path.dirname(outfile_name)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    env = Environment(autoescape=True, undefined=StrictUndefined,
                      loader=FileSystemLoader(get_full_path(__file__, "templates")))
    template = env.get_template('mibig_hits_table.html')
    # render before opening the output, so a template failure can't leave
    # an empty file behind
    content = template.render(mibig_homology_file_lines=mibig_entries)

    with open(outfile_name, 'w') as handle:
        handle.write(content)
def test_epidermin(self):
    "Test lanthipeptide prediction for epidermin"
    genbank_path = path.get_full_path(__file__, 'data', 'epidermin.gbk')
    record = Record.from_biopython(seqio.read(genbank_path), taxon="bacteria")
    # the record starts without any motifs
    assert not record.get_cds_motifs()

    analysis = run_specific_analysis(record)
    found_motifs = self.gather_all_motifs(analysis)
    assert len(found_motifs) == 1

    # the record only gains the motif once the results are explicitly added
    assert not record.get_cds_motifs()
    analysis.add_to_record(record)
    assert len(record.get_cds_motifs()) == 1

    motif = found_motifs[0]
    self.assertAlmostEqual(2164, motif.monoisotopic_mass, delta=0.5)
    self.assertAlmostEqual(2165.6, motif.molecular_weight, delta=0.5)
    self.assertEqual(3, motif.lan_bridges)
    self.assertEqual("MEAVKEKNDLFNLDVKVNAKESNDSGAEPR", motif.leader)
    self.assertEqual("IASKFICTPGCAKTGSFNSYCC", motif.core)
    self.assertEqual('Class I', motif.peptide_subclass)
    self.assertEqual(['AviCys'], motif.get_modifications())
def test_alignment_generation(self):
    """ Checks that alignments are built for the domains of interest when
        run_hmmpfam2 is patched to return pregenerated hmmpfam2 output
    """
    output_path = path.get_full_path(__file__, 'data', 'KS_N.output')
    # fully consume the parser inside the context manager so that the
    # handle is closed again, rather than leaked by a bare open()
    with open(output_path) as handle:
        pregenerated = list(SearchIO.parse(handle, "hmmer2-text"))

    domains = self.generate_domains()
    analysis = ActiveSiteAnalysis("PKS_KS", domains, "PKSI-KS_N.hmm2",
                                  [176, 186, 187, 188], ['G', 'S', 'S', 'S'])
    with patch.object(subprocessing, "run_hmmpfam2", return_value=pregenerated):
        alignments = analysis.get_alignments()

    assert {"PKS_KS"} == {domain.domain for domain in analysis.domains_of_interest}
    assert len(alignments) == 4
    assert [align.domain for align in alignments[:4]] == domains[:4]
def generate_details_div(cluster_layer, results, record_layer, options_layer) -> str:
    """ Render the details div shown on the main page of results

        Arguments:
            cluster_layer: the cluster wrapper, supplies cluster_rec and the cluster number
            results: the module results, read via clusters and motifs_by_locus
            record_layer: the record wrapper passed on to the template
            options_layer: the options wrapper passed on to the template

        Returns:
            the rendered 'details.html' template, or an empty string if
            results is falsy
    """
    # the layer is deliberately built before the results check,
    # matching the established statement order
    lanthi_layer = LanthipeptideLayer(record_layer, cluster_layer.cluster_rec)
    if not results:
        return ""

    template_dir = path.get_full_path(__file__, "templates")
    jinja_env = Environment(loader=FileSystemLoader(template_dir),
                            autoescape=True, undefined=StrictUndefined)
    details_template = jinja_env.get_template('details.html')

    # clusters without an entry in the results simply get no motifs
    cluster_loci = results.clusters.get(cluster_layer.get_cluster_number(), [])
    motifs_in_cluster = {locus: results.motifs_by_locus[locus] for locus in cluster_loci}

    return details_template.render(record=record_layer,
                                   cluster=lanthi_layer,
                                   options=options_layer,
                                   results=motifs_in_cluster)
def load_smiles() -> Dict[str, str]:
    """ Load the mapping of residue name to SMILES string from the packaged
        'aaSMILES.txt' data file

        Blank lines, lines starting with '#' and a line consisting only of
        "END" are skipped. Every other line must have exactly two
        whitespace-separated fields: the residue name and its SMILES string.

        Returns:
            a dictionary mapping residue name to SMILES string
    """
    aa_smiles = {}  # type: Dict[str, str]
    smiles_path = path.get_full_path(__file__, 'data', 'aaSMILES.txt')
    # the context manager closes the file even when one of the
    # assertions below fires on malformed data
    with open(smiles_path, 'r') as smiles_monomer:
        for line in smiles_monomer:
            line = line.strip()
            if not line or line.startswith('#') or line == "END":
                continue
            smiles = line.split()
            assert len(smiles) == 2, "Invalid smiles line {!r}".format(line)
            assert smiles[0] not in aa_smiles, "%s contained twice in smiles data" % smiles[0]
            aa_smiles[smiles[0]] = smiles[1]
    return aa_smiles
def find_domains(fasta: str, record: Record) -> Dict[str, List[HMMResult]]:
    """ Scan for C/A/PCP/E/KS/AT/ATd/DH/KR/ER/ACP/TE/TD/COM/Docking/MT/CAL domains

        Arguments:
            fasta: a group of features in fasta format
            record: the Record that contains all the features

        Returns:
            a dictionary mapping feature name to a list of domain results for that feature
    """
    profile_db = path.get_full_path(__file__, "data", "nrpspksdomains.hmm")
    raw_hits = subprocessing.run_hmmscan(profile_db, fasta, ["--cut_tc"])
    profile_lengths = utils.get_hmm_lengths(profile_db)
    refined = refine_hmmscan_results(raw_hits, profile_lengths, neighbour_mode=True)
    # the final filtering step needs the record as well as the refined hits
    return filter_nonterminal_docking_domains(record, refined)
def prepare_data(logging_only: bool = False) -> List[str]:
    """ Makes sure each packaged HMM database has been pressed

        Arguments:
            logging_only: passed to hmmer.ensure_database_pressed() as
                          return_not_raise, i.e. whether problems are returned
                          as messages instead of being raised

        Returns:
            a list of error messages (only if logging_only is True)
    """
    model_names = (
        'abmotifs.hmm',
        'dockingdomains.hmm',
        'ksdomains.hmm',
        'nrpspksdomains.hmm',
    )
    failure_messages = []  # type: List[str]
    for model_name in model_names:
        model_path = path.get_full_path(__file__, "data", model_name)
        messages = hmmer.ensure_database_pressed(model_path, return_not_raise=logging_only)
        failure_messages.extend(messages)
    return failure_messages
def perform_docking_domain_analysis(cds_features: List[CDSFeature]) -> List[CDSFeature]:
    """ Estimates the order of genes using the docking domains of their features

        Arguments:
            cds_features: a list of CDSFeatures to order

        Returns:
            a list of CDSFeatures in estimated order
    """
    first_cds, last_cds = find_first_and_last_cds(cds_features)
    terminals_dir = path.get_full_path(__file__, "data", "terminals")

    n_term_residues = extract_nterminus(terminals_dir, cds_features, first_cds)
    c_term_residues = extract_cterminus(terminals_dir, cds_features, last_cds)

    candidate_orders = find_possible_orders(cds_features, first_cds, last_cds)
    return rank_biosynthetic_orders(n_term_residues, c_term_residues, candidate_orders)
def generate_sidepanel(cluster_layer, results, record_layer, options_layer) -> str:
    """ Render the div holding the sidepanel results

        Arguments:
            cluster_layer: the cluster wrapper, supplies cluster_rec and the cluster number
            results: the module results, read via clusters and motifs_by_locus
            record_layer: the record wrapper passed on to the template
            options_layer: the options wrapper passed on to the template

        Returns:
            the rendered 'sidepanel.html' template, or an empty string if
            results is falsy
    """
    template_dir = path.get_full_path(__file__, "templates")
    jinja_env = Environment(loader=FileSystemLoader(template_dir),
                            autoescape=True, undefined=StrictUndefined)
    sidepanel_template = jinja_env.get_template('sidepanel.html')
    lanthi_layer = LanthipeptideLayer(record_layer, cluster_layer.cluster_rec)
    # the template and layer are set up before this check,
    # matching the established statement order
    if not results:
        return ""

    # clusters without an entry in the results simply get no motifs
    cluster_loci = results.clusters.get(cluster_layer.get_cluster_number(), [])
    motifs_in_cluster = {locus: results.motifs_by_locus[locus] for locus in cluster_loci}

    return sidepanel_template.render(record=record_layer,
                                     cluster=lanthi_layer,
                                     options=options_layer,
                                     results=motifs_in_cluster)
def test_css_matches_rules(self):
    """ Checks that every supported cluster type has a CSS class in
        secmet.scss and that no unexpected extra classes are defined
    """
    defined_clusters = set(hmm_detection.get_supported_cluster_types("loose"))
    # a special case used at the javascript level
    base_classes = {"hybrid"}

    scss_path = path.get_full_path(__file__, "..", "css", "secmet.scss")
    with open(scss_path) as handle:
        # a class name is the first word after the leading '.' of a line
        available_classes = {line[1:].split()[0] for line in handle if line.startswith('.')}

    missing_css = defined_clusters - available_classes
    assert not missing_css

    # allow for the extra base classes and hybrids
    extra_css = available_classes - defined_clusters - base_classes
    # and clusterfinders clustertypes
    extra_css -= {'cf_putative', 'cf_fatty_acid', 'cf_saccharide'}
    assert not extra_css
def get_sequence_counts(details_file: str) -> Dict[str, int]:
    """ Gets the number of sequences/seeds used to generate each HMM signature

        Arguments:
            details_file: a file containing all HMMs

        Returns:
            a dictionary mapping HMM name to the number of sequences used to generate it

        Raises:
            ValueError: if a profile's HMM file has no 'NSEQ ' line
    """
    prefix = 'NSEQ '
    result = {}  # type: Dict[str, int]
    for hmm in get_signature_profiles(details_file):
        hmm_path = path.get_full_path(details_file, hmm.hmm_file)
        # open via a context manager so the handle is closed immediately,
        # also when stopping at the first matching line
        with open(hmm_path, 'r') as handle:
            for line in handle:
                if line.startswith(prefix):
                    # slice off exactly the matched prefix, any further
                    # padding is removed by strip()
                    result[hmm.name] = int(line[len(prefix):].strip())
                    break
        if hmm.name not in result:
            raise ValueError("Unknown number of seeds for hmm file: %s" % details_file)
    return result
def test_CP009369(self):  # pylint: disable=invalid-name
    " tests the special case HMM files for rodeo "
    genbank_path = path.get_full_path(__file__, 'data', 'CP009369.1.gbk')
    results = helpers.run_and_regenerate_results_for_module(genbank_path, thiopeptides, self.config)
    assert results
    assert len(results.motifs) == 1

    motif = results.motifs[0]
    self.assertAlmostEqual(1934.6, motif.monoisotopic_mass, places=1)
    self.assertAlmostEqual(1936.0, motif.molecular_weight, places=1)
    assert motif.leader == "MVKSIIKARESGRFYETKYLKGGEEMKEQKELKNEEFELDVEFLDLDEVSAIPETTA"
    assert motif.core == "SSGTSSCSASSTCGSSSCCGSC"
    assert not motif.detailed_information.macrocycle
    assert motif.peptide_subclass == "Type III"
    assert motif.detailed_information.core_features == 'Central ring: pyridine trisubstituted'
    assert motif.tail == ''

    expected_weights = [1954.0, 1972.1, 1990.1, 2008.1, 2026.1, 2044.1, 2062.2,
                        2080.2, 2098.2, 2116.2, 2134.2, 2152.3, 2170.3]
    for calc, expected in zip(motif.alternative_weights, expected_weights):
        self.assertAlmostEqual(calc, expected, places=1)

    assert len(motif.to_biopython()) == 2  # no tail
def prepare_data(logging_only: bool = False) -> List[str]:
    """ Makes sure the thiopeptide classifier has been built from the
        packaged training set

        Arguments:
            logging_only: when True, pickle_classifier() is called with
                          overwrite disabled

        Returns:
            a list with a single error message if pickle_classifier()
            raised a ValueError, otherwise an empty list
    """
    training_set = path.get_full_path(__file__, "data", "training_set.csv")
    failure_messages = []  # type: List[str]
    try:
        pickle_classifier(training_set, prefix="thiopeptide", kernel='rbf',
                          C=2.83e5, gamma=1e-9, overwrite=not logging_only)
    except ValueError:
        failure_messages.append("failed to rebuild thiopeptide classifier")
    return failure_messages
def test_nisin_fasta_only(self):
    """ Checks that genefinding is only run on a FASTA input when the
        configured genefinding tool is something other than 'none'
    """
    def preprocess() -> None:
        """ Runs the preprocessing step with this test's options and dummy genefinding """
        record_processing.pre_process_sequences(records, self.options, self.genefinding)

    config.update_config({"genefinding_tool": "none"})
    fasta_path = path.get_full_path(__file__, "data", "nisin.fasta")
    records = record_processing.parse_input_sequence(fasta_path)
    assert len(records) == 1
    assert not records[0].get_cds_features()

    # make sure genefinding wasn't run with default options
    preprocess()
    assert not self.genefinding.was_run
    assert not records[0].get_cds_features()

    # make sure genefinding was run when not 'none'
    records[0].skip = False
    config.update_config({"genefinding_tool": "not-none"})
    preprocess()
    assert self.genefinding.was_run
    # still no features because we used dummy genefinding
    assert not records[0].get_cds_features()
def test_css_matches_rules(self):
    """ Checks that every supported cluster type has a CSS class in
        secmet.scss and that no unexpected extra classes are defined
    """
    defined_clusters = set(hmm_detection.get_supported_cluster_types("loose"))
    base_classes = {
        "hybrid",  # a special case used at the javascript level
        "unknown",  # for regions containing only subregions
    }

    scss_path = path.get_full_path(__file__, "..", "css", "secmet.scss")
    with open(scss_path) as handle:
        # a class name is the first word after the leading '.' of a line
        available_classes = {line[1:].split()[0] for line in handle if line.startswith('.')}

    missing_css = defined_clusters - available_classes
    assert not missing_css

    # allow for the extra base classes and hybrids
    extra_css = available_classes - defined_clusters - base_classes
    assert not extra_css
def test_cp002271_c19(self):
    """ Runs the nrps_pks module over CP002271.1 cluster 19 and checks the
        domain predictions, consensus and region prediction that result
    """
    filename = path.get_full_path(__file__, 'data', 'CP002271.1.cluster019.gbk')
    results = helpers.run_and_regenerate_results_for_module(filename, nrps_pks, self.options)

    # catch ordering changes along with ensuring ATResults are there
    pred = results.domain_predictions["nrpspksdomains_STAUR_3982_PKS_AT.1"]
    assert pred["signature"].predictions[0][1].score == 87.5

    # ensure all genes are present and have the right consensus
    assert results.consensus == {
        'nrpspksdomains_STAUR_3982_PKS_AT.1': 'mmal',
        'nrpspksdomains_STAUR_3983_PKS_AT.1': 'mmal',
        'nrpspksdomains_STAUR_3984_PKS_AT.1': 'mmal',
        'nrpspksdomains_STAUR_3985_PKS_AT.1': 'pk',
        'nrpspksdomains_STAUR_3985_PKS_AT.2': 'mmal',
    }

    assert len(results.region_predictions) == 1
    assert list(results.region_predictions) == [1]
    assert len(results.region_predictions[1]) == 1

    # check the gene ordering and, in this case, that it used domain docking
    sc_pred = results.region_predictions[1][0]
    assert sc_pred.polymer == '(Me-ccmal) + (Me-ccmal) + (Me-ccmal)'
    assert sc_pred.domain_docking_used
    assert sc_pred.ordering == ['STAUR_3983', 'STAUR_3984', 'STAUR_3985', 'STAUR_3982']

    assert len(results.domain_predictions) == 10
    # each name is listed exactly once, giving the ten names that the
    # length check above expects
    expected_domains = {
        'nrpspksdomains_STAUR_3982_PKS_AT.1',
        'nrpspksdomains_STAUR_3983_PKS_AT.1',
        'nrpspksdomains_STAUR_3984_PKS_AT.1',
        'nrpspksdomains_STAUR_3985_PKS_AT.1',
        'nrpspksdomains_STAUR_3985_PKS_AT.2',
        'nrpspksdomains_STAUR_3972_PKS_KR.1',
        'nrpspksdomains_STAUR_3984_PKS_KR.1',
        'nrpspksdomains_STAUR_3985_PKS_KR.1',
        'nrpspksdomains_STAUR_3983_PKS_KR.1',
        'nrpspksdomains_STAUR_3982_PKS_KR.1',
    }
    assert set(results.domain_predictions) == expected_domains
def test_NZ_CP015439(self):  # pylint: disable=invalid-name
    """ Tests that small ORFs are found and saved in results """
    genbank_path = path.get_full_path(__file__, 'data', 'NZ_CP015439_section.gbk')
    results = helpers.run_and_regenerate_results_for_module(genbank_path, thiopeptides,
                                                            self.config)
    assert results

    # check that the extra orf was found and stored correctly
    assert len(results.cds_features) == 1
    new_features = list(results.cds_features.values())[0]
    assert len(new_features) == 1
    assert isinstance(new_features[0], secmet.features.CDSFeature)

    # also test the analysis results itself
    assert len(results.motifs) == 1
    motif = results.motifs[0]
    self.assertAlmostEqual(1408.6, motif.monoisotopic_mass, places=1)
    self.assertAlmostEqual(1409.5, motif.molecular_weight, places=1)
    assert motif.leader == "MRYMEGGENMQDIMLELYAEELPDITQYTAAGTSTLSTESSVLSASCP"
    assert motif.core == "TSTASTYTSMSSVS"
def classify(record_id: str, cds_features: List[CDSFeature],  # an API, so hide unused warning
             options: ConfigType) -> FunctionResults:  # pylint: disable=unused-argument
    """ Finds possible smCOG classifications for the provided CDS features.

        Arguments:
            record_id: the identifier passed on to the FunctionResults
            cds_features: a list of CDSFeatures to classify
            options: not used by this function

        Returns:
            a FunctionResults instance for the "smcogs" tool, holding both the
            hits and a mapping of CDS name to function
    """
    smcogs_db = path.get_full_path(__file__, "data", "smcogs.hmm")
    hits = scan_for_functions(cds_features, smcogs_db, hmmscan_opts=["-E", "1E-16"])
    function_by_id = build_function_mapping()

    cds_name_to_function = {}
    for cds_name, hit in hits.items():
        # the smCOG identifier is everything before the first colon
        smcog_id = hit.hit_id.partition(":")[0]
        cds_name_to_function[cds_name] = function_by_id[smcog_id]
        # the hit itself is updated in place to use spaces over underscores
        hit.hit_id = hit.hit_id.replace('_', ' ')

    return FunctionResults(record_id, "smcogs", hits, cds_name_to_function)
def check_prereqs(options: ConfigType) -> List[str]:
    """ Checks if all required files and applications are around

        Arguments:
            options: the config object, its executables are checked for each
                     required binary name

        Returns:
            a list of failure messages, one for each missing binary or profile
    """
    required_binaries = ('hmmpfam2', 'hmmscan', 'hmmpress')
    failure_messages = ["Failed to locate file: %r" % binary_name
                        for binary_name in required_binaries
                        if binary_name not in options.executables]

    # the HMM profiles that must be present in the data directory
    required_profiles = (
        "PKSI-KR.hmm2", "PKSI-KS_N.hmm2", "PKSI-KS_C.hmm2", "PKSI-AT.hmm2",
        "PKSI-ACP.hmm2", "PKSI-DH.hmm2", "Thioesterase.hmm2", "PKSI-ER.hmm2",
        "p450.hmm2",
    )
    for profile in required_profiles:
        profile_path = path.get_full_path(__file__, "data", profile)
        if path.locate_file(profile_path) is None:
            failure_messages.append("Failed to locate file: %s" % profile)

    return failure_messages
def test_full_blastp_use(self):
    """ Runs the t2pks module over GQ409537.1 and checks the resulting
        cluster prediction
    """
    genbank_path = path.get_full_path(__file__, 'data', 'GQ409537.1.gbk')
    results = helpers.run_and_regenerate_results_for_module(genbank_path, t2pks, self.options)
    assert list(results.cluster_predictions) == [1]

    prediction = results.cluster_predictions[1]
    expected_starters = [t2pks.results.Prediction('malonamyl-CoA', 2319., 0.)]
    assert prediction.starter_units == expected_starters
    expected_elongations = [t2pks.results.Prediction('8|9', 661.0, 1.3e-201)]
    assert prediction.malonyl_elongations == expected_elongations
    assert prediction.product_classes == {
        'angucycline', 'tetracycline', 'aureolic acid', 'anthracycline',
    }

    weights = prediction.molecular_weights
    assert set(weights) == {'malonamyl-CoA_8', 'malonamyl-CoA_9'}
    self.assertAlmostEqual(weights['malonamyl-CoA_8'], 638.63534)
    self.assertAlmostEqual(weights['malonamyl-CoA_9'], 680.67202)
def get_sequence_counts(details_file: str) -> Dict[str, str]:
    """ Gets the number of sequences/seeds used to generate each HMM signature

        Arguments:
            details_file: a file containing all HMMs

        Returns:
            a dictionary mapping HMM name to the number of sequences used to
            generate it, as a string, with "?" for any profile whose HMM file
            has no 'NSEQ ' line
    """
    prefix = 'NSEQ '
    result = {}  # type: Dict[str, str]
    for hmm in get_signature_profiles(details_file):
        hmm_path = path.get_full_path(details_file, hmm.hmm_file)
        # open via a context manager so the handle is closed immediately,
        # also when stopping at the first matching line
        with open(hmm_path, 'r') as handle:
            for line in handle:
                if line.startswith(prefix):
                    # slice off exactly the matched prefix, any further
                    # padding is removed by strip()
                    result[hmm.name] = line[len(prefix):].strip()
                    break
        # TODO: ideally this shouldn't ever happen, clean up inputs and change to error
        if hmm.name not in result:
            result[hmm.name] = "?"
    return result
def prepare_data(_logging_only: bool = False) -> List[str]:
    """ Rebuild any dynamically buildable data

        Each flavour's CSS file is recompiled from its SCSS source when
        path.is_outdated() reports the built files as outdated relative to
        the SCSS files in the css directory.

        Arguments:
            _logging_only: not used by this function

        Returns:
            an empty list
    """
    flavours = ["bacteria", "fungi", "plants"]
    css_dir = path.get_full_path(__file__, "css")
    # all relative paths below depend on being inside the css directory
    with path.changed_directory(css_dir):
        built_files = [os.path.abspath("%s.css" % flavour) for flavour in flavours]
        if not path.is_outdated(built_files, glob.glob("*.scss")):
            return []

        logging.info("CSS files out of date, rebuilding")
        for flavour in flavours:
            source = flavour + ".scss"
            target = "%s.css" % flavour
            assert os.path.exists(source), flavour
            compiled = scss.Compiler(output_style="expanded").compile(source)
            assert compiled
            with open(target, "w") as out:
                out.write(compiled)
    return []
def setUp(self):
    """ Loads the test record, covers it with a single T2PKS cluster and
        mocks out both the hmmscan and blastp steps of the analysis
    """
    genbank_path = path.get_full_path(__file__, 'data', 'NC_003888.3.cluster011.gbk')
    self.record = record_processing.parse_input_sequence(genbank_path)[0]

    # the cluster and its surrounding location both span the whole record
    record_length = len(self.record.seq)
    self.cluster = Cluster(FeatureLocation(0, record_length),
                           surrounding_location=FeatureLocation(0, record_length),
                           cutoff=20, neighbourhood_range=0, tool="test",
                           product="T2PKS", detection_rule="dummy rule")
    self.record.add_cluster(self.cluster)
    self.record.create_superclusters()
    self.record.create_regions()

    # the canned hits returned in place of running hmmscan
    canned_hits = {
        'SCO5072': [HMMResult("KR", 1, 265, evalue=3.1e-49, bitscore=159.4)],
        'SCO5079': [HMMResult("DIMER", 4, 293, evalue=8.7e-131, bitscore=426.8)],
        'SCO5080': [HMMResult("OXY", 8, 377, evalue=2.1e-14, bitscore=44.7)],
        'SCO5086': [HMMResult("KR_C9", 0, 261, evalue=1.9e-134, bitscore=438.4)],
        'SCO5087': [HMMResult("KS", 44, 463, evalue=3.5e-234, bitscore=768.6)],
        'SCO5088': [HMMResult("CLF_7", 1, 401, evalue=1.2e-226, bitscore=743.5)],
        'SCO5089': [HMMResult("ACP", 4, 86, evalue=5e-36, bitscore=114.2)],
        'SCO5090': [HMMResult("CYC_C7-C12", 1, 312, evalue=7.8e-124, bitscore=404)],
        'SCO5091': [HMMResult("CYC_C5-C14", 3, 297, evalue=4.4e-143, bitscore=467.3)],
        'SCO5094': [HMMResult("MET", 40, 155, evalue=9.8e-11, bitscore=32.7)],
        'SCO5097': [HMMResult("KR", 3, 247, evalue=3.3e-40, bitscore=129.8)],
    }
    mock("t2pks_analysis.run_t2pks_hmmscan", returns=canned_hits)
    mock("t2pks_analysis.run_starter_unit_blastp", returns={})
def test_nosiheptide(self):
    "Test thiopeptide prediction for nosiheptide - nosM"
    genbank_path = path.get_full_path(__file__, 'data', 'nosi_before_analysis.gbk')
    record = secmet.Record.from_biopython(seqio.read(genbank_path), "bacteria")
    record.get_cluster(1).trim_overlapping()
    assert record.get_feature_count() == 56
    assert not record.get_cds_motifs()

    # running the analysis alone must not add any features to the record
    result = thiopeptides.specific_analysis(record)
    assert record.get_feature_count() == 56
    assert len(result.motifs) == 1

    result.add_to_record(record)
    for motif in record.get_cds_motifs():
        print(motif, motif.leader, motif.score, motif.rodeo_score)
    assert len(record.get_cds_motifs()) == 1, record.get_cds_motifs()
    assert record.get_feature_count() == 57

    # check the motif in an existing CDS
    prepeptide = record.get_cds_motifs()[0]
    assert prepeptide is result.motifs[0]
    self.assertAlmostEqual(1315.3, prepeptide.monoisotopic_mass, places=1)
    self.assertAlmostEqual(1316.5, prepeptide.molecular_weight, places=1)
    assert prepeptide.leader == "MDAAHLSDLDIDALEISEFLDESRLEDSEVVAKVMSA"
    assert prepeptide.core == "SCTTCECCCSCSS"
    assert prepeptide.macrocycle == "26-member"
    assert prepeptide.peptide_subclass == "Type I"

    # the first two mature weights are checked individually,
    # the remainder pairwise against the expected values
    self.assertAlmostEqual(1222.4, prepeptide.mature_weights[0], places=1)
    self.assertAlmostEqual(1221.2, prepeptide.mature_weights[1], places=1)
    remaining_expected = [1240.4, 1258.4, 1276.5, 1294.5, 1312.5, 1330.5]
    for calc, expected in zip(prepeptide.mature_weights[2:], remaining_expected):
        self.assertAlmostEqual(calc, expected, places=1)

    expected_core_features = ("Central ring: pyridine tetrasubstituted (hydroxyl group present);"
                              " second macrocycle")
    assert prepeptide.core_features == expected_core_features
    assert prepeptide.tail_reaction == 'dealkylation of C-Terminal residue; amidation'
def setUp(self):
    """ Fills a fresh record with rebuilt antiSMASH domains and with dummy
        p450 PFAM domains read from the test data
    """
    self.record = secmet.Record()

    # except for Thioesterase, all domains were found in BN001301.1
    # TE domains were found in Y16952
    domain_inputs = [
        ("PKS_KS.input", "PKS_KS"),
        ("AT.input", "PKS_AT"),
        ("ACP.input", "ACP"),
        ("DH.input", "PKS_DH"),
        ("KR.input", "PKS_KR"),
        ("TE.input", "Thioesterase"),
        ("ER.input", "PKS_ER"),
    ]
    for input_name, domain_type in domain_inputs:
        for domain in rebuild_domains(input_name, domain_type):
            self.record.add_antismash_domain(domain)

    # these PFAMs found in BN001301.1 with clusterhmmer, one was excluded
    # to avoid a Biopython SearchIO bug
    p450_path = path.get_full_path(__file__, 'data', "p450.input")
    for name, translation in fasta.read_fasta(p450_path).items():
        pfam_domain = DummyPFAMDomain(domain="p450", domain_id="PFAM_p450_" + name)
        pfam_domain.translation = translation
        self.record.add_pfam_domain(pfam_domain)
def setUp(self):
    """ Fills a fresh record with rebuilt antiSMASH domains and with p450
        PFAM domains built from the test data at a dummy location
    """
    self.record = secmet.Record()

    # except for Thioesterase, all domains were found in BN001301.1
    # TE domains were found in Y16952
    domain_inputs = [
        ("PKS_KS.input", "PKS_KS"),
        ("AT.input", "PKS_AT"),
        ("ACP.input", "ACP"),
        ("DH.input", "PKS_DH"),
        ("KR.input", "PKS_KR"),
        ("TE.input", "Thioesterase"),
        ("ER.input", "PKS_ER"),
    ]
    for input_name, domain_type in domain_inputs:
        for domain in rebuild_domains(input_name, domain_type):
            self.record.add_antismash_domain(domain)

    # these PFAMs found in BN001301.1 with clusterhmmer, one was excluded
    # to avoid a Biopython SearchIO bug
    # every PFAM domain is given this same location object
    dummy_location = secmet.features.FeatureLocation(1, 100)
    p450_path = path.get_full_path(__file__, 'data', "p450.input")
    for name, translation in fasta.read_fasta(p450_path).items():
        pfam_domain = secmet.features.PFAMDomain(dummy_location, protein_start=5,
                                                 protein_end=10, description="test",
                                                 identifier="PF00001", tool="test")
        pfam_domain.translation = translation
        pfam_domain.domain_id = "PFAM_p450_" + name
        pfam_domain.domain = "p450"
        self.record.add_pfam_domain(pfam_domain)
def test_cp002271_c19(self):
    """ Runs the nrps_pks module over CP002271.1 cluster 19 and checks the
        PKS signature results, consensus and cluster prediction
    """
    genbank_path = path.get_full_path(__file__, 'data', 'CP002271.1.cluster019.gbk')
    results = helpers.run_and_regenerate_results_for_module(genbank_path, nrps_pks,
                                                            self.options)

    # catch ordering changes along with ensuring ATResults are there
    signature_results = results.pks.method_results["signature"]
    assert signature_results["STAUR_3982_AT1"][0].score == 87.5

    # ensure all genes are present and have the right consensus
    expected_consensus = {
        'STAUR_3982_AT1': 'ohmmal',
        'STAUR_3983_AT1': 'ccmmal',
        'STAUR_3984_AT1': 'ccmmal',
        'STAUR_3985_AT1': 'pk',
        'STAUR_3985_AT2': 'pk',
    }
    assert results.consensus == expected_consensus

    # check the gene ordering and, in this case, that it used domain docking
    expected_predictions = {'1': ['(ccmmal) + (ccmmal) + (pk-pk) + (ohmmal)', True]}
    assert results.cluster_predictions == expected_predictions

    # no A domains in the cluster, so make sure no NRPS results
    assert results.nrps == {}
def check_prereqs() -> List[str]:
    """ Check if all required applications are around

        Also checks that the smCOGs HMM database exists and, if any of its
        pressed companion files is missing, runs hmmpress to regenerate them.

        Returns:
            a list of failure messages, one for each missing binary, missing
            database or failed hmmpress run
    """
    required_binaries = ('muscle', 'hmmscan', 'hmmpress', 'fasttree', 'java')
    failure_messages = ["Failed to locate file: %r" % binary_name
                        for binary_name in required_binaries
                        if path.locate_executable(binary_name) is None]

    pressed_extensions = ('.h3f', '.h3i', '.h3m', '.h3p')
    for hmm_name in ('smcogs.hmm',):
        hmm_path = path.get_full_path(__file__, 'data', hmm_name)
        if path.locate_file(hmm_path) is None:
            failure_messages.append("Failed to locate file %r" % hmm_path)
            continue

        # a single missing pressed file is enough to regenerate them all,
        # so checking stops at the first one that can't be found
        needs_pressing = any(path.locate_file(hmm_path + ext) is None
                             for ext in pressed_extensions)
        if not needs_pressing:
            continue

        result = subprocessing.run_hmmpress(hmm_path)
        if not result.successful():
            failure_messages.append("Failed to hmmpress %s: %s" % (hmm_path, result.stderr.rstrip()))

    return failure_messages