Exemple #1
0
def generate_html(region_layer: RegionLayer, _results: ModuleResults,
                  _record_layer: RecordLayer,
                  options_layer: OptionsLayer) -> HTMLSections:
    """ Builds the NRPS/PKS domain details section for the main HTML output

        Arguments:
            region_layer: the region layer to render domain details for
            _results: unused
            _record_layer: unused
            options_layer: the options layer, used for the docs base URL

        Returns:
            the "nrps_pks" HTMLSections, left without any detail section
            when the region has no domain details
    """
    template_path = path.get_full_path(__file__, 'templates', 'details.html')
    details_template = FileTemplate(template_path)
    sections = HTMLSections("nrps_pks")

    if has_domain_details(region_layer):
        # lids start hidden when no domain has predictions (e.g. in a minimal run)
        lids_hidden = not domains_have_predictions(region_layer)
        rendered = details_template.render(
            has_domain_details=has_domain_details,
            region=region_layer,
            docs_url=options_layer.urls.docs_baseurl,
            hide_lids=lids_hidden,
        )
        sections.add_detail_section("NRPS/PKS domains", rendered)

    return sections
Exemple #2
0
def generate_sidepanel(cluster_layer: ClusterLayer, results: SactiResults,
                       record_layer: RecordLayer,
                       options_layer: OptionsLayer) -> str:
    """ Renders the sidepanel HTML for the given cluster

        Arguments:
            cluster_layer: the cluster layer to generate the sidepanel for
            results: the results holding the cluster-to-locus mapping and
                     the motifs for each locus
            record_layer: the record layer passed through to the template
            options_layer: the options layer passed through to the template

        Returns:
            the rendered 'sidepanel.html' template as a string
    """
    template_dir = path.get_full_path(__file__, 'templates')
    environment = Environment(loader=FileSystemLoader(template_dir),
                              autoescape=True,
                              undefined=StrictUndefined)
    sidepanel_template = environment.get_template('sidepanel.html')
    sacti_layer = SactipeptideLayer(record_layer, cluster_layer.cluster_feature)

    # only the loci recorded for this cluster number are shown
    cluster_loci = results.clusters.get(cluster_layer.get_cluster_number(), [])
    motifs_by_locus = {locus: results.motifs_by_locus[locus]
                       for locus in cluster_loci}

    return sidepanel_template.render(record=record_layer,
                                     cluster=sacti_layer,
                                     options=options_layer,
                                     results=motifs_by_locus)
def generate_html_table(outfile_name: str, mibig_entries: List[MibigEntry]) -> None:
    """ Generates an HTML page containing a table for MiBIG hits for CDSes

        Arguments:
            outfile_name: the path to write the HTML page to
            mibig_entries: a list of clusterblast MibigEntry hits

        Returns:
            None
    """
    # a bare filename has an empty dirname and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when one is given
    target_dir = os.path.dirname(outfile_name)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    env = Environment(autoescape=True, undefined=StrictUndefined,
                      loader=FileSystemLoader(get_full_path(__file__, "templates")))
    template = env.get_template('mibig_hits_table.html')

    # render before opening the output, so a template failure can't leave
    # behind a truncated or empty file
    content = template.render(mibig_homology_file_lines=mibig_entries)

    with open(outfile_name, 'w') as handle:
        handle.write(content)
 def test_epidermin(self):
     "Checks the lanthipeptide prediction for the epidermin test record"
     gbk_path = path.get_full_path(__file__, 'data', 'epidermin.gbk')
     record = Record.from_biopython(seqio.read(gbk_path), taxon="bacteria")

     # the record starts without motifs and the analysis itself adds none
     assert not record.get_cds_motifs()
     results = run_specific_analysis(record)
     found_motifs = self.gather_all_motifs(results)
     assert len(found_motifs) == 1
     assert not record.get_cds_motifs()

     # the motif only reaches the record once the results are added to it
     results.add_to_record(record)
     assert len(record.get_cds_motifs()) == 1

     motif = found_motifs[0]
     self.assertAlmostEqual(2164, motif.monoisotopic_mass, delta=0.5)
     self.assertAlmostEqual(2165.6, motif.molecular_weight, delta=0.5)
     self.assertEqual(3, motif.lan_bridges)
     self.assertEqual("MEAVKEKNDLFNLDVKVNAKESNDSGAEPR", motif.leader)
     self.assertEqual("IASKFICTPGCAKTGSFNSYCC", motif.core)
     self.assertEqual('Class I', motif.peptide_subclass)
     self.assertEqual(['AviCys'], motif.get_modifications())
Exemple #5
0
 def test_alignment_generation(self):
     """ Checks that alignments are built for every PKS_KS domain when
         run_hmmpfam2 is patched to return pregenerated results
     """
     # parse inside a context manager so the fixture's file handle is closed
     # as soon as the results have been read into the list
     fixture = path.get_full_path(__file__, 'data', 'KS_N.output')
     with open(fixture) as handle:
         pregenerated = list(SearchIO.parse(handle, "hmmer2-text"))

     domains = self.generate_domains()
     analysis = ActiveSiteAnalysis("PKS_KS", domains, "PKSI-KS_N.hmm2",
                                   [176, 186, 187, 188],
                                   ['G', 'S', 'S', 'S'])
     # avoid running the real binary, the parsed fixture stands in for it
     with patch.object(subprocessing,
                       "run_hmmpfam2",
                       return_value=pregenerated):
         alignments = analysis.get_alignments()

     assert {"PKS_KS"} == {
         domain.domain
         for domain in analysis.domains_of_interest
     }
     assert len(alignments) == 4
     assert [align.domain for align in alignments[:4]] == domains[:4]
Exemple #6
0
def generate_details_div(cluster_layer, results, record_layer,
                         options_layer) -> str:
    """ Renders the details div for the main results page

        Arguments:
            cluster_layer: the cluster layer to generate the div for
            results: the results holding the cluster-to-locus mapping and
                     the motifs for each locus
            record_layer: the record layer passed through to the template
            options_layer: the options layer passed through to the template

        Returns:
            the rendered 'details.html' template, or an empty string when
            results is falsy
    """
    layer = LanthipeptideLayer(record_layer, cluster_layer.cluster_rec)
    if not results:
        return ""

    template_dir = path.get_full_path(__file__, "templates")
    environment = Environment(loader=FileSystemLoader(template_dir),
                              autoescape=True,
                              undefined=StrictUndefined)
    details_template = environment.get_template('details.html')

    # only the loci recorded for this cluster number are shown
    cluster_loci = results.clusters.get(cluster_layer.get_cluster_number(), [])
    motifs_by_locus = {locus: results.motifs_by_locus[locus]
                       for locus in cluster_loci}

    return details_template.render(record=record_layer,
                                   cluster=layer,
                                   options=options_layer,
                                   results=motifs_by_locus)
Exemple #7
0
def load_smiles() -> Dict[str, str]:
    """ Loads the residue to SMILES mapping from the packaged 'aaSMILES.txt'

        Blank lines, lines starting with '#' and lines consisting of 'END'
        are skipped. Every other line must hold exactly two whitespace
        separated fields: the residue name and its SMILES string.

        Returns:
            a dictionary mapping residue name to SMILES string
    """
    aa_smiles = {}  # type: Dict[str, str]

    smiles_path = path.get_full_path(__file__, 'data', 'aaSMILES.txt')
    # the context manager closes the file even if a malformed line trips
    # one of the assertions below
    with open(smiles_path, 'r') as smiles_monomer:
        for line in smiles_monomer:
            line = line.strip()
            if not line or line.startswith('#') or line == "END":
                continue
            smiles = line.split()
            assert len(smiles) == 2, "Invalid smiles line {!r}".format(line)
            assert smiles[
                0] not in aa_smiles, "%s contained twice in smiles data" % smiles[0]
            aa_smiles[smiles[0]] = smiles[1]

    return aa_smiles
def find_domains(fasta: str, record: Record) -> Dict[str, List[HMMResult]]:
    """ Scans features for NRPS/PKS domains
        (C/A/PCP/E/KS/AT/ATd/DH/KR/ER/ACP/TE/TD/COM/Docking/MT/CAL)

        Arguments:
            fasta: a group of features in fasta format
            record: the Record that contains all the features

        Returns:
            a dictionary mapping feature name to a list of domain results for that feature
    """
    hmm_database = path.get_full_path(__file__, "data", "nrpspksdomains.hmm")
    raw_hits = subprocessing.run_hmmscan(hmm_database, fasta, ["--cut_tc"])
    profile_lengths = utils.get_hmm_lengths(hmm_database)
    refined = refine_hmmscan_results(raw_hits, profile_lengths,
                                     neighbour_mode=True)
    return filter_nonterminal_docking_domains(record, refined)
def prepare_data(logging_only: bool = False) -> List[str]:
    """ Ensures each packaged HMM database is pressed

        Arguments:
            logging_only: passed on as return_not_raise, i.e. whether
                          problems are reported as messages instead of raised

        Returns:
            a list of the messages gathered across all databases
    """
    model_names = (
        'abmotifs.hmm',
        'dockingdomains.hmm',
        'ksdomains.hmm',
        'nrpspksdomains.hmm',
    )
    errors = []
    for name in model_names:
        model_path = path.get_full_path(__file__, "data", name)
        problems = hmmer.ensure_database_pressed(
            model_path, return_not_raise=logging_only)
        errors.extend(problems)
    return errors
Exemple #10
0
def perform_docking_domain_analysis(
        cds_features: List[CDSFeature]) -> List[CDSFeature]:
    """ Estimates the order of genes from the docking domains of their features

        Arguments:
            cds_features: a list of CDSFeatures to order

        Returns:
            a list of CDSFeatures in estimated order
    """
    first_cds, last_cds = find_first_and_last_cds(cds_features)
    terminals_dir = path.get_full_path(__file__, "data", "terminals")

    n_residues = extract_nterminus(terminals_dir, cds_features, first_cds)
    c_residues = extract_cterminus(terminals_dir, cds_features, last_cds)
    candidate_orders = find_possible_orders(cds_features, first_cds, last_cds)

    return rank_biosynthetic_orders(n_residues, c_residues, candidate_orders)
Exemple #11
0
def generate_sidepanel(cluster_layer, results, record_layer,
                       options_layer) -> str:
    """ Renders the sidepanel div for a cluster

        Arguments:
            cluster_layer: the cluster layer to generate the sidepanel for
            results: the results holding the cluster-to-locus mapping and
                     the motifs for each locus
            record_layer: the record layer passed through to the template
            options_layer: the options layer passed through to the template

        Returns:
            the rendered 'sidepanel.html' template, or an empty string when
            results is falsy
    """
    template_dir = path.get_full_path(__file__, "templates")
    environment = Environment(loader=FileSystemLoader(template_dir),
                              autoescape=True,
                              undefined=StrictUndefined)
    sidepanel_template = environment.get_template('sidepanel.html')
    lanthi_layer = LanthipeptideLayer(record_layer, cluster_layer.cluster_rec)
    if not results:
        return ""

    # only the loci recorded for this cluster number are shown
    cluster_loci = results.clusters.get(cluster_layer.get_cluster_number(), [])
    motifs_by_locus = {locus: results.motifs_by_locus[locus]
                       for locus in cluster_loci}

    return sidepanel_template.render(record=record_layer,
                                     cluster=lanthi_layer,
                                     options=options_layer,
                                     results=motifs_by_locus)
Exemple #12
0
 def test_css_matches_rules(self):
     """ Checks that the classes defined in secmet.scss and the supported
         cluster types match, apart from the known extras
     """
     rule_products = set(hmm_detection.get_supported_cluster_types("loose"))
     css_classes = set()
     # "hybrid" is a special case used at the javascript level
     javascript_only = {"hybrid"}
     scss_path = path.get_full_path(__file__, "..", "css", "secmet.scss")
     with open(scss_path) as handle:
         for line in handle:
             if not line.startswith('.'):
                 continue
             # the class name is the first word after the leading dot
             css_classes.add(line[1:].split()[0])

     missing = rule_products - css_classes
     assert not missing
     # allow for the extra base classes and hybrids
     unexpected = css_classes - rule_products - javascript_only
     # and clusterfinders clustertypes
     unexpected -= {'cf_putative', 'cf_fatty_acid', 'cf_saccharide'}
     assert not unexpected
def get_sequence_counts(details_file: str) -> Dict[str, int]:
    """ Gets the number of sequences/seeds used to generate each HMM signature

        Arguments:
            details_file: the file passed to get_signature_profiles() to list
                          the HMM profiles; each profile's hmm_file is
                          resolved relative to it

        Returns:
            a dictionary mapping HMM name to the number of sequences used to
                generate it

        Raises:
            ValueError: if a profile's file has no line starting with 'NSEQ '
    """
    result = {}
    for hmm in get_signature_profiles(details_file):
        # the context manager closes each profile file instead of leaving
        # the handle for the garbage collector
        with open(path.get_full_path(details_file, hmm.hmm_file), 'r') as handle:
            for line in handle:
                if line.startswith('NSEQ '):
                    # skips the 'NSEQ ' prefix plus one further character
                    result[hmm.name] = int(line[6:].strip())
                    break
        if hmm.name not in result:
            raise ValueError("Unknown number of seeds for hmm file: %s" % details_file)

    return result
Exemple #14
0
 def test_CP009369(self):  # pylint: disable=invalid-name
     " checks the special case HMM files for rodeo using CP009369.1 "
     gbk_path = path.get_full_path(__file__, 'data', 'CP009369.1.gbk')
     results = helpers.run_and_regenerate_results_for_module(gbk_path, thiopeptides, self.config)
     assert results
     assert len(results.motifs) == 1

     motif = results.motifs[0]
     self.assertAlmostEqual(1934.6, motif.monoisotopic_mass, places=1)
     self.assertAlmostEqual(1936.0, motif.molecular_weight, places=1)
     assert motif.leader == "MVKSIIKARESGRFYETKYLKGGEEMKEQKELKNEEFELDVEFLDLDEVSAIPETTA"
     assert motif.core == "SSGTSSCSASSTCGSSSCCGSC"
     assert not motif.detailed_information.macrocycle
     assert motif.peptide_subclass == "Type III"
     assert motif.detailed_information.core_features == 'Central ring: pyridine trisubstituted'
     assert motif.tail == ''

     expected_alternatives = [
         1954.0, 1972.1, 1990.1, 2008.1, 2026.1, 2044.1, 2062.2,
         2080.2, 2098.2, 2116.2, 2134.2, 2152.3, 2170.3,
     ]
     for calculated, expected in zip(motif.alternative_weights, expected_alternatives):
         self.assertAlmostEqual(calculated, expected, places=1)
     assert len(motif.to_biopython()) == 2  # no tail
Exemple #15
0
def prepare_data(logging_only: bool = False) -> List[str]:
    """ Ensures the thiopeptide classifier is built from the packaged training set

        Arguments:
            logging_only: when True, the classifier is not overwritten

        Returns:
            a list with a single error message if building the classifier
            raised a ValueError, otherwise an empty list
    """
    errors = []  # type: List[str]
    csv_path = path.get_full_path(__file__, "data", "training_set.csv")
    try:
        pickle_classifier(csv_path, prefix="thiopeptide", kernel='rbf',
                          C=2.83e5, gamma=1e-9,
                          overwrite=not logging_only)
    except ValueError:
        errors.append("failed to rebuild thiopeptide classifier")
    return errors
    def test_nisin_fasta_only(self):
        """ Checks that preprocessing a FASTA input only runs genefinding
            when the configured genefinding tool is not "none"
        """
        config.update_config({"genefinding_tool": "none"})
        filepath = path.get_full_path(__file__, "data", "nisin.fasta")
        records = record_processing.parse_input_sequence(filepath)
        # the input holds a single record with no CDS features
        assert len(records) == 1
        assert not records[0].get_cds_features()
        # make sure genefinding wasn't run with default options
        record_processing.pre_process_sequences(records, self.options,
                                                self.genefinding)
        assert not self.genefinding.was_run
        assert not records[0].get_cds_features()

        # make sure genefinding was run when not 'none'
        # NOTE(review): presumably the first pass flagged the record as
        # skipped, hence the reset before retrying - confirm
        records[0].skip = False
        config.update_config({"genefinding_tool": "not-none"})
        record_processing.pre_process_sequences(records, self.options,
                                                self.genefinding)
        assert self.genefinding.was_run
        # still no features because we used dummy genefinding
        assert not records[0].get_cds_features()
 def test_css_matches_rules(self):
     """ Checks that the classes defined in secmet.scss and the supported
         cluster types match, apart from the known base classes
     """
     rule_products = set(hmm_detection.get_supported_cluster_types("loose"))
     css_classes = set()
     extra_allowed = {
         "hybrid",  # a special case used at the javascript level
         "unknown",  # for regions containing only subregions
     }
     scss_path = path.get_full_path(__file__, "..", "css", "secmet.scss")
     with open(scss_path) as handle:
         for line in handle:
             if not line.startswith('.'):
                 continue
             # the class name is the first word after the leading dot
             css_classes.add(line[1:].split()[0])

     missing = rule_products - css_classes
     assert not missing
     # allow for the extra base classes and hybrids
     unexpected = css_classes - rule_products - extra_allowed
     assert not unexpected
Exemple #18
0
 def test_cp002271_c19(self):
     """ Runs the nrps_pks module on CP002271.1 cluster 19 and checks the
         domain predictions, consensus, polymer and gene ordering
     """
     filename = path.get_full_path(__file__, 'data',
                                   'CP002271.1.cluster019.gbk')
     results = helpers.run_and_regenerate_results_for_module(
         filename, nrps_pks, self.options)
     # catch ordering changes along with ensuring ATResults are there
     pred = results.domain_predictions["nrpspksdomains_STAUR_3982_PKS_AT.1"]
     assert pred["signature"].predictions[0][1].score == 87.5
     # ensure all genes are present and have the right consensus
     assert results.consensus == {
         'nrpspksdomains_STAUR_3982_PKS_AT.1': 'mmal',
         'nrpspksdomains_STAUR_3983_PKS_AT.1': 'mmal',
         'nrpspksdomains_STAUR_3984_PKS_AT.1': 'mmal',
         'nrpspksdomains_STAUR_3985_PKS_AT.1': 'pk',
         'nrpspksdomains_STAUR_3985_PKS_AT.2': 'mmal'
     }
     assert len(results.region_predictions) == 1
     assert list(results.region_predictions) == [1]
     assert len(results.region_predictions[1]) == 1
     # check the gene ordering and, in this case, that it used domain docking
     sc_pred = results.region_predictions[1][0]
     assert sc_pred.polymer == '(Me-ccmal) + (Me-ccmal) + (Me-ccmal)'
     assert sc_pred.domain_docking_used
     assert sc_pred.ordering == [
         'STAUR_3983', 'STAUR_3984', 'STAUR_3985', 'STAUR_3982'
     ]
     assert len(results.domain_predictions) == 10
     # each name is listed exactly once, matching the count of 10 above; a
     # repeated entry would be silently collapsed by the set literal
     expected_domains = {
         'nrpspksdomains_STAUR_3982_PKS_AT.1',
         'nrpspksdomains_STAUR_3983_PKS_AT.1',
         'nrpspksdomains_STAUR_3984_PKS_AT.1',
         'nrpspksdomains_STAUR_3985_PKS_AT.1',
         'nrpspksdomains_STAUR_3985_PKS_AT.2',
         'nrpspksdomains_STAUR_3972_PKS_KR.1',
         'nrpspksdomains_STAUR_3984_PKS_KR.1',
         'nrpspksdomains_STAUR_3985_PKS_KR.1',
         'nrpspksdomains_STAUR_3983_PKS_KR.1',
         'nrpspksdomains_STAUR_3982_PKS_KR.1'
     }
     assert set(results.domain_predictions) == expected_domains
Exemple #19
0
    def test_NZ_CP015439(self):  # pylint: disable=invalid-name
        """ Checks that small ORFs are found and kept in the results """
        gbk_path = path.get_full_path(__file__, 'data',
                                      'NZ_CP015439_section.gbk')
        results = helpers.run_and_regenerate_results_for_module(
            gbk_path, thiopeptides, self.config)
        assert results

        # the extra ORF has to be stored as a single new CDS feature
        assert len(results.cds_features) == 1
        new_features = list(results.cds_features.values())[0]
        assert len(new_features) == 1
        assert isinstance(new_features[0], secmet.features.CDSFeature)

        # the prediction made from that ORF is checked as well
        assert len(results.motifs) == 1
        motif = results.motifs[0]
        self.assertAlmostEqual(1408.6, motif.monoisotopic_mass, places=1)
        self.assertAlmostEqual(1409.5, motif.molecular_weight, places=1)
        assert motif.leader == "MRYMEGGENMQDIMLELYAEELPDITQYTAAGTSTLSTESSVLSASCP"
        assert motif.core == "TSTASTYTSMSSVS"
Exemple #20
0
def classify(record_id: str, cds_features: List[CDSFeature],  # an API, so hide unused warning
             options: ConfigType) -> FunctionResults:  # pylint: disable=unused-argument
    """ Finds smCOG based classifications for the provided CDS features

        Arguments:
            record_id: the record identifier stored in the returned results
            cds_features: a list of CDSFeatures to classify
            options: unused

        Returns:
            a FunctionResults instance for the "smcogs" tool, built from the
            hits and from a mapping of CDS name to function
    """
    smcogs_hmm = path.get_full_path(__file__, "data", "smcogs.hmm")
    hits = scan_for_functions(cds_features, smcogs_hmm,
                              hmmscan_opts=["-E", "1E-16"])
    function_by_smcog = build_function_mapping()

    functions_by_cds = {}
    for cds_name in hits:
        hit = hits[cds_name]
        # the smCOG identifier is the part of the hit id before the first colon
        identifier = hit.hit_id.split(":", 1)[0]
        functions_by_cds[cds_name] = function_by_smcog[identifier]
        hit.hit_id = hit.hit_id.replace('_', ' ')

    return FunctionResults(record_id, "smcogs", hits, functions_by_cds)
Exemple #21
0
def check_prereqs(options: ConfigType) -> List[str]:
    """ Checks that the required executables and HMM profiles can be found

        Arguments:
            options: the config, whose 'executables' is checked for each binary

        Returns:
            a list of messages, one for each missing executable or profile
    """
    required_binaries = ('hmmpfam2', 'hmmscan', 'hmmpress')
    failure_messages = ["Failed to locate file: %r" % binary_name
                        for binary_name in required_binaries
                        if binary_name not in options.executables]

    # the HMM profiles expected in this module's data directory
    required_profiles = (
        "PKSI-KR.hmm2", "PKSI-KS_N.hmm2", "PKSI-KS_C.hmm2",
        "PKSI-AT.hmm2", "PKSI-ACP.hmm2", "PKSI-DH.hmm2",
        "Thioesterase.hmm2", "PKSI-ER.hmm2", "p450.hmm2",
    )
    for profile in required_profiles:
        profile_path = path.get_full_path(__file__, "data", profile)
        if path.locate_file(profile_path) is None:
            failure_messages.append("Failed to locate file: %s" % profile)

    return failure_messages
Exemple #22
0
 def test_full_blastp_use(self):
     """ Runs the t2pks module on GQ409537.1 and checks the resulting
         cluster prediction
     """
     gbk_path = path.get_full_path(__file__, 'data', 'GQ409537.1.gbk')
     results = helpers.run_and_regenerate_results_for_module(
         gbk_path, t2pks, self.options)
     assert list(results.cluster_predictions) == [1]

     prediction = results.cluster_predictions[1]
     expected_starter = t2pks.results.Prediction('malonamyl-CoA', 2319., 0.)
     assert prediction.starter_units == [expected_starter]
     expected_elongation = t2pks.results.Prediction('8|9', 661.0, 1.3e-201)
     assert prediction.malonyl_elongations == [expected_elongation]
     assert prediction.product_classes == {
         'angucycline', 'tetracycline', 'aureolic acid', 'anthracycline'
     }

     # one weight is expected for each starter unit/elongation combination
     expected_names = {'malonamyl-CoA_8', 'malonamyl-CoA_9'}
     assert set(prediction.molecular_weights) == expected_names
     self.assertAlmostEqual(prediction.molecular_weights['malonamyl-CoA_8'],
                            638.63534)
     self.assertAlmostEqual(prediction.molecular_weights['malonamyl-CoA_9'],
                            680.67202)
Exemple #23
0
def get_sequence_counts(details_file: str) -> Dict[str, str]:
    """ Gets the number of sequences/seeds used to generate each HMM signature

        Arguments:
            details_file: the file passed to get_signature_profiles() to list
                          the HMM profiles; each profile's hmm_file is
                          resolved relative to it

        Returns:
            a dictionary mapping HMM name to the number of sequences used to
                generate it, as a string, or "?" if the profile's file has
                no line starting with 'NSEQ '
    """
    result = {}
    for hmm in get_signature_profiles(details_file):
        # the context manager closes each profile file instead of leaving
        # the handle for the garbage collector
        with open(path.get_full_path(details_file, hmm.hmm_file), 'r') as handle:
            for line in handle:
                if line.startswith('NSEQ '):
                    # skips the 'NSEQ ' prefix plus one further character
                    result[hmm.name] = line[6:].strip()
                    break
        # TODO: ideally this shouldn't ever happen, clean up inputs and change to error
        if hmm.name not in result:
            result[hmm.name] = "?"

    return result
Exemple #24
0
def prepare_data(_logging_only: bool = False) -> List[str]:
    """ Rebuilds the per-flavour CSS files when they are out of date

        Arguments:
            _logging_only: unused

        Returns:
            an empty list
    """
    flavours = ("bacteria", "fungi", "plants")
    css_dir = path.get_full_path(__file__, "css")

    # all relative paths below are resolved inside the css directory
    with path.changed_directory(css_dir):
        compiled = [os.path.abspath(flavour + ".css") for flavour in flavours]
        if not path.is_outdated(compiled, glob.glob("*.scss")):
            return []

        logging.info("CSS files out of date, rebuilding")
        for flavour in flavours:
            source = flavour + ".scss"
            assert os.path.exists(source), flavour
            css = scss.Compiler(output_style="expanded").compile(source)
            assert css
            with open(flavour + ".css", "w") as out:
                out.write(css)
    return []
Exemple #25
0
 def setUp(self):
     """ Builds a record with a single T2PKS cluster spanning the whole
         sequence and replaces the hmmscan and blastp steps with fixed results
     """
     test_file = path.get_full_path(__file__, 'data',
                                    'NC_003888.3.cluster011.gbk')
     self.record = record_processing.parse_input_sequence(test_file)[0]
     # both the cluster and its surrounding location cover the full record
     self.cluster = Cluster(FeatureLocation(0, len(self.record.seq)),
                            surrounding_location=FeatureLocation(
                                0, len(self.record.seq)),
                            cutoff=20,
                            neighbourhood_range=0,
                            tool="test",
                            product="T2PKS",
                            detection_rule="dummy rule")
     self.record.add_cluster(self.cluster)
     self.record.create_superclusters()
     self.record.create_regions()
     # fixed HMM hits, keyed by the names 'SCO5072' to 'SCO5097'
     hmm_results = {
         'SCO5072':
         [HMMResult("KR", 1, 265, evalue=3.1e-49, bitscore=159.4)],
         'SCO5079':
         [HMMResult("DIMER", 4, 293, evalue=8.7e-131, bitscore=426.8)],
         'SCO5080':
         [HMMResult("OXY", 8, 377, evalue=2.1e-14, bitscore=44.7)],
         'SCO5086':
         [HMMResult("KR_C9", 0, 261, evalue=1.9e-134, bitscore=438.4)],
         'SCO5087':
         [HMMResult("KS", 44, 463, evalue=3.5e-234, bitscore=768.6)],
         'SCO5088':
         [HMMResult("CLF_7", 1, 401, evalue=1.2e-226, bitscore=743.5)],
         'SCO5089': [HMMResult("ACP", 4, 86, evalue=5e-36, bitscore=114.2)],
         'SCO5090':
         [HMMResult("CYC_C7-C12", 1, 312, evalue=7.8e-124, bitscore=404)],
         'SCO5091':
         [HMMResult("CYC_C5-C14", 3, 297, evalue=4.4e-143, bitscore=467.3)],
         'SCO5094':
         [HMMResult("MET", 40, 155, evalue=9.8e-11, bitscore=32.7)],
         'SCO5097':
         [HMMResult("KR", 3, 247, evalue=3.3e-40, bitscore=129.8)],
     }
     # the hmmscan step returns the fixed hits, the blastp step returns nothing
     # NOTE(review): presumably these mocks are restored in tearDown - confirm
     mock("t2pks_analysis.run_t2pks_hmmscan", returns=hmm_results)
     mock("t2pks_analysis.run_starter_unit_blastp", returns={})
Exemple #26
0
    def test_nosiheptide(self):
        "Test thiopeptide prediction for nosiheptide - nosM"
        rec = seqio.read(
            path.get_full_path(__file__, 'data', 'nosi_before_analysis.gbk'))
        rec = secmet.Record.from_biopython(rec, "bacteria")
        rec.get_cluster(1).trim_overlapping()
        # the record starts with 56 features and no motifs
        assert rec.get_feature_count() == 56
        assert not rec.get_cds_motifs()
        result = thiopeptides.specific_analysis(rec)
        # running the analysis must not change the record's feature count
        assert rec.get_feature_count() == 56

        assert len(result.motifs) == 1

        # adding the results to the record adds exactly one feature, the motif
        result.add_to_record(rec)
        # debugging output describing each motif in the record
        for i in rec.get_cds_motifs():
            print(i, i.leader, i.score, i.rodeo_score)
        assert len(rec.get_cds_motifs()) == 1, rec.get_cds_motifs()
        assert rec.get_feature_count() == 57

        # check the motif in an existing CDS
        prepeptide = rec.get_cds_motifs()[0]
        # the record holds the same object as the results, not a copy
        assert prepeptide is result.motifs[0]

        self.assertAlmostEqual(1315.3, prepeptide.monoisotopic_mass, places=1)
        self.assertAlmostEqual(1316.5, prepeptide.molecular_weight, places=1)
        assert prepeptide.leader == "MDAAHLSDLDIDALEISEFLDESRLEDSEVVAKVMSA"
        assert prepeptide.core == "SCTTCECCCSCSS"
        assert prepeptide.macrocycle == "26-member"
        assert prepeptide.peptide_subclass == "Type I"
        # the first two mature weights are checked individually, the
        # remaining ones against the list of expected values
        self.assertAlmostEqual(1222.4, prepeptide.mature_weights[0], places=1)
        self.assertAlmostEqual(1221.2, prepeptide.mature_weights[1], places=1)
        for calc, expected in zip(
                prepeptide.mature_weights[2:],
            [1240.4, 1258.4, 1276.5, 1294.5, 1312.5, 1330.5]):
            self.assertAlmostEqual(calc, expected, places=1)
        expected_core_features = (
            "Central ring: pyridine tetrasubstituted (hydroxyl group present);"
            " second macrocycle")
        assert prepeptide.core_features == expected_core_features
        assert prepeptide.tail_reaction == 'dealkylation of C-Terminal residue; amidation'
Exemple #27
0
 def setUp(self):
     """ Fills a fresh record with rebuilt antiSMASH domains and dummy p450
         PFAM domains
     """
     self.record = secmet.Record()
     # except for Thioesterase, all domains were found in BN001301.1
     # TE domains were found in Y16952
     domain_inputs = [
         ("PKS_KS.input", "PKS_KS"),
         ("AT.input", "PKS_AT"),
         ("ACP.input", "ACP"),
         ("DH.input", "PKS_DH"),
         ("KR.input", "PKS_KR"),
         ("TE.input", "Thioesterase"),
         ("ER.input", "PKS_ER"),
     ]
     for input_name, kind in domain_inputs:
         for rebuilt in rebuild_domains(input_name, kind):
             self.record.add_antismash_domain(rebuilt)

     # these PFAMs found in BN001301.1 with clusterhmmer, one was excluded
     # to avoid a Biopython SearchIO bug
     p450_path = path.get_full_path(__file__, 'data', "p450.input")
     translations = fasta.read_fasta(p450_path)
     for name, translation in translations.items():
         p450 = DummyPFAMDomain(domain="p450",
                                domain_id="PFAM_p450_" + name)
         p450.translation = translation
         self.record.add_pfam_domain(p450)
Exemple #28
0
 def setUp(self):
     """ Fills a fresh record with rebuilt antiSMASH domains and p450 PFAM
         domains that all share one dummy location
     """
     self.record = secmet.Record()
     # except for Thioesterase, all domains were found in BN001301.1
     # TE domains were found in Y16952
     domain_inputs = [
         ("PKS_KS.input", "PKS_KS"),
         ("AT.input", "PKS_AT"),
         ("ACP.input", "ACP"),
         ("DH.input", "PKS_DH"),
         ("KR.input", "PKS_KR"),
         ("TE.input", "Thioesterase"),
         ("ER.input", "PKS_ER"),
     ]
     for input_name, kind in domain_inputs:
         for rebuilt in rebuild_domains(input_name, kind):
             self.record.add_antismash_domain(rebuilt)

     # these PFAMs found in BN001301.1 with clusterhmmer, one was excluded
     # to avoid a Biopython SearchIO bug
     shared_location = secmet.features.FeatureLocation(1, 100)
     p450_path = path.get_full_path(__file__, 'data', "p450.input")
     translations = fasta.read_fasta(p450_path)
     for name, translation in translations.items():
         p450 = secmet.features.PFAMDomain(shared_location,
                                           protein_start=5,
                                           protein_end=10,
                                           description="test",
                                           identifier="PF00001",
                                           tool="test")
         p450.translation = translation
         p450.domain_id = "PFAM_p450_" + name
         p450.domain = "p450"
         self.record.add_pfam_domain(p450)
 def test_cp002271_c19(self):
     """ Runs the nrps_pks module on CP002271.1 cluster 19 and checks the
         PKS results, consensus and cluster prediction
     """
     gbk_path = path.get_full_path(__file__, 'data',
                                   'CP002271.1.cluster019.gbk')
     results = helpers.run_and_regenerate_results_for_module(
         gbk_path, nrps_pks, self.options)

     # catch ordering changes along with ensuring ATResults are there
     signature_hits = results.pks.method_results["signature"]["STAUR_3982_AT1"]
     assert signature_hits[0].score == 87.5

     # ensure all genes are present and have the right consensus
     expected_consensus = {
         'STAUR_3982_AT1': 'ohmmal',
         'STAUR_3983_AT1': 'ccmmal',
         'STAUR_3984_AT1': 'ccmmal',
         'STAUR_3985_AT1': 'pk',
         'STAUR_3985_AT2': 'pk'
     }
     assert results.consensus == expected_consensus

     # check the gene ordering and, in this case, that it used domain docking
     expected_predictions = {
         '1': ['(ccmmal) + (ccmmal) + (pk-pk) + (ohmmal)', True]
     }
     assert results.cluster_predictions == expected_predictions

     # no A domains in the cluster, so make sure no NRPS results
     assert results.nrps == {}
Exemple #30
0
def check_prereqs() -> List[str]:
    """ Checks that the required executables and the smCOG HMM database exist

        If any of the pressed files of the database is missing, hmmpress is
        run once to regenerate them.

        Returns:
            a list of messages, one for each missing executable, missing
            database or failed hmmpress run
    """
    failure_messages = []
    required_binaries = ('muscle', 'hmmscan', 'hmmpress', 'fasttree', 'java')
    for binary_name in required_binaries:
        if path.locate_executable(binary_name) is None:
            failure_messages.append("Failed to locate file: %r" % binary_name)

    pressed_extensions = ('.h3f', '.h3i', '.h3m', '.h3p')
    for hmm_name in ['smcogs.hmm']:
        hmm_path = path.get_full_path(__file__, 'data', hmm_name)
        if path.locate_file(hmm_path) is None:
            failure_messages.append("Failed to locate file %r" % hmm_path)
            continue

        # the first missing pressed file is enough to trigger a rebuild
        needs_press = any(path.locate_file(hmm_path + ext) is None
                          for ext in pressed_extensions)
        if not needs_press:
            continue
        # regenerate them
        result = subprocessing.run_hmmpress(hmm_path)
        if not result.successful():
            failure_messages.append("Failed to hmmpress %s: %s" %
                                    (hmm_path, result.stderr.rstrip()))
    return failure_messages