Beispiel #1
0
    def test_construct_references(self):
        """Check the citations ReferenceManager builds for two option sets.

        First, with options populated from a minimal command line
        (``-mtz foo -fasta bar``), the plain-text citation list must be
        contained in the expected reference text. Second, after switching the
        options to an NMR / transmembrane / SCWRL run without MR, the HTML
        citation block must equal the expected markup exactly.
        """
        from ample.util import config_util, argparse_util

        options = config_util.AMPLEConfigOptions()
        argso = argparse_util.process_command_line(args=['-mtz', 'foo', '-fasta', 'bar'])
        options.populate(argso)

        refMgr = reference_manager.ReferenceManager(options.d)
        # One literal per citation; adjacent literals are concatenated by the
        # parser, so this is a single string.
        ref_references = (
            '* Bibby et al. (2012). AMPLE: A cluster-and-truncate approach to solve the crystal structures of small proteins using rapidly computed ab initio models. Acta Crystallogr. Sect. D Biol. Crystallogr. 68(12), 1622-1631. [doi:10.1107/S0907444912039194]\n\n'
            '* Winn et al. (2011). Overview of the CCP4 suite and current developments. Acta Crystallographica Section D 67(4), 235-242. [doi:10.1107/S0907444910045749]\n\n'
            '* Thomas et al. (2015). Routine phasing of coiled-coil protein crystal structures with AMPLE. IUCrJ 2(2), 198-206. [doi:10.1107/S2052252515002080]\n\n'
            '* Simkovic et al. (2016). Residue contacts predicted by evolutionary covariance extend the application of ab initio molecular replacement to larger and more challenging protein folds. IUCrJ 3(4), 259-270. [doi:10.1107/S2052252516008113]\n\n'
            '* Bradley et al. (2005). Toward High-Resolution de Novo Structure Prediction for Small Proteins. Science 309(5742), 1868-1871. [doi:10.1126/science.1113801]\n\n'
            '* Grosse-Kunstleve et al. (2002). The Computational Crystallography Toolbox: crystallographic algorithms in a reusable software framework. Journal of Applied Crystallography 35(1), 126-136. [doi:10.1107/S0021889801017824]\n\n'
            '* Theobald et al. (2006). THESEUS: maximum likelihood superpositioning and analysis of macromolecular structures. Bioinformatics 22(17), 2171-2172. [doi:10.1093/bioinformatics/btl332]\n\n'
            '* Krissinel et al. (2012). Enhanced fold recognition using efficient short fragment clustering. Journal of molecular biochemistry 1(2), 76-85. [doi:]\n\n'
            '* Zhang et al. (2004). SPICKER: A clustering approach to identify near-native protein folds. Journal of Computational Chemistry 25(6), 865-871. [doi:10.1002/jcc.20011]\n\n'
            '* Keegan et al. (2018). Recent developments in MrBUMP: better search-model preparation, graphical interaction with search models, and solution improvement and assessment. Acta Crystallographica Section D 74(3), 167-182. [doi:10.1107/S2059798318003455]\n\n'
            '* Murshudov et al. (1997). Refinement of macromolecular structures by the maximum-likelihood method. Acta Crystallogr. Sect. D Biol. Crystallogr. 53(3), 240-255. [doi:10.1107/S0907444996012255]\n\n'
            '* Thorn et al. (2013). Extending molecular-replacement solutions with SHELXE. Acta Crystallogr. Sect. D Biol. Crystallogr. 69(11), 2251-2256. [doi:10.1107/S0907444913027534]\n\n'
            '* Cohen et al. (2008). ARP/wARP and molecular replacement: the next generation. Acta Crystallogr. Sect. D Biol. Crystallogr. 64(1), 49-60. [doi:10.1107/S0907444907047580]\n\n'
        )
        # The Windows expectation is the same text with CRLF line endings.
        ref_references_windows = ref_references.replace('\n', '\r\n')

        # We may not run (e.g.) arpwarp, so need to be tolerant of missing
        # citations: only require that what was produced is contained in the
        # full expected text. assertIn reports a mismatch as a test failure,
        # whereas str.index() would abort the test with a ValueError.
        if sys.platform.startswith("win"):
            self.assertIn(refMgr.citation_list_as_text, ref_references_windows)
        else:
            self.assertIn(refMgr.citation_list_as_text, ref_references)

        options.d['nmr_model_in'] = 'foo'
        options.d['transmembrane'] = True
        options.d['use_scwrl'] = True
        options.d['do_mr'] = False
        refMgr = reference_manager.ReferenceManager(options.d)
        ref_html = (
            '<h3>References</h3><ol>'
            '<li> Bibby et al. (2012). AMPLE: A cluster-and-truncate approach to solve the crystal structures of small proteins using rapidly computed ab initio models. Acta Crystallogr. Sect. D Biol. Crystallogr. 68(12), 1622-1631. [doi:10.1107/S0907444912039194]</li>'
            '<li> Winn et al. (2011). Overview of the CCP4 suite and current developments. Acta Crystallographica Section D 67(4), 235-242. [doi:10.1107/S0907444910045749]</li>'
            '<li> Thomas et al. (2015). Routine phasing of coiled-coil protein crystal structures with AMPLE. IUCrJ 2(2), 198-206. [doi:10.1107/S2052252515002080]</li>'
            '<li> Simkovic et al. (2016). Residue contacts predicted by evolutionary covariance extend the application of ab initio molecular replacement to larger and more challenging protein folds. IUCrJ 3(4), 259-270. [doi:10.1107/S2052252516008113]</li>'
            '<li> Bradley et al. (2005). Toward High-Resolution de Novo Structure Prediction for Small Proteins. Science 309(5742), 1868-1871. [doi:10.1126/science.1113801]</li>'
            '<li> Bibby et al. (2013). Application of the AMPLE cluster-and-truncate approach to NMR structures for molecular replacement. Acta Crystallogr. Sect. D Biol. Crystallogr. 69(11), 2194-2201. [doi:10.1107/S0907444913018453]</li>'
            '<li> Thomas et al. (2017). Approaches to ab initio molecular replacement of alpha-helical transmembrane proteins. Acta Crystallographica Section D 73(12), 985-996. [doi:10.1107/S2059798317016436]</li>'
            '<li> Grosse-Kunstleve et al. (2002). The Computational Crystallography Toolbox: crystallographic algorithms in a reusable software framework. Journal of Applied Crystallography 35(1), 126-136. [doi:10.1107/S0021889801017824]</li>'
            '<li> Theobald et al. (2006). THESEUS: maximum likelihood superpositioning and analysis of macromolecular structures. Bioinformatics 22(17), 2171-2172. [doi:10.1093/bioinformatics/btl332]</li>'
            '<li> Krissinel et al. (2012). Enhanced fold recognition using efficient short fragment clustering. Journal of molecular biochemistry 1(2), 76-85. [doi:]</li>'
            '<li> Krivov et al. (2009). Improved prediction of protein side-chain conformations with SCWRL4. Proteins: Struct., Funct., Bioinf. 77(4), 778-795. [doi:10.1002/prot.22488]</li>'
            '</ol>'
        )
        self.assertEqual(refMgr.citations_as_html, ref_html)
Beispiel #2
0
 def create_citation_tab(self, ample_dict):
     """Create the "Citation" tab and fill it with the relevant references.

     Does nothing (and returns None) when ``self.citation_tab_id`` is
     already set. Otherwise the tab is inserted, the citations are written
     to a bibtex file via the reference manager, and the tab is populated.

     Args:
         ample_dict: options dictionary handed to ``ReferenceManager`` and
             to ``save_citations_to_file``.

     Returns:
         The id of the citation tab, or None if the tab already existed.
     """
     if self.citation_tab_id:
         return None

     self.citation_tab_id = "citation_tab"
     pyrvapi.rvapi_insert_tab(
         self.citation_tab_id, "Citation", self.log_tab_id, False)

     manager = reference_manager.ReferenceManager(ample_dict)
     bibtex_path = manager.save_citations_to_file(ample_dict)

     if self.ccp4i2:
         # Under ccp4i2 everything is dumped into xml, so no markup tags can
         # be used: show the plain-text citations and skip the download link.
         pyrvapi.rvapi_add_text(
             manager.citations_as_text, self.citation_tab_id, 0, 0, 1, 1)
         return self.citation_tab_id

     # Methods section, then the references, then a note on the bibtex file.
     html = (
         manager.methods_as_html
         + manager.citations_as_html
         + '<hr><p>A bibtex file with the relevant citations has been saved to: {}</p>'.format(bibtex_path)
     )
     pyrvapi.rvapi_add_text(html, self.citation_tab_id, 0, 0, 1, 1)
     pyrvapi.rvapi_add_data(
         "bibtex_file", "Citations as BIBTEX", self.fix_path(bibtex_path),
         "text", self.citation_tab_id, 2, 0, 1, 1, True)
     return self.citation_tab_id
Beispiel #3
0
    def main(self, args=None):
        """Run the complete AMPLE pipeline.

        The pipeline lives in a method because the multiprocessing module (on
        Windows only) requires that the main module can be imported. AMPLE
        therefore needs to be an importable python script, with the call to
        this routine protected by ``if __name__ == "__main__":``.

        The stages run in order: option processing and setup, optional
        analysis of the native structure (benchmark mode), optional splitting
        of an NMR ensemble, modelling, ensembling, molecular replacement,
        benchmarking, and finally citation output, results display and
        cleanup. The config file is rewritten after each stage.

        Args:
            args: optional list of command-line arguments for the program,
                passed to ``argparse_util.process_command_line``; required
                for testing.

        Returns:
            None
        """
        argso = argparse_util.process_command_line(args=args)

        self.amopt = amopt = config_util.AMPLEConfigOptions()
        amopt.populate(argso)

        # Setup things like logging, file structure, etc...
        amopt.d = self.setup(amopt.d)
        rosetta_modeller = options_processor.process_rosetta_options(amopt.d)

        # Display the parameters used
        logger.debug(amopt.prettify_parameters())

        amopt.write_config_file()
        #######################################################
        # SCRIPT PROPER STARTS HERE
        time_start = time.time()

        if amopt.d['benchmark_mode'] and amopt.d['native_pdb']:
            # Process the native before we do anything else
            benchmark_util.analysePdb(amopt.d)

        # Create constituent models from an NMR ensemble
        if amopt.d['nmr_model_in']:
            nmr_mdir = os.path.join(amopt.d['work_dir'], 'nmr_models')
            amopt.d['modelling_workdir'] = nmr_mdir
            logger.info(
                'Splitting NMR ensemble into constituent models in directory: {0}'
                .format(nmr_mdir))
            amopt.d['models'] = pdb_edit.split_pdb(amopt.d['nmr_model_in'],
                                                   directory=nmr_mdir,
                                                   strip_hetatm=True,
                                                   same_size=True)
            logger.info('NMR ensemble contained {0} models'.format(
                len(amopt.d['models'])))

        # Modelling business happens here
        self.modelling(amopt.d, rosetta_modeller)
        amopt.write_config_file()

        # Ensembling business next
        if amopt.d['make_ensembles']:
            self.ensembling(amopt.d)
            amopt.write_config_file()

        # Some MR here
        if amopt.d['do_mr']:
            self.molecular_replacement(amopt.d)
            amopt.write_config_file()

        # Timing data
        time_stop = time.time()
        elapsed_time = time_stop - time_start
        run_in_min = elapsed_time / 60
        run_in_hours = run_in_min / 60
        msg = os.linesep + \
            'All processing completed  (in {0:6.2F} hours)'.format(
                run_in_hours) + os.linesep
        msg += '----------------------------------------' + os.linesep
        # Use the same logger as every other message in this routine rather
        # than the root-level logging.info().
        logger.info(msg)

        # Benchmark mode
        if amopt.d['benchmark_mode']:
            self.benchmarking(amopt.d)

        # Written unconditionally, so it also records any benchmarking results.
        amopt.write_config_file()
        # Flag to show that we reached the end without error - useful for integration testing
        amopt.d['AMPLE_finished'] = True
        ample_util.save_amoptd(amopt.d)

        logger.info("AMPLE finished at: %s",
                    time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
        ref_mgr = reference_manager.ReferenceManager(amopt.d)
        ref_mgr.save_citations_to_file(amopt.d)
        logger.info(ref_mgr.citations_as_text)
        logger.info(reference_manager.footer)

        # Finally update pyrvapi results
        if self.ample_output:
            self.ample_output.display_results(amopt.d)
            self.ample_output.rvapi_shutdown(amopt.d)

        self.cleanup(amopt.d)
        return
Beispiel #4
0
    def main(self, args=None):
        """Run the complete AMPLE pipeline.

        The pipeline lives in a method because the multiprocessing module (on
        Windows only) requires that the main module can be imported. AMPLE
        therefore needs to be an importable python script, with the call to
        this routine protected by ``if __name__ == "__main__":``.

        The stages run in order: work directory and logging setup, option
        processing, deprecation warnings, import of any supplied models,
        optional analysis of the native structure (benchmark mode), optional
        splitting of an NMR ensemble, modelling (if required), ensembling,
        molecular replacement, benchmarking, and finally citation output,
        results display and cleanup.

        Args:
            args: optional list of command-line arguments for the program,
                passed to ``argparse_util.process_command_line``; required
                for testing.

        Returns:
            None
        """
        argso = argparse_util.process_command_line(args=args)
        # Work directory and loggers need to be setup before we do anything else
        self.setup_workdir(argso)
        global logger
        logger = logging_util.setup_logging(argso)

        # Logging and work directories in place so can start work
        self.amopt = amopt = config_util.AMPLEConfigOptions()
        amopt.populate(argso)
        amopt.d = self.setup(amopt.d)
        rosetta_modeller = options_processor.process_rosetta_options(amopt.d)
        logger.debug(
            amopt.prettify_parameters())  # Display the parameters used
        amopt.write_config_file()
        time_start = time.time()

        # Highlight deprecated command line arguments. Each entry is the
        # option name plus the advice appended to the warning text.
        deprecated_options = (
            ('submit_cluster', ''),
            ('submit_pe_lsf', ' Use -submit_pe instead'),
            ('submit_pe_sge', ' Use -submit_pe instead'),
        )
        for option, advice in deprecated_options:
            if amopt.d[option]:
                message = "-%s has been deprecated and will be removed in version %s!%s" % (
                    option, 1.6, advice)
                warnings.warn(message, DeprecationWarning)

        # Process any files we may have been given
        model_results = process_models.extract_and_validate_models(amopt.d)
        if model_results:
            process_models.handle_model_import(amopt.d, model_results)
        if amopt.d['benchmark_mode'] and amopt.d['native_pdb']:
            # Process the native before we do anything else
            benchmark_util.analysePdb(amopt.d)

        # Create constituent models from an NMR ensemble
        if amopt.d['nmr_model_in']:
            nmr_mdir = os.path.join(amopt.d['work_dir'], 'nmr_models')
            amopt.d['modelling_workdir'] = nmr_mdir
            logger.info(
                'Splitting NMR ensemble into constituent models in directory: {0}'
                .format(nmr_mdir))
            amopt.d['processed_models'] = pdb_edit.split_pdb(
                amopt.d['nmr_model_in'],
                directory=nmr_mdir,
                strip_hetatm=True,
                same_size=True)
            logger.info('NMR ensemble contained {0} models'.format(
                len(amopt.d['processed_models'])))

        # Modelling business happens here
        if self.modelling_required(amopt.d):
            self.modelling(amopt.d, rosetta_modeller)
            ample_util.save_amoptd(amopt.d)
            amopt.write_config_file()

        # Ensembling business next
        if amopt.d['make_ensembles']:
            self.ensembling(amopt.d)
            amopt.write_config_file()

        # Some MR here
        if amopt.d['do_mr']:
            self.molecular_replacement(amopt.d)
            amopt.write_config_file()

        # Timing data
        time_stop = time.time()
        elapsed_time = time_stop - time_start
        run_in_min = elapsed_time / 60
        run_in_hours = run_in_min / 60
        msg = os.linesep + 'All processing completed  (in {0:6.2F} hours)'.format(
            run_in_hours) + os.linesep
        msg += '----------------------------------------' + os.linesep
        # Log through the logger returned by setup_logging, like every other
        # message here, rather than the root-level logging.info().
        logger.info(msg)

        # Benchmark mode
        if amopt.d['benchmark_mode']:
            self.benchmarking(amopt.d)

        # Written unconditionally, so it also records any benchmarking results.
        amopt.write_config_file()
        # Flag to show that we reached the end without error - useful for integration testing
        amopt.d['AMPLE_finished'] = True
        ample_util.save_amoptd(amopt.d)

        logger.info("AMPLE finished at: %s",
                    time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
        ref_mgr = reference_manager.ReferenceManager(amopt.d)
        ref_mgr.save_citations_to_file(amopt.d)
        logger.info(ref_mgr.citations_as_text)
        logger.info(reference_manager.footer)

        # Finally update pyrvapi results
        if self.ample_output:
            self.ample_output.display_results(amopt.d)
            self.ample_output.rvapi_shutdown(amopt.d)

        self.cleanup(amopt.d)
        return