def benchmarking(self, optd):
    """Run the benchmark analysis, on the cluster queue or locally.

    When running on a cluster the options dictionary is pickled so the
    submitted job can read its parameters, and the results are unpickled
    once the queue has finished.
    """
    if optd['submit_cluster']:
        # The submitted job reads its parameters from the pickled dictionary.
        ample_util.save_amoptd(optd)
        submit_kwargs = {
            'monitor': monitor,
            'nproc': optd['nproc'],
            'job_time': 43200,
            'job_name': 'benchmark',
            'submit_cluster': optd['submit_cluster'],
            'submit_qtype': optd['submit_qtype'],
            'submit_queue': optd['submit_queue'],
            'submit_pe_lsf': optd['submit_pe_lsf'],
            'submit_pe_sge': optd['submit_pe_sge'],
            'submit_array': optd['submit_array'],
            'submit_max_array': optd['submit_max_array'],
        }
        script = benchmark_util.cluster_script(optd)
        workers_util.run_scripts(job_scripts=[script], **submit_kwargs)
        # Queue finished, so pick up the results pickled by the job.
        optd.update(ample_util.read_amoptd(optd['results_path']))
    else:
        benchmark_util.analyse(optd)
        ample_util.save_amoptd(optd)
    return
def benchmarking(self, optd):
    """Run the benchmark analysis, via a queue task unless running locally."""
    if optd['submit_qtype'] == 'local':
        benchmark_util.analyse(optd)
        ample_util.save_amoptd(optd)
        return
    # The remote job unpickles this dictionary to obtain its parameters.
    ample_util.save_amoptd(optd)
    script = benchmark_util.cluster_script(optd)
    task_kwargs = dict(
        cwd=optd['work_dir'],
        environment=optd['submit_pe'],
        run_time=43200,
        name='benchmark',
        nprocesses=optd['nproc'],
        max_array_size=optd['submit_max_array'],
        queue=optd['submit_queue'],
        shell="/bin/bash",
    )
    with TaskFactory(optd['submit_qtype'], script, **task_kwargs) as task:
        task.run()
        task.wait(interval=5, monitor_f=monitor)
    # Queue finished, so unpickle the results written by the job.
    optd.update(ample_util.read_amoptd(optd['results_path']))
    return
def process_models(self, optd):
    """Extract/validate the input models and set QUARK side-chain handling."""
    process_models.extract_and_validate_models(optd)
    # QUARK decoys need their side-chain treatment decided here.
    if optd['quark_models']:
        if not ample_util.is_exe(optd['scwrl_exe']):
            # Without SCWRL the side chains are left untouched.
            logger.info('Using QUARK models but SCWRL is not installed '
                        'so only using %s sidechains', UNMODIFIED)
            optd['side_chain_treatments'] = [UNMODIFIED]
        else:
            # SCWRL is available, so always add sidechains to QUARK models.
            optd['use_scwrl'] = True
    ample_util.save_amoptd(optd)
def process_models(self, optd):
    """Extract and validate the models, special-casing QUARK decoys."""
    process_models.extract_and_validate_models(optd)
    is_quark = optd['quark_models']
    # Short-circuit keeps is_exe() from running unless these are QUARK models.
    if is_quark and ample_util.is_exe(optd['scwrl_exe']):
        # SCWRL present: always rebuild sidechains on QUARK models.
        optd['use_scwrl'] = True
    elif is_quark:
        # No SCWRL, so restrict treatment to the unmodified side chains.
        logger.info(
            'Using QUARK models but SCWRL is not installed '
            'so only using %s sidechains', UNMODIFIED)
        optd['side_chain_treatments'] = [UNMODIFIED]
    ample_util.save_amoptd(optd)
def benchmarking(self, optd):
    """Benchmark the run: submitted to the cluster queue, or analysed locally."""
    if not optd['submit_cluster']:
        # Local path: analyse in-process and persist the updated options.
        benchmark_util.analyse(optd)
        ample_util.save_amoptd(optd)
        return
    # Pickle the dictionary so the cluster job can read its parameters.
    ample_util.save_amoptd(optd)
    script = benchmark_util.cluster_script(optd)
    workers_util.run_scripts(
        job_scripts=[script],
        monitor=monitor,
        nproc=optd['nproc'],
        job_time=43200,
        job_name='benchmark',
        submit_cluster=optd['submit_cluster'],
        submit_qtype=optd['submit_qtype'],
        submit_queue=optd['submit_queue'],
        submit_pe_lsf=optd['submit_pe_lsf'],
        submit_pe_sge=optd['submit_pe_sge'],
        submit_array=optd['submit_array'],
        submit_max_array=optd['submit_max_array'])
    # Queue finished, so unpickle the results produced by the job.
    optd.update(ample_util.read_amoptd(optd['results_path']))
    return
def main(self, args=None):
    """Main AMPLE routine.

    We require this as the multiprocessing module (only on **!!*%$$!! Windoze)
    requires that the main module can be imported. We there need ample to be
    a python script that can be imported, hence the main routine with its
    calling protected by the if __name__=="__main__":...

    args is an option argument that can contain the command-line arguments
    for the program - required for testing.
    """
    argso = argparse_util.process_command_line(args=args)
    self.amopt = amopt = config_util.AMPLEConfigOptions()
    amopt.populate(argso)
    # Setup things like logging, file structure, etc...
    amopt.d = self.setup(amopt.d)
    rosetta_modeller = options_processor.process_rosetta_options(amopt.d)
    # Display the parameters used
    logger.debug(amopt.prettify_parameters())
    amopt.write_config_file()
    #######################################################
    # SCRIPT PROPER STARTS HERE
    time_start = time.time()
    # Create function for monitoring jobs - static function decorator?
    # NOTE(review): `monitor` is not referenced later in this method -
    # presumably picked up elsewhere (e.g. as a module-level name); verify.
    if self.ample_output:
        def monitor():
            return self.ample_output.display_results(amopt.d)
    else:
        monitor = None
    if amopt.d['benchmark_mode'] and amopt.d['native_pdb']:
        # Process the native before we do anything else
        benchmark_util.analysePdb(amopt.d)
    # Create constituent models from an NMR ensemble
    if amopt.d['nmr_model_in']:
        nmr_mdir = os.path.join(amopt.d['work_dir'], 'nmr_models')
        amopt.d['modelling_workdir'] = nmr_mdir
        logger.info(
            'Splitting NMR ensemble into constituent models in directory: {0}'
            .format(nmr_mdir))
        amopt.d['models'] = pdb_edit.split_pdb(amopt.d['nmr_model_in'],
                                               directory=nmr_mdir,
                                               strip_hetatm=True,
                                               same_size=True)
        logger.info('NMR ensemble contained {0} models'.format(
            len(amopt.d['models'])))
    # Modelling business happens here
    self.modelling(amopt.d, rosetta_modeller)
    amopt.write_config_file()
    # Ensembling business next
    if amopt.d['make_ensembles']:
        self.ensembling(amopt.d)
        amopt.write_config_file()
    # Some MR here
    if amopt.d['do_mr']:
        self.molecular_replacement(amopt.d)
        amopt.write_config_file()
    # Timing data
    time_stop = time.time()
    elapsed_time = time_stop - time_start
    run_in_min = elapsed_time / 60
    run_in_hours = run_in_min / 60
    msg = os.linesep + \
        'All processing completed (in {0:6.2F} hours)'.format(
            run_in_hours) + os.linesep
    msg += '----------------------------------------' + os.linesep
    # NOTE(review): the root `logging` module is used here while the rest of
    # this method uses the module-level `logger` - confirm this is intended.
    logging.info(msg)
    # Benchmark mode
    if amopt.d['benchmark_mode']:
        self.benchmarking(amopt.d)
        amopt.write_config_file()
    amopt.write_config_file()
    # Flag to show that we reached the end without error - useful for integration testing
    amopt.d['AMPLE_finished'] = True
    ample_util.save_amoptd(amopt.d)
    logger.info("AMPLE finished at: %s",
                time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
    ref_mgr = reference_manager.ReferenceManager(amopt.d)
    ref_mgr.save_citations_to_file(amopt.d)
    logger.info(ref_mgr.citations_as_text)
    logger.info(reference_manager.footer)
    # Finally update pyrvapi results
    if self.ample_output:
        self.ample_output.display_results(amopt.d)
        self.ample_output.rvapi_shutdown(amopt.d)
    self.cleanup(amopt.d)
    return
def molecular_replacement(self, optd):
    """Run MRBUMP molecular replacement on the ensembles and collect results.

    Generates the MRBUMP run scripts if they do not already exist, runs them
    (locally or on a cluster), then extracts and summarises the results into
    ``optd`` and saves the options dictionary.
    """
    if not optd['mrbump_scripts']:
        # MRBUMP analysis of the ensembles
        logger.info('----- Running MRBUMP on ensembles--------\n\n')
        if len(optd['ensembles']) < 1:
            msg = "ERROR! Cannot run MRBUMP as there are no ensembles!"
            exit_util.exit_error(msg)
        if optd['mrbump_dir'] is None:
            bump_dir = os.path.join(optd['work_dir'], 'MRBUMP')
            optd['mrbump_dir'] = bump_dir
        else:
            bump_dir = optd['mrbump_dir']
        if not os.path.exists(bump_dir):
            os.mkdir(bump_dir)
        optd['mrbump_results'] = []
        logger.info("Running MRBUMP jobs in directory: %s", bump_dir)
        # Set an ensemble-specific phaser_rms if required
        if optd['phaser_rms'] == 'auto':
            ensembler.set_phaser_rms_from_subcluster_score(optd)
        # Sort the ensembles in a favourable way
        logger.info("Sorting ensembles")
        sort_keys = [
            'cluster_num', 'truncation_level',
            'subcluster_radius_threshold', 'side_chain_treatment'
        ]
        ensemble_pdbs_sorted = ensembler.sort_ensembles(
            optd['ensembles'],
            optd['ensembles_data'],
            keys=sort_keys,
            prioritise=True)
        # Create job scripts
        logger.info("Generating MRBUMP runscripts")
        optd['mrbump_scripts'] = mrbump_util.write_mrbump_files(
            ensemble_pdbs_sorted,
            optd,
            job_time=mrbump_util.MRBUMP_RUNTIME,
            ensemble_options=optd['ensemble_options'],
            directory=bump_dir)
    # Create function for monitoring jobs - static function decorator?
    if self.ample_output:
        def monitor():
            # Pull the latest MRBUMP results into optd before refreshing
            # the GUI display.
            r = mrbump_util.ResultsSummary()
            r.extractResults(optd['mrbump_dir'], purge=bool(optd['purge']))
            optd['mrbump_results'] = r.results
            return self.ample_output.display_results(optd)
    else:
        monitor = None
    # Save results here so that we have the list of scripts and mrbump directory set
    ample_util.save_amoptd(optd)
    # Change to mrbump directory before running
    # NOTE(review): this changes the process working directory and never
    # restores it afterwards.
    os.chdir(optd['mrbump_dir'])
    ok = workers_util.run_scripts(
        job_scripts=optd['mrbump_scripts'],
        monitor=monitor,
        check_success=mrbump_util.checkSuccess,
        early_terminate=optd['early_terminate'],
        nproc=optd['nproc'],
        job_time=mrbump_util.MRBUMP_RUNTIME,
        job_name='mrbump',
        submit_cluster=optd['submit_cluster'],
        submit_qtype=optd['submit_qtype'],
        submit_queue=optd['submit_queue'],
        submit_pe_lsf=optd['submit_pe_lsf'],
        submit_pe_sge=optd['submit_pe_sge'],
        submit_array=optd['submit_array'],
        submit_max_array=optd['submit_max_array'])
    if not ok:
        msg = "An error code was returned after running MRBUMP on the ensembles!\n" + \
              "For further information check the logs in directory: {0}".format(optd['mrbump_dir'])
        logger.critical(msg)
    # Collect the MRBUMP results
    results_summary = mrbump_util.ResultsSummary()
    optd['mrbump_results'] = results_summary.extractResults(
        optd['mrbump_dir'], purge=bool(optd['purge']))
    optd['success'] = results_summary.success
    ample_util.save_amoptd(optd)
    summary = mrbump_util.finalSummary(optd)
    logger.info(summary)
def modelling(self, optd, rosetta_modeller=None):
    """Create or import the decoy models.

    Covers fragment generation, contact prediction/restraints, ROSETTA
    ab-initio or NMR-remodelling runs, model import, and optional
    contact-based sub-selection of the decoys. Results are written back
    into ``optd`` and the dictionary is saved at the end.
    """
    if not (optd['import_models'] or optd['make_frags']
            or optd['make_models'] or optd['nmr_remodel']):
        return
    # Set the direcotry where the final models will end up
    optd['models_dir'] = os.path.join(optd['work_dir'], 'models')
    if not os.path.isdir(optd['models_dir']):
        os.mkdir(optd['models_dir'])
    if not rosetta_modeller:
        rosetta_modeller = options_processor.process_rosetta_options(optd)
    # Make Rosetta fragments
    if optd['make_frags']:
        rosetta_modeller.generate_fragments(optd)
        optd['frags_3mers'] = rosetta_modeller.frags_3mers
        optd['frags_9mers'] = rosetta_modeller.frags_9mers
        optd['psipred_ss2'] = rosetta_modeller.psipred_ss2
    # Contact prediction / restraints generation
    if optd["use_contacts"] and not optd['restraints_file']:
        con_util = contact_util.ContactUtil(
            optd['fasta'],
            'fasta',
            contact_file=optd['contact_file'],
            contact_format=optd['contact_format'],
            bbcontacts_file=optd['bbcontacts_file'],
            bbcontacts_format=optd["bbcontacts_format"],
            cutoff_factor=optd['restraints_factor'],
            distance_to_neighbor=optd['distance_to_neighbour'])
        optd["contacts_dir"] = os.path.join(optd["work_dir"], "contacts")
        if not os.path.isdir(optd["contacts_dir"]):
            os.mkdir(optd["contacts_dir"])
        if con_util.require_contact_prediction:
            if con_util.found_ccmpred_contact_prediction_deps:
                con_util.predict_contacts_from_sequence(
                    wdir=optd["contacts_dir"])
                optd["contact_file"] = con_util.contact_file
                optd["contact_format"] = con_util.contact_format
        if con_util.do_contact_analysis:
            plot_file = os.path.join(optd['contacts_dir'],
                                     optd['name'] + ".cm.png")
            if optd['native_pdb'] and optd['native_pdb_std']:
                structure_file = optd['native_pdb_std']
            elif optd["native_pdb"]:
                # NOTE(review): 'native_std' looks inconsistent with the
                # 'native_pdb_std' key used just above - confirm this key
                # actually exists in optd.
                structure_file = optd['native_std']
            else:
                structure_file = None
            optd['contact_map'], optd['contact_ppv'] = con_util.summarize(
                plot_file, structure_file, 'pdb', optd['native_cutoff'])
            restraints_file = os.path.join(optd['contacts_dir'],
                                           optd['name'] + ".cst")
            optd['restraints_file'] = con_util.write_restraints(
                restraints_file, optd['restraints_format'],
                optd['energy_function'])
        else:
            con_util = None
    else:
        con_util = None
    if optd['make_models'] and optd['restraints_file']:
        rosetta_modeller.restraints_file = optd['restraints_file']
    if optd['make_models']:
        logger.info('----- making Rosetta models--------')
        if optd['nmr_remodel']:
            try:
                # NOTE(review): `monitor` is not defined in this method -
                # presumably a module-level name; verify.
                optd['models'] = rosetta_modeller.nmr_remodel(
                    models=optd['models'],
                    ntimes=optd['nmr_process'],
                    alignment_file=optd['alignment_file'],
                    remodel_fasta=optd['nmr_remodel_fasta'],
                    monitor=monitor)
            except Exception as e:
                msg = "Error remodelling NMR ensemble: {0}".format(e)
                exit_util.exit_error(msg, sys.exc_info()[2])
        else:
            logger.info('making %s models...', optd['nmodels'])
            try:
                optd['models'] = rosetta_modeller.ab_initio_model(
                    monitor=monitor)
            except Exception as e:
                msg = "Error running ROSETTA to create models: {0}".format(
                    e)
                exit_util.exit_error(msg, sys.exc_info()[2])
        if not pdb_edit.check_pdb_directory(optd['models_dir'],
                                            sequence=optd['sequence']):
            msg = "Problem with rosetta pdb files - please check the log for more information"
            exit_util.exit_error(msg)
        logger.info('Modelling complete - models stored in: %s\n',
                    optd['models_dir'])
    elif optd['import_models']:
        logger.info('Importing models from directory: %s\n',
                    optd['models_dir'])
        if optd['homologs']:
            optd['models'] = ample_util.extract_and_validate_models(
                optd, sequence=None, single=True, allsame=False)
        else:
            optd['models'] = ample_util.extract_and_validate_models(optd)
        # Need to check if Quark and handle things accordingly
        if optd['quark_models']:
            # We always add sidechains to QUARK models if SCWRL is installed
            if ample_util.is_exe(optd['scwrl_exe']):
                optd['use_scwrl'] = True
            else:
                # No SCWRL so don't do owt with the side chains
                logger.info(
                    'Using QUARK models but SCWRL is not installed '
                    'so only using %s sidechains', UNMODIFIED)
                optd['side_chain_treatments'] = [UNMODIFIED]
    # Sub-select the decoys using contact information
    if con_util and optd['subselect_mode'] and not (optd['nmr_model_in']
                                                    or optd['nmr_remodel']):
        logger.info('Subselecting models from directory using '
                    'provided contact information')
        subselect_data = con_util.subselect_decoys(
            optd['models'], 'pdb', mode=optd['subselect_mode'], **optd)
        # NOTE(review): zip(...)[0] is only subscriptable on Python 2; on
        # Python 3 this needs list(zip(*subselect_data))[0] - confirm the
        # supported interpreter version.
        optd['models'] = zip(*subselect_data)[0]
        optd['subselect_data'] = dict(subselect_data)
    ample_util.save_amoptd(optd)
def ensembling(self, optd):
    """Generate (or import) the ensembles used for molecular replacement.

    Depending on the options this imports pre-made ensembles, uses ideal
    helices, or clusters/truncates the models - either through the cluster
    queue or in-process - then verifies that usable ensembles exist and
    saves the options dictionary.
    """
    if optd['import_ensembles']:
        ensembler.import_ensembles(optd)
    elif optd['ideal_helices']:
        ample_util.ideal_helices(optd)
        logger.info("*** Using ideal helices to solve structure ***")
    else:
        # Import the models here instead of cluster_util.
        # BUGFIX: the original compared with `is 'import'`; identity
        # comparison against a string literal is implementation-dependent
        # (string interning) - use equality.
        if optd['cluster_method'] == 'import':
            # HACK - this is certainly not how we want to do it. One flag for all (-models) in future
            optd['models'] = optd['cluster_dir']
            optd['models'] = ample_util.extract_and_validate_models(optd)
        # Check we have some models to work with
        if not (optd['single_model_mode'] or optd['models']):
            ample_util.save_amoptd(optd)
            msg = "ERROR! Cannot find any pdb files in: {0}".format(
                optd['models_dir'])
            exit_util.exit_error(msg)
        optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
        if optd['submit_cluster']:
            # Pickle dictionary so it can be opened by the job to get the parameters
            ample_util.save_amoptd(optd)
            script = ensembler.cluster_script(optd)
            ensembler_timeout = ensembler.get_ensembler_timeout(optd)
            workers_util.run_scripts(
                job_scripts=[script],
                monitor=monitor,
                nproc=optd['nproc'],
                job_time=ensembler_timeout,
                job_name='ensemble',
                submit_cluster=optd['submit_cluster'],
                submit_qtype=optd['submit_qtype'],
                submit_queue=optd['submit_queue'],
                submit_pe_lsf=optd['submit_pe_lsf'],
                submit_pe_sge=optd['submit_pe_sge'],
                submit_array=optd['submit_array'],
                submit_max_array=optd['submit_max_array'])
            # queue finished so unpickle results
            optd.update(ample_util.read_amoptd(optd['results_path']))
        else:
            try:
                ensembler.create_ensembles(optd)
            except Exception as e:
                msg = "Error creating ensembles: {0}".format(e)
                exit_util.exit_error(msg, sys.exc_info()[2])
        # Check we have something to work with
        if (not os.path.isfile(optd['ensemble_ok'])
                or 'ensembles' not in optd or not len(optd['ensembles'])):
            msg = "Problem generating ensembles!"
            exit_util.exit_error(msg)
        if not (optd['homologs'] or optd['single_model_mode']):
            ensemble_summary = ensembler.ensemble_summary(
                optd['ensembles_data'])
            logger.info(ensemble_summary)
    # Save the results
    ample_util.save_amoptd(optd)
    # Bail here if we didn't create anything
    if not len(optd['ensembles']):
        msg = "### AMPLE FAILED TO GENERATE ANY ENSEMBLES! ###\nExiting..."
        exit_util.exit_error(msg)
    # Update results view
    if self.ample_output:
        self.ample_output.display_results(optd)
    return
    # NOTE(review): this `else` is the tail of a function whose `def` line is
    # outside the visible source; indentation reconstructed.
    else:
        return path


# Run unit tests
if __name__ == "__main__":
    # This runs the benchmarking starting from a pickled file containing an amopt dictionary.
    # - used when submitting the modelling jobs to a cluster
    if len(sys.argv) != 2 or not os.path.isfile(sys.argv[1]):
        print(
            "benchmark script requires the path to a pickled amopt dictionary!"
        )
        sys.exit(1)
    # Get the amopt dictionary
    amoptd = ample_util.read_amoptd(sys.argv[1])
    # Set up logging - could append to an existing log?
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    fl = logging.FileHandler(os.path.join(amoptd['work_dir'], "benchmark.log"))
    fl.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fl.setFormatter(formatter)
    logger.addHandler(fl)
    # Run the benchmark analysis and persist the updated dictionary
    analyse(amoptd)
    ample_util.save_amoptd(amoptd)
    # NOTE(review): this span is the interior of a function whose `def` line
    # is outside the visible source (it references local names `restart` and
    # `optd`); indentation reconstructed.
    if not restart and not ('models' in optd and optd['models']
                            and os.path.exists(optd['models'])):
        msg = 'AMPLE ensembler requires a -models argument with a file/directory of pdbs'
        exit_util.exit_error(msg, sys.exc_info()[2])
    # Set up the working directory if one doesn't already exist
    if not ('work_dir' in optd and optd['work_dir']):
        optd['work_dir'] = os.path.join(os.path.abspath(os.path.curdir),
                                        ENSEMBLE_DIRNAME)
    if not os.path.isdir(optd['work_dir']):
        try:
            os.mkdir(optd['work_dir'])
        except OSError as e:
            msg = 'Error making ensemble workdir {0} : {1}'.format(
                optd['work_dir'], e)
            exit_util.exit_error(msg, sys.exc_info()[2])
    assert os.path.isdir(optd['work_dir'])
    # Start logging to a file
    logging_util.setup_file_logging(
        os.path.join(optd['work_dir'], "ensemble.log"))
    try:
        if not restart:
            results = process_models.extract_and_validate_models(optd)
            process_models.handle_model_import(optd, results)
            process_ensemble_options(optd)
        optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
        optd['results_path'] = os.path.join(optd['work_dir'], AMPLE_PKL)
        ensembler.create_ensembles(optd)
        ample_util.save_amoptd(optd)
    except Exception as e:
        # NOTE(review): `e.message` is Python 2 only; on Python 3 this raises
        # AttributeError - confirm the target interpreter or use str(e).
        msg = "Error running ensembling: {0}".format(e.message)
        exit_util.exit_error(msg, sys.exc_info()[2])
def main(self, args=None):
    """Main AMPLE routine.

    We require this as the multiprocessing module (only on **!!*%$$!! Windoze)
    requires that the main module can be imported. We there need ample to be
    a python script that can be imported, hence the main routine with its
    calling protected by the if __name__=="__main__":...

    args is an option argument that can contain the command-line arguments
    for the program - required for testing.
    """
    argso = argparse_util.process_command_line(args=args)
    # Work directory and loggers need to be setup before we do anything else
    self.setup_workdir(argso)
    global logger
    logger = logging_util.setup_logging(argso)
    # Logging and work directories in place so can start work
    self.amopt = amopt = config_util.AMPLEConfigOptions()
    amopt.populate(argso)
    amopt.d = self.setup(amopt.d)
    rosetta_modeller = options_processor.process_rosetta_options(amopt.d)
    logger.debug(
        amopt.prettify_parameters())  # Display the parameters used
    amopt.write_config_file()
    time_start = time.time()
    # Create function for monitoring jobs - static function decorator?
    # NOTE(review): `monitor` is not referenced later in this method -
    # presumably picked up elsewhere; verify.
    if self.ample_output:
        def monitor():
            return self.ample_output.display_results(amopt.d)
    else:
        monitor = None
    # Highlight deprecated command line arguments
    if amopt.d['submit_cluster']:
        message = "-%s has been deprecated and will be removed in version %s!" % (
            'submit_cluster', 1.6)
        warnings.warn(message, DeprecationWarning)
    if amopt.d["submit_pe_lsf"]:
        message = "-%s has been deprecated and will be removed in version %s! Use -submit_pe instead" % (
            'submit_pe_lsf', 1.6)
        warnings.warn(message, DeprecationWarning)
    if amopt.d["submit_pe_sge"]:
        message = "-%s has been deprecated and will be removed in version %s! Use -submit_pe instead" % (
            'submit_pe_sge', 1.6)
        warnings.warn(message, DeprecationWarning)
    # Process any files we may have been given
    model_results = process_models.extract_and_validate_models(amopt.d)
    if model_results:
        process_models.handle_model_import(amopt.d, model_results)
    if amopt.d['benchmark_mode'] and amopt.d['native_pdb']:
        # Process the native before we do anything else
        benchmark_util.analysePdb(amopt.d)
    # Create constituent models from an NMR ensemble
    if amopt.d['nmr_model_in']:
        nmr_mdir = os.path.join(amopt.d['work_dir'], 'nmr_models')
        amopt.d['modelling_workdir'] = nmr_mdir
        logger.info(
            'Splitting NMR ensemble into constituent models in directory: {0}'
            .format(nmr_mdir))
        amopt.d['processed_models'] = pdb_edit.split_pdb(
            amopt.d['nmr_model_in'],
            directory=nmr_mdir,
            strip_hetatm=True,
            same_size=True)
        logger.info('NMR ensemble contained {0} models'.format(
            len(amopt.d['processed_models'])))
    # Modelling business happens here
    if self.modelling_required(amopt.d):
        self.modelling(amopt.d, rosetta_modeller)
        ample_util.save_amoptd(amopt.d)
        amopt.write_config_file()
    # Ensembling business next
    if amopt.d['make_ensembles']:
        self.ensembling(amopt.d)
        amopt.write_config_file()
    # Some MR here
    if amopt.d['do_mr']:
        self.molecular_replacement(amopt.d)
        amopt.write_config_file()
    # Timing data
    time_stop = time.time()
    elapsed_time = time_stop - time_start
    run_in_min = elapsed_time / 60
    run_in_hours = run_in_min / 60
    msg = os.linesep + 'All processing completed (in {0:6.2F} hours)'.format(
        run_in_hours) + os.linesep
    msg += '----------------------------------------' + os.linesep
    # NOTE(review): the root `logging` module is used here while the rest of
    # this method uses the module-level `logger` - confirm this is intended.
    logging.info(msg)
    # Benchmark mode
    if amopt.d['benchmark_mode']:
        self.benchmarking(amopt.d)
        amopt.write_config_file()
    amopt.write_config_file()
    # Flag to show that we reached the end without error - useful for integration testing
    amopt.d['AMPLE_finished'] = True
    ample_util.save_amoptd(amopt.d)
    logger.info("AMPLE finished at: %s",
                time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
    ref_mgr = reference_manager.ReferenceManager(amopt.d)
    ref_mgr.save_citations_to_file(amopt.d)
    logger.info(ref_mgr.citations_as_text)
    logger.info(reference_manager.footer)
    # Finally update pyrvapi results
    if self.ample_output:
        self.ample_output.display_results(amopt.d)
        self.ample_output.rvapi_shutdown(amopt.d)
    self.cleanup(amopt.d)
    return
def ensembling(self, optd):
    """Generate or import the ensembles for molecular replacement.

    Handles imported ensembles, ideal/helical ensemble libraries, and the
    standard cluster-and-truncate route, which runs through a batch queue
    (any ``submit_qtype`` other than 'local') or in-process.
    """
    if optd['import_ensembles']:
        ensembler.import_ensembles(optd)
    elif optd['ideal_helices']:
        ample_util.ideal_helices(optd)
        logger.info(
            "*** Attempting to solve the structure using ideal helices ***"
        )
        logger.warning(
            'If ideal helices do not solve the structure, you may want to use -helical_ensembles in '
            'place of -ideal_helices. AMPLE will then use a new set of helical ensembles which has been '
            'very successful on solving challenging cases!')
    elif optd['helical_ensembles']:
        ample_util.ideal_helices(optd)
        logger.info(
            "*** Attempting to solve the structure using %s set of helical ensembles ***"
            % optd['helical_ensembles_set'])
    else:
        # Check we have some models to work with
        if not (optd['single_model_mode'] or optd['processed_models']):
            ample_util.save_amoptd(optd)
            msg = "ERROR! Cannot find any pdb files in: {0}".format(
                optd['models_dir'])
            exit_util.exit_error(msg)
        optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
        if optd['submit_qtype'] != 'local':
            # Pickle dictionary so it can be opened by the job to get the parameters
            ample_util.save_amoptd(optd)
            script = ensembler.cluster_script(optd)
            ensembler_timeout = ensembler.get_ensembler_timeout(optd)
            with TaskFactory(
                    optd['submit_qtype'],
                    script,
                    cwd=optd['work_dir'],
                    environment=optd['submit_pe'],
                    run_time=ensembler_timeout,
                    # BUGFIX: this task was named 'benchmark' (copy-paste
                    # from the benchmarking stage); it is the ensembling
                    # job, so name it accordingly.
                    name='ensemble',
                    nprocesses=optd['nproc'],
                    max_array_size=optd['submit_max_array'],
                    queue=optd['submit_queue'],
                    shell="/bin/bash",
            ) as task:
                task.run()
                task.wait(interval=5, monitor_f=monitor)
            # queue finished so unpickle results
            optd.update(ample_util.read_amoptd(optd['results_path']))
        else:
            try:
                ensembler.create_ensembles(optd)
            except Exception as e:
                msg = "Error creating ensembles: {0}".format(e)
                exit_util.exit_error(msg, sys.exc_info()[2])
        # Check we have something to work with
        if (not os.path.isfile(optd['ensemble_ok'])
                or 'ensembles' not in optd or not len(optd['ensembles'])):
            msg = "Problem generating ensembles!"
            exit_util.exit_error(msg)
        if not (optd['homologs'] or optd['single_model_mode']):
            ensemble_summary = ensembler.ensemble_summary(
                optd['ensembles_data'])
            logger.info(ensemble_summary)
    # Save the results
    ample_util.save_amoptd(optd)
    # Bail here if we didn't create anything
    if not len(optd['ensembles']):
        msg = "### AMPLE FAILED TO GENERATE ANY ENSEMBLES! ###\nExiting..."
        exit_util.exit_error(msg)
    # Update results view
    if self.ample_output:
        self.ample_output.display_results(optd)
    return
def main(self, args=None):
    """Main AMPLE routine.

    We require this as the multiprocessing module (only on **!!*%$$!! Windoze)
    requires that the main module can be imported. We there need ample to be
    a python script that can be imported, hence the main routine with its
    calling protected by the if __name__=="__main__":...

    args is an option argument that can contain the command-line arguments
    for the program - required for testing.
    """
    argso = argparse_util.process_command_line(args=args)
    # Work directory and loggers need to be setup before we do anything else
    self.setup_workdir(argso)
    global logger
    logger = logging_util.setup_logging(argso)
    # Logging and work directories in place so can start work
    self.amopt = amopt = config_util.AMPLEConfigOptions()
    amopt.populate(argso)
    amopt.d = self.setup(amopt.d)
    rosetta_modeller = options_processor.process_rosetta_options(amopt.d)
    # Display the parameters used
    logger.debug(amopt.prettify_parameters())
    amopt.write_config_file()
    #######################################################
    # SCRIPT PROPER STARTS HERE
    time_start = time.time()
    # Create function for monitoring jobs - static function decorator?
    # NOTE(review): `monitor` is not referenced later in this method -
    # presumably picked up elsewhere; verify.
    if self.ample_output:
        def monitor():
            return self.ample_output.display_results(amopt.d)
    else:
        monitor = None
    # Process any files we may have been given
    model_results = process_models.extract_and_validate_models(amopt.d)
    if model_results:
        process_models.handle_model_import(amopt.d, model_results)
    if amopt.d['benchmark_mode'] and amopt.d['native_pdb']:
        # Process the native before we do anything else
        benchmark_util.analysePdb(amopt.d)
    # Create constituent models from an NMR ensemble
    if amopt.d['nmr_model_in']:
        nmr_mdir = os.path.join(amopt.d['work_dir'], 'nmr_models')
        amopt.d['modelling_workdir'] = nmr_mdir
        logger.info('Splitting NMR ensemble into constituent models in directory: {0}'.format(nmr_mdir))
        amopt.d['processed_models'] = pdb_edit.split_pdb(
            amopt.d['nmr_model_in'],
            directory=nmr_mdir,
            strip_hetatm=True,
            same_size=True)
        logger.info('NMR ensemble contained {0} models'.format(len(amopt.d['processed_models'])))
    # Modelling business happens here
    if self.modelling_required(amopt.d):
        self.modelling(amopt.d, rosetta_modeller)
        ample_util.save_amoptd(amopt.d)
        amopt.write_config_file()
    # Ensembling business next
    if amopt.d['make_ensembles']:
        self.ensembling(amopt.d)
        amopt.write_config_file()
    # Some MR here
    if amopt.d['do_mr']:
        self.molecular_replacement(amopt.d)
        amopt.write_config_file()
    # Timing data
    time_stop = time.time()
    elapsed_time = time_stop - time_start
    run_in_min = elapsed_time / 60
    run_in_hours = run_in_min / 60
    msg = os.linesep + \
        'All processing completed (in {0:6.2F} hours)'.format(
            run_in_hours) + os.linesep
    msg += '----------------------------------------' + os.linesep
    # NOTE(review): the root `logging` module is used here while the rest of
    # this method uses the module-level `logger` - confirm this is intended.
    logging.info(msg)
    # Benchmark mode
    if amopt.d['benchmark_mode']:
        self.benchmarking(amopt.d)
        amopt.write_config_file()
    amopt.write_config_file()
    # Flag to show that we reached the end without error - useful for integration testing
    amopt.d['AMPLE_finished'] = True
    ample_util.save_amoptd(amopt.d)
    logger.info("AMPLE finished at: %s",
                time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
    ref_mgr = reference_manager.ReferenceManager(amopt.d)
    ref_mgr.save_citations_to_file(amopt.d)
    logger.info(ref_mgr.citations_as_text)
    logger.info(reference_manager.footer)
    # Finally update pyrvapi results
    if self.ample_output:
        self.ample_output.display_results(amopt.d)
        self.ample_output.rvapi_shutdown(amopt.d)
    self.cleanup(amopt.d)
    return
def molecular_replacement(self, optd):
    """Run MRBUMP molecular replacement on the ensembles and collect results.

    Sets the success criteria, generates the MRBUMP run scripts if needed,
    runs them (locally or on a cluster), then extracts and summarises the
    results into ``optd`` and saves the options dictionary.
    """
    mrbump_util.set_success_criteria(optd)
    if not optd['mrbump_scripts']:
        # MRBUMP analysis of the ensembles
        logger.info('----- Running MRBUMP on ensembles--------\n\n')
        if len(optd['ensembles']) < 1:
            msg = "ERROR! Cannot run MRBUMP as there are no ensembles!"
            exit_util.exit_error(msg)
        if optd['mrbump_dir'] is None:
            bump_dir = os.path.join(optd['work_dir'], 'MRBUMP')
            optd['mrbump_dir'] = bump_dir
        else:
            bump_dir = optd['mrbump_dir']
        if not os.path.exists(bump_dir):
            os.mkdir(bump_dir)
        optd['mrbump_results'] = []
        logger.info("Running MRBUMP jobs in directory: %s", bump_dir)
        # Set an ensemble-specific phaser_rms if required
        if optd['phaser_rms'] == 'auto':
            ensembler.set_phaser_rms_from_subcluster_score(optd)
        # Sort the ensembles in a favourable way
        logger.info("Sorting ensembles")
        sort_keys = ['cluster_num', 'truncation_level',
                     'subcluster_radius_threshold', 'side_chain_treatment']
        ensemble_pdbs_sorted = ensembler.sort_ensembles(
            optd['ensembles'],
            optd['ensembles_data'],
            keys=sort_keys,
            prioritise=True)
        # Create job scripts
        logger.info("Generating MRBUMP runscripts")
        optd['mrbump_scripts'] = mrbump_util.write_mrbump_files(
            ensemble_pdbs_sorted,
            optd,
            job_time=mrbump_util.MRBUMP_RUNTIME,
            ensemble_options=optd['ensemble_options'],
            directory=bump_dir)
    # Create function for monitoring jobs - static function decorator?
    if self.ample_output:
        def monitor():
            # Pull the latest MRBUMP results into optd before refreshing
            # the GUI display.
            r = mrbump_util.ResultsSummary()
            r.extractResults(optd['mrbump_dir'], purge=bool(optd['purge']))
            optd['mrbump_results'] = r.results
            return self.ample_output.display_results(optd)
    else:
        monitor = None
    # Save results here so that we have the list of scripts and mrbump directory set
    ample_util.save_amoptd(optd)
    # Change to mrbump directory before running
    # NOTE(review): this changes the process working directory and never
    # restores it afterwards.
    os.chdir(optd['mrbump_dir'])
    ok = workers_util.run_scripts(
        job_scripts=optd['mrbump_scripts'],
        monitor=monitor,
        check_success=mrbump_util.checkSuccess,
        early_terminate=optd['early_terminate'],
        nproc=optd['nproc'],
        job_time=mrbump_util.MRBUMP_RUNTIME,
        job_name='mrbump',
        submit_cluster=optd['submit_cluster'],
        submit_qtype=optd['submit_qtype'],
        submit_queue=optd['submit_queue'],
        submit_pe_lsf=optd['submit_pe_lsf'],
        submit_pe_sge=optd['submit_pe_sge'],
        submit_array=optd['submit_array'],
        submit_max_array=optd['submit_max_array'])
    if not ok:
        msg = "An error code was returned after running MRBUMP on the ensembles!\n" + \
              "For further information check the logs in directory: {0}".format(optd['mrbump_dir'])
        logger.critical(msg)
    # Collect the MRBUMP results
    results_summary = mrbump_util.ResultsSummary()
    optd['mrbump_results'] = results_summary.extractResults(
        optd['mrbump_dir'], purge=bool(optd['purge']))
    optd['success'] = results_summary.success
    ample_util.save_amoptd(optd)
    summary = mrbump_util.finalSummary(optd)
    logger.info(summary)
def ensembling(self, optd):
    """Generate (or import) the ensembles used for molecular replacement.

    Depending on the options this imports pre-made ensembles, uses ideal
    helices, or clusters/truncates the processed models - either through
    the cluster queue or in-process - then verifies that usable ensembles
    exist and saves the options dictionary.
    """
    if optd['import_ensembles']:
        ensembler.import_ensembles(optd)
    elif optd['ideal_helices']:
        ample_util.ideal_helices(optd)
        logger.info("*** Using ideal helices to solve structure ***")
    else:
        # Check we have some models to work with
        if not (optd['single_model_mode'] or optd['processed_models']):
            ample_util.save_amoptd(optd)
            msg = "ERROR! Cannot find any pdb files in: {0}".format(optd['models_dir'])
            exit_util.exit_error(msg)
        optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
        if optd['submit_cluster']:
            # Pickle dictionary so it can be opened by the job to get the parameters
            ample_util.save_amoptd(optd)
            script = ensembler.cluster_script(optd)
            ensembler_timeout = ensembler.get_ensembler_timeout(optd)
            # NOTE(review): `monitor` is not defined in this method -
            # presumably a module-level name; verify.
            workers_util.run_scripts(
                job_scripts=[script],
                monitor=monitor,
                nproc=optd['nproc'],
                job_time=ensembler_timeout,
                job_name='ensemble',
                submit_cluster=optd['submit_cluster'],
                submit_qtype=optd['submit_qtype'],
                submit_queue=optd['submit_queue'],
                submit_pe_lsf=optd['submit_pe_lsf'],
                submit_pe_sge=optd['submit_pe_sge'],
                submit_array=optd['submit_array'],
                submit_max_array=optd['submit_max_array'])
            # queue finished so unpickle results
            optd.update(ample_util.read_amoptd(optd['results_path']))
        else:
            try:
                ensembler.create_ensembles(optd)
            except Exception as e:
                msg = "Error creating ensembles: {0}".format(e)
                exit_util.exit_error(msg, sys.exc_info()[2])
        # Check we have something to work with
        if not os.path.isfile(optd['ensemble_ok']) or 'ensembles' not in optd.keys() or not len(optd['ensembles']):
            msg = "Problem generating ensembles!"
            exit_util.exit_error(msg)
        if not (optd['homologs'] or optd['single_model_mode']):
            ensemble_summary = ensembler.ensemble_summary(optd['ensembles_data'])
            logger.info(ensemble_summary)
    # Save the results
    ample_util.save_amoptd(optd)
    # Bail here if we didn't create anything
    if not len(optd['ensembles']):
        msg = "### AMPLE FAILED TO GENERATE ANY ENSEMBLES! ###\nExiting..."
        exit_util.exit_error(msg)
    # Update results view
    if self.ample_output:
        self.ample_output.display_results(optd)
    return