def benchmarking(self, optd):
    """Run the benchmarking step, either through the cluster queue or locally.

    The results are written back into ``optd`` and persisted with
    ``ample_util.save_amoptd`` (the cluster job saves them itself and we
    unpickle them from ``optd['results_path']``).
    """
    if not optd['submit_cluster']:
        # Local run: analyse in-process and persist the results.
        benchmark_util.analyse(optd)
        ample_util.save_amoptd(optd)
        return
    # Persist the dictionary so the queued job can unpickle its parameters.
    ample_util.save_amoptd(optd)
    cluster_script = benchmark_util.cluster_script(optd)
    workers_util.run_scripts(
        job_scripts=[cluster_script],
        monitor=monitor,
        nproc=optd['nproc'],
        job_time=43200,
        job_name='benchmark',
        submit_cluster=optd['submit_cluster'],
        submit_qtype=optd['submit_qtype'],
        submit_queue=optd['submit_queue'],
        submit_pe_lsf=optd['submit_pe_lsf'],
        submit_pe_sge=optd['submit_pe_sge'],
        submit_array=optd['submit_array'],
        submit_max_array=optd['submit_max_array'],
    )
    # The queue has finished, so pull the pickled results back in.
    optd.update(ample_util.read_amoptd(optd['results_path']))
    return
def restart_amoptd(optd):
    """Create an ample dictionary from a restart pkl file.

    Description
    -----------
    For any new command-line options, we update the old dictionary with
    the new values. We then go through the new dictionary and set any of
    the flags corresponding to the data we find.

    Notes
    -----
    We return the dictionary as we may need to change it and it seems we
    can't change the external reference in this scope. I think?...

    Parameters
    ----------
    optd : dict
        Current options dictionary; ``optd['restart_pkl']`` names the
        pickle file to restart from (may be empty/None).

    Returns
    -------
    dict
        The unpickled dictionary updated with any command-line values, or
        ``optd`` unchanged when no restart file was supplied.
    """
    if not optd['restart_pkl']:
        return optd
    # Use lazy %-style logging args so formatting only happens if emitted.
    logger.info('Restarting from existing pkl file: %s', optd['restart_pkl'])
    # We use the old dictionary, but update it with any new values
    optd_old = ample_util.read_amoptd(optd['restart_pkl'])
    # Now update any variables that were given on the command-line
    for k in optd['cmdline_flags']:
        logger.debug("Restart updating amopt variable: %s : %s", k, optd[k])
        optd_old[k] = optd[k]
    # We can now replace the old dictionary with this new one
    return optd_old
def benchmarking(self, optd):
    """Benchmark the results, dispatching to a queueing system when requested.

    A non-'local' ``submit_qtype`` submits a cluster script via TaskFactory
    and unpickles the results afterwards; otherwise the analysis runs
    in-process.
    """
    if optd['submit_qtype'] == 'local':
        benchmark_util.analyse(optd)
        ample_util.save_amoptd(optd)
        return
    # Persist the dictionary so the submitted job can read its parameters.
    ample_util.save_amoptd(optd)
    job_script = benchmark_util.cluster_script(optd)
    with TaskFactory(
            optd['submit_qtype'],
            job_script,
            cwd=optd['work_dir'],
            environment=optd['submit_pe'],
            run_time=43200,
            name='benchmark',
            nprocesses=optd['nproc'],
            max_array_size=optd['submit_max_array'],
            queue=optd['submit_queue'],
            shell="/bin/bash",
    ) as task:
        task.run()
        task.wait(interval=5, monitor_f=monitor)
    # Job finished - unpickle its results back into our dictionary.
    optd.update(ample_util.read_amoptd(optd['results_path']))
    return
def setUp(self):
    """Load the pickled AMPLE results dictionary required by the tests.

    Fails early if the pkl file is missing, and logs a helpful message if
    unpickling raises an ImportError (which indicates a module used by the
    pickled objects is not importable from this test module).
    """
    self.assertTrue(os.path.isfile(self.RESULTS_PKL),
                    "Missing pkl file: {0}".format(self.RESULTS_PKL))
    try:
        self.AMPLE_DICT = ample_util.read_amoptd(self.RESULTS_PKL)
    except ImportError as e:
        # BUG FIX: the two implicitly-concatenated literals previously ran
        # together with no separator; add a newline between the sentences.
        logger.exception("Error importing module while unpickling ample results dictionary: '{}'\n"
                         "Add any imports required to the module: {}".format(e, os.path.abspath(__file__)))
        # Bare raise preserves the original traceback (raise(e) resets it).
        raise
def setUp(self):
    """Load the pickled AMPLE results dictionary for the test case.

    Asserts the pkl file exists, then unpickles it into
    ``self.AMPLE_DICT``. ImportErrors during unpickling are logged with a
    hint about adding the missing import before being re-raised.
    """
    self.assertTrue(os.path.isfile(self.RESULTS_PKL),
                    "Missing pkl file: {0}".format(self.RESULTS_PKL))
    try:
        self.AMPLE_DICT = ample_util.read_amoptd(self.RESULTS_PKL)
    except ImportError as e:
        # BUG FIX: insert a newline between the implicitly-concatenated
        # literals, which previously produced "...'{}'Add any imports...".
        logger.exception(
            "Error importing module while unpickling ample results dictionary: '{}'\n"
            "Add any imports required to the module: {}".format(
                e, os.path.abspath(__file__)))
        # Bare raise keeps the original traceback intact.
        raise
def restart_amoptd(optd):
    """Create an ample dictionary from a restart pkl file.

    Description
    -----------
    For any new command-line options, we update the old dictionary with
    the new values. We then go through the new dictionary and set any of
    the flags corresponding to the data we find.

    Notes
    -----
    We return the dictionary as we may need to change it and it seems we
    can't change the external reference in this scope. I think?...
    """
    restart_pkl = optd['restart_pkl']
    if not restart_pkl:
        return optd
    logger.info('Restarting from existing pkl file: %s', restart_pkl)
    previous = ample_util.read_amoptd(restart_pkl)
    # Values given on the command line take precedence over pickled ones.
    for flag in optd['cmdline_flags']:
        logger.debug("Restart updating amopt variable: %s : %s", flag,
                     str(optd[flag]))
        previous[flag] = optd[flag]
    return previous
def benchmarking(self, optd):
    """Benchmark MR results, either via the cluster queue or in-process."""
    if optd['submit_cluster']:
        # Save the dictionary so the queued job can unpickle its parameters.
        ample_util.save_amoptd(optd)
        submit_kwargs = {
            'job_scripts': [benchmark_util.cluster_script(optd)],
            'monitor': monitor,
            'nproc': optd['nproc'],
            'job_time': 43200,
            'job_name': 'benchmark',
            'submit_cluster': optd['submit_cluster'],
            'submit_qtype': optd['submit_qtype'],
            'submit_queue': optd['submit_queue'],
            'submit_pe_lsf': optd['submit_pe_lsf'],
            'submit_pe_sge': optd['submit_pe_sge'],
            'submit_array': optd['submit_array'],
            'submit_max_array': optd['submit_max_array'],
        }
        workers_util.run_scripts(**submit_kwargs)
        # Queue finished - unpickle the results back into optd.
        optd.update(ample_util.read_amoptd(optd['results_path']))
    else:
        benchmark_util.analyse(optd)
        ample_util.save_amoptd(optd)
    return
mrb_results[:nresults]).topFiles(nresults): # Mangle paths fdata['pdb'] = os.path.relpath(root, fdata['pdb']) fdata['mtz'] = os.path.relpath(root, fdata['mtz']) meta['results'].append(fdata) # Commit to file logger.debug("Exporting pyrvapi metadata:\n{0}".format(meta)) pyrvapi.rvapi_put_meta(json.dumps(meta)) pyrvapi.rvapi_store_document2(rvdoc) return if __name__ == "__main__": import copy, sys, time pklfile = sys.argv[1] ample_dict = ample_util.read_amoptd(pklfile) ample_dict['no_gui'] = False ample_dict['ample_log'] = os.path.abspath(__file__) report_dir = os.path.abspath(os.path.join(os.curdir, "pyrvapi_tmp")) AR = AmpleOutput(ample_dict, report_dir=report_dir, own_gui=True, xml=None) #AR.display_results(ample_dict) view1_dict = copy.copy(ample_dict) del view1_dict['ensembles_data'] del view1_dict['mrbump_results'] SLEEP = 5 AR.display_results(view1_dict)
argparse_util.add_cluster_submit_options(parser) argparse_util.add_ensembler_options(parser) # Get command-line arguments and see if we have a restart_pkl option as this # is how we pass in an existing ample dictionary when we are running the ensembling # as a standalone job on a cluster optd = vars(parser.parse_args()) # Track restart as it determines if we need to unpack models restart = False if 'restart_pkl' in optd and optd['restart_pkl']: if not os.path.isfile(optd['restart_pkl']): msg = 'Cannot find ensemble pkl file: {0}'.format(optd['restart_pkl']) exit_util.exit_error(msg) try: optd = ample_util.read_amoptd(optd['restart_pkl']) except Exception as e: msg = "Error unpickling ensemble pkl: {0}".format(e.message) exit_util.exit_error(msg, sys.exc_info()[2]) restart = True else: # We're running purely from command-line arguments amopt = config_util.AMPLEConfigOptions() amopt.populate(optd) optd = amopt.d # Start logging to the console logger = logging_util.setup_console_logging() # Make sure we have models if in standalone mode if not restart and not ('models' in optd and optd['models'] and os.path.exists(optd['models'])):
ensembler.add_argparse_options(parser) # Get command-line arguments and see if we have a restart_pkl option as this # is how we pass in an existing ample dictionary when we are running the ensembling # as a standalone job on a cluster args = parser.parse_args() optd = vars(args) # Track restart as it determines if we need to unpack models restart = False if 'restart_pkl' in optd and optd['restart_pkl']: if not os.path.isfile(optd['restart_pkl']): msg = 'Cannot find ensemble pkl file: {0}'.format(optd['restart_pkl']) exit_util.exit_error(msg) try: optd = ample_util.read_amoptd(optd['restart_pkl']) except Exception as e: msg = "Error unpickling ensemble pkl: {0}".format(e.message) exit_util.exit_error(msg, sys.exc_info()[2]) restart = True else: # We're running purely from command-line arguments amopt = config_util.AMPLEConfigOptions() amopt.populate(args) optd = amopt.d # Start logging to the console logging_util.setup_console_logging() # Make sure we have models if in standalone mode if not restart and not ('models' in optd and optd['models']
def setUp(self):
    """Check the results pkl exists and load it into ``self.AMPLE_DICT``."""
    pkl = self.RESULTS_PKL
    self.assertTrue(os.path.isfile(pkl), "Missing pkl file: {0}".format(pkl))
    self.AMPLE_DICT = ample_util.read_amoptd(pkl)
# Mangle paths. relpath assumes args are directories so need to add .. fdata['pdb'] = self.fix_path(fdata['pdb']) fdata['mtz'] = self.fix_path(fdata['mtz']) meta['results'].append(fdata) # Commit to file logger.debug("Exporting pyrvapi metadata:\n{0}".format(meta)) pyrvapi.rvapi_put_meta(json.dumps(meta)) pyrvapi.rvapi_store_document2(rvdoc) return if __name__ == "__main__": import copy, sys, time logging.basicConfig(level=logging.DEBUG) pklfile = sys.argv[1] ample_dict = ample_util.read_amoptd(pklfile) ample_dict['show_gui'] = True ample_dict['ample_log'] = os.path.abspath(__file__) report_dir = os.path.abspath(os.path.join(os.curdir,"pyrvapi_tmp")) AR = AmpleOutput(ample_dict) AR.display_results(ample_dict) view1_dict = copy.copy(ample_dict) del view1_dict['ensembles_data'] del view1_dict['mrbump_results'] SLEEP = 5 AR.display_results(view1_dict) time.sleep(SLEEP)
# fix for analysing on a different machine if _oldroot and _newroot: return path.replace(_oldroot,_newroot) else: return path # Run unit tests if __name__ == "__main__": # This runs the benchmarking starting from a pickled file containing an amopt dictionary. # - used when submitting the modelling jobs to a cluster if len(sys.argv) != 2 or not os.path.isfile(sys.argv[1]): print("benchmark script requires the path to a pickled amopt dictionary!") sys.exit(1) # Get the amopt dictionary amoptd = ample_util.read_amoptd(sys.argv[1]) # Set up logging - could append to an existing log? logger = logging.getLogger() logger.setLevel(logging.DEBUG) fl = logging.FileHandler(os.path.join(amoptd['work_dir'], "benchmark.log")) fl.setLevel(logging.DEBUG) formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') fl.setFormatter(formatter) logger.addHandler(fl) analyse(amoptd) ample_util.save_amoptd(amoptd)
def ensembling(self, optd):
    """Generate, or import, the ensembles used for molecular replacement.

    Parameters
    ----------
    optd : dict
        AMPLE options dictionary; updated in place and pickled to disk.

    Side effects: saves the amopt pickle, may submit a cluster job, and
    terminates the program via ``exit_util.exit_error`` on failure.
    """
    if optd['import_ensembles']:
        ensembler.import_ensembles(optd)
    elif optd['ideal_helices']:
        ample_util.ideal_helices(optd)
        logger.info("*** Using ideal helices to solve structure ***")
    else:
        # Check we have some models to work with
        if not (optd['single_model_mode'] or optd['processed_models']):
            ample_util.save_amoptd(optd)
            msg = "ERROR! Cannot find any pdb files in: {0}".format(optd['models_dir'])
            exit_util.exit_error(msg)
        optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
        if optd['submit_cluster']:
            # Pickle dictionary so it can be opened by the job to get the parameters
            ample_util.save_amoptd(optd)
            script = ensembler.cluster_script(optd)
            ensembler_timeout = ensembler.get_ensembler_timeout(optd)
            workers_util.run_scripts(
                job_scripts=[script],
                monitor=monitor,
                nproc=optd['nproc'],
                job_time=ensembler_timeout,
                job_name='ensemble',
                submit_cluster=optd['submit_cluster'],
                submit_qtype=optd['submit_qtype'],
                submit_queue=optd['submit_queue'],
                submit_pe_lsf=optd['submit_pe_lsf'],
                submit_pe_sge=optd['submit_pe_sge'],
                submit_array=optd['submit_array'],
                submit_max_array=optd['submit_max_array'])
            # queue finished so unpickle results
            optd.update(ample_util.read_amoptd(optd['results_path']))
        else:
            try:
                ensembler.create_ensembles(optd)
            except Exception as e:
                msg = "Error creating ensembles: {0}".format(e)
                exit_util.exit_error(msg, sys.exc_info()[2])
        # Check we have something to work with.
        # Idiom: 'in optd' instead of 'in optd.keys()'; truthiness instead
        # of 'not len(...)'.
        if not os.path.isfile(optd['ensemble_ok']) or 'ensembles' not in optd or not optd['ensembles']:
            msg = "Problem generating ensembles!"
            exit_util.exit_error(msg)
        if not (optd['homologs'] or optd['single_model_mode']):
            ensemble_summary = ensembler.ensemble_summary(optd['ensembles_data'])
            logger.info(ensemble_summary)
    # Save the results
    ample_util.save_amoptd(optd)
    # Bail here if we didn't create anything
    if not optd['ensembles']:
        msg = "### AMPLE FAILED TO GENERATE ANY ENSEMBLES! ###\nExiting..."
        exit_util.exit_error(msg)
    # Update results view
    if self.ample_output:
        self.ample_output.display_results(optd)
    return
else: return path # Run unit tests if __name__ == "__main__": # This runs the benchmarking starting from a pickled file containing an amopt dictionary. # - used when submitting the modelling jobs to a cluster if len(sys.argv) != 2 or not os.path.isfile(sys.argv[1]): print( "benchmark script requires the path to a pickled amopt dictionary!" ) sys.exit(1) # Get the amopt dictionary amoptd = ample_util.read_amoptd(sys.argv[1]) # Set up logging - could append to an existing log? logger = logging.getLogger() logger.setLevel(logging.DEBUG) fl = logging.FileHandler(os.path.join(amoptd['work_dir'], "benchmark.log")) fl.setLevel(logging.DEBUG) formatter = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s - %(message)s') fl.setFormatter(formatter) logger.addHandler(fl) analyse(amoptd) ample_util.save_amoptd(amoptd)
def ensembling(self, optd):
    """Create, or import, the ensembles used for molecular replacement.

    Parameters
    ----------
    optd : dict
        AMPLE options dictionary; updated in place and pickled to disk.

    Side effects: saves the amopt pickle, may submit a cluster job, and
    terminates the program via ``exit_util.exit_error`` on failure.
    """
    if optd['import_ensembles']:
        ensembler.import_ensembles(optd)
    elif optd['ideal_helices']:
        ample_util.ideal_helices(optd)
        logger.info("*** Using ideal helices to solve structure ***")
    else:
        # Import the models here instead of cluster_util.
        # BUG FIX: the original used "is 'import'", which compares object
        # identity, not string equality, and is unreliable.
        if optd['cluster_method'] == 'import':
            # HACK - this is certainly not how we want to do it. One flag for all (-models) in future
            optd['models'] = optd['cluster_dir']
        optd['models'] = ample_util.extract_and_validate_models(optd)
        # Check we have some models to work with
        if not (optd['single_model_mode'] or optd['models']):
            ample_util.save_amoptd(optd)
            msg = "ERROR! Cannot find any pdb files in: {0}".format(
                optd['models_dir'])
            exit_util.exit_error(msg)
        optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
        if optd['submit_cluster']:
            # Pickle dictionary so it can be opened by the job to get the parameters
            ample_util.save_amoptd(optd)
            script = ensembler.cluster_script(optd)
            ensembler_timeout = ensembler.get_ensembler_timeout(optd)
            workers_util.run_scripts(
                job_scripts=[script],
                monitor=monitor,
                nproc=optd['nproc'],
                job_time=ensembler_timeout,
                job_name='ensemble',
                submit_cluster=optd['submit_cluster'],
                submit_qtype=optd['submit_qtype'],
                submit_queue=optd['submit_queue'],
                submit_pe_lsf=optd['submit_pe_lsf'],
                submit_pe_sge=optd['submit_pe_sge'],
                submit_array=optd['submit_array'],
                submit_max_array=optd['submit_max_array'])
            # queue finished so unpickle results
            optd.update(ample_util.read_amoptd(optd['results_path']))
        else:
            try:
                ensembler.create_ensembles(optd)
            except Exception as e:
                msg = "Error creating ensembles: {0}".format(e)
                exit_util.exit_error(msg, sys.exc_info()[2])
        # Check we have something to work with.
        # Idiom: membership on the dict itself and truthiness on the list.
        if not os.path.isfile(optd['ensemble_ok']) or 'ensembles' not in optd or not optd['ensembles']:
            msg = "Problem generating ensembles!"
            exit_util.exit_error(msg)
        if not (optd['homologs'] or optd['single_model_mode']):
            ensemble_summary = ensembler.ensemble_summary(
                optd['ensembles_data'])
            logger.info(ensemble_summary)
    # Save the results
    ample_util.save_amoptd(optd)
    # Bail here if we didn't create anything
    if not optd['ensembles']:
        msg = "### AMPLE FAILED TO GENERATE ANY ENSEMBLES! ###\nExiting..."
        exit_util.exit_error(msg)
    # Update results view
    if self.ample_output:
        self.ample_output.display_results(optd)
    return
def ensembling(self, optd):
    """Create, or import, the ensembles used for molecular replacement.

    Parameters
    ----------
    optd : dict
        AMPLE options dictionary; updated in place and pickled to disk.

    Side effects: saves the amopt pickle, may submit a queue job via
    TaskFactory, and terminates via ``exit_util.exit_error`` on failure.
    """
    if optd['import_ensembles']:
        ensembler.import_ensembles(optd)
    elif optd['ideal_helices']:
        ample_util.ideal_helices(optd)
        logger.info(
            "*** Attempting to solve the structure using ideal helices ***"
        )
        logger.warning(
            'If ideal helices do not solve the structure, you may want to use -helical_ensembles in '
            'place of -ideal_helices. AMPLE will then use a new set of helical ensembles which has been '
            'very successful on solving challenging cases!')
    elif optd['helical_ensembles']:
        ample_util.ideal_helices(optd)
        # Lazy %-args: the message is only formatted if the record is emitted.
        logger.info(
            "*** Attempting to solve the structure using %s set of helical ensembles ***",
            optd['helical_ensembles_set'])
    else:
        # Check we have some models to work with
        if not (optd['single_model_mode'] or optd['processed_models']):
            ample_util.save_amoptd(optd)
            msg = "ERROR! Cannot find any pdb files in: {0}".format(
                optd['models_dir'])
            exit_util.exit_error(msg)
        optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
        if optd['submit_qtype'] != 'local':
            # Pickle dictionary so it can be opened by the job to get the parameters
            ample_util.save_amoptd(optd)
            script = ensembler.cluster_script(optd)
            ensembler_timeout = ensembler.get_ensembler_timeout(optd)
            # BUG FIX: the task was named 'benchmark' (copy-paste from the
            # benchmarking step); this is the ensembling job.
            with TaskFactory(
                    optd['submit_qtype'],
                    script,
                    cwd=optd['work_dir'],
                    environment=optd['submit_pe'],
                    run_time=ensembler_timeout,
                    name='ensemble',
                    nprocesses=optd['nproc'],
                    max_array_size=optd['submit_max_array'],
                    queue=optd['submit_queue'],
                    shell="/bin/bash",
            ) as task:
                task.run()
                task.wait(interval=5, monitor_f=monitor)
            # queue finished so unpickle results
            optd.update(ample_util.read_amoptd(optd['results_path']))
        else:
            try:
                ensembler.create_ensembles(optd)
            except Exception as e:
                msg = "Error creating ensembles: {0}".format(e)
                exit_util.exit_error(msg, sys.exc_info()[2])
        # Check we have something to work with.
        # Idiom: membership on the dict itself and truthiness on the list.
        if not os.path.isfile(optd['ensemble_ok']) or 'ensembles' not in optd or not optd['ensembles']:
            msg = "Problem generating ensembles!"
            exit_util.exit_error(msg)
        if not (optd['homologs'] or optd['single_model_mode']):
            ensemble_summary = ensembler.ensemble_summary(
                optd['ensembles_data'])
            logger.info(ensemble_summary)
    # Save the results
    ample_util.save_amoptd(optd)
    # Bail here if we didn't create anything
    if not optd['ensembles']:
        msg = "### AMPLE FAILED TO GENERATE ANY ENSEMBLES! ###\nExiting..."
        exit_util.exit_error(msg)
    # Update results view
    if self.ample_output:
        self.ample_output.display_results(optd)
    return