def update_all_jobs(self, update_server_output=False):
    """Refresh every job in ``self.sim_jobs`` that is still tracked remotely.

    Jobs whose status equals ``ServerInterface.ssh_status[0]`` are updated
    through an SSH connection; jobs whose status equals
    ``ServerInterface.nsg_status[0]`` are updated from an NSG job listing.
    Connections and listings are cached per ``job.server_connector`` for the
    duration of this call, and all cached SSH connections are closed at the
    end.

    Args:
        update_server_output: When truthy, ``update_server_output=True`` is
            passed to every ``job.update`` call; otherwise the value of
            ``self.update_server_output`` is passed.

    Returns:
        None.
    """
    nsg_job_lists = {}
    ssh_conns = {}

    _update_server_output = self.update_server_output
    if update_server_output:
        _update_server_output = True

    for job in self.sim_jobs:
        if job.status == ServerInterface.ssh_status[0]:
            ssh_conn = ssh_conns.get(job.server_connector)
            if not ssh_conn:
                server = job.get_server()
                try:
                    ssh_conn = ServerInterface().connect_ssh(server, job)
                    ssh_conns[job.server_connector] = ssh_conn
                except Exception as e:
                    job.append_log(
                        'SimDirectory.update_all_jobs() Caught exception: %s: %s'
                        % (e.__class__, e))
                    # A failed connect normally leaves ssh_conn as None;
                    # only attempt a best-effort close on a real object.
                    if ssh_conn is not None:
                        try:
                            ssh_conn.close()
                        except Exception:
                            pass
            # The update is attempted even when no connection could be made.
            job.update(ssh_connection=ssh_conn,
                       update_server_output=_update_server_output)
            job.read_properties()

        # Deliberately a separate ``if``: the status is re-checked after the
        # SSH branch has had a chance to re-read the job properties.
        if job.status == ServerInterface.nsg_status[0]:
            nsg_list = nsg_job_lists.get(job.server_connector)
            if not nsg_list:
                server = job.get_server()
                nsg = Client(server.nsg_api_appname, server.nsg_api_appid,
                             server.user, server.password,
                             server.nsg_api_url)
                # Keep the freshly fetched listing in ``nsg_list`` as well as
                # in the cache, so the first job on a connector receives it
                # instead of ``None``.
                nsg_list = nsg.listJobs()
                nsg_job_lists[job.server_connector] = nsg_list
            job.update(nsg_job_list=nsg_list,
                       update_server_output=_update_server_output)
            job.read_properties()

    # Clean up ssh connections.
    for ssh_conn in ssh_conns.values():
        try:
            ssh_conn.close()
        except Exception as e:
            print(
                'SimDirectory.update_all_jobs() Caught exception while attempting to close connections: %s: %s'
                % (e.__class__, e))
    return
def delete_nsg(self, simjob, server):
    """Delete the remote NSG job that corresponds to ``simjob``.

    Lists the jobs known to the NSG account described by ``server`` and
    deletes each one whose ``jobUrl`` equals
    ``simjob.server_remote_identifier``. After a deletion, a ``simjob`` whose
    status is ``ServerInterface.nsg_status[0]`` or ``nsg_status[2]`` is moved
    to ``nsg_status[3]`` and its properties are written out.

    NOTE(review): SOURCE contains a second ``delete_nsg`` that also accepts
    ``nsg_job_list``; if both are defined on the same class the later
    definition replaces this one -- confirm which is intended.

    Args:
        simjob: Local job record; its log, status and properties are updated.
        server: Object providing the NSG credentials and API URL.

    Returns:
        None.
    """
    client = Client(server.nsg_api_appname, server.nsg_api_appid,
                    server.user, server.password, server.nsg_api_url)

    for remote_job in client.listJobs():
        if remote_job.jobUrl != simjob.server_remote_identifier:
            continue

        remote_job.delete()
        simjob.append_log("NSG Job Deleted on remote server")

        if simjob.status in (ServerInterface.nsg_status[0],
                             ServerInterface.nsg_status[2]):
            simjob.status = ServerInterface.nsg_status[3]
            simjob.write_properties()
    return
def download_status_nsg(self, simjob, server, nsg_job_list=None):
    """Fetch the interim stdout/stderr files of ``simjob``'s NSG job.

    For the remote job whose ``jobUrl`` equals
    ``simjob.server_remote_identifier``, the non-final result listing is
    scanned for ``stdout.txt`` and ``stderr.txt``. Each one found is
    downloaded into ``simjob.job_directory_absolute`` and renamed to
    ``simjob.stdout_file`` / ``simjob.stderr_file``, replacing any existing
    file of that name. Failures while downloading or renaming are logged on
    ``simjob`` and otherwise ignored.

    Args:
        simjob: Local job record.
        server: Object providing the NSG credentials and API URL.
        nsg_job_list: Optional already-fetched job listing. When it is empty
            or ``None`` a listing is fetched here and the matching job is
            refreshed with ``update()`` before use.

    Returns:
        None.
    """
    client = Client(server.nsg_api_appname, server.nsg_api_appid,
                    server.user, server.password, server.nsg_api_url)

    # Remote result name -> name of the simjob attribute holding the local
    # file name it should be stored under.
    local_name_attr = {"stdout.txt": "stdout_file",
                       "stderr.txt": "stderr_file"}

    # Just save a couple calls to their server: only list (and later refresh)
    # when the caller did not hand us a listing.
    fetched_here = not nsg_job_list
    if fetched_here:
        nsg_job_list = client.listJobs()

    for remote_job in nsg_job_list:
        if remote_job.jobUrl != simjob.server_remote_identifier:
            continue

        if fetched_here:
            # We probably didn't update it earlier.
            remote_job.update()

        result_files = remote_job.listResults(final=False)
        try:
            for remote_name in result_files:
                attr_name = local_name_attr.get(remote_name)
                if attr_name is None:
                    continue

                result_files[remote_name].download(
                    simjob.job_directory_absolute)
                downloaded = os.path.join(simjob.job_directory_absolute,
                                          remote_name)
                destination = os.path.join(simjob.job_directory_absolute,
                                           getattr(simjob, attr_name))
                if os.path.exists(destination):
                    os.remove(destination)
                os.rename(downloaded, destination)
        except Exception as e:
            # We don't care if we can't grab these files.
            simjob.append_log('*** Caught exception: {}: {}'.format(
                e.__class__, e))
    return
def stop_nsg(self, simjob, server):
    """Cancel the remote NSG job that corresponds to ``simjob``.

    The matching remote job (same ``jobUrl`` as
    ``simjob.server_remote_identifier``) is refreshed and its messages are
    copied into the ``simjob`` log. If the remote job is not done it is
    deleted, ``simjob`` is set to ``ServerInterface.nsg_status[3]`` and its
    properties are written out.

    Args:
        simjob: Local job record.
        server: Object providing the NSG credentials and API URL.

    Returns:
        None.
    """
    client = Client(server.nsg_api_appname, server.nsg_api_appid,
                    server.user, server.password, server.nsg_api_url)

    for remote_job in client.listJobs():
        if remote_job.jobUrl != simjob.server_remote_identifier:
            continue

        remote_job.update()
        for message in remote_job.messages:
            simjob.append_log(message)

        if remote_job.isDone():
            # Nothing left to cancel on the remote side.
            continue

        remote_job.delete()
        simjob.append_log("NSG Job Canceled")
        simjob.status = ServerInterface.nsg_status[3]
        simjob.write_properties()
    return
def delete_nsg(self, simjob, server, nsg_job_list=None):
    """Delete the remote NSG job that corresponds to ``simjob``.

    Every job in the listing whose ``jobUrl`` equals
    ``simjob.server_remote_identifier`` is deleted remotely. After a
    deletion, a ``simjob`` whose status is ``ServerInterface.nsg_status[0]``
    is moved to ``nsg_status[3]`` and its properties are written out.

    NOTE(review): SOURCE also contains an earlier ``delete_nsg`` without the
    ``nsg_job_list`` parameter that additionally handles ``nsg_status[2]``;
    confirm which behaviour is intended.

    Args:
        simjob: Local job record; its log, status and properties are updated.
        server: Object providing the NSG credentials and API URL.
        nsg_job_list: Optional already-fetched job listing; fetched here when
            empty or ``None``.

    Returns:
        None.
    """
    client = Client(server.nsg_api_appname, server.nsg_api_appid,
                    server.user, server.password, server.nsg_api_url)

    # Just save a couple calls to their server.
    remote_jobs = nsg_job_list if nsg_job_list else client.listJobs()

    for remote_job in remote_jobs:
        if remote_job.jobUrl != simjob.server_remote_identifier:
            continue

        remote_job.delete()
        simjob.append_log("NSG Job Deleted on remote server")

        if simjob.status == ServerInterface.nsg_status[0]:
            simjob.status = ServerInterface.nsg_status[3]
            simjob.write_properties()
    return
def update_nsg(self, simjob, server, nsg_job_list=None):
    """Synchronise ``simjob`` with the state of its remote NSG job.

    The matching remote job (same ``jobUrl`` as
    ``simjob.server_remote_identifier``) is refreshed and its messages are
    copied into the ``simjob`` log. An error state sets ``simjob.status`` to
    ``ServerInterface.nsg_status[3]``; a done state sets it to
    ``nsg_status[1]``. Properties are written after each status change.

    NOTE(review): the two checks are independent, so if a remote job can
    report both ``isError()`` and ``isDone()`` the finished status, written
    second, is the one that sticks -- confirm this is intended.

    Args:
        simjob: Local job record.
        server: Object providing the NSG credentials and API URL.
        nsg_job_list: Optional already-fetched job listing; fetched here when
            empty or ``None``.

    Returns:
        None.
    """
    simjob.append_log("Updating NSG information on job...")
    client = Client(server.nsg_api_appname, server.nsg_api_appid,
                    server.user, server.password, server.nsg_api_url)

    # Just save a couple calls to their server.
    remote_jobs = nsg_job_list if nsg_job_list else client.listJobs()

    for remote_job in remote_jobs:
        if remote_job.jobUrl != simjob.server_remote_identifier:
            continue

        remote_job.update()
        for message in remote_job.messages:
            simjob.append_log(message)

        if remote_job.isError():
            simjob.append_log("NSG Job found in error state")
            simjob.status = ServerInterface.nsg_status[3]
            simjob.write_properties()

        if remote_job.isDone():
            simjob.append_log("NSG Job found in finished state")
            simjob.status = ServerInterface.nsg_status[1]
            simjob.write_properties()
    return
def delete_all_nsg(self, server):
    """Delete every job listed for the NSG account described by ``server``.

    Args:
        server: Object providing the NSG credentials and API URL.

    Returns:
        None.
    """
    client = Client(server.nsg_api_appname, server.nsg_api_appid,
                    server.user, server.password, server.nsg_api_url)
    print("Deleting ALL NSG Jobs...")
    remote_jobs = client.listJobs()
    for remote_job in remote_jobs:
        remote_job.delete()
def download_nsg(self, simjob, server, nsg_job_list=None):
    """Download and unpack the results of ``simjob``'s finished NSG job.

    ``simjob`` is first switched to ``ServerInterface.nsg_status[4]`` and
    saved. For the remote job whose ``jobUrl`` equals
    ``simjob.server_remote_identifier``:

    * if it reports an error, or is not done yet, a message is logged and
      nothing is downloaded;
    * otherwise its results are downloaded into
      ``simjob.job_directory_absolute`` and the archive
      ``simjob.file_resultszip`` is extracted into ``simjob.dir_results``.
      Success sets the status to ``nsg_status[2]``, an extraction failure
      sets it to ``nsg_status[3]``; properties are saved in both cases.

    NOTE(review): when no matching job is found, or the job is in error or
    not done, the status stays at ``nsg_status[4]`` -- confirm a later update
    resets it.

    Args:
        simjob: Local job record.
        server: Object providing the NSG credentials and API URL.
        nsg_job_list: Optional already-fetched job listing. When it is empty
            or ``None`` a listing is fetched here and the matching job is
            refreshed with ``update()`` before use.

    Returns:
        None.
    """
    nsg = Client(server.nsg_api_appname, server.nsg_api_appid, server.user,
                 server.password, server.nsg_api_url)

    # In a downloading state, don't try to download again.
    simjob.status = ServerInterface.nsg_status[4]
    simjob.write_properties()

    updateJob = False
    if not nsg_job_list:
        # Just save a couple calls to their server.
        nsg_job_list = nsg.listJobs()
        updateJob = True  # We probably didn't update it earlier

    for job in nsg_job_list:
        if job.jobUrl != simjob.server_remote_identifier:
            continue

        if updateJob:
            job.update()

        if job.isError():
            simjob.append_log(
                "There was an error running or downloading. See console output"
            )
            continue

        if not job.isDone():
            simjob.append_log("The job is not done can't download yet.")
            continue

        results = job.listResults()
        for m in job.messages:
            simjob.append_log(m)
        for r in results:
            simjob.append_log("Downloading: " + r)
        job.downloadResults(simjob.job_directory_absolute)

        try:
            simjob.append_log("Extracting results")
            nsg_tar_returned = os.path.join(simjob.job_directory_absolute,
                                            simjob.file_resultszip)
            zip_dir_nsg_return = os.path.join(simjob.job_directory_absolute,
                                              simjob.dir_results)
            # The context manager closes the archive even when extraction
            # fails part-way, so the handle no longer leaks.
            # NOTE(review): extractall() trusts member paths inside the
            # archive; confirm the archive source is trusted.
            with tarfile.open(nsg_tar_returned, "r:gz") as tar:
                tar.extractall(zip_dir_nsg_return)
            simjob.append_log("Extracted results to " + zip_dir_nsg_return)
            simjob.status = ServerInterface.nsg_status[2]
        except Exception as e:
            simjob.append_log("Error extracting tar file.")
            simjob.append_log('*** Caught exception: {}: {}'.format(
                e.__class__, e))
            simjob.append_log(
                "The job was marked as completed but may not have finished within the specified 'Max Runtime' before it could produce results. See 'Server Output' or 'Server Error' for more information."
            )
            simjob.status = ServerInterface.nsg_status[3]

        # Persist whichever status the extraction step ended with.
        simjob.write_properties()
    return
def submit_nsg(self, simjob, validate_only, server):
    """Write the NSG template files for ``simjob``, validate and submit it.

    Two property files are written into the job directory under
    ``simjob.sim_directory_object.sim_results_dir``: ``input.properties``
    (the snapshot archive to upload) and ``param.properties`` (tool id,
    batch file, nodes, cores, python option, output name, runtime). The
    template is then validated with NSG and, unless ``validate_only`` is
    truthy, submitted. On a successful submit the job is set to
    ``ServerInterface.nsg_status[0]``; a ``CipresError`` during validation
    sets it to ``nsg_status[3]``.

    NOTE(review): SOURCE contains a later ``submit_nsg`` with tool-dependent
    parameters; if both are defined on the same class the later definition
    replaces this one -- confirm which is intended.

    Args:
        simjob: Local job record; several of its fields are updated.
        validate_only: When truthy, stop after template validation.
        server: Object providing the NSG credentials and API URL.

    Returns:
        None.
    """
    param_name = "param.properties"
    input_name = "input.properties"
    return_filename = simjob.sim_name + '-nsg-return'
    simjob.append_log("Creating NSG parameter files: " + param_name + "," +
                      input_name)

    # Every line of both property files has the form ``key=value``.
    entry = '{}={}\n'.format

    # generate new properties
    input_path = os.path.join(simjob.sim_directory_object.sim_results_dir,
                              simjob.job_directory, input_name)
    with open(input_path, 'w') as handle:
        handle.write(
            entry("infile_",
                  os.path.join(simjob.job_directory,
                               simjob.file_snapshotzip)))

    param_path = os.path.join(simjob.sim_directory_object.sim_results_dir,
                              simjob.job_directory, param_name)
    with open(param_path, 'w') as handle:
        handle.write(entry("toolId", simjob.server_nsg_tool))
        handle.write(entry("filename_", simjob.batch_file))
        handle.write(entry("number_nodes_", simjob.server_nodes))
        handle.write(entry("number_cores_", simjob.server_cores))
        handle.write(entry("pythonoption_", simjob.server_nsg_python))
        handle.write(entry("outputfilename_", return_filename))
        handle.write(entry("runtime_", simjob.server_max_runtime))
        handle.write(entry("singlelayer_", "0"))

    # validate
    simjob.file_resultszip = return_filename + ".tar.gz"
    simjob.dir_results = return_filename
    simjob.write_properties()

    client = Client(server.nsg_api_appname, server.nsg_api_appid,
                    server.user, server.password, server.nsg_api_url)

    simjob.append_log("Validating job build with NSG...")
    try:
        validation = client.validateJobTemplate(simjob.job_directory_absolute)
        if not validation.isError():
            simjob.append_log("NSG template validation success")
        else:
            # A failed validation is only logged; it does not stop the run.
            simjob.append_log("NSG template validation failed. See debug.")
    except CipresError as e:
        simjob.append_log("Error validating NSG template: " + e.message)
        simjob.append_log("Job stopped")
        simjob.status = ServerInterface.nsg_status[3]
        simjob.write_properties()
        return

    if validate_only:
        return

    try:
        status_email = 'true' if simjob.server_status_email else 'false'
        submission = client.submitJobTemplate(
            simjob.job_directory_absolute,
            metadata={"statusEmail": status_email})
        # Remember the remote handle before looking at the outcome.
        simjob.server_remote_identifier = submission.jobUrl
        simjob.write_properties()
        if not submission.isError():
            simjob.append_log("NSG template submit success")
            simjob.status = ServerInterface.nsg_status[0]
            simjob.write_properties()
        else:
            simjob.append_log("NSG template submit failed. See debug.")
    except CipresError as e:
        simjob.append_log("Error submitting NSG template: " + e.message)
        simjob.append_log("Job stopped")
    return
def submit_nsg(self, simjob, validate_only, server):
    """Write the NSG template files for ``simjob``, validate and submit it.

    Two property files are written into the job directory under
    ``simjob.sim_directory_object.sim_results_dir``: ``input.properties``
    (the snapshot archive to upload) and ``param.properties``. The batch
    file, runtime and output name are always written; the remaining
    parameters depend on ``simjob.server_nsg_tool``:

    * NEURON tools and ``EEGLAB_TG``: ``toolId`` is the tool name itself.
    * NEURON tools and the ``PY_TG_*`` tools: node and core counts.
    * NEURON tools only: python option and ``singlelayer_``.
    * ``PY_TG_*`` tools: ``nrnivmodl_o_``, ``toolId=PY_TG`` and a
      ``version_`` of 2.7.9 or 3.5.0.

    The template is then validated with NSG and, unless ``validate_only`` is
    truthy, submitted. On a successful submit the job is set to
    ``ServerInterface.nsg_status[0]``; a ``CipresError`` during validation
    sets it to ``nsg_status[3]``.

    NOTE(review): SOURCE also contains an earlier ``submit_nsg`` with a fixed
    parameter set; confirm only one of them is meant to be live.

    Args:
        simjob: Local job record; several of its fields are updated.
        validate_only: When truthy, stop after template validation.
        server: Object providing the NSG credentials and API URL.

    Returns:
        None.
    """
    param_name = "param.properties"
    input_name = "input.properties"
    return_filename = simjob.sim_name + '-nsg-return'
    simjob.append_log("Creating NSG parameter files: " + param_name + "," +
                      input_name)

    # Every line of both property files has the form ``key=value``.
    entry = '{}={}\n'.format

    neuron_tools = ("NEURON77_TG", "NEURON75_TG", "NEURON74_TG",
                    "NEURON73_TG")
    python_tools = ("PY_TG_2.7.9", "PY_TG_3.5.0")

    # generate new properties
    input_path = os.path.join(simjob.sim_directory_object.sim_results_dir,
                              simjob.job_directory, input_name)
    with open(input_path, 'w') as handle:
        handle.write(
            entry("infile_",
                  os.path.join(simjob.job_directory,
                               simjob.file_snapshotzip)))

    param_path = os.path.join(simjob.sim_directory_object.sim_results_dir,
                              simjob.job_directory, param_name)
    with open(param_path, 'w') as handle:
        handle.write(entry("filename_", simjob.batch_file))
        handle.write(entry("runtime_", simjob.server_max_runtime))
        handle.write(entry("outputfilename_", return_filename))

        tool = simjob.server_nsg_tool
        is_neuron = tool in neuron_tools
        is_python = tool in python_tools

        if is_neuron or tool == "EEGLAB_TG":
            handle.write(entry("toolId", tool))

        if is_neuron or is_python:
            handle.write(entry("number_nodes_", simjob.server_nodes))
            handle.write(entry("number_cores_", simjob.server_cores))

        if is_neuron:
            handle.write(entry("pythonoption_", simjob.server_nsg_python))
            handle.write(entry("singlelayer_", "0"))

        if is_python:
            # Just assume we'll need neuron at some point.
            # TODO: Add button to turn off
            handle.write(entry("nrnivmodl_o_", "1"))
            # Our naming is just for convenience, nsg uses different names
            # and a version.
            handle.write(entry("toolId", "PY_TG"))
            version = "2.7.9" if tool == "PY_TG_2.7.9" else "3.5.0"
            handle.write(entry("version_", version))

    # validate
    simjob.file_resultszip = return_filename + ".tar.gz"
    simjob.dir_results = return_filename
    simjob.write_properties()

    client = Client(server.nsg_api_appname, server.nsg_api_appid,
                    server.user, server.password, server.nsg_api_url)

    simjob.append_log("Validating job build with NSG...")
    try:
        validation = client.validateJobTemplate(simjob.job_directory_absolute)
        if not validation.isError():
            simjob.append_log("NSG template validation success")
        else:
            # A failed validation is only logged; it does not stop the run.
            simjob.append_log("NSG template validation failed. See debug.")
    except CipresError as e:
        simjob.append_log("Error validating NSG template: " + e.message)
        simjob.append_log("Job stopped")
        simjob.status = ServerInterface.nsg_status[3]
        simjob.write_properties()
        return

    if validate_only:
        return

    try:
        status_email = 'true' if simjob.server_status_email else 'false'
        submission = client.submitJobTemplate(
            simjob.job_directory_absolute,
            metadata={"statusEmail": status_email})
        # Remember the remote handle before looking at the outcome.
        simjob.server_remote_identifier = submission.jobUrl
        simjob.write_properties()
        if not submission.isError():
            simjob.append_log("NSG template submit success")
            simjob.status = ServerInterface.nsg_status[0]
            simjob.write_properties()
        else:
            simjob.append_log("NSG template submit failed. See debug.")
    except CipresError as e:
        simjob.append_log("Error submitting NSG template: " + e.message)
        simjob.append_log("Job stopped")
    return