def run_job(self, job_details=None):
    JobDeploymentBase.run_job(self)
    # This function uses the SAGA-Python library to run the job. Separate
    # functionality in the library is used for monitoring the job process.
    # TODO: Add modules to PBS job configuration

    # Here we extract the job details from the previously stored job details
    # object into a SAGA-Python job description object so that we can run
    # the job.
    job_arguments = getattr(self.job_config, 'args', [])
    input_files = getattr(self, 'transferred_input_files', [])
    job_arguments += input_files

    jd = saga.job.Description()
    jd.environment = getattr(self.job_config, 'environment', {})
    jd.executable = getattr(self.job_config, 'executable', None)
    jd.arguments = job_arguments
    jd.working_directory = getattr(self.job_config, 'working_dir', None)
    jd.output = getattr(self.job_config, 'stdout', None)
    jd.error = getattr(self.job_config, 'stderr', None)
    jd.wall_time_limit = getattr(self.job_config, 'time_limit_mins', 0)
    jd.total_cpu_count = getattr(self.job_config, 'num_processes', 1)
    #jd.processes_per_host = 1
    #jd.total_physical_memory = "2400"

    self.job = self.svc.create_job(jd)
    self.job.run()
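# A minimal sketch of how the submitted PBS job could be monitored once
# run_job() returns. The method name _wait_for_job and its timeout
# parameter are illustrative assumptions, not part of the original
# deployer; wait(), state, exit_code and the DONE constant are documented
# SAGA-Python job API.
def _wait_for_job(self, timeout=-1.0):
    # wait() blocks until the job reaches a final state, or until the
    # optional timeout (in seconds) expires.
    self.job.wait(timeout)
    LOG.debug('Job finished in state <%s> with exit code <%s>'
              % (self.job.state, self.job.exit_code))
    return self.job.state == saga.job.DONE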
def run_job(self):
    JobDeploymentBase.run_job(self)
    # This function uses the libhpc resource daemon client to talk to the
    # resource daemon that is installed on cloud resources. It uses this
    # interface to run jobs and monitor their state to see when they are
    # complete.
    # TODO: Should this be running/managing the job remotely via a SAGA
    # SSH session or should we be expecting to communicate with a remote
    # resource management service to handle this?
    LOG.debug('Run job...')
    job_arguments = getattr(self.job_config, 'args', [])
    input_files = getattr(self, 'transferred_input_files', [])
    job_arguments += input_files

    # Check if we have a JOB_ID variable in the arguments or input files.
    # If so, replace this variable with the actual job ID.
    job_arguments_tmp = job_arguments
    job_arguments = []
    for item in job_arguments_tmp:
        # Can't do a replace on items that are not string types!
        if isinstance(item, basestring):
            job_arguments.append(
                item.replace('$JOB_ID', self.job_config.job_id))
        else:
            job_arguments.append(item)
    LOG.debug('Modified job arguments: %s' % job_arguments)

    jd = saga.job.Description()
    jd.environment = getattr(self.job_config, 'environment', {})
    # For multi-process jobs, wrap the run in an mpirun command and shift
    # the original executable into the argument list.
    if self.job_config.num_processes > 1:
        jd.executable = ('mpirun -np %s -machinefile /tmp/machinefile'
                         % (self.job_config.num_processes))
        executable = getattr(self.job_config, 'executable', None)
        if executable:
            job_arguments.insert(0, executable)
    else:
        jd.executable = getattr(self.job_config, 'executable', None)
    jd.arguments = job_arguments
    jd.working_directory = getattr(self.job_config, 'working_dir', None)
    jd.output = getattr(self.job_config, 'stdout', None)
    jd.error = getattr(self.job_config, 'stderr', None)
    jd.wall_time_limit = getattr(self.job_config, 'time_limit_mins', 0)
    #jd.number_of_processes = 4
    #jd.processes_per_host = 1
    #jd.total_physical_memory = "2400"
    if not jd.output:
        jd.output = 'std.out'
    if not jd.error:
        jd.error = 'std.err'

    self.svc = saga.job.Service(
        'ssh://%s/' % self.running_nodes[0][0].public_ips[0],
        session=self.session)
    self.job = self.svc.create_job(jd)
    self.job.run()
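# A hedged sketch of how the /tmp/machinefile referenced by the mpirun
# command above might be generated and staged to the master node. The
# helper name, the use of libcloud private_ips and one-slot-per-node
# layout are assumptions about this deployer, not confirmed by the
# original code; saga.filesystem.File and its copy() method are
# documented SAGA-Python API.
def _stage_machinefile(self):
    import os
    import tempfile
    # One line per running node; libcloud Node objects expose
    # private_ips.
    lines = ['%s\n' % node[0].private_ips[0]
             for node in self.running_nodes]
    fd, local_path = tempfile.mkstemp()
    with os.fdopen(fd, 'w') as f:
        f.writelines(lines)
    # Copy the local machinefile to the master node over SFTP, reusing
    # the deployer's existing SAGA session for authentication.
    src = saga.filesystem.File('file://localhost%s' % local_path,
                               session=self.session)
    src.copy('sftp://%s/tmp/machinefile'
             % self.running_nodes[0][0].public_ips[0])
    src.close()
    os.remove(local_path)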
def run_job(self):
    JobDeploymentBase.run_job(self)
    # TODO: Should this be running/managing the job remotely via a SAGA
    # SSH session or should we be expecting to communicate with a remote
    # resource management service to handle this?
    # This function could use the libhpc resource daemon client to talk to
    # a resource daemon that is installed on the target resource; however,
    # at present we simply use SSH (via SAGA-Python) to trigger job
    # execution and handle compressing and returning the output files.
    LOG.debug('SSH Deployer: Run job...')
    job_arguments = getattr(self.job_config, 'args', [])
    input_files = getattr(self, 'transferred_input_files', [])
    job_arguments += input_files

    # Check if we have a JOB_ID variable in the arguments or input files.
    # If so, replace this variable with the actual job ID.
    job_arguments_tmp = job_arguments
    job_arguments = []
    for item in job_arguments_tmp:
        # Can't do a replace on items that are not string types!
        if isinstance(item, basestring):
            job_arguments.append(
                item.replace('$JOB_ID', self.job_config.job_id))
        else:
            job_arguments.append(item)
    LOG.debug('Modified job arguments: %s' % job_arguments)

    jd = saga.job.Description()
    jd.environment = getattr(self.job_config, 'environment', {})
    jd.executable = getattr(self.job_config, 'executable', None)
    jd.arguments = job_arguments
    jd.working_directory = getattr(self.job_config, 'working_dir', None)
    jd.output = getattr(self.job_config, 'stdout', None)
    jd.error = getattr(self.job_config, 'stderr', None)
    jd.wall_time_limit = getattr(self.job_config, 'time_limit_mins', 0)
    if not jd.output:
        jd.output = 'std.out'
    if not jd.error:
        jd.error = 'std.err'

    self.svc = saga.job.Service('ssh://%s/' % self.host,
                                session=self.session)
    self.job = self.svc.create_job(jd)
    self.job.run()
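# For context, a minimal sketch of how the SSH session used above could
# be constructed with SAGA-Python. The helper name and its parameters are
# illustrative assumptions; saga.Context('ssh'), user_id, user_key,
# saga.Session and add_context are documented SAGA-Python API.
def _init_ssh_session(self, user_id, private_key_path):
    ctx = saga.Context('ssh')
    ctx.user_id = user_id            # remote login name
    ctx.user_key = private_key_path  # path to the SSH private key
    self.session = saga.Session()
    self.session.add_context(ctx)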
def run_job(self):
    JobDeploymentBase.run_job(self)
    # This function uses the libhpc resource daemon client to talk to the
    # resource daemon that is installed on cloud resources. It uses this
    # interface to run jobs and monitor their state to see when they are
    # complete.
    # TODO: Should this be running/managing the job remotely via a SAGA
    # SSH session or should we be expecting to communicate with a remote
    # resource management service to handle this?
    LOG.debug('Run job...')
    job_arguments = getattr(self.job_config, 'args', [])
    input_files = getattr(self, 'transferred_input_files', [])
    job_arguments += input_files

    jd = Description()
    jd.environment = getattr(self.job_config, 'environment', {})
    # For multi-process jobs, wrap the run in an mpirun command and shift
    # the original executable into the argument list.
    if self.job_config.num_processes > 1:
        jd.executable = ('mpirun -np %s -machinefile /tmp/machinefile'
                         % (self.job_config.num_processes))
        executable = getattr(self.job_config, 'executable', None)
        if executable:
            job_arguments.insert(0, executable)
    else:
        jd.executable = getattr(self.job_config, 'executable', None)
    jd.arguments = job_arguments
    jd.working_directory = getattr(self.job_config, 'working_dir', None)
    jd.output = getattr(self.job_config, 'stdout', None)
    jd.error = getattr(self.job_config, 'stderr', None)
    jd.wall_time_limit = getattr(self.job_config, 'time_limit_mins', 0)
    #jd.number_of_processes = 4
    #jd.processes_per_host = 1
    #jd.total_physical_memory = "2400"
    if not jd.output:
        jd.output = 'std.out'
    if not jd.error:
        jd.error = 'std.err'

    self.svc = Service(
        'ssh://%s/' % self.running_nodes[0][0].public_ips[0],
        session=self.session)
    self.job = self.svc.create_job(jd)
    self.job.run()
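# An alternative, non-blocking way this deployer could poll the cloud job
# until it completes; a sketch only, assuming the poll interval and method
# name. The final-state constants (DONE, FAILED, CANCELED) and the job's
# state and id properties are documented SAGA-Python API.
def _poll_job(self, interval=5):
    import time
    import saga.job
    final_states = (saga.job.DONE, saga.job.FAILED, saga.job.CANCELED)
    while self.job.state not in final_states:
        LOG.debug('Job <%s> in state <%s>, waiting...'
                  % (self.job.id, self.job.state))
        time.sleep(interval)
    return self.job.state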
def run_job(self, job_details=None):
    JobDeploymentBase.run_job(self)