def submit_job(self, app):
    """
    Run an `Application` instance as a local process.

    The steps performed are, in order:

    1. connect the transport and check, against the persisted resource
       state, that free cores and enough memory are available;
    2. create a fresh execution directory under `self.spooldir`;
    3. copy `file`-scheme inputs into it;
    4. write a wrapper shell script (pid file, redirections,
       environment, optional download/upload commands, timed command)
       and start it detached;
    5. read the PID back from the pid file and record the job.

    :param app: the `Application` to run; its `execution.lrms_execdir`
        and `execution.lrms_jobid` attributes are set by this method.
    :return: the same `app` object.
    :raise gc3libs.exceptions.LRMSSubmitError: if the resource cannot
        be accessed, has no free slots or memory, the execution
        directory cannot be created, or no valid PID can be read.
    :raise gc3libs.exceptions.TransportError: if a transport operation
        fails while writing or starting the wrapper script.

    :see: `LRMS.submit_job`
    """
    # Update current resource usage to check how many jobs are
    # running in there.  Please note that for consistency with
    # other backends, these updated information are not kept!
    try:
        self.transport.connect()
    except gc3libs.exceptions.TransportError as ex:
        raise gc3libs.exceptions.LRMSSubmitError(
            "Unable to access shellcmd resource at %s: %s" %
            (self.frontend, str(ex)))

    job_infos = self._get_persisted_resource_state()
    free_slots = self.max_cores - self._compute_used_cores(job_infos)
    available_memory = self.total_memory - \
        self._compute_used_memory(job_infos)

    if self.free_slots == 0 or free_slots == 0:
        # XXX: We shouldn't check for self.free_slots !
        raise gc3libs.exceptions.LRMSSubmitError(
            "Resource %s already running maximum allowed number of jobs"
            " (%s). Increase 'max_cores' to raise." %
            (self.name, self.max_cores))

    if app.requested_memory and \
            (available_memory < app.requested_memory or
             self.available_memory < app.requested_memory):
        raise gc3libs.exceptions.LRMSSubmitError(
            "Resource %s does not have enough available memory:"
            " %s requested, but only %s available."
            % (self.name,
               app.requested_memory.to_str('%g%s', unit=Memory.MB),
               available_memory.to_str('%g%s', unit=Memory.MB),)
        )

    log.debug("Executing local command '%s' ...",
              str.join(" ", app.arguments))

    # Check if spooldir is a valid directory
    if not self.spooldir:
        ex, stdout, stderr = self.transport.execute_command(
            'cd "$TMPDIR" && pwd')
        if ex != 0 or stdout.strip() == '' or not stdout[0] == '/':
            log.debug(
                "Unable to recover a valid absolute path for spooldir."
                " Using `/var/tmp`.")
            self.spooldir = '/var/tmp'
        else:
            self.spooldir = stdout.strip()

    # determine execution directory
    exit_code, stdout, stderr = self.transport.execute_command(
        "mktemp -d %s " % posixpath.join(
            self.spooldir, 'gc3libs.XXXXXX'))
    if exit_code != 0:
        log.error(
            "Error creating temporary directory on host %s: %s",
            self.frontend, stderr)
        log.debug('Freeing resources used by failed application')
        self.free(app)
        # Unlike `log.error`, an exception constructor does not
        # interpolate %-style arguments: format the message explicitly.
        raise gc3libs.exceptions.LRMSSubmitError(
            "Error creating temporary directory on host %s: %s"
            % (self.frontend, stderr))

    execdir = stdout.strip()
    app.execution.lrms_execdir = execdir

    # Copy input files to remote dir
    for local_path, remote_path in app.inputs.items():
        if local_path.scheme != 'file':
            continue
        remote_path = posixpath.join(execdir, remote_path)
        # remote paths are built with `posixpath`, so split them with
        # `posixpath` as well
        remote_parent = posixpath.dirname(remote_path)
        try:
            if (remote_parent not in ['', '.'] and
                    not self.transport.exists(remote_parent)):
                log.debug("Making remote directory '%s'", remote_parent)
                self.transport.makedirs(remote_parent)
            log.debug("Transferring file '%s' to '%s'",
                      local_path.path, remote_path)
            self.transport.put(local_path.path, remote_path)
            # preserve execute permission on input files
            if os.access(local_path.path, os.X_OK):
                self.transport.chmod(remote_path, 0o755)
        except:
            # deliberately catch everything: clean up, then re-raise
            log.critical(
                "Copying input file '%s' to remote host '%s' failed",
                local_path.path, self.frontend)
            log.debug('Cleaning up failed application')
            self.free(app)
            raise

    # try to ensure that a local executable really has
    # execute permissions, but ignore failures (might be a
    # link to a file we do not own)
    if app.arguments[0].startswith('./'):
        executable_path = posixpath.join(execdir, app.arguments[0][2:])
        try:
            self.transport.chmod(executable_path, 0o755)
        except Exception:
            # best effort only; do not swallow KeyboardInterrupt/SystemExit
            log.error(
                "Failed setting execution flag on remote file '%s'",
                executable_path)

    # set up redirection
    redirection_arguments = ''
    if app.stdin is not None:
        redirection_arguments += " <%s" % app.stdin

    if app.stdout is not None:
        redirection_arguments += " >%s" % app.stdout
        stdout_dir = posixpath.dirname(app.stdout)
        if stdout_dir:
            self.transport.makedirs(posixpath.join(execdir, stdout_dir))

    if app.join:
        redirection_arguments += " 2>&1"
    else:
        if app.stderr is not None:
            redirection_arguments += " 2>%s" % app.stderr
            stderr_dir = posixpath.dirname(app.stderr)
            if stderr_dir:
                self.transport.makedirs(
                    posixpath.join(execdir, stderr_dir))

    # set up environment
    # (`items()` exists on both Python 2 and 3 mappings; `iteritems()`
    # is Python 2 only)
    env_commands = []
    for k, v in app.environment.items():
        env_commands.append(
            "export {k}={v};"
            .format(k=sh_quote_safe(k), v=sh_quote_unsafe(v)))

    # Create the directory in which pid, output and wrapper script
    # files will be stored
    wrapper_dir = posixpath.join(execdir, ShellcmdLrms.WRAPPER_DIR)

    if not self.transport.isdir(wrapper_dir):
        try:
            self.transport.makedirs(wrapper_dir)
        except:
            # deliberately catch everything: clean up, then re-raise
            log.error("Failed creating remote folder '%s'" % wrapper_dir)
            self.free(app)
            raise

    # Set up scripts to download/upload the swift/http files
    downloadfiles = []
    uploadfiles = []
    wrapper_downloader_filename = posixpath.join(
        wrapper_dir, ShellcmdLrms.WRAPPER_DOWNLOADER)

    for url, outfile in app.inputs.items():
        if url.scheme in ['swift', 'swifts', 'swt', 'swts',
                          'http', 'https']:
            downloadfiles.append(
                "python '%s' download '%s' '%s'"
                % (wrapper_downloader_filename, str(url), outfile))

    for infile, url in app.outputs.items():
        if url.scheme in ['swift', 'swt', 'swifts', 'swts']:
            uploadfiles.append(
                "python '%s' upload '%s' '%s'"
                % (wrapper_downloader_filename, str(url), infile))

    if downloadfiles or uploadfiles:
        # Also copy the downloader.
        with open(resource_filename(Requirement.parse("gc3pie"),
                                    "gc3libs/etc/downloader.py")) as fd:
            wrapper_downloader = self.transport.open(
                wrapper_downloader_filename, 'w')
            try:
                wrapper_downloader.write(fd.read())
            finally:
                # close the remote handle even if the write fails
                wrapper_downloader.close()

    # Build the wrapper script
    pidfilename = posixpath.join(wrapper_dir, ShellcmdLrms.WRAPPER_PID)
    wrapper_output_filename = posixpath.join(
        wrapper_dir, ShellcmdLrms.WRAPPER_OUTPUT_FILENAME)
    wrapper_script_fname = posixpath.join(
        wrapper_dir, ShellcmdLrms.WRAPPER_SCRIPT)

    # NOTE: the newlines in this template are significant: each shell
    # statement must sit on its own line and `#!/bin/sh` must be alone
    # on the first one.
    commands = (
        r"""#!/bin/sh
echo $$ >{pidfilename}
cd {execdir}
exec {redirections}
{environment}
{downloadfiles}
'{time_cmd}' -o '{wrapper_out}' -f '{fmt}' {command}
rc=$?
{uploadfiles}
rc2=$?
if [ $rc -ne 0 ]; then exit $rc; else exit $rc2; fi
""".format(
            pidfilename=pidfilename,
            execdir=execdir,
            time_cmd=self.time_cmd,
            wrapper_out=wrapper_output_filename,
            fmt=ShellcmdLrms.TIMEFMT,
            redirections=redirection_arguments,
            environment=str.join('\n', env_commands),
            downloadfiles=str.join('\n', downloadfiles),
            uploadfiles=str.join('\n', uploadfiles),
            command=(str.join(' ',
                              (sh_quote_unsafe(arg)
                               for arg in app.arguments))),
        ))

    try:
        # Create the wrapper script
        wrapper_script = self.transport.open(wrapper_script_fname, 'w')
        try:
            wrapper_script.write(commands)
        finally:
            wrapper_script.close()
    except gc3libs.exceptions.TransportError:
        log.error("Freeing resources used by failed application")
        self.free(app)
        raise

    try:
        self.transport.chmod(wrapper_script_fname, 0o755)

        # Execute the script in background
        self.transport.execute_command(wrapper_script_fname, detach=True)
    except gc3libs.exceptions.TransportError:
        log.error("Freeing resources used by failed application")
        self.free(app)
        raise

    # Just after the script has been started the pidfile should be
    # filled in with the correct pid.
    #
    # However, the script can have not been able to write the
    # pidfile yet, so we have to wait a little bit for it...
    pidfile = None
    for retry in gc3libs.utils.ExponentialBackoff():
        try:
            pidfile = self.transport.open(pidfilename, 'r')
            break
        except gc3libs.exceptions.TransportError as ex:
            if '[Errno 2]' in str(ex):  # no such file or directory
                time.sleep(retry)
                continue
            else:
                raise
    if pidfile is None:
        # XXX: probably self.free(app) should go here as well
        raise gc3libs.exceptions.LRMSSubmitError(
            "Unable to get PID file of submitted process from"
            " execution directory `%s`: %s"
            % (execdir, pidfilename))

    # read the raw contents first, so the handle is always closed
    try:
        pid_text = pidfile.read().strip()
    finally:
        pidfile.close()
    try:
        pid = int(pid_text)
    except ValueError:
        # XXX: probably self.free(app) should go here as well
        raise gc3libs.exceptions.LRMSSubmitError(
            "Invalid pid `%s` in pidfile %s." % (pid_text, pidfilename))

    # Update application and current resources
    app.execution.lrms_jobid = pid
    # We don't need to update free_slots since its value is
    # checked at runtime.
    if app.requested_memory:
        self.available_memory -= app.requested_memory
    self.job_infos[pid] = {
        'requested_cores': app.requested_cores,
        'requested_memory': app.requested_memory,
        'execution_dir': execdir,
        'terminated': False,
    }
    self._update_job_resource_file(pid, self.job_infos[pid])
    return app
def submit_job(self, app):
    """
    Run an `Application` instance as a local process.

    The steps performed are, in order:

    1. connect the transport and check, against the persisted resource
       state, that free cores and enough memory are available;
    2. create a fresh execution directory under `self.spooldir`;
    3. copy `file`-scheme inputs into it;
    4. write a wrapper shell script (pid file, redirections,
       environment, optional download/upload commands, timed command)
       and start it detached;
    5. read the PID back from the pid file and record the job.

    :param app: the `Application` to run; its `execution.lrms_execdir`
        and `execution.lrms_jobid` attributes are set by this method.
    :return: the same `app` object.
    :raise gc3libs.exceptions.LRMSSubmitError: if the resource cannot
        be accessed, has no free slots or memory, the execution
        directory cannot be created, or no valid PID can be read.
    :raise gc3libs.exceptions.TransportError: if a transport operation
        fails while writing or starting the wrapper script.

    :see: `LRMS.submit_job`
    """
    # Update current resource usage to check how many jobs are
    # running in there.  Please note that for consistency with
    # other backends, these updated information are not kept!
    try:
        self.transport.connect()
    except gc3libs.exceptions.TransportError as ex:
        raise gc3libs.exceptions.LRMSSubmitError(
            "Unable to access shellcmd resource at %s: %s" %
            (self.frontend, str(ex)))

    job_infos = self._get_persisted_resource_state()
    free_slots = self.max_cores - self._compute_used_cores(job_infos)
    available_memory = self.total_memory - \
        self._compute_used_memory(job_infos)

    if self.free_slots == 0 or free_slots == 0:
        # XXX: We shouldn't check for self.free_slots !
        raise gc3libs.exceptions.LRMSSubmitError(
            "Resource %s already running maximum allowed number of jobs"
            " (%s). Increase 'max_cores' to raise." %
            (self.name, self.max_cores))

    if app.requested_memory and \
            (available_memory < app.requested_memory or
             self.available_memory < app.requested_memory):
        raise gc3libs.exceptions.LRMSSubmitError(
            "Resource %s does not have enough available memory:"
            " %s requested, but only %s available." % (
                self.name,
                app.requested_memory.to_str('%g%s', unit=Memory.MB),
                available_memory.to_str('%g%s', unit=Memory.MB),
            ))

    log.debug("Executing local command '%s' ...",
              str.join(" ", app.arguments))

    # Check if spooldir is a valid directory
    if not self.spooldir:
        ex, stdout, stderr = self.transport.execute_command(
            'cd "$TMPDIR" && pwd')
        if ex != 0 or stdout.strip() == '' or not stdout[0] == '/':
            log.debug(
                "Unable to recover a valid absolute path for spooldir."
                " Using `/var/tmp`.")
            self.spooldir = '/var/tmp'
        else:
            self.spooldir = stdout.strip()

    # determine execution directory
    exit_code, stdout, stderr = self.transport.execute_command(
        "mktemp -d %s " % posixpath.join(self.spooldir, 'gc3libs.XXXXXX'))
    if exit_code != 0:
        log.error("Error creating temporary directory on host %s: %s",
                  self.frontend, stderr)
        log.debug('Freeing resources used by failed application')
        self.free(app)
        # Unlike `log.error`, an exception constructor does not
        # interpolate %-style arguments: format the message explicitly.
        raise gc3libs.exceptions.LRMSSubmitError(
            "Error creating temporary directory on host %s: %s"
            % (self.frontend, stderr))

    execdir = stdout.strip()
    app.execution.lrms_execdir = execdir

    # Copy input files to remote dir
    for local_path, remote_path in app.inputs.items():
        if local_path.scheme != 'file':
            continue
        remote_path = posixpath.join(execdir, remote_path)
        # remote paths are built with `posixpath`, so split them with
        # `posixpath` as well
        remote_parent = posixpath.dirname(remote_path)
        try:
            if (remote_parent not in ['', '.'] and
                    not self.transport.exists(remote_parent)):
                log.debug("Making remote directory '%s'", remote_parent)
                self.transport.makedirs(remote_parent)
            log.debug("Transferring file '%s' to '%s'",
                      local_path.path, remote_path)
            self.transport.put(local_path.path, remote_path)
            # preserve execute permission on input files
            if os.access(local_path.path, os.X_OK):
                self.transport.chmod(remote_path, 0o755)
        except:
            # deliberately catch everything: clean up, then re-raise
            log.critical(
                "Copying input file '%s' to remote host '%s' failed",
                local_path.path, self.frontend)
            log.debug('Cleaning up failed application')
            self.free(app)
            raise

    # try to ensure that a local executable really has
    # execute permissions, but ignore failures (might be a
    # link to a file we do not own)
    if app.arguments[0].startswith('./'):
        executable_path = posixpath.join(execdir, app.arguments[0][2:])
        try:
            self.transport.chmod(executable_path, 0o755)
        except Exception:
            # best effort only; do not swallow KeyboardInterrupt/SystemExit
            log.error("Failed setting execution flag on remote file '%s'",
                      executable_path)

    # set up redirection
    redirection_arguments = ''
    if app.stdin is not None:
        redirection_arguments += " <%s" % app.stdin

    if app.stdout is not None:
        redirection_arguments += " >%s" % app.stdout
        stdout_dir = posixpath.dirname(app.stdout)
        if stdout_dir:
            self.transport.makedirs(posixpath.join(execdir, stdout_dir))

    if app.join:
        redirection_arguments += " 2>&1"
    else:
        if app.stderr is not None:
            redirection_arguments += " 2>%s" % app.stderr
            stderr_dir = posixpath.dirname(app.stderr)
            if stderr_dir:
                self.transport.makedirs(posixpath.join(
                    execdir, stderr_dir))

    # set up environment
    # (`items()` exists on both Python 2 and 3 mappings; `iteritems()`
    # is Python 2 only)
    env_commands = []
    for k, v in app.environment.items():
        env_commands.append("export {k}={v};".format(k=sh_quote_safe(k),
                                                     v=sh_quote_unsafe(v)))

    # Create the directory in which pid, output and wrapper script
    # files will be stored
    wrapper_dir = posixpath.join(execdir, ShellcmdLrms.WRAPPER_DIR)

    if not self.transport.isdir(wrapper_dir):
        try:
            self.transport.makedirs(wrapper_dir)
        except:
            # deliberately catch everything: clean up, then re-raise
            log.error("Failed creating remote folder '%s'" % wrapper_dir)
            self.free(app)
            raise

    # Set up scripts to download/upload the swift/http files
    downloadfiles = []
    uploadfiles = []
    wrapper_downloader_filename = posixpath.join(
        wrapper_dir, ShellcmdLrms.WRAPPER_DOWNLOADER)

    for url, outfile in app.inputs.items():
        if url.scheme in [
                'swift', 'swifts', 'swt', 'swts', 'http', 'https'
        ]:
            downloadfiles.append(
                "python '%s' download '%s' '%s'" %
                (wrapper_downloader_filename, str(url), outfile))

    for infile, url in app.outputs.items():
        if url.scheme in ['swift', 'swt', 'swifts', 'swts']:
            uploadfiles.append(
                "python '%s' upload '%s' '%s'" %
                (wrapper_downloader_filename, str(url), infile))

    if downloadfiles or uploadfiles:
        # Also copy the downloader.
        with open(
                resource_filename(Requirement.parse("gc3pie"),
                                  "gc3libs/etc/downloader.py")) as fd:
            wrapper_downloader = self.transport.open(
                wrapper_downloader_filename, 'w')
            try:
                wrapper_downloader.write(fd.read())
            finally:
                # close the remote handle even if the write fails
                wrapper_downloader.close()

    # Build the wrapper script
    pidfilename = posixpath.join(wrapper_dir, ShellcmdLrms.WRAPPER_PID)
    wrapper_output_filename = posixpath.join(
        wrapper_dir, ShellcmdLrms.WRAPPER_OUTPUT_FILENAME)
    wrapper_script_fname = posixpath.join(wrapper_dir,
                                          ShellcmdLrms.WRAPPER_SCRIPT)

    # NOTE: the newlines in this template are significant: each shell
    # statement must sit on its own line and `#!/bin/sh` must be alone
    # on the first one.
    commands = (r"""#!/bin/sh
echo $$ >{pidfilename}
cd {execdir}
exec {redirections}
{environment}
{downloadfiles}
'{time_cmd}' -o '{wrapper_out}' -f '{fmt}' {command}
rc=$?
{uploadfiles}
rc2=$?
if [ $rc -ne 0 ]; then exit $rc; else exit $rc2; fi
""".format(
        pidfilename=pidfilename,
        execdir=execdir,
        time_cmd=self.time_cmd,
        wrapper_out=wrapper_output_filename,
        fmt=ShellcmdLrms.TIMEFMT,
        redirections=redirection_arguments,
        environment=str.join('\n', env_commands),
        downloadfiles=str.join('\n', downloadfiles),
        uploadfiles=str.join('\n', uploadfiles),
        command=(str.join(' ', (sh_quote_unsafe(arg)
                                for arg in app.arguments))),
    ))

    try:
        # Create the wrapper script
        wrapper_script = self.transport.open(wrapper_script_fname, 'w')
        try:
            wrapper_script.write(commands)
        finally:
            wrapper_script.close()
    except gc3libs.exceptions.TransportError:
        log.error("Freeing resources used by failed application")
        self.free(app)
        raise

    try:
        self.transport.chmod(wrapper_script_fname, 0o755)

        # Execute the script in background
        self.transport.execute_command(wrapper_script_fname, detach=True)
    except gc3libs.exceptions.TransportError:
        log.error("Freeing resources used by failed application")
        self.free(app)
        raise

    # Just after the script has been started the pidfile should be
    # filled in with the correct pid.
    #
    # However, the script can have not been able to write the
    # pidfile yet, so we have to wait a little bit for it...
    pidfile = None
    for retry in gc3libs.utils.ExponentialBackoff():
        try:
            pidfile = self.transport.open(pidfilename, 'r')
            break
        except gc3libs.exceptions.TransportError as ex:
            if '[Errno 2]' in str(ex):  # no such file or directory
                time.sleep(retry)
                continue
            else:
                raise
    if pidfile is None:
        # XXX: probably self.free(app) should go here as well
        raise gc3libs.exceptions.LRMSSubmitError(
            "Unable to get PID file of submitted process from"
            " execution directory `%s`: %s" % (execdir, pidfilename))

    # read the raw contents first, so the handle is always closed
    try:
        pid_text = pidfile.read().strip()
    finally:
        pidfile.close()
    try:
        pid = int(pid_text)
    except ValueError:
        # XXX: probably self.free(app) should go here as well
        raise gc3libs.exceptions.LRMSSubmitError(
            "Invalid pid `%s` in pidfile %s." % (pid_text, pidfilename))

    # Update application and current resources
    app.execution.lrms_jobid = pid
    # We don't need to update free_slots since its value is
    # checked at runtime.
    if app.requested_memory:
        self.available_memory -= app.requested_memory
    self.job_infos[pid] = {
        'requested_cores': app.requested_cores,
        'requested_memory': app.requested_memory,
        'execution_dir': execdir,
        'terminated': False,
    }
    self._update_job_resource_file(pid, self.job_infos[pid])
    return app
def _gather_machine_specs(self):
    """
    Gather information about this machine and, if `self.override`
    is true, also update the value of `max_cores` and
    `max_memory_per_core` attributes.

    Regardless of `self.override`, this method also:

    * expands `self.cfg_resourcedir` through the remote shell into
      `self.resource_dir`, creating that directory if missing;
    * checks the architecture reported by ``uname -m`` against
      `self.architecture`;
    * stores the output of ``uname -s`` in `self.running_kernel`;
    * sets `self.time_cmd` to the result of `_locate_gnu_time()`.

    This method works with both Linux and MacOSX.

    :raise gc3libs.exceptions.ConfigurationError: if the detected
        architecture differs from the configured one, if GNU `time`
        cannot be located, or if `self.override` is true and the
        kernel is neither `Linux` nor `Darwin`.
    """
    self.transport.connect()

    # expand env variables in the `resource_dir` setting
    exit_code, stdout, stderr = self.transport.execute_command(
        'echo %s' % sh_quote_unsafe(self.cfg_resourcedir))
    self.resource_dir = stdout.strip()

    # XXX: it is actually necessary to create the folder
    # as a separate step
    if not self.transport.exists(self.resource_dir):
        try:
            log.info("Creating resource file directory: '%s' ...",
                     self.resource_dir)
            self.transport.makedirs(self.resource_dir)
        except Exception as ex:
            log.error("Failed creating resource directory '%s':"
                      " %s: %s", self.resource_dir, type(ex), str(ex))
            # cannot continue
            raise

    exit_code, stdout, stderr = self.transport.execute_command('uname -m')
    arch = gc3libs.config._parse_architecture(stdout)
    if arch != self.architecture:
        raise gc3libs.exceptions.ConfigurationError(
            "Invalid architecture: configuration file says `%s` but "
            "it actually is `%s`" % (str.join(', ', self.architecture),
                                     str.join(', ', arch)))

    exit_code, stdout, stderr = self.transport.execute_command('uname -s')
    self.running_kernel = stdout.strip()

    # ensure `time_cmd` points to a valid value
    self.time_cmd = self._locate_gnu_time()
    if not self.time_cmd:
        raise gc3libs.exceptions.ConfigurationError(
            "Unable to find GNU `time` installed on your system."
            " Please, install GNU time and set the `time_cmd`"
            " configuration option in gc3pie.conf.")

    if not self.override:
        # Ignore other values.
        return

    if self.running_kernel == 'Linux':
        exit_code, stdout, stderr = self.transport.execute_command('nproc')
        max_cores = int(stdout)

        # get the amount of total memory from /proc/meminfo
        with self.transport.open('/proc/meminfo', 'r') as fd:
            for line in fd:
                if line.startswith('MemTotal'):
                    self.total_memory = int(line.split()[1]) * Memory.KiB
                    break

    elif self.running_kernel == 'Darwin':
        exit_code, stdout, stderr = self.transport.execute_command(
            'sysctl hw.ncpu')
        max_cores = int(stdout.split(':')[-1])

        exit_code, stdout, stderr = self.transport.execute_command(
            'sysctl hw.memsize')
        self.total_memory = int(stdout.split(':')[1]) * Memory.B

    else:
        # Without this branch `max_cores` would be unbound below and
        # the method would fail with an obscure `UnboundLocalError`.
        raise gc3libs.exceptions.ConfigurationError(
            "Cannot detect cores and memory of resource '%s':"
            " unsupported kernel `%s` (expected `Linux` or `Darwin`)."
            % (self.name, self.running_kernel))

    if max_cores != self.max_cores:
        log.info(
            "Mismatch of value `max_cores` on resource '%s':"
            " configuration file says `max_cores=%d` while it's"
            " actually `%d`. Updating current value.",
            self.name, self.max_cores, max_cores)
        self.max_cores = max_cores

    if self.total_memory != self.max_memory_per_core:
        log.info(
            "Mismatch of value `max_memory_per_core` on resource %s:"
            " configuration file says `max_memory_per_core=%s` while it's"
            " actually `%s`. Updating current value.",
            self.name,
            self.max_memory_per_core,
            self.total_memory.to_str('%g%s', unit=Memory.MB))
        self.max_memory_per_core = self.total_memory

    self.available_memory = self.total_memory