def stage_in(self, stagein):
    """Perform a single stage in."""
    task_logger = create_task_logger(logger, stagein.task.pk)
    task_logger.info("Stagein: %sing '%s' to '%s'", stagein.method, stagein.src, stagein.dst)

    # Fall back through the supported methods: link -> lcopy -> copy
    method = stagein.method
    if method == 'link' and not self.link_supported:
        method = 'lcopy'
    if method == 'lcopy' and not self.lcopy_supported:
        method = 'copy'

    if stagein.method != method:
        task_logger.info("Preferred stagein method '%s' not supported, changing it to '%s'",
                         stagein.method, method)
        stagein.method = method
        stagein.save()

    if stagein.method == 'copy':
        if stagein.src.endswith('/'):
            FSBackend.remote_copy(self.yabiusername, stagein.src, stagein.dst)
        else:
            FSBackend.remote_file_copy(self.yabiusername, stagein.src, stagein.dst)

    if stagein.method == 'lcopy':
        if stagein.src.endswith('/'):
            self.local_copy_recursive(stagein.src, stagein.dst)
        else:
            self.local_copy(stagein.src, stagein.dst)

    if stagein.method == 'link':
        if stagein.src.endswith('/'):
            # Link each entry of the source directory individually
            listing = self.ls(stagein.src).values()[0]
            for entry in listing['files'] + listing['directories']:
                name, _, _, _ = entry
                self.symbolic_link(url_join(stagein.src, name), url_join(stagein.dst, name))
        else:
            self.symbolic_link(stagein.src, stagein.dst)

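# Illustration only: a minimal sketch of the stage-in method fallback used
# above, pulled out as a standalone function. The two capability flags mirror
# self.link_supported and self.lcopy_supported; everything else is hypothetical.
def resolve_stagein_method(preferred, link_supported, lcopy_supported):
    """Degrade 'link' to 'lcopy' and 'lcopy' to 'copy' when unsupported."""
    method = preferred
    if method == 'link' and not link_supported:
        method = 'lcopy'
    if method == 'lcopy' and not lcopy_supported:
        method = 'copy'
    return method

assert resolve_stagein_method('link', False, False) == 'copy'
assert resolve_stagein_method('link', True, False) == 'link'
assert resolve_stagein_method('lcopy', True, False) == 'copy'
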
def add_task(self, uridict, name=""): logger.debug("add_task called with uridict: %s, name: %s" % (uridict, name)) # create the task self.working_dir = str(uuid.uuid4()) self.name = name # make the command from the command template template = self.job.template # set our template batch uri conversion path template.set_uri_conversion(url_join(self.fsbackend_parts.path, self.working_dir, "input") + "/%(filename)s") if uridict is None: # batchfileless task (eg, select file) self.command = template.render() else: self.command = template.render(uridict) self.save() # non batch stageins for key, stageins in template.all_files(): logger.debug("key:%s stagein:%s" % (key, stageins)) for stagein in stageins: self.batch_files_stagein(stagein) self.status = '' self.save() logger.info('Created task for job id: %s using command: %s' % (self.job.id, self.command)) logger.info('working dir is: %s' % (self.working_dir))
def _update_stagein_destination(self, stagein, dst_uri):
    if stagein.src.endswith('/'):
        dst_uri = self.task.stageout
    else:
        filename = stagein.src.rsplit('/', 1)[1]
        dst_uri = url_join(self.task.stageout, filename)
    stagein.dst = dst_uri

    # Destination changed so we have to determine the stagein method again
    stagein.method = self.task.determine_stagein_method(stagein.src, stagein.dst)
    stagein.save()

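# Illustration only: how the destination URI is recomputed above, assuming
# url_join joins its arguments with a single slash. URIs are placeholders.
def recompute_dst(src, stageout):
    # Directory sources (trailing slash) map to the stageout root;
    # file sources map to the stageout root plus their own filename.
    if src.endswith('/'):
        return stageout
    filename = src.rsplit('/', 1)[1]
    return stageout.rstrip('/') + '/' + filename

assert recompute_dst("sftp://host/in/data.csv", "sftp://host/out/") == "sftp://host/out/data.csv"
assert recompute_dst("sftp://host/in/subdir/", "sftp://host/out/") == "sftp://host/out/"
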
def remote_copy_recurse(yabiusername, src_uri, dst_uri):
    """Recursively copy src_uri to dst_uri"""
    logger.info('remote_copy {0} -> {1}'.format(src_uri, dst_uri))
    src_backend = FSBackend.urifactory(yabiusername, src_uri)
    dst_backend = FSBackend.urifactory(yabiusername, dst_uri)

    try:
        src_stat = src_backend.remote_uri_stat(src_uri)
        # get a _flat_ listing here, not recursive as before
        listing = src_backend.ls(src_uri)
        dst_backend.mkdir(dst_uri)
        logger.debug("listing of src_uri %s = %s" % (src_uri, listing))

        for key in listing:
            # copy files using a fifo
            for listing_file in listing[key]['files']:
                src_file_uri = url_join(src_uri, listing_file[0])
                dst_file_uri = url_join(dst_uri, listing_file[0])
                logger.debug("src_file_uri = %s" % src_file_uri)
                logger.debug("dst_file_uri = %s" % dst_file_uri)
                FSBackend.remote_file_copy(yabiusername, src_file_uri, dst_file_uri)

            # recurse on directories
            for listing_dir in listing[key]['directories']:
                src_dir_uri = url_join(src_uri, listing_dir[0])
                dst_dir_uri = url_join(dst_uri, listing_dir[0])
                logger.debug("src_dir_uri = %s" % src_dir_uri)
                logger.debug("dst_dir_uri = %s" % dst_dir_uri)
                FSBackend.remote_copy_recurse(yabiusername, src_dir_uri, dst_dir_uri)

        # Avoid setting the times if we're copying the contents of the source
        if src_stat and src_backend.basename(src_uri.rstrip('/')) == dst_backend.basename(dst_uri.rstrip('/')):
            atime = src_stat.get('atime')
            mtime = src_stat.get('mtime')
            dst_backend.set_remote_uri_times(dst_uri, atime, mtime)
    except Exception as exc:
        raise RetryException(exc, traceback.format_exc())

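# Illustration only: the listing shape that remote_copy_recurse assumes --
# a dict keyed by the listed URI, each value holding 'files' and 'directories'
# as lists of entries whose first element is the name (the remaining fields
# shown here are placeholders).
listing = {
    "gridftp://host/data/run1/": {
        "files": [("sample.txt", 1024, "2015-01-01", False)],
        "directories": [("logs", 0, "2015-01-01", False)],
    },
}

for key in listing:
    for entry in listing[key]["files"]:
        print("copy file", entry[0])
    for entry in listing[key]["directories"]:
        print("recurse into directory", entry[0])
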
def add_task(self, uridict, name=""): logger.debug("add_task called with uridict: %s, name: %s" % (uridict, name)) # create the task self.working_dir = str(uuid.uuid4()) self.name = name # make the command from the command template template = self.job.template # set our template batch uri conversion path template.set_uri_conversion( url_join(self.fsbackend_parts.path, self.working_dir, "input") + "/%(filename)s") if uridict is None: # batchfileless task (eg, select file) self.command = template.render() else: self.command = template.render(uridict) self.save() # non batch stageins for key, stageins in template.all_files(): logger.debug("key:%s stagein:%s" % (key, stageins)) for stagein in stageins: self.batch_files_stagein(stagein) self.status = '' self.save() logger.info('Created task for job id: %s using command: %s' % (self.job.id, self.command)) logger.info('working dir is: %s' % (self.working_dir))
def working_output_dir_uri(self):
    """working/output dir"""
    return url_join(self.working_dir_uri(), 'output')

def working_input_dir_uri(self):
    """working/input dir"""
    return url_join(self.working_dir_uri(), 'input')

def working_dir_uri(self):
    """working dir"""
    return url_join(self.task.job.fs_backend, self.task.working_dir)

def json(self):
    # formulate our status url and our error url
    # use the yabi embedded in this server
    statusurl = webhelpers.url("/engine/status/task/%d" % self.id)
    syslogurl = webhelpers.url("/engine/syslog/task/%d" % self.id)
    remoteidurl = webhelpers.url("/engine/remote_id/%d" % self.id)
    remoteinfourl = webhelpers.url("/engine/remote_info/%d" % self.id)

    # get our tool's fs backend
    fsscheme, fsbackend_parts = uriparse(self.job.fs_backend)
    logger.debug("getting fs backend for user: %s fs_backend:%s" % (self.job.workflow.user.name, self.job.fs_backend))
    fs_backend = backendhelper.get_fs_backend_for_uri(self.job.workflow.user.name, self.job.fs_backend)
    logger.debug("fs backend is: %s" % fs_backend)

    # get our exec backend so we can get our submission script
    logger.debug("getting exec backendcredential for user: %s exec_backend:%s" % (self.job.workflow.user.name, self.job.exec_backend))
    submission_backendcredential = backendhelper.get_exec_backendcredential_for_uri(self.job.workflow.user.name, self.job.exec_backend)
    logger.debug("exec backendcredential is: %s" % (submission_backendcredential))
    submission_backend = submission_backendcredential.backend
    # prefer the backend-level submission script, falling back to the
    # credential's script when the backend's is blank
    submission = submission_backendcredential.submission if str(submission_backend.submission).isspace() else submission_backend.submission

    # if the tool's filesystem and the user's stageout area are on the same scheme/host/port/user
    # then use the preferred stageout method, else default to 'copy'
    so_backend = backendhelper.get_fs_backend_for_uri(self.job.workflow.user.name, self.job.stageout)
    soscheme, sobackend_parts = uriparse(self.job.stageout)
    if (so_backend == fs_backend and soscheme == fsscheme and
            sobackend_parts.hostname == fsbackend_parts.hostname and
            sobackend_parts.port == fsbackend_parts.port and
            sobackend_parts.username == fsbackend_parts.username):
        stageout_method = self.job.preferred_stageout_method
    else:
        stageout_method = "copy"

    output = {
        "yabiusername": self.job.workflow.user.name,
        "taskid": self.id,
        "statusurl": statusurl,
        "syslogurl": syslogurl,
        "remoteidurl": remoteidurl,
        "remoteinfourl": remoteinfourl,
        "stagein": [],
        "exec": {
            "command": self.command,
            "backend": url_join(self.job.exec_backend),
            "fsbackend": url_join(self.job.fs_backend, self.working_dir),
            "workingdir": os.path.join(fsbackend_parts.path, self.working_dir),
            "cpus": self.job.cpus,
            "walltime": self.job.walltime,
            "module": self.job.module,
            "queue": self.job.queue,
            "memory": self.job.max_memory,
            "jobtype": self.job.job_type,
            "tasknum": self.task_num,
            "tasktotal": self.job.task_total,
            "submission": submission
        },
        "stageout": self.job.stageout + ("" if self.job.stageout.endswith("/") else "/") + ("" if not self.name else self.name + "/"),
        "stageout_method": stageout_method
    }

    for s in self.stagein_set.all():
        src_scheme, src_rest = uriparse(s.src)
        dst_scheme, dst_rest = uriparse(s.dst)
        # method may be 'copy', 'lcopy' or 'link'
        output["stagein"].append({"src": s.src, "dst": s.dst, "order": s.order, "method": s.method})

    return json.dumps(output)

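# Illustration only: how the "stageout" URI above is composed -- ensure a
# trailing slash on the job stageout URI, then append the task name (if any)
# as a subdirectory. URIs and names are placeholders.
def stageout_uri(job_stageout, task_name):
    return (job_stageout
            + ("" if job_stageout.endswith("/") else "/")
            + ("" if not task_name else task_name + "/"))

assert stageout_uri("gridftp://host/out", "") == "gridftp://host/out/"
assert stageout_uri("gridftp://host/out/", "task1") == "gridftp://host/out/task1/"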