Example #1
0
    def stage_in(self, stagein):
        """Perform a single stage in.

        The stagein's preferred method is downgraded when this backend does
        not support it ('link' -> 'lcopy' -> 'copy'). A downgrade is logged
        and saved on the stagein before any data is transferred.

        A source URI ending in '/' is treated as a directory: 'copy' and
        'lcopy' use their recursive variants, while 'link' creates one
        symbolic link per entry listed directly under the source. Any other
        source is treated as a single file.
        """
        task_logger = create_task_logger(logger, stagein.task.pk)
        task_logger.info("Stagein: %sing '%s' to '%s'", stagein.method, stagein.src, stagein.dst)

        # Fall back along link -> lcopy -> copy until a supported method is found.
        method = stagein.method
        if method == 'link' and not self.link_supported:
            method = 'lcopy'
        if method == 'lcopy' and not self.lcopy_supported:
            method = 'copy'
        if stagein.method != method:
            task_logger.info("Preferred stagein method '%s' not supported, changing it to '%s'", stagein.method, method)
            stagein.method = method
            stagein.save()

        # A trailing slash marks the source as a directory.
        is_directory = stagein.src.endswith('/')

        if method == 'copy':
            if is_directory:
                FSBackend.remote_copy(self.yabiusername, stagein.src, stagein.dst)
            else:
                FSBackend.remote_file_copy(self.yabiusername, stagein.src, stagein.dst)

        elif method == 'lcopy':
            if is_directory:
                self.local_copy_recursive(stagein.src, stagein.dst)
            else:
                self.local_copy(stagein.src, stagein.dst)

        elif method == 'link':
            if is_directory:
                # ls() returns a mapping; only its first value is used here.
                # list() is needed because dict.values() is a non-indexable
                # view on Python 3 (and it still raises IndexError when the
                # mapping is empty, as the Python 2 list did).
                listing = list(self.ls(stagein.src).values())[0]
                for entry in listing['files'] + listing['directories']:
                    # Only the first field (the entry name) is needed.
                    name = entry[0]
                    self.symbolic_link(url_join(stagein.src, name), url_join(stagein.dst, name))
            else:
                self.symbolic_link(stagein.src, stagein.dst)
Example #2
0
    def add_task(self, uridict, name=""):
        """Set up this task: working dir, rendered command and stageins.

        A fresh uuid4 string becomes the task's working directory name. The
        job's command template is pointed at the task's "input" directory
        and rendered, with ``uridict`` when one is given and with no
        arguments when ``uridict`` is None. The task is saved, a stagein is
        created for every file the template reports, the status is reset to
        the empty string and the task is saved again.
        """
        logger.debug("add_task called with uridict: %s, name: %s" % (uridict, name))

        # identify the task
        self.name = name
        self.working_dir = str(uuid.uuid4())

        job_template = self.job.template

        # batch uris are converted to paths inside this task's input directory
        input_dir = url_join(self.fsbackend_parts.path, self.working_dir, "input")
        job_template.set_uri_conversion(input_dir + "/%(filename)s")

        # a uridict of None means a batchfileless task (eg, select file)
        render_args = () if uridict is None else (uridict,)
        self.command = job_template.render(*render_args)

        self.save()

        # non batch stageins
        for key, stageins in job_template.all_files():
            logger.debug("key:%s stagein:%s" % (key, stageins))
            for stagein in stageins:
                self.batch_files_stagein(stagein)

        self.status = ''
        self.save()

        logger.info('Created task for job id: %s using command: %s' % (self.job.id, self.command))
        logger.info('working dir is: %s' % self.working_dir)
Example #3
0
    def _update_stagein_destination(self, stagein, dst_uri):
        if stagein.src.endswith('/'):
            dst_uri = self.task.stageout
        else:
            filename = stagein.src.rsplit('/', 1)[1]
            dst_uri = url_join(self.task.stageout, filename)

        stagein.dst = dst_uri
        # Destination changed so we have to determine the stagein method again
        stagein.method = self.task.determine_stagein_method(stagein.src, stagein.dst)
        stagein.save()
Example #4
0
    def _update_stagein_destination(self, stagein, dst_uri):
        if stagein.src.endswith('/'):
            dst_uri = self.task.stageout
        else:
            filename = stagein.src.rsplit('/', 1)[1]
            dst_uri = url_join(self.task.stageout, filename)

        stagein.dst = dst_uri
        # Destination changed so we have to determine the stagein method again
        stagein.method = self.task.determine_stagein_method(
            stagein.src, stagein.dst)
        stagein.save()
Example #5
0
    def remote_copy_recurse(yabiusername, src_uri, dst_uri):
        """Recursively copy src_uri to dst_uri.

        The destination directory is created, every file in the flat listing
        of src_uri is copied with FSBackend.remote_file_copy, and every
        directory in that listing is copied by calling this function again.
        When a stat of the source is available and the source and
        destination have the same basename, the source's atime/mtime are
        applied to the destination.

        Raises:
            RetryException: wrapping any exception raised while stat-ing,
                listing, creating or copying. A RetryException coming from a
                nested copy is passed through unchanged.
        """
        logger.info('remote_copy {0} -> {1}'.format(src_uri, dst_uri))
        src_backend = FSBackend.urifactory(yabiusername, src_uri)
        dst_backend = FSBackend.urifactory(yabiusername, dst_uri)

        try:
            src_stat = src_backend.remote_uri_stat(src_uri)

            listing = src_backend.ls(src_uri)  # get _flat_ listing here not recursive as before
            dst_backend.mkdir(dst_uri)
            logger.debug("listing of src_uri %s = %s" % (src_uri, listing))
            for key in listing:
                # copy files using a fifo
                for listing_file in listing[key]['files']:
                    src_file_uri = url_join(src_uri, listing_file[0])
                    dst_file_uri = url_join(dst_uri, listing_file[0])
                    logger.debug("src_file_uri = %s" % src_file_uri)
                    logger.debug("dst_file_uri = %s" % dst_file_uri)
                    FSBackend.remote_file_copy(yabiusername, src_file_uri, dst_file_uri)

                # recurse on directories

                for listing_dir in listing[key]['directories']:
                    src_dir_uri = url_join(src_uri, listing_dir[0])
                    dst_dir_uri = url_join(dst_uri, listing_dir[0])
                    logger.debug("src_dir_uri = %s" % src_dir_uri)
                    logger.debug("dst_dir_uri = %s" % dst_dir_uri)
                    FSBackend.remote_copy_recurse(yabiusername, src_dir_uri, dst_dir_uri)

            if src_stat and src_backend.basename(src_uri.rstrip('/')) == dst_backend.basename(dst_uri.rstrip('/')):
                # Avoid setting the times if we're copying the contents of the source
                atime = src_stat.get('atime')
                mtime = src_stat.get('mtime')
                dst_backend.set_remote_uri_times(dst_uri, atime, mtime)
        except RetryException:
            # Already wrapped by a nested call. Wrapping it again at every
            # recursion level would bury the original exception and its
            # traceback under one extra RetryException per directory depth.
            raise
        except Exception as exc:
            raise RetryException(exc, traceback.format_exc())
Example #6
0
    def add_task(self, uridict, name=""):
        """Prepare this task for its job and create the stageins it needs.

        Assigns a uuid4 string as the working directory name, renders the
        job's command template (passing ``uridict`` unless it is None) after
        aiming the template's uri conversion at the task's "input"
        directory, and saves. Every file reported by the template is then
        handed to ``batch_files_stagein``, the status is set to the empty
        string and the task is saved a second time.
        """
        logger.debug("add_task called with uridict: %s, name: %s" %
                     (uridict, name))

        # create the task
        new_working_dir = str(uuid.uuid4())
        self.working_dir = new_working_dir
        self.name = name

        # the command comes from the job's command template
        tmpl = self.job.template

        # batch uris get converted to paths under <working dir>/input
        conversion_base = url_join(
            self.fsbackend_parts.path, self.working_dir, "input")
        tmpl.set_uri_conversion(conversion_base + "/%(filename)s")

        if uridict is not None:
            self.command = tmpl.render(uridict)
        else:
            # batchfileless task (eg, select file)
            self.command = tmpl.render()

        self.save()

        # non batch stageins
        for key, stageins in tmpl.all_files():
            logger.debug("key:%s stagein:%s" % (key, stageins))
            for single_stagein in stageins:
                self.batch_files_stagein(single_stagein)

        self.status = ''
        self.save()

        created_message = ('Created task for job id: %s using command: %s' %
                           (self.job.id, self.command))
        logger.info(created_message)
        logger.info('working dir is: %s' % (self.working_dir))
Example #7
0
 def working_output_dir_uri(self):
     """Return the URI of the 'output' entry under the working dir URI."""
     base_uri = self.working_dir_uri()
     return url_join(base_uri, 'output')
Example #8
0
 def working_input_dir_uri(self):
     """Return the URI of the 'input' entry under the working dir URI."""
     base_uri = self.working_dir_uri()
     return url_join(base_uri, 'input')
Example #9
0
 def working_dir_uri(self):
     """Return the job's fs_backend URI joined with the task's working dir."""
     task = self.task
     return url_join(task.job.fs_backend, task.working_dir)
Example #10
0
    def json(self):
        """Return this task serialised as a JSON string.

        The document contains the user name, the task id, the engine
        callback urls (status, syslog, remote id, remote info), the list of
        stageins, an "exec" section describing the command and its resource
        settings, the stageout URI and the stageout method.

        The stageout method is the job's preferred one only when the tool's
        filesystem and the stageout area resolve to the same backend with
        the same scheme, hostname, port and username; otherwise it is "copy".
        """
        # formulate our status url and our error url
        # use the yabi embedded in this server
        statusurl = webhelpers.url("/engine/status/task/%d" % self.id)
        syslogurl = webhelpers.url("/engine/syslog/task/%d" % self.id)
        remoteidurl = webhelpers.url("/engine/remote_id/%d" % self.id)
        remoteinfourl = webhelpers.url("/engine/remote_info/%d" % self.id)

        job = self.job
        username = job.workflow.user.name

        # get our tools fs_backend
        fsscheme, fsbackend_parts = uriparse(job.fs_backend)
        logger.debug("getting fs backend for user: %s fs_backend:%s" % (username, job.fs_backend))
        fs_backend = backendhelper.get_fs_backend_for_uri(username, job.fs_backend)
        logger.debug("fs backend is: %s" % fs_backend)

        # get our exec backend so we can get our submission script
        logger.debug("getting exec backendcredential for user: %s exec_backend:%s" % (username, job.exec_backend))
        submission_backendcredential = backendhelper.get_exec_backendcredential_for_uri(username, job.exec_backend)
        logger.debug("exec backendcredential is: %s" % (submission_backendcredential))

        submission_backend = submission_backendcredential.backend

        # Use the backend's submission script unless it is blank, in which
        # case fall back to the credential's. "Blank" includes None and the
        # empty string: ''.isspace() is False, so an isspace() check alone
        # would let an empty backend script win over the credential's.
        backend_submission = submission_backend.submission
        if backend_submission is None or not str(backend_submission).strip():
            submission = submission_backendcredential.submission
        else:
            submission = backend_submission

        # if the tools filesystem and the users stageout area are on the same
        # backend/scheme/host/port/username then use the preferred stageout
        # method, else default to 'copy'
        so_backend = backendhelper.get_fs_backend_for_uri(username, job.stageout)
        soscheme, sobackend_parts = uriparse(job.stageout)
        same_location = (
            so_backend == fs_backend
            and soscheme == fsscheme
            and sobackend_parts.hostname == fsbackend_parts.hostname
            and sobackend_parts.port == fsbackend_parts.port
            and sobackend_parts.username == fsbackend_parts.username
        )
        if same_location:
            stageout_method = job.preferred_stageout_method
        else:
            stageout_method = "copy"

        # the stageout uri always ends in '/', with the task name (when set)
        # appended as a final path component
        stageout = job.stageout
        if not stageout.endswith("/"):
            stageout += "/"
        if self.name:
            stageout += self.name + "/"

        output = {
            "yabiusername": username,
            "taskid": self.id,
            "statusurl": statusurl,
            "syslogurl": syslogurl,
            "remoteidurl": remoteidurl,
            "remoteinfourl": remoteinfourl,
            "stagein": [],
            "exec": {
                "command": self.command,
                "backend": url_join(job.exec_backend),
                "fsbackend": url_join(job.fs_backend, self.working_dir),
                "workingdir": os.path.join(fsbackend_parts.path, self.working_dir),
                "cpus": job.cpus,
                "walltime": job.walltime,
                "module": job.module,
                "queue": job.queue,
                "memory": job.max_memory,
                "jobtype": job.job_type,
                "tasknum": self.task_num,
                "tasktotal": job.task_total,
                "submission": submission
            },
            "stageout": stageout,
            "stageout_method": stageout_method
        }

        for s in self.stagein_set.all():
            # The parsed results are not needed; the calls are kept so that a
            # URI uriparse rejects still fails here.
            # NOTE(review): drop both calls if uriparse can never raise.
            uriparse(s.src)
            uriparse(s.dst)

            # method may be 'copy', 'lcopy' or 'link'
            output["stagein"].append({"src": s.src, "dst": s.dst,
                                      "order": s.order, "method": s.method})

        return json.dumps(output)