Example 1
    def test_prev_uploaded(self, upl):
        """Test pathmapper handling previously uploaded files."""

        arvrunner = arvados_cwl.ArvCwlRunner(self.api)
        arvrunner.add_uploaded(
            'tests/hw.py',
            MapperEnt(
                resolved='keep:99999999999999999999999999999991+99/hw.py',
                target='',
                type='File'))

        upl.side_effect = upload_mock

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "tests/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        self.assertEqual(
            {
                'tests/hw.py':
                MapperEnt(
                    resolved='keep:99999999999999999999999999999991+99/hw.py',
                    target='/test/99999999999999999999999999999991+99/hw.py',
                    type='File')
            }, p._pathmap)
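All of the examples on this page build MapperEnt records. In cwltool the record is a plain namedtuple; a minimal sketch of the shape assumed here (recent versions carry four fields, while some of the older snippets below pass only the first three):

import collections

# Rough sketch of the MapperEnt record used throughout these examples; recent
# cwltool releases define it essentially like this, older ones omit "staged".
MapperEnt = collections.namedtuple(
    "MapperEnt", ["resolved", "target", "type", "staged"])

# resolved: where the data actually lives (file:///..., keep:..., or literal contents)
# target:   where it should appear in the runtime environment
# type:     "File", "Directory", "CreateFile", "WritableFile", "WritableDirectory", ...
# staged:   whether the entry still needs to be staged into place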
Example 2
 def setup(self, referenced_files, basedir):
     log.debug("PATHMAPPER: " + pformat(referenced_files))
     self._pathmap = {}
     for src in referenced_files:
         log.debug('SOURCE: ' + str(src))
         if src['location'].startswith("fs://"):
             target_name = os.path.basename(src['location'])
             self._pathmap[src['location']] = MapperEnt(
                 resolved=src['location'],
                 target=os.path.join(BASE_MOUNT, target_name),
                 type=src['class'])
         elif src['location'].startswith("file://"):
             src_path = src['location'][7:]
             log.debug("Copying %s to shared %s" %
                       (src['location'], self.store_base))
             dst = os.path.join(self.store_base, os.path.basename(src_path))
             shutil.copy(src_path, dst)
             location = "fs://%s" % (os.path.basename(src['location']))
             self._pathmap[src['location']] = MapperEnt(
                 resolved=location,
                 target=os.path.join(BASE_MOUNT,
                                     os.path.basename(src['location'])),
                 type=src['class'])
         else:
             raise Exception("Unknown file source: %s" % (src['location']))
     log.debug('PATHMAP: ' + pformat(self._pathmap))
Example 3
 def visit(self, obj, stagedir, basedir, copy=False, staged=False):
     # type: (Dict[unicode, Any], unicode, unicode, bool, bool) -> None
     loc = obj["location"]
     tgt = os.path.join(stagedir, obj["basename"])
     basetgt, baseext = os.path.splitext(tgt)
     n = 1
     while tgt in self.targets:
         n += 1
         tgt = "%s_%i%s" % (basetgt, n, baseext)
     self.targets.add(tgt)
     if obj["class"] == "Directory":
         self._pathmap[loc] = MapperEnt(loc, tgt, "Directory", staged)
         if loc.startswith("_:") or self._follow_dirs:
             self.visitlisting(obj.get("listing", []), tgt, basedir)
     elif obj["class"] == "File":
         if loc in self._pathmap:
             return
         if "contents" in obj and loc.startswith("_:"):
             self._pathmap[loc] = MapperEnt(obj["contents"], tgt,
                                            "CreateFile", staged)
         else:
             if copy:
                 self._pathmap[loc] = MapperEnt(loc, tgt, "WritableFile",
                                                staged)
             else:
                 self._pathmap[loc] = MapperEnt(loc, tgt, "File", staged)
             self.visitlisting(obj.get("secondaryFiles", []), stagedir,
                               basedir)
Example 4
 def visit(self, srcobj, uploadfiles):
     src = srcobj["location"]
     if srcobj["class"] == "File":
         if "#" in src:
             src = src[:src.index("#")]
         if isinstance(src,
                       basestring) and ArvPathMapper.pdh_path.match(src):
             self._pathmap[src] = MapperEnt(
                 src, self.collection_pattern % src[5:], "File")
         if src not in self._pathmap:
             # Local FS ref, may need to be uploaded or may be on keep
             # mount.
             ab = abspath(src, self.input_basedir)
             st = arvados.commands.run.statfile("",
                                                ab,
                                                fnPattern=self.file_pattern)
             if isinstance(st, arvados.commands.run.UploadFile):
                 uploadfiles.add((src, ab, st))
             elif isinstance(st, arvados.commands.run.ArvFile):
                 self._pathmap[src] = MapperEnt(ab, st.fn, "File")
             elif src.startswith("_:") and "contents" in srcobj:
                 pass
             else:
                 raise WorkflowException("Input file path '%s' is invalid" %
                                         st)
         if "secondaryFiles" in srcobj:
             for l in srcobj["secondaryFiles"]:
                 self.visit(l, uploadfiles)
     elif srcobj["class"] == "Directory":
         if isinstance(src,
                       basestring) and ArvPathMapper.pdh_dirpath.match(src):
             self._pathmap[src] = MapperEnt(
                 src, self.collection_pattern % src[5:], "Directory")
         for l in srcobj["listing"]:
             self.visit(l, uploadfiles)
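The pdh_path and pdh_dirpath class attributes checked above are compiled regexes that recognize keep: locators built from a portable data hash. The exact patterns live in the arvados-cwl-runner source; an illustrative check of the same general shape:

import re

# Illustrative only: a keep: locator is "keep:" plus a 32-hex-digit portable
# data hash, "+", a size, and optionally a path inside the collection.
keep_locator = re.compile(r"^keep:[0-9a-f]{32}\+\d+(/.*)?$")

print(bool(keep_locator.match("keep:99999999999999999999999999999991+99/hw.py")))  # True
print(bool(keep_locator.match("file:tests/hw.py")))                                # False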
Example 5
 def setup(self, referenced_files, basedir):
     self._pathmap = {}
     for src in referenced_files:
         logging.debug(src)
         if DEBUG:
             print "pathing", src
         if src['location'].startswith("fs://"):
             target_name = os.path.basename(src['location'])
             self._pathmap[src['location']] = MapperEnt(
                 resolved=src['location'],
                 target=os.path.join(BASE_MOUNT, target_name),
                 type=src['class'])
         elif src['location'].startswith("file://"):
             src_path = src['location'][7:]
             logging.debug("Copying %s to shared %s" %
                           (src['location'], self.store_base))
             dst = os.path.join(self.store_base, os.path.basename(src_path))
             print src_path
             shutil.copy(src_path, dst)
             location = "fs://%s" % (os.path.basename(src['location']))
             self._pathmap[src['location']] = MapperEnt(
                 resolved=location,
                 target=os.path.join(BASE_MOUNT,
                                     os.path.basename(src['location'])),
                 type=src['class'])
         else:
             raise Exception("Unknown file source: %s" % (src['location']))
Example 6
 def visit(self, obj, stagedir, basedir, copy=False, staged=False):
     # type: (Dict[Text, Any], Text, Text, bool, bool) -> None
     tgt = os.path.join(stagedir, obj["basename"])
     if obj["location"] in self._pathmap:
         return
     if obj["class"] == "Directory":
         if obj["location"].startswith("file://"):
             resolved = schema_salad.ref_resolver.uri_file_path(
                 obj["location"])
         else:
             resolved = obj["location"]
         self._pathmap[obj["location"]] = MapperEnt(
             resolved, tgt,
             "WritableDirectory" if copy else "Directory", staged)
         if obj["location"].startswith("file://") \
                 and not self.stage_listing:
             staged = False
         self.visitlisting(
             obj.get("listing", []), tgt, basedir, copy=copy, staged=staged)
     elif obj["class"] == "File":
         loc = obj["location"]
         if "contents" in obj and obj["location"].startswith("_:"):
             self._pathmap[obj["location"]] = MapperEnt(
                 obj["contents"], tgt, "CreateFile", staged)
         else:
             resolved = self.get_file(loc) if self.get_file else loc
             if resolved.startswith("file:"):
                 resolved = schema_salad.ref_resolver.uri_file_path(
                     resolved)
             self._pathmap[loc] = MapperEnt(
                 resolved, tgt, "WritableFile" if copy else "File", staged)
             self.visitlisting(obj.get("secondaryFiles", []),
                               stagedir, basedir, copy=copy, staged=staged)
Example 7
    def visit(self, srcobj, uploadfiles):
        src = srcobj["location"]
        if "#" in src:
            src = src[:src.index("#")]

        if isinstance(src,
                      basestring) and ArvPathMapper.pdh_dirpath.match(src):
            self._pathmap[src] = MapperEnt(
                src, self.collection_pattern % urllib.parse.unquote(src[5:]),
                srcobj["class"], True)
            if arvados_cwl.util.collectionUUID in srcobj:
                self.pdh_to_uuid[src.split(
                    "/", 1)[0][5:]] = srcobj[arvados_cwl.util.collectionUUID]

        debug = logger.isEnabledFor(logging.DEBUG)

        if src not in self._pathmap:
            if src.startswith("file:"):
                # Local FS ref, may need to be uploaded or may be on keep
                # mount.
                ab = abspath(src, self.input_basedir)
                st = arvados.commands.run.statfile("",
                                                   ab,
                                                   fnPattern="keep:%s/%s",
                                                   dirPattern="keep:%s/%s",
                                                   raiseOSError=True)
                with SourceLine(srcobj, "location", WorkflowException, debug):
                    if isinstance(st, arvados.commands.run.UploadFile):
                        uploadfiles.add((src, ab, st))
                    elif isinstance(st, arvados.commands.run.ArvFile):
                        self._pathmap[src] = MapperEnt(
                            st.fn, self.collection_pattern %
                            urllib.parse.unquote(st.fn[5:]), "File", True)
                    else:
                        raise WorkflowException(
                            "Input file path '%s' is invalid" % st)
            elif src.startswith("_:"):
                if srcobj["class"] == "File" and "contents" not in srcobj:
                    raise WorkflowException(
                        "File literal '%s' is missing `contents`" % src)
                if srcobj["class"] == "Directory" and "listing" not in srcobj:
                    raise WorkflowException(
                        "Directory literal '%s' is missing `listing`" % src)
            elif src.startswith("http:") or src.startswith("https:"):
                keepref = http_to_keep(self.arvrunner.api,
                                       self.arvrunner.project_uuid, src)
                logger.info("%s is %s", src, keepref)
                self._pathmap[src] = MapperEnt(keepref, keepref,
                                               srcobj["class"], True)
            else:
                self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True)

        with SourceLine(srcobj, "secondaryFiles", WorkflowException, debug):
            for l in srcobj.get("secondaryFiles", []):
                self.visit(l, uploadfiles)
        with SourceLine(srcobj, "listing", WorkflowException, debug):
            for l in srcobj.get("listing", []):
                self.visit(l, uploadfiles)
Example 8
    def visit(self, obj, stagedir, basedir, copy=False, staged=False):
        tgt = convert_pathsep_to_unix(os.path.join(stagedir, obj["basename"]))
        if obj["location"] in self._pathmap:
            return
        if obj["class"] == "Directory":
            if obj["location"].startswith("file://"):
                log.warning("a file:// based Directory slipped through: %s",
                            obj)
                resolved = uri_file_path(obj["location"])
            else:
                resolved = obj["location"]
            self._pathmap[obj["location"]] = MapperEnt(
                resolved, tgt, "WritableDirectory" if copy else "Directory",
                staged)
            if obj["location"].startswith("file://"):
                staged = False
            self.visitlisting(obj.get("listing", []),
                              tgt,
                              basedir,
                              copy=copy,
                              staged=staged)
        elif obj["class"] == "File":
            path = obj["location"]
            abpath = abspath(path, basedir)
            if "contents" in obj and obj["location"].startswith("_:"):
                self._pathmap[obj["location"]] = MapperEnt(
                    obj["contents"], tgt, "CreateFile", staged)
            else:
                with SourceLine(obj, "location", validate.ValidationException,
                                log.isEnabledFor(logging.DEBUG)):
                    deref = abpath
                    if urllib.parse.urlsplit(deref).scheme in [
                            'http', 'https'
                    ]:
                        deref = downloadHttpFile(path)
                    elif urllib.parse.urlsplit(deref).scheme == 'ftp':
                        deref = self._download_ftp_file(path)
                    else:
                        log.warning("unprocessed File %s", obj)
                        # Dereference symbolic links
                        st = os.lstat(deref)
                        while stat.S_ISLNK(st.st_mode):
                            rl = os.readlink(deref)
                            deref = rl if os.path.isabs(rl) \
                                else os.path.join(os.path.dirname(deref), rl)
                            st = os.lstat(deref)

                    self._pathmap[path] = MapperEnt(
                        deref, tgt, "WritableFile" if copy else "File", staged)
                    self.visitlisting(obj.get("secondaryFiles", []),
                                      stagedir,
                                      basedir,
                                      copy=copy,
                                      staged=staged)
Example 9
 def visit(self, obj, stagedir, basedir, copy=False):
     # type: (Dict[unicode, Any], unicode, unicode, bool) -> None
     loc = obj["location"]
     if obj["class"] == "Directory":
         self._pathmap[loc] = MapperEnt(loc, stagedir, "Directory")
     elif obj["class"] == "File":
         if loc in self._pathmap:
             return
         tgt = os.path.join(stagedir, obj["basename"])
         self._pathmap[loc] = MapperEnt(loc, tgt, "File")
         self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir)
Example 10
    def test_upload(self, statfile, upl):
        """Test pathmapper uploading files."""

        arvrunner = arvados_cwl.ArvCwlRunner(self.api)

        def statfile_mock(prefix,
                          fn,
                          fnPattern="$(file %s/%s)",
                          dirPattern="$(dir %s/%s/)",
                          raiseOSError=False):
            st = arvados.commands.run.UploadFile("", "tests/hw.py")
            return st

        upl.side_effect = upload_mock
        statfile.side_effect = statfile_mock

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "file:tests/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        self.assertEqual(
            {
                'file:tests/hw.py':
                MapperEnt(
                    resolved='keep:99999999999999999999999999999991+99/hw.py',
                    target='/test/99999999999999999999999999999991+99/hw.py',
                    type='File',
                    staged=True)
            }, p._pathmap)
Example 11
    def test_statfile(self, statfile, upl):
        """Test pathmapper handling ArvFile references."""
        arvrunner = arvados_cwl.ArvCwlRunner(self.api)

        # An ArvFile object returned from arvados.commands.run.statfile means the
        # file is on a keep mount, so we can construct a keep reference directly
        # without uploading.
        def statfile_mock(prefix,
                          fn,
                          fnPattern="$(file %s/%s)",
                          dirPattern="$(dir %s/%s/)"):
            st = arvados.commands.run.ArvFile(
                "",
                fnPattern % ("99999999999999999999999999999991+99", "hw.py"))
            return st

        upl.side_effect = upload_mock
        statfile.side_effect = statfile_mock

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "tests/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        self.assertEqual(
            {
                'tests/hw.py':
                MapperEnt(
                    resolved='keep:99999999999999999999999999999991+99/hw.py',
                    target='/test/99999999999999999999999999999991+99/hw.py',
                    type='File')
            }, p._pathmap)
Example 12
    def visit(self, srcobj, uploadfiles):
        src = srcobj["location"]
        if "#" in src:
            src = src[:src.index("#")]

        if isinstance(src,
                      basestring) and ArvPathMapper.pdh_dirpath.match(src):
            self._pathmap[src] = MapperEnt(
                src, self.collection_pattern % urllib.unquote(src[5:]),
                srcobj["class"], True)

        if src not in self._pathmap:
            if src.startswith("file:"):
                # Local FS ref, may need to be uploaded or may be on keep
                # mount.
                ab = abspath(src, self.input_basedir)
                st = arvados.commands.run.statfile("",
                                                   ab,
                                                   fnPattern="keep:%s/%s",
                                                   dirPattern="keep:%s/%s",
                                                   raiseOSError=True)
                with SourceLine(srcobj, "location", WorkflowException):
                    if isinstance(st, arvados.commands.run.UploadFile):
                        uploadfiles.add((src, ab, st))
                    elif isinstance(st, arvados.commands.run.ArvFile):
                        self._pathmap[src] = MapperEnt(
                            st.fn, self.collection_pattern %
                            urllib.unquote(st.fn[5:]), "File", True)
                    else:
                        raise WorkflowException(
                            "Input file path '%s' is invalid" % st)
            elif src.startswith("_:"):
                if srcobj["class"] == "File" and "contents" not in srcobj:
                    raise WorkflowException(
                        "File literal '%s' is missing `contents`" % src)
                if srcobj["class"] == "Directory" and "listing" not in srcobj:
                    raise WorkflowException(
                        "Directory literal '%s' is missing `listing`" % src)
            else:
                self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True)

        with SourceLine(srcobj, "secondaryFiles", WorkflowException):
            for l in srcobj.get("secondaryFiles", []):
                self.visit(l, uploadfiles)
        with SourceLine(srcobj, "listing", WorkflowException):
            for l in srcobj.get("listing", []):
                self.visit(l, uploadfiles)
Example 13
    def setup(self, referenced_files, basedir):
        # type: (List[Any], unicode) -> None

        # Go through each file and set the target to its own directory along
        # with any secondary files.
        self.visitlisting(referenced_files, self.stagedir, basedir)

        for path, (ab, tgt, type, staged) in viewitems(self._pathmap):
            if type in ("File", "Directory") and ab.startswith("keep:"):
                self._pathmap[path] = MapperEnt("$(task.keep)/%s" % ab[5:], tgt, type, staged)
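For reference, a hypothetical before/after of the keep-reference rewrite performed in the loop above (the locator is made up):

ab = "keep:99999999999999999999999999999991+99/hw.py"
rewritten = "$(task.keep)/%s" % ab[5:]  # ab[5:] strips the leading "keep:"
# rewritten == "$(task.keep)/99999999999999999999999999999991+99/hw.py"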
Example 14
    def test_keepref(self):
        """Test direct keep references."""

        arvrunner = arvados_cwl.ArvCwlRunner(self.api)

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        self.assertEqual(
            {
                'keep:99999999999999999999999999999991+99/hw.py':
                MapperEnt(
                    resolved='keep:99999999999999999999999999999991+99/hw.py',
                    target='/test/99999999999999999999999999999991+99/hw.py',
                    type='File',
                    staged=True)
            }, p._pathmap)
Example 15
 def visit(self, obj, stagedir, basedir, copy=False):
     # type: (Dict[unicode, Any], unicode, unicode, bool) -> None
     if obj["class"] == "Directory":
         self._pathmap[obj["location"]] = MapperEnt(obj["location"],
                                                    stagedir, "Directory")
         self.visitlisting(obj.get("listing", []), stagedir, basedir)
     elif obj["class"] == "File":
         loc = obj["location"]
         if loc in self._pathmap:
             return
         tgt = os.path.join(stagedir, obj["basename"])
         if "contents" in obj and obj["location"].startswith("_:"):
             self._pathmap[loc] = MapperEnt(obj["contents"], tgt,
                                            "CreateFile")
         else:
             if copy:
                 self._pathmap[loc] = MapperEnt(obj["path"], tgt,
                                                "WritableFile")
             else:
                 self._pathmap[loc] = MapperEnt(obj["path"], tgt, "File")
             self.visitlisting(obj.get("secondaryFiles", []), stagedir,
                               basedir)
Example 16
    def mapper(self, src):  # type: (Text) -> MapperEnt
        # Overridden to keep mapping by source (identifier) to target regardless
        # of how the map is structured internally.
        def getMapperEnt(src):
            for k,v in viewitems(self._pathmap):
                if (v.type != "CreateFile" and v.resolved == src) or (v.type == "CreateFile" and k == src):
                    return v

        if u"#" in src:
            i = src.index(u"#")
            v = getMapperEnt(src[i:])
            return MapperEnt(v.resolved, v.target + src[i:], v.type, v.staged)
        return getMapperEnt(src)
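A self-contained sketch of the lookup logic this override implements, using a toy _pathmap and made-up paths: entries are found by their resolved value unless they are CreateFile literals, which are still looked up by key.

# Hypothetical illustration only; MapperEnt as sketched near the top of the page.
pathmap = {
    "out.txt": MapperEnt(resolved="file:///work/out.txt",
                         target="/stage/out.txt", type="File", staged=True),
}

def lookup(src):
    for k, v in pathmap.items():
        if (v.type != "CreateFile" and v.resolved == src) or \
                (v.type == "CreateFile" and k == src):
            return v

print(lookup("file:///work/out.txt").target)  # -> /stage/out.txt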
Example 17
    def test_upload(self, upl):
        """Test pathmapper uploading files."""

        arvrunner = arvados_cwl.ArvCwlRunner(mock.MagicMock())

        upl.side_effect = upload_mock

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "tests/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        self.assertEqual(
            {
                'tests/hw.py':
                MapperEnt(
                    resolved='keep:99999999999999999999999999999991+99/hw.py',
                    target='/test/99999999999999999999999999999991+99/hw.py',
                    type='File')
            }, p._pathmap)
Example 18
    def setup(self, referenced_files, basedir):
        # type: (List[Any], unicode) -> None
        uploadfiles = set()

        collection = None
        if self.single_collection:
            collection = arvados.collection.Collection(
                api_client=self.arvrunner.api,
                keep_client=self.arvrunner.keep_client,
                num_retries=self.arvrunner.num_retries)

        already_uploaded = self.arvrunner.get_uploaded()
        copied_files = set()
        for k in referenced_files:
            loc = k["location"]
            if loc in already_uploaded:
                v = already_uploaded[loc]
                self._pathmap[loc] = MapperEnt(
                    v.resolved,
                    self.collection_pattern % urllib.unquote(v.resolved[5:]),
                    v.type, True)
                if self.single_collection:
                    basename = k["basename"]
                    if basename not in collection:
                        self.addentry(
                            {
                                "location": loc,
                                "class": v.type,
                                "basename": basename
                            }, collection, ".", [])
                        copied_files.add((loc, basename, v.type))

        for srcobj in referenced_files:
            self.visit(srcobj, uploadfiles)

        arvados.commands.run.uploadfiles(
            [u[2] for u in uploadfiles],
            self.arvrunner.api,
            dry_run=False,
            num_retries=self.arvrunner.num_retries,
            fnPattern="keep:%s/%s",
            name=self.name,
            project=self.arvrunner.project_uuid,
            collection=collection)

        for src, ab, st in uploadfiles:
            self._pathmap[src] = MapperEnt(
                urllib.quote(st.fn,
                             "/:+@"), self.collection_pattern % st.fn[5:],
                "Directory" if os.path.isdir(ab) else "File", True)
            self.arvrunner.add_uploaded(src, self._pathmap[src])

        for loc, basename, cls in copied_files:
            fn = "keep:%s/%s" % (collection.portable_data_hash(), basename)
            self._pathmap[loc] = MapperEnt(urllib.quote(fn, "/:+@"),
                                           self.collection_pattern % fn[5:],
                                           cls, True)

        for srcobj in referenced_files:
            subdirs = []
            if srcobj["class"] == "Directory" and srcobj[
                    "location"] not in self._pathmap:
                c = arvados.collection.Collection(
                    api_client=self.arvrunner.api,
                    keep_client=self.arvrunner.keep_client,
                    num_retries=self.arvrunner.num_retries)
                for l in srcobj.get("listing", []):
                    self.addentry(l, c, ".", subdirs)

                check = self.arvrunner.api.collections().list(
                    filters=[[
                        "portable_data_hash", "=",
                        c.portable_data_hash()
                    ]],
                    limit=1).execute(num_retries=self.arvrunner.num_retries)
                if not check["items"]:
                    c.save_new(owner_uuid=self.arvrunner.project_uuid)

                ab = self.collection_pattern % c.portable_data_hash()
                self._pathmap[srcobj["location"]] = MapperEnt(
                    "keep:" + c.portable_data_hash(), ab, "Directory", True)
            elif srcobj["class"] == "File" and (
                    srcobj.get("secondaryFiles") or
                (srcobj["location"].startswith("_:")
                 and "contents" in srcobj)):

                c = arvados.collection.Collection(
                    api_client=self.arvrunner.api,
                    keep_client=self.arvrunner.keep_client,
                    num_retries=self.arvrunner.num_retries)
                self.addentry(srcobj, c, ".", subdirs)

                check = self.arvrunner.api.collections().list(
                    filters=[[
                        "portable_data_hash", "=",
                        c.portable_data_hash()
                    ]],
                    limit=1).execute(num_retries=self.arvrunner.num_retries)
                if not check["items"]:
                    c.save_new(owner_uuid=self.arvrunner.project_uuid)

                ab = self.file_pattern % (c.portable_data_hash(),
                                          srcobj["basename"])
                self._pathmap[srcobj["location"]] = MapperEnt(
                    "keep:%s/%s" %
                    (c.portable_data_hash(), srcobj["basename"]), ab, "File",
                    True)
                if srcobj.get("secondaryFiles"):
                    ab = self.collection_pattern % c.portable_data_hash()
                    self._pathmap["_:" + unicode(uuid.uuid4())] = MapperEnt(
                        "keep:" + c.portable_data_hash(), ab, "Directory",
                        True)

            if subdirs:
                for loc, sub in subdirs:
                    # subdirs will all start with "./", strip it off
                    ab = self.file_pattern % (c.portable_data_hash(), sub[2:])
                    self._pathmap[loc] = MapperEnt(
                        "keep:%s/%s" % (c.portable_data_hash(), sub[2:]), ab,
                        "Directory", True)

        self.keepdir = None
Example 19
    def setup(self, referenced_files, basedir):
        # type: (List[Any], unicode) -> None
        uploadfiles = set()

        collection = None
        if self.single_collection:
            collection = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                       keep_client=self.arvrunner.keep_client,
                                                       num_retries=self.arvrunner.num_retries)

        for srcobj in referenced_files:
            self.visit(srcobj, uploadfiles)

        arvados.commands.run.uploadfiles([u[2] for u in uploadfiles],
                                         self.arvrunner.api,
                                         dry_run=False,
                                         num_retries=self.arvrunner.num_retries,
                                         fnPattern="keep:%s/%s",
                                         name=self.name,
                                         project=self.arvrunner.project_uuid,
                                         collection=collection,
                                         packed=False)

        for src, ab, st in uploadfiles:
            self._pathmap[src] = MapperEnt(urllib.parse.quote(st.fn, "/:+@"), self.collection_pattern % st.fn[5:],
                                           "Directory" if os.path.isdir(ab) else "File", True)

        for srcobj in referenced_files:
            remap = []
            if srcobj["class"] == "Directory" and srcobj["location"] not in self._pathmap:
                c = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                  keep_client=self.arvrunner.keep_client,
                                                  num_retries=self.arvrunner.num_retries)
                for l in srcobj.get("listing", []):
                    self.addentry(l, c, ".", remap)

                container = arvados_cwl.util.get_current_container(self.arvrunner.api, self.arvrunner.num_retries, logger)
                info = arvados_cwl.util.get_intermediate_collection_info(None, container, self.arvrunner.intermediate_output_ttl)

                c.save_new(name=info["name"],
                           owner_uuid=self.arvrunner.project_uuid,
                           ensure_unique_name=True,
                           trash_at=info["trash_at"],
                           properties=info["properties"])

                ab = self.collection_pattern % c.portable_data_hash()
                self._pathmap[srcobj["location"]] = MapperEnt("keep:"+c.portable_data_hash(), ab, "Directory", True)
            elif srcobj["class"] == "File" and (srcobj.get("secondaryFiles") or
                (srcobj["location"].startswith("_:") and "contents" in srcobj)):

                # If all secondary files/directories are located in
                # the same collection as the primary file, and the
                # paths and names are consistent with staging, don't
                # create a new collection.
                if not self.needs_new_collection(srcobj):
                    continue

                c = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                  keep_client=self.arvrunner.keep_client,
                                                  num_retries=self.arvrunner.num_retries)
                self.addentry(srcobj, c, ".", remap)

                container = arvados_cwl.util.get_current_container(self.arvrunner.api, self.arvrunner.num_retries, logger)
                info = arvados_cwl.util.get_intermediate_collection_info(None, container, self.arvrunner.intermediate_output_ttl)

                c.save_new(name=info["name"],
                           owner_uuid=self.arvrunner.project_uuid,
                           ensure_unique_name=True,
                           trash_at=info["trash_at"],
                           properties=info["properties"])

                ab = self.file_pattern % (c.portable_data_hash(), srcobj["basename"])
                self._pathmap[srcobj["location"]] = MapperEnt("keep:%s/%s" % (c.portable_data_hash(), srcobj["basename"]),
                                                              ab, "File", True)
                if srcobj.get("secondaryFiles"):
                    ab = self.collection_pattern % c.portable_data_hash()
                    self._pathmap["_:" + str(uuid.uuid4())] = MapperEnt("keep:"+c.portable_data_hash(), ab, "Directory", True)

            if remap:
                for loc, sub in remap:
                    # subdirs start with "./", strip it off
                    if sub.startswith("./"):
                        ab = self.file_pattern % (c.portable_data_hash(), sub[2:])
                    else:
                        ab = self.file_pattern % (c.portable_data_hash(), sub)
                    self._pathmap[loc] = MapperEnt("keep:%s/%s" % (c.portable_data_hash(), sub[2:]),
                                                   ab, "Directory", True)

        self.keepdir = None
Example 20
    def setup(self, referenced_files, basedir):
        # type: (List[Any], unicode) -> None
        self._pathmap = self.arvrunner.get_uploaded()
        uploadfiles = set()

        for srcobj in referenced_files:
            self.visit(srcobj, uploadfiles)

        if uploadfiles:
            arvados.commands.run.uploadfiles(
                [u[2] for u in uploadfiles],
                self.arvrunner.api,
                dry_run=False,
                num_retries=self.arvrunner.num_retries,
                fnPattern=self.file_pattern,
                name=self.name,
                project=self.arvrunner.project_uuid)

        for src, ab, st in uploadfiles:
            self._pathmap[src] = MapperEnt("keep:" + st.keepref, st.fn, "File")
            self.arvrunner.add_uploaded(src, self._pathmap[src])

        for srcobj in referenced_files:
            if srcobj["class"] == "Directory":
                if srcobj["location"] not in self._pathmap:
                    c = arvados.collection.Collection(
                        api_client=self.arvrunner.api,
                        num_retries=self.arvrunner.num_retries)
                    subdirs = []
                    for l in srcobj["listing"]:
                        self.addentry(l, c, ".", subdirs)

                    check = self.arvrunner.api.collections().list(
                        filters=[[
                            "portable_data_hash", "=",
                            c.portable_data_hash()
                        ]],
                        limit=1).execute(
                            num_retries=self.arvrunner.num_retries)
                    if not check["items"]:
                        c.save_new(owner_uuid=self.arvrunner.project_uuid)

                    ab = self.collection_pattern % c.portable_data_hash()
                    self._pathmap[srcobj["location"]] = MapperEnt(
                        ab, ab, "Directory")
                    for loc, sub in subdirs:
                        ab = self.file_pattern % (c.portable_data_hash(),
                                                  sub[2:])
                        self._pathmap[loc] = MapperEnt(ab, ab, "Directory")
            elif srcobj["class"] == "File" and (
                    srcobj.get("secondaryFiles") or
                (srcobj["location"].startswith("_:")
                 and "contents" in srcobj)):

                c = arvados.collection.Collection(
                    api_client=self.arvrunner.api,
                    num_retries=self.arvrunner.num_retries)
                subdirs = []
                self.addentry(srcobj, c, ".", subdirs)

                check = self.arvrunner.api.collections().list(
                    filters=[[
                        "portable_data_hash", "=",
                        c.portable_data_hash()
                    ]],
                    limit=1).execute(num_retries=self.arvrunner.num_retries)
                if not check["items"]:
                    c.save_new(owner_uuid=self.arvrunner.project_uuid)

                ab = self.file_pattern % (c.portable_data_hash(),
                                          srcobj["basename"])
                self._pathmap[srcobj["location"]] = MapperEnt(ab, ab, "File")
                if srcobj.get("secondaryFiles"):
                    ab = self.collection_pattern % c.portable_data_hash()
                    self._pathmap["_:" + unicode(uuid.uuid4())] = MapperEnt(
                        ab, ab, "Directory")
                for loc, sub in subdirs:
                    ab = self.file_pattern % (c.portable_data_hash(), sub[2:])
                    self._pathmap[loc] = MapperEnt(ab, ab, "Directory")

        self.keepdir = None
Example 21
def test_docker_tmpdir_prefix(tmp_path: Path) -> None:
    """Test that DockerCommandLineJob respects temp directory directives."""
    (tmp_path / "3").mkdir()
    tmpdir_prefix = str(tmp_path / "3" / "ttmp")
    runtime_context = RuntimeContext({
        "tmpdir_prefix": tmpdir_prefix,
        "user_space_docker_cmd": None
    })
    builder = Builder(
        {},
        [],
        [],
        {},
        schema.Names(),
        [],
        [],
        {},
        None,
        None,
        StdFsAccess,
        StdFsAccess(""),
        None,
        0.1,
        False,
        False,
        False,
        "",
        runtime_context.get_outdir(),
        runtime_context.get_tmpdir(),
        runtime_context.get_stagedir(),
        INTERNAL_VERSION,
    )
    job = DockerCommandLineJob(builder, {}, PathMapper, [], [], "")
    runtime: List[str] = []

    volume_writable_file = MapperEnt(resolved=get_data("tests/2.fastq"),
                                     target="foo",
                                     type=None,
                                     staged=None)
    (tmp_path / "1").mkdir()
    job.add_writable_file_volume(runtime, volume_writable_file, None,
                                 str(tmp_path / "1" / "writable_file"))
    children = sorted((tmp_path / "1").glob("*"))
    assert len(children) == 1
    subdir = tmp_path / children[0]
    assert subdir.name.startswith("writable_file")
    assert len(sorted(subdir.glob("*"))) == 1
    assert (subdir / "2.fastq").exists()

    resolved_writable_dir = tmp_path / "data_orig"
    resolved_writable_dir.mkdir(parents=True)
    volume_dir = MapperEnt(resolved=str(resolved_writable_dir),
                           target="bar",
                           type=None,
                           staged=None)
    (tmp_path / "2").mkdir()
    job.add_writable_directory_volume(runtime, volume_dir, None,
                                      str(tmp_path / "2" / "dir"))
    children = sorted((tmp_path / "2").glob("*"))
    assert len(children) == 1
    subdir = tmp_path / "2" / children[0]
    assert subdir.name.startswith("dir")
    assert len(sorted(subdir.glob("*"))) == 1
    assert (subdir / "data_orig").exists()

    cidfile = job.create_runtime({}, runtime_context)[1]
    assert cidfile and cidfile.startswith(tmpdir_prefix)

    volume_file = MapperEnt(resolved="Hoopla!",
                            target="baz",
                            type=None,
                            staged=None)
    (tmp_path / "4").mkdir()
    job.create_file_and_add_volume(runtime, volume_file, None, None,
                                   str(tmp_path / "4" / "file"))
    children = sorted((tmp_path / "4").glob("*"))
    assert len(children) == 1
    subdir = tmp_path / "4" / children[0]
    assert subdir.name.startswith("file")
    assert len(sorted(subdir.glob("*"))) == 1
    assert (subdir / "baz").exists()