Example #1
 def syncToRemote(self, p, spec):
     if not self.writeStore:
         return
     tarballNameWithRev = (
         "{package}-{version}-{revision}.{architecture}.tar.gz".format(
             architecture=self.architecture, **spec))
     tar_path = os.path.join(
         resolve_store_path(self.architecture, spec["hash"]),
         tarballNameWithRev)
     link_path = os.path.join(resolve_links_path(self.architecture, p),
                              tarballNameWithRev)
     tar_exists = self._s3_key_exists(tar_path)
     link_exists = self._s3_key_exists(link_path)
     if tar_exists and link_exists:
         debug("%s exists on S3 already, not uploading", tarballNameWithRev)
         return
     if tar_exists or link_exists:
         warning("%s exists already but %s does not, overwriting!",
                 tar_path if tar_exists else link_path,
                 link_path if tar_exists else tar_path)
     debug("Uploading tarball and symlink for %s %s-%s (%s) to S3", p,
           spec["version"], spec["revision"], spec["hash"])
     # Upload the tarball itself, then publish the symlink as a small S3
     # object whose body is the link's target path.
     self.s3.upload_file(Bucket=self.writeStore,
                         Key=tar_path,
                         Filename=os.path.join(self.workdir, tar_path))
     self.s3.put_object(
         Bucket=self.writeStore,
         Key=link_path,
         # lstrip("./") removes all leading "." and "/" characters, i.e. the
         # relative "../../" prefix of the link target.
         Body=os.readlink(os.path.join(
             self.workdir, link_path)).lstrip("./").encode("utf-8"))
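Example #1 calls a `_s3_key_exists` helper that the listing does not include. A minimal sketch of such a helper, assuming `self.s3` is a boto3 client and probing the key with a HEAD request:

 from botocore.exceptions import ClientError

 def _s3_key_exists(self, key):
     # Hypothetical helper: HEAD the key; a 404 error means it is absent.
     try:
         self.s3.head_object(Bucket=self.writeStore, Key=key)
     except ClientError as exc:
         if exc.response["Error"]["Code"] == "404":
             return False
         raise
     return True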
Example #2
 def syncToLocal(self, p, spec):
     debug("Updating remote store for package %s with hashes %s", p,
           ", ".join(spec["remote_hashes"]))
     err = execute("""\
 mkdir -p {workDir}/{linksPath}
 rsync -rlvW --delete {remoteStore}/{linksPath}/ {workDir}/{linksPath}/ || :
 for storePath in {storePaths}; do
   # Only get the first matching tarball. If there are multiple with the
   # same hash, we only need one and they should be interchangeable.
   if tars=$(rsync -s --list-only "{remoteStore}/$storePath/{pkg}-{ver}-*.{arch}.tar.gz" 2>/dev/null) &&
      # Strip away the metadata in rsync's file listing, leaving only the first filename.
      tar=$(echo "$tars" | sed -rn '1s#[- a-z0-9,/]* [0-9]{{2}}:[0-9]{{2}}:[0-9]{{2}} ##p') &&
      mkdir -p "{workDir}/$storePath" &&
      # If we already have a file with the same name, assume it's up to date
      # with the remote. In reality, we'll have unpacked, relocated and
      # repacked the tarball from the remote, so the file differs, but
      # there's no point in downloading the one from the remote again.
      rsync -vW --ignore-existing "{remoteStore}/$storePath/$tar" "{workDir}/$storePath/"
   then
     break
   fi
 done
 """.format(pkg=p,
            ver=spec["version"],
            arch=self.architecture,
            remoteStore=self.remoteStore,
            workDir=self.workdir,
            linksPath=resolve_links_path(self.architecture, p),
            storePaths=" ".join(
                resolve_store_path(self.architecture, pkg_hash)
                for pkg_hash in spec["remote_hashes"])))
     dieOnError(err, "Unable to update from specified store.")
Example #3
 def syncToRemote(self, p, spec):
     if not self.writeStore:
         return
     tarballNameWithRev = format(
         "%(package)s-%(version)s-%(revision)s.%(architecture)s.tar.gz",
         architecture=self.architecture,
         **spec)
     cmd = format(
         "cd %(workdir)s && "
         "rsync -avR --ignore-existing %(storePath)s/%(tarballNameWithRev)s  %(remoteStore)s/ &&"
         "rsync -avR --ignore-existing %(linksPath)s/%(tarballNameWithRev)s  %(remoteStore)s/",
         workdir=self.workdir,
         remoteStore=self.remoteStore,
         storePath=resolve_store_path(self.architecture, spec["hash"]),
         linksPath=resolve_links_path(self.architecture, p),
         tarballNameWithRev=tarballNameWithRev)
     err = execute(cmd)
     dieOnError(err, "Unable to upload tarball.")
Example #4
 # Test stub standing in for boto3's list_objects_v2 paginator pages. It is
 # apparently defined inside a test method: it closes over `self` and the
 # module-level ARCHITECTURE/GOOD_HASH/BAD_HASH/NONEXISTENT_HASH constants.
 def paginate_listdir(Bucket, Delimiter, Prefix):
     if "/store/" in Prefix:
         store_path = resolve_store_path(ARCHITECTURE, self.spec["remote_revision_hash"])
         if self.spec["remote_revision_hash"] == GOOD_HASH:
             return [{"Contents": [{"Key": store_path + Delimiter +
                                    "zlib-v1.2.3-1.slc7_x86-64.tar.gz"}]}]
         elif self.spec["remote_revision_hash"] == BAD_HASH:
             return [{"Contents": [{"Key": store_path + Delimiter +
                                    "zlib-v1.2.3-2.slc7_x86-64.tar.gz"}]}]
         elif self.spec["remote_revision_hash"] == NONEXISTENT_HASH:
             return [{}]
     elif Prefix.endswith("/" + self.spec["package"] + "/"):
         links_path = resolve_links_path(ARCHITECTURE, self.spec["package"])
         return [{"Contents": [
             {"Key": links_path + Delimiter + "zlib-v1.2.3-1.slc7_x86-64.tar.gz"},
             {"Key": links_path + Delimiter + "zlib-v1.2.3-2.slc7_x86-64.tar.gz"},
         ]}]
     raise NotImplementedError("unknown prefix " + Prefix)
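Example #4 mimics the page structure produced by boto3's list_objects_v2 paginator. The surrounding test is not shown; one plausible way to wire the stub up with unittest.mock would be:

 from unittest import mock

 s3_stub = mock.MagicMock()
 # get_paginator(...).paginate(Bucket=..., Delimiter=..., Prefix=...)
 # forwards its keyword arguments to the stub and returns the fake pages.
 s3_stub.get_paginator.return_value.paginate.side_effect = paginate_listdir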
Example #5
 def syncToRemote(self, p, spec):
     if not self.writeStore:
         return
     tarballNameWithRev = format(
         "%(package)s-%(version)s-%(revision)s.%(architecture)s.tar.gz",
         architecture=self.architecture,
         **spec)
     # Note: TARSHA256 is computed below but never used by the upload commands.
     cmd = format(
         "cd %(workdir)s && "
         "TARSHA256=`sha256sum %(storePath)s/%(tarballNameWithRev)s | awk '{ print $1 }'` && "
         "s3cmd put -s -v --host s3.cern.ch --host-bucket %(b)s.s3.cern.ch %(storePath)s/%(tarballNameWithRev)s s3://%(b)s/%(storePath)s/ 2>&1 || true\n"
         "HASHEDURL=`readlink %(linksPath)s/%(tarballNameWithRev)s | sed -e's|^../../||'` && "
         "echo $HASHEDURL | s3cmd put -s -v --host s3.cern.ch --host-bucket %(b)s.s3.cern.ch - s3://%(b)s/%(linksPath)s/%(tarballNameWithRev)s 2>&1 || true\n",
         workdir=self.workdir,
         b=self.remoteStore,
         storePath=resolve_store_path(self.architecture, spec["hash"]),
         linksPath=resolve_links_path(self.architecture, p),
         tarballNameWithRev=tarballNameWithRev)
     err = execute(cmd)
     dieOnError(err, "Unable to upload tarball.")
Example #6
 def syncToLocal(self, p, spec):
     debug("Updating remote store for package %s with hashes %s", p,
           ", ".join(spec["remote_hashes"]))
     err = execute("""\
 for storePath in {storePaths}; do
   # For the first store path that contains tarballs, fetch them, and skip
   # any possible later tarballs (we only need one).
   if [ -n "$(s3cmd ls -s -v --host s3.cern.ch --host-bucket {b}.s3.cern.ch \
                    "s3://{b}/$storePath/")" ]; then
     s3cmd --no-check-md5 sync -s -v --host s3.cern.ch --host-bucket {b}.s3.cern.ch \
           "s3://{b}/$storePath/" "{workDir}/$storePath/" 2>&1 || :
     break
   fi
 done
 mkdir -p "{workDir}/{linksPath}"
 find "{workDir}/{linksPath}" -type l -delete
 curl -sL "https://s3.cern.ch/swift/v1/{b}/{linksPath}.manifest" |
   while IFS='\t' read -r symlink target; do
     ln -sf "../../${{target#../../}}" "{workDir}/{linksPath}/$symlink" || true
   done
 for x in $(curl -sL "https://s3.cern.ch/swift/v1/{b}/?prefix={linksPath}/"); do
   # Skip already existing symlinks -- these were from the manifest.
   # (We delete leftover symlinks from previous runs above.)
   [ -L "{workDir}/{linksPath}/$(basename "$x")" ] && continue
   ln -sf "$(curl -sL "https://s3.cern.ch/swift/v1/{b}/$x" | sed -r 's,^(\\.\\./\\.\\./)?,../../,')" \
      "{workDir}/{linksPath}/$(basename "$x")" || true
 done
 """.format(
         b=self.remoteStore,
         storePaths=" ".join(
             resolve_store_path(self.architecture, pkg_hash)
             for pkg_hash in spec["remote_hashes"]),
         linksPath=resolve_links_path(self.architecture, p),
         workDir=self.workdir,
     ))
     dieOnError(err, "Unable to update from specified store.")
Example #7
    def syncToLocal(self, p, spec):
        from botocore.exceptions import ClientError
        debug("Updating remote store for package %s with hashes %s", p,
              ", ".join(spec["remote_hashes"]))

        # If we already have a tarball with any equivalent hash, don't check S3.
        have_tarball = False
        for pkg_hash in spec["remote_hashes"]:
            store_path = resolve_store_path(self.architecture, pkg_hash)
            if glob.glob(
                    os.path.join(self.workdir, store_path, "%s-*.tar.gz" % p)):
                debug("Reusing existing tarball for %s@%s", p, pkg_hash)
                have_tarball = True
                break

        for pkg_hash in spec["remote_hashes"]:
            if have_tarball:
                break
            store_path = resolve_store_path(self.architecture, pkg_hash)

            # We don't already have a tarball with the hash that we need, so download
            # the first existing one from the remote, if possible. (Downloading more
            # than one is a waste of time as they should be equivalent and we only
            # ever use one anyway.)
            for tarball in self._s3_listdir(store_path):
                debug("Fetching tarball %s", tarball)
                # Create containing directory locally. (exist_ok= is python3-specific.)
                os.makedirs(os.path.join(self.workdir, store_path),
                            exist_ok=True)
                self.s3.download_file(Bucket=self.remoteStore,
                                      Key=tarball,
                                      Filename=os.path.join(
                                          self.workdir, store_path,
                                          os.path.basename(tarball)))
                have_tarball = True  # break out of outer loop
                break

        if not have_tarball:
            debug("Remote has no tarballs for %s with hashes %s", p,
                  ", ".join(spec["remote_hashes"]))

        links_path = resolve_links_path(self.architecture, p)
        os.makedirs(os.path.join(self.workdir, links_path), exist_ok=True)

        # Remove existing symlinks: we'll fetch the ones from the remote next.
        parent = os.path.join(self.workdir, links_path)
        for fname in os.listdir(parent):
            path = os.path.join(parent, fname)
            if os.path.islink(path):
                os.unlink(path)

        # Fetch symlink manifest and create local symlinks to match.
        debug("Fetching symlink manifest")
        n_symlinks = 0
        try:
            manifest = self.s3.get_object(Bucket=self.remoteStore,
                                          Key=links_path + ".manifest")
        except ClientError as exc:
            debug("Could not fetch manifest: %s", exc)
        else:
            for line in manifest["Body"].iter_lines():
                link_name, has_sep, target = line.rstrip(b"\n").partition(
                    b"\t")
                if not has_sep:
                    debug("Ignoring malformed line in manifest: %r", line)
                    continue
                if not target.startswith(b"../../"):
                    target = b"../../" + target
                target = os.fsdecode(target)
                link_path = os.path.join(self.workdir, links_path,
                                         os.fsdecode(link_name))
                dieOnError(
                    execute("ln -sf {} {}".format(target, link_path)),
                    "Unable to create symlink {} -> {}".format(
                        link_name, target))
                n_symlinks += 1
            debug("Got %d entries in manifest", n_symlinks)

        # Create remote symlinks that aren't in the manifest yet.
        debug("Looking for symlinks not in manifest")
        for link_key in self._s3_listdir(links_path):
            link_path = os.path.join(self.workdir, link_key)
            if os.path.islink(link_path):
                continue
            debug("Fetching leftover symlink %s", link_key)
            resp = self.s3.get_object(Bucket=self.remoteStore, Key=link_key)
            target = os.fsdecode(resp["Body"].read()).rstrip("\n")
            if not target.startswith("../../"):
                target = "../../" + target
            dieOnError(
                execute("ln -sf {} {}".format(target, link_path)),
                "Unable to create symlink {} -> {}".format(link_key, target))
Example #8
    def syncToLocal(self, p, spec):
        # Check for any existing tarballs we can use instead of fetching new ones.
        for pkg_hash in spec["remote_hashes"]:
            try:
                have_tarballs = os.listdir(
                    os.path.join(
                        self.workdir,
                        resolve_store_path(self.architecture, pkg_hash)))
            except OSError:  # store path not readable
                continue
            for tarball in have_tarballs:
                if re.match(
                        r"^{package}-{version}-[0-9]+\.{arch}\.tar\.gz$".
                        format(
                            package=re.escape(spec["package"]),
                            version=re.escape(spec["version"]),
                            arch=re.escape(self.architecture),
                        ), os.path.basename(tarball)):
                    debug(
                        "Previously downloaded tarball for %s with hash %s, reusing",
                        p, pkg_hash)
                    return

        with requests.Session() as session:
            debug("Updating remote store for package %s; trying hashes %s", p,
                  ", ".join(spec["remote_hashes"]))
            store_path = use_tarball = None
            # Find the first tarball that matches any possible hash and fetch it.
            for pkg_hash in spec["remote_hashes"]:
                store_path = resolve_store_path(self.architecture, pkg_hash)
                tarballs = self.getRetry("%s/%s/" %
                                         (self.remoteStore, store_path),
                                         session=session)
                if tarballs:
                    use_tarball = tarballs[0]["name"]
                    break

            if store_path is None or use_tarball is None:
                debug("Nothing fetched for %s (%s)", p,
                      ", ".join(spec["remote_hashes"]))
                return

            links_path = resolve_links_path(self.architecture, spec["package"])
            execute("mkdir -p {}/{} {}/{}".format(self.workdir, store_path,
                                                  self.workdir, links_path))

            destPath = os.path.join(self.workdir, store_path, use_tarball)
            if not os.path.isfile(destPath):
                # Do not download twice
                self.getRetry("/".join(
                    (self.remoteStore, store_path, use_tarball)),
                              destPath,
                              session=session)

            # Fetch manifest file with initial symlinks. This file is updated
            # regularly; we use it to avoid many small network requests.
            manifest = self.getRetry("%s/%s.manifest" %
                                     (self.remoteStore, links_path),
                                     returnResult=True,
                                     session=session)
            symlinks = {
                linkname.decode("utf-8"): target.decode("utf-8")
                for linkname, sep, target in (
                    line.partition(b"\t") for line in manifest.splitlines())
                if sep and linkname and target
            }
            # If we've just downloaded a tarball, add a symlink to it.
            # We need to strip the leading TARS/ first, though.
            assert store_path.startswith("TARS/"), store_path
            symlinks[use_tarball] = os.path.join(store_path[len("TARS/"):],
                                                 use_tarball)
            # Now add any remaining symlinks that aren't in the manifest yet. There
            # should always be relatively few of these, as the separate network
            # requests are a bit expensive.
            for link in self.getRetry("%s/%s/" %
                                      (self.remoteStore, links_path),
                                      session=session):
                linkname = link["name"]
                if linkname in symlinks:
                    # This symlink is already present in the manifest.
                    continue
                if os.path.islink(
                        os.path.join(self.workdir, links_path, linkname)):
                    # We already have this symlink locally. Local revision
                    # numbers are chosen not to collide with remote ones
                    # unless we intend to upload them anyway, so there is no
                    # need to download the remote copy again.
                    continue
                # This symlink isn't in the manifest yet, and we don't have it locally,
                # so download it individually.
                symlinks[linkname] = \
                    self.getRetry("/".join((self.remoteStore, links_path, linkname)),
                                  returnResult=True, log=False, session=session) \
                        .decode("utf-8").rstrip("\r\n")
        for linkname, target in symlinks.items():
            execute("ln -nsf ../../{target} {workdir}/{linkdir}/{name}".format(
                workdir=self.workdir,
                linkdir=links_path,
                name=linkname,
                target=target))
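Example #8's HTTP variant leans on a `getRetry` method whose behaviour can only be inferred from its call sites: by default it returns a directory listing indexable as `tarballs[0]["name"]`, with a destination path it downloads to that file, and with `returnResult=True` it returns the raw body. A rough sketch under those assumptions:

 import requests

 def getRetry(self, url, dest=None, returnResult=False, log=True,
              session=None, retries=3):
     # Hypothetical reconstruction from the call sites above; `log` is
     # accepted only for signature parity with those calls.
     get = session.get if session is not None else requests.get
     for _ in range(retries):
         resp = get(url)
         if resp.status_code == 404:
             return None  # treated as "nothing there" by the caller
         if resp.ok:
             if dest is not None:  # download mode
                 with open(dest, "wb") as fp:
                     fp.write(resp.content)
                 return True
             return resp.content if returnResult else resp.json()
     resp.raise_for_status()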