def syncToRemote(self, p, spec):
  if not self.writeStore:
    return

  tarballNameWithRev = ("{package}-{version}-{revision}.{architecture}.tar.gz"
                        .format(architecture=self.architecture, **spec))
  tar_path = os.path.join(resolve_store_path(self.architecture, spec["hash"]),
                          tarballNameWithRev)
  link_path = os.path.join(resolve_links_path(self.architecture, p),
                           tarballNameWithRev)
  tar_exists = self._s3_key_exists(tar_path)
  link_exists = self._s3_key_exists(link_path)

  if tar_exists and link_exists:
    debug("%s exists on S3 already, not uploading", tarballNameWithRev)
    return
  if tar_exists or link_exists:
    warning("%s exists already but %s does not, overwriting!",
            tar_path if tar_exists else link_path,
            link_path if tar_exists else tar_path)

  debug("Uploading tarball and symlink for %s %s-%s (%s) to S3",
        p, spec["version"], spec["revision"], spec["hash"])
  self.s3.upload_file(Bucket=self.writeStore, Key=tar_path,
                      Filename=os.path.join(self.workdir, tar_path))
  # The "symlink" is stored on S3 as a small object whose body is the link's
  # relative target path, with the leading "../../" stripped off.
  self.s3.put_object(Bucket=self.writeStore, Key=link_path,
                     Body=os.readlink(os.path.join(self.workdir, link_path))
                     .lstrip("./").encode("utf-8"))
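# Sketch only: syncToRemote above relies on a _s3_key_exists helper that is not
# shown in this section. Assuming self.s3 is a boto3 S3 client, one minimal
# implementation could probe the key with head_object and treat a 404 as "absent":
def _s3_key_exists(self, key):
  from botocore.exceptions import ClientError
  try:
    self.s3.head_object(Bucket=self.writeStore, Key=key)
  except ClientError as err:
    # Missing keys surface as a 404 error code; anything else is a real error.
    if err.response["Error"]["Code"] == "404":
      return False
    raise
  return True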
def syncToLocal(self, p, spec): debug("Updating remote store for package %s with hashes %s", p, ", ".join(spec["remote_hashes"])) err = execute("""\ mkdir -p {workDir}/{linksPath} rsync -rlvW --delete {remoteStore}/{linksPath}/ {workDir}/{linksPath}/ || : for storePath in {storePaths}; do # Only get the first matching tarball. If there are multiple with the # same hash, we only need one and they should be interchangable. if tars=$(rsync -s --list-only "{remoteStore}/$storePath/{pkg}-{ver}-*.{arch}.tar.gz" 2>/dev/null) && # Strip away the metadata in rsync's file listing, leaving only the first filename. tar=$(echo "$tars" | sed -rn '1s#[- a-z0-9,/]* [0-9]{{2}}:[0-9]{{2}}:[0-9]{{2}} ##p') && mkdir -p "{workDir}/$storePath" && # If we already have a file with the same name, assume it's up to date # with the remote. In reality, we'll have unpacked, relocated and # repacked the tarball from the remote, so the file differs, but # there's no point in downloading the one from the remote again. rsync -vW --ignore-existing "{remoteStore}/$storePath/$tar" "{workDir}/$storePath/" then break fi done """.format(pkg=p, ver=spec["version"], arch=self.architecture, remoteStore=self.remoteStore, workDir=self.workdir, linksPath=resolve_links_path(self.architecture, p), storePaths=" ".join( resolve_store_path(self.architecture, pkg_hash) for pkg_hash in spec["remote_hashes"]))) dieOnError(err, "Unable to update from specified store.")
def syncToRemote(self, p, spec):
  if not self.writeStore:
    return

  tarballNameWithRev = format("%(package)s-%(version)s-%(revision)s.%(architecture)s.tar.gz",
                              architecture=self.architecture, **spec)
  cmd = format("cd %(workdir)s && "
               "rsync -avR --ignore-existing %(storePath)s/%(tarballNameWithRev)s %(remoteStore)s/ && "
               "rsync -avR --ignore-existing %(linksPath)s/%(tarballNameWithRev)s %(remoteStore)s/",
               workdir=self.workdir,
               remoteStore=self.remoteStore,
               storePath=resolve_store_path(self.architecture, spec["hash"]),
               linksPath=resolve_links_path(self.architecture, p),
               tarballNameWithRev=tarballNameWithRev)
  err = execute(cmd)
  dieOnError(err, "Unable to upload tarball.")
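# Note: format() above is not the str method. Judging from its call sites in
# this section (percent-style placeholders plus keyword arguments), it is
# assumed to be a small helper roughly equivalent to this sketch:
def format(s, **kwds):
  # Substitute %(name)s placeholders from the given keyword arguments.
  return s % kwds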
# Test stub: returns pages shaped like boto3's list_objects_v2 paginator output
# (a list of dicts, each with a "Contents" list of {"Key": ...} entries). It
# closes over self, ARCHITECTURE and the *_HASH constants from the enclosing test.
def paginate_listdir(Bucket, Delimiter, Prefix):
  if "/store/" in Prefix:
    store_path = resolve_store_path(ARCHITECTURE, self.spec["remote_revision_hash"])
    if self.spec["remote_revision_hash"] == GOOD_HASH:
      return [{"Contents": [
        {"Key": store_path + Delimiter + "zlib-v1.2.3-1.slc7_x86-64.tar.gz"},
      ]}]
    elif self.spec["remote_revision_hash"] == BAD_HASH:
      return [{"Contents": [
        {"Key": store_path + Delimiter + "zlib-v1.2.3-2.slc7_x86-64.tar.gz"},
      ]}]
    elif self.spec["remote_revision_hash"] == NONEXISTENT_HASH:
      return [{}]
  elif Prefix.endswith("/" + self.spec["package"] + "/"):
    links_path = resolve_links_path(ARCHITECTURE, self.spec["package"])
    return [{"Contents": [
      {"Key": links_path + Delimiter + "zlib-v1.2.3-1.slc7_x86-64.tar.gz"},
      {"Key": links_path + Delimiter + "zlib-v1.2.3-2.slc7_x86-64.tar.gz"},
    ]}]
  raise NotImplementedError("unknown prefix " + Prefix)
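# Hypothetical wiring for the stub above, inside the same test where
# paginate_listdir is defined: patch a mocked S3 client so that
# get_paginator(...).paginate(...) is answered by the stub. The mock_s3 name is
# illustrative, not part of the original test.
from unittest.mock import MagicMock

mock_s3 = MagicMock()
mock_s3.get_paginator.return_value.paginate.side_effect = paginate_listdir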
def syncToRemote(self, p, spec):
  if not self.writeStore:
    return

  tarballNameWithRev = format("%(package)s-%(version)s-%(revision)s.%(architecture)s.tar.gz",
                              architecture=self.architecture, **spec)
  cmd = format("cd %(workdir)s && "
               "TARSHA256=`sha256sum %(storePath)s/%(tarballNameWithRev)s | awk '{ print $1 }'` && "
               "s3cmd put -s -v --host s3.cern.ch --host-bucket %(b)s.s3.cern.ch %(storePath)s/%(tarballNameWithRev)s s3://%(b)s/%(storePath)s/ 2>&1 || true\n"
               "HASHEDURL=`readlink %(linksPath)s/%(tarballNameWithRev)s | sed -e's|^../../||'` && "
               "echo $HASHEDURL | s3cmd put -s -v --host s3.cern.ch --host-bucket %(b)s.s3.cern.ch - s3://%(b)s/%(linksPath)s/%(tarballNameWithRev)s 2>&1 || true\n",
               workdir=self.workdir,
               b=self.remoteStore,
               storePath=resolve_store_path(self.architecture, spec["hash"]),
               linksPath=resolve_links_path(self.architecture, p),
               tarballNameWithRev=tarballNameWithRev)
  err = execute(cmd)
  dieOnError(err, "Unable to upload tarball.")
def syncToLocal(self, p, spec): debug("Updating remote store for package %s with hashes %s", p, ", ".join(spec["remote_hashes"])) err = execute("""\ for storePath in {storePaths}; do # For the first store path that contains tarballs, fetch them, and skip # any possible later tarballs (we only need one). if [ -n "$(s3cmd ls -s -v --host s3.cern.ch --host-bucket {b}.s3.cern.ch \ "s3://{b}/$storePath/")" ]; then s3cmd --no-check-md5 sync -s -v --host s3.cern.ch --host-bucket {b}.s3.cern.ch \ "s3://{b}/$storePath/" "{workDir}/$storePath/" 2>&1 || : break fi done mkdir -p "{workDir}/{linksPath}" find "{workDir}/{linksPath}" -type l -delete curl -sL "https://s3.cern.ch/swift/v1/{b}/{linksPath}.manifest" | while IFS='\t' read -r symlink target; do ln -sf "../../${{target#../../}}" "{workDir}/{linksPath}/$symlink" || true done for x in $(curl -sL "https://s3.cern.ch/swift/v1/{b}/?prefix={linksPath}/"); do # Skip already existing symlinks -- these were from the manifest. # (We delete leftover symlinks from previous runs above.) [ -L "{workDir}/{linksPath}/$(basename "$x")" ] && continue ln -sf "$(curl -sL "https://s3.cern.ch/swift/v1/{b}/$x" | sed -r 's,^(\\.\\./\\.\\./)?,../../,')" \ "{workDir}/{linksPath}/$(basename "$x")" || true done """.format( b=self.remoteStore, storePaths=" ".join( resolve_store_path(self.architecture, pkg_hash) for pkg_hash in spec["remote_hashes"]), linksPath=resolve_links_path(self.architecture, p), workDir=self.workdir, )) dieOnError(err, "Unable to update from specified store.")
def syncToLocal(self, p, spec):
  from botocore.exceptions import ClientError
  debug("Updating local store for package %s with remote hashes %s",
        p, ", ".join(spec["remote_hashes"]))

  # If we already have a tarball with any equivalent hash, don't check S3.
  have_tarball = False
  for pkg_hash in spec["remote_hashes"]:
    store_path = resolve_store_path(self.architecture, pkg_hash)
    if glob.glob(os.path.join(self.workdir, store_path, "%s-*.tar.gz" % p)):
      debug("Reusing existing tarball for %s@%s", p, pkg_hash)
      have_tarball = True
      break

  for pkg_hash in spec["remote_hashes"]:
    if have_tarball:
      break
    store_path = resolve_store_path(self.architecture, pkg_hash)
    # We don't already have a tarball with the hash that we need, so download
    # the first existing one from the remote, if possible. (Downloading more
    # than one is a waste of time as they should be equivalent and we only
    # ever use one anyway.)
    for tarball in self._s3_listdir(store_path):
      debug("Fetching tarball %s", tarball)
      # Create containing directory locally. (exist_ok= is python3-specific.)
      os.makedirs(os.path.join(self.workdir, store_path), exist_ok=True)
      self.s3.download_file(Bucket=self.remoteStore, Key=tarball,
                            Filename=os.path.join(self.workdir, store_path,
                                                  os.path.basename(tarball)))
      have_tarball = True  # break out of outer loop
      break

  if not have_tarball:
    debug("Remote has no tarballs for %s with hashes %s",
          p, ", ".join(spec["remote_hashes"]))

  links_path = resolve_links_path(self.architecture, p)
  os.makedirs(os.path.join(self.workdir, links_path), exist_ok=True)

  # Remove existing symlinks: we'll fetch the ones from the remote next.
  parent = os.path.join(self.workdir, links_path)
  for fname in os.listdir(parent):
    path = os.path.join(parent, fname)
    if os.path.islink(path):
      os.unlink(path)

  # Fetch symlink manifest and create local symlinks to match.
  debug("Fetching symlink manifest")
  n_symlinks = 0
  try:
    manifest = self.s3.get_object(Bucket=self.remoteStore,
                                  Key=links_path + ".manifest")
  except ClientError as exc:
    debug("Could not fetch manifest: %s", exc)
  else:
    for line in manifest["Body"].iter_lines():
      link_name, has_sep, target = line.rstrip(b"\n").partition(b"\t")
      if not has_sep:
        debug("Ignoring malformed line in manifest: %r", line)
        continue
      if not target.startswith(b"../../"):
        target = b"../../" + target
      target = os.fsdecode(target)
      link_path = os.path.join(self.workdir, links_path, os.fsdecode(link_name))
      dieOnError(execute("ln -sf {} {}".format(target, link_path)),
                 "Unable to create symlink {} -> {}".format(link_name, target))
      n_symlinks += 1
    debug("Got %d entries in manifest", n_symlinks)

  # Create remote symlinks that aren't in the manifest yet.
  debug("Looking for symlinks not in manifest")
  for link_key in self._s3_listdir(links_path):
    link_path = os.path.join(self.workdir, link_key)
    if os.path.islink(link_path):
      continue
    debug("Fetching leftover symlink %s", link_key)
    resp = self.s3.get_object(Bucket=self.remoteStore, Key=link_key)
    target = os.fsdecode(resp["Body"].read()).rstrip("\n")
    if not target.startswith("../../"):
      target = "../../" + target
    dieOnError(execute("ln -sf {} {}".format(target, link_path)),
               "Unable to create symlink {} -> {}".format(link_key, target))
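# Sketch only: _s3_listdir is used above but not shown in this section. Assuming
# self.s3 is a boto3 client, it could yield the keys directly under a prefix via
# the list_objects_v2 paginator (which matches the page shape that the
# paginate_listdir test stub above mimics):
def _s3_listdir(self, dirname):
  pages = self.s3.get_paginator("list_objects_v2").paginate(
    Bucket=self.remoteStore, Delimiter="/", Prefix=dirname + "/")
  # Flatten the pages into a generator of object keys; pages with no
  # "Contents" (empty listings) are skipped.
  return (item["Key"] for page in pages for item in page.get("Contents", ()))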
def syncToLocal(self, p, spec):
  # Check for any existing tarballs we can use instead of fetching new ones.
  for pkg_hash in spec["remote_hashes"]:
    try:
      have_tarballs = os.listdir(os.path.join(
        self.workdir, resolve_store_path(self.architecture, pkg_hash)))
    except OSError:  # store path not readable
      continue
    for tarball in have_tarballs:
      if re.match(r"^{package}-{version}-[0-9]+\.{arch}\.tar\.gz$".format(
            package=re.escape(spec["package"]),
            version=re.escape(spec["version"]),
            arch=re.escape(self.architecture),
          ), os.path.basename(tarball)):
        debug("Previously downloaded tarball for %s with hash %s, reusing",
              p, pkg_hash)
        return

  with requests.Session() as session:
    debug("Updating local store for package %s; trying remote hashes %s",
          p, ", ".join(spec["remote_hashes"]))
    store_path = use_tarball = None
    # Find the first tarball that matches any possible hash and fetch it.
    for pkg_hash in spec["remote_hashes"]:
      store_path = resolve_store_path(self.architecture, pkg_hash)
      tarballs = self.getRetry("%s/%s/" % (self.remoteStore, store_path),
                               session=session)
      if tarballs:
        use_tarball = tarballs[0]["name"]
        break

    if store_path is None or use_tarball is None:
      debug("Nothing fetched for %s (%s)", p, ", ".join(spec["remote_hashes"]))
      return

    links_path = resolve_links_path(self.architecture, spec["package"])
    execute("mkdir -p {}/{} {}/{}".format(self.workdir, store_path,
                                          self.workdir, links_path))

    destPath = os.path.join(self.workdir, store_path, use_tarball)
    if not os.path.isfile(destPath):  # Do not download twice
      self.getRetry("/".join((self.remoteStore, store_path, use_tarball)),
                    destPath, session=session)

    # Fetch manifest file with initial symlinks. This file is updated
    # regularly; we use it to avoid many small network requests.
    manifest = self.getRetry("%s/%s.manifest" % (self.remoteStore, links_path),
                             returnResult=True, session=session)
    symlinks = {
      linkname.decode("utf-8"): target.decode("utf-8")
      for linkname, sep, target in (line.partition(b"\t")
                                    for line in manifest.splitlines())
      if sep and linkname and target
    }

    # If we've just downloaded a tarball, add a symlink to it.
    # We need to strip the leading TARS/ first, though.
    assert store_path.startswith("TARS/"), store_path
    symlinks[use_tarball] = os.path.join(store_path[len("TARS/"):], use_tarball)

    # Now add any remaining symlinks that aren't in the manifest yet. There
    # should always be relatively few of these, as the separate network
    # requests are a bit expensive.
    for link in self.getRetry("%s/%s/" % (self.remoteStore, links_path),
                              session=session):
      linkname = link["name"]
      if linkname in symlinks:
        # This symlink is already present in the manifest.
        continue
      if os.path.islink(os.path.join(self.workdir, links_path, linkname)):
        # We have this symlink locally. With local revisions, we won't produce
        # revisions that will conflict with remote revisions unless we upload
        # them anyway, so there's no need to redownload.
        continue
      # This symlink isn't in the manifest yet, and we don't have it locally,
      # so download it individually.
      symlinks[linkname] = \
        self.getRetry("/".join((self.remoteStore, links_path, linkname)),
                      returnResult=True, log=False, session=session) \
        .decode("utf-8").rstrip("\r\n")

    for linkname, target in symlinks.items():
      execute("ln -nsf ../../{target} {workdir}/{linkdir}/{name}".format(
        workdir=self.workdir, linkdir=links_path, name=linkname, target=target))
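# For reference, every variant above assumes resolve_store_path and
# resolve_links_path lay tarballs and symlinks out under a TARS/ prefix (the
# assert on "TARS/" above and the ../../ link targets rely on this). A sketch
# consistent with that layout, with the two-character hash shard as an
# assumption rather than something shown in this section:
def resolve_store_path(architecture, spec_hash):
  # e.g. TARS/slc7_x86-64/store/ab/abcdef.../
  return "/".join(("TARS", architecture, "store", spec_hash[:2], spec_hash))

def resolve_links_path(architecture, package):
  # e.g. TARS/slc7_x86-64/zlib/
  return "/".join(("TARS", architecture, package))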