def submit_request(self, request, return_response=False):
    '''submit_request will make the request, via a stream or not. If
    return response is True, the response is returned as is without
    further parsing. Given a 401 error, the update_token function is
    called to try the request again, and only then the error returned.
    '''
    try:
        return urlopen(request)

    # If we have an HTTPError, try to follow the response
    except HTTPError as error:
        bot.debug('Http Error with code %s' % error.code)

        # Only a 401 (expired/missing token) warrants a retry
        if error.code != 401:
            return error

        # Case 1: we have an http 401 error, and need to refresh token
        self.update_token(response=error)
        try:
            retry = self.prepare_request(request.get_full_url(),
                                         headers=self.headers)
            return urlopen(retry)
        except HTTPError as error:
            bot.debug('Http Error with code %s' % error.code)
            return error
def main():
    '''main is a wrapper for the client to hand the parser to the
    executable functions. This makes it possible to set up a parser
    in test cases.
    '''
    bot.debug("\n*** STARTING SINGULARITY PYTHON PULL ****")

    from defaults import LAYERFILE, DISABLE_CACHE, getenv

    # What image is the user asking for?
    container = getenv("SINGULARITY_CONTAINER", required=True)
    pull_folder = getenv("SINGULARITY_PULLFOLDER")

    image_uri = get_image_uri(container)
    container = remove_image_uri(container, quiet=True)

    # Only Singularity Hub images can be pulled; anything else is fatal
    if image_uri != "shub://":
        bot.error("uri %s is not supported for pull. Exiting." % (image_uri))
        sys.exit(1)

    from shub.main import PULL
    manifest = PULL(image=container,
                    download_folder=pull_folder,
                    layerfile=LAYERFILE)
def get(self, url, data=None, headers=None, default_headers=True,
        return_response=False):
    '''get will fetch a particular url
    :param data: a dictionary of key:value items to add to the data args variable
    :param url: the url to get
    :param headers: extra headers to merge into the request headers
    :param default_headers: if True, start from the client's own headers
    :param return_response: if True, return the raw response object
    :returns response: the decoded response content, or the response object
    '''
    bot.debug("GET %s" % url)

    # Start from a COPY of the client's headers: the original aliased
    # self.headers, so a per-request headers dict mutated the client's
    # shared state via request_headers.update(headers)
    request_headers = dict()
    if default_headers and len(self.headers) > 0:
        request_headers = dict(self.headers)

    if headers is not None:
        request_headers.update(headers)

    request = self.prepare_request(headers=request_headers,
                                   data=data,
                                   url=url)

    response = self.submit_request(request,
                                   return_response=return_response)

    if return_response:
        return response

    return response.read().decode('utf-8')
def get_image_format(image_file):
    '''
    get image format will use the image-format executable to return the
    kind of file type for the image

    Parameters
    ==========
    image_file: full path to the image file to inspect

    Returns
    =======
    GZIP, DIRECTORY, SQUASHFS, EXT3
    '''
    # A gz extension alone is enough to classify the image
    if image_file.endswith('gz'):
        bot.debug('Found compressed image')
        return "GZIP"

    # The image-type binary ships next to the python tree
    here = os.path.abspath(os.path.dirname(__file__))
    sbin = here.replace("python", "bin/image-type")

    custom_env = os.environ.copy()
    custom_env["SINGULARITY_MESSAGELEVEL"] = "1"

    image_format = run_command([sbin, image_file],
                               env=custom_env,
                               quiet=True)

    if image_format is not None:
        if isinstance(image_format, bytes):
            image_format = image_format.decode('utf-8')
        image_format = str(image_format).strip('\n')

    bot.debug('Found %s image' % image_format)
    return image_format
def get_layer(self, image_id, download_folder=None,
              change_perms=False, return_tmp=False):
    '''get_layer will download an image layer (.tar.gz) to a specified
    download folder.
    :param image_id: the digest of the layer blob to download
    :param download_folder: if specified, download to folder. Otherwise
    return response with raw data
    :param change_perms: change permissions additionally (default False
    to support multiprocessing)
    :param return_tmp: If true, return the temporary file name (and
    don't rename to the file's final name). Default is False, should be
    True for multiprocessing that requires extra permission changes
    :returns: path to the downloaded .tar.gz (or the tmp path)
    '''
    registry = self.registry
    if registry is None:
        registry = self.api_base

    # make sure we have a complete url
    registry = add_http(registry)

    # The <name> variable is the namespace/repo_name
    base = "%s/%s/%s/%s/blobs/%s" % (registry,
                                     self.api_version,
                                     self.namespace,
                                     self.repo_name,
                                     image_id)
    bot.verbose("Downloading layers from %s" % base)

    if download_folder is None:
        download_folder = tempfile.mkdtemp()

    # From here on, download_folder is the FINAL file path, not a folder
    download_folder = "%s/%s.tar.gz" % (download_folder, image_id)

    # Update user what we are doing
    bot.debug("Downloading layer %s" % image_id)

    # Step 1: Download the layer atomically (to a temp name, renamed on
    # success below). NOTE(review): tempfile._get_candidate_names is a
    # private CPython API — consider tempfile.mkstemp; confirm callers.
    file_name = "%s.%s" % (download_folder,
                           next(tempfile._get_candidate_names()))
    tar_download = self.download_atomically(url=base,
                                            file_name=file_name)
    bot.debug('Download of raw file (pre permissions fix) is %s'
              % tar_download)

    # Step 2: Fix Permissions?
    if change_perms:
        tar_download = change_tar_permissions(tar_download)

    # Multiprocessing callers rename themselves after extra fixes
    if return_tmp is True:
        return tar_download

    try:
        os.rename(tar_download, download_folder)
    except Exception:
        msg = "Cannot untar layer %s," % tar_download
        msg += " was there a problem with download?"
        bot.error(msg)
        sys.exit(1)

    return download_folder
def create_tar(files, output_folder=None):
    '''create_memory_tar will take a list of files (each a dictionary
    with name, permission, and content) and write the tarfile
    (a sha256 sum name is used) to the output_folder.
    If there is no output folder specified, the tar is written to a
    temporary folder.
    :param files: list of dicts with keys name, mode, uid, gid, uname,
    gname, content
    :param output_folder: destination folder (temp folder when None)
    :returns: path to the written tar.gz, or None when files was empty
    '''
    if output_folder is None:
        output_folder = tempfile.mkdtemp()

    finished_tar = None
    additions = []
    contents = []

    for entity in files:
        info = tarfile.TarInfo(name=entity['name'])
        info.mode = entity['mode']
        # NOTE(review): strftime('%s') is a glibc extension (epoch
        # seconds) — not portable; int(time.time()) would be — confirm
        info.mtime = int(datetime.datetime.now().strftime('%s'))
        info.uid = entity["uid"]
        info.gid = entity["gid"]
        info.uname = entity["uname"]
        info.gname = entity["gname"]

        # Get size from stringIO write
        filey = StringIO()
        content = None
        try:
            # python3: content is already str, write returns char count
            info.size = filey.write(entity['content'])
            content = BytesIO(entity['content'].encode('utf8'))
        except:
            # python2 fallback: decode bytes first to measure size
            info.size = int(filey.write(entity['content'].decode('utf-8')))
            content = BytesIO(entity['content'].encode('utf8'))
            pass

        if content is not None:
            addition = {'content': content,
                        'info': info}
            additions.append(addition)
            contents.append(content)

    # Now generate the sha256 name based on content
    if len(additions) > 0:
        hashy = get_content_hash(contents)
        finished_tar = "%s/sha256:%s.tar.gz" % (output_folder, hashy)

        # Warn the user if it already exists
        if os.path.exists(finished_tar):
            bot.debug("metadata file %s already exists, will over-write."
                      % (finished_tar))

        # Add all content objects to file
        tar = tarfile.open(finished_tar, "w:gz")
        for a in additions:
            tar.addfile(a["info"], a["content"])
        tar.close()
    else:
        bot.debug("No contents, environment or labels,"
                  " for tarfile, will not generate.")

    return finished_tar
def stream(self, url, file_name, data=None, headers=None,
           default_headers=True, show_progress=False):
    '''stream is a get that will stream to file_name
    :param data: a dictionary of key:value items to add to the data args variable
    :param url: the url to get
    :param file_name: the local path the content is streamed to
    :param show_progress: if True, show a progress bar with the bot
    :returns file_name: the path the content was written to
    '''
    bot.debug("GET (stream) %s" % url)

    # Start from a COPY of the client's headers: the original aliased
    # self.headers, so request-specific headers mutated shared state
    request_headers = dict()
    if default_headers and len(self.headers) > 0:
        request_headers = dict(self.headers)

    if headers is not None:
        request_headers.update(headers)

    request = self.prepare_request(headers=request_headers,
                                   data=data,
                                   url=url)

    response = self.submit_request(request)

    # Keep user updated with Progress Bar?
    if show_progress:
        content_size = None
        if 'Content-Length' in response.headers \
                and response.code not in [400, 401]:
            progress = 0
            content_size = int(response.headers['Content-Length'])
            bot.show_progress(progress, content_size, length=35)

    chunk_size = 1 << 20
    with open(file_name, 'wb') as filey:
        while True:
            chunk = response.read(chunk_size)
            if not chunk:
                break
            try:
                filey.write(chunk)
                if show_progress and content_size is not None:
                    # count bytes actually written (the original added
                    # chunk_size, overshooting on the final short chunk)
                    progress += len(chunk)
                    bot.show_progress(iteration=progress,
                                      total=content_size,
                                      length=35,
                                      carriage_return=False)
            except Exception as error:
                bot.error("Error writing to %s: %s exiting"
                          % (file_name, error))
                sys.exit(1)

    # Newline to finish download
    if show_progress:
        sys.stdout.write('\n')

    return file_name
def SIZE(image, auth=None, contentfile=None):
    '''size is intended to be run before an import, to return to the
    contentfile a list of sizes (one per layer) corresponding with the
    layers that will be downloaded for image
    '''
    bot.debug("Starting Docker SIZE, will get size from manifest")
    bot.verbose("Docker image: %s" % image)

    connection = DockerApiConnection(image=image, auth=auth)
    total = connection.get_size()

    # Persist the size for the caller when a content file is given
    if contentfile is not None:
        write_file(contentfile, str(total), mode="w")

    return total
def SIZE(image, contentfile=None):
    '''size is intended to be run before an import, to return to the
    contentfile a list of sizes (one per layer) corresponding with the
    layers that will be downloaded for image
    '''
    bot.debug("Starting Singularity Hub SIZE, will get size from manifest")
    bot.debug("Singularity Hub image: %s" % image)

    connection = SingularityApiConnection(image=image)
    manifest = connection.get_manifest()

    # metrics is a python-repr string; normalize quotes for json
    metrics = json.loads(manifest['metrics'].replace("'", '"'))
    total = metrics['size']

    if contentfile is not None:
        write_file(contentfile, str(total), mode="w")

    return total
def DUMP(jsonfile):
    '''DUMP will return the entire layfile as text, key value pairs
    :param jsonfile: the path to the jsonfile
    '''
    bot.debug("Reading %s to prepare dump to STDOUT" % jsonfile)
    if not os.path.exists(jsonfile):
        bot.error("Cannot find %s, exiting." % jsonfile)
        sys.exit(1)

    contents = read_json(jsonfile)

    # join builds the dump in linear time (the original used += in a
    # loop, which is quadratic) and needs no trailing-newline strip
    dump = "\n".join('%s:"%s"' % (key, value)
                     for key, value in contents.items())
    print(dump)
    return dump
def update_headers(self, fields=None):
    '''get_headers will return a simple default header for a json post.
    This function will be adopted as needed.
    '''
    # Bootstrap headers on first use, otherwise extend the current set
    if len(self.headers) == 0:
        headers = self._init_headers()
    else:
        headers = self.headers

    if fields is not None:
        for key, value in fields.items():
            headers[key] = value

    bot.debug("Headers found: %s" % ",".join(list(headers.keys())))
    self.headers = headers
def redirect_request(self, req, fp, code, msg, headers, newurl):
    '''Build the redirected request, stripping the Authorization header
    whenever the redirect crosses to a different host so credentials are
    never forwarded to a third party.'''
    newreq = HTTPRedirectHandler.redirect_request(
        self, req, fp, code, msg, headers, newurl)

    if 'Authorization' in req.headers:
        src_host = urlparse(req.get_full_url()).hostname
        dst_host = urlparse(newreq.get_full_url()).hostname
        if dst_host != src_host:
            bot.debug('AuthRedirectHandler: stripping "Authorization" header '
                      "(%s != %s)" % (dst_host, src_host))
            del newreq.headers['Authorization']

    return newreq
def get_image_uri(image, quiet=False):
    '''get_image_uri will parse a uri sent from Singularity to determine
    if it's singularity (shub://) or docker (docker://)
    :param image: the complete image uri (example: docker://ubuntu:latest
    '''
    image = image.replace(' ', '')

    # A uri is a leading scheme of letters/digits/dashes ending in ://
    matches = re.findall('^[A-Za-z0-9-]+[:]//', image)
    if not matches:
        if not quiet:
            bot.warning("Could not detect any uri in %s" % image)
        return None

    image_uri = matches[0].lower()
    if not quiet:
        bot.debug("Found uri %s" % image_uri)
    return image_uri
def GET(key, jsonfile):
    '''GET will return a key from the jsonfile, if it exists. If it
    doesn't, exits with an error.
    '''
    key = format_keyname(key)
    bot.debug("GET %s from %s" % (key, jsonfile))

    # Guard: the file must exist before we try to read it
    if not os.path.exists(jsonfile):
        bot.error("Cannot find %s, exiting." % jsonfile)
        sys.exit(1)

    contents = read_json(jsonfile)

    # Guard: a missing key is fatal too
    if key not in contents:
        bot.error("%s is not defined in file. Exiting" % key)
        sys.exit(1)

    value = contents[key]
    print(value)
    bot.debug('%s is %s' % (key, value))
    return value
def update_manifests(self):
    '''update manifests ensures that each of a version1 and version2
    manifest are present
    '''
    bot.debug('Updating manifests.')

    # Without a repository name there is nothing to look up
    if self.repo_name is None:
        bot.error("Insufficient metadata to get manifest.")
        sys.exit(1)

    # Get full image manifest, using version 2.0 of Docker Registry API
    if self.manifest is None:
        bot.debug('MANIFEST (Primary): not found, making initial call.')
        self.manifest = self.get_manifest()

        # This is the primary manifest schema version, determines if we
        # need to reverse layers
        self.schemaVersion = self.manifest['schemaVersion']
        if self.schemaVersion == 1:
            self.reverseLayers = True

    # The v1 manifest carries the metadata (env, labels, runscript)
    if self.manifestv1 is None:
        bot.debug('MANIFEST (Metadata): not found, making initial call.')
        self.manifestv1 = self.get_manifest(old_version=True)

    # https://docs.docker.com/registry/spec/manifest-v2-2/#manifest-list
    # A manifest LIST points at per-platform manifests: pick the one
    # matching our architecture and OS and fetch it by digest
    if "manifests" in self.manifest:
        for entry in self.manifest['manifests']:
            if entry['platform']['architecture'] == DOCKER_ARCHITECTURE:
                if entry['platform']['os'] == DOCKER_OS:
                    digest = entry['digest']
                    bot.debug('Image manifest version 2.2 list found.')
                    bot.debug('Obtaining architecture: %s, OS: %s'
                              % (DOCKER_ARCHITECTURE, DOCKER_OS))

                    # Obtain specific os, architecture
                    self.manifest = self.get_manifest(version=digest)
                    break

    # If we didn't get a new manifest (no platform matched, so the list
    # is still in place), fall back to version 1
    if "manifests" in self.manifest:
        self.manifest = self.manifestv1
def PULL(image, download_folder=None, layerfile=None):
    '''PULL will retrieve a Singularity Hub image and download to the
    local file system, to the variable specified by
    SINGULARITY_PULLFOLDER.
    :param image: the singularity hub image name
    :param download_folder: the folder to pull the image to.
    :param layerfile: if defined, write pulled image to file
    '''
    client = SingularityApiConnection(image=image)
    manifest = client.get_manifest()

    # `is None`, not `== None` (PEP 8 identity comparison)
    if download_folder is None:
        cache_base = get_cache(subfolder="shub")
    else:
        cache_base = download_folder

    # The image name is the md5 hash, download if it's not there
    image_name = get_image_name(manifest)
    image_file = "%s/%s" % (cache_base, image_name)
    if not os.path.exists(image_file):
        image_file = client.download_image(manifest=manifest,
                                           download_folder=cache_base)
    else:
        if not bot.is_quiet():  # not --quiet
            print("Image already exists at %s, skipping download."
                  % image_file)

    if not bot.is_quiet():  # not --quiet
        print("Singularity Hub Image Download: %s" % image_file)

    manifest = {'image_file': image_file,
                'manifest': manifest,
                'cache_base': cache_base,
                'image': image}

    if layerfile is not None:
        bot.debug("Writing Singularity Hub image path to %s" % layerfile)
        write_file(layerfile, image_file, mode="w")

    return manifest
def verify_layer(targz):
    '''check that a downloaded layer's sha256 checksum is OK

       correct checksum is in the filename:
       sha256:7d460157dea423c1e16c544fecad995439e12dd50c8db4a8e134fa245cd1846e.tar.gz
    '''
    base = os.path.basename(targz)
    bot.debug("Verifying checksum for layer: %s" % base)

    # Only sha256-named layers can be verified; anything else passes
    if not base.startswith('sha256'):
        bot.warning(
            "Unknown hash function for layer (%s) - will not checksum"
            % base[:5])
        return True

    # Everything between "sha256:" and ".tar.gz" is the hex digest
    expected = base[7:71]

    hasher = hashlib.sha256()
    try:
        with open(targz, 'rb') as fh:
            while True:
                block = fh.read(1048576)
                if not block:
                    break
                hasher.update(block)
    except Exception as e:
        bot.error("Error computing checksum for layer (%s) - %s"
                  % (base, str(e)))
        return False

    computed = hasher.hexdigest()
    bot.debug("Computed checksum %s, expected checksum %s"
              % (computed, expected))

    if computed != expected:
        bot.error("Downloaded layer %s does not match checksum" % base)
        return False
    return True
def read_digests(manifest):
    '''read_layers will return a list of layers from a manifest.
    The function is intended to work with both version 1 and 2
    of the schema
    :param manifest: the manifest to read_layers from
    '''
    # Schema 2.2 stores layers/digest; schema 2.1 stores fsLayers/blobSum
    # https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-2.md#image-manifest
    if 'layers' in manifest:
        layer_key, digest_key = 'layers', 'digest'
        bot.debug('Image manifest version 2.2 found.')
    # https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-1.md#example-manifest
    elif 'fsLayers' in manifest:
        layer_key, digest_key = 'fsLayers', 'blobSum'
        bot.debug('Image manifest version 2.1 found.')
    else:
        bot.error(
            'Improperly formed manifest, layers or fsLayers must be present')
        sys.exit(1)

    # Collect each digest once, preserving first-seen order
    digests = []
    for layer in manifest[layer_key]:
        if digest_key in layer and layer[digest_key] not in digests:
            bot.debug("Adding digest %s" % layer[digest_key])
            digests.append(layer[digest_key])
    return digests
def get_template(template_name):
    '''get_template will return a default template for some function in
    Singularity Python. This is to reduce redundancy if data structures
    are used multiple times, etc. If there is no template, None is
    returned.
    '''
    templates = {
        'tarinfo': {"gid": 0,
                    "uid": 0,
                    "uname": "root",
                    "gname": "root",
                    "mode": 493}
    }

    template_name = template_name.lower()
    template = templates.get(template_name)

    if template is None:
        bot.warning("Cannot find template %s" % template_name)
        return None

    bot.debug("Found template for %s" % template_name)
    return template
def extract_runscript(manifest, includecmd=False):
    '''create_runscript will write a bash script with default "ENTRYPOINT"
    into the base_dir. If includecmd is True, CMD is used instead. For both,
    if the result is found empty, the other is tried, and then a default used.
    :param manifest: the manifest to use to get the runscript
    :param includecmd: overwrite default command (ENTRYPOINT) default is False
    '''
    # Preference order: ENTRYPOINT first unless the user asked for CMD
    commands = ["Entrypoint", "Cmd"]
    if includecmd is True:
        commands.reverse()
    configs = get_configs(manifest, commands)

    # Take the first command that is actually defined
    cmd = None
    source = None
    for command in commands:
        if configs[command] is not None:
            cmd = configs[command]
            source = command
            break

    if cmd is None:
        bot.debug("CMD and ENTRYPOINT not found, skipping runscript.")
        return cmd

    bot.verbose3("Adding Docker %s as Singularity runscript..."
                 % source.upper())

    # If the command is a list, join. (eg ['/usr/bin/python','hello.py']
    if not isinstance(cmd, list):
        cmd = [cmd]
    cmd = " ".join(['"%s"' % x for x in cmd])

    if not RUNSCRIPT_COMMAND_ASIS:
        cmd = 'exec %s "$@"' % cmd
    return "#!/bin/sh\n\n%s\n" % cmd
def get_token_url(self, challenge, expires_in, sort_query_params=False):
    ''' Build token URL from auth challenge '''
    params = parse_bearer_challenge(challenge)

    # A usable challenge must at minimum carry a realm
    if not params or 'realm' not in params:
        bot.debug("update_token: challenge = '%s'" % challenge)
        bot.error("Unrecognized authentication challenge, exiting.")
        sys.exit(1)

    realm = params.pop('realm')
    params['expires_in'] = expires_in

    items = sorted(params.items()) if sort_query_params else params.items()
    query = '&'.join('%s=%s' % pair for pair in items)

    return "{realm}?{query_fragment}".format(realm=realm,
                                             query_fragment=query)
def get_images(self, manifest=None):
    '''get_images will return a list of layers from a manifest.
    The function is intended to work with both version 1 and 2 of the
    schema. All layers (including redundant) are returned.

    For version 1 manifests: extraction is reversed

    :param manifest: the manifest to read_layers from
    '''
    if manifest is None:
        self.update_manifests()
        manifest = self.manifest

    # Defaults correspond to schema version 2.2
    layer_key = 'layers'
    digest_key = 'digest'

    # Docker manifest-v2-2.md#image-manifest
    if 'layers' in manifest:
        bot.debug('Image manifest version 2.2 found.')
    # Docker manifest-v2-1.md#example-manifest  # noqa
    elif 'fsLayers' in manifest:
        layer_key = 'fsLayers'
        digest_key = 'blobSum'
        bot.debug('Image manifest version 2.1 found.')
    else:
        bot.error("Improperly formed manifest, "
                  "layers, manifests, or fsLayers must be present")
        sys.exit(1)

    # Keep every digest, duplicates included
    digests = []
    for layer in manifest[layer_key]:
        if digest_key in layer:
            bot.debug("Adding digest %s" % layer[digest_key])
            digests.append(layer[digest_key])

    # Reverse layer order for manifest version 1.0
    if self.reverseLayers is True:
        bot.debug('v%s manifest, reversing layers' % self.schemaVersion)
        digests.reverse()

    return digests
def main():
    '''this function will run a docker import, returning a list of layers
    and environmental variables and metadata to the metadata base
    '''
    # The uri prefix (docker:// or shub://) selects the import backend
    container = getenv("SINGULARITY_CONTAINER", required=True)
    image_uri = get_image_uri(container, quiet=True)
    container = remove_image_uri(container)

    ##############################################################################
    # Docker Image ###############################################################
    ##############################################################################

    if image_uri == "docker://":

        bot.debug("\n*** STARTING DOCKER IMPORT PYTHON ****")

        from sutils import basic_auth_header
        from defaults import LAYERFILE

        bot.debug(
            "Docker layers and (env,labels,runscript) will be written to: %s"
            % LAYERFILE)

        # Registry credentials are optional; both parts must be present
        # before an auth header is constructed
        username = getenv("SINGULARITY_DOCKER_USERNAME")
        password = getenv("SINGULARITY_DOCKER_PASSWORD", silent=True)

        auth = None
        if username is not None and password is not None:
            auth = basic_auth_header(username, password)

        from docker.main import IMPORT
        manifest = IMPORT(auth=auth,
                          image=container,
                          layerfile=LAYERFILE)

    ##############################################################################
    # Singularity Hub ############################################################
    ##############################################################################

    elif image_uri == "shub://":

        bot.debug("\n*** STARTING SINGULARITY HUB IMPORT PYTHON ****")

        from defaults import LAYERFILE, LABELFILE
        from shub.main import IMPORT
        IMPORT(image=container,
               layerfile=LAYERFILE,
               labelfile=LABELFILE)

    else:
        # Any other (or missing) uri is unsupported
        bot.error(
            "uri %s is not a currently supported uri for import. Exiting."
            % image_uri)
        sys.exit(1)
def ADD(key, value, jsonfile, force=False):
    '''ADD will write or update a key in a json file
    :param key: the key to add (normalized with format_keyname)
    :param value: the value to store
    :param jsonfile: the json file to create or update
    :param force: if True, overwrite an existing key (default False)
    '''
    key = format_keyname(key)

    bot.debug("Adding label: '%s' = '%s'" % (key, value))
    bot.debug("ADD %s from %s" % (key, jsonfile))

    if os.path.exists(jsonfile):
        contents = read_json(jsonfile)
        if key in contents:
            bot.debug('Warning, %s is already set. Overwrite is set to %s'
                      % (key, force))
            # truthiness test instead of `force == True` (PEP 8)
            if force:
                contents[key] = value
            else:
                bot.error('%s found in %s and overwrite set to %s.'
                          % (key, jsonfile, force))
                sys.exit(1)
        else:
            contents[key] = value
    else:
        contents = {key: value}

    bot.debug('%s is %s' % (key, value))
    write_json(contents, jsonfile)
    return value
def DELETE(key, jsonfile):
    '''DELETE will remove a key from a json file
    '''
    key = format_keyname(key)
    bot.debug("DELETE %s from %s" % (key, jsonfile))

    if not os.path.exists(jsonfile):
        bot.error("Cannot find %s, exiting." % jsonfile)
        sys.exit(1)

    contents = read_json(jsonfile)

    if key not in contents:
        bot.debug('Warning, %s not found in %s' % (key, jsonfile))
        return False

    del contents[key]
    if contents:
        write_json(contents, jsonfile)
    else:
        # Nothing left in the file: remove it entirely
        bot.debug('%s is empty, deleting.' % jsonfile)
        os.remove(jsonfile)
    return True
def get_images(self, manifest=None):
    '''get_images will return a list of layers from a manifest.
    The function is intended to work with both version 1 and 2
    of the schema
    :param manifest: the manifest to read_layers from
    '''
    if manifest is None:
        self.update_manifests()
        manifest = self.manifestv1

    # https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-2.md#image-manifest  # noqa
    if 'layers' in manifest:
        layer_key, digest_key = 'layers', 'digest'
        bot.debug('Image manifest version 2.2 found.')
    # https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-1.md#example-manifest  # noqa
    elif 'fsLayers' in manifest:
        layer_key, digest_key = 'fsLayers', 'blobSum'
        bot.debug('Image manifest version 2.1 found.')
    else:
        msg = "Improperly formed manifest, "
        msg += "layers or fsLayers must be present"
        bot.error(msg)
        sys.exit(1)

    # Collect each digest once, preserving first-seen order
    digests = []
    for layer in manifest[layer_key]:
        if digest_key in layer and layer[digest_key] not in digests:
            bot.debug("Adding digest %s" % layer[digest_key])
            digests.append(layer[digest_key])
    return digests
def IMPORT(image, auth=None, layerfile=None):
    '''IMPORT is the main script that will obtain docker layers,
    runscript information (either entrypoint or cmd), and environment
    and return a list of tarballs to extract into the image
    :param auth: if needed, an authentication header (default None)
    :param layerfile: The file to write layers to extract into
    :returns: dict with layers, image, manifests, cache base, metadata
    '''
    bot.debug("Starting Docker IMPORT, includes env, runscript, and metadata.")
    bot.verbose("Docker image: %s" % image)

    # Does the user want to override default of using ENTRYPOINT?
    if INCLUDE_CMD:
        bot.verbose2("Specified Docker CMD as %runscript.")
    else:
        bot.verbose2("Specified Docker ENTRYPOINT as %runscript.")

    # Input Parsing ----------------------------
    # Parse image name, repo name, and namespace
    client = DockerApiConnection(image=image, auth=auth)

    docker_image_uri = "Docker image path: %s" % client.assemble_uri("/")
    bot.info(docker_image_uri)

    # IMAGE METADATA -------------------------------------------
    # Use Docker Registry API (version 2.0) to get images ids, manifest
    images = client.get_images()

    # DOWNLOAD LAYERS -------------------------------------------
    # Each is a .tar.gz file, obtained from registry with curl
    # Get the cache (or temporary one) for docker
    cache_base = get_cache(subfolder="docker")
    download_client = MultiProcess()

    # Generate a queue of tasks to run with MultiProcess: only layers
    # not already present in the cache are queued for download
    layers = []
    tasks = []
    for ii in range(len(images)):
        image_id = images[ii]
        targz = "%s/%s.tar.gz" % (cache_base, image_id)
        if not os.path.exists(targz):
            tasks.append((client, image_id, cache_base))
        layers.append(targz)

    # Does the user want to change permissions of tar?
    func2 = None
    if PLUGIN_FIXPERMS:
        func2 = change_permissions

    if len(tasks) > 0:
        download_layers = download_client.run(func=download_layer,
                                              func2=func2,
                                              tasks=tasks)

    # Get Docker runscript
    runscript = extract_runscript(manifest=client.manifestv1,
                                  includecmd=INCLUDE_CMD)

    # Add the environment export
    tar_file = extract_metadata_tar(client.manifestv1,
                                    client.assemble_uri(),
                                    runscript=runscript)

    bot.verbose2('Tar file with Docker env and labels: %s' % tar_file)

    # Write all layers to the layerfile
    if layerfile is not None:
        bot.verbose3("Writing Docker layers files to %s" % layerfile)
        write_file(layerfile, "\n".join(layers), mode="w")
        # The metadata tar is appended last so it extracts on top
        if tar_file is not None:
            write_file(layerfile, "\n%s" % tar_file, mode="a")

    # Return additions dictionary
    additions = {"layers": layers,
                 "image": image,
                 "manifest": client.manifest,
                 "manifestv1": client.manifestv1,
                 "cache_base": cache_base,
                 "metadata": tar_file}

    bot.debug("*** FINISHING DOCKER IMPORT PYTHON PORTION ****\n")

    return additions
def end(self):
    '''end records the end time and computes the total runtime in
    seconds since start() was called.'''
    self.end_time = time.time()
    # the original had a duplicated chained assignment:
    # self.runtime = self.runtime = self.end_time - self.start_time
    self.runtime = self.end_time - self.start_time
    bot.debug("Ending multiprocess, runtime: %s sec" % (self.runtime))
def start(self):
    '''Record the wall-clock start time for the multiprocess run.'''
    bot.debug("Starting multiprocess")
    self.start_time = time.time()
def __init__(self, workers=None):
    '''Initialize the pool size, falling back to the project default
    SINGULARITY_WORKERS when no worker count is given.'''
    self.workers = SINGULARITY_WORKERS if workers is None else workers
    bot.debug("Using %s workers for multiprocess." % (self.workers))