def test_get_diff(self):
    """Check that get_diff yields a non-empty result for self.container."""
    print("Testing singularity.analysis.classify.get_diff")
    from singularity.analysis.classify import get_diff

    container_diff = get_diff(self.container)
    has_entries = len(container_diff) > 0
    self.assertTrue(has_entries)
#!/usr/bin/env python

# Example: deriving tags for an image package from within python.

import os

# The message level is set before the singularity import below.
os.environ["MESSAGELEVEL"] = "CRITICAL"

from singularity.analysis.classify import get_diff, get_tags

image_package = "python:3.6.0.img.zip"

# How the algorithm works:
#   1) the package is compared against a set of base OS (provided with shub)
#   2) the most similar OS is subtracted from the image, leaving the
#      "custom" files
#   3) the custom files are organized into a dict keyed by folder name
#   4) the search_folders are returned as tags

# With no folders given, the default tags are the software found in "bin"
tags = get_tags(image_package=image_package)

# The raw "diff" between the image and its base can also be retrieved on its
# own; it is usable in other functions, so it does not have to be calculated
# again.
diff = get_diff(image_package=image_package)

# Other folders of interest can be specified, reusing the diff from above
folders = ["init", "init.d", "bin", "systemd"]
tags = get_tags(search_folders=folders, diff=diff)

# Message recorded with the original example:
# Most similar OS found to be %s debian:7.11
def run_build(build_dir, params, verbose=True, compress_image=False):
    '''Download a repository, build an image from its spec file, and collect
    metadata about the result.

    Steps performed:
      - downloads the repo into build_dir and changes into that directory
      - checks out the requested branch and/or commit, if given
      - builds the image from the spec file with build_from_spec
      - tests, packages and inspects the image, and gathers file metrics
      - optionally gzips the image

    Parameters
    ==========
    build_dir: directory the repo is downloaded to and the build runs in
    params: dictionary of build parameters. Keys read here are repo_url,
            branch, commit, spec_file and debug. The dictionary is updated
            in place: branch defaults to "master" when None, commit is set
            to the current HEAD hash when None or empty, spec_file is
            resolved when it is a symbolic link, and version is added.
            (The original notes list "size" as optional; it is not read
            by this function.)
    verbose: accepted for compatibility; not used in this function body
    compress_image: when exactly True, gzip the image and return the path
                    of the compressed file instead

    Returns
    =======
    dictionary with keys image (path), image_package (path),
    metadata (dict) and params (the updated input dictionary)

    Exits the process (sys.exit(1)) when the spec file is not found in the
    repository or when test_container reports a non-zero return code.
    '''

    # Download the repo and image
    download_repo(repo_url=params['repo_url'],
                  destination=build_dir)

    os.chdir(build_dir)

    # NOTE(review): branch and commit are interpolated into shell command
    # strings unescaped. If they can come from untrusted users, switch to
    # subprocess with an argument list.
    if params['branch'] is not None:
        bot.info('Checking out branch %s' % params['branch'])
        os.system('git checkout %s' % (params['branch']))
    else:
        params['branch'] = "master"

    # Commit
    if params['commit'] not in [None, '']:
        bot.info('Checking out commit %s' % params['commit'])
        os.system('git checkout %s .' % (params['commit']))

    # From here on out commit is used as a unique id, if we don't have one,
    # we use current
    else:
        with os.popen('git log -n 1 --pretty=format:"%H"') as git_log:
            params['commit'] = git_log.read()
        bot.warning("commit not specified, setting to current %s" % params['commit'])

    # Dump some params for the builder, in case it fails after this.
    # The file is opened in a with-block so the handle is always closed.
    passing_params = "/tmp/params.pkl"
    with open(passing_params, 'wb') as params_file:
        pickle.dump(params, params_file)

    # Without a spec file there is nothing to build: report and stop early
    if not os.path.exists(params['spec_file']):
        # Tell the user what is actually there
        present_files = glob("*")
        bot.error("Build file %s not found in repository" % params['spec_file'])
        bot.info("Found files are %s" % "\n".join(present_files))
        # Params have been exported, will be found by log
        sys.exit(1)

    bot.info("Found spec file %s in repository" % params['spec_file'])

    # If the user has a symbolic link, build from the file it points to
    if os.path.islink(params['spec_file']):
        bot.info("%s is a symbolic link." % params['spec_file'])
        params['spec_file'] = os.path.realpath(params['spec_file'])

    # START TIMING
    start_time = datetime.now()
    image = build_from_spec(spec_file=params['spec_file'],  # default will package the image
                            build_dir=build_dir,
                            isolated=True,
                            sandbox=False,
                            debug=params['debug'])

    # Save hash for metadata (also is image name)
    version = get_image_file_hash(image)
    params['version'] = version
    with open(passing_params, 'wb') as params_file:
        pickle.dump(params, params_file)

    # The elapsed time is taken after hashing and re-dumping the params
    final_time = (datetime.now() - start_time).seconds
    bot.info("Final time of build %s seconds." % final_time)

    # Did the container build successfully?
    test_result = test_container(image)
    if test_result['return_code'] != 0:
        bot.error("Image failed to build, cancelling.")
        sys.exit(1)

    # Get singularity version
    singularity_version = get_singularity_version()

    # Package the image metadata (files, folders, etc)
    # NOTE(review): remove_image=True is passed here, yet `image` is still
    # used below (inspect, apps, gzip) - confirm the file remains available.
    image_package = package(image_path=image,
                            spec_path=params['spec_file'],
                            output_folder=build_dir,
                            remove_image=True,
                            verbose=True)

    # Derive software tags by subtracting similar OS
    diff = get_diff(image_package=image_package)

    # Inspect to get labels and other metadata
    cli = Singularity(debug=params['debug'])
    inspect = cli.inspect(image_path=image)

    # Get information on apps
    app_names = cli.apps(image_path=image)
    apps = extract_apps(image_path=image, app_names=app_names)

    # Count file types, and extensions
    counts = dict()
    counts['readme'] = file_counts(diff=diff)
    counts['copyright'] = file_counts(diff=diff, patterns=['copyright'])
    counts['authors-thanks-credit'] = file_counts(
        diff=diff,
        patterns=['authors', 'thanks', 'credit', 'contributors'])
    counts['todo'] = file_counts(diff=diff, patterns=['todo'])
    extensions = extension_counts(diff=diff)

    # Pick the index label of the highest SCORE as the estimated OS
    os_sims = estimate_os(image_package=image_package, return_top=False)
    top_position = os_sims['SCORE'].values.argmax()
    most_similar = os_sims['SCORE'].index.tolist()[top_position]

    metrics = {'build_time_seconds': final_time,
               'singularity_version': singularity_version,
               'singularity_python_version': singularity_python_version,
               'estimated_os': most_similar,
               'os_sims': os_sims['SCORE'].to_dict(),
               'file_counts': counts,
               'file_ext': extensions,
               'inspect': inspect,
               'version': version,
               'apps': apps}

    # Compress Image (only when the flag is exactly True)
    if compress_image is True:
        compressed_image = "%s.gz" % image
        os.system('gzip -c -9 %s > %s' % (image, compressed_image))
        image = compressed_image

    output = {'image': image,
              'image_package': image_package,
              'metadata': metrics,
              'params': params}

    return output
def run_build(build_dir, params, verbose=True):
    '''Download a repository, build an image from its spec file (or from a
    converted Dockerfile), and collect metadata about the result.

    Steps performed:
      - downloads the repo into build_dir and changes into that directory
      - checks out the requested branch and/or commit, if given
      - if the spec file is missing but a Dockerfile exists, runs
        dockerfile_to_singularity on it
      - estimates the image size when none is given (falls back to 800)
      - builds, tests, compresses and packages the image, then gathers
        tags and file metrics

    Parameters
    ==========
    build_dir: directory the repo is downloaded to and the build runs in
    params: dictionary of build parameters. Keys read here are repo_url,
            branch, commit, spec_file, size, padding (only when size is
            None or empty) and debug. The dictionary is updated in place:
            commit is set to the current HEAD hash when None or empty, and
            size is filled in when it was None or empty.
    verbose: accepted for compatibility; not used in this function body

    Returns
    =======
    dictionary with keys image (path of the gzip-compressed image),
    image_package (path), metadata (dict) and params (the updated input)

    Exits the process (sys.exit(1)) when the spec file is not found in the
    repository or when test_container reports return code 255.
    '''

    # Download the repo and image
    download_repo(repo_url=params['repo_url'], destination=build_dir)

    os.chdir(build_dir)

    # NOTE(review): branch and commit are interpolated into shell command
    # strings unescaped. If they can come from untrusted users, switch to
    # subprocess with an argument list.
    if params['branch'] is not None:
        bot.logger.info('Checking out branch %s', params['branch'])
        os.system('git checkout %s' % (params['branch']))

    # Commit
    if params['commit'] not in [None, '']:
        bot.logger.info('Checking out commit %s', params['commit'])
        os.system('git checkout %s .' % (params['commit']))

    # From here on out commit is used as a unique id, if we don't have one,
    # we use current
    else:
        with os.popen('git log -n 1 --pretty=format:"%H"') as git_log:
            params['commit'] = git_log.read()
        bot.logger.warning("commit not specified, setting to current %s",
                           params['commit'])

    # Dump some params for the builder, in case it fails after this.
    # The file is opened in a with-block so the handle is always closed.
    passing_params = "/tmp/params.pkl"
    with open(passing_params, 'wb') as params_file:
        pickle.dump(params, params_file)

    # If there is not a specfile, but is a Dockerfile, try building that
    if not os.path.exists(params['spec_file']) and os.path.exists('Dockerfile'):
        bot.logger.warning("Build file %s not found in repository",
                           params['spec_file'])
        bot.logger.warning(
            "Dockerfile found in repository, will attempt build.")
        dockerfile = dockerfile_to_singularity(dockerfile_path='Dockerfile',
                                               output_dir=build_dir)
        if dockerfile is not None:
            # Banner literal kept on one line so the emitted text is unchanged
            bot.logger.info("""\n -------------------------------------------------------------- Dockerfile -------------------------------------------------------------- \n%s""" % (dockerfile))

    # Now look for spec file; without one there is nothing to build
    if not os.path.exists(params['spec_file']):
        # Tell the user what is actually there
        present_files = glob("*")
        bot.logger.error("Build file %s not found in repository",
                         params['spec_file'])
        bot.logger.info("Found files are %s", "\n".join(present_files))
        # Params have been exported, will be found by log
        sys.exit(1)

    bot.logger.info("Found spec file %s in repository", params['spec_file'])

    # If size is None, estimate it, and fall back to a default of 800
    if params['size'] in [None, '']:
        # Banner literal kept on one line so the emitted text is unchanged
        bot.logger.info("""\n -------------------------------------------------------------- Size not detected for build. Will first try to estimate, and then use default of 800MB padding. If your build still fails, you should try setting the size manually under collection --> edit builder --------------------------------------------------------------------- \n""")

        # Testing estimation of size. Estimation is best effort: any
        # ordinary failure (including a missing 'padding' key) falls back
        # to the default, but SystemExit/KeyboardInterrupt now propagate.
        try:
            params['size'] = estimate_image_size(
                spec_file=os.path.abspath(params['spec_file']),
                sudopw='',
                padding=params['padding'])
            bot.logger.info("Size estimated as %s", params['size'])
        except Exception:
            params['size'] = 800
            bot.logger.info(
                "Size estimation didn't work, using default %s",
                params['size'])

    # START TIMING
    # NOTE(review): this second chdir repeats the one above; with a relative
    # build_dir it would resolve from inside build_dir - confirm callers
    # always pass an absolute path.
    os.chdir(build_dir)
    start_time = datetime.now()
    image = build_from_spec(
        spec_file=params['spec_file'],  # default will package the image
        size=params['size'],
        sudopw='',  # with root should not need sudo
        build_dir=build_dir,
        debug=params['debug'])

    final_time = (datetime.now() - start_time).seconds
    bot.logger.info("Final time of build %s seconds.", final_time)

    # Did the container build successfully?
    test_result = test_container(image)
    if test_result['return_code'] == 255:
        bot.logger.error("Image failed to bootstrap, cancelling build.")
        sys.exit(1)

    # Compress image (done before packaging, which is asked to remove it)
    compressed_image = "%s.img.gz" % image
    os.system('gzip -c -9 %s > %s' % (image, compressed_image))

    # Get singularity version; 2.2.x is flagged to package() as old
    singularity_version = get_singularity_version()
    old_version = singularity_version.startswith('2.2')

    # Package the image metadata (files, folders, etc)
    image_package = package(image_path=image,
                            spec_path=params['spec_file'],
                            output_folder=build_dir,
                            sudopw='',
                            remove_image=True,
                            verbose=True,
                            old_version=old_version)

    # Derive software tags by subtracting similar OS
    diff = get_diff(image_package=image_package)

    # Get tags for services, executables
    interesting_folders = ['init', 'init.d', 'bin', 'systemd']
    tags = get_tags(search_folders=interesting_folders, diff=diff)

    # Count file types, and extensions
    counts = dict()
    counts['readme'] = file_counts(diff=diff)
    counts['copyright'] = file_counts(diff=diff, patterns=['copyright'])
    counts['authors-thanks-credit'] = file_counts(
        diff=diff, patterns=['authors', 'thanks', 'credit'])
    counts['todo'] = file_counts(diff=diff, patterns=['todo'])
    extensions = extension_counts(diff=diff)

    # The index label of the highest SCORE is used as the estimated OS
    os_sims = estimate_os(image_package=image_package, return_top=False)
    most_similar = os_sims['SCORE'].idxmax()

    metrics = {
        'size': params['size'],
        'build_time_seconds': final_time,
        'singularity_version': singularity_version,
        'singularity_python_version': singularity_python_version,
        'estimated_os': most_similar,
        'os_sims': os_sims['SCORE'].to_dict(),
        'tags': tags,
        'file_counts': counts,
        'file_ext': extensions
    }

    output = {
        'image': compressed_image,
        'image_package': image_package,
        'metadata': metrics,
        'params': params
    }

    return output