def test_get_packages(self):
    '''test_get_packages checks how many packages get_packages returns,
    first with no arguments and then for the docker-library family.
    '''
    print("Testing singularity.analysis.utils.get_packages")
    from singularity.package import get_packages

    # (message to print, keyword arguments for get_packages, expected count)
    cases = (
        ("Case 1: Default returns Docker operating systems", {}, 46),
        ("Case 2: Family specified to Docker library",
         {"family": "docker-library"}, 117),
    )
    for message, kwargs, expected_count in cases:
        print(message)
        found = get_packages(**kwargs)
        self.assertEqual(len(found), expected_count)
def compare_packages(packages_set1=None, packages_set2=None, by=None):
    '''compare_packages computes the pairwise similarity between two sets
    of packages. Every package in packages_set1 is compared with every
    package in packages_set2 using calculate_similarity; a package compared
    with itself (same path in both sets) is assigned 1.0 without calling it.

    :param packages_set1: a package file or list of package files. If not
        defined, get_packages('docker-library') is used
    :param packages_set2: a package file or list of package files. If not
        defined, get_packages('docker-os') is used
    :param by: metric or list of metrics to compare by (files.txt and or
        folders.txt). If not defined, ['files.txt'] is used
    :returns: dict keyed by metric; each value is a pandas.DataFrame with one
        row per package in packages_set1 and one column per package in
        packages_set2, labelled by file basename with '.img.zip' removed
    '''
    # Identity checks: "== None" is evaluated elementwise by array-like
    # arguments and then fails when used as a condition.
    if packages_set1 is None:
        packages_set1 = get_packages('docker-library')
    if packages_set2 is None:
        packages_set2 = get_packages('docker-os')
    if by is None:
        by = ['files.txt']

    # Accept a single value wherever a list is expected
    if not isinstance(by, list):
        by = [by]
    if not isinstance(packages_set1, list):
        packages_set1 = [packages_set1]
    if not isinstance(packages_set2, list):
        packages_set2 = [packages_set2]

    # Display names are the same for every metric, so derive them once
    names1 = [os.path.basename(x).replace('.img.zip', '')
              for x in packages_set1]
    names2 = [os.path.basename(x).replace('.img.zip', '')
              for x in packages_set2]

    comparisons = dict()

    for b in by:
        bot.debug("Starting comparisons for %s" % b)
        # Index by full path while filling, relabel with short names after
        df = pandas.DataFrame(index=packages_set1, columns=packages_set2)
        for package1, name1 in zip(packages_set1, names1):
            for package2, name2 in zip(packages_set2, names2):
                if package1 != package2:
                    sim = calculate_similarity(image_package1=package1,
                                               image_package2=package2,
                                               by=b)[b]
                else:
                    sim = 1.0
                bot.debug("%s vs. %s: %s" % (name1, name2, sim))
                df.loc[package1, package2] = sim
        df.index = names1
        df.columns = names2
        comparisons[b] = df
    return comparisons
def test_container_similarity(self):
    '''test_container_similarity exercises the comparison functions in
    singularity.analysis.compare (container_similarity_vector,
    compare_singularity_images, compare_containers, calculate_similarity)
    using self.container and self.comparator.
    '''
    print("Testing singularity.analysis.compare.container_similarity_vector")
    import pandas
    from singularity.analysis.compare import container_similarity_vector
    from singularity.package import get_packages
    packages_set = get_packages('docker-os')[0:2]
    vector = container_similarity_vector(container1=self.container,
                                         custom_set=packages_set)
    self.assertIn('files.txt', vector)
    self.assertIsInstance(vector['files.txt'], pandas.DataFrame)

    print("Testing singularity.analysis.compare.compare_singularity_images")
    from singularity.analysis.compare import compare_singularity_images
    sim = compare_singularity_images(self.container, self.comparator)
    self.assertIsInstance(sim, pandas.DataFrame)
    # Two-sided tolerance: a bare "value - expected < 0.01" also passes for
    # any value far below the expected score.
    self.assertAlmostEqual(sim.loc[self.container, self.comparator],
                           0.4803262269280298,
                           delta=0.01)

    print("Testitng singularity.analysis.compare.compare_containers")
    from singularity.analysis.compare import compare_containers
    comparison = compare_containers(self.container, self.comparator)
    self.assertIn('files.txt', comparison)
    for key in ['total1', 'total2', 'intersect', 'unique2', 'unique1']:
        self.assertIn(key, comparison['files.txt'])

    print("Testing singularity.analysis.compare.calculate_similarity")
    from singularity.analysis.compare import calculate_similarity
    sim = calculate_similarity(self.container, self.comparator)
    self.assertAlmostEqual(sim['files.txt'], 0.4921837537163134, delta=0.01)
def container_similarity_vector(container1=None, packages_set=None, by=None, custom_set=None):
    '''container similarity_vector is similar to compare_packages, but intended
    to compare a container object (singularity image or singularity hub
    container) to a list of packages. The set compared against is chosen in
    this order: custom_set if provided, otherwise packages_set, otherwise the
    default get_packages('docker-os').

    :param container1: singularity image or singularity hub container.
    :param packages_set: the name of a provided package set ('docker-os' or
        'docker-library'), which is looked up with get_packages, or a package
        file / list of package files to use directly
    :param by: metric or list of metrics to compare by (files.txt and or
        folders.txt). If not defined, ['files.txt'] is used
    :param custom_set: a list of package files, used first if provided.
    :returns: dict keyed by metric; each value is a pandas.DataFrame with a
        single row labelled "container" and one column per package, labelled
        by file basename with '.img.zip' removed
    '''
    if custom_set is not None:
        packages_set = custom_set
    elif packages_set is None:
        packages_set = get_packages('docker-os')
    elif isinstance(packages_set, str) and packages_set in ('docker-os', 'docker-library'):
        # A documented family name: resolve it to its list of package files
        # rather than treating the name itself as a package path.
        packages_set = get_packages(packages_set)

    if by is None:
        by = ['files.txt']

    # Accept a single value wherever a list is expected
    if not isinstance(by, list):
        by = [by]
    if not isinstance(packages_set, list):
        packages_set = [packages_set]

    comparisons = dict()

    for b in by:
        bot.debug("Starting comparisons for %s" % b)
        # Columns are full paths while filling, relabelled with short names after
        df = pandas.DataFrame(columns=packages_set)
        for package2 in packages_set:
            sim = calculate_similarity(container1=container1,
                                       image_package2=package2,
                                       by=b)[b]
            package_name = os.path.basename(package2).replace('.img.zip', '')
            bot.debug("container vs. %s: %s" % (package_name, sim))
            df.loc["container", package2] = sim
        df.columns = [os.path.basename(x).replace('.img.zip', '')
                      for x in df.columns.tolist()]
        comparisons[b] = df
    return comparisons
# A quick example of making a package tree with data derived from calculate_similarity.py
# Set environment variable MESSAGELEVEL to CRITICAL to disable debug output

from singularity.views import make_package_tree
from singularity.analysis.compare import compare_packages
from singularity.package import get_packages
import pickle

# Compare your own data
# The pickle is expected to hold a dict with a 'files.txt' entry (the form
# returned by compare_packages). Only unpickle files you created yourself:
# pickle can execute arbitrary code while loading.
with open('comparisons.pkl', 'rb') as filey:
    data = pickle.load(filey)['files.txt']
plt = make_package_tree(matrix=data)

# Compare docker-os to docker-os
package_set1 = get_packages(family='docker-os')
package_set2 = get_packages(family='docker-os')
data = compare_packages(packages_set1=package_set1,
                        packages_set2=package_set2)['files.txt']
plt = make_package_tree(matrix=data)

# Show the plot
plt.show()

# or save to file
# NOTE(review): plt is presumably matplotlib.pyplot; if so, saving after
# show() can write an empty figure, so use one or the other - confirm.
plt.savefig('examples/package_tree/docker-os.png')