def estimate_os(container=None, image_package=None, sudopw=None, return_top=True):
    '''estimate os will compare a package to singularity python's database of
    operating system images, and return the docker image most similar

    :param container: the container to estimate the os for. Only used when
    image_package is not provided: it is either packaged with make_package,
    or (when the environment variable SINGULARITY_HUB is set to anything
    other than "False") handed to container_similarity_vector.
    :param image_package: the package created from the image to estimate. If
    None, it is derived from container as described above.
    :param sudopw: passed through to make_package when a package must be made
    :param return_top: return only the most similar (estimated os) default True
    :returns: the index label with the highest SCORE if return_top is set,
    otherwise the full comparison table with the single column SCORE
    '''
    use_hub = False
    if image_package is None:
        # Visualization deployed local or elsewhere
        use_hub = os.environ.get('SINGULARITY_HUB', "False") != "False"
        if not use_hub:
            image_package = make_package(container,
                                         remove_image=True,
                                         sudopw=sudopw)

    # Both sources are indexed by 'files.txt' and transposed the same way,
    # so only the call that produces the similarities differs.
    if use_hub:
        similarities = container_similarity_vector(container1=container)
    else:
        similarities = compare_packages(packages_set1=[image_package])

    comparison = similarities['files.txt'].transpose()
    comparison.columns = ['SCORE']
    most_similar = comparison['SCORE'].idxmax()
    print("Most similar OS found to be ", most_similar)

    if return_top:
        return most_similar
    return comparison
def test_compare_packages(self):
    '''check that compare_packages returns a result that holds a pandas
    DataFrame under the key files.txt
    '''
    print("Testing")

    # NOTE(review): the module path was missing from this import in the
    # source (it read "from import compare_packages", a syntax error).
    # singularity.analysis.compare is assumed - confirm against the package.
    from singularity.analysis.compare import compare_packages

    pwd = get_installdir()

    # NOTE(review): both paths point at the same tests/data/ folder and name
    # no package file - presumably file names are missing here; confirm.
    pkg1 = "%s/tests/data/" %(pwd)
    pkg2 = "%s/tests/data/" %(pwd)

    comparison = compare_packages(pkg1,pkg2)

    # assertIn / assertIsInstance report the offending value on failure
    self.assertIn('files.txt', comparison)
    self.assertIsInstance(comparison['files.txt'], pandas.DataFrame)
def make_package_tree(matrix=None, labels=None, width=25, height=10, title=None, font_size=None):
    '''make package tree will make a dendrogram comparing a matrix of packages

    :param matrix: a pandas df of packages, with names in index and columns.
    If it is not a pandas DataFrame, the files.txt result of
    compare_packages() called without arguments is used instead, and title
    is replaced with a default.
    :param labels: a list of labels corresponding to row names, will be
    pulled from rows if not defined
    :param width: width of the figure (first value of figsize)
    :param height: height of the figure (second value of figsize)
    :param title: a title for the plot, if not defined, will be left out.
    :param font_size: font size for the x axis labels, default 8.
    :returns: pyplot (with the dendrogram drawn on a new figure) that can be
    saved with savefig
    '''
    from matplotlib import pyplot as plt
    from scipy.cluster.hierarchy import (
        dendrogram,
        linkage
    )

    if font_size is None:
        font_size = 8.

    if not isinstance(matrix, pandas.DataFrame):
        print("No pandas DataFrame (matrix) of similarities defined, will use default.")
        matrix = compare_packages()['files.txt']
        title = 'Docker Library Similarity to Base OS'

    Z = linkage(matrix, 'ward')

    # "is None" (not "== None"): an array-like labels argument would make an
    # equality comparison elementwise and the if statement ambiguous
    if labels is None:
        labels = matrix.index.tolist()

    plt.figure(figsize=(width, height))
    if title is not None:
        plt.title(title)
    plt.xlabel('image index')
    plt.ylabel('distance')
    dendrogram(Z,
               leaf_rotation=90.,          # rotates the x axis labels
               leaf_font_size=font_size,   # font size for the x axis labels
               labels=labels)
    return plt
package_directory = '%s/examples/package_image/packages' % (base)
package_set1 = glob("%s/*.zip" % (package_directory))

# Option 3: provide no input args, and default (os) for package_set1 is used

###############################################################################
# Choose another set of packages to compare to
###############################################################################

# Option 1: specify another (same or different) family of packages
package_set2 = get_packages(family='docker-os')

# Option 2: Same as above

# Option 3: Don't specify any packages, use defaults

###############################################################################
# Run the analysis
###############################################################################

# Use your own input arguments...
comparisons = compare_packages(packages_set1=package_set1,
                               packages_set2=package_set2,
                               by="folders.txt")

# Or use defaults (this replaces the result above)
comparisons = compare_packages() # docker-library vs. docker-os,
                                 # by files.txt

# Save to file. The comparisons object is what gets stored, and the with
# statement makes sure the file is flushed and closed.
with open('comparisons.pkl', 'wb') as filey:
    pickle.dump(comparisons, filey)
# A quick example of making a package tree with data derived from a previous
# compare_packages run that was saved to comparisons.pkl

# Set environment variable MESSAGELEVEL to CRITICAL to disable debug output

from singularity.views import make_package_tree
# NOTE(review): the module path was missing from this import in the source
# (it read "from import compare_packages", a syntax error).
# singularity.analysis.compare is assumed - confirm against the package.
from singularity.analysis.compare import compare_packages
from singularity.package import get_packages
import pickle

# Compare your own data
# NOTE: pickle can execute code on load, only open files you created yourself
with open('comparisons.pkl','rb') as filey:
    data = pickle.load(filey)['files.txt']
plt = make_package_tree(matrix=data)

# Compare docker-os to docker-os
package_set1 = get_packages(family='docker-os')
package_set2 = get_packages(family='docker-os')
data = compare_packages(packages_set1=package_set1,
                        packages_set2=package_set2)['files.txt']
plt = make_package_tree(matrix=data)

# Show the plot interactively with plt.show(), or save to file
plt.savefig('examples/package_tree/docker-os.png')