def estimate_os(container=None,image_package=None,sudopw=None,return_top=True):
    '''estimate os will compare a package to singularity python's database of
    operating system images, and return the docker image most similar
    :param container: the container to estimate. Only used when image_package
    is not provided: it is either packaged with make_package, or handed to
    container_similarity_vector when the SINGULARITY_HUB environment variable
    is set to anything other than "False".
    :param image_package: the package created from the image to estimate. If
    provided, it is compared directly and container is ignored.
    :param sudopw: passed through to make_package when the container has to be
    packaged here.
    :param return_top: return only the most similar (estimated os) default True
    :returns: the index label with the highest SCORE if return_top is truthy,
    otherwise the full comparison data frame with a single SCORE column.
    '''
    use_hub = False

    if image_package is None:

        # Visualization deployed local or elsewhere: the environment variable
        # is only consulted when no package was handed in.
        use_hub = os.environ.get('SINGULARITY_HUB',"False") != "False"

        if not use_hub:
            image_package = make_package(container,remove_image=True,sudopw=sudopw)

    if use_hub:
        similarity = container_similarity_vector(container1=container)
    else:
        similarity = compare_packages(packages_set1=[image_package])

    # One row per candidate OS after the transpose, with a single score column
    comparison = similarity['files.txt'].transpose()
    comparison.columns = ['SCORE']

    most_similar = comparison['SCORE'].idxmax()
    print("Most similar OS found to be ", most_similar)
    if return_top:
        return most_similar
    return comparison
 def test_compare_packages(self):
     '''compare_packages, given two package zips from tests/data, should
     return a result containing a pandas DataFrame under the key files.txt
     '''
     print("Testing singularity.analysis.compare.compare_packages")
     from singularity.analysis.compare import compare_packages
     pwd = get_installdir()
     pkg1 = "%s/tests/data/busybox-2016-02-16.img.zip" %(pwd)
     pkg2 = "%s/tests/data/cirros-2016-01-04.img.zip" %(pwd)
     comparison = compare_packages(pkg1,pkg2)
     # Dedicated assertions report the offending value on failure, instead
     # of the bare "False is not true" that assertTrue produces.
     self.assertIn('files.txt', comparison)
     self.assertIsInstance(comparison['files.txt'], pandas.DataFrame)
def make_package_tree(matrix=None,
                      labels=None,
                      width=25,
                      height=10,
                      title=None,
                      font_size=None):
    '''make package tree will make a dendrogram comparing a matrix of packages
    :param matrix: a pandas df of packages, with names in index and columns.
    If not a pandas DataFrame, the default compare_packages() result for
    files.txt is used, and the title is set to a default.
    :param labels: a list of labels corresponding to row names, will be
    pulled from rows if not defined
    :param width: width of the figure, passed to figsize (default 25)
    :param height: height of the figure, passed to figsize (default 10)
    :param title: a title for the plot, if not defined, will be left out.
    :param font_size: font size for the leaf (x axis) labels, default 8
    :returns a plot that can be saved with savefig
    '''
    from matplotlib import pyplot as plt
    from scipy.cluster.hierarchy import (dendrogram, linkage)

    if font_size is None:
        font_size = 8.

    if not isinstance(matrix, pandas.DataFrame):
        bot.info(
            "No pandas DataFrame (matrix) of similarities defined, will use default."
        )
        matrix = compare_packages()['files.txt']
        title = 'Docker Library Similarity to Base OS'

    Z = linkage(matrix, 'ward')

    # Identity check: comparing an array-like (numpy array, pandas Index)
    # to None with == is elementwise and cannot be used as a condition.
    if labels is None:
        labels = matrix.index.tolist()

    plt.figure(figsize=(width, height))

    if title is not None:
        plt.title(title)

    plt.xlabel('image index')
    plt.ylabel('distance')
    dendrogram(
        Z,
        leaf_rotation=90.,  # rotates the x axis labels
        leaf_font_size=font_size,  # font size for the x axis labels
        labels=labels)
    return plt
Exemple #4
0
package_directory = '%s/examples/package_image/packages' % (base)
package_set1 = glob("%s/*.zip" % (package_directory))

# Option 3: provide no input args, and default (os) for package_set1 is used

###############################################################################
# Choose another set of packages to compare to
###############################################################################

# Option 1: specify another (same or different) family of packages
package_set2 = get_packages(family='docker-os')

# Option 2: Same as above
# Option 3: Don't specify any packages, use defaults

###############################################################################
# Run the analysis
###############################################################################

# Use your own input arguments...
comparisons = compare_packages(packages_set1=package_set1,
                               packages_set2=package_set2,
                               by="folders.txt")

# Or use defaults (this replaces the result computed above)
comparisons = compare_packages()  # docker-library vs. docker-os,
# by files.txt

# Save to file. The comparison result lives in `comparisons`; the context
# manager makes sure the pickle is flushed and the handle closed.
with open('comparisons.pkl', 'wb') as filey:
    pickle.dump(comparisons, filey)
Exemple #5
0
# A quick example of making a package tree with data derived from calculate_similarity.py
# Set environment variable MESSAGELEVEL to CRITICAL to disable debug output

from singularity.views import make_package_tree
from singularity.analysis.compare import compare_packages
from singularity.package import get_packages
import pickle

# Compare your own data
# NOTE: pickle.load executes whatever the file encodes, so only load a
# comparisons.pkl that you produced yourself.
with open('comparisons.pkl','rb') as filey:
    data = pickle.load(filey)['files.txt']
plt = make_package_tree(matrix=data)

# Compare docker-os to docker-os
package_set1 = get_packages(family='docker-os')
package_set2 = get_packages(family='docker-os')
data = compare_packages(packages_set1=package_set1,
                        packages_set2=package_set2)['files.txt']

plt = make_package_tree(matrix=data)


# Save to file. This is done before showing: once a blocking show() window
# is closed the figure is typically discarded, and a later savefig would
# write an empty image.
plt.savefig('examples/package_tree/docker-os.png')

# Show the plot
plt.show()