Ejemplo n.º 1
0
def test_api_get_image_data():
    """Call ``icio.get_image_data()`` twice on a temporary image directory.

    The first call uses only the image directory; the second passes every
    optional keyword group to exercise the public API. Afterwards the
    fingerprint keys must match the image file names reported by the context.
    """
    with ImagedirCtx() as ctx:
        # First call with defaults only. According to the original notes this
        # is the run that creates the fingerprints database, so the second
        # call below is expected to be much faster.
        _images, _fingerprints, _timestamps = icio.get_image_data(
            ctx.imagedir)

        # Second call: pass all keyword groups (API test).
        api_kwds = {
            'pca_kwds': {'n_components': 0.95},
            'model_kwds': {'layer': 'fc2'},
            'img_kwds': {'size': (224, 224)},
            'timestamps_kwds': {'source': 'auto'},
        }
        images, fingerprints, timestamps = icio.get_image_data(
            ctx.imagedir, **api_kwds)

        # Same number of entries and the same set of names.
        fingerprint_names = fingerprints.keys()
        assert len(fingerprint_names) == len(ctx.image_fns)
        assert set(fingerprint_names) == set(ctx.image_fns)
Ejemplo n.º 2
0
def main():
    """Fingerprint, cluster and sort the images found in ``IMAGE_PATH``.

    Reads the module-level settings ``IMAGE_PATH``, ``SIMILARITY`` and
    ``ACTION``. Every cluster gets its own zero-padded, numbered directory
    below ``<IMAGE_PATH>/<icio.ic_base_dir>/clusters``; images that belong to
    no cluster are placed directly in that ``clusters`` directory.

    ``ACTION`` selects how files are transferred: ``'copy'`` uses
    ``shutil.copy`` and ``'move'`` uses ``shutil.move``. Any other value
    creates the directories but transfers nothing.

    An already existing ``clusters`` directory is deleted before the new
    layout is written.
    """
    clusters_path = os.path.join(IMAGE_PATH, icio.ic_base_dir, 'clusters')

    # Per the original author's note: calc.fingerprints(), called inside
    # get_image_data(), is the bottleneck; get_image_data() writes the
    # fingerprints to disk and loads them again instead of re-calculating.
    print('\nFingerprinting images...\n')
    images, fingerprints, _ = icio.get_image_data(IMAGE_PATH)
    print('\nImage fingerprinting done.\n')

    # Run clustering on the fingerprints, using SIMILARITY as the similarity
    # index.
    print('\nClustering images...\n')
    clusters = calc.cluster(fingerprints, sim=SIMILARITY)
    print('\nClustering done.\n')

    # Flatten the mapping's values (each a list of clusters) into one list of
    # clusters; the keys are not needed.
    simple_clusters = []
    for cluster_list in clusters.values():
        simple_clusters.extend(cluster_list)

    # Unclustered images: start from all images and drop every clustered one.
    # Updating the set in place avoids rebuilding it once per image.
    unclustered_images = set(images.keys())
    for cluster in simple_clusters:
        unclustered_images.difference_update(cluster)
    unclustered_images = list(unclustered_images)

    # Pick the transfer function once instead of re-testing ACTION per file.
    if ACTION == 'copy':
        print('\nCopying images to clusters...\n')
        transfer = shutil.copy
    elif ACTION == 'move':
        print('\nMoving images to clusters...\n')
        transfer = shutil.move
    else:
        transfer = None

    # Remove existing clusters (if present)
    if os.path.exists(clusters_path):
        shutil.rmtree(clusters_path)

    # Always create the target directory. Without it, a run that yields no
    # clusters would hand shutil.copy()/shutil.move() a non-existent
    # destination, which is then treated as a file name, so every unclustered
    # image would be written onto the same path.
    os.makedirs(clusters_path, exist_ok=True)

    # Transfer images into zero-padded, numbered cluster folders.
    cluster_dir_length = len(str(len(simple_clusters)))
    for i, cluster in enumerate(simple_clusters):
        cluster_name = str(i).zfill(cluster_dir_length)
        cluster_dir = os.path.join(clusters_path, cluster_name)

        os.makedirs(cluster_dir)
        if transfer is None:
            continue
        for image in cluster:
            transfer(os.path.abspath(image), cluster_dir)

    # Transfer unclustered images too, directly into the clusters directory.
    if transfer is not None:
        for image in unclustered_images:
            transfer(os.path.abspath(image), clusters_path)

    print('\nAll done!')
    print('Clustered images can be found in ' + clusters_path + '\n')
Ejemplo n.º 3
0
#!/usr/bin/python3

# Minimal example. Use the convenience function io.get_image_data() without any
# extra arguments.

from imagecluster import calc, io as icio, postproc

# Input image directory and the directory that receives the cluster links.
image_dir = 'downloads/cart icon/'
links_dir = 'downloads/cart icon/imagecluster/clusters'

# Load the image data. Per the original note, calc.fingerprints() (called
# inside get_image_data()) is the slow step; everything else is very fast,
# and the fingerprints are written to disk and re-loaded on later runs rather
# than being calculated again.
images, fingerprints, timestamps = icio.get_image_data(image_dir)

# Cluster the fingerprints with similarity index sim=0.5.
clusters = calc.cluster(fingerprints, sim=0.5)

# One directory per cluster, holding links to the images that belong to it.
postproc.make_links(clusters, links_dir)

# Plot the images arranged by cluster.
postproc.visualize(clusters, images)
Ejemplo n.º 4
0
#!/usr/bin/python3

# Minimal example. Use the convenience function io.get_image_data() without any
# extra arguments.

from imagecluster import calc, io as icio, postproc

# Input image directory and the directory that receives the cluster links.
image_dir = 'pics/'
links_dir = 'pics/imagecluster/clusters'

# Load the image data. Per the original note, calc.fingerprints() (called
# inside get_image_data()) is the slow step; everything else is very fast,
# and the fingerprints are written to disk and re-loaded on later runs rather
# than being calculated again.
images, fingerprints, timestamps = icio.get_image_data(image_dir)

# Cluster the fingerprints with similarity index sim=0.5.
clusters = calc.cluster(fingerprints, sim=0.5)

# One directory per cluster, holding links to the images that belong to it.
postproc.make_links(clusters, links_dir)

# Plot the images arranged by cluster.
postproc.visualize(clusters, images)