Esempio n. 1
0
def search_cluster(keyword):
    """Crawl images for *keyword*, then cluster them by fingerprint.

    Steps, in order: search and download with a ``crawler`` instance, read
    the images from ``downloads/<keyword>``, compute fingerprints with the
    model from ``calc.get_model()``, compress them with PCA, cluster them,
    create the cluster link directories and plot the clusters.

    Args:
        keyword: Search term. It is also used as the sub-directory name
            under ``downloads/`` that the images are read from (assumes the
            crawler stores its downloads there -- TODO confirm).

    Raises:
        SystemExit: If searching or downloading is interrupted from the
            keyboard (exit status 0) or fails with an error (exit status 1).
            The crawler is stopped first in both cases.
    """
    directory = 'downloads/%s' % keyword

    print('Starting crawler')

    searcher = crawler()
    try:
        print('Searching for %s' % keyword)
        searcher.search(keyword)
        print('Downloading')
        searcher.download(32)
    except KeyboardInterrupt:
        # A user abort is not an error: stop the crawler and leave quietly.
        searcher.stop()
        sys.exit(0)
    except Exception as err:
        # Report the failure and exit non-zero so callers and shell scripts
        # can tell a failed crawl from a successful one.
        searcher.stop()
        print('Crawling failed for %s: %r' % (keyword, err), file=sys.stderr)
        sys.exit(1)

    images = icio.read_images(directory, size=(224, 224))

    # Create Keras NN model.
    model = calc.get_model()

    # Feed images through the model and extract fingerprints (feature vectors).
    print('Feeding images to the neural network to extract features')
    fingerprints = calc.fingerprints(images, model)

    # Optionally run a PCA on the fingerprints to compress the dimensions. Use a
    # cumulative explained variance ratio of 0.95.
    fingerprints = calc.pca(fingerprints, n_components=0.95)

    # Run clustering on the fingerprints. Select clusters with similarity index
    # sim=0.5.
    clusters = calc.cluster(fingerprints, sim=0.5)

    # Create dirs with links to images. Dirs represent the clusters the images
    # belong to.
    postproc.make_links(clusters, directory + '/imagecluster/clusters')

    # Plot images arranged in clusters.
    # NOTE(review): the returned figure and axes are not saved or shown here.
    fig, ax = postproc.plot_clusters(clusters, images)
Esempio n. 2
0
def test_low_level_api_and_clustering():
    """Run the low-level pipeline call by call and verify the clustering.

    Uses the low level API (the same functions as get_image_data) inside an
    ``ImagedirCtx`` and compares fingerprint keys and clusters against the
    reference data exposed by the context.
    """
    with ImagedirCtx() as ctx:
        images = icio.read_images(ctx.imagedir, size=(224, 224))
        model = ic.get_model()

        # Raw fingerprints: one 4096-long numpy array per image.
        raw_fps = ic.fingerprints(images, model)
        for vec in raw_fps.values():
            assert isinstance(vec, np.ndarray)
            assert len(vec) == 4096, len(vec)

        # Compress with PCA, then cluster the compressed fingerprints.
        reduced_fps = ic.pca(raw_fps, n_components=0.95)
        clusters = ic.cluster(reduced_fps, sim=0.5)

        expected = ctx.clusters
        assert set(clusters.keys()) == set(expected.keys())
        assert len(reduced_fps.keys()) == len(ctx.image_fns)
        assert set(reduced_fps.keys()) == set(ctx.image_fns)

        # Compare clusters pairwise, ignoring the order of files inside each
        # cluster.
        # NOTE: zip() stops at the shorter sequence, so a differing number of
        # clusters under the same key is not detected by this loop.
        for nimg in expected.keys():
            paired = zip(clusters[nimg], expected[nimg])
            for val_clus, ref_clus in paired:
                msg = f"ref_clus: {ref_clus}, val_clus: {val_clus}"
                assert set(ref_clus) == set(val_clus), msg
Esempio n. 3
0
#!/usr/bin/env python3

# Detailed API example. We show which functions are called inside
# get_image_data() (read_images(), get_model(), fingerprints(), pca(),
# read_timestamps()) and show more options such as time distance scaling.

from imagecluster import calc, io as icio, postproc

##images,fingerprints,timestamps = icio.get_image_data(
##    'pics/',
##    pca_kwds=dict(n_components=0.95),
##    img_kwds=dict(size=(224,224)))

# Settings used by the calls below.
IMAGE_DIR = 'pics/'
IMAGE_SIZE = (224, 224)
PCA_N_COMPONENTS = 0.95  # cumulative explained variance ratio

# Build the in-memory image database first; this helps to feed images to the
# NN model quickly.
images = icio.read_images(IMAGE_DIR, size=IMAGE_SIZE)

# Keras NN model used for the fingerprints.
model = calc.get_model()

# Fingerprints (feature vectors) come from feeding the images through the
# model.
fingerprints = calc.fingerprints(images, model)

# Optional step: compress the fingerprint dimensions with a PCA.
fingerprints = calc.pca(fingerprints, n_components=PCA_N_COMPONENTS)

# Image timestamps are needed to calculate the time distance, which can be
# used in clustering.
timestamps = icio.read_timestamps(IMAGE_DIR)
Esempio n. 4
0
#!/usr/bin/python3

from imagecluster import calc, io as icio, postproc
from ecosia_images import crawler

# Settings used by the calls below.
KEYWORD = 'chilaquiles'
IMAGE_DIR = 'downloads/chilaquiles'
IMAGE_SIZE = (224, 224)
PCA_N_COMPONENTS = 0.95  # cumulative explained variance ratio

# Search for the keyword and download the results.
# NOTE(review): 50 is presumably the number of images to fetch -- confirm
# against the crawler's documentation.
searcher = crawler(naming='hash')
searcher.search(KEYWORD)
searcher.download(50)

images = icio.read_images(IMAGE_DIR, size=IMAGE_SIZE)

# Keras NN model used for the fingerprints.
model = calc.get_model()

# Fingerprints (feature vectors) come from feeding the images through the
# model.
fingerprints = calc.fingerprints(images, model)

print(fingerprints)

# Optional step: compress the fingerprint dimensions with a PCA.
fingerprints = calc.pca(fingerprints, n_components=PCA_N_COMPONENTS)

print(fingerprints)

# Image timestamps are needed to calculate the time distance, which can be
# used in clustering.
timestamps = icio.read_timestamps(IMAGE_DIR)

# Run clustering on the fingerprints. Select clusters with similarity index
# sim=0.5. Mix 80% content distance with 20% timestamp distance (alpha=0.2).