def nc05(img,
         n_clusters=5,
         n_neighbors=5,
         gamma=1,
         affinity='nearest_neighbors',
         visualize=True,
         include_spatial=False):
    """
    Normalized-cut (spectral clustering) segmentation of an image.

    Parameters
    ----------
    img : ndarray
        Input image (H, W, C); it is downscaled to 30% before clustering.
    n_clusters : int
        Number of segments to produce.
    n_neighbors : int
        Neighbour count for the k-NN affinity graph (ignored for 'rbf').
    gamma : float
        RBF kernel coefficient (ignored for 'nearest_neighbors').
    affinity : str
        'nearest_neighbors' or 'rbf' — how the affinity matrix is built.
    visualize : bool
        If True, display the label image and save it to "ns05.png".
    include_spatial : bool
        (Bonus) append pixel (row, col) coordinates to each pixel's
        feature vector so spatially distant pixels are less likely
        to be merged.

    Returns
    -------
    ndarray
        (H', W') integer label image at the downscaled resolution.
    """
    # Downscale to 30% so the n_pixels x n_pixels affinity matrix
    # stays tractable.
    img = resize(img, (int(img.shape[0] * 0.3), int(img.shape[1] * 0.3)),
                 anti_aliasing=True)
    n = img.shape[0]
    m = img.shape[1]

    if include_spatial:
        # Append coordinate grids as extra feature channels.
        xx = np.arange(n)
        yy = np.arange(m)
        X, Y = np.meshgrid(yy, xx)
        img = np.concatenate((Y.reshape(n, m, 1), X.reshape(n, m, 1), img),
                             axis=2)
        print("spectral_segment(:include_spatial) img.shape = {}".format(
            img.shape))

    # Flatten to (n_pixels, n_features) for clustering.
    img = img.reshape(-1, img.shape[-1])

    # Notes:
    # gamma is ignored for affinity='nearest_neighbors'
    # n_neighbors is ignored for affinity='rbf'
    # n_jobs = -1 means using all processors :D
    t0 = time.time()
    labels = SpectralClustering(n_clusters=n_clusters,
                                affinity=affinity,
                                gamma=gamma,
                                n_neighbors=n_neighbors,
                                n_jobs=-1,
                                eigen_solver='arpack').fit_predict(img)
    t1 = time.time()
    labels = labels.reshape(n, m)
    if visualize:
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.imshow(labels)
        plt.axis("off")
        plt.show()
        plt.imsave("ns05.png", labels)
    print('Time taken to nc segment {} size image is: {}'.format(
        img.shape, t1 - t0))
    return labels
예제 #2
0
def spectral_segment(img,
                     n_clusters=5,
                     n_neighbors=5,
                     gamma=1,
                     affinity='nearest_neighbors',
                     visualize=False,
                     include_spatial=False):
    """
    Normalized cut (spectral clustering) segmentation of an image.

    Parameters
    ----------
    img : ndarray
        Input image (H, W, C); it is downscaled to 30% before clustering.
    n_clusters : int
        Number of segments to produce.
    n_neighbors : int
        Neighbour count for the k-NN affinity graph (ignored for 'rbf').
    gamma : float
        RBF kernel coefficient (ignored for 'nearest_neighbors').
    affinity : str
        'nearest_neighbors' or 'rbf' — how the affinity matrix is built.
    visualize : bool
        If True, display the label image.
    include_spatial : bool
        (Bonus) append pixel (row, col) coordinates to the feature
        vectors.

    Returns
    -------
    ndarray
        (H', W') integer label image at the downscaled resolution.
    """
    # scipy.misc.imresize was removed in SciPy 1.3; use skimage's resize
    # instead (as the sibling nc05 does). resize returns floats already
    # scaled to [0, 1], so the old "imresize(...) / 255" normalization
    # is no longer needed.
    img = resize(img, (int(img.shape[0] * 0.3), int(img.shape[1] * 0.3)),
                 anti_aliasing=True)
    n = img.shape[0]
    m = img.shape[1]

    if include_spatial:
        # Append coordinate grids as extra feature channels.
        xx = np.arange(n)
        yy = np.arange(m)
        X, Y = np.meshgrid(yy, xx)
        img = np.concatenate((Y.reshape(n, m, 1), X.reshape(n, m, 1), img),
                             axis=2)
        print("spectral_segment(:include_spatial) img.shape = {}".format(
            img.shape))

    # Flatten to (n_pixels, n_features) for clustering.
    img = img.reshape(-1, img.shape[-1])

    # Notes:
    # gamma is ignored for affinity='nearest_neighbors'
    # n_neighbors is ignored for affinity='rbf'
    # n_jobs = -1 means using all processors :D
    labels = SpectralClustering(n_clusters=n_clusters,
                                affinity=affinity,
                                gamma=gamma,
                                n_neighbors=n_neighbors,
                                n_jobs=-1,
                                eigen_solver='arpack').fit_predict(img)
    labels = labels.reshape(n, m)
    if visualize:
        plt.figure(figsize=(12, 12))
        plt.axis('off')
        plt.imshow(labels)

    return labels
예제 #3
0
def ncut_clustering(images, k):
    """
    Segment each image with spectral (normalized-cut) clustering.

    Parameters
    ----------
    images : iterable of ndarray
        RGB images of shape (H, W, 3) with values in 0-255.
    k : int
        NOTE(review): accepted but currently unused — n_clusters is
        hard-coded to 5 below. Confirm whether callers expect k to
        control the cluster count before wiring it through.

    Returns
    -------
    list of ndarray
        One (H, W) integer label image per input image.
    """
    result = []
    # enumerate replaces the original manual "i = 0; ...; i += 1" counter.
    for i, image in enumerate(images):
        print('clustering image ', i)
        # Flatten to (n_pixels, 3) and divide by 255 to normalize the
        # channel values into [0, 1].
        pixels = image.reshape(-1, 3) / 255

        labels = SpectralClustering(
            n_clusters=5,
            affinity='nearest_neighbors',
            n_neighbors=5,
            n_jobs=-1,
            eigen_solver='arpack').fit_predict(pixels)

        # Restore the spatial (H, W) layout of the labels.
        result.append(labels.reshape(image.shape[:-1]))
    return result
예제 #4
0
def main():
    """
    Cluster text records with spectral clustering and save a scatter plot.

    Reads CSV rows (field index 4 holds the text of interest), vectorizes
    the de-duplicated strings, reduces them with truncated SVD, then runs
    SpectralClustering with a decreasing cluster count across a 2x3 grid
    of subplots, saving the figure to "clustering.png".
    """
    # NOTE(review): `fileinput` is called as a context manager here; the
    # stdlib module itself is not callable, so it is presumably aliased at
    # import time (e.g. `from fileinput import input as fileinput`) —
    # confirm against the file's import block.
    with fileinput() as fs:
        # Field 4 of each CSV row, with "AND " / "OF " stripped;
        # the set() de-duplicates before converting back to a list.
        data = list(
            set(r[4].replace('AND ', '').replace('OF ', '')
                for r in reader(fs)))

    # Bag-of-words counts reduced by TruncatedSVD; the code below indexes
    # columns 0-1 for plotting and column 2 for the label, so it relies on
    # the SVD producing 2 components (TruncatedSVD's default) — TODO confirm.
    vec = CountVectorizer(tokenizer=word_tokenize).fit_transform(data)
    svd = TruncatedSVD().fit_transform(vec)
    fig, axes = plt.subplots(2, 3)
    fig.subplots_adjust(wspace=1)

    # One subplot per cluster count: N_CLUSTERS, N_CLUSTERS - 1, ...
    for i, ax in enumerate(np.array(axes).flatten()):
        clustering = SpectralClustering(n_clusters=N_CLUSTERS -
                                        i).fit_predict(svd)
        # Append the cluster label as a third column: rows are (x, y, label).
        labeled = np.append(svd, clustering.reshape(len(data), 1), axis=1)

        # Group the raw strings by their assigned cluster id.
        clusters = defaultdict(set)
        for c, r in zip(clustering, data):
            clusters[c].add(r)
        pprint(clusters)

        for c, titles in clusters.items():
            # others = set(data) - titles
            # Legend label: the cluster's three most frequent words
            # (wordcount is an external helper — presumably returns a
            # Counter, given the .most_common call; verify).
            wc = wordcount(titles)
            # Select the rows of this cluster by the label column.
            t = labeled[labeled[:, 2] == c]
            ax.scatter(t[:, 0],
                       t[:, 1],
                       c=COLORS[c],
                       label=', '.join(w for w, _ in wc.most_common(3)))

        ax.set_title(f'{N_CLUSTERS - i} Clusters')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        # ax.legend(loc='lower center', bbox_to_anchor=(0.0, -0.3))

    # plt.title('Clustering of %d Job Titles' % len(data))
    # plt.show()
    plt.savefig('clustering.png')