Python ClusteringSaveResults.clustDfFromRes Examples

Programming Language: Python

Method/Function: clustDfFromRes

Examples at hotexamples.com: 2

Python ClusteringSaveResults.clustDfFromRes - 2 examples found. These are the top rated real world Python examples of ClusteringSaveResults.clustDfFromRes extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

to_csv(2)

clustDfFromRes(1)

clusterResults(1)

to_csv1(1)

Example #1

Show file

File: Clustering.py Project: Nik0l/UTemPro

def clusterData(data, clust, results, to_plot):
    """Cluster ``data`` with the algorithm named by ``clust['clustering_type']``.

    Supported values are ``'kmeans'``, ``'spectral'``, ``'birch'``,
    ``'dbscan'``, ``'affinity_propagation'``, ``'ward'`` and
    ``'average_linkage'``. Any other value skips fitting and goes straight to
    the result-building step with ``results`` untouched.

    Parameters
    ----------
    data :
        Samples handed to the estimator's ``fit`` (and to
        ``kneighbors_graph`` for the agglomerative variants).
    clust : dict
        Run configuration. Reads ``'clustering_type'``, ``'n_clusters'`` (for
        the algorithms that take a cluster count) and ``'exp'`` (forwarded to
        ``plot.PlotData`` when plotting). For k-means, ``'centers'`` is
        written with the fitted cluster centres.
    results : dict
        Updated in place: ``'cluster_labels'`` receives the fitted labels.
    to_plot :
        When truthy, the fitted model is plotted via ``plot.PlotData``.

    Returns
    -------
    tuple
        ``(df, stats)`` as produced by ``csr.clustDfFromRes(results)`` and
        ``csr.clusterResults(df, clust)``.
    """
    plot_sample_size = 6000
    method = clust['clustering_type']
    model = None

    if method == 'kmeans':
        # TODO kmeans works well even on 2.000.000 questions
        model = KMeans(init='k-means++', n_clusters=clust['n_clusters'], n_init=10)
    elif method == 'spectral':
        model = cluster.SpectralClustering(n_clusters=clust['n_clusters'],
                                           eigen_solver='arpack',
                                           affinity="nearest_neighbors")
    elif method == 'birch':
        # Every other algorithm takes its cluster count from ``clust``; do the
        # same here and only fall back to the legacy ``results`` location.
        if 'n_clusters' in clust:
            n_clusters = clust['n_clusters']
        else:
            n_clusters = results['n_clusters']
        model = cluster.Birch(n_clusters=n_clusters)
    elif method == 'dbscan':
        model = cluster.DBSCAN(eps=.2)
    elif method == 'affinity_propagation':
        model = cluster.AffinityPropagation(damping=.9, preference=-200)
    elif method in ('ward', 'average_linkage'):
        # k-nearest-neighbour connectivity constraint for the agglomerative
        # clustering, symmetrised so that the graph is undirected.
        connectivity = kneighbors_graph(data, n_neighbors=10, include_self=False)
        connectivity = 0.5 * (connectivity + connectivity.T)
        if method == 'ward':
            model = cluster.AgglomerativeClustering(n_clusters=clust['n_clusters'],
                                                    linkage='ward',
                                                    connectivity=connectivity)
        else:
            model = cluster.AgglomerativeClustering(
                linkage="average", affinity="cityblock",
                n_clusters=clust['n_clusters'],
                connectivity=connectivity)

    if model is not None:
        model.fit(data)
        # Store the labels for every algorithm so the result-building step
        # below never works on missing or stale labels.
        results['cluster_labels'] = model.labels_
        if method == 'kmeans':
            clust['centers'] = model.cluster_centers_
        elif method == 'birch':
            # Single pre-formatted argument: prints the same text on Python 2 and 3.
            print('number of entries clustered %d' % len(results['cluster_labels']))
        # Plotting is optional for every algorithm, not just k-means.
        if to_plot:
            plot.PlotData(data, model, plot_sample_size, clust['exp'])

    df = csr.clustDfFromRes(results)
    stats = csr.clusterResults(df, clust)
    return df, stats

Example #2

Show file

def clusterData(data, clust, results, to_plot):
    """Fit the clustering algorithm selected in ``clust`` and summarise it.

    ``clust['clustering_type']`` chooses between ``'kmeans'``,
    ``'spectral'``, ``'birch'``, ``'dbscan'``, ``'affinity_propagation'``,
    ``'ward'`` and ``'average_linkage'``. An unrecognised value fits nothing
    and the summary is built from ``results`` as passed in.

    Parameters
    ----------
    data :
        Samples passed to the estimator's ``fit`` (and to
        ``kneighbors_graph`` for the two agglomerative variants).
    clust : dict
        Configuration of the run. ``'clustering_type'`` is always read;
        ``'n_clusters'`` is read by the algorithms that need a cluster count;
        ``'exp'`` is forwarded to ``plot.PlotData`` when plotting. K-means
        additionally stores its fitted centres under ``'centers'``.
    results : dict
        Mutated in place: the fitted labels are stored under
        ``'cluster_labels'``.
    to_plot :
        Truthy to plot the fitted model with ``plot.PlotData``.

    Returns
    -------
    tuple
        ``(df, stats)`` where ``df`` comes from ``csr.clustDfFromRes(results)``
        and ``stats`` from ``csr.clusterResults(df, clust)``.
    """
    plot_sample_size = 6000
    clustering_type = clust['clustering_type']
    estimator = None

    if clustering_type == 'kmeans':
        # TODO kmeans works well even on 2.000.000 questions
        estimator = KMeans(init='k-means++',
                           n_clusters=clust['n_clusters'],
                           n_init=10)
    elif clustering_type == 'spectral':
        estimator = cluster.SpectralClustering(
            n_clusters=clust['n_clusters'],
            eigen_solver='arpack',
            affinity="nearest_neighbors")
    elif clustering_type == 'birch':
        # The cluster count lives in ``clust`` for all other algorithms; use
        # it here too, keeping ``results`` only as a legacy fallback.
        if 'n_clusters' in clust:
            birch_clusters = clust['n_clusters']
        else:
            birch_clusters = results['n_clusters']
        estimator = cluster.Birch(n_clusters=birch_clusters)
    elif clustering_type == 'dbscan':
        estimator = cluster.DBSCAN(eps=.2)
    elif clustering_type == 'affinity_propagation':
        estimator = cluster.AffinityPropagation(damping=.9,
                                                preference=-200)
    elif clustering_type in ('ward', 'average_linkage'):
        # Both agglomerative variants are constrained by the same
        # k-nearest-neighbour graph, made symmetric before use.
        connectivity = kneighbors_graph(data,
                                        n_neighbors=10,
                                        include_self=False)
        connectivity = 0.5 * (connectivity + connectivity.T)
        if clustering_type == 'ward':
            estimator = cluster.AgglomerativeClustering(
                n_clusters=clust['n_clusters'],
                linkage='ward',
                connectivity=connectivity)
        else:
            estimator = cluster.AgglomerativeClustering(
                linkage="average",
                affinity="cityblock",
                n_clusters=clust['n_clusters'],
                connectivity=connectivity)

    if estimator is not None:
        estimator.fit(data)
        if clustering_type == 'kmeans':
            clust['centers'] = estimator.cluster_centers_
        # Record labels for every algorithm (spectral and affinity
        # propagation included) so the summary reflects this fit.
        results['cluster_labels'] = estimator.labels_
        if clustering_type == 'birch':
            # One pre-formatted argument keeps the output identical on
            # Python 2 and Python 3.
            print('number of entries clustered %d'
                  % len(results['cluster_labels']))
        # Respect ``to_plot`` for all algorithms, not only k-means.
        if to_plot:
            plot.PlotData(data, estimator, plot_sample_size, clust['exp'])

    df = csr.clustDfFromRes(results)
    stats = csr.clusterResults(df, clust)
    return df, stats