def estimate_assignments(graph, n_communities, n_components=None, method="gc", metric=None):
    """Given a graph and n_communities, sweep over covariance structures.

    Not deterministic; does not use graph BIC or MSE to choose the best model.

    1. Does an embedding on the raw graph
    2. Runs GaussianCluster on the embedding; this sweeps covariance structure
       for the given n_communities
    3. Returns n_parameters based on the number used in GaussianCluster

    method : str
        "gc" : use graspy GaussianCluster (defaults to full covariance)
        "bc" : tommyclust with defaults, sweeping covariance, agglomeration,
               and linkage
        "bc-metric" : tommyclust with a custom metric, still sweeping everything
        "bc-none" : mostly for testing; should behave just like GaussianCluster
    """
    embed_graph = graph.copy()
    latent = AdjacencySpectralEmbed(n_components=n_components).fit_transform(embed_graph)
    if isinstance(latent, tuple):
        # directed graph: concatenate the out and in latent positions
        latent = np.concatenate(latent, axis=1)
    if method == "gc":
        gc = GaussianCluster(
            min_components=n_communities,
            max_components=n_communities,
            covariance_type="all",
        )
        vertex_assignments = gc.fit_predict(latent)
        n_params = gc.model_._n_parameters()
    elif method == "bc":
        vertex_assignments, n_params = brute_cluster(latent, [n_communities])
    elif method == "bc-metric":
        vertex_assignments, n_params = brute_cluster(latent, [n_communities], metric=metric)
    elif method == "bc-none":
        vertex_assignments, n_params = brute_cluster(
            latent,
            [n_communities],
            affinities=["none"],
            linkages=["none"],
            covariance_types=["full"],
        )
    else:
        raise ValueError("Unspecified clustering method")
    return (vertex_assignments, n_params)
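# A minimal usage sketch (added here; the simulation and parameter choices are
# assumptions, not from the original code): draw a two-block SBM with graspy's
# simulator and recover assignments via the "gc" path above.
if __name__ == "__main__":
    from graspy.simulations import sbm

    example_graph = sbm([100, 100], [[0.3, 0.05], [0.05, 0.3]])
    example_labels, example_n_params = estimate_assignments(
        example_graph, n_communities=2, method="gc"
    )
    print(example_labels[:10], example_n_params)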
def brute_graspy_cluster(Ns, x, covariance_types, ks, c_true, savefigs=None, graphList=None):
    if graphList is not None and 'all_bics' in graphList:
        _, ((ax0, ax1), (ax2, ax3)) = plt.subplots(
            2, 2, sharey='row', sharex='col', figsize=(10, 10)
        )
    titles = ['full', 'tied', 'diag', 'spherical']
    best_bic = -np.inf
    for N in Ns:
        bics = np.zeros([len(ks), len(covariance_types), N])
        aris = np.zeros([len(ks), len(covariance_types), N])
        for i in np.arange(N):
            graspy_gmm = GaussianCluster(
                min_components=ks[0],
                max_components=ks[len(ks) - 1],
                covariance_type=covariance_types,
                random_state=i,
            )
            c_hat, ari = graspy_gmm.fit_predict(x, y=c_true)
            bic_values = -graspy_gmm.bic_.values
            ari_values = graspy_gmm.ari_.values
            bics[:, :, i] = bic_values
            aris[:, :, i] = ari_values
            bic = bic_values.max()
            if bic > best_bic:
                idx = np.argmax(bic_values)
                idxs = np.unravel_index(idx, bic_values.shape)
                best_ari_bic = ari
                best_bic = bic
                best_k_bic = ks[idxs[0]]
                best_cov_bic = titles[3 - idxs[1]]
                best_c_hat_bic = c_hat
        max_bics = np.amax(bics, axis=2)
        title = 'N=' + str(N)
        if graphList is not None and 'all_bics' in graphList:
            ax0.plot(np.arange(1, len(ks) + 1), max_bics[:, 3])
            ax1.plot(np.arange(1, len(ks) + 1), max_bics[:, 2], label=title)
            ax2.plot(np.arange(1, len(ks) + 1), max_bics[:, 1])
            ax3.plot(np.arange(1, len(ks) + 1), max_bics[:, 0])

    if graphList is not None and 'best_bic' in graphList:
        # Plot with best BIC
        if c_true is None:
            best_ari_bic_str = 'NA'
        else:
            best_ari_bic_str = '%1.3f' % best_ari_bic
        fig_bestbic = plt.figure(figsize=(8, 8))
        ax_bestbic = fig_bestbic.add_subplot(1, 1, 1)
        # ptcolors = [colors[i] for i in best_c_hat_bic]
        ax_bestbic.scatter(x[:, 0], x[:, 1], c=best_c_hat_bic)
        # mncolors = [colors[i] for i in np.arange(best_k_bic)]
        mncolors = [i for i in np.arange(best_k_bic)]
        ax_bestbic.set_title(
            "py(agg-gmm) BIC %3.0f from " % best_bic + str(best_cov_bic)
            + " k=" + str(best_k_bic) + ' ari=' + best_ari_bic_str
        )  # + "iter=" + str(best_iter_bic)
        ax_bestbic.set_xlabel("First feature")
        ax_bestbic.set_ylabel("Second feature")
        if savefigs is not None:
            plt.savefig(savefigs + '_python_bestbic.jpg')

    if graphList is not None and 'all_bics' in graphList:
        # Plot of all BICs
        titles = ['full', 'tied', 'diag', 'spherical']
        # ax0.set_title(titles[0], fontsize=20, fontweight='bold')
        # ax0.set_ylabel('BIC', fontsize=20)
        ax0.locator_params(axis='y', tight=True, nbins=4)
        ax0.set_yticklabels(ax0.get_yticks(), fontsize=14)
        # ax1.set_title(titles[1], fontsize=20, fontweight='bold')
        legend = ax1.legend(loc='best', title='Number of\nRuns', fontsize=12)
        plt.setp(legend.get_title(), fontsize=14)
        # ax2.set_title(titles[2], fontsize=20, fontweight='bold')
        # ax2.set_xlabel('Number of components', fontsize=20)
        ax2.set_xticks(np.arange(0, 21, 4))
        ax2.set_xticklabels(ax2.get_xticks(), fontsize=14)
        # ax2.set_ylabel('BIC', fontsize=20)
        ax2.locator_params(axis='y', tight=True, nbins=4)
        ax2.set_yticklabels(ax2.get_yticks(), fontsize=14)
        # ax3.set_title(titles[3], fontsize=20, fontweight='bold')
        # ax3.set_xlabel('Number of components', fontsize=20)
        ax3.set_xticks(np.arange(0, 21, 4))
        ax3.set_xticklabels(ax3.get_xticks(), fontsize=14)
        if savefigs is not None:
            plt.savefig('.\\figures\\25_6_19_paperv2\\' + savefigs + '_graspy_bicplot2.jpg')
        plt.show()

    return best_c_hat_bic, best_cov_bic, best_k_bic, best_ari_bic, best_bic
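# A hypothetical usage sketch (the toy data and parameter grid are assumptions,
# and it relies on the same graspy behavior the function above assumes, i.e.
# fit_predict returning (labels, ari) when y is passed): sweep k = 2..6 over
# all four covariance types on a small Gaussian mixture, with 5 restarts.
if __name__ == "__main__":
    from sklearn.datasets import make_blobs

    X_toy, y_toy = make_blobs(n_samples=300, n_features=2, centers=3, random_state=0)
    toy_c_hat, toy_cov, toy_k, toy_ari, toy_bic = brute_graspy_cluster(
        Ns=[5],
        x=X_toy,
        covariance_types=['spherical', 'diag', 'tied', 'full'],
        ks=np.arange(2, 7),
        c_true=y_toy,
    )
    print(toy_k, toy_cov, toy_ari)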
def lse(adj, n_components, regularizer=None):
    # regularizer is accepted for API symmetry but not used below
    adj = pass_to_ranks(adj)
    lap = to_laplace(adj, form="R-DAD")
    ase = AdjacencySpectralEmbed(n_components=n_components)
    latent = ase.fit_transform(lap)
    latent = np.concatenate(latent, axis=-1)
    return latent


n_components = None
k = 30

latent = lse(adj, n_components, regularizer=None)
gmm = GaussianCluster(min_components=k, max_components=k)
pred_labels = gmm.fit_predict(latent)
stacked_barplot(pred_labels, class_labels, palette="tab20")

# %% [markdown]
# # verify on sklearn toy dataset

from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans

X, y = make_blobs(n_samples=200, n_features=3, centers=None, cluster_std=3)
# y = y.astype(int).astype(str)

data_df = pd.DataFrame(
    data=np.concatenate((X, y[:, np.newaxis]), axis=-1),
    columns=("Dim 0", "Dim 1", "Dim 2", "Labels"),
)
# data_df["Labels"] = data_df["Labels"].values.astype("<U10")
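# %%
# A quick check (added here, not in the original cell): fit GaussianCluster on
# the toy blobs and verify that the selected number of components is close to
# the three centers make_blobs uses by default.
from sklearn.metrics import adjusted_rand_score

toy_gc = GaussianCluster(min_components=1, max_components=6, covariance_type="all")
toy_pred = toy_gc.fit_predict(X)
print("selected k:", toy_gc.model_.n_components)
print("ARI vs. truth:", adjusted_rand_score(y, toy_pred))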
    # - BIC
    bic_ = 2 * likeli - temp_n_params * np.log(n)
    # - ARI
    ari_ = ari(true_labels, temp_c_hat)

    return [combo, likeli, ari_, bic_]


np.random.seed(16661)

A = binarize(right_adj)
X_hat = np.concatenate(ASE(n_components=3).fit_transform(A), axis=1)
n, d = X_hat.shape

gclust = GCLUST(max_components=15)
est_labels = gclust.fit_predict(X_hat)

loglikelihoods = [np.sum(gclust.model_.score_samples(X_hat))]
combos = [None]
aris = [ari(right_labels, est_labels)]
bic = [gclust.model_.bic(X_hat)]

unique_labels = np.unique(est_labels)
class_idx = np.array([np.where(est_labels == u)[0] for u in unique_labels])

for k in range(len(unique_labels)):
    for combo in list(combinations(np.unique(est_labels), k + 1)):
        combo = np.array(list(combo)).astype(int)
        combos.append(combo)
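# Side note (a hedged sketch, added here): the hand-computed bic_ above uses the
# "larger is better" convention, 2*loglik - p*log(n), while sklearn's
# GaussianMixture.bic() returns -2*loglik + p*log(n) ("smaller is better").
# The two should agree up to sign for the fitted model:
manual_loglik = np.sum(gclust.model_.score_samples(X_hat))
manual_bic = 2 * manual_loglik - gclust.model_._n_parameters() * np.log(n)
print(np.isclose(manual_bic, -gclust.model_.bic(X_hat)))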
n = 1000
pi = 0.9
A, counts = generate_cyclops(X, n, pi, None)

c = [0] * counts[0]
c += [1] * counts[1]

ase = ASE(n_components=3)
X_hat = ase.fit_transform(A)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X_hat[:, 0], X_hat[:, 1], X_hat[:, 2], c=c)

gclust = GCLUST(max_components=4)
c_hat = gclust.fit_predict(X_hat)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X_hat[:, 0], X_hat[:, 1], X_hat[:, 2], c=c_hat)


def quadratic(data, params):
    if data.ndim == 1:
        sum_ = np.sum(data[:-1] ** 2 * params[:-1]) + params[-1]
        return sum_
    elif data.ndim == 2:
        sums = np.sum(data[:, :-1] ** 2 * params[:-1], axis=1) + params[-1]
        return sums
    else:
        raise ValueError("unsupported data")
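# A small illustration (added; the parameter values are arbitrary assumptions):
# quadratic() evaluates sum_j params[j] * data[j]**2 + params[-1] over all but
# the last coordinate, broadcasting over rows when given a 2-D array.
example_params = np.array([1.0, -2.0, 0.5])  # two quadratic coefficients + intercept
print(quadratic(np.array([1.0, 2.0, 3.0]), example_params))  # single point -> scalar
print(quadratic(X_hat, example_params))  # (n,) array, one value per row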
unknown = classes == "Other"
plot_unknown = np.tile(unknown, n_graphs)
pairplot(plot_latent, labels=plot_unknown, alpha=0.3, legend_name="Unknown")

clust_latent = np.concatenate(list(latent), axis=-1)
clust_latent.shape

#%%
gc = GaussianCluster(min_components=2, max_components=15, covariance_type="all")
filterwarnings("ignore")

n_init = 50
sim_mat = np.zeros((n_verts, n_verts))
for i in tqdm(range(n_init)):
    assignments = gc.fit_predict(clust_latent)
    # increment co-assignment counts for every pair clustered together this run
    for c in np.unique(assignments):
        inds = np.where(assignments == c)[0]
        sim_mat[np.ix_(inds, inds)] += 1

sim_mat -= np.diag(np.diag(sim_mat))
sim_mat = sim_mat / n_init
heatmap(sim_mat)

#%%
thresh_sim_mat = sim_mat.copy()
thresh_sim_mat[thresh_sim_mat > 0.5] = 1
thresh_sim_mat[thresh_sim_mat < 0.5] = 0
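#%%
# A possible next step (a sketch, not in the original cell): treat the
# thresholded co-clustering matrix as a graph and take its connected
# components as consensus cluster labels.
from scipy.sparse.csgraph import connected_components

n_consensus, consensus_labels = connected_components(thresh_sim_mat, directed=False)
print(n_consensus)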