Beispiel #1
0
    parser.add_argument('--n_clusters', default=5, type=int)
    parser.add_argument('--input_dim', type=int)

    parser.add_argument('--finetune_iters', default=500, type=int)
    parser.add_argument('--layerwise_pretrain_iters', default=1000, type=int)
    parser.add_argument('--iter_max', default=1000, type=int)
    parser.add_argument('--tol', default=0.001, type=float)

    parser.add_argument('--output_file', default='../data/cluster.csv')

    args = parser.parse_args()

    df_X = pd.read_csv(args.input_file, sep='\t', header=None)
    X = df_X.values
    print(len(X))
    dec = DeepEmbeddingClustering(n_clusters=args.n_clusters,
                                  input_dim=len(df_X.columns))
    dec.initialize(X,
                   finetune_iters=args.finetune_iters,
                   layerwise_pretrain_iters=args.layerwise_pretrain_iters)
    pred_y = dec.cluster(X, y=None, tol=args.tol, iter_max=args.iter_max)

    print(pred_y)

    d = {
        'pred_y': pred_y,
    }

    df_pred_clusters = pd.DataFrame(d)
    df_pred_clusters.to_csv(args.output_file, index=False, header=False)
    n_clusters = 6
    # sub = []
    # kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(X[0])
    # X[0]['label'] = kmeans.labels_
    # for i in range(n_clusters):
    #     data_sub = data_sc[data_sc['label'] == i].drop(['label'], axis=1)
    #     sub.append(npc(data_sub))
    # sub = np.asarray(sub)
    # print(sub[:,0].mean())

    plot_kmeans(n_clusters)

    # %%
    from keras_dec import DeepEmbeddingClustering

    c = DeepEmbeddingClustering(n_clusters=6, input_dim=X[0].shape[1])
    #c.initialize(X, finetune_iters=100000, layerwise_pretrain_iters=50000)
    c.initialize(X[0], finetune_iters=10000, layerwise_pretrain_iters=5000)
    # c.cluster(X[0], y=np.random.randint(10,size=X[0].shape[0]))
    c.cluster(X[0])
    labels = c.DEC.predict_classes(X[0])
    plot_cluster(labels, n_clusters=6)
    X[0]['label'] = labels
    sub = []
    for i in range(n_clusters):
        data_sub = data_sc[data_sc['label'] == i].drop(['label'], axis=1)
        sub.append(npc(data_sub))
    sub = np.asarray(sub)
    print(sub[:, 0].mean())
    # %%
from keras_dec import DeepEmbeddingClustering
from keras.datasets import mnist
import numpy as np


def preproc(X):
    """Rescale every row of ``X`` so that ``1/d * ||x_i||_2**2 == 1.0``.

    Each row is divided by its own root-mean-square, which is the scaling
    that makes the mean of the squared entries equal to one. (Dividing by
    the plain row mean, as was done previously, normalises the mean of the
    entries instead and does not satisfy the invariant stated above.)

    Args:
        X: 2-D array with one sample per row. Integer input is promoted to
            float64 before squaring so small integer dtypes cannot overflow.

    Returns:
        A new ``numpy.ndarray`` with the same shape as ``X``. Rows that are
        entirely zero are returned unchanged (all zeros) rather than NaN.
    """
    X = np.asarray(X)
    if not np.issubdtype(X.dtype, np.floating):
        X = X.astype(np.float64)

    # Per-row root-mean-square; keepdims lets it broadcast back across columns.
    rms = np.sqrt(np.mean(np.square(X), axis=1, keepdims=True))
    # An all-zero row has nothing to rescale: divide by 1 so it stays zero
    # instead of turning into NaN through 0/0.
    rms[rms == 0] = 1.0
    return X / rms


# Fetch the MNIST train/test splits via the Keras dataset helper.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Collapse each sample into a single float32 row vector (one row per image),
# then rescale the rows with preproc().
X_train = preproc(X_train.reshape(len(X_train), -1).astype('float32'))
X_test = preproc(X_test.reshape(len(X_test), -1).astype('float32'))

# Build a 10-cluster model over 784-dimensional inputs, initialise it on the
# training rows, then run clustering with the training labels passed as `y`.
# NOTE(review): other initialize() call sites in this file pass
# `finetune_iters` / `layerwise_pretrain_iters` rather than `nb_epoch` --
# confirm this keyword matches the installed keras_dec version.
c = DeepEmbeddingClustering(n_clusters=10, input_dim=784)
c.initialize(X_train, nb_epoch=200)
c.cluster(X_train, y=y_train)