def find_labels(method, n_clusters, data):
    """Assign a cluster label to every sample with the requested strategy.

    Args:
        method: Strategy name. One of ``'KMeans'``, ``'NaiveKMeans'``,
            ``'Spread'``, ``'KMeansGram3'`` or ``'HarmonyBaskets'``.
        n_clusters: Number of clusters (or centers) to produce.
        data: Samples to label. The ``'KMeansGram3'`` branch reads
            ``data.T``, so it needs an object exposing ``.T``.

    Returns:
        ``np.ndarray`` with the computed labels.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    if method == 'KMeans':
        labels = KMeans(n_clusters=n_clusters).fit_predict(data)
    elif method in ('NaiveKMeans', 'Spread'):
        # The two strategies differ only in the helper that picks the
        # centers; labelling afterwards is identical.
        if method == 'NaiveKMeans':
            dist_matrix, centers_idxs = cluster_centers(data, n_clusters)
        else:
            dist_matrix, centers_idxs = spread_centers(data, n_clusters)
        # A sample's label is the position (within centers_idxs) of the
        # center with the smallest dist_matrix entry for that sample.
        labels = [
            np.argmin([dist_matrix[idx, c_idx] for c_idx in centers_idxs])
            for idx, _ in enumerate(data)
        ]
    elif method == 'KMeansGram3':
        # Clusters the transposed input.
        labels = KMeans(n_clusters=n_clusters).fit_predict(data.T)
    elif method == 'HarmonyBaskets':
        coeff = find_harmony_coeff(data)
        # KMeans wants 2-D input, so the coefficients become a column.
        labels = KMeans(n_clusters=n_clusters).fit_predict(
            coeff[:, np.newaxis])
    else:
        raise ValueError('Method not recognized: %r' % (method,))
    return np.array(labels)
def find_labels(method, n_clusters, data):
    """Return one cluster label per sample, computed by ``method``.

    Args:
        method: ``'KMeans'``, ``'NaiveKMeans'``, ``'Spread'``,
            ``'KMeansGram3'`` or ``'HarmonyBaskets'``.
        n_clusters: Number of clusters (or centers) requested.
        data: Samples to cluster; ``'KMeansGram3'`` clusters ``data.T``.

    Returns:
        ``np.ndarray`` of labels.

    Raises:
        ValueError: If ``method`` is not a supported strategy name.
    """
    # Strategies that delegate directly to KMeans return early.
    if method == 'KMeans':
        return np.array(KMeans(n_clusters=n_clusters).fit_predict(data))
    if method == 'KMeansGram3':
        return np.array(KMeans(n_clusters=n_clusters).fit_predict(data.T))
    if method == 'HarmonyBaskets':
        coeff = find_harmony_coeff(data)
        # Reshape to a single column because KMeans expects 2-D input.
        return np.array(
            KMeans(n_clusters=n_clusters).fit_predict(coeff[:, np.newaxis]))

    # Remaining strategies pick explicit centers and label each sample by
    # the center with the smallest dist_matrix entry.
    if method == 'NaiveKMeans':
        dist_matrix, centers_idxs = cluster_centers(data, n_clusters)
    elif method == 'Spread':
        dist_matrix, centers_idxs = spread_centers(data, n_clusters)
    else:
        raise ValueError('Method not recognized: %r' % (method,))

    return np.array([
        np.argmin([dist_matrix[row, center] for center in centers_idxs])
        for row, _ in enumerate(data)
    ])
Beispiel #3
0
def clustering(Xsvd,
               cells,
               dataset,
               suffix,
               labels=None,
               tlabels=None,
               method='knn',
               istsne=True,
               name='',
               batch_labels=None,
               seed=42):
    """Embed ``Xsvd`` with t-SNE, cluster it, and write plots plus a CSV.

    Args:
        Xsvd: Feature matrix handed to ``TSNE`` and, for ``'louvain'``, to
            ``kneighbors_graph``.
        cells: Cell identifiers, zipped row by row with the predictions
            when the CSV is written.
        dataset: Name interpolated into the output paths under ``result/``.
        suffix: Tag interpolated into the plot file names.
        labels: Optional reference labels. Used for the "true" plot and,
            in the louvain branch, for the NMI/ARI report.
        tlabels: Forwarded to ``show_tsne`` for the reference-label plot.
        method: ``'gmm'``, ``'knn'`` (which runs ``KMeans``), ``'dbscan'``,
            ``'spectral'`` or ``'louvain'``.
        istsne: When true, write the two label plots and the CSV.
        name: Tag interpolated into the plot file names.
        batch_labels: Optional labels; when given, one more plot coloured
            by them is written.
        seed: ``random_state`` for ``community.best_partition`` (louvain
            branch only).

    Returns:
        None. Results are printed and written under ``result/``.

    Raises:
        ValueError: If ``istsne`` is true and ``method`` matched no branch,
            so there are no predictions to plot or save.
    """
    tsne = TSNE(n_jobs=24).fit_transform(Xsvd)

    labels_pred = None
    for n_components in [15]:
        if method == 'gmm':
            # Fit on the same embedding that is predicted on; the previous
            # code fitted on `mat`, which is not bound in this branch.
            clf = mixture.GaussianMixture(n_components=n_components).fit(tsne)
            labels_pred = clf.predict(tsne)
        elif method == 'knn':
            labels_pred = KMeans(n_components,
                                 n_init=200).fit_predict(tsne)  # n_jobs>1 ?
        elif method == 'dbscan':
            labels_pred = DBSCAN(eps=0.3, min_samples=10).fit(tsne).labels_
        elif method == 'spectral':
            spectral = cluster.SpectralClustering(n_clusters=n_components,
                                                  eigen_solver='arpack',
                                                  affinity="nearest_neighbors")
            labels_pred = spectral.fit_predict(tsne)
        elif method == 'louvain':
            for louvain in [30]:
                print('****', louvain)
                # Neighbour graph is built on Xsvd, not on the embedding.
                mat = kneighbors_graph(Xsvd,
                                       louvain,
                                       mode='distance',
                                       include_self=True).todense()

                # NOTE(review): from_numpy_matrix appears to be absent from
                # recent networkx releases - confirm the pinned version.
                G = nx.from_numpy_matrix(mat)
                partition = community.best_partition(G, random_state=seed)

                # partition maps node index -> community id.
                labels_pred = np.array(
                    [partition[i] for i in range(mat.shape[0])])
                # labels is optional, so only take its length when present.
                print('louvain', louvain, tsne[:5],
                      len(labels) if labels is not None else None,
                      len(labels_pred))

                if labels is not None:
                    nmi_score = NMI(labels, labels_pred)
                    ari_score = ARI(labels, labels_pred)
                    print(
                        n_components, method,
                        "Clustering Scores:\nNMI: %.4f\nARI: %.4f\n" %
                        (nmi_score, ari_score))

    if istsne:
        if labels_pred is None:
            # No branch above ran; fail with a clear message instead of an
            # UnboundLocalError further down.
            raise ValueError('Method not recognized: %r' % (method,))

        # NOTE(review): labels may still be None here; whether show_tsne
        # accepts that is not visible from this function - confirm.
        show_tsne(tsne,
                  labels,
                  'result/%s/%s-%s-LSI-true.png' % (dataset, name, suffix),
                  tlabels=tlabels)
        show_tsne(tsne, labels_pred,
                  'result/%s/%s-%s-LSI-pred.png' % (dataset, name, suffix))

        with open('result/%s-LSI-cluster_result.csv' % (dataset), 'w') as f:
            f.write('cell,predicted label,tsne-1,tsne-2\n')
            for cell, pred, t in zip(cells, labels_pred, tsne):
                f.write('%s,%d,%f,%f\n' % (cell, pred, t[0], t[1]))

    if batch_labels is not None:
        show_tsne(
            tsne, batch_labels, 'result/%s/%s-GMVAE-%s-%s-batch.png' %
            (dataset, dataset, suffix, name))
Beispiel #4
0
def spk_reseg_with_one_model(det_index, feat_vad, feat_time, spk_model):
    """Re-segment blocks into two speakers using one given speaker model.

    Steps:
      1. Build block representations with ``gen_block_representation``.
      2. Split the blocks into two groups with ``KMeans`` and average the
         frames of each group into a model.
      3. Replace one of the two models with ``spk_model``.
      4. Relabel every block by comparing its cosine similarity to the two
         models, then collect the points where the label changes.

    Note:
        ``det_index`` is modified in place: ``len(feat_vad)`` is appended.

    Args:
        det_index: List of detected indices; gets ``len(feat_vad)`` appended
            before being passed to ``gen_block_representation``.
        feat_vad: Indexable sequence of frame features.
        feat_time: Forwarded to ``gen_block_representation``.
        spk_model: Model vector for the known speaker.

    Returns:
        Tuple ``(cluster_change_point, cluster_result)``.
        ``cluster_change_point`` lists the start index of every block whose
        label differs from the previous block. ``cluster_result`` is a list
        of ``[[start, end], tag]`` entries.
    """
    def _cosine(u, v):
        # Cosine similarity between two vectors.
        return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

    det_index.append(len(feat_vad))
    block_rep, block_tag = gen_block_representation(det_index, feat_vad,
                                                    feat_time)

    # Initial split: gather the frames of each KMeans group.
    spk0_rep, spk1_rep = [], []
    initial = KMeans(n_clusters=2).fit_predict(block_rep)
    for idx, group in enumerate(initial):
        bucket = spk0_rep if group == 0 else spk1_rep
        bucket.extend(
            feat_vad[j] for j in range(block_tag[idx][0], block_tag[idx][1]))

    spk0_model = np.mean(spk0_rep, axis=0)
    spk1_model = np.mean(spk1_rep, axis=0)

    # NOTE(review): this overwrites the group model that is *less* similar
    # to spk_model - confirm that is the intended side.
    if _cosine(spk_model, spk0_model) < _cosine(spk_model, spk1_model):
        spk0_model = spk_model
    else:
        spk1_model = spk_model

    # Label 0 is given when the block is less similar to spk0_model than to
    # spk1_model; every other case (ties included) gets label 1.
    y_pred = [
        0 if _cosine(rep, spk0_model) < _cosine(rep, spk1_model) else 1
        for rep in block_rep
    ]

    cluster_change_point, cluster_result = [], []
    last_index = 0
    for i in range(1, len(y_pred)):
        if y_pred[i] != y_pred[i - 1]:
            start = block_tag[i][0]
            cluster_change_point.append(start)
            # The closed segment is tagged with the label of the block that
            # begins the next segment.
            cluster_result.append([[last_index, start], y_pred[i]])
            last_index = start

    # The trailing segment is tagged with the opposite of the last label,
    # which matches the tagging rule used for the segments above.
    # NOTE(review): confirm downstream consumers expect this convention.
    last = len(y_pred) - 1
    end = 1 if y_pred[last] == 0 else 0
    cluster_result.append([[last_index, block_tag[last][1] - 1], end])
    return cluster_change_point, cluster_result
Beispiel #5
0
array2_1[:i] = array2_1[:i] + array4_1
array2_1[i:501] = array2_1[i:501] + array3_1
experiment_1 = array2_1
# Turn the experimental group into a DataFrame.
target_1 = pd.DataFrame(np.zeros(10000))
target_1[:500] = 1
# NOTE(review): row 500 is covered by neither assignment and keeps its
# initial 0, while the slice above ends at 501 - confirm the boundary.
target_1[501:] = 0
df_experiment_1 = pd.DataFrame(experiment_1)
df_experiment_1["target"] = target_1
# NOTE(review): X2 still contains the "target" column - confirm any later
# use as a feature matrix.
X2 = df_experiment_1
y2 = df_experiment_1["target"]


# Cluster on the feature columns only. Including "target" leaks the ground
# truth into the clustering, and the mix of int and str column names is
# rejected by recent scikit-learn releases.
features_1 = df_experiment_1.drop(columns="target")
pre = KMeans(n_clusters=2).fit_predict(features_1)
predict = list(pre)
matrix = confusion_matrix(df_experiment_1["target"].values.tolist(), predict)
print(matrix)

# Compute the F-score.
# NOTE(review): KMeans cluster ids are arbitrary, so cluster 1 is not
# guaranteed to correspond to target 1 - confirm before trusting F1.
TP = matrix[1,1]
FP = matrix[0,1]
FN = matrix[1,0]
P = TP + FN
precision = TP / (TP + FP)
recall = TP / P
F1 = 2 / (1 / precision + 1 / recall)
print(F1)