def run_k_means_on_loan_data(path):
    # Despite the name, this evaluates both EM (Gaussian mixture) and k-means
    # on the loan training set, printing homogeneity and silhouette scores.
    data_set = 'loan'
    x_train, y_train = load_data(path + 'data/' + data_set + '/train/')

    # Expectation-maximization clustering.
    estimator = GaussianMixture(n_components=3, random_state=0)
    estimator.fit(x_train)

    predictions = estimator.predict(x_train)
    ss = metrics.silhouette_score(x_train,
                                  predictions,
                                  metric='euclidean',
                                  sample_size=300)
    print("%.6f" % homogeneity_score(y_train, predictions))
    print("%.6f" % ss)

    # k-means clustering with the same number of clusters.
    estimator = KMeans(n_clusters=3, random_state=0)
    estimator.fit(x_train)

    # For KMeans fit and scored on the same data, estimator.labels_ equals
    # estimator.predict(x_train), so one silhouette computation suffices.
    predictions = estimator.predict(x_train)
    ss = metrics.silhouette_score(x_train,
                                  predictions,
                                  metric='euclidean',
                                  sample_size=300)
    print("%.6f" % homogeneity_score(y_train, predictions))
    print("%.6f" % ss)
def test_full_vs_elkan():
    # From the same seed, Lloyd's algorithm ('full', the deprecated spelling
    # of 'lloyd' in scikit-learn >= 1.1) and the Elkan variant should agree;
    # assert the comparison rather than discarding its result.
    km1 = KMeans(algorithm='full', random_state=13)
    km2 = KMeans(algorithm='elkan', random_state=13)

    km1.fit(X)
    km2.fit(X)

    assert homogeneity_score(km1.predict(X), km2.predict(X)) == 1.0
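The test assumes a module-level dataset X; a minimal sketch of such a fixture (make_blobs and its parameters are assumptions, not from the original test module):

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import homogeneity_score

# Hypothetical dataset for the test above.
X, _ = make_blobs(n_samples=300, centers=5, random_state=42)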
Example No. 4
def reduce_EM_and_score(k, x_train, y_train, x_test, y_test, f):
    print('scoring...')
    # Baseline: EM clustering on the raw features.
    gmm = GaussianMixture(n_components=k, random_state=0).fit(x_train)
    base_predictions = gmm.predict(x_test)
    # Reduce to k components with truncated SVD, then re-cluster.
    svd = TruncatedSVD(n_components=k)
    svd_x_train = svd.fit_transform(x_train)
    svd_x_test = svd.transform(x_test)
    gmm = GaussianMixture(n_components=k, random_state=0).fit(svd_x_train)
    predictions = gmm.predict(svd_x_test)

    # How well the reduced-space clusters align with the raw-space clusters,
    # and with the true labels.
    base_score = homogeneity_score(base_predictions, predictions)
    true_score = homogeneity_score(y_test, predictions)

    f.write('%.3f\t%.3f\t%.3f\n' % (base_score, true_score, 0.0))
def reduce_kmeans_and_score(k, x_train, y_train, x_test, y_test, f):
    print('scoring...')
    # Baseline: k-means on the raw features.
    kmeans = KMeans(n_clusters=k, random_state=0).fit(x_train)
    base_predictions = kmeans.predict(x_test)
    # Reduce to k components with FastICA, then re-cluster.
    ica = FastICA(n_components=k)
    ica_x_train = ica.fit_transform(x_train)
    ica_x_test = ica.transform(x_test)
    kmeans = KMeans(n_clusters=k, random_state=0).fit(ica_x_train)
    predictions = kmeans.predict(ica_x_test)

    base_score = homogeneity_score(base_predictions, predictions)
    true_score = homogeneity_score(y_test, predictions)

    f.write('%.3f\t%.3f\t%.3f\n' % (base_score, true_score, 0.0))
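Neither helper loads data or opens the output file; a minimal driver sketch (the synthetic data, file name, and k range are all assumptions):

import numpy as np

# Hypothetical data; the real examples load train/test splits from disk.
rng = np.random.RandomState(0)
x_train, x_test = rng.rand(200, 10), rng.rand(100, 10)
y_train, y_test = rng.randint(3, size=200), rng.randint(3, size=100)

with open('scores.tsv', 'w') as f:  # output path is an assumption
    for k in range(2, 6):           # k range is an assumption
        reduce_EM_and_score(k, x_train, y_train, x_test, y_test, f)
        reduce_kmeans_and_score(k, x_train, y_train, x_test, y_test, f)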
Example No. 6
def cluster(train, val, type, number_of_clusters, plot_folder, classes):
    # todo this should be a class
    if type == "spectral_clustering":
        clustering_model = SpectralClustering(n_clusters=number_of_clusters,
                                              assign_labels="discretize",
                                              random_state=0).fit(
                                                  train["data"])
    elif type == "kmeans":
        clustering_model = KMeans(n_clusters=number_of_clusters,
                                  random_state=0).fit(train["data"])
    else:
        raise NotImplementedError
    # Compute metrics; the random labeling below provides a chance baseline
    # for homogeneity (the hard-coded 9 is the number of fake clusters).
    accuracies = {}
    random_array = np.random.randint(9, size=train["labels"].shape)
    centroids = find_centroids(number_of_clusters, train,
                               clustering_model.labels_)
    test_classifications = cluster_test(val, centroids)
    visualize_clustering(train, clustering_model.labels_, type + "_training",
                         plot_folder, number_of_clusters, centroids)
    visualize_clustering(val, np.asarray(test_classifications),
                         type + "_validation", plot_folder, number_of_clusters,
                         centroids)

    accuracies["random_score"] = homogeneity_score(train["labels"],
                                                   random_array)
    accuracies["v_measure_score"] = v_measure_score(train["labels"],
                                                    clustering_model.labels_)
    accuracies["homogeneity_score"] = homogeneity_score(
        train["labels"], clustering_model.labels_)
    accuracies["completeness_score"] = completeness_score(
        train["labels"], clustering_model.labels_)
    accuracies["silhouette_score"] = silhouette_score(train["data"],
                                                      clustering_model.labels_)
    accuracies["purity_score"], accuracies[
        "contingency_matrix"] = purity_score(train["labels"],
                                             clustering_model.labels_)

    accuracies["v_measure_score_test"] = v_measure_score(
        val["labels"], test_classifications)
    accuracies["homogeneity_score_test"] = homogeneity_score(
        val["labels"], test_classifications)
    accuracies["completeness_score_test"] = completeness_score(
        val["labels"], test_classifications)
    accuracies["silhouette_score_test"] = silhouette_score(
        val["data"], test_classifications)
    accuracies["purity_score_test"], accuracies[
        "contingency_matrix_test"] = purity_score(val["labels"],
                                                  test_classifications)
    return accuracies
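The helpers find_centroids, cluster_test, and purity_score are not shown in this example; minimal sketches consistent with the call sites above (the real implementations may differ):

import numpy as np
from sklearn.metrics.cluster import contingency_matrix

def find_centroids(number_of_clusters, train, labels):
    # Mean of the training points assigned to each cluster.
    return np.array([train["data"][labels == i].mean(axis=0)
                     for i in range(number_of_clusters)])

def cluster_test(val, centroids):
    # Assign each validation point to its nearest centroid.
    dists = np.linalg.norm(val["data"][:, None, :] - centroids[None, :, :], axis=2)
    return dists.argmin(axis=1).tolist()

def purity_score(labels_true, labels_pred):
    # Purity: fraction of points in the majority true class of their cluster.
    cm = contingency_matrix(labels_true, labels_pred)
    return cm.max(axis=0).sum() / cm.sum(), cm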
Example No. 7
def fit_and_score_model(X, y_train, y_test, id_train, id_test):
    X_train = X[id_train, :]
    X_test = X[id_test, :]

    # k means
    kmeans = KMeans(n_clusters=2, random_state=0)
    kmeans.fit(X_train)

    # score
    print("Homogeneity score (training):",
          homogeneity_score(y_train, kmeans.labels_))
    print(
        "Homogeneity score (test):",
        homogeneity_score(y_test, kmeans.predict(X_test)),
    )
Example No. 8
def basic_stats(filename, mode='normal', num_phrases=5):
    d = AutoDupCoarse(filename, mode=mode, num_phrases=num_phrases)
    d.clustering()
    #d.print_clusters()

    # post_process builds the duplicate graph; it is needed for the component
    # statistics at the end of this function, so it must not stay commented out.
    label_list, graph = post_process(d)
    non_singleton = [len(comp) for comp in nx.connected_components(graph) if len(comp) > 1]
    true_labels = d.data['cluster_label'].values
    labels = d.labels
    print('\n{} stats: with noise in one cluster'.format(mode))
    print('ari', adjusted_rand_score(true_labels, labels))
    print('hom', homogeneity_score(true_labels, labels))
    print('nmi', normalized_mutual_info_score(true_labels, labels))

    # A set makes the repeated membership tests below O(1).
    indices = {i for i, label in enumerate(true_labels) if label != -1}
    # Noise points (label -1) are re-labeled with their own unique ids so each
    # becomes a singleton cluster.
    true_labels_noise = [
        label if i in indices else i for i, label in enumerate(true_labels)
    ]
    labels_noise = [
        label if i in indices else i for i, label in enumerate(labels)
    ]
    print('\n{} stats: with noise in all clusters'.format(mode))
    print('ari', adjusted_rand_score(true_labels_noise, labels_noise))
    print('hom', homogeneity_score(true_labels_noise, labels_noise))
    print('nmi', normalized_mutual_info_score(true_labels_noise, labels_noise))

    true_labels_no_noise = [
        label for i, label in enumerate(true_labels) if i in indices
    ]
    labels_no_noise = [label for i, label in enumerate(labels) if i in indices]
    print('\n{} stats: without noise'.format(mode))
    print('ari', adjusted_rand_score(true_labels_no_noise, labels_no_noise))
    print('hom', homogeneity_score(true_labels_no_noise, labels_no_noise))
    print('nmi',
          normalized_mutual_info_score(true_labels_no_noise, labels_no_noise))

    num_components = nx.number_connected_components(graph)
    print('num_components', num_components)
    print('num non-singleton:', len(non_singleton))
    print('max component:', max(non_singleton))
    print('min component:', min(non_singleton))
    print('avg component:', sum(non_singleton) / len(non_singleton))
    print('nodes:', graph.number_of_nodes())

    return num_components, len(non_singleton), max(
        non_singleton), sum(non_singleton) / len(non_singleton), d
Example No. 9
def main():
    m = 2000  # number of points
    n = 50  # Number of dimensions
    k = 30  # Number of latent clusters

    np.random.seed(3)
    X, z_true = draw_points(m, n, k=k)
    show_points(X, z_true, title="True")

    S = fully_connected_similarity(X)

    # Unnormalized spectral clustering
    # A = spectral_clustering(S, k)

    # Normalized spectral clustering according to Shi and Malik (2000)
    # A = spectral_clustering(S, k, normalization=LaplacianNorm.symmetric, generalized_eigenproblem=True)

    # Normalized spectral clustering according to Ng, Jordan, and Weiss (2002)
    A = spectral_clustering(S,
                            k,
                            normalization=LaplacianNorm.symmetric,
                            norm_rows=True)

    show_points(X, A, title="Spectral Clustering")

    complete_score = completeness_score(z_true, A)
    homog_score = homogeneity_score(z_true, A)
    print("Completeness score: %s" % complete_score)
    print("Homogeneity score: %s" % homog_score)
Example No. 10
def calc_homogenity_comp_vmeas_training(df, y_train):
    # An earlier, commented-out variant scored only candidate (adj, noun)
    # pairs with class != 2; this version scores the whole training frame.
    truelabels = df['class']
    predictlabels = y_train
    homogeneity_scr = homogeneity_score(truelabels, predictlabels)
    vmeasure_scr = v_measure_score(truelabels, predictlabels)
    completeness_scr = completeness_score(truelabels, predictlabels)
    print("true labels:", truelabels)
    print("predicted labels:", predictlabels)
    print("homogeneity_scr={}, vmeasure_scr={}, completeness_scr={}".format(
        homogeneity_scr, vmeasure_scr, completeness_scr))
Example No. 11
def get_purity(c, data, label_data, linkage_proc='ward'):
    # Hierarchical (agglomerative) clustering. Note: despite its name, this
    # function returns the homogeneity score, not a purity measure.
    cluster = AgglomerativeClustering(n_clusters=c,
                                      affinity='euclidean',
                                      linkage=linkage_proc)
    data_p = cluster.fit_predict(data)
    return homogeneity_score(label_data, data_p)
Example No. 12
    def evaluate_bins(self):
        self.labels = []
        newcolors = []
        for bin in self.bins:
            for b in bin:
                self.labels.append(self.bins.index(bin))
                if self.colors is not None:
                    newcolors.append(self.colors[b])
        # Keep "no colors" as None rather than an empty list so the
        # ground-truth scoring below is skipped cleanly.
        self.colors = newcolors if self.colors is not None else None

        labels = numpy.array(self.labels)
        points = []
        for bin in self.bins:
            for b in bin:
                start_lat = self.data[b]['trip_start_location'][1]
                start_lon = self.data[b]['trip_start_location'][0]
                end_lat = self.data[b]['trip_end_location'][1]
                end_lon = self.data[b]['trip_end_location'][0]
                path = [start_lat, start_lon, end_lat, end_lon]
                points.append(path)

        if self.colors is not None:
            colors = numpy.array(self.colors)
            a = metrics.silhouette_score(numpy.array(points), labels)
            b = homogeneity_score(colors, labels)
            c = completeness_score(colors, labels)

            print('number of bins is ' + str(len(self.bins)))
            print('silhouette score is ' + str(a))
            print('homogeneity is ' + str(b))
            print('completeness is ' + str(c))
            # Ad-hoc combined score: silhouette rescaled from [-1, 1] to
            # [0, 1], averaged with homogeneity and completeness.
            print('accuracy is ' + str(((a + 1) / 2.0 + b + c) / 3.0))
Example No. 13
    def show_result(self, prediction, msg):
        new_line(50)
        print(msg)
        new_line(50)

        real = self.train_labels

        print "Confusion Matrix: "
        print str(confusion_matrix(real, prediction))

        homo_score = homogeneity_score(real, prediction)
        complete_score = completeness_score(real, prediction)
        v_score = v_measure_score(real, prediction)
        rand_score = adjusted_rand_score(real, prediction)
        mutual_info = adjusted_mutual_info_score(real, prediction)

        print("Homogeneity Score: %0.3f" % homo_score)
        print("Completeness Score: %0.3f" % complete_score)
        print("V-measure: %0.3f" % v_score)
        print("Adjusted Rand Score: %0.3f" % rand_score)
        print("Adjusted Mutual Info Score: %0.3f\n" % mutual_info)

        return {
            'Homogeneity': homo_score,
            'Completeness': complete_score,
            'V-measure': v_score,
            'RAND': rand_score,
            'Mutual': mutual_info
        }
Example No. 14
def get_clustering_metrics(train_data,
                           cluster_labels,
                           ground_truth_labels=None):
    clustering_metric_dict = dict({})
    clustering_metric_dict['silhouette_score'] = silhouette_score(
        train_data, cluster_labels, random_state=42)
    clustering_metric_dict[
        'calinski_harabasz_score'] = calinski_harabasz_score(
            train_data, cluster_labels)
    clustering_metric_dict['davies_bouldin_score'] = davies_bouldin_score(
        train_data, cluster_labels)

    if ground_truth_labels is not None:
        clustering_metric_dict['v_measure_score'] = v_measure_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict[
            'fowlkes_mallows_score'] = fowlkes_mallows_score(
                ground_truth_labels, cluster_labels)
        clustering_metric_dict['homogeneity_score'] = homogeneity_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict[
            'normalized_mutual_info_score'] = normalized_mutual_info_score(
                ground_truth_labels, cluster_labels)
        clustering_metric_dict['adjusted_rand_score'] = adjusted_rand_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict['completeness_score'] = completeness_score(
            ground_truth_labels, cluster_labels)

    return clustering_metric_dict
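A usage sketch (the blob dataset and k-means configuration are assumptions, not part of the original):

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, y = make_blobs(n_samples=500, centers=4, random_state=42)
labels = KMeans(n_clusters=4, random_state=42, n_init=10).fit_predict(X)
print(get_clustering_metrics(X, labels, ground_truth_labels=y))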
Example No. 15
def compute_V_measure(clusters, classes):
    class_list, cluster_list = [], []
    # not_found_id = 1000000
    clustered_but_unaligned = 0
    for read in clusters:
        if read in classes:
            class_list.append(classes[read])
            cluster_list.append(clusters[read])
        else:
            # print("Read was clustered but unaligned:", read)
            clustered_but_unaligned += 1

    # Add reads that were aligned but never clustered; each becomes its own
    # singleton cluster so it still counts against the measure.
    not_clustered = set(classes.keys()) - set(clusters.keys())
    highest_cluster_id = max(clusters.values())
    highest_cluster_id += 1
    for read in not_clustered:
        class_list.append(classes[read])
        cluster_list.append(highest_cluster_id)
        highest_cluster_id += 1

    v_score = v_measure_score(class_list, cluster_list)
    compl_score = completeness_score(class_list, cluster_list)
    homog_score = homogeneity_score(class_list, cluster_list)
    ari = adjusted_rand_score(class_list, cluster_list)

    print("Not included in clustering but aligned:", len(not_clustered))
    print("V:", v_score, "Completeness:", compl_score, "Homogeneity:",
          homog_score)
    print(
        "Nr reads clustered but unaligned (i.e., no class and excluded from V-measure): ",
        clustered_but_unaligned)
    return v_score, compl_score, homog_score, clustered_but_unaligned, ari
def compareAB(A, B, X):
    #measures the similarity of the two assignments, ignoring permutations and with chance normalization
    ars = metrics.adjusted_rand_score(A, B)
    ars_str = '%17.3f' % ars

    # each cluster contains only members of a single class
    hs = homogeneity_score(A, B)
    hs_str = '%17.3f' % hs

    #all members of a given class are assigned to the same cluster
    cs = completeness_score(A, B)
    cs_str = '%17.3f' % cs

    vms = metrics.v_measure_score(A, B)
    vms_str = '%17.3f' % vms

    # geometric mean of the pairwise precision and recall
    fowlkes_mallows_score = metrics.fowlkes_mallows_score(A, B)
    fms_str = '%17.3f' % fowlkes_mallows_score

    sc = metrics.silhouette_score(X, B, metric='euclidean')
    sc_str = '%17.3f' % sc

    # Join as a single LaTeX-style table row.
    my_str = "&".join([ars_str, hs_str, cs_str, vms_str, fms_str, sc_str])
    return my_str
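A usage sketch comparing two k-means runs on the same data (the dataset and seeds are assumptions):

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=300, centers=3, random_state=0)
A = KMeans(n_clusters=3, random_state=0, n_init=10).fit_predict(X)
B = KMeans(n_clusters=3, random_state=1, n_init=10).fit_predict(X)
print(compareAB(A, B, X))  # one '&'-separated row of the six scores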
Example No. 17
 def correlation(self, X, Y, heatmap=False):
     nb_classes = len(set(Y))
     print(nb_classes)
     km = KMeans(n_clusters=nb_classes, random_state=0).fit(X)
     label_kmeans = km.labels_
     purity = metric.compute_purity(label_kmeans, Y)
     nmi = normalized_mutual_info_score(Y, label_kmeans)
     ari = adjusted_rand_score(Y, label_kmeans)
     homogeneity = homogeneity_score(Y, label_kmeans)
     ami = adjusted_mutual_info_score(Y, label_kmeans)
     print('NMI = {}, ARI = {}, Purity = {},AMI = {}, Homogeneity = {}'.
           format(nmi, ari, purity, ami, homogeneity))
     if heatmap:
         x_ticks = [''] * len(Y)
         y_ticks = [''] * len(Y)
         idx = []
         for i in range(nb_classes):
             sub_idx = [j for j, item in enumerate(Y) if item == i]
             idx += sub_idx
             x_ticks[len(idx) - 1] = str(i)
         assert len(idx) == len(Y)
         X = X[idx, :]
         Y = Y[idx]
         #similarity_mat = pairwise_distances(X,metric='cosine')
         similarity_mat = cosine_similarity(X)
         #sns.heatmap(similarity_mat,cmap='Blues')
         fig, ax = plt.subplots()
         #ax.set_yticks(range(len(y_ticks)))
         ax.set_yticklabels(y_ticks)
         ax.set_xticks(range(len(x_ticks)))
         ax.set_xticklabels(x_ticks)
         im = ax.imshow(similarity_mat, cmap='Blues')
         plt.colorbar(im)
         plt.savefig('heatmap_%s_dim%d.png' % (self.name, X.shape[1]),
                     dpi=600)
Example No. 18
    def evaluate(self, timestamp, batch_idx):
        if has_label:
            data_y, label_y = self.y_sampler.load_all()
        else:
            data_y = self.y_sampler.load_all()
        data_x_, data_x_onehot_ = self.predict_x(data_y)
        label_infer = np.argmax(data_x_onehot_, axis=1)
        print([list(label_infer).count(item) for item in np.unique(label_infer)])
        print([list(label_y).count(item) for item in np.unique(label_y)])

        # label_kmeans = KMeans(n_clusters=4, n_init = 200).fit_predict(data_y)
        # ari = adjusted_rand_score(label_y, label_kmeans)
        # nmi = normalized_mutual_info_score(label_y, label_kmeans)
        # print('kmeans',ari,nmi,self.nb_classes)
        # sys.exit()

        if has_label:
            purity = metric.compute_purity(label_infer, label_y)
            nmi = normalized_mutual_info_score(label_y, label_infer)
            ari = adjusted_rand_score(label_y, label_infer)
            homo = homogeneity_score(label_y, label_infer)
            print('scDEC: NMI = {}, ARI = {}, Homogeneity = {}'.format(nmi, ari, homo))
            if is_train:
                f = open('%s/log.txt' % self.save_dir, 'a+')
                f.write('NMI = {}\tARI = {}\tHomogeneity = {}\t batch_idx = {}\n'.format(nmi, ari, homo, batch_idx))
                f.close()
                np.savez('{}/data_at_{}.npz'.format(self.save_dir, batch_idx + 1), data_x_, data_x_onehot_, label_y)
            else:
                np.savez('results/{}/data_pre.npz'.format(self.data), data_x_, data_x_onehot_, label_y)

        else:
            if is_train:
                np.savez('{}/data_at_{}.npz'.format(self.save_dir, batch_idx + 1), data_x_, data_x_onehot_)
            else:
                np.savez('results/{}/data_pre.npz'.format(self.data), data_x_, data_x_onehot_)
Example No. 19
def test_full_vs_elkan():
    km1 = KMeans(algorithm='full', random_state=13).fit(X)
    km2 = KMeans(algorithm='elkan', random_state=13).fit(X)

    assert homogeneity_score(
        km1.predict(X), km2.predict(X)
    ) == pytest.approx(1.0)
def reduce_kmeans_and_score(k, x_train, y_train, x_test, y_test, f):
    print('scoring...')
    kmeans = KMeans(n_clusters=k, random_state=0).fit(x_train)
    base_predictions = kmeans.predict(x_test)

    # Homogeneity is not symmetric: ground truth goes first.
    true_score = homogeneity_score(y_test, base_predictions)

    f.write('%.3f\t%.3f\n' % (true_score, 0.0))
Example No. 21
    def get_homogeneity(self, cat_y, label):
        # Hard-assign each point to its most probable category.
        _, indices = cat_y.max(dim=1)

        pred_label = indices.view(-1).cpu().data.numpy()
        true_label = label.view(-1).cpu().data.numpy()

        return homogeneity_score(true_label, pred_label)
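cat_y is expected to be a (batch, n_classes) tensor of soft assignments. A standalone sketch of the same computation on synthetic tensors (the shapes and the softmax inputs are assumptions):

import torch
from sklearn.metrics.cluster import homogeneity_score

cat_y = torch.softmax(torch.randn(100, 10), dim=1)  # hypothetical soft assignments
label = torch.randint(0, 10, (100,))                # hypothetical ground truth

_, indices = cat_y.max(dim=1)                       # hard assignment per row
print(homogeneity_score(label.numpy(), indices.numpy()))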
def reduce_EM_and_score(k, x_train, y_train, x_test, y_test, f):
    print('scoring...')
    gmm = GaussianMixture(n_components=k, random_state=0).fit(x_train)
    base_predictions = gmm.predict(x_test)

    # Homogeneity is not symmetric: ground truth goes first.
    true_score = homogeneity_score(y_test, base_predictions)

    f.write('%.3f\t%.3f\n' % (true_score, 0.0))
Example No. 23
File: eval.py Project: dbrg77/scDEC
def cluster_eval(labels_true, labels_infer):
    purity = metric.compute_purity(labels_infer, labels_true)
    nmi = normalized_mutual_info_score(labels_true, labels_infer)
    ari = adjusted_rand_score(labels_true, labels_infer)
    homogeneity = homogeneity_score(labels_true, labels_infer)
    ami = adjusted_mutual_info_score(labels_true, labels_infer)
    #print('NMI = {}, ARI = {}, Purity = {},AMI = {}, Homogeneity = {}'.format(nmi,ari,purity,ami,homogeneity))
    return nmi, ari, homogeneity
Example No. 24
def print_scores(labels, predicted, svd):
    # Compute each metric once, then print and accumulate.
    ret = [
        homogeneity_score(labels, predicted),
        completeness_score(labels, predicted),
        v_measure_score(labels, predicted),
        adjusted_rand_score(labels, predicted),
        adjusted_mutual_info_score(labels, predicted),
    ]
    print("Homogeneity: " + str(ret[0]))
    print("Completeness: " + str(ret[1]))
    print("V-measure: " + str(ret[2]))
    print("RAND score: " + str(ret[3]))
    print("Mutual Info: " + str(ret[4]))
    # svd_all / nmf_all are module-level accumulators.
    if svd:
        svd_all.append(ret)
    else:
        nmf_all.append(ret)
    return ret[0]
Example No. 25
def five_measure_scores(label_true, label_pred):
    print("Homogeneity_score = %f" % homogeneity_score(label_true, label_pred))
    print("Completeness_score = %f" %
          completeness_score(label_true, label_pred))
    print("Adjusted_rand_score = %f" %
          adjusted_rand_score(label_true, label_pred))
    print("V_measure_score = %f" % v_measure_score(label_true, label_pred))
    print("Adjusted_mutual_info_score = %f" %
          adjusted_mutual_info_score(label_true, label_pred))
Example No. 26
def v_measure(cluster_labels, true_labels):
    h_score = homogeneity_score(true_labels, cluster_labels)
    c_score = completeness_score(true_labels, cluster_labels)
    v_score = v_measure_score(true_labels, cluster_labels)

    print("Homogeneity Score: %.6f" % h_score)
    print("Completeness Score: %.6f" % c_score)
    print("V Measure Score: %.6f" % v_score)
    return h_score, c_score, v_score
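V-measure is the harmonic mean of homogeneity and completeness, so the three values returned above are tied by a simple identity; a quick check on toy labels:

import math

h, c, v = v_measure(cluster_labels=[0, 0, 1, 2], true_labels=[0, 0, 1, 1])
assert math.isclose(v, 2 * h * c / (h + c))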
Example No. 27
 def calc_performance_score(self, algo_type: str, predicted, y_train):
     homo_score = homogeneity_score(y_train, predicted)
     complete_score = completeness_score(y_train, predicted)
     adj_mutual_info = adjusted_mutual_info_score(y_train, predicted)
     print(algo_type + ' homo_score ' + "{:.2f}".format(homo_score))
     print(algo_type + ' complete_score ' + "{:.2f}".format(complete_score))
     print(algo_type + ' adjusted_mutual_info_score ' +
           "{:.2f}".format(adj_mutual_info))
Example No. 28
def process(data):
    emotions = []
    acts = []
    for item in data['metas']:
        if item is None:
            emotions.append(7)
            acts.append(4)
        else:
            emotions.append(int(item['emotion']))
            acts.append(int(item['act']) - 1)
    las = []
    for _, _, la in data['z']:
        las.extend(la)
    las = [transfer(elem) for elem in las]
    act_h = homogeneity_score(las, acts)
    emotion_h = homogeneity_score(las, emotions)
    print('The act homogeneity is:' + str(act_h))
    print('The emotion homogeneity is:' + str(emotion_h))
Example No. 29
def print_scores(labels, predicted):
    print("Contingency: ")
    print(str(confusion_matrix(labels, predicted)))

    # Compute each metric once, then print and return.
    ret = [
        homogeneity_score(labels, predicted),
        completeness_score(labels, predicted),
        v_measure_score(labels, predicted),
        adjusted_rand_score(labels, predicted),
        adjusted_mutual_info_score(labels, predicted),
    ]

    print("Homogeneity: " + str(ret[0]))
    print("Completeness: " + str(ret[1]))
    print("V-measure: " + str(ret[2]))
    print("RAND score: " + str(ret[3]))
    print("Mutual Info: " + str(ret[4]))

    return ret
def evaluate(X, Y, predicted_labels):

    # Degenerate clusterings (fewer than two clusters) get all-zero scores.
    df = pd.DataFrame(predicted_labels, columns=['label'])
    if len(df.groupby('label').groups) < 2:
        return [0, 0, 0, 0, 0, 0]

    try:
        sil = silhouette_score(X, predicted_labels, metric='euclidean')
    except Exception:
        sil = 0

    return [
        sil,
        homogeneity_score(Y, predicted_labels),
        homogeneity_score(predicted_labels, Y),  # equals completeness_score(Y, predicted_labels)
        normalized_mutual_info_score(Y, predicted_labels),
        adjusted_mutual_info_score(Y, predicted_labels),
        adjusted_rand_score(Y, predicted_labels)
    ]
Example No. 31
File: p6.py Project: i72sijia/IMD
def clusteringAverageLink(d, testInputs, testOutputs, graphname, clusteres):
    print("\n\t[" + str(graphname) + "]")
    averagelink = AgglomerativeClustering(n_clusters=clusteres,
                                          linkage='average')
    resultados = averagelink.fit(d)
    print("\n\t\tCluster: " + str(resultados.labels_))
    # fit_predict refits the model on testInputs; the extra testOutputs
    # argument is accepted but ignored by AgglomerativeClustering.
    homogeneidad = homogeneity_score(
        testOutputs, averagelink.fit_predict(testInputs, testOutputs))
    print("\n\t\tHomogeneity: " + str(homogeneidad))
    print("\n\n")
    return homogeneidad
Example No. 32
 def get_homogeneity_and_completeness(self, clusters, category):
     labels = getattr(self.scb.nodes,
                      'to_' + category)(range(len(self.scb.nodes)))
     keys = dict()
     for i, label in enumerate(labels):
         if label not in keys:
             keys[label] = len(keys)
         labels[i] = keys[label]
     hs = homogeneity_score(labels, clusters)
     cs = completeness_score(labels, clusters)
     return {'homogeneity': hs, 'completeness': cs}
Example No. 33
def evaluate(colors, labels):
    b = homogeneity_score(colors, labels)
    c = completeness_score(colors, labels)
    print('homogeneity is ' + str(b))
    print('completeness is ' + str(c))
Example No. 34
# Excerpt from inside a larger per-sample / threshold / hemisphere loop; the
# surrounding variables (samples, meanDist, cort, yeo7, df, num_comps, ...)
# are defined by the enclosing script, and the final df.loc row is truncated
# in the source.
            trt_4 = run_icc_meanDist(samples[sample], dataDir, fsDir, thr, hemi, ['1a', '1b', '2a', '2b'])
            trt_2 = run_icc_meanDist(samples[sample], dataDir, fsDir, thr, hemi, ['1ab', '2ab'])
            nan_mask = run_nan_grmask(samples[sample], dataDir, hemi)

            data_gmm = {}
            for n_comp in num_gmm_comps:
                data = meanDist * 1000
                # mixture.GMM is the pre-0.18 scikit-learn API (later replaced
                # by GaussianMixture, with n_iter renamed to max_iter).
                gmm = mixture.GMM(n_components=n_comp, n_iter=1000)
                gmm.fit(data[cort])
                bic = gmm.bic(data[cort])
                aic = gmm.aic(data[cort])
                res = np.zeros(10242)
                res[cort] = gmm.predict(data[cort])
                res[cort] = res[cort] + 1
                data_gmm[n_comp] = res
                homogeneity = homogeneity_score(res[cort], yeo7[0][cort])
                df_gmm_eval.loc[len(df_gmm_eval)] = [str(thr), hemi, n_comp, bic, aic, homogeneity, gmm.converged_]

            for node in range(10242):
                df.loc[len(df)] = [sample, str(thr), hemi, node,
                                   aparc[2][aparc[0][node]],
                                   aparca2009s[2][aparca2009s[0][node]],
                                   yeo7_names[yeo7[0][node]],
                                   clus7[node],
                                   yeo17[2][yeo17[0][node]],
                                   meanDist[node], meanDist_norm[node],
                                   stdev[node],
                                   trt_4[0, node], trt_4[1, node], trt_4[2, node], trt_4[3, node], trt_4[4, node], trt_4[5, node],
                                   trt_2[0, node], trt_2[1, node], trt_2[2, node], trt_2[3, node], trt_2[4, node], trt_2[5, node],
                                   data_gmm[num_comps[0]][node], data_gmm[num_comps[1]][node], data_gmm[num_comps[2]][node],
                                   data_gmm[num_comps[3]][node], data_gmm[num_comps[4]][node], data_gmm[num_comps[5]][node],
Example No. 35
def evaluate(colors, labels):
    b = homogeneity_score(colors, labels)
    c = completeness_score(colors, labels)
    # %d would truncate these [0, 1] scores to 0; use %f.
    logging.debug('homogeneity is %f' % b)
    logging.debug('completeness is %f' % c)
Example No. 36
 def check_clusters(self):
     print(self.colors)
     print('number of clusters is ' + str(self.clusters))
     print('silhouette score is ' + str(self.sil))
     print('homogeneity is ' + str(homogeneity_score(self.colors, self.labels)))
     print('completeness is ' + str(completeness_score(self.colors, self.labels)))
Example No. 37
from sklearn.metrics.cluster import homogeneity_score
# Permuted label names still give perfectly pure clusters.
print(homogeneity_score([0, 0, 1, 1], [1, 1, 0, 0]))              # 1.0
# Each cluster mixes the two classes 2:1, so homogeneity is low.
print(homogeneity_score([0, 0, 0, 1, 1, 1], [3, 2, 2, 2, 3, 3]))  # ~0.082

from sklearn.metrics.cluster import completeness_score
# Each class lands entirely in one cluster.
print(completeness_score([0, 0, 1, 1], [1, 1, 0, 0]))             # 1.0
# Each class is split 2:1 across the two clusters.
print(completeness_score([0, 0, 0, 1, 1, 1], [3, 2, 2, 2, 3, 3])) # ~0.082
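As the paired calls above suggest, homogeneity and completeness are duals: swapping the two label arrays turns one score into the other. A quick check:

import math
from sklearn.metrics.cluster import completeness_score, homogeneity_score

a = [0, 0, 0, 1, 1, 1]
b = [3, 2, 2, 2, 3, 3]
assert math.isclose(homogeneity_score(a, b), completeness_score(b, a))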