Example #1
def Porownaj_algorytmy(data, klasy, labels, method, baza):
    """
    Computes the AM, AR and FM indices for all algorithms except the one I wrote myself.
    """

    wektor = []
    test = [0] * len(method)
    i = 0
    # linkage algorithms
    for name in method:
        Z = linkage(data, name)
        test[i] = cluster.hierarchy.cut_tree(Z, klasy)
        test[i] = [y for x in test[i] for y in x]
        wektor.append([fowlkes_mallows_score(labels, test[i]), adjusted_mutual_info_score(labels, test[i]), adjusted_rand_score(labels, test[i]), baza])
        i += 1

    # genieclust algorithm
    wynikMG = genieclust.genie.Genie(n_clusters=klasy).fit_predict(data)
    wektor.append([fowlkes_mallows_score(labels, wynikMG), adjusted_mutual_info_score(labels, wynikMG), adjusted_rand_score(labels, wynikMG), baza])

    # MeanShift
    wynikCL = MeanShift(bandwidth=klasy).fit(data).labels_
    wektor.append([fowlkes_mallows_score(labels, wynikCL), adjusted_mutual_info_score(labels, wynikCL), adjusted_rand_score(labels, wynikCL), baza])

    # AgglomerativeClustering
    wynikFA = AgglomerativeClustering(n_clusters=klasy).fit(data).labels_
    wektor.append([fowlkes_mallows_score(labels, wynikFA), adjusted_mutual_info_score(labels, wynikFA), adjusted_rand_score(labels, wynikFA), baza])

    # KMeans
    wynikKM = KMeans(n_clusters=klasy, random_state=123).fit(data).labels_
    wektor.append([fowlkes_mallows_score(labels, wynikKM), adjusted_mutual_info_score(labels, wynikKM), adjusted_rand_score(labels, wynikKM), baza])

    # Row labels in the same order as the results were appended above.
    index = list(method) + ["genieclust", "MeanShift", "AgglomerativeClustering", "KMeans"]

    return pd.DataFrame(wektor, index=index, columns=["FM", "AM", "AR", "Dane"])
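A minimal usage sketch for the function above (an addition, not part of the original example): it assumes the imports below, an installed genieclust package, and synthetic data from make_blobs; baza is just a tag identifying the dataset in the "Dane" column.

import pandas as pd
import genieclust  # required because the function calls genieclust.genie.Genie
from scipy import cluster
from scipy.cluster.hierarchy import linkage
from sklearn.cluster import MeanShift, AgglomerativeClustering, KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import (fowlkes_mallows_score, adjusted_mutual_info_score,
                             adjusted_rand_score)

# Three well-separated Gaussian blobs as a toy benchmark.
data, labels = make_blobs(n_samples=150, centers=3, random_state=123)
methods = ['single', 'complete', 'average', 'weighted', 'centroid', 'median', 'ward']
print(Porownaj_algorytmy(data, 3, labels, methods, baza="blobs"))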
Example #2
def test_fowlkes_mallows_score():
    # General case
    score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1], [0, 0, 1, 1, 2, 2])
    assert_almost_equal(score, 4.0 / np.sqrt(12.0 * 6.0))

    # Perfect match but where the label names changed
    perfect_score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1], [1, 1, 1, 0, 0, 0])
    assert_almost_equal(perfect_score, 1.0)

    # Worst case
    worst_score = fowlkes_mallows_score([0, 0, 0, 0, 0, 0], [0, 1, 2, 3, 4, 5])
    assert_almost_equal(worst_score, 0.0)
Example #3
def test_fowlkes_mallows_score():
    # General case
    score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1], [0, 0, 1, 1, 2, 2])
    assert_almost_equal(score, 4.0 / np.sqrt(12.0 * 6.0))

    # Perfect match but where the label names changed
    perfect_score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1], [1, 1, 1, 0, 0, 0])
    assert_almost_equal(perfect_score, 1.0)

    # Worst case
    worst_score = fowlkes_mallows_score([0, 0, 0, 0, 0, 0], [0, 1, 2, 3, 4, 5])
    assert_almost_equal(worst_score, 0.0)
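For reference, the expected value 4 / sqrt(12 * 6) in the general case above can be reproduced from the pair-counting quantities that sklearn derives from the contingency table (tk, pk, qk):

# sklearn computes FMI as tk / sqrt(pk * qk), where
#   tk = sum(c_ij^2) - n, pk = sum(row_sums^2) - n, qk = sum(col_sums^2) - n.
import numpy as np
from sklearn.metrics.cluster import contingency_matrix

labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]

c = contingency_matrix(labels_true, labels_pred)
n = len(labels_true)
tk = np.sum(c ** 2) - n              # 10 - 6 = 4
pk = np.sum(c.sum(axis=1) ** 2) - n  # 18 - 6 = 12
qk = np.sum(c.sum(axis=0) ** 2) - n  # 12 - 6 = 6
print(tk / np.sqrt(pk * qk))         # equals 4 / sqrt(12 * 6)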
Example #4
def computeFowklesMallowsIndex(inputDirTDA, inputFileWavelet,
                               sheetNameWavelet):
    #labels_TDA = ['m59','m39','m102', 'm6', 'm47', 'm8', 'm4', 'm98', 'm2', 'm40', 'm3']
    #clustering_TDA, labelsTDA = tdaClustering("../Results/CohomologyOPPregJNP/")
    clustering_TDA, labelsTDA = tdaClustering(inputDirTDA)
    #labels_Wavelet = ['m39','m40', 'm47', 'm98', 'm102', 'm2', 'm3', 'm4', 'm6', 'm8', 'm59']
    clustering_Wavelet, labelsWavelet = waveletClustering(
        inputFileWavelet, sheetNameWavelet)
    # 167
    #[2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 3] -- cutoff 175
    #[2, 2, 1, 1, 1, 1, 4, 4, 3, 3, 5] -- cutoff 160
    #[2, 2, 1, 1, 1, 1, 3, 3, 3, 3, 4] -- cutoff 167

    # Reorganize the wavelet clustering labels according to the TDA
    # labels so that both partitions are in the same sample order

    reOrganizedWavelet = []
    dicWavelet = {}
    for pair in zip(labelsWavelet, clustering_Wavelet):
        dicWavelet[pair[0]] = pair[1]

    for l in labelsTDA:
        reOrganizedWavelet.append(dicWavelet[l])

    print(reOrganizedWavelet)

    score = fowlkes_mallows_score(clustering_TDA, reOrganizedWavelet)

    #score = fowlkes_mallows_score([1,1,0,0], [0,0,1,1])
    return score
Example #5
    def true_label_metrics(true_label, assigned_label, print_metric):
        """ https://scikit-learn.org/stable/modules/clustering.html#clustering-evaluation"""
        true_label_metrics = {}
        true_label_metrics['adjusted_rand_score'] = \
            cluster_metric.adjusted_rand_score(true_label, assigned_label)
        # true_label_metrics['adjusted_mutual_info_score'] = \
        #     cluster_metric.adjusted_mutual_info_score(true_label,
        #                                               assigned_label)
        # true_label_metrics['homogeneity_completeness_v_measure'] = \
        #     cluster_metric.homogeneity_completeness_v_measure(true_label,
        #                                                       assigned_label)
        true_label_metrics['fowlkes_mallows_score'] = \
            cluster_metric.fowlkes_mallows_score(true_label, assigned_label)

        if (print_metric):
            print("Metric with True label")
            print("adjusted rand score: % s " %
                  true_label_metrics['adjusted_rand_score'])
            # print("adjusted mutual info score: % s"
            #       % true_label_metrics['adjusted_mutual_info_score'])
            # print("homogeneity completeness v measure:" )
            # print(true_label_metrics['homogeneity_completeness_v_measure'])
            print("fowlkes_mallows : % s" %
                  true_label_metrics['fowlkes_mallows_score'])

        return true_label_metrics
Example #6
def get_clustering_metrics(train_data,
                           cluster_labels,
                           ground_truth_labels=None):
    clustering_metric_dict = dict({})
    clustering_metric_dict['silhouette_score'] = silhouette_score(
        train_data, cluster_labels, random_state=42)
    clustering_metric_dict[
        'calinski_harabasz_score'] = calinski_harabasz_score(
            train_data, cluster_labels)
    clustering_metric_dict['davies_bouldin_score'] = davies_bouldin_score(
        train_data, cluster_labels)

    if ground_truth_labels is not None:
        clustering_metric_dict['v_measure_score'] = v_measure_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict[
            'fowlkes_mallows_score'] = fowlkes_mallows_score(
                ground_truth_labels, cluster_labels)
        clustering_metric_dict['homogeneity_score'] = homogeneity_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict[
            'normalized_mutual_info_score'] = normalized_mutual_info_score(
                ground_truth_labels, cluster_labels)
        clustering_metric_dict['adjusted_rand_score'] = adjusted_rand_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict['completeness_score'] = completeness_score(
            ground_truth_labels, cluster_labels)

    return clustering_metric_dict
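A small usage sketch (not part of the original example) showing how the dictionary is filled; it assumes the sklearn imports below, which the snippet does not show:

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import (silhouette_score, calinski_harabasz_score,
                             davies_bouldin_score, v_measure_score,
                             fowlkes_mallows_score, homogeneity_score,
                             normalized_mutual_info_score,
                             adjusted_rand_score, completeness_score)

X, y_true = make_blobs(n_samples=300, centers=4, random_state=42)
y_pred = KMeans(n_clusters=4, n_init=10, random_state=42).fit_predict(X)

metrics = get_clustering_metrics(X, y_pred, ground_truth_labels=y_true)
for name, value in metrics.items():
    print(f"{name}: {value:.3f}")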
Example #7
	def _score_clustering(self, labels, metric='vm'):
		# Score clustering compared to true model
		if metric=='fm':
			score = fowlkes_mallows_score(self.true_labels_, labels)
		elif metric=='ami':
			score = adjusted_mutual_info_score(self.true_labels_, labels)
		else:
			score = v_measure_score(self.true_labels_[labels>0], labels[labels>0])
		return score
Example #8
def _clustering_evaluation(label, labels_true, digits):
    if labels_true is None:
        FM = None
        ARI = None
    else:
        ARI = round(adjusted_rand_score(labels_true, label), digits)
        FM = round(fowlkes_mallows_score(labels_true, label), digits)

    return ARI, FM
Example #9
def get_landmarking(dataset_name, df):
    start = time.time()
    record = {'dataset': dataset_name.split('.')[0]}
    results = []
    n_samples = int(len(df)*0.1) if len(df) > 400 else min(df.shape[0], 40)
    data = df.sample(n=n_samples, replace=False)
    labels = get_dbscan(data)
    k = len(np.unique(labels))
    labels2 = get_Kmeans(data, k, 40)
    full_tree = DecisionTreeClassifier()
    full_tree.fit(data, labels)
    worst_attr = np.argmin(full_tree.feature_importances_)

    X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.3)
    best_stump = DecisionTreeClassifier(max_depth=1)
    random_stump = DecisionTreeClassifier(splitter="random", max_depth=1)
    worst_stump = DecisionTreeClassifier(max_depth=1)
    elite_knn = KNeighborsClassifier(n_neighbors=1)
    one_knn = KNeighborsClassifier(n_neighbors=1,
            algorithm="auto",
            weights="uniform",
            p=2,
            metric="minkowski")
    nb = GaussianNB()
    lda = LinearDiscriminantAnalysis()
    best_stump.fit(X_train, y_train)
    random_stump.fit(X_train, y_train)
    worst_stump.fit(X_train.iloc[:, worst_attr].values.reshape(-1, 1), y_train)
    elite_knn.fit(X_train, y_train)
    one_knn.fit(X_train, y_train)
    # lda.fit(X_train, y_train)
    nb.fit(X_train, y_train)

    record['LM1'] = np.log2(df.shape[0])
    record['LM2'] = np.log2(df.shape[1])
    record['LM3'] = accuracy_score(best_stump.predict(X_test), y_test)
    # record['LM4'] = f1_score(best_stump.predict(X_test), y_test, average='weighted')
    record['LM5'] = accuracy_score(random_stump.predict(X_test), y_test)
    # record['LM6'] = f1_score(random_stump.predict(X_test), y_test, average='weighted')
    # record['LM7'] = model.inertia_
    record['LM8'] = accuracy_score(elite_knn.predict(X_test), y_test)
    # record['LM9'] = f1_score(elite_knn.predict(X_test), y_test, average='weighted')
    # record['LM10'] = accuracy_score(lda.predict(X_test), y_test)
    # record['LM11'] = f1_score(lda.predict(X_test), y_test, average='weighted')
    record['LM12'] = accuracy_score(nb.predict(X_test), y_test)
    # record['LM13'] = f1_score(nb.predict(X_test), y_test, average='weighted')
    record['LM14'] = accuracy_score(one_knn.predict(X_test), y_test)
    # record['LM15'] = f1_score(one_knn.predict(X_test), y_test, average='weighted')
    record['LM16'] = accuracy_score(worst_stump.predict(X_test.iloc[:, worst_attr].values.reshape(-1, 1)), y_test)
    # record['LM17'] = f1_score(worst_stump.predict(X_test.iloc[:, worst_attr].values.reshape(-1, 1)), y_test, average='weighted')
    record['LM18'] = adjusted_rand_score(labels, labels2)
    record['LM19'] = adjusted_mutual_info_score(labels, labels2)
    record['LM20'] = completeness_score(labels, labels2)
    record['LM21'] = fowlkes_mallows_score(labels, labels2)

    end = time.time()
    return record, (df.shape[0], df.shape[1], end-start)
Example #10
def test_int_overflow_mutual_info_fowlkes_mallows_score():
    # Test overflow in mutual_info_classif and fowlkes_mallows_score
    x = np.array([1] * (52632 + 2529) + [2] * (14660 + 793) + [3] * (3271 +
                 204) + [4] * (814 + 39) + [5] * (316 + 20))
    y = np.array([0] * 52632 + [1] * 2529 + [0] * 14660 + [1] * 793 +
                 [0] * 3271 + [1] * 204 + [0] * 814 + [1] * 39 + [0] * 316 +
                 [1] * 20)

    assert_all_finite(mutual_info_score(x, y))
    assert_all_finite(fowlkes_mallows_score(x, y))
Example #11
def test_int_overflow_mutual_info_fowlkes_mallows_score():
    # Test overflow in mutual_info_classif and fowlkes_mallows_score
    x = np.array([1] * (52632 + 2529) + [2] * (14660 + 793) + [3] *
                 (3271 + 204) + [4] * (814 + 39) + [5] * (316 + 20))
    y = np.array([0] * 52632 + [1] * 2529 + [0] * 14660 + [1] * 793 +
                 [0] * 3271 + [1] * 204 + [0] * 814 + [1] * 39 + [0] * 316 +
                 [1] * 20)

    assert_all_finite(mutual_info_score(x, y))
    assert_all_finite(fowlkes_mallows_score(x, y))
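Context for the overflow test above: with roughly 75,000 samples the pair-count terms (pk, qk) scale like n squared, which exceeds the 32-bit integer range, so the intermediate sums must be promoted to 64-bit integers or floats. A quick check:

import numpy as np

n = 52632 + 2529 + 14660 + 793 + 3271 + 204 + 814 + 39 + 316 + 20
print(n)                                  # 75278 samples
print(n ** 2)                             # ~5.7e9, the scale of pk and qk
print(n ** 2 > np.iinfo(np.int32).max)    # True -> int32 arithmetic would overflow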
Example #12
def cluster_performance(y_true, y_pred):
    '''
    Return the FM index and the Rand index.
    :param y_true: cluster partition of the reference model, of type ndarray
    :param y_pred: cluster partition produced by the clustering model, of type ndarray
    :return: FM index, Rand index
    '''
    FM = fowlkes_mallows_score(y_true, y_pred)
    Rand = adjusted_rand_score(y_true, y_pred)
    return FM, Rand
Example #13
def cluster_performance(y_true, y_pred):
    '''
    Return the FM index and the Rand index.
    :param y_true: cluster partition of the reference model, of type ndarray
    :param y_pred: cluster partition produced by the clustering model, of type ndarray
    :return: FM index, Rand index
    '''

    #********* Begin *********#
    return fowlkes_mallows_score(y_true,
                                 y_pred), adjusted_rand_score(y_true, y_pred)
Example #14
def print_stats(x, y, quiet=True):
    ari = adjusted_rand_score(x, y)
    ami = adjusted_mutual_info_score(x, y)
    fms = fowlkes_mallows_score(x, y)

    if not quiet:
        print("ARI: {}".format(ari), file=sys.stderr)
        print("AMI: {}".format(ami), file=sys.stderr)
        print("FMS: {}".format(fms), file=sys.stderr)

    return ari, ami, fms
Example #15
def cluster_performance(y_true, y_pred):
    """
    返回Rand指数和FM指数
    :param y_true:参考模型的簇划分,类型为ndarray
    :param y_pred:聚类模型给出的簇划分,类型为ndarray
    :return: Rand指数,FM指数
    """
    # ********* Begin *********#
    rand = adjusted_rand_score(y_true, y_pred)
    fm = fowlkes_mallows_score(y_true, y_pred)
    return fm, rand
Example #16
def test_fowlkes_mallows_score_properties():
    # handcrafted example
    labels_a = np.array([0, 0, 0, 1, 1, 2])
    labels_b = np.array([1, 1, 2, 2, 0, 0])
    expected = 1. / np.sqrt((1. + 3.) * (1. + 2.))
    # FMI = TP / sqrt((TP + FP) * (TP + FN))

    score_original = fowlkes_mallows_score(labels_a, labels_b)
    assert_almost_equal(score_original, expected)

    # symmetric property
    score_symmetric = fowlkes_mallows_score(labels_b, labels_a)
    assert_almost_equal(score_symmetric, expected)

    # permutation property
    score_permuted = fowlkes_mallows_score((labels_a + 1) % 3, labels_b)
    assert_almost_equal(score_permuted, expected)

    # symmetric and permutation (both together)
    score_both = fowlkes_mallows_score(labels_b, (labels_a + 2) % 3)
    assert_almost_equal(score_both, expected)
Example #17
def test_fowlkes_mallows_score_properties():
    # handcrafted example
    labels_a = np.array([0, 0, 0, 1, 1, 2])
    labels_b = np.array([1, 1, 2, 2, 0, 0])
    expected = 1. / np.sqrt((1. + 3.) * (1. + 2.))
    # FMI = TP / sqrt((TP + FP) * (TP + FN))

    score_original = fowlkes_mallows_score(labels_a, labels_b)
    assert_almost_equal(score_original, expected)

    # symmetric property
    score_symmetric = fowlkes_mallows_score(labels_b, labels_a)
    assert_almost_equal(score_symmetric, expected)

    # permutation property
    score_permuted = fowlkes_mallows_score((labels_a + 1) % 3, labels_b)
    assert_almost_equal(score_permuted, expected)

    # symmetric and permutation (both together)
    score_both = fowlkes_mallows_score(labels_b, (labels_a + 2) % 3)
    assert_almost_equal(score_both, expected)
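The handcrafted expectation above can be verified by explicit pair counting, following the FMI = TP / sqrt((TP + FP) * (TP + FN)) comment in the test:

from itertools import combinations
import numpy as np

labels_a = np.array([0, 0, 0, 1, 1, 2])
labels_b = np.array([1, 1, 2, 2, 0, 0])

tp = fp = fn = 0
for i, j in combinations(range(len(labels_a)), 2):
    same_a = labels_a[i] == labels_a[j]
    same_b = labels_b[i] == labels_b[j]
    tp += same_a and same_b      # pair grouped together in both clusterings
    fp += same_a and not same_b  # together in a, split in b
    fn += not same_a and same_b  # split in a, together in b

print(tp, fp, fn)                           # 1 3 2
print(tp / np.sqrt((tp + fp) * (tp + fn)))  # equals 1 / sqrt(4 * 3)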
Example #18
def Porownaj_algorytmy2(data, klasy, labels, baza):
    """
    Computes the AM, AR and FM indices for the algorithm I wrote myself.
    """
    wektor = []

    # my algorithm
    wynikM = spectral_clustering(data, k=klasy, M=5)
    wektor.append([fowlkes_mallows_score(labels, wynikM), adjusted_mutual_info_score(labels, wynikM), adjusted_rand_score(labels, wynikM), baza])

    index = ["Moj"]

    return pd.DataFrame(wektor, index=index, columns=["FM", "AM", "AR", "Dane"])
Example #19
 def evaluate(self):
     eval_result_dict = {}
     eval_result_dict['ami'] = adjusted_mutual_info_score(
         self.data['true_y'], self.data['pred_y'])
     eval_result_dict['rand'] = adjusted_rand_score(self.data['true_y'],
                                                    self.data['pred_y'])
     eval_result_dict['comp'] = completeness_score(self.data['true_y'],
                                                   self.data['pred_y'])
     eval_result_dict['fow'] = fowlkes_mallows_score(
         self.data['true_y'], self.data['pred_y'])
     eval_result_dict['hom'] = homogeneity_score(self.data['true_y'],
                                                 self.data['pred_y'])
     eval_result_dict['nmi'] = normalized_mutual_info_score(
         self.data['true_y'], self.data['pred_y'])
     eval_result_dict['v_score'] = v_measure_score(self.data['true_y'],
                                                   self.data['pred_y'])
     return eval_result_dict
Example #20
def cluster_hac(num_k):
    feature_ds, label_ds = read_dataset()

    user_max_id = num_k - 1
    sub_feature_ds = []
    sub_label_ds = []
    for i in range(0, len(label_ds)):
        if label_ds[i] <= user_max_id:
            sub_feature_ds.append(feature_ds[i])
            sub_label_ds.append(label_ds[i])

    feature_array = np.array(sub_feature_ds)

    x_scalar = StandardScaler()
    x = x_scalar.fit_transform(feature_array)

    pca = PCA(n_components=0.999)
    components = pca.fit_transform(x)
    hac = AgglomerativeClustering(n_clusters=num_k, linkage='average')
    hac.fit_predict(components)
    print(fowlkes_mallows_score(hac.labels_, sub_label_ds))
Example #21
def cluster_kmeans(num_k):
    feature_ds, label_ds = read_dataset()

    user_max_id = num_k - 1
    sub_feature_ds = []
    sub_label_ds = []
    for i in range(0, len(label_ds)):
        if label_ds[i] <= user_max_id:
            sub_feature_ds.append(feature_ds[i])
            sub_label_ds.append(label_ds[i])

    feature_array = np.array(sub_feature_ds)

    x_scalar = StandardScaler()
    x = x_scalar.fit_transform(feature_array)

    pca = PCA(n_components=0.999)
    components = pca.fit_transform(x)
    kmeans = KMeans(n_clusters=num_k, random_state=0)
    kmeans.fit_predict(components)
    print(fowlkes_mallows_score(kmeans.labels_, sub_label_ds))
Example #22
def compute_external_metrics(labels_true: List[str],
                             labels_pred: List[int]) -> ExternalEvaluation:
    if len(labels_true) == 0 and len(labels_pred) == 0:
        return None

    homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(
        labels_true, labels_pred)
    adjusted_mutual_info = adjusted_mutual_info_score(labels_true, labels_pred)
    adjusted_rand_index = adjusted_rand_score(labels_true, labels_pred)
    fowlkes_mallows = fowlkes_mallows_score(labels_true, labels_pred)

    mat = contingency_matrix(labels_true, labels_pred)
    purity = purity_score(mat)
    inverse_purity = purity_score(mat, inverse=True)

    return ExternalEvaluation(homogeneity=homogeneity,
                              completeness=completeness,
                              v_measure=v_measure,
                              adjusted_mutual_information=adjusted_mutual_info,
                              adjusted_rand_index=adjusted_rand_index,
                              fowlkes_mallows=fowlkes_mallows,
                              purity=purity,
                              inverse_purity=inverse_purity)
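The example relies on a purity_score helper that is not shown. A common definition, given here only as an assumption about what the author intended, takes the contingency matrix from sklearn (rows = true classes, columns = predicted clusters), sums the per-cluster majorities, and divides by the sample count; inverse purity swaps the roles of the two labelings:

import numpy as np

def purity_score(contingency, inverse=False):
    # Purity: for each predicted cluster (column), take the size of its
    # majority true class and divide the summed majorities by n.
    # Inverse purity does the same over the true classes (rows).
    axis = 1 if inverse else 0
    return np.sum(np.max(contingency, axis=axis)) / np.sum(contingency)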
Example #23
 def externalEval(self, y_pred, true_label):
     true_label = np.array(true_label)
     n_cluster = len(set(true_label))
     y_pred_modi = y_pred.copy()
     result = [[] for i in range(len(set(y_pred)))]
     for i in range(len(y_pred)):
         result[y_pred[i]].append(i)
     dict1 = dict.fromkeys([i for i in range(n_cluster)], None)
     for i in list(dict1.keys()):
         dict1[i] = []
     nummostnum = 0
     for i in range(len(result)):
         if len(true_label[result[i]]) > 0:
             mostnum = Counter(true_label[result[i]]).most_common(1)[0][0]
             nummostnum += Counter(
                 true_label[result[i]]).most_common(1)[0][1]
             dict1[mostnum] += (result[i])
     for r in list(dict1.keys()):
         for i in dict1[r]:
             y_pred_modi[i] = r
     nmi = normalized_mutual_info_score(true_label, y_pred)
     purity = nummostnum / len(y_pred_modi)
     fowlkes_mallows = fowlkes_mallows_score(true_label, y_pred_modi)
     return nmi, purity, fowlkes_mallows
Example #24
def kmeans(data, k):

    centroid = initialize_centroids(data, k)
    a = np.zeros((k, k))
    b = np.zeros(k)
    c1 = np.zeros((k, k))
    d = np.zeros(k)
    clusnew = np.zeros(len(data))
    i = 1
    while (i < 100):
        clusters = closest_centroid(data, centroid, k)
        for l in range(0, k):
            centroid[l, :] = np.mean(data[(np.where(clusters == l))], axis=0)
        i = i + 1
        print(i)
    c = confusion_matrix(clusters, digits.target)
    for j in range(0, k):
        c1[j, :] = c[:, (np.argmax(c[j, :]))]
        clusnew[clusters == (np.argmax(c[j, :]))] = j
        d[j] = sum(c1[:, j])
    c1[:, (np.argmin(d))] = -1
    print('Confusion Matrix: ', c1)
    print('Fowlkes Mallows Score: ',
          fowlkes_mallows_score(digits.target, clusnew))
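The snippet calls initialize_centroids and closest_centroid without defining them (it also presumably gets digits and confusion_matrix from imports elsewhere in the file, as in Example #30). A minimal sketch of what such helpers conventionally look like, offered as an assumption rather than the original author's code:

import numpy as np

def initialize_centroids(data, k):
    # Pick k distinct data points at random as the starting centroids.
    rng = np.random.default_rng(0)
    idx = rng.choice(len(data), size=k, replace=False)
    return data[idx].astype(float)

def closest_centroid(data, centroid, k):
    # Assign every point to the index of its nearest centroid (Euclidean);
    # k is implied by centroid.shape[0] and kept only for signature compatibility.
    dists = np.linalg.norm(data[:, None, :] - centroid[None, :, :], axis=2)
    return np.argmin(dists, axis=1)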
Example #25
output_file = sys.argv[1]
correct_file = sys.argv[2]

values = np.loadtxt(correct_file, dtype=int)
num_lines = sum(1 for line in open(correct_file))
result = np.zeros(num_lines)

cur_clus = -1

with open(output_file) as f:
    content = f.readlines()

for i in range(0, len(content)):
    if (content[i][0] == '#'):
        cur_clus += 1
        continue

    result[int(content[i])] = cur_clus

net_score = fowlkes_mallows_score(values, result)
'''
unique, counts = np.unique(result, return_counts=True)
print np.asarray((unique, counts)).T

unique, counts = np.unique(values, return_counts=True)
print np.asarray((unique, counts)).T
'''
logs = open('DBSCANLogs.txt', 'a')
logs.write(str(net_score) + '\n')
logs.close()
Example #26
    def _eval_clustering(self, labels_true, labels_predicted):
        # To address when COP-KMeans fails to satisfy all constraints at a k:
        if labels_predicted is None:
            # return an empty dictionary to expose in the final output
            return {"nmi": None,
                    "ami": None,
                    "ari": None,
                    "fms": None,
                    "v_measure": None,
                    "bcubed_precision": None,
                    "bcubed_recall": None,
                    "bcubed_fscore": None,
                    "Silhouette": None,
                    "Calinski_harabasz": None,
                    "Davies_Bouldin": None
                    }

        nmi = normalized_mutual_info_score(labels_true,
                                           labels_predicted,
                                           average_method="max")

        ami = adjusted_mutual_info_score(labels_true,
                                         labels_predicted,
                                         average_method="arithmetic")

        ari = adjusted_rand_score(labels_true,
                                  labels_predicted)

        v_measure = v_measure_score(labels_true,
                                    labels_predicted,
                                    beta=1.0)

        fms = fowlkes_mallows_score(labels_true,
                                    labels_predicted)

        # Reshape labels for BCubed measures
        true_dict = self._reshape_labels_as_dicts(labels_true)
        pred_dict = self._reshape_labels_as_dicts(labels_predicted)

        bcubed_precision = bcubed.precision(cdict=pred_dict, ldict=true_dict)
        bcubed_recall = bcubed.recall(cdict=pred_dict, ldict=true_dict)
        bcubed_f1 = bcubed.fscore(bcubed_precision, bcubed_recall)

        # =====================================================================
        # Unsupervised Metrics
        # =====================================================================
        if labels_predicted.nunique() not in (1, len(self.data)):
            sil = silhouette_score(X=self.data,
                                   labels=labels_predicted,
                                   metric=self.distance_metric,
                                   random_state=13712)

            ch = calinski_harabasz_score(X=self.data, labels=labels_predicted)

            dv = davies_bouldin_score(X=self.data, labels=labels_predicted)
        else:
            sil = None
            ch = None
            dv = None

        ret = {}
        ret.update({"nmi": round(nmi, 4),
                    "ami": round(ami, 4),
                    "ari": round(ari, 4),
                    "fms": round(fms, 4),
                    "v_measure": round(v_measure, 4),
                    "bcubed_precision": round(bcubed_precision, 4),
                    "bcubed_recall": round(bcubed_recall, 4),
                    "bcubed_fscore": round(bcubed_f1, 4),
                    "Silhouette": round(sil, 4
                                        ) if sil is not None else None,
                    "Calinski_harabasz": round(ch, 4
                                               ) if ch is not None else None,
                    "Davies_Bouldin": round(dv, 4
                                            ) if dv is not None else None
                    # Here goes the unsupervised indices
                    })

        return ret
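_reshape_labels_as_dicts is not shown in this example. The bcubed package expects dictionaries mapping each item to a set of cluster labels, so a plausible implementation (an assumption, not the author's code) inside the same class would be:

    def _reshape_labels_as_dicts(self, labels):
        # bcubed.precision / bcubed.recall expect {item_id: {label, ...}} dicts;
        # each point here belongs to exactly one cluster, hence singleton sets.
        return {i: {label} for i, label in enumerate(labels)}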
Example #27
def _fm(labels, labels_true, digits):
    return round(fowlkes_mallows_score(labels_true, labels), digits)
Example #28
data_copy = copy.copy(data)

# Drop the class
inputs = data.drop('species', axis=1)

# Test from n_clusters = 2 until n_clusters = 6
for n_clusters in range(2, 6 + 1):
    # Fowlkes-Mallows and Silhouette evaluation:
    agglo = Agglomerative(n_clusters=n_clusters)
    agglo.fit(inputs)
    labels = np.array(agglo.predict(inputs))

    print("n_clusters =", n_clusters)

    print("Menggunakan metode Fowlkes-Mallows: ")
    fowlkes_mallows = fowlkes_mallows_score(labels, target)
    print("Fowlkes Mallows Score:", fowlkes_mallows)

    print("Menggunakan metode Silhouette:")
    silhouette_avg = silhouette_score(inputs, labels)
    print("Hasil rata-rata skor silhouette:", silhouette_avg)
    print()
    print()

    silhouette_values_per_point = silhouette_samples(inputs, labels)

    # Visualize Silhouette subplot
    # 1 row and 2 columns: Left -> silhouette plot and Right -> Cluster Visualization
    fig, silhouette_viz = plt.subplots(1)
    fig.set_size_inches(18, 7)
Example #29
def report_clustering(distance_file,
                      biom_file,
                      metadata_file,
                      num_clusters,
                      verbose,
                      L=2,
                      output_file=None):
    if not isinstance(distance_file, list):
        distance_matrix = CSV.read(distance_file)
    else:
        distance_matrix = distance_file

    if output_file is not None:
        f = open(output_file, 'w')

    output_matrix = []

    AgglomerativeCluster = AgglomerativeClustering(
        n_clusters=num_clusters, affinity='precomputed',
        linkage='complete').fit_predict(distance_matrix)
    KMedoidsCluster = KMedoids(n_clusters=num_clusters,
                               metric='precomputed',
                               method='pam',
                               init='heuristic').fit_predict(distance_matrix)

    PCoA_Samples = BW.extract_samples(biom_file)
    metadata = meta.extract_metadata(metadata_file)
    region_names = []
    for i in range(len(PCoA_Samples)):
        if metadata[PCoA_Samples[i]]['body_site'] not in region_names:
            region_names.append(metadata[PCoA_Samples[i]]['body_site'])
        PCoA_Samples[i] = region_names.index(
            metadata[PCoA_Samples[i]]['body_site'])

    if verbose and L == 1:
        print('Printing results for L1-UniFrac:')
    elif verbose and L == 2:
        print('Printing results for L2-UniFrac:')
    if verbose:
        print('Metric\t\t\t\t\t\t\tAgglomerativeClustering\t\tKMedoids')

    if output_file is not None:
        if L == 1:
            f.write('Printing results for L1-UniFrac:\n')
        elif L == 2:
            f.write('Printing results for L2-UniFrac:\n')
        f.write('Metric\t\t\t\tAgglomerativeClustering\t\t\tKMedoids\n')

    if L == 1:
        output_matrix.append(['Printing results for L1-UniFrac:'])
    if L == 2:
        output_matrix.append(['Printing results for L2-UniFrac:'])
    output_matrix.append(['Metric', 'AgglomerativeClustering', 'KMedoids'])

    RI1 = rand_score(PCoA_Samples, AgglomerativeCluster)
    RI2 = rand_score(PCoA_Samples, KMedoidsCluster)
    if verbose:
        print(f'Rand Index Score:               {RI1}\t\t\t{RI2}')
    ARI1 = adjusted_rand_score(PCoA_Samples, AgglomerativeCluster)
    ARI2 = adjusted_rand_score(PCoA_Samples, KMedoidsCluster)
    if verbose:
        print(f'Adjusted Rand Index Score:      {ARI1}\t\t\t{ARI2}')
    NMI1 = normalized_mutual_info_score(PCoA_Samples, AgglomerativeCluster)
    NMI2 = normalized_mutual_info_score(PCoA_Samples, KMedoidsCluster)
    if verbose:
        print(f'Normalized Mutual Index Score:  {NMI1}\t\t\t{NMI2}')
    AMI1 = adjusted_mutual_info_score(PCoA_Samples, AgglomerativeCluster)
    AMI2 = adjusted_mutual_info_score(PCoA_Samples, KMedoidsCluster)
    if verbose:
        print(f'Adjusted Mutual Info Score:     {AMI1}\t\t\t{AMI2}')
    FM1 = fowlkes_mallows_score(PCoA_Samples, AgglomerativeCluster)
    FM2 = fowlkes_mallows_score(PCoA_Samples, KMedoidsCluster)
    if verbose:
        print(f'Fowlkes Mallows Score:          {FM1}\t\t\t{FM2}')

    if output_file is not None:
        f.write(f'Rand Index Score:               {RI1}\t\t\t{RI2}\n')
        f.write(f'Adjusted Rand Index Score:      {ARI1}\t\t\t{ARI2}\n')
        f.write(f'Normalized Mutual Index Score:  {NMI1}\t\t\t{NMI2}\n')
        f.write(f'Adjusted Mutual Info Score:     {AMI1}\t\t\t{AMI2}\n')
        f.write(f'Fowlkes Mallows Score:          {FM1}\t\t\t{FM2}\n')

    output_matrix.append(['Rand Index Score:', RI1, RI2])
    output_matrix.append(['Adjusted Rand Index Score:', ARI1, ARI2])
    output_matrix.append(['Normalized Mutual Index Score:', NMI1, NMI2])
    output_matrix.append(['Adjusted Mutual Info Score:', AMI1, AMI2])
    output_matrix.append(['Fowlkes Mallows Score:', FM1, FM2])

    return output_matrix
Example #30
from time import time
import numpy as np
from scipy import ndimage
from matplotlib import pyplot as plt
from sklearn import manifold, datasets
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import confusion_matrix
from sklearn.metrics.cluster import fowlkes_mallows_score
from sklearn.preprocessing import scale
digits = datasets.load_digits(n_class=10)
X = scale(digits.data)
y = digits.target
n_samples, n_features = X.shape
X_red = manifold.SpectralEmbedding(n_components=2).fit_transform(X)
clusnew = np.zeros(len(X))
clustering = AgglomerativeClustering(linkage='ward', n_clusters=10)
t0 = time()
clustering.fit(X_red)
print("%s : %.2fs" % ('ward', time() - t0))
c1 = np.zeros((10, 10))
d = np.zeros(10)
c = confusion_matrix(clustering.labels_, y)
for j in range(0, 10):
    c1[j, :] = c[:, (np.argmax(c[j, :]))]
    clusnew[clustering.labels_ == (np.argmax(c[j, :]))] = j
    d[j] = sum(c1[:, j])
c1[:, (np.argmin(d))] = -1
print('Confusion Matrix: ', c1)
print('Fowlkes Mallows Score: ', fowlkes_mallows_score(y, clusnew))
Example #31
with open(fi, 'w') as outfile:
    json.dump(label_test.tolist(), outfile)
fi = os.getcwd() + "/svm/json/label_train.json"
with open(fi, 'w') as outfile:
    json.dump(label_train.tolist(), outfile)

print "\nLinear SVC: "
Classifier = svm.SVC(kernel='linear', probability=True)
Classifier.fit(feature_train, label_train)
joblib.dump(Classifier, 'linear_2.pkl')
print "predicting.."
predict = Classifier.predict(feature_test)
print "Expected output:", label_test
print "Predicted output:", predict
print "Confusion Matrix:\n", metrics.confusion_matrix(label_test, predict)
print "Fowlkes Mallows Score", fowlkes_mallows_score(label_test, predict)

try:
    print "Precision Score", precision_score(label_test, predict)
    print "Recall Score", recall_score(label_test, predict)
    print "F-measure", f1_score(label_test, predict)
    # exit()
except:
    pass

print "\nRBF SVC: "
Classifier = svm.SVC(kernel='rbf')
Classifier.fit(feature_train, label_train)
joblib.dump(Classifier, 'rbf_2.pkl')
print "predicting.."
predict = Classifier.predict(feature_test)
Example #32
    def compute_scores(self, x):

        self.cluster_labels = np.ndarray((x.shape[0], ))

        for i in range(0, x.shape[0], self.batch_size):
            predictions = self.kmeans.predict(x[i:(i + self.batch_size)])
            self.cluster_labels[i:(i + self.batch_size)] = predictions

        if (i + self.batch_size) > x.shape[0]:
            predictions = self.kmeans.predict(x[i:x.shape[0]])
            self.cluster_labels[i:x.shape[0]] = predictions

        confusion_matrix = cscores.contingency_matrix(self.labels_true,
                                                      self.labels_pred)
        purity_score = np.sum(np.amax(confusion_matrix,
                                      axis=0)) / np.sum(confusion_matrix)
        homogeneity_score, completeness_score, v_measure_score = cscores.homogeneity_completeness_v_measure(
            self.labels_true, self.labels_pred)

        scores = [
            #['calinski_harabasz_score', 'internal', cscores.calinski_harabasz_score(x, self.cluster_labels)],
            [
                'davies_bouldin_score', 'internal',
                metrics.davies_bouldin_score(x, self.cluster_labels)
            ],
            [
                'silhouette_score', 'internal',
                metrics.silhouette_score(x, self.cluster_labels)
            ],
            #['silhouette_samples', 'internal', cscores.silhouette_samples(x, self.cluster_labels)],
            ['purity_score', 'external', purity_score],
            [
                'adjusted_rand_score', 'external',
                cscores.adjusted_rand_score(self.labels_true, self.labels_pred)
            ],
            ['completeness_score', 'external', completeness_score],
            [
                'fowlkes_mallows_score', 'external',
                cscores.fowlkes_mallows_score(self.labels_true,
                                              self.labels_pred)
            ],
            ['homogeneity_score', 'external', homogeneity_score],
            [
                'adjusted_mutual_info_score', 'external',
                cscores.adjusted_mutual_info_score(self.labels_true,
                                                   self.labels_pred)
            ],
            [
                'mutual_info_score', 'external',
                cscores.mutual_info_score(self.labels_true, self.labels_pred)
            ],
            [
                'normalized_mutual_info_score', 'external',
                cscores.normalized_mutual_info_score(self.labels_true,
                                                     self.labels_pred)
            ],
            ['v_measure_score', 'external', v_measure_score]
        ]

        scores = pd.DataFrame(scores, columns=['name', 'type', 'score'])
        scores.to_csv(files.small_images_classes_kmeans_scores, index=False)
Example #33
	n_samples, n_features = X.shape
	np.random.seed(0)
	k=10
	labels_y = list(set(y))

	print 50*"_"
	print "KMeans clustering (implementation of algo from question 1a)"
	no_of_iterations = 10
	dat = {i: 0 for i in range(n_samples)}
	t0 = time()
	k_centers, dat = k_means(X, dat, k, no_of_iterations)
	y_pred1 = [value for key, value in dat.items()]
	c_m1 = confusion_matrix(y, y_pred1, labels_y)
	print("PROTOCOL1: The cluster predictions for 10 clusters, i.e k = 10 are:\n", getClusterRepresentatives(c_m1, k))
	print("PROTOCOL2: Confusion Matrix: \n", c_m1)
	print("PROTOCOL3: Fowlkes-Mallows score:", fowlkes_mallows_score(y, y_pred1))
	print("Time taken: %.2fs" % (time() - t0))

	print 50*"_"
	print "KMeans clustering (using sklearn)"
	clustering1 = KMeans(n_clusters=k, init='k-means++', n_init=10, max_iter=300, tol=0.0001, precompute_distances='auto', verbose=0, random_state=None, copy_x=True, n_jobs=1, algorithm='auto')
	t01 = time()
	y_pred11 = clustering1.fit_predict(X)
	c_m11 = confusion_matrix(y, y_pred11)
	print("PROTOCOL1: The cluster predictions for 10 clusters, i.e k = 10 are:\n", getClusterRepresentatives(c_m11, k))
	print("PROTOCOL2: Confusion Matrix: \n", c_m11)
	print("PROTOCOL3: Fowlkes-Mallows score:", fowlkes_mallows_score(y, y_pred11))
	print("Time taken: %.2fs" % (time() - t01))

	print 50*"_"
	print "Agglomerative Clustering with Ward linkage"