# Imports assumed by the example snippets below: mvlearn provides the
# multiview estimators and the UCI multiple features loader; SphericalKMeans
# is assumed to come from the spherecluster package; the rest is scikit-learn.
import numpy as np
from sklearn.cluster import KMeans, SpectralClustering
from sklearn.manifold import TSNE
from sklearn.metrics import normalized_mutual_info_score as nmi_score
from spherecluster import SphericalKMeans
from mvlearn.cluster import (MultiviewKMeans, MultiviewSphericalKMeans,
                             MultiviewSpectralClustering,
                             MultiviewCoRegSpectralClustering)
from mvlearn.datasets import load_UCImultifeature

RANDOM_SEED = 10  # arbitrary fixed seed for reproducibility


def perform_clustering(seed, m_data, labels, n_clusters):
    # Singleview spherical kmeans clustering
    # Cluster each view separately
    s_kmeans = SphericalKMeans(n_clusters=n_clusters,
                               random_state=seed,
                               n_init=100)
    s_clusters_v1 = s_kmeans.fit_predict(m_data[0])
    s_clusters_v2 = s_kmeans.fit_predict(m_data[1])

    # Concatenate the multiple views into a single view
    s_data = np.hstack(m_data)
    s_clusters = s_kmeans.fit_predict(s_data)

    # Compute nmi between true class labels and singleview cluster labels
    s_nmi_v1 = nmi_score(labels, s_clusters_v1)
    s_nmi_v2 = nmi_score(labels, s_clusters_v2)
    s_nmi = nmi_score(labels, s_clusters)
    print('Singleview View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1))
    print('Singleview View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2))
    print('Singleview Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

    # Multiview spherical kmeans clustering

    # Use the MultiviewSphericalKMeans instance to cluster the data
    m_kmeans = MultiviewSphericalKMeans(n_clusters=n_clusters,
                                        n_init=100,
                                        random_state=seed)
    m_clusters = m_kmeans.fit_predict(m_data)

    # Compute nmi between true class labels and multiview cluster labels
    m_nmi = nmi_score(labels, m_clusters)
    print('Multiview NMI Score: {0:.3f}\n'.format(m_nmi))

    return m_clusters
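
# A minimal usage sketch for the helper above (an assumption, not part of the
# original example): load two views of the UCI multiple features digits, as is
# done later on this page, and run the single-view/multiview comparison.
Xs, labels = load_UCImultifeature(select_labeled=list(range(5)), views=[0, 1])
m_clusters = perform_clustering(RANDOM_SEED, Xs, labels, n_clusters=5)
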
def perform_clustering(seed, m_data, labels, n_clusters, kernel='rbf'):

    # Single-view spectral clustering
    # Cluster each view separately
    s_spectral = SpectralClustering(n_clusters=n_clusters,
                                    random_state=seed,
                                    affinity=kernel,
                                    n_init=100)
    s_clusters_v1 = s_spectral.fit_predict(m_data[0])
    s_clusters_v2 = s_spectral.fit_predict(m_data[1])

    # Concatenate the multiple views into a single view
    s_data = np.hstack(m_data)
    s_clusters = s_spectral.fit_predict(s_data)

    # Compute nmi between true class labels and single-view cluster labels
    s_nmi_v1 = nmi_score(labels, s_clusters_v1)
    s_nmi_v2 = nmi_score(labels, s_clusters_v2)
    s_nmi = nmi_score(labels, s_clusters)
    print('Single-view View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1))
    print('Single-view View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2))
    print('Single-view Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

    # Multi-view spectral clustering

    # Use the MultiviewSpectralClustering instance to cluster the data
    m_spectral = MultiviewSpectralClustering(n_clusters=n_clusters,
                                             random_state=seed,
                                             affinity=kernel,
                                             n_init=100)
    m_clusters = m_spectral.fit_predict(m_data)

    # Compute nmi between true class labels and multi-view cluster labels
    m_nmi = nmi_score(labels, m_clusters)
    print('Multi-view NMI Score: {0:.3f}\n'.format(m_nmi))

    return m_clusters
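
# The spectral variant also exposes the affinity kernel. A hedged usage sketch
# (assuming the same Xs/labels as above): nearest-neighbor affinity instead of
# the default RBF kernel, which both SpectralClustering and
# MultiviewSpectralClustering accept.
m_clusters = perform_clustering(RANDOM_SEED, Xs, labels, n_clusters=5,
                                kernel='nearest_neighbors')
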
###############################################################################
# Multiview spherical KMeans clustering on 2 views
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Here we will demonstrate the performance of the multiview spherical kmeans
# clustering. We will evaluate the purity of the resulting clusters with
# respect to the class labels using the normalized mutual information metric.
#
# Use the MultiviewSphericalKMeans instance to cluster the data
m_kmeans = MultiviewSphericalKMeans(n_clusters=n_class,
                                    random_state=RANDOM_SEED)

m_clusters = m_kmeans.fit_predict(Xs)

# Compute nmi between true class labels and multiview cluster labels
m_nmi = nmi_score(labels, m_clusters)
print('Multiview NMI Score: {0:.3f}\n'.format(m_nmi))

###############################################################################
# Multiview spherical KMeans clustering results and the true clusters
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# We will display the clustering results of the multiview spherical kmeans
# algorithm below, along with the true class labels.

# Running TSNE to display clustering results via low dimensional embedding
tsne = TSNE()
new_data_1 = tsne.fit_transform(Xs[0])
new_data_2 = tsne.fit_transform(Xs[1])
new_data = [new_data_1, new_data_2]

display_plots('True Labels', new_data, labels)
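
# Note: display_plots is not defined in this excerpt. Below is a minimal
# sketch consistent with the call above (its name, signature, and layout are
# assumptions): scatter each view's 2-D embedding, colored by the given
# labels.
import matplotlib.pyplot as plt


def display_plots(title, data, cluster_labels):
    # One scatter subplot per view, colored by the supplied labels
    fig, axes = plt.subplots(1, len(data), figsize=(12, 5))
    for i, (ax, view) in enumerate(zip(axes, data)):
        ax.scatter(view[:, 0], view[:, 1], c=cluster_labels, s=10)
        ax.set_title('{0} - View {1:d}'.format(title, i + 1))
    plt.show()
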
n_class = 5
Xs, labels = load_UCImultifeature(
    select_labeled=list(range(n_class)), views=[0, 1])

###############################################################################
# Singleview spectral clustering
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Cluster each view separately and compute nmi

s_spectral = SpectralClustering(
    n_clusters=n_class, random_state=RANDOM_SEED, n_init=100)

for i in range(len(Xs)):
    s_clusters = s_spectral.fit_predict(Xs[i])
    s_nmi = nmi_score(labels, s_clusters, average_method='arithmetic')
    print('Single-view View {0:d} NMI Score: {1:.3f}\n'.format(i + 1, s_nmi))

# Concatenate the multiple views into a single view and produce clusters
s_data = np.hstack(Xs)
s_clusters = s_spectral.fit_predict(s_data)

s_nmi = nmi_score(labels, s_clusters)
print('Single-view Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

###############################################################################
# Co-Regularized multiview spectral clustering
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Use the MultiviewSpectralClustering instance to cluster the data
m_spectral1 = MultiviewCoRegSpectralClustering(n_clusters=n_class,
                                               random_state=RANDOM_SEED,
                                               n_init=100)
m_clusters1 = m_spectral1.fit_predict(Xs)

# Compute nmi between true class labels and co-regularized multiview cluster
# labels
m_nmi1 = nmi_score(labels, m_clusters1)
print('Co-Regularized Multiview NMI Score: {0:.3f}\n'.format(m_nmi1))
###############################################################################
# Multiview KMeans clustering on 2 views
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Here we compare multiview kmeans clustering against clustering each view
# separately and against clustering the two views concatenated together.


# Singleview kmeans clustering

# Cluster each view separately
s_kmeans = KMeans(n_clusters=n_class, random_state=RANDOM_SEED)
s_clusters_v1 = s_kmeans.fit_predict(Xs[0])
s_clusters_v2 = s_kmeans.fit_predict(Xs[1])

# Concatenate the multiple views into a single view
s_data = np.hstack(Xs)
s_clusters = s_kmeans.fit_predict(s_data)

# Compute nmi between true class labels and singleview cluster labels
s_nmi_v1 = nmi_score(labels, s_clusters_v1)
s_nmi_v2 = nmi_score(labels, s_clusters_v2)
s_nmi = nmi_score(labels, s_clusters)
print('Singleview View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1))
print('Singleview View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2))
print('Singleview Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

# Multiview kmeans clustering

# Use the MultiviewKMeans instance to cluster the data
m_kmeans = MultiviewKMeans(n_clusters=n_class, random_state=RANDOM_SEED)
m_clusters = m_kmeans.fit_predict(Xs)

# Compute nmi between true class labels and multiview cluster labels
m_nmi = nmi_score(labels, m_clusters)
print('Multiview NMI Score: {0:.3f}\n'.format(m_nmi))
def accuracy_scores_binary(y_true, y_pred):
    '''
    A function to calculate accuracy measures for a binary classification.
    Function written by Osian Roberts.

    Parameters:
    :param y_true: observed binary labels, where 0 is absence and 1 is presence.
    :param y_pred: predicted binary labels, where 0 is absence and 1 is presence.

    :returns: a list containing two numpy.arrays - (metrics: name of test metrics, scores: test scores for each metric)

    Reference: See pages 253 - 255 in:
    Guisan et al. (2017). Habitat suitability and distribution models: with applications in R.
    '''
    import numpy

    # check inputs:
    if not isinstance(y_true, numpy.ndarray):
        y_true = numpy.array(y_true)
    if not isinstance(y_pred, numpy.ndarray):
        y_pred = numpy.array(y_pred)
    if y_true.ndim != 1:
        raise ValueError('ERROR: the true labels are not in a 1D array.')
    if y_pred.ndim != 1:
        raise ValueError('ERROR: the predicted labels are not in a 1D array.')
    if y_true.size != y_pred.size:
        raise ValueError('ERROR: unequal number of binary labels.')

    # ensure that y_true, y_pred contain binary labels (i.e. 0 or 1 values):
    y_true = y_true.astype('uint8')
    y_pred = y_pred.astype('uint8')
    if numpy.min(y_true) != 0 or numpy.max(y_true) != 1:
        raise ValueError('ERROR: the true labels are not binary (zero or one values).')
    if numpy.min(y_pred) != 0 or numpy.max(y_pred) != 1:
        raise ValueError('ERROR: the predicted labels are not binary (zero or one values).')

    metrics = numpy.array(['Prevalence', 'Overall Diagnostic Power', 'Correct Classification Rate',
                           'Misclassification Rate', 'Presence Predictive Power', 'Absence Predictive Power',
                           'Accuracy', 'Balanced Accuracy', 'Sensitivity', 'Specificity', 'Precision', 'F1 Score',
                           'Matthews Correlation', 'Cohen Kappa', 'Normalised Mutual Information',
                           'Hanssen-Kuiper skill'])

    try:
        n_presence = numpy.where(y_true == 1)[0].size
        n_absence = numpy.where(y_true == 0)[0].size

        # calculate true-presence, true-absence, false-presence and false-absence:
        TP = numpy.where((y_true == 1) & (y_pred == 1))[0].size
        TA = numpy.where((y_true == 0) & (y_pred == 0))[0].size
        FP = numpy.where((y_true == 0) & (y_pred == 1))[0].size
        FA = numpy.where((y_true == 1) & (y_pred == 0))[0].size  # aka sweet FA!

        # proportion of presence records:
        prevalence = (TP + FA) / y_true.size

        # proportion of absence records:
        ODP = 1 - prevalence

        # correct classification & misclassification rate
        CCR = (TP + TA) / y_true.size
        MR = (FP + FA) / y_true.size

        # Sensitivity (aka Recall or True Positive Rate):
        sensitivity = TP / n_presence

        # false presence rate - inverse of sensitivity (redundant?)
        #FPR = 1  - sensitivity

        # Presence and absence predictive power:
        PPP = TP / (TP + FP)
        APP = TA / (TA + FA)

        # Specificity (aka True Negative Rate):
        specificity = TA / n_absence

        # false positive rate - inverse of specificity (redundant?)
        #FPR = 1 - specificity

        # Accuracy scores:
        accuracy = (TP + TA) / (n_presence + n_absence)
        balanced_accuracy = ((TP / n_presence) + (TA / n_absence)) / 2

        # precision:
        precision = TP / (TP + FP)

        # F1 score:
        f1_score = 2 * TP / ((2*TP) + FP + FA)

        # Matthews Correlation Coefficient:
        MCC = ((TP * TA) - (FP * FA)) / (((TP + FP) * (TP + FA) * (TA + FP) * (TA + FA))**0.5)

        # Hanssen-Kuiper skill (unreliable when TA is very large):
        TSS = sensitivity + specificity - 1
        del n_presence, n_absence, TP, TA, FP, FA

        from sklearn.metrics import normalized_mutual_info_score
        nmi = normalized_mutual_info_score(y_true, y_pred)

        # Cohen's Kappa (caution: sensitive to sample size and proportion of presence records):
        from sklearn.metrics import cohen_kappa_score
        kappa = cohen_kappa_score(y_true, y_pred)

        scores = numpy.array([prevalence, ODP, CCR, MR, PPP, APP, accuracy, balanced_accuracy, sensitivity,
                              specificity, precision, f1_score, MCC, kappa, nmi, TSS]).round(decimals=6)
        del prevalence, ODP, CCR, MR, PPP, APP, accuracy, balanced_accuracy, sensitivity
        del specificity, precision, f1_score, MCC, kappa, nmi, TSS
    except Exception:
        scores = numpy.zeros(len(metrics))

    if metrics.size == scores.size:
        return [metrics, scores]
    else:
        raise ValueError('ERROR: unable to calculate accuracy metrics.')
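
# A quick sanity-check call on toy labels (hypothetical data, not from the
# original source): prints each metric name alongside its score.
import numpy

y_true = numpy.array([1, 1, 0, 0, 1, 0, 1, 0])
y_pred = numpy.array([1, 0, 0, 0, 1, 1, 1, 0])
for metric, score in zip(*accuracy_scores_binary(y_true, y_pred)):
    print('{0}: {1}'.format(metric, score))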