Example #1
0
class FCmeansDaily:
    """Fuzzy C-Means clustering over a sequence of daily records.

    On construction a five-cluster ``FCM`` model (``random_state=0``) is
    fitted to a four-column feature matrix derived from ``training_data``.
    Each record must expose ``upper_shadow_length``, ``lower_shadow_length``,
    ``body_length`` and ``color`` attributes.
    """

    def __init__(self, training_data):
        """Store ``training_data`` and fit the clustering model on it."""
        self.training_data = training_data
        features = self.preprocess_daily_data_to_fit_fcmeans_library(
            training_data)
        self.fcmeans = FCM(n_clusters=5, random_state=0)
        self.fcmeans.fit(features)

    def preprocess_daily_data_to_fit_fcmeans_library(self, training_data):
        """Return a numpy array with one row per record.

        Columns are, in order: upper shadow length, lower shadow length,
        body length and color. Records are fetched by position
        (``training_data[0]`` ... ``training_data[len - 1]``).
        """
        records = (training_data[pos] for pos in range(len(training_data)))
        rows = [[
            record.upper_shadow_length,
            record.lower_shadow_length,
            record.body_length,
            record.color,
        ] for record in records]
        return np.array(rows)

    def get_clusters(self):
        """Return the cluster centers of the fitted model."""
        return self.fcmeans.centers

    def get_labels_list(self):
        """Return the model's predictions for the stored training data.

        The feature matrix is rebuilt from ``self.training_data`` on every
        call.
        """
        features = self.preprocess_daily_data_to_fit_fcmeans_library(
            self.training_data)
        return self.fcmeans.predict(features)

    def get_labels_for_each_data_point(self, data_point_index):
        """Return the predicted label of the record at ``data_point_index``."""
        all_labels = self.get_labels_list()
        return all_labels[data_point_index]
def fcm_alg(dataset_num,
            num_of_samples=None,
            num_of_clusters=None,
            get_figure=False,
            calc_ami_score=True,
            plot_anomaly=False):
    """Cluster a data set with Fuzzy C-Means and optionally plot / score it.

    Args:
        dataset_num: Identifier handed to ``d.get_data`` and used in the
            plot title.
        num_of_samples: Sample count forwarded to ``d.get_data``. When
            ``plot_anomaly`` is set, it is capped at 6000 (and defaults to
            6000 if omitted).
        num_of_clusters: Number of clusters; when ``None`` it is taken from
            ``d.get_num_of_clusters(tag)``.
        get_figure: Show a scatter plot of the clustering.
        calc_ami_score: Return the adjusted mutual information score.
        plot_anomaly: Relabel points with a negative silhouette value as
            ``-1`` and draw them in black; implies plotting.

    Returns:
        The adjusted mutual information score between the true labels and
        the predicted labels when ``calc_ami_score`` is true, else ``None``.
        With ``plot_anomaly`` the score is computed on the labels after the
        ``-1`` relabelling.
    """
    if plot_anomaly:
        if num_of_samples is None or num_of_samples > 6000:
            num_of_samples = 6000

    data = d.get_data(dataset_num, n_samples=num_of_samples)
    # frame to cluster on; the plotting code below reads its 'PC1' and 'PC2'
    # columns
    df = d.get_df_to_cluster(data)
    tag = d.get_tag(data)

    # fall back to the number of clusters derived from the tag
    if num_of_clusters is None:
        num_of_clusters = d.get_num_of_clusters(tag)

    fcm = FCM(n_clusters=num_of_clusters)
    fcm.fit(df)
    labels = fcm.predict(df)

    if get_figure or plot_anomaly:
        if plot_anomaly:
            silhouettes = silhouette_samples(df, labels)
            labels[silhouettes < 0] = -1

        regular = labels != -1
        anomalous = labels == -1

        # regular points coloured by cluster, anomalies in black, centroids
        # as black stars
        plt.scatter(df['PC1'][regular], df['PC2'][regular], c=labels[regular])
        plt.scatter(df['PC1'][anomalous],
                    df['PC2'][anomalous],
                    c=['black'] * len(labels[anomalous]),
                    label='Anomaly')
        centers = fcm.centers
        plt.scatter(centers["PC1"],
                    centers["PC2"],
                    marker="*",
                    label='centroid',
                    c='black')
        plt.legend()
        plt.title('DS{} - Fuzzy C Means'.format(dataset_num))

        # the figure is only displayed, not written to disk
        plt.show()

    if not calc_ami_score:
        return None

    labels_true = d.get_labels(tag)
    return adjusted_mutual_info_score(labels_true=labels_true,
                                      labels_pred=labels)
Example #3
0
 def fuzzy_cmeans_clustering(self, show_plt=True):
     """Cluster ``self.data`` into ``self.k`` groups with Fuzzy C-Means.

     When ``show_plt`` is true, the first two columns of ``self.data`` are
     shown as a scatter plot coloured by predicted label.

     Returns the adjusted mutual information score between ``self.tags``
     and the predicted labels.
     """
     model = FCM(n_clusters=self.k)
     model.fit(self.data)
     predicted = model.predict(self.data)

     if show_plt:
         plt.title('Fuzzy C Means')
         xs = self.data[:, 0]
         ys = self.data[:, 1]
         plt.scatter(xs, ys, c=predicted, s=7, cmap='rainbow')
         plt.show()

     score = metrics.adjusted_mutual_info_score(self.tags, predicted)
     return score
def fuzzy_cmeans_clustering(dataset, tags, k, show_plt=True):
    """Cluster ``dataset`` into ``k`` groups with Fuzzy C-Means.

    When ``show_plt`` is true, the first two columns of ``dataset`` are shown
    as a scatter plot coloured by predicted label.

    Returns the adjusted mutual information score between ``tags`` and the
    predicted labels.
    """
    model = FCM(n_clusters=k)
    model.fit(dataset)
    predicted = model.predict(dataset)

    if show_plt:
        plt.title('Fuzzy C Means')
        xs = dataset[:, 0]
        ys = dataset[:, 1]
        plt.scatter(xs, ys, c=predicted, s=7, cmap='rainbow')
        plt.show()

    score = metrics.adjusted_mutual_info_score(tags, predicted)
    return score
Example #5
0
def decompose(
    path_to_features: str,
    path_to_images: str,
    path_to_decomposed_images_1: str,
    path_to_decomposed_images_2: str,
    class_name: str,
    fc: int
):
    """
    Split a folder of images in two according to Fuzzy C-Means clusters.

    The feature matrix stored at ``path_to_features`` is clustered into
    ``fc`` clusters. The i-th entry returned by ``os.listdir(path_to_images)``
    is paired with the i-th predicted cluster index: images whose index is 1
    are written to ``path_to_decomposed_images_1``, all others to
    ``path_to_decomposed_images_2``.

    params:
        <string> path_to_features: ``.npy`` file with the extracted features
        <string> path_to_images: folder holding the source images
        <string> path_to_decomposed_images_1: destination for cluster 1
        <string> path_to_decomposed_images_2: destination for every other cluster
        <string> class_name: label shown in the progress-bar description
        <int> fc: Number of clusters
    """

    # Cluster the extracted features and get one cluster index per row
    features = np.load(path_to_features)
    fcm = FCM(n_clusters=fc)
    fcm.fit(features)
    idx = fcm.predict(features)

    # File names in the order the operating system lists them
    images = os.listdir(path_to_images)

    progress_bar = tqdm(range(len(images)))
    progress_bar.set_description(f"Composing {class_name} images")
    for i in progress_bar:
        image_name = images[i]
        I = plt.imread(os.path.join(path_to_images, image_name))

        # Cluster 1 goes to the first folder, everything else to the second
        if idx[i] == 1:
            target_dir = path_to_decomposed_images_1
        else:
            target_dir = path_to_decomposed_images_2
        plt.imsave(os.path.join(target_dir, image_name), I)
Example #6
0
def fuuzyc(train_x1, train_y):
    """Score a 7-cluster Fuzzy C-Means clustering of PCA-reduced data.

    ``train_x1`` is projected onto two principal components, clustered with
    ``FCM(n_clusters=7)``, and the predicted labels are compared with
    ``train_y``.

    Returns:
        ``(purity, f1, silhouette)`` where ``purity`` is
        ``purity_score(train_y, labels)``, ``f1`` is the weighted F1 score
        against the flattened ``train_y`` and ``silhouette`` is the
        silhouette score of the labels on the reduced data.
    """
    # reduce to two principal components before clustering
    reducer = PCA(n_components=2)
    reducer.fit(train_x1)
    reduced = reducer.transform(train_x1)

    clusterer = FCM(n_clusters=7)
    clusterer.fit(reduced)
    predicted = clusterer.predict(reduced)

    purity = purity_score(train_y, predicted)
    silhouette = silhouette_score(reduced, predicted)
    weighted_f1 = f1_score(train_y.flatten(), predicted, average='weighted')
    return purity, weighted_f1, silhouette
Example #7
0
def fcm_alg(dataset_num,
            num_of_samples=10000,
            num_of_clusters=None,
            get_figure=False,
            calc_ami_score=True):
    """Cluster a data set with Fuzzy C-Means; optionally plot, save and score.

    Args:
        dataset_num: Identifier handed to ``d.get_data``; also used in the
            plot title and the output path.
        num_of_samples: Sample count forwarded to ``d.get_data``.
        num_of_clusters: Number of clusters; when ``None`` it is taken from
            ``d.get_num_of_clusters(tag)``.
        get_figure: Draw the clustering, save it under
            ``images/dataset <dataset_num>/`` and show it.
        calc_ami_score: Return the adjusted mutual information score.

    Returns:
        The adjusted mutual information score between the true labels and
        the predicted labels when ``calc_ami_score`` is true, else ``None``.
    """
    data = d.get_data(dataset_num, n_samples=num_of_samples)
    # frame to cluster on; the plotting code below reads its "PC1" and "PC2"
    # columns
    df = d.get_df_to_cluster(data)
    tag = d.get_tag(data)

    # fall back to the number of clusters derived from the tag
    if num_of_clusters is None:
        num_of_clusters = d.get_num_of_clusters(tag)

    fcm = FCM(n_clusters=num_of_clusters)
    fcm.fit(df)
    labels = fcm.predict(df)

    if get_figure:
        title = 'DS{} - Fuzzy C Means'.format(dataset_num)
        folder = 'images/dataset {}/'.format(dataset_num)
        suffix = " ({} clusters)".format(num_of_clusters)
        fig_name = folder + title + suffix

        # points coloured by cluster, centroids as black stars
        plt.scatter(df["PC1"], df["PC2"], c=labels)
        centers = fcm.centers
        plt.scatter(centers["PC1"],
                    centers["PC2"],
                    marker="*",
                    label='centroid',
                    c='black')
        plt.legend()
        plt.title(title)

        plt.savefig(fig_name)
        plt.show()

    if not calc_ami_score:
        return None

    labels_true = d.get_labels(tag)
    return adjusted_mutual_info_score(labels_true=labels_true,
                                      labels_pred=labels)
Example #8
0
def fuuzyc(train_x1, tag, gender, race):
    """Score a 9-cluster Fuzzy C-Means clustering against three label sets.

    ``train_x1`` is projected onto two principal components and clustered
    with ``FCM(n_clusters=9)``. The predicted labels are compared with
    ``tag``, ``gender`` and ``race`` in turn.

    Returns:
        ``(purities, f1s, silhouette)``: ``purities`` and ``f1s`` are
        three-element lists ordered as (tag, gender, race); the F1 scores
        are weighted and computed on the flattened label arrays;
        ``silhouette`` is the silhouette score on the reduced data.
    """
    # reduce to two principal components before clustering
    reducer = PCA(n_components=2)
    reducer.fit(train_x1)
    reduced = reducer.transform(train_x1)

    clusterer = FCM(n_clusters=9)
    clusterer.fit(reduced)
    predicted = clusterer.predict(reduced)

    references = (tag, gender, race)
    purities = [purity_score(reference, predicted) for reference in references]
    silhouette = silhouette_score(reduced, predicted)
    f1s = [
        f1_score(reference.flatten(), predicted, average='weighted')
        for reference in references
    ]
    return purities, f1s, silhouette
Example #9
0
def fuuzyc(train_x1, vtype, weekend, rev, c):
    """Score a ``c``-cluster Fuzzy C-Means clustering against three label sets.

    ``train_x1`` is projected onto two principal components and clustered
    with ``FCM(n_clusters=c)``. The predicted labels are compared with
    ``vtype``, ``weekend`` and ``rev`` in turn. The silhouette score, the
    purity list and the F1 list are printed, and the clustering is drawn
    with ``plot_clusters``.

    Returns:
        ``(purities, f1s, silhouette)``: ``purities`` and ``f1s`` are
        three-element lists ordered as (vtype, weekend, rev); the F1 scores
        are weighted and computed on the flattened label arrays.
    """
    # reduce to two principal components before clustering
    reducer = PCA(n_components=2)
    reducer.fit(train_x1)
    reduced = reducer.transform(train_x1)

    clusterer = FCM(n_clusters=c)
    clusterer.fit(reduced)
    predicted = clusterer.predict(reduced)

    references = (vtype, weekend, rev)
    purities = [purity_score(reference, predicted) for reference in references]
    silhouette = silhouette_score(reduced, predicted)
    print(silhouette)
    print(purities)

    f1s = [
        f1_score(reference.flatten(), predicted, average='weighted')
        for reference in references
    ]
    print(f1s)

    plot_clusters(reduced, predicted)
    return purities, f1s, silhouette
Example #10
0
def merge_clusters(rgb_array_in,
                   counts_from_cluster,
                   clsuter_1D_method='Diff'):
    """Label the colours in ``rgb_array_in`` and average the similar ones.

    The input colours are converted to HSV and labelled with the strategy
    selected by ``clsuter_1D_method``:

    * ``'Diff'``     -- ``differntial_1D_cluster`` on the hue column.
    * ``'MeanSift'`` -- ``cluster_1D`` on the hue column (kept 2-D); the
      labels are read from ``result['labels']``.
    * ``'2nd_fcm'``  -- a 13-cluster ``FCM`` fitted on ``rgb_array_in``; in
      this branch the working colour array is replaced by the rounded
      cluster centres and ``counts_from_cluster`` is replaced by a
      ``Counter`` of the predicted labels.
    * anything else  -- a message is printed and ``labels`` is set to ``0``.

    Unless the method is ``'2nd_fcm'``, the labels are then passed through
    ``decompose_hue`` (while ``use_std`` is true). Finally
    ``average_similar_colors_pix_cnt`` is applied.

    Args:
        rgb_array_in: Colour array; presumably shape (N, 3) in RGB order --
            TODO confirm against callers. It is not modified (copies are
            used).
        counts_from_cluster: Per-colour counts forwarded to
            ``average_similar_colors_pix_cnt`` (overridden for
            ``'2nd_fcm'``).
        clsuter_1D_method: One of ``'Diff'``, ``'MeanSift'``, ``'2nd_fcm'``.

    Returns:
        ``(rgb_array, labels)`` where ``rgb_array`` is whatever
        ``average_similar_colors_pix_cnt`` returns.
    """
    # NOTE(review): the previous comment on this flag read "This is not working
    # as expected, so we are setting it to False", but the value is True --
    # confirm which one is intended.
    use_std = True

    rgb_array = rgb_array_in.copy(
    )  # work on a copy; NOTE(review): the old comment required a float type to prevent overflow, but no cast happens here
    # add a leading axis so the colour list can go through cv2.cvtColor
    rgb_array = np.expand_dims(rgb_array, axis=0)
    hsv = cv2.cvtColor(rgb_array,
                       cv2.COLOR_RGB2HSV)  # open question from the author: how about COLOR_BGR2HLS?
    hsv = np.squeeze(hsv, axis=0)
    if clsuter_1D_method == 'Diff':
        labels = differntial_1D_cluster(
            hsv[:, 0])  # hsv[:, 0] is the hue component
    elif clsuter_1D_method == 'MeanSift':
        result, ms = cluster_1D(
            hsv[:, 0:1]
        )  # hue column kept two-dimensional for the clustering helper
        labels = result['labels']
    elif clsuter_1D_method == '2nd_fcm':  # author's note: not so good
        clf = FCM(n_clusters=13)
        clf.fit(rgb_array_in)
        labels = clf.predict(rgb_array_in)
        # from here on the colours being averaged are the rounded FCM centres
        rgb_array = clf.centers.round()
        rgb_array = np.expand_dims(rgb_array, axis=0)
        counts_from_cluster = Counter(labels)
    else:
        print('Incorrect choice of clsuter_1D_method')
        # NOTE(review): execution continues with a scalar label, which is
        # still handed to decompose_hue below -- confirm this is intended
        labels = 0

    if use_std and clsuter_1D_method != '2nd_fcm':  # second stage, decompose similar hue(s)
        labels = decompose_hue(labels, rgb_array_in.copy())
    # now, average similar colors
    rgb_array = average_similar_colors_pix_cnt(rgb_array, counts_from_cluster,
                                               labels)

    return rgb_array, labels
Example #11
0
import time


def RGBXY(image):
    """Build per-pixel (R, G, B, row, col) features from an image.

    Args:
        image: Array of shape ``(height, width, 3)`` with channel values on
            a 0-255 scale.

    Returns:
        A tuple ``(features, indexes)``:

        * ``features`` -- float array of shape ``(height * width, 5)``; the
          first three columns are the channels divided by 255, the fourth is
          the row index divided by ``height`` and the fifth is the column
          index divided by ``width``.
        * ``indexes`` -- integer array of shape ``(height * width, 2)`` with
          the ``(row, col)`` position of every pixel in row-major order.

    Raises:
        IndexError: if ``image`` has fewer than three dimensions.
    """
    shape = list(image.shape)
    shape[2] = 5
    height, width = shape[0], shape[1]

    # Vectorised coordinate grid of shape (height, width, 2); avoids building
    # a Python list with one entry per pixel and keeps the (N, 2) shape even
    # for an empty image.
    grid = np.moveaxis(np.indices((height, width)), 0, -1)

    img = np.zeros(shape)
    img[:, :, :3] = image / 255
    img[:, :, 3] = grid[:, :, 0] / height
    img[:, :, 4] = grid[:, :, 1] / width
    return img.reshape([-1, 5]), grid.reshape(-1, 2)


# Interactive demo: segment an image by clustering its RGBXY pixel features.
# Ask for the number of clusters on standard input.
c = int(input("Numbers of clusters(c): "))
# Load the image from the working directory as an array copy.
img = np.asarray(Image.open('1558014721_E7jyWs_iiit_d.jpg')).copy()
# Start timing; covers feature extraction, fitting, prediction and recolouring.
t1 = time.time()
features, indexes = RGBXY(img)
fcm = FCM(n_clusters=c)
fcm.fit(features)
fcm_centers = fcm.centers
fcm_labels = fcm.predict(features)
print(fcm_centers, fcm_centers.shape)
# Replace every pixel by the first three components of its cluster centre
# (the colour part of the feature vector, scaled by 1/255 in RGBXY) and
# restore the original height x width layout.
img = fcm_centers[fcm_labels, :3].reshape((img.shape[0], img.shape[1], 3))
print("Time taken: %f" % (time.time() - t1))
plt.imshow(img)
plt.axis('off')
plt.show()
import pandas as pd
from fcmeans import FCM

# Load the customer data from a hard-coded absolute Windows path.
dataset = pd.read_csv(
    'D://Visual Exercise//Python//New folder//Fuzzy-C-Means Clustering//Fuzzy-C-Means Clustering//Mall_Customers.csv'
)

# Use the columns at positions 3 and 4 as the two clustering features.
X = dataset.iloc[:, [3, 4]].values
print(X)

# Fit Fuzzy C-Means with three clusters, at most 150 iterations and a fixed
# seed.
fcm = FCM(n_clusters=3, max_iter=150, random_state=0)
fcm.fit(X)

y_pred = fcm.predict(X)

print(y_pred)

# Outputs.
# The author notes that predict() and the argmax of the membership matrix
# ``u`` give the same labels on the training data; both are printed.
fcm_centers = fcm.centers
fcm_labels = fcm.u.argmax(axis=1)

print(fcm_labels)

# Visualising the clusters
plt.scatter(X[y_pred == 0, 0],
            X[y_pred == 0, 1],
            s=100,
            c='red',
Example #13
0
        inertia = km.inertia_
        cents = km.cluster_centers_

        cents_list.append(cents)
        inert_list.append(inertia)

    # Get best centroids to use for full clustering
    best_cents = cents_list[inert_list.index(min(inert_list))]

    # fit the fuzzy-c-means
    fcm = FCM(n_clusters=2,
              first_center=best_cents,
              max_iter=500,
              random_state=42)
    fcm.fit(X)
    probability = fcm.predict(x_add)
    probability_df = pd.DataFrame(data=probability[:, 2], columns=['predict'])
    cluster = collections.Counter(probability[:, 2])
    print(cluster)
    if cluster[0.0] >= cluster[1.0]:
        probability_df.loc[probability_df['predict'] == 0.0, 'predict'] = name
        probability_df.loc[probability_df['predict'] == 1.0,
                           'predict'] = 'outlier'
    else:
        probability_df.loc[probability_df['predict'] == 1.0, 'predict'] = name
        probability_df.loc[probability_df['predict'] == 0.0,
                           'predict'] = 'outlier'

    result_df = pd.DataFrame(data=x_add, columns=data.columns)
    result_df['actual'] = list(label)
    result_df['predict'] = probability_df['predict'].tolist()
Example #14
0
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
from shapely.geometry import Point, Polygon
from sklearn.cluster import KMeans
from fcmeans import FCM

# Column names applied to the CSV as it is read (passed via ``names=``).
colnames = [
    'ozone', 'particullate_matter', 'carbon_monoxide', 'sulfure_dioxide',
    'nitrogen_dioxide', 'longitude', 'latitude', 'timestamp'
]
df = pd.read_csv('./pollution data/combined_dataset.csv', names=colnames)

numOfClusters = 4

# Cluster on the geographic coordinates only. A plain ndarray is passed so
# that the fitted centres can be indexed positionally further down.
coordinates = df[['longitude', 'latitude']].to_numpy()

fcm = FCM(n_clusters=numOfClusters)
# The original fitted on ``map[[...]]``, i.e. the builtin ``map``, which
# raises TypeError; the data frame loaded above is what is meant.
fcm.fit(coordinates)
y_fcmeans = fcm.predict(coordinates)
plt.scatter(df['longitude'],
            y=df['latitude'],
            c=y_fcmeans,
            s=50,
            cmap='viridis')
df['cluster_label'] = y_fcmeans

# FCM exposes the fitted centres as ``centers`` (there is no scikit-learn
# style ``centers_`` attribute).
centers = fcm.centers
plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)
plt.show()
# To persist the labelled data:
# df.to_csv('clustered_data.csv', index=None, header=True)
Example #15
0
import numpy as np
from fcmeans import FCM
from matplotlib import pyplot as plt
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D

# NOTE(review): not referenced by anything visible in this script.
n_samples = 3000

# Read the spreadsheet without a header row and cluster its raw values.
df = pd.read_excel('120_data.xlsx', header=None)
X = df.to_numpy()

fcm = FCM(n_clusters=3)
fcm.fit(X)

# outputs
fcm_centers = fcm.centers
fcm_labels = fcm.predict(X)

# Plot the first three columns in 3-D, coloured by cluster label.
fig = plt.figure()
# ``Axes3D(fig)`` no longer attaches the axes to the figure on current
# Matplotlib releases (the plot comes out blank); request the 3-D projection
# from the figure instead.
ax = fig.add_subplot(projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=fcm_labels, cmap='Set1')
plt.show()
               13.051967221349628, 4.065577055191767, 29.922603051099713,
               7.58443241651873, 25286.235250015743, 17660.494008300826,
               21851.365482692607, 14800.013759329424, 69.73815264740048,
               5.022356752792227, 1233.9959969564013, 4.065577055191767,
               7.58443241651873, 13.311492579165701, 35.45257318447153,
               1053.2675415630874
           ]]

# Fit Fuzzy C-Means with two clusters, seeded with the ``centers`` defined
# above.
# NOTE(review): ``first_center`` and the three-column ``predict`` output used
# below look like a customised FCM implementation -- confirm which one is
# imported.
fcm = FCM(n_clusters=2, first_center=centers, max_iter=100)
fcm.fit(X_train)

# outputs
fcm_centers = fcm.centers  # the first is the 'Benign' centroid, the second is the 'attack' centroid
fcm_labels = fcm.u.argmax(axis=1)
probability = fcm.predict(X_test)

# Columns 0 and 1 are presumably the two cluster memberships and 'pre_class'
# the predicted class -- TODO confirm against the FCM implementation in use.
result_df = pd.DataFrame(data=probability, columns=[0, 1, 'pre_class'])
result_df['class'] = y_test

print(color.BOLD + "\nValidate records in cluster(find invalid record)" +
      color.END)
dif_data = check_different(result_df)
print(dif_data.shape)

# Extract the data that satisfy the probability threshold (pt)
# 0.4 <= pt <= 0.6
# small_num runs from 0.49 down to 0.01, big_num from 0.51 up to 1.0.
small_num = [round(i * (0.01), 2) for i in range(1, 50)]
small_num.reverse()
big_num = [round(i * (0.01), 2) for i in range(51, 101)]
result_data = pd.DataFrame(index=range(0, len(big_num)),
Example #17
0
def _most_similar_index(points, anchor, candidates):
    """Return the candidate whose point is most cosine-similar to ``anchor``.

    Candidates are row indices into ``points``. On ties the earliest
    candidate wins; ``None`` is returned when ``candidates`` is empty.
    """
    width = points.shape[1]
    anchor_row = anchor.reshape(1, width)
    best_index = None
    best_similarity = None
    for candidate in candidates:
        similarity = cosine_similarity(anchor_row,
                                       points[candidate].reshape(1, width))
        if best_similarity is None or best_similarity < similarity:
            best_similarity = similarity
            best_index = candidate
    return best_index


def compute_centers_PSC(language, labels, num_protos=10):
    """Select prototype samples per class from Fuzzy C-Means clusters.

    The encodings stored in ``logs/train_Encodings_{language}.pt`` are
    averaged along their second axis to get one point per sample, and the
    points are clustered into ``num_protos`` clusters. For every non-empty
    cluster:

    * if all members share one label, the member most cosine-similar to the
      cluster mean becomes a prototype;
    * otherwise, for every member of the majority class, the most similar
      minority-class member is added, followed by the majority-class member
      most similar to that minority member.

    Duplicates are removed, a summary is printed, and a t-SNE scatter plot of
    all points and prototypes is saved to ``logs/protos_{language}.png``.

    Args:
        language: Name used in the input and output file paths.
        labels: Per-sample labels; must support list indexing and
            elementwise comparison (e.g. a numpy array). The majority class
            is 1 when the labels of a cluster sum to more than half its
            size, else 0.
        num_protos: Number of clusters to fit.

    Returns:
        ``(P_set, N_set)``: prototype sample indices whose label is 1, and
        all remaining prototype sample indices.
    """
    encoding = torch.load(f'logs/train_Encodings_{language}.pt')

    # One point per sample: the mean of its encodings along axis 1.
    points = np.zeros((encoding.shape[0], encoding.shape[-1]))
    for i in range(len(points)):
        count = 0
        for j in range(encoding.shape[1]):
            points[i] += encoding[i][j]
            count += 1.0
        points[i] /= count

    fcm = FCM(n_clusters=num_protos)
    fcm.fit(points)
    fcm_labels = fcm.predict(points)

    protos = []
    for cluster in range(num_protos):
        idx = list(np.where(fcm_labels == cluster)[0].reshape(-1))
        if not idx:
            # No sample was assigned to this cluster, so there is nothing to
            # pick (indexing idx[0] would raise IndexError).
            continue

        first_label = labels[idx[0]]
        homogeneous = all(labels[j] == first_label for j in idx)

        if homogeneous:
            centroid = points[idx].sum(axis=0) / len(idx)
            protos.append(_most_similar_index(points, centroid, idx))
            continue

        major_class = 1 if labels[idx].sum() > len(idx) / 2 else 0
        majority = [k for k in idx if labels[k] == major_class]
        minority = [k for k in idx if labels[k] != major_class]

        for member in majority:
            # Closest opposite-class sample to this majority member ...
            nearest_minority = _most_similar_index(points, points[member],
                                                   minority)
            protos.append(nearest_minority)
            # ... and the majority sample closest to that one.
            protos.append(
                _most_similar_index(points, points[nearest_minority],
                                    majority))

    protos = list(set(protos))
    P_set = []
    N_set = []

    for i in protos:
        if labels[i] == 1:
            P_set.append(i)
        else:
            N_set.append(i)

    print(
        f'{bcolors.BOLD}Computed prototypes {language}:\t{len(protos)}\nNegative: {len(N_set)} Positive: {len(P_set)}{bcolors.ENDC}'
    )
    P_idx = list(np.argwhere(labels == 1).reshape(-1))
    N_idx = list(np.argwhere(labels == 0).reshape(-1))

    # 2-D embedding used only for the diagnostic plot.
    Z = TSNE(n_components=2).fit_transform(points)

    P = Z[P_idx]
    N = Z[N_idx]
    C = Z[P_set]
    F = Z[N_set]

    plt.scatter(P[:, 0], P[:, 1], c='c', label='Pos', alpha=.5)
    plt.scatter(N[:, 0], N[:, 1], c='r', label='Neg', alpha=.3)
    plt.scatter(C[:, 0], C[:, 1], c='0', label='Proto_Pos', alpha=.7)
    plt.scatter(F[:, 0], F[:, 1], c='#723a91', label='Proto_Neg', alpha=.7)
    plt.legend(loc=1)
    plt.savefig(f'logs/protos_{language}.png')
    plt.close()

    return P_set, N_set