Esempio n. 1
0
def evaluate():
    """Cluster the normalized weekly sales columns with a Growing Neural Gas.

    Reads ``Sales_Transactions_Dataset_Weekly.csv`` from the current working
    directory, fits a ``GrowingNeuralGas`` on the selected ``Normalized N``
    columns, prints the number of connected components of the fitted network,
    plots the cluster assignment and finally prints the observations grouped
    by cluster.

    The printed structure is a list indexed by cluster id; each entry holds
    the 1-based position of every observation assigned to that cluster, in
    the order returned by ``gng.cluster_data()``.
    """
    # NOTE(review): "Normalized 50" is skipped, exactly as in the original
    # hand-written column list -- confirm whether the omission is intentional.
    columns = ["Normalized %d" % week for week in range(52) if week != 50]

    ds = pd.read_csv("Sales_Transactions_Dataset_Weekly.csv")
    # ``DataFrame.as_matrix()`` was deprecated in pandas 0.23 and removed in
    # 1.0; ``.values`` returns the same NumPy array on old and new versions.
    gng = GrowingNeuralGas(ds[columns].values, output_folder="visualization")
    gng.fit_network(e_b=0.05, e_n=0.006, a_max=8, l=100, a=0.5, d=0.995, passes=10, plot_evolution=True)

    # Compute the assignment once and reuse it for plotting and grouping.
    # NOTE(review): assumes cluster_data() returns the same result on repeated
    # calls for an already fitted network -- confirm against the gng module.
    clustered_data = gng.cluster_data()
    print('Found %d clusters.' % nx.number_connected_components(gng.network))
    gng.plot_clusters(clustered_data)

    clusters = [[]]
    for position, (_observation, label) in enumerate(clustered_data, start=1):
        if len(clusters) <= label:
            # Growth step kept as in the original (it may over-allocate empty
            # buckets) so that the printed list keeps the same shape.
            clusters.extend([] for _ in range(int(label + 1)))
        clusters[int(label)].append(position)
    print(clusters)
Esempio n. 2
0
def evaluate_on_digits():
    """Fit a Growing Neural Gas on the ``datasets.load_digits()`` data.

    Prints the number of connected components of the fitted network and the
    homogeneity score of the inferred clusters against the dataset targets,
    then plots the dimension-reduced cluster assignment.
    """
    digits = datasets.load_digits()
    gng = GrowingNeuralGas(digits.data)
    gng.fit_network(
        e_b=0.05,
        e_n=0.006,
        a_max=8,
        l=100,
        a=0.5,
        d=0.995,
        passes=5,
        plot_evolution=False,
    )
    assignments = gng.cluster_data()
    print('Found %d clusters.' % nx.number_connected_components(gng.network))

    # Keep only the cluster label of each (observation, cluster) pair.
    inferred_labels = [cluster for _observation, cluster in assignments]
    print(metrics.homogeneity_score(digits.target, inferred_labels))

    gng.plot_clusters(gng.reduce_dimension(gng.cluster_data()))
Esempio n. 3
0
def evaluate(e_b, e_n, a_max, l, a, d, passes):
    """Cluster the weekly sales columns with a Growing Neural Gas.

    Reads ``Sales_Transactions_Dataset_Weekly.csv`` from the current working
    directory, fits a ``GrowingNeuralGas`` on the selected ``W<n>`` columns,
    plots the cluster assignment, prints every non-empty cluster and the
    network's global error.

    Args:
        e_b, e_n, a_max, l, a, d, passes: forwarded unchanged, by keyword, to
            ``GrowingNeuralGas.fit_network`` (which is always called with
            ``plot_evolution=True``).

    Returns:
        A list indexed by cluster id. Each entry is the list of 1-based
        positions of the observations assigned to that cluster, in the order
        returned by ``gng.cluster_data()``. The list may contain empty
        entries.
    """
    # NOTE(review): "W50" is skipped, exactly as in the original hand-written
    # column list -- confirm whether the omission is intentional.
    columns = ["W%d" % week for week in range(52) if week != 50]

    ds = pd.read_csv("Sales_Transactions_Dataset_Weekly.csv")
    # ``DataFrame.as_matrix()`` was deprecated in pandas 0.23 and removed in
    # 1.0; ``.values`` returns the same NumPy array on old and new versions.
    gng = GrowingNeuralGas(ds[columns].values, output_folder="visualization")
    gng.fit_network(e_b=e_b,
                    e_n=e_n,
                    a_max=a_max,
                    l=l,
                    a=a,
                    d=d,
                    passes=passes,
                    plot_evolution=True)

    # Compute the assignment once and reuse it for plotting and grouping.
    # NOTE(review): assumes cluster_data() returns the same result on repeated
    # calls for an already fitted network -- confirm against the gng module.
    clustered_data = gng.cluster_data()
    print('Found %d clusters.' % nx.number_connected_components(gng.network))
    gng.plot_clusters(clustered_data)

    clusters = [[]]
    for position, (_observation, label) in enumerate(clustered_data, start=1):
        if len(clusters) <= label:
            # Growth step kept as in the original (it may over-allocate empty
            # buckets) so that the returned list keeps the same shape.
            clusters.extend([] for _ in range(int(label + 1)))
        clusters[int(label)].append(position)

    # The printed number is a running count of the non-empty clusters, not
    # the cluster's index in ``clusters``.
    non_empty = [members for members in clusters if members]
    for ind, members in enumerate(non_empty):
        print('Cluster №%d size: (%d) contains data:' % (ind, len(members)))
        print(members)

    print('Global error all network(on euclidean distance): ' +
          str(gng.compute_global_error()))
    return clusters
Esempio n. 4
0
def evaluate_on_digits():
    """Cluster the ``datasets.load_digits()`` data and report homogeneity.

    A ``GrowingNeuralGas`` is fitted on the digit features; the function then
    prints the number of connected components of the network, prints the
    homogeneity score of the inferred clusters versus the dataset targets and
    plots the dimension-reduced cluster assignment.
    """
    digits = datasets.load_digits()
    samples, labels = digits.data, digits.target

    hyperparameters = dict(e_b=0.05, e_n=0.006, a_max=8, l=100,
                           a=0.5, d=0.995, passes=5, plot_evolution=False)
    gng = GrowingNeuralGas(samples)
    gng.fit_network(**hyperparameters)

    pairs = gng.cluster_data()
    print('Found %d clusters.' % nx.number_connected_components(gng.network))

    # Second element of each pair is the cluster assigned to the observation.
    inferred = [assigned for _sample, assigned in pairs]
    score = metrics.homogeneity_score(labels, inferred)
    print(score)

    reduced = gng.reduce_dimension(gng.cluster_data())
    gng.plot_clusters(reduced)
Esempio n. 5
0
    # returning values
    return values

if __name__ == '__main__':
    # Recreate the output directory so the run starts from an empty one.
    sequence_dir = 'visualization/sequence'
    if os.path.exists(sequence_dir):
        shutil.rmtree(sequence_dir)
    os.makedirs(sequence_dir)

    n_samples = 1500

    # Input selection. Alternatives previously tried here were dataset()
    # (a user-specific dataset), datasets.load_breast_cancer(),
    # datasets.load_iris(...), datasets.make_blobs(n_samples=n_samples,
    # random_state=8) and datasets.make_circles(n_samples=n_samples,
    # factor=.5, noise=.05). Which expression must be scaled depends on the
    # chosen source: ``data.data`` for some of them, ``data[0]`` for the one
    # used below.
    data = datasets.make_moons(n_samples=n_samples, noise=.05)
    scaler = StandardScaler()
    data = scaler.fit_transform(data[0])

    print('Done.')
    print('Fitting neural network...')
    gng = GrowingNeuralGas(data)
    gng.fit_network(
        e_b=0.1,
        e_n=0.006,
        a_max=10,
        l=200,
        a=0.5,
        d=0.995,
        passes=5,
        plot_evolution=True,
    )

    print('Found %d clusters.' % gng.number_of_clusters())
    gng.plot_clusters(gng.cluster_data())
Esempio n. 6
0
from gng import GrowingNeuralGas
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
import os
import shutil

__authors__ = 'Adrien Guille'
__email__ = '*****@*****.**'

if __name__ == '__main__':
    # Recreate the output directory so the run starts from an empty one.
    sequence_dir = 'visualization/sequence'
    if os.path.exists(sequence_dir):
        shutil.rmtree(sequence_dir)
    os.makedirs(sequence_dir)

    n_samples = 2000
    dataset_type = 'moons'

    print('Preparing data...')
    # One generator per supported dataset name; only the selected one runs.
    generators = {
        'blobs': lambda: datasets.make_blobs(n_samples=n_samples, random_state=8),
        'moons': lambda: datasets.make_moons(n_samples=n_samples, noise=.05),
        'circles': lambda: datasets.make_circles(n_samples=n_samples, factor=.5, noise=.05),
    }
    make_dataset = generators.get(dataset_type)
    # An unknown name leaves ``data`` as None, so the indexing below fails.
    data = make_dataset() if make_dataset is not None else None
    # Only the first element of the generated dataset is scaled and clustered.
    data = StandardScaler().fit_transform(data[0])
    print('Done.')

    print('Fitting neural network...')
    gng = GrowingNeuralGas(data)
    gng.fit_network(
        e_b=0.1,
        e_n=0.006,
        a_max=10,
        l=200,
        a=0.5,
        d=0.995,
        passes=8,
        plot_evolution=True,
    )
    print('Found %d clusters.' % gng.number_of_clusters())
    gng.plot_clusters(gng.cluster_data())