Exemple #1
0
 def calcHiddenOutputs(self, input, center, std, data):
     """
     Evaluate a Gaussian radial basis function for one input/center pair.

     The result is exp(-d**2 / (2 * std**2)), where d is the value that
     KNN.get_euclidean_distance reports for ``input`` and ``center``.

     :param input: point to evaluate
     :param center: center the point is measured against
     :param std: spread of the Gaussian; a value of 0 makes the division fail
     :param data: data object forwarded to the KNN constructor
     :return: the value produced by np.exp for this pair
     """
     distance = KNN(2, data).get_euclidean_distance(input, center)
     # Keep the factor separate so the arithmetic order stays
     # (-1 / (2 * std**2)) * distance**2.
     scale = -1 / (2 * std**2)
     return np.exp(scale * distance**2)
 def test_euclidean(self):
     """
     Smoke-test the euclidean distance helper on a small abalone sample.

     Loads the abalone data file, samples ten rows, splits them into
     train/test data, and prints the distance between the second and third
     sampled rows. Nothing is asserted; the check is that the call runs.
     :return:
     """
     frame = pd.read_csv(r'data/abalone.data', header=None)
     data = Data('abalone', frame, 8)
     sample = data.df.sample(n=10)  # keep the frame small
     data.split_data(data_frame=sample)  # populates the train and test data
     knn = KNN(5, data)
     first_row, second_row = sample.iloc[1], sample.iloc[2]
     print(knn.get_euclidean_distance(first_row, second_row))
 def getMaxDistMeans(self, mean_list, data):
     """
     Return the largest pairwise distance among the given cluster means.

     Every mean is compared against every mean (including itself) using
     KNN.get_euclidean_distance, and the greatest value seen is kept.

     :param mean_list: collection of mean points; each element is passed
         directly to KNN.get_euclidean_distance
     :param data: data object forwarded to the KNN constructor
     :return: the greatest distance found, or 0 when mean_list is empty or
         no distance exceeds 0
     """
     maxDist = 0
     knn = KNN(2, data)
     for clust in mean_list:
         for clus2 in mean_list:
             # Both points must be handed to the distance helper; calling it
             # without arguments cannot measure anything.
             curDist = knn.get_euclidean_distance(clust, clus2)
             if curDist > maxDist:
                 maxDist = curDist
     return maxDist
Exemple #4
0
    def getMaxDist(self, medoids_list, data):
        """
        Return the largest pairwise distance among the given medoids.

        Each medoid's ``row`` is compared against every medoid's ``row``
        (including its own) with KNN.get_euclidean_distance.

        :param medoids_list: collection of medoid objects exposing ``row``
        :param data: data object forwarded to the KNN constructor
        :return: the greatest distance found, or 0 when no distance exceeds 0
        """
        knn = KNN(2, data)
        pair_distances = [
            knn.get_euclidean_distance(first.row, second.row)
            for first in medoids_list
            for second in medoids_list
        ]
        # Seeding with 0 reproduces the running maximum that starts at 0 and
        # is only replaced by a strictly greater distance.
        return max([0] + pair_distances)
    def predict_centroids(
            self, centroids,
            data_set):  # Method to return closest cluster to test data
        """
        For each row of the data set, find the nearest centroid.

        The nearest centroid is tracked by its 1-based position in
        ``centroids`` together with its distance, as measured by
        KNN.get_euclidean_distance (called on the class, not an instance).

        NOTE(review): as written, nothing is returned or stored; the
        per-row result is discarded when the next row starts. The body may
        be incomplete -- confirm against the full source.

        :param centroids: iterable of centroid points
        :param data_set: data to classify; presumably a pandas DataFrame,
            since ``iterrows`` is called on it -- TODO confirm
        :return: None (see the review note above)
        """

        # NOTE(review): ``data_set[data_set]`` indexes the data set with
        # itself; this looks like it was meant to be ``data_set.iterrows()``
        # -- confirm before relying on this method.
        for _, data in data_set[data_set].iterrows(
        ):  # Loops through the rows of the data set
            distance = None  # Smallest distance seen so far for this row
            closest_centroid = None  # 1-based position of the closest centroid so far
            closest_centroid_euclidian_distance = None  # Distance to that closest centroid
            cluster_val = 1  # 1-based position of the centroid being examined
            for centroid in centroids:  # Loops through the k centroid points
                euclid_distance = KNN.get_euclidean_distance(
                    centroid, data
                )  # Gets the distance between the centroid and the data point

                if distance is None or euclid_distance < distance:  # First centroid, or a strictly closer one
                    distance = euclid_distance
                    # Record the centroid's position rather than the centroid itself
                    closest_centroid = cluster_val
                    closest_centroid_euclidian_distance = distance
                cluster_val += 1
    def cluster_data(self, clusters, data_set):
        """
        Run k-means assignment/update rounds until the assignment stabilises.

        Each round assigns every row of ``data_set`` to the cluster point
        with the smallest KNN.get_euclidean_distance, then recomputes the
        cluster points with ``self.mean_clusters``. The loop stops when a
        round produces the same assignment as the previous one.

        :param clusters: mapping whose values are the current cluster points
        :param data_set: data to cluster; rows are read with ``iterrows``
        :return: list of the cluster points after the final update
        """
        last_assignment = []  # Assignment from the previous round
        while True:
            # One empty bucket per cluster point.
            assignment = [[] for _ in range(len(clusters))]

            for _, record in data_set.iterrows():
                point = list(record)  # Rows are handled as plain lists
                best_index, best_distance = None, float('inf')
                for index, center in enumerate(clusters.values()):
                    separation = KNN.get_euclidean_distance(center, point)
                    if separation < best_distance:
                        best_index, best_distance = index, separation
                assignment[best_index].append(point)

            # Recompute the cluster points before testing for convergence, so
            # the returned points reflect the final assignment.
            clusters = self.mean_clusters(assignment, data_set)
            if last_assignment == assignment:
                print(
                    '-------------------------- K-Means has converged ------------------'
                )
                return list(clusters.values())
            last_assignment = assignment