Ejemplo n.º 1
0
	def categorize(self, element):
		"""Classify *element* by majority vote among its k nearest neighbors.

		Computes the distance from *element* to every stored sample, takes
		the k-th smallest distance as a cutoff, and returns the most common
		label among samples within that cutoff (distance ties may admit
		more than k neighbors, matching the cutoff rule).
		"""
		distances = {
			sample_id: Utils.euclidean_distance(self.data[sample_id], element)
			for sample_id in self.data
		}

		# Cutoff distance: the k-th smallest of all computed distances.
		cutoff = sorted(distances.values())[self.k - 1]

		# Count label votes for every sample within the cutoff.
		votes = {}
		for sample_id, dist in distances.items():
			if dist <= cutoff:
				label = self.labels[sample_id]
				votes[label] = votes.get(label, 0) + 1

		return max(votes, key=votes.get)
Ejemplo n.º 2
0
    def measure(self):
        """Plot k-NN regression cross-validation error against 1/k.

        For every k from 1 to the number of samples, builds a KNN
        regressor and scores it with 5-fold cross-validation. Points with
        a valid error (> -1; -1 is the sentinel for "not computable") are
        plotted as (1/k, error) via Utils.plot2D.
        """
        x = []
        y = []

        # Iterate k directly instead of deriving it from a range(len(...))
        # index; k covers every possible neighborhood size 1..n.
        for k in range(1, len(self.data) + 1):
            knn = KNN(self.labels, self.data, k, 'regression')
            KFoldCV_error = knn.KFoldCV(5)

            # Skip sentinel results signalling a failed CV run.
            if KFoldCV_error > -1:
                x.append(1.0 / k)
                y.append(KFoldCV_error)

        Utils.plot2D(x, y)
Ejemplo n.º 3
0
	def measure(self):
		"""Plot the 5-fold CV regression error of KNN as a function of 1/k."""
		inv_k = []
		errors = []

		for i in range(len(self.data)):
			neighbours = i + 1

			model = KNN(self.labels, self.data, neighbours, 'regression')
			cv_error = model.KFoldCV(5)

			# A value of -1 (or less) marks an invalid CV run; skip it.
			if cv_error > -1:
				inv_k.append(1.0 / neighbours)
				errors.append(cv_error)

		Utils.plot2D(inv_k, errors)
Ejemplo n.º 4
0
	def absolute_error(self, clusters, means):
		"""Return the total Euclidean distance of every element to its cluster mean.

		clusters maps a cluster key to its member elements; means maps the
		same key to that cluster's mean.
		"""
		ae = 0.0

		for cluster_id in clusters:
			# Sum of dissimilarities of this cluster's members to its mean.
			ae += sum(
				Utils.euclidean_distance(member, means[cluster_id])
				for member in clusters[cluster_id]
			)

		return ae
Ejemplo n.º 5
0
    def absolute_error(self, clusters, means):
        """Absolute error of a clustering: the summed member-to-mean
        Euclidean distance, accumulated cluster by cluster."""
        total = 0.0

        for key, members in clusters.items():
            cluster_sum = 0
            for member in members:
                cluster_sum += Utils.euclidean_distance(member, means[key])
            total += cluster_sum

        return total
Ejemplo n.º 6
0
	def MSE(self, clusters, means):
		"""Return the mean squared error of a clustering.

		For each cluster, computes the average squared Euclidean distance
		of its members to the cluster mean; the result is the average of
		those per-cluster values over all clusters.

		Returns 0.0 for an empty clustering instead of raising
		ZeroDivisionError; empty clusters contribute 0 to the average.
		"""
		# Guard: an empty clustering has no meaningful MSE.
		if not clusters:
			return 0.0

		mse = 0.0

		for cluster_id in clusters:
			members = clusters[cluster_id]

			# Sum of squared member-to-mean distances for this cluster.
			squared = sum(
				Utils.euclidean_distance(member, means[cluster_id]) ** 2
				for member in members
			)

			# Empty clusters contribute nothing (avoids dividing by zero).
			if members:
				mse += squared / len(members)

		return mse / len(clusters)
Ejemplo n.º 7
0
    def MSE(self, clusters, means):
        """Mean squared error of a clustering: the average over clusters of
        each cluster's mean squared member-to-mean distance."""
        total = 0.0

        for key in clusters:
            members = clusters[key]
            cluster_sse = 0.0

            # Accumulate squared distances to this cluster's mean.
            for member in members:
                cluster_sse += pow(Utils.euclidean_distance(member, means[key]), 2)

            # Average within the cluster (skip for empty clusters).
            if len(members) > 0:
                cluster_sse = cluster_sse / len(members)

            total += cluster_sse

        return total / len(clusters)
Ejemplo n.º 8
0
    def getMedoid(self, cluster_elements):
        """Return the element of *cluster_elements* closest to the centroid.

        The medoid is the actual data point minimizing the Euclidean
        distance to the cluster centroid. Returns ``self.empty_row`` when
        the cluster is empty (or when no element's distance beats the
        ``self.big_number`` sentinel).
        """
        if len(cluster_elements) == 0:
            return self.empty_row

        centroid = self.getCentroid(cluster_elements)

        # Sentinel start: any real distance should be smaller than this.
        min_dist = self.big_number
        medoid = self.empty_row

        for element in cluster_elements:

            dist = Utils.euclidean_distance(centroid, element)

            if dist < min_dist:
                min_dist = dist
                medoid = element

        return medoid
Ejemplo n.º 9
0
	def getMedoid(self, cluster_elements):
		"""Pick the cluster member nearest to the cluster centroid; falls
		back to ``self.empty_row`` for an empty cluster."""
		if len(cluster_elements) == 0:
			return self.empty_row

		N = len(cluster_elements[0])  # NOTE(review): unused; kept for parity

		centroid = self.getCentroid(cluster_elements)

		# Sentinel start: any real distance should beat this.
		best_dist = self.big_number
		best = self.empty_row

		for candidate in cluster_elements:
			d = Utils.euclidean_distance(centroid, candidate)
			if d < best_dist:
				best_dist = d
				best = candidate

		return best
Ejemplo n.º 10
0
    def KMeansCore(self, data, k, mean_method, means):
        """Run iterative relocation k-means/k-medoids until the error stabilizes.

        Parameters:
            data: iterable of elements to cluster.
            k: number of clusters.
            mean_method: entry of ``self.mean_methods`` — index 0 selects
                centroid means scored by MSE, index 1 selects medoid means
                scored by absolute error.
            means: dict of cluster index -> initial mean; mutated in place.

        Returns:
            (min_K, min_means, min_AE): the clustering, means and error of
            the best-scoring iteration observed.
        """
        K = {}

        min_AE = self.big_number
        prev_AE = 0
        min_means = {}
        min_K = {}

        iter_flag = True

        # do iterative relocation, until nothing changes
        while (iter_flag):

            # start each pass with empty clusters
            for cluster in range(k):
                K[cluster] = []

            # assign each element to the cluster which has the closest mean
            for element in data:

                # sentinel start; cluster 0 is the default closest cluster
                min_dist = self.big_number
                closest_cluster = 0

                for cluster in range(k):

                    dist = Utils.euclidean_distance(means[cluster], element)

                    if dist < min_dist:
                        min_dist = dist
                        closest_cluster = cluster

                K[closest_cluster].append(element)
                # NOTE(review): this overwrites the cluster's mean with the
                # last assigned element, biasing subsequent assignments in
                # the same pass — confirm this is intentional.
                means[closest_cluster] = element

            # calculate new mean for each cluster

            if mean_method == self.mean_methods[0]:
                for cluster in range(k):
                    means[cluster] = self.getCentroid(K[cluster])
            elif mean_method == self.mean_methods[1]:
                for cluster in range(k):
                    means[cluster] = self.getMedoid(K[cluster])

            # calculate the error (MSE for centroids, AE for medoids)
            if mean_method == self.mean_methods[0]:
                AE = self.MSE(K, means)
            elif mean_method == self.mean_methods[1]:
                AE = self.absolute_error(K, means)

            # Keep the best clustering seen so far. Copy the dicts: K and
            # means are mutated on the next iteration, so storing the bare
            # references would silently clobber the saved best result.
            # (Shallow copies suffice — values are rebound, not mutated.)
            if AE < min_AE:
                min_AE = AE
                min_means = dict(means)
                min_K = dict(K)

            # stop when nothing changes
            if prev_AE == AE:
                iter_flag = False
            else:
                prev_AE = AE

        return (min_K, min_means, min_AE)
Ejemplo n.º 11
0
	def KMeansCore(self, data, k, mean_method, means):
		"""Iteratively relocate elements among k clusters until the error stops changing.

		data: iterable of elements to cluster.
		k: number of clusters.
		mean_method: entry of self.mean_methods — index 0 selects centroid
			means scored by MSE, index 1 selects medoid means scored by
			absolute error.
		means: dict of cluster index -> initial mean; mutated in place.

		Returns (min_K, min_means, min_AE): clustering, means and error of
		the best-scoring iteration.

		NOTE(review): min_means/min_K store references to dicts that are
		mutated on later iterations, so the saved "best" snapshot can be
		overwritten — confirm whether copies were intended.
		"""
		K = {}
	
		min_AE = self.big_number
		prev_AE = 0
		min_means = {}
		min_K = {}

		iter_flag = True

		# do iterative relocation, until nothing changes
		while(iter_flag):

			# start each pass with empty clusters
			for cluster in range(k):
				K[cluster] = []

			# assign each element to the cluster which has the closest mean
			for element in data:

				# for start, just set a big max number, and just pick the cluster 0 as the closest cluster
				min_dist = self.big_number
				closest_cluster = 0

				for cluster in range(k):

					dist = Utils.euclidean_distance( means[cluster], element )

					if dist < min_dist:
						min_dist = dist
						closest_cluster = cluster

				K[closest_cluster].append( element )
				# NOTE(review): this overwrites the cluster's mean with the
				# last assigned element, biasing later assignments within
				# the same pass — confirm this is intentional.
				means[closest_cluster] = element


			# calculate new mean for each cluster
	
			if mean_method == self.mean_methods[0]:
				for cluster in range(k):
					means[cluster] = self.getCentroid( K[cluster] )
			elif mean_method == self.mean_methods[1]:
				for cluster in range(k):
					means[cluster] = self.getMedoid( K[cluster] )	

			# calculate the absolute error
			# (MSE when using centroids, plain absolute error for medoids)
			if mean_method == self.mean_methods[0]:
				AE = self.MSE(K, means)
			elif mean_method == self.mean_methods[1]:
				AE = self.absolute_error(K, means)

			# keep the clustering that minimizes the absolute error
			if AE < min_AE:
				min_AE = AE
				min_means = means
				min_K = K

			# stop when nothing changes
			if prev_AE == AE:
				iter_flag = False
			else:
				prev_AE = AE

		return (min_K, min_means, min_AE)