Exemple #1
0
    def predict(self, X_test, X_train, y_train):
        """Classify each row of X_test by a majority vote of its k nearest training rows.

        For every test sample the distance to every training sample is
        computed with ``euclidean_distance``; the ``self.k`` closest training
        samples vote and the most frequent label wins.

        Labels are cast to ``int`` and counted with ``np.bincount``, so they
        must be non-negative integers. On a tie ``argmax`` picks the smallest
        label.

        Args:
            X_test: 2-D array of samples to classify (rows are samples).
            X_train: 2-D array of training samples (rows are samples).
            y_train: labels for X_train, indexable by row number.

        Returns:
            1-D float array with one predicted label per row of X_test. If
            X_train has no rows every prediction is left at 0.
        """
        y_predict = np.zeros(X_test.shape[0])

        for i in range(X_test.shape[0]):
            # Column 0: distance from the test sample to a training sample.
            # Column 1: the class label of that training sample.
            distances = np.zeros((X_train.shape[0], 2))

            for j in range(X_train.shape[0]):
                dis = euclidean_distance(X_test[i], X_train[j])
                label = y_train[j]
                distances[j] = [dis, label]

            if distances.shape[0] == 0:
                # No training data to vote with: keep the default label 0.
                continue

            # The vote runs once per test sample, after the table is complete.
            # Doing it inside the loop above would repeatedly sort a table
            # still padded with zero rows and discard all but the last result.
            # argsort() orders the rows by ascending distance; keep the first k.
            k_nearest_neighbors = distances[
                distances[:, 0].argsort()][:self.k]

            # Count how often each class occurs among the k neighbours.
            counts = np.bincount(k_nearest_neighbors[:, 1].astype('int'))

            # The class with the highest count is the prediction.
            y_predict[i] = counts.argmax()

        return y_predict
 def _calculate_cost(self, X, clusters, medoids):
     """Sum the distances between every sample and the medoid of its cluster.

     ``clusters[i]`` holds the indices (into ``X``) of the samples assigned
     to ``medoids[i]``.
     """
     total = 0
     for cluster_idx, members in enumerate(clusters):
         # Medoid that this group of samples is measured against
         center = medoids[cluster_idx]
         for member in members:
             total += euclidean_distance(X[member], center)
     return total
 def _closest_medoid(self, sample, medoids):
     """Find which medoid lies nearest to ``sample``.

     Returns the position of that medoid in ``medoids`` (the first one on a
     tie), or None when no medoid is closer than infinity, e.g. when
     ``medoids`` is empty.
     """
     best_index, best_distance = None, float("inf")
     for index, candidate in enumerate(medoids):
         candidate_distance = euclidean_distance(sample, candidate)
         # Only a strictly smaller distance replaces the current best
         if not candidate_distance < best_distance:
             continue
         best_index, best_distance = index, candidate_distance
     return best_index
 def _closest_centroid(self, sample, centroids):
     """Find which centroid lies nearest to ``sample``.

     Returns the position of that centroid in ``centroids`` (the first one
     on a tie). Falls back to 0 when no centroid is closer than infinity.
     """
     best_index, best_distance = 0, float('inf')
     for index, candidate in enumerate(centroids):
         candidate_distance = euclidean_distance(sample, candidate)
         # Only a strictly smaller distance replaces the current best
         if not candidate_distance < best_distance:
             continue
         best_index, best_distance = index, candidate_distance
     return best_index
 def _get_neighbors(self, sample_i):
     """Collect the indices of all samples within ``self.eps`` of sample ``sample_i``.

     Another sample counts as a neighbor when its distance to
     ``self.X[sample_i]`` is strictly smaller than ``self.eps``; the sample
     itself is never included. The indices are returned as a numpy array.
     """
     neighbor_ids = [
         other_i
         for other_i, other in enumerate(self.X)
         if other_i != sample_i
         and euclidean_distance(self.X[sample_i], other) < self.eps
     ]
     return np.array(neighbor_ids)
 def _closest_centroid(self, sample, centroids):
     """Index of the centroid with the smallest distance to ``sample``.

     The earliest centroid wins a tie; 0 is returned when nothing beats the
     initial distance of infinity.
     """
     nearest = 0
     shortest = float('inf')
     for position, center in enumerate(centroids):
         gap = euclidean_distance(sample, center)
         # Strict comparison keeps the first of several equally close centroids
         if gap < shortest:
             nearest, shortest = position, gap
     return nearest
 def _calculate_cost(self, X, clusters, medoids):
     """Total distance from each sample to the medoid of the cluster it is in.

     ``clusters`` is iterated in order; cluster number ``n`` is paired with
     ``medoids[n]`` and its entries are used as indices into ``X``.
     """
     running_cost = 0
     for n, sample_indices in enumerate(clusters):
         cluster_medoid = medoids[n]
         for idx in sample_indices:
             # Each sample contributes its distance to its own medoid
             contribution = euclidean_distance(X[idx], cluster_medoid)
             running_cost += contribution
     return running_cost
 def _closest_medoid(self, sample, medoids):
     """Index of the medoid with the smallest distance to ``sample``.

     The earliest medoid wins a tie; None is returned when nothing beats the
     initial distance of infinity (for instance when ``medoids`` is empty).
     """
     nearest = None
     shortest = float("inf")
     for position, center in enumerate(medoids):
         gap = euclidean_distance(sample, center)
         # Strict comparison keeps the first of several equally close medoids
         if gap < shortest:
             nearest, shortest = position, gap
     return nearest
 def _get_neighbors(self, sample_i):
     """Return the indices of the samples that neighbor sample ``sample_i``.

     A sample is a neighbor when its distance to ``self.X[sample_i]`` is
     strictly smaller than ``self.eps``. The sample itself is excluded.

     Args:
         sample_i: index into ``self.X`` of the sample whose neighbors are
             wanted.

     Returns:
         numpy array of indices into ``self.X``.
     """
     neighbors = []
     idxs = np.arange(len(self.X))
     # Iterate the ORIGINAL indices of the other samples. Enumerating the
     # filtered array self.X[idxs != sample_i] would number its rows from 0,
     # so every neighbor located after sample_i would be reported one
     # position too low.
     for i in idxs[idxs != sample_i]:
         distance = euclidean_distance(self.X[sample_i], self.X[i])
         if distance < self.eps:
             neighbors.append(i)
     return np.array(neighbors)
Exemple #10
0
 def _get_neighbors(self, sample_i):
     """Return the indices of the samples that neighbor sample ``sample_i``.

     A sample is a neighbor when its distance to ``self.X[sample_i]`` is
     strictly smaller than ``self.eps``. The sample itself is excluded.

     Args:
         sample_i: index into ``self.X`` of the sample whose neighbors are
             wanted.

     Returns:
         numpy array of indices into ``self.X``.
     """
     neighbors = []
     idxs = np.arange(len(self.X))
     # Iterate the ORIGINAL indices of the other samples. Enumerating the
     # filtered array self.X[idxs != sample_i] would number its rows from 0,
     # so every neighbor located after sample_i would be reported one
     # position too low.
     for i in idxs[idxs != sample_i]:
         distance = euclidean_distance(self.X[sample_i], self.X[i])
         if distance < self.eps:
             neighbors.append(i)
     return np.array(neighbors)
Exemple #11
0
    def predict(self, X_test, X_train, y_train):
        """Predict a label for every row of X_test from its k nearest training rows.

        The training samples are ranked by ``euclidean_distance`` to each test
        sample, the labels of the first ``self.k`` are collected and handed to
        ``self._vote``, whose result becomes the prediction for that row.
        """
        y_pred = np.empty(X_test.shape[0])
        for row, query in enumerate(X_test):
            # Distance from this test sample to every training sample
            dists = [euclidean_distance(query, train_sample)
                     for train_sample in X_train]
            # Positions of the k closest training samples
            nearest = np.argsort(dists)[:self.k]
            # Their labels are what gets voted on
            neighbor_labels = np.array([y_train[pos] for pos in nearest])
            y_pred[row] = self._vote(neighbor_labels)

        return y_pred
    def predict(self, X_test, X_train, y_train):
        """Label each test sample using the labels of its k closest training samples.

        Returns an array with one entry per row of X_test; each entry is
        whatever ``self._vote`` returns for the labels of the ``self.k``
        training samples nearest (by ``euclidean_distance``) to that row.
        """
        y_pred = np.empty(X_test.shape[0])
        for test_idx, test_sample in enumerate(X_test):
            # Rank all training samples by distance and keep the k best ranks
            distance_to_train = [
                euclidean_distance(test_sample, candidate)
                for candidate in X_train
            ]
            ranking = np.argsort(distance_to_train)
            closest = ranking[:self.k]
            # Gather the labels belonging to those k training samples
            votes = np.array([y_train[train_idx] for train_idx in closest])
            y_pred[test_idx] = self._vote(votes)

        return y_pred
        
 def predict(self, X_test, X_train, y_train):
     """Predict a label for every row of X_test from its k nearest training rows.

     For each test sample a two-column table is built (distance to a
     training sample, that sample's label), sorted by distance and cut to
     the first ``self.k`` rows. Those rows are passed to ``self._vote`` and
     its result is stored as the prediction.
     """
     y_pred = np.empty(X_test.shape[0])
     for i, test_sample in enumerate(X_test):
         # One row per training sample: [distance, label]
         table = np.empty((X_train.shape[0], 2))
         for j, observed_sample in enumerate(X_train):
             table[j] = [
                 euclidean_distance(test_sample, observed_sample),
                 y_train[j],
             ]
         # Row order that sorts the table by ascending distance
         order = table[:, 0].argsort()
         closest_rows = table[order][:self.k]
         y_pred[i] = self._vote(closest_rows)
     return y_pred
Exemple #14
0
 def predict(self, X_test, X_train, y_train):
     """Predict a label for every row of X_test from its k nearest training rows.

     For each test sample the [distance, label] pairs of all training
     samples are sorted by distance; the first ``self.k`` pairs are passed
     to ``self._vote`` together with the unique labels found in y_train.
     The results are returned as a numpy array.
     """
     classes = np.unique(y_train)
     predictions = []
     for test_sample in X_test:
         # One [distance, label] pair per training sample
         pairs = np.array([
             [euclidean_distance(test_sample, observed_sample), y_train[j]]
             for j, observed_sample in enumerate(X_train)
         ])
         # Order the pairs by ascending distance and keep the first k
         by_distance = pairs[:, 0].argsort()
         k_closest = pairs[by_distance][:self.k]
         # The vote among those k neighbors decides the predicted class
         predictions.append(self._vote(k_closest, classes))
     return np.array(predictions)
    def predict(self, X_test, X_train, y_train):
        """Classify each row of X_test by a majority vote of its k nearest training rows.

        For every test sample the distance to every training sample is
        computed with ``euclidean_distance``; the ``self.k`` closest training
        samples vote and the most frequent label wins.

        Labels are cast to ``int`` and counted with ``np.bincount``, so they
        must be non-negative integers. On a tie ``argmax`` picks the smallest
        label.

        Args:
            X_test: 2-D array of samples to classify (rows are samples).
            X_train: 2-D array of training samples (rows are samples).
            y_train: labels for X_train, indexable by row number.

        Returns:
            1-D float array with one predicted label per row of X_test. If
            X_train has no rows every prediction is left at 0.
        """
        y_predict = np.zeros(X_test.shape[0])

        for i in range(X_test.shape[0]):
            # Column 0: distance from the test sample to a training sample.
            # Column 1: the class label of that training sample.
            distances = np.zeros((X_train.shape[0], 2))

            for j in range(X_train.shape[0]):
                dis = euclidean_distance(X_test[i], X_train[j])
                label = y_train[j]
                distances[j] = [dis, label]

            if distances.shape[0] == 0:
                # No training data to vote with: keep the default label 0.
                continue

            # The vote runs once per test sample, after the table is complete.
            # Doing it inside the loop above would repeatedly sort a table
            # still padded with zero rows and discard all but the last result.
            # argsort() orders the rows by ascending distance; keep the first k.
            k_nearest_neighbors = distances[distances[:, 0].argsort()][:self.k]

            # Count how often each class occurs among the k neighbours.
            counts = np.bincount(k_nearest_neighbors[:, 1].astype('int'))

            # The class with the highest count is the prediction.
            y_predict[i] = counts.argmax()

        return y_predict