def _get_neighbors(self, sample_i):
    """Return the indices of all samples lying within ``self.radius`` of
    the sample at index ``sample_i`` (the sample itself is excluded).

    Parameters:
        sample_i: index into ``self.data`` of the query sample.

    Returns:
        np.ndarray of integer indices of the in-radius neighbors.
    """
    # Hoist the query point out of the loop; compare every other sample
    # against it and keep those closer than the radius threshold.
    query = self.data[sample_i]
    return np.array([
        idx
        for idx, candidate in enumerate(self.data)
        if idx != sample_i
        and ml_helpers.euclidean_distance(query, candidate) < self.radius
    ])
def _closest_centroid(self, sample, centroids):
    """Return the index of the centroid nearest to ``sample``.

    Parameters:
        sample: the data point to assign.
        centroids: iterable of centroid points.

    Returns:
        Index of the closest centroid, or ``None`` if ``centroids``
        is empty.
    """
    best_idx = None
    best_dist = float("inf")
    for idx, centroid in enumerate(centroids):
        dist = ml_helpers.euclidean_distance(sample, centroid)
        # Strict '<' keeps the first centroid encountered on ties.
        if dist < best_dist:
            best_idx, best_dist = idx, dist
    return best_idx
def _calculate_centroids(self, clusters, data):
    """Compute a medoid-style centroid for each cluster.

    For every cluster, the centroid is the member point whose summed
    distance to all other members of that cluster is smallest.

    Parameters:
        clusters: sequence of index arrays, one per cluster, indexing
            rows of ``data``.
        data: array of shape (n_samples, n_features).

    Returns:
        np.ndarray of shape (self.k, n_features). A cluster that is
        empty leaves its centroid row as zeros.
    """
    n_features = np.shape(data)[1]
    centroids = np.zeros((self.k, n_features))
    for i, cluster in enumerate(clusters):
        curr_cluster = data[cluster]
        smallest_dist = float("inf")
        for point in curr_cluster:
            # Total distance from this candidate point to every member
            # of the cluster.
            total_dist = np.sum(
                ml_helpers.euclidean_distance(curr_cluster, [point] * len(curr_cluster)))
            if total_dist < smallest_dist:
                # BUG FIX: smallest_dist was never updated, so every
                # point overwrote centroids[i] and the *last* point of
                # the cluster always won instead of the true medoid.
                smallest_dist = total_dist
                centroids[i] = point
    return centroids
def predict(self, X_test, X_train, y_train):
    """Classify each test sample by a majority vote among its k nearest
    training samples (k-nearest-neighbors).

    Parameters:
        X_test: samples to classify.
        X_train: observed training samples.
        y_train: labels aligned with ``X_train``.

    Returns:
        np.ndarray of predicted labels, one per row of ``X_test``.
    """
    classes = np.unique(y_train)
    predictions = []
    for test_sample in X_test:
        # Pair every training sample's distance with its label.
        dist_label = np.array([
            [ml_helpers.euclidean_distance(test_sample, train_sample), label]
            for train_sample, label in zip(X_train, y_train)
        ])
        # Keep the k rows with the smallest distances.
        nearest = dist_label[np.argsort(dist_label[:, 0])][:self.k]
        # The winning class among the k neighbors becomes the prediction.
        predictions.append(self._majority_vote(nearest, classes))
    return np.array(predictions)