def _get_neighbors(self, sample_i):
     """Return the indices of the samples lying within ``self.radius`` of one sample.

     Every entry of ``self.data`` except the one at ``sample_i`` is compared
     against ``self.data[sample_i]`` using ``ml_helpers.euclidean_distance``;
     an index is kept when that distance is strictly smaller than
     ``self.radius``.

     Args:
         sample_i: Position in ``self.data`` of the sample whose neighbours
             are wanted.

     Returns:
         A NumPy array holding the indices of the neighbouring samples, in
         the order they appear in ``self.data``.
     """
     close_indices = [
         other_i
         for other_i, other in enumerate(self.data)
         # The index check comes first so the sample is never measured
         # against itself.
         if other_i != sample_i
         and ml_helpers.euclidean_distance(self.data[sample_i], other) < self.radius
     ]
     return np.array(close_indices)
Exemple #2
0
 def _closest_centroid(self, sample, centroids):
     """Find which centroid lies nearest to ``sample``.

     Distances come from ``ml_helpers.euclidean_distance``. Only a strictly
     smaller distance replaces the current best, so the earliest centroid
     wins a tie.

     Args:
         sample: The point to assign.
         centroids: Iterable of candidate centroids.

     Returns:
         The position (within ``centroids``) of the nearest centroid, or
         ``None`` when no centroid produced a distance below infinity
         (for example when ``centroids`` is empty).
     """
     best_index, best_distance = None, float("inf")
     for index, candidate in enumerate(centroids):
         candidate_distance = ml_helpers.euclidean_distance(sample, candidate)
         if candidate_distance < best_distance:
             best_index, best_distance = index, candidate_distance
     return best_index
Exemple #3
0
 def _calculate_centroids(self, clusters, data):
     """Choose, for each cluster, the member point with the smallest total distance.

     For every point in a cluster the summed result of
     ``ml_helpers.euclidean_distance`` between the cluster and that point
     (repeated once per cluster member) is computed; the point with the
     lowest sum becomes the cluster's centroid.

     Args:
         clusters: Iterable of clusters; each entry is used to index ``data``
             (presumably a list/array of row indices -- confirm with caller).
         data: Two-dimensional array of samples; its second dimension gives
             the number of features.

     Returns:
         Array of shape ``(self.k, n_features)``. Row ``i`` is the selected
         point of cluster ``i``; a row stays all zeros when its cluster
         yields no point to select.
     """
     n_features = np.shape(data)[1]
     centroids = np.zeros((self.k, n_features))
     for i, cluster in enumerate(clusters):
         curr_cluster = data[cluster]
         smallest_dist = float("inf")
         for point in curr_cluster:
             total_dist = np.sum(
                 ml_helpers.euclidean_distance(curr_cluster,
                                               [point] * len(curr_cluster)))
             if total_dist < smallest_dist:
                 # Record the new best total; without this every finite
                 # total beats the initial infinity and the last point in
                 # the cluster would always be selected.
                 smallest_dist = total_dist
                 centroids[i] = point
     return centroids
Exemple #4
0
    def predict(self, X_test, X_train, y_train):
        """Predict a label for every test sample by voting among its nearest neighbours.

        For each sample in ``X_test`` the distance to every sample in
        ``X_train`` is computed with ``ml_helpers.euclidean_distance``, the
        ``self.k`` closest training samples are selected, and
        ``self._majority_vote`` decides the label from them.

        Args:
            X_test: Iterable of samples to classify.
            X_train: Iterable of observed samples.
            y_train: Labels of the observed samples, indexed by the position
                of the matching sample in ``X_train``.

        Returns:
            A NumPy array with one predicted label per test sample.
        """
        classes = np.unique(y_train)
        predictions = []
        for query in X_test:
            # One [distance, label] row per observed sample.
            scored = np.array([
                [ml_helpers.euclidean_distance(query, observed), y_train[j]]
                for j, observed in enumerate(X_train)
            ])

            # Order the rows by ascending distance and keep the first k.
            by_distance = scored[:, 0].argsort()
            k_nearest_neighbors = scored[by_distance][:self.k]

            # The label receiving the most votes among those rows wins.
            predictions.append(
                self._majority_vote(k_nearest_neighbors, classes))
        return np.array(predictions)