Example #1
    def refresh_cluster(self):
        # Reassign every point in the dataset to its nearest centroid.
        self.clusters = []

        for data in self.dataset:
            cluster = 0
            min_dist = eucledian_distance(data, self.centroids[0])
            for i in range(1, len(self.centroids)):
                # Compute the distance once and reuse it.
                dist = eucledian_distance(data, self.centroids[i])
                if dist < min_dist:
                    min_dist = dist
                    cluster = i

            self.clusters.append(cluster)
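Every example on this page relies on a eucledian_distance helper that is never shown. A minimal sketch of what such a helper presumably looks like (the spelling of the name is kept from the examples; the exact signature is an assumption):

import math

def eucledian_distance(a, b):
    # Plain Euclidean (L2) distance between two points given as
    # equal-length coordinate sequences.
    return math.sqrt(sum((x - y) ** 2 for x, y in zip(a, b)))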
Example #2
    def predict(self, test_data):
        # Assign each test point the index of its nearest centroid.
        result = []

        for td in test_data:
            closest_centroid = 0
            min_distance = eucledian_distance(self.centroids[closest_centroid],
                                              td)
            for i in range(1, len(self.centroids)):
                distance = eucledian_distance(self.centroids[i], td)
                if distance < min_distance:
                    min_distance = distance
                    closest_centroid = i

            result.append(closest_centroid)
        return result
Example #3
def scikmeans_predict(scikit_kmeans, test_data):
    # Re-implementation of KMeans.predict(): map each test point to the
    # nearest cluster center of a fitted scikit-learn KMeans model.
    result = []

    for td in test_data:
        closest_centroid = 0
        min_distance = eucledian_distance(
            scikit_kmeans.cluster_centers_[closest_centroid], td)
        for i in range(1, len(scikit_kmeans.cluster_centers_)):
            distance = eucledian_distance(scikit_kmeans.cluster_centers_[i],
                                          td)
            if distance < min_distance:
                min_distance = distance
                closest_centroid = i

        result.append(closest_centroid)
    return result
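A usage sketch, assuming scikit-learn and NumPy are available (the data arrays below are made up for illustration):

import numpy as np
from sklearn.cluster import KMeans

train_data = np.random.rand(100, 2)   # hypothetical training set
test_data = np.random.rand(10, 2)     # hypothetical test set

kmeans = KMeans(n_clusters=3, random_state=0).fit(train_data)
labels = scikmeans_predict(kmeans, test_data)
# labels should agree with kmeans.predict(test_data).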
Example #4
    def search_neighbor(self, data_index):
        # Return the indices of all points that lie within eps of the point
        # at data_index (the point itself is excluded).
        neighbors = []
        data = self.dataset[data_index]

        for i in range(len(self.dataset)):
            dist = eucledian_distance(self.dataset[i], data)
            if dist < self.eps and i != data_index:
                neighbors.append(i)

        return neighbors
Example #5
def scidbscan_predict(scikitdbscan, train_data, test_data):
    # DBSCAN has no predict(); label each test point with the cluster of its
    # closest non-noise training point (noise points carry the label -1).
    result = []
    for td in test_data:
        closest_data = 0
        min_distance = float('inf')
        for i in range(len(train_data)):
            distance = eucledian_distance(train_data[i], td)
            if distance < min_distance and scikitdbscan.labels_[i] != -1:
                min_distance = distance
                closest_data = i

        result.append(scikitdbscan.labels_[closest_data])
    return result
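A usage sketch, again assuming scikit-learn and NumPy (the data and the eps/min_samples values are made up):

import numpy as np
from sklearn.cluster import DBSCAN

train_data = np.random.rand(200, 2)   # hypothetical training set
test_data = np.random.rand(20, 2)     # hypothetical test set

dbscan = DBSCAN(eps=0.1, min_samples=5).fit(train_data)
labels = scidbscan_predict(dbscan, train_data, test_data)
# Each test point inherits the label of its nearest non-noise training point.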
Example #6
    def predict(self, test_data):
        # Label each test point with the cluster of its closest non-noise
        # point in the training dataset (noise points carry the cluster -1).
        result = []
        for td in test_data:
            closest_data = 0
            min_distance = float('inf')
            for i in range(len(self.dataset)):
                distance = eucledian_distance(self.dataset[i], td)
                if distance < min_distance and self.cluster[i] != -1:
                    min_distance = distance
                    closest_data = i

            result.append(self.cluster[closest_data])
        return result
Example #7
def predict_gaussian_naive_bayes(training_data, training_labels, test_data,
                                 test_labels, errors):
    # Fit a Gaussian Naive Bayes classifier and append, for each test point,
    # the Euclidean distance between the predicted and the true location.
    print("Predicting GNB.....")
    predictor = GaussianNB()

    # Convert training labels from integer tuples to strings so they can be
    # used as class labels.
    training_labels_str = [str(label) for label in training_labels]

    pred_labels_str = predictor.fit(training_data,
                                    training_labels_str).predict(test_data)
    # Convert the predicted string labels back to tuples.
    pred_labels = [make_tuple(label) for label in pred_labels_str]

    for i in range(len(pred_labels)):
        errors.append(utils.eucledian_distance(pred_labels[i], test_labels[i]))
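The snippet above presumably sits in a module with imports along these lines (an assumption; the original import block is not shown):

from sklearn.naive_bayes import GaussianNB
from ast import literal_eval as make_tuple   # assumed source of make_tuple
import utils                                 # project-local helpers, e.g. eucledian_distance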
Example #8
def generate_feature_vectors_at_centroid(K, T, SD, centroid, transmitter_locs,
                                         feature_vector, labels, seed):
    # Generate FEATURES_PER_CELL feature vectors for one centroid; each
    # feature is the power received from one of the T transmitters.
    feature_vectors_at_centroid = []
    for i in range(utils.FEATURES_PER_CELL):
        features = []
        for j in range(T):
            d = utils.eucledian_distance(centroid, transmitter_locs[j])
            if d == 0:
                # The centroid coincides with one of the transmitters;
                # ignore this centroid.
                return feature_vectors_at_centroid
            pl_d = utils.generate_power_at_d(d, K, SD, seed)
            features.append(pl_d)

        feature_vectors_at_centroid.append(features)
        feature_vector.append(features)
        labels.append(centroid)

    return feature_vectors_at_centroid
Example #9
def get_init_means():
    # Build an initial mean vector for every centroid: the noise-free power
    # (SD = 0) received from each of the T transmitters at that centroid.
    mean_init = []
    grid_indices, transmitter_locs = utils.generate_transmitter_locations(
        utils.K, utils.T, utils.SEED)
    centroids = get_locations()
    for i in range(len(centroids)):
        mean_cell = []
        for j in range(utils.T):
            d = utils.eucledian_distance(transmitter_locs[j], centroids[i])
            if d == 0:
                # The centroid sits on a transmitter: use the transmit power.
                mean_cell.append(utils.POWER_T)
            else:
                mean_cell.append(
                    utils.generate_power_at_d(d, utils.K, 0, utils.SEED))

        mean_init.append(mean_cell)
    return mean_init
Example #10
    def predict(self, test_data, test_labels, res):
        print("Testing")
        errors = []
        pred_classes = []
        pred_locs = []

        # Predict the GMM component of each test point in batches.
        for start_index in range(0, len(test_data), BATCH_SIZE):
            end_index = start_index + BATCH_SIZE
            test_batch = test_data[start_index:end_index]
            pred_classes.extend(self.gmm.predict(test_batch).tolist())

        # Map each predicted component to a location and record the error
        # against the true label.
        for i in range(len(pred_classes)):
            loc = self.get_location(pred_classes[i])
            pred_locs.append(loc)
            errors.append(utils.eucledian_distance(loc, test_labels[i]))

        avg_error = float(sum(errors)) / len(errors)
        response = {
            "predictions": pred_locs,
            "avg_error": avg_error
        }
        res.body = json.dumps(response)