Example #1
0
def smoothed_saliency(ind, colors, probs):
    # convert per-region RGB means to CIELab
    lab = rgb2lab(colors[None].astype(np.uint8)).squeeze()
    # pairwise squared Lab distances between all regions
    # lab_dist = np.square(lab[..., None] - lab.T).sum(1)
    lab_dist = squareform(pdist(lab, 'sqeuclidean'))
    # contrast-based saliency weighted by probs, rescaled to [0, 1]
    s = (lab_dist * probs).sum(1)
    s = (s - s.min()) / (s.max() - s.min())
    # smooth each score over the m nearest neighbors in Lab space,
    # weighting closer neighbors more heavily
    m = lab.shape[0] // 4
    dist, nn = NearestNeighbors(n_neighbors=m).fit(lab).kneighbors()
    T = dist.sum(1)
    sp = ((T[:, None] - dist) * s[nn]).sum(1) / ((m - 1) * T)

    return sp
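A minimal usage sketch, assuming the function above is in scope: colors is taken to be an array of per-region mean RGB values and probs an (N, N) weight matrix. The imports, names, shapes, and values below are illustrative assumptions, not part of the original snippet.

import numpy as np
from scipy.spatial.distance import pdist, squareform
from skimage.color import rgb2lab
from sklearn.neighbors import NearestNeighbors

# Hypothetical inputs: 50 regions with random mean colors and uniform weights.
rng = np.random.default_rng(0)
colors = rng.integers(0, 256, size=(50, 3))   # (N, 3) per-region RGB means
probs = np.full((50, 50), 1.0 / 50)           # (N, N) region-to-region weights
saliency = smoothed_saliency(np.arange(50), colors, probs)
print(saliency.shape)  # (50,)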
Example #2
0
def load_data():
    """
    Function to load the data
    :param size: total number of points to get
    :param num_lm: number of points which will be landmarks
    :return: the batch loader, landmark points, labels, batched data without landmark points,
    data organized in graphs, neighborhood graph for the landmarks, original data, original labels,
    neighborhood graphs for non-landmarks
    """
    global batch_size, num_batches
    # import data
    data, labels = original_clean()
    test_data = data[:test_size, :]
    test_labels = labels[:test_size]

    data = data[test_size:, :]

    # make landmarks from the points with the most neighbors
    N = NearestNeighbors(
        n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
    N = np.array(N)
    # count each point's neighbors and sort indices in descending order of connectivity
    num_connections = N.sum(axis=0).argsort()[::-1]
    top_landmarks_idxs = num_connections[:num_lm]  # take the most-connected points
    land_marks = data[top_landmarks_idxs, :]  # these become the landmarks
    data = np.delete(data, top_landmarks_idxs, axis=0)  # delete the landmarks
    # find the nearest landmarks for the landmarks
    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(
        land_marks).kneighbors_graph(land_marks).todense()
    # break data into batches, create empty holders
    batch_loader = np.zeros((num_batches, batch_size + num_lm, n))
    batch_graph = np.zeros(
        (num_batches, batch_size + num_lm, batch_size + num_lm))
    # create the full neighborhood graph for each batch
    for i in range(num_batches):
        holder = data[batch_size * i:batch_size * (i + 1)]
        # find the nearest landmarks for the rest of the points
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(
            land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        batch_loader[i] = holder
    batch_size += num_lm  # adjust the batch size
    return batch_loader, data, batch_graph, landmark_neighbors, test_data, test_labels, land_marks
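This variant reads its configuration from module-level globals (batch_size, num_batches, test_size, num_lm, n, k_start, k_lm, k_other) and from a project-specific loader original_clean(). A rough sketch of the surrounding setup it assumes, with placeholder values and a stand-in loader chosen purely for illustration; none of these values come from the original source.

import numpy as np
from sklearn.neighbors import NearestNeighbors

# Placeholder module-level configuration (illustrative values only).
test_size = 100                    # rows held out for testing
num_lm = 10                        # number of landmark points
batch_size = 50                    # points per batch, before landmarks are appended
num_batches = 4                    # number of batches to build
n = 20                             # feature dimension of the data
k_start, k_lm, k_other = 5, 3, 3   # neighbor counts for the three graphs

def original_clean():
    # stand-in for the project's loader: random data and binary labels
    rng = np.random.default_rng(0)
    return rng.normal(size=(400, n)), rng.integers(0, 2, size=400)

batches, rest, graphs, lm_graph, test_x, test_y, lms = load_data()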
Example #3
0
def load_data(size, num_lm):
    global divisor, m, n, batch_size, set_random
    # import data
    data = pd.read_csv('pol_data.csv', delimiter=',').values[:, :-1]
    labels = np.asarray([0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0])
    test_data = data[size:, :]
    test_labels = labels[size:]
    print(size)
    data = data[:size, :]
    labels = labels[:size]
    m = np.size(data, 0)
    n = np.size(data, 1)
    # data = normalize(data)
    # make landmarks, select x random points in the data set
    land_marks = np.empty((num_lm, n))
    top_landmarks_idxs = []
    if set_random:
        for i in range(num_lm):
            index = random.randint(0, m - i - 1)  # randint is inclusive of both bounds
            land_marks[i] = data[index]
            data = np.delete(data, index, axis=0)
            labels = np.delete(labels, index, axis=0)
    else:
        N = NearestNeighbors(n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
        N = np.array(N)
        num_connections = N.sum(axis=0).argsort()[::-1]
        top_landmarks_idxs = num_connections[:num_lm]
        land_marks = data[top_landmarks_idxs, :]
        data = np.delete(data, top_landmarks_idxs, axis=0)

    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(land_marks).kneighbors_graph(land_marks).todense()
    divisor = len(data) // batch_size  # number of full batches left after removing landmarks
    batch_loader = np.zeros((divisor, batch_size + num_lm, n))
    batch_graph = np.zeros((divisor, batch_size + num_lm, batch_size + num_lm))
    for i in range(divisor):
        holder = data[batch_size * i: batch_size * (i + 1)]
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        batch_loader[i] = holder
    batch_size += num_lm
    return batch_loader, land_marks, labels, data, batch_graph, top_landmarks_idxs, test_data, test_labels, landmark_neighbors
Example #4
0
def load_data(size, num_lm):
    global divisor, m, n, batch_size, set_random
    # import data
    data, labels = sklearn.datasets.make_swiss_roll(size)
    data, labels = shuffle(data, labels)
    m = np.size(data, 0)
    n = np.size(data, 1)
    data = normalize(data)
    saveLabels = labels
    saveData = data
    # make landmarks, select x random points in the data set
    land_marks = np.empty((num_lm, n))
    top_landmarks_idxs = []
    if set_random:
        for i in range(num_lm):
            index = random.randint(0, m - i - 1)  # randint is inclusive of both bounds
            land_marks[i] = data[index]
            data = np.delete(data, index, axis=0)
            labels = np.delete(labels, index, axis=0)
    else:
        N = NearestNeighbors(
            n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
        N = np.array(N)
        num_connections = N.sum(axis=0).argsort()[::-1]
        top_landmarks_idxs = num_connections[:num_lm]
        land_marks = data[top_landmarks_idxs, :]
        data = np.delete(data, top_landmarks_idxs, axis=0)

    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(
        land_marks).kneighbors_graph(land_marks).todense()
    divisor = len(data) // batch_size  # number of full batches left after removing landmarks
    batch_loader = np.zeros((divisor, batch_size + num_lm, n))
    batch_graph = np.zeros((divisor, batch_size + num_lm, batch_size + num_lm))
    for i in range(divisor):
        holder = data[batch_size * i:batch_size * (i + 1)]
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(
            land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        batch_loader[i] = holder
    batch_size += num_lm
    return batch_loader, land_marks, labels, data, batch_graph, top_landmarks_idxs, saveData, saveLabels
Example #5
0
def load_data():
    global batch_size, divisor
    data, labels = original_clean()
    test_data = data[300:, :]
    test_labels = labels[300:]
    data = data[:300, :]
    labels = labels[:300]

    N = NearestNeighbors(
        n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
    N = np.array(N)
    num_connections = N.sum(axis=0).argsort()[::-1]
    top_landmarks_idxs = num_connections[:num_lm]
    land_marks = data[top_landmarks_idxs, :]
    data = np.delete(data, top_landmarks_idxs, axis=0)
    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(
        land_marks).kneighbors_graph(land_marks).todense()
    divisor = len(data) // batch_size  # number of full batches left after removing landmarks
    batch_loader = np.zeros((divisor, batch_size + num_lm, n))
    batch_graph = np.zeros((divisor, batch_size + num_lm, batch_size + num_lm))
    for i in range(divisor):
        holder = data[batch_size * i:batch_size * (i + 1)]
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(
            land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        batch_loader[i] = holder
    batch_size += num_lm
    return batch_loader, land_marks, labels, data, batch_graph, top_landmarks_idxs, test_data, test_labels, landmark_neighbors
Example #6
0
    def fit(self, data, k):
        """
        The method to fit an MVU model to the data.

        :param data: The data to which the model will be fitted.
        :param k: The number of nearest neighbors whose distances are preserved.
        :return: Embedded Gramian: The Gramian matrix of the embedded data.
        """
        # Number of data points in the set
        n = data.shape[0]

        # Set the seed
        np.random.seed(self.seed)

        # Calculate the nearest neighbors of each data point and build a graph
        N = NearestNeighbors(n_neighbors=k).fit(data).kneighbors_graph(data).todense()
        N = np.array(N)

        # Sort the neighbor graph to find the points with the most connections
        num_connections = N.sum(axis=0).argsort()[::-1]

        # Separate the most popular points
        top_landmarks_idxs = num_connections[:self.landmarks]
        top_landmarks = data[top_landmarks_idxs, :]

        # Compute the nearest neighbors for all of the landmarks so they are all connected
        L = NearestNeighbors(n_neighbors=3).fit(top_landmarks).kneighbors_graph(top_landmarks).todense()
        L = np.array(L)

        # The data without the landmarks
        new_data_idxs = [x for x in list(range(n)) if x not in top_landmarks_idxs]
        new_data = np.delete(data, top_landmarks_idxs, axis=0)

        # Construct a neighborhood graph connecting each remaining point to its closest landmarks
        l = NearestNeighbors(n_neighbors=3).fit(top_landmarks).kneighbors_graph(new_data).todense()
        l = np.array(l)

        # Reset N to all 0's
        N = np.zeros((n, n))

        # Add all of the intra-landmark connections to the neighborhood graph
        for i in range(self.landmarks):
            for j in range(self.landmarks):
                if L[i, j] == 1.:
                    N[top_landmarks_idxs[i], top_landmarks_idxs[j]] = 1.

        # Add all of the point-to-landmark connections to the neighborhood graph
        for i in range(n - self.landmarks):
            for j in range(self.landmarks):
                if l[i, j] == 1.:
                    N[new_data_idxs[i], top_landmarks_idxs[j]] = 1.

        # Save the neighborhood graph to be accessed later
        self.neighborhood_graph = N

        # To check for disconnected regions in the neighbor graph
        lap = laplacian(N, normed=True)
        eigvals, _ = np.linalg.eig(lap)

        for e in eigvals:
            if e == 0. and self.solver_iters is None:
                raise DisconnectError("DISCONNECTED REGIONS IN NEIGHBORHOOD GRAPH. "
                                      "PLEASE SPECIFY MAX ITERATIONS FOR THE SOLVER")

        # Declare some CVXPy variables
        # Gramian of the original data
        P = cp.Constant(data.dot(data.T))
        # The projection of the Gramian
        Q = cp.Variable((n, n), PSD=True)
        # Initialized to zeros
        Q.value = np.zeros((n, n))
        # A constant n-by-1 vector of ones
        ONES = cp.Constant(np.ones((n, 1)))
        # A constant to keep the notation consistent with the Berkeley lecture
        T = cp.Constant(n)

        # Declare placeholders to get rid of annoying warnings
        objective = None
        constraints = []

        # Wikipedia Solution
        if self.equation == "wikipedia":
            objective = cp.Maximize(cp.trace(Q))

            constraints = [Q >> 0, cp.sum(Q, axis=1) == 0]

            for i in range(n):
                for j in range(n):
                    if N[i, j] == 1:
                        constraints.append((P[i, i] + P[j, j] - P[i, j] - P[j, i]) -
                                           (Q[i, i] + Q[j, j] - Q[i, j] - Q[j, i]) == 0)

        # UC Berkeley Solution
        if self.equation == "berkley":
            objective = cp.Maximize(cp.multiply((1 / T), cp.trace(Q)) -
                                    cp.multiply((1 / (T * T)), cp.trace(cp.matmul(cp.matmul(Q, ONES), ONES.T))))

            constraints = [Q >> 0, cp.sum(Q, axis=1) == 0]
            for i in range(n):
                for j in range(n):
                    if N[i, j] == 1.:
                        constraints.append(Q[i, i] - 2 * Q[i, j] + Q[j, j] -
                                           (P[i, i] - 2 * P[i, j] + P[j, j]) == 0)

        # Solve the problem with the SCS Solver
        problem = cp.Problem(objective, constraints)
        # FIXME The solvertol syntax is unique to SCS
        problem.solve(solver=self.solver,
                      eps=self.solver_tol,
                      max_iters=self.solver_iters,
                      warm_start=self.warm_start)

        return Q.value
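The solver returns the Gramian of the embedded points rather than coordinates. A standard way to recover a low-dimensional embedding from it is to factor the Gramian through its leading eigenpairs (X is approximately V times the square root of Lambda); a minimal sketch of that follow-up step, which is not part of the class above:

import numpy as np

def embed_from_gramian(gramian, dim=2):
    # eigendecomposition of the symmetric Gramian (eigenvalues in ascending order)
    eigvals, eigvecs = np.linalg.eigh(gramian)
    top = np.argsort(eigvals)[::-1][:dim]
    # scale each leading eigenvector by the square root of its eigenvalue
    return eigvecs[:, top] * np.sqrt(np.maximum(eigvals[top], 0.0))

# coords = embed_from_gramian(Q_value, dim=2)   # Q_value: the matrix returned by fit() above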
Example #7
0
def load_data(size, num_lm):
    """
    Function to load the data
    :param size: total number of points to get
    :param num_lm: number of points which will be landmarks
    :return: the batch loader, landmark points, labels, batched data without landmark points,
    data organized in graphs, neighborhood graph for the landmarks, original data, original labels,
    neighborhood graphs for non-landmarks
    """
    global divisor, m, n, batch_size, set_random
    # import data
    data, labels = sklearn.datasets.make_swiss_roll(size)
    # data, labels = shuffle(data, labels)
    m = np.size(data, 0)
    n = np.size(data, 1)
    data = normalize(data)
    save_labels = labels
    save_data = data
    # make landmarks, select x random points in the data set
    land_marks = np.empty((num_lm, n))
    top_landmarks_idxs = []
    if set_random:
        # select x random points in the data set
        for i in range(num_lm):
            index = random.randint(0, m - i - 1)  # randint is inclusive of both bounds
            land_marks[i] = data[index]
            data = np.delete(data, index, axis=0)
            labels = np.delete(labels, index, axis=0)
    else:
        # pick the points with the most neighbors
        N = NearestNeighbors(
            n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
        N = np.array(N)
        num_connections = N.sum(axis=0).argsort()[::-1]
        top_landmarks_idxs = num_connections[:num_lm]
        land_marks = data[top_landmarks_idxs, :]
        data = np.delete(data, top_landmarks_idxs, axis=0)

    # find the nearest landmarks for the landmarks
    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(
        land_marks).kneighbors_graph(land_marks).todense()
    # break data into batches
    divisor = len(data) // batch_size  # number of full batches left after removing landmarks
    batch_loader = np.zeros((divisor, batch_size + num_lm, n))
    batch_graph = np.zeros((divisor, batch_size + num_lm, batch_size + num_lm))
    for i in range(divisor):
        holder = data[batch_size * i:batch_size * (i + 1)]
        # find the nearest landmarks for the rest of the points
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(
            land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        batch_loader[i] = holder
    batch_size += num_lm  # adjust the batch size
    return batch_loader, land_marks, labels, data, batch_graph, top_landmarks_idxs, save_data, save_labels, landmark_neighbors
Example #8
0
def load_data(size, num_lm):
    global divisor, m, n, batch_size, set_random
    # import data
    data, labels = sklearn.datasets.make_swiss_roll(size)
    data, labels = shuffle(data, labels)
    m = np.size(data, 0)
    n = np.size(data, 1)
    data = normalize(data)
    saveLabels = labels
    saveData = data
    # make landmarks, select x random points in the data set
    land_marks = np.empty((num_lm, n))
    top_landmarks_idxs = []
    if set_random:
        for i in range(num_lm):
            index = random.randint(0, m - i - 1)  # randint is inclusive of both bounds
            land_marks[i] = data[index]
            data = np.delete(data, index, axis=0)
            labels = np.delete(labels, index, axis=0)
    else:
        N = NearestNeighbors(
            n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
        N = np.array(N)
        num_connections = N.sum(axis=0).argsort()[::-1]
        top_landmarks_idxs = num_connections[:num_lm]
        land_marks = data[top_landmarks_idxs, :]
        data = np.delete(data, top_landmarks_idxs, axis=0)
        labels = np.delete(labels, top_landmarks_idxs, axis=0)

    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(
        land_marks).kneighbors_graph(land_marks).todense()
    divisor = len(data) // batch_size  # number of full batches left after removing landmarks
    batch_loader = np.zeros((divisor, batch_size + num_lm, n))
    batch_graph = np.zeros((divisor, batch_size + num_lm, batch_size + num_lm))
    for i in range(divisor):
        holder = data[batch_size * i:batch_size * (i + 1)]
        holderLabel = labels[batch_size * i:batch_size * (i + 1)]  # for short-circuit detection
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(
            land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        fig = plt.figure()  # for short-circuit detection: plot the batch in 3D
        ax = plt.axes(projection='3d')
        landmarkplaceholder = list(range(batch_size, batch_size + num_lm))
        ax.scatter(holder[range(batch_size), 0],
                   holder[range(batch_size), 1],
                   holder[range(batch_size), 2],
                   c=holderLabel)
        ax.scatter(holder[landmarkplaceholder, 0],
                   holder[landmarkplaceholder, 1],
                   holder[landmarkplaceholder, 2],
                   c="Red",
                   marker="^",
                   alpha=1)
        for o in range(batch_size):
            for j in range(batch_size, batch_size + num_lm):
                if batch_graph[i][o][j] > 0:
                    ax.plot([holder[o][0], holder[j][0]],
                            [holder[o][1], holder[j][1]],
                            [holder[o][2], holder[j][2]],
                            c='Red',
                            alpha=0.5)
        plt.show()  # end of short-circuit detection code
        batch_loader[i] = holder
    batch_size += num_lm
    return batch_loader, land_marks, labels, data, batch_graph, top_landmarks_idxs, saveData, saveLabels
Example #9
0
    def over_sampling(self):
        if self.k + 1 > self.n_train_less:
            print(
                'Expected n_neighbors <= n_samples, but n_samples = {}, n_neighbors = {}; '
                'n_neighbors has been changed to {}'.format(
                    self.n_train_less, self.k + 1, self.n_train_less))
            self.k = self.n_train_less - 1
        data_less_filter = []
        num_maj_filter = []
        length_less = len(self.train_less)
        num_maj = number_maj(self.train[:, 1:], self.train_less[:, 1:],
                             self.tp_less, self.train[:, 0])
        for m in range(len(num_maj)):
            if num_maj[m] < self.k:
                data_less_filter.append(self.train_less[m])
                num_maj_filter.append(num_maj[m])
        self.train_less = np.array(data_less_filter)
        distance_more, nn_array_more = NearestNeighbors(
            n_neighbors=self.k + 1).fit(self.train_more[:, 1:]).kneighbors(
                self.train_less[:, 1:], return_distance=True)
        distance_less, nn_array = NearestNeighbors(n_neighbors=self.k + 1).fit(
            self.train_less[:, 1:]).kneighbors(self.train_less[:, 1:],
                                               return_distance=True)

        distance_less = distance_less.sum(axis=1)
        distance_more = distance_more.sum(axis=1)
        distance = distance_less / distance_more
        # print(distance)
        density = 1 / distance  # calculate density

        density = list(
            map(lambda x: min(100, x),
                density))  # Control the maximum density range at 100

        # The density is sorted below, and the minority samples are also sorted in order of density.
        density_sorted = sorted(range(len(density)),
                                key=lambda a: density[a],
                                reverse=True)  # sorted
        data_resorted = []
        density_sorted_data = []
        num_sorted = []
        for i in range(len(self.train_less)):
            data_resorted.append(self.train_less[density_sorted[i]])
            density_sorted_data.append(density[density_sorted[i]])
            num_sorted.append(num_maj_filter[density_sorted[i]])

        density = np.array(density_sorted_data)
        cluster_big_density = []
        cluster_small_density = []
        cluster_big_data = []
        cluster_small_data = []
        cluster_big_num = []
        cluster_small_num = []
        cluster = k_means(X=density.reshape((len(density), 1)), n_clusters=2)
        for i in range(cluster[1].shape[0] - 1):
            if cluster[1][i] != cluster[1][i + 1]:  # partition point between the two clusters
                cluster_big_density = density[:i + 1]
                cluster_big_data = np.array(data_resorted)[:i + 1, :]
                cluster_big_num = num_sorted[:i + 1]
                cluster_small_density = density[i + 1:]
                cluster_small_data = np.array(data_resorted)[i + 1:, :]
                cluster_small_num = num_sorted[i + 1:]
                break

        # If there is only one point in a cluster, do not divide the cluster
        if len(cluster_big_data) < 2 or len(cluster_small_data) < 2:
            cluster_big_data = np.array(data_resorted)
            cluster_big_density = density
            cluster_big_num = num_sorted
            flag = 1  # if flag==1 only run big cluster once
        else:
            flag = 2
        sum_0 = 0
        sum_1 = 0
        # Calculate each cluster's total weight: every sample contributes (5 - num_maj) / (k + 1)
        for p in range(len(cluster_big_num)):
            sum_0 += (5 - cluster_big_num[p]) / (self.k + 1)
        for p in range(len(cluster_small_num)):
            sum_1 += (5 - cluster_small_num[p]) / (self.k + 1)

        ratio = []  # save every cluster's total weight
        ratio.append(sum_0)
        ratio.append(sum_1)
        wight = [5 / 6, 4 / 6, 3 / 6, 2 / 6, 1 / 6]
        kk = self.k
        # the number of samples that need to be synthesized
        diff = len(self.train_more) - length_less
        totol_less = len(self.train_less)

        for i in range(flag):
            if i == 0:  # big cluster
                density = cluster_big_density
                self.n_train_less = len(cluster_big_data)
                self.train_less = cluster_big_data
                maj_num_ab = cluster_big_num
            else:  # small cluster
                density = cluster_small_density
                self.n_train_less = len(cluster_small_data)
                self.train_less = cluster_small_data
                maj_num_ab = cluster_small_num

            # if the cluster has fewer than kk + 1 points, shrink k accordingly
            self.k = min(len(self.train_less) - 1, kk)

            # The number of sample points that need to be inserted at each point
            if flag == 1:
                number_synthetic = int(
                    len(self.train_more) / self.IR - len(self.train_less))
            else:
                if i == 0:
                    number_synthetic = int(
                        (len(self.train_less) / totol_less) * diff)
                    len_big = number_synthetic
                else:
                    number_synthetic = diff - len_big

            # Calculate how many points should be inserted for each sample
            N = list(
                map(lambda x: int((x / ratio[i]) * number_synthetic), wight))
            self.reminder = number_synthetic - sum(N)
            self.num = 0

            neighbors = NearestNeighbors(n_neighbors=self.k + 1).fit(
                self.train_less[:, 1:])
            nn_array = neighbors.kneighbors(self.train_less[:, 1:],
                                            return_distance=False)

            self.synthetic = np.zeros((number_synthetic, self.n_attrs - 1))
            for p in range(self.train_less.shape[0]):
                self._populate(p, nn_array[p][1:], number_synthetic, N,
                               maj_num_ab)

            label_synthetic = np.array([self.tp_less] *
                                       number_synthetic).reshape(
                                           (number_synthetic, 1))
            np.random.seed(self.random_state)
            synthetic_dl = self.synthetic
            synthetic_dl = np.hstack(
                (label_synthetic, synthetic_dl))  # class column

            data_res = synthetic_dl
            if i == 0:
                return_data = np.vstack((copy.deepcopy(self.train), data_res))
                if flag == 1:
                    return return_data
                self.new_index = 0
            else:
                return_data = np.vstack((copy.deepcopy(return_data), data_res))

                return return_data
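The density used above to rank minority samples is the capped inverse of the ratio between each minority point's summed distance to its k nearest minority neighbors and its summed distance to its k nearest majority neighbors. A self-contained sketch of just that step, with hypothetical arrays minority and majority standing in for the class's train_less / train_more attributes (label column excluded):

import numpy as np
from sklearn.neighbors import NearestNeighbors

def minority_density(minority, majority, k=5, cap=100.0):
    # distances from each minority point to its k+1 nearest minority neighbors
    d_min, _ = NearestNeighbors(n_neighbors=k + 1).fit(minority).kneighbors(minority)
    # distances from each minority point to its k+1 nearest majority neighbors
    d_maj, _ = NearestNeighbors(n_neighbors=k + 1).fit(majority).kneighbors(minority)
    ratio = d_min.sum(axis=1) / d_maj.sum(axis=1)
    # high density = close to its own class relative to the majority class
    return np.minimum(cap, 1.0 / ratio)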