Example #1
def smoothed_saliency(ind, colors, probs):
    # Convert the palette colors to CIELAB and take pairwise squared distances.
    lab = rgb2lab(colors[None].astype(np.uint8)).squeeze()
    # lab_dist = np.square(lab[..., None] - lab.T).sum(1)
    lab_dist = squareform(pdist(lab, 'sqeuclidean'))
    # Saliency of each color: probability-weighted contrast to every other color,
    # rescaled to [0, 1].
    s = (lab_dist * probs).sum(1)
    s = (s - s.min()) / (s.max() - s.min())
    # Smooth each color's saliency over its m nearest Lab-space neighbors,
    # weighting closer neighbors more heavily.
    m = lab.shape[0] // 4
    dist, nn = NearestNeighbors(n_neighbors=m).fit(lab).kneighbors()
    T = dist.sum(1)
    sp = ((T[:, None] - dist) * s[nn]).sum(1) / ((m - 1) * T)

    return sp
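A minimal, hypothetical driver for the function above; the palette size, the uniform probabilities, and passing None for the unused ind argument are assumptions made only for illustration.

import numpy as np
from skimage.color import rgb2lab
from scipy.spatial.distance import pdist, squareform
from sklearn.neighbors import NearestNeighbors

rng = np.random.default_rng(0)
colors = rng.integers(0, 256, size=(16, 3), dtype=np.uint8)  # 16 palette colors (RGB)
probs = np.full(16, 1.0 / 16)                                # per-color probabilities
saliency = smoothed_saliency(None, colors, probs)            # ind is not used in this snippet
print(saliency)  # weighted averages of neighbor saliencies, each in [0, 1]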
Example #2
def load_data():
    """
    Load the data and split it into batches of points plus landmarks.

    Uses the module-level globals batch_size, num_batches, test_size, num_lm,
    n, k_start, k_lm and k_other.
    :return: the batch loader, the batched data without landmark points, the
    neighborhood graph for each batch, the neighborhood graph for the
    landmarks, the test data, the test labels, and the landmark points
    """
    global batch_size, num_batches
    # import data
    data, labels = original_clean()
    test_data = data[:test_size, :]
    test_labels = labels[:test_size]

    data = data[test_size:, :]

    # make landmarks from the points with the most neighbors
    N = NearestNeighbors(
        n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
    N = np.array(N)
    num_connections = N.sum(
        axis=0).argsort()[::-1]  # indices sorted by connection count, descending
    top_landmarks_idxs = num_connections[:num_lm]  # keep the most-connected points
    land_marks = data[top_landmarks_idxs, :]  # their coordinates become the landmarks
    data = np.delete(data, top_landmarks_idxs, axis=0)  # delete the landmarks
    # find the nearest landmarks for the landmarks
    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(
        land_marks).kneighbors_graph(land_marks).todense()
    # break data into batches, create empty holders
    batch_loader = np.zeros((num_batches, batch_size + num_lm, n))
    batch_graph = np.zeros(
        (num_batches, batch_size + num_lm, batch_size + num_lm))
    # create the full neighborhood graph for each batch
    for i in range(num_batches):
        holder = data[batch_size * i:batch_size * (i + 1)]
        # find the nearest landmarks for the rest of the points
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(
            land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        batch_loader[i] = holder
    batch_size += num_lm  # adjust the batch size
    return batch_loader, data, batch_graph, landmark_neighbors, test_data, test_labels, land_marks
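A hedged driver sketch for the function above; original_clean() is not shown in the source, so a random stand-in is defined here, and every module-level value (test_size, neighbor counts, batch sizes) is an assumption chosen only so the shapes line up.

import numpy as np
from sklearn.neighbors import NearestNeighbors

def original_clean():  # stand-in for the real loader: 520 random points with binary labels
    rng = np.random.default_rng(0)
    return rng.normal(size=(520, 10)), rng.integers(0, 2, size=520)

test_size, num_lm, n = 100, 20, 10
k_start, k_lm, k_other = 10, 3, 3
batch_size, num_batches = 100, 4  # 4 * 100 + 20 landmarks = 420 points left after the test split

batches, rest, graphs, lm_graph, X_test, y_test, landmarks = load_data()
print(batches.shape)  # (4, 120, 10): each batch holds 100 points plus the 20 landmarks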
Example #3
def load_data(size, num_lm):
    global divisor, m, n, batch_size, set_random
    # import data
    data = pd.read_csv('pol_data.csv', delimiter=',').values[:, :-1]
    labels = np.asarray([0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0])
    test_data = data[size:, :]
    test_labels = labels[size:]
    print(size)
    data = data[:size, :]
    labels = labels[:size]
    m = np.size(data, 0)
    n = np.size(data, 1)
    # data = normalize(data)
    # make landmarks, select x random points in the data set
    land_marks = np.empty((num_lm, n))
    top_landmarks_idxs = []
    if set_random:
        for i in range(num_lm):
            index = random.randint(0, m - i - 1)  # randint is inclusive; stay inside the remaining rows
            land_marks[i] = data[index]
            data = np.delete(data, index, axis=0)
            labels = np.delete(labels, index, axis=0)
    else:
        N = NearestNeighbors(n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
        N = np.array(N)
        num_connections = N.sum(axis=0).argsort()[::-1]
        top_landmarks_idxs = num_connections[:num_lm]
        land_marks = data[top_landmarks_idxs, :]
        data = np.delete(data, top_landmarks_idxs, axis=0)

    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(land_marks).kneighbors_graph(land_marks).todense()
    divisor = int(size / batch_size)  # number of batches; assumes the remaining points still fill every batch
    batch_loader = np.zeros((divisor, batch_size + num_lm, n))
    batch_graph = np.zeros((divisor, batch_size + num_lm, batch_size + num_lm))
    for i in range(divisor):
        holder = data[batch_size * i: batch_size * (i + 1)]
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        batch_loader[i] = holder
    batch_size += num_lm
    return batch_loader, land_marks, labels, data, batch_graph, top_landmarks_idxs, test_data, test_labels, landmark_neighbors
Example #4
def load_data(size, num_lm):
    global divisor, m, n, batch_size, set_random
    # import data
    data, labels = sklearn.datasets.make_swiss_roll(size)
    data, labels = shuffle(data, labels)
    m = np.size(data, 0)
    n = np.size(data, 1)
    data = normalize(data)
    saveLabels = labels
    saveData = data
    # make landmarks, select x random points in the data set
    land_marks = np.empty((num_lm, n))
    top_landmarks_idxs = []
    if set_random:
        for i in range(num_lm):
            index = random.randint(0, m - i - 1)  # randint is inclusive; stay inside the remaining rows
            land_marks[i] = data[index]
            data = np.delete(data, index, axis=0)
            labels = np.delete(labels, index, axis=0)
    else:
        N = NearestNeighbors(
            n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
        N = np.array(N)
        num_connections = N.sum(axis=0).argsort()[::-1]
        top_landmarks_idxs = num_connections[:num_lm]
        land_marks = data[top_landmarks_idxs, :]
        data = np.delete(data, top_landmarks_idxs, axis=0)

    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(
        land_marks).kneighbors_graph(land_marks).todense()
    divisor = int(size / batch_size)
    batch_loader = np.zeros((divisor, batch_size + num_lm, n))
    batch_graph = np.zeros((divisor, batch_size + num_lm, batch_size + num_lm))
    for i in range(divisor):
        holder = data[batch_size * i:batch_size * (i + 1)]
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(
            land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        batch_loader[i] = holder
    batch_size += num_lm
    return batch_loader, land_marks, labels, data, batch_graph, top_landmarks_idxs, saveData, saveLabels
Example #5
def load_data():
    global batch_size, divisor
    data, labels = original_clean()
    test_data = data[300:, :]
    test_labels = labels[300:]
    data = data[:300, :]
    labels = labels[:300]

    N = NearestNeighbors(
        n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
    N = np.array(N)
    num_connections = N.sum(axis=0).argsort()[::-1]
    top_landmarks_idxs = num_connections[:num_lm]
    land_marks = data[top_landmarks_idxs, :]
    data = np.delete(data, top_landmarks_idxs, axis=0)
    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(
        land_marks).kneighbors_graph(land_marks).todense()
    divisor = data.shape[0] // batch_size  # number of full batches after the landmarks are removed
    batch_loader = np.zeros((divisor, batch_size + num_lm, n))
    batch_graph = np.zeros((divisor, batch_size + num_lm, batch_size + num_lm))
    for i in range(divisor):
        holder = data[batch_size * i:batch_size * (i + 1)]
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(
            land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        batch_loader[i] = holder
    batch_size += num_lm
    return batch_loader, land_marks, labels, data, batch_graph, top_landmarks_idxs, test_data, test_labels, landmark_neighbors
Example #6
    def fit(self, data, k):
        """
        The method to fit an MVU model to the data.

        :param data: The data to which the model will be fitted.
        :param k: The number of neighbors to fix.
        :return: Embedded Gramian: The Gramian matrix of the embedded data.
        """
        # Number of data points in the set
        n = data.shape[0]

        # Set the seed
        np.random.seed(self.seed)

        # Calculate the nearest neighbors of each data point and build a graph
        N = NearestNeighbors(n_neighbors=k).fit(data).kneighbors_graph(data).todense()
        N = np.array(N)

        # Sort the neighbor graph to find the points with the most connections
        num_connections = N.sum(axis=0).argsort()[::-1]

        # Separate the most popular points
        top_landmarks_idxs = num_connections[:self.landmarks]
        top_landmarks = data[top_landmarks_idxs, :]

        # Compute the nearest neighbors for all of the landmarks so they are all connected
        L = NearestNeighbors(n_neighbors=3).fit(top_landmarks).kneighbors_graph(top_landmarks).todense()
        L = np.array(L)

        # The data without the landmarks
        new_data_idxs = [x for x in list(range(n)) if x not in top_landmarks_idxs]
        new_data = np.delete(data, top_landmarks_idxs, axis=0)

        # Connect each remaining point to its nearest landmarks in the neighborhood graph
        l = NearestNeighbors(n_neighbors=3).fit(top_landmarks).kneighbors_graph(new_data).todense()
        l = np.array(l)

        # Reset N to all 0's
        N = np.zeros((n, n))

        # Add all of the intra-landmark connections to the neighborhood graph
        for i in range(self.landmarks):
            for j in range(self.landmarks):
                if L[i, j] == 1.:
                    N[top_landmarks_idxs[i], top_landmarks_idxs[j]] = 1.

        # Add the connections between the remaining points and the landmarks to the neighborhood graph
        for i in range(n - self.landmarks):
            for j in range(self.landmarks):
                if l[i, j] == 1.:
                    N[new_data_idxs[i], top_landmarks_idxs[j]] = 1.

        # Save the neighborhood graph to be accessed later
        self.neighborhood_graph = N

        # To check for disconnected regions in the neighbor graph
        lap = laplacian(N, normed=True)
        eigvals, _ = np.linalg.eig(lap)

        for e in eigvals:
            if e == 0. and self.solver_iters is None:
                raise DisconnectError("DISCONNECTED REGIONS IN NEIGHBORHOOD GRAPH. "
                                      "PLEASE SPECIFY MAX ITERATIONS FOR THE SOLVER")

        # Declare some CVXPy variables
        # Gramian of the original data
        P = cp.Constant(data.dot(data.T))
        # The projection of the Gramian
        Q = cp.Variable((n, n), PSD=True)
        # Initialized to zeros
        Q.value = np.zeros((n, n))
        # A shorter way to call a vector of 1's
        ONES = cp.Constant(np.ones((n, 1)))
        # A constant to keep the notation consistent with the Berkeley lecture
        T = cp.Constant(n)

        # Declare placeholders to get rid of annoying warnings
        objective = None
        constraints = []

        # Wikipedia Solution
        if self.equation == "wikipedia":
            objective = cp.Maximize(cp.trace(Q))

            constraints = [Q >> 0, cp.sum(Q, axis=1) == 0]

            for i in range(n):
                for j in range(n):
                    if N[i, j] == 1:
                        constraints.append((P[i, i] + P[j, j] - P[i, j] - P[j, i]) -
                                           (Q[i, i] + Q[j, j] - Q[i, j] - Q[j, i]) == 0)

        # UC Berkeley Solution
        if self.equation == "berkley":
            objective = cp.Maximize(cp.multiply((1 / T), cp.trace(Q)) -
                                    cp.multiply((1 / (T * T)), cp.trace(cp.matmul(cp.matmul(Q, ONES), ONES.T))))

            constraints = [Q >> 0, cp.sum(Q, axis=1) == 0]
            for i in range(n):
                for j in range(n):
                    if N[i, j] == 1.:
                        constraints.append(Q[i, i] - 2 * Q[i, j] + Q[j, j] -
                                           (P[i, i] - 2 * P[i, j] + P[j, j]) == 0)

        # Solve the problem with the SCS Solver
        problem = cp.Problem(objective, constraints)
        # FIXME The solvertol syntax is unique to SCS
        problem.solve(solver=self.solver,
                      eps=self.solver_tol,
                      max_iters=self.solver_iters,
                      warm_start=self.warm_start)

        return Q.value
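fit() returns the optimized Gramian rather than coordinates; below is a hedged sketch (not part of the original class) of the usual way to read a low-dimensional embedding off that Gramian via its eigendecomposition.

import numpy as np

def embedding_from_gramian(Q, dim=2):
    # Top eigenvectors of the PSD Gramian, scaled by the square roots of their
    # eigenvalues, give the embedded coordinates.
    eigvals, eigvecs = np.linalg.eigh(Q)
    order = np.argsort(eigvals)[::-1][:dim]  # largest eigenvalues first
    return eigvecs[:, order] * np.sqrt(np.clip(eigvals[order], 0.0, None))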
Example #7
def load_data(size, num_lm):
    """
    Load swiss-roll data and split it into batches of points plus landmarks.

    :param size: total number of points to generate
    :param num_lm: number of points which will be landmarks
    :return: the batch loader, the landmark points, the labels, the batched data
    without landmark points, the batch neighborhood graphs, the landmark indices,
    the original data, the original labels, and the neighborhood graph for the
    landmarks
    """
    global divisor, m, n, batch_size, set_random
    # import data
    data, labels = sklearn.datasets.make_swiss_roll(size)
    # data, labels = shuffle(data, labels)
    m = np.size(data, 0)
    n = np.size(data, 1)
    data = normalize(data)
    save_labels = labels
    save_data = data
    # make landmarks, select x random points in the data set
    land_marks = np.empty((num_lm, n))
    top_landmarks_idxs = []
    if set_random:
        # select x random points in the data set
        for i in range(num_lm):
            index = random.randint(0, m - i - 1)  # randint is inclusive; stay inside the remaining rows
            land_marks[i] = data[index]
            data = np.delete(data, index, axis=0)
            labels = np.delete(labels, index, axis=0)
    else:
        # pick the points with the most neighbors
        N = NearestNeighbors(
            n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
        N = np.array(N)
        num_connections = N.sum(axis=0).argsort()[::-1]
        top_landmarks_idxs = num_connections[:num_lm]
        land_marks = data[top_landmarks_idxs, :]
        data = np.delete(data, top_landmarks_idxs, axis=0)

    # find the nearest landmarks for the landmarks
    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(
        land_marks).kneighbors_graph(land_marks).todense()
    # break data into batches
    divisor = int(size / batch_size)
    batch_loader = np.zeros((divisor, batch_size + num_lm, n))
    batch_graph = np.zeros((divisor, batch_size + num_lm, batch_size + num_lm))
    for i in range(divisor):
        holder = data[batch_size * i:batch_size * (i + 1)]
        # find the nearest landmarks for the rest of the points
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(
            land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        batch_loader[i] = holder
    batch_size += num_lm  # adjust the batch size
    return batch_loader, land_marks, labels, data, batch_graph, top_landmarks_idxs, save_data, save_labels, landmark_neighbors
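A hedged driver for this swiss-roll variant; the module-level names below are the globals the function expects, and the values are assumptions picked so that size equals divisor * batch_size + num_lm and every batch stays full after the landmarks are removed.

import numpy as np
import sklearn.datasets
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import normalize

k_start, k_lm, k_other = 10, 3, 3
batch_size, set_random = 100, False
divisor = m = n = 0

(batches, landmarks, labels, rest, graphs,
 lm_idxs, X_all, y_all, lm_graph) = load_data(size=1020, num_lm=20)
print(batches.shape)  # (10, 120, 3): 10 batches of 100 swiss-roll points plus the 20 landmarks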
Example #8
def load_data(size, num_lm):
    global divisor, m, n, batch_size, set_random
    # import data
    data, labels = sklearn.datasets.make_swiss_roll(size)
    data, labels = shuffle(data, labels)
    m = np.size(data, 0)
    n = np.size(data, 1)
    data = normalize(data)
    saveLabels = labels
    saveData = data
    # make landmarks, select x random points in the data set
    land_marks = np.empty((num_lm, n))
    top_landmarks_idxs = []
    if set_random:
        for i in range(num_lm):
            index = random.randint(0, m - i - 1)  # randint is inclusive; stay inside the remaining rows
            land_marks[i] = data[index]
            data = np.delete(data, index, axis=0)
            labels = np.delete(labels, index, axis=0)
    else:
        N = NearestNeighbors(
            n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
        N = np.array(N)
        num_connections = N.sum(axis=0).argsort()[::-1]
        top_landmarks_idxs = num_connections[:num_lm]
        land_marks = data[top_landmarks_idxs, :]
        data = np.delete(data, top_landmarks_idxs, axis=0)
        labels = np.delete(labels, top_landmarks_idxs, axis=0)

    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(
        land_marks).kneighbors_graph(land_marks).todense()
    divisor = int(size / batch_size)
    batch_loader = np.zeros((divisor, batch_size + num_lm, n))
    batch_graph = np.zeros((divisor, batch_size + num_lm, batch_size + num_lm))
    for i in range(divisor):
        holder = data[batch_size * i:batch_size * (i + 1)]
        holderLabel = labels[batch_size * i:batch_size *
                             (i + 1)]  # for short-circuit detection
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(
            land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        fig = plt.figure()  # for short-circuit detection: plot each batch
        ax = plt.axes(projection='3d')
        landmarkplaceholder = []
        for j in range(num_lm):
            landmarkplaceholder.append(batch_size + j)
        ax.scatter(holder[range(batch_size), 0],
                   holder[range(batch_size), 1],
                   holder[range(batch_size), 2],
                   c=holderLabel)
        ax.scatter(holder[landmarkplaceholder, 0],
                   holder[landmarkplaceholder, 1],
                   holder[landmarkplaceholder, 2],
                   c="Red",
                   marker="^",
                   alpha=1)
        for o in range(batch_size):
            for j in range(batch_size, batch_size + num_lm):
                if batch_graph[i][o][j] > 0:
                    ax.plot([holder[o][0], holder[j][0]],
                            [holder[o][1], holder[j][1]],
                            [holder[o][2], holder[j][2]],
                            c='Red',
                            alpha=0.5)
        plt.show()  # end of short-circuit detection code
        batch_loader[i] = holder
    batch_size += num_lm
    return batch_loader, land_marks, labels, data, batch_graph, top_landmarks_idxs, saveData, saveLabels
Example #9
    def over_sampling(self):
        if self.k + 1 > self.n_train_less:
            print(
                'Expected n_neighbors <= n_samples, but n_samples = {} and '
                'n_neighbors = {}; n_neighbors has been reduced to {}'.format(
                    self.n_train_less, self.k + 1, self.n_train_less))
            self.k = self.n_train_less - 1
        data_less_filter = []
        num_maj_filter = []
        length_less = len(self.train_less)
        num_maj = number_maj(self.train[:, 1:], self.train_less[:, 1:],
                             self.tp_less, self.train[:, 0])
        for m in range(len(num_maj)):
            if num_maj[m] < self.k:
                data_less_filter.append(self.train_less[m])
                num_maj_filter.append(num_maj[m])
        self.train_less = np.array(data_less_filter)
        distance_more, nn_array_more = NearestNeighbors(
            n_neighbors=self.k + 1).fit(self.train_more[:, 1:]).kneighbors(
                self.train_less[:, 1:], return_distance=True)
        distance_less, nn_array = NearestNeighbors(n_neighbors=self.k + 1).fit(
            self.train_less[:, 1:]).kneighbors(self.train_less[:, 1:],
                                               return_distance=True)

        distance_less = distance_less.sum(axis=1)
        distance_more = distance_more.sum(axis=1)
        distance = distance_less / distance_more
        # print(distance)
        density = 1 / distance  # calculate density

        density = list(
            map(lambda x: min(100, x),
                density))  # Control the maximum density range at 100

        # The density is sorted below, and the minority samples are also sorted in order of density.
        density_sorted = sorted(range(len(density)),
                                key=lambda a: density[a],
                                reverse=True)  # sorted
        data_resorted = []
        density_sorted_data = []
        num_sorted = []
        for i in range(len(self.train_less)):
            data_resorted.append(self.train_less[density_sorted[i]])
            density_sorted_data.append(density[density_sorted[i]])
            num_sorted.append(num_maj_filter[density_sorted[i]])

        density = np.array(density_sorted_data)
        cluster_big_density = []
        cluster_small_density = []
        cluster_big_data = []
        cluster_small_data = []
        cluster_big_num = []
        cluster_small_num = []
        cluster = k_means(X=density.reshape((len(density), 1)), n_clusters=2)
        for i in range(cluster[1].shape[0] - 1):  # stop before the last index so i + 1 stays in range
            if cluster[1][i] != cluster[1][i + 1]:  # partition point between the two clusters
                cluster_big_density = density[:i + 1]
                cluster_big_data = np.array(data_resorted)[:i + 1, :]
                cluster_big_num = num_sorted[:i + 1]
                cluster_small_density = density[i + 1:]
                cluster_small_data = np.array(data_resorted)[i + 1:, :]
                cluster_small_num = num_sorted[i + 1:]
                break

        # If there is only one point in a cluster, do not divide the cluster
        if len(cluster_big_data) < 2 or len(cluster_small_data) < 2:
            cluster_big_data = np.array(data_resorted)
            cluster_big_density = density
            cluster_big_num = num_sorted
            flag = 1  # if flag==1 only run big cluster once
        else:
            flag = 2
        sum_0 = 0
        sum_1 = 0
        # Calculate each cluster's total weight
        for p in range(len(cluster_big_num)):
            sum_0 += (5 - cluster_big_num[p]) / (self.k + 1)
        for p in range(len(cluster_small_num)):
            sum_1 += (5 - cluster_small_num[p]) / (self.k + 1)

        ratio = []  # save every cluster's total weight
        ratio.append(sum_0)
        ratio.append(sum_1)
        wight = [5 / 6, 4 / 6, 3 / 6, 2 / 6, 1 / 6]
        kk = self.k
        diff = len(self.train_more
                   ) - length_less  # the number of samples to synthesize
        totol_less = len(self.train_less)

        for i in range(flag):
            if i == 0:  # big cluster
                density = cluster_big_density
                self.n_train_less = len(cluster_big_data)
                self.train_less = cluster_big_data
                maj_num_ab = cluster_big_num
            else:  # small cluster
                density = cluster_small_density
                self.n_train_less = len(cluster_small_data)
                self.train_less = cluster_small_data
                maj_num_ab = cluster_small_num

            self.k = min(
                len(self.train_less) - 1,
                kk)  # if len(self.train_less) <= k, shrink k to len(self.train_less) - 1

            # The number of sample points that need to be inserted at each point
            if flag == 1:
                number_synthetic = int(
                    len(self.train_more) / self.IR - len(self.train_less))
            else:
                if i == 0:
                    number_synthetic = int(
                        (len(self.train_less) / totol_less) * diff)
                    len_big = number_synthetic
                else:
                    number_synthetic = diff - len_big

            # Calculate how many points should be inserted for each sample
            N = list(
                map(lambda x: int((x / ratio[i]) * number_synthetic), wight))
            self.reminder = number_synthetic - sum(N)
            self.num = 0

            neighbors = NearestNeighbors(n_neighbors=self.k + 1).fit(
                self.train_less[:, 1:])
            nn_array = neighbors.kneighbors(self.train_less[:, 1:],
                                            return_distance=False)

            self.synthetic = np.zeros((number_synthetic, self.n_attrs - 1))
            for p in range(self.train_less.shape[0]):
                self._populate(p, nn_array[p][1:], number_synthetic, N,
                               maj_num_ab)

            label_synthetic = np.array([self.tp_less] *
                                       number_synthetic).reshape(
                                           (number_synthetic, 1))
            np.random.seed(self.random_state)
            synthetic_dl = self.synthetic
            synthetic_dl = np.hstack(
                (label_synthetic, synthetic_dl))  # class column

            data_res = synthetic_dl
            if i == 0:
                return_data = np.vstack((copy.deepcopy(self.train), data_res))
                if flag == 1:
                    return return_data
                self.new_index = 0
            else:
                return_data = np.vstack((copy.deepcopy(return_data), data_res))

                return return_data
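A small, hedged illustration (separate from the class) of the density measure computed above: for each minority sample, the summed distance to its nearest majority neighbors divided by the summed distance to its nearest minority neighbors, capped at 100.

import numpy as np
from sklearn.neighbors import NearestNeighbors

minority = np.array([[0.0, 0.0], [0.2, 0.1], [5.0, 5.0]])
majority = np.array([[0.3, 0.0], [0.4, 0.2], [4.5, 4.8], [6.0, 6.0]])
k = 2
d_more, _ = NearestNeighbors(n_neighbors=k).fit(majority).kneighbors(minority)
d_less, _ = NearestNeighbors(n_neighbors=k).fit(minority).kneighbors(minority)
density = np.minimum(100, d_more.sum(axis=1) / d_less.sum(axis=1))
print(density)  # larger values mean the point sits closer to its own class than to the majority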