def smoothed_saliency(ind, colors, probs):
    # Convert the RGB palette to CIELAB so colour distances are perceptually meaningful
    lab = rgb2lab(colors[None].astype(np.uint8)).squeeze()
    # lab_dist = np.square(lab[..., None] - lab.T).sum(1)
    lab_dist = squareform(pdist(lab, 'sqeuclidean'))
    # Raw saliency: probability-weighted colour distance, rescaled to [0, 1]
    s = (lab_dist * probs).sum(1)
    s = (s - s.min()) / (s.max() - s.min())
    # Smooth each colour's saliency over its m nearest neighbours in Lab space
    m = lab.shape[0] // 4
    dist, nn = NearestNeighbors(n_neighbors=m).fit(lab).kneighbors()
    T = dist.sum(1)
    sp = ((T[:, None] - dist) * s[nn]).sum(1) / ((m - 1) * T)
    return sp
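# A minimal usage sketch (my own placeholder data, not project data), assuming the
# function above lives in a module with these imports available. `colors` is taken to
# be an (N, 3) quantised RGB palette and `probs` an (N,) vector of colour frequencies;
# `ind` is unused by the function, so None is passed for it.
import numpy as np
from skimage.color import rgb2lab
from scipy.spatial.distance import pdist, squareform
from sklearn.neighbors import NearestNeighbors

rng = np.random.default_rng(0)
colors = rng.integers(0, 256, size=(64, 3))   # placeholder quantised palette
probs = rng.random(64)
probs /= probs.sum()                          # colour frequencies summing to 1

saliency = smoothed_saliency(None, colors, probs)
print(saliency.shape)                         # (64,)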
def load_data(): """ Function to load the data :param size: total number of points to get :param num_lm: number of points which will be landmarks :return: the batch loader, landmark points, labels, batched data without landmark points, data organized in graphs, neighborhood graph for the landmarks, original data, original labels, neighborhood graphs for non-landmarks """ global batch_size, num_batches # import data data, labels = original_clean() test_data = data[:test_size, :] test_labels = labels[:test_size] data = data[test_size:, :] # make landmarks with points with most neighbors N = NearestNeighbors( n_neighbors=k_start).fit(data).kneighbors_graph(data).todense() N = np.array(N) num_connections = N.sum( axis=0).argsort()[::-1] # see how many neighbors each point has top_landmarks_idxs = num_connections[:num_lm] # sort in descending order land_marks = data[top_landmarks_idxs, :] # pick the top ones data = np.delete(data, top_landmarks_idxs, axis=0) # delete the landmarks # find the nearest landmarks for the landmarks landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit( land_marks).kneighbors_graph(land_marks).todense() # break data into batches, create empty holders batch_loader = np.zeros((num_batches, batch_size + num_lm, n)) batch_graph = np.zeros( (num_batches, batch_size + num_lm, batch_size + num_lm)) # create the full neighborhood graph for each batch for i in range(num_batches): holder = data[batch_size * i:batch_size * (i + 1)] # find the nearest landmarks for the rest of the points holder_graph = NearestNeighbors(n_neighbors=k_other).fit( land_marks).kneighbors_graph(holder).todense() for j in range(batch_size): # copy over the holder graph for l in range(num_lm): if holder_graph[j, l] == 1: batch_graph[i, j, l + batch_size] = 1 batch_graph[i, l + batch_size, j] = 1 for j in range(num_lm): # copy over landmark neighbors for l in range(j, num_lm): if landmark_neighbors[j, l] == 1 and j != l: batch_graph[i, j + batch_size, l + batch_size] = 1 batch_graph[i, l + batch_size, j + batch_size] = 1 holder = np.concatenate((holder, land_marks)) batch_loader[i] = holder batch_size += num_lm # adjust the batch size return batch_loader, data, batch_graph, landmark_neighbors, test_data, test_labels, land_marks
def load_data(size, num_lm):
    global divisor, m, n, batch_size, set_random
    # import data
    data = pd.read_csv('pol_data.csv', delimiter=',').values[:, :-1]
    labels = np.asarray([0,1,1,1,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,0,1,0,1,1,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,0,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,1,0,1,0,1,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,0,1,1,0,1,0,0])
    test_data = data[size:, :]
    test_labels = labels[size:]
    print(size)
    data = data[:size, :]
    labels = labels[:size]
    m = np.size(data, 0)
    n = np.size(data, 1)
    # data = normalize(data)
    # make landmarks, select x random points in the data set
    land_marks = np.empty((num_lm, n))
    top_landmarks_idxs = []
    if set_random:
        for i in range(num_lm):
            index = random.randint(0, m - i)
            land_marks[i] = data[index]
            data = np.delete(data, index, axis=0)
            labels = np.delete(labels, index, axis=0)
    else:
        N = NearestNeighbors(
            n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
        N = np.array(N)
        num_connections = N.sum(axis=0).argsort()[::-1]
        top_landmarks_idxs = num_connections[:num_lm]
        land_marks = data[top_landmarks_idxs, :]
        data = np.delete(data, top_landmarks_idxs, axis=0)
    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(
        land_marks).kneighbors_graph(land_marks).todense()
    divisor = int(size / batch_size)  # review this line
    batch_loader = np.zeros((divisor, batch_size + num_lm, n))
    batch_graph = np.zeros((divisor, batch_size + num_lm, batch_size + num_lm))
    for i in range(divisor):
        holder = data[batch_size * i:batch_size * (i + 1)]
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(
            land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        batch_loader[i] = holder
    batch_size += num_lm
    return batch_loader, land_marks, labels, data, batch_graph, top_landmarks_idxs, test_data, test_labels, landmark_neighbors
def load_data(size, num_lm):
    global divisor, m, n, batch_size, set_random
    # import data
    data, labels = sklearn.datasets.make_swiss_roll(size)
    data, labels = shuffle(data, labels)
    m = np.size(data, 0)
    n = np.size(data, 1)
    data = normalize(data)
    saveLabels = labels
    saveData = data
    # make landmarks, select x random points in the data set
    land_marks = np.empty((num_lm, n))
    top_landmarks_idxs = []
    if set_random:
        for i in range(num_lm):
            index = random.randint(0, m - i)
            land_marks[i] = data[index]
            data = np.delete(data, index, axis=0)
            labels = np.delete(labels, index, axis=0)
    else:
        N = NearestNeighbors(
            n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
        N = np.array(N)
        num_connections = N.sum(axis=0).argsort()[::-1]
        top_landmarks_idxs = num_connections[:num_lm]
        land_marks = data[top_landmarks_idxs, :]
        data = np.delete(data, top_landmarks_idxs, axis=0)
    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(
        land_marks).kneighbors_graph(land_marks).todense()
    divisor = int(size / batch_size)
    batch_loader = np.zeros((divisor, batch_size + num_lm, n))
    batch_graph = np.zeros((divisor, batch_size + num_lm, batch_size + num_lm))
    for i in range(divisor):
        holder = data[batch_size * i:batch_size * (i + 1)]
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(
            land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        batch_loader[i] = holder
    batch_size += num_lm
    return batch_loader, land_marks, labels, data, batch_graph, top_landmarks_idxs, saveData, saveLabels
def load_data():
    global batch_size, divisor
    data, labels = original_clean()
    test_data = data[300:, :]
    test_labels = labels[300:]
    data = data[:300, :]
    labels = labels[:300]
    N = NearestNeighbors(
        n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
    N = np.array(N)
    num_connections = N.sum(axis=0).argsort()[::-1]
    top_landmarks_idxs = num_connections[:num_lm]
    land_marks = data[top_landmarks_idxs, :]
    data = np.delete(data, top_landmarks_idxs, axis=0)
    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(
        land_marks).kneighbors_graph(land_marks).todense()
    divisor = int(size / batch_size)
    batch_loader = np.zeros((divisor, batch_size + num_lm, n))
    batch_graph = np.zeros((divisor, batch_size + num_lm, batch_size + num_lm))
    for i in range(divisor):
        holder = data[batch_size * i:batch_size * (i + 1)]
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(
            land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        batch_loader[i] = holder
    batch_size += num_lm
    return batch_loader, land_marks, labels, data, batch_graph, top_landmarks_idxs, test_data, test_labels, landmark_neighbors
def fit(self, data, k):
    """
    The method to fit an MVU model to the data.

    :param data: The data to which the model will be fitted.
    :param k: The number of neighbors to fix.
    :return: Embedded Gramian: The Gramian matrix of the embedded data.
    """
    # Number of data points in the set
    n = data.shape[0]
    # Set the seed
    np.random.seed(self.seed)
    # Calculate the nearest neighbors of each data point and build a graph
    N = NearestNeighbors(n_neighbors=k).fit(data).kneighbors_graph(data).todense()
    N = np.array(N)
    # Sort the neighbor graph to find the points with the most connections
    num_connections = N.sum(axis=0).argsort()[::-1]
    # Separate the most popular points
    top_landmarks_idxs = num_connections[:self.landmarks]
    top_landmarks = data[top_landmarks_idxs, :]
    # Compute the nearest neighbors for all of the landmarks so they are all connected
    L = NearestNeighbors(n_neighbors=3).fit(top_landmarks).kneighbors_graph(top_landmarks).todense()
    L = np.array(L)
    # The data without the landmarks
    new_data_idxs = [x for x in list(range(n)) if x not in top_landmarks_idxs]
    new_data = np.delete(data, top_landmarks_idxs, axis=0)
    # Construct a neighborhood graph where each point finds its closest landmarks
    l = NearestNeighbors(n_neighbors=3).fit(top_landmarks).kneighbors_graph(new_data).todense()
    l = np.array(l)
    # Reset N to all 0's
    N = np.zeros((n, n))
    # Add all of the landmark-to-landmark connections to the neighborhood graph
    for i in range(self.landmarks):
        for j in range(self.landmarks):
            if L[i, j] == 1.:
                N[top_landmarks_idxs[i], top_landmarks_idxs[j]] = 1.
    # Add all of the point-to-landmark connections to the neighborhood graph
    for i in range(n - self.landmarks):
        for j in range(self.landmarks):
            if l[i, j] == 1.:
                N[new_data_idxs[i], top_landmarks_idxs[j]] = 1.
    # Save the neighborhood graph so it can be accessed later
    self.neighborhood_graph = N
    # Check for disconnected regions in the neighborhood graph
    lap = laplacian(N, normed=True)
    eigvals, _ = np.linalg.eig(lap)
    for e in eigvals:
        if e == 0. and self.solver_iters is None:
            raise DisconnectError("DISCONNECTED REGIONS IN NEIGHBORHOOD GRAPH. "
                                  "PLEASE SPECIFY MAX ITERATIONS FOR THE SOLVER")
    # Declare some CVXPY variables
    # Gramian of the original data
    P = cp.Constant(data.dot(data.T))
    # The projection of the Gramian
    Q = cp.Variable((n, n), PSD=True)
    # Initialized to zeros
    Q.value = np.zeros((n, n))
    # A shorter way to call a vector of 1's
    ONES = cp.Constant(np.ones((n, 1)))
    # A variable to keep the notation consistent with the Berkeley lecture
    T = cp.Constant(n)
    # Declare placeholders to get rid of annoying warnings
    objective = None
    constraints = []
    # Wikipedia solution
    if self.equation == "wikipedia":
        objective = cp.Maximize(cp.trace(Q))
        constraints = [Q >> 0, cp.sum(Q, axis=1) == 0]
        for i in range(n):
            for j in range(n):
                if N[i, j] == 1:
                    constraints.append((P[i, i] + P[j, j] - P[i, j] - P[j, i]) -
                                       (Q[i, i] + Q[j, j] - Q[i, j] - Q[j, i]) == 0)
    # UC Berkeley solution (the equation keyword is spelled "berkley" in the codebase)
    if self.equation == "berkley":
        objective = cp.Maximize(cp.multiply((1 / T), cp.trace(Q)) -
                                cp.multiply((1 / (T * T)),
                                            cp.trace(cp.matmul(cp.matmul(Q, ONES), ONES.T))))
        constraints = [Q >> 0, cp.sum(Q, axis=1) == 0]
        for i in range(n):
            for j in range(n):
                if N[i, j] == 1.:
                    constraints.append(Q[i, i] - 2 * Q[i, j] + Q[j, j] -
                                       (P[i, i] - 2 * P[i, j] + P[j, j]) == 0)
    # Solve the problem with the SCS solver
    problem = cp.Problem(objective, constraints)
    # FIXME The solver tolerance syntax is unique to SCS
    problem.solve(solver=self.solver, eps=self.solver_tol,
                  max_iters=self.solver_iters, warm_start=self.warm_start)
    return Q.value
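# A minimal sketch (not part of the class above) of how the Gramian returned by
# fit() is typically turned into low-dimensional coordinates: take the leading
# eigenvectors of the PSD matrix and scale them by the square roots of their
# eigenvalues. The helper name `embed_from_gramian` and the target dimension `d`
# are illustrative, not from the project.
import numpy as np

def embed_from_gramian(gram, d=2):
    # eigh is appropriate because the Gramian is symmetric positive semidefinite
    eigvals, eigvecs = np.linalg.eigh(gram)
    order = np.argsort(eigvals)[::-1][:d]        # largest eigenvalues first
    # clip tiny negative eigenvalues caused by solver tolerance
    top = np.clip(eigvals[order], 0.0, None)
    return eigvecs[:, order] * np.sqrt(top)      # (n, d) embedded coordinates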
def load_data(size, num_lm): """ Function to load the data :param size: total number of points to get :param num_lm: number of points which will be landmarks :return: the batch loader, landmark points, labels, batched data without landmark points, data organized in graphs, neighborhood graph for the landmarks, original data, original labels, neighborhood graphs for non-landmarks """ global divisor, m, n, batch_size, set_random # import data data, labels = sklearn.datasets.make_swiss_roll(size) # data, labels = shuffle(data, labels) m = np.size(data, 0) n = np.size(data, 1) data = normalize(data) save_labels = labels save_data = data # make landmarks, select x random points in the data set land_marks = np.empty((num_lm, n)) top_landmarks_idxs = [] if set_random: # select x random points in the data set for i in range(num_lm): index = random.randint(0, m - i) land_marks[i] = data[index] data = np.delete(data, index, axis=0) labels = np.delete(labels, index, axis=0) else: # pick the points with the most neighbors N = NearestNeighbors( n_neighbors=k_start).fit(data).kneighbors_graph(data).todense() N = np.array(N) num_connections = N.sum(axis=0).argsort()[::-1] top_landmarks_idxs = num_connections[:num_lm] land_marks = data[top_landmarks_idxs, :] data = np.delete(data, top_landmarks_idxs, axis=0) # find the nearest landmarks for the landmarks landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit( land_marks).kneighbors_graph(land_marks).todense() # break data into batches divisor = int(size / batch_size) batch_loader = np.zeros((divisor, batch_size + num_lm, n)) batch_graph = np.zeros((divisor, batch_size + num_lm, batch_size + num_lm)) for i in range(divisor): holder = data[batch_size * i:batch_size * (i + 1)] # find the nearest landmarks for the rest of the points holder_graph = NearestNeighbors(n_neighbors=k_other).fit( land_marks).kneighbors_graph(holder).todense() for j in range(batch_size): # copy over the holder graph for l in range(num_lm): if holder_graph[j, l] == 1: batch_graph[i, j, l + batch_size] = 1 batch_graph[i, l + batch_size, j] = 1 for j in range(num_lm): # copy over landmark neighbors for l in range(j, num_lm): if landmark_neighbors[j, l] == 1 and j != l: batch_graph[i, j + batch_size, l + batch_size] = 1 batch_graph[i, l + batch_size, j + batch_size] = 1 holder = np.concatenate((holder, land_marks)) batch_loader[i] = holder batch_size += num_lm # adjust the batch size return batch_loader, land_marks, labels, data, batch_graph, top_landmarks_idxs, save_data, save_labels, landmark_neighbors
def load_data(size, num_lm):
    global divisor, m, n, batch_size, set_random
    # import data
    data, labels = sklearn.datasets.make_swiss_roll(size)
    data, labels = shuffle(data, labels)
    m = np.size(data, 0)
    n = np.size(data, 1)
    data = normalize(data)
    saveLabels = labels
    saveData = data
    # make landmarks, select x random points in the data set
    land_marks = np.empty((num_lm, n))
    top_landmarks_idxs = []
    if set_random:
        for i in range(num_lm):
            index = random.randint(0, m - i)
            land_marks[i] = data[index]
            data = np.delete(data, index, axis=0)
            labels = np.delete(labels, index, axis=0)
    else:
        N = NearestNeighbors(
            n_neighbors=k_start).fit(data).kneighbors_graph(data).todense()
        N = np.array(N)
        num_connections = N.sum(axis=0).argsort()[::-1]
        top_landmarks_idxs = num_connections[:num_lm]
        land_marks = data[top_landmarks_idxs, :]
        data = np.delete(data, top_landmarks_idxs, axis=0)
        labels = np.delete(labels, top_landmarks_idxs, axis=0)
    landmark_neighbors = NearestNeighbors(n_neighbors=k_lm).fit(
        land_marks).kneighbors_graph(land_marks).todense()
    divisor = int(size / batch_size)
    batch_loader = np.zeros((divisor, batch_size + num_lm, n))
    batch_graph = np.zeros((divisor, batch_size + num_lm, batch_size + num_lm))
    for i in range(divisor):
        holder = data[batch_size * i:batch_size * (i + 1)]
        holderLabel = labels[batch_size * i:batch_size * (i + 1)]  # for short-circuit detection
        holder_graph = NearestNeighbors(n_neighbors=k_other).fit(
            land_marks).kneighbors_graph(holder).todense()
        for j in range(batch_size):  # copy over the holder graph
            for l in range(num_lm):
                if holder_graph[j, l] == 1:
                    batch_graph[i, j, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j] = 1
        for j in range(num_lm):  # copy over landmark neighbors
            for l in range(j, num_lm):
                if landmark_neighbors[j, l] == 1 and j != l:
                    batch_graph[i, j + batch_size, l + batch_size] = 1
                    batch_graph[i, l + batch_size, j + batch_size] = 1
        holder = np.concatenate((holder, land_marks))
        # Short-circuit detection: plot each batch with its point-to-landmark edges
        fig = plt.figure()
        ax = plt.axes(projection='3d')
        landmarkplaceholder = []
        for j in range(num_lm):
            landmarkplaceholder.append(batch_size + j)
        ax.scatter(holder[range(batch_size), 0], holder[range(batch_size), 1],
                   holder[range(batch_size), 2], c=holderLabel)
        ax.scatter(holder[landmarkplaceholder, 0], holder[landmarkplaceholder, 1],
                   holder[landmarkplaceholder, 2], c="Red", marker="^", alpha=1)
        for o in range(batch_size):
            for j in range(batch_size, batch_size + num_lm):
                if batch_graph[i][o][j] > 0:
                    ax.plot([holder[o][0], holder[j][0]],
                            [holder[o][1], holder[j][1]],
                            [holder[o][2], holder[j][2]],
                            c='Red', alpha=0.5)
        plt.show()
        # end of short-circuit detection code
        batch_loader[i] = holder
    batch_size += num_lm
    return batch_loader, land_marks, labels, data, batch_graph, top_landmarks_idxs, saveData, saveLabels
def over_sampling(self):
    if self.k + 1 > self.n_train_less:
        print('Expected n_neighbors <= n_samples, but n_samples = {}, '
              'n_neighbors = {}; n_neighbors has been changed to {}'.format(
                  self.n_train_less, self.k + 1, self.n_train_less))
        self.k = self.n_train_less - 1
    data_less_filter = []
    num_maj_filter = []
    length_less = len(self.train_less)
    num_maj = number_maj(self.train[:, 1:], self.train_less[:, 1:],
                         self.tp_less, self.train[:, 0])
    # Filter out minority samples whose k neighbors are all majority points
    for m in range(len(num_maj)):
        if num_maj[m] < self.k:
            data_less_filter.append(self.train_less[m])
            num_maj_filter.append(num_maj[m])
    self.train_less = np.array(data_less_filter)
    distance_more, nn_array_more = NearestNeighbors(
        n_neighbors=self.k + 1).fit(self.train_more[:, 1:]).kneighbors(
            self.train_less[:, 1:], return_distance=True)
    distance_less, nn_array = NearestNeighbors(n_neighbors=self.k + 1).fit(
        self.train_less[:, 1:]).kneighbors(self.train_less[:, 1:],
                                           return_distance=True)
    distance_less = distance_less.sum(axis=1)
    distance_more = distance_more.sum(axis=1)
    distance = distance_less / distance_more
    # print(distance)
    density = 1 / distance  # calculate density
    density = list(map(lambda x: min(100, x), density))  # cap the density at 100
    # Sort the densities and reorder the minority samples to match
    density_sorted = sorted(range(len(density)), key=lambda a: density[a], reverse=True)
    data_resorted = []
    density_sorted_data = []
    num_sorted = []
    for i in range(len(self.train_less)):
        data_resorted.append(self.train_less[density_sorted[i]])
        density_sorted_data.append(density[density_sorted[i]])
        num_sorted.append(num_maj_filter[density_sorted[i]])
    density = np.array(density_sorted_data)
    cluster_big_density = []
    cluster_small_density = []
    cluster_big_data = []
    cluster_small_data = []
    cluster_big_num = []
    cluster_small_num = []
    cluster = k_means(X=density.reshape((len(density), 1)), n_clusters=2)
    for i in range(cluster[1].shape[0] - 1):
        if cluster[1][i] != cluster[1][i + 1]:  # partition into the two clusters
            cluster_big_density = density[:i + 1]
            cluster_big_data = np.array(data_resorted)[:i + 1, :]
            cluster_big_num = num_sorted[:i + 1]
            cluster_small_density = density[i + 1:]
            cluster_small_data = np.array(data_resorted)[i + 1:, :]
            cluster_small_num = num_sorted[i + 1:]
            break
    # If there is only one point in a cluster, do not divide the clusters
    if len(cluster_big_data) < 2 or len(cluster_small_data) < 2:
        cluster_big_data = np.array(data_resorted)
        cluster_big_density = density
        cluster_big_num = num_sorted
        flag = 1  # if flag == 1, only run the big cluster once
    else:
        flag = 2
    sum_0 = 0
    sum_1 = 0
    # Calculate each cluster's total weight
    for p in range(len(cluster_big_num)):
        sum_0 += (5 - cluster_big_num[p]) / self.k + 1
    for p in range(len(cluster_small_num)):
        sum_1 += (5 - cluster_small_num[p]) / self.k + 1
    ratio = []  # save each cluster's total weight
    ratio.append(sum_0)
    ratio.append(sum_1)
    wight = [5 / 6, 4 / 6, 3 / 6, 2 / 6, 1 / 6]
    kk = self.k
    diff = len(self.train_more) - length_less  # the number of samples to synthesize
    totol_less = len(self.train_less)
    for i in range(flag):
        if i == 0:  # big cluster
            density = cluster_big_density
            self.n_train_less = len(cluster_big_data)
            self.train_less = cluster_big_data
            maj_num_ab = cluster_big_num
        else:  # small cluster
            density = cluster_small_density
            self.n_train_less = len(cluster_small_data)
            self.train_less = cluster_small_data
            maj_num_ab = cluster_small_num
        # if len(self.train_less) <= k, shrink k to len(self.train_less) - 1
        self.k = min(len(self.train_less) - 1, kk)
        # The number of sample points that need to be inserted at each point
        if flag == 1:
            number_synthetic = int(len(self.train_more) / self.IR - len(self.train_less))
        else:
            if i == 0:
                number_synthetic = int((len(self.train_less) / totol_less) * diff)
                len_big = number_synthetic
            else:
                number_synthetic = diff - len_big
        # Calculate how many points should be inserted for each sample
        N = list(map(lambda x: int((x / ratio[i]) * number_synthetic), wight))
        self.reminder = number_synthetic - sum(N)
        self.num = 0
        neighbors = NearestNeighbors(n_neighbors=self.k + 1).fit(self.train_less[:, 1:])
        nn_array = neighbors.kneighbors(self.train_less[:, 1:], return_distance=False)
        self.synthetic = np.zeros((number_synthetic, self.n_attrs - 1))
        for p in range(self.train_less.shape[0]):
            self._populate(p, nn_array[p][1:], number_synthetic, N, maj_num_ab)
        label_synthetic = np.array([self.tp_less] * number_synthetic).reshape(
            (number_synthetic, 1))
        np.random.seed(self.random_state)
        synthetic_dl = self.synthetic
        synthetic_dl = np.hstack((label_synthetic, synthetic_dl))  # class column
        data_res = synthetic_dl
        if i == 0:
            return_data = np.vstack((copy.deepcopy(self.train), data_res))
            if flag == 1:
                return return_data
            self.new_index = 0
        else:
            return_data = np.vstack((copy.deepcopy(return_data), data_res))
    return return_data
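# A tiny numeric illustration (toy numbers of my own, not values from the code above)
# of the weight-to-count step used for the synthetic samples: each fixed weight in
# `wight` is scaled by number_synthetic / ratio and truncated to an integer, and the
# leftover count is carried separately (self.reminder above).
wight = [5 / 6, 4 / 6, 3 / 6, 2 / 6, 1 / 6]
ratio_total = 8.0             # stand-in for ratio[i], the cluster's total weight
number_synthetic = 20
N = [int((w / ratio_total) * number_synthetic) for w in wight]
reminder = number_synthetic - sum(N)
print(N, reminder)            # -> [2, 1, 1, 0, 0] 16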