def PSOClusteringAlgorithm(datapoints, dataname, run):
    """Cluster ``datapoints`` with the density-guided PSO clustering algorithm.

    Every data point is treated as a particle.  Point densities are first
    estimated with a Gaussian KDE, an RBF network is fitted to those
    densities, and the network is then used to score trial positions while
    the particles fly towards denser "leader" particles for a fixed number
    of time steps.  A particle whose personal best comes within the average
    particle-to-leader distance of its leader's personal best is merged with
    that leader.  The resulting merge structure is passed to ``labeling`` to
    produce the final labels.

    Args:
        datapoints: 2-D sequence (samples x features) of numeric coordinates.
        dataname: Name of the data set; selects the KDE bandwidth and the
            RBF spread from the tables below.
        run: Index of the current run, used as the NumPy random seed.

    Returns:
        A ``(labels, k)`` tuple as produced by ``labeling``: the labels
        assigned to the data points and the number of clusters detected.

    Raises:
        KeyError: If ``dataname`` has no entry in the parameter tables.
    """
    # KDE bandwidth for each known data set.
    band_dict = {"data/aggregation.txt": 1.5, "data/flame.txt": 1.3,
                 "data/DS850.txt": 0.3, "data/R15.txt": 0.3,
                 "data/D31.txt": 0.5, "data/dim512.txt": 0.40,
                 "data/iris.txt": 0.09, "data/wdbc.txt": 0.09,
                 "data/seeds.txt": 0.10, "data/segmentation_all.txt": 0.16,
                 "data/ecoli.txt": 0.07, "data/appendicitis.txt": 0.12}
    # Spread handed to the RBF network designer for each known data set.
    spread_dict = {"data/aggregation.txt": 1.8, "data/flame.txt": 2.2,
                   "data/DS850.txt": 0.3, "data/R15.txt": 0.5,
                   "data/D31.txt": 1.0, "data/dim512.txt": 0.40,
                   "data/iris.txt": 0.10, "data/wdbc.txt": 0.08,
                   "data/seeds.txt": 0.20, "data/segmentation_all.txt": 0.15,
                   "data/ecoli.txt": 0.09, "data/appendicitis.txt": 0.18}

    total_steps = 100  # total time steps
    c1 = 1.0           # acceleration constant for cognitive learning
    c2 = 0.5           # acceleration constant for social learning
    max_leaders = 2    # size limit of each particle's candidate-leader set

    X = np.array(datapoints)   # particles' positions (updated in place below)
    datapoints = X.tolist()    # snapshot of the original positions
    n = len(X)                 # data set scale
    d = len(X[0])              # dimension

    # Maximum velocity on each dimension: 10% of that dimension's value range.
    vmax = (X.max(axis=0) - X.min(axis=0)) * 0.10

    np.random.seed(run)  # make the run reproducible

    # Estimate the density of every particle with a Gaussian KDE.
    bandwidth = band_dict[dataname]
    kde = KernelDensity(kernel="gaussian", bandwidth=bandwidth).fit(X.copy())
    f = [math.exp(a) for a in kde.score_samples(X)]

    # Train the RBF network on the KDE densities, then replace the densities
    # with the network's own estimates so that later comparisons against
    # trial positions use the same estimator.
    targets = np.array([[var] for var in f])
    net = newrb.designrb(X, targets, 0, spread_dict[dataname],
                         round(0.1*n), n)
    print("Building RBF network is done!")
    for i in range(n):
        f[i] = net(X[i].reshape((1, d)))

    # Particles sorted by decreasing density; used as the processing order
    # for the pbest / merge updates and passed on to labeling.
    order = sorted(enumerate(f), key=lambda x: x[1], reverse=True)
    pbest = np.array(X)                # personal best positions
    pbest_density = copy.deepcopy(f)   # densities at the personal bests

    # Build the candidate-leader set of every particle: the first
    # ``max_leaders`` entries of its neighbour list that have a higher
    # density than the particle itself.
    neibrs_indices = calculate_neighborhood(pbest, n)
    lbest_index = list(range(n))   # index of each particle's current leader
    lbest = np.array(X)            # position of each particle's leader
    neibrs = []                    # candidate-leader set of each particle
    for i in range(n):
        leaders = []
        for j in range(1, n):
            if f[i] < f[neibrs_indices[i][j]]:
                leaders.append(neibrs_indices[i][j])
                if len(leaders) == max_leaders:
                    break
        if not leaders:
            # No denser neighbour: fall back to the first neighbour entry
            # (presumably the particle itself -- verify against
            # calculate_neighborhood).
            leaders.append(neibrs_indices[i][0])
        neibrs.append(leaders)
        lbest_index[i] = leaders[0]              # initial leader
        lbest[i] = np.array(X[lbest_index[i]])   # initial leader position

    # Mean distance between the particles and their leaders; serves as the
    # merge threshold.
    avg_distance, _ = PublicFunctions.getAvgerageDistance(lbest_index,
                                                          datapoints)
    bound = [0] * n               # 1 once a particle is merged with its leader
    velocity = np.zeros((n, d))   # velocities start at 0.0
    w = [1.0] * n                 # inertia weights start at 1.0

    # N(0, 1/sqrt(2*pi)) has a pdf that peaks at exactly 1 for delta == 0, so
    # the pdf value can be used directly as an acceptance probability.
    accept_dist = stats.norm(0, 1/np.sqrt(2*np.pi))

    for _ in range(total_steps):
        pbest_Mat = pbest - X
        lbest_Mat = lbest - X

        # Update velocity and position of every free particle.
        for i in range(n):
            if bound[i]:
                # Merged particles no longer fly; they follow their leader.
                continue
            for j in range(d):
                # r1 and r2 are drawn per dimension, in this order, so the
                # random stream for a given seed stays unchanged.
                r1 = np.random.random()
                r2 = np.random.random()
                velocity[i][j] = (w[i]*velocity[i][j]
                                  + c1 * r1 * (pbest_Mat[i][j])
                                  + c2 * r2 * (lbest_Mat[i][j]))
                # Clamp the velocity to [-vmax, vmax].
                if velocity[i][j] > vmax[j]:
                    velocity[i][j] = vmax[j]
                elif velocity[i][j] < -vmax[j]:
                    velocity[i][j] = -vmax[j]
                X[i][j] = X[i][j] + velocity[i][j]   # trial position

            # Estimate the density of the trial position with the RBF net;
            # the inertia weight becomes the trial/pbest density ratio.
            new_density = net(X[i].reshape((1, d)))
            w[i] = new_density/pbest_density[i]
            f[i] = new_density
            if new_density >= pbest_density[i]:
                # Trial position is at least as dense as pbest: keep it.
                continue

            # The trial position is worse.  Keep it with a probability that
            # shrinks as the relative density loss grows; otherwise send the
            # particle back to pbest and let it try its next leader.
            delta = ((pbest_density[i] - new_density)/pbest_density[i])*10
            prob = accept_dist.pdf(delta)
            rand = np.random.random()
            if rand > prob:
                velocity[i] = 0.0
                X[i] = pbest[i]
                f[i] = pbest_density[i]
                ind = (neibrs[i].index(lbest_index[i]) + 1) % len(neibrs[i])
                lbest_index[i] = neibrs[i][ind]

        # Update pbest in order of decreasing initial density.
        for idx, _density in order:
            if bound[idx]:
                pbest[idx] = np.array(pbest[lbest_index[idx]])
            elif f[idx] > pbest_density[idx]:
                pbest_density[idx] = f[idx]
                pbest[idx] = np.array(X[idx])

        # Decide which free particles are now close enough to their leader
        # to be merged with it.
        mark = [0] * n
        for i in range(n):
            if not bound[i] and PublicFunctions.getDistance(
                    pbest[i], pbest[lbest_index[i]]) <= avg_distance:
                mark[i] = 1

        for idx, _density in order:
            if mark[idx] or bound[idx]:
                # Merge: the particle takes over its leader's pbest/position.
                pbest[idx] = np.array(pbest[lbest_index[idx]])
                X[idx] = np.array(X[lbest_index[idx]])
                bound[idx] = 1
            # Refresh the stored leader position for every particle.
            lbest[idx] = np.array(X[lbest_index[idx]])

    labels, k = labeling(datapoints, bound, lbest_index, order)
    return labels, k