def _get_sequence_init(level, cluster_num, sequences1, sequences2, k, w):
    """Build the initial 0/1 membership matrix for ``cluster_num`` clusters.

    One seed sequence is chosen per cluster.  The first seed is drawn with
    ``np.random.randint``; every later seed is drawn from the sequences whose
    distance to *all* earlier seeds is at least ``CLUSTER_DIST_THRESHOLD``.
    When no sequence clears the threshold, the non-seed sequence with the
    largest summed distance to the earlier seeds is used instead.

    Each cluster row then marks every sequence whose distance to the seed is
    no larger than the ``k``-th smallest distance in that row.  Rows that end
    up with more than ``k`` marks (ties at the threshold) are thinned at
    random to exactly ``k`` marks, always keeping the seed itself.

    Args:
        level: Passed through unchanged to ``cskfuzzy.get_distance``.
        cluster_num: Number of clusters, i.e. number of rows in the result.
        sequences1: First collection of sequences; its length fixes the
            number of columns in the result.
        sequences2: Second collection, indexed with the same seed indices as
            ``sequences1``.
        k: Target number of initial members per cluster.
        w: Passed through unchanged to ``cskfuzzy.get_distance``.

    Returns:
        Integer array of shape ``(cluster_num, len(sequences1))`` in which
        ``u[i, j] == 1`` marks sequence ``j`` as an initial member of
        cluster ``i``.

    Raises:
        KeyError: If a row needs thinning but its seed is not among the
            marked sequences of that row.
    """
    n_sequences = len(sequences1)
    # Converted once; the original rebuilt both arrays for every seed.
    seq_array1 = np.array(sequences1)
    seq_array2 = np.array(sequences2)

    distance = np.ones((cluster_num, n_sequences))

    np.random.seed(RAND_SEED_INIT)
    # NOTE(review): numpy's upper bound is exclusive, so the last sequence
    # index can never be drawn here.  Left unchanged so that seeded runs
    # keep producing the same first seed -- confirm whether this is intended.
    init = np.random.randint(0, n_sequences - 1, 1)
    print("--init 3:", init)
    # Presumably the distance from every sequence to the seed; the result is
    # stored as one full row of ``distance``.
    distance[0, :] = cskfuzzy.get_distance(
        level, w, sequences1, sequences2, seq_array1[init], seq_array2[init])

    random.seed(RAND_SEED_INIT)
    # Pick the remaining seeds far away from all seeds chosen so far.
    for c in range(1, cluster_num):
        far_enough = (distance[0:c, :] >= CLUSTER_DIST_THRESHOLD).all(axis=0)
        far_cluster = list(set(np.where(far_enough)[0]) - set(init))
        if not far_cluster:
            # Nothing clears the threshold: fall back to the non-seed
            # sequence with the largest summed distance to earlier seeds.
            not_init = list(set(range(n_sequences)) - set(init))
            ranked = sorted(
                not_init, key=lambda x: distance[0:c, x].sum(axis=0))
            far_cluster = [ranked[-1]]
        add_init = random.sample(far_cluster, 1)
        distance[c, :] = cskfuzzy.get_distance(
            level, w, sequences1, sequences2,
            seq_array1[add_init], seq_array2[add_init])
        init = np.append(init, add_init)
    print("[fuzzy c means]- get_init_u> \n-- init:", init)

    # Mark, per cluster, every sequence within the k-th smallest distance.
    kth_smallest = np.sort(distance, axis=1)[:, k - 1]
    u = (distance <= kth_smallest[:, np.newaxis]).astype(int)

    random.seed(RAND_SEED_K)
    print("--each cluster initial # before random choose:", u.sum(axis=1))
    for i in range(cluster_num):
        if u[i].sum() > k:
            # Ties pushed the row above k members: keep the seed and draw
            # the other k - 1 members at random.
            indices = set(np.where(u[i, :] == 1)[0])
            indices.remove(init[i])
            # random.sample() rejects sets on Python 3.11+ (deprecated since
            # 3.9); sampling from the sorted indices also makes the seeded
            # draw independent of set iteration order.
            rand_k = random.sample(sorted(indices), k - 1)
            u[i, :] = 0
            u[i, rand_k] = 1
            u[i, init[i]] = 1
    print("--each cluster initial #:", u.sum(axis=1))

    return u
def _distance_c_c(level, c, k, w, data1, data2, center):
    """Return the ``(c, c)`` matrix of mean distances between clusters.

    Args:
        level: Passed through unchanged to ``cskfuzzy.get_distance``.
        c: Number of clusters.
        k: Number of member sequences per cluster.
        w: Passed through unchanged to ``cskfuzzy.get_distance``.
        data1: First collection of sequences, indexed by ``center``.
        data2: Second collection of sequences, indexed by ``center``.
        center: Index selecting the cluster members.  For the matrix
            products below to be shape-compatible it has to select
            ``c * k`` items, with each consecutive run of ``k`` belonging
            to one cluster.

    Returns:
        Array of shape ``(c, c)`` whose entry ``(i, j)`` is the mean of the
        ``k * k`` distances between members of cluster ``i`` and members of
        cluster ``j``; the diagonal is forced to 0.
    """
    members1 = np.array(data1)[center]
    members2 = np.array(data2)[center]
    # Presumably the pairwise distances among the selected members.
    pairwise = cskfuzzy.get_distance(level, w, members1, members2)
    # (Rescaling by np.amax(pairwise) is left disabled.)

    # Row i of the indicator is 1 on columns i*k .. (i+1)*k - 1, i.e. on the
    # members of cluster i, and 0 everywhere else.
    indicator = np.repeat(np.eye(c), k, axis=1)

    # indicator . pairwise . indicator^T sums every k-by-k block of the
    # pairwise matrix; dividing by k*k turns those sums into means.
    block_means = indicator.dot(pairwise).dot(indicator.T) / (k * k)
    np.fill_diagonal(block_means, 0)
    return block_means
def _distance_c_c(level, c, k, w, data1, data2, center):
    """Return the ``(c, c)`` matrix of mean distances between clusters.

    Args:
        level: Passed through unchanged to ``cskfuzzy.get_distance``.
        c: Number of clusters.
        k: Number of member sequences per cluster.
        w: Passed through unchanged to ``cskfuzzy.get_distance``.
        data1: First collection of sequences, indexed by ``center``.
        data2: Second collection of sequences, indexed by ``center``.
        center: Index selecting the cluster members.  For the matrix
            products below to be shape-compatible it has to select
            ``c * k`` items, with each consecutive run of ``k`` belonging
            to one cluster.

    Returns:
        Array of shape ``(c, c)`` whose entry ``(i, j)`` is the mean of the
        ``k * k`` distances between members of cluster ``i`` and members of
        cluster ``j``; the diagonal is forced to 0.
    """
    members1 = np.array(data1)[center]
    members2 = np.array(data2)[center]
    # Presumably the pairwise distances among the selected members.
    pairwise = cskfuzzy.get_distance(level, w, members1, members2)
    # (Rescaling by np.amax(pairwise) is left disabled.)

    # Row i of the indicator is 1 on columns i*k .. (i+1)*k - 1, i.e. on the
    # members of cluster i, and 0 everywhere else.
    indicator = np.repeat(np.eye(c), k, axis=1)

    # indicator . pairwise . indicator^T sums every k-by-k block of the
    # pairwise matrix; dividing by k*k turns those sums into means.
    block_means = indicator.dot(pairwise).dot(indicator.T) / (k * k)
    np.fill_diagonal(block_means, 0)
    return block_means