from scipy.io import loadmat
import numpy as np


def load_dblp(data_ratio=0.9):
    """Load the DBLP-S graph and return (adjacency matrix, one-hot labels)."""
    file_path = './datasets/mat/dblp-s.mat'
    data = loadmat(file_path)
    W = data['W'].astype(int)          # sparse adjacency matrix
    label = data['gnd'].astype(int)    # ground-truth class labels
    tot_len = W.shape[0]

    # Shuffle the node order.
    idx = np.array(range(tot_len))
    np.random.shuffle(idx)
    W = W[idx, :]
    W = W[:, idx]
    label = label[idx]

    # Keep only the first data_ratio fraction of the nodes.
    data_len = int(W.shape[0] * data_ratio)
    W = W[:data_len, :]
    W = W[:, :data_len]
    label = label[:data_len].reshape(-1)

    # Remove isolated (zero-degree) nodes.
    degree = np.sum(W, axis=0).astype(int)
    ZD = (degree.A != 0).reshape(-1)
    W = W[ZD]
    W = W[:, ZD]
    label = label[ZD]

    # Convert labels to one-hot vectors (hd.one_hot is a project helper).
    label_set = set(label)
    label = hd.one_hot(label, len(label_set))
    return W, label
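# hd.one_hot is not shown in this snippet; the sketch below is only an
# assumption about what such a helper could look like (integer labels in
# [0, num_classes) mapped to a dense one-hot matrix), not the project's
# actual implementation. It reuses the numpy import above.
def one_hot_sketch(labels, num_classes):
    """Minimal one-hot encoder sketch for integer class labels."""
    labels = np.asarray(labels).reshape(-1)
    out = np.zeros((labels.shape[0], num_classes), dtype=int)
    out[np.arange(labels.shape[0]), labels] = 1
    return out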
# A second version of the loader body: after trimming and one-hot encoding,
# it also builds a normalized relation matrix NS from W and its powers.
data_len = int(W.shape[0] * data_ratio)
W = W[:data_len, :]
W = W[:, :data_len]
label = label[:data_len].reshape(-1)

# Remove isolated (zero-degree) nodes.
degree = np.sum(W, axis=0).astype(int)
ZD = (degree.A != 0).reshape(-1)
W = W[ZD]
W = W[:, ZD]
label = label[ZD]

# Convert labels to one-hot vectors.
label_set = set(label)
label = hd.one_hot(label, len(label_set))

# Mix W with its higher powers; note that with k = 2 the loop body never
# runs, so g1 stays equal to W and the average leaves W unchanged.
g1 = W
k = 2
for i in range(2, k, 1):
    g1 = g1 * g1
W = W + g1
W = W / k

# Symmetric normalization: NS = D^(-1/2) W D^(-1/2).
sm = scipy.sparse.csc_matrix(np.diag(np.sum(W, axis=0).A.reshape(-1) ** -.5))
NS = sm @ W @ sm
print("--- initialization done", '-' * 50)
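# The normalization above materializes a dense tot_num x tot_num diagonal via
# np.diag, which gets memory-heavy on larger graphs. A sparse-friendly sketch
# of the same D^(-1/2) W D^(-1/2) step (my naming; assumes W is a scipy sparse
# matrix with no zero-degree nodes, and reuses the numpy import above):
from scipy.sparse import diags


def normalize_sym_sketch(W):
    """Return the symmetrically normalized matrix using a sparse diagonal."""
    deg = np.asarray(W.sum(axis=0)).reshape(-1)
    d_inv_sqrt = diags(deg ** -0.5)
    return d_inv_sqrt @ W @ d_inv_sqrt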
import time

import numpy as np
from scipy import sparse

# Build the normalized propagation matrix S and precompute k propagated
# attribute matrices, where P[i] = S^(i+1) @ attributes.
# network, labels = load_dblp(0.9)
tot_num = network.shape[0]
network += sparse.eye(tot_num)    # add self-loops
D = sparse.csc_matrix(np.diag(np.sum(network, axis=0).A.reshape(-1) ** -.5))
S = D @ network @ D               # symmetrically normalized adjacency
# attributes = sparse.eye(network.shape[0])    # identity features when no attributes are available

# Shift labels to start at 0 and convert them to one-hot vectors.
labels -= 1
label_set = set(labels)
label_num = len(label_set)
labels = hd.one_hot(labels, label_num)

# Train / test split sizes.
train_ratio = 0.9
train_num = int(tot_num * train_ratio)
test_num = tot_num - train_num

# Precompute the k propagation steps; the dense assignments below assume
# attributes is a dense ndarray (a sparse attributes matrix would need
# .toarray() before being stored in P).
temp = S
k = 5
P = np.empty((k, attributes.shape[0], attributes.shape[1]))
P[0] = S @ attributes
start = time.time()
for i in range(1, k, 1):
    # temp = sparse.csr_matrix(temp @ S)
    # P.append(sparse.csr_matrix(temp @ attributes))
    temp = temp @ S
    P[i] = temp @ attributes
print(time.time() - start)
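# The commented-out lines inside the loop hint at an all-sparse variant. A
# sketch of that approach (my naming; assumes S and attributes are scipy
# sparse matrices and reuses the imports above), which avoids allocating the
# dense k x n x m array P:
def propagate_sparse_sketch(S, attributes, k=5):
    """Return [S @ X, S^2 @ X, ..., S^k @ X] as sparse CSR matrices."""
    P_list = []
    power = S
    P_list.append(sparse.csr_matrix(power @ attributes))
    for _ in range(1, k):
        power = power @ S
        P_list.append(sparse.csr_matrix(power @ attributes))
    return P_list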