def visualize_matrix(h: sparse.coo_matrix):
    """Render a sparse matrix as a dense pandas DataFrame for inspection.

    :param h: sparse matrix in coo_matrix form
    :return: pd.DataFrame holding the densified entries of ``h``
    """
    # Densify first, then wrap: every entry (zeros included) becomes a cell.
    return pd.DataFrame(h.todense())
def _sample_non_edges(rng, num_nodes, count, existing_pairs):
    """Draw ``count`` distinct node pairs that are not edges of the graph.

    Args:
        rng: ``numpy.random.Generator`` used for the draws.
        num_nodes: number of nodes; indices are drawn from ``[0, num_nodes)``.
        count: number of non-edges to return.
        existing_pairs: set of ``(low, high)`` tuples, one per undirected
            edge of the graph (self-loops excluded).

    Returns:
        A list of ``[i, j]`` pairs. No pair is a self-loop, an existing edge
        (in either direction), or a repeat of an earlier pair (in either
        direction).

    Raises:
        ValueError: if the graph has fewer than ``count`` non-edges, in which
            case rejection sampling could never finish.
    """
    available = num_nodes * (num_nodes - 1) // 2 - len(existing_pairs)
    if count > available:
        raise ValueError(
            f"Cannot sample {count} non-edges: the graph only has "
            f"{available} unconnected node pairs."
        )

    sampled = []
    taken = set()
    while len(sampled) < count:
        idx_i = rng.integers(0, num_nodes)
        idx_j = rng.integers(0, num_nodes)
        if idx_i == idx_j:
            continue
        # Canonical (low, high) key makes the membership test undirected.
        pair = (int(min(idx_i, idx_j)), int(max(idx_i, idx_j)))
        if pair in existing_pairs or pair in taken:
            continue
        taken.add(pair)
        sampled.append([idx_i, idx_j])
    return sampled


def mask_test_edges(
    adj: sp.coo_matrix,
    seed: int = 0,
    validation_frac: float = 0.05,
    test_frac: float = 0.1,
    validation_edges_in_adj: bool = False,
):
    """
    Split edges for graph autoencoder into train/validation/test splits.

    Based on https://github.com/tkipf/gae/blob/master/gae/preprocessing.py

    The diagonal of ``adj`` is dropped, the upper-triangular edges are
    shuffled and split, and for each held-out split an equally sized list of
    node pairs that are not edges is sampled. Negative pairs are rejected
    against every edge of the graph (train, validation and test alike), so a
    held-out positive can never be returned as a negative.

    Args:
        adj: scipy.sparse.coo_matrix adjacency matrix. It must be square and
            is expected to be symmetric; only its upper triangle is split.
        seed: seed for the random generator driving the shuffle and the
            negative sampling.
        validation_frac: fraction of the edges held out for validation.
        test_frac: fraction of the edges held out for testing.
        validation_edges_in_adj: if True, the validation edges are kept in
            the returned training adjacency matrix.

    Returns:
        Tuple ``(adj_train, val_edges, val_edges_false, test_edges,
        test_edges_false)``. ``adj_train`` is the symmetric sparse training
        adjacency matrix; ``val_edges`` / ``test_edges`` are ``(k, 2)`` index
        arrays; the ``*_false`` entries are lists of ``[i, j]`` non-edges.
        The edge lists only contain a single direction of each edge.

    Raises:
        ValueError: if ``adj`` is not square, a fraction is negative, the
            fractions request more edges than exist, or the graph has too
            few non-edges to sample the requested negatives.
    """
    if adj.shape[0] != adj.shape[1]:
        raise ValueError(f"adj must be square, got shape {adj.shape}.")
    if validation_frac < 0 or test_frac < 0:
        raise ValueError("validation_frac and test_frac must be non-negative.")

    rng = np.random.default_rng(seed)

    # Remove diagonal elements
    adj = adj - sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()

    # One row per undirected edge, taken from the upper triangle.
    upper = sp.triu(adj).tocoo()
    edges = np.vstack((upper.row, upper.col)).transpose()

    # Every stored entry, folded into undirected (low, high) keys.
    full = adj.tocoo()
    existing_pairs = {
        (min(r, c), max(r, c))
        for r, c in zip(full.row.tolist(), full.col.tolist())
    }

    num_edges = edges.shape[0]
    num_test = int(np.floor(num_edges * test_frac))
    num_val = int(np.floor(num_edges * validation_frac))
    if num_val + num_test > num_edges:
        raise ValueError(
            "validation_frac + test_frac must not exceed 1 "
            f"(got {validation_frac} + {test_frac})."
        )

    # Shuffle a plain list (as before) so a given seed keeps its split.
    all_edge_idx = list(range(num_edges))
    rng.shuffle(all_edge_idx)
    # Explicit integer dtype: an empty split must not become a float index.
    val_edge_idx = np.asarray(all_edge_idx[:num_val], dtype=np.intp)
    test_edge_idx = np.asarray(
        all_edge_idx[num_val:(num_val + num_test)], dtype=np.intp)

    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]
    is_train = np.ones(num_edges, dtype=bool)
    is_train[test_edge_idx] = False
    is_train[val_edge_idx] = False
    train_edges = edges[is_train]

    # The positive splits are disjoint by construction (disjoint slices of
    # one permutation); the negatives are checked against all edges.
    num_nodes = adj.shape[0]
    test_edges_false = _sample_non_edges(
        rng, num_nodes, len(test_edges), existing_pairs)
    val_edges_false = _sample_non_edges(
        rng, num_nodes, len(val_edges), existing_pairs)

    if validation_edges_in_adj:
        adj_edges = np.concatenate((train_edges, val_edges), axis=0)
    else:
        adj_edges = train_edges

    data = np.ones(adj_edges.shape[0])

    # Re-build adj matrix
    adj_train = sp.coo_matrix(
        (data, (adj_edges[:, 0], adj_edges[:, 1])), shape=adj.shape)
    adj_train = adj_train + adj_train.T

    # NOTE: these edge lists only contain single direction of edge!
    return (
        adj_train,
        val_edges,
        val_edges_false,
        test_edges,
        test_edges_false,
    )