def gen_knn_hg(X, n_neighbors, is_prob=True, with_feature=False):
    """Build a k-nearest-neighbour hypergraph with one hyperedge per sample.

    Hyperedge ``i`` groups sample ``i`` (the centroid) with the samples that
    are closest to it according to ``pairwise_distances(X)``. Every hyperedge
    therefore has exactly ``n_neighbors + 1`` member nodes.

    :param X: numpy array or list, shape = (n_samples, n_features)
    :param n_neighbors: int, number of neighbours joined to each centroid,
        must satisfy ``0 < n_neighbors < n_samples``
    :param is_prob: bool, optional(default=True), if True an incidence entry
        is ``exp(-d ** 2 / avg ** 2)`` where ``d`` is the node-to-centroid
        distance and ``avg`` the mean of the whole distance matrix; if False
        every incidence entry is 1
    :param with_feature: bool, optional(default=False), if True ``X`` is
        handed to the returned HyperG
    :return: instance of HyperG
    :raises ValueError: if ``n_neighbors >= n_samples``
    """
    assert isinstance(X, (np.ndarray, list))
    assert n_neighbors > 0

    X = np.array(X)
    n_nodes = X.shape[0]
    n_edges = n_nodes
    group_size = n_neighbors + 1

    if group_size > n_nodes:
        raise ValueError(
            "n_neighbors ({}) must be smaller than the number of samples ({})"
            .format(n_neighbors, n_nodes))

    m_dist = pairwise_distances(X)

    # kth=n_neighbors puts the (n_neighbors + 1)-th smallest distance at
    # column n_neighbors with everything smaller before it, so the first
    # group_size columns are the closest samples and the last of them is the
    # farthest. (kth=n_neighbors + 1 would be out of bounds whenever
    # n_samples == n_neighbors + 1.)
    m_neighbors = np.argpartition(
        m_dist, kth=n_neighbors, axis=1)[:, :group_size]
    m_neighbors_val = np.take_along_axis(m_dist, m_neighbors, axis=1)

    # A centroid can be missing from its own neighbourhood (e.g. when other
    # samples are also at distance 0 from it). Force it in by replacing the
    # farthest selected neighbour.
    centroids = np.arange(n_nodes)
    lacks_self = ~np.any(m_neighbors == centroids[:, None], axis=1)
    m_neighbors[lacks_self, -1] = centroids[lacks_self]
    m_neighbors_val[lacks_self, -1] = 0.

    node_idx = m_neighbors.reshape(-1)
    # Row i of m_neighbors belongs to hyperedge i.
    edge_idx = np.tile(
        np.arange(n_edges).reshape(-1, 1), (1, group_size)).reshape(-1)

    if not is_prob:
        values = np.ones(node_idx.shape[0])
    else:
        avg_dist = np.mean(m_dist)
        m_neighbors_val = m_neighbors_val.reshape(-1)
        values = np.exp(
            -np.power(m_neighbors_val, 2.) / np.power(avg_dist, 2.))

    # Incidence matrix: rows are nodes, columns are hyperedges.
    H = sparse.coo_matrix(
        (values, (node_idx, edge_idx)), shape=(n_nodes, n_edges))
    w = np.ones(n_edges)

    if with_feature:
        return HyperG(H, w=w, X=X)

    return HyperG(H, w=w)
def fuse_mutli_sub_hg(hg_list):
    """Fuse several hypergraphs into one with disjoint node and edge ranges.

    The node (row) and hyperedge (column) indices of each hypergraph are
    shifted by the total number of nodes / hyperedges of the hypergraphs
    that precede it in ``hg_list``, then all incidence entries are gathered
    into a single incidence matrix.

    :param hg_list: list, list of HyperG instance
    :return: instance of HyperG
    """
    node_counts = [hg.num_nodes() for hg in hg_list]
    edge_counts = [hg.num_edges() for hg in hg_list]

    rows, cols, data = [], [], []
    node_offset = 0
    edge_offset = 0
    for hg, n_nodes, n_edges in zip(hg_list, node_counts, edge_counts):
        incidence = hg.incident_matrix()
        rows.append(incidence.row + node_offset)
        cols.append(incidence.col + edge_offset)
        data.append(incidence.data)
        # Offsets for the next hypergraph: everything seen so far.
        node_offset = node_offset + n_nodes
        edge_offset = edge_offset + n_edges

    fused_rows = np.concatenate(rows)
    fused_cols = np.concatenate(cols)
    fused_data = np.concatenate(data)

    H = sparse.coo_matrix(
        (fused_data, (fused_rows, fused_cols)),
        shape=(sum(node_counts), sum(edge_counts)))

    return HyperG(H)
def gen_epsilon_ball_hg(X, ratio, is_prob=True, with_feature=False):
    """Build an epsilon-ball hypergraph with one hyperedge per sample.

    Hyperedge ``i`` contains every sample whose distance to sample ``i`` in
    ``pairwise_distances(X)`` is at most ``ratio`` times the mean of the
    whole distance matrix.

    :param X: numpy array or list, shape = (n_samples, n_features)
    :param ratio: float, the ratio of average distance to select neighbor,
        must be positive
    :param is_prob: bool, optional(default=True), if True an incidence entry
        is ``exp(-d ** 2 / avg ** 2)`` with ``d`` the node-to-centre distance
        and ``avg`` the mean distance; if False every incidence entry is 1
    :param with_feature: bool, optional(default=False), if True ``X`` is
        handed to the returned HyperG
    :return: instance of HyperG
    """
    assert isinstance(X, (np.ndarray, list))
    assert ratio > 0

    X = np.array(X)
    n_samples = X.shape[0]

    dist = pairwise_distances(X)
    mean_dist = np.mean(dist)

    # inside[i, j] is True when sample j lies in the ball centred on sample i.
    # The row index is used as the hyperedge and the column index as the node.
    inside = dist <= ratio * mean_dist
    edge_idx, node_idx = np.where(inside)

    if is_prob:
        values = np.exp(
            -np.power(dist[inside], 2.) / np.power(mean_dist, 2.))
    else:
        values = np.ones(node_idx.shape[0])

    H = sparse.coo_matrix(
        (values, (node_idx, edge_idx)), shape=(n_samples, n_samples))
    w = np.ones(n_samples)

    if with_feature:
        return HyperG(H, w=w, X=X)

    return HyperG(H, w=w)
def test_trans_infer():
    """Check the output length of trans_infer on a 5-node, 3-edge hypergraph."""
    # (node, hyperedge, incidence value) triples.
    entries = [
        (0, 0, 0.1),
        (1, 0, 0.3),
        (2, 1, 0.2),
        (3, 1, 0.5),
        (0, 2, 0.6),
        (1, 2, 0.1),
        (4, 2, 0.3),
    ]
    node_idx = np.array([node for node, _, _ in entries])
    edge_idx = np.array([edge for _, edge, _ in entries])
    val = np.array([value for _, _, value in entries])

    H = sparse.coo_matrix((val, (node_idx, edge_idx)), shape=(5, 3))
    hg = HyperG(H)

    # Presumably -1 marks the nodes whose label is to be inferred; the
    # assertion below expects one prediction for each of those two nodes.
    y = np.array([0, 1, 1, -1, -1])
    y_predict = trans_infer(hg, y, lbd=100)

    assert y_predict.shape[0] == 2
def gen_clustering_hg(X, n_clusters, method="kmeans", with_feature=False,
                      random_state=None):
    """Build a hypergraph whose hyperedges are the clusters of ``X``.

    Each sample is a node and belongs to exactly one hyperedge: the cluster
    it was assigned to. All incidence values and hyperedge weights are 1.

    :param X: numpy array, shape = (n_samples, n_features)
    :param n_clusters: int, number of clusters (= number of hyperedges)
    :param method: str, clustering methods("kmeans",)
    :param with_feature: bool, optional(default=False), if True ``X`` is
        handed to the returned HyperG
    :param random_state: int, optional(default=None), forwarded to KMeans
        as its ``random_state``
    :return: instance of HyperG
    :raises ValueError: if ``method`` is not supported
    """
    if method != "kmeans":
        raise ValueError("{} method is not supported".format(method))

    labels = KMeans(
        n_clusters=n_clusters, random_state=random_state).fit(X).labels_

    # NOTE: this check runs only after the clustering above has succeeded.
    assert n_clusters >= 1

    n_nodes = X.shape[0]

    # Node i is a member of hyperedge labels[i].
    H = sparse.coo_matrix(
        (np.ones(n_nodes), (np.arange(n_nodes), labels)),
        shape=(n_nodes, n_clusters))
    w = np.ones(n_clusters)

    if with_feature:
        return HyperG(H, w=w, X=X)

    return HyperG(H, w=w)
def concat_multi_hg(hg_list):
    """Concatenate the hyperedges of several hypergraphs into one hypergraph.

    The incidence matrices are stacked horizontally (column-wise) and the
    hyperedge weight vectors are joined in the same order. Node features are
    taken from the first hypergraph that has any; every later hypergraph
    that has features must carry element-wise identical ones.

    :param hg_list: list, list of HyperG instance
    :return: instance of HyperG
    """
    incidences = [hg.incident_matrix() for hg in hg_list]
    weights = [hg.hyperedge_weights() for hg in hg_list]

    H = sparse.hstack(incidences)
    w = np.hstack(weights)

    X = None
    for hg in hg_list:
        features = hg.node_features()
        if features is None:
            continue
        if X is None:
            # First hypergraph that carries features defines the reference.
            X = features
        else:
            assert (X == features).all()

    return HyperG(H, X=X, w=w)
def gen_grid_neigh_hg(input_size):
    """Build a hypergraph from the 3x3 neighbourhoods of a 2-D grid.

    Grid cells are numbered row by row (``index = row * width + col``). Each
    cell is both a node and the centre of one hyperedge. That hyperedge
    contains the cell itself and those of its 8 surrounding cells that lie
    inside the grid. All incidence values are 1.

    :param input_size: numpy array, shape = (2, ), (height, width)
    :return: instance of HyperG
    """
    input_size = np.array(input_size).reshape(-1)
    assert input_size.shape[0] == 2

    h, w = input_size
    n_nodes = w * h
    n_edges = n_nodes

    node_set = np.arange(n_nodes)
    row = node_set // w
    col = node_set % w

    on_top = row == 0
    on_bottom = row == h - 1
    on_left = col == 0
    on_right = col == w - 1

    # (index offset of the neighbour, centres for which it is off the grid).
    # Builtin ``bool`` is used because the ``np.bool`` alias is not available
    # in every NumPy release.
    neighbours = [
        (-w - 1, on_top | on_left),
        (-w, on_top),
        (-w + 1, on_top | on_right),
        (-1, on_left),
        (0, np.zeros_like(node_set, dtype=bool)),
        (1, on_right),
        (w - 1, on_bottom | on_left),
        (w, on_bottom),
        (w + 1, on_bottom | on_right),
    ]

    # Keep only the neighbours that fall inside the grid: the neighbour is
    # the node, the centre cell is the hyperedge.
    node_idx = np.hstack(
        [(node_set + offset)[~outside] for offset, outside in neighbours])
    edge_idx = np.hstack(
        [node_set[~outside] for _, outside in neighbours])
    values = np.ones_like(node_idx)

    H = sparse.coo_matrix(
        (values, (node_idx, edge_idx)), shape=(n_nodes, n_edges))

    return HyperG(H)
def gen_l1_hg(X, gamma, n_neighbors, log=False, with_feature=False):
    """Build a hypergraph from l1-regularised neighbour reconstructions.

    One hyperedge is created per sample (the centroid). Its members are the
    centroid, with incidence value 1, and ``n_neighbors`` of its nearest
    samples according to ``pairwise_distances(X)``. The incidence value of
    each neighbour is its non-negative coefficient in the solution of

        minimise  ||P^T x - v||_2 + gamma * ||x||_1

    where the rows of ``P`` are the neighbours' features and ``v`` is the
    centroid's feature vector.

    :param X: numpy array, shape = (n_samples, n_features)
    :param gamma: float, the tradeoff parameter of the l1 norm on
        representation coefficients
    :param n_neighbors: int, number of neighbours per hyperedge (>= 1)
    :param log: bool, if True a message is logged for every hyperedge
    :param with_feature: bool, optional(default=False), if True ``X`` is
        handed to the returned HyperG
    :return: instance of HyperG
    """
    assert n_neighbors >= 1.
    assert isinstance(X, np.ndarray)
    assert X.ndim == 2

    n_nodes = X.shape[0]
    n_edges = n_nodes
    group_size = n_neighbors + 1

    m_dist = pairwise_distances(X)
    nearest = np.argsort(m_dist)[:, 0:group_size]

    # Every hyperedge contributes group_size consecutive entries.
    edge_idx = np.tile(
        np.arange(n_edges).reshape(-1, 1), (1, group_size)).reshape(-1)

    node_idx = []
    values = []

    for centroid, candidates in enumerate(nearest):
        if log:
            print_log("processing edge {} ".format(centroid))

        # Drop the centroid from its own candidate list. If it is not there,
        # drop the farthest candidate so the list length stays n_neighbors.
        neighbors = candidates.tolist()
        if centroid in neighbors:
            neighbors.remove(centroid)
        else:
            del neighbors[-1]

        P = X[neighbors, :]
        v = X[centroid, :]

        # cvxpy: non-negative coefficients reconstructing v from rows of P.
        x = cp.Variable(P.shape[0], nonneg=True)
        objective = cp.Minimize(
            cp.norm((P.T @ x).T - v, 2) + gamma * cp.norm(x, 1))
        prob = cp.Problem(objective)
        try:
            prob.solve()
        except SolverError:
            # Retry with an explicitly chosen solver.
            prob.solve(solver='SCS', verbose=False)

        node_idx.append(centroid)
        node_idx.extend(neighbors)
        values.append(1.)
        values.extend(x.value.tolist())

    node_idx = np.array(node_idx)
    values = np.array(values)

    H = sparse.coo_matrix(
        (values, (node_idx, edge_idx)), shape=(n_nodes, n_edges))

    if with_feature:
        return HyperG(H, X=X)

    return HyperG(H)
def test_hyperg():
    """Exercise the HyperG accessors, derived matrices and update methods."""

    def flat(mat):
        # Stored entries of a (diagonal) degree matrix as a 1-D array.
        return mat.data.reshape(-1)

    # NOTE(review): `.A` is shorthand for a dense copy; newer SciPy releases
    # deprecate it on sparse arrays -- confirm against the pinned version.

    # --- construction and plain accessors ---------------------------------
    edge_idx = np.array([0, 0, 1, 1, 2, 2, 2])
    node_idx = np.array([0, 1, 2, 3, 0, 1, 4])
    val = np.array([0.1, 0.3, 0.2, 0.5, 0.6, 0.1, 0.3])
    H = sparse.coo_matrix((val, (node_idx, edge_idx)), shape=(5, 3))
    w = np.array([0.3, 0.4, 0.3])
    X = np.random.rand(5, 4)

    hg = HyperG(H, X=X, w=w)

    assert hg.num_edges() == 3
    assert hg.num_nodes() == 5
    assert np.allclose(hg.incident_matrix().A, H.A)
    assert np.allclose(hg.hyperedge_weights(), w)
    assert np.allclose(hg.node_features(), X)

    # --- degrees ------------------------------------------------------------
    # Expected node degrees equal H @ w; expected edge degrees are the column
    # sums of H.
    expected_node_deg = np.array([0.21, 0.12, 0.08, 0.2, 0.09])
    expected_inv_sqrt_node_deg = np.array(
        [2.1821789, 2.88675135, 3.53553391, 2.23606798, 3.33333333])
    expected_edge_deg = np.array([0.4, 0.7, 1.0])
    expected_inv_edge_deg = np.array([1 / 0.4, 1 / 0.7, 1 / 1.0])

    assert np.allclose(flat(hg.node_degrees()), expected_node_deg)
    assert np.allclose(
        flat(hg.inv_square_node_degrees()), expected_inv_sqrt_node_deg)
    assert np.allclose(flat(hg.edge_degrees()), expected_edge_deg)
    assert np.allclose(flat(hg.inv_edge_degrees()), expected_inv_edge_deg)

    # --- theta matrix and laplacian ----------------------------------------
    DV2 = hg.inv_square_node_degrees()
    INVDE = hg.inv_edge_degrees()
    THETA = DV2.dot(H).dot(sparse.diags(w)).dot(INVDE).dot(H.T).dot(DV2)

    assert np.allclose(hg.theta_matrix().A, THETA.A)
    assert np.allclose(hg.laplacian().A, (sparse.eye(5) - THETA).A)

    # --- updating the hyperedge weights ------------------------------------
    unit_weights = np.array([1.0, 1.0, 1.0])
    hg.update_hyedge_weights(unit_weights)

    assert np.allclose(hg.hyperedge_weights(), np.array([1.0, 1.0, 1.0]))
    assert np.allclose(
        flat(hg.node_degrees()), np.array([0.7, 0.4, 0.2, 0.5, 0.3]))

    # --- replacing the incidence matrix ------------------------------------
    edge_idx = np.array([0, 1, 1, 2, 2, 2])
    node_idx = np.array([0, 2, 3, 0, 1, 4])
    val = np.array([0.2, 0.4, 0.5, 0.6, 0.1, 0.3])
    H = sparse.coo_matrix((val, (node_idx, edge_idx)), shape=(5, 3))

    hg.update_incident_matrix(H)

    assert np.allclose(hg.incident_matrix().A, H.A)
    assert np.allclose(
        flat(hg.node_degrees()), np.array([0.8, 0.1, 0.4, 0.5, 0.3]))