def __init__(self, X=None, name=''):
    """Build a Graph holding raw sample data plus empty analysis caches.

    :param X: sample x features data; normalized through ``utils.unify_data``.
    :param str name: graph name, forwarded to the parent constructor.
    """
    super(Graph, self).__init__(name=name)
    # Normalize whatever the caller passed into the canonical table form.
    data = utils.unify_data(X)
    self.rawX = data  # sample x features
    # Caches filled in lazily by later computations.
    self.nodePos = None
    self.cal_params = {}
    self.all_spath = None
    self.weight = None
    self._SAFE = []
def get_significant_nodes(graph, safe_scores, nr_threshold=0.5, pvalue=0.05,
                          n_iter=None, SAFE_pvalue=None, r_neighbor=False):
    """ get significantly enriched/declined nodes (>= threshold)

    Difference between centroides and nodes:

    1. centroides mean the node itself
    2. neighbor_nodes mean the neighbor nodes during SAFE calculation

    (For advanced usage)

    :param tmap.tda.Graph.Graph graph:
    :param pd.DataFrame safe_scores:
    :param nr_threshold:
    :param pvalue:
    :param n_iter:
    :param SAFE_pvalue:
    :param r_neighbor:
    :return:
    """
    neighborhoods = graph.get_neighborhoods(nr_threshold=nr_threshold)

    # Coerce the scores into a nodes x features table, transposing if needed.
    safe_scores = unify_data(safe_scores)
    if safe_scores.shape[0] != len(graph.nodes):
        safe_scores = safe_scores.T
    assert safe_scores.shape[0] == len(graph.nodes)

    if SAFE_pvalue is None:
        # Fall back to the iteration count recorded by the latest SAFE run,
        # then convert the requested p-value into a log-scaled score cutoff.
        if n_iter is None:
            n_iter = graph._SAFE[-1]['n_iter']
        min_p_value = 1.0 / (n_iter + 1.0)
        SAFE_pvalue = np.log10(pvalue) / np.log10(min_p_value)

    # Per feature: the center nodes whose SAFE score reaches the cutoff.
    significant_centroids = {}
    for feature, node2score in safe_scores.to_dict(orient='dict').items():
        significant_centroids[feature] = [
            node for node in node2score if node2score[node] >= SAFE_pvalue
        ]

    if not r_neighbor:
        return significant_centroids

    # Per feature: the union of all neighborhoods around significant centers.
    significant_neighbor_nodes = {
        feature: list({nb for center in centers for nb in neighborhoods[center]})
        for feature, centers in significant_centroids.items()
    }
    return significant_centroids, significant_neighbor_nodes
def neighborhood_score(self, node_data, neighborhoods=None, mode='sum'):
    """
    calculate neighborhood scores for each node from node associated data

    :param node_data: node associated values (node x feature table)
    :param dict neighborhoods: mapping of center node id -> list of neighbor
        node ids; computed via ``self.get_neighborhoods()`` when omitted.
    :param str mode: one of ``'sum'``, ``'mean'``, ``'weighted_sum'``,
        ``'weighted_mean'``; the ``weighted_*`` modes first scale each
        node's values by its node size.
    :return: pd.DataFrame of aggregated scores, indexed by center node id,
        one column per feature
    :raises ValueError: if ``mode`` is not a recognized aggregation mode
    """
    if neighborhoods is None:
        neighborhoods = self.get_neighborhoods()
    node_data = utils.unify_data(node_data)

    # Map each mode to its aggregation; the 'weighted_' variants reuse the
    # plain aggregators after the size-scaling step below.
    map_fun = {'sum': np.sum,
               'weighted_sum': np.sum,
               'weighted_mean': np.mean,
               'mean': np.mean}
    # Validate against map_fun itself (the original duplicated this list and
    # raised SyntaxError, which is reserved for parser errors — ValueError is
    # the correct exception for a bad argument value).
    if mode not in map_fun:
        raise ValueError('Unknown mode %r; expected one of %s.'
                         % (mode, sorted(map_fun)))
    aggregated_fun = map_fun[mode]

    if 'weighted_' in mode:
        # weighted neighborhood scores by node size
        sizes = [self.nodes[nid]['size'] for nid in node_data.index]
        node_data = node_data.multiply(sizes, axis='index')

    nv = node_data.values
    # Aggregate each center node's neighborhood rows along axis 0
    # (i.e. across nodes, one aggregated value per feature column).
    neighborhood_scores = {k: aggregated_fun(nv[neighbors, :], 0)
                           for k, neighbors in neighborhoods.items()}
    neighborhood_scores = pd.DataFrame.from_dict(neighborhood_scores,
                                                 orient="index",
                                                 columns=node_data.columns)
    # neighborhood_scores = neighborhood_scores.reindex(node_data.index)
    return neighborhood_scores