Exemple #1
0
 def __init__(self, X=None, name=''):
     """
     Initialise the graph with its raw data and empty analysis state.

     :param X: raw input data; it is passed through ``utils.unify_data``
         and the result is kept on ``rawX`` (sample x features)
     :param name: forwarded unchanged to the parent constructor
     """
     super(Graph, self).__init__(name=name)

     # Normalised copy of the input, sample x features.
     self.rawX = utils.unify_data(X)

     # Everything below starts out unset/empty at construction time.
     self.nodePos = None
     self.cal_params = {}
     self.all_spath = None
     self.weight = None
     # NOTE(review): presumably one record per SAFE run is appended
     # here later on -- confirm against the methods that fill it.
     self._SAFE = []
Exemple #2
0
def get_significant_nodes(graph,
                          safe_scores,
                          nr_threshold=0.5,
                          pvalue=0.05,
                          n_iter=None,
                          SAFE_pvalue=None,
                          r_neighbor=False):
    """
    Pick, per feature, the nodes whose SAFE score reaches the cutoff (>=).

    Two kinds of node lists can be returned:
        1. centroids: the nodes whose own score passes the cutoff
        2. neighbor nodes: every node found in the neighborhood of at least
           one significant centroid (for advanced usage)

    :param tmap.tda.Graph.Graph graph: graph providing ``nodes``,
        ``get_neighborhoods`` and, when ``n_iter`` is not given, ``_SAFE``
    :param pd.DataFrame safe_scores: SAFE scores; transposed automatically
        when its row count does not match the number of graph nodes
    :param nr_threshold: forwarded to ``graph.get_neighborhoods``
    :param pvalue: p-value used to derive the cutoff when ``SAFE_pvalue``
        is not given
    :param n_iter: iteration count used to derive the cutoff; defaults to
        the ``'n_iter'`` entry of the last record in ``graph._SAFE``
    :param SAFE_pvalue: explicit score cutoff; when given, ``pvalue`` and
        ``n_iter`` are ignored
    :param r_neighbor: if true, also return the neighbor-node mapping
    :return: ``{feature: [node, ...]}`` of significant centroids, or a
        tuple ``(centroids, neighbor_nodes)`` when ``r_neighbor`` is true
    """
    neighborhoods = graph.get_neighborhoods(nr_threshold=nr_threshold)

    # Bring the scores into a nodes x features layout.
    safe_scores = unify_data(safe_scores)
    if safe_scores.shape[0] != len(graph.nodes):
        safe_scores = safe_scores.T
    assert safe_scores.shape[0] == len(graph.nodes)

    if SAFE_pvalue is None:
        if n_iter is None:
            # Fall back to the iteration count of the most recent record.
            n_iter = graph._SAFE[-1]['n_iter']
        # Smallest p-value representable with n_iter iterations; the cutoff
        # is the requested p-value expressed on that log scale.
        min_p_value = 1.0 / (n_iter + 1.0)
        SAFE_pvalue = np.log10(pvalue) / np.log10(min_p_value)

    significant_centroids = {}
    for feature, node_scores in safe_scores.to_dict(orient='dict').items():
        significant_centroids[feature] = [
            node for node, score in node_scores.items()
            if score >= SAFE_pvalue
        ]

    if not r_neighbor:
        return significant_centroids

    # Union of the neighborhoods of all significant centroids, de-duplicated.
    significant_neighbor_nodes = {}
    for feature, centroids in significant_centroids.items():
        members = {
            member
            for centroid in centroids
            for member in neighborhoods[centroid]
        }
        significant_neighbor_nodes[feature] = list(members)
    return significant_centroids, significant_neighbor_nodes
Exemple #3
0
    def neighborhood_score(self, node_data, neighborhoods=None, mode='sum'):
        """
        Aggregate node associated data over the neighborhood of each node.

        :param node_data: node associated values; passed through
            ``utils.unify_data`` and then used as a table with one row per
            node (presumably a ``pd.DataFrame`` -- confirm)
        :param neighborhoods: mapping of center node -> row positions of its
            neighbors; defaults to ``self.get_neighborhoods()``
        :param mode: one of ``'sum'``, ``'mean'``, ``'weighted_sum'`` or
            ``'weighted_mean'``; the ``weighted_`` variants scale each row by
            the ``'size'`` of its node before aggregating
        :return: ``pd.DataFrame`` with one row per center node and the same
            columns as ``node_data``
        :raises SyntaxError: if ``mode`` is not one of the supported values
        """
        if neighborhoods is None:
            neighborhoods = self.get_neighborhoods()
        node_data = utils.unify_data(node_data)

        reducers = {
            'sum': np.sum,
            'weighted_sum': np.sum,
            'weighted_mean': np.mean,
            "mean": np.mean
        }
        supported_modes = ("sum", "mean", "weighted_sum", "weighted_mean")
        if mode not in supported_modes:
            raise SyntaxError('Wrong provided parameters.')
        reduce_fun = reducers[mode]

        if mode.startswith('weighted_'):
            # Scale every row by the size of the node it belongs to.
            # NOTE(review): 'weighted_mean' then takes a plain mean of the
            # scaled rows; it does not divide by the total size -- confirm
            # this is the intended definition.
            node_sizes = [self.nodes[nid]['size'] for nid in node_data.index]
            node_data = node_data.multiply(node_sizes, axis='index')

        # Rows are selected by position in the underlying array, so the
        # neighbor entries must be valid row positions of node_data.
        values = node_data.values
        scores_by_center = {}
        for center, members in neighborhoods.items():
            scores_by_center[center] = reduce_fun(values[members, :], axis=0)

        # Row order follows the iteration order of ``neighborhoods``; the
        # result is not re-aligned to ``node_data.index``.
        return pd.DataFrame.from_dict(scores_by_center,
                                      orient="index",
                                      columns=node_data.columns)