def __init__(self, graph: GraphSignalGraph, obj: GraphSignalData, node2id: Optional[Mapping[object, int]] = None):
    """Should **ALWAYS** instantiate graph signals with the method to_signal,
    which handles non-instantiation semantics.

    Args:
        graph: The graph over which the signal is defined.
        obj: A backend array holding one value per graph node, a mapping from
            nodes to values (missing nodes default to 0), or None (all nodes
            default to 1).
        node2id: Optional precomputed mapping of nodes to their positions;
            computed by enumerating the graph when omitted.

    Raises:
        Exception: If an array `obj` does not match the number of graph nodes.
    """
    self.graph = graph
    self.node2id = {v: i for i, v in enumerate(graph)} if node2id is None else node2id
    if backend.is_array(obj):
        if backend.length(graph) != backend.length(obj):
            raise Exception("Graph signal array dimensions " + str(backend.length(obj))
                            + " should be equal to graph nodes " + str(backend.length(graph)))
        self.np = backend.to_array(obj)
    elif obj is None:
        self.np = backend.repeat(1.0, len(graph))
    else:
        # Local import for consistency with the sibling constructor, so this
        # branch works even when numpy is not imported at module level.
        import numpy as np
        # tensorflow does not initialize editing of eager tensors, so fill a
        # numpy buffer first and convert to the active backend afterwards.
        self.np = np.repeat(0.0, len(graph))
        for key, value in obj.items():
            self[key] = value
        self.np = backend.to_array(self.np)  # make all operations with numpy and then potentially switch to tensorflow
def evaluate(self, scores: GraphSignalData) -> BackendPrimitive:
    """Computes NDCG@k: the discounted cumulative gain of the known scores in
    the order induced by the assessed scores, normalized by the gain of the
    ideal (known-score) ordering.

    Args:
        scores: The scores to assess.

    Returns:
        DCG / IDCG, which lies in [0, 1] for non-negative known scores.
    """
    known_scores, scores = self.to_numpy(scores)

    def discounted_gain(ordering_scores):
        # Gain accumulated by the known scores when positions are ranked in
        # descending order of ordering_scores, truncated to the top-k.
        top = sorted(range(backend.length(ordering_scores)),
                     key=ordering_scores.__getitem__,
                     reverse=True)[:self.k]
        return sum(known_scores[pos] / np.log2(rank + 2) for rank, pos in enumerate(top))

    DCG = discounted_gain(scores)
    IDCG = discounted_gain(known_scores)  # ideal ordering normalizer
    return DCG / IDCG
def evaluate(self, scores: GraphSignalData) -> BackendPrimitive:
    """Assesses scores against known scores by treating both as probability
    distributions (after shifting to strictly positive values) and averaging
    a KL-divergence-style quantity per element.

    Args:
        scores: The scores to assess.

    Returns:
        The divergence measure divided by the number of elements.
    """
    known_scores, scores = self.to_numpy(scores, normalization=True)
    eps = backend.epsilon()
    # Shift each vector so its minimum becomes eps (> 0), then normalize to sum 1.
    reference = known_scores - backend.min(known_scores) + eps
    reference = reference / backend.sum(reference)
    assessed = scores - backend.min(scores) + eps
    assessed = assessed / backend.sum(assessed)
    divergence = -backend.sum(assessed * backend.log(assessed / reference))
    return divergence / backend.length(assessed)
def evaluate(self, scores: GraphSignalData) -> BackendPrimitive:
    """Computes the pRule: the ratio between the average score mass of the
    sensitive group and of the remaining nodes, always folded into [0, 1] by
    dividing the smaller average by the larger one.

    Args:
        scores: The scores to assess.

    Returns:
        The ratio min(p1, p2)/max(p1, p2), or 0 if either group receives no mass.
    """
    sensitive, scores = self.to_numpy(scores)
    mass_sensitive = backend.dot(scores, sensitive)
    mass_rest = backend.dot(scores, 1 - sensitive)
    if mass_sensitive == 0 or mass_rest == 0:
        return 0
    group_size = backend.sum(sensitive)
    avg_sensitive = backend.safe_div(mass_sensitive, group_size)
    avg_rest = backend.safe_div(mass_rest, backend.length(sensitive) - group_size)
    # Explicit branch (instead of min/max helpers) keeps the result differentiable.
    if avg_sensitive <= avg_rest:
        return avg_sensitive / avg_rest
    return avg_rest / avg_sensitive
def _transform(self, ranks: GraphSignal, **kwargs):
    """Binarizes ranks: nodes whose rank is at least the value of the
    fraction_of_training-th highest-ranked node become 1, the rest 0.

    Args:
        ranks: The graph signal to binarize.

    Returns:
        A dict mapping every node to 1. or 0.
    """
    ensure_used_args(kwargs)
    # Interpret fraction_of_training as a node count, or as a fraction of all
    # nodes when it is below 1.
    remaining = self.fraction_of_training
    if remaining < 1:
        remaining = remaining * backend.length(ranks)
    remaining = int(remaining)
    threshold = 0
    for node in sorted(ranks, key=ranks.get, reverse=True):
        remaining -= 1
        if remaining == 0:
            threshold = ranks[node]
            break
    return {node: 1. if ranks[node] >= threshold else 0. for node in ranks.keys()}
def __init__(self, graph: GraphSignalGraph, obj: GraphSignalData, node2id: Optional[Mapping[object, int]] = None):
    """Should **ALWAYS** instantiate graph signals with the method to_signal,
    which handles non-instantiation semantics.

    Args:
        graph: The graph over which the signal is defined. May also be a
            matrix-like object exposing `shape`, or carry a precomputed
            `_pygrank_node2id` attribute set by preprocessing.
        obj: A backend array holding one value per graph node, a mapping from
            nodes to values (missing nodes default to 0), or None (all nodes
            default to 1).
        node2id: Optional precomputed mapping of nodes to their positions.

    Raises:
        Exception: If an array `obj` does not match the number of graph nodes.
    """
    if node2id is not None:
        self.node2id = node2id
    elif hasattr(graph, "_pygrank_node2id"):  # obtained from preprocessing
        self.node2id = graph._pygrank_node2id
    elif hasattr(graph, "shape"):  # externally defined type (matrix-like)
        self.node2id = {i: i for i in range(graph.shape[0])}
    else:  # this is the case where it is an actual graph
        self.node2id = {v: i for i, v in enumerate(graph)}
    self.graph = graph
    # Matrix-like graphs expose shape but do not support len().
    graph_len = graph.shape[0] if hasattr(graph, "shape") else len(graph)
    if backend.is_array(obj):
        if graph_len != backend.length(obj):
            # Report graph_len here: calling len(graph) would itself raise
            # TypeError for matrix-like graphs.
            raise Exception("Graph signal array dimensions " + str(backend.length(obj))
                            + " should be equal to graph nodes " + str(graph_len))
        self._np = backend.to_array(obj)
    elif obj is None:
        self._np = backend.repeat(1.0, graph_len)
    else:
        import numpy as np
        # tensorflow does not initialize editing of eager tensors, so fill a
        # numpy buffer first and convert to the active backend afterwards.
        self._np = np.repeat(0.0, graph_len)
        for key, value in obj.items():
            self[key] = value
        self._np = backend.to_array(self._np)  # make all operations with numpy and then potentially switch to tensorflow
def evaluate(self, scores: GraphSignalData) -> BackendPrimitive:
    """Computes a squared disparity between the average score of the sensitive
    group and the average score of the remaining nodes, scaled by the total
    number of nodes.

    Args:
        scores: The scores to assess.

    Returns:
        (p1 - p2)**2 * n, where p1 and p2 are the per-group average scores
        and n is the number of nodes. This is 0 only under perfect parity.
    """
    sensitive, scores = self.to_numpy(scores)
    p1 = backend.dot(scores, sensitive)
    p2 = backend.dot(scores, 1 - sensitive)
    s = backend.sum(sensitive)
    n = backend.length(sensitive)
    # safe_div guards against empty groups (s == 0 or s == n).
    p1 = backend.safe_div(p1, s)
    p2 = backend.safe_div(p2, n - s)
    return (p1 - p2) ** 2 * n
def normalization(self, M):
    """Augments the adjacency matrix M with a diagonal correction so that
    nodes route at least a phi-fraction of their transition mass towards the
    sensitive group where possible; caches the per-group out-masses.

    Args:
        M: The (sparse) adjacency/transition matrix to normalize.

    Returns:
        The corrected matrix M + Q*M, where Q is diagonal.
    """
    import scipy.sparse
    sensitive = self.sensitive
    phi = self.phi
    # Per-node mass sent towards sensitive (outR) and non-sensitive (outB) nodes.
    outR = backend.conv(sensitive.np, M)
    outB = backend.conv(1. - sensitive.np, M)
    # case1: nodes whose sensitive-bound mass falls short of the phi target.
    case1 = outR < phi * (outR + outB)
    case2 = (1 - case1) * (outR != 0)
    case3 = (1 - case1) * (1 - case2)
    d = backend.repeat(0, backend.length(outR))
    # NOTE(review): case2/case3 are arithmetic 0/1 arrays (not booleans), and d
    # is initialized from an integer 0 — this relies on the backend treating
    # 0/1 arrays as masks and on float assignment into d not truncating.
    # TODO confirm intended backend semantics (plain numpy would differ).
    d[case1] = (1 - phi) / outB[case1]
    d[case2] = phi / outR[case2]
    d[case3] = 1
    Q = scipy.sparse.spdiags(d, 0, *M.shape)
    M = M + Q * M
    # Cache the out-masses for reuse by the ranking algorithm's _start.
    self.outR = outR
    self.outB = outB
    return M
def _start(self, M, personalization, ranks, *args, **kwargs):
    """Initializes iteration state for the graph-filter ranker: resets the
    running coefficient, prepares the Krylov subspace approximation when
    krylov_dims is set, or otherwise prepares direct power-iteration state.

    Args:
        M: The (normalized) adjacency matrix.
        personalization: The personalization graph signal.
        ranks: The signal accumulating the result; zeroed here in the
            non-Krylov path.

    Raises:
        Exception: If the Krylov approximation's estimated relative error
            exceeds 0.01.
    """
    self.coefficient = None
    if self.coefficient_type == "chebyshev":
        self.prev_term = 0  # chebyshev recurrence needs the previous term
    if self.krylov_dims is not None:
        V, H = krylov_base(M, personalization.np, int(self.krylov_dims))
        self.krylov_base = V
        self.krylov_H = H
        self.zero_coefficient = self.coefficient
        self.krylov_result = H * 0
        self.Mpower = backend.eye(int(self.krylov_dims))
        error_bound = krylov_error_bound(V, H, M, personalization.np)
        if error_bound > 0.01:
            # Typo fixed in message: "lager" -> "larger".
            raise Exception("Krylov approximation with estimated relative error "
                            + str(error_bound)
                            + " > 0.01 is too rough to be meaningful (try on larger graphs)")
    else:
        self.ranks_power = personalization.np
        ranks.np = backend.repeat(0.0, backend.length(ranks.np))
def _run(self, personalization: GraphSignal, params: object, base=None, *args, **kwargs):
    """Ranks a personalization signal as an L1-normalized linear combination
    of basis vectors, or delegates to the parameterized ranker when the basis
    is "krylov".

    Args:
        personalization: The personalization graph signal.
        params: Combination coefficients; normalized by their absolute sum.
        base: Optional precomputed basis matrix; computed via Arnoldi
            iteration on the preprocessed graph when omitted.

    Returns:
        The resulting graph signal.
    """
    params = backend.to_primitive(params)
    total = backend.sum(backend.abs(params))
    if total != 0:
        params = params / total  # L1-normalize the coefficients
    if self.basis == "krylov":
        return self.ranker_generator(params).rank(personalization, *args, **kwargs)
    if base is None:
        M = self.ranker_generator(params).preprocessor(personalization.graph)
        base = arnoldi_iteration(M, personalization.np, len(params))[0]
    combination = 0
    for idx in range(backend.length(params)):
        combination = combination + params[idx] * base[:, idx]
    return to_signal(personalization, combination)
def _start(self, M, personalization, ranks, sensitive, *args, **kwargs):
    """Prepares fairness-aware iteration state: computes per-case mass
    redistribution vectors (dR, dB), rebalances the personalization between
    the sensitive and non-sensitive groups, and builds the redistribution
    targets xR/xB before delegating to the parent _start.

    Args:
        M: The (normalized) adjacency matrix.
        personalization: The personalization signal; rewritten in place.
        ranks: The result-accumulating signal.
        sensitive: The sensitive-group indicator, converted to a signal here.
    """
    sensitive = to_signal(ranks, sensitive)
    # Out-masses precomputed by self.normalization during preprocessing.
    outR = self.outR  # backend.conv(sensitive.np, M)
    outB = self.outB  # backend.conv(1.-sensitive.np, M)
    # Target fraction of mass for the sensitive group, scaled by target_prule.
    phi = backend.sum(sensitive.np) / backend.length(
        sensitive.np) * self.target_prule
    dR = backend.repeat(0., len(sensitive.graph))
    dB = backend.repeat(0., len(sensitive.graph))
    # case1: nodes under-serving the sensitive group; case2: over-serving with
    # nonzero sensitive mass; case3: remainder.
    # NOTE(review): case2/case3 are arithmetic 0/1 arrays used as indices —
    # relies on the backend treating them as masks. TODO confirm semantics.
    case1 = outR < phi * (outR + outB)
    case2 = (1 - case1) * (outR != 0)
    case3 = (1 - case1) * (1 - case2)
    dR[case1] = phi - (1 - phi) / outB[case1] * outR[case1]
    dR[case3] = phi
    dB[case2] = (1 - phi) - phi / outR[case2] * outB[case2]
    dB[case3] = 1 - phi
    # Rebalance the personalization so each group's mass sums to its target
    # share, then renormalize to a distribution.
    personalization.np = backend.safe_div(sensitive.np * personalization.np, backend.sum(sensitive.np)) * self.target_prule \
        + backend.safe_div(personalization.np * (1 - sensitive.np), backend.sum(1 - sensitive.np))
    personalization.np = backend.safe_div(personalization.np, backend.sum(personalization.np))
    L = sensitive.np
    # Choose how redistributed mass is allocated among nodes.
    if self.redistributor is None or self.redistributor == "uniform":
        original_ranks = 1
    elif self.redistributor == "original":
        original_ranks = PageRank(
            alpha=self.alpha,
            preprocessor=default_preprocessor(assume_immutability=False, normalization="col"),
            convergence=self.convergence)(personalization).np
    else:
        original_ranks = self.redistributor(personalization).np
    self.dR = dR
    self.dB = dB
    # Per-group redistribution targets, each normalized within its group.
    self.xR = backend.safe_div(original_ranks * L, backend.sum(original_ranks * L))
    self.xB = backend.safe_div(original_ranks * (1 - L), backend.sum(original_ranks * (1 - L)))
    super()._start(M, personalization, ranks, *args, **kwargs)
def _prepare_graph(self, graph, sensitive, *args, **kwargs):
    """Caches the sensitive-group signal and the target mass fraction phi
    (group share scaled by target_prule) before ranking.

    Args:
        graph: The graph being ranked.
        sensitive: The sensitive-group indicator data.

    Returns:
        The unchanged graph.
    """
    signal = to_signal(graph, sensitive)
    self.sensitive = signal
    group_share = backend.sum(signal.np) / backend.length(signal.np)
    self.phi = group_share * self.target_prule
    return graph
def evaluate(self, scores: GraphSignalData) -> BackendPrimitive:
    """Assesses scores as one minus their mean absolute error against the
    known scores.

    Args:
        scores: The scores to assess.

    Returns:
        1 - sum(|known - assessed|) / n, i.e. 1 for a perfect match.
    """
    known_scores, scores = self.to_numpy(scores)
    total_abs_error = backend.sum(backend.abs(known_scores - scores))
    return 1 - total_abs_error / backend.length(scores)