Example 1
 def to_numpy(
     self,
     scores: GraphSignalData,
     normalization: bool = False
 ) -> Union[Tuple[GraphSignal, GraphSignal], Tuple[BackendPrimitive,
                                                   BackendPrimitive]]:
     if isinstance(scores, numbers.Number) and isinstance(self.known_scores, numbers.Number):
         return backend.to_array([self.known_scores]), backend.to_array([scores])
     elif isinstance(scores, GraphSignal):
         known = to_signal(scores, self.known_scores).filter(exclude=self.exclude)
         return known, scores.normalized(normalization).filter(exclude=self.exclude)
     elif isinstance(self.known_scores, GraphSignal):
         known = self.known_scores.filter(exclude=self.exclude)
         return known, to_signal(self.known_scores, scores).normalized(normalization).filter(exclude=self.exclude)
     else:
         if self.exclude is not None:
             raise Exception("Graph signal scores or known_scores are needed to exclude specific nodes")
         scores = backend.to_array(scores, copy_array=normalization)
         if normalization:
             scores = backend.self_normalize(scores)
         return backend.to_array(self.known_scores), scores
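A minimal usage sketch of how this alignment is exercised through a supervised measure; the graph and seed choices below are illustrative:

import pygrank as pg
import networkx as nx

graph = nx.les_miserables_graph()
signal = pg.to_signal(graph, {v: 1 for v in list(graph)[:10]})
training, validation = pg.split(signal, 0.5)
scores = pg.PageRank()(graph, training)
# exclude=training hides training seeds, so only withheld nodes are scored;
# internally the measure relies on to_numpy to align both signals
print(pg.AUC(validation, exclude=training)(scores))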
Example 2
 def rank(self,
          graph: GraphSignalGraph = None,
          personalization: GraphSignalData = None,
          warm_start: GraphSignalData = None,
          graph_dropout: float = 0,
          *args,
          **kwargs) -> GraphSignal:
     personalization = to_signal(graph, personalization)
     self._prepare(personalization)
     personalization = self.personalization_transform(personalization)
     personalization_norm = backend.sum(backend.abs(personalization.np))
     if personalization_norm == 0:
         return personalization
     personalization = to_signal(personalization, personalization.np / personalization_norm)
     ranks = to_signal(personalization,
                       backend.copy(personalization.np) if warm_start is None else warm_start)
     M = self.preprocessor(personalization.graph)
     self.convergence.start()
     self._start(backend.graph_dropout(M, graph_dropout), personalization,
                 ranks, *args, **kwargs)
     while not self.convergence.has_converged(ranks.np):
         self._step(backend.graph_dropout(M, graph_dropout),
                    personalization, ranks, *args, **kwargs)
     self._end(backend.graph_dropout(M, graph_dropout), personalization,
               ranks, *args, **kwargs)
     ranks.np = ranks.np * personalization_norm
     return ranks
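This is the generic power-iteration skeleton that concrete algorithms plug their `_step` into. A minimal usage sketch with pygrank's PageRank; the graph and seeds are illustrative:

import pygrank as pg
import networkx as nx

graph = nx.karate_club_graph()
ranker = pg.PageRank(alpha=0.85, tol=1.E-9)  # damping factor and convergence tolerance
seeds = {0: 1, 33: 1}                        # personalization: two seed nodes
ranks = ranker(graph, seeds)                 # calling the ranker invokes rank()
print(ranks[0], ranks[16])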
Example 3
 def rank(self, graph, personalization, sensitive, *args, **kwargs):
     personalization = to_signal(graph, personalization)
     original_graph = personalization.graph
     graph = self._reweigh(original_graph, sensitive)
     personalization = to_signal(graph, dict(personalization.items()))
     ranks = self.ranker.rank(graph, personalization, *args, **kwargs)
     ranks.graph = original_graph
     return ranks
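Examples 3 and 9 read like two methods of the same edge-reweighing wrapper; a self-contained sketch that combines them under a hypothetical class name:

import pygrank as pg
import networkx as nx

class ReweighingWrapper:  # hypothetical name for the class hosting these methods
    def __init__(self, ranker):
        self.ranker = ranker

    def _reweigh(self, graph, sensitive):
        # upweigh edges leaving the underrepresented group (rule from Example 9)
        phi = sum(sensitive.values()) / len(graph)
        new_graph = graph.copy()
        for u, v, d in new_graph.edges(data=True):
            d["weight"] = 1. / (sensitive[u] * phi + (1 - sensitive[u]) * (1 - phi))
        return new_graph

    def rank(self, graph, personalization, sensitive):
        personalization = pg.to_signal(graph, personalization)
        original_graph = personalization.graph
        sensitive = pg.to_signal(original_graph, sensitive)
        graph = self._reweigh(original_graph, sensitive)
        ranks = self.ranker.rank(graph, dict(personalization.items()))
        ranks.graph = original_graph  # report ranks on the original graph
        return ranks

graph = nx.karate_club_graph()
ranks = ReweighingWrapper(pg.PageRank()).rank(graph, {0: 1}, {v: v % 2 for v in graph})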
Example 4
def split(groups: Union[GraphSignalData, Mapping[str, GraphSignalData]],
          training_samples: float = 0.8,
          seed: int = 0):
    """
    Splits a graph signal, an iterable, or a map of graph signals or iterables into two
    same-type objects holding training and test data respectively. For graph signals,
    withheld entries are masked to zero, and sampling is stratified between non-zero
    and zero values.

    Args:
        groups: The input data to split.
        training_samples: If less than 1, the fraction of data to use for training. If greater
            than 1, the absolute number of training data points. If exactly 1, the data are not split
            but are used for both training and testing. Default is 0.8, which uses 80% of the data
            for training and the remaining 20% for testing.
        seed: A seed to make the split reproducible. Default is 0.

    Returns:
        Data with the same organization as the *groups* argument.

    Example:
        >>> import pygrank as pg
        >>> training, test = pg.split(["A", "B", "C", "D"], training_samples=0.5)
    """
    if training_samples == 1:
        return groups, groups
    if isinstance(groups, GraphSignal):
        group = [v for v in groups if groups[v] != 0]
        if seed is not None:
            group = sorted(group)
        random.Random(seed).shuffle(group)
        if training_samples > 1:
            splt = int(training_samples)
        elif training_samples >= 0:
            splt = int(len(group) * training_samples)
        else:
            splt = len(group) + int(training_samples)
        training = to_signal(groups, {v: groups[v] for v in group[:splt]})
        test = to_signal(groups, {v: groups[v] for v in group[splt:]})
        return training, test
    if not isinstance(groups, collections.abc.Mapping):
        group = list(groups)
        if seed is not None:
            group = sorted(group)
        random.Random(seed).shuffle(group)
        if training_samples > 1:
            splt = int(training_samples)
        elif training_samples >= 0:
            splt = int(len(group) * training_samples)
        else:
            splt = len(group) + int(training_samples)
        return group[:splt], group[splt:]
    testing = {}
    training = {}
    for group_id, group in groups.items():
        training[group_id], testing[group_id] = split(group, training_samples,
                                                      seed)
    return training, testing
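Besides the doctest above, a sketch of the graph-signal branch, which stratifies over the signal's non-zero entries; the graph and seed count are illustrative:

import pygrank as pg
import networkx as nx

graph = nx.karate_club_graph()
signal = pg.to_signal(graph, {v: 1 for v in range(10)})  # 10 non-zero seeds
training, test = pg.split(signal, training_samples=0.5, seed=42)
# both outputs are graph signals over the full graph; the 10 seeds are
# divided disjointly, so each half should hold 5 of them
print(sum(training[v] for v in graph), sum(test[v] for v in graph))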
Example 5
 def evaluate(self, scores: GraphSignalData) -> BackendPrimitive:
     sensitive = to_signal(scores, self.sensitive)
     if self.exclude is not None:
         exclude = to_signal(sensitive, self.exclude)
         return Parity([
             self.measure(self.known_scores, 1 - (1 - exclude) * sensitive),
             self.measure(self.known_scores,
                          1 - (1 - exclude) * (1 - sensitive))
         ]).evaluate(scores)
     else:
         # mirror the branch above with nothing excluded: evaluate the measure
         # separately on the sensitive and the non-sensitive group
         return Parity([
             self.measure(self.known_scores, 1 - sensitive),
             self.measure(self.known_scores, sensitive)
         ]).evaluate(scores)
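A hedged usage sketch of such a mistreatment measure, with the constructor signature borrowed from Example 19; the graph, labels, and group assignment are illustrative:

import pygrank as pg
import networkx as nx

graph = nx.karate_club_graph()
labels = pg.to_signal(graph, {v: 1 for v in range(8)})
sensitive = pg.to_signal(graph, {v: v % 2 for v in graph})
training, validation = pg.split(labels, 0.5)
scores = pg.PageRank()(graph, training)
# parity of per-group AUCs, ignoring training nodes
print(pg.Mistreatment(validation, sensitive, exclude=training, measure=pg.AUC)(scores))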
Example 6
 def _transform(self, ranks: GraphSignal, sensitive: GraphSignal):
     sensitive = to_signal(ranks, sensitive)
     phi = sum(sensitive.values())/len(ranks)
     if self.method == "O" or self.method == "LFPRO":
         ranks = Normalize("sum").transform(ranks)
         sumR = sum(ranks[v] * sensitive.get(v, 0) for v in ranks)
         sumB = sum(ranks[v] * (1 - sensitive.get(v, 0)) for v in ranks)
         numR = sum(sensitive.values())
         numB = len(ranks) - numR
         if sumR < phi:
             red = self.__distribute(phi - sumR, ranks, {v: 1 - sensitive.get(v, 0) for v in ranks})
             ranks = {v: red.get(v, ranks[v] + (phi - sumR) / numR) for v in ranks}
         elif sumB < 1-phi:
             red = self.__distribute(1-phi - sumB, ranks, {v: sensitive.get(v, 0) for v in ranks})
             ranks = {v: red.get(v, ranks[v] + (1-phi - sumB) / numB) for v in ranks}
     elif self.method == "B" or self.method == "mult":
         sumR = sum(ranks[v]*sensitive.get(v, 0) for v in ranks)
         sumB = sum(ranks[v]*(1-sensitive.get(v, 0)) for v in ranks)
         sum_total = sumR + sumB
         sumR /= sum_total
         sumB /= sum_total
         ranks = {v: ranks[v]*(phi*sensitive.get(v, 0)/sumR+(1-phi)*(1-sensitive.get(v, 0))/sumB) for v in ranks}
     else:
         raise Exception("Invalid fairness postprocessing method "+self.method)
     return ranks
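A usage sketch under the assumption that pygrank's AdHocFairness postprocessor exposes these "O"/"B" methods with the constructor below:

import pygrank as pg
import networkx as nx

graph = nx.karate_club_graph()
sensitive = {v: v % 2 for v in graph}
# "B" (mult) rescales ranks so each group's total rank matches its share phi;
# "O" (LFPRO) redistributes surplus rank mass instead
fair = pg.AdHocFairness(pg.PageRank(), method="B")
ranks = fair.rank(graph, {0: 1, 1: 1}, sensitive=sensitive)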
Example 7
def load_feature_dataset(dataset: str,
                         path: Union[str, Iterable[str]] = (os.path.join(os.path.expanduser('~'), '.pygrank/data'), ".", "data"),
                         groups_no_labels: bool = False,
                         **kwargs):
    """
    Imports a dataset comprising node features. Features and labels are organized as numpy matrices.
    The dataset is automatically downloaded first if it is not found locally.

    Args:
        dataset: The dataset's name. Corresponds to a folder name in which the dataset is stored.
        path: The dataset's path in which *dataset* is a folder, or a list of paths in which to search.
            The first of these will be set as the preferred download location if the dataset is not
            found and can be downloaded. Default is a list comprising the path where pygrank's settings file resides,
            "." and "data".
        groups_no_labels: Optional. If True, the raw groups dictionary is returned in place of the
            one-hot label matrix. Default is False.
        kwargs: Optional. Additional arguments to pass to *import_snap_format_dataset*.
    Returns:
        graph: A graph of node relations. Nodes are indexed in the order the graph is traversed.
        features: A column-normalized numpy matrix whose rows correspond to node features.
        labels: A numpy matrix whose rows correspond to one-hot encodings of node labels.
    """
    graph, groups = call(import_snap_format_dataset, kwargs, [dataset, path])
    features = call(_import_features, kwargs, [dataset, path])
    feature_dims = len(features[list(features.keys())[0]])
    features = np.array([features.get(v, [0] * feature_dims) for v in graph], dtype=np.float64)
    features = _preprocess_features(features)
    labels = groups if groups_no_labels else np.array([to_signal(graph, group).np for group in groups.values()], dtype=np.float64).transpose()
    return graph, features, labels
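A usage sketch; the dataset name is illustrative and the download happens on first use:

import pygrank as pg

# returns (networkx graph, node-feature matrix, one-hot label matrix)
graph, features, labels = pg.load_feature_dataset("citeseer")
print(features.shape, labels.shape)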
Example 8
 def rank(self,
          graph: GraphSignalGraph = None,
          personalization: GraphSignalData = None,
          **kwargs):
     personalization = to_signal(graph, personalization)
     graph = personalization.graph
     assert_binary(personalization)
     if self.method == 'safe':
         ranks = self.ranker.rank(graph, personalization, **kwargs)
         threshold = min(ranks[u] for u in personalization if personalization[u] == 1)
         personalization = {v: 1 for v in graph.nodes() if ranks[v] >= threshold}
         return self.ranker.rank(graph, personalization, **kwargs)
     elif self.method == 'top':
         ranks = self.ranker.rank(graph, personalization, **kwargs)
         top = int(graph.number_of_nodes() * graph.number_of_nodes() / graph.number_of_edges())
         threshold = np.sort(list(ranks.values()))[len(ranks) - top]  # get top ranks
         personalization = {v: 1. for v in graph.nodes() if ranks[v] >= threshold or personalization.get(v, 0) == 1}
         return self.ranker.rank(graph, personalization, **kwargs)
     elif self.method == 'neighbors':
         personalization = dict(personalization.items())
         for u in [u for u in personalization if personalization[u] == 1]:
             for v in graph.neighbors(u):
                 personalization[v] = 1.
         return self.ranker.rank(graph, personalization, **kwargs)
     else:
         raise Exception("Supported oversampling methods: safe, neighbors, top")
Example 9
 def _reweigh(self, graph, sensitive):
     sensitive = to_signal(graph, sensitive)
     if not getattr(self, "reweighs", None):
         self.reweighs = dict()
     if graph not in self.reweighs:
         phi = sum(sensitive.values())/len(graph)
         new_graph = graph.copy()
         for u, v, d in new_graph.edges(data=True):
             d["weight"] = 1./(sensitive[u]*phi+(1-sensitive[u])*(1-phi))
         self.reweighs[graph] = new_graph
     return self.reweighs[graph]
Example 10
 def _start(self, M, personalization, ranks, absorption=None, **kwargs):
     self.degrees = backend.degrees(M)
     if self.symmetric:
         self.absorption = (1 + (1 + 4 * self.degrees)**0.5) / 2
     else:
         self.absorption = to_signal(
             personalization.graph,
             absorption).np * (1 - self.alpha) / self.alpha
     self.personalization_skew = self.absorption / (self.absorption +
                                                    self.degrees)
     self.sqrt_degrees = (self.degrees / (self.absorption + self.degrees))
     self.sqrt_degrees_left = 1. / self.absorption
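In the symmetric branch, the absorption rate is the positive root of a**2 - a = d for each node degree d, which is exactly what the formula above computes; a quick numeric check:

import numpy as np

d = np.array([1., 2., 5.])        # node degrees
a = (1 + np.sqrt(1 + 4 * d)) / 2  # the symmetric absorption formula from _start
assert np.allclose(a * (a - 1), d)  # a solves a**2 - a - d = 0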
Example 11
 def rank(self,
          graph: GraphSignalGraph = None,
          personalization: GraphSignalData = None,
          **kwargs):
     personalization = to_signal(graph, personalization)
     r0_N = personalization.normalized(False)
     RN = self.ranker.rank(graph, r0_N, **kwargs)
     a_N = 1
     sum_a_N = 1
     self._weight_convergence.start()
     while not self._weight_convergence.has_converged(a_N):
         if self._oversample_from_iteration == 'previous':
             threshold = min(RN[u] for u in r0_N if r0_N[u] == 1)
         elif self._oversample_from_iteration == 'original':
             threshold = min(RN[u] for u in personalization if personalization[u] == 1)
         else:
             raise Exception("Boosting only supports oversampling from iterations: previous, original")
         r0_N = {u: 1 for u in RN if RN[u] >= threshold}
         Rr0_N = self.ranker.rank(graph, r0_N, **kwargs)
         a_N = self._boosting_weight(r0_N, Rr0_N, RN)
         RN = to_signal(RN, [RN.get(u, 0) + a_N*Rr0_N[u] for u in graph])
         sum_a_N += a_N
     return RN
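Assuming this is exposed as pygrank's BoostedSeedOversampling, a minimal usage sketch:

import pygrank as pg
import networkx as nx

graph = nx.karate_club_graph()
# iteratively re-seeds from top-ranked nodes and accumulates a_N-weighted reruns
ranker = pg.BoostedSeedOversampling(pg.PageRank())
ranks = ranker.rank(graph, {0: 1, 2: 1})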
Example 12
    def _start(self, M, personalization, ranks, sensitive, *args, **kwargs):
        sensitive = to_signal(ranks, sensitive)
        outR = self.outR  # backend.conv(sensitive.np, M)
        outB = self.outB  # backend.conv(1.-sensitive.np, M)
        phi = backend.sum(sensitive.np) / backend.length(
            sensitive.np) * self.target_prule
        dR = backend.repeat(0., len(sensitive.graph))
        dB = backend.repeat(0., len(sensitive.graph))

        # boolean masks ensure the indexed assignments below select nodes
        # rather than array positions 0 and 1
        case1 = outR < phi * (outR + outB)
        case2 = (~case1) & (outR != 0)
        case3 = (~case1) & (~case2)
        dR[case1] = phi - (1 - phi) / outB[case1] * outR[case1]
        dR[case3] = phi
        dB[case2] = (1 - phi) - phi / outR[case2] * outB[case2]
        dB[case3] = 1 - phi

        personalization.np = backend.safe_div(sensitive.np * personalization.np, backend.sum(sensitive.np)) * self.target_prule \
                             + backend.safe_div(personalization.np * (1 - sensitive.np), backend.sum(1 - sensitive.np))
        personalization.np = backend.safe_div(personalization.np,
                                              backend.sum(personalization.np))
        L = sensitive.np
        if self.redistributor is None or self.redistributor == "uniform":
            original_ranks = 1
        elif self.redistributor == "original":
            original_ranks = PageRank(
                alpha=self.alpha,
                preprocessor=default_preprocessor(assume_immutability=False,
                                                  normalization="col"),
                convergence=self.convergence)(personalization).np
        else:
            original_ranks = self.redistributor(personalization).np

        self.dR = dR
        self.dB = dB
        self.xR = backend.safe_div(original_ranks * L,
                                   backend.sum(original_ranks * L))
        self.xB = backend.safe_div(original_ranks * (1 - L),
                                   backend.sum(original_ranks * (1 - L)))
        super()._start(M, personalization, ranks, *args, **kwargs)
Example 13
    def rank(self,
             G: GraphSignalGraph,
             personalization: GraphSignalData,
             sensitive: GraphSignalData, *args, **kwargs):
        personalization = to_signal(G, personalization)
        G = personalization.graph
        if self.parity_type == "impact":
            self.pRule = pRule(sensitive)
        elif self.parity_type == "TPR":
            self.pRule = Parity([TPR(personalization, exclude=1-sensitive.np),
                                 TPR(personalization, exclude=1-(1-sensitive.np))])
        elif self.parity_type == "TNR":
            self.pRule = Parity([TNR(personalization, exclude=1 - sensitive.np),
                                 TNR(personalization, exclude=1 - (1 - sensitive.np))])
        elif self.parity_type == "mistreatment":
            self.pRule = AM([Parity([TPR(personalization, exclude=1-sensitive.np),
                                     TPR(personalization, exclude=1-(1-sensitive.np))]),
                             Parity([TNR(personalization, exclude=1 - sensitive.np),
                                     TNR(personalization, exclude=1 - (1 - sensitive.np))])
                            ])
        else:
            raise Exception("Invalid parity type "+self.parity_type+": expected impact, TPR, TNR or mistreatment")
        sensitive, personalization = pRule(sensitive).to_numpy(personalization)
        ranks = self.ranker.rank(G, personalization, *args, **kwargs)

        def loss(params):
            fair_pers = self.__culep(personalization, sensitive, ranks, params)
            fair_ranks = self.ranker.rank(G, personalization=fair_pers, *args, **kwargs)
            return self.__prule_loss(fair_ranks, ranks, sensitive, personalization)

        optimal_params = optimize(loss,
                                  max_vals=[1, 1, 10, 10] * self.parameter_buckets + [self.max_residual],
                                  min_vals=[0, 0, -10, -10]*self.parameter_buckets+[0],
                                  deviation_tol=1.E-3,
                                  divide_range=1.5,
                                  partitions=5,
                                  depth=2)
        optimal_personalization = self.__culep(personalization, sensitive, ranks, optimal_params)
        return self.ranker.rank(G, optimal_personalization, *args, **kwargs)
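A hedged usage sketch, assuming this optimizer-driven scheme is exposed as pygrank's FairPersonalizer with the default constructor:

import pygrank as pg
import networkx as nx

graph = nx.karate_club_graph()
sensitive = {v: v % 2 for v in graph}
# edits the personalization so that ranks trade fairness (pRule) against
# faithfulness to the original ranks
fair = pg.FairPersonalizer(pg.PageRank())
ranks = fair.rank(graph, {0: 1, 5: 1}, sensitive=sensitive)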
Example 14
 def transform(self, ranks: GraphSignal, *args, **kwargs):
     nodes = [u for u in ranks if ranks[u] != 0]
     graph = ranks.graph.subgraph(nodes)
     graph._pygrank_original_graph = ranks.graph
     return to_signal(graph, {u: ranks[u] for u in nodes})
Example 15
 def _prepare_graph(self, graph, sensitive, *args, **kwargs):
     sensitive = to_signal(graph, sensitive)
     self.sensitive = sensitive
     self.phi = backend.sum(sensitive.np) / backend.length(
         sensitive.np) * self.target_prule
     return graph
Example 16
 def _start(self, M, personalization, ranks, absorption=None, **kwargs):
     self.absorption = to_signal(personalization.graph, absorption) * (
         (1 - self.alpha) / self.alpha)
     self.degrees = backend.degrees(M)
Example 17
    def _tune(self, graph=None, personalization=None, *args, **kwargs):
        previous_backend = backend.backend_name()
        personalization = to_signal(graph, personalization)
        if self.tuning_backend is not None and self.tuning_backend != previous_backend:
            backend.load_backend(self.tuning_backend)
        backend_personalization = to_signal(
            graph, backend.to_array(personalization.np))
        total_params = list()
        for seed0 in range(self.cross_validate):
            fraction_of_training = (self.fraction_of_training
                                    if isinstance(self.fraction_of_training, Iterable)
                                    else [self.fraction_of_training])
            internal_training_list = list()
            validation_list = list()
            for seed, fraction in enumerate(fraction_of_training):
                training, validation = split(backend_personalization, fraction,
                                             seed0 + seed)
                internal_training = training
                if self.pre_diffuse is not None:
                    internal_training = self.pre_diffuse(internal_training)
                    validation = self.pre_diffuse(validation)
                internal_training_list.append(internal_training)
                validation_list.append(validation)

            def eval(params):
                val = 0
                for internal_training, validation in zip(internal_training_list, validation_list):
                    measure = self.measure(validation,
                                           internal_training if internal_training != validation else None)
                    val = val - measure.best_direction() * measure.evaluate(
                        self._run(internal_training, params, *args, **kwargs))
                return val / len(internal_training_list)

            best_params = self.optimizer(eval, **self.optimize_args)
            total_params.append(best_params)
        # aggregate across cross-validation repetitions by averaging parameters
        best_means = [0 for _ in best_params]
        for params in total_params:
            for i in range(len(best_means)):
                best_means[i] += params[i] / self.cross_validate
        best_params = best_means

        if self.tuning_backend is not None and self.tuning_backend != previous_backend:
            backend.load_backend(previous_backend)
            # TODO: make training back-propagate through tensorflow for combined_prediction==False (do this with a gather in the split method)
        self.last_params = best_params
        return self.ranker_generator(best_params), (
            personalization if self.combined_prediction else internal_training)
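A usage sketch of the tuner around this cross-validation loop, assuming the default ParameterTuner constructor; the graph and seeds are illustrative:

import pygrank as pg
import networkx as nx

graph = nx.karate_club_graph()
seeds = {v: 1 for v in range(6)}
# the default tuner splits the seeds internally and optimizes filter
# parameters on validation data with the loop shown above
tuned = pg.ParameterTuner()
ranks = tuned.rank(graph, seeds)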
Example 18
def benchmark(algorithms: Mapping[str, NodeRanking],
              datasets: Any,
              metric: Union[Callable[[nx.Graph], Measure], Callable[[GraphSignal, GraphSignal], Measure]] = AUC,
              fraction_of_training: Union[float, Iterable[float]] = 0.5,
              sensitive: Optional[Union[Callable[[nx.Graph], Measure], Callable[[GraphSignal, GraphSignal], Measure]]] = None,
              seed: Union[int, Iterable[int]] = 0):
    """
    Compares the outcome of provided algorithms on given datasets using a desired metric.

    Args:
        algorithms: A map from names to node ranking algorithms to compare.
        datasets: A list of datasets on which to compare the algorithms. List elements should either be strings
            or (string, num) tuples indicating the dataset name and the number of the community of interest, respectively.
        metric: A method to instantiate a measure type to assess the efficacy of algorithms with.
        fraction_of_training: The fraction of training samples to split on. The rest are used for testing. An
            iterable of floats can also be provided to experiment with multiple fractions.
        sensitive: Optional. A generator of sensitivity-aware supervised or unsupervised measures.
            Could be None (default).
        seed: A seed to ensure reproducibility. Default is 0. An iterable of seeds can also be provided to experiment
            with multiple seeds.
    Returns:
        Yields an array of outcomes. Meant to be used with wrapping methods, such as benchmark_print.
    Example:
        >>> import pygrank as pg
        >>> algorithms = ...
        >>> datasets = ...
        >>> pg.benchmark_print(pg.benchmark(algorithms, datasets))
    """
    if sensitive is not None:
        yield [""] + [algorithm for algorithm in algorithms for suffix in [metric.__name__, sensitive.__name__]]
        yield [""] + [suffix for algorithm in algorithms for suffix in [metric.__name__, sensitive.__name__]]
    else:
        yield [""] + [algorithm for algorithm in algorithms]
    seeds = [seed] if isinstance(seed, int) else seed
    fraction_of_training = [fraction_of_training] if isinstance(fraction_of_training, float) else fraction_of_training
    for name, graph, group in datasets:
        for training_samples in fraction_of_training:
            for seed in seeds:
                multigroup = isinstance(group, collections.abc.Mapping) and not isinstance(group, GraphSignal)
                training, evaluation = split(group, training_samples=training_samples, seed=seed)
                if sensitive is None and multigroup:
                    training = {group_id: to_signal(graph,{v: 1 for v in group}) for group_id, group in training.items()}
                    evaluation = {group_id: to_signal(graph,{v: 1 for v in group}) for group_id, group in evaluation.items()}
                    rank = lambda algorithm: {group_id: algorithm(graph, group) for group_id, group in training.items()}
                else:
                    if multigroup:
                        training = training[0]
                        evaluation = evaluation[0]
                        sensitive_signal = to_signal(graph, {v: 1 for v in group[max(group.keys())]})
                        training, evaluation = to_signal(graph, {v: 1 for v in training}), to_signal(graph, {v: 1 for v in evaluation})
                    else:
                        training, evaluation = to_signal(graph, {v: 1 for v in training}), to_signal(graph, {v: 1 for v in evaluation})
                    if sensitive is not None:
                        if not multigroup:
                            sensitive_signal = to_signal(training, 1-evaluation.np)
                        #training.np = training.np*(1-sensitive_signal.np)
                        rank = lambda algorithm: algorithm(graph, training, sensitive=sensitive_signal)
                    else:
                        rank = lambda algorithm: algorithm(graph, training)
                dataset_results = [name]
                for algorithm in algorithms.values():
                    if metric == Time:
                        tic = time()
                        predictions = rank(algorithm)
                        dataset_results.append(time()-tic)
                    else:
                        predictions = rank(algorithm)
                        try:
                            dataset_results.append(metric(graph)(predictions))
                        except:
                            dataset_results.append(metric(evaluation, training)(predictions))
                    if sensitive is not None:
                        try:
                            dataset_results.append(sensitive(sensitive_signal, training)(predictions))
                        except:
                            dataset_results.append(sensitive(evaluation, sensitive_signal, training)(predictions))
                yield dataset_results
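A fuller usage sketch than the doctest above; the dataset list and loader are illustrative and downloaded on first use:

import pygrank as pg

algorithms = {"ppr0.85": pg.PageRank(alpha=0.85),
              "ppr0.99": pg.PageRank(alpha=0.99)}
# each dataset yields a (name, graph, group) tuple, as the loop above expects
datasets = pg.load_datasets_one_community(["EUCore"])
pg.benchmark_print(pg.benchmark(algorithms, datasets, metric=pg.AUC))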
Example 19
    def rank(self, graph: GraphSignalGraph, personalization: GraphSignalData,
             sensitive: GraphSignalData, *args, **kwargs):
        #from pygrank import split
        personalization = to_signal(graph, personalization)
        training, validation = None, None  #split(personalization, 1)
        graph = personalization.graph
        if self.parity_type == "impact":
            fairness_measure = pRule(sensitive, exclude=training)
        elif self.parity_type == "TPR":
            fairness_measure = Mistreatment(validation,
                                            sensitive,
                                            exclude=training,
                                            measure=TPR)
        elif self.parity_type == "TNR":
            fairness_measure = Mistreatment(validation,
                                            sensitive,
                                            exclude=training,
                                            measure=TNR)
        elif self.parity_type == "mistreatment":
            fairness_measure = AM([
                Mistreatment(validation,
                             sensitive,
                             exclude=training,
                             measure=TPR),
                Mistreatment(personalization,
                             sensitive,
                             exclude=training,
                             measure=TNR)
            ])
        else:
            raise Exception("Invalid parity type " + self.parity_type +
                            ": expected impact, TPR, TNR or mistreatment")
        training = personalization
        sensitive, personalization = pRule(sensitive).to_numpy(personalization)
        original_ranks = self.ranker.rank(graph, personalization, *args,
                                          **kwargs)

        def loss(params):
            fair_pers = self.__culep(training.np, sensitive, original_ranks,
                                     params)
            fair_ranks = self.ranker.rank(graph,
                                          personalization=fair_pers,
                                          *args,
                                          **kwargs)
            fairness_loss = fairness_measure(fair_ranks)
            # ranks = ranks.np / backend.max(ranks.np)
            # original_ranks = original_ranks.np / backend.max(original_ranks.np)
            error = self.error_type(original_ranks, training)
            error_value = error(fair_ranks)
            return - self.retain_rank_weight * error_value * error.best_direction() \
                   - self.pRule_weight * min(self.target_pRule, fairness_loss) - 0.1 * fairness_loss

        optimal_params = nelder_mead(
            loss,
            max_vals=[1, 1, 3, 3] * self.parameter_buckets +
            [self.max_residual],
            min_vals=[0, 0, -3, -3] * self.parameter_buckets + [0],
            deviation_tol=1.E-8,
            parameter_tol=1.E-8)
        optimal_personalization = self.__culep(personalization, sensitive,
                                               original_ranks, optimal_params)
        return self.ranker.rank(graph, optimal_personalization, *args,
                                **kwargs)
Example 20
 def transform(self, ranks: GraphSignal, *args, **kwargs):
     return to_signal(ranks.graph._pygrank_original_graph,
                      {u: ranks[u]
                       for u in ranks})
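Examples 14 and 20 are inverse transforms: the first restricts a signal to the induced subgraph of its non-zero nodes, the second maps ranks back to the original graph via the stashed _pygrank_original_graph attribute. A sketch chaining them, under the assumption that they are exposed as pygrank's Subgraph and Supergraph postprocessors:

import pygrank as pg
import networkx as nx

graph = nx.karate_club_graph()
signal = pg.to_signal(graph, {v: 1 for v in range(5)})
sub = pg.Subgraph().transform(signal)    # Example 14: induced subgraph of non-zero nodes
ranks = pg.PageRank()(sub)               # rank within the smaller graph
back = pg.Supergraph().transform(ranks)  # Example 20: map back to the original graph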