Example #1
def gnn_accuracy(labels, predictions, nodes):
    if backend.backend_name() == "tensorflow":
        return _gnn_accuracy_tf(labels, predictions, nodes)
    elif backend.backend_name() == "pytorch":
        return _gnn_accuracy_torch(labels, predictions, nodes)
    raise Exception(
        "GNN accuracy is supported only for tensorflow and pytorch backends")
Example #2
def gnn_train(*args, **kwargs):
    if backend.backend_name() == "tensorflow":
        return _gnn_train_tf(*args, **kwargs)
    elif backend.backend_name() == "pytorch":
        return _gnn_train_torch(*args, **kwargs)
    raise Exception(
        "GNN training is supported only for tensorflow and pytorch backends")
Example #3
def _idfier(*args, **kwargs):
    """
    Converts args and kwargs into a hashable string of object ids.
    """
    return "[" + ",".join(obj2id(arg) for arg in args) + "]" + "{" + ",".join(
        v + ":" + obj2id(kwarg)
        for v, kwarg in kwargs.items()) + "}" + backend.backend_name()
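_idfier builds a deterministic string key out of the positional arguments, the keyword arguments, and the active backend name, which makes it suitable as a memoization key. Below is a self-contained sketch of how such a key could drive caching; obj2id is stood in by the CPython id(), and the cached decorator is hypothetical rather than pygrank's actual caching code.

# Hypothetical sketch: cache results keyed by an _idfier-like string.
_cache = {}

def obj2id(obj):
    # Stand-in for the library's obj2id; the CPython object id suffices here.
    return str(id(obj))

def idfier(*args, **kwargs):
    return ("[" + ",".join(obj2id(arg) for arg in args) + "]"
            + "{" + ",".join(k + ":" + obj2id(v) for k, v in kwargs.items()) + "}")

def cached(func):
    def wrapper(*args, **kwargs):
        key = func.__name__ + idfier(*args, **kwargs)
        if key not in _cache:
            _cache[key] = func(*args, **kwargs)
        return _cache[key]
    return wrapper

@cached
def expensive(values, power=2):
    return sum(v ** power for v in values)

data = [1, 2, 3]
print(expensive(data), expensive(data))  # the second call is served from the cache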
Example #4
 def _tune(self, graph=None, personalization=None, *args, **kwargs):
     previous_backend = backend.backend_name()
     personalization = to_signal(graph, personalization)
     if self.tuning_backend is not None and self.tuning_backend != previous_backend:
         backend.load_backend(self.tuning_backend)
     backend_personalization = to_signal(
         graph, backend.to_array(personalization.np))
     prev_dropout = kwargs.get("graph_dropout")
     kwargs["graph_dropout"] = 0
     best_value = -float('inf')
     best_ranker = None
     fraction_of_training = self.fraction_of_training if isinstance(
         self.fraction_of_training,
         Iterable) else [self.fraction_of_training]
     for ranker in self.rankers:
         values = list()
         for seed, fraction in enumerate(fraction_of_training):
             training, validation = split(backend_personalization,
                                          fraction,
                                          seed=seed)
             measure = self.measure(validation, training)
             values.append(
                 measure.best_direction() *
                 measure.evaluate(ranker.rank(training, *args, **kwargs)))
         value = np.min(values)
         if value > best_value:
             best_value = value
             best_ranker = ranker
     if self.tuning_backend is not None and self.tuning_backend != previous_backend:
         backend.load_backend(previous_backend)
         # TODO: make training back-propagate through tensorflow for combined_prediction==False
     kwargs["graph_dropout"] = prev_dropout
     return best_ranker, personalization if self.combined_prediction else training
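This tuner splits the personalization signal once per training fraction, scores every candidate ranker with the configured measure on each split, and keeps the ranker whose worst (minimum) score across splits is highest. A self-contained sketch of that selection logic with plain numpy follows; the candidate "rankers" and the measure are toy stand-ins, not pygrank objects.

import numpy as np

# Hypothetical candidates: each maps a training vector to node scores.
candidates = {
    "damped": lambda training: 0.85 * training + 0.15,
    "uniform": lambda training: np.ones_like(training),
}

def measure(validation, scores):
    # Toy measure (higher is better); pygrank would use e.g. AUC or NDCG.
    return -float(np.mean((validation - scores) ** 2))

signal = np.random.default_rng(0).random(100)

best_value, best_name = -float("inf"), None
for name, ranker in candidates.items():
    values = []
    for seed in range(3):  # several train/validation splits
        mask = np.random.default_rng(seed).random(signal.size) < 0.5
        training, validation = signal * mask, signal * (1 - mask)
        values.append(measure(validation, ranker(training)))
    value = min(values)  # a candidate is judged by its worst split
    if value > best_value:
        best_value, best_name = value, name

print("selected:", best_name)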
Example #5
 def _tune(self, graph=None, personalization=None, *args, **kwargs):
     previous_backend = backend.backend_name()
     personalization = to_signal(graph, personalization)
     if self.tuning_backend is not None and self.tuning_backend != previous_backend:
         backend.load_backend(self.tuning_backend)
     backend_personalization = to_signal(
         graph, backend.to_array(personalization.np))
     training, validation = split(backend_personalization,
                                  self.fraction_of_training)
     measure = self.measure(validation, training)
     best_params = optimize(
         lambda params: -measure.best_direction() * measure.evaluate(
             self._run(training, params, *args, **kwargs)),
         **self.optimize_args)
     if self.tuning_backend is not None and self.tuning_backend != previous_backend:
         backend.load_backend(previous_backend)
         # TODO: make training back-propagate through tensorflow for combined_prediction==False (do this with a gather in the split method)
     return self.ranker_generator(
         best_params
     ), personalization if self.combined_prediction else training
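Instead of choosing among fixed rankers, this variant performs a single split and delegates to a black-box optimize call that searches the continuous parameter space accepted by self._run, optimizing an objective built from the negated measure. The sketch below illustrates the same idea with a tiny grid search standing in for pygrank's optimizer; run, loss, and grid_optimize are hypothetical names.

import itertools
import numpy as np

def run(training, params):
    # Hypothetical parameterized ranker: blend the training signal with a constant prior.
    alpha, prior = params
    return alpha * training + (1 - alpha) * prior

def loss(validation, scores):
    return float(np.mean((validation - scores) ** 2))

def grid_optimize(objective, grids):
    """Tiny stand-in for a black-box optimizer: exhaustively evaluate a parameter grid."""
    return min(itertools.product(*grids), key=objective)

rng = np.random.default_rng(0)
signal = rng.random(50)
mask = rng.random(signal.size) < 0.5
training, validation = signal * mask, signal * (1 - mask)

best_params = grid_optimize(
    lambda params: loss(validation, run(training, params)),
    grids=[np.linspace(0, 1, 11), np.linspace(0, 1, 11)],
)
print("best parameters:", best_params)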
Example #6
 def _tune(self, graph=None, personalization=None, *args, **kwargs):
     previous_backend = backend.backend_name()
     personalization = to_signal(graph, personalization)
     if self.tuning_backend is not None and self.tuning_backend != previous_backend:
         backend.load_backend(self.tuning_backend)
     backend_personalization = to_signal(
         graph, backend.to_array(personalization.np))
     training, validation = split(backend_personalization,
                                  self.fraction_of_training)
     measure = self.measure(validation, training)
     best_value = -float('inf')
     best_ranker = None
     for ranker in self.rankers:
         value = measure.best_direction() * measure.evaluate(
             ranker.rank(training, *args, **kwargs))
         if value > best_value:
             best_value = value
             best_ranker = ranker
     if self.tuning_backend is not None and self.tuning_backend != previous_backend:
         backend.load_backend(previous_backend)
         # TODO: make training back-propagate through tensorflow for combined_prediction==False
     return best_ranker, personalization if self.combined_prediction else training
Example #7
    def _tune(self, graph=None, personalization=None, *args, **kwargs):
        #graph_dropout = kwargs.get("graph_dropout", 0)
        #kwargs["graph_dropout"] = 0
        previous_backend = backend.backend_name()
        personalization = to_signal(graph, personalization)
        graph = personalization.graph
        if self.tuning_backend is not None and self.tuning_backend != previous_backend:
            backend.load_backend(self.tuning_backend)
        backend_personalization = to_signal(
            personalization, backend.to_array(personalization.np))
        #training, validation = split(backend_personalization, 0.8)
        #training2, validation2 = split(backend_personalization, 0.6)
        #measure_weights = [1, 1, 1, 1, 1]
        #propagated = [training.np, validation.np, backend_personalization.np, training2.np, validation2.np]

        measure_values = [None] * (self.num_parameters + self.autoregression)
        M = self.ranker_generator(measure_values).preprocessor(graph)

        #for _ in range(10):
        #    backend_personalization.np = backend.conv(backend_personalization.np, M)
        training, validation = split(backend_personalization, 0.8)
        training1, training2 = split(training, 0.5)

        propagated = [training1.np, training2.np]
        measures = [
            self.measure(backend_personalization, training1),
            self.measure(backend_personalization, training2)
        ]
        #measures = [self.measure(validation, training), self.measure(training, validation)]

        if self.basis == "krylov":
            for i in range(len(measure_values)):
                measure_values[i] = [
                    measure(p) for p, measure in zip(propagated, measures)
                ]
                propagated = [backend.conv(p, M) for p in propagated]
        else:
            basis = [
                arnoldi_iteration(M, p, len(measure_values))[0]
                for p in propagated
            ]
            for i in range(len(measure_values)):
                measure_values[i] = [
                    float(measure(base[:, i]))
                    for base, measure in zip(basis, measures)
                ]
        measure_values = backend.to_primitive(measure_values)
        mean_value = backend.mean(measure_values, axis=0)
        measure_values = measure_values - mean_value
        best_parameters = measure_values
        measure_weights = [1] * measure_values.shape[1]
        if self.autoregression != 0:
            #vals2 = -measure_values-mean_value
            #measure_values = np.concatenate([measure_values, vals2-np.mean(vals2, axis=0)], axis=1)
            window = backend.repeat(1. / self.autoregression,
                                    self.autoregression)
            beta1 = 0.9
            beta2 = 0.999
            beta1t = 1
            beta2t = 1
            rms = window * 0
            momentum = window * 0
            error = float('inf')
            while True:
                beta1t *= beta1
                beta2t *= beta2
                prev_error = error
                parameters = backend.copy(measure_values)
                for i in range(len(measure_values) - len(window) - 2, -1, -1):
                    parameters[i, :] = backend.dot(
                        (window),
                        measure_values[(i + 1):(i + len(window) + 1), :])
                errors = (parameters - measure_values
                          ) * measure_weights / backend.sum(measure_weights)
                for j in range(len(window)):
                    gradient = 0
                    for i in range(len(measure_values) - len(window) - 1):
                        gradient += backend.dot(measure_values[i + j + 1, :],
                                                errors[i, :])
                    momentum[j] = beta1 * momentum[j] + (
                        1 - beta1) * gradient  #*np.sign(window[j])
                    rms[j] = beta2 * rms[j] + (1 - beta2) * gradient * gradient
                    window[j] -= 0.01 * momentum[j] / (1 - beta1t) / (
                        (rms[j] / (1 - beta2t))**0.5 + 1.E-8)
                    #window[j] -= 0.01*gradient*np.sign(window[j])
                error = backend.mean(backend.abs(errors))
                if error == 0 or abs(error - prev_error) / error < 1.E-6:
                    best_parameters = parameters
                    break
        best_parameters = backend.mean(best_parameters[:self.num_parameters, :]
                                       * backend.to_primitive(measure_weights),
                                       axis=1) + backend.mean(mean_value)

        if self.tunable_offset is not None:
            div = backend.max(best_parameters)
            if div != 0:
                best_parameters /= div
            measure = self.tunable_offset(validation, training)
            base = basis[0] if self.basis != "krylov" else None
            best_offset = optimize(
                lambda params: -measure.best_direction() * measure(
                    self._run(training, [(best_parameters[i] + params[
                        2]) * params[0]**i + params[1] for i in range(
                            len(best_parameters))], base, *args, **kwargs)),
                #lambda params: - measure.evaluate(self._run(training, best_parameters + params[0], *args, **kwargs)),
                max_vals=[1, 0, 0],
                min_vals=[0, 0, 0],
                deviation_tol=0.005,
                parameter_tol=1,
                partitions=5,
                divide_range=2)
            #best_parameters += best_offset[0]
            best_parameters = [
                (best_parameters[i] + best_offset[2]) * best_offset[0]**i +
                best_offset[1] for i in range(len(best_parameters))
            ]

        best_parameters = backend.to_primitive(best_parameters)
        if backend.sum(backend.abs(best_parameters)) != 0:
            best_parameters /= backend.mean(backend.abs(best_parameters))
        if self.tuning_backend is not None and self.tuning_backend != previous_backend:
            best_parameters = [
                float(param) for param in best_parameters
            ]  # convert parameters to backend-independent list
            backend.load_backend(previous_backend)
        #kwargs["graph_dropout"] = graph_dropout
        if self.basis != "krylov":
            return Tautology(), self._run(
                personalization, best_parameters, *args,
                **kwargs)  # TODO: make this unnecessary
        return self.ranker_generator(best_parameters), personalization
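The autoregression branch of this tuner fits a moving-average window to the sequence of measured values using an Adam-style update: running first and second moments of the gradient (beta1, beta2) with bias correction, which is exactly where the (1 - beta1t) and (rms / (1 - beta2t)) ** 0.5 terms above come from. For readers unfamiliar with that rule, here is a minimal self-contained sketch of generic Adam on a toy least-squares problem; it is not the library's exact loop.

import numpy as np

def adam_fit(gradient_fn, x0, lr=0.01, beta1=0.9, beta2=0.999, eps=1.E-8, steps=2000):
    """Generic Adam: keep running first/second moments of the gradient with bias correction."""
    x = np.array(x0, dtype=float)
    momentum, rms = np.zeros_like(x), np.zeros_like(x)
    beta1t = beta2t = 1.0
    for _ in range(steps):
        beta1t *= beta1
        beta2t *= beta2
        grad = gradient_fn(x)
        momentum = beta1 * momentum + (1 - beta1) * grad
        rms = beta2 * rms + (1 - beta2) * grad * grad
        x -= lr * (momentum / (1 - beta1t)) / ((rms / (1 - beta2t)) ** 0.5 + eps)
    return x

# Toy least-squares problem: recover w from y = X @ w.
rng = np.random.default_rng(0)
X = rng.random((100, 3))
y = X @ np.array([0.5, -1.0, 2.0])
w = adam_fit(lambda w: 2 * X.T @ (X @ w - y) / len(y), x0=np.zeros(3))
print(np.round(w, 2))  # close to [0.5, -1.0, 2.0]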
Example #8
def to_sparse_matrix(G,
                     normalization="auto",
                     weight="weight",
                     renormalize=False,
                     reduction=backend.degrees,
                     transform_adjacency=lambda x: x,
                     cors=False):
    """ Used to normalize a graph and produce a sparse matrix representation.

    Args:
        G: A networkx or fastgraph graph. If an object with a "shape" attribute is provided (which means that it
            is already a backend matrix) then it is returned as-is.
        normalization: Optional. The type of normalization can be "none", "col", "symmetric", "laplacian", "both",
            or "auto" (default). The last option selects "col" for directed graphs and "symmetric" for undirected
            ones. Alternatively, a callable can be provided, in which case it transforms a scipy sparse adjacency
            matrix to produce a normalized copy.
        weight: Optional. The weight attribute (default is "weight") of *networkx* graph edges. This is ignored when
            *fastgraph* graphs are parsed, as these are unweighted.
        renormalize: Optional. If True, the renormalization trick (self-loops) of graph neural networks is applied to
            ensure iteration stability by shrinking the graph's spectrum. Default is False. Any value that can be
            cast to a float may also be provided to scale the added self-loops.
        reduction: Optional. A callable that controls how degrees are computed (e.g. `pygrank.eigdegree`
            for entropy-preserving transition matrices [li2011link]). Default is `pygrank.degrees`.
        transform_adjacency: Optional. A callable applied to the normalized scipy matrix just before it is
            converted to the current backend. Default is the identity transform.
        cors: Optional. Cross-origin resource (shared between backends). Default is False.
            If True, the backend primitives holding the outcome of graph preprocessing are enriched with additional
            private metadata that enables their usage as base graphs when passing through other postprocessors
            in other backends. This is not required when constructing GraphSignal instances with
            the pattern `pygrank.to_signal(M, personalization_data)` where `M = pygrank.preprocessor(cors=True)(graph)`
            but is mandatory when the two commands are called in different backends. Note that *cors* objects are not
            normalized again with other strategies in other preprocessors and compliance is not currently enforced.
            There is **significant speedup** in using *cors* when frequently switching between backends for the
            same graphs. Furthermore, after defining such instances, they can be used in place of base graphs.
            If False (default), a lot of memory is saved by not keeping pointers to all versions of adjacency matrices
            among the backends that use them. Enabling *cors* and then visiting up to two backends, of which one is
            "numpy", does not affect the maximum memory consumption of code processing one graph.
    """
    if hasattr(G, "__pygrank_preprocessed"):
        if backend.backend_name() in G.__pygrank_preprocessed:
            return G.__pygrank_preprocessed[backend.backend_name(
            )]  # this is basically caching, but it's pretty safe for just passing adjacency matrices around
        ret = backend.scipy_sparse_to_backend(
            G.__pygrank_preprocessed["numpy"])
        if cors:
            ret.__pygrank_preprocessed = G.__pygrank_preprocessed
            ret.__pygrank_preprocessed[backend.backend_name()] = ret
        else:
            ret.__pygrank_preprocessed = {backend.backend_name(): ret}
        ret._pygrank_node2id = G._pygrank_node2id
        return ret
    with backend.Backend("numpy"):
        normalization = normalization.lower() if isinstance(
            normalization, str) else normalization
        if normalization == "auto":
            normalization = "col" if G.is_directed() else "symmetric"
        M = G.to_scipy_sparse_array() if isinstance(
            G, fastgraph.Graph) else nx.to_scipy_sparse_matrix(
                G, weight=weight, dtype=float)
        renormalize = float(renormalize)
        left_reduction = reduction  #(lambda x: backend.degrees(x)) if reduction == "sum" else reduction
        right_reduction = lambda x: left_reduction(x.T)
        if renormalize != 0:
            M = M + scipy.sparse.eye(M.shape[0]) * renormalize
        if normalization == "col":
            S = np.array(left_reduction(M)).flatten()
            S[S != 0] = 1.0 / S[S != 0]
            Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
            M = Q * M
        elif normalization == "laplacian":
            S = np.array(np.sqrt(left_reduction(M))).flatten()
            S[S != 0] = 1.0 / S[S != 0]
            Qleft = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
            S = np.array(np.sqrt(right_reduction(M))).flatten()
            S[S != 0] = 1.0 / S[S != 0]
            Qright = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
            M = Qleft * M * Qright
            M = -M + scipy.sparse.eye(M.shape[0])
        elif normalization == "both":
            S = np.array(left_reduction(M)).flatten()
            S[S != 0] = 1.0 / S[S != 0]
            Qleft = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
            S = np.array(right_reduction(M)).flatten()
            S[S != 0] = 1.0 / S[S != 0]
            Qright = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
            M = Qleft * M * Qright
        elif normalization == "symmetric":
            S = np.array(np.sqrt(left_reduction(M))).flatten()
            S[S != 0] = 1.0 / S[S != 0]
            Qleft = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
            S = np.array(np.sqrt(right_reduction(M))).flatten()
            S[S != 0] = 1.0 / S[S != 0]
            Qright = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
            M = Qleft * M * Qright
        elif callable(normalization):
            M = normalization(M)
        elif normalization != "none":
            raise Exception(
                "Supported normalizations: none, col, symmetric, both, laplacian, auto"
            )
    M = transform_adjacency(M)
    ret = M if backend.backend_name(
    ) == "numpy" else backend.scipy_sparse_to_backend(M)
    ret._pygrank_node2id = {v: i for i, v in enumerate(G)}
    if cors:
        ret.__pygrank_preprocessed = {backend.backend_name(): ret, "numpy": M}
        M.__pygrank_preprocessed = ret.__pygrank_preprocessed
    else:
        ret.__pygrank_preprocessed = {backend.backend_name(): ret}
    return ret
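Every normalization branch above follows the same recipe: compute a degree vector, invert its nonzero entries, place the result on a diagonal with scipy.sparse.spdiags, and multiply it into the adjacency matrix from the left and/or right. The following self-contained sketch shows one-sided, symmetric, and Laplacian normalization on a tiny adjacency matrix; it is independent of pygrank and the helper names are illustrative only.

import numpy as np
import scipy.sparse

# Tiny symmetric adjacency matrix.
A = scipy.sparse.csr_matrix(np.array([[0., 1., 1.],
                                      [1., 0., 0.],
                                      [1., 0., 0.]]))

def inv_diag(values, power=1.0):
    """Diagonal matrix holding 1/values**power, leaving zero entries untouched."""
    values = np.asarray(values, dtype=float).flatten()
    values[values != 0] = 1.0 / values[values != 0] ** power
    return scipy.sparse.spdiags(values, 0, values.size, values.size, format="csr")

degrees = np.asarray(A.sum(axis=1)).flatten()

one_sided = inv_diag(degrees) @ A                                 # D^-1 A
symmetric = inv_diag(degrees, 0.5) @ A @ inv_diag(degrees, 0.5)   # D^-1/2 A D^-1/2
laplacian = scipy.sparse.eye(A.shape[0]) - symmetric              # normalized Laplacian

print(one_sided.toarray())
print(symmetric.toarray())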
Example #9
    def _tune(self, graph=None, personalization=None, *args, **kwargs):
        previous_backend = backend.backend_name()
        personalization = to_signal(graph, personalization)
        if self.tuning_backend is not None and self.tuning_backend != previous_backend:
            backend.load_backend(self.tuning_backend)
        backend_personalization = to_signal(
            graph, backend.to_array(personalization.np))
        total_params = list()
        for seed0 in range(self.cross_validate):
            fraction_of_training = self.fraction_of_training if isinstance(
                self.fraction_of_training,
                Iterable) else [self.fraction_of_training]
            #fraction_of_training = [random.choice(fraction_of_training)]
            internal_training_list = list()
            validation_list = list()
            for seed, fraction in enumerate(fraction_of_training):
                training, validation = split(backend_personalization, fraction,
                                             seed0 + seed)
                internal_training = training
                if self.pre_diffuse is not None:
                    internal_training = self.pre_diffuse(internal_training)
                    validation = self.pre_diffuse(validation)
                internal_training_list.append(internal_training)
                validation_list.append(validation)

            def eval(params):
                val = 0
                for internal_training, validation in zip(
                        internal_training_list, validation_list):
                    """import pygrank as pg

                    scores = self._run(backend_personalization, params, *args, **kwargs)
                    internal_training = pg.Undersample(int(backend.sum(internal_training)))(scores*backend_personalization)
                    validation = backend_personalization - internal_training"""
                    measure = self.measure(
                        validation, internal_training
                        if internal_training != validation else None)
                    val = val - measure.best_direction() * measure.evaluate(
                        self._run(internal_training, params, *args, **kwargs))
                return val / len(internal_training_list)

            best_params = self.optimizer(eval, **self.optimize_args)
            """import cma
            es = cma.CMAEvolutionStrategy([0.5 for _ in range(len(self.optimize_args["max_vals"]))], 1./12**0.5)
            es.optimize(eval, verb_disp=False)
            best_params = es.result.xbest"""
            total_params.append(best_params)
        best_params = [0 for _ in best_params]
        best_squares = [0 for _ in best_params]
        best_means = [0 for _ in best_params]
        for params in total_params:
            for i in range(len(best_params)):
                best_params[i] = max(best_params[i], params[i])
                best_means[i] += params[i] / self.cross_validate
                best_squares[i] += params[i]**2 / self.cross_validate
        best_params = best_means

        if self.tuning_backend is not None and self.tuning_backend != previous_backend:
            backend.load_backend(previous_backend)
            # TODO: make training back-propagate through tensorflow for combined_prediction==False (do this with a gather in the split method)
        self.last_params = best_params
        return self.ranker_generator(
            best_params
        ), personalization if self.combined_prediction else internal_training
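This tuner repeats the inner optimization cross_validate times with different split seeds and aggregates the per-repeat optima; although element-wise maxima and squared sums are tracked, the element-wise mean is what the method finally returns as best_params. A tiny self-contained sketch of that aggregation, with hypothetical numbers just to show the arithmetic:

# Hypothetical per-repeat optima produced by the inner optimizer.
total_params = [
    [0.85, 0.10, 0.40],
    [0.80, 0.20, 0.50],
    [0.90, 0.15, 0.45],
]
cross_validate = len(total_params)

best_means = [0.0] * len(total_params[0])
best_maxima = [0.0] * len(total_params[0])
for params in total_params:
    for i, value in enumerate(params):
        best_means[i] += value / cross_validate
        best_maxima[i] = max(best_maxima[i], value)

print([round(v, 2) for v in best_means])   # [0.85, 0.15, 0.45], used as the tuned parameters
print([round(v, 2) for v in best_maxima])  # [0.9, 0.2, 0.5]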