Example #1
from graph_measures.features_algorithms.vertices.louvain import LouvainCalculator
from graph_measures.features_algorithms.vertices.motifs import nth_nodes_motif
from graph_measures.features_algorithms.vertices.page_rank import PageRankCalculator
from graph_measures.features_infra.feature_calculators import FeatureMeta, FeatureCalculator
from graph_measures.features_algorithms.vertices.neighbor_nodes_histogram import nth_neighbor_calculator
# Imports for the calculators referenced below; module paths are assumed to
# mirror the feature names, following the pattern of the imports above.
from graph_measures.features_algorithms.vertices.attractor_basin import AttractorBasinCalculator
from graph_measures.features_algorithms.vertices.average_neighbor_degree import AverageNeighborDegreeCalculator
from graph_measures.features_algorithms.vertices.betweenness_centrality import BetweennessCentralityCalculator
from graph_measures.features_algorithms.vertices.bfs_moments import BfsMomentsCalculator
from graph_measures.features_algorithms.vertices.closeness_centrality import ClosenessCentralityCalculator
from graph_measures.features_algorithms.vertices.eccentricity import EccentricityCalculator
# Accelerated (C++-backed) alternatives:
# from graph_measures.features_algorithms.accelerated_graph_features.attractor_basin import AttractorBasinCalculator
# from graph_measures.features_algorithms.accelerated_graph_features.bfs_moments import BfsMomentsCalculator
# from graph_measures.features_algorithms.accelerated_graph_features.flow import FlowCalculator
# from graph_measures.features_algorithms.accelerated_graph_features.k_core import KCoreCalculator
# from graph_measures.features_algorithms.accelerated_graph_features.motifs import nth_nodes_motif
# from graph_measures.features_algorithms.accelerated_graph_features.page_rank import PageRankCalculator

NODE_FEATURES = {
    # Passed
    "attractor_basin":
    FeatureMeta(AttractorBasinCalculator, {"ab"}),
    "average_neighbor_degree":
    FeatureMeta(AverageNeighborDegreeCalculator, {"avg_nd"}),
    "betweenness_centrality":
    FeatureMeta(BetweennessCentralityCalculator, {"betweenness"}),
    "bfs_moments":
    FeatureMeta(BfsMomentsCalculator, {"bfs"}),

    # Didn't pass - though no logic error was found
    "closeness_centrality":
    FeatureMeta(ClosenessCentralityCalculator, {"closeness"}),
    # "communicability_betweenness_centrality": FeatureMeta(CommunicabilityBetweennessCentralityCalculator,
    #                                                       {"communicability"}),

    # Passed
    "eccentricity":
Example #2
               for node in self._gnx}
        max_b_u = float(max(b_u.values()))

        for node in self._gnx:
            # the delta determines whether this node is to be considered
            if (b_u[node] / max_b_u) <= self._threshold:
                self._features[node] = 0
                continue

            udists = undirected_dists[node]
            dists = directed_dists[node]

            # pair up values from two dictionaries that share the same keys,
            # storing each column as an np.array
            num, denom = map(np.array, zip(*((udists[n], dists[n]) for n in dists)))

            num = num[denom != 0]
            denom = denom[denom != 0]

            self._features[node] = np.sum(num / denom) / float(b_u[node])


feature_entry = {
    "flow": FeatureMeta(FlowCalculator, {}),
}


if __name__ == "__main__":
    from graph_measures.measure_tests.specific_feature_test import test_specific_feature
    test_specific_feature(FlowCalculator, is_max_connected=True)
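
A minimal sketch of the core ratio computation above, using plain networkx shortest-path lengths on a toy graph (the b_u threshold and per-node normalization are omitted):

import networkx as nx
import numpy as np

g = nx.DiGraph([(0, 1), (1, 2), (2, 0), (0, 3)])
directed_dists = dict(nx.all_pairs_shortest_path_length(g))
undirected_dists = dict(nx.all_pairs_shortest_path_length(g.to_undirected()))

node = 0
udists, dists = undirected_dists[node], directed_dists[node]
# paired values from two dictionaries that share the same keys
num, denom = map(np.array, zip(*((udists[n], dists[n]) for n in dists)))
mask = denom != 0
print(np.sum(num[mask] / denom[mask]))  # 2.5 for node 0 in this toy graph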
Example #3
import networkx as nx

from graph_measures.features_infra.feature_calculators import NodeFeatureCalculator, FeatureMeta


class BetweennessCentralityCalculator(NodeFeatureCalculator):
    def __init__(self, *args, normalized=False, **kwargs):
        super(BetweennessCentralityCalculator, self).__init__(*args, **kwargs)
        self._is_normalized = normalized

    def _calculate(self, include: set, is_regression=False):
        self._features = nx.betweenness_centrality(
            self._gnx, normalized=self._is_normalized)

    def is_relevant(self):
        return True


feature_entry = {
    "betweenness_centrality":
    FeatureMeta(BetweennessCentralityCalculator, {"betweenness"}),
}

if __name__ == "__main__":
    from measure_tests.specific_feature_test import test_specific_feature
    test_specific_feature(BetweennessCentralityCalculator,
                          is_max_connected=True)
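
For reference, the same measure straight from networkx on a toy graph (this is exactly the call the calculator wraps):

import networkx as nx
print(nx.betweenness_centrality(nx.path_graph(4), normalized=False))
# {0: 0.0, 1: 2.0, 2: 2.0, 3: 0.0}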
Example #4
        d = r
        return laplacian, y, tol, r, d

    @staticmethod
    def _initialize_vars_from_laplacian_matrix1(g):
        # create the Laplacian matrix
        w = g + g.T                # symmetrized adjacency
        d = np.diag(sum(w))        # degree matrix
        l = d - w                  # graph Laplacian
        _id = np.sum(g, 0)         # in-degrees
        od = np.sum(g, 1)          # out-degrees
        # initialize variables
        b = np.subtract((np.array([od])).T, (np.array([_id])).T)
        tol = 0.001
        n = np.size(g, 1)
        y = np.random.rand(n, 1)
        y = np.subtract(y, (1 / n) * sum(y))
        k = np.dot(l, y)
        r = np.subtract(b, k)
        d = r
        return l, y, tol, r, d


feature_entry = {
    "hierarchy_energy": FeatureMeta(HierarchyEnergyCalculator, {"hierarchy"}),
}

if __name__ == "__main__":
    from graph_measures.measure_tests.specific_feature_test import test_specific_feature
    test_specific_feature(HierarchyEnergyCalculator, is_max_connected=True)
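
A small numpy-only sketch of the Laplacian construction used in _initialize_vars_from_laplacian_matrix1, on a toy 3-node directed adjacency matrix:

import numpy as np

g = np.array([[0., 1., 0.],
              [0., 0., 1.],
              [0., 0., 0.]])
w = g + g.T                  # symmetrized adjacency
d = np.diag(sum(w))          # degree matrix (built-in sum gives column sums)
laplacian = d - w            # graph Laplacian
print(laplacian)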
Example #5
            [[cur_feature[r_type][x] for x in range(self._num_classes)]
             for r_type in self._relation_types]).flatten()

    # def _to_ndarray(self):
    #     mx = np.matrix([self._get_feature(node) for node in self._nodes()])
    #     return mx.astype(np.float32)


def nth_neighbor_calculator(order, **kwargs):
    # kwargs are used to pass the "labels to consider" argument
    return partial(NthNeighborNodeHistogramCalculator, order, **kwargs)


feature_entry = {
    "first_neighbor_histogram":
    FeatureMeta(nth_neighbor_calculator(1), {"fnh", "first_neighbor"}),
    "second_neighbor_histogram":
    FeatureMeta(nth_neighbor_calculator(2), {"snh", "second_neighbor"}),
}


def build_sample_graph(edges, colors, color_list):
    dg = nx.DiGraph(labels=color_list)  # list(set(colors.values())))
    dg.add_edges_from(edges)
    for n in dg:
        # graph.node was removed in networkx 2.4; use graph.nodes instead
        dg.nodes[n]['label'] = colors[n]
    return dg


def test_neighbor_histogram():
    all_colors = ['red', 'blue', 'green', 'yellow']
Example #6
    def _edge_based_degree_directed(self):
        for edge in self._gnx.edges():
            e1_feature = np.array(self._general_c.feature(edge[0]))
            e2_feature = np.array(self._general_c.feature(edge[1]))

            self._features[edge] = np.concatenate([
                (e1_feature - e2_feature).astype(np.float32),  # sub out-in
                np.mean([e1_feature, e2_feature],
                        axis=1).astype(np.float32),  # mean out-in
            ])

    def _edge_based_degree_undirected(self):
        for edge in self._gnx.edges():
            e1_feature = self._general_c.feature(edge[0])
            e2_feature = self._general_c.feature(edge[1])

            self._features[edge] = [
                float(e1_feature[0]) - e2_feature[0],  # sub
                (float(e1_feature[0]) + e2_feature[0]) / 2  # mean
            ]


feature_entry = {
    "Edge_degree_based_calculator":
    FeatureMeta(EdgeDegreeBasedCalculator, {"e_degree"}),
}

if __name__ == '__main__':
    pass
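
A minimal sketch of the undirected variant above, computing the sub/mean degree features per edge with plain networkx (names here are illustrative only):

import networkx as nx

g = nx.Graph([(1, 2), (2, 3), (3, 1), (3, 4)])
deg = dict(g.degree())
edge_features = {
    (u, v): [float(deg[u]) - deg[v],        # sub
             (float(deg[u]) + deg[v]) / 2]  # mean
    for u, v in g.edges()
}
print(edge_features)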
Example #7
import os
import sys

sys.path.append(os.path.abspath('.'))
sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('../..'))
sys.path.append(os.path.abspath('../../..'))
sys.path.append(os.path.abspath('src'))
sys.path.append(os.path.abspath('src/accelerated_graph_features'))

from graph_measures.features_infra.feature_calculators import NodeFeatureCalculator, FeatureMeta
from graph_measures.features_algorithms.accelerated_graph_features.src import node_page_rank


class PageRankCalculator(NodeFeatureCalculator):
    def __init__(self, *args, alpha=0.9, **kwargs):
        super(PageRankCalculator, self).__init__(*args, **kwargs)
        self._alpha = alpha

    def is_relevant(self):
        # Undirected graphs will be converted to a directed
        #       graph with two directed edges for each undirected edge.
        return True

    def _calculate(self, include: set):
        # the accelerated binding spells the damping factor "dumping"
        self._features = node_page_rank(self._gnx, dumping=self._alpha)


feature_entry = {
    "page_rank": FeatureMeta(PageRankCalculator, {"pr"}),
}
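
For comparison, the pure-networkx equivalent with the same damping factor (nx.pagerank's alpha corresponds to the accelerated version's "dumping" argument):

import networkx as nx
g = nx.DiGraph([(0, 1), (1, 2), (2, 0)])
print(nx.pagerank(g, alpha=0.9))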
Example #8
import os
import sys

sys.path.append(os.path.abspath('.'))
sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('../..'))
sys.path.append(os.path.abspath('../../..'))
sys.path.append(os.path.abspath('src'))
sys.path.append(os.path.abspath('src/accelerated_graph_features'))

from graph_measures.features_infra.feature_calculators import NodeFeatureCalculator, FeatureMeta
from graph_measures.features_algorithms.accelerated_graph_features.src import k_core


class KCoreCalculator(NodeFeatureCalculator):
    def is_relevant(self):
        return True

    def _calculate(self, include: set):
        self._features = k_core(self._gnx)


feature_entry = {
    "k_core": FeatureMeta(KCoreCalculator, {"kc"}),
}
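
For comparison, the standard networkx core-number computation that this accelerated calculator corresponds to:

import networkx as nx
g = nx.Graph([(0, 1), (1, 2), (2, 0), (2, 3)])
print(nx.core_number(g))  # {0: 2, 1: 2, 2: 2, 3: 1}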
Example #9
import networkx as nx

from graph_measures.features_infra.feature_calculators import NodeFeatureCalculator, FeatureMeta


class CommunicabilityBetweennessCentralityCalculator(NodeFeatureCalculator):
    def _calculate(self, include: set, is_regression=False):
        self._features = nx.communicability_betweenness_centrality(self._gnx)

    def is_relevant(self):
        return not self._gnx.is_directed()


feature_entry = {
    "communicability_betweenness_centrality": FeatureMeta(CommunicabilityBetweennessCentralityCalculator,
                                                          {"communicability"}),
}


if __name__ == "__main__":
    from measure_tests.specific_feature_test import test_specific_feature
    test_specific_feature(CommunicabilityBetweennessCentralityCalculator, is_max_connected=True)
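
A minimal usage sketch of the underlying networkx call, which is only defined for undirected graphs (hence is_relevant above):

import networkx as nx
print(nx.communicability_betweenness_centrality(nx.path_graph(4)))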
Example #10
    def _active_learning(self, eps=0.05, batch_size=5, epochs=200, out_prog=True, clear_model=False, out_interval=25,
                         **params):

        if self._opt not in Available_Options:
            print("option {} is not supported".format(self._opt))
            return

        # preliminary calculation for exploitation
        neighbors_matrix = None
        if self._opt in {'region_entropy'} or params.get('representation_region', False):
            gn = GraphNeighbors(self._graph)
            neighbors_matrix = gn.neighbors(second_order=params.get('region_second_order', False),
                                            self_node=params.get('region_include_self', False),
                                            with_orders=False, only_out=params.get('region_only_out', False))

            if params.get('region_weights', None):
                if params['region_weights'] == 'out':
                    neighbors_degree = np.array(self._graph.out_degree(self._nodes_order))[:, 1]
                elif params['region_weights'] == 'in':
                    neighbors_degree = np.array(self._graph.in_degree(self._nodes_order))[:, 1]
                else:
                    neighbors_degree = np.array(self._graph.degree(self._nodes_order))[:, 1]

                neighbors_degree = neighbors_degree.astype(float)
                for x in neighbors_matrix:
                    for j in range(len(x) - 1, -1, -1):
                        if neighbors_degree[x[j]] == 0:
                            del x[j]
                if params.get('region_opposite_weights', False):
                    weights = neighbors_degree
                else:
                    weights = 1 / neighbors_degree
                    weights[weights == np.inf] = 0
            else:
                weights = None

        if self._opt in {'centrality', 'Chang', 'geo_cent', 'APR'}:
            page_rank_feature = {"page_rank": FeatureMeta(PageRankCalculator, {"pr"})}
            page_rank_matrix = self._gl.get_features(page_rank_feature)
            norm_page_rank = normalize(page_rank_matrix, axis=0, norm='max').reshape(-1)
            if self._opt == 'APR':
                adj_matrix = nx.adjacency_matrix(self._graph, nodelist=self._nodes_order).todense()
                np.fill_diagonal(adj_matrix, 0)
                adj_matrix = adj_matrix / adj_matrix.sum(axis=1)
                adj_matrix = np.nan_to_num(adj_matrix)
                adj_matrix = adj_matrix.T
                if torch.cuda.is_available():
                    page_rank_matrix = torch.Tensor(page_rank_matrix).to(self._device)
                    adj_matrix = torch.Tensor(adj_matrix).to(self._device)

        if self._opt in {'rep_dist', 'Chang'}:
            inv_cov = None
            features_matrix = None
            if params.get('representation_vectors', 'model') in {'topology', 'both'}:
                rep_features = CHOSEN_FEATURES.copy()
                if not params.get('representation_motif4', False):
                    rep_features.pop('motif4')
                features_matrix = self._gl.get_features(rep_features, print_time=True)
                # replace all NaN values of attractor basin with 100
                features_matrix[np.isnan(features_matrix)] = 100
                if params.get('representation_measure', 'mahalanobis') == 'mahalanobis' and \
                        params['representation_vectors'] == 'topology':
                    inv_cov = np.linalg.pinv(np.cov(features_matrix.T))

        if self._opt in {'geo_dist', 'geo_cent'}:
            graph_dists = dict(weighted.all_pairs_dijkstra_path_length(self._graph, self._num_nodes, weight='weight'))
            ancestors = [nx.ancestors(self._graph, node) for node in sorted(self._graph)]
            descendants = [nx.descendants(self._graph, node) for node in sorted(self._graph)]

        if self._opt == 'k_truss':
            k_truss_score = k_truss(self._graph)

        if self._opt == 'feature':
            if params.get('feature', 'attractor_basin') == 'attractor_basin':
                chosen_feature = {"attractor_basin": FeatureMeta(AttractorBasinCalculator, {"ab"})}
            feature_score = self._gl.get_features(chosen_feature)

        # initializing parameters for plots
        if out_prog:
            results = {0: self._init(n_nodes=1)}
            prog_intervals = out_interval
            prog_iter = 1
            cur_interval = 1
        else:
            self._init()
            results = {}

        while self._num_revealed < self._stop_cond:
            # making output for plot
            if out_prog and self._num_revealed >= cur_interval/prog_intervals*self._stop_cond:
                res = self._train_and_eval(epochs, iterations=prog_iter, clear_model=clear_model)
                percent_revealed = round(self._num_revealed / self._num_nodes * 100, 2)
                results[percent_revealed] = res
                val = list(res.values())[0]
                self._model_runner.data_logger.info(self.eval_method, percent_revealed,
                                                    val["loss"], val["acc"], val["mic_f1"], val["mac_f1"])
                cur_interval = np.floor(self._num_revealed/self._stop_cond*prog_intervals) + 1
            elif self._opt in {'entropy', 'region_entropy', 'rep_dist', 'Chang'}:
                self._train_and_eval(epochs, clear_model=clear_model, verbose=0)

            # epsilon-greedy choice of which nodes to reveal
            rand = np.random.uniform(0, 1)
            # Explore
            if rand < eps or self._opt == 'random':
                idx = self._explore(batch_size=batch_size, balance=params.get('balance', False))
            # Exploit
            else:
                # evaluate the samples
                if self._opt in {'rep_dist', 'Chang'}:
                    outlier_score = self._representation_distance(vectors_type=params.get('representation_vectors',
                                                                                          'model'),
                                                                  dist_measure=params.get('representation_measure',
                                                                                          'mahalanobis'),
                                                                  features_matrix=features_matrix, inv_cov=inv_cov,
                                                                  average_distance=params.get('representation_average',
                                                                                              False),
                                                                  region=params.get('representation_region', False),
                                                                  neighbors_matrix=neighbors_matrix)

                if self._opt in {'entropy', 'Chang'}:
                    test_probs = self._model_runner.predict_proba(self._models).cpu()
                    if params.get('margin', False):
                        test_probs.sort()
                        entropies = test_probs[:, -1] - test_probs[:, -2]
                    else:
                        entropies = entropy(test_probs.t())

                if self._opt in {'region_entropy'}:
                    region_entropies = self._region_entropies(neighbors_matrix=neighbors_matrix, weights=weights,
                                                              average_entropy=params.get('region_average_entropy',
                                                                                         False),
                                                              prefer_large_regions=params.get('region_prefer_large',
                                                                                              False),
                                                              margin=params.get('margin', False))

                if self._opt in {'geo_dist', 'geo_cent'}:
                    dist_to_train = self._geodesic_distance(graph_dists, ancestors, descendants,
                                                            in_out=params.get('geo_in_out', 'both'))

                if self._opt == 'APR':
                    relative_pr = self._relative_page_rank(page_rank_matrix, adj_matrix,
                                                           only_known=params.get('apr_only_known', False))

                # build scores vector according to the evaluation method
                if self._opt == 'entropy':
                    scores = entropies
                elif self._opt == 'region_entropy':
                    scores = region_entropies
                elif self._opt == 'rep_dist':
                    scores = outlier_score
                elif self._opt == 'geo_dist':
                    scores = dist_to_train
                elif self._opt == 'centrality':
                    scores = np.asarray([page_rank_matrix[x] for x in self._pool]).reshape(-1)
                elif self._opt == 'k_truss':
                    scores = np.asarray([k_truss_score[x] for x in self._pool]).reshape(-1)
                elif self._opt == 'feature':
                    scores = np.asarray([feature_score[x] for x in self._pool]).reshape(-1)
                elif self._opt == 'APR':
                    scores = relative_pr.flat
                elif self._opt == 'Chang':
                    c3 = max(0.7 * (1 - self._num_revealed/self._stop_cond), 0)
                    c1 = c2 = (1 - c3) / 2

                    density = normalize(np.nan_to_num(outlier_score).reshape(1, -1)).reshape(-1)
                    norm_entropy = normalize(entropies.reshape(1, -1)).reshape(-1)
                    centrality = np.asarray([norm_page_rank[x] for x in self._pool]).reshape(-1)

                    scores = c1 * norm_entropy + c2 * density + c3 * centrality
                elif self._opt == 'geo_cent':
                    c1 = 0.7
                    c2 = 0.3

                    g_dists = normalize(dist_to_train.reshape(1, -1), norm='max').reshape(-1)
                    # np.vstack needs a sequence; a bare generator fails in modern numpy
                    centrality = np.vstack([norm_page_rank[x] for x in self._pool]).reshape(-1)

                    scores = c1 * g_dists + c2 * centrality

                if params.get('margin', False):
                    # choose the lowest-scoring nodes
                    idx = np.argpartition(scores, batch_size)[:batch_size]
                else:
                    # choose the highest-scoring nodes
                    idx = np.argpartition(scores, -batch_size)[-batch_size:]

            self._reveal(idx)

        # evaluate the model
        res = self._train_and_eval(epochs, clear_model=clear_model)
        percent_revealed = round(self._num_revealed / self._num_nodes * 100, 2)
        results[percent_revealed] = res
        val = list(res.values())[0]
        self._model_runner.data_logger.info(self.eval_method, percent_revealed,
                                            val["loss"], val["acc"], val["mic_f1"], val["mac_f1"])

        return results
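
A stripped-down sketch of the epsilon-greedy batch selection at the heart of the loop above (the scores are random stand-ins for the entropy/centrality scores):

import numpy as np

rng = np.random.default_rng(0)
scores = rng.random(20)        # stand-in scores over the candidate pool
batch_size, eps = 5, 0.05
if rng.uniform(0, 1) < eps:    # explore: random batch
    idx = rng.choice(len(scores), size=batch_size, replace=False)
else:                          # exploit: highest-scoring batch
    idx = np.argpartition(scores, -batch_size)[-batch_size:]
print(idx)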
Example #11
            denominator = sum((dist / avg_out[m]) * (self._alpha**(-m))
                              for m, dist in out_dist.items())
            if 0 != denominator:
                numerator = sum((dist / avg_in[m]) * (self._alpha**(-m))
                                for m, dist in in_dist.items())
                self._features[node] = numerator / denominator

    @staticmethod
    def _calculate_average_per_dist(num_nodes, count_dist):
        # aggregate the per-node counters in "count_dist" into a single
        # per-distance total in "all_dist_count"
        all_dist_count = {}
        for counter in count_dist.values():
            for dist, occurrences in counter.items():
                all_dist_count[dist] = all_dist_count.get(dist,
                                                          0) + occurrences

        # average count per node for each distance
        return {
            dist: float(count) / num_nodes
            for dist, count in all_dist_count.items()
        }


feature_entry = {
    "attractor_basin": FeatureMeta(AttractorBasinCalculator, {"ab"}),
}

if __name__ == "__main__":
    from measure_tests.specific_feature_test import test_specific_feature
    test_specific_feature(AttractorBasinCalculator, is_max_connected=True)
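
_calculate_average_per_dist is self-contained; a tiny worked run with hand-made Counters:

from collections import Counter

count_dist = {0: Counter({1: 2, 2: 1}), 1: Counter({1: 1, 2: 2})}
all_dist_count = {}
for counter in count_dist.values():
    for dist, occurrences in counter.items():
        all_dist_count[dist] = all_dist_count.get(dist, 0) + occurrences
num_nodes = len(count_dist)
print({dist: float(count) / num_nodes for dist, count in all_dist_count.items()})
# {1: 1.5, 2: 1.5}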
Example #12
                        motif_number: 0
                        for motif_number in self._all_motifs
                    }
                self._features[edge][motif_num] += 1


def nth_nodes_motif(motif_level):
    return partial(MotifsNodeCalculator, level=motif_level)


def nth_edges_motif(motif_level):
    # Note: this binds the same MotifsNodeCalculator as nth_nodes_motif;
    # an edge-level calculator is presumably intended here.
    return partial(MotifsNodeCalculator, level=motif_level)


feature_node_entry = {
    "motif3": FeatureMeta(nth_nodes_motif(3), {"m3"}),
    "motif4": FeatureMeta(nth_nodes_motif(4), {"m4"}),
}

feature_edge_entry = {
    "motif3_edge": FeatureMeta(nth_edges_motif(3), {"me3"}),
    "motif4_edge": FeatureMeta(nth_edges_motif(4), {"me4"}),
}

if __name__ == "__main__":
    from measure_tests.specific_feature_test import test_specific_feature

    # The previous version contained a bug: sub-groups with double edges were counted twice
    # test_specific_feature(nth_edges_motif(3), is_max_connected=True)
    test_specific_feature(nth_edges_motif(4), is_max_connected=True)
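
The factory functions above rely on functools.partial to pre-bind the motif level; a self-contained sketch with a hypothetical stand-in class:

from functools import partial

class MotifCalculatorStub:            # hypothetical stand-in, not the real class
    def __init__(self, gnx, level=3):
        self.level = level

motif4 = partial(MotifCalculatorStub, level=4)
print(motif4(None).level)             # 4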
Example #13
class FiedlerVectorCalculator(NodeFeatureCalculator):
    def _calculate_dep(self, include: set):
        # Working on every connected component by itself
        self._features = dict(zip(self._gnx, alg_connectivity.fiedler_vector(self._gnx)))

    def _calculate(self, include: set, is_regression=False):
        self._features = {}

        # nx.connected_component_subgraphs was removed in networkx 2.4
        for graph in (self._gnx.subgraph(c).copy() for c in nx.connected_components(self._gnx)):
            if len(graph) < 2:
                self._features.update(zip(graph.nodes(), [0.] * len(graph)))
            else:
                self._features.update(zip(graph.nodes(), map(float, alg_connectivity.fiedler_vector(graph))))

    def is_relevant(self):
        # The Fiedler vector is defined only for connected undirected graphs,
        # so on a disconnected graph expect a networkx.exception.NetworkXError
        # return (not self._gnx.is_directed()) and (nx.is_connected(self._gnx.to_undirected()))
        return not self._gnx.is_directed()


feature_entry = {
    "fiedler_vector": FeatureMeta(FiedlerVectorCalculator, {"fv"}),
}


if __name__ == "__main__":
    from graph_measures.measure_tests.specific_feature_test import test_specific_feature
    test_specific_feature(FiedlerVectorCalculator, is_max_connected=True)
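
A minimal sketch of the underlying call on a connected undirected graph (nx.fiedler_vector requires scipy to be installed):

import networkx as nx
g = nx.path_graph(5)
print(dict(zip(g, map(float, nx.fiedler_vector(g)))))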
Example #14
import networkx as nx

from graph_measures.features_infra.feature_calculators import NodeFeatureCalculator, FeatureMeta


class ClosenessCentralityCalculator(NodeFeatureCalculator):
    def _calculate(self, include: set, is_regression=False):
        self._features = nx.closeness_centrality(self._gnx)

    def is_relevant(self):
        return True


feature_entry = {
    "closeness_centrality":
    FeatureMeta(ClosenessCentralityCalculator, {"closeness"}),
}

if __name__ == "__main__":
    from measure_tests.specific_feature_test import test_specific_feature
    test_specific_feature(ClosenessCentralityCalculator, is_max_connected=True)
Example #15
import networkx as nx

from graph_measures.features_infra.feature_calculators import NodeFeatureCalculator, FeatureMeta


class AverageNeighborDegreeCalculator(NodeFeatureCalculator):
    def is_relevant(self):
        return True

    def _calculate(self, include: set, is_regression=False):
        self._features = nx.average_neighbor_degree(self._gnx)


feature_entry = {
    "average_neighbor_degree":
    FeatureMeta(AverageNeighborDegreeCalculator, {"avg_nd"}),
}

if __name__ == "__main__":
    from graph_measures.measure_tests.specific_feature_test import test_specific_feature
    test_specific_feature(AverageNeighborDegreeCalculator,
                          is_max_connected=True)
Example #16
class EccentricityCalculator(NodeFeatureCalculator):
    def _calculate(self, include: set, is_regression=False):
        dists = {
            src: neighbors
            for src, neighbors in nx.all_pairs_shortest_path_length(self._gnx)
        }
        self._features = {
            node: max(neighbors.values())
            for node, neighbors in dists.items()
        }

    def _calculate_dep(self, include: set):
        # Not using nx.eccentricity directly, to handle disconnected graphs (on a graph
        # with more than one connected component, nx.eccentricity raises an exception)
        self._features = {
            node: nx.eccentricity(self._gnx, node)
            for node in self._gnx
        }

    def is_relevant(self):
        return True


feature_entry = {
    "eccentricity": FeatureMeta(EccentricityCalculator, {"ecc"}),
}

if __name__ == "__main__":
    from graph_measures.measure_tests.specific_feature_test import test_specific_feature
    test_specific_feature(EccentricityCalculator, is_max_connected=True)
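
On a connected graph, the all-pairs computation above agrees with nx.eccentricity; a quick check:

import networkx as nx
g = nx.path_graph(4)
dists = dict(nx.all_pairs_shortest_path_length(g))
ecc = {node: max(neighbors.values()) for node, neighbors in dists.items()}
print(ecc == nx.eccentricity(g))  # True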
Example #17
import networkx as nx

from graph_measures.features_infra.feature_calculators import NodeFeatureCalculator, FeatureMeta


class LoadCentralityCalculator(NodeFeatureCalculator):
    def is_relevant(self):
        return True

    def _calculate(self, include: set, is_regression=False):
        self._features = nx.load_centrality(self._gnx)


feature_entry = {
    "load_centrality": FeatureMeta(LoadCentralityCalculator, {"load_c"}),
}

if __name__ == "__main__":
    from graph_measures.measure_tests.specific_feature_test import test_specific_feature
    test_specific_feature(LoadCentralityCalculator, is_max_connected=True)
Example #18
            min_degrees = min(graph.degree(), key=lambda x: x[1])[1]  # smallest degree in the graph
            isomap_mx = Isomap(
                n_neighbors=min(min_degrees, MAX_DEGREE),
                n_components=COMPONENT_SIZE).fit_transform(dissimilarities)
            self._features.update(zip(nodes_order, isomap_mx))

    @staticmethod
    def _dissimilarity(graph, nodes_order):
        m = nx.floyd_warshall_numpy(graph, nodelist=nodes_order)
        return np.asarray(m)


feature_entry = {
    "multi_dimensional_scaling":
    FeatureMeta(MultiDimensionalScalingCalculator, {"mds"}),
}


def test_feature():
    from graph_measures.loggers import PrintLogger
    from graph_measures.measure_tests.test_graph import get_graph
    gnx = get_graph()
    feat = MultiDimensionalScalingCalculator(
        gnx, logger=PrintLogger("Keren's Logger"))
    res = feat.build()
    print(res)


if __name__ == "__main__":
    # from measure_tests.specific_feature_test import test_specific_feature
Example #19
        # Fast and numerically precise:
        variance = np.average((values - average)**2, weights=weights)
        return average, np.sqrt(variance)

    def _calculate(self, include: set, is_regression=False):
        for node in self._gnx:
            # calculate BFS distances
            distances = nx.single_source_shortest_path_length(self._gnx, node)
            # distances.pop(node)
            # if not distances:
            #     self._features[node] = [0., 0.]
            #     continue
            node_dist = Counter(distances.values())
            dists, weights = zip(*node_dist.items())
            # The shift by one below was kept from the previous version,
            # instead of the commented-out fix above
            adjusted_dists = np.asarray([x + 1 for x in dists])
            weights = np.asarray(weights)
            self._features[node] = list(
                self.weighted_avg_and_std(adjusted_dists, weights))


feature_entry = {
    "bfs_moments": FeatureMeta(BfsMomentsCalculator, {"bfs"}),
}

if __name__ == "__main__":
    from measure_tests.specific_feature_test import test_specific_feature

    test_specific_feature(BfsMomentsCalculator, is_max_connected=True)
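
A self-contained sketch of the per-node computation: BFS distances, a distance histogram, then the weighted average and standard deviation (with the +1 shift kept from the code above):

import numpy as np
import networkx as nx
from collections import Counter

g = nx.path_graph(4)
distances = nx.single_source_shortest_path_length(g, 0)  # {0: 0, 1: 1, 2: 2, 3: 3}
dists, weights = zip(*Counter(distances.values()).items())
adjusted = np.asarray(dists) + 1
weights = np.asarray(weights)
avg = np.average(adjusted, weights=weights)
std = np.sqrt(np.average((adjusted - avg) ** 2, weights=weights))
print(avg, std)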
Example #20
from collections import Counter

import community

from graph_measures.features_infra.feature_calculators import NodeFeatureCalculator, FeatureMeta


class LouvainCalculator(NodeFeatureCalculator):
    def is_relevant(self):
        # relevant only for undirected graphs
        return not self._gnx.is_directed()

    def _calculate(self, include: set, is_regression=False):
        partition = community.best_partition(self._gnx)
        com_size_dict = Counter(partition.values())
        self._features = {node: com_size_dict[partition[node]] for node in self._gnx}


feature_entry = {
    "louvain": FeatureMeta(LouvainCalculator, {"lov"}),
}

if __name__ == "__main__":
    from graph_measures.measure_tests.specific_feature_test import test_specific_feature
    test_specific_feature(LouvainCalculator, is_max_connected=True)
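
A minimal usage sketch, assuming the python-louvain package (imported as community):

import community
import networkx as nx
from collections import Counter

g = nx.karate_club_graph()
partition = community.best_partition(g)   # node -> community id
com_size = Counter(partition.values())    # community id -> size
print({node: com_size[partition[node]] for node in g})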
Example #21
            self._features[element][r_type][x]
            for x in self._gnx.graph["edge_labels"]
        ] for r_type in self._relation_types]).flatten()

    # def _to_ndarray(self):
    #     mx = np.matrix([self._get_feature(node) for node in self._nodes()])
    #     return mx.astype(np.float32)


def nth_neighbor_calculator(order):
    return partial(NthNeighborNodeEdgeHistogramCalculator, order)


feature_entry = {
    "first_node_edge_histogram":
    FeatureMeta(nth_neighbor_calculator(1), {"fnneh"}),
    "second_node_edge_histogram":
    FeatureMeta(nth_neighbor_calculator(2), {"snneh"}),
}


def sample_graph():
    g = nx.DiGraph(edge_labels=[1, 2])
    g.add_edges_from([
        (1, 2, {
            "label": 1
        }),
        (2, 6, {
            "label": 2
        }),
        (4, 1, {
Example #22
from graph_measures.features_infra.feature_calculators import NodeFeatureCalculator, FeatureMeta


class GeneralCalculator(NodeFeatureCalculator):
    def is_relevant(self):
        return True

    def _calculate(self, include: set, is_regression=False):
        if self._gnx.is_directed():
            self._features = {node: (in_deg, out_deg) for
                              (node, out_deg), (_, in_deg) in zip(self._gnx.out_degree(), self._gnx.in_degree())}
        else:
            self._features = {node: deg for node, deg in self._gnx.degree()}


feature_entry = {
    "general": FeatureMeta(GeneralCalculator, {"gen"}),
}


if __name__ == "__main__":
    from graph_measures.measure_tests.specific_feature_test import test_specific_feature
    test_specific_feature(GeneralCalculator, is_max_connected=True)
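
A quick sketch of the directed branch on a toy graph; the zip pairing works because the out_degree() and in_degree() views iterate over nodes in the same order:

import networkx as nx

g = nx.DiGraph([(1, 2), (2, 3), (3, 1), (1, 3)])
feats = {node: (in_deg, out_deg)
         for (node, out_deg), (_, in_deg) in zip(g.out_degree(), g.in_degree())}
print(feats)  # {1: (1, 2), 2: (1, 1), 3: (2, 1)}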