Example #1
0
def ratio_of_typed_subjects(D,
                            edge_labels=np.empty(0),
                            stats=None,
                            print_stats=False):
    """Count the distinct typed subjects of ``D`` and their share of all subjects.

    A *typed subject* is the source vertex of at least one edge whose label
    equals the hash of the ``rdf:type`` key; a *subject* is any vertex with a
    non-zero out-degree.

        (1) number of all different typed subjects
        (2) ratio of typed subjects

    Args:
        D: graph to analyse. Its edge property ``c0`` is read only when
            ``edge_labels`` is missing or empty.
        edge_labels: one label per edge, aligned with ``D.get_edges()``.
            Any array-like is accepted; ``None`` or an empty sequence makes
            the function collect the labels from ``D.ep.c0``.
        stats: dict that receives the keys ``'typed_subjects'`` and
            ``'ratio_of_typed_subjects'``. A fresh dict is used when omitted,
            so results never leak between calls through a shared default.
        print_stats: when true, also print both figures.

    Returns:
        Sorted numpy array with the distinct source-vertex indices of all
        ``rdf:type`` edges.
    """
    if stats is None:
        stats = {}

    # Normalise list input so that `.size` and the element-wise comparison
    # below behave the same as for a numpy array.
    if edge_labels is not None:
        edge_labels = np.asarray(edge_labels)
    if edge_labels is None or edge_labels.size == 0:
        edge_labels = np.array([D.ep.c0[p] for p in D.get_edges()])

    # ae98476863dc6ec5 = http://www.w3.org/1999/02/22-rdf-syntax-ns#type
    # NOTE(review): str hashes are salted per interpreter process unless
    # PYTHONHASHSEED is fixed, so the stored labels presumably come from the
    # same process/seed -- confirm against the code that fills `c0`.
    rdf_type = hash('ae98476863dc6ec5')
    typed_edges_view = GraphView(D, efilt=edge_labels == rdf_type)
    S_C_G = np.unique(typed_edges_view.get_edges()[:, 0])

    if print_stats:
        print("number of different typed subjects S^{C}_G: %s" % S_C_G.size)

    # Out-degrees act as the vertex filter: zero hides a vertex, anything
    # else keeps it, which leaves exactly the subjects.
    S_G = GraphView(D, vfilt=D.get_out_degrees(D.get_vertices()))
    subject_count = S_G.num_vertices()

    # A graph without subjects has no typed subjects either; report 0.0
    # instead of failing with ZeroDivisionError.
    ratio = float(S_C_G.size) / subject_count if subject_count else 0.0

    if print_stats:
        print("ratio of typed subjects r_T(G): %s" % ratio)

    stats['typed_subjects'] = S_C_G.size
    stats['ratio_of_typed_subjects'] = ratio

    return S_C_G
Example #2
0
        def solve(g):
            """Recursively split the vertices of ``g`` into two winning regions.

            Returns a dict ``{0: W0, 1: W1}`` of vertex masks, one per
            player. The structure looks like Zielonka's recursive scheme for
            parity games -- the helpers on ``self`` are not visible here, so
            treat that reading as an assumption to confirm.
            """
            # Base case: nothing left to decide, hand back two fresh masks.
            if g.num_vertices() == 0:
                region_zero = self.pg.new_vertex_property("bool")
                region_one = self.pg.new_vertex_property("bool")
                return {0: region_zero, 1: region_one}

            top_priority = self.maxparity(g)
            player = top_priority % 2
            opponent = 1 - player

            top_vertices = self.vertices_with_priority(g, top_priority)

            # `player`-attractor of the top-priority vertices
            player_attr = self.attractor(top_vertices, player)
            sub = solve(GraphView(g, vfilt=self.complement(player_attr)))

            # Emptiness of the opponent's region is tested through a filtered
            # view; the original author noted that checking the mask directly
            # (`.ma.all()`) does not work.
            opponent_view = GraphView(g, vfilt=sub[opponent])
            if opponent_view.num_vertices() == 0:
                return {
                    player: self.maskplus(g, sub[player], player_attr),
                    opponent: sub[opponent],
                }

            # Otherwise remove the opponent's attractor and solve the rest.
            opponent_attr = self.attractor(sub[opponent], opponent)
            remainder = GraphView(g, vfilt=self.complement(opponent_attr))
            sub = solve(remainder)

            return {
                player: sub[player],
                opponent: self.maskplus(g, sub[opponent], opponent_attr),
            }
Example #3
0
def f_pseudo_diameter( D, stats, options={ 'features': [] } ):
    """Store the pseudo-diameter of ``D`` and its two end vertices in ``stats``.

    Nothing is computed unless ``'diameter'`` is listed in
    ``options['features']``. When it is, the pseudo-diameter is measured on
    the largest component of ``D``; if that component has at most one vertex
    the whole graph is used instead.

    Args:
        D: graph to analyse; must carry a vertex property called ``'name'``.
        stats: dict that receives ``'pseudo_diameter'``,
            ``'pseudo_diameter_src_vertex'`` and
            ``'pseudo_diameter_trg_vertex'``.
        options: dict with a ``'features'`` collection. It is only read,
            never modified, so the shared default is harmless.

    Returns:
        None. Results are written into ``stats``.
    """
    if 'diameter' not in options['features']:
        # Skip the component labelling entirely when the feature is off.
        return

    LC = label_largest_component(D)
    LCD = GraphView( D, vfilt=LC )

    if LCD.num_vertices() <= 1:
        # if largest component does practically not exist, use the whole graph
        dist, ends = pseudo_diameter(D)
    else:
        dist, ends = pseudo_diameter(LCD)

    stats['pseudo_diameter']=dist
    # The end vertices are looked up in D's own 'name' property in both
    # cases, as in the original code.
    stats['pseudo_diameter_src_vertex']=D.vertex_properties['name'][ends[0]]
    stats['pseudo_diameter_trg_vertex']=D.vertex_properties['name'][ends[1]]
    log.debug( 'done pseudo_diameter' )
Example #4
0
def repeated_predicate_lists(D,
                             edge_labels=np.empty(0),
                             stats=None,
                             print_stats=False,
                             return_collected=True):
    """Measure how often subjects share the same list of edge labels.

    For every source vertex the labels of its out-edges are gathered, in
    edge order, into a tuple and hashed. Subjects with equal hashes are
    treated as having the same predicate list.

    Args:
        D: graph to analyse. Its edge property ``c0`` is read only when
            ``edge_labels`` is missing or empty.
        edge_labels: one label per edge, aligned with ``D.get_edges()``.
            Lists and numpy arrays are both accepted.
        stats: dict that receives ``'repeated_predicate_lists'``,
            ``'max_predicate_list_degree'`` and
            ``'mean_predicate_list_degree'`` (only when ``return_collected``
            is true). A fresh dict is used when omitted.
        print_stats: when true (and ``return_collected`` is true) also print
            the figures.
        return_collected: when true, aggregate per predicate list and fill
            ``stats``; when false, return the per-subject table untouched.

    Returns:
        With ``return_collected``: a pandas Series indexed by predicate-list
        hash holding the number of subjects that use that list. Otherwise a
        DataFrame with column ``0`` (source vertex) and column ``1``
        (predicate-list hash).
    """
    if stats is None:
        stats = {}

    # Fetch the edge array once; it is needed for labels, sources and length.
    edges = D.get_edges()

    # np.size works for lists as well as arrays, unlike the `.size` attribute.
    if edge_labels is None or np.size(edge_labels) == 0:
        edge_labels = [D.ep.c0[p] for p in edges]

    # One row per edge: (source vertex, edge label). The frame always has
    # exactly these two columns, so they are named explicitly instead of
    # being derived from the width of the edge array.
    df = pd.DataFrame(data=list(zip(edges[:, 0], edge_labels)),
                      index=np.arange(0, edges.shape[0]),
                      columns=[0, 1])

    # Collapse each subject's labels into a single hash of the label tuple.
    df = df.groupby(0)[1].apply(tuple).apply(hash).to_frame().reset_index()

    if return_collected:
        # Number of subjects per distinct predicate list.
        df = df.groupby(1).count()[0]

        # filter those vertices v | out-degree(v) > 0
        S = GraphView(D, vfilt=D.get_out_degrees(D.get_vertices()))
        subject_count = S.num_vertices()

        # .. is defined as the ratio of repeated predicate lists from the
        # total lists in the graph G. Without any subject nothing can repeat,
        # so report 0.0 rather than dividing by zero.
        if subject_count:
            repeated_ratio = 1 - (float(df.size) / subject_count)
        else:
            repeated_ratio = 0.0

        max_degree, mean_degree = df.max(), df.mean()

        if print_stats:
            print("(Eq.17) ratio of repeated predicate lists r_L(G): %f" %
                  repeated_ratio)
            print(
                "(Eq.18/19) predicate list degree deg_{PL}(G). max: %f, mean: %f"
                % (max_degree, mean_degree))

        stats['repeated_predicate_lists'] = repeated_ratio
        stats['max_predicate_list_degree'] = max_degree
        stats['mean_predicate_list_degree'] = mean_degree

    return df
def is_tree(t):
    """Return True when ``t``, viewed as an undirected graph, is a tree.

    Three conditions are checked:

    * the vertex count equals the edge count plus one,
    * every vertex has at least one incident edge (so a graph consisting of
      a single isolated vertex is rejected, as before),
    * every vertex is reachable from the first one.

    The reachability test is what rules out disconnected inputs such as a
    triangle plus a separate edge, which satisfy the first two conditions
    without being a tree.

    Args:
        t: graph (or graph view) to test; edge direction is ignored.

    Returns:
        bool: True if ``t`` is a tree with at least two vertices.
    """
    # to undirected
    t = GraphView(t, directed=False)

    # num nodes = num edges+1
    if t.num_vertices() != (t.num_edges() + 1):
        return False

    # all nodes have degree > 0
    vs = list(map(int, t.vertices()))
    degs = t.degree_property_map('out').a[vs]
    if not np.all(degs > 0):
        return False

    # Connectivity: depth-first walk from an arbitrary vertex. `vs` cannot be
    # empty here because the vertex count equals the edge count plus one.
    # On an undirected view the out-neighbours are all neighbours.
    seen = {vs[0]}
    stack = [vs[0]]
    while stack:
        current = t.vertex(stack.pop())
        for neighbour in current.out_neighbors():
            idx = int(neighbour)
            if idx not in seen:
                seen.add(idx)
                stack.append(idx)

    return len(seen) == len(vs)
Example #6
0
def reduce_ratio_of_typed_subjects(vals, D, S_G, stats=None):
    """Write the typed-subject count and ratio for ``D`` into ``stats``.

    Args:
        vals: collection of typed subjects; only its length is used.
        D: graph whose vertices with non-zero out-degree count as subjects.
        S_G: ignored. The subject view is always rebuilt from ``D``; the
            parameter is kept so existing callers keep working.
        stats: dict that receives ``'typed_subjects'`` and
            ``'ratio_of_typed_subjects'``. A fresh dict is used when
            omitted, so results never leak between calls.

    Returns:
        None. Results are written into ``stats``.
    """
    if stats is None:
        stats = {}

    # Out-degrees act as the vertex filter: zero hides a vertex.
    S_G = GraphView(D, vfilt=D.get_out_degrees(D.get_vertices()))

    typed_count = len(vals)
    subject_count = S_G.num_vertices()

    # No subjects means no typed subjects; report 0.0 instead of raising
    # ZeroDivisionError.
    ratio = float(typed_count) / subject_count if subject_count else 0.0

    stats['typed_subjects'] = typed_count
    stats['ratio_of_typed_subjects'] = ratio