Esempio n. 1
0
def ratio_of_typed_subjects(D,
                            edge_labels=np.empty(0),
                            stats=None,
                            print_stats=False):
    """Count the distinct typed subjects of ``D`` and their ratio to all subjects.

    Computes:
        (1) the number of distinct source vertices of edges labelled
            ``rdf:type`` (the "typed subjects", S^{C}_G), and
        (2) that number divided by the number of vertices in the view of
            ``D`` filtered by out-degree (S_G).

    Args:
        D: graph exposing ``ep.c0``, ``get_edges()``, ``get_vertices()`` and
            ``get_out_degrees()``; it is also handed to ``GraphView``.
        edge_labels: array with one label per edge of ``D``. When ``None``
            or empty, it is built from the ``c0`` edge property.
        stats: dict that receives ``'typed_subjects'`` and
            ``'ratio_of_typed_subjects'``. A new dict is used when omitted.
        print_stats: when true, both values are also printed.

    Returns:
        Array of the unique source vertices of all ``rdf:type`` edges.
    """
    # A ``dict()`` default is created once and shared by every call that
    # omits ``stats``; create a fresh one per call instead.
    if stats is None:
        stats = {}

    if edge_labels is None or edge_labels.size == 0:
        edge_labels = np.array([D.ep.c0[p] for p in D.get_edges()])

    # ae98476863dc6ec5 = http://www.w3.org/1999/02/22-rdf-syntax-ns#type
    # NOTE(review): if `hash` is the builtin, str hashes are salted per
    # process on Python 3 (unless PYTHONHASHSEED is fixed) -- confirm the
    # labels in D.ep.c0 were produced so that this comparison can match.
    rdf_type = hash('ae98476863dc6ec5')

    # Keep only the rdf:type edges; column 0 of the edge array holds the
    # source vertex of each edge.
    typed_edges_view = GraphView(D, efilt=edge_labels == rdf_type)
    S_C_G = np.unique(typed_edges_view.get_edges()[:, 0])

    if print_stats:
        print("number of different typed subjects S^{C}_G: %s" % S_C_G.size)

    # The out-degree array is used as the vertex filter (presumably keeping
    # the vertices with non-zero out-degree, i.e. all subjects -- TODO confirm).
    S_G = GraphView(D, vfilt=D.get_out_degrees(D.get_vertices()))
    num_subjects = S_G.num_vertices()

    # With no subject vertices there is nothing to divide by; report 0.0
    # rather than raising ZeroDivisionError.
    ratio = float(S_C_G.size) / num_subjects if num_subjects else 0.0

    if print_stats:
        print("ratio of typed subjects r_T(G): %s" % ratio)

    stats['typed_subjects'] = S_C_G.size
    stats['ratio_of_typed_subjects'] = ratio

    return S_C_G
Esempio n. 2
0
def number_of_classes(D,
                      edge_labels=np.empty(0),
                      stats=None,
                      print_stats=False):
    """Count the distinct classes, i.e. the targets of ``rdf:type`` edges.

    Args:
        D: graph exposing ``ep.c0`` and ``get_edges()``; it is also handed
            to ``GraphView``.
        edge_labels: array with one label per edge of ``D``. When ``None``
            or empty, it is built from the ``c0`` edge property.
        stats: dict that receives ``'distinct_classes'``. A new dict is
            used when omitted.
        print_stats: when true, the count is also printed.

    Returns:
        Array of the unique target vertices of all ``rdf:type`` edges.
    """
    # A ``dict()`` default is created once and shared by every call that
    # omits ``stats``; create a fresh one per call instead.
    if stats is None:
        stats = {}

    if edge_labels is None or edge_labels.size == 0:
        edge_labels = np.array([D.ep.c0[p] for p in D.get_edges()])

    # ae98476863dc6ec5 = http://www.w3.org/1999/02/22-rdf-syntax-ns#type
    # NOTE(review): if `hash` is the builtin, str hashes are salted per
    # process on Python 3 (unless PYTHONHASHSEED is fixed) -- confirm the
    # labels in D.ep.c0 were produced so that this comparison can match.
    rdf_type = hash('ae98476863dc6ec5')

    # Keep only the rdf:type edges; column 1 of the edge array holds the
    # target vertex of each edge.
    typed_edges_view = GraphView(D, efilt=edge_labels == rdf_type)
    C_G = np.unique(typed_edges_view.get_edges()[:, 1])

    if print_stats:
        print("number of different classes C_G: %s" % C_G.size)

    stats['distinct_classes'] = C_G.size

    return C_G