def ratio_of_typed_subjects(D, edge_labels=np.empty(0), stats=None, print_stats=False):
    """Count the distinct typed subjects of ``D`` and their share of all subjects.

    (1) number of all different typed subjects
    (2) ratio of typed subjects

    A typed subject is the first endpoint (column 0 of ``get_edges()``) of
    at least one edge whose label equals the hash used for ``rdf:type``.
    A subject is any vertex with a non-zero out-degree.

    Args:
        D: graph whose edge property ``c0`` holds the edge labels.
        edge_labels: numpy array of edge labels used to build the edge
            filter.  When ``None`` or empty, the labels are read from
            ``D.ep.c0``.
        stats: dict receiving ``'typed_subjects'`` and
            ``'ratio_of_typed_subjects'``.  A fresh dict is created when
            omitted, so results no longer leak between calls through a
            shared default.
        print_stats: when true, print both measures to stdout.

    Returns:
        Array of the unique first-endpoint vertex indices of the
        ``rdf:type`` edges.
    """
    if stats is None:
        stats = {}

    if edge_labels is None or edge_labels.size == 0:
        edge_labels = np.array([D.ep.c0[p] for p in D.get_edges()])

    # ae98476863dc6ec5 = http://www.w3.org/1999/02/22-rdf-syntax-ns#type
    # NOTE(review): str hashes are salted per process unless PYTHONHASHSEED
    # is fixed -- confirm the labels in D were hashed under the same seed.
    rdf_type = hash('ae98476863dc6ec5')

    typed_edges = GraphView(D, efilt=edge_labels == rdf_type)
    S_C_G = np.unique(typed_edges.get_edges()[:, 0])

    if print_stats:
        print("number of different typed subjects S^{C}_G: %s" % S_C_G.size)

    # Keep only the vertices with out-degree > 0 (zero degrees act as False).
    S_G = GraphView(D, vfilt=D.get_out_degrees(D.get_vertices()))
    num_subjects = S_G.num_vertices()

    # A graph without any subject has no typed subjects either; report 0.0
    # instead of raising ZeroDivisionError.
    ratio = float(S_C_G.size) / num_subjects if num_subjects else 0.0

    if print_stats:
        print("ratio of typed subjects r_T(G): %s" % ratio)

    stats['typed_subjects'], stats['ratio_of_typed_subjects'] = S_C_G.size, ratio

    return S_C_G
def solve(g):
    """Recursively split the vertices of ``g`` into two boolean vertex masks.

    Returns a dict with keys ``0`` and ``1``, each mapping to a vertex mask.
    The helpers (``maxparity``, ``vertices_with_priority``, ``attractor``,
    ``complement``, ``maskplus``) come from the enclosing ``self``.

    NOTE(review): the shape of the recursion looks like Zielonka's recursive
    algorithm for parity games -- confirm against the helper definitions.
    """
    # Base case: an empty (sub)graph yields two fresh bool vertex properties.
    if g.num_vertices() == 0:
        return {
            0: self.pg.new_vertex_property("bool"),
            1: self.pg.new_vertex_property("bool"),
        }

    top_priority = self.maxparity(g)
    player = top_priority % 2
    opponent = 1 - player

    # Attractor (for `player`) of the vertices carrying the top priority.
    # NOTE(review): `g` is not passed to attractor(); verify that it is
    # evaluated on the intended (sub)graph.
    top_vertices = self.vertices_with_priority(g, top_priority)
    player_attr = self.attractor(top_vertices, player)

    partial = solve(GraphView(g, vfilt=self.complement(player_attr)))
    opponent_mask = partial[opponent]

    # A filtered view is built just to check whether the opponent's mask is
    # empty (a direct `.ma.all()` test on the mask was tried and did not work).
    if GraphView(g, vfilt=opponent_mask).num_vertices() == 0:
        return {
            player: self.maskplus(g, partial[player], player_attr),
            opponent: opponent_mask,
        }

    # Otherwise remove the opponent's attractor and solve the remainder.
    opponent_attr = self.attractor(opponent_mask, opponent)
    partial = solve(GraphView(g, vfilt=self.complement(opponent_attr)))
    return {
        player: partial[player],
        opponent: self.maskplus(g, partial[opponent], opponent_attr),
    }
def f_pseudo_diameter(D, stats, options=None):
    """Store the pseudo-diameter of ``D`` and its two end vertices in ``stats``.

    Nothing is computed unless ``'diameter'`` is listed in
    ``options['features']``.  The measure is taken on the largest component
    of ``D``; if that component has at most one vertex, the whole graph is
    used instead.

    Args:
        D: graph with a ``'name'`` vertex property.
        stats: dict receiving ``'pseudo_diameter'``,
            ``'pseudo_diameter_src_vertex'`` and
            ``'pseudo_diameter_trg_vertex'``.
        options: dict with a ``'features'`` collection.  Defaults to an
            empty feature list, in which case the function does nothing.
    """
    if options is None:
        options = {'features': []}

    # Skip the component labelling entirely when the feature is not wanted.
    if 'diameter' not in options['features']:
        return

    LC = label_largest_component(D)
    LCD = GraphView(D, vfilt=LC)

    if LCD.num_vertices() <= 1:
        # if largest component does practically not exist, use the whole graph
        dist, ends = pseudo_diameter(D)
    else:
        dist, ends = pseudo_diameter(LCD)

    stats['pseudo_diameter'] = dist
    # D may be used in both cases
    stats['pseudo_diameter_src_vertex'] = D.vertex_properties['name'][ends[0]]
    stats['pseudo_diameter_trg_vertex'] = D.vertex_properties['name'][ends[1]]

    log.debug('done pseudo_diameter')
def repeated_predicate_lists(D, edge_labels=np.empty(0), stats=None, print_stats=False, return_collected=True):
    """Measure how often subjects of ``D`` share the same predicate list.

    The predicate list of a subject is the tuple of labels of the edges
    whose first endpoint is that subject, in edge order.  Each list is
    reduced to its hash, and the number of subjects per distinct hash is
    counted.

    Args:
        D: graph whose edge property ``c0`` holds the edge labels.
        edge_labels: edge labels, presumably aligned with
            ``D.get_edges()`` -- verify against callers.  When ``None`` or
            empty, the labels are read from ``D.ep.c0``.
        stats: dict receiving ``'repeated_predicate_lists'``,
            ``'max_predicate_list_degree'`` and
            ``'mean_predicate_list_degree'``.  A fresh dict is created when
            omitted.
        print_stats: when true, print the measures to stdout.
        return_collected: when true, return the per-list subject counts;
            otherwise return the frame of (subject, predicate-list hash)
            rows.

    Returns:
        ``pandas.Series`` of subject counts indexed by predicate-list hash
        if ``return_collected``, else a ``pandas.DataFrame`` with columns
        ``0`` (subject) and ``1`` (predicate-list hash).
    """
    if stats is None:
        stats = {}

    edges = D.get_edges()

    if edge_labels is None or edge_labels.size == 0:
        edge_labels = [D.ep.c0[p] for p in edges]

    # filter those vertices v | out-degree(v) > 0
    S = GraphView(D, vfilt=D.get_out_degrees(D.get_vertices()))

    # One row per edge: (subject, label).  The data always has exactly two
    # columns, so the column labels are fixed rather than derived from the
    # width of the edge array.
    df = pd.DataFrame(data=list(zip(edges[:, 0], edge_labels)),
                      index=np.arange(0, edges.shape[0]),
                      columns=[0, 1])

    # One row per subject: (subject, hash of its predicate tuple).
    df = df.groupby(0)[1].apply(tuple).apply(hash).to_frame().reset_index()

    # Subjects per distinct predicate list.  Always computed, so the stats
    # are meaningful regardless of `return_collected`.
    counts = df.groupby(1).count()[0]

    # .. is defined as the ratio of repeated predicate lists from the total lists in the graph G
    ratio = 1 - (float(counts.size) / S.num_vertices())
    max_degree, mean_degree = counts.max(), counts.mean()

    if print_stats:
        print("(Eq.17) ratio of repeated predicate lists r_L(G): %f" % ratio)
        print("(Eq.18/19) predicate list degree deg_{PL}(G). max: %f, mean: %f" % (max_degree, mean_degree))

    stats['repeated_predicate_lists'] = ratio
    stats['max_predicate_list_degree'], stats['mean_predicate_list_degree'] = max_degree, mean_degree

    return counts if return_collected else df
def is_tree(t):
    """Return True if ``t``, viewed as undirected, passes two tree checks.

    The checks are: the vertex count equals the edge count plus one, and
    every vertex has a degree greater than zero.  Connectivity is not
    tested explicitly, and a graph consisting of a single isolated vertex
    is reported as not a tree (its only vertex has degree zero).
    """
    undirected = GraphView(t, directed=False)

    # A tree on n vertices has exactly n - 1 edges.
    if undirected.num_vertices() != undirected.num_edges() + 1:
        return False

    # Every vertex must be incident to at least one edge.  The degree array
    # is indexed by the view's own vertex indices.
    vertex_ids = [int(v) for v in undirected.vertices()]
    degrees = undirected.degree_property_map('out').a[vertex_ids]
    return bool(np.all(degrees > 0))
def reduce_ratio_of_typed_subjects(vals, D, S_G, stats=None):
    """Write the typed-subject count and ratio for ``D`` into ``stats``.

    Args:
        vals: collection of typed subjects; only its length is used.
        D: graph from which the subjects (vertices with non-zero
            out-degree) are derived.
        S_G: ignored.  The subject view is always recomputed from ``D``;
            the parameter is kept so existing callers keep working.
        stats: dict receiving ``'typed_subjects'`` and
            ``'ratio_of_typed_subjects'``.  A fresh dict is created when
            omitted.
    """
    if stats is None:
        stats = {}

    # Keep only the vertices with out-degree > 0 (zero degrees act as False).
    subjects = GraphView(D, vfilt=D.get_out_degrees(D.get_vertices()))

    num_typed = len(vals)
    num_subjects = subjects.num_vertices()

    # Report 0.0 for a graph without subjects instead of raising
    # ZeroDivisionError.
    ratio = float(num_typed) / num_subjects if num_subjects else 0.0

    stats['typed_subjects'], stats['ratio_of_typed_subjects'] = num_typed, ratio