def fitData(data):
    """Fit a power law to data, under the framework developed at:
    http://arxiv.org/abs/1305.0215"""
    c("plaw fit start")
    fit = powerlaw.Fit(data, discrete=True)
    c("plaw fit end")
    # compare the power-law fit against every other supported distribution
    dists = list(fit.supported_distributions.keys())
    dists.remove("power_law")
    for dist in dists:
        c("plaw compare start: " + dist)
        # distribution_compare returns the loglikelihood ratio R and its p-value
        R, p = fit.distribution_compare("power_law", dist)
        setattr(fit, dist + "_R", R)
        setattr(fit, dist + "_p", p)
        c("plaw compare end: " + dist)
    del dist, R, p
    return locals()
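# Usage sketch (illustrative; not part of the original module). It assumes numpy
# is available and that the module-level `powerlaw` and `c` imports above are in
# place; the sample and variable names are hypothetical.
def _demo_fitData():
    import numpy as np
    sample = np.random.zipf(a=2.0, size=5000)       # synthetic heavy-tailed data
    fitted = fitData(sample)
    fit = fitted["fit"]
    print("alpha =", fit.power_law.alpha)           # estimated power-law exponent
    print("xmin  =", fit.power_law.xmin)            # estimated lower cutoff
    # positive R favors the power law over the alternative distribution
    print("vs lognormal: R =", fit.lognormal_R, "p =", fit.lognormal_p)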
def convert(self, notes, reference):
    # build the lookup table lazily on first use
    if not hasattr(self, 'notes_dict'):
        self.makeDict()
    # shift each note by the reference before looking it up
    notes_ = [reference + note for note in notes]
    notes__ = [self.notes_dict[note] for note in notes_]
    c('notes', notes)
    c('notes_', notes_)
    c('notes__', notes__)
    return notes__
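# Illustrative sketch only (not from the original code): a hypothetical container
# exercising convert(). The real class and its makeDict() are not shown in this
# excerpt, so this stub assumes `notes` and `reference` are integer offsets and
# that notes_dict maps the shifted values to note names.
class _ConvertDemo:
    convert = convert  # reuse the function above as a method

    def makeDict(self):
        # hypothetical table: pitch number -> note name
        names = ('C', 'C#', 'D', 'D#', 'E', 'F',
                 'F#', 'G', 'G#', 'A', 'A#', 'B')
        self.notes_dict = {i: names[i % 12] for i in range(128)}


def _demo_convert():
    demo = _ConvertDemo()
    print(demo.convert([0, 4, 7], 60))  # a C major triad if 60 is middle C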
def probeOntology(endpoint_url, graph_urns, final_dir, one_datatype=True):
    if not os.path.isdir(final_dir):
        os.makedirs(final_dir)
    client = P.rdf.sparql.classes.LegacyClient(endpoint_url)
    from_ = ''
    for graph_urn in graph_urns:
        from_ += '\nFROM <%s>' % (graph_urn,)

    def mkQuery(query, plain=True):
        # inject the FROM clauses right before WHERE and run the query
        query_ = query.split('WHERE')
        query__ = (query_[0], from_, '\nWHERE ' + query_[1])
        query___ = ''.join(query__)
        result = client.retrieveQuery(query___)
        if plain:
            return pl(result)
        return result['results']['bindings']

    c('find all classes')
    q = "SELECT DISTINCT ?class WHERE { ?s a ?class . }"
    # classes = pl(client.retrieveQuery(prefix + q))
    classes = mkQuery(q)

    c('antecedents, consequents and restrictions of each class')
    neighbors = {}
    triples = []
    existential_restrictions = {}
    universal_restrictions = {}
    for aclass in classes:
        # antecedents: classes and properties that point to instances of aclass
        q = "SELECT DISTINCT ?cs ?p WHERE { ?i a <%s> . ?s ?p ?i . OPTIONAL { ?s a ?cs . } }" % (aclass,)
        antecedent_property = mkQuery(q)
        # q = "SELECT DISTINCT ?ap (datatype(?o) as ?do) WHERE { ?i a <%s> . ?i ?ap ?o . filter (datatype(?o) != '') }" % (aclass,)
        # consequent_property = mkQuery(q)
        # q = "SELECT DISTINCT ?ap ?co WHERE { ?i a <%s> . ?i ?ap ?o . ?o a ?co . }" % (aclass,)
        # consequent_property_ = mkQuery(q)
        # consequents: properties leaving aclass and the class or datatype they reach
        q = "SELECT DISTINCT ?ap ?co (datatype(?o) as ?do) WHERE { ?i a <%s> . ?i ?ap ?o . OPTIONAL { ?o a ?co . } }" % (aclass,)
        consequent_property__ = mkQuery(q, 0)
        consequent_property = [[i['ap']['value'], i['do']['value']]
                               for i in consequent_property__ if 'do' in i]
        consequent_property_ = [[i['ap']['value'], i['co']['value']]
                                for i in consequent_property__ if 'co' in i]
        neighbors[aclass] = (antecedent_property,
                             consequent_property + consequent_property_)
        # neighbors[aclass] = (antecedent_property, dict(consequent_property, **consequent_property_))

        # class restrictions
        q = "SELECT DISTINCT ?p WHERE { ?s a <%s> . ?s ?p ?o . }" % (aclass,)
        props_c = mkQuery(q)
        # q = "SELECT DISTINCT ?s WHERE { ?s a <%s> }" % (aclass,)
        # inds = mkQuery(q)
        q = "SELECT (COUNT(DISTINCT ?s) as ?cs) WHERE { ?s a <%s> }" % (aclass,)
        ninds = pl(client.retrieveQuery(q))[0]
        for pc in props_c:
            if '22-rdf-syntax' in pc:
                continue
            # q = "SELECT DISTINCT ?s ?co (datatype(?o) as ?do) WHERE { ?s a <%s> . ?s <%s> ?o . OPTIONAL { ?o a ?co . } }" % (aclass, pc)
            q = "SELECT DISTINCT ?co (datatype(?o) as ?do) WHERE { ?s a <%s> . ?s <%s> ?o . OPTIONAL { ?o a ?co . } }" % (aclass, pc)
            inds2 = mkQuery(q, 0)
            # inds2_ = set([i["s"]["value"] for i in inds2])
            objs = set(i["co"]["value"] for i in inds2 if "co" in i)
            vals = set(i["do"]["value"] for i in inds2 if "do" in i)
            q = "SELECT (COUNT(DISTINCT ?s) as ?cs) WHERE { ?s a <%s> . ?s <%s> ?o . }" % (aclass, pc)
            ninds2 = pl(client.retrieveQuery(q))[0]
            # if len(inds) == len(inds2_):  # existential
            if ninds == ninds2:  # existential: every individual of aclass has pc
                if vals:
                    ob = list(vals)[0]
                elif objs:
                    ob = list(objs)[0]
                else:
                    ob = 0
                if ob:
                    B = r.BNode()
                    triples += [(aclass, rdfs.subClassOf, B),
                                (B, a, owl.Restriction),
                                (B, owl.onProperty, pc),
                                (B, owl.someValuesFrom, ob)]
                    if aclass in existential_restrictions:
                        existential_restrictions[aclass].append((pc, ob))
                    else:
                        existential_restrictions[aclass] = [(pc, ob)]
            q = ("SELECT (COUNT(DISTINCT ?s) as ?cs) WHERE { ?s <%s> ?o . "
                 "?s a ?ca . FILTER(str(?ca) != '%s') }" % (pc, aclass))
            ninds3 = pl(client.retrieveQuery(q))[0]
            # q = "SELECT DISTINCT ?s WHERE { ?s <%s> ?o . }" % (pc,)
            # inds3 = mkQuery(q)
            # if set(inds) == set(inds3):  # universal
            # if all([i in set(inds) for i in inds3]):  # universal
            # if ninds == ninds3:  # universal
            if ninds3 == 0:  # universal: no individual outside aclass uses pc
                if vals:
                    ob = list(vals)[0]
                elif objs:
                    ob = list(objs)[0]
                else:
                    ob = 0
                if ob:
                    B = r.BNode()
                    triples += [(aclass, rdfs.subClassOf, B),
                                (B, a, owl.Restriction),
                                (B, owl.onProperty, pc),
                                (B, owl.allValuesFrom, ob)]
                    if aclass in universal_restrictions:
                        universal_restrictions[aclass].append((pc, ob))
                    else:
                        universal_restrictions[aclass] = [(pc, ob)]
    del q, aclass, antecedent_property, consequent_property

    c('find properties')
    q = "SELECT DISTINCT ?p WHERE { ?s ?p ?o }"
    # properties = pl(client.retrieveQuery(prefix + q))
    properties = mkQuery(q)
    # properties_ = [i.split("/")[-1] for i in properties]

    c('check if property is functional and get range and domain')
    functional_properties = set()
    for prop in properties:
        # functional iff the only distinct per-subject object count is 1
        q = 'SELECT DISTINCT (COUNT(?o) as ?co) WHERE { ?s <%s> ?o } GROUP BY ?s' % (prop,)
        is_functional = mkQuery(q)
        if len(is_functional) == 1 and is_functional[0] == 1:
            triples.append((prop, a, owl.FunctionalProperty))
            functional_properties.add(prop)
        # datatype or object property
        suj = mkQuery("SELECT DISTINCT ?cs WHERE { ?s <%s> ?o . ?s a ?cs . }" % (prop,))
        # obj = mkQuery("SELECT DISTINCT ?co (datatype(?o) as ?do) WHERE { ?s <%s> ?o . OPTIONAL { ?o a ?co . } }" % (prop,))
        obj1 = mkQuery("SELECT DISTINCT ?co WHERE { ?s <%s> ?o . ?o a ?co . }" % (prop,))
        obj2 = mkQuery("SELECT DISTINCT (datatype(?o) as ?do) WHERE { ?s <%s> ?o . }" % (prop,))
        obj = obj1 + obj2
        if len(obj) and ("XMLS" in obj[0]):
            triples.append((prop, a, owl.DatatypeProperty))
        else:
            triples.append((prop, a, owl.ObjectProperty))
        if len(suj) > 1:
            B = r.BNode()
            triples.append((prop, rdfs.domain, B))
            for ss in suj:
                triples.append((B, owl.unionOf, ss))
        elif suj:
            triples.append((prop, rdfs.domain, suj[0]))
        if len(obj) > 1:
            B = r.BNode()
            triples.append((prop, rdfs.range, B))
            for oo in obj:
                triples.append((B, owl.unionOf, oo))
        elif obj:
            triples.append((prop, rdfs.range, obj[0]))
        # for drawing
        # prop_ = prop.split("/")[-1]
        # suj_ = [i.split('/')[-1] for i in suj]
        # obj_ = [i.split('/')[-1] for i in obj]

    # Drawing
    c('started drawing')
    A = gv.AGraph(directed=True, strict=False)
    q = """PREFIX po: <http://purl.org/socialparticipation/po/>
    SELECT DISTINCT ?snap WHERE { { ?i po:snapshot ?snap } UNION { ?snap po:snapshotID ?idfoo } }"""
    # SELECT DISTINCT ?snap WHERE { ?i po:snapshot ?snap }"""
    snap = mkQuery(q)[0]
    q = """PREFIX po: <http://purl.org/socialparticipation/po/>
    SELECT ?provenance WHERE { <%s> po:socialProtocol ?provenance }""" % (snap,)
    # WHERE { { <%s> po:socialProtocolTag ?provenance } UNION
    #         { <%s> po:humanizedName ?provenance } }""" % (snap, snap)
    provenance = pl(client.retrieveQuery(q))[0]
    # A.graph_attr["label"] = r"General diagram of ontological structure from %s in the http://purl.org/socialparticipation/participationontology/ namespace.\nGreen edge denotes existential restriction;\ninverted edge tip denotes universal restriction;\nfull edge (non-dashed) denotes functional property." % (provenance,)
    edge_counter = 1
    node_counter = 1
    data_nodes = {}
    for aclass in classes:
        aclass_ = aclass.split('/')[-1]
        if aclass_ not in A.nodes():
            A.add_node(aclass_, style="filled")
            n = A.get_node(aclass_)
            n.attr['color'] = "#A2F3D1"
        neigh = neighbors[aclass]
        # for i in range(len(neigh[0])):  # antecedents
        #     label = neigh[0][i][0].split("/")[-1]
        #     elabel = neigh[0][i][1]
        #     elabel_ = elabel.split("/")[-1]
        #     if label not in A.nodes():
        #         A.add_node(label, style="filled")
        #         n = A.get_node(label)
        #         n.attr['color'] = "#A2F3D1"
        #     ekey = '{}-{}-{}'.format(label, aclass_, edge_counter)
        #     edge_counter += 1
        #     A.add_edge(label, aclass_, ekey)
        #     e = A.get_edge(label, aclass_, key=ekey)
        #     e.attr["label"] = elabel_
        #     e.attr["penwidth"] = 2.
        #     e.attr["arrowsize"] = 2.
        #     if elabel not in functional_properties:
        #         e.attr["style"] = "dashed"
        #     if neigh[0][i][0] in existential_restrictions.keys():
        #         restriction = existential_restrictions[neigh[0][i][0]]
        #         prop = [iii[0] for iii in restriction]
        #         obj = [iii[1] for iii in restriction]
        #         if (elabel in prop) and (obj[prop.index(elabel)] == aclass):
        #             e.attr["color"] = "#A0E0A0"
        #     if neigh[0][i][0] in universal_restrictions.keys():
        #         restriction = universal_restrictions[neigh[0][i][0]]
        #         prop = [iii[0] for iii in restriction]
        #         obj = [iii[1] for iii in restriction]
        #         if (elabel in prop) and (obj[prop.index(elabel)] == aclass):
        #             e.attr["color"] = "inv"
        for i in range(len(neigh[1])):  # consequents
            label = neigh[1][i][1].split("/")[-1]
            elabel = neigh[1][i][0]
            elabel_ = elabel.split('/')[-1]
            if "XMLS" in label:
                # literal-valued consequent: optionally collapse datatypes into shared nodes
                color = "#FFE4AA"
                if one_datatype:
                    if label in data_nodes:
                        label_ = data_nodes[label]
                    else:
                        label_ = node_counter
                        node_counter += 1
                        data_nodes[label] = label_
                else:
                    label_ = node_counter
                    node_counter += 1
            else:
                label_ = label
                color = "#A2F3D1"
            if label_ not in A.nodes():
                A.add_node(label_, style="filled")
                n = A.get_node(label_)
                n.attr['label'] = label.split("#")[-1]
                n.attr['color'] = color
            ekey = '{}-{}-{}'.format(aclass_, label_, edge_counter)
            edge_counter += 1
            A.add_edge(aclass_, label_, ekey)
            e = A.get_edge(aclass_, label_, key=ekey)
            e.attr["label"] = elabel_
            e.attr["color"] = color
            e.attr["penwidth"] = 2
            if r.URIRef(elabel) not in functional_properties:
                e.attr["style"] = "dashed"
            if aclass in existential_restrictions:
                restrictions = existential_restrictions[aclass]
                prop = [iii[0] for iii in restrictions]
                if r.URIRef(elabel) in prop:
                    e.attr["color"] = "#A0E0A0"
            if aclass in universal_restrictions:
                restrictions = universal_restrictions[aclass]
                prop = [iii[0] for iii in restrictions]
                if r.URIRef(elabel) in prop:
                    e.attr["arrowhead"] = "inv"
                    e.attr["arrowsize"] = 2.

    # A.draw(os.path.join(final_dir, "{}.png".format(final_dir)), prog="dot")
    # try:
    #     A.draw(os.path.join(final_dir, "{}_circo.png".format(final_dir)), prog="circo")
    # except:
    #     pass
    # A.draw(os.path.join(final_dir, "{}_twopi.png".format(final_dir)), prog="twopi", args="-Granksep=4")
    # A.write(os.path.join(final_dir, "{}.dot".format(final_dir)))
    A.draw(os.path.join(final_dir, "draw.png"), prog="dot")
    try:
        A.draw(os.path.join(final_dir, "draw_circo.png"), prog="circo")
    except Exception:
        pass  # circo occasionally fails on dense graphs
    A.draw(os.path.join(final_dir, "draw_twopi.png"), prog="twopi", args="-Granksep=4")
    A.write(os.path.join(final_dir, "draw.dot"))

    # persist the induced ontology through the percolation context API
    # for triple in triples:
    #     g.add(triple)
    P.start(False)
    P.context('ontology', 'remove')
    P.add(triples, 'ontology')
    g = P.context('ontology')
    g.serialize(os.path.join(final_dir, 'ontology.owl'))
    g.serialize(os.path.join(final_dir, 'ontology.ttl'), 'turtle')
    return locals()
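# Minimal sketch (illustrative, not part of probeOntology): how the existential-
# restriction triples assembled above look when materialized with plain rdflib.
# The EX namespace and the class and property names below are hypothetical.
def _demo_restriction_triples():
    import rdflib
    from rdflib import BNode, Namespace, RDF, RDFS, OWL

    EX = Namespace("http://example.org/")
    g_demo = rdflib.Graph()
    b = BNode()
    # "every EX.Post has at least one EX.author that is an EX.Person"
    g_demo.add((EX.Post, RDFS.subClassOf, b))
    g_demo.add((b, RDF.type, OWL.Restriction))
    g_demo.add((b, OWL.onProperty, EX.author))
    g_demo.add((b, OWL.someValuesFrom, EX.Person))
    print(g_demo.serialize(format="turtle"))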
# upload one snapshot file to the Virtuoso graph-CRUD endpoint
# (the def header and signature are inferred from the uploadFile(...) calls below;
#  `platform` is unused in this body)
def uploadFile(platform, tdir, adir, afile):
    # os.system('curl --digest --user dba:demo --url "http://192.168.0.11:8890/sparql-graph-crud-auth?graph-uri=urn:{}" -T {}/{}/{}'.format(afile, tdir, adir, afile))
    # os.system('curl --digest --user dba:dba --url "http://192.168.0.11:8890/sparql-graph-crud-auth?graph-uri=urn:{}" -T {}/{}/{}'.format(afile, tdir, adir, afile))
    os.system('curl --digest --user dba:dba --url "http://localhost:8890/sparql-graph-crud-auth?graph-uri=urn:{}" -T {}/{}/{}'.format(afile, tdir, adir, afile))
    # os.system('curl --digest --user dba:dba --url "http://localhost:8890/sparql-graph-crud-auth?graph-uri=urn:percolation" -T {}/{}/{}'.format(tdir, adir, afile))


ans = input('add twitter translates (y/n)')
if ans[0] == 'y':
    tdir = '/home/r/repos/social/tests/twitter_snapshots'
    dirs = os.listdir(tdir)
    # dirs = [i for i in dirs if not ('Snap' in i or 'science' in i or 'Fora' in i)]
    for adir in dirs:
        files = os.listdir('{}/{}'.format(tdir, adir))
        # files_ = [i for i in files if i.endswith('.ttl') and 'Meta' not in i]
        files_ = [i for i in files if i.endswith('.ttl')]
        for afile in files_:
            c(afile, 'pre')
            uploadFile(platform, tdir, adir, afile)
            c(afile, 'pos')

ans = input('add facebook translates (y/n)')
if ans[0] == 'y':
    tdir = '/home/r/repos/social/tests/facebook_snapshots'
    dirs = os.listdir(tdir)
    for adir in dirs:
        files = os.listdir('{}/{}'.format(tdir, adir))
        files_ = [i for i in files if i.endswith('.ttl')]
        # files_ = [i for i in files if i.endswith('.ttl') and 'Meta' not in i]
        for afile in files_:
            c(afile, 'pre')
            uploadFile(platform, tdir, adir, afile)
            c(afile, 'pos')
# irc
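# Sketch of the same upload without shelling out to curl, using the `requests`
# library (an assumption; it is not used elsewhere in this excerpt). `curl -T`
# issues an HTTP PUT, reproduced here against the same Virtuoso CRUD endpoint.
def uploadFileRequests(tdir, adir, afile,
                       endpoint="http://localhost:8890/sparql-graph-crud-auth",
                       user="dba", password="dba"):
    import requests
    from requests.auth import HTTPDigestAuth
    path = "{}/{}/{}".format(tdir, adir, afile)
    with open(path, "rb") as fh:
        resp = requests.put(endpoint,
                            params={"graph-uri": "urn:" + afile},
                            data=fh,
                            auth=HTTPDigestAuth(user, password))
    resp.raise_for_status()
    return resp.status_code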
    # (inside the per-class loop: `classe`, `ant` and fazQuery() come from the enclosing code)
    ant_ = []
    for aa in ant:
        if "cs" in aa.keys():
            tobj = aa["cs"]["value"]
            ant_.append((tobj, aa["p"]["value"]))
    cons = fazQuery("SELECT DISTINCT ?p ?co (datatype(?o) as ?do) WHERE { ?i a <%s> . ?i ?p ?o . OPTIONAL { ?o a ?co . } }" % (classe,))
    cons_ = []
    for cc in cons:
        if "co" in cc.keys():
            tobj = cc["co"]["value"]
            cons_.append((cc["p"]["value"], tobj))
        elif ("do" in cc.keys()) and ("w3.org" not in cc["p"]["value"]):
            tobj = cc["do"]["value"]
            cons_.append((cc["p"]["value"], tobj))
        elif ("do" in cc.keys()) and ("w3.org" in cc["p"]["value"]):
            c(cc["p"]["value"], tobj)  # to see what triple this is
            sys.exit()
    neighbors[classe] = (ant, cons)
    neighbors_[classe] = (ant_, cons_)

# cache the raw and simplified neighborhoods, then reload them for inspection
with open("dumpVV.pickle", "wb") as f:
    vv = (neighbors, neighbors_)
    pickle.dump(vv, f)

with open("dumpVV.pickle", "rb") as fo:
    vv_ = pickle.load(fo)

kk = vv_[1].keys()
for tkey in kk:  # for each class
    cl = tkey
    cl_ = cl.split("/")[-1]
    print(cl_)
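# Illustrative only: the shape of the SPARQL JSON bindings that the loop above
# expects from fazQuery(), with hypothetical URIs. Each binding maps a variable
# name to a dict holding its "value"; "co" carries an object class, "do" a datatype.
def _demo_bindings():
    sample_bindings = [
        {"p": {"value": "http://example.org/author"},
         "co": {"value": "http://example.org/Person"}},
        {"p": {"value": "http://example.org/body"},
         "do": {"value": "http://www.w3.org/2001/XMLSchema#string"}},
    ]
    pairs = []
    for cc in sample_bindings:
        if "co" in cc:
            pairs.append((cc["p"]["value"], cc["co"]["value"]))
        elif "do" in cc and "w3.org" not in cc["p"]["value"]:
            pairs.append((cc["p"]["value"], cc["do"]["value"]))
    print(pairs)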
import sys

# drop any previously imported social/percolation modules to force a fresh import
keys = tuple(sys.modules.keys())
for key in keys:
    if "social" in key or "percolation" in key:
        del sys.modules[key]

import os
import social as S
from percolation import check as c

snapdirs = os.listdir("./fb/")
snapdirs = [i for i in snapdirs if os.path.isdir("./fb/" + i)]
for snapdir in snapdirs:
    files = os.listdir("./fb/" + snapdir)
    files = [i for i in files if i.endswith(".rdf") or i.endswith(".ttl")]
    files.sort()
    for file_ in files:
        c("loading: ", file_, len(S.P.percolation_graph))
        if file_.endswith(".rdf"):
            S.P.percolation_graph.parse("./fb/" + snapdir + "/" + file_)
        elif file_.endswith(".ttl"):
            S.P.percolation_graph.parse("./fb/" + snapdir + "/" + file_,
                                        format="turtle")
        else:
            raise ValueError("Only RDF (.rdf or .ttl) files, please")
        c("loaded: ", file_, "ntriples in percolation graph: ",
          len(S.P.percolation_graph))
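# Minimal sketch, assuming S.P.percolation_graph behaves like an rdflib Graph
# (suggested by the .parse() and len() calls above). Shows the same load-and-count
# pattern with plain rdflib and a hypothetical file path.
def _demo_load_ttl(path="./fb/some_snapshot/triples.ttl"):
    import rdflib
    g = rdflib.Graph()
    g.parse(path, format="turtle")
    print("triples loaded:", len(g))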