def networksEvolution(client, pickledir=os.path.dirname(__file__)+'/../../../pickledir/'):
    """Return a dict mapping each list in ``order`` to an evolved NetworkEvolution.

    The user is prompted first. Unless the answer is ``'n'``, and provided
    ``evolutionStructures.pickle`` exists under ``pickledir``, the pickled
    structures are read back and returned. Otherwise authorship and reply
    relations are queried through ``client`` for every list, a
    ``NetworkEvolution`` (window and step of 1000) is loaded and evolved for
    each, and the resulting dict is pickled before being returned.
    """
    answer = input('try to reload evolution structures? (Y/n)')
    cache_file = pickledir + 'evolutionStructures.pickle'
    if answer != 'n' and os.path.isfile(cache_file):
        return P.utils.pRead(cache_file)

    # Both queries are ordered by message date and restricted to one gmane list.
    authorship_template = (
        'select distinct ?message ?participant where { '
        '?message po:author ?participant . '
        '?message po:createdAt ?date . '
        '?message po:snapshot ?snap . '
        '?snap po:gmaneID "%s" } ORDER BY ?date'
    )
    replies_template = (
        'select ?message ?rmessage where { '
        '?rmessage po:createdAt ?date . '
        '?rmessage po:replyTo ?message . '
        '?rmessage po:snapshot ?snap . '
        '?snap po:gmaneID "%s" } ORDER BY ?date'
    )

    evolutions = {}
    for alist in order:
        acronym = lacronyms[alist]
        authorship = pl(client.retrieveQuery(prefix + authorship_template % (acronym,)))
        replies = pl(client.retrieveQuery(prefix + replies_template % (acronym,)))

        evolution = P.measures.evolution.networkEvolution.NetworkEvolution(
            window_size=1000, step_size=1000)
        evolution.load(authorship, replies)
        evolution.evolve()
        evolutions[alist] = evolution
        print('evolved ' + alist)

    P.utils.pDump(evolutions, cache_file)
    return evolutions
def makeNetworkFromSnapshotid(client, snapshotid):
    """Query the relational pairs of a snapshot and hand them to ``makeNetwork``.

    The snapshot class is taken from the last path segment of ``snapshotid``
    before any ``#``. Facebook snapshots yield the member pairs of each
    friendship; Twitter snapshots yield (original author, retweeter) pairs.

    Raises:
        ValueError: for any other snapshot class.
    """
    templates = {
        'FacebookSnapshot': (
            'SELECT ?friend1 ?friend2 WHERE {{ '
            '?friendshipfoo po:snapshot <{}> . '
            '?friendshipfoo a po:Friendship . '
            '?friendshipfoo po:member ?friend1 . '
            '?friendshipfoo po:member ?friend2 . }} '
        ),
        'TwitterSnapshot': (
            'SELECT ?friend1 ?friend2 WHERE {{ '
            '?tweetfoo po:snapshot <{}> . '
            '?tweetfoo a po:Tweet . '
            '?tweetfoo po:author ?friend2 . '
            '?tweetfoo po:retweetOf ?tweetfoo2 . '
            '?tweetfoo2 po:author ?friend1 . }} '
        ),
    }

    kind = snapshotid.split('#')[0].split('/')[-1]
    if kind not in templates:
        raise ValueError(
            'Only Facebook and Twitter snapshots implemented for now')

    query = templates[kind].format(snapshotid)
    pairs = pl(client.retrieveQuery(prefix + query))
    return makeNetwork(pairs)
def outline(client):
    """Summarise every snapshot reachable through ``po:snapshot``.

    For each snapshot four aggregate queries are issued, in this order:
    number of triples, number of edges (friendships, interactions, retweets,
    replies and directed messages), number of participants and total number
    of characters of ``po:text`` values.

    Returns:
        dict mapping each snapshot to a dict with the keys ``'ntriples'``,
        ``'nedges'``, ``'nparticipants'`` and ``'nchars'``; each value is the
        first item ``pl`` extracts from the corresponding query result.
    """
    measures = (
        ('ntriples',
         'SELECT (COUNT(*) as ?c) WHERE { ?s ?p ?o . ?s po:snapshot <%s> . }'),
        ('nedges',
         'SELECT (COUNT(?interaction) as ?c) WHERE { '
         '{ ?interaction a po:Friendship } UNION '
         '{ ?interaction a po:Interaction } UNION '
         '{ ?interaction po:retweetOf ?message } UNION '
         '{ ?interaction po:replyTo ?message } UNION '
         '{ ?interaction po:directedTo ?participant } . '
         '?interaction po:snapshot <%s> }'),
        ('nparticipants',
         'SELECT (COUNT(DISTINCT ?author) as ?c) WHERE { '
         '?author a po:Participant . ?author po:snapshot <%s> . }'),
        ('nchars',
         'select (SUM(strlen(?text)) as ?stext) where { '
         '?foo po:text ?text . ?foo po:snapshot <%s> }'),
    )

    snapshot_ids = pl(client.retrieveQuery(
        prefix + 'SELECT DISTINCT ?snap WHERE { ?s po:snapshot ?snap }'))

    summary = {}
    for snapshot_id in snapshot_ids:
        summary[snapshot_id] = {
            key: pl(client.retrieveQuery(prefix + template % (snapshot_id,)))[0]
            for key, template in measures
        }
    return summary
def outlineTable(client, final_path='../../stabilityInteraction/tables/'):
    """Write the overview table ``tab1Overview.tex`` for the lists in ``order``.

    Each row holds: a start date (last of the three earliest ``createdAt``
    values), the latest date, the participant count, a thread count and the
    number of missing messages (20000 minus the complete messages found).

    Returns:
        ``locals()`` -- every local name below is therefore part of the
        return value, which is why the names must not be changed.
    """
    data = []
    for alist in order:
        # Start date: up to three earliest dates are fetched, the last is kept.
        q = ('select ?date where { '
             '?message po:createdAt ?date . '
             '?message po:snapshot ?snap . '
             '?snap po:gmaneID "%s" } ORDER BY ?date LIMIT 3') % (lacronyms[alist],)
        datemin = pl(client.retrieveQuery(prefix + q))[-1:]

        q = ('select (MAX(?date) as ?ldate) where { '
             '?message po:createdAt ?date . '
             '?message po:snapshot ?snap . '
             '?snap po:gmaneID "%s" . }') % (lacronyms[alist],)
        datemax = pl(client.retrieveQuery(prefix + q))

        # Keep only the calendar-date part that precedes the 'T' separator.
        dates = [stamp.split('T')[0] for stamp in datemin + datemax]

        q = ('select (COUNT(DISTINCT ?participant) as ?cp) where { '
             '?participant a po:Participant . '
             '?participant po:snapshot ?snap . '
             '?snap po:gmaneID "%s" . }') % (lacronyms[alist],)
        nparticipants = pl(client.retrieveQuery(prefix + q))

        # Not used below, but exposed to callers through locals().
        order_ = [name for name in order if name != alist]

        q = ('select (COUNT(DISTINCT ?message) as ?cmessages) where { '
             '?message a po:EmailMessage . '
             '?message po:author ?author . '
             '?message po:createdAt ?createdat . '
             '?message po:text ?text . '
             '?author po:observation ?obs . '
             '?obs po:email ?email . '
             '?author po:snapshot ?snap . '
             '?obs po:snapshot ?snap . '
             '?message po:snapshot ?snap . '
             '?snap po:gmaneID "%s" }') % (lacronyms[alist])
        nempty = 20000 - pl(client.retrieveQuery(prefix + q))[0]

        # Threads = complete messages that are not replies to another message.
        q = ('select (COUNT(DISTINCT ?message) as ?cmessages) where { '
             '?message po:replyTo ?message2 . '
             '?message po:snapshot ?snap . '
             '?snap po:gmaneID "%s" . }') % (lacronyms[alist],)
        nthreads = [20000 - nempty - pl(client.retrieveQuery(prefix + q))[0]]

        data.append(dates + nparticipants + nthreads + [nempty])

    table = P.mediaRendering.tables.makeTabular(order, data)
    P.mediaRendering.tables.writeTex(table, final_path + 'tab1Overview.tex')
    return locals()
def outlineTable(client, final_path='../../stabilityInteraction/tables/'):
    """Write the overview table ``tab1Overview.tex`` for the lists in ``order``.

    Each row holds: a start date (last of the three earliest ``createdAt``
    values), the latest date, the participant count, a thread count and the
    number of missing messages (20000 minus the complete messages found).

    Returns:
        ``locals()`` -- every local name below is therefore part of the
        return value, which is why the names must not be changed.
    """
    data = []
    for alist in order:
        # Start date: up to three earliest dates are fetched, the last is kept.
        q = ('select ?date where { '
             '?message po:createdAt ?date . '
             '?message po:snapshot ?snap . '
             '?snap po:gmaneID "%s" } ORDER BY ?date LIMIT 3') % (lacronyms[alist],)
        datemin = pl(client.retrieveQuery(prefix+q))[-1:]

        q = ('select (MAX(?date) as ?ldate) where { '
             '?message po:createdAt ?date . '
             '?message po:snapshot ?snap . '
             '?snap po:gmaneID "%s" . }') % (lacronyms[alist],)
        datemax = pl(client.retrieveQuery(prefix+q))

        # Keep only the calendar-date part that precedes the 'T' separator.
        dates = [stamp.split('T')[0] for stamp in datemin+datemax]

        q = ('select (COUNT(DISTINCT ?participant) as ?cp) where { '
             '?participant a po:Participant . '
             '?participant po:snapshot ?snap . '
             '?snap po:gmaneID "%s" . }') % (lacronyms[alist],)
        nparticipants = pl(client.retrieveQuery(prefix+q))

        # Not used below, but exposed to callers through locals().
        order_ = [name for name in order if name != alist]

        q = ('select (COUNT(DISTINCT ?message) as ?cmessages) where { '
             '?message a po:EmailMessage . '
             '?message po:author ?author . '
             '?message po:createdAt ?createdat . '
             '?message po:text ?text . '
             '?author po:observation ?obs . '
             '?obs po:email ?email . '
             '?author po:snapshot ?snap . '
             '?obs po:snapshot ?snap . '
             '?message po:snapshot ?snap . '
             '?snap po:gmaneID "%s" }') % (lacronyms[alist])
        nempty = 20000 - pl(client.retrieveQuery(prefix+q))[0]

        # Threads = complete messages that are not replies to another message.
        q = ('select (COUNT(DISTINCT ?message) as ?cmessages) where { '
             '?message po:replyTo ?message2 . '
             '?message po:snapshot ?snap . '
             '?snap po:gmaneID "%s" . }') % (lacronyms[alist],)
        nthreads = [20000 - nempty - pl(client.retrieveQuery(prefix+q))[0]]

        data.append(dates + nparticipants + nthreads + [nempty])

    table = P.mediaRendering.tables.makeTabular(order, data)
    P.mediaRendering.tables.writeTex(table, final_path+'tab1Overview.tex')
    return locals()
def evolutionTimelines(client, final_path=os.path.dirname(__file__) + '/../../../../stabilityInteraction/figs/', pickledir=os.path.dirname(__file__) + '/../../../pickledir/'):
    """Build timeline figures for the LAD and CPP lists at several window sizes.

    After prompting the user, the evolution structures are either read from
    ``evolutionStructuresTimeline.pickle`` in ``pickledir`` or recomputed
    (one ``NetworkEvolution`` per list and window size, evolved with
    ``pca=False``) and pickled. An ``EvolutionTimelines`` object is then
    created for every structure; ``plotSingles`` is called only for the
    window size of 1000. Nothing is returned.
    """
    window_sizes = [50, 100, 250, 500, 1000, 3300, 9900]
    list_names = 'LAD', 'CPP'
    cache_file_name = 'evolutionStructuresTimeline.pickle'

    structures = {}
    answer = input('try to reload evolution structures for timelines? (Y/n)')
    if answer != 'n' and os.path.isfile(pickledir + cache_file_name):
        structures = P.utils.pRead(pickledir + cache_file_name)
    else:
        authorship_template = (
            'select distinct ?message ?participant where { '
            '?message po:author ?participant . '
            '?message po:createdAt ?date . '
            '?message po:snapshot ?snap . '
            '?snap po:gmaneID "%s" } ORDER BY ?date'
        )
        replies_template = (
            'select ?message ?rmessage where { '
            '?rmessage po:createdAt ?date . '
            '?rmessage po:replyTo ?message . '
            '?rmessage po:snapshot ?snap . '
            '?snap po:gmaneID "%s" } ORDER BY ?date'
        )
        for name in list_names:
            structures[name] = []
            authorship = pl(client.retrieveQuery(
                prefix + authorship_template % (lacronyms[name],)))
            replies = pl(client.retrieveQuery(
                prefix + replies_template % (lacronyms[name],)))
            for window in window_sizes:
                # Windows smaller than 250 advance with a fixed step of 200.
                step = window if window >= 250 else 200
                evolution = P.measures.evolution.networkEvolution.NetworkEvolution(
                    window_size=window, step_size=step)
                evolution.load(authorship, replies)
                evolution.evolve(pca=False)
                structures[name].append(evolution)
        P.utils.pDump(structures, pickledir + cache_file_name)

    for name, evolutions in structures.items():
        for evolution in evolutions:
            timelines = P.mediaRendering.figures.EvolutionTimelines(
                name, evolution, final_path=final_path)
            if evolution.window_size == 1000:
                timelines.plotSingles()
def authorsTable(client, final_path=os.path.dirname(__file__)+'/../../../../stabilityInteraction/tables/', pickledir=os.path.dirname(__file__)+'/../../../pickledir/'):
    """Write ``userTabNEW.tex`` with per-list author activity figures.

    After prompting the user, the ``AuthorsStatistics`` objects are either
    read from ``authorsStatistics.pickle`` in ``pickledir`` or rebuilt from a
    per-author message count query and pickled. One table row is produced for
    each list in ``order``.

    Returns:
        dict mapping each list name to its ``AuthorsStatistics`` object.
    """
    answer = input('try to reload authors statistics? (Y/n)')
    cache_file = pickledir + 'authorsStatistics.pickle'
    if answer != 'n' and os.path.isfile(cache_file):
        stats = P.utils.pRead(cache_file)
    else:
        template = (
            'select distinct ?author (COUNT(distinct ?message) as ?cmessage) where { '
            '?message po:author ?author . '
            '?message a po:EmailMessage . '
            '?message po:snapshot ?snap . '
            '?snap po:gmaneID "%s" } GROUP BY ?author'
        )
        stats = {}
        for alist in order:
            counts = pl(client.retrieveQuery(prefix + template % (lacronyms[alist],)))
            stats[alist] = P.measures.authors.authorsStatistics.AuthorsStatistics(counts)
        P.utils.pDump(stats, cache_file)

    def as_row(author_stats):
        # Fractions are rendered as percentages; '\\%' escapes the sign for LaTeX.
        return [
            "{:.2f}".format(author_stats.n_msgs_h_),
            "{:.2f} ({:.2f}\\%)".format(author_stats.q1_*100, author_stats.Mq1*100),
            "{:.2f} ({:.2f}\\%)".format(author_stats.q3_*100, author_stats.Mq3*100),
            "{:.2f} (-{:.2f}\\%)".format(author_stats.last_d10_*100,
                                         author_stats.Mlast_d10*100),
        ]

    rows = [as_row(stats[name]) for name in order]
    tstring = P.mediaRendering.tables.makeTabular(order, rows, True)
    P.mediaRendering.tables.writeTex(tstring, final_path+"userTabNEW.tex")
    return stats
def textAnalysis(client):
    """Run the text measures over the first Twitter snapshot found.

    The first snapshot whose identifier contains ``'Twitter'`` is turned into
    a network, a ``TopologicalAnalysis`` is built from its ``'gg'`` entry and
    ``measureAll`` is called with all (author, text) pairs and the
    sectorialized agents of that analysis.

    Returns:
        tuple ``(snapshot, analysis)``.

    Raises:
        IndexError: when no snapshot identifier contains ``'Twitter'``.
    """
    prefix = 'PREFIX po: <http://purl.org/socialparticipation/po/>\n'

    all_snapshots = pl(client.retrieveQuery(
        prefix+'SELECT DISTINCT ?snap WHERE { ?s po:snapshot ?snap }'))
    twitter_snapshots = [snap for snap in all_snapshots if 'Twitter' in snap]
    snapshot = twitter_snapshots[0]

    network = P.utils.makeNetworkFromSnapshotid(client, snapshot)
    analysis = P.legacy.analyses.topological.TopologicalAnalysis(network['gg'])

    # This query is not restricted to the chosen snapshot.
    text_query = ('SELECT ?author ?text WHERE { '
                  '?tweet po:author ?author . '
                  '?tweet po:message ?text . } ')
    authors_text = pl(client.retrieveQuery(prefix+text_query))
    sectorialized = analysis.sectors['sectorialized_agents']
    P.measures.text.overall.measureAll(authors_text, sectorialized)
    return snapshot, analysis
def pickSnapshot(client):
    """Return the first snapshot yielded by a ``LIMIT 1`` snapshot query.

    Raises:
        IndexError: when ``pl`` yields no value for the query.
    """
    prefix = 'PREFIX po: <http://purl.org/socialparticipation/po/>\n'
    query = prefix + 'SELECT DISTINCT ?snap WHERE { ?s po:snapshot ?snap } LIMIT 1'
    found = pl(client.retrieveQuery(query))
    snapshot = found[0]
    return snapshot
def authorsTable(client, final_path=os.path.dirname(__file__) + '/../../../../stabilityInteraction/tables/', pickledir=os.path.dirname(__file__) + '/../../../pickledir/'):
    """Write ``userTabNEW.tex`` with per-list author activity figures.

    After prompting the user, the ``AuthorsStatistics`` objects are either
    read from ``authorsStatistics.pickle`` in ``pickledir`` or rebuilt from a
    per-author message count query and pickled. One table row is produced for
    each list in ``order``.

    Returns:
        dict mapping each list name to its ``AuthorsStatistics`` object.
    """
    answer = input('try to reload authors statistics? (Y/n)')
    cache_file = pickledir + 'authorsStatistics.pickle'
    if answer != 'n' and os.path.isfile(cache_file):
        stats = P.utils.pRead(cache_file)
    else:
        template = (
            'select distinct ?author (COUNT(distinct ?message) as ?cmessage) where { '
            '?message po:author ?author . '
            '?message a po:EmailMessage . '
            '?message po:snapshot ?snap . '
            '?snap po:gmaneID "%s" } GROUP BY ?author'
        )
        stats = {}
        for alist in order:
            counts = pl(client.retrieveQuery(prefix + template % (lacronyms[alist],)))
            stats[alist] = P.measures.authors.authorsStatistics.AuthorsStatistics(counts)
        P.utils.pDump(stats, cache_file)

    def as_row(author_stats):
        # Fractions are rendered as percentages; '\\%' escapes the sign for LaTeX.
        return [
            "{:.2f}".format(author_stats.n_msgs_h_),
            "{:.2f} ({:.2f}\\%)".format(author_stats.q1_ * 100, author_stats.Mq1 * 100),
            "{:.2f} ({:.2f}\\%)".format(author_stats.q3_ * 100, author_stats.Mq3 * 100),
            "{:.2f} (-{:.2f}\\%)".format(author_stats.last_d10_ * 100,
                                         author_stats.Mlast_d10 * 100),
        ]

    rows = [as_row(stats[name]) for name in order]
    tstring = P.mediaRendering.tables.makeTabular(order, rows, True)
    P.mediaRendering.tables.writeTex(tstring, final_path + "userTabNEW.tex")
    return stats
def mkQuery(query, plain=True):
    """Insert the ``from_`` clause before the first ``WHERE`` and run the query.

    ``from_`` and ``client`` are not parameters; they are read from the
    surrounding scope.

    Args:
        query: query text containing an upper-case ``WHERE`` keyword.
        plain: when true the result is passed through ``pl``; otherwise the
            raw ``['results']['bindings']`` list of the response is returned.

    Raises:
        IndexError: when ``query`` contains no upper-case ``WHERE``.
    """
    # Split on the FIRST 'WHERE' only: an unbounded split silently discarded
    # everything after a second occurrence (sub-select, literal, ...).
    pieces = query.split('WHERE', 1)
    full_query = ''.join((pieces[0], from_, '\nWHERE ' + pieces[1]))
    result = client.retrieveQuery(full_query)
    if plain:
        return pl(result)
    return result['results']['bindings']
def textAnalysis(client):
    """Run the text measures over the first Twitter snapshot found.

    The first snapshot whose identifier contains ``'Twitter'`` is turned into
    a network, a ``TopologicalAnalysis`` is built from its ``'gg'`` entry and
    ``measureAll`` is called with all (author, text) pairs and the
    sectorialized agents of that analysis.

    Returns:
        tuple ``(snapshot, analysis)``.

    Raises:
        IndexError: when no snapshot identifier contains ``'Twitter'``.
    """
    prefix = 'PREFIX po: <http://purl.org/socialparticipation/po/>\n'

    all_snapshots = pl(client.retrieveQuery(
        prefix + 'SELECT DISTINCT ?snap WHERE { ?s po:snapshot ?snap }'))
    twitter_snapshots = [snap for snap in all_snapshots if 'Twitter' in snap]
    snapshot = twitter_snapshots[0]

    network = P.utils.makeNetworkFromSnapshotid(client, snapshot)
    analysis = P.legacy.analyses.topological.TopologicalAnalysis(network['gg'])

    # This query is not restricted to the chosen snapshot.
    text_query = ('SELECT ?author ?text WHERE { '
                  '?tweet po:author ?author . '
                  '?tweet po:message ?text . } ')
    authors_text = pl(client.retrieveQuery(prefix + text_query))
    sectorialized = analysis.sectors['sectorialized_agents']
    P.measures.text.overall.measureAll(authors_text, sectorialized)
    return snapshot, analysis
def outlineText(client):
    """Return a two-sentence English summary of the whole database.

    The first sentence reports the number of triples, edges, participants and
    characters; the second reports how many snapshots carry each of the
    flags ``isEgo``, ``isGroup``, ``isInteraction``, ``isFriendship`` and
    ``isPost``. Every figure is the first value ``pl`` extracts from one
    aggregate query.
    """
    def first_value(query):
        # Run one aggregate query and keep the first extracted value.
        return pl(client.retrieveQuery(prefix+query))[0]

    totals = [
        first_value('SELECT (COUNT(*) as ?c) WHERE { ?s ?p ?o . }'),
        first_value(
            'SELECT (COUNT(?interaction) as ?c) WHERE { '
            '{ ?interaction a po:Friendship } UNION '
            '{ ?interaction a po:Interaction } UNION '
            '{ ?interaction po:retweetOf ?message } UNION '
            '{ ?interaction po:replyTo ?message } UNION '
            '{ ?interaction po:directedTo ?participant } . }'),
        first_value(
            'SELECT (COUNT(DISTINCT ?author) as ?c) WHERE { ?author a po:Participant . }'),
        first_value(
            'SELECT (SUM(strlen(?text)) as ?total) WHERE { '
            '{ ?message po:text ?text . } UNION '
            '{ ?message po:htmlBodyText ?text } UNION '
            '{?message po:htmlAbstractText ?text } UNION '
            '{ ?message po:description ?text } }'),
    ]
    text = ('The database consists of {:,} triples, '
            '{:,} edges yield by interactions or relations, '
            '{:,} participants and {:,} characters.').format(*totals)

    flag_counts = [
        first_value('SELECT (COUNT(?snap) as ?csnap) WHERE { ?snap po:isEgo true }'),
        first_value('SELECT (COUNT(?snap) as ?csnap) WHERE { ?snap po:isGroup true }'),
        first_value('SELECT (COUNT(?snap) as ?csnap) WHERE { ?snap po:isInteraction true }'),
        first_value('SELECT (COUNT(?snap) as ?csnap) WHERE { ?snap po:isFriendship true }'),
        first_value('SELECT (COUNT(?snap) as ?csnap) WHERE { ?snap po:isPost true }'),
    ]
    text += (' Among all snapshots, {} are ego snapshots, {} are group snapshots; '
             '{} have interaction edges, {} have friendship edges; '
             '{} have text content from messages.').format(*flag_counts)
    return text
def outlineNSnapshots(client):
    """Count snapshots per ``po:socialProtocol`` value.

    Returns:
        whatever ``pl`` extracts from the grouped count query (one protocol
        and its snapshot count per group -- exact layout depends on ``pl``;
        confirm against its implementation).
    """
    # GROUP BY is required: SPARQL 1.1 only lets a non-aggregated variable
    # (?stype) appear next to an aggregate in SELECT when it is a group key.
    query = (
        'SELECT DISTINCT ?stype (COUNT(?snap) as ?zscount) '
        'WHERE { ?snap po:socialProtocol ?stype . } '
        'GROUP BY ?stype'
    )
    # The former trailing ``return text`` was unreachable and referenced an
    # undefined name, so it has been removed.
    return pl(client.retrieveQuery(prefix + query))
def networksEvolution(client, pickledir=os.path.dirname(__file__) + '/../../../pickledir/'):
    """Return a dict mapping each list in ``order`` to an evolved NetworkEvolution.

    The user is prompted first. Unless the answer is ``'n'``, and provided
    ``evolutionStructures.pickle`` exists under ``pickledir``, the pickled
    structures are read back and returned. Otherwise authorship and reply
    relations are queried through ``client`` for every list, a
    ``NetworkEvolution`` (window and step of 1000) is loaded and evolved for
    each, and the resulting dict is pickled before being returned.
    """
    answer = input('try to reload evolution structures? (Y/n)')
    cache_file = pickledir + 'evolutionStructures.pickle'
    if answer != 'n' and os.path.isfile(cache_file):
        return P.utils.pRead(cache_file)

    # Both queries are ordered by message date and restricted to one gmane list.
    authorship_template = (
        'select distinct ?message ?participant where { '
        '?message po:author ?participant . '
        '?message po:createdAt ?date . '
        '?message po:snapshot ?snap . '
        '?snap po:gmaneID "%s" } ORDER BY ?date'
    )
    replies_template = (
        'select ?message ?rmessage where { '
        '?rmessage po:createdAt ?date . '
        '?rmessage po:replyTo ?message . '
        '?rmessage po:snapshot ?snap . '
        '?snap po:gmaneID "%s" } ORDER BY ?date'
    )

    evolutions = {}
    for alist in order:
        acronym = lacronyms[alist]
        authorship = pl(client.retrieveQuery(prefix + authorship_template % (acronym,)))
        replies = pl(client.retrieveQuery(prefix + replies_template % (acronym,)))

        evolution = P.measures.evolution.networkEvolution.NetworkEvolution(
            window_size=1000, step_size=1000)
        evolution.load(authorship, replies)
        evolution.evolve()
        evolutions[alist] = evolution
        print('evolved ' + alist)

    P.utils.pDump(evolutions, cache_file)
    return evolutions
def evolutionTimelines(client, final_path=os.path.dirname(__file__)+'/../../../../stabilityInteraction/figs/', pickledir=os.path.dirname(__file__)+'/../../../pickledir/'):
    """Build timeline figures for the LAD and CPP lists at several window sizes.

    After prompting the user, the evolution structures are either read from
    ``evolutionStructuresTimeline.pickle`` in ``pickledir`` or recomputed
    (one ``NetworkEvolution`` per list and window size, evolved with
    ``pca=False``) and pickled. An ``EvolutionTimelines`` object is then
    created for every structure; ``plotSingles`` is called only for the
    window size of 1000. Nothing is returned.
    """
    window_sizes = [50, 100, 250, 500, 1000, 3300, 9900]
    list_names = 'LAD', 'CPP'
    cache_file_name = 'evolutionStructuresTimeline.pickle'

    structures = {}
    answer = input('try to reload evolution structures for timelines? (Y/n)')
    if answer != 'n' and os.path.isfile(pickledir+cache_file_name):
        structures = P.utils.pRead(pickledir+cache_file_name)
    else:
        authorship_template = (
            'select distinct ?message ?participant where { '
            '?message po:author ?participant . '
            '?message po:createdAt ?date . '
            '?message po:snapshot ?snap . '
            '?snap po:gmaneID "%s" } ORDER BY ?date'
        )
        replies_template = (
            'select ?message ?rmessage where { '
            '?rmessage po:createdAt ?date . '
            '?rmessage po:replyTo ?message . '
            '?rmessage po:snapshot ?snap . '
            '?snap po:gmaneID "%s" } ORDER BY ?date'
        )
        for name in list_names:
            structures[name] = []
            authorship = pl(client.retrieveQuery(
                prefix+authorship_template % (lacronyms[name],)))
            replies = pl(client.retrieveQuery(
                prefix+replies_template % (lacronyms[name],)))
            for window in window_sizes:
                # Windows smaller than 250 advance with a fixed step of 200.
                step = window if window >= 250 else 200
                evolution = P.measures.evolution.networkEvolution.NetworkEvolution(
                    window_size=window, step_size=step)
                evolution.load(authorship, replies)
                evolution.evolve(pca=False)
                structures[name].append(evolution)
        P.utils.pDump(structures, pickledir+cache_file_name)

    for name, evolutions in structures.items():
        for evolution in evolutions:
            timelines = P.mediaRendering.figures.EvolutionTimelines(
                name, evolution, final_path=final_path)
            if evolution.window_size == 1000:
                timelines.plotSingles()
import percolation as P
from percolation.rdf.sparql.functions import plainQueryValues as pl
import networkx as x

c = P.c

# Namespace declaration prepended to every query below.
prefix = 'PREFIX po: <http://purl.org/socialparticipation/po/>\n'
client = P.rdf.sparql.classes.LegacyClient('http://127.0.0.1:3030/adbname')

# Keep only the snapshots whose identifier mentions Twitter.
snapshots = [
    snap
    for snap in pl(client.retrieveQuery(
        prefix+'SELECT DISTINCT ?snap WHERE { ?s po:snapshot ?snap }'))
    if 'Twitter' in snap
]

gg = []
for snapshot in snapshots:
    # Pairs (original author, retweeter) for every retweet in the snapshot.
    q = ('SELECT ?friend1 ?friend2 WHERE {{ '
         '?tweetfoo po:snapshot <{}> . '
         '?tweetfoo a po:Tweet . '
         '?tweetfoo po:author ?friend2 . '
         '?tweetfoo po:retweetOf ?tweetfoo2 . '
         '?tweetfoo2 po:author ?friend1 . }} ').format(snapshot)
    c('before query')
    friends = pl(client.retrieveQuery(prefix+q))
    c('after query')
    # NOTE(review): ``g`` is rebound on every iteration and ``gg`` is never
    # appended to -- confirm whether the networks were meant to be collected.
    g = P.utils.makeNetwork(friends, True)
def circularTables(client, final_path=os.path.dirname(__file__) + '/../../../../stabilityInteraction/tables/', pickledir=os.path.dirname(__file__) + '/../../../pickledir/'):
    """Write the temporal (circular statistics) LaTeX tables for each list.

    After prompting the user, the ``TemporalStatistics`` objects are either
    read from ``temporalStatistics.pickle`` in ``pickledir`` or rebuilt from
    the queried message dates and pickled. The files written under
    ``final_path`` are, in order: ``tab2TimeNEW<list>.tex`` and
    ``tabHours<list>NEW.tex`` per list, ``tabWeekdaysNEW.tex`` once, then
    ``tabMonthdays<list>NEW.tex`` and ``tabMonths<list>NEW.tex`` per list.

    Returns:
        dict mapping each list name to its ``TemporalStatistics`` object.
    """
    tables = P.mediaRendering.tables

    answer = input('try to reload temporal statistics? (Y/n)')
    cache_file = pickledir + 'temporalStatistics.pickle'
    if answer != 'n' and os.path.isfile(cache_file):
        stats = P.utils.pRead(cache_file)
    else:
        stats = {}
        template = ("select distinct ?message ?date where { "
                    "?message po:createdAt ?date . "
                    "?message po:snapshot ?snap . "
                    "?snap po:gmaneID '%s' . }")
        for alist in order:
            rows = pl(client.retrieveQuery(prefix + template % (lacronyms[alist],)))
            raw_dates = [row[0] for row in rows]
            parsed = [dateutil.parser.parse(raw) for raw in raw_dates]
            stats[alist] = P.measures.time.temporalStatistics.TemporalStatistics(parsed)
        P.utils.pDump(stats, cache_file)

    def circMeasures(tdict, mean=True):
        # One table row; the circular mean is replaced by a placeholder
        # when ``mean`` is false.
        circular = tdict["circular_measures"]
        leading = circular["circular_mean"] if mean else "--//--"
        return [
            leading,
            circular["std_unity_radius"],
            circular["variance_unity_radius"],
            circular["circular_dispersion"],
            tdict["max_discrepancy"],
            tdict["max_discrepancy_"][0],
            tdict["max_discrepancy_"][1],
        ]

    def write_partial_sums(scale, row_labels, partials, partial_labels, name_format):
        # One partial-sums table per list, built from the percentage histogram
        # of the given time scale.
        for name in order:
            histogram = getattr(stats[name], scale)["histogram"]
            percentages = 100 * histogram / histogram.sum()
            tstring = tables.partialSums(
                row_labels, data=[percentages],
                partials=partials, partial_labels=partial_labels)
            tables.writeTex(tstring, final_path + name_format.format(name))

    # circular measures for every time scale, one table per list
    scale_labels = ["seconds", "minutes", "hours", "weekdays", "month days", "months"]
    for alist in order:
        ts = stats[alist]
        rows = [
            circMeasures(ts.seconds, False),
            circMeasures(ts.minutes, False),
            circMeasures(ts.hours),
            circMeasures(ts.weekdays),
            circMeasures(ts.monthdays),
            circMeasures(ts.months),
        ]
        tstring = tables.makeTabular(scale_labels, rows, True)
        tables.writeTex(tstring, final_path + "tab2TimeNEW{}.tex".format(alist))

    # hours along the day
    write_partial_sums(
        "hours", ["{}h".format(hour) for hour in range(24)],
        [1, 2, 3, 4, 6, 12], ["1h", "2h", "3h", "4h", "6h", "12h"],
        "tabHours{}NEW.tex")

    # days along the week: a single table, one row per list
    weekday_rows = []
    for name in order:
        histogram = stats[name].weekdays["histogram"]
        weekday_rows.append(100 * histogram / histogram.sum())
    tstring = tables.makeTabular(["LAU", "LAD", "MET", "CPP"], weekday_rows, True)
    tables.writeTex(tstring, final_path + "tabWeekdaysNEW.tex")

    # days of the month
    write_partial_sums(
        "monthdays", [str(day) for day in range(1, 31)],
        [1, 5, 10, 15], ["1 day", "5", "10", "15 days"],
        "tabMonthdays{}NEW.tex")

    # months of the year
    write_partial_sums(
        "months",
        ["Jan", "Fev", "Mar", "Apr", "Mai", "Jun",
         "Jul", "Ago", "Set", "Out", "Nov", "Dez"],
        [1, 2, 3, 4, 6], ["m.", "b.", "t.", "q.", "s."],
        "tabMonths{}NEW.tex")

    return stats
def circularTables(client, final_path=os.path.dirname(__file__)+'/../../../../stabilityInteraction/tables/', pickledir=os.path.dirname(__file__)+'/../../../pickledir/'):
    """Write the temporal (circular statistics) LaTeX tables for each list.

    After prompting the user, the ``TemporalStatistics`` objects are either
    read from ``temporalStatistics.pickle`` in ``pickledir`` or rebuilt from
    the queried message dates and pickled. The files written under
    ``final_path`` are, in order: ``tab2TimeNEW<list>.tex`` and
    ``tabHours<list>NEW.tex`` per list, ``tabWeekdaysNEW.tex`` once, then
    ``tabMonthdays<list>NEW.tex`` and ``tabMonths<list>NEW.tex`` per list.

    Returns:
        dict mapping each list name to its ``TemporalStatistics`` object.
    """
    tables = P.mediaRendering.tables

    answer = input('try to reload temporal statistics? (Y/n)')
    cache_file = pickledir+'temporalStatistics.pickle'
    if answer != 'n' and os.path.isfile(cache_file):
        stats = P.utils.pRead(cache_file)
    else:
        stats = {}
        template = ("select distinct ?message ?date where { "
                    "?message po:createdAt ?date . "
                    "?message po:snapshot ?snap . "
                    "?snap po:gmaneID '%s' . }")
        for alist in order:
            rows = pl(client.retrieveQuery(prefix+template % (lacronyms[alist],)))
            raw_dates = [row[0] for row in rows]
            parsed = [dateutil.parser.parse(raw) for raw in raw_dates]
            stats[alist] = P.measures.time.temporalStatistics.TemporalStatistics(parsed)
        P.utils.pDump(stats, cache_file)

    def circMeasures(tdict, mean=True):
        # One table row; the circular mean is replaced by a placeholder
        # when ``mean`` is false.
        circular = tdict["circular_measures"]
        leading = circular["circular_mean"] if mean else "--//--"
        return [
            leading,
            circular["std_unity_radius"],
            circular["variance_unity_radius"],
            circular["circular_dispersion"],
            tdict["max_discrepancy"],
            tdict["max_discrepancy_"][0],
            tdict["max_discrepancy_"][1],
        ]

    def write_partial_sums(scale, row_labels, partials, partial_labels, name_format):
        # One partial-sums table per list, built from the percentage histogram
        # of the given time scale.
        for name in order:
            histogram = getattr(stats[name], scale)["histogram"]
            percentages = 100*histogram/histogram.sum()
            tstring = tables.partialSums(
                row_labels, data=[percentages],
                partials=partials, partial_labels=partial_labels)
            tables.writeTex(tstring, final_path+name_format.format(name))

    # circular measures for every time scale, one table per list
    scale_labels = ["seconds", "minutes", "hours", "weekdays", "month days", "months"]
    for alist in order:
        ts = stats[alist]
        rows = [
            circMeasures(ts.seconds, False),
            circMeasures(ts.minutes, False),
            circMeasures(ts.hours),
            circMeasures(ts.weekdays),
            circMeasures(ts.monthdays),
            circMeasures(ts.months),
        ]
        tstring = tables.makeTabular(scale_labels, rows, True)
        tables.writeTex(tstring, final_path+"tab2TimeNEW{}.tex".format(alist))

    # hours along the day
    write_partial_sums(
        "hours", ["{}h".format(hour) for hour in range(24)],
        [1, 2, 3, 4, 6, 12], ["1h", "2h", "3h", "4h", "6h", "12h"],
        "tabHours{}NEW.tex")

    # days along the week: a single table, one row per list
    weekday_rows = []
    for name in order:
        histogram = stats[name].weekdays["histogram"]
        weekday_rows.append(100*histogram/histogram.sum())
    tstring = tables.makeTabular(["LAU", "LAD", "MET", "CPP"], weekday_rows, True)
    tables.writeTex(tstring, final_path+"tabWeekdaysNEW.tex")

    # days of the month
    write_partial_sums(
        "monthdays", [str(day) for day in range(1, 31)],
        [1, 5, 10, 15], ["1 day", "5", "10", "15 days"],
        "tabMonthdays{}NEW.tex")

    # months of the year
    write_partial_sums(
        "months",
        ["Jan", "Fev", "Mar", "Apr", "Mai", "Jun",
         "Jul", "Ago", "Set", "Out", "Nov", "Dez"],
        [1, 2, 3, 4, 6], ["m.", "b.", "t.", "q.", "s."],
        "tabMonths{}NEW.tex")

    return stats
import percolation as P
from percolation.rdf.sparql.functions import plainQueryValues as pl
import networkx as x

c = P.c

# Namespace declaration prepended to every query below.
prefix = 'PREFIX po: <http://purl.org/socialparticipation/po/>\n'
client = P.rdf.sparql.classes.LegacyClient('http://127.0.0.1:3030/adbname')

# Keep only the snapshots whose identifier mentions Twitter.
snapshots = [
    snap
    for snap in pl(client.retrieveQuery(
        prefix + 'SELECT DISTINCT ?snap WHERE { ?s po:snapshot ?snap }'))
    if 'Twitter' in snap
]

gg = []
for snapshot in snapshots:
    # Pairs (original author, retweeter) for every retweet in the snapshot.
    q = ('SELECT ?friend1 ?friend2 WHERE {{ '
         '?tweetfoo po:snapshot <{}> . '
         '?tweetfoo a po:Tweet . '
         '?tweetfoo po:author ?friend2 . '
         '?tweetfoo po:retweetOf ?tweetfoo2 . '
         '?tweetfoo2 po:author ?friend1 . }} ').format(snapshot)
    c('before query')
    friends = pl(client.retrieveQuery(prefix + q))
    c('after query')
    # No graph is built from ``friends`` here; the original only carried a
    # commented-out networkx DiGraph sketch at this point.
def facebookGroups(client):
    """Return the distinct (name, url) values of resources whose social protocol is "Facebook".

    The result is whatever ``pl`` extracts from the query response.
    """
    # The query text repeats the po: declaration already carried by ``prefix``.
    query = (
        'PREFIX po: <http://purl.org/socialparticipation/po/> '
        'select distinct ?name ?url where '
        '{?s po:socialProtocol "Facebook" . ?s po:name ?name . ?s po:url ?url }'
    )
    response = client.retrieveQuery(prefix+query)
    return pl(response)
def probeOntology(endpoint_url, graph_urns, final_dir, one_datatype=True):
    """Probe a SPARQL endpoint and sketch the ontology its data follows.

    The function queries the endpoint for the classes and properties in use,
    accumulates triples describing restrictions, functional properties,
    property kinds, domains and ranges, draws a class diagram, and finally
    stores and serializes the triples through the percolation ``ontology``
    context.

    Args:
        endpoint_url: URL handed to ``P.rdf.sparql.classes.LegacyClient``.
        graph_urns: iterable of graph identifiers; each one becomes a
            ``FROM <...>`` clause in every query sent through ``mkQuery``.
        final_dir: directory that receives ``draw.png``, ``draw_circo.png``,
            ``draw_twopi.png``, ``draw.dot``, ``ontology.owl`` and
            ``ontology.ttl``; created when missing.
        one_datatype: when true, all edges pointing at the same datatype
            label share one numbered node; otherwise every such edge gets a
            new numbered node.

    Returns:
        dict: ``locals()`` of this call, i.e. every local name still bound
        at the end (``triples``, ``neighbors``, ``A``, ``g``, ...). Local
        names are therefore part of the result and must not be renamed.

    Raises:
        IndexError: if the snapshot or provenance query returns no rows.
    """
    os.makedirs(final_dir, exist_ok=True)

    client = P.rdf.sparql.classes.LegacyClient(endpoint_url)
    from_ = ''
    for graph_urn in graph_urns:
        from_ += '\nFROM <%s>' % (graph_urn, )

    def mkQuery(query, plain=True):
        """Send `query` with the FROM clauses spliced in before WHERE.

        Returns ``pl(result)`` when `plain` is truthy, otherwise the raw
        list under ``result['results']['bindings']``.
        """
        # Split on the first WHERE only, so the remainder of a query that
        # contains the keyword again is not dropped.
        query_ = query.split('WHERE', 1)
        query__ = (query_[0], from_, '\nWHERE ' + query_[1])
        query___ = ''.join(query__)
        result = client.retrieveQuery(query___)
        if plain:
            return pl(result)
        return result['results']['bindings']

    c('find all classes')
    q = "SELECT DISTINCT ?class WHERE { ?s a ?class . }"
    classes = mkQuery(q)

    c('antecedents, consequents and restrictions of each class')
    neighbors = {}
    triples = []
    existential_restrictions = {}
    universal_restrictions = {}
    for aclass in classes:
        # Classes and properties that point at instances of aclass.
        q = (
            "SELECT DISTINCT ?cs ?p WHERE { ?i a <%s> . ?s ?p ?i . "
            "OPTIONAL { ?s a ?cs . } }"
        ) % (aclass, )
        antecedent_property = mkQuery(q)

        # Properties leaving instances of aclass, with the class and/or the
        # datatype of the object when available.
        q = (
            "SELECT DISTINCT ?ap ?co (datatype(?o) as ?do) WHERE "
            "{ ?i a <%s> . ?i ?ap ?o . OPTIONAL { ?o a ?co . } }"
        ) % (aclass, )
        consequent_property__ = mkQuery(q, 0)
        consequent_property = [[i['ap']['value'], i['do']['value']]
                               for i in consequent_property__ if 'do' in i]
        consequent_property_ = [[i['ap']['value'], i['co']['value']]
                                for i in consequent_property__ if 'co' in i]
        neighbors[aclass] = (antecedent_property,
                             consequent_property + consequent_property_)

        # class restrictions
        q = "SELECT DISTINCT ?p WHERE {?s a <%s>. ?s ?p ?o .}" % (aclass, )
        props_c = mkQuery(q)
        # NOTE(review): the COUNT queries below go straight through
        # client.retrieveQuery, so they do not receive the FROM clauses that
        # mkQuery adds -- confirm whether that is intended.
        q = "SELECT (COUNT(DISTINCT ?s) as ?cs) WHERE {?s a <%s>}" % (
            aclass, )
        ninds = pl(client.retrieveQuery(q))[0]
        for pc in props_c:
            if '22-rdf-syntax' in pc:
                continue
            q = (
                "SELECT DISTINCT ?co (datatype(?o) as ?do) WHERE "
                "{?s a <%s>. ?s <%s> ?o . OPTIONAL {?o a ?co . }}"
            ) % (aclass, pc)
            inds2 = mkQuery(q, 0)
            objs = set([i["co"]["value"] for i in inds2 if "co" in i])
            vals = set([i["do"]["value"] for i in inds2 if "do" in i])
            q = (
                "SELECT (COUNT(DISTINCT ?s) as ?cs) WHERE "
                "{?s a <%s>. ?s <%s> ?o . }"
            ) % (aclass, pc)
            ninds2 = pl(client.retrieveQuery(q))[0]
            if ninds == ninds2:  # existential: every instance uses pc
                # An arbitrary member is taken (sets are unordered);
                # datatypes win over object classes, 0 means "nothing found".
                if len(vals):
                    ob = list(vals)[0]
                elif len(objs):
                    ob = list(objs)[0]
                else:
                    ob = 0
                if ob:
                    B = r.BNode()
                    triples += [
                        (aclass, rdfs.subClassOf, B),
                        (B, a, owl.Restriction),
                        (B, owl.onProperty, pc),
                        (B, owl.someValuesFrom, ob),
                    ]
                    existential_restrictions.setdefault(
                        aclass, []).append((pc, ob))

            # Subjects that use pc and have a type other than aclass.
            q = (
                "SELECT (COUNT(DISTINCT ?s) as ?cs) WHERE "
                "{ ?s <%s> ?o . ?s a ?ca . FILTER(str(?ca) != '%s') }"
            ) % (pc, aclass)
            ninds3 = pl(client.retrieveQuery(q))[0]
            # NOTE(review): compares against the int 0 -- assumes `pl`
            # yields numeric counts rather than strings; confirm.
            if ninds3 == 0:  # universal: pc is used by this class only
                if len(vals):
                    ob = list(vals)[0]
                elif len(objs):
                    ob = list(objs)[0]
                else:
                    ob = 0
                if ob:
                    B = r.BNode()
                    triples += [
                        (aclass, rdfs.subClassOf, B),
                        (B, a, owl.Restriction),
                        (B, owl.onProperty, pc),
                        (B, owl.allValuesFrom, ob),
                    ]
                    universal_restrictions.setdefault(
                        aclass, []).append((pc, ob))
    if classes:
        # The loop variables exist only when the loop ran at least once;
        # an unconditional `del` raised NameError for an empty class list.
        del q, aclass, antecedent_property, consequent_property

    c('find properties')
    q = "SELECT DISTINCT ?p WHERE {?s ?p ?o}"
    properties = mkQuery(q)

    c('check if property is functional and get range and domain')
    functional_properties = set()
    for prop in properties:
        # check if property is functional: the only distinct per-subject
        # object count must be 1
        q = ('SELECT DISTINCT (COUNT(?o) as ?co) WHERE { ?s <%s> ?o } '
             'GROUP BY ?s') % (prop, )
        is_functional = mkQuery(q)
        # NOTE(review): compares against the int 1 -- assumes `pl` yields
        # numeric counts rather than strings; confirm.
        if len(is_functional) == 1 and is_functional[0] == 1:
            triples.append((prop, a, owl.FunctionalProperty))
            functional_properties.add(prop)

        # datatype or object properties
        suj = mkQuery(
            "SELECT DISTINCT ?cs WHERE { ?s <%s> ?o . ?s a ?cs . }" %
            (prop, ))
        obj1 = mkQuery(
            "SELECT DISTINCT ?co WHERE { ?s <%s> ?o . ?o a ?co . }" %
            (prop, ))
        obj2 = mkQuery(
            "SELECT DISTINCT (datatype(?o) as ?do) WHERE { ?s <%s> ?o . }" %
            (prop, ))
        obj = obj1 + obj2
        if len(obj) and ("XMLS" in obj[0]):
            # NOTE(review): the OWL RDF vocabulary names this class
            # owl:DatatypeProperty; left as-is because consumers may rely
            # on the emitted term.
            triples.append((prop, a, owl.DataProperty))
        else:
            triples.append((prop, a, owl.ObjectProperty))

        if len(suj) > 1:
            B = r.BNode()
            triples.append((prop, rdfs.domain, B))
            for ss in suj:
                triples.append((B, owl.unionOf, ss))
        elif suj:
            triples.append((prop, rdfs.domain, suj[0]))

        if len(obj) > 1:
            B = r.BNode()
            triples.append((prop, rdfs.range, B))
            # The range union lists the object classes/datatypes (`obj`);
            # it previously iterated the domain classes (`suj`).
            for ss in obj:
                triples.append((B, owl.unionOf, ss))
        elif obj:
            triples.append((prop, rdfs.range, obj[0]))

    # Drawing
    c('started drawing')
    A = gv.AGraph(directed=True, strict=False)
    q = (
        "PREFIX po: <http://purl.org/socialparticipation/po/> "
        "SELECT DISTINCT ?snap WHERE { { ?i po:snapshot ?snap } UNION "
        "{ ?snap po:snapshotID ?idfoo } }"
    )
    snap = mkQuery(q)[0]
    q = (
        "PREFIX po: <http://purl.org/socialparticipation/po/> "
        "SELECT ?provenance WHERE { <%s> po:socialProtocol ?provenance }"
    ) % (snap)
    # `provenance` is not used by the drawing below; it stays available to
    # callers through the returned locals().
    provenance = pl(client.retrieveQuery(q))[0]

    edge_counter = 1
    node_counter = 1
    data_nodes = {}
    for aclass in classes:
        aclass_ = aclass.split('/')[-1]
        if aclass_ not in A.nodes():
            A.add_node(aclass_, style="filled")
            n = A.get_node(aclass_)
            n.attr['color'] = "#A2F3D1"
        neigh = neighbors[aclass]
        # Only consequents (neigh[1]) are drawn; antecedents (neigh[0]) are
        # collected but not rendered.
        for i in range(len(neigh[1])):  # consequents
            label = neigh[1][i][1].split("/")[-1]
            elabel = neigh[1][i][0]
            elabel_ = elabel.split('/')[-1]
            if "XMLS" in label:
                # Datatype target: drawn as a numbered node, shared per
                # datatype label when one_datatype is set.
                color = "#FFE4AA"
                if one_datatype:
                    if label in data_nodes:
                        label_ = data_nodes[label]
                    else:
                        label_ = node_counter
                        node_counter += 1
                        data_nodes[label] = label_
                else:
                    label_ = node_counter
                    node_counter += 1
            else:
                label_ = label
                color = "#A2F3D1"
            if label_ not in A.nodes():
                A.add_node(label_, style="filled")
                n = A.get_node(label_)
                n.attr['label'] = label.split("#")[-1]
                n.attr['color'] = color
            # The running counter keeps parallel edges distinct.
            ekey = '{}-{}-{}'.format(aclass_, label_, edge_counter)
            edge_counter += 1
            A.add_edge(aclass_, label_, ekey)
            e = A.get_edge(aclass_, label_, key=ekey)
            e.attr["label"] = elabel_
            e.attr["color"] = color
            e.attr["penwidth"] = 2
            # NOTE(review): membership is tested with r.URIRef(elabel) while
            # the collections hold values as returned by `pl` -- confirm the
            # two compare equal.
            if r.URIRef(elabel) not in functional_properties:
                e.attr["style"] = "dashed"
            if aclass in existential_restrictions:
                restrictions = existential_restrictions[aclass]
                prop = [iii[0] for iii in restrictions]
                if r.URIRef(elabel) in prop:
                    e.attr["color"] = "#A0E0A0"
            if aclass in universal_restrictions:
                restrictions = universal_restrictions[aclass]
                prop = [iii[0] for iii in restrictions]
                if r.URIRef(elabel) in prop:
                    e.attr["arrowhead"] = "inv"
                    e.attr["arrowsize"] = 2.

    A.draw(os.path.join(final_dir, "draw.png"), prog="dot")
    try:
        A.draw(os.path.join(final_dir, "draw_circo.png"), prog="circo")
    except Exception:
        # The circo rendering is best-effort: report and carry on.
        c('circo layout failed; draw_circo.png was not written')
    A.draw(os.path.join(final_dir, "draw_twopi.png"), prog="twopi",
           args="-Granksep=4")
    A.write(os.path.join(final_dir, "draw.dot"))

    # Replace the 'ontology' context with the collected triples and
    # serialize it in two formats.
    P.start(False)
    P.context('ontology', 'remove')
    P.add(triples, 'ontology')
    g = P.context('ontology')
    g.serialize(os.path.join(final_dir, 'ontology.owl'))
    g.serialize(os.path.join(final_dir, 'ontology.ttl'), 'turtle')
    return locals()