Example #1
0
 def translateLinks(self):
     """Translate the rows of ``self.data['links']`` into RDF triples.

     For every row a URI is obtained from ``P.rdf.ic(po.Link, ...)`` and
     triples for the name, url, referenced topic and creation time are
     queued.  ``po.updatedAt`` is queued only when the update time differs
     from the creation time.  The queue is added to
     ``self.translation_graph`` every 60 rows, plus once for the remainder.
     """
     batch = []
     for done, row in enumerate(self.data['links'], start=1):
         link_id, label, address = row[0], row[1], row[2]
         topic_id, created_at, updated_at = row[4], row[5], row[6]
         link_uri = P.rdf.ic(po.Link,
                             self.snapshotid + "-" + str(link_id),
                             self.translation_graph, self.snapshoturi)
         topic_uri = po.Topic + '#' + self.snapshotid + '-' + str(topic_id)
         batch.extend((
             (link_uri, po.name, label),
             (link_uri, po.url, address),
             (link_uri, po.topic, topic_uri),
             (link_uri, po.createdAt, created_at),
         ))
         if updated_at != created_at:
             batch.append((link_uri, po.updatedAt, updated_at))
         if done % 60 == 0:
             # Flush periodically so the queue never grows unbounded.
             c("finished links entries:", done, "ntriples:", len(batch))
             P.add(batch, self.translation_graph)
             c("finished add of links entries")
             batch = []
     if batch:
         P.add(batch, self.translation_graph)
     c("finished add of links entries")
Example #2
0
def minimalTestData():
    """Add three sample Facebook triples about one snapshot to the "void" context."""
    snapshot = NS.po.SnapshotFoo + "#1"
    sample = [
        (snapshot, NS.facebook.ego, True),
        (snapshot, NS.facebook.userID, "1039203918"),
        (snapshot, NS.facebook.user, NS.facebook.Participant + "Foop"),
    ]
    P.add(sample, context="void")
Example #3
0
 def translateStates(self):
     """Translate the rows of ``self.data["estados"]`` into RDF triples.

     For every row a URI is obtained from ``P.rdf.ic(po.State, ...)`` and
     triples for the name, abbreviation, creation time and relevance are
     queued.  ``po.updatedAt`` is queued only when the update time differs
     from the creation time.  The queue is added to
     ``self.translation_graph`` every 60 rows, plus once for the remainder.
     """
     batch = []
     for done, row in enumerate(self.data["estados"], start=1):
         state_id, label, abbreviation = row[0], row[1], row[2]
         created_at, updated_at, relevance = row[3], row[4], row[5]
         state_uri = P.rdf.ic(po.State,
                              self.snapshotid + "-" + str(state_id),
                              self.translation_graph, self.snapshoturi)
         batch.extend((
             (state_uri, po.name, label),
             (state_uri, po.abbreviation, abbreviation),
             (state_uri, po.createdAt, created_at),
             (state_uri, po.relevance, relevance),
         ))
         if updated_at != created_at:
             batch.append((state_uri, po.updatedAt, updated_at))
         if done % 60 == 0:
             # Flush periodically so the queue never grows unbounded.
             c("finished states entries:", done, "ntriples:", len(batch))
             P.add(batch, self.translation_graph)
             c("finished add of states entries")
             batch = []
     if batch:
         P.add(batch, self.translation_graph)
     c("finished add of states entries")
Example #4
0
 def translateSupporters(self):
     """Translate the rows of ``self.data["adesoes"]`` into RDF triples.

     For every row a URI is obtained from ``P.rdf.ic(po.Support, ...)`` and
     triples linking it to a participant and a topic, plus its creation
     time, are queued.  ``po.updatedAt`` is queued only when the update
     time differs from the creation time.  The queue is added to
     ``self.translation_graph`` every 60 rows, plus once for the remainder.
     """
     batch = []
     for done, row in enumerate(self.data["adesoes"], start=1):
         topic_id, user_id = row[0], row[1]
         created_at, updated_at = row[2], row[3]
         support_id = row[4]
         support_uri = P.rdf.ic(po.Support,
                                self.snapshotid + "-" + str(support_id),
                                self.translation_graph, self.snapshoturi)
         participant_uri = \
             po.Participant + '#' + self.snapshotid + '-' + str(user_id)
         topic_uri = po.Topic + '#' + self.snapshotid + '-' + str(topic_id)
         batch.extend((
             (support_uri, po.participant, participant_uri),
             (support_uri, po.topic, topic_uri),
             (support_uri, po.createdAt, created_at),
         ))
         if updated_at != created_at:
             batch.append((support_uri, po.updatedAt, updated_at))
         if done % 60 == 0:
             # Flush periodically so the queue never grows unbounded.
             c("finished supporters entries:", done, "ntriples:", len(batch))
             P.add(batch, self.translation_graph)
             c("finished add of supporters entries")
             batch = []
     if batch:
         P.add(batch, self.translation_graph)
     c("finished add of supporters entries")
Example #5
0
 def translateNeighborhoods(self):
     """Translate the rows of ``self.data["bairros"]`` into RDF triples.

     For every row a URI is obtained from ``P.rdf.ic(po.Neighborhood, ...)``
     and triples for the name, referenced city, creation time and relevance
     are queued.  ``po.updatedAt`` is queued only when the update time
     differs from the creation time.  The queue is added to
     ``self.translation_graph`` every 60 rows, plus once for the remainder.
     """
     batch = []
     for done, row in enumerate(self.data["bairros"], start=1):
         neighborhood_id, label, city_id = row[0], row[1], row[2]
         created_at, updated_at, relevance = row[3], row[4], row[5]
         neighborhood_uri = P.rdf.ic(
             po.Neighborhood,
             self.snapshotid + "-" + str(neighborhood_id),
             self.translation_graph, self.snapshoturi)
         city_uri = po.City + '#' + self.snapshotid + '-' + str(city_id)
         batch.extend((
             (neighborhood_uri, po.name, label),
             (neighborhood_uri, po.city, city_uri),
             (neighborhood_uri, po.createdAt, created_at),
             (neighborhood_uri, po.relevance, relevance),
         ))
         if updated_at != created_at:
             batch.append((neighborhood_uri, po.updatedAt, updated_at))
         if done % 60 == 0:
             # Flush periodically so the queue never grows unbounded.
             c("finished neighborhood entries:", done, "ntriples:", len(batch))
             P.add(batch, self.translation_graph)
             c("finished add of neighborhood entries")
             batch = []
     if batch:
         P.add(batch, self.translation_graph)
     c("finished add of neighborhood entries")
Example #6
0
 def addArticleBody(self, body, articleuri):
     """Store an article body on *articleuri* and remember it in ``self.bodies``.

     When *body* matches the tag-like pattern below it is stored as
     ``po.htmlBodyText`` and its text extracted by BeautifulSoup (if not
     empty) as ``po.cleanBodyText``.  If an add raises ``QueryBadFormed``
     the value is stored percent-quoted under the corresponding
     ``po.quoted...`` predicate instead.  A body without such markup is
     stored directly as ``po.cleanBodyText``.
     """
     has_markup = re.search(r"<(.*)>(.*)<(.*)>", body, re.S)
     if not has_markup:
         P.add([(articleuri, po.cleanBodyText, body)],
               context=self.translation_graph)
     else:
         try:
             P.add((articleuri, po.htmlBodyText, body),
                   context=self.translation_graph)
         except QueryBadFormed:
             c("QUOTING HTML BODY")
             quoted_html = urllib.parse.quote(body)
             P.add((articleuri, po.quotedHtmlBodyText, quoted_html),
                   context=self.translation_graph)
         plain_text = BeautifulSoup(body, 'html.parser').get_text()
         if plain_text:
             try:
                 P.add((articleuri, po.cleanBodyText, plain_text),
                       context=self.translation_graph)
             except QueryBadFormed:
                 c("QUOTING HTML CLEAN BODY")
                 quoted_text = urllib.parse.quote(plain_text)
                 P.add((articleuri, po.quotedCleanBodyText, quoted_text),
                       context=self.translation_graph)
     self.bodies += [body]
Example #7
0
 def writeRdf(self):
     """Serialize the translation and meta graphs under ./participabr_snapshot/.

     The translation graph is written as Turtle and RDF/XML first.  The
     snapshot's descriptive triples are then added to ``self.meta_graph``,
     which is written in the same two formats.
     """
     out_dir = './participabr_snapshot/'
     if not os.path.isdir(out_dir):
         os.mkdir(out_dir)

     translation = P.context(self.translation_graph)
     translation.serialize(out_dir + 'participabr.ttl', 'turtle')
     c('participation ttl serialized')
     translation.serialize(out_dir + 'participabr.rdf', 'xml')
     c('participation xml serialized')

     # Snapshot-level metadata: (predicate, value) pairs about the snapshot.
     description = (
         (a, po.Snapshot),
         (po.snapshotID, self.snapshotid),
         (po.isEgo, False),
         (po.isGroup, True),
         (po.isFriendship, True),
         (po.isInteraction, True),
         (po.isPost, True),
         (po.socialProtocol, 'ParticipaBR'),
         (po.dateObtained, datetime.date(2012, 6, 28)),
     )
     meta_triples = [(self.snapshoturi, predicate, value)
                     for predicate, value in description]
     P.add(meta_triples, self.meta_graph)

     meta = P.context(self.meta_graph)
     meta.serialize(out_dir + 'participabrMeta.ttl', 'turtle')
     c('participation meta ttl serialized')
     meta.serialize(out_dir + 'participabrMeta.rdf', 'xml')
     c('participation meta xml serialized')
Example #8
0
 def translateCities(self):
     """Translate the rows of ``self.data["cidades"]`` into RDF triples.

     For every row a URI is obtained from ``P.rdf.ic(po.City, ...)`` and
     triples for the name, referenced state, slug, creation time and
     relevance are queued.  ``po.updatedAt`` is queued only when the update
     time differs from the creation time.  The queue is added to
     ``self.translation_graph`` every 60 rows, plus once for the remainder.
     """
     count = 0
     triples = []
     for cidade in self.data["cidades"]:
         cid = cidade[0]
         nome = cidade[1]
         eid = cidade[2]
         slug = cidade[3]
         created = cidade[4]
         updated = cidade[5]
         relevance = cidade[6]
         uri = P.rdf.ic(po.City,
                        self.snapshotid+"-"+str(cid),
                        self.translation_graph, self.snapshoturi)
         # Entity references use "<snapshotid>-<id>" after the '#'; the
         # separator was missing here, so the city pointed at a state URI
         # that no state individual carries.
         stateuri = po.State+'#'+self.snapshotid+'-'+str(eid)
         triples += [
                 (uri, po.name, nome),
                 (uri, po.state, stateuri),
                 (uri, po.slug, slug),
                 (uri, po.createdAt, created),
                 (uri, po.relevance, relevance)
         ]
         if updated != created:
             triples.append((uri, po.updatedAt, updated))
         count += 1
         if count % 60 == 0:
             # Flush periodically so the queue never grows unbounded.
             c("finished cities k entries:", count, "ntriples:", len(triples))
             P.add(triples, self.translation_graph)
             c("finished add of cities entries")
             triples = []
     if triples:
         P.add(triples, self.translation_graph)
     c("finished add of cities entries")
Example #9
0
 def translateObservatories(self):
     """Translate the rows of ``self.data["observatorios"]`` into RDF triples.

     For every row a URI is obtained from ``P.rdf.ic(po.Observatory, ...)``
     and triples for the referenced participant and the creation time are
     queued.  ``po.updatedAt`` is queued only when the update time differs
     from the creation time.  The queue is added to
     ``self.translation_graph`` every 60 rows, plus once for the remainder.
     """
     count = 0
     triples = []
     for observatorio in self.data["observatorios"]:
         oid = observatorio[0]
         uid = observatorio[1]
         created = observatorio[4]
         updated = observatorio[5]
         uri = P.rdf.ic(po.Observatory,
                        self.snapshotid+"-"+str(oid),
                        self.translation_graph, self.snapshoturi)
         # The local name is built from the snapshot *id*; the snapshot
         # URI was used here by mistake, giving a participant reference
         # with a whole URI embedded after the '#'.
         participanturi = po.Participant+'#'+self.snapshotid+'-'+str(uid)
         triples += [
                 (uri, po.participant, participanturi),
                 (uri, po.createdAt, created),
         ]
         if updated != created:
             triples.append((uri, po.updatedAt, updated))
         count += 1
         if count % 60 == 0:
             # Flush periodically so the queue never grows unbounded.
             c("finished observatory  entries:", count, "ntriples:", len(triples))
             P.add(triples, self.translation_graph)
             c("finished add of observatory entries")
             triples = []
     if triples:
         P.add(triples, self.translation_graph)
     c("finished add of observatory entries")
Example #10
0
def void():
    """Add sample triples describing one snapshot's files to the "void" context."""
    snapshot = NS.po.SnapshotFoo + "#1"
    base_dir = "~/.percolation/data/somedirs/"
    description = [
        (snapshot, a, NS.po.FacebookSnapshot),
        (snapshot, NS.po.rawFile, base_dir + "something.raw"),
        (snapshot, NS.po.rdfFile, base_dir + "something.rdf"),
        (snapshot, NS.po.voidFile, base_dir + "void.raw"),
    ]
    P.add(description, context="void")
Example #11
0
def parseLegacyFiles(data_dir=DATADIR+"twitter/"):
    """Describe the legacy Twitter pickle files in *data_dir* as snapshots.

    Every file in *data_dir* except ``ipython_log.py`` and ``*.swp`` files
    yields one snapshot URI plus triples describing the snapshot and its raw
    file (size in units of 10**6 bytes, name, format, expressed classes).
    The "social_twitter" context is reset, the triples are added to it, and
    summary counts obtained through SPARQL queries are logged.

    Returns the set of snapshot URIs.
    """
    filenames = [
        entry for entry in os.listdir(data_dir)
        if not (entry == "ipython_log.py" or entry.endswith(".swp"))
    ]

    expressed_classes = [po.Participant, po.Tweet]
    snapshots = set()
    triples = []
    for filename in filenames:
        compact_name = filename.replace("_", "")
        snapshotid = "twitter-legacy-" + compact_name
        snapshoturi = po.TwitterSnapshot + "#" + snapshotid
        expressed_reference = compact_name.replace(".pickle", "")
        fileuri = po.File + "#twitter-file-" + filename
        size_mb = os.path.getsize(data_dir + filename) / 10**6
        triples.extend([
            (snapshoturi, a, po.Snapshot),
            (snapshoturi, a, po.TwitterSnapshot),
            (snapshoturi, po.snapshotID, snapshotid),
            (snapshoturi, po.isEgo, False),
            (snapshoturi, po.isGroup, True),
            (snapshoturi, po.isFriendship, False),
            (snapshoturi, po.isInteraction, True),
            (snapshoturi, po.isPost, True),
            (snapshoturi, po.humanizedName, "Twitter" + expressed_reference),
            (snapshoturi, po.expressedReference, expressed_reference),
            (snapshoturi, po.rawFile, fileuri),
            (fileuri, po.fileSize, size_mb),
            (fileuri, po.fileName, filename),
            (fileuri, po.fileFormat, "pickle"),
        ])
        triples.extend(
            (fileuri, po.expressedClass, expressed_class)
            for expressed_class in expressed_classes
        )
        snapshots.add(snapshoturi)

    nfiles = len(filenames)
    nsnapshots = len(snapshots)
    P.context("social_twitter", "remove")
    platformuri = P.rdf.ic(po.Platform, "Twitter", context="social_twitter")
    # NOTE(review): the "nIRC..." predicate names look carried over from an
    # IRC counterpart of this function - confirm they are intended here.
    triples.extend([
        (NS.social.Session, NS.social.nIRCParsedFiles, nfiles),
        (NS.social.Session, NS.social.nIRCSnapshots, nsnapshots),
        (platformuri, po.dataDir, data_dir),
    ])
    P.add(triples, context="social_twitter")
    c("parsed {} twitter files ({} snapshots) are in percolation graph and 'social_twitter' context".format(nfiles, nsnapshots))
    c("percolation graph have {} triples ({} in social_twitter context)".format(len(P.percolation_graph), len(P.context("social_twitter"))))

    # Summary figures read back from the freshly populated context.
    negos = P.query(r" SELECT (COUNT(?s) as ?cs) WHERE         { GRAPH <social_twitter> { ?s po:isEgo true         } } ")
    ngroups = P.query(r" SELECT (COUNT(?s) as ?cs) WHERE       { GRAPH <social_twitter> { ?s po:isGroup true       } } ")
    nfriendships = P.query(r" SELECT (COUNT(?s) as ?cs) WHERE  { GRAPH <social_twitter> { ?s po:isFriendship true  } } ")
    ninteractions = P.query(r" SELECT (COUNT(?s) as ?cs) WHERE { GRAPH <social_twitter> { ?s po:isInteraction true } } ")
    nposts = P.query(r" SELECT (COUNT(?s) as ?cs) WHERE        { GRAPH <social_twitter> { ?s po:isPost true        } } ")
    sizes = P.query(r" SELECT ?size WHERE              { GRAPH <social_twitter> { ?s po:fileSize ?size     } } ")
    totalsize = sum(sizes)
    c("""{} are ego snapshots, {} are group snapshots
{} have a friendship network. {} have an interaction network. {} have post texts and reaction counts
Total raw data size is {:.2f}MB""".format(negos, ngroups, nfriendships, ninteractions, nposts, totalsize))

    return snapshots
Example #12
0
 def translateObservatoryTags(self):
     """Link observatories to tags from ``self.data["observatorios_tem_tags"]``.

     Each row contributes one ``po.hasTag`` triple between the observatory
     URI (row item 0) and the tag URI (row item 1); all triples are added
     to ``self.translation_graph`` in a single call.
     """
     tag_links = [
         (po.Observatory + '#' + self.snapshotid + '-' + str(row[0]),
          po.hasTag,
          po.Tag + '#' + self.snapshotid + '-' + str(row[1]))
         for row in self.data["observatorios_tem_tags"]
     ]
     P.add(tag_links, self.translation_graph)
     c("finished add of observatory tag entries")
Example #13
0
    def translateComments(self):
        """Translate the rows of ``self.data['comments']`` into RDF triples.

        Rows with an empty body are skipped.  For every other row a URI is
        obtained from ``P.rdf.ic(po.Comment, ...)`` and triples for author,
        topic, text, translated type and creation time are queued;
        ``po.updatedAt`` is queued only when the update time differs from
        the creation time.  The queue is added to
        ``self.translation_graph`` every 60 translated rows, plus once for
        the remainder.

        Raises:
            AssertionError: if a row field does not have the expected type.
            KeyError: if the comment type is not a key of ``trans``.
        """
        # Source comment type -> value stored under po.type.
        trans = {'resposta': 'answer',
                 'pergunta': 'question',
                 'comentario': 'comment',
                 'ideia': 'idea'}
        triples = []
        count = 0
        for comment in self.data['comments']:
            cid = comment[0]
            tid = comment[1]  # topic id
            body = comment[3]
            if not body:
                continue
            # NOTE(review): as written this replaces the empty string with
            # itself, which leaves the text unchanged - confirm whether a
            # character to strip was meant here.
            body = body.replace('', '')
            uid = comment[4]
            ctype = comment[8]
            created = comment[9]
            updated = comment[10]

            assert isinstance(cid, int)
            assert isinstance(tid, int)
            assert isinstance(body, str)
            assert isinstance(uid, int)
            assert isinstance(ctype, str)
            assert isinstance(created, datetime.datetime)
            assert isinstance(updated, datetime.datetime)
            commenturi = P.rdf.ic(po.Comment,
                                  self.snapshotid+"-"+str(cid),
                                  self.translation_graph, self.snapshoturi)
            participanturi = po.Participant+'#'+self.snapshotid+"-"+str(uid)
            topicuri = po.Topic+'#'+self.snapshotid+'-'+str(tid)
            # The timestamps come from the comment row, so they describe
            # the comment; they were previously attached to the topic URI,
            # giving each topic one createdAt per comment.
            triples += [
                (commenturi, po.author, participanturi),
                (commenturi, po.topic, topicuri),
                (commenturi, po.text, body),
                (commenturi, po.type, trans[ctype]),
                (commenturi, po.createdAt, created),
            ]
            if updated != created:
                triples.append((commenturi, po.updatedAt, updated))
            count += 1
            if count % 60 == 0:
                # Flush periodically so the queue never grows unbounded.
                c("finished comment entries:", count, "ntriples:", len(triples))
                P.add(triples, self.translation_graph)
                c("finished add of comment entries")
                triples = []
        if triples:
            P.add(triples, self.translation_graph)
        c("finished add of comment entries")
Example #14
0
 def translatePlaces(self):
     """Translate the rows of ``self.data["locais"]`` into RDF triples.

     For every row a URI is obtained from ``P.rdf.ic(po.Place, ...)`` and
     the creation time is queued.  Neighborhood, city, state and cep
     triples are queued only when the corresponding field is truthy, and
     ``po.updatedAt`` only when it differs from the creation time.  When
     the row names a responsible entity type, a ``po.accountable`` triple
     pointing at that entity is queued.  The queue is added to
     ``self.translation_graph`` every 60 rows, plus once for the remainder.
     """
     # Responsible-entity type found in the data -> class used in its URI.
     # "User" maps to po.Participant, the class used for user references
     # elsewhere (e.g. image owners), rather than an otherwise unused
     # po.User.
     accountable_classes = {
         "Topico": po.Topic,
         "User": po.Participant,
         "Competition": po.Competition,
         "Observatorio": po.Observatory,
     }
     count = 0
     triples = []
     for local in self.data["locais"]:
         lid = local[0]
         rid = local[1]
         rtype = local[2]
         bid = local[3]
         cid = local[4]
         created = local[7]
         updated = local[8]
         cep = local[9]
         eid = local[10]
         uri = P.rdf.ic(po.Place,
                        self.snapshotid+"-"+str(lid),
                        self.translation_graph, self.snapshoturi)
         triples.append((uri, po.createdAt, created))
         if bid:
             triples.append((uri, po.neighborhood,
                             po.Neighborhood+'#'+self.snapshotid+'-'+str(bid)))
         if cid:
             triples.append((uri, po.city,
                             po.City+'#'+self.snapshotid+'-'+str(cid)))
         if eid:
             triples.append((uri, po.state,
                             po.State+'#'+self.snapshotid+'-'+str(eid)))
         if cep:
             triples.append((uri, po.cep, cep))
         if updated != created:
             triples.append((uri, po.updatedAt, updated))
         if rtype:
             accountable_class = accountable_classes.get(rtype)
             if accountable_class is None:
                 # Previously an unrecognised type reused the URI left
                 # over from an earlier row (or raised NameError on the
                 # first row); skip the triple rather than emit a wrong
                 # reference.
                 c("unexpected accountable type in places entry:", rtype)
             else:
                 triples.append(
                     (uri, po.accountable,
                      accountable_class+'#'+self.snapshotid+'-'+str(rid)))
         count += 1
         if count % 60 == 0:
             # Flush periodically so the queue never grows unbounded.
             c("finished places entries:", count, "ntriples:", len(triples))
             P.add(triples, self.translation_graph)
             c("finished add of places entries")
             triples = []
     if triples:
         P.add(triples, self.translation_graph)
     c("finished add of places entries")
Example #15
0
    def rdfGroupPosts(self,filename_posts_):
        """Translate a tab-separated posts file into RDF triples.

        The first line of the file must be a header whose column names
        match ``standard_vars`` position by position (a header that stops
        early is accepted).  Every following line yields a URI from
        ``P.rdf.ic(po.Post, ...)`` and triples for the post id, type, text,
        creation time, comment and like counts, and character and token
        counts.  All triples are added to ``self.posts_graph`` at the end.

        Also sets ``self.nposts``, ``self.postsvars`` and the mean, standard
        deviation and total of the per-post character and token counts.

        Args:
            filename_posts_: path of the tab-separated posts file.

        Raises:
            ValueError: if the header does not match the expected columns.
        """
        # Context manager so the file handle is closed even if reading
        # fails; the original left it open.
        with open(filename_posts_,"r") as posts_file:
            # The text after the last newline is discarded.
            lines=posts_file.read().split("\n")[:-1]
        data=[line.split("\t") for line in lines]
        tvars=data[0]
        standard_vars=['id','type','message','created_time','comments','likes','commentsandlikes']
        # Equivalent to the original position-by-position count: every
        # header column must equal the standard name at the same index.
        if tvars!=standard_vars[:len(tvars)]:
            raise ValueError("the tab file format was not understood")
        data=data[1:]
        triples=[]
        self.nposts=0
        nchars_all=[]
        ntokens_all=[]
        for post in data:
            ind=P.rdf.ic(po.Post,post[0],self.posts_graph,self.snapshoturi)
            # Underscores in the message column become line breaks.
            ptext=post[2].replace("_","\n")
            nchars=len(ptext)
            nchars_all.append(nchars)
            ntokens=len(k.tokenize.wordpunct_tokenize(ptext))
            ntokens_all.append(ntokens)
            triples+=[
                     (ind,po.snapshot,self.snapshoturi),
                     (ind,po.postID,post[0]),
                     (ind,po.postType,post[1]),
                     (ind,po.postText,ptext),
                     (ind,po.createdAt,dateutil.parser.parse(post[3])),
                     (ind,po.nComments,int(post[4])),
                     (ind,po.nLikes,int(post[5])),
                     (ind,po.nChars,nchars),
                     (ind,po.nTokens,ntokens),
                     ]
            if self.nposts%200==0:
                c("posts: ",self.nposts)
            self.nposts+=1
        self.postsvars=["postID","postType","postText","createdAt","nComments","nLikes","nChars","nTokens"]
        # Aggregate statistics are kept on the instance only; they are not
        # added to the posts graph here.
        self.mcharsposts=n.mean(nchars_all)
        self.dcharsposts=n.std(nchars_all)
        self.totalchars=n.sum(nchars_all)
        self.mtokensposts=n.mean(ntokens_all)
        self.dtokensposts=n.std(ntokens_all)
        self.totaltokens=n.sum(ntokens_all)
        P.add(triples,context=self.posts_graph)
Example #16
0
 def translateVotes(self):
     """Translate the rows of ``self.votes_table`` into RDF triples.

     For every vote a URI is obtained from ``P.rdf.ic(po.Vote, ...)`` and
     triples for the creation time, the vote value and the referenced
     article or comment are queued, plus the author when a voter id is
     present.  The queue is added to ``self.translation_graph`` every 100
     votes, plus once for the remainder.

     Raises:
         ValueError: if the voteable type is neither "Article" nor "Comment".
         AssertionError: if a field fails one of the checks below.
     """
     columns = ("id", "vote", "voteable_id",
                "voteable_type", "voter_id", "voter_type", "created_at")
     batch = []
     known_comments = set(self.comments_table.get("id"))
     for done, row in enumerate(self.votes_table.getMany(columns), start=1):
         (id_, vote, voteable_id, voteable_type,
          voter_id, voter_type, created_at) = row
         assert isinstance(id_, int)
         assert isinstance(voteable_id, int)
         assert isinstance(created_at, datetime.datetime)
         vote_uri = P.rdf.ic(po.Vote, self.snapshotid + "-" + str(id_),
                             self.translation_graph, self.snapshoturi)
         if voteable_type == "Comment":
             assert voteable_id in known_comments
             target_class = po.Comment
         elif voteable_type == "Article":
             # The article type is looked up but not used for the URI;
             # the lookup still fails for an id missing from
             # self.articletypes.
             _article_kind = self.articletypes[voteable_id].split("::")[-1]
             target_class = po.Article
         else:
             raise ValueError("unexpected voteable type")
         reference_uri = \
             target_class + "#" + self.snapshotid + "-" + str(voteable_id)
         batch.extend((
             (vote_uri, po.createdAt, created_at),
             (vote_uri, po.vote, vote),
             (vote_uri, po.reference, reference_uri),
         ))
         if voter_id:
             assert voter_type == "Profile"
             assert isinstance(voter_id, int)
             voter_uri = po.Participant + '#' + \
                 self.snapshotid + "-" + self.profileids[voter_id]
             batch.append((vote_uri, po.author, voter_uri))
         if done % 100 == 0:
             # Flush periodically so the queue never grows unbounded.
             c("votes done:", done)
             c("ntriples:", len(batch))
             P.add(batch, self.translation_graph)
             c("finished add of votes")
             batch = []
     if batch:
         c("ntriples:", len(batch))
         P.add(batch, self.translation_graph)
Example #17
0
 def makeMeta(self):
     """Add the snapshot's descriptive triples to ``self.meta_graph``."""
     # (predicate, value) pairs, all stated about self.snapshoturi.
     description = (
         (a, po.Snapshot),
         (po.snapshotID, self.snapshotid),
         (po.isEgo, False),
         (po.isGroup, True),
         (po.isFriendship, False),
         (po.isInteraction, False),
         (po.isPost, True),
         (po.socialProtocol, 'Algorithmic Autoregulation'),
         (po.dateObtained, datetime.date(2015, 7, 15)),
     )
     meta_triples = [(self.snapshoturi, predicate, value)
                     for predicate, value in description]
     P.add(meta_triples, self.meta_graph)
Example #18
0
 def makeMeta(self):
     """Add the snapshot's descriptive triples to ``self.meta_graph``."""
     # (predicate, value) pairs, all stated about self.snapshoturi.
     description = (
         (a, po.Snapshot),
         (po.snapshotID, self.snapshotid),
         (po.isEgo, False),
         (po.isGroup, True),
         (po.isFriendship, False),
         (po.isInteraction, False),
         (po.isPost, True),
         (po.socialProtocol, 'Cidade Democrática'),
         (po.dateObtained, datetime.date(2014, 3, 19)),
     )
     meta_triples = [(self.snapshoturi, predicate, value)
                     for predicate, value in description]
     P.add(meta_triples, self.meta_graph)
Example #19
0
 def translateImages(self):
     """Translate the rows of ``self.data["imagens"]`` into RDF triples.

     For every row a URI is obtained from ``P.rdf.ic(po.Image, ...)`` and
     the creation time is queued.  A ``po.accountable`` triple is queued
     when the owner type is "User" or "Topico"; size, content type, file
     name, height, width and caption are queued only when truthy, and
     ``po.updatedAt`` only when it differs from the creation time.  The
     queue is added to ``self.translation_graph`` every 60 rows, plus once
     for the remainder.
     """
     triples = []
     count = 0
     for imagem in self.data["imagens"]:
         iid = imagem[0]
         rid = imagem[1]
         rtype = imagem[2]
         size = imagem[3]
         ctype = imagem[4]
         fname = imagem[5]
         height = imagem[6]
         width = imagem[7]
         legenda = imagem[11]
         created = imagem[12]
         updated = imagem[13]
         uri = P.rdf.ic(po.Image,
                        self.snapshotid+"-"+str(iid),
                        self.translation_graph, self.snapshoturi)
         triples.append((uri, po.createdAt, created))
         # The two owner types are mutually exclusive.
         if rtype == "User":
             triples.append((uri, po.accountable,
                             po.Participant+"#"+self.snapshotid+'-'+str(rid)))
         elif rtype == "Topico":
             triples.append((uri, po.accountable,
                             po.Topic+"#"+self.snapshotid+'-'+str(rid)))
         if size:
             triples.append((uri, po.size, int(size)))
         if ctype:
             triples.append((uri, po.contentType, ctype))
         if fname:
             triples.append((uri, po.filename, fname))
         if height:
             triples.append((uri, po.height, int(height)))
         if width:
             triples.append((uri, po.width, int(width)))
         if legenda:
             triples.append((uri, po.caption, legenda))
         if updated != created:
             triples.append((uri, po.updatedAt, updated))
         count += 1
         if count % 60 == 0:
             # Flush periodically so the queue never grows unbounded.
             c("finished image  entries:", count, "ntriples:", len(triples))
             P.add(triples, self.translation_graph)
             c("finished add of image entries")
             triples = []
     if triples:
         P.add(triples, self.translation_graph)
     # The closing message used to say "prizes"; it now names the
     # entities this method actually handles.
     c("finished add of image entries")
Example #20
0
 def translateLoginHistory(self):
     """Translate the rows of ``self.data["historico_de_logins"]`` into triples.

     For every row a URI is obtained from ``P.rdf.ic(po.Login, ...)`` and
     triples for the referenced participant, the creation time and the ip
     are collected; all of them are added to ``self.translation_graph`` in
     a single call at the end.
     """
     collected = []
     for row in self.data["historico_de_logins"]:
         login_id, user_id = row[0], row[1]
         created_at, address = row[2], row[3]
         login_uri = P.rdf.ic(po.Login,
                              self.snapshotid + "-" + str(login_id),
                              self.translation_graph, self.snapshoturi)
         participant_uri = \
             po.Participant + '#' + self.snapshotid + '-' + str(user_id)
         collected.extend((
             (login_uri, po.participant, participant_uri),
             (login_uri, po.createdAt, created_at),
             (login_uri, po.ip, address),
         ))
     P.add(collected, self.translation_graph)
     c("finished add of login entries")
Example #21
0
 def translateFriendships(self):
     """Translate the rows of ``self.friendships_table`` into RDF triples.

     For every (person, friend) row a URI is obtained from
     ``P.rdf.ic(po.Friendship, ...)`` and triples for both members and the
     creation time are queued, plus a ``po.knows`` triple and a
     ``po.socialCircle`` triple under the conditions noted below.  The
     queue is added to ``self.translation_graph`` every 100 rows, plus
     once for the remainder.

     Raises:
         AssertionError: if ``created_at`` is not a ``datetime.datetime``.
     """
     triples = []
     # All (person_id, friend_id) rows, used for the reverse-pair test below.
     fids = self.friendships_table.getMany(("person_id", "friend_id"))
     added_friendships = []
     count = 0
     for person_id, friend_id, created_at, group in \
             self.friendships_table.getMany(
                 ('person_id', 'friend_id', 'created_at', 'group')):
         # NOTE(review): this bookkeeping never skips a row - both branches
         # fall through to the code below, so a friendship stored in both
         # directions is translated twice.  Confirm whether a `continue`
         # was intended for the first branch.
         if [friend_id, person_id] in added_friendships:
             pass
         else:
             added_friendships += [[person_id, friend_id]]
         # Used in string concatenation below, so presumably strings.
         id0 = self.profileids[person_id]
         id1 = self.profileids[friend_id]
         friendshipuri = P.rdf.ic(po.Friendship,
                                  self.snapshotid+'-'+id0+'-'+id1,
                                  self.translation_graph, self.snapshoturi)
         participanturi0 = po.Participant+"#"+self.snapshotid+"-"+id0
         participanturi1 = po.Participant+"#"+self.snapshotid+"-"+id1
         assert isinstance(created_at, datetime.datetime)
         triples += [
                    (friendshipuri, po.member, participanturi0),
                    (friendshipuri, po.member, participanturi1),
                    (friendshipuri, po.createdAt, created_at),
                    ]
         # po.knows is added only when the reverse pair is not found in
         # fids.
         # NOTE(review): the probe is a list; if getMany returns tuples the
         # comparison never matches and po.knows is always added - verify.
         if [friend_id, person_id] not in fids:
             triples += [
                        (participanturi0, po.knows, participanturi1),
                        ]
         if group:
             triples += [
                        (friendshipuri, po.socialCircle, group),
                        ]
         count += 1
         if count % 100 == 0:
             # Flush periodically so the queue never grows unbounded.
             c("done friendships:", count)
             c("ntriples:", len(triples))
             P.add(triples, self.translation_graph)
             c("finished add of friendships")
             triples = []
     # Add whatever is left after the last full batch.
     if triples:
         c("ntriples:", len(triples))
         P.add(triples, self.translation_graph)
Example #22
0
 def translateMacrotags(self):
     """Translate the rows of ``self.data["macro_tags"]`` into RDF triples.

     For every row a URI is obtained from ``P.rdf.ic(po.Macrotag, ...)``
     and the creation time is collected; ``po.updatedAt`` is collected only
     when it differs from the creation time and ``po.title`` only when the
     title is truthy.  All triples are added to ``self.translation_graph``
     in a single call at the end.
     """
     triples = []
     for mt in self.data["macro_tags"]:
         mtid = mt[0]
         title = mt[1]
         created = mt[2]
         updated = mt[3]
         uri = P.rdf.ic(po.Macrotag,
                        self.snapshotid+"-"+str(mtid),
                        self.translation_graph, self.snapshoturi)
         triples.append((uri, po.createdAt, created))
         if updated != created:
             triples.append((uri, po.updatedAt, updated))
         if title:
             triples.append((uri, po.title, title))
     P.add(triples, self.translation_graph)
     # The message used to say "microtag" although this method handles
     # macrotags.
     c("finished add of macrotag entries")
Example #23
0
 def translateCompetitions(self):
     """Translate the rows of ``self.data['competitions']`` into RDF triples.

     For every row a URI is obtained from ``P.rdf.ic(po.Competition, ...)``
     and triples for the short, long and author descriptions, creation and
     start times, title, regulations, awards and partners are queued.
     ``po.updatedAt`` is queued only when the update time differs from the
     creation time.  The queue is added to ``self.translation_graph`` every
     60 rows, plus once for the remainder.
     """
     count = 0
     triples = []
     for competition in self.data['competitions']:
         coid = competition[0]
         sdesc = competition[1]
         created = competition[3]
         updated = competition[4]
         start = competition[5]
         title = competition[11]
         ldesc = competition[14]
         adesc = competition[15]
         reg = competition[16]
         aw = competition[17]
         part = competition[18]
         competitionuri = P.rdf.ic(po.Competition,
                                   self.snapshotid+"-"+str(coid),
                                   self.translation_graph, self.snapshoturi)
         triples += [
                 (competitionuri, po.shortDescription, sdesc),
                 (competitionuri, po.description, ldesc),
                 (competitionuri, po.authorDescription, adesc),
                 (competitionuri, po.createdAt, created),
                 (competitionuri, po.startAt, start),
                 (competitionuri, po.title, title),
                 (competitionuri, po.regulations, reg),
                 (competitionuri, po.awards, aw),
                 (competitionuri, po.partners, part),
         ]
         if updated != created:
             triples.append((competitionuri, po.updatedAt, updated))
         count += 1
         if count % 60 == 0:
             # Flush periodically so the queue never grows unbounded.
             c("finished competition entries:", count, "ntriples:", len(triples))
             P.add(triples, self.translation_graph)
             c("finished add of competition entries")
             triples = []
     if triples:
         P.add(triples, self.translation_graph)
     # Same wording as the per-batch message; the closing message was
     # misspelled, which made it impossible to find by searching the log.
     c("finished add of competition entries")
Example #24
0
    def translatePrizes(self):
        """Translate the rows of ``self.data["competition_prizes"]`` into triples.

        For every row a URI is obtained from ``P.rdf.ic(po.Prize, ...)`` and
        triples for the name, description, referenced competition, offerer
        and topic, and the creation time are queued.  ``po.updatedAt`` is
        queued only when the update time differs from the creation time.
        The queue is added to ``self.translation_graph`` every 60 rows, plus
        once for the remainder.
        """
        count = 0
        triples = []
        for prize in self.data["competition_prizes"]:
            pid = prize[0]
            name = prize[1]
            description = prize[2]
            competition_id = prize[3]
            offerer_id = prize[4]
            tid = prize[5]
            created = prize[6]
            updated = prize[7]
            prizeuri = P.rdf.ic(po.Prize,
                                self.snapshotid+"-"+str(pid),
                                self.translation_graph, self.snapshoturi)

            # The description triple used to be listed twice per prize;
            # it is now queued once.
            triples += [
                    (prizeuri, po.name, name),
                    (prizeuri, po.description, description),
                    (prizeuri, po.competition,
                        po.Competition+"#"+self.snapshotid+'-'+str(competition_id)),
                    (prizeuri, po.offerer,
                        po.Participant+"#"+self.snapshotid+'-'+str(offerer_id)),
                    (prizeuri, po.topic,
                     po.Topic+"#"+self.snapshotid+'-'+str(tid)),
                    (prizeuri, po.createdAt, created)
            ]
            if updated != created:
                triples.append((prizeuri, po.updatedAt, updated))
            count += 1
            if count % 60 == 0:
                # Flush periodically so the queue never grows unbounded.
                c("finished prizes entries:", count, "ntriples:", len(triples))
                P.add(triples, self.translation_graph)
                c("finished add of prizes entries")
                triples = []
        if triples:
            P.add(triples, self.translation_graph)
        c("finished add of prizes entries")
Example #25
0
 def translateInspirations(self):
     """Translate the rows of ``self.data["inspirations"]`` into triples.

     Rows are read by position: inspiration id, competition id,
     description, creation value, update value, image, user id and title.
     For every row a ``po.Inspiration`` individual is requested from
     ``P.rdf.ic`` and described; ``po.updatedAt`` is only stated when the
     update value differs from the creation value.  The collected triples
     are passed to ``P.add`` every 60 rows, plus once at the end if any
     are still pending.
     """
     snapid = self.snapshotid

     def ref(klass, ident):
         # URI of the individual of `klass` with id `ident` in this snapshot.
         return klass+'#'+snapid+'-'+str(ident)

     pending = []
     for seen, row in enumerate(self.data["inspirations"], 1):
         iid, cid, desc = row[0], row[1], row[2]
         created, updated = row[3], row[4]
         image, uid, title = row[5], row[6], row[7]
         uri = P.rdf.ic(po.Inspiration, snapid+"-"+str(iid),
                        self.translation_graph, self.snapshoturi)
         pending.extend((
             (uri, po.competition, ref(po.Competition, cid)),
             (uri, po.description, desc),
             (uri, po.createdAt, created),
             (uri, po.participant, ref(po.Participant, uid)),
             (uri, po.title, title),
             (uri, po.filename, image),
         ))
         if updated != created:
             pending.append((uri, po.updatedAt, updated))
         if seen % 60:
             continue
         # A full batch of 60 rows has been collected: flush it.
         c("finished inspiration entries:", seen, "ntriples:", len(pending))
         P.add(pending, self.translation_graph)
         c("finished add of inspiration entries")
         pending = []
     if pending:
         P.add(pending, self.translation_graph)
     c("finished add of inspiration entries")
Example #26
0
    def translateTags(self):
        """Translate the rows of ``self.data["tags"]`` into triples.

        Each row is read by position: tag id, tag text and relevance.  A
        ``po.Tag`` individual is requested from ``P.rdf.ic`` for every row
        and given its ``po.text`` and ``po.relevance``.  Triples go to
        ``P.add`` every 160 rows, plus once at the end for the remainder.
        """
        pending = []
        for seen, row in enumerate(self.data["tags"], 1):
            tag_id, text, relevance = row[0], row[1], row[2]
            tag_uri = P.rdf.ic(po.Tag, self.snapshotid+"-"+str(tag_id),
                               self.translation_graph, self.snapshoturi)
            pending.append((tag_uri, po.text, text))
            pending.append((tag_uri, po.relevance, relevance))
            if seen % 160:
                continue
            # A full batch of 160 rows has been collected: flush it.
            c("finished tag  entries:", seen, "ntriples:", len(pending))
            P.add(pending, self.translation_graph)
            c("finished add of tag  entries")
            pending = []
        if pending:
            P.add(pending, self.translation_graph)
        c("finished add of tag entries")
Example #27
0
    def translateTaggings(self):
        """Add the triples describing every tagging.

        Each row of ``self.data["taggings"]`` is read by position: index 0
        is the tagging id, 1 the tag id, 2 the id of the tagged resource,
        3 the tagger's user id, 5 the type of the tagged resource and 7 the
        creation value (indexes 4 and 6 are not used here).  A
        ``po.Tagging`` individual is obtained from ``P.rdf.ic`` and linked
        to its tag, tagger and tagged resource.  The tagged resource is a
        ``po.Topic`` when the type equals ``"Topico"`` and a ``po.Macrotag``
        otherwise.

        Triples are handed to ``P.add`` (target: ``self.translation_graph``)
        every 160 taggings, and once more at the end for any remainder.
        """
        batch_size = 160  # taggings translated between two P.add calls
        triples = []
        for count, tagging in enumerate(self.data["taggings"], 1):
            tagging_id = tagging[0]
            tag_id = tagging[1]
            target_id = tagging[2]
            user_id = tagging[3]
            target_type = tagging[5]
            created = tagging[7]

            # P.rdf.ic is the only source of the tagging URI.
            uri = P.rdf.ic(po.Tagging,
                           self.snapshotid+"-"+str(tagging_id),
                           self.translation_graph, self.snapshoturi)
            # tagging -> topic, or tagging -> macrotag for any other type
            target_class = po.Topic if target_type == "Topico" else po.Macrotag
            triples += [
                (uri, po.tag, po.Tag+"#"+self.snapshotid+'-'+str(tag_id)),
                (uri, po.tagger,
                 po.Participant+"#"+self.snapshotid+'-'+str(user_id)),
                (uri, po.createdAt, created),
                (uri, po.tagged,
                 target_class+"#"+self.snapshotid+'-'+str(target_id)),
            ]
            if count % batch_size == 0:
                c("finished tagging  entries:", count, "ntriples:", len(triples))
                P.add(triples, self.translation_graph)
                c("finished add of tagging  entries")
                # Rebind instead of clearing: the list handed to P.add is
                # left untouched.
                triples = []
        if triples:
            P.add(triples, self.translation_graph)
        c("finished add of tagging entries")
Example #28
0
    def makeMetadata(self):
        """Describe this facebook snapshot in ``self.meta_graph``.

        Steps, in order:

        * add to the meta graph what ``P.get`` returns from the
          ``"social_facebook"`` context for the snapshot URI and for each of
          its ``po.rawFile`` values;
        * set ``self.ffile``, ``self.frdf`` and ``self.fttl`` and state the
          friendship file names, counts and anonymization flag;
        * register ``self.friendsvars`` through ``P.rdf.triplesScaffolding``;
        * set ``self.mrdf``, ``self.mttl`` and the textual summary
          ``self.desc``, then add the provenance triples.
        """
        snapshot = self.snapshoturi
        source = "social_facebook"

        carried = P.get(snapshot, None, None, source)
        for raw in P.get(snapshot, po.rawFile, None, source,
                         strict=True, minimized=True):
            carried += P.get(raw, None, None, source)
        P.add(carried, context=self.meta_graph)

        # Friendship file names, relative to the publication directory.
        self.ffile = "base/" + self.filename_friendships
        self.frdf = self.snapshotid + "Friendship.rdf"
        self.fttl = self.snapshotid + "Friendship.ttl"
        friendship_facts = (
            (po.onlineOriginalFriendshipFile, self.online_prefix + self.ffile),
            (po.originalFriendshipFileName, self.ffile),
            (po.onlineFriendshipXMLFile, self.online_prefix + self.frdf),
            (po.onlineFriendshipTTLFile, self.online_prefix + self.fttl),
            (po.friendshipXMLFileName, self.frdf),
            (po.friendshipTTLFileName, self.fttl),
            (po.nFriends, self.nfriends),
            (po.nFriendships, self.nfriendships),
            (po.friendshipsAnonymized, self.friendships_anonymized),
        )
        P.add([(snapshot, pred, value) for pred, value in friendship_facts],
              context=self.meta_graph)
        # One predicate per friend attribute name.
        P.rdf.triplesScaffolding(
            snapshot,
            [po.frienshipParticipantAttribute] * len(self.friendsvars),
            self.friendsvars,
            context=self.meta_graph,
        )

        self.mrdf = self.snapshotid + "Meta.rdf"
        self.mttl = self.snapshotid + "Meta.ttl"

        # Human-readable summary, later attached as rdfs:comment.
        summary = [
            "facebook network with snapshotID: {}\nsnapshotURI: {} \nisEgo: {}. isGroup: {}.".format(
                self.snapshotid, self.snapshoturi, self.isego, self.isgroup),
            "\nisFriendship: {}".format(self.isfriendship),
            "; nFriends: {}; nFrienships: {}.".format(self.nfriends, self.nfriendships),
            "\nisInteraction: {}".format(self.isinteraction),
            "\nisPost: {} (alias hasText: {})".format(self.hastext, self.hastext),
        ]
        self.desc = "".join(summary)

        provenance = [
            (po.triplifiedIn, datetime.datetime.now()),
            (po.triplifiedBy, "scripts/"),
            (po.donatedBy, self.snapshotid[:-4]),
            (po.availableAt, self.online_prefix),
            (po.onlineMetaXMLFile, self.online_prefix + self.mrdf),
            (po.onlineMetaTTLFile, self.online_prefix + self.mttl),
            (po.metaXMLFileName, self.mrdf),
            (po.metaTTLFileName, self.mttl),
            (po.acquiredThrough, "Netvizz"),
            (po.socialProtocolTag, "Facebook"),
            (po.socialProtocol,
             P.rdf.ic(po.Platform, "Facebook", self.meta_graph, self.snapshoturi)),
            (NS.rdfs.comment, self.desc),
        ]
        P.add([(snapshot, pred, value) for pred, value in provenance],
              self.meta_graph)
Example #29
0
    def makeMetadata(self):
        """Describe this twitter snapshot in ``self.meta_graph``.

        Steps, in order:

        * collect what ``P.get`` returns from ``self.social_graph`` for the
          snapshot URI and for each of its ``po.rawFile`` values;
        * store the sum, ``n.mean`` and ``n.std`` of ``self.nchars_all`` and
          ``self.ntokens_all`` on the instance, then add the collected
          triples and the overall counts to the meta graph;
        * register the participant attributes and the tweet file names
          (plain and prefixed with ``self.online_prefix``) through
          ``P.rdf.triplesScaffolding``;
        * set ``self.mrdf``, ``self.mttl`` and the textual summary
          ``self.desc``, then add the provenance triples.
        """
        snapshot = self.snapshoturi
        source = self.social_graph

        carried = P.get(snapshot, None, None, source)
        for raw in P.get(snapshot, po.rawFile, None, source,
                         strict=True, minimized=True):
            carried += P.get(raw, None, None, source)

        # Overall text statistics, kept on the instance for later use.
        self.totalchars = sum(self.nchars_all)
        self.mcharstweets = n.mean(self.nchars_all)
        self.dcharstweets = n.std(self.nchars_all)
        self.totaltokens = sum(self.ntokens_all)
        self.mtokenstweets = n.mean(self.ntokens_all)
        self.dtokenstweets = n.std(self.ntokens_all)
        P.add(carried, context=self.meta_graph)

        overall = (
            (po.nParticipants, self.nparticipants),
            (po.nTweets, self.ntweets),
            (po.nReplies, self.nreplies),
            (po.nRetweets, self.nretweets),
            (po.nCharsOverall, self.totalchars),
            (po.mCharsOverall, self.mcharstweets),
            (po.dCharsOverall, self.dcharstweets),
            (po.nTokensOverall, self.totaltokens),
            (po.mTokensOverall, self.mtokenstweets),
            (po.dTokensOverall, self.dtokenstweets),
        )
        P.add([(snapshot, pred, value) for pred, value in overall],
              context=self.meta_graph)

        scaffold = P.rdf.triplesScaffolding
        # One predicate per participant attribute name.
        scaffold(snapshot,
                 [po.tweetParticipantAttribute] * len(self.participantvars),
                 self.participantvars,
                 context=self.meta_graph)
        # File names: all RDF/XML files first, then all Turtle files.
        scaffold(snapshot,
                 [po.tweetXMLFilename] * len(self.tweet_rdf)
                 + [po.tweetTTLFilename] * len(self.tweet_ttl),
                 self.tweet_rdf + self.tweet_ttl,
                 context=self.meta_graph)
        # Same files, as online locations.
        scaffold(snapshot,
                 [po.onlineTweetXMLFile] * len(self.tweet_rdf)
                 + [po.onlineTweetTTLFile] * len(self.tweet_ttl),
                 [self.online_prefix + name
                  for name in self.tweet_rdf + self.tweet_ttl],
                 context=self.meta_graph)

        self.mrdf = self.snapshotid + "Meta.rdf"
        self.mttl = self.snapshotid + "Meta.ttl"

        # Human-readable summary, later attached as rdfs:comment.
        summary = [
            "twitter dataset with snapshotID: {}\nsnapshotURI: {} \nisEgo: {}. isGroup: {}.".format(
                self.snapshotid, self.snapshoturi, self.isego, self.isgroup),
            "\nisFriendship: {}; ".format(self.isfriendship),
            "isInteraction: {}.".format(self.isinteraction),
            "\nnParticipants: {}; nInteractions: {} (replies+retweets+user mentions).".format(
                self.nparticipants,
                self.nreplies + self.nretweets + self.nuser_mentions),
            "\nisPost: {} (alias hasText: {})".format(self.hastext, self.hastext),
            "\nnTweets: {}; ".format(self.ntweets),
            "nReplies: {}; nRetweets: {}; nUserMentions: {}.".format(
                self.nreplies, self.nretweets, self.nuser_mentions),
            "\nnTokens: {}; mTokens: {}; dTokens: {};".format(
                self.totaltokens, self.mtokenstweets, self.dtokenstweets),
            "\nnChars: {}; mChars: {}; dChars: {}.".format(
                self.totalchars, self.mcharstweets, self.dcharstweets),
            "\nnHashtags: {}; nMedia: {}; nLinks: {}.".format(
                self.nhashtags, self.nmedia, self.nlinks),
        ]
        self.desc = "".join(summary)

        provenance = [
            (po.triplifiedIn, datetime.datetime.now()),
            (po.triplifiedBy, "scripts/"),
            (po.donatedBy, self.snapshotid[:-4]),
            (po.availableAt, self.online_prefix),
            (po.onlineMetaXMLFile, self.online_prefix + self.mrdf),
            (po.onlineMetaTTLFile, self.online_prefix + self.mttl),
            (po.metaXMLFileName, self.mrdf),
            (po.metaTTLFileName, self.mttl),
            (po.totalXMLFileSizeMB, sum(self.size_rdf)),
            (po.totalTTLFileSizeMB, sum(self.size_ttl)),
            (po.acquiredThrough, "Twitter APIs"),
            (po.socialProtocolTag, "Twitter"),
            (po.socialProtocol,
             P.rdf.ic(po.Platform, "Twitter", self.meta_graph, self.snapshoturi)),
            (po.nTriples, self.ntriples),
            (NS.rdfs.comment, self.desc),
        ]
        P.add([(snapshot, pred, value) for pred, value in provenance],
              self.meta_graph)
Example #30
0
    def writeAllTW(self):
        """Write the meta graph, the generating script and a README to disk.

        In order, the method:

        * records the size of the meta graph as ``po.nMetaTriples`` and
          serializes the graph to ``<snapshotid>Meta.ttl`` (Turtle) and
          ``<snapshotid>Meta.rdf`` (RDF/XML) under ``self.final_path_``;
        * creates ``self.final_path_ + "scripts"`` if needed and copies
          ``tests/triplify.py`` into it;
        * replaces ``self.dates`` with the ISO-format strings of its items;
        * writes ``self.final_path_ + "README"`` from the snapshot
          attributes and ``self.desc``.
        """
        # write meta and readme with self.desc, finished.
        g=P.context(self.meta_graph)
        # Counted before the nMetaTriples triple below is added.
        ntriples=len(g)
        triples=[
                 (self.snapshoturi,po.nMetaTriples,ntriples)      ,
                 ]
        P.add(triples,context=self.meta_graph)
        # presumably P.add writes into the same graph object as `g`, so the
        # serializations below include nMetaTriples -- TODO confirm
        g.namespace_manager.bind("po",po)
        g.serialize(self.final_path_+self.snapshotid+"Meta.ttl","turtle"); c("ttl")
        g.serialize(self.final_path_+self.snapshotid+"Meta.rdf","xml")
        c("serialized meta")
        # copy the script that generates this code
        if not os.path.isdir(self.final_path_+"scripts"):
            os.mkdir(self.final_path_+"scripts")
        shutil.copy(S.PACKAGEDIR+"/../tests/triplify.py",self.final_path_+"scripts/triplify.py")
        # copy of the base data
        # NOTE(review): no copy is performed in the visible code; what
        # follows only builds the README paragraphs.
        # README paragraph about participants and interactions.
        tinteraction="""\n\n{} individuals with metadata {}
and {} interactions (retweets: {}, replies: {}, user_mentions: {}) 
constitute the interaction 
network in the RDF/XML file(s):
{}
and the Turtle file(s):
{}
(anonymized: {}).""".format( self.nparticipants,str(self.participantvars),
                    self.nretweets+self.nreplies+self.nuser_mentions,self.nretweets,self.nreplies,self.nuser_mentions,
                    self.tweet_rdf,
                    self.tweet_ttl,
                    self.interactions_anonymized)
        # README paragraph about tweet counts and character/token statistics.
        tposts="""\n\nThe dataset consists of {} tweets with metadata {}
{:.3f} characters in average (std: {:.3f}) and total chars in snapshot: {}
{:.3f} tokens in average (std: {:.3f}) and total tokens in snapshot: {}""".format(
                        self.ntweets,str(self.tweetvars),
                        self.mcharstweets,self.dcharstweets,self.totalchars,
                        self.mtokenstweets,self.dtokenstweets,self.totaltokens,
                        )
        # self.dates is overwritten with strings here; min/max below are
        # therefore string comparisons of the ISO representations.
        self.dates=[i.isoformat() for i in self.dates]
        date1=min(self.dates)
        date2=max(self.dates)
        # NOTE(review): the README text below mentions a "script/" directory
        # while the script is copied to "scripts/" above.
        with open(self.final_path_+"README","w") as f:
            f.write("""::: Open Linked Social Data publication
\nThis repository is a RDF data expression of the twitter
snapshot {snapid} with tweets from {date1} to {date2}
(total of {ntrip} triples).{tinteraction}{tposts}
\nMetadata for discovery in the RDF/XML file:
{mrdf} \nor in the Turtle file:\n{mttl}
\nEgo network: {ise}
Group network: {isg}
Friendship network: {isf}
Interaction network: {isi}
Has text/posts: {ist}
\nAll files should be available at the git repository:
{ava}
\n{desc}

The script that rendered this data publication is on the script/ directory.\n:::""".format(
                snapid=self.snapshotid,date1=date1,date2=date2,ntrip=self.ntriples,
                        tinteraction=tinteraction,
                        tposts=tposts,
                        mrdf=self.mrdf,
                        mttl=self.mttl,
                        ise=self.isego,
                        isg=self.isgroup,
                        isf=self.isfriendship,
                        isi=self.isinteraction,
                        ist=self.hastext,
                        ava=self.online_prefix,
                        desc=self.desc
                        ))