Esempio n. 1
0
 def __processArtists(self):
     """Pop the next entry off the artist queue and process it.

     The artist is added to the band manager and, when the entry carries a
     follow link, the acts related to that page are pushed onto the band
     queue one recursion level deeper.
     """
     entry = self.__artistQueue.pop()
     # When artist links are not followed expansively, an artist that is
     # already known is not processed a second time.
     if not self.expansiveArtistGraph and self.bm.artistExists(entry["artist"]):
         return
     depth = entry["recursion"]
     indent = "\t" * depth
     if depth > self.recursionLimit:
         return
     name = entry["artist"]
     if self.printDebug:
         print (indent + "[a] - " + str(name).strip() +
                "   <" + entry["parent"] + ">")
     self.bm.addArtist(Artist(name))
     # Without a follow link there is nothing further to parse.
     if entry["link"].strip() == "":
         return
     nextDepth = depth + 1
     for act in WikiParser(entry["link"]).getRelatedActs():
         if self.bm.bandExists(act["band"]):
             continue
         if nextDepth > self.recursionLimit:
             continue
         # Tag the act with its depth and origin, then queue it as a band.
         act["recursion"] = nextDepth
         act["parent"] = "a." + name
         self.__bandQueue.appendleft(act)
Esempio n. 2
0
 def __processBands(self):
     """Pop the next entry off the band queue and process it.

     Each band name is handled at most once. For a band entry that has a
     follow link and lies within the recursion limit, the method:

     * queues every current member as an artist and links it to the band;
     * does the same for former members when ``showFormerMembers`` is set
       (former links are created with the extra ``True`` argument);
     * re-queues the entry as an artist when the page yields no members at
       all and the entry is still below the recursion limit;
     * otherwise adds the band to the band manager and queues its related
       acts one recursion level deeper.
     """
     band = self.__bandQueue.pop()
     bandName = band["band"]
     depth = band["recursion"]

     # Skip bands that were already handled; otherwise remember this one.
     processedKey = str(bandName)
     if processedKey in self.__bandProcessedList:
         return
     self.__bandProcessedList.append(processedKey)

     # Without a follow link there is nothing to parse.
     if band["link"].strip() == "":
         return
     # Check the depth limit before constructing the parser, so an entry
     # beyond the limit causes no parsing work at all.
     if depth > self.recursionLimit:
         return

     parser = WikiParser(band["link"])
     members = parser.getBandMembers()
     formerMembers = parser.getBandMembers("former")
     nextDepth = depth + 1
     parentTag = "b." + bandName

     # Current members are queued and linked regardless of the depth limit
     # (unlike former members and related acts below).
     for member in members:
         member["recursion"] = nextDepth
         member["parent"] = parentTag
         self.__artistQueue.appendleft(member)
         self.bm.link(Artist(member["artist"]), Band(bandName))

     # Former members are only followed while they stay within the limit.
     if self.showFormerMembers and nextDepth <= self.recursionLimit:
         for member in formerMembers:
             member["recursion"] = nextDepth
             member["parent"] = parentTag
             self.__artistQueue.appendleft(member)
             self.bm.link(Artist(member["artist"]), Band(bandName), True)

     # An entry without any members presumably is an artist that ended up in
     # the band queue: hand it over to the artist queue at the same depth.
     if not members and not formerMembers and depth < self.recursionLimit:
         self.__artistQueue.appendleft({
             "artist": bandName,
             "link": band["link"],
             "recursion": depth,
             "parent": band["parent"],
         })
         return

     if self.printDebug:
         print ("\t" * depth + "[b] - " + str(bandName).strip() +
                "   <" + band["parent"] + ">")
     # NOTE(review): addBand receives the bare name while link() is given
     # Band(...) objects -- confirm that addBand accepts a name.
     self.bm.addBand(bandName)

     # Queue every related act that is not known yet and within the limit.
     for act in parser.getRelatedActs():
         if self.bm.bandExists(act["band"]):
             continue
         if nextDepth > self.recursionLimit:
             continue
         act["recursion"] = nextDepth
         act["parent"] = parentTag
         self.__bandQueue.appendleft(act)
Esempio n. 3
0
 def addtoQueue(self, link, recursionValue=1):
     """Seed the parse queues with the page behind *link*.

     The page is queued as a band when the parser reports band members for
     it, and as an artist otherwise. The entry is marked with parent
     ``"root"`` and the given recursion value.
     """
     page = WikiParser(link)
     isBand = bool(page.getBandMembers())
     seed = {
         "band" if isBand else "artist": page.getName(),
         "link": link,
         "recursion": recursionValue,
         "parent": "root",
     }
     queue = self.__bandQueue if isBand else self.__artistQueue
     queue.appendleft(seed)
    def map_reader(fd, size, url, params):
        """Yield the rows read from *fd*, starting at its first ``<page>`` line.

        Lines preceding the first line that contains ``<page>`` are skipped,
        then *fd*, *size* and *url* are handed to ``WikiParser.regex_reader``
        and every row it produces is yielded. *params* is accepted but not
        used. If *fd* contains no ``<page>`` line at all, nothing is yielded.
        """
        # Count the lines that precede the first line containing "<page>".
        count = 0
        line = fd.readline()
        while line and "<page>" not in line:
            line = fd.readline()
            count += 1
        if not line:
            # readline() returns an empty string at end of input; without
            # this check the scan above would never terminate on input that
            # has no "<page>" line.
            return

        # Rewind and skip the counted lines so that fd is positioned at the
        # start of the first "<page>" line.
        fd.seek(0)
        for _ in range(count):
            fd.readline()

        # Function-scope import, as before; only needed once there is
        # something to read.
        from wikiparser import WikiParser
        reader = WikiParser.regex_reader(
            r"\s\s<page>([\s\S]*?)</page>\n", fd, size, url)
        for row in reader:
            yield row
Esempio n. 5
0
@author: wanjia
"""

from libZotero import zotero
from urllib import urlopen
import datetime
from handler import Handler
import os
from wikiparser import WikiParser
import hashlib

#https://api.zotero.org/users/3949286/items?page=3&key=yrQKEJNQsAKekW9GOgGVzCBG
#

# Script section: build the helper objects, open the Zotero library and save
# one Wikipedia article to a local HTML file.
handler = Handler()
wikiparser = WikiParser()
# NOTE(review): the user id and what looks like an API key are hard-coded
# here; consider loading them from configuration or the environment.
# NOTE(review): the name `zlib` is also the name of a standard-library module.
zlib = zotero.Library('user', '3949286', '<null>', 'yrQKEJNQsAKekW9GOgGVzCBG')
print zlib

# retrieve the first five top-level items.
#items = zlib.fetchItemsTop({'limit': 6, 'content': 'json,bib,coins'})
#for item in items:
# print 'Item Type: %s | Key: %s | Title: %s ' % (item.itemType,item.itemKey, item.title)

# Fetch the English Wikipedia page for `term`.
term = "Blinded_experiment"
url = "https://en.wikipedia.org/wiki/" + term
req = urlopen(url)  #.readlines()
# Write the response body to urlcontent.html in the current working directory.
# NOTE(review): `file` shadows the Python 2 builtin, and the handle is not
# closed within the visible part of this script.
filename = os.getcwd() + "/urlcontent.html"
file = open(filename, "w")
file.write(req.read())
                        statement =  {i:j for i, j in self._pairwise(a['m'])}
                    if statement != None:
                        try:
                            toyield1 = (title, str(statement['value']))
                            value = str(statement['wikibase-entityid']['numeric-id']) if 'wikibase-entityid' in statement else statement['string']
                            toyield2 = (title, str(statement['value']) + "----" + value)
                        except KeyError:
                            toyield1 = toyield2 = None
                        yield toyield1
                        yield toyield2
        except KeyError:
            pass

    def _pairwise(self, iterable):
        from itertools import izip
        a = iter(iterable)
        return izip(a, a)

if __name__ == '__main__':
    from wikiparser import WikiParser
    
    if(len(sys.argv) < 2):
        print "USAGE: python", sys.argv[0], "<ddfs tag:name> [<output file path>]"
        print "You may omit the output file; it's stdout by default.\n"
        sys.exit()

    job = WikiParser().run(input=[sys.argv[1]])
    outf = sys.stdout if len(sys.argv) < 3 else open(sys.argv[2], "w")
    for a, b in result_iterator(job.wait(show=True)):
        outf.write(a.encode('utf-8') + "," + b.encode('utf-8') + "\n")