def __init__(self):
    """
    Set up the site, its data repository and the generator to work on.

    The generator yields preloaded pages from namespace 6 that are returned
    by a search for files in the Naturalis donation category which do not
    have a P180 statement yet.
    """
    commons = pywikibot.Site(u'commons', u'commons')
    self.site = commons
    self.repo = commons.data_repository()
    self.search = (u'incategory:"Media donated by Naturalis Biodiversity Center" '
                   u'-haswbstatement:P180')
    searchhits = pagegenerators.SearchPageGenerator(self.search,
                                                    namespaces=6,
                                                    site=commons)
    self.generator = pagegenerators.PreloadingGenerator(searchhits)
    # Looked up last: the lookup is a method of this object and may rely on
    # the attributes set above.
    self.speciescategories = self.speciesCategoriesOnWikidata()
def HasArticle(FullName, KeyWords):
    """
    Look for an article whose title resembles FullName and whose text
    mentions one of KeyWords.

    The first 10 main-namespace search results for FullName are scored with
    CompareFullNames(FullName, title). Candidates are then visited from the
    best score downwards; the first one whose accent-stripped, lower-cased
    text contains an accent-stripped, lower-cased keyword is returned.

    Returns the matching page, or False when the search returns nothing,
    when the best score is below 1.5, or when no candidate scoring at least
    70% of the best score contains a keyword.
    """
    total = 10  # number of pages we are retrieving from Wikipedia
    hits = pagegenerators.SearchPageGenerator(query=FullName, site=site,
                                              namespaces=[0], total=total)

    # (page, title, similarity score) for every search hit
    scored = []
    for page in hits:
        title = page.title()
        scored.append((page, title, CompareFullNames(FullName, title)))

    # Highest score first; ties keep the order of the search results
    ranked = sorted(scored, key=lambda entry: -entry[2])
    if not ranked:
        return False

    best = ranked[0][2]
    if best < 1.5:  # Magic number
        return False

    # Only candidates close enough to the best score get their text read
    threshold = best * 0.7  # Another magic number
    for page, _title, score in ranked:
        if score < threshold:
            break
        text = StripAccents(page.text).lower()
        # Must contain at least one keyword
        for key in KeyWords:
            if StripAccents(key).lower() in text:
                return page
    return False
def getGenerator(self, fullrun):
    """
    Build the generator of pages to work on.

    :param fullrun: When true, walk every namespace 6 page in
        Category:Quality_images. Otherwise use a search, capped at 1000
        results, for pages in that category that have no
        P6731=Q63348069 statement.
    :return: The chosen generator wrapped in a PageClassGenerator
    """
    if not fullrun:
        query = 'incategory:Quality_images -haswbstatement:P6731=Q63348069'
        pages = pagegenerators.SearchPageGenerator(query,
                                                   total=1000,
                                                   namespaces=6,
                                                   site=self.site)
        return pagegenerators.PageClassGenerator(pages)

    category = pywikibot.Category(self.site, title='Category:Quality_images')
    pages = pagegenerators.CategorizedPageGenerator(category, namespaces=6)
    return pagegenerators.PageClassGenerator(pages)
def findPerson(self, name, yob, yod):
    """
    Find a person by name, year of birth and year of death.

    :param name: Name of the person, searched as an exact phrase
    :param yob: Year of birth of the person (anything int() accepts)
    :param yod: Year of death of the person (anything int() accepts)
    :return: The item whose first P569 and P570 values fall in the given
        years. If there is no such item: True when at least one candidate
        carried both a P569 and a P570 claim, False otherwise.
    """
    # Search for a suitable candidate, tell the search to only return
    # items with P31=Q5
    query = u'"%s" haswbstatement:P31=Q5' % (name, )
    candidates = pagegenerators.PreloadingItemGenerator(
        pagegenerators.WikibaseItemGenerator(
            pagegenerators.SearchPageGenerator(query,
                                               step=None,
                                               total=50,
                                               namespaces=[0],
                                               site=self.repo)))

    sawdatedperson = False
    for candidate in candidates:
        if candidate.isRedirectPage():
            candidate = candidate.getRedirectTarget()

        claims = candidate.get().get('claims')
        if u'P569' not in claims or u'P570' not in claims:
            continue

        born = claims.get('P569')[0].getTarget()
        died = claims.get('P570')[0].getTarget()
        sawdatedperson = True
        # A claim without a usable target cannot be compared
        if not (born and died):
            continue
        if int(born.year) == int(yob) and int(died.year) == int(yod):
            return candidate
    return sawdatedperson
def virinSearch(v):
    """
    Search namespace "6" for ``v`` and append the hit titles to ``plist``.

    A failing search is retried: after the n-th failure the error is
    printed and the function sleeps n seconds before trying again. It gives
    up once more than 120 attempts have failed.

    Titles are collected per attempt and only added to ``plist`` when the
    whole result set was read, so a search that breaks half-way and is then
    retried does not leave duplicate titles behind.

    :param v: The search string
    :return: Number of titles appended to ``plist``, or -1 when the search
        was abandoned
    """
    countErr = 0
    while True:
        titles = []
        try:
            for vPage in pagegenerators.SearchPageGenerator(v, namespaces="6"):
                titles.append(vPage.title())
        except Exception as e:
            # Deliberately broad: any failure of the search is retried
            countErr += 1
            print("%s %s %s" % (Fore.YELLOW,
                                time.strftime("%d %b @%H:%M:%S"),
                                Fore.WHITE))
            print(Fore.RED + "Error [{}]".format(str(e)))
            print("%s %s %s %s" % (
                Fore.RED +
                "Problem running search ['{}' attempted {} times], sleeping for".format(
                    v, countErr),
                countErr, "seconds", Fore.WHITE))
            time.sleep(countErr)
            if countErr > 120:
                return -1
        else:
            plist.extend(titles)
            return len(titles)
def match_name_off_labs(name, types, wd, limit):
    """
    Check if there is an item matching the name using API search.

    Less good than match_name_on_labs() but works from anywhere.

    An item is a candidate when the name equals one of its labels or is
    among its aliases in any language. Aliases are stored as one list of
    strings per language, so each list is searched; comparing the name to
    the lists themselves never matches.

    @param name: The name to search for
    @type name: basestring
    @param types: The Q-values which are allowed for INSTANCE_OF_P
    @type types: tuple of basestring
    @param wd: The running WikidataStuff instance
    @type wd: WikidataStuff (WD)
    @param limit: Maximum number of search hits to inspect; if the search
        yields more, the whole result is discarded
    @type limit: int
    @return: Any matching items
    @rtype: list (of pywikibot.ItemPage)
    """
    matches = []
    objgen = pagegenerators.PreloadingItemGenerator(
        pagegenerators.WikibaseItemGenerator(
            pagegenerators.SearchPageGenerator(name,
                                               step=None,
                                               total=10,
                                               namespaces=[0],
                                               site=wd.repo)))

    # check if P31 and then if any of prop[typ] in P31
    for i, obj in enumerate(objgen, start=1):
        obj = wd.bypassRedirect(obj)
        if i > limit:
            # better to skip than to crash when search times out
            # remove any matches (since incomplete) and exit loop
            return []  # avoids keeping a partial list

        data = obj.get()
        labels = data.get('labels')
        aliases = data.get('aliases')
        if (name in labels.values() or
                any(name in names for names in aliases.values())):
            # presumably appends obj to matches when its type is allowed;
            # verify against filter_on_types
            filter_on_types(obj, types, matches)
    return matches
def find_redirects():
    """
    Rewrite template calls that point at a listed title into plain links.

    Every line of the page LIST_PAGE_NAME that matches REDIRECT_PATTERN
    yields a (source, destination) pair. Pages whose source contains
    ``|source|`` are searched in a fixed set of namespaces; on each of them
    the matches of NP_PATTERN for the source are replaced by
    ``[[destination|text]]`` (or ``[[destination]]`` when the second group
    is empty), links of the form ``[[X|X]]`` are collapsed to ``[[X]]`` and
    the result is handed to update_page.

    Progress is printed to standard output.
    """
    search_namespaces = [
        0,   # Main/Articles
        1,   # Talk
        2,   # User
        4,   # Wikipedia
        6,   # File
        10,  # Template
        11,  # Template talk
        12,  # Help
        13,  # Help talk
        14,  # Category
        15,  # Category talk
    ]

    problem_list = pywikibot.Page(pywikibot.Site(),
                                  LIST_PAGE_NAME).text.splitlines()
    for line in problem_list:
        test = REDIRECT_PATTERN.findall(line)
        if not test:
            continue
        s, d = test[0]
        print('\n' * 5, '=' * 80)
        print(s, '->', d)

        def r(match):
            g = match.groups()
            return '[[%s|%s]]' % (d, g[1]) if g[1] else '[[%s]]' % d

        # The title is literal text: escape it so that characters such as
        # parentheses neither change the meaning of the pattern nor add
        # groups that would shift g[1] in r().
        np_pattern = NP_PATTERN % re.escape(s)

        # Raw string: '\|' is not a valid escape in a normal literal.
        for page in pagegenerators.SearchPageGenerator(
                r'insource:/\|%s\|/' % s, namespaces=search_namespaces):
            print('\n' * 3, '-' * 80)
            print(page.title())
            for text_line in page.text.splitlines():
                if s in text_line:
                    print(text_line)
            print()

            new_text = re.sub(np_pattern, r, page.text)
            new_text = re.sub(r'\[\[(.+?)\|\1\]\]', r'[[\1]]', new_text)
            update_page(page, new_text,
                        'напівавтоматичне прибирання зайвих шаблонів про переклад')
def run2(self):
    """
    Starts the robot.

    For every painter record of self.generator the name is turned from
    "family, given" into "given family" and looked up in self.creators.
    Names that are not cached are searched on self.repo: an item is taken
    when the name equals its English or Dutch label or is one of its
    English or Dutch aliases, and it has P106 plus P21 or P800. The last
    such item wins.

    When no item is found, the known facts about the painter are collected
    in an info string and, depending on how complete they are (or when the
    name is in self.missingCreators), self.makePainterItem() is called.

    NOTE(review): this method was reconstructed from source that had lost
    its indentation. The nesting of the final ``self.creators[name] =
    infostring``, of the score output and of ``self.flushCreators()`` is
    the most plausible reading; confirm it against the original file.
    """
    totalCreators = 0
    totalMatched = 0
    totalMissed = 0
    totalMissedAllInfo = 0
    totalMissedBasicInfo = 0
    totatlMissedCreators = 0

    def text(record, key):
        """Return the UTF-8 byte string stored under key as unicode."""
        return unicode(record.get(key), "utf-8")

    def names_item(itemdata, name):
        """Tell whether name is an en/nl label or alias of the item."""
        labels = itemdata.get('labels')
        aliases = itemdata.get('aliases')
        if labels.get('en') == name or labels.get('nl') == name:
            return True
        return bool((aliases.get('en') and name in aliases.get('en')) or
                    (aliases.get('nl') and name in aliases.get('nl')))

    for painter in self.generator:
        totalCreators = totalCreators + 1
        (familyname, sep, givenname) = text(painter, 'name').partition(u',')
        if givenname:
            name = '%s %s' % (givenname.strip(), familyname.strip(), )
        else:
            name = familyname
        pywikibot.output(name)

        if name in self.creators:
            pywikibot.output(u'Found a creator in the cache.')
            totalMatched = totalMatched + 1
        else:
            creatergen = pagegenerators.PreloadingEntityGenerator(
                pagegenerators.WikidataItemGenerator(
                    pagegenerators.SearchPageGenerator(name,
                                                       step=None,
                                                       total=50,
                                                       namespaces=[0],
                                                       site=self.repo)))
            newcreator = None
            for creatoritem in creatergen:
                itemdata = creatoritem.get()
                claims = itemdata.get('claims')
                # Besides the name, the item needs P106 and P21 or P800
                if names_item(itemdata, name) and u'P106' in claims and (
                        u'P21' in claims or u'P800' in claims):
                    newcreator = creatoritem

            if newcreator:
                pywikibot.output(u'Found a new creator!!!')
                pywikibot.output(newcreator.title())
                totalMatched = totalMatched + 1
                self.creators[name] = newcreator.title()
            else:
                pywikibot.output(u'Did not find a creator.')
                totalMissed = totalMissed + 1

                infostring = u''
                known = set()  # which facts the source record provides

                # A date only counts as known when start and end agree
                if painter.get('birth_on') and painter.get('birth.date.end'):
                    if painter.get('birth_on') == painter.get('birth.date.end'):
                        infostring = infostring + u'- dob: %s ' % (
                            text(painter, 'birth_on'), )
                        known.add('dob')
                    else:
                        infostring = infostring + u'- dob: %s/%s ' % (
                            text(painter, 'birth_on'),
                            text(painter, 'birth.date.end'))
                if painter.get('born_at'):
                    infostring = infostring + u'- birth location: %s ' % (
                        text(painter, 'born_at'), )
                    known.add('pob')
                if painter.get('died_on') and painter.get('death.date.end'):
                    if painter.get('died_on') == painter.get('death.date.end'):
                        infostring = infostring + u'- dod: %s ' % (
                            text(painter, 'died_on'), )
                        known.add('dod')
                    else:
                        infostring = infostring + u'- dod: %s/%s ' % (
                            text(painter, 'died_on'),
                            text(painter, 'death.date.end'))
                if painter.get('died_at'):
                    infostring = infostring + u'- death location: %s ' % (
                        text(painter, 'died_at'), )
                    known.add('pod')
                if painter.get('gender'):
                    known.add('gender')
                    if painter.get('gender') == u'man':
                        infostring = infostring + u'- gender: male '
                    elif painter.get('gender') == u'vrouw':
                        infostring = infostring + u'- gender: female '
                if painter.get('nationality'):
                    infostring = infostring + u'- nationality: %s ' % (
                        text(painter, 'nationality'), )
                    known.add('nationality')
                if painter.get('source') and painter.get('source.id'):
                    if painter.get('source') == u'RKD':
                        infostring = infostring + u'- RKDartists: %s ' % (
                            text(painter, 'source.id'), )

                if known.issuperset(('dob', 'pob', 'dod', 'pod', 'gender',
                                     'nationality')):
                    totalMissedAllInfo = totalMissedAllInfo + 1
                    self.creators[name] = self.makePainterItem(name, painter)
                elif known.issuperset(('dob', 'dod', 'gender')):
                    totalMissedBasicInfo = totalMissedBasicInfo + 1
                    self.creators[name] = self.makePainterItem(name, painter)
                elif name in self.missingCreators:
                    totatlMissedCreators = totatlMissedCreators + 1
                    self.creators[name] = self.makePainterItem(name, painter)
                # NOTE(review): kept from the original - this replaces any
                # value stored by the branches above with the info string.
                self.creators[name] = infostring

        pywikibot.output(
            u'Current score after %s creators: %s hits - %s missed (%s complete, %s basic, %s needed creators)'
            % (totalCreators, totalMatched, totalMissed, totalMissedAllInfo,
               totalMissedBasicInfo, totatlMissedCreators))
    self.flushCreators()
def run(self):
    """
    Starts the robot.

    Every record of self.generator is matched to an item through its
    identifier in self.paintingIds. For an unknown identifier a new item is
    created (Dutch label, en/nl description naming the creator) and given
    the identifier claim and a P195 claim. Afterwards the claims P276, P31,
    P170, P571 and P727 are added where the item does not have them yet.
    Each added claim gets a P854 reference.

    A creator (P170) is only added when a search for the creator name finds
    an item with that name as en/nl label or alias which has both P106 and
    P27; the last such item wins. A date (P571) is only added when the
    source value is a four digit year.
    """
    teylers = pywikibot.ItemPage(self.repo, u'Q474563')

    def add_claim(item, prop, target, what, refurl):
        """Add claim prop=target with a P854 reference and return it."""
        claim = pywikibot.Claim(self.repo, prop)
        claim.setTarget(target)
        pywikibot.output('Adding %s claim to %s' % (what, item))
        item.addClaim(claim)
        reference = pywikibot.Claim(self.repo, u'P854')
        reference.setTarget(refurl)
        pywikibot.output('Adding new reference claim to %s' % item)
        claim.addSource(reference)
        return claim

    for painting in self.generator:
        proxy = painting['object']['proxies'][0]
        # Buh, for this one I know for sure it's in there
        paintingId = proxy['dcIdentifier']['def'][0]
        uri = painting['object']['aggregations'][0]['webResources'][0][
            'about']
        europeanaUrl = u'http://europeana.eu/portal/record/%s.html' % (
            painting['object']['about'], )
        print(paintingId)
        print(uri)

        # "Surname, Given names (something)" becomes "Given names Surname"
        dcCreator = proxy['dcCreator']['def'][0].strip()
        match = re.match(r'^([^,]+), ([^\(]+) \(.+\)$', dcCreator)
        if match:
            dcCreatorName = '%s %s' % (match.group(2).strip(),
                                       match.group(1).strip(), )
        else:
            dcCreatorName = dcCreator

        if paintingId in self.paintingIds:
            paintingItemTitle = u'Q%s' % (self.paintingIds.get(paintingId), )
            print(paintingItemTitle)
            paintingItem = pywikibot.ItemPage(self.repo,
                                              title=paintingItemTitle)
        else:
            data = {'labels': {}, 'descriptions': {}, }
            title = painting['object']['title'][0].strip()
            data['labels']['nl'] = {'language': u'nl', 'value': title}
            if dcCreatorName:
                data['descriptions']['en'] = {
                    'language': u'en',
                    'value': u'painting by %s' % (dcCreatorName, )
                }
                data['descriptions']['nl'] = {
                    'language': u'nl',
                    'value': u'schilderij van %s' % (dcCreatorName, )
                }
            print(data)

            identification = {}
            summary = u'Creating new item with data from %s ' % (
                europeanaUrl, )
            pywikibot.output(summary)
            result = self.repo.editEntity(identification, data,
                                          summary=summary)
            paintingItemTitle = result.get(u'entity').get('id')
            paintingItem = pywikibot.ItemPage(self.repo,
                                              title=paintingItemTitle)

            # Identifier claim, referenced to the source and qualified
            # with the collection
            idclaim = add_claim(paintingItem,
                                u'P%s' % (self.paintingIdProperty, ),
                                paintingId, 'new id', uri)
            newqualifier = pywikibot.Claim(self.repo, u'P195')
            newqualifier.setTarget(teylers)
            pywikibot.output('Adding new qualifier claim to %s' %
                             paintingItem)
            idclaim.addQualifier(newqualifier)

            add_claim(paintingItem, u'P195', teylers, 'collection',
                      europeanaUrl)

        if paintingItem:
            claims = paintingItem.get().get('claims')

            # located in
            if u'P276' not in claims:
                add_claim(paintingItem, u'P276', teylers, 'located in',
                          europeanaUrl)

            # instance of always painting while working on the painting
            # collection
            if u'P31' not in claims:
                dcformatItem = pywikibot.ItemPage(self.repo,
                                                  title='Q3305213')
                add_claim(paintingItem, u'P31', dcformatItem, 'instance',
                          europeanaUrl)

            # creator
            if u'P170' not in claims and dcCreatorName:
                creategen = pagegenerators.PreloadingEntityGenerator(
                    pagegenerators.WikidataItemGenerator(
                        pagegenerators.SearchPageGenerator(
                            dcCreatorName,
                            step=None,
                            total=10,
                            namespaces=[0],
                            site=self.repo)))
                newcreator = None
                for creatoritem in creategen:
                    print(creatoritem.title())
                    itemdata = creatoritem.get()
                    labels = itemdata.get('labels')
                    aliases = itemdata.get('aliases')
                    if (labels.get('en') == dcCreatorName or
                            labels.get('nl') == dcCreatorName):
                        print(labels.get('en'))
                        print(labels.get('nl'))
                    elif not ((aliases.get('en') and
                               dcCreatorName in aliases.get('en')) or
                              (aliases.get('nl') and
                               dcCreatorName in aliases.get('nl'))):
                        continue
                    # The name alone is not enough: require P106 and P27
                    if (u'P106' in itemdata.get('claims') and
                            u'P27' in itemdata.get('claims')):
                        newcreator = creatoritem

                if newcreator:
                    pywikibot.output(newcreator.title())
                    add_claim(paintingItem, u'P170', newcreator, 'creator',
                              europeanaUrl)
                    # Debug output of the last search hit, as before
                    print(creatoritem.title())
                    print(creatoritem.get())
                else:
                    pywikibot.output('No item found for %s' %
                                     (dcCreatorName, ))

            # date of creation
            if u'P571' not in claims and proxy.get('dcDate'):
                dccreated = proxy['dcDate']['def'][0].strip()
                # Only a plain four digit year is usable; anything else
                # ("ca. ", "17e ", ...) must not reach WbTime
                if len(dccreated) == 4 and dccreated.isdigit():
                    newdate = pywikibot.WbTime(year=int(dccreated))
                    add_claim(paintingItem, u'P571', newdate,
                              'date of creation', europeanaUrl)

            # Europeana ID
            if u'P727' not in claims:
                europeanaID = painting['object']['about'].lstrip('/')
                add_claim(paintingItem, u'P727', europeanaID,
                          'Europeana ID', europeanaUrl)
def run(self):
    """
    Starts the robot.

    Only records whose proxy 'about' value starts with the provider
    2021608 priref prefix are handled. Each is matched to an item through
    its identifier in self.paintingIds; for an unknown identifier a new
    item is created (Dutch label, en/nl description when a creator name is
    known) and given the identifier claim and a P195 claim. Afterwards the
    claims P276, P31, P170, P571, P1184 and P727 are added where the item
    does not have them yet. Each added claim gets a P854 reference.

    A creator (P170) is only added when a search for the creator name finds
    an item with that name as en/nl label or alias which has P106; the last
    such item wins. A date (P571) is only added when the record has exactly
    two date values which are the same four digit year.

    Records without a creator get an empty creator name, so no description
    and no creator claim is produced for them.
    """
    amsterdammuseum = pywikibot.ItemPage(self.repo, u'Q1820897')
    prefix = u'/proxy/provider/2021608/dispatcher_aspx_action_search_database_ChoiceCollect_search_priref_'

    def add_claim(item, prop, target, what, refurl):
        """Add claim prop=target with a P854 reference and return it."""
        claim = pywikibot.Claim(self.repo, prop)
        claim.setTarget(target)
        pywikibot.output('Adding %s claim to %s' % (what, item))
        item.addClaim(claim)
        reference = pywikibot.Claim(self.repo, u'P854')
        reference.setTarget(refurl)
        pywikibot.output('Adding new reference claim to %s' % item)
        claim.addSource(reference)
        return claim

    for painting in self.generator:
        proxy = painting['object']['proxies'][0]
        # Make sure the record comes from the expected provider
        if not proxy['about'].startswith(prefix):
            continue

        paintingId = proxy['dcIdentifier']['def'][0].strip()
        piref = proxy['about'].replace(prefix, u'')
        uri = u'http://am.adlibhosting.com/dispatcher.aspx?action=search&database=ChoiceCollect&search=priref=%s' % (
            piref, )
        europeanaUrl = u'http://europeana.eu/portal/record/%s.html' % (
            painting['object']['about'], )
        print(paintingId)
        print(uri)

        if proxy.get('dcCreator'):
            dcCreator = proxy['dcCreator']['def'][0].strip().replace(
                u' (schilder)', u'')
            if u',' in dcCreator:
                (surname, givenname) = dcCreator.split(u',', 1)
                dcCreatorName = u'%s %s' % (givenname.strip(),
                                            surname.strip(), )
            else:
                dcCreatorName = dcCreator
        else:
            # Anonymous work. The name must be reset here, otherwise the
            # first record fails with a NameError and later records inherit
            # the creator of the previous painting.
            dcCreatorName = u''

        if paintingId in self.paintingIds:
            paintingItemTitle = u'Q%s' % (self.paintingIds.get(paintingId), )
            print(paintingItemTitle)
            paintingItem = pywikibot.ItemPage(self.repo,
                                              title=paintingItemTitle)
        else:
            data = {'labels': {}, 'descriptions': {}, }
            data['labels'][u'nl'] = {
                'language': u'nl',
                'value': proxy['dcTitle']['def'][0]
            }
            print(data)

            identification = {}
            summary = u'Creating new item with data from %s ' % (
                europeanaUrl, )
            pywikibot.output(summary)
            result = self.repo.editEntity(identification, data,
                                          summary=summary)
            paintingItemTitle = result.get(u'entity').get('id')
            paintingItem = pywikibot.ItemPage(self.repo,
                                              title=paintingItemTitle)
            paintingItem.get()

            # Descriptions are added separately so that a clash with an
            # existing label/description pair does not block the item
            if dcCreatorName:
                descriptions = {
                    'en': u'painting by %s' % (dcCreatorName, ),
                    'nl': u'schilderij van %s' % (dcCreatorName, ),
                }
                summary = u'Adding description'
                try:
                    paintingItem.editDescriptions(descriptions,
                                                  summary=summary)
                except pywikibot.data.api.APIError:
                    pywikibot.output(
                        'Could not add description, combination already in use'
                    )

            # Identifier claim, referenced to the source and qualified
            # with the collection
            idclaim = add_claim(paintingItem,
                                u'P%s' % (self.paintingIdProperty, ),
                                paintingId, 'new id', uri)
            newqualifier = pywikibot.Claim(self.repo, u'P195')
            newqualifier.setTarget(amsterdammuseum)
            pywikibot.output('Adding new qualifier claim to %s' %
                             paintingItem)
            idclaim.addQualifier(newqualifier)

            add_claim(paintingItem, u'P195', amsterdammuseum, 'collection',
                      europeanaUrl)

        if paintingItem and paintingItem.exists():
            claims = paintingItem.get().get('claims')

            # located in
            if u'P276' not in claims:
                add_claim(paintingItem, u'P276', amsterdammuseum,
                          'located in', europeanaUrl)

            # instance of always painting while working on the painting
            # collection
            if u'P31' not in claims:
                dcformatItem = pywikibot.ItemPage(self.repo,
                                                  title='Q3305213')
                add_claim(paintingItem, u'P31', dcformatItem, 'instance',
                          europeanaUrl)

            # creator
            if u'P170' not in claims and dcCreatorName:
                creategen = pagegenerators.PreloadingItemGenerator(
                    pagegenerators.WikidataItemGenerator(
                        pagegenerators.SearchPageGenerator(
                            dcCreatorName,
                            step=None,
                            total=50,
                            namespaces=[0],
                            site=self.repo)))
                newcreator = None
                for creatoritem in creategen:
                    print(creatoritem.title())
                    itemdata = creatoritem.get()
                    labels = itemdata.get('labels')
                    aliases = itemdata.get('aliases')
                    if (labels.get('en') == dcCreatorName or
                            labels.get('nl') == dcCreatorName):
                        print(labels.get('en'))
                        print(labels.get('nl'))
                    elif not ((aliases.get('en') and
                               dcCreatorName in aliases.get('en')) or
                              (aliases.get('nl') and
                               dcCreatorName in aliases.get('nl'))):
                        continue
                    # The name alone is not enough: require P106
                    if u'P106' in itemdata.get('claims'):
                        newcreator = creatoritem

                if newcreator:
                    pywikibot.output(newcreator.title())
                    add_claim(paintingItem, u'P170', newcreator, 'creator',
                              europeanaUrl)
                    # Debug output of the last search hit, as before
                    print(creatoritem.title())
                    print(creatoritem.get())
                else:
                    pywikibot.output('No item found for %s' %
                                     (dcCreatorName, ))

            # date of creation
            if u'P571' not in claims and proxy.get('dcDate'):
                dcDates = proxy['dcDate']['def']
                if len(dcDates) == 2:
                    dcDate0 = dcDates[0].strip()
                    dcDate1 = dcDates[1].strip()
                    # Start and end agree on a plain four digit year;
                    # anything non-numeric must not reach WbTime
                    if (dcDate0 == dcDate1 and len(dcDate0) == 4 and
                            dcDate0.isdigit()):
                        newdate = pywikibot.WbTime(year=int(dcDate0))
                        add_claim(paintingItem, u'P571', newdate,
                                  'date of creation', europeanaUrl)

            # material used (P186) was disabled in the original code and
            # is not imported

            # Handle
            if u'P1184' not in claims:
                handle = u'11259/collection.%s' % (piref, )
                add_claim(paintingItem, u'P1184', handle, 'handle',
                          europeanaUrl)

            # Europeana ID
            if u'P727' not in claims:
                europeanaID = painting['object']['about'].lstrip('/')
                add_claim(paintingItem, u'P727', europeanaID,
                          'Europeana ID', europeanaUrl)
def run(self):
    """
    Starts the robot.

    Only records whose proxy 'about' value starts with
    /proxy/provider/92034/GVNRC_FHM01 are handled. Each is matched to an
    item through its identifier in self.paintingIds; for an unknown
    identifier a new item is created (Dutch label, en/nl description naming
    the creator, or 'anoniem' when the record has none) and given the
    identifier claim and a P195 claim. Afterwards the claims P276, P31,
    P170, P571 and P727 are added where the item does not have them yet.
    Each added claim gets a P854 reference.

    A creator (P170) is only added when a search for the creator name finds
    an item with that name as en/nl label or alias which has P106 plus P21
    or P800; the last such item wins. A date (P571) is only added when the
    source value is a four digit year.
    """
    fhmuseum = pywikibot.ItemPage(self.repo, u'Q574961')

    def add_claim(item, prop, target, what, refurl):
        """Add claim prop=target with a P854 reference and return it."""
        claim = pywikibot.Claim(self.repo, prop)
        claim.setTarget(target)
        pywikibot.output('Adding %s claim to %s' % (what, item))
        item.addClaim(claim)
        reference = pywikibot.Claim(self.repo, u'P854')
        reference.setTarget(refurl)
        pywikibot.output('Adding new reference claim to %s' % item)
        claim.addSource(reference)
        return claim

    for painting in self.generator:
        proxy = painting['object']['proxies'][0]
        # Make sure it's the Frans Hals Museum
        if not proxy['about'].startswith(u'/proxy/provider/92034/GVNRC_FHM01'):
            continue

        paintingId = proxy['dcIdentifier']['def'][0].strip()
        uri = proxy['dcIdentifier']['def'][1].strip()
        europeanaUrl = u'http://europeana.eu/portal/record/%s.html' % (
            painting['object']['about'], )
        print(paintingId)
        print(uri)

        if proxy.get('dcCreator'):
            dcCreator = proxy['dcCreator']['def'][0].strip()
        else:
            dcCreator = u'anoniem'

        if paintingId in self.paintingIds:
            paintingItemTitle = u'Q%s' % (self.paintingIds.get(paintingId), )
            print(paintingItemTitle)
            paintingItem = pywikibot.ItemPage(self.repo,
                                              title=paintingItemTitle)
        else:
            data = {'labels': {}, 'descriptions': {}, }
            data['labels'][u'nl'] = {
                'language': u'nl',
                'value': painting['object']['title'][0]
            }
            if dcCreator:
                data['descriptions']['en'] = {
                    'language': u'en',
                    'value': u'painting by %s' % (dcCreator, )
                }
                data['descriptions']['nl'] = {
                    'language': u'nl',
                    'value': u'schilderij van %s' % (dcCreator, )
                }
            print(data)

            identification = {}
            summary = u'Creating new item with data from %s ' % (
                europeanaUrl, )
            pywikibot.output(summary)
            result = self.repo.editEntity(identification, data,
                                          summary=summary)
            paintingItemTitle = result.get(u'entity').get('id')
            paintingItem = pywikibot.ItemPage(self.repo,
                                              title=paintingItemTitle)

            # Identifier claim, referenced to the source and qualified
            # with the collection
            idclaim = add_claim(paintingItem,
                                u'P%s' % (self.paintingIdProperty, ),
                                paintingId, 'new id', uri)
            newqualifier = pywikibot.Claim(self.repo, u'P195')
            newqualifier.setTarget(fhmuseum)
            pywikibot.output('Adding new qualifier claim to %s' %
                             paintingItem)
            idclaim.addQualifier(newqualifier)

            add_claim(paintingItem, u'P195', fhmuseum, 'collection',
                      europeanaUrl)

        if paintingItem and paintingItem.exists():
            claims = paintingItem.get().get('claims')

            # located in
            if u'P276' not in claims:
                add_claim(paintingItem, u'P276', fhmuseum, 'located in',
                          europeanaUrl)

            # instance of always painting while working on the painting
            # collection
            if u'P31' not in claims:
                dcformatItem = pywikibot.ItemPage(self.repo,
                                                  title='Q3305213')
                add_claim(paintingItem, u'P31', dcformatItem, 'instance',
                          europeanaUrl)

            # creator
            if u'P170' not in claims and dcCreator:
                creategen = pagegenerators.PreloadingEntityGenerator(
                    pagegenerators.WikidataItemGenerator(
                        pagegenerators.SearchPageGenerator(
                            dcCreator,
                            step=None,
                            total=10,
                            namespaces=[0],
                            site=self.repo)))
                newcreator = None
                for creatoritem in creategen:
                    print(creatoritem.title())
                    itemdata = creatoritem.get()
                    labels = itemdata.get('labels')
                    aliases = itemdata.get('aliases')
                    itemclaims = itemdata.get('claims')
                    if (labels.get('en') == dcCreator or
                            labels.get('nl') == dcCreator):
                        print(labels.get('en'))
                        print(labels.get('nl'))
                    elif not ((aliases.get('en') and
                               dcCreator in aliases.get('en')) or
                              (aliases.get('nl') and
                               dcCreator in aliases.get('nl'))):
                        continue
                    # The name alone is not enough: require P106 and
                    # P21 or P800
                    if u'P106' in itemclaims and (u'P21' in itemclaims or
                                                  u'P800' in itemclaims):
                        newcreator = creatoritem

                if newcreator:
                    pywikibot.output(newcreator.title())
                    add_claim(paintingItem, u'P170', newcreator, 'creator',
                              europeanaUrl)
                    # Debug output of the last search hit, as before
                    print(creatoritem.title())
                    print(creatoritem.get())
                else:
                    pywikibot.output('No item found for %s' % (dcCreator, ))

            # date of creation
            if u'P571' not in claims and proxy.get('dctermsCreated'):
                dccreated = proxy['dctermsCreated']['def'][0].strip()
                # Only a plain four digit year is usable; anything
                # non-numeric must not reach WbTime
                if len(dccreated) == 4 and dccreated.isdigit():
                    newdate = pywikibot.WbTime(year=int(dccreated))
                    add_claim(paintingItem, u'P571', newdate,
                              'date of creation', europeanaUrl)

            # material used (P186) and handle (P1184) were disabled in the
            # original code and are not imported

            # Europeana ID
            if u'P727' not in claims:
                europeanaID = painting['object']['about'].lstrip('/')
                add_claim(paintingItem, u'P727', europeanaID,
                          'Europeana ID', europeanaUrl)
def run(self):
    """
    Starts the robot.

    Loops over the painting records in ``self.generator``. A record whose
    id is already in ``self.paintingIds`` is mapped to its existing item;
    otherwise a new item is created with labels/descriptions, an id claim
    and a collection claim. Afterwards every missing statement (located in,
    instance of, creator, date of creation, material used, described at
    url) is added to the item.

    The ``print(...)`` calls use the single-argument form so the method
    parses on both Python 2 and Python 3.
    """
    for painting in self.generator:
        # Buh, for this one I know for sure it's in there
        print(painting[u'url'])

        paintingItem = None
        if painting[u'id'] in self.paintingIds:
            paintingItemTitle = u'Q%s' % (self.paintingIds.get(painting[u'id']), )
            print(paintingItemTitle)
            paintingItem = pywikibot.ItemPage(self.repo, title=paintingItemTitle)
        else:
            print(u'Let us create stuff')
            data = {
                'labels': {
                    'en': {'language': 'en', 'value': painting[u'title']},
                },
                'descriptions': {
                    'en': {'language': u'en',
                           'value': u'painting by %s' % (painting[u'creator'], )},
                    'nl': {'language': u'nl',
                           'value': u'schilderij van %s' % (painting[u'creator'], )},
                },
            }
            print(data)

            identification = {}
            summary = u'Creating new item with data from %s ' % (painting[u'url'], )
            pywikibot.output(summary)
            try:
                result = self.repo.editEntity(identification, data, summary=summary)
            except pywikibot.exceptions.APIError:
                # We got ourselves a duplicate label and description,
                # let's correct that with a more specific description.
                pywikibot.output(u'Oops, already had that one. Trying again')
                data['descriptions']['en'] = {
                    'language': u'en',
                    'value': u'painting by %s (%s, %s)' % (
                        painting[u'creator'],
                        painting[u'collectionshort'],
                        painting[u'id']),
                }
                result = self.repo.editEntity(identification, data, summary=summary)

            paintingItemTitle = result.get(u'entity').get('id')
            paintingItem = pywikibot.ItemPage(self.repo, title=paintingItemTitle)

            # Add to self.paintingIds so that we don't create dupes
            self.paintingIds[painting[u'id']] = paintingItemTitle.replace(u'Q', u'')

            newclaim = pywikibot.Claim(self.repo, u'P%s' % (self.paintingIdProperty, ))
            newclaim.setTarget(painting[u'id'])
            pywikibot.output('Adding new id claim to %s' % paintingItem)
            paintingItem.addClaim(newclaim)
            self.addReference(paintingItem, newclaim, painting[u'url'])

            # Add collection as a qualifier on the id claim
            newqualifier = pywikibot.Claim(self.repo, u'P195')
            newqualifier.setTarget(self.collectionitem)
            pywikibot.output('Adding new qualifier claim to %s' % paintingItem)
            newclaim.addQualifier(newqualifier)

            collectionclaim = pywikibot.Claim(self.repo, u'P195')
            collectionclaim.setTarget(self.collectionitem)
            pywikibot.output('Adding collection claim to %s' % paintingItem)
            paintingItem.addClaim(collectionclaim)

            # Add the date they got it as a qualifier to the collection
            acquisitiondate = painting.get(u'acquisitiondate')
            if acquisitiondate:
                colqualifier = pywikibot.Claim(self.repo, u'P580')
                acdate = None
                if len(acquisitiondate) == 4 and acquisitiondate.isnumeric():
                    # It's a year; hand WbTime a real integer, not the raw string
                    acdate = pywikibot.WbTime(year=int(acquisitiondate))
                elif len(acquisitiondate.split(u'-', 2)) == 3:
                    # Three dash separated parts are read as day-month-year
                    (acday, acmonth, acyear) = acquisitiondate.split(u'-', 2)
                    acdate = pywikibot.WbTime(year=int(acyear),
                                              month=int(acmonth),
                                              day=int(acday))
                if acdate:
                    colqualifier.setTarget(acdate)
                    pywikibot.output(
                        'Adding new acquisition date qualifier claim to collection on %s'
                        % paintingItem)
                    collectionclaim.addQualifier(colqualifier)

            self.addReference(paintingItem, collectionclaim, painting[u'url'])

        if paintingItem and paintingItem.exists():
            painting['wikidata'] = paintingItem.title()

            data = paintingItem.get()
            claims = data.get('claims')

            # located in
            if u'P276' not in claims and painting.get(u'location'):
                newclaim = pywikibot.Claim(self.repo, u'P276')
                location = pywikibot.ItemPage(self.repo, painting.get(u'location'))
                newclaim.setTarget(location)
                pywikibot.output('Adding located in claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting['url'])

            # instance of always painting while working on the painting collection
            if u'P31' not in claims:
                dcformatItem = pywikibot.ItemPage(self.repo, title='Q3305213')
                newclaim = pywikibot.Claim(self.repo, u'P31')
                newclaim.setTarget(dcformatItem)
                pywikibot.output('Adding instance claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting['url'])

            # creator
            if u'P170' not in claims and painting.get(u'creator'):
                creatorname = painting[u'creator']
                creategen = pagegenerators.PreloadingEntityGenerator(
                    pagegenerators.WikidataItemGenerator(
                        pagegenerators.SearchPageGenerator(
                            creatorname, step=None, total=10,
                            namespaces=[0], site=self.repo)))

                newcreator = None
                try:
                    for creatoritem in creategen:
                        print(creatoritem.title())
                        # Fetch the item content once instead of per lookup
                        creatordata = creatoritem.get()
                        labels = creatordata.get('labels')
                        aliases = creatordata.get('aliases')
                        # The name has to equal the en/nl label or be one
                        # of the en/nl aliases.
                        namematches = (
                            labels.get('en') == creatorname
                            or labels.get('nl') == creatorname
                            or (aliases.get('en')
                                and creatorname in aliases.get('en'))
                            or (aliases.get('nl')
                                and creatorname in aliases.get('nl')))
                        if not namematches:
                            continue
                        # Check occupation: one of the P106 claims has to
                        # point at Q1028181. A later match overrides an
                        # earlier one, as before.
                        for existing_claim in creatordata.get('claims').get('P106', []):
                            if existing_claim.target_equals(u'Q1028181'):
                                newcreator = creatoritem
                except pywikibot.exceptions.APIError:
                    # Best effort: continue with whatever was found so far
                    print(u'Search API is acting up, just let it be')

                if newcreator:
                    pywikibot.output(newcreator.title())
                    newclaim = pywikibot.Claim(self.repo, u'P170')
                    newclaim.setTarget(newcreator)
                    pywikibot.output('Adding creator claim to %s' % paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim, painting[u'url'])
                else:
                    pywikibot.output('No item found for %s' % (creatorname, ))
            else:
                print(u'Already has a creator')

            # date of creation
            if u'P571' not in claims and painting.get(u'date'):
                if len(painting[u'date']) == 4 and painting[u'date'].isnumeric():
                    # It's a year
                    newdate = pywikibot.WbTime(year=int(painting[u'date']))
                    newclaim = pywikibot.Claim(self.repo, u'P571')
                    newclaim.setTarget(newdate)
                    pywikibot.output('Adding date of creation claim to %s' % paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim, painting[u'url'])

            # material used
            if u'P186' not in claims and painting.get(u'medium') == u'Oil on canvas':
                olieverf = pywikibot.ItemPage(self.repo, u'Q296955')
                doek = pywikibot.ItemPage(self.repo, u'Q4259259')
                oppervlak = pywikibot.ItemPage(self.repo, u'Q861259')

                newclaim = pywikibot.Claim(self.repo, u'P186')
                newclaim.setTarget(olieverf)
                pywikibot.output('Adding new oil paint claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting[u'url'])

                newclaim = pywikibot.Claim(self.repo, u'P186')
                newclaim.setTarget(doek)
                pywikibot.output('Adding new canvas claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting[u'url'])

                # Applies to part: qualifies the canvas claim only
                newqualifier = pywikibot.Claim(self.repo, u'P518')
                newqualifier.setTarget(oppervlak)
                pywikibot.output('Adding new qualifier claim to %s' % paintingItem)
                newclaim.addQualifier(newqualifier)

            # Described at url
            if u'P973' not in claims:
                newclaim = pywikibot.Claim(self.repo, u'P973')
                newclaim.setTarget(painting[u'url'])
                pywikibot.output('Adding described at claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
def run_bot():
    """
    Flag citations of retracted sources on the configured Wikipedias.

    For every language in the ``retracted_template_names`` setting (skipped
    when ``check_bot_killswitches`` says the bot may not run) and every
    retracted identifier, the main namespace is searched for the quoted
    original identifier. On each hit the citations returned by
    ``find_page_cites`` get the retraction template appended, unless the
    citation text already contains ``{{retracted``.

    The actual ``wp_page.save`` call is commented out, so the page object
    is updated in memory and the edit is logged, but nothing is saved.
    """
    bot_settings = load_bot_settings()
    bot_languages = bot_settings['retracted_template_names']
    template_field_names = bot_settings['template_field_names']
    retracted_identifiers = load_retracted_identifiers()

    # Quadrupled braces produce the literal '{{' / '}}' of a wiki template
    template_template = '{{{{{template_name} |{id_field}={id}}}}}'

    for language, lang_items in bot_languages.items():
        site = pywikibot.Site(language, 'wikipedia')
        bot_can_run = check_bot_killswitches(site)
        if not bot_can_run:
            continue

        for identifier in retracted_identifiers:
            # NOTE(review): identifier looks like a row of byte strings
            # (fields 1, 3 and 4 are decoded here) - confirm against
            # load_retracted_identifiers.
            original_id = identifier[3].decode("utf-8")
            retraction_id = identifier[4].decode("utf-8")
            retracted_template = template_template.format(
                template_name=lang_items,
                id_field=template_field_names[identifier[1].decode("utf-8")],
                id=retraction_id)

            page_list = pagegenerators.SearchPageGenerator(
                '"' + original_id + '"', namespaces=[0], site=site)

            for wp_page in page_list:
                page_text = wp_page.text
                page_cites = find_page_cites(page_text, original_id)
                num_cites_found = len(page_cites)
                if num_cites_found == 0:
                    logger.error("Couldn't find the identifier {id} inside "
                                 "<ref> tags on page {page}.".format(
                                     id=original_id, page=wp_page))
                    continue

                # Skip cites without usable text: a None would crash on
                # .lower() and an empty string would make str.replace()
                # insert the template between every character.
                # (presumably tag.string is None for tags with several
                # children - verify against find_page_cites)
                unique_page_cites = {tag.string for tag in page_cites
                                     if tag.string}
                for page_cite in unique_page_cites:
                    # Loop through each unique citation, updating the page text
                    # for each - in case this identifier is cited multiple
                    # times in a lazy and/or inconsistent way.
                    cite_str = page_cite
                    # Is this cite already flagged with a retraction template?
                    if "{{retracted" in cite_str.lower():
                        continue
                    ref_to_insert = cite_str + " " + retracted_template
                    page_text = page_text.replace(cite_str, ref_to_insert)

                # Only bother trying to make an edit if we changed anything
                if page_text != wp_page.text:
                    wp_page.text = page_text
                    edit_summary = "Flagging a cited source as retracted"
                    #wp_page.save(edit_summary, minor=False)
                    logger.info("Successfully edited {page_name} with "
                                "retracted source(s).".format(
                                    page_name=wp_page.title()))
                    log_retraction_edit(datetime.datetime.now(),
                                        language + ".wikipedia.org",
                                        wp_page, original_id, retraction_id)
def run(self):
    """
    Starts the robot.

    Loops over the painting records in ``self.generator``. A record whose
    id is already in ``self.paintingIds`` is mapped to its existing item;
    otherwise a new item is created with labels/descriptions, an id claim
    and a collection claim. Afterwards the description is fixed via
    ``self.fixDescription`` and every missing statement (located in,
    instance of, creator, date of creation, material used, described at
    url, image) is added. When an image url is available and the item has
    no image yet, the photo is uploaded to Commons first.

    The ``print(...)`` calls use the single-argument form so the method
    parses on both Python 2 and Python 3.
    """
    for painting in self.generator:
        # Buh, for this one I know for sure it's in there
        print(painting[u'url'])

        paintingItem = None
        if painting[u'id'] in self.paintingIds:
            paintingItemTitle = u'Q%s' % (self.paintingIds.get(painting[u'id']), )
            print(paintingItemTitle)
            paintingItem = pywikibot.ItemPage(self.repo, title=paintingItemTitle)
        else:
            print(u'Let us create stuff')
            data = {
                'labels': {
                    'en': {'language': 'en', 'value': painting[u'title']},
                },
                'descriptions': {
                    'en': {'language': u'en',
                           'value': u'painting by %s' % (painting[u'creator'], )},
                    'nl': {'language': u'nl',
                           'value': u'schilderij van %s' % (painting[u'creator'], )},
                },
            }
            print(data)

            identification = {}
            summary = u'Creating new item with data from %s ' % (painting[u'url'], )
            pywikibot.output(summary)
            try:
                result = self.repo.editEntity(identification, data, summary=summary)
            except pywikibot.exceptions.APIError:
                # We got ourselves a duplicate label and description,
                # let's correct that with a more specific description.
                pywikibot.output(u'Oops, already had that one. Trying again')
                data['descriptions']['en'] = {
                    'language': u'en',
                    'value': u'painting by %s (%s, %s)' % (
                        painting[u'creator'],
                        painting[u'collectionshort'],
                        painting[u'id']),
                }
                result = self.repo.editEntity(identification, data, summary=summary)

            paintingItemTitle = result.get(u'entity').get('id')
            paintingItem = pywikibot.ItemPage(self.repo, title=paintingItemTitle)

            # Add to self.paintingIds so that we don't create dupes
            self.paintingIds[painting[u'id']] = paintingItemTitle.replace(u'Q', u'')

            newclaim = pywikibot.Claim(self.repo, u'P%s' % (self.paintingIdProperty, ))
            newclaim.setTarget(painting[u'id'])
            pywikibot.output('Adding new id claim to %s' % paintingItem)
            paintingItem.addClaim(newclaim)
            self.addReference(paintingItem, newclaim, painting[u'url'])

            # Add collection as a qualifier on the id claim
            newqualifier = pywikibot.Claim(self.repo, u'P195')
            newqualifier.setTarget(self.collectionitem)
            pywikibot.output('Adding new qualifier claim to %s' % paintingItem)
            newclaim.addQualifier(newqualifier)

            collectionclaim = pywikibot.Claim(self.repo, u'P195')
            collectionclaim.setTarget(self.collectionitem)
            pywikibot.output('Adding collection claim to %s' % paintingItem)
            paintingItem.addClaim(collectionclaim)

            # Add the date they got it as a qualifier to the collection
            acquisitiondate = painting.get(u'acquisitiondate')
            if acquisitiondate:
                colqualifier = pywikibot.Claim(self.repo, u'P580')
                acdate = None
                if len(acquisitiondate) == 4 and acquisitiondate.isnumeric():
                    # It's a year; hand WbTime a real integer, not the raw string
                    acdate = pywikibot.WbTime(year=int(acquisitiondate))
                elif len(acquisitiondate.split(u'-', 2)) == 3:
                    # Three dash separated parts are read as day-month-year
                    (acday, acmonth, acyear) = acquisitiondate.split(u'-', 2)
                    acdate = pywikibot.WbTime(year=int(acyear),
                                              month=int(acmonth),
                                              day=int(acday))
                if acdate:
                    colqualifier.setTarget(acdate)
                    pywikibot.output(
                        'Adding new acquisition date qualifier claim to collection on %s'
                        % paintingItem)
                    collectionclaim.addQualifier(colqualifier)

            self.addReference(paintingItem, collectionclaim, painting[u'url'])

        if paintingItem and paintingItem.exists():
            painting['wikidata'] = paintingItem.title()

            data = paintingItem.get()
            claims = data.get('claims')

            if painting.get(u'creator'):
                self.fixDescription(paintingItem, painting.get(u'creator'))

            # located in
            if u'P276' not in claims and painting.get(u'location'):
                newclaim = pywikibot.Claim(self.repo, u'P276')
                location = pywikibot.ItemPage(self.repo, painting.get(u'location'))
                newclaim.setTarget(location)
                pywikibot.output('Adding located in claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting['url'])

            # instance of always painting while working on the painting collection
            if u'P31' not in claims:
                dcformatItem = pywikibot.ItemPage(self.repo, title='Q3305213')
                newclaim = pywikibot.Claim(self.repo, u'P31')
                newclaim.setTarget(dcformatItem)
                pywikibot.output('Adding instance claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting['url'])

            # creator
            if u'P170' not in claims and painting.get(u'creator'):
                creatorname = painting[u'creator']
                creategen = pagegenerators.PreloadingEntityGenerator(
                    pagegenerators.WikidataItemGenerator(
                        pagegenerators.SearchPageGenerator(
                            creatorname, step=None, total=10,
                            namespaces=[0], site=self.repo)))

                newcreator = None
                try:
                    for creatoritem in creategen:
                        print(creatoritem.title())
                        # Fetch the item content once instead of per lookup
                        creatordata = creatoritem.get()
                        labels = creatordata.get('labels')
                        aliases = creatordata.get('aliases')
                        # The name has to equal the en/nl label or be one
                        # of the en/nl aliases.
                        namematches = (
                            labels.get('en') == creatorname
                            or labels.get('nl') == creatorname
                            or (aliases.get('en')
                                and creatorname in aliases.get('en'))
                            or (aliases.get('nl')
                                and creatorname in aliases.get('nl')))
                        if not namematches:
                            continue
                        # Check occupation: one of the P106 claims has to
                        # point at Q1028181. A later match overrides an
                        # earlier one, as before.
                        for existing_claim in creatordata.get('claims').get('P106', []):
                            if existing_claim.target_equals(u'Q1028181'):
                                newcreator = creatoritem
                except pywikibot.exceptions.APIError:
                    # Best effort: continue with whatever was found so far
                    print(u'Search API is acting up, just let it be')

                if newcreator:
                    pywikibot.output(newcreator.title())
                    newclaim = pywikibot.Claim(self.repo, u'P170')
                    newclaim.setTarget(newcreator)
                    pywikibot.output('Adding creator claim to %s' % paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim, painting[u'url'])
                else:
                    pywikibot.output('No item found for %s' % (creatorname, ))
            else:
                print(u'Already has a creator')

            # date of creation
            if u'P571' not in claims and painting.get(u'date'):
                if len(painting[u'date']) == 4 and painting[u'date'].isnumeric():
                    # It's a year
                    newdate = pywikibot.WbTime(year=int(painting[u'date']))
                    newclaim = pywikibot.Claim(self.repo, u'P571')
                    newclaim.setTarget(newdate)
                    pywikibot.output('Adding date of creation claim to %s' % paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim, painting[u'url'])

            # material used
            if u'P186' not in claims and painting.get(u'medium') == u'Oil on canvas':
                olieverf = pywikibot.ItemPage(self.repo, u'Q296955')
                doek = pywikibot.ItemPage(self.repo, u'Q4259259')
                oppervlak = pywikibot.ItemPage(self.repo, u'Q861259')

                newclaim = pywikibot.Claim(self.repo, u'P186')
                newclaim.setTarget(olieverf)
                pywikibot.output('Adding new oil paint claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting[u'url'])

                newclaim = pywikibot.Claim(self.repo, u'P186')
                newclaim.setTarget(doek)
                pywikibot.output('Adding new canvas claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting[u'url'])

                # Applies to part: qualifies the canvas claim only
                newqualifier = pywikibot.Claim(self.repo, u'P518')
                newqualifier.setTarget(oppervlak)
                pywikibot.output('Adding new qualifier claim to %s' % paintingItem)
                newclaim.addQualifier(newqualifier)

            # Described at url
            if u'P973' not in claims:
                newclaim = pywikibot.Claim(self.repo, u'P973')
                newclaim.setTarget(painting[u'url'])
                pywikibot.output('Adding described at claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)

            # Upload an image baby! BUT NOT NOW
            imagetitle = u''
            if painting.get(u'imageurl') and u'P18' not in claims:
                commonssite = pywikibot.Site("commons", "commons")
                photo = Photo(painting[u'imageurl'], painting)
                titlefmt = u'%(creator)s - %(title)s - %(id)s - Minneapolis Institute of Arts.%(_ext)s'
                pagefmt = u'User:Multichill/Minneapolis Institute of Arts'

                duplicates = photo.findDuplicateImages()
                if duplicates:
                    pywikibot.output(u"Skipping duplicate of %r" % duplicates)
                    imagetitle = duplicates[0]
                else:
                    imagetitle = self.cleanUpTitle(photo.getTitle(titlefmt))
                    pywikibot.output(imagetitle)
                    description = photo.getDescription(pagefmt)
                    pywikibot.output(description)

                    handle, tempname = tempfile.mkstemp()
                    try:
                        # The with statement closes the file before the upload
                        with os.fdopen(handle, "wb") as t:
                            t.write(photo.downloadPhoto().getvalue())

                        bot = upload.UploadRobot(url=tempname,
                                                 description=description,
                                                 useFilename=imagetitle,
                                                 keepFilename=True,
                                                 verifyDescription=False,
                                                 uploadByUrl=False,
                                                 targetSite=commonssite)
                        bot.run()
                    finally:
                        # mkstemp leaves removal to the caller; without
                        # this every upload left a temp file behind.
                        os.remove(tempname)

            # imagetitle is only set by the branch above, so commonssite
            # is always bound when this runs.
            if u'P18' not in claims and imagetitle:
                newclaim = pywikibot.Claim(self.repo, u'P18')
                imagelink = pywikibot.Link(imagetitle, source=commonssite,
                                           defaultNamespace=6)
                image = pywikibot.ImagePage(imagelink)
                if image.isRedirectPage():
                    image = pywikibot.ImagePage(image.getRedirectTarget())
                newclaim.setTarget(image)
                pywikibot.output('Adding %s --> %s' % (newclaim.getID(),
                                                       newclaim.getTarget()))
                paintingItem.addClaim(newclaim)
def run(self):
    """
    Starts the robot.

    Loops over the painting records in ``self.generator``. A record whose
    id is already in ``self.paintingIds`` is mapped to its existing item;
    otherwise a new item is created with labels/descriptions, an id claim
    and a collection claim pointing at item Q201469. Afterwards every
    missing statement (located in, instance of, creator, date of creation,
    described at url) is added to the item.

    The ``print(...)`` calls use the single-argument form so the method
    parses on both Python 2 and Python 3.
    """
    guggenheim = pywikibot.ItemPage(self.repo, u'Q201469')

    for painting in self.generator:
        # Buh, for this one I know for sure it's in there
        print(painting[u'id'])
        print(painting[u'url'])

        paintingItem = None
        if painting[u'id'] in self.paintingIds:
            paintingItemTitle = u'Q%s' % (self.paintingIds.get(painting[u'id']),)
            print(paintingItemTitle)
            paintingItem = pywikibot.ItemPage(self.repo, title=paintingItemTitle)
        else:
            print(u'Let us create stuff')
            data = {
                'labels': {
                    'en': {'language': 'en', 'value': painting[u'title']},
                },
                'descriptions': {
                    'en': {'language': u'en',
                           'value': u'painting by %s' % (painting[u'creator'],)},
                    'nl': {'language': u'nl',
                           'value': u'schilderij van %s' % (painting[u'creator'],)},
                },
            }
            print(data)

            identification = {}
            summary = u'Creating new item with data from %s ' % (painting[u'url'],)
            pywikibot.output(summary)
            try:
                result = self.repo.editEntity(identification, data, summary=summary)
            except pywikibot.exceptions.APIError:
                # We got ourselves a duplicate label and description,
                # let's correct that with a more specific description.
                pywikibot.output(u'Oops, already had that one. Trying again')
                data['descriptions']['en'] = {
                    'language': u'en',
                    'value': u'painting by %s (Guggenheim, %s)' % (
                        painting[u'creator'], painting[u'id']),
                }
                result = self.repo.editEntity(identification, data, summary=summary)

            paintingItemTitle = result.get(u'entity').get('id')
            paintingItem = pywikibot.ItemPage(self.repo, title=paintingItemTitle)

            # Remember the new item so that the same id showing up again
            # in the generator does not create a duplicate item.
            self.paintingIds[painting[u'id']] = paintingItemTitle.replace(u'Q', u'')

            newclaim = pywikibot.Claim(self.repo, u'P%s' % (self.paintingIdProperty,))
            newclaim.setTarget(painting[u'id'])
            pywikibot.output('Adding new id claim to %s' % paintingItem)
            paintingItem.addClaim(newclaim)
            self.addReference(paintingItem, newclaim, painting[u'url'])

            # Add collection as a qualifier on the id claim
            newqualifier = pywikibot.Claim(self.repo, u'P195')
            newqualifier.setTarget(guggenheim)
            pywikibot.output('Adding new qualifier claim to %s' % paintingItem)
            newclaim.addQualifier(newqualifier)

            collectionclaim = pywikibot.Claim(self.repo, u'P195')
            collectionclaim.setTarget(guggenheim)
            pywikibot.output('Adding collection claim to %s' % paintingItem)
            paintingItem.addClaim(collectionclaim)
            self.addReference(paintingItem, collectionclaim, painting[u'url'])

        if paintingItem and paintingItem.exists():
            painting['wikidata'] = paintingItem.title()

            data = paintingItem.get()
            claims = data.get('claims')

            # located in
            if u'P276' not in claims:
                newclaim = pywikibot.Claim(self.repo, u'P276')
                newclaim.setTarget(guggenheim)
                pywikibot.output('Adding located in claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting['url'])

            # instance of always painting while working on the painting collection
            if u'P31' not in claims:
                dcformatItem = pywikibot.ItemPage(self.repo, title='Q3305213')
                newclaim = pywikibot.Claim(self.repo, u'P31')
                newclaim.setTarget(dcformatItem)
                pywikibot.output('Adding instance claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting['url'])

            # creator
            if u'P170' not in claims and painting.get(u'creator'):
                creatorname = painting[u'creator']
                creategen = pagegenerators.PreloadingEntityGenerator(
                    pagegenerators.WikidataItemGenerator(
                        pagegenerators.SearchPageGenerator(
                            creatorname, step=None, total=10,
                            namespaces=[0], site=self.repo)))

                newcreator = None
                for creatoritem in creategen:
                    print(creatoritem.title())
                    # Fetch the item content once instead of per lookup
                    creatordata = creatoritem.get()
                    labels = creatordata.get('labels')
                    aliases = creatordata.get('aliases')
                    # The name has to equal the en/nl label or be one of
                    # the en/nl aliases.
                    namematches = (
                        labels.get('en') == creatorname
                        or labels.get('nl') == creatorname
                        or (aliases.get('en')
                            and creatorname in aliases.get('en'))
                        or (aliases.get('nl')
                            and creatorname in aliases.get('nl')))
                    if not namematches:
                        continue
                    # Require P106 together with P21 or P800 before the
                    # item is accepted. A later match overrides an earlier
                    # one, as before.
                    creatorclaims = creatordata.get('claims')
                    if u'P106' in creatorclaims and (
                            u'P21' in creatorclaims or u'P800' in creatorclaims):
                        newcreator = creatoritem

                if newcreator:
                    pywikibot.output(newcreator.title())
                    newclaim = pywikibot.Claim(self.repo, u'P170')
                    newclaim.setTarget(newcreator)
                    pywikibot.output('Adding creator claim to %s' % paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim, painting[u'url'])
                else:
                    pywikibot.output('No item found for %s' % (creatorname, ))
            else:
                print(u'Already has a creator')

            # date of creation
            if u'P571' not in claims and painting.get(u'year'):
                year = painting[u'year']
                # Only a plain four digit year is usable; the digit check
                # keeps non-numeric four character values away from int().
                if len(year) == 4 and year.isdigit():
                    newdate = pywikibot.WbTime(year=int(year))
                    newclaim = pywikibot.Claim(self.repo, u'P571')
                    newclaim.setTarget(newdate)
                    pywikibot.output('Adding date of creation claim to %s' % paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim, painting[u'url'])

            # The material used (P186) import was disabled in the original
            # (it lived in a dead string literal that referenced an
            # undefined name) and remains disabled here.

            # Described at url
            if u'P973' not in claims:
                newclaim = pywikibot.Claim(self.repo, u'P973')
                newclaim.setTarget(painting[u'url'])
                pywikibot.output('Adding described at claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
def run(self):
    """
    Starts the robot.

    Loops over the painting records in ``self.generator``. A record whose
    id is already in ``self.paintingIds`` is mapped to its existing item;
    otherwise a new item is created with labels/descriptions, an id claim
    and a collection claim pointing at item Q731126. Afterwards every
    missing statement (located in, instance of, creator, date of creation,
    material used, described at url, image) is added. When an image url is
    available and the item has no image, or its current image file is
    smaller than 1000000 bytes, the photo is uploaded to Commons.

    The ``print(...)`` calls use the single-argument form so the method
    parses on both Python 2 and Python 3.
    """
    getty = pywikibot.ItemPage(self.repo, u'Q731126')

    for painting in self.generator:
        # Buh, for this one I know for sure it's in there
        print(painting[u'url'])

        paintingItem = None
        if painting[u'id'] in self.paintingIds:
            paintingItemTitle = u'Q%s' % (self.paintingIds.get(painting[u'id']), )
            print(paintingItemTitle)
            paintingItem = pywikibot.ItemPage(self.repo, title=paintingItemTitle)
        else:
            print(u'Let us create stuff')
            data = {
                'labels': {
                    'en': {'language': 'en', 'value': painting[u'title']},
                },
                'descriptions': {
                    'en': {'language': u'en',
                           'value': u'painting by %s' % (painting[u'creator'], )},
                    'nl': {'language': u'nl',
                           'value': u'schilderij van %s' % (painting[u'creator'], )},
                },
            }
            print(data)

            identification = {}
            summary = u'Creating new item with data from %s ' % (painting[u'url'], )
            pywikibot.output(summary)
            try:
                result = self.repo.editEntity(identification, data, summary=summary)
            except pywikibot.data.api.APIError:
                # We got ourselves a duplicate label and description,
                # let's correct that with a more specific description.
                pywikibot.output(u'Oops, already had that one. Trying again')
                data['descriptions']['en'] = {
                    'language': u'en',
                    'value': u'painting by %s (Getty, %s)' % (
                        painting[u'creator'], painting[u'id']),
                }
                result = self.repo.editEntity(identification, data, summary=summary)

            paintingItemTitle = result.get(u'entity').get('id')
            paintingItem = pywikibot.ItemPage(self.repo, title=paintingItemTitle)

            # Add to self.paintingIds so that we don't create dupes
            self.paintingIds[painting[u'id']] = paintingItemTitle.replace(u'Q', u'')

            newclaim = pywikibot.Claim(self.repo, u'P%s' % (self.paintingIdProperty, ))
            newclaim.setTarget(painting[u'id'])
            pywikibot.output('Adding new id claim to %s' % paintingItem)
            paintingItem.addClaim(newclaim)
            self.addReference(paintingItem, newclaim, painting[u'url'])

            # Add collection as a qualifier on the id claim
            newqualifier = pywikibot.Claim(self.repo, u'P195')
            newqualifier.setTarget(getty)
            pywikibot.output('Adding new qualifier claim to %s' % paintingItem)
            newclaim.addQualifier(newqualifier)

            collectionclaim = pywikibot.Claim(self.repo, u'P195')
            collectionclaim.setTarget(getty)
            pywikibot.output('Adding collection claim to %s' % paintingItem)
            paintingItem.addClaim(collectionclaim)
            self.addReference(paintingItem, collectionclaim, painting[u'url'])

        if paintingItem and paintingItem.exists():
            painting['wikidata'] = paintingItem.title()

            data = paintingItem.get()
            claims = data.get('claims')

            # located in
            if u'P276' not in claims and painting.get(u'location'):
                newclaim = pywikibot.Claim(self.repo, u'P276')
                location = pywikibot.ItemPage(self.repo, painting.get(u'location'))
                newclaim.setTarget(location)
                pywikibot.output('Adding located in claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting['url'])

            # instance of always painting while working on the painting collection
            if u'P31' not in claims:
                dcformatItem = pywikibot.ItemPage(self.repo, title='Q3305213')
                newclaim = pywikibot.Claim(self.repo, u'P31')
                newclaim.setTarget(dcformatItem)
                pywikibot.output('Adding instance claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting['url'])

            # creator
            if u'P170' not in claims and painting.get(u'creator'):
                creatorname = painting[u'creator']
                creategen = pagegenerators.PreloadingItemGenerator(
                    pagegenerators.WikidataItemGenerator(
                        pagegenerators.SearchPageGenerator(
                            creatorname, step=None, total=10,
                            namespaces=[0], site=self.repo)))

                newcreator = None
                for creatoritem in creategen:
                    print(creatoritem.title())
                    # Fetch the item content once instead of per lookup
                    creatordata = creatoritem.get()
                    labels = creatordata.get('labels')
                    aliases = creatordata.get('aliases')
                    # The name has to equal the en/nl label or be one of
                    # the en/nl aliases.
                    namematches = (
                        labels.get('en') == creatorname
                        or labels.get('nl') == creatorname
                        or (aliases.get('en')
                            and creatorname in aliases.get('en'))
                        or (aliases.get('nl')
                            and creatorname in aliases.get('nl')))
                    if not namematches:
                        continue
                    # Require P106 together with P21 or P800 before the
                    # item is accepted. A later match overrides an earlier
                    # one, as before.
                    creatorclaims = creatordata.get('claims')
                    if u'P106' in creatorclaims and (
                            u'P21' in creatorclaims or u'P800' in creatorclaims):
                        newcreator = creatoritem

                if newcreator:
                    pywikibot.output(newcreator.title())
                    newclaim = pywikibot.Claim(self.repo, u'P170')
                    newclaim.setTarget(newcreator)
                    pywikibot.output('Adding creator claim to %s' % paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim, painting[u'url'])
                else:
                    pywikibot.output('No item found for %s' % (creatorname, ))
            else:
                print(u'Already has a creator')

            # date of creation
            if u'P571' not in claims and painting.get(u'date'):
                if len(painting[u'date']) == 4 and painting[u'date'].isnumeric():
                    # It's a year; hand WbTime a real integer, not the raw string
                    newdate = pywikibot.WbTime(year=int(painting[u'date']))
                    newclaim = pywikibot.Claim(self.repo, u'P571')
                    newclaim.setTarget(newdate)
                    pywikibot.output('Adding date of creation claim to %s' % paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim, painting[u'url'])

            # material used
            if u'P186' not in claims and painting.get(u'medium') == u'Oil on canvas':
                olieverf = pywikibot.ItemPage(self.repo, u'Q296955')
                doek = pywikibot.ItemPage(self.repo, u'Q4259259')
                oppervlak = pywikibot.ItemPage(self.repo, u'Q861259')

                newclaim = pywikibot.Claim(self.repo, u'P186')
                newclaim.setTarget(olieverf)
                pywikibot.output('Adding new oil paint claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting[u'url'])

                newclaim = pywikibot.Claim(self.repo, u'P186')
                newclaim.setTarget(doek)
                pywikibot.output('Adding new canvas claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting[u'url'])

                # Applies to part: qualifies the canvas claim only
                newqualifier = pywikibot.Claim(self.repo, u'P518')
                newqualifier.setTarget(oppervlak)
                pywikibot.output('Adding new qualifier claim to %s' % paintingItem)
                newclaim.addQualifier(newqualifier)

            # Described at url
            if u'P973' not in claims:
                newclaim = pywikibot.Claim(self.repo, u'P973')
                newclaim.setTarget(painting[u'url'])
                pywikibot.output('Adding described at claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)

            # Upload an image baby! BUT NOT NOW
            imagetitle = u''
            if painting.get(u'imageurl'):
                # A free file is available, let's see how big the current file is
                hasimage = u'P18' in claims
                if hasimage:
                    size = claims.get('P18')[0].getTarget().latest_file_info.size
                # size is only looked at when the item has an image
                if not hasimage or size < 1000000:
                    commonssite = pywikibot.Site("commons", "commons")
                    photo = Photo(painting[u'imageurl'], painting)
                    titlefmt = u'%(creator)s - %(title)s - %(id)s - J. Paul Getty Museum.%(_ext)s'
                    pagefmt = u'User:Multichill/J. Paul Getty Museum'

                    duplicates = photo.findDuplicateImages()
                    if duplicates:
                        pywikibot.output(u"Skipping duplicate of %r" % duplicates)
                        imagetitle = duplicates[0]
                    else:
                        imagetitle = self.cleanUpTitle(photo.getTitle(titlefmt))
                        imagefile = pywikibot.FilePage(commonssite, title=imagetitle)
                        imagetitle = imagefile.title()
                        pywikibot.output(imagetitle)
                        description = photo.getDescription(pagefmt)
                        pywikibot.output(description)
                        imagefile.text = description

                        handle, tempname = tempfile.mkstemp()
                        try:
                            # The with statement closes the file before the upload
                            with os.fdopen(handle, "wb") as t:
                                t.write(photo.downloadPhoto().getvalue())

                            commonssite.upload(imagefile,
                                               source_filename=tempname,
                                               ignore_warnings=True,
                                               chunk_size=1000000)
                        finally:
                            # mkstemp leaves removal to the caller; without
                            # this every upload left a temp file behind.
                            os.remove(tempname)

                        # The original literal contained the broken escape
                        # "don\it"; the intended word is "don't".
                        pywikibot.output(
                            'Uploaded a file, sleeping a bit so I don\'t run into lagging databases')
                        time.sleep(15)

            # imagetitle is only set by the upload branch above, so
            # commonssite is always bound when this runs.
            if u'P18' not in claims and imagetitle:
                newclaim = pywikibot.Claim(self.repo, u'P18')
                imagelink = pywikibot.Link(imagetitle, source=commonssite,
                                           defaultNamespace=6)
                image = pywikibot.ImagePage(imagelink)
                if image.isRedirectPage():
                    image = pywikibot.ImagePage(image.getRedirectTarget())
                newclaim.setTarget(image)
                pywikibot.output('Adding %s --> %s' % (newclaim.getID(),
                                                       newclaim.getTarget()))
                paintingItem.addClaim(newclaim)
def run(self):
    """
    Starts the robot.

    For every painting record yielded by ``self.generator`` the matching
    Wikidata item is looked up through ``self.paintingIds``. When the
    inventory number is not known yet, a new item is created with a label
    and description and gets the inventory number and collection (P195)
    claims. After that the statements still missing on the item are added,
    each sourced with a reference URL (P854) to the Europeana record:
    located in (P276), instance of (P31), creator (P170) and date of
    creation (P571).

    The ``dcCreator``, ``dcFormat`` and ``dctermsCreated`` fields of a
    record are treated as optional; blank values count as missing.
    """
    mauritshuis = pywikibot.ItemPage(self.repo, u'Q221092')

    # dcFormat values that can be mapped onto an "instance of" target.
    formats = {
        u'schilderij': u'Q3305213',
        u'pastel': u'Q12043905',
    }

    def add_claim(item, prop, target, message):
        # Build a claim for ``prop``, announce it and save it on ``item``.
        claim = pywikibot.Claim(self.repo, prop)
        claim.setTarget(target)
        pywikibot.output(message % item)
        item.addClaim(claim)
        return claim

    def add_url_reference(item, claim, url):
        # Source an already saved claim with a reference URL (P854).
        reference = pywikibot.Claim(self.repo, u'P854')
        reference.setTarget(url)
        pywikibot.output('Adding new reference claim to %s' % item)
        claim.addSource(reference)

    for painting in self.generator:
        record = painting['object']
        proxy = record['proxies'][0]
        uri = proxy['dcIdentifier']['def'][0]
        paintingId = proxy['dcIdentifier']['def'][1]
        europeanaUrl = u'http://europeana.eu/portal/record/%s.html' % (
            record['about'],)

        # Creator and format are optional in the source data, so read them
        # defensively once instead of indexing blindly (the unconditional
        # debug print used to raise KeyError for records without creator).
        dccreator = u''
        if proxy.get('dcCreator'):
            dccreator = proxy['dcCreator']['def'][0].strip()
        dcformat = u''
        if proxy.get('dcFormat'):
            dcformat = proxy['dcFormat']['def'][0].strip()

        # Debug output. print(x) with a single argument prints the same as
        # the Python 2 print statement.
        print(record['language'])
        print(record['title'])
        print(record['about'])
        print(dccreator)
        print(uri)
        print(paintingId)

        paintingItem = None
        if paintingId in self.paintingIds:
            paintingItemTitle = u'Q%s' % (self.paintingIds.get(paintingId),)
            print(paintingItemTitle)
            paintingItem = pywikibot.ItemPage(self.repo,
                                              title=paintingItemTitle)
        else:
            print('bla')

            if dccreator and dcformat:
                description = u'%s van %s' % (dcformat, dccreator)
            else:
                description = u''
            lang = record['language'][0]
            label = record['title'][0]
            data = {
                'labels': {lang: {'language': lang, 'value': label}},
                'descriptions': {lang: {'language': lang,
                                        'value': description}},
            }
            # An empty identification makes editEntity create a new item.
            identification = {}
            summary = u'Creating new item with data from %s ' % (
                europeanaUrl,)
            pywikibot.output(summary)
            result = self.repo.editEntity(identification, data,
                                          summary=summary)
            paintingItemTitle = result.get(u'entity').get('id')
            paintingItem = pywikibot.ItemPage(self.repo,
                                              title=paintingItemTitle)

            # Inventory number, sourced with the museum URL and qualified
            # with the collection it belongs to.
            idclaim = add_claim(paintingItem,
                                u'P%s' % (self.paintingIdProperty,),
                                paintingId,
                                'Adding new id claim to %s')
            add_url_reference(paintingItem, idclaim, uri)

            newqualifier = pywikibot.Claim(self.repo, u'P195')
            newqualifier.setTarget(mauritshuis)
            pywikibot.output(
                'Adding new qualifier claim to %s' % paintingItem)
            idclaim.addQualifier(newqualifier)

            collectionclaim = add_claim(paintingItem, u'P195', mauritshuis,
                                        'Adding collection claim to %s')
            add_url_reference(paintingItem, collectionclaim, europeanaUrl)

        if paintingItem:
            claims = paintingItem.get().get('claims')

            # located in
            if u'P276' not in claims:
                newclaim = add_claim(paintingItem, u'P276', mauritshuis,
                                     'Adding located in claim to %s')
                add_url_reference(paintingItem, newclaim, europeanaUrl)

            # instance of
            if u'P31' not in claims and dcformat:
                if dcformat in formats:
                    dcformatItem = pywikibot.ItemPage(
                        self.repo, title=formats[dcformat])
                    newclaim = add_claim(paintingItem, u'P31', dcformatItem,
                                         'Adding instance claim to %s')
                    add_url_reference(paintingItem, newclaim, europeanaUrl)
                else:
                    pywikibot.output(
                        'Found unknown format %s' % (dcformat,))

            # creator
            if u'P170' not in claims:
                if dccreator:
                    creategen = pagegenerators.PreloadingItemGenerator(
                        pagegenerators.WikidataItemGenerator(
                            pagegenerators.SearchPageGenerator(
                                dccreator, step=None, total=10,
                                namespaces=[0], site=self.repo)))

                    newcreator = None
                    for creatoritem in creategen:
                        print(creatoritem.title())
                        creatordata = creatoritem.get()
                        labels = creatordata.get('labels')
                        if (labels.get('en') == dccreator
                                or labels.get('nl') == dccreator):
                            print(labels.get('en'))
                            print(labels.get('nl'))
                            # Check occupation and country of citizenship
                            creatorclaims = creatordata.get('claims')
                            if (u'P106' in creatorclaims
                                    and u'P27' in creatorclaims):
                                # No break: the last matching search
                                # result wins, as before.
                                newcreator = creatoritem

                    if newcreator:
                        pywikibot.output(newcreator.title())
                        newclaim = add_claim(paintingItem, u'P170',
                                             newcreator,
                                             'Adding creator claim to %s')
                        add_url_reference(paintingItem, newclaim,
                                          europeanaUrl)

                        # Debug output of the last search result visited.
                        print(creatoritem.title())
                        print(creatoritem.get())
                else:
                    pywikibot.output('No dccreator found')

            # date of creation
            if u'P571' not in claims and proxy.get('dctermsCreated'):
                dccreated = proxy['dctermsCreated']['def'][0].strip()
                if len(dccreated) == 4:
                    # Only a plain year is handled. Values such as "17th"
                    # are skipped instead of crashing inside WbTime.
                    try:
                        year = int(dccreated)
                    except ValueError:
                        year = None
                    if year is not None:
                        newdate = pywikibot.WbTime(year=year)
                        newclaim = add_claim(
                            paintingItem, u'P571', newdate,
                            'Adding date of creation claim to %s')
                        add_url_reference(paintingItem, newclaim,
                                          europeanaUrl)
def querySearch(self):
    """
    Build the search generator for ``self.query``.

    The search is run against ``self.site`` and restricted to namespace 0.
    """
    # todo: remove
    search_options = {'namespaces': [0], 'site': self.site}
    return pagegenerators.SearchPageGenerator(self.query, **search_options)
def run(self): """ Starts the robot. """ rijksmuseum = pywikibot.ItemPage(self.repo, u'Q190804') for painting in self.generator: # Buh, for this one I know for sure it's in there paintingId = painting['artObject']['objectNumber'] uri = u'https://www.rijksmuseum.nl/nl/collectie/%s' % ( paintingId, ) #europeanaUrl = u'http://europeana.eu/portal/record/%s.html' % (painting['object']['about'],) print paintingId print uri dcCreatorName = painting['artObject'][ 'principalOrFirstMaker'].strip() #dcCreatorName = u'' #for agent in painting['object']['agents']: # if agent.get('about')== dcCreator: # #print u'Found my agent' # if u',' in agent['prefLabel']['def'][0]: # (surname, givenname) = agent['prefLabel']['def'][0].split(u',') # dcCreatorName = u'%s %s' % (givenname.strip(), surname.strip(),) # else: # dcCreatorName = agent['prefLabel']['def'][0] #print painting['object']['language'] #print painting['object']['title'] #print painting['object']['about'] #print painting['object']['proxies'][0]['dcCreator']['def'][0] #print painting['object']['proxies'][0]['dcFormat']['def'][0] #print painting['object']['proxies'][0]['dcIdentifier']['def'][0] #print painting['object']['proxies'][0]['dcIdentifier']['def'][1] paintingItem = None newclaims = [] if paintingId in self.paintingIds: paintingItemTitle = u'Q%s' % ( self.paintingIds.get(paintingId), ) print paintingItemTitle paintingItem = pywikibot.ItemPage(self.repo, title=paintingItemTitle) else: #print 'bla' #monumentItem = pywikibot.ItemPage(self.repo, title=u'') #print dcCreatorName data = { 'labels': {}, 'descriptions': {}, } data['labels']['nl'] = { 'language': 'nl', 'value': painting['artObject']['title'] } if dcCreatorName: data['descriptions']['en'] = { 'language': u'en', 'value': u'painting by %s' % (dcCreatorName, ) } data['descriptions']['nl'] = { 'language': u'nl', 'value': u'schilderij van %s' % (dcCreatorName, ) } print data identification = {} summary = u'Creating new item with data from %s ' % (uri, ) 
pywikibot.output(summary) #monumentItem.editEntity(data, summary=summary) result = self.repo.editEntity(identification, data, summary=summary) #print result paintingItemTitle = result.get(u'entity').get('id') paintingItem = pywikibot.ItemPage(self.repo, title=paintingItemTitle) newclaim = pywikibot.Claim( self.repo, u'P%s' % (self.paintingIdProperty, )) newclaim.setTarget(paintingId) pywikibot.output('Adding new id claim to %s' % paintingItem) paintingItem.addClaim(newclaim) self.addReference(paintingItem, newclaim, uri) newqualifier = pywikibot.Claim( self.repo, u'P195') #Add collection, isQualifier=True newqualifier.setTarget(rijksmuseum) pywikibot.output('Adding new qualifier claim to %s' % paintingItem) newclaim.addQualifier(newqualifier) collectionclaim = pywikibot.Claim(self.repo, u'P195') collectionclaim.setTarget(rijksmuseum) pywikibot.output('Adding collection claim to %s' % paintingItem) paintingItem.addClaim(collectionclaim) self.addReference(paintingItem, collectionclaim, uri) if paintingItem and paintingItem.exists(): data = paintingItem.get() claims = data.get('claims') #print claims # located in if u'P276' not in claims: newclaim = pywikibot.Claim(self.repo, u'P276') newclaim.setTarget(rijksmuseum) pywikibot.output('Adding located in claim to %s' % paintingItem) paintingItem.addClaim(newclaim) self.addReference(paintingItem, newclaim, uri) # instance of always painting while working on the painting collection if u'P31' not in claims: dcformatItem = pywikibot.ItemPage(self.repo, title='Q3305213') newclaim = pywikibot.Claim(self.repo, u'P31') newclaim.setTarget(dcformatItem) pywikibot.output('Adding instance claim to %s' % paintingItem) paintingItem.addClaim(newclaim) self.addReference(paintingItem, newclaim, uri) # creator if u'P170' not in claims and dcCreatorName: creategen = pagegenerators.PreloadingItemGenerator( pagegenerators.WikidataItemGenerator( pagegenerators.SearchPageGenerator( dcCreatorName, step=None, total=10, namespaces=[0], 
site=self.repo))) newcreator = None for creatoritem in creategen: print creatoritem.title() if creatoritem.get().get('labels').get( 'en') == dcCreatorName or creatoritem.get( ).get('labels').get('nl') == dcCreatorName: print creatoritem.get().get('labels').get('en') print creatoritem.get().get('labels').get('nl') # Check occupation and country of citizinship if u'P106' in creatoritem.get().get('claims') and ( u'P21' in creatoritem.get().get('claims') or u'P800' in creatoritem.get().get('claims')): newcreator = creatoritem continue elif ( creatoritem.get().get('aliases').get('en') and dcCreatorName in creatoritem.get().get('aliases').get('en') ) or (creatoritem.get().get('aliases').get('nl') and dcCreatorName in creatoritem.get().get('aliases').get('nl')): if u'P106' in creatoritem.get().get('claims') and ( u'P21' in creatoritem.get().get('claims') or u'P800' in creatoritem.get().get('claims')): newcreator = creatoritem continue if newcreator: pywikibot.output(newcreator.title()) newclaim = pywikibot.Claim(self.repo, u'P170') newclaim.setTarget(newcreator) pywikibot.output('Adding creator claim to %s' % paintingItem) paintingItem.addClaim(newclaim) self.addReference(paintingItem, newclaim, uri) print creatoritem.title() print creatoritem.get() else: pywikibot.output('No item found for %s' % (dcCreatorName, )) else: print u'Already has a creator' """