def getCreator(self, creator):
    """
    Find the painter with the name in creator.

    First check if the name is already in the self.creators cache.
    Second, search Wikidata for humans matching the name and accept the
    first hit whose English label (case insensitive) or English alias
    (case sensitive) equals the name and which has a P106 claim with
    target Q1028181.
    Third, check whether the name looks like an anonymous attribution
    ("Workshop of ...", "Circle of ...", ...).
    Every outcome, including a miss, is stored in self.creators.

    :param creator: Name of the creator to look up
    :return: The cached value or the matching item, the 'anonymous' cache
        entry for anonymous attributions, or None if nothing was found
    """
    # First use the cache
    if creator in self.creators:
        return self.creators[creator]

    # Search Wikidata for a suitable candidate, tell the search to only return humans
    searchstring = u'%s haswbstatement:P31=Q5' % (creator, )
    creategen = pagegenerators.PreloadingItemGenerator(
        pagegenerators.WikibaseItemGenerator(
            pagegenerators.SearchPageGenerator(searchstring,
                                               step=None,
                                               total=50,
                                               namespaces=[0],
                                               site=self.repo)))

    for creatoritem in creategen:
        if creatoritem.isRedirectPage():
            creatoritem = creatoritem.getRedirectTarget()

        # Fetch the entity data once instead of once per field access
        data = creatoritem.get()
        label = data.get('labels').get('en')
        aliases = data.get('aliases').get('en')

        # See if the label or one of the aliases of the creatoritem matches
        # the string we have. Only label is case insensitive.
        if not ((label and label.lower() == creator.lower())
                or (aliases and creator in aliases)):
            continue

        claims = data.get('claims')
        if u'P106' not in claims:
            continue
        for existing_claim in claims.get('P106'):
            if existing_claim.target_equals(u'Q1028181'):
                self.creators[creator] = creatoritem
                return creatoritem

    # Regex that should match all the anonymous work stuff that isn't covered by the list.
    # Raw string so that \s is a regex token and not an invalid string escape.
    anonymousRegex = (r'^(Workshop of|Follower of|Circle of|Manner of|'
                      r'Forgery after|School of|After|Unidentified Artist)\s.*$')
    if re.match(anonymousRegex, creator, flags=re.I):
        anonymous = self.creators.get('anonymous')
        self.creators[creator] = anonymous
        return anonymous

    # We don't want to do the same search over and over again
    self.creators[creator] = None
    return None
def subgenerator(self):
    """
    Yield the items this bot should process, honouring the 'limit' option.

    Three sources are drained in order:

    1. Namespace-0 backlinks of every item in self.item_ids.
    2. Results of the 'fake_references_url' query for each property in
       self.url_props, fetched in batches of at most 500.
    3. Results of the 'fake_references' query for each property in
       self.ref_props, fetched in batches of at most 100.

    For sources 2 and 3 the query is re-run for the same property until it
    returns nothing or the limit is exhausted. A limit of None means
    unlimited.
    """
    remaining = self.getOption('limit')

    for item_id in self.item_ids:
        source = pywikibot.ItemPage(self.repo, item_id)
        linking_pages = source.backlinks(
            total=remaining, filterRedirects=False, namespaces=[0])
        for page in pagegenerators.WikibaseItemGenerator(linking_pages):
            yield page
            if remaining is None:
                continue
            remaining -= 1
            if remaining == 0:
                return

    # (attribute holding the properties, query name, maximum batch size).
    # The attribute is looked up by name so that each property list is only
    # read when its stage starts.
    stages = (
        ('url_props', 'fake_references_url', 500),
        ('ref_props', 'fake_references', 100),
    )
    for props_attr, query_name, batch_size in stages:
        for prop in getattr(self, props_attr):
            while remaining != 0:
                if remaining is None:
                    batch_limit = batch_size
                else:
                    batch_limit = min(batch_size, remaining)
                query = self.store.build_query(
                    query_name, limit=batch_limit, prop=prop)

                got_results = False
                for page in pagegenerators.WikidataSPARQLPageGenerator(
                        query, site=self.repo):
                    got_results = True
                    yield page
                    if remaining is not None:
                        remaining -= 1

                # An empty batch means this property has nothing left
                if not got_results:
                    break
def findPerson(self, name, yob, yod):
    """
    Look up a person on Wikidata by name, year of birth and year of death.

    The search is restricted to humans (P31=Q5) and uses the quoted name.
    Labels and aliases are not compared; a candidate is accepted purely on
    the years of its first P569 and first P570 claims.

    :param name: Name of the person
    :param yob: Year of birth of the person
    :param yod: Year of death of the person
    :return: The ItemPage whose years both match. If none matches, True when
        at least one candidate had both a P569 and a P570 claim, otherwise
        False.
    """
    # Search Wikidata for a suitable candidate, tell the search to only return humans
    query = u'"%s" haswbstatement:P31=Q5' % (name, )
    search_hits = pagegenerators.SearchPageGenerator(query,
                                                     step=None,
                                                     total=50,
                                                     namespaces=[0],
                                                     site=self.repo)
    candidates = pagegenerators.PreloadingItemGenerator(
        pagegenerators.WikibaseItemGenerator(search_hits))

    saw_dated_candidate = False
    for candidate in candidates:
        if candidate.isRedirectPage():
            candidate = candidate.getRedirectTarget()

        claims = candidate.get().get('claims')
        if u'P569' not in claims or u'P570' not in claims:
            continue

        born = claims.get('P569')[0].getTarget()
        died = claims.get('P570')[0].getTarget()
        saw_dated_candidate = True

        # Claims without a usable target cannot be compared
        if not (born and died):
            continue

        # Check if year of birth and year of death match
        if int(born.year) == int(yob) and int(died.year) == int(yod):
            return candidate

    return saw_dated_candidate
def match_name_off_labs(name, types, wd, limit):
    """
    Check if there is an item matching the name using API search.

    Less good than match_name_on_labs() but works from anywhere.

    An item is a candidate when the name equals one of its labels or one of
    its aliases (in any language); candidates are then passed through
    filter_on_types().

    @param name: The name to search for
    @type name: basestring
    @param types: The Q-values which are allowed for INSTANCE_OF_P
    @type types: tuple of basestring
    @param wd: The running WikidataStuff instance
    @type wd: WikidataStuff (WD)
    @param limit: Maximum number of search hits to inspect; if the search
        yields more than this, no matches are returned at all
    @type limit: int
    @return: Any matching items
    @rtype: list (of pywikibot.ItemPage)
    """
    matches = []
    objgen = pagegenerators.PreloadingItemGenerator(
        pagegenerators.WikibaseItemGenerator(
            pagegenerators.SearchPageGenerator(name,
                                               step=None,
                                               total=10,
                                               namespaces=[0],
                                               site=wd.repo)))

    # check if P31 and then if any of prop[typ] in P31
    for count, obj in enumerate(objgen, 1):
        if count > limit:
            # better to skip than to crash when search times out
            # remove any matches (since incomplete) and exit loop
            return []  # avoids keeping a partial list

        obj = wd.bypassRedirect(obj)
        data = obj.get()

        # Labels map language -> string, aliases map language -> list of
        # strings, so the alias test has to look inside each list.
        if (name in data.get('labels').values() or
                any(name in alias_list
                    for alias_list in data.get('aliases').values())):
            filter_on_types(obj, types, matches)
    return matches