Example #1
0
    def getCreator(self, creator):
        """
        Find the painter item whose name matches creator.

        Lookup order:

        1. Return the entry cached in self.creators, if there is one. A
           cached None means an earlier lookup already failed.
        2. Search self.repo for humans (P31=Q5) and accept the first
           candidate whose English label (case insensitive) or one of
           whose English aliases (case sensitive) equals the name, and
           which has an occupation (P106) claim with target Q1028181.
        3. If the name looks like an anonymous attribution ("Workshop
           of ...", "After ...", ...), use the 'anonymous' cache entry.

        Every outcome, including a miss, is stored in self.creators so the
        same search is not done over and over again.

        :param creator: Name of the creator to look up
        :return: The matching item, the cached 'anonymous' entry (None if
            self.creators has no such key) or None when nothing matched
        """
        # First use the cache (this also serves cached misses)
        if creator in self.creators:
            return self.creators[creator]

        # Search Wikidata for a suitable candidate, tell the search to only
        # return humans
        searchstring = u'%s haswbstatement:P31=Q5' % (creator, )
        creategen = pagegenerators.PreloadingItemGenerator(
            pagegenerators.WikibaseItemGenerator(
                pagegenerators.SearchPageGenerator(searchstring,
                                                   step=None,
                                                   total=50,
                                                   namespaces=[0],
                                                   site=self.repo)))

        for creatoritem in creategen:
            if creatoritem.isRedirectPage():
                creatoritem = creatoritem.getRedirectTarget()

            # Read the item content once instead of once per field
            content = creatoritem.get()
            label = content.get('labels').get('en')
            aliases = content.get('aliases').get('en')

            # Only the label comparison is case insensitive
            label_matches = bool(label) and label.lower() == creator.lower()
            alias_matches = bool(aliases) and creator in aliases
            if not (label_matches or alias_matches):
                continue

            claims = content.get('claims')
            if u'P106' not in claims:
                continue
            for existing_claim in claims.get('P106'):
                if existing_claim.target_equals(u'Q1028181'):
                    self.creators[creator] = creatoritem
                    return creatoritem

        # Regex that should match all the anonymous work stuff that isn't
        # covered by the list. Raw string so that \s reaches the regex
        # engine instead of being an invalid string escape.
        anonymousRegex = (r'^(Workshop of|Follower of|Circle of|Manner of|'
                          r'Forgery after|School of|After|'
                          r'Unidentified Artist)\s.*$')
        if re.match(anonymousRegex, creator, flags=re.I):
            anonymous = self.creators.get('anonymous')
            self.creators[creator] = anonymous
            return anonymous

        # We don't want to do the same search over and over again
        self.creators[creator] = None
        return None
    def subgenerator(self):
        """
        Yield the items to work on, stopping once the 'limit' option is used up.

        Items come from three sources, in this order:

        1. Main-namespace backlinks (requested with filterRedirects=False)
           of every item id in self.item_ids.
        2. The 'fake_references_url' query of self.store, run for every
           property in self.url_props in batches of at most 500.
        3. The 'fake_references' query of self.store, run for every
           property in self.ref_props in batches of at most 100.

        A limit of None means no limit. Each query is re-run for the same
        property until it returns nothing or the limit reaches zero.
        NOTE(review): presumably items drop out of the query result once
        they have been processed by the caller - confirm, otherwise an
        unlimited run never leaves that loop.
        """
        remaining = self.getOption('limit')

        for ident in self.item_ids:
            target = pywikibot.ItemPage(self.repo, ident)
            linking_pages = target.backlinks(total=remaining,
                                             filterRedirects=False,
                                             namespaces=[0])
            for item in pagegenerators.WikibaseItemGenerator(linking_pages):
                yield item
                if remaining is not None:
                    remaining -= 1

            if remaining == 0:
                return

        # (attribute holding the properties, query name, batch size);
        # the attribute is resolved only when its turn comes.
        sparql_sources = (
            ('url_props', 'fake_references_url', 500),
            ('ref_props', 'fake_references', 100),
        )
        for attr_name, query_name, batch_size in sparql_sources:
            for prop in getattr(self, attr_name):
                got_results = True
                while got_results and remaining != 0:
                    got_results = False
                    if remaining is None:
                        query_limit = batch_size
                    else:
                        query_limit = min(batch_size, remaining)
                    query = self.store.build_query(query_name,
                                                   limit=query_limit,
                                                   prop=prop)
                    results = pagegenerators.WikidataSPARQLPageGenerator(
                        query, site=self.repo)
                    for item in results:
                        got_results = True
                        yield item
                        if remaining is not None:
                            remaining -= 1
    def findPerson(self, name, yob, yod):
        """
        Look for a human with the given name and years of birth and death.

        Runs a search on self.repo for the quoted name, restricted to
        humans (P31=Q5), and inspects at most 50 hits. Labels and aliases
        are not compared; a candidate is accepted purely on its first
        date of birth (P569) and first date of death (P570) claim.

        :param name: Name of the person
        :param yob: Year of birth of the person (anything int() accepts)
        :param yod: Year of death of the person (anything int() accepts)
        :return: The ItemPage whose birth and death years both match.
            Otherwise a bool: True if at least one candidate carried both
            a P569 and a P570 claim, False if none did.
        """
        # Tell the search to only return humans
        query = u'"%s" haswbstatement:P31=Q5' % (name, )
        hits = pagegenerators.SearchPageGenerator(query,
                                                  step=None,
                                                  total=50,
                                                  namespaces=[0],
                                                  site=self.repo)
        candidates = pagegenerators.PreloadingItemGenerator(
            pagegenerators.WikibaseItemGenerator(hits))

        saw_dated_candidate = False

        for candidate in candidates:
            if candidate.isRedirectPage():
                candidate = candidate.getRedirectTarget()

            claims = candidate.get().get('claims')
            if u'P569' not in claims or u'P570' not in claims:
                continue

            born = claims.get('P569')[0].getTarget()
            died = claims.get('P570')[0].getTarget()
            saw_dated_candidate = True

            # A claim can have an empty target; such a candidate cannot
            # be compared on years.
            if not (born and died):
                continue
            if int(born.year) == int(yob) and int(died.year) == int(yod):
                return candidate

        return saw_dated_candidate
Example #4
0
def match_name_off_labs(name, types, wd, limit):
    """
    Check if there is an item matching the name using API search.

    Less good than match_name_on_labs() but works from anywhere.

    An item matches when the name is identical to one of its labels or
    one of its aliases, in any language. Matching items are handed to
    filter_on_types() together with the result list.

    @param name: The name to search for
    @type name: basestring
    @param types: The Q-values which are allowed for INSTANCE_OF_P
    @type types: tuple of basestring
    @param wd: The running WikidataStuff instance
    @type wd: WikidataStuff (WD)
    @param limit: Largest number of search hits to inspect. If the search
        yields more hits than this, an empty list is returned. The search
        itself is capped at 10 hits.
    @type limit: int
    @return: Any matching items
    @rtype: list (of pywikibot.ItemPage)
    """
    matches = []
    objgen = pagegenerators.PreloadingItemGenerator(
        pagegenerators.WikibaseItemGenerator(
            pagegenerators.SearchPageGenerator(name,
                                               step=None,
                                               total=10,
                                               namespaces=[0],
                                               site=wd.repo)))

    # check if P31 and then if any of prop[typ] in P31
    for count, obj in enumerate(objgen, start=1):
        obj = wd.bypassRedirect(obj)
        if count > limit:
            # better to skip than to crash when search times out
            # remove any matches (since incomplete) and exit loop
            return []  # avoids keeping a partial list

        content = obj.get()
        label_hit = name in content.get('labels').values()
        # Aliases map each language to a *list* of strings, so the name
        # has to be looked up inside every list; comparing it against the
        # lists themselves can never succeed. A plain (non-list) value is
        # still accepted by equality.
        alias_hit = any(
            name == entry
            or (isinstance(entry, (list, tuple)) and name in entry)
            for entry in content.get('aliases').values())
        if label_hit or alias_hit:
            filter_on_types(obj, types, matches)
    return matches