def make_url_ref(self, url, fetch_date, publish_date=None):
        """Build a Reference object describing a url source.

        The reference holds up to three claims:
        * P854: Reference url <the input url>
        * P813: Retrieval date <fetch_date>
        * P577: Publication date <publish_date>, only if one is given

        The url claim is passed as ``source_test``; the date claims are
        passed as ``source_notest`` with the publication date (if any)
        listed before the retrieval date.

        :param url: the source url
        :param fetch_date: the date the url was retrieved (iso)
        :param publish_date: the date the document was published (iso),
            left out of the reference when falsy
        :return: WdS.Reference
        """
        dated = []
        if publish_date:
            published = helpers.iso_to_WbTime(publish_date)
            dated.append(self.wd.make_simple_claim('P577', published))

        retrieved = helpers.iso_to_WbTime(fetch_date)
        dated.append(self.wd.make_simple_claim('P813', retrieved))

        url_claim = self.wd.make_simple_claim('P854', url)
        return WdS.Reference(source_test=[url_claim], source_notest=dated)
Ejemplo n.º 2
0
    def make_lido_ref(self, lido_data):
        """
        Make a Reference object for the dataset (unfinished, always aborts).

        This method is a work in progress: it terminates the program by
        raising SystemExit before any claim is built, so nothing below the
        raise is ever executed.

        The draft kept below the raise would build a reference from:
        * P854: Reference url <hard-coded url of the dataset archive>
        * P577: Publication date <hard-coded 2016-09-30>
        * P?: placeholder property <the 'source_file' entry of lido_data>
        * P813: Retrieval date <current date>

        A "P248: Stated in" claim was planned but is not part of the draft.

        :param lido_data: dict-like with a 'source_file' entry
        :raises SystemExit: always, with an explanatory message
        """
        # Raise SystemExit directly instead of calling the interactive
        # ``exit()`` helper: the exception type is unchanged, but the abort
        # now carries a message and a non-zero exit status, and it no longer
        # depends on the ``site`` module having been loaded.
        raise SystemExit(
            'make_lido_ref is not implemented yet: the property for the '
            'source file is still a placeholder (P?)')

        # --- unreachable draft, kept for whoever finishes the method ---
        # P248: Nationalmuseum dataset
        xml_file = lido_data.get('source_file')
        date = helpers.today_as_WbTime()
        pub_date = helpers.iso_to_WbTime(u'2016-09-30')
        zip_url = u'https://github.com/NationalmuseumSWE/WikidataCollection/' \
                  u'blob/master/valid_items_transform_1677.tgz'
        ref = WD.Reference(
            source_test=[
                self.wd.make_simple_claim(u'P854', zip_url),
                self.wd.make_simple_claim(u'P577', pub_date),
                self.wd.make_simple_claim(u'P?', xml_file),
            ],
            source_notest=self.wd.make_simple_claim(u'P813', date))
        return ref
Ejemplo n.º 3
0
    def matchBirth(self, value):
        """Turn a birth date string into a statement.

        param value: str|unicode|None, passed unchanged to
            helpers.iso_to_WbTime
        return: WD.Statement, or None when value is None or consists of
            whitespace only
        """
        if value is not None and value.strip():
            return WD.Statement(helpers.iso_to_WbTime(value))
        return None
Ejemplo n.º 4
0
    def matchDeath(self, value):
        """Turn a status string into a death date statement.

        Only values starting with 'Avliden' are handled; whatever follows
        that prefix (stripped of surrounding whitespace) is parsed as the
        date.

        param value: str|unicode|None
        return: WD.Statement, or None when value is empty or does not
            start with 'Avliden'
        """
        prefix = 'Avliden'
        if not value or not value.startswith(prefix):
            return None
        date_part = value[len(prefix):].strip()
        return WD.Statement(helpers.iso_to_WbTime(date_part))
Ejemplo n.º 5
0
    def get_claims(bot, values):
        """Build the basic protoclaims for a person.

        P31 (instance of Q5), P20, P19 and P27 are always set; the date,
        gender and name claims are only added when the corresponding value
        is present (dates additionally must not be 'unknown').

        @param bot: the instance of the bot calling upon the template
        @type bot: KulturnavBot
        @param values: the values extracted using the rules
        @type values: dict
        @return: the protoclaims
        @rtype: dict PID-WD.Statement pairs
        """
        # instance of HUMAN = Q5
        claims = {u'P31': WD.Statement(bot.wd.QtoItemPage(u'Q5'))}

        # death: date (if known) followed by place
        death_date = values.get(u'deathDate')
        if death_date and death_date != 'unknown':
            claims[u'P570'] = WD.Statement(helpers.iso_to_WbTime(death_date))
        claims[u'P20'] = Person.get_death_place(bot, values)

        # birth: date (if known) followed by place
        birth_date = values.get(u'birthDate')
        if birth_date and birth_date != 'unknown':
            claims[u'P569'] = WD.Statement(helpers.iso_to_WbTime(birth_date))
        claims[u'P19'] = Person.get_birth_place(bot, values)

        gender = values.get(u'gender')
        if gender:
            # db_gender returns a WD.Statement
            claims[u'P21'] = bot.db_gender(gender)

        # given name / family name, looked up by their kind
        name_props = ((u'firstName', u'P735'), (u'lastName', u'P734'))
        for name_kind, pid in name_props:
            name = values.get(name_kind)
            if name:
                claims[pid] = WD.Statement(bot.db_name(name, name_kind))

        claims[u'P27'] = Person.get_nationality(bot, values)

        return claims
 def parse_date(self, date):
     """Convert a date in DD-MMM-YYYY format to WbTime.

     The month abbreviation is matched case-insensitively and whitespace
     around each component is ignored, so '01-Jan-2000' and
     ' 01-JAN-2000 ' are handled the same way as '01-JAN-2000'.

     :param date: date string such as '01-JAN-2000'
     :return: the result of helpers.iso_to_WbTime for the ISO date
     :raises ValueError: if the string does not consist of exactly three
         dash-separated parts, the day is not an integer or the month
         abbreviation is not recognised
     """
     months = (
         'JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP',
         'OCT', 'NOV', 'DEC',
     )
     dd, mmm, yyyy = (part.strip() for part in date.split('-'))
     # upper-case before the lookup so that 'Jan' / 'jan' are accepted too
     month = months.index(mmm.upper()) + 1
     iso = '{year}-{month:02d}-{day:02d}'.format(
         year=yyyy, month=month, day=int(dd))
     return helpers.iso_to_WbTime(iso)
Ejemplo n.º 7
0
        def claims(self, values):
            """Assemble the protoclaims for a shipyard.

            The instance-of, location and owner claims are delegated to
            the set_* helpers; inception (P571) and dissolution (P576)
            dates are added here when present in values.

            @param values: the values extracted using the rules
            @type values: dict
            @return: the protoclaims
            @rtype: dict PID-WD.Statement pairs
            """
            result = {}
            self.set_is_instance(self.SHIPYARD_Q, result)
            self.set_location(values, result)
            self.set_owner(values, result)

            # date values mapped onto the property they populate
            date_props = (
                (u'establishment.date', u'P571'),
                (u'termination.date', u'P576'),
            )
            for value_key, pid in date_props:
                iso_date = values.get(value_key)
                if iso_date:
                    result[pid] = WD.Statement(
                        helpers.iso_to_WbTime(iso_date))

            return result
Ejemplo n.º 8
0
    def add_date_claim(self, item, lido_data, ref):
        """
        Add an inception/P571 claim.

        The claim is only added for an exact date, i.e. when the
        'earliest' and 'latest' entries of lido_data['creation_date'] are
        present and identical. In every other case nothing is done.

        Always returns None.
        """
        creation_date = lido_data.get('creation_date')
        if not creation_date:
            return None

        # anything but an exact date is skipped
        earliest = creation_date.get('earliest')
        if not earliest or earliest != creation_date.get('latest'):
            return None

        wb_date = helpers.iso_to_WbTime(earliest)
        if wb_date:
            self.wd.addNewClaim(u'P571', WD.Statement(wb_date), item, ref)
        return None
Ejemplo n.º 9
0
    def set_date_qualifier(self, values, key, statement, prop=None):
        """Attach a date qualifier to a statement.

        The date is read from values['<key>.date']; that entry must exist
        (a missing entry raises KeyError) but may be empty.

        @param values: the values extracted using the rules
        @type values: dict
        @param key: the key to which the date is associated
            e.g. built for built.date
        @type key: str
        @param statement: statement to add the qualifier to
        @type statement: WD.Statement
        @param prop: the property to use, defaults to self.TIME_P/P585
        @type prop: str
        @return: if a date was found and the qualifier added
        @rtype: bool
        """
        qualifier_prop = prop if prop else self.TIME_P
        iso_date = values[u'%s.date' % key]
        if iso_date:
            qualifier = WD.Qualifier(
                P=qualifier_prop, itis=helpers.iso_to_WbTime(iso_date))
            statement.addQualifier(qualifier)
            return True
        return False
Ejemplo n.º 10
0
    def runLayout(self, datasetRules, datasetProtoclaims, datasetSanityTest,
                  label, shuffle):
        """
        Execute the basic layout of a run.

        It should be called for a dataset-specific run which sets the
        parameters.

        Each hit from self.generator is turned into values, matched to a
        Wikidata item and, if the item exists and passes both sanity
        tests, updated with names and sourced claims. Hits that are
        skipped (values could not be populated or a sanity test failed)
        do not count towards self.cutoff or the progress output.

        param datasetRules: a dict of additional Rules or values to look for
        param datasetProtoclaims: a function for populating protoclaims,
                                  called as datasetProtoclaims(self, values)
        param datasetSanityTest: a function which must return true for
                                 results to be written to Wikidata,
                                 called as datasetSanityTest(self, hitItem)
        param label: the key in values to be used for label/alias.
                     set to None to skip addNames()
        param shuffle: whether name/label/alias is shuffled or not
                       i.e. if name = last, first
        """
        # Rules/values which are always searched for
        required_keys = (
            u'identifier',
            u'modified',
            u'seeAlso',
            u'sameAs',
            u'exactMatch',
            # not expected
            u'wikidata',
            u'libris-id',
            u'viaf-id',
            u'getty_aat',
            u'ulan',
        )

        handled = 0
        for hit in self.generator:
            # allow for limited runs
            if self.cutoff and handled >= self.cutoff:
                break
            # some type of feedback
            if handled > 0 and handled % 100 == 0:
                pywikibot.output('%d entries handled...' % handled)

            # required rules extended by the dataset specific ones
            rules = dict.fromkeys(required_keys)
            rules.update(datasetRules)

            # start from an all-None dict of values, then populate it
            values = dict.fromkeys(rules)
            if not self.populateValues(values, rules, hit):
                # move on to the next hit if a problem was encountered
                continue

            # find the matching wikidata item
            hitItem = self.wikidataMatch(values)
            self.current_uuid = values['identifier']
            # @todo: self.current_protoclaims  # allows these to be accessed more easily

            # convert values to potential claims
            protoclaims = datasetProtoclaims(self, values)
            self.make_base_protoclaims(values, protoclaims)

            # output info for testing
            if self.verbose:
                for debug_info in (values, protoclaims, hitItem):
                    pywikibot.output(debug_info)

            # Add information if a match was found
            if hitItem and hitItem.exists():
                # if redirect then get target instead

                # both the generic and the dataset sanity test must pass
                if not (self.sanityTest(hitItem) and
                        datasetSanityTest(self, hitItem)):
                    continue

                # add name as label/alias
                if label is not None:
                    self.addNames(values[label], hitItem, shuffle=shuffle)

                # the "last modified" timestamp is the basis of the Reference
                modified = helpers.iso_to_WbTime(values[u'modified'])
                ref = self.make_ref(modified)

                # add each property (if new) and source it
                self.addProperties(protoclaims, hitItem, ref)

            handled += 1

        # done
        pywikibot.output(u'Handled %d entries' % handled)