def make_url_ref(self, url, fetch_date, publish_date=None):
    """Build a Reference object describing a url source.

    The reference holds up to three claims:
    * P854: Reference url <the input url>
    * P813: Retrieval date <from fetch_date>
    * P577: Publication date <from publish_date, only if one is given>

    :param url: the source url
    :param fetch_date: the date the url was retrieved (iso)
    :param publish_date: the date the document was published (iso),
        left out of the reference when falsy
    :return: WdS.Reference
    """
    make_claim = self.wd.make_simple_claim

    # the dates end up in source_notest, the url itself in source_test
    dated = []
    if publish_date:
        published = helpers.iso_to_WbTime(publish_date)
        dated.append(make_claim('P577', published))
    retrieved = helpers.iso_to_WbTime(fetch_date)
    dated.append(make_claim('P813', retrieved))

    url_claim = make_claim('P854', url)
    return WdS.Reference(source_test=[url_claim], source_notest=dated)
def make_lido_ref(self, lido_data):
    """
    Make a Reference object for the dataset.

    NOTE: this function is currently disabled. The first statement is
    exit(), so everything after it is unreachable.

    The code after exit() builds these parts:
    * P854: Reference url <hard-coded url of the dataset archive>
    * P577: Publication date <hard-coded to 2016-09-30>
    * 'P?': <the 'source_file' entry of lido_data>, placeholder property
    * P813: Retrieval date <current date>

    No P248 (stated in) claim is constructed, even though a comment in
    the body mentions it.

    :param lido_data: dict-like object read via .get('source_file')
    :return: WD.Reference (never reached while exit() is in place)
    """
    # Terminates the interpreter before anything below is executed.
    # NOTE(review): presumably a guard until the 'P?' placeholder below
    # is resolved - confirm before removing.
    exit()
    # P248: Nationalmuseum dataset
    xml_file = lido_data.get('source_file')
    date = helpers.today_as_WbTime()
    pub_date = helpers.iso_to_WbTime(u'2016-09-30')
    zip_url = u'https://github.com/NationalmuseumSWE/WikidataCollection/' \
              u'blob/master/valid_items_transform_1677.tgz'
    ref = WD.Reference(
        source_test=[
            self.wd.make_simple_claim(u'P854', zip_url),
            self.wd.make_simple_claim(u'P577', pub_date),
            # u'P?' is not a real property id; the property for the
            # source file still has to be chosen.
            self.wd.make_simple_claim(u'P?', xml_file),
        ],
        # NOTE(review): a single claim is passed here, not a list of
        # claims - verify that WD.Reference accepts that.
        source_notest=self.wd.make_simple_claim(u'P813', date))
    return ref
def matchBirth(self, value):
    """Turn a birth date value into a statement.

    param value: str|unicode, an iso date; None or a blank string
        means that there is no birth date
    return: WD.Statement|None
    """
    if value is None:
        return None
    if not value.strip():
        # nothing but whitespace
        return None

    # the value is handed on as-is, i.e. without stripping it
    birth_date = helpers.iso_to_WbTime(value)
    return WD.Statement(birth_date)
def matchDeath(self, value):
    """Extract death date from status.

    The status is only considered if it starts with 'Avliden'. Whatever
    follows that prefix is, after stripping surrounding whitespace,
    interpreted as an iso date.

    param value: str|unicode
    return: WD.Statement|None, None if the status is empty, does not
        start with 'Avliden' or carries no date after the prefix
    """
    prefix = 'Avliden'
    if not value or not value.startswith(prefix):
        return None

    date_part = value[len(prefix):].strip()
    if not date_part:
        # A bare 'Avliden' has no date to convert; don't hand an empty
        # string to the date parser.
        return None
    return WD.Statement(helpers.iso_to_WbTime(date_part))
def get_claims(bot, values):
    """Build the basic protoclaims for a person.

    Always sets P31 (instance of, Q5) and P27 (via
    Person.get_nationality). Death date/place (P570/P20) and birth
    date/place (P569/P19) are set only if the corresponding date is
    present and is not the string 'unknown'. P21, P735 and P734 are set
    only if gender, firstName and lastName respectively are present.

    @param bot: the instance of the bot calling upon the template
    @type bot: KulturnavBot
    @param values: the values extracted using the rules
    @type values: dict
    @return: the protoclaims
    @rtype: dict PID-WD.Statement pairs
    """
    # instance of HUMAN = Q5
    protoclaims = {u'P31': WD.Statement(bot.wd.QtoItemPage(u'Q5'))}

    death_date = values.get(u'deathDate')
    if death_date and death_date != 'unknown':
        protoclaims[u'P570'] = WD.Statement(
            helpers.iso_to_WbTime(death_date))
        protoclaims[u'P20'] = Person.get_death_place(bot, values)

    birth_date = values.get(u'birthDate')
    if birth_date and birth_date != 'unknown':
        protoclaims[u'P569'] = WD.Statement(
            helpers.iso_to_WbTime(birth_date))
        protoclaims[u'P19'] = Person.get_birth_place(bot, values)

    gender = values.get(u'gender')
    if gender:
        # db_gender returns a WD.Statement
        protoclaims[u'P21'] = bot.db_gender(gender)

    # the key in values doubles as the name type given to db_name
    for name_key, pid in ((u'firstName', u'P735'), (u'lastName', u'P734')):
        name = values.get(name_key)
        if name:
            protoclaims[pid] = WD.Statement(bot.db_name(name, name_key))

    protoclaims[u'P27'] = Person.get_nationality(bot, values)
    return protoclaims
def parse_date(self, date):
    """Convert date in DD-MMM-YYYY format to WbTime.

    MMM is an upper case three letter english month abbreviation, e.g.
    '07-MAR-1999'. A ValueError is raised if the date does not consist
    of exactly three dash separated parts, if the day is not an integer
    or if the month abbreviation is not recognised.
    """
    month_abbreviations = [
        'JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
        'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'
    ]
    day_part, month_part, year_part = date.split('-')
    day_number = int(day_part)
    # position in the list is zero-based, months are one-based
    month_number = month_abbreviations.index(month_part) + 1

    iso = '{year}-{month:02d}-{day:02d}'.format(
        year=year_part, day=day_number, month=month_number)
    return helpers.iso_to_WbTime(iso)
def claims(self, values):
    """Put together the protoclaims.

    The instance-of (self.SHIPYARD_Q), location and owner claims are
    delegated to the set_* methods. Inception (P571) and dissolution
    (P576) are added if the matching date is present in values.

    @param values: the values extracted using the rules
    @type values: dict
    @return: the protoclaims
    @rtype: dict PID-WD.Statement pairs
    """
    protoclaims = {}
    self.set_is_instance(self.SHIPYARD_Q, protoclaims)
    self.set_location(values, protoclaims)
    self.set_owner(values, protoclaims)

    # dates which map directly onto a property
    date_properties = (
        (u'establishment.date', u'P571'),
        (u'termination.date', u'P576'),
    )
    for date_key, pid in date_properties:
        iso_date = values.get(date_key)
        if iso_date:
            protoclaims[pid] = WD.Statement(helpers.iso_to_WbTime(iso_date))

    return protoclaims
def add_date_claim(self, item, lido_data, ref):
    """
    Add an inception/P571 claim.

    The claim is only added if the 'creation_date' entry of lido_data
    has a non-empty 'earliest' value which is equal to its 'latest'
    value, i.e. if the date is exact. In all other cases nothing is
    added. Always returns None.
    """
    creation_date = lido_data.get('creation_date')
    if not creation_date:
        return None

    # exact date
    earliest = creation_date.get('earliest')
    if not earliest or earliest != creation_date.get('latest'):
        return None

    # make claim
    wb_date = helpers.iso_to_WbTime(earliest)
    if wb_date:
        self.wd.addNewClaim(u'P571', WD.Statement(wb_date), item, ref)
def set_date_qualifier(self, values, key, statement, prop=None):
    """Add a date qualifier to a statement.

    The date is looked up in values under '<key>.date'. That entry must
    exist in values (otherwise a KeyError is raised) but may be empty.

    @param values: the values extracted using the rules
    @type values: dict
    @param key: the key to which the date is associated
        e.g. built for built.date
    @type key: str
    @param statement: statement to add qualifier to
    @type statement: WD.Statement
    @param prop: the property to use, defaults to self.TIME_P/P585
    @type prop: str
    @return: if qualifier was found
    @rtype: bool
    """
    if not prop:
        prop = self.TIME_P

    iso_date = values[u'%s.date' % key]
    if not iso_date:
        return False

    qualifier = WD.Qualifier(P=prop, itis=helpers.iso_to_WbTime(iso_date))
    statement.addQualifier(qualifier)
    return True
def runLayout(self, datasetRules, datasetProtoclaims,
              datasetSanityTest, label, shuffle):
    """
    Execute the basic layout of a run.

    It should be called for a dataset-specific run which sets the
    parameters.

    For each hit from self.generator the values named by the rules are
    populated, a matching Wikidata item is looked up and protoclaims
    are built. If the matched item exists and passes both the general
    and the dataset-specific sanity test, names and properties are
    passed on to self.addNames() and self.addProperties() together with
    a reference built from the 'modified' value.

    The loop stops once self.cutoff (if set) hits have been counted.
    Hits which are skipped because populateValues() failed or because a
    sanity test failed are not counted.

    param datasetRules: a dict of additional Rules or values to look for
    param datasetProtoclaims: a function for populating protoclaims,
        called as datasetProtoclaims(self, values)
    param datasetSanityTest: a function which must return true for
        results to be written to Wikidata, called as
        datasetSanityTest(self, hitItem)
    param label: the key in values to be used for label/alias. set to
        None to skip addNames()
    param shuffle: whether name/label/alias is shuffled or not
        i.e. if name = last, first
    """
    count = 0
    for hit in self.generator:
        # print count, self.cutoff
        # a falsy cutoff means that there is no limit
        if self.cutoff and count >= self.cutoff:
            break
        # some type of feedback
        if count % 100 == 0 and count > 0:
            pywikibot.output('%d entries handled...' % count)
        # Required rules/values to search for
        rules = {
            u'identifier': None,
            u'modified': None,
            u'seeAlso': None,
            u'sameAs': None,
            u'exactMatch': None,
            # not expected
            u'wikidata': None,
            u'libris-id': None,
            u'viaf-id': None,
            u'getty_aat': None,
            u'ulan': None
        }
        # dataset rules are added to, and may override, the required ones
        rules.update(datasetRules)

        # put together empty dict of values then populate
        values = {}
        for k in rules.keys():
            values[k] = None
        if not self.populateValues(values, rules, hit):
            # continue with next hit if problem was encountered
            continue

        # find the matching wikidata item
        hitItem = self.wikidataMatch(values)
        self.current_uuid = values['identifier']
        # @todo: self.current_protoclaims
        #        allows these to be accessed more easily

        # convert values to potential claims
        protoclaims = datasetProtoclaims(self, values)
        self.make_base_protoclaims(values, protoclaims)

        # output info for testing
        if self.verbose:
            pywikibot.output(values)
            pywikibot.output(protoclaims)
            pywikibot.output(hitItem)

        # Add information if a match was found
        if hitItem and hitItem.exists():
            # if redirect then get target instead

            # make sure it passes the sanityTests
            # (a failed test skips the hit without counting it)
            if not self.sanityTest(hitItem):
                continue
            if not datasetSanityTest(self, hitItem):
                continue

            # add name as label/alias
            if label is not None:
                self.addNames(values[label], hitItem, shuffle=shuffle)

            # get the "last modified" timestamp and construct a Reference
            date = helpers.iso_to_WbTime(values[u'modified'])
            ref = self.make_ref(date)

            # add each property (if new) and source it
            self.addProperties(protoclaims, hitItem, ref)

        # allow for limited runs
        count += 1

    # done
    pywikibot.output(u'Handled %d entries' % count)