def data_save(self, page, data):
    """Store the given content to Wikidata.

    @param page: Page containing template.
    @type page: page object
    @param data: New content.
    @type data: dict

    Returns nothing, but stores the changed content to linked labels.
    """
    # DRTRIGON-130: check for changes and then write/change/set values
    datapage = pywikibot.DataPage(self.site, page.title())
    # Item key u'<BotName>:<pagename>' derived from the data page title.
    dataitem = u'%s:%s' % (self._bot_config['BotName'],
                           datapage.title().split(u':')[1])
    # Collect target entities: every article in the category named after the
    # item key (the alias encodes an optional per-page property id via
    # sortkeyprefix; 'id' is the lower-cased talk-page toggle title) ...
    links = [{u'aliases': [u'%s:%s' % (dataitem, p.sortkeyprefix)],
              u'id': p.toggleTalkPage().title().lower(), }
             for p in catlib.Category(self.site, dataitem).articles()]
    # ... plus whatever the entity search returns for the same key.
    links += datapage.searchentities(dataitem)
    for element in links:
        # Default property id from bot config; may be overridden per alias.
        propid = int(self._bot_config['data_PropertyId'])
        el = element[u'aliases'][0].split(u':')
        item = el[2]
        if item not in data:
            # No value was produced for this item; store an explicit N/A
            # marker instead of skipping the entity.
            pywikibot.output(u'Value "%s" not found.' % (item,))
            data[item] = u'%s: N/A' % self._bot_config['BotName']
        if len(el) > 3:
            # Alias carries an explicit property id as its 4th component.
            propid = int(el[3])
        dataoutpage = pywikibot.DataPage(self.site, element['id'])
        # check for changes and then write/change/set values
        summary = u'Bot: update data because of configuration on %s.' % page.title(asLink=True)
        buf = dataoutpage.get()
        # Existing claims for this property (claim['m'][1] holds the
        # property id, claim['m'][3] the stored value).
        claim = [claim for claim in buf[u'claims']
                 if (claim['m'][1] == propid)]
        # TODO: does this check (if) work with multiple claims per property?
        if (not claim) or (claim[0]['m'][3] != data[item]):
            # Value is new or differs from what is stored: write it.
            pywikibot.output(u'%s in %s changed to "%s"' %
                             (element[u'aliases'][0],
                              dataoutpage.title(asLink=True),
                              data[item]))
            # References attached to the new claim: the source data page
            # (string snak on the same property) plus a 'point in time'
            # (P585) snak stamped with the current UTC time.
            dataoutpage.editclaim(
                u'p%s' % propid, data[item],
                refs={"p%s" % propid:
                      [{"snaktype": "value",
                        "property": "p%s" % propid,
                        "datavalue": {u'type': u'string',
                                      u'value': datapage.title()}},
                       {"snaktype": "value",
                        "property": "p585",  # point in time
                        #"property": "p578",  # Sandbox-TimeValue
                        "datavalue":
                            {u'type': u'time',
                             u'value': {u'after': 0,
                                        u'precision': 11,
                                        u'time': (u'+0000000%sZ' % pywikibot.Timestamp.now().isoformat().split('.')[0]),
                                        u'timezone': 0,
                                        u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0}}}, ]},
                comment=summary)
def checkCoordinates(pageTitle, dataPageId, lat, lon, cursor): addStatus = False if DEBUG: print "title: %s, dataPage: %s, lat: %s, lon: %s" % ( pageTitle, dataPageId, lat, lon) repo = site.data_repository() if dataPageId: if not hasCoordinates(dataPageId, cursor): dataPageTitle = "Q%s" % dataPageId data = pywikibot.DataPage(repo, dataPageTitle) if hasCoordinatesAPI(data): logging.warning( '[[%s]]: coords exist, hasCoordinates() didnt catch it!' % (pageTitle)) return False else: addStatus = addCoordinates(data, pageTitle, lat, lon, lang) elif pageTitle: page = pywikibot.Page(site, pageTitle) data = pywikibot.DataPage(page) if data.exists(): print '[[%s]]: data page already exists!' % (pageTitle) logging.info('[[%s]]: data page already exists!' % (pageTitle)) else: data.createitem(u"Bot: Importing article from %s.wikipedia" % lang) logging.info(u'[[%s]]: creating data page' % (pageTitle)) if data.exists(): if not hasCoordinatesAPI(data): addStatus = addCoordinates(data, pageTitle, lat, lon, lang) else: print 'ERROR: NO DATA PAGE' logging.warning('[[%s]]: no data page in Wikidata' % (pageTitle)) return addStatus
def ClaimFinder(page_title, claim_num):
    """Resolve a claim of a page's data item to a Persian title.

    @param page_title: title of the article whose data item is inspected
    @param claim_num: numeric property id to look for (e.g. 31)

    Returns the result of data2fa() for the first matching claim's item,
    or False when the page has no data item / no matching claim.
    """
    fa_result = False
    en_wdata = wikipedia.DataPage(wikipedia.Page(faSite, page_title))
    try:
        items = en_wdata.get()
    except Exception:
        # Item missing or fetch failed: best-effort "nothing found".
        return False
    if items['claims']:
        for claim in items['claims']:
            # claim['m'][1] holds the property id, claim['m'][3] the value
            # snak (same layout this codebase relies on elsewhere).
            if claim['m'][1] == claim_num:
                fa_result = data2fa(claim[u'm'][3][u'numeric-id'])
                break
    # BUG FIX: the original computed fa_result but never returned it, so
    # every caller received None; also dropped the unused fa_result_more.
    return fa_result
def data2fa(number, strict=False):
    """Map a Wikidata item number to a Persian title.

    Prefers the fawiki sitelink; unless *strict* is set, falls back to the
    'fa' label and finally the 'en' label. Returns '' when nothing usable
    is available or the item cannot be fetched.
    """
    entry = wikipedia.DataPage(int(number))
    try:
        content = entry.get()
    except:
        return ""
    # An empty 'links' field may arrive as a list; normalise it to a dict
    # so the lookups below behave uniformly.
    if isinstance(content['links'], list):
        content['links'] = {}
    if content['links'].has_key('fawiki'):
        return content['links']['fawiki']['name']
    if strict:
        return ""
    if content['label'].has_key('fa'):
        return content['label']['fa']
    try:
        return content['label']['en']
    except:
        return ""
def sep(lang):
    """Return the name separator for *lang* (middle dots for ja/zh)."""
    if lang == 'ja':
        return u'・'
    if lang == 'zh':
        return u'·'
    return ' '


# --nc flag: matched anywhere in the joined argument string.
nc = '--nc' in '|'.join(sys.argv)
for name in aa:
    if not name:
        continue
    print name
    # 'name' is a Q-id like 'Q42'; fetch the corresponding data item.
    data = wikipedia.DataPage(int(name.split('Q')[1]))
    try:
        items = data.get()
    except:
        continue
    # Collect numeric targets of P31 (instance of) and P27 (citizenship).
    P31 = []
    P27 = []
    for claim in items['claims']:
        if claim['m'][1] == 31:
            try:
                P31.append(claim['m'][3]['numeric-id'])
            except:
                pass
        if claim['m'][1] == 27:
            try:
                # NOTE(review): this snippet appears truncated here — the
                # except handler and the rest of the loop are missing from
                # the visible source.
                P27.append(claim['m'][3]['numeric-id'])
def test_DataPage_removereferences(self):
    """DataPage must expose a callable removereferences member."""
    item = pywikibot.DataPage(self.repo, ITEM_SINGLE_GENERIC)
    self._check_member(item, "removereferences", call=True)
def test_DataPage_linktitles(self):
    """DataPage must expose a callable linktitles member."""
    item = pywikibot.DataPage(self.repo, ITEM_SINGLE_GENERIC)
    self._check_member(item, "linktitles", call=True)
def test_DataPage_searchentities(self):
    """DataPage must expose a callable searchentities member."""
    item = pywikibot.DataPage(self.repo, ITEM_SINGLE_GENERIC)
    self._check_member(item, "searchentities", call=True)
def test_DataPage_get(self):
    """DataPage must expose callable get/getentities members; get() runs."""
    item = pywikibot.DataPage(self.repo, ITEM_SINGLE_GENERIC)
    for member in ("get", "getentities"):
        self._check_member(item, member, call=True)
    item.get()
def test_DataPage_createitem(self):
    """DataPage must expose a callable createitem member."""
    item = pywikibot.DataPage(self.repo, ITEM_SINGLE_GENERIC)
    self._check_member(item, "createitem", call=True)
def test_DataPage_editclaim(self):
    """DataPage must expose a callable editclaim member."""
    item = pywikibot.DataPage(self.repo, ITEM_SINGLE_GENERIC)
    self._check_member(item, "editclaim", call=True)
# Main loop: scan generated pages and derive the settlement type (P31
# target) from their Estonian category names.
logging.basicConfig(filename='add.log', level=logging.DEBUG)
# NOTE(review): logging.basicConfig() is a no-op once the root logger is
# configured, so this second call's format is ignored — confirm intent.
logging.basicConfig(format='%(asctime)s %(message)s')
for page in generator:
    print page
    if not page.exists():
        print 'ERROR: NO CONTENT'
        logging.warning('[[%s]]: couldnt get page content' % (page))
        continue
    if page.namespace():
        # Only main-namespace (article) pages are processed.
        print 'ERROR: NOT AN ARTICLE'
        continue
    data = pywikibot.DataPage(page)
    settlement = ''
    claims_rules[31] = ''
    # The last matching category wins if a page sits in several of them.
    for cat in page.categories():
        if re.search(ur'\slinnad', cat.title()):
            settlement = 'q3957'  # town
        elif re.search(ur'\salevid', cat.title()):
            settlement = 'q3374262'  # market town
        elif re.search(ur'\salevikud', cat.title()):
            settlement = 'q3744870'  # et:alevik
        elif re.search(ur'\skülad', cat.title()):
            settlement = 'q532'  # village
    if settlement:
        claims_rules[31] = settlement
    # NOTE(review): the loop body appears to continue beyond the visible
    # source ('data' and claims_rules are prepared but not yet written).
def procesPage(self, page):
    """Process a single page: harvest template fields into Wikidata claims.

    For each configured template field found on *page*, extracts the first
    wiki link from the field value and writes a claim pointing at the
    linked page's data item (skipping properties that already have a
    claim).
    """
    item = pywikibot.DataPage(page)
    pywikibot.output('Processing %s' % page)
    if not item.exists():
        pywikibot.output('%s doesn\'t have a wikidata item :(' % page)
        #TODO FIXME: We should provide an option to create the page
    else:
        pagetext = page.get()
        pagetext = pywikibot.removeDisabledParts(pagetext)
        templates = pywikibot.extract_templates_and_params(pagetext)
        for (template, fielddict) in templates:
            # We found the template we were looking for
            if template.replace(u'_', u' ') == self.templateTitle:
                for field, value in fielddict.items():
                    # This field contains something useful for us
                    if field in self.fields:
                        # Check if the property isn't already set
                        claim = self.fields[field]
                        if claim in item.get().get('claims'):
                            pywikibot.output(
                                u'A claim for %s already exists. Skipping'
                                % (claim, ))
                            # TODO FIXME: This is a very crude way of dupe
                            # checking
                        else:
                            # Try to extract a valid page
                            match = re.search(
                                re.compile(
                                    r'\[\[(?P<title>[^\]|[#<>{}]*)(\|.*?)?\]\]'
                                ), value)
                            if match:
                                try:
                                    link = match.group(1)
                                    linkedPage = pywikibot.Page(
                                        self.site, link)
                                    # Follow redirects to the real target.
                                    if linkedPage.isRedirectPage():
                                        linkedPage = linkedPage.getRedirectTarget(
                                        )
                                    linkedItem = pywikibot.DataPage(
                                        linkedPage)
                                    pywikibot.output(
                                        'Adding %s --> %s'
                                        % (claim, linkedItem.getID()))
                                    # NOTE(review): self.site is used as an
                                    # attribute above but called as
                                    # self.site() below — one of the two is
                                    # likely wrong; confirm which form the
                                    # enclosing class provides.
                                    if self.setSource(
                                            self.site().language()):
                                        item.editclaim(
                                            str(claim), linkedItem.getID(),
                                            refs={
                                                self.setSource(
                                                    self.site().language())
                                            })
                                    else:
                                        item.editclaim(
                                            str(claim), linkedItem.getID())
                                except pywikibot.NoPage:
                                    # NOTE(review): if NoPage is raised
                                    # before linkedItem is bound (e.g. by
                                    # getRedirectTarget), this line raises
                                    # UnboundLocalError instead of logging.
                                    pywikibot.output(
                                        "[[%s]] doesn't exist so I can't link to it"
                                        % linkedItem.title())