def getPoisonedLinks(pl):
    """Collect the known corrupted link titles for the page 'pl'.

    The page title is run through date.getAutoFormat() for the language of
    the page's site. When it is not recognised as a date the returned list
    is empty; otherwise it holds the titles that should be removed if seen.
    """
    poisoned = []
    pywikibot.output(u'getting poisoned links for %s' % pl.title())

    dictName, value = date.getAutoFormat(pl.site().language(), pl.title())
    if dictName is None:
        return poisoned

    pywikibot.output(u'date found in %s' % dictName)

    # errors in year BC
    if dictName in date.bcFormats:
        poisoned.extend(fmt % value for fmt in bcDateErrors)

    if dictName == 'yearsBC':
        # i guess this is like friday the 13th for the years
        if value == 398:
            appendFormatedDates(poisoned, dictName, 399)
        appendFormatedDates(poisoned, 'decadesBC', value)
        appendFormatedDates(poisoned, 'yearsAD', value)
    elif dictName == 'yearsAD':
        appendFormatedDates(poisoned, 'decadesAD', value)
        appendFormatedDates(poisoned, 'yearsBC', value)
    elif dictName == 'centuriesBC':
        appendFormatedDates(poisoned, 'decadesBC', value * 100 + 1)
    elif dictName == 'centuriesAD':
        appendFormatedDates(poisoned, 'decadesAD', value * 100 + 1)

    return poisoned
def getPoisonedLinks(pl):
    """Collect the known corrupted link titles for the page 'pl'.

    The page title is run through date.getAutoFormat() for the language of
    the page's site. When it is not recognised as a date the returned list
    is empty; otherwise it holds the titles that should be removed if seen.
    """
    poisoned = []
    pywikibot.output(u'getting poisoned links for %s' % pl.title())

    dictName, value = date.getAutoFormat(pl.site.language(), pl.title())
    if dictName is None:
        return poisoned

    pywikibot.output(u'date found in %s' % dictName)

    # errors in year BC
    if dictName in date.bcFormats:
        poisoned.extend(fmt % value for fmt in bcDateErrors)

    if dictName == 'yearsBC':
        # i guess this is like friday the 13th for the years
        if value == 398:
            appendFormatedDates(poisoned, dictName, 399)
        appendFormatedDates(poisoned, 'decadesBC', value)
        appendFormatedDates(poisoned, 'yearsAD', value)
    elif dictName == 'yearsAD':
        appendFormatedDates(poisoned, 'decadesAD', value)
        appendFormatedDates(poisoned, 'yearsBC', value)
    elif dictName == 'centuriesBC':
        appendFormatedDates(poisoned, 'decadesBC', value * 100 + 1)
    elif dictName == 'centuriesAD':
        appendFormatedDates(poisoned, 'decadesAD', value * 100 + 1)

    return poisoned
def isdate(s):
    """Return True iff s is a date or year.

    The check is done by date.getAutoFormat() for the language of the
    default site (pywikibot.getSite()).
    """
    default_lang = pywikibot.getSite().language()
    format_name, _ = date.getAutoFormat(default_lang, s)
    return format_name is not None
def translate(page, hints=None, auto=True, removebrackets=False, site=None,
              family=None):
    """Return the list of pages suggested by 'hints' and date translation.

    Goes through all entries in 'hints'. Entries for single page titles
    list those pages. Page titles for entries such as "all:" or "xyz:" or
    "20:" are first built from the page title of 'page' and then listed.

    page -- the page being worked on. May be None; hints without an
        explicit title are then skipped and no dates are autotranslated.
    hints -- iterable of strings "codes" or "codes:title". 'codes' is a
        comma separated list whose items are a language code, a number N
        (the first N entries of family.languages_by_size), 'all' (all of
        family.languages_by_size) or a key of family.language_groups.
    auto -- if true, a title recognised by date.getAutoFormat() is
        translated through every entry of date.formats for that format,
        except for languages whose date.maxyearBC / date.maxyearAD limit
        is exceeded by the value.
    removebrackets -- if true and the title is taken from 'page', every
        parenthesised part of the title is replaced by a single space.
    site -- site of 'page'; defaults to page.site().
    family -- family used to resolve the codes; defaults to site.family.
    """
    result = []
    if site is None and page:
        site = page.site()
    if family is None and site:
        family = site.family
    # Always bound, so a call without any site cannot hit an unbound local.
    sitelang = site.language() if site else None

    for h in hints or ():
        # "-hint:xy" carries only codes; "-hint:xy:Title" carries a title too
        codes, _, newname = h.partition(':')
        if newname == '':
            # if given as -hint:xy or -hint:xy:, assume that there should
            # be a page in language xy with the same title as the page
            # we're currently working on ...
            if page is None:
                continue
            ns = page.namespace()
            if ns:
                newname = u'%s:%s' % (family.namespace('_default', ns),
                                      page.titleWithoutNamespace())
            else:
                # article in the main namespace
                newname = page.title()
            # ... optionally without its parenthesised parts.
            if removebrackets:
                # Same pattern as ur"\W*?\(.*?\)\W*?", spelled with escaped
                # backslashes so the literal also parses on Python 3.
                newname = re.sub(
                    re.compile(u"\\W*?\\(.*?\\)\\W*?", re.UNICODE),
                    u" ", newname)

        expanded = []
        for code in codes.split(','):
            try:
                number = int(code)
            except ValueError:
                if code == 'all':
                    _join_to_(expanded, family.languages_by_size)
                elif code in family.language_groups:
                    _join_to_(expanded, family.language_groups[code])
                elif code:
                    _join_to_(expanded, [code])
            else:
                _join_to_(expanded, family.languages_by_size[:number])

        for newcode in expanded:
            x = None
            if newcode in family.langs:
                if page is None or \
                   (newcode != sitelang and
                    pywikibot.getSite().family.name
                        not in family.interwiki_forwarded_from):
                    x = pywikibot.Page(
                        pywikibot.getSite(fam=family, code=newcode), newname)
            elif newcode in family.interwiki_forwarded_from:
                x = pywikibot.Page(
                    pywikibot.getSite(fam=newcode, code=newcode), newname)
            else:
                if pywikibot.verbose:
                    pywikibot.output(
                        u"Ignoring the unknown language code %s" % newcode)
            if x:
                _join_to_(result, [x])

    # Autotranslate dates into all other languages, the rest will come from
    # existing interwiki links.
    if auto and page:
        # search inside all dictionaries for this link
        dictName, value = date.getAutoFormat(sitelang, page.title())
        if dictName:
            # Only year formats have a per-language maximum.
            if dictName == 'yearsBC':
                maxyear = date.maxyearBC
            elif dictName == 'yearsAD':
                maxyear = date.maxyearAD
            else:
                maxyear = {}

            def beyond_limit(lang):
                """True if 'value' exceeds the maximum year of 'lang'."""
                return lang in maxyear and value > maxyear[lang]

            # The former test "not (BC...) or (AD...)" parsed as
            # "(not BC) or AD" and so never honoured the AD limit of the
            # source language; both limits are checked now.
            if not beyond_limit(sitelang):
                pywikibot.output(
                    u'TitleTranslate: %s was recognized as %s with value %d'
                    % (page.title(), dictName, value))
                for entryLang, entry in date.formats[dictName].iteritems():
                    if entryLang == sitelang or beyond_limit(entryLang):
                        continue
                    newname = entry(value)
                    x = pywikibot.Page(
                        pywikibot.getSite(code=entryLang, fam=family),
                        newname)
                    _join_to_(result, [x])
    return result
def isdate(s):
    """Return True iff s is a date or year.

    The check is done by date.getAutoFormat() for the language of the
    default site (pywikibot.getSite()).
    """
    default_lang = pywikibot.getSite().language()
    format_name, _ = date.getAutoFormat(default_lang, s)
    return format_name is not None
def translate(page, hints=None, auto=True, removebrackets=False):
    """Return the list of pages suggested by 'hints' and date translation.

    page -- the page being worked on; its site supplies the family and
        the source language.
    hints -- iterable of strings "codes" or "codes:title". 'codes' is a
        number N (the first N entries of family.languages_by_size), 'all'
        (all of family.languages_by_size), a key of
        family.language_groups, or a comma separated list of language
        codes. Without a title, the title of 'page' is used.
    auto -- if true, a title recognised by date.getAutoFormat() is
        translated through every entry of date.formats for that format,
        except for languages whose date.maxyearBC / date.maxyearAD limit
        is exceeded by the value.
    removebrackets -- if true and the title is taken from 'page', every
        parenthesised part of the title is replaced by a single space.

    Pages are never listed twice and the language of 'page' itself is
    skipped.
    """
    result = []
    site = page.site()
    sitelang = site.language()

    for h in hints or ():
        # "-hint:xy" carries only codes; "-hint:xy:Title" carries a title too
        codes, _, newname = h.partition(':')
        if newname == '':
            # if given as -hint:xy or -hint:xy:, assume that there should
            # be a page in language xy with the same title as the page
            # we're currently working on ...
            ns = page.namespace()
            if ns:
                newname = u'%s:%s' % (site.family.namespace('_default', ns),
                                      page.titleWithoutNamespace())
            else:
                # article in the main namespace
                newname = page.title()
            # ... optionally without its parenthesised parts.
            if removebrackets:
                # Same pattern as ur"\W*?\(.*?\)\W*?", spelled with escaped
                # backslashes so the literal also parses on Python 3.
                newname = re.sub(
                    re.compile(u"\\W*?\\(.*?\\)\\W*?", re.UNICODE),
                    u" ", newname)

        try:
            number = int(codes)
        except ValueError:
            if codes == 'all':
                codes = site.family.languages_by_size
            elif codes in site.family.language_groups:
                codes = site.family.language_groups[codes]
            else:
                codes = codes.split(',')
        else:
            codes = site.family.languages_by_size[:number]

        for newcode in codes:
            if newcode in site.languages():
                if newcode != sitelang:
                    x = wikipedia.Page(site.getSite(code=newcode), newname)
                    if x not in result:
                        result.append(x)
            else:
                if wikipedia.verbose:
                    wikipedia.output(
                        u"Ignoring unknown language code %s" % newcode)

    # Autotranslate dates into all other languages, the rest will come from
    # existing interwiki links.
    if auto:
        # search inside all dictionaries for this link
        dictName, value = date.getAutoFormat(sitelang, page.title())
        if dictName:
            # Only year formats have a per-language maximum.
            if dictName == 'yearsBC':
                maxyear = date.maxyearBC
            elif dictName == 'yearsAD':
                maxyear = date.maxyearAD
            else:
                maxyear = {}

            def beyond_limit(lang):
                """True if 'value' exceeds the maximum year of 'lang'."""
                return lang in maxyear and value > maxyear[lang]

            # The former test "not (BC...) or (AD...)" parsed as
            # "(not BC) or AD" and so never honoured the AD limit of the
            # source language; both limits are checked now.
            if not beyond_limit(sitelang):
                wikipedia.output(
                    u'TitleTranslate: %s was recognized as %s with value %d'
                    % (page.title(), dictName, value))
                for entryLang, entry in date.formats[dictName].iteritems():
                    if entryLang == sitelang or beyond_limit(entryLang):
                        continue
                    newname = entry(value)
                    x = wikipedia.Page(
                        wikipedia.getSite(code=entryLang, fam=site.family),
                        newname)
                    if x not in result:
                        result.append(x)  # add new page
    return result
def translate(page, hints=None, auto=True, removebrackets=False, site=None,
              family=None):
    """Return the list of pages suggested by 'hints' and date translation.

    Goes through all entries in 'hints'. Entries for single page titles
    list those pages. Page titles for entries such as "all:" or "xyz:" or
    "20:" are first built from the page title of 'page' and then listed.

    page -- the page being worked on. May be None; hints without an
        explicit title are then skipped and no dates are autotranslated.
    hints -- iterable of strings "codes" or "codes:title". 'codes' is a
        comma separated list whose items are a language code, a number N
        (the first N entries of family.languages_by_size), 'all' (all of
        family.languages_by_size) or a key of family.language_groups.
    auto -- if true, a title recognised by date.getAutoFormat() is
        translated through every entry of date.formats for that format,
        except for languages whose date.maxyearBC / date.maxyearAD limit
        is exceeded by the value.
    removebrackets -- if true and the title is taken from 'page', every
        parenthesised part of the title is replaced by a single space.
    site -- site of 'page'; defaults to page.site.
    family -- family used to resolve the codes; defaults to site.family.
    """
    result = []
    if site is None and page:
        site = page.site
    if family is None and site:
        family = site.family
    # Always bound, so a call without any site cannot hit an unbound local.
    sitelang = site.language() if site else None

    for h in hints or ():
        # "-hint:xy" carries only codes; "-hint:xy:Title" carries a title too
        codes, _, newname = h.partition(':')
        if newname == '':
            # if given as -hint:xy or -hint:xy:, assume that there should
            # be a page in language xy with the same title as the page
            # we're currently working on ...
            if page is None:
                continue
            ns = page.namespace()
            if ns:
                newname = u'%s:%s' % (family.namespace('_default', ns),
                                      page.title(withNamespace=False))
            else:
                # article in the main namespace
                newname = page.title()
            # ... optionally without its parenthesised parts.
            if removebrackets:
                # Same pattern as ur"\W*?\(.*?\)\W*?", spelled with escaped
                # backslashes so the literal also parses on Python 3.
                newname = re.sub(
                    re.compile(u"\\W*?\\(.*?\\)\\W*?", re.UNICODE),
                    u" ", newname)

        expanded = []
        for code in codes.split(','):
            try:
                number = int(code)
            except ValueError:
                if code == 'all':
                    _join_to_(expanded, family.languages_by_size)
                elif code in family.language_groups:
                    _join_to_(expanded, family.language_groups[code])
                elif code:
                    _join_to_(expanded, [code])
            else:
                _join_to_(expanded, family.languages_by_size[:number])

        for newcode in expanded:
            x = None
            if newcode in family.langs:
                if page is None or \
                   (newcode != sitelang and
                    pywikibot.getSite().family.name
                        not in family.interwiki_forwarded_from):
                    x = pywikibot.Page(
                        pywikibot.getSite(fam=family, code=newcode), newname)
            elif newcode in family.interwiki_forwarded_from:
                x = pywikibot.Page(
                    pywikibot.getSite(fam=newcode, code=newcode), newname)
            else:
                if pywikibot.verbose:
                    pywikibot.output(
                        u"Ignoring the unknown language code %s" % newcode)
            if x:
                _join_to_(result, [x])

    # Autotranslate dates into all other languages, the rest will come from
    # existing interwiki links.
    if auto and page:
        # search inside all dictionaries for this link
        dictName, value = date.getAutoFormat(sitelang, page.title())
        if dictName:
            # Only year formats have a per-language maximum.
            if dictName == 'yearsBC':
                maxyear = date.maxyearBC
            elif dictName == 'yearsAD':
                maxyear = date.maxyearAD
            else:
                maxyear = {}

            def beyond_limit(lang):
                """True if 'value' exceeds the maximum year of 'lang'."""
                return lang in maxyear and value > maxyear[lang]

            # The former test "not (BC...) or (AD...)" parsed as
            # "(not BC) or AD" and so never honoured the AD limit of the
            # source language; both limits are checked now.
            if not beyond_limit(sitelang):
                pywikibot.output(
                    u'TitleTranslate: %s was recognized as %s with value %d'
                    % (page.title(), dictName, value))
                for entryLang, entry in date.formats[dictName].iteritems():
                    if entryLang == sitelang or beyond_limit(entryLang):
                        continue
                    newname = entry(value)
                    x = pywikibot.Page(
                        pywikibot.getSite(code=entryLang, fam=family),
                        newname)
                    _join_to_(result, [x])
    return result
def translate(page, hints=None, auto=True, removebrackets=False):
    """Return the list of pages suggested by 'hints' and date translation.

    page -- the page being worked on; its site supplies the family and
        the source language.
    hints -- iterable of strings "codes" or "codes:title". 'codes' is a
        number N (the first N entries of family.languages_by_size), 'all'
        (all of family.languages_by_size), a key of
        family.language_groups, or a comma separated list of language
        codes. Without a title, the title of 'page' is used.
    auto -- if true, a title recognised by date.getAutoFormat() is
        translated through every entry of date.formats for that format,
        except for languages whose date.maxyearBC / date.maxyearAD limit
        is exceeded by the value.
    removebrackets -- if true and the title is taken from 'page', every
        parenthesised part of the title is replaced by a single space.

    Pages are never listed twice and the language of 'page' itself is
    skipped.
    """
    result = []
    site = page.site()
    sitelang = site.language()

    for h in hints or ():
        # "-hint:xy" carries only codes; "-hint:xy:Title" carries a title too
        codes, _, newname = h.partition(':')
        if newname == '':
            # if given as -hint:xy or -hint:xy:, assume that there should
            # be a page in language xy with the same title as the page
            # we're currently working on ...
            ns = page.namespace()
            if ns:
                newname = u'%s:%s' % (site.family.namespace('_default', ns),
                                      page.titleWithoutNamespace())
            else:
                # article in the main namespace
                newname = page.title()
            # ... optionally without its parenthesised parts.
            if removebrackets:
                # Same pattern as ur"\W*?\(.*?\)\W*?", spelled with escaped
                # backslashes so the literal also parses on Python 3.
                newname = re.sub(
                    re.compile(u"\\W*?\\(.*?\\)\\W*?", re.UNICODE),
                    u" ", newname)

        try:
            number = int(codes)
        except ValueError:
            if codes == 'all':
                codes = site.family.languages_by_size
            elif codes in site.family.language_groups:
                codes = site.family.language_groups[codes]
            else:
                codes = codes.split(',')
        else:
            codes = site.family.languages_by_size[:number]

        for newcode in codes:
            if newcode in site.languages():
                if newcode != sitelang:
                    x = pywikibot.Page(site.getSite(code=newcode), newname)
                    if x not in result:
                        result.append(x)
            else:
                if pywikibot.verbose:
                    pywikibot.output(
                        u"Ignoring unknown language code %s" % newcode)

    # Autotranslate dates into all other languages, the rest will come from
    # existing interwiki links.
    if auto:
        # search inside all dictionaries for this link
        dictName, value = date.getAutoFormat(sitelang, page.title())
        if dictName:
            # Only year formats have a per-language maximum.
            if dictName == 'yearsBC':
                maxyear = date.maxyearBC
            elif dictName == 'yearsAD':
                maxyear = date.maxyearAD
            else:
                maxyear = {}

            def beyond_limit(lang):
                """True if 'value' exceeds the maximum year of 'lang'."""
                return lang in maxyear and value > maxyear[lang]

            # The former test "not (BC...) or (AD...)" parsed as
            # "(not BC) or AD" and so never honoured the AD limit of the
            # source language; both limits are checked now.
            if not beyond_limit(sitelang):
                pywikibot.output(
                    u'TitleTranslate: %s was recognized as %s with value %d'
                    % (page.title(), dictName, value))
                for entryLang, entry in date.formats[dictName].iteritems():
                    if entryLang == sitelang or beyond_limit(entryLang):
                        continue
                    newname = entry(value)
                    x = pywikibot.Page(
                        pywikibot.getSite(code=entryLang, fam=site.family),
                        newname)
                    if x not in result:
                        result.append(x)  # add new page
    return result