def getPoisonedLinks(pl):
    """Return the list of known corrupted links for the page 'pl'.

    The title of 'pl' is run through date.getAutoFormat() using the language
    of the page's site. When no date format is recognised the list is empty.
    Otherwise the list is filled, in this order, with:

    * every pattern of bcDateErrors formatted with the recognised value, if
      the format name is listed in date.bcFormats;
    * whatever appendFormatedDates() adds for the related formats (the
      year/decade of the opposite era, or the first decade of a century).

    NOTE(review): appendFormatedDates is defined elsewhere; presumably it
    appends the titles of that format/value for the known languages.
    """
    result = []
    pywikibot.output(u'getting poisoned links for %s' % pl.title())
    dictName, value = date.getAutoFormat(pl.site().language(), pl.title())
    if dictName is None:
        # Not a date or year page: nothing is poisoned.
        return result

    pywikibot.output( u'date found in %s' % dictName )

    # errors in year BC
    if dictName in date.bcFormats:
        result.extend([pattern % value for pattern in bcDateErrors])

    # i guess this is like friday the 13th for the years
    if value == 398 and dictName == 'yearsBC':
        appendFormatedDates(result, dictName, 399)

    # A year is confused with its decade and with the same year of the
    # other era; the recognised value is reused unchanged.
    sameValueFormats = {
        'yearsBC': ('decadesBC', 'yearsAD'),
        'yearsAD': ('decadesAD', 'yearsBC'),
    }
    for otherFormat in sameValueFormats.get(dictName, ()):
        appendFormatedDates(result, otherFormat, value)

    # A century is confused with the decade numbered value * 100 + 1.
    centuryDecades = {
        'centuriesBC': 'decadesBC',
        'centuriesAD': 'decadesAD',
    }
    if dictName in centuryDecades:
        appendFormatedDates(result, centuryDecades[dictName], value * 100 + 1)
    return result
Ejemplo n.º 2
0
def getPoisonedLinks(pl):
    """Return the list of known corrupted links for the page 'pl'.

    The title of 'pl' is run through date.getAutoFormat() using the language
    of the page's site. When no date format is recognised the list is empty.
    Otherwise the list is filled, in this order, with:

    * every pattern of bcDateErrors formatted with the recognised value, if
      the format name is listed in date.bcFormats;
    * whatever appendFormatedDates() adds for the related formats (the
      year/decade of the opposite era, or the first decade of a century).

    NOTE(review): appendFormatedDates is defined elsewhere; presumably it
    appends the titles of that format/value for the known languages.
    """
    result = []
    pywikibot.output(u'getting poisoned links for %s' % pl.title())
    dictName, value = date.getAutoFormat(pl.site.language(), pl.title())
    if dictName is None:
        # Not a date or year page: nothing is poisoned.
        return result

    pywikibot.output(u'date found in %s' % dictName)

    # errors in year BC
    if dictName in date.bcFormats:
        result.extend([pattern % value for pattern in bcDateErrors])

    # i guess this is like friday the 13th for the years
    if value == 398 and dictName == 'yearsBC':
        appendFormatedDates(result, dictName, 399)

    # A year is confused with its decade and with the same year of the
    # other era; the recognised value is reused unchanged.
    sameValueFormats = {
        'yearsBC': ('decadesBC', 'yearsAD'),
        'yearsAD': ('decadesAD', 'yearsBC'),
    }
    for otherFormat in sameValueFormats.get(dictName, ()):
        appendFormatedDates(result, otherFormat, value)

    # A century is confused with the decade numbered value * 100 + 1.
    centuryDecades = {
        'centuriesBC': 'decadesBC',
        'centuriesAD': 'decadesAD',
    }
    if dictName in centuryDecades:
        appendFormatedDates(result, centuryDecades[dictName], value * 100 + 1)
    return result
Ejemplo n.º 3
0
def isdate(s):
    """Return True iff 's' is recognised as a date or year title.

    The check is made with date.getAutoFormat() for the language of the
    default site (pywikibot.getSite()); 's' counts as a date when a format
    name is returned for it.
    """
    defaultLang = pywikibot.getSite().language()
    formatName, _ = date.getAutoFormat(defaultLang, s)
    return formatName is not None
def translate(page, hints = None, auto = True, removebrackets = False, site = None, family = None):
    """
    Please comment your source code! --Daniel

    Does some magic stuff. Returns a list of pages.

    Goes through all entries in 'hints'. Returns a list of pages.

    Entries for single page titles list those pages. Page titles for entries
    such as "all:" or "xyz:" or "20:" are first built from the page title of
    'page' and then listed. When 'removebrackets' is True, a trailing pair of
    brackets and the text between them is removed from the page title.
    If 'auto' is true, known year and date page titles are autotranslated
    to all known target languages and inserted into the list.
    
    """
    result = []
    if site is None and page:
       site = page.site()
    if family is None and site:
       family = site.family
    if site:
       sitelang = site.language()
    if hints:
        for h in hints:
            if ':' not in h:
                # argument given as -hint:xy where xy is a language code
                codes = h
                newname = ''
            else:
                codes, newname = h.split(':', 1)
            if newname == '':
                # if given as -hint:xy or -hint:xy:, assume that there should
                # be a page in language xy with the same title as the page
                # we're currently working on ...
                if page is None:
                   continue
                ns = page.namespace()
                if ns:
                    newname = u'%s:%s' % (family.namespace('_default', ns),
                                          page.titleWithoutNamespace())
                else:
                    # article in the main namespace
                    newname = page.title()
                # ... unless we do want brackets
                if removebrackets:
                    newname = re.sub(re.compile(ur"\W*?\(.*?\)\W*?", re.UNICODE), u" ", newname)
            codesplit = codes.split(',')
            codes = []
            for code in codesplit:
                try:
                    number = int(code)
                    _join_to_(codes, family.languages_by_size[:number] )
                except ValueError:
                    if code == 'all':
                        _join_to_(codes, family.languages_by_size )
                    elif code in family.language_groups:
                        _join_to_(codes, family.language_groups[code] )
                    elif code:
                        _join_to_(codes, [ code ] )
            for newcode in codes:
                x = None
                if newcode in family.langs.keys():
                    if page is None or \
                       (newcode != sitelang and
                        pywikibot.getSite().family.name
                        not in family.interwiki_forwarded_from):
                        x = pywikibot.Page(pywikibot.getSite(fam=family, code=newcode), newname)
                elif newcode in family.interwiki_forwarded_from:
                    x = pywikibot.Page(pywikibot.getSite(fam=newcode, code=newcode), newname)
                else:
                    if pywikibot.verbose:
                        pywikibot.output(u"Ignoring the unknown language code %s" % newcode)
                if x:
                    _join_to_(result, [ x ] )

    # Autotranslate dates into all other languages, the rest will come from
    # existing interwiki links.
    if auto and page:
        # search inside all dictionaries for this link
        dictName, value = date.getAutoFormat(sitelang, page.title())
        if dictName:
            if not (dictName == 'yearsBC' and
                    sitelang in date.maxyearBC and
                    value > date.maxyearBC[sitelang]) or \
                    (dictName == 'yearsAD' and
                     sitelang in date.maxyearAD and
                     value > date.maxyearAD[sitelang]):
                pywikibot.output(
                    u'TitleTranslate: %s was recognized as %s with value %d'
                    % (page.title(), dictName, value))
                for entryLang, entry in date.formats[dictName].iteritems():
                    if entryLang != sitelang:
                        if dictName == 'yearsBC' and \
                           entryLang in date.maxyearBC and \
                           value > date.maxyearBC[entryLang]:
                            pass
                        elif dictName == 'yearsAD' and \
                             entryLang in date.maxyearAD and \
                             value > date.maxyearAD[entryLang]:
                            pass
            else:
                            newname = entry(value)
                            x = pywikibot.Page(
                                pywikibot.getSite(code=entryLang,
                                                  fam=family), newname)
                            _join_to_(result, [ x ] )
    return result
Ejemplo n.º 5
0
def isdate(s):
    """Return True iff 's' is recognised as a date or year title.

    The check is made with date.getAutoFormat() for the language of the
    default site (pywikibot.getSite()); 's' counts as a date when a format
    name is returned for it.
    """
    defaultLang = pywikibot.getSite().language()
    formatName, _ = date.getAutoFormat(defaultLang, s)
    return formatName is not None
Ejemplo n.º 6
0
def translate(page, hints = None, auto = True, removebrackets = False):
    """
    Please comment your source code! --Daniel

    Does some magic stuff. Returns a list of pages.
    """
    result = []
    site = page.site()
    if hints:
        for h in hints:
            if ':' not in h:
                # argument given as -hint:xy where xy is a language code
                codes = h
                newname = ''
            else:
                codes, newname = h.split(':', 1)
            if newname == '':
                # if given as -hint:xy or -hint:xy:, assume that there should
                # be a page in language xy with the same title as the page
                # we're currently working on ...
                ns = page.namespace()
                if ns:
                    newname = u'%s:%s' % (site.family.namespace('_default', ns), page.titleWithoutNamespace())
                else:
                    # article in the main namespace
                    newname = page.title()
                # ... unless we do want brackets
                if removebrackets:
                    newname = re.sub(re.compile(ur"\W*?\(.*?\)\W*?", re.UNICODE), u" ", newname)
            try:
                number = int(codes)
                codes = site.family.languages_by_size[:number]
            except ValueError:
                if codes == 'all':
                    codes = site.family.languages_by_size
                elif codes in site.family.language_groups:
                    codes = site.family.language_groups[codes]
                else:
                    codes = codes.split(',')
            for newcode in codes:
                if newcode in site.languages():
                    if newcode != site.language():
                        x = wikipedia.Page(site.getSite(code=newcode), newname)
                        if x not in result:
                            result.append(x)
                else:
                    if wikipedia.verbose:
                        wikipedia.output(u"Ignoring unknown language code %s"%newcode)

    # Autotranslate dates into all other languages, the rest will come from existing interwiki links.
    if auto:
        # search inside all dictionaries for this link
        dictName, value = date.getAutoFormat( page.site().language(), page.title() )
        if dictName:
            if not (dictName == 'yearsBC' and date.maxyearBC.has_key(page.site().language()) and value > date.maxyearBC[page.site().language()]) or (dictName == 'yearsAD' and date.maxyearAD.has_key(page.site().language()) and value > date.maxyearAD[page.site().language()]):
                wikipedia.output(u'TitleTranslate: %s was recognized as %s with value %d' % (page.title(),dictName,value))
                for entryLang, entry in date.formats[dictName].iteritems():
                    if entryLang != page.site().language():
                        if dictName == 'yearsBC' and date.maxyearBC.has_key(entryLang) and value > date.maxyearBC[entryLang]:
                            pass
                        elif dictName == 'yearsAD' and date.maxyearAD.has_key(entryLang) and value > date.maxyearAD[entryLang]:
                            pass
            else:
                            newname = entry(value)
                            x = wikipedia.Page( wikipedia.getSite(code=entryLang, fam=site.family), newname )
                            if x not in result:
                                result.append(x) # add new page
    return result
Ejemplo n.º 7
0
def translate(page,
              hints=None,
              auto=True,
              removebrackets=False,
              site=None,
              family=None):
    """
    Goes through all entries in 'hints'. Returns a list of pages.

    Entries for single page titles list those pages. Page titles for entries
    such as "all:" or "xyz:" or "20:" are first built from the page title of
    'page' and then listed. When 'removebrackets' is True, a trailing pair of
    brackets and the text between them is removed from the page title.
    If 'auto' is true, known year and date page titles are autotranslated
    to all known target languages and inserted into the list.

    """
    result = []
    if site is None and page:
        site = page.site
    if family is None and site:
        family = site.family
    if site:
        sitelang = site.language()
    if hints:
        for h in hints:
            if ':' not in h:
                # argument given as -hint:xy where xy is a language code
                codes = h
                newname = ''
            else:
                codes, newname = h.split(':', 1)
            if newname == '':
                # if given as -hint:xy or -hint:xy:, assume that there should
                # be a page in language xy with the same title as the page
                # we're currently working on ...
                if page is None:
                    continue
                ns = page.namespace()
                if ns:
                    newname = u'%s:%s' % (family.namespace(
                        '_default', ns), page.title(withNamespace=False))
                else:
                    # article in the main namespace
                    newname = page.title()
                # ... unless we do want brackets
                if removebrackets:
                    newname = re.sub(
                        re.compile(ur"\W*?\(.*?\)\W*?", re.UNICODE), u" ",
                        newname)
            codesplit = codes.split(',')
            codes = []
            for code in codesplit:
                try:
                    number = int(code)
                    _join_to_(codes, family.languages_by_size[:number])
                except ValueError:
                    if code == 'all':
                        _join_to_(codes, family.languages_by_size)
                    elif code in family.language_groups:
                        _join_to_(codes, family.language_groups[code])
                    elif code:
                        _join_to_(codes, [code])
            for newcode in codes:
                x = None
                if newcode in family.langs.keys():
                    if page is None or \
                       (newcode != sitelang and
                            pywikibot.getSite().family.name
                            not in family.interwiki_forwarded_from):
                        x = pywikibot.Page(
                            pywikibot.getSite(fam=family, code=newcode),
                            newname)
                elif newcode in family.interwiki_forwarded_from:
                    x = pywikibot.Page(
                        pywikibot.getSite(fam=newcode, code=newcode), newname)
                else:
                    if pywikibot.verbose:
                        pywikibot.output(
                            u"Ignoring the unknown language code %s" % newcode)
                if x:
                    _join_to_(result, [x])

    # Autotranslate dates into all other languages, the rest will come from
    # existing interwiki links.
    if auto and page:
        # search inside all dictionaries for this link
        dictName, value = date.getAutoFormat(sitelang, page.title())
        if dictName:
            if not (dictName == 'yearsBC' and
                    sitelang in date.maxyearBC and
                    value > date.maxyearBC[sitelang]) or \
                    (dictName == 'yearsAD' and
                     sitelang in date.maxyearAD and
                     value > date.maxyearAD[sitelang]):
                pywikibot.output(
                    u'TitleTranslate: %s was recognized as %s with value %d' %
                    (page.title(), dictName, value))
                for entryLang, entry in date.formats[dictName].iteritems():
                    if entryLang != sitelang:
                        if dictName == 'yearsBC' and \
                           entryLang in date.maxyearBC and \
                           value > date.maxyearBC[entryLang]:
                            pass
                        elif (dictName == 'yearsAD'
                              and entryLang in date.maxyearAD
                              and value > date.maxyearAD[entryLang]):
                            pass
            else:
                newname = entry(value)
                x = pywikibot.Page(
                    pywikibot.getSite(code=entryLang, fam=family), newname)
                _join_to_(result, [x])
    return result
Ejemplo n.º 8
0
def translate(page, hints=None, auto=True, removebrackets=False):
    """
    Please comment your source code! --Daniel

    Does some magic stuff. Returns a list of pages.
    """
    result = []
    site = page.site()
    if hints:
        for h in hints:
            if ':' not in h:
                # argument given as -hint:xy where xy is a language code
                codes = h
                newname = ''
            else:
                codes, newname = h.split(':', 1)
            if newname == '':
                # if given as -hint:xy or -hint:xy:, assume that there should
                # be a page in language xy with the same title as the page
                # we're currently working on ...
                ns = page.namespace()
                if ns:
                    newname = u'%s:%s' % (site.family.namespace(
                        '_default', ns), page.titleWithoutNamespace())
                else:
                    # article in the main namespace
                    newname = page.title()
                # ... unless we do want brackets
                if removebrackets:
                    newname = re.sub(
                        re.compile(ur"\W*?\(.*?\)\W*?", re.UNICODE), u" ",
                        newname)
            try:
                number = int(codes)
                codes = site.family.languages_by_size[:number]
            except ValueError:
                if codes == 'all':
                    codes = site.family.languages_by_size
                elif codes in site.family.language_groups:
                    codes = site.family.language_groups[codes]
                else:
                    codes = codes.split(',')
            for newcode in codes:
                if newcode in site.languages():
                    if newcode != site.language():
                        x = pywikibot.Page(site.getSite(code=newcode), newname)
                        if x not in result:
                            result.append(x)
                else:
                    if pywikibot.verbose:
                        pywikibot.output(u"Ignoring unknown language code %s" %
                                         newcode)

    # Autotranslate dates into all other languages, the rest will come from existing interwiki links.
    if auto:
        # search inside all dictionaries for this link
        dictName, value = date.getAutoFormat(page.site().language(),
                                             page.title())
        if dictName:
            if not (dictName == 'yearsBC'
                    and page.site().language() in date.maxyearBC
                    and value > date.maxyearBC[page.site().language()]) or (
                        dictName == 'yearsAD'
                        and page.site().language() in date.maxyearAD
                        and value > date.maxyearAD[page.site().language()]):
                pywikibot.output(
                    u'TitleTranslate: %s was recognized as %s with value %d' %
                    (page.title(), dictName, value))
                for entryLang, entry in date.formats[dictName].iteritems():
                    if entryLang != page.site().language():
                        if dictName == 'yearsBC' and entryLang in date.maxyearBC and value > date.maxyearBC[
                                entryLang]:
                            pass
                        elif dictName == 'yearsAD' and entryLang in date.maxyearAD and value > date.maxyearAD[
                                entryLang]:
                            pass
            else:
                newname = entry(value)
                x = pywikibot.Page(
                    pywikibot.getSite(code=entryLang, fam=site.family),
                    newname)
                if x not in result:
                    result.append(x)  # add new page
    return result