Exemple #1
0
def getPoisonedLinks(pl):
    """Collect titles of known corrupted links for the date page ``pl``.

    The title of ``pl`` is passed to ``date.getAutoFormat`` together with
    the code of its site. If no date format matches, an empty list is
    returned. Otherwise the list is filled with the ``bcDateErrors``
    patterns (for BC formats) and with whatever ``appendFormatedDates``
    adds for the related year/decade formats selected below.
    """
    poisoned = []
    pywikibot.output(u'getting poisoned links for %s' % pl.title())
    dictName, value = date.getAutoFormat(pl.site.code, pl.title())
    if dictName is None:
        # The title is not a recognised date: nothing is poisoned.
        return poisoned

    pywikibot.output(u'date found in %s' % dictName)
    # errors in year BC
    if dictName in date.bcFormats:
        poisoned.extend(fmt % value for fmt in bcDateErrors)
    # i guess this is like friday the 13th for the years
    if dictName == 'yearsBC' and value == 398:
        appendFormatedDates(poisoned, dictName, 399)

    # Pick the (format name, number) pairs that get mixed up with this
    # kind of date; every other format has no such pairs.
    if dictName == 'yearsBC':
        confusable = (('decadesBC', value), ('yearsAD', value))
    elif dictName == 'yearsAD':
        confusable = (('decadesAD', value), ('yearsBC', value))
    elif dictName == 'centuriesBC':
        confusable = (('decadesBC', value * 100 + 1),)
    elif dictName == 'centuriesAD':
        confusable = (('decadesAD', value * 100 + 1),)
    else:
        confusable = ()
    for other_format, number in confusable:
        appendFormatedDates(poisoned, other_format, number)
    return poisoned
Exemple #2
0
def getPoisonedLinks(pl):
    """Collect titles of known corrupted links for the date page ``pl``.

    The title of ``pl`` is passed to ``date.getAutoFormat`` together with
    the code of its site. If no date format matches, an empty list is
    returned. Otherwise the list is filled with the ``bcDateErrors``
    patterns (for BC formats) and with whatever ``appendFormatedDates``
    adds for the related year/decade formats selected below.
    """
    poisoned = []
    pywikibot.output(u'getting poisoned links for %s' % pl.title())
    dictName, value = date.getAutoFormat(pl.site.code, pl.title())
    if dictName is None:
        # The title is not a recognised date: nothing is poisoned.
        return poisoned

    pywikibot.output(u'date found in %s' % dictName)
    # errors in year BC
    if dictName in date.bcFormats:
        poisoned.extend(fmt % value for fmt in bcDateErrors)
    # i guess this is like friday the 13th for the years
    if dictName == 'yearsBC' and value == 398:
        appendFormatedDates(poisoned, dictName, 399)

    # Pick the (format name, number) pairs that get mixed up with this
    # kind of date; every other format has no such pairs.
    if dictName == 'yearsBC':
        confusable = (('decadesBC', value), ('yearsAD', value))
    elif dictName == 'yearsAD':
        confusable = (('decadesAD', value), ('yearsBC', value))
    elif dictName == 'centuriesBC':
        confusable = (('decadesBC', value * 100 + 1),)
    elif dictName == 'centuriesAD':
        confusable = (('decadesAD', value * 100 + 1),)
    else:
        confusable = ()
    for other_format, number in confusable:
        appendFormatedDates(poisoned, other_format, number)
    return poisoned
Exemple #3
0
def translate(page=None, hints=(), auto=True, removebrackets=False,
              site=None):
    """
    Build a list of links to pages on other sites from hints and dates.

    Every hint has the form ``codes[:title]``. ``codes`` is either a number
    (that many leading entries of the family's ``languages_by_size``),
    ``all``, a key of the family's ``language_groups``, or a comma separated
    list of language codes. When the title part is empty the title of
    'page' (without namespace) is used instead; with 'removebrackets' set,
    bracketed text in that title is replaced by a blank. Hints without a
    title are skipped when no 'page' is given.

    If 'auto' is true and the title of 'page' is recognised by
    ``date.getAutoFormat``, the matching title is added for each other
    language that is listed in ``date.formats`` and known to the site.

    At least one of 'page' and 'site' has to be given. Links are collected
    in a set, so the returned list has no defined order.
    """
    links = set()

    assert page or site

    if site is None:
        site = page.site

    for hint in hints:
        # argument may be given as -hint:xy where xy is a language code
        codes, _, title = hint.partition(':')
        if not title:
            # -hint:xy or -hint:xy: means "same title as the current page",
            # which is impossible to resolve without a page.
            if page is None:
                continue
            title = page.title(with_ns=False)
            # ... unless we do want brackets
            if removebrackets:
                bracket_pattern = re.compile(r'\W*?\(.*?\)\W*?', re.UNICODE)
                title = bracket_pattern.sub(' ', title)

        family = site.family
        if codes.isdigit():
            targets = family.languages_by_size[:int(codes)]
        elif codes == 'all':
            targets = family.languages_by_size
        elif codes in family.language_groups:
            targets = family.language_groups[codes]
        else:
            targets = codes.split(',')

        for code in targets:
            if code not in site.languages():
                if config.verbose_output:
                    pywikibot.output('Ignoring unknown language code {}'
                                     .format(code))
                continue
            if code == site.code:
                # never link a page to its own site
                continue
            namespace = page.namespace() if page else 0
            links.add(pywikibot.Link(title,
                                     site.getSite(code=code),
                                     default_namespace=namespace))

    # Autotranslate dates into all other languages, the rest will come from
    # existing interwiki links.
    if not (auto and page):
        return list(links)

    # search inside all dictionaries for this link
    sitelang = page.site.lang
    dict_name, value = date.getAutoFormat(sitelang, page.title())
    if not dict_name:
        return list(links)

    pywikibot.output(
        u'TitleTranslate: %s was recognized as %s with value %d'
        % (page.title(), dict_name, value))
    for lang, formatter in date.formats[dict_name].items():
        if lang not in site.languages() or lang == sitelang:
            continue
        translated = formatter(value)
        target_site = pywikibot.Site(code=lang, fam=site.family)
        links.add(pywikibot.Link(translated, target_site))
    return list(links)
Exemple #4
0
def translate(page,
              hints=None,
              auto=True,
              removebrackets=False,
              site=None,
              family=None):
    """
    Go through all entries in 'hints' and return a list of Link objects.

    Each hint has the form ``codes[:title]``. ``codes`` is either a number
    (that many leading entries of the family's ``languages_by_size``),
    ``all``, a key of the family's ``language_groups``, or a comma separated
    list of language codes. When the title part is empty, the title is built
    from 'page' (prefixed with the family's default namespace name when the
    page is outside the main namespace); such hints are skipped if 'page' is
    None. When 'removebrackets' is True, bracketed text in that title is
    replaced by a blank.

    If 'auto' is true and the title of 'page' is recognised by
    ``date.getAutoFormat``, the matching title is added for every other
    language in ``date.formats``, unless the year exceeds the
    ``date.maxyearBC`` / ``date.maxyearAD`` limit of the source or the
    target language.

    'site' defaults to ``page.site`` and 'family' to ``site.family``.
    The result contains no duplicates and keeps insertion order.
    """
    result = []
    if site is None and page:
        site = page.site
    if family is None and site:
        family = site.family

    for h in hints or ():
        # argument may be given as -hint:xy where xy is a language code
        codes, _, newname = h.partition(':')
        if not newname:
            # if given as -hint:xy or -hint:xy:, assume that there should
            # be a page in language xy with the same title as the page
            # we're currently working on ...
            if page is None:
                continue
            ns = page.namespace()
            if ns:
                newname = u'%s:%s' % (family.namespace('_default', ns),
                                      page.title(withNamespace=False))
            else:
                # article in the main namespace
                newname = page.title()
            # ... unless we do want brackets
            if removebrackets:
                # A plain raw string is valid on Python 2 and 3 alike,
                # unlike the former ur"..." literal.
                bracket_pattern = re.compile(r"\W*?\(.*?\)\W*?", re.UNICODE)
                newname = bracket_pattern.sub(u" ", newname)

        # Only the int() conversion may legitimately raise ValueError.
        try:
            number = int(codes)
        except ValueError:
            if codes == 'all':
                codes = site.family.languages_by_size
            elif codes in site.family.language_groups:
                codes = site.family.language_groups[codes]
            else:
                codes = codes.split(',')
        else:
            codes = site.family.languages_by_size[:number]

        for newcode in codes:
            if newcode not in site.languages():
                if pywikibot.verbose:
                    pywikibot.output(u"Ignoring unknown language code %s" %
                                     newcode)
                continue
            if newcode == site.code:
                continue
            # Link takes the title first and the site second, exactly as in
            # the date branch below; the arguments used to be swapped here.
            x = pywikibot.Link(newname, site.getSite(code=newcode))
            if x not in result:
                result.append(x)

    # Autotranslate dates into all other languages, the rest will come from
    # existing interwiki links.
    if auto and page:
        # search inside all dictionaries for this link
        dictName, value = date.getAutoFormat(page.site.code, page.title())
        if dictName:
            # Only year formats have a per-language upper limit.
            if dictName == 'yearsBC':
                max_years = date.maxyearBC
            elif dictName == 'yearsAD':
                max_years = date.maxyearAD
            else:
                max_years = {}

            def out_of_range(code):
                """Return True if 'value' is above the limit for 'code'."""
                return code in max_years and value > max_years[code]

            # The former condition read "not (BC check) or (AD check)", which
            # let out-of-range AD years through; both limits are honoured now.
            if not out_of_range(page.site.code):
                pywikibot.output(
                    u'TitleTranslate: %s was recognized as %s with value %d' %
                    (page.title(), dictName, value))
                for entryLang, entry in date.formats[dictName].items():
                    if entryLang == page.site.code or out_of_range(entryLang):
                        continue
                    # This used to sit in an "else" of the outer "if", so it
                    # never ran for valid dates and hit undefined names
                    # otherwise.
                    newname = entry(value)
                    x = pywikibot.Link(
                        newname, pywikibot.getSite(code=entryLang,
                                                   fam=site.family))
                    if x not in result:
                        result.append(x)  # add new page
    return result
Exemple #5
0
def isdate(s):
    """Return True if ``s`` is recognised as a date or year title.

    The check uses ``date.getAutoFormat`` with the language of
    ``pywikibot.Site()``.
    """
    language = pywikibot.Site().language()
    format_name, _ = date.getAutoFormat(language, s)
    if format_name is None:
        return False
    return True
def translate(page, hints=None, auto=True, removebrackets=False, site=None, family=None):
    """
    Go through all entries in 'hints' and return a list of Link objects.

    Each hint has the form ``codes[:title]``. ``codes`` is either a number
    (that many leading entries of the family's ``languages_by_size``),
    ``all``, a key of the family's ``language_groups``, or a comma separated
    list of language codes. When the title part is empty, the title is built
    from 'page' (prefixed with the family's default namespace name when the
    page is outside the main namespace); such hints are skipped if 'page' is
    None. When 'removebrackets' is True, bracketed text in that title is
    replaced by a blank.

    If 'auto' is true and the title of 'page' is recognised by
    ``date.getAutoFormat``, the matching title is added for every other
    language in ``date.formats``, unless the year exceeds the
    ``date.maxyearBC`` / ``date.maxyearAD`` limit of the source or the
    target language.

    'site' defaults to ``page.site`` and 'family' to ``site.family``.
    The result contains no duplicates and keeps insertion order.
    """
    result = []
    if site is None and page:
        site = page.site
    if family is None and site:
        family = site.family

    for h in hints or ():
        # argument may be given as -hint:xy where xy is a language code
        codes, _, newname = h.partition(':')
        if not newname:
            # if given as -hint:xy or -hint:xy:, assume that there should
            # be a page in language xy with the same title as the page
            # we're currently working on ...
            if page is None:
                continue
            ns = page.namespace()
            if ns:
                newname = u'%s:%s' % (family.namespace('_default', ns),
                                      page.title(withNamespace=False))
            else:
                # article in the main namespace
                newname = page.title()
            # ... unless we do want brackets
            if removebrackets:
                # A plain raw string is valid on Python 2 and 3 alike,
                # unlike the former ur"..." literal.
                bracket_pattern = re.compile(r"\W*?\(.*?\)\W*?", re.UNICODE)
                newname = bracket_pattern.sub(u" ", newname)

        # Only the int() conversion may legitimately raise ValueError.
        try:
            number = int(codes)
        except ValueError:
            if codes == 'all':
                codes = site.family.languages_by_size
            elif codes in site.family.language_groups:
                codes = site.family.language_groups[codes]
            else:
                codes = codes.split(',')
        else:
            codes = site.family.languages_by_size[:number]

        for newcode in codes:
            if newcode not in site.languages():
                if pywikibot.verbose:
                    pywikibot.output(u"Ignoring unknown language code %s"
                                     % newcode)
                continue
            if newcode == site.code:
                continue
            # Link takes the title first and the site second, exactly as in
            # the date branch below; the arguments used to be swapped here.
            x = pywikibot.Link(newname, site.getSite(code=newcode))
            if x not in result:
                result.append(x)

    # Autotranslate dates into all other languages, the rest will come from
    # existing interwiki links.
    if auto and page:
        # search inside all dictionaries for this link
        dictName, value = date.getAutoFormat(page.site.code,
                                             page.title())
        if dictName:
            # Only year formats have a per-language upper limit.
            if dictName == 'yearsBC':
                max_years = date.maxyearBC
            elif dictName == 'yearsAD':
                max_years = date.maxyearAD
            else:
                max_years = {}

            def out_of_range(code):
                """Return True if 'value' is above the limit for 'code'."""
                return code in max_years and value > max_years[code]

            # The former condition read "not (BC check) or (AD check)", which
            # let out-of-range AD years through; both limits are honoured now.
            if not out_of_range(page.site.code):
                pywikibot.output(
                    u'TitleTranslate: %s was recognized as %s with value %d'
                    % (page.title(), dictName, value))
                for entryLang, entry in date.formats[dictName].items():
                    if entryLang == page.site.code or out_of_range(entryLang):
                        continue
                    # This used to sit in an "else" of the outer "if", so it
                    # never ran for valid dates and hit undefined names
                    # otherwise.
                    newname = entry(value)
                    x = pywikibot.Link(
                        newname,
                        pywikibot.getSite(code=entryLang,
                                          fam=site.family))
                    if x not in result:
                        result.append(x)  # add new page
    return result
Exemple #7
0
def isdate(s):
    """Return True if ``s`` is recognised as a date or year title.

    The check uses ``date.getAutoFormat`` with the language of
    ``pywikibot.Site()``.
    """
    language = pywikibot.Site().language()
    format_name, _ = date.getAutoFormat(language, s)
    if format_name is None:
        return False
    return True
def translate(page, hints=None, auto=True, removebrackets=False):
    """
    Return a list of Link objects for 'page' built from hints and dates.

    Each hint has the form ``codes[:title]``. ``codes`` is either a number
    (that many leading entries of the family's ``languages_by_size``),
    ``all``, a key of the family's ``language_groups``, or a comma separated
    list of language codes. When the title part is empty, the title of
    'page' is used (prefixed with the family's default namespace name when
    the page is outside the main namespace). When 'removebrackets' is True,
    bracketed text in that title is replaced by a blank.

    If 'auto' is true and the title of 'page' is recognised by
    ``date.getAutoFormat``, the matching title is added for every other
    language in ``date.formats``, unless the year exceeds the
    ``date.maxyearBC`` / ``date.maxyearAD`` limit of the source or the
    target language.

    The result contains no duplicates and keeps insertion order.
    """
    result = []
    site = page.site

    for h in hints or ():
        # argument may be given as -hint:xy where xy is a language code
        codes, _, newname = h.partition(':')
        if not newname:
            # if given as -hint:xy or -hint:xy:, assume that there should
            # be a page in language xy with the same title as the page
            # we're currently working on ...
            ns = page.namespace()
            if ns:
                newname = u'%s:%s' % (site.family.namespace('_default', ns),
                                      page.title(withNamespace=False))
            else:
                # article in the main namespace
                newname = page.title()
            # ... unless we do want brackets
            if removebrackets:
                # A plain raw string is valid on Python 2 and 3 alike,
                # unlike the former ur"..." literal.
                bracket_pattern = re.compile(r"\W*?\(.*?\)\W*?", re.UNICODE)
                newname = bracket_pattern.sub(u" ", newname)

        # Only the int() conversion may legitimately raise ValueError.
        try:
            number = int(codes)
        except ValueError:
            if codes == 'all':
                codes = site.family.languages_by_size
            elif codes in site.family.language_groups:
                codes = site.family.language_groups[codes]
            else:
                codes = codes.split(',')
        else:
            codes = site.family.languages_by_size[:number]

        for newcode in codes:
            if newcode not in site.languages():
                if pywikibot.verbose:
                    pywikibot.output(u"Ignoring unknown language code %s"
                                     % newcode)
                continue
            if newcode == site.code:
                continue
            # Link takes the title first and the site second, exactly as in
            # the date branch below; the arguments used to be swapped here.
            x = pywikibot.Link(newname, site.getSite(code=newcode))
            if x not in result:
                result.append(x)

    # Autotranslate dates into all other languages, the rest will come from
    # existing interwiki links.
    if auto:
        # search inside all dictionaries for this link
        dictName, value = date.getAutoFormat(page.site.code,
                                             page.title())
        if dictName:
            # Only year formats have a per-language upper limit.
            if dictName == 'yearsBC':
                max_years = date.maxyearBC
            elif dictName == 'yearsAD':
                max_years = date.maxyearAD
            else:
                max_years = {}

            def out_of_range(code):
                """Return True if 'value' is above the limit for 'code'."""
                return code in max_years and value > max_years[code]

            # The former condition read "not (BC check) or (AD check)", which
            # let out-of-range AD years through; both limits are honoured now.
            if not out_of_range(page.site.code):
                pywikibot.output(
                    u'TitleTranslate: %s was recognized as %s with value %d'
                    % (page.title(), dictName, value))
                for entryLang, entry in date.formats[dictName].items():
                    if entryLang == page.site.code or out_of_range(entryLang):
                        continue
                    # This used to sit in an "else" of the outer "if", so it
                    # never ran for valid dates and hit undefined names
                    # otherwise.
                    newname = entry(value)
                    x = pywikibot.Link(
                        newname,
                        pywikibot.getSite(code=entryLang,
                                          fam=site.family))
                    if x not in result:
                        result.append(x)  # add new page
    return result