def test_lines(self):
    """Test TextfilePageGenerator with titles on separate lines."""
    filename = os.path.join(_data_dir, 'pagelist-lines.txt')
    site = self.get_site()
    titles = list(pagegenerators.TextfilePageGenerator(filename, site))
    self.assertEqual(len(titles), len(self.expected_titles))
    expected_titles = [
        expected_title[self.title_columns[site.namespaces[
            page.namespace()].case]]
        for expected_title, page in zip(self.expected_titles, titles)
    ]
    self.assertEqual([page.title() for page in titles], expected_titles)
def test_brackets(self):
    """Test TextfilePageGenerator with brackets."""
    filename = join_data_path('pagelist-brackets.txt')
    site = self.get_site()
    titles = list(pagegenerators.TextfilePageGenerator(filename, site))
    self.assertEqual(len(titles), len(self.expected_titles))
    expected_titles = [
        expected_title[self.title_columns[site.namespaces[
            page.namespace()].case]]
        for expected_title, page in zip(self.expected_titles, titles)
    ]
    self.assertEqual([page.title() for page in titles], expected_titles)
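The two tests above cover the two input formats TextfilePageGenerator accepts: one title per line, or titles written as [[bracketed]] wikilinks. A minimal standalone sketch (the file name and titles here are made-up examples, not the test fixtures):

import pywikibot
from pywikibot import pagegenerators

# Hypothetical title file: one title per line. Titles could equally be
# written as [[bracketed]] wikilinks anywhere in the file.
with open('titles.txt', 'w', encoding='utf-8') as f:
    f.write('Main Page\nTalk:Sandbox\nFile:Example.svg\n')

site = pywikibot.Site()
for page in pagegenerators.TextfilePageGenerator('titles.txt', site=site):
    print(page.title())  # yields pywikibot.Page objects, one per title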
def main():
    # if -file is not used, this temporary array is used to read the page title.
    pageTitle = []
    page = None
    gen = None
    interwiki = False
    keep_name = False
    targetLang = None
    targetFamily = None

    for arg in pywikibot.handleArgs():
        if arg == '-interwiki':
            interwiki = True
        elif arg.startswith('-keepname'):
            keep_name = True
        elif arg.startswith('-tolang:'):
            targetLang = arg[8:]
        elif arg.startswith('-tofamily:'):
            targetFamily = arg[10:]
        elif arg.startswith('-file'):
            if len(arg) == 5:
                filename = pywikibot.input(
                    u'Please enter the list\'s filename: ')
            else:
                filename = arg[6:]
            gen = pagegenerators.TextfilePageGenerator(filename)
        else:
            pageTitle.append(arg)

    if not gen:
        # if the page title is given as a command line argument,
        # connect the title's parts with spaces
        if pageTitle != []:
            pageTitle = ' '.join(pageTitle)
            page = pywikibot.Page(pywikibot.Site(), pageTitle)
        # if no page title was given as an argument, and none was
        # read from a file, query the user
        if not page:
            pageTitle = pywikibot.input(u'Which page to check:')
            page = pywikibot.Page(pywikibot.Site(), pageTitle)
        # generator which will yield only a single Page
        gen = iter([page])

    if not targetLang and not targetFamily:
        targetSite = pywikibot.Site('commons', 'commons')
    else:
        if not targetLang:
            targetLang = pywikibot.Site().language()
        if not targetFamily:
            targetFamily = pywikibot.Site().family
        targetSite = pywikibot.Site(targetLang, targetFamily)

    bot = ImageTransferBot(gen, interwiki=interwiki, targetSite=targetSite,
                           keep_name=keep_name)
    bot.run()
def query(filename: str, params: QueryParams) -> None:
    site = pywikibot.Site()
    pages = list(
        pagegenerators.TextfilePageGenerator(filename=filename, site=site))
    limit = _validated_limit(params.limit, params.offset, len(pages))

    print('Downloading... offset={}, limit={}'.format(params.offset, limit))
    tc, uc = 0, 0
    for i in range(params.offset, params.offset + limit):
        p = pages[i]
        if p.pageid == 0:
            print("ERROR: Cannot fetch the page " + p.title())
            continue

        # onyshchak: create_if_not_exists - switch to enrich only existing data
        page_dir = _get_path(
            out_dir=params.out_dir + p.title(as_filename=True).rstrip('.'),
            create_if_not_exists=not params.only_update_cached_pages)
        if not page_dir.exists():
            continue
        if params.debug_info:
            print(i, page_dir)

        should_download_article = lambda path: (
            not path.exists()
            or stat(path).st_size == 0
            or params.invalidate_text_cache)
        text_path = page_dir / 'text.json'
        if should_download_article(text_path):
            if params.debug_info:
                print("Downloading text.json")
            page_json = json.dumps({
                "title": p.title(),
                "id": p.pageid,
                "url": p.full_url(),
                "text": p.text,
            })
            _dump(text_path, page_json)

        # downloading page images
        tc, uc = _img_download(p.imagelinks(), page_dir,
                               params.invalidate_img_cache, tc, uc)

    print('Downloaded {} images, where {} of them unavailable from commons'
          .format(tc, uc))
    _file_log(skipped_svg, 'logs/skipped_svg_{}.txt'.format(params.offset))
def update_meta_description(filename, out_dir, offset=0, limit=None):
    site = pywikibot.Site()
    pages = list(
        pagegenerators.TextfilePageGenerator(filename=filename, site=site))
    limit = _validated_limit(limit, offset, len(pages))

    for i in range(offset, offset + limit):
        p = pages[i]
        if p.pageid == 0:
            print("ERROR: Cannot fetch the page " + p.title())
            continue

        page_dir = _get_path(out_dir + p.title(as_filename=True).rstrip('.'),
                             create_if_not_exists=False)
        if not page_dir.exists():
            # onyshchak: temporary switch to enrich only existing data
            print('not page_dir.exists()', page_dir)
            continue
        print(i, p.title())

        img_dir = _get_path(page_dir / "img", create_if_not_exists=False)
        meta_path = img_dir / 'meta.json'
        meta = _getJSON(meta_path)
        updated = False
        for img in p.imagelinks():
            if not _valid_img_type(img.title(with_ns=False)):
                continue

            # index of this image's record in the cached metadata
            idx = next(idx for idx, x in enumerate(meta['img_meta'])
                       if x['title'] == img.title(with_ns=False))
            updated_description = _get_description(img)
            if updated_description != meta['img_meta'][idx]['description']:
                updated = True
                meta['img_meta'][idx]['description'] = updated_description
                print("DESCRIPTION", img_dir / meta['img_meta'][idx]['filename'])

        if updated:
            meta_json = json.dumps(meta)
            _dump(meta_path, meta_json)
def get_redirects(self):
    return [
        page.title()
        for page in pagegenerators.TextfilePageGenerator(site=self.site)
    ]
def main(*args):
    # the option that's always selected when the bot wonders what to do with
    # a link. If it's None, the user is prompted (default behaviour).
    always = None
    alternatives = []
    getAlternatives = True
    dnSkip = False
    generator = None
    pageTitle = None
    primary = False
    main_only = False

    # For sorting the linked pages, case can be ignored
    minimum = 0

    local_args = pywikibot.handleArgs(*args)
    for arg in local_args:
        if arg.startswith('-primary:'):
            primary = True
            getAlternatives = False
            alternatives.append(arg[9:])
        elif arg == '-primary':
            primary = True
        elif arg.startswith('-always:'):
            always = arg[8:]
        elif arg.startswith('-file'):
            if len(arg) == 5:
                generator = pagegenerators.TextfilePageGenerator(filename=None)
            else:
                generator = pagegenerators.TextfilePageGenerator(
                    filename=arg[6:])
        elif arg.startswith('-pos:'):
            if arg[5] != ':':
                mysite = pywikibot.Site()
                page = pywikibot.Page(pywikibot.Link(arg[5:], mysite))
                if page.exists():
                    alternatives.append(page.title())
                else:
                    answer = pywikibot.inputChoice(
                        u'Possibility %s does not actually exist. Use it '
                        u'anyway?' % page.title(),
                        ['yes', 'no'], ['y', 'N'], 'N')
                    if answer == 'y':
                        alternatives.append(page.title())
            else:
                alternatives.append(arg[5:])
        elif arg == '-just':
            getAlternatives = False
        elif arg == '-dnskip':
            dnSkip = True
        elif arg == '-main':
            main_only = True
        elif arg.startswith('-min:'):
            minimum = int(arg[5:])
        elif arg.startswith('-start'):
            try:
                if len(arg) <= len('-start:'):
                    generator = pagegenerators.CategorizedPageGenerator(
                        pywikibot.Site().disambcategory())
                else:
                    generator = pagegenerators.CategorizedPageGenerator(
                        pywikibot.Site().disambcategory(), start=arg[7:])
                generator = pagegenerators.NamespaceFilterPageGenerator(
                    generator, [0])
            except pywikibot.NoPage:
                pywikibot.output(
                    "Disambiguation category for your wiki is not known.")
                raise
        elif not pageTitle:
            pageTitle = arg

    site = pywikibot.Site()

    if pageTitle:
        page = pywikibot.Page(pywikibot.Link(pageTitle, site))
        generator = iter([page])

    if not generator:
        pywikibot.showHelp()
        return

    site.login()
    bot = DisambiguationRobot(always, alternatives, getAlternatives, dnSkip,
                              generator, primary, main_only, minimum=minimum)
    bot.run()
def test_lines(self):
    """Test TextfilePageGenerator with titles on separate lines."""
    filename = os.path.join(_data_dir, 'pagelist-lines.txt')
    site = self.get_site()
    titles = list(pagegenerators.TextfilePageGenerator(filename, site))
    self.assertPagelistTitles(titles, self.expected_titles[site.case()])
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # the option that's always selected when the bot wonders what to do with
    # a link. If it's None, the user is prompted (default behaviour).
    always = None
    alternatives = []
    getAlternatives = True
    dnSkip = False
    generator = None
    pageTitle = None
    primary = False
    main_only = False

    # For sorting the linked pages, case can be ignored
    minimum = 0

    local_args = pywikibot.handle_args(args)
    for arg in local_args:
        if arg.startswith('-primary:'):
            primary = True
            getAlternatives = False
            alternatives.append(arg[9:])
        elif arg == '-primary':
            primary = True
        elif arg.startswith('-always:'):
            always = arg[8:]
        elif arg.startswith('-file'):
            if len(arg) == 5:
                generator = pagegenerators.TextfilePageGenerator(filename=None)
            else:
                generator = pagegenerators.TextfilePageGenerator(
                    filename=arg[6:])
        elif arg.startswith('-pos:'):
            if arg[5] != ':':
                mysite = pywikibot.Site()
                page = pywikibot.Page(pywikibot.Link(arg[5:], mysite))
                if page.exists():
                    alternatives.append(page.title())
                else:
                    if pywikibot.input_yn(
                            u'Possibility %s does not actually exist. Use it '
                            'anyway?' % page.title(),
                            default=False, automatic_quit=False):
                        alternatives.append(page.title())
            else:
                alternatives.append(arg[5:])
        elif arg == '-just':
            getAlternatives = False
        elif arg == '-dnskip':
            dnSkip = True
        elif arg == '-main':
            main_only = True
        elif arg.startswith('-min:'):
            minimum = int(arg[5:])
        elif arg.startswith('-start'):
            try:
                generator = pagegenerators.CategorizedPageGenerator(
                    pywikibot.Site().disambcategory(),
                    start=arg[7:], namespaces=[0])
            except pywikibot.NoPage:
                pywikibot.output(
                    "Disambiguation category for your wiki is not known.")
                raise
        elif not pageTitle:
            pageTitle = arg

    site = pywikibot.Site()

    if pageTitle:
        page = pywikibot.Page(pywikibot.Link(pageTitle, site))
        generator = iter([page])

    if not generator:
        pywikibot.showHelp()
        return

    site.login()
    bot = DisambiguationRobot(always, alternatives, getAlternatives, dnSkip,
                              generator, primary, main_only, minimum=minimum)
    bot.run()
def query_size(filename: str):
    site = pywikibot.Site()
    pages = list(
        pagegenerators.TextfilePageGenerator(filename=filename, site=site))
    return len(pages)
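query_size materializes the entire generator just to count titles. A sketch of a related helper that returns an offset/limit window of pages, mirroring the pattern query and update_meta_description follow via _validated_limit (the helper name and its clamping behaviour are assumptions, not part of the original code):

from typing import List, Optional

import pywikibot
from pywikibot import pagegenerators


def load_page_window(filename: str, offset: int = 0,
                     limit: Optional[int] = None) -> List[pywikibot.Page]:
    # Hypothetical helper: read the whole title file, then return only
    # the [offset, offset + limit) slice, clamped to the available pages.
    site = pywikibot.Site()
    pages = list(
        pagegenerators.TextfilePageGenerator(filename=filename, site=site))
    end = len(pages) if limit is None else min(offset + limit, len(pages))
    return pages[offset:end]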
def main(*args):
    # the option that's always selected when the bot wonders what to do with
    # a link. If it's None, the user is prompted (default behaviour).
    always = None
    alternatives = []
    getAlternatives = True
    dnSkip = False

    # if the -file argument is used, page titles are dumped in this array.
    # otherwise it will only contain one page.
    generator = None

    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    primary = False
    main_only = False

    # For sorting the linked pages, case can be ignored
    minimum = 0

    for arg in pywikibot.handleArgs(*args):
        if arg.startswith('-primary:'):
            primary = True
            getAlternatives = False
            alternatives.append(arg[9:])
        elif arg == '-primary':
            primary = True
        elif arg.startswith('-always:'):
            always = arg[8:]
        elif arg.startswith('-file'):
            if len(arg) == 5:
                generator = pagegenerators.TextfilePageGenerator(filename=None)
            else:
                generator = pagegenerators.TextfilePageGenerator(
                    filename=arg[6:])
        elif arg.startswith('-pos:'):
            if arg[5] != ':':
                mysite = pywikibot.Site()
                page = pywikibot.Page(pywikibot.Link(arg[5:], mysite))
                if page.exists():
                    alternatives.append(page.title())
                else:
                    answer = pywikibot.inputChoice(
                        u'Possibility %s does not actually exist. Use it '
                        u'anyway?' % page.title(),
                        ['yes', 'no'], ['y', 'N'], 'N')
                    if answer == 'y':
                        alternatives.append(page.title())
            else:
                alternatives.append(arg[5:])
        elif arg == '-just':
            getAlternatives = False
        elif arg == '-dnskip':
            dnSkip = True
        elif arg == '-main':
            main_only = True
        elif arg.startswith('-min:'):
            minimum = int(arg[5:])
        elif arg.startswith('-start'):
            try:
                if len(arg) <= len('-start:'):
                    generator = pagegenerators.CategorizedPageGenerator(
                        pywikibot.Site().disambcategory())
                else:
                    generator = pagegenerators.CategorizedPageGenerator(
                        pywikibot.Site().disambcategory(), start=arg[7:])
                generator = pagegenerators.NamespaceFilterPageGenerator(
                    generator, [0])
            except pywikibot.NoPage:
                pywikibot.output(
                    "Disambiguation category for your wiki is not known.")
                raise
        elif arg.startswith("-"):
            pywikibot.output("Unrecognized command line argument: %s" % arg)
            # show help text and exit
            pywikibot.showHelp()
        else:
            pageTitle.append(arg)

    site = pywikibot.Site()
    site.login()

    # if the disambiguation page is given as a command line argument,
    # connect the title's parts with spaces
    if pageTitle != []:
        pageTitle = ' '.join(pageTitle)
        page = pywikibot.Page(pywikibot.Link(pageTitle, site))
        generator = iter([page])

    # if no disambiguation page was given as an argument, and none was
    # read from a file, query the user
    if not generator:
        pageTitle = pywikibot.input(
            u'On which disambiguation page do you want to work?')
        page = pywikibot.Page(pywikibot.Link(pageTitle, site))
        generator = iter([page])

    bot = DisambiguationRobot(always, alternatives, getAlternatives, dnSkip,
                              generator, primary, main_only, minimum=minimum)
    bot.run()
def query(filename: str, params: QueryParams) -> None:
    site = pywikibot.Site(code=params.language_code, fam='wikipedia',
                          user='******')
    pages = list(
        pagegenerators.TextfilePageGenerator(filename=filename, site=site))
    limit = _validated_limit(params.limit, params.offset, len(pages))
    icons: Set[str] = set()

    # TODO: don't execute driver when fill_captions=False
    options = Options()
    options.headless = True
    driver = webdriver.Firefox(options=options)

    print('Downloading... offset={}, limit={}'.format(params.offset, limit))
    tc, uc = 0, 0
    for i in range(params.offset, params.offset + limit):
        p = pages[i]
        if p.pageid == 0:
            print("\nERROR: Cannot fetch the page " + p.title())
            continue

        # onyshchak: create_if_not_exists - switch to enrich only existing data
        page_dir = _get_path(
            out_dir=params.out_dir + p.title(as_filename=True).rstrip('.'),
            create_if_not_exists=not params.only_update_cached_pages)
        if not page_dir.exists():
            continue
        if params.debug_info:
            print('\n{}) {}'.format(i, page_dir))

        should_download_article = lambda path: (
            not path.exists()
            or stat(path).st_size == 0
            or params.invalidate_cache.text_cache)
        text_path = page_dir / 'text.json'
        if should_download_article(text_path):
            if params.debug_info:
                print("Downloading text.json")
            page_json = {
                "title": p.title(),
                "id": p.pageid,
                "url": p.full_url(),
            }
            if params.fill_property.text_wikitext:
                page_json["wikitext"] = p.text
            if params.fill_property.text_html:
                response = urllib.request.urlopen(p.full_url())
                page_json["html"] = response.read().decode("utf-8")
            _dump(text_path, page_json)

        # downloading page images
        tc, uc = _img_download(p.imagelinks(), page_dir, params, tc, uc)
        if params.fill_property.img_caption:
            _query_img_captions(
                page_dir=page_dir,
                driver=driver,
                icons=icons,
                language_code=params.language_code,
                invalidate_cache=params.invalidate_cache.caption_cache,
                debug_info=params.debug_info,
            )

    print('\nDownloaded {} images, where {} of them unavailable from commons'
          .format(tc, uc))
    driver.quit()

    icons_json = _getJSON(_KNOWN_ICONS_PATH)
    updated_icons = icons.union(icons_json['known_icons'])
    _dump(_KNOWN_ICONS_PATH, {"known_icons": list(updated_icons)})
def main():
    gen = None
    oldName = None
    options = {}
    fromToPairs = []

    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg.startswith('-pairs'):
            if len(arg) == len('-pairs'):
                filename = pywikibot.input(
                    u'Enter the name of the file containing pairs:')
            else:
                filename = arg[len('-pairs:'):]
            oldName1 = None
            for page in pagegenerators.TextfilePageGenerator(filename):
                if oldName1:
                    fromToPairs.append([oldName1, page.title()])
                    oldName1 = None
                else:
                    oldName1 = page.title()
            if oldName1:
                pywikibot.warning(
                    u'file %s contains odd number of links' % filename)
        elif arg == '-noredirect':
            options['noredirect'] = True
        elif arg == '-notalkpage':
            options['movetalkpage'] = False
        elif arg == '-always':
            options['always'] = True
        elif arg == '-skipredirects':
            options['skipredirects'] = True
        elif arg.startswith('-from:'):
            if oldName:
                pywikibot.warning(u'-from:%s without -to:' % oldName)
            oldName = arg[len('-from:'):]
        elif arg.startswith('-to:'):
            if oldName:
                fromToPairs.append([oldName, arg[len('-to:'):]])
                oldName = None
            else:
                pywikibot.warning(u'%s without -from' % arg)
        elif arg.startswith('-prefix'):
            if len(arg) == len('-prefix'):
                options['prefix'] = pywikibot.input(u'Enter the prefix:')
            else:
                options['prefix'] = arg[8:]
        elif arg.startswith('-summary'):
            if len(arg) == len('-summary'):
                options['summary'] = pywikibot.input(u'Enter the summary:')
            else:
                options['summary'] = arg[9:]
        else:
            genFactory.handleArg(arg)

    if oldName:
        pywikibot.warning(u'-from:%s without -to:' % oldName)

    site = pywikibot.Site()
    for pair in fromToPairs:
        page = pywikibot.Page(site, pair[0])
        bot = MovePagesBot(None, **options)
        bot.moveOne(page, pair[1])

    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = MovePagesBot(preloadingGen, **options)
        bot.run()
    elif not fromToPairs:
        pywikibot.showHelp()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    gen = None
    oldName = None
    options = {}
    fromToPairs = []

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg.startswith('-pairs'):
            if len(arg) == len('-pairs'):
                filename = pywikibot.input(
                    u'Enter the name of the file containing pairs:')
            else:
                filename = arg[len('-pairs:'):]
            oldName1 = None
            for page in pagegenerators.TextfilePageGenerator(filename):
                if oldName1:
                    fromToPairs.append([oldName1, page.title()])
                    oldName1 = None
                else:
                    oldName1 = page.title()
            if oldName1:
                pywikibot.warning(
                    u'file %s contains odd number of links' % filename)
        elif arg == '-noredirect':
            options['noredirect'] = True
        elif arg == '-notalkpage':
            options['movetalkpage'] = False
        elif arg == '-always':
            options['always'] = True
        elif arg == '-skipredirects':
            options['skipredirects'] = True
        elif arg.startswith('-from:'):
            if oldName:
                pywikibot.warning(u'-from:%s without -to:' % oldName)
            oldName = arg[len('-from:'):]
        elif arg.startswith('-to:'):
            if oldName:
                fromToPairs.append([oldName, arg[len('-to:'):]])
                oldName = None
            else:
                pywikibot.warning(u'%s without -from' % arg)
        elif arg.startswith('-prefix'):
            if len(arg) == len('-prefix'):
                options['prefix'] = pywikibot.input(u'Enter the prefix:')
            else:
                options['prefix'] = arg[8:]
        elif arg.startswith('-summary'):
            if len(arg) == len('-summary'):
                options['summary'] = pywikibot.input(u'Enter the summary:')
            else:
                options['summary'] = arg[9:]
        else:
            genFactory.handleArg(arg)

    if oldName:
        pywikibot.warning(u'-from:%s without -to:' % oldName)

    site = pywikibot.Site()
    for pair in fromToPairs:
        page = pywikibot.Page(site, pair[0])
        bot = MovePagesBot(None, **options)
        bot.moveOne(page, pair[1])

    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = MovePagesBot(preloadingGen, **options)
        bot.run()
        return True
    else:
        # in theory pairs could be missing too
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
def fetch_meta_captions(filename, out_dir, offset=0, limit=None):
    site = pywikibot.Site()
    pages = list(
        pagegenerators.TextfilePageGenerator(filename=filename, site=site))
    limit = _validated_limit(limit, offset, len(pages))

    options = Options()
    options.headless = True
    driver = webdriver.Firefox(options=options)

    for j in range(offset, offset + limit):
        p = pages[j]
        if p.pageid == 0:
            print("ERROR: Cannot fetch the page " + p.title())
            continue

        page_dir = _get_path(out_dir + p.title(as_filename=True).rstrip('.'),
                             create_if_not_exists=False)
        if not page_dir.exists():
            # onyshchak: temporary switch to enrich only existing data
            print('not page_dir.exists()', page_dir)
            continue
        print(j, p.title())

        img_dir = _get_path(page_dir / "img", create_if_not_exists=False)
        meta_path = img_dir / 'meta.json'
        meta_arr = _getJSON(meta_path)['img_meta']

        page_id = p.title(as_filename=True).rstrip('.')
        for img in p.imagelinks():
            if not _valid_img_type(img.title(with_ns=False)):
                continue

            img_id = img.title(as_filename=True, with_ns=False)
            res = [
                i for i, x in enumerate(meta_arr)
                if unquote(x['url']).split('/wiki/File:')[-1] == img_id
            ]
            if len(res) != 1:
                print('WARNING: outdated page {}, missing image {}'.format(
                    page_id, img_id))
                continue

            i = res[0]
            url = 'https://en.wikipedia.org/wiki/{}#/media/File:{}'.format(
                page_id, img_id)
            driver.get(url)
            time.sleep(1)  # required for JS to load content
            caption = None
            for k in range(5):
                try:
                    caption = driver.find_element_by_class_name(
                        "mw-mmv-title").text
                    if caption == "":
                        caption = None
                        raise Exception
                except Exception:
                    time.sleep(1)  # required for JS to load content
                    print("RETRY", k, " ||| ", img_id)
                else:
                    break  # non-empty caption fetched; stop retrying

            meta_arr[i].pop('caption', None)
            if caption and caption != _remove_prefix(
                    meta_arr[i]['description'], "English: "):
                meta_arr[i]['caption'] = caption
                # print(j, img_id, ' ||| ', caption)

        _dump(meta_path, json.dumps({"img_meta": meta_arr}))

    driver.quit()
def main(*args) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: str
    """
    oldName = None
    options = {}
    fromToPairs = []

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    local_args = genFactory.handle_args(local_args)

    for arg in local_args:
        if arg.startswith('-pairsfile'):
            if len(arg) == len('-pairsfile'):
                filename = pywikibot.input(
                    'Enter the name of the file containing pairs:')
            else:
                filename = arg[len('-pairsfile:'):]
            oldName1 = None
            for page in pagegenerators.TextfilePageGenerator(filename):
                if oldName1:
                    fromToPairs.append([oldName1, page.title()])
                    oldName1 = None
                else:
                    oldName1 = page.title()
            if oldName1:
                pywikibot.warning(
                    'file {} contains odd number of links'.format(filename))
        elif arg == '-noredirect':
            options['noredirect'] = True
        elif arg == '-notalkpage':
            options['movetalkpage'] = False
        elif arg == '-always':
            options['always'] = True
        elif arg == '-skipredirects':
            options['skipredirects'] = True
        elif arg.startswith('-from:'):
            if oldName:
                pywikibot.warning('-from:{} without -to:'.format(oldName))
            oldName = arg[len('-from:'):]
        elif arg.startswith('-to:'):
            if oldName:
                fromToPairs.append([oldName, arg[len('-to:'):]])
                oldName = None
            else:
                pywikibot.warning('{} without -from'.format(arg))
        elif arg.startswith('-prefix'):
            if len(arg) == len('-prefix'):
                options['prefix'] = pywikibot.input('Enter the prefix:')
            else:
                options['prefix'] = arg[8:]
        elif arg.startswith('-summary'):
            if len(arg) == len('-summary'):
                options['summary'] = pywikibot.input('Enter the summary:')
            else:
                options['summary'] = arg[9:]

    if oldName:
        pywikibot.warning('-from:{} without -to:'.format(oldName))

    site = pywikibot.Site()
    for pair in fromToPairs:
        page = pywikibot.Page(site, pair[0])
        bot = MovePagesBot(**options)
        bot.moveOne(page, pair[1])

    gen = genFactory.getCombinedGenerator(preload=True)
    if gen:
        bot = MovePagesBot(generator=gen, **options)
        bot.run()
    elif not fromToPairs:
        pywikibot.bot.suggest_help(missing_generator=True)
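All three movepages variants above pair up consecutive titles read from the -pairs/-pairsfile file: odd entries are source titles, even entries are their targets. That pairing logic, pulled out as a standalone sketch (the function name is made-up):

import pywikibot
from pywikibot import pagegenerators


def read_move_pairs(filename):
    # Hypothetical helper mirroring the -pairsfile loop above: consecutive
    # titles from the file become (old, new) rename pairs.
    pairs = []
    old_name = None
    for page in pagegenerators.TextfilePageGenerator(filename):
        if old_name is None:
            old_name = page.title()
        else:
            pairs.append((old_name, page.title()))
            old_name = None
    if old_name is not None:
        pywikibot.warning(
            'file {} contains odd number of links'.format(filename))
    return pairs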
def get_redirects(self):
    return list(
        map(methodcaller('title'),
            pagegenerators.TextfilePageGenerator(site=self.site)))