def main(): """Main function.""" fg_colors = [col for col in colors if col != 'default'] bg_colors = fg_colors[:] n_fg_colors = len(fg_colors) fg_colors.insert(3 * int(n_fg_colors / 4), 'default') fg_colors.insert(2 * int(n_fg_colors / 4), 'default') fg_colors.insert(int(n_fg_colors / 4), 'default') fg_colors.insert(0, 'default') # Max len of color names for padding. max_len_fg_colors = len(max(fg_colors, key=len)) max_len_bc_color = len(max(bg_colors, key=len)) for bg_col in bg_colors: # Three lines per each backgoung color. for fg_col_group in itergroup(fg_colors, n_fg_colors / 4 + 1): line = '' for fg_col in fg_col_group: line += ' ' line += color_format('{color}{0}{default}', fg_col.ljust(max_len_fg_colors), color='%s;%s' % (fg_col, bg_col)) line = '{0} {1}'.format(bg_col.ljust(max_len_bc_color), line) pywikibot.output(line) pywikibot.output('')
def apiquery(self, alllinks):
    output = {}
    for links in itergroup(alllinks, 50):
        query = api.Request(site=self.siteSource, action='query',
                            prop='langlinks', titles=links, redirects='',
                            lllang=self.siteDest.code, lllimit=500)
        results = query.submit()
        if 'query-continue' in results:
            raise Exception('should not get query-continue')
        if 'query' not in results:
            continue
        results = results['query']
        redirects = DefaultDict()
        normalized = DefaultDict()
        if 'pages' not in results:
            continue
        if 'redirects' in results:
            redirects = DefaultDict((item['to'], item['from'])
                                    for item in results['redirects'])
        if 'normalized' in results:
            normalized = DefaultDict((item['to'], item['from'])
                                     for item in results['normalized'])
        results = results['pages']
        for pageid in results:
            if int(pageid) < 0:
                continue
            pagedata = results[pageid]
            if 'langlinks' not in pagedata:
                continue
            output[normalized[redirects[pagedata['title']]]] = \
                pagedata['langlinks'][0]['*']
    return output
def PageRevIdGenerator(site, pagelist, step=50):
    """
    Generate page objects with their most recent revision ID.

    This generator is a modified version of `preloadpages` in pywikibot.site.

    :param site: site we're requesting page IDs from
    :param pagelist: an iterable that returns Page objects
    :param step: how many Pages to query at a time
    :type step: int
    """
    for sublist in itergroup(pagelist, step):
        pageids = [str(p._pageid) for p in sublist
                   if hasattr(p, "_pageid") and p._pageid > 0]
        cache = dict((p.title(withSection=False), p) for p in sublist)
        props = "revisions|info|categoryinfo"
        rvgen = api.PropertyGenerator(props, site=site)
        rvgen.set_maximum_items(-1)  # suppress use of "rvlimit" parameter
        if len(pageids) == len(sublist):
            # only use pageids if all pages have them
            rvgen.request["pageids"] = "|".join(pageids)
        else:
            rvgen.request["titles"] = "|".join(list(cache.keys()))
        rvgen.request[u"rvprop"] = u"ids|flags|timestamp|user|comment"
        logging.debug(u"Retrieving {n} pages from {s}.".format(n=len(cache),
                                                               s=site))
        for pagedata in rvgen:
            logging.debug(u"Preloading {0}".format(pagedata))
            try:
                if pagedata['title'] not in cache:
                    # API always returns a "normalized" title which is
                    # usually the same as the canonical form returned by
                    # page.title(), but sometimes not (e.g.,
                    # gender-specific localizations of "User" namespace).
                    # This checks to see if there is a normalized title in
                    # the response that corresponds to the canonical form
                    # used in the query.
                    for key in cache:
                        if site.sametitle(key, pagedata['title']):
                            cache[pagedata['title']] = cache[key]
                            break
                    else:
                        logging.warning(
                            u"preloadpages: Query returned unexpected title "
                            u"'%s'" % pagedata['title'])
                        continue
            except KeyError:
                logging.debug(u"No 'title' in %s" % pagedata)
                logging.debug(u"pageids=%s" % pageids)
                logging.debug(u"titles=%s" % list(cache.keys()))
                continue
            page = cache[pagedata['title']]
            api.update_page(page, pagedata)

        # Since we're not loading content and the pages are already in
        # memory, let's yield the pages in the same order as they were
        # received in case that's important.
        for page in sublist:
            yield page
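# A minimal usage sketch for PageRevIdGenerator above (not part of the original
# module). The site and page titles are assumptions for illustration; the
# generator itself relies on the module-level `api` and `logging` imports used above.
import pywikibot

site = pywikibot.Site('en', 'wikipedia')
pages = [pywikibot.Page(site, title)
         for title in ('Python (programming language)', 'MediaWiki')]
for page in PageRevIdGenerator(site, pages, step=50):
    # latestRevision() is filled in by the preloading done above.
    print(page.title(), page.latestRevision())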
def _pagesexist(self, allpages):
    out = []
    for pages in itergroup(allpages, 500):
        text = self.parse("\n".join(
            ["* {{PAGESIZE:%s|R}}" % page.title() for page in pages]))
        for i, ps in enumerate(lre.pats["site_APISite_li"].findall(text)):
            out.append((pages[i], int(ps) != 0))
    return out
def check_titles(site, report_page_name, replacements):
    """
    To avoid breaking links, add page titles that would be changed to the exception list.

    :param site: site where the bot will run
    :param report_page_name: name of the page where the excluded titles are reported
    :param replacements: dictionary of replacements
    """
    from pywikibot import textlib
    from pywikibot.tools import itergroup
    all_pages = site.allpages(namespace=0, filterredir=False, content=False)
    evaluation_progress = 0
    exceptions_dict = {}
    for titles_group in itergroup(all_pages, all_pages.query_limit):
        titles_group_t = [p.title(asLink=True) for p in titles_group]
        old_titles = titles_group_t
        evaluation_progress += len(titles_group_t)
        if evaluation_progress % 20000 == 0:
            print('\r%i page titles processed' % evaluation_progress)
        old_text = ' \n '.join(titles_group_t)
        for replacement_key, replacement in replacements.items():
            replacement_exceptions = replacement.exceptions or {}
            replacement_exceptions_inside = replacement_exceptions.get('inside', [])
            new_text = textlib.replaceExcept(
                old_text, replacement.old_regex, replacement.new,
                replacement_exceptions_inside, site=site)
            # The replacement changes a valid title.
            changed_titles = (
                (old_title, new_title)
                for old_title, new_title in zip(old_titles,
                                                new_text.split(' \n '))
                if old_title != new_title and
                old_title != '[[%s' % pywikibot.tools.first_upper(new_title[2:]))
            # The replacement also changes the bare title (no special
            # treatment for links), so applying it would break the link.
            changed_titles = (
                (old_title, new_title)
                for old_title, new_title in changed_titles
                if replacement.old_regex.sub(replacement.new,
                                             ' %s ' % old_title[2:-2]) !=
                ' %s ' % old_title[2:-2])
            # Keep valid titles that are not disambiguation pages.
            changed_titles = [
                old_title[2:-2] for old_title, new_title in changed_titles
                if not pywikibot.Page(site, old_title[2:-2]).isDisambig()]
            if len(changed_titles) > 0:
                replacement_exceptions['inside'] = replacement_exceptions_inside + \
                    [re.compile(re.escape(title), re.U) for title in changed_titles]
                replacement.exceptions = replacement_exceptions
                if replacement_key not in exceptions_dict:
                    exceptions_dict[replacement_key] = []
                exceptions_dict[replacement_key] += changed_titles

    exceptions_dict = OrderedDict(
        sorted((int(k), v) for k, v in exceptions_dict.items()))
    report_page = pywikibot.Page(site, report_page_name)
    exception_report = ''
    for replace_key, replaced_titles in exceptions_dict.items():
        exception_report += '\n* %i\n%s' % (
            replace_key,
            '\n'.join(['** [[%s]]' % t for t in replaced_titles]))
    report_page.put(exception_report, summary='עדכון')  # Hebrew for "update"
def main():
    namespaces = [x for x in range(1, 16, 2) if x not in [3, 5]]
    for ns in namespaces:
        gen = site.allpages(namespace=ns, filterredir=True)
        for i in gen:
            pywikibot.output("deleting " + i.title())
            # Edit summary is Thai for "Robot: unnecessary redirect page".
            i.delete(reason=u"โรบอต: หน้าเปลี่ยนทางไม่จำเป็น", prompt=False)
    for ns in namespaces:
        pywikibot.output("ns " + str(ns))
        gen = site.allpages(namespace=ns, content=True)
        for i, pages in enumerate(itergroup(gen, 5000)):
            pywikibot.output("processing bunch %d" % i)
            process(pages)
def preload_entities(self, pagelist, groupsize=50):
    """
    Yield subclasses of WikibasePage with content prefilled.

    Note that pages will be iterated in a different order
    than in the underlying pagelist.

    @param pagelist: an iterable that yields either WikibasePage objects,
        or Page objects linked to an ItemPage.
    @param groupsize: how many pages to query at a time
    @type groupsize: int
    """
    if not hasattr(self, '_entity_namespaces'):
        self._cache_entity_namespaces()
    for sublist in itergroup(pagelist, groupsize):
        req = {'ids': [], 'titles': [], 'sites': []}
        for p in sublist:
            if isinstance(p, pywikibot.page.WikibasePage):
                ident = p._defined_by()
                for key in ident:
                    req[key].append(ident[key])
            else:
                if p.site == self and p.namespace() in (
                        self._entity_namespaces.values()):
                    req['ids'].append(p.title(with_ns=False))
                else:
                    assert p.site.has_data_repository, \
                        'Site must have a data repository'
                    req['sites'].append(p.site.dbName())
                    req['titles'].append(p._link._text)

        req = self._simple_request(action='wbgetentities', **req)
        data = req.submit()
        for entity in data['entities']:
            if 'missing' in data['entities'][entity]:
                continue
            cls = self._type_to_class[data['entities'][entity]['type']]
            page = cls(self, entity)
            # No api call is made because item._content is given
            page._content = data['entities'][entity]
            with suppress(pywikibot.IsRedirectPage):
                page.get()  # cannot provide get_redirect=True (T145971)
            yield page
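# A hedged usage sketch for preload_entities above (not from the original
# source). It assumes a Wikidata-backed site and two example item IDs; on
# pywikibot's DataSite the method is reached via site.data_repository().
import pywikibot

site = pywikibot.Site('en', 'wikipedia')
repo = site.data_repository()
items = [pywikibot.ItemPage(repo, qid) for qid in ('Q42', 'Q1')]
for item in repo.preload_entities(items, groupsize=50):
    # item._content is already filled, so this get() makes no extra API call.
    data = item.get()
    print(item.id, data['labels'].get('en'))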
def check_titles(site, report_page_name, replacements):
    """
    To avoid breaking links, add page titles that would be changed to the exception list.

    :param site: site where the bot will run
    :param report_page_name: name of the page where the excluded titles are reported
    :param replacements: dictionary of replacements
    """
    from pywikibot import textlib
    from pywikibot.tools import itergroup
    all_pages = site.allpages(namespace=0, filterredir=False, content=False)
    evaluation_progress = 0
    exceptions_dict = {}
    for titles_group in itergroup(all_pages, all_pages.query_limit):
        titles_group_t = [p.title(as_link=True, with_section=False)
                          for p in titles_group]
        old_titles = titles_group_t
        evaluation_progress += len(titles_group_t)
        if evaluation_progress % 20000 == 0:
            print('\r%i page titles processed' % evaluation_progress)
        old_text = ' \n '.join(titles_group_t)
        for replacement_key, replacement in replacements.items():
            replacement_exceptions = replacement.exceptions or {}
            replacement_exceptions_inside = replacement_exceptions.get('inside', [])
            new_text = textlib.replaceExcept(old_text, replacement.old_regex,
                                             replacement.new,
                                             replacement_exceptions_inside,
                                             site=site)
            # The replacement changes a valid title.
            changed_titles = (
                (old_title, new_title)
                for old_title, new_title in zip(old_titles,
                                                new_text.split(' \n '))
                if old_title != new_title and
                old_title != '[[%s' % pywikibot.tools.first_upper(new_title[2:]))
            # The replacement also changes the bare title (no special
            # treatment for links), so applying it would break the link.
            changed_titles = (
                (old_title, new_title)
                for old_title, new_title in changed_titles
                if replacement.old_regex.sub(replacement.new,
                                             ' %s ' % old_title[2:-2]) !=
                ' %s ' % old_title[2:-2])
            # Keep valid titles that are not disambiguation pages.
            changed_titles = [
                old_title[2:-2] for old_title, new_title in changed_titles
                if not pywikibot.Page(site, old_title[2:-2]).isDisambig()]
            if len(changed_titles) > 0:
                # changed_titles_exceptions = [re.compile(re.escape(title), re.U)
                #                              for title in changed_titles]
                changed_titles_exceptions = [
                    re.compile(r'\[\[%s\|.+?\]\]|%s'
                               % (re.escape(title), re.escape(title)), re.U)
                    for title in changed_titles]
                replacement_exceptions['inside'] = \
                    replacement_exceptions_inside + changed_titles_exceptions
                replacement.exceptions = replacement_exceptions
                if replacement_key not in exceptions_dict:
                    exceptions_dict[replacement_key] = []
                exceptions_dict[replacement_key] += changed_titles

    exceptions_dict = OrderedDict(
        sorted((int(k), v) for k, v in exceptions_dict.items()))
    report_page = pywikibot.Page(site, report_page_name)
    exception_report = ''
    for replace_key, replaced_titles in exceptions_dict.items():
        exception_report += '\n* %i\n%s' % (
            replace_key,
            '\n'.join(['** [[%s]]' % t for t in replaced_titles]))
    report_page.put(exception_report, summary='עדכון')  # Hebrew for "update"
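# A hedged usage sketch for check_titles above (not part of the original
# script). The Replacement class below is a stand-in assumption: the function
# only needs objects exposing old_regex, new and exceptions, and a replacements
# dict whose keys can be converted with int(). The report page name and the
# colour/color regex are example values only.
import re

import pywikibot


class Replacement(object):
    """Minimal stand-in for the replacement objects check_titles expects."""

    def __init__(self, old_regex, new, exceptions=None):
        self.old_regex = old_regex
        self.new = new
        self.exceptions = exceptions


site = pywikibot.Site('en', 'wikipedia')
replacements = {'1': Replacement(re.compile(r'\bcolour\b'), 'color')}
check_titles(site, 'User:ExampleBot/title exceptions', replacements)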
def PredictionGenerator(site, pages, step=50):
    '''
    Generate pages with quality predictions.

    :param site: site of the pages we are predicting for
    :type site: pywikibot.Site
    :param pages: List of pages we are predicting.
    :type pages: list of pywikibot.Page
    :param step: Number of pages to get predictions for at a time,
                 maximum is 50.
    :type step: int
    '''
    # looks like the best way to do this is to first make one
    # API request to update the pages with the current revision ID,
    # then make one ORES request to get the predictions.
    if step > 50:
        step = 50

    langcode = '{lang}wiki'.format(lang=site.lang)

    # example ORES URL predicting ratings for multiple revisions:
    # https://ores.wmflabs.org/v2/scores/enwiki/wp10/?revids=703654757%7C714153013%7C713916222%7C691301429%7C704638887%7C619467163
    # sub "%7C" with "|"

    # pywikibot.tools.itergroup splits up the list of pages
    for page_group in itergroup(pages, step):
        revid_page_map = {}  # rev id (str) -> page object
        # we use the generator to efficiently load most recent rev id
        for page in PageRevIdGenerator(site, page_group):
            revid_page_map[str(page.latestRevision())] = page

        # make a request to score the revisions
        url = '{ores_url}{langcode}/wp10/?revids={revids}'.format(
            ores_url=config.ORES_url,
            langcode=langcode,
            revids='|'.join([str(page.latestRevision())
                             for page in page_group]))
        logging.debug('Requesting predictions for {n} pages from ORES'.format(
            n=len(revid_page_map)))

        num_attempts = 0
        while num_attempts < config.max_url_attempts:
            r = requests.get(url,
                             headers={'User-Agent': config.http_user_agent,
                                      'From': config.http_from})
            num_attempts += 1
            if r.status_code == 200:
                try:
                    response = r.json()
                    revid_pred_map = response['scores'][langcode]['wp10']['scores']
                    # iterate over returned predictions and update
                    for revid, score_data in revid_pred_map.items():
                        revid_page_map[revid].set_prediction(
                            score_data['prediction'].lower())
                    break
                except ValueError:
                    logging.warning("Unable to decode ORES response as JSON")
                except KeyError:
                    logging.warning("ORES response keys not as expected")

            # something didn't go right, let's wait and try again
            sleep(500)
        for page in page_group:
            yield page
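# A hedged usage sketch for PredictionGenerator above (not part of the original
# module). The generator expects page objects that implement set_prediction()
# and a config module providing ORES_url, max_url_attempts, http_user_agent and
# http_from; the RatedPage subclass and the titles below are assumptions.
import pywikibot


class RatedPage(pywikibot.Page):
    """Minimal page subclass that can store an ORES prediction."""

    def set_prediction(self, prediction):
        self.prediction = prediction


site = pywikibot.Site('en', 'wikipedia')
pages = [RatedPage(site, title)
         for title in ('Python (programming language)', 'MediaWiki')]
for page in PredictionGenerator(site, pages, step=50):
    print(page.title(), getattr(page, 'prediction', None))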
def main():
    exlist = [exc.group(1)
              for exc in lre.pats["exc"].finditer(wp.Page(conf.pageConf).get())]
    pages1, pages2, pages3 = [], [], []
    if not args:
        pywikibot.output("quickscan mode")
        t = site.getcurrenttime()
        if t.day == 1:
            if t.month == 1:
                t = pywikibot.Timestamp(year=t.year - 1, month=12, day=31)
            else:
                t = pywikibot.Timestamp(year=t.year, month=t.month - 1, day=28)
        else:
            t = pywikibot.Timestamp(year=t.year, month=t.month, day=t.day - 1)
        gen1 = site.recentchanges(start=t, reverse=True, showRedirects=False,
                                  showBot=False, changetype=["new", "edit"],
                                  namespaces=conf.namespaces)
        pages1 = [page["title"] for page in gen1]
        gen2 = site.logevents(start=t, reverse=True, logtype="move")
        pages2 = [page.new_title().title() for page in gen2]
    elif args[0] == "-all":
        pywikibot.output("fullscan mode")
        gen3 = ()
        for i in conf.namespaces:
            gen3 = itertools.chain(gen3, site.allpages(filterredir=False,
                                                       start=u"ก", namespace=i))
        pages3 = [page.title() for page in gen3]
        pywikibot.output("load all!")
    else:
        # Fallback test page (a Thai category title).
        pages1 = [u"หมวดหมู่:ชาววิกิพีเดียรักองค์โสมฯ"]
        pywikibot.output("unknown argument")

    # Keep only titles that start with a Thai character.
    allpages = list(set(filter(lambda x: (ord(u"ก") <= ord(x[0]) <= ord(u"๛")),
                               pages1 + pages2 + pages3)))
    datasite = site.data_repository()
    cnti = 0
    pywikibot.output("processing %d pages" % len(allpages))
    for check in conf.checklist:
        if check["detectFromTitle"] is None:
            check["detectFromTitle"] = "[]"  # dummy string which is invalid as a title
        for checkClaim in check["claims"]:
            checkClaim["nameItem"] = pywikibot.ItemPage(datasite,
                                                        checkClaim["nameItem"])
            if checkClaim["refItem"] is not None:
                checkClaim["refItem"] = pywikibot.ItemPage(datasite,
                                                           checkClaim["refItem"])
    for pages in itergroup(allpages, 100):
        cnti += 1
        pywikibot.output("round %d" % cnti)
        dat = datasite.loadcontent({"sites": site.dbName(),
                                    "titles": "|".join(pages)})
        for i, qitem in enumerate(dat):
            pywikibot.output("item %d: %s" % (i, qitem))
            if not qitem.lower().startswith("q"):
                continue
            item = pywikibot.ItemPage(datasite, qitem)
            item._content = dat[qitem]
            super(pywikibot.ItemPage, item).get()  # For getting labels
            data = item.get()
            editdict = {}
            page = wp.Page(item.getSitelink(site))
            if page.title() in exlist:
                continue
            for check in conf.checklist:
                passCriteria = False
                description = None
                if check["detectFromTitle"] in page.title():
                    passCriteria = True
                if check["detectFromNamespace"] == page.namespace():
                    passCriteria = True
                passAllItem = True
                for claimCheck in check["claims"]:
                    passItem = False
                    if claimCheck["name"] in data["claims"]:
                        for claim in data["claims"][claimCheck["name"]]:
                            if claim.getTarget() == claimCheck["nameItem"]:
                                passItem = True
                                break
                    if not passItem:
                        passAllItem = False
                        if passCriteria:
                            claim = pywikibot.Claim(datasite, claimCheck["name"])
                            claim.setTarget(claimCheck["nameItem"])
                            item.addClaim(claim)
                            if claimCheck["ref"] is not None:
                                claim2 = pywikibot.Claim(datasite, claimCheck["ref"])
                                claim2.setTarget(claimCheck["refItem"])
                                claim.addSource(claim2)
                            pywikibot.output("added claim!")
                passCriteria = passCriteria or passAllItem
                if (description is None) and passCriteria:
                    description = check["description"]
                if passCriteria:
                    break
            oldlabels = None
            if "th" in data["labels"]:
                oldlabels = data["labels"]["th"]
            labels = lre.pats["rmdisam"].sub("", page.title())
            if not lre.pats["thai"].search(labels):
                continue
            if labels != oldlabels:
                pywikibot.output("old label: " + unicode(oldlabels))
                pywikibot.output("new label: " + unicode(labels))
                editdict["labels"] = labels
            if passCriteria and (
                    ("th" in data["descriptions"] and
                     data["descriptions"]["th"] != description) or
                    ("th" not in data["descriptions"])):
                editdict["descriptions"] = description
            out = transform(editdict)
            if not out:
                continue
            pywikibot.output("item: " + qitem)
            pywikibot.output("title: " + page.title())
            try:
                # raw_input("prompt: ...")
                item.editEntity(out)
            except:
                wp.error()