Beispiel #1
0
def get():
    """Yield the UTF-8 encoded names from an ``allusers`` query, then 'Legoktm'.

    The query is run against the ``site`` name taken from the enclosing
    scope and is restricted with ``augroup='sysop'``.  After the query is
    exhausted, the literal ``'Legoktm'`` is yielded as one extra entry.
    """
    # Reference request kept from the original author:
    # action=query&list=allusers&augroup=sysop&aulimit=max&format=jsonfm
    sysops = api.ListGenerator('allusers', site=site, augroup='sysop')
    for user in sysops:
        name = user['name']
        yield name.encode('utf-8')
    yield 'Legoktm'
def InterestingGeographsByNumber(**kwargs):
    """Yield pages from InterestingGeographGenerator, resuming from a saved ID.

    The starting ID is read from the text of the wiki page
    'User:Geograph Update Bot/last ID' and turned into a sort key prefix
    that both category queries start from.  After every fiftieth yielded
    page, that page's ``gridimage_id`` is written back to the same wiki page
    as a checkpoint.

    ``kwargs`` must contain ``site``; all of ``kwargs`` is forwarded to both
    API generators.
    """
    site = kwargs['site']
    category = "Category:Images from Geograph Britain and Ireland"

    # The resume point lives in the text of a dedicated wiki page.
    checkpoint_page = pywikibot.Page(site, 'User:Geograph Update Bot/last ID')
    first_id = int(checkpoint_page.text)
    sortkey_from = " %08d" % (first_id, )

    list_params = dict(
        cmtitle=category,
        cmprop="title|sortkeyprefix",
        cmtype="file",
        cmstartsortkeyprefix=sortkey_from)
    titles = api.ListGenerator(
        "categorymembers", parameters=list_params, **kwargs)

    query_params = dict(
        generator="categorymembers",
        gcmtitle=category,
        gcmtype="file",
        gcmstartsortkeyprefix=sortkey_from,
        prop="imageinfo",
        iiprop="size")
    infos = api.QueryGenerator(parameters=query_params, **kwargs)

    pages = InterestingGeographGenerator(site, titles, infos)
    for count, page in enumerate(pages, 1):
        yield page
        if count % 50 != 0:
            continue
        # Write a checkpoint every fifty yielded items.
        checkpoint_page.text = str(page.gridimage_id)
        checkpoint_page.save(
            "Checkpoint: up to %d" % (page.gridimage_id, ))
def find_duplicates():
    """Write a wiki report of Geograph image IDs shared by several files.

    Iterates over the file members of
    "Category:Images from Geograph Britain and Ireland", reading each
    member's sort key prefix as an integer image ID.  Whenever an ID equals
    the ID of the previous member, a report entry is emitted: one header
    line plus the first title the first time the ID repeats, then one line
    for every further title.  Duplicates are only detected when entries with
    the same ID arrive consecutively.

    The finished report replaces the text of
    "User:Geograph Update Bot/duplicate Geograph IDs/data".
    """
    last_id = dup_id = -1
    last_title = None
    outfile = StringIO()
    site = pywikibot.Site()
    for item in api.ListGenerator(
            "categorymembers",
            site=site,
            cmtitle="Category:Images from Geograph Britain and Ireland",
            cmprop="title|sortkeyprefix",
            cmtype="file"):
        # Only the parsing step is allowed to fail quietly: members whose
        # sort key prefix is missing or not an integer are skipped.  Any
        # other error is a real bug and must not be hidden.
        try:
            gridimage_id = int(item['sortkeyprefix'])
            title = item['title']
        except (KeyError, TypeError, ValueError):
            continue
        # Progress indicator on stdout, overwritten in place.
        print(gridimage_id, end="\r")
        if gridimage_id == last_id:
            if dup_id != last_id:
                # First repeat of this ID: emit the header and the title
                # of the entry seen just before this one.
                print("* [https://www.geograph.org.uk/photo/%d %d]" %
                      (gridimage_id, gridimage_id),
                      file=outfile)
                print("** [[:%s]]" % (last_title, ), file=outfile)
                dup_id = last_id
            print("** [[:%s]]" % (title, ), file=outfile)
        last_id = gridimage_id
        last_title = title
    reportpage = pywikibot.Page(
        site, "User:Geograph Update Bot/duplicate Geograph IDs/data")
    reportpage.text = (
        "<!-- This page will be overwritten by Geograph Update Bot -->")
    reportpage.text += outfile.getvalue()
    reportpage.save("New list of duplicate IDs")
Beispiel #4
0
def fetch_whitelist():
    """Yield the ``name`` of every entry of a group-filtered ``allusers`` query.

    The query runs against the ``site`` name from the enclosing scope with
    ``augroup='sysop|autopatrolled|bot'``.
    """
    # Reference request kept from the original author:
    # list=allusers&augroup=sysop|autopatrolled&format=jsonfm&aulimit=max
    trusted_users = api.ListGenerator(
        'allusers',
        site=site,
        augroup='sysop|autopatrolled|bot'
    )
    for user in trusted_users:
        yield user['name']
Beispiel #5
0
 def setUp(self):
     """Create ``self.gen`` for 'allpages' on a site with stubbed paraminfo."""
     super().setUp()
     site = self.get_site()
     # Parameter info installed directly on the site for 'query+allpages'.
     allpages_info = {
         'prefix': 'ap',
         'limit': {'max': 10},
         'namespace': {'multi': True},
     }
     site._paraminfo['query+allpages'] = allpages_info
     self.gen = api.ListGenerator(listaction='allpages', site=site)
Beispiel #6
0
def fetch(user):
    """Yield ``(rcid, patroltoken)`` for each recent change made by *user*.

    Runs a ``recentchanges`` list query against the ``site`` name from the
    enclosing scope, filtered with ``rcuser=user`` and
    ``rcshow='!patrolled'`` and requesting a patrol token per change.  Each
    change is also printed to stdout before it is yielded.
    """
    params = {
        'rcuser': user,
        'rclimit': 'max',
        'rcshow': '!patrolled',
        'rctoken': 'patrol',
    }
    gen = api.ListGenerator('recentchanges', site=site, **params)
    for change in gen:
        # Parenthesised form: same output on Python 2, valid on Python 3.
        print(change)
        yield change['rcid'], change['patroltoken']
Beispiel #7
0
def pendingchangesGenerator():
    """Yield a ``pywikibot.Page`` for each entry of an 'oldreviewedpages' query.

    The query is issued on the default ``pywikibot.Site()`` with
    ``orlimit=5`` and ``ornamespace=0``; each page is built from the entry's
    "title".
    """
    wiki = pywikibot.Site()
    query = dict(
        listaction="oldreviewedpages",
        site=wiki,
        orlimit=5,
        ornamespace=0)
    for row in api.ListGenerator(**query):
        yield pywikibot.Page(wiki, row["title"])
Beispiel #8
0
 def setUp(self):
     """Create ``self.gen`` for 'allpages' on a site with stubbed paraminfo."""
     super(TestDryListGenerator, self).setUp()
     site = self.get_site()
     paraminfo = site._paraminfo
     # Parameter info installed directly on the site for 'query+allpages'.
     paraminfo['query+allpages'] = {
         'prefix': 'ap',
         'limit': {'max': 10},
         'namespace': {'multi': True},
     }
     paraminfo.query_modules_with_limits = {'allpages'}
     self.gen = api.ListGenerator(listaction="allpages", site=site)
Beispiel #9
0
def unreviewdpagesGenerator():
    """Yield a ``pywikibot.Page`` for each entry of an 'unreviewedpages' query.

    The query is issued on the default ``pywikibot.Site()`` with
    ``urlimit=5``, ``urnamespace=0`` and ``urfilterredir="nonredirects"``;
    each page is built from the entry's "title".
    """
    wiki = pywikibot.Site()
    query = dict(
        listaction="unreviewedpages",
        site=wiki,
        urlimit=5,
        urnamespace=0,
        urfilterredir="nonredirects")
    for row in api.ListGenerator(**query):
        yield pywikibot.Page(wiki, row["title"])
def find_rejected():
    """Write a wiki report of category files whose Geograph ID is not in the DB.

    Reads the largest ``gridimage_id`` from ``gridimage_base`` in ``geodb``,
    then iterates over the file members of
    "Category:Images from Geograph Britain and Ireland".  For each member
    whose integer sort key prefix is not above that maximum and has no row
    in ``gridimage_base``, a report line is written and a HEAD request
    (following redirects) is sent to the image's geograph.org.uk URL.  If
    that request ends with status 200, the last path segment of the final
    URL is taken as the destination ID and one follow-up line is written,
    including the destination's file title when that ID has already been
    seen in the category.

    The finished report replaces the text of
    "User:Geograph Update Bot/images rejected from Geograph/data".
    """
    outfile = StringIO()
    site = pywikibot.Site()
    c = geodb.cursor()
    c.execute("""
        SELECT MAX(gridimage_id) FROM gridimage_base
            ORDER BY gridimage_id desc limit 1""")
    row = c.fetchone()
    maxid = row[0]
    # Titles of every member seen so far, keyed by image ID; used to name
    # the destination of a redirect when it was encountered earlier.
    titles_by_id = {}
    for item in api.ListGenerator(
            "categorymembers",
            site=site,
            cmtitle="Category:Images from Geograph Britain and Ireland",
            cmprop="title|sortkeyprefix",
            cmtype="file"):
        try:
            gridimage_id = int(item['sortkeyprefix'])
            titles_by_id[gridimage_id] = item['title']
            if gridimage_id > maxid:
                continue
            # Progress indicator on stdout, overwritten in place.
            print(gridimage_id, end="\r")
            c = geodb.cursor()
            c.execute(
                """
                SELECT gridimage_id FROM gridimage_base
                WHERE gridimage_id = ?
                """, (gridimage_id, ))
            if c.fetchone() is not None:
                continue
            print("* [https://www.geograph.org.uk/photo/%d %d]: [[:%s]]" %
                  (gridimage_id, gridimage_id, item['title']),
                  file=outfile,
                  flush=True)
            r = requests.head('https://www.geograph.org.uk/photo/%d' %
                              (gridimage_id, ),
                              allow_redirects=True)
            if r.status_code != 200:
                continue
            destid = int(urlsplit(r.url).path.rpartition('/')[2])
            # .get() instead of indexing: an unknown destination used to
            # raise KeyError, which was swallowed below and lost the line.
            dest_title = titles_by_id.get(destid)
            if dest_title:
                print("** → [%s %d]: [[:%s]]" %
                      (r.url, destid, dest_title),
                      file=outfile,
                      flush=True)
            else:
                print("** → [%s %d]" % (r.url, destid),
                      file=outfile,
                      flush=True)
        except Exception:
            # Deliberate best effort: a malformed entry, a database error or
            # a network failure on one item must not abort the whole report.
            continue
    reportpage = pywikibot.Page(
        site, "User:Geograph Update Bot/images rejected from Geograph/data")
    reportpage.text = (
        "<!-- This page will be overwritten by Geograph Update Bot -->")
    reportpage.text += outfile.getvalue()
    reportpage.save("New list of rejected IDs")
def InterestingGeographsByDate(**kwargs):
    """Yield pages from InterestingGeographGenerator, ordered by timestamp.

    Both category queries are issued with sort "timestamp" and direction
    "older".  ``kwargs`` must contain ``site``; all of ``kwargs`` is
    forwarded to both API generators.
    """
    site = kwargs['site']
    category = "Category:Images from Geograph Britain and Ireland"

    list_params = {
        "cmtitle": category,
        "cmprop": "title|sortkeyprefix",
        "cmtype": "file",
        "cmsort": "timestamp",
        "cmdir": "older",
    }
    titles = api.ListGenerator(
        "categorymembers", parameters=list_params, **kwargs)

    query_params = {
        "generator": "categorymembers",
        "gcmtitle": category,
        "gcmtype": "file",
        "gcmsort": "timestamp",
        "gcmdir": "older",
        "prop": "imageinfo",
        "iiprop": "size",
    }
    infos = api.QueryGenerator(parameters=query_params, **kwargs)

    yield from InterestingGeographGenerator(site, titles, infos)
 def fetchWatchlist(self):
     """Process one given page, or the recently changed watchlist pages.

     Options are read from ``self.args``:

     * ``--days<sep>N``: how far back to look, in days (default 1); a
       non-integer value is ignored.
     * ``--page<sep>TITLE``: process only this page instead of querying the
       watchlist.

     Without ``--page``, a 'watchlist' list query starting at the computed
     timestamp and limited to a fixed set of namespaces supplies the titles.
     Each distinct title is turned into a ``pywikibot.Page`` and passed to
     ``self.do_page`` together with a cursor from ``self.conn``.  A failure
     while processing one page does not stop the remaining pages.
     """
     days = 1
     page = None
     for arg in self.args:
         # The value starts at index 7, i.e. after the six-character
         # option name and one separator character.
         if arg.startswith('--days'):
             try:
                 days = int(arg[7:])
             except ValueError:
                 pass  # keep the default
         if arg.startswith('--page'):
             try:
                 page = pywikibot.Page(self.site, arg[7:])
             except Exception:
                 # Unusable title: fall back to the watchlist query.
                 pass
     dayago = datetime.datetime.utcnow() - datetime.timedelta(days=days)
     dayago = dayago.strftime('%Y-%m-%dT%H:00:00Z')
     namespaces = [1, 3, 5, 7, 9, 11, 13, 15, 101, 109]
     if page:
         q = [page]
     else:
         q = api.ListGenerator(listaction='watchlist', wlstart=dayago)
         q.set_namespace(namespaces)
     queue = []
     # Titles already queued.  The queue holds Page objects, so testing a
     # title string against it never matched and duplicates slipped in.
     seen_titles = set()
     for item in q:
         title = page.title() if page else item['title']
         if title in seen_titles:
             continue
         seen_titles.add(title)
         queue.append(pywikibot.Page(self.site, title))
     cur = self.conn.cursor()
     for queued_page in queue:
         try:
             self.do_page(queued_page, cur)
         except Exception:
             # Best effort: one failing page must not stop the rest.
             # KeyboardInterrupt and SystemExit still propagate.
             pass
Beispiel #13
0
 def test_namespace_resolve_failed(self):
     """Check that set_namespace raises KeyError for namespace 10000."""
     self.gen = api.ListGenerator(listaction='allpages', site=self.site)
     supported = self.gen.support_namespace()
     self.assertTrue(supported)
     self.assertRaises(KeyError, self.gen.set_namespace, 10000)
Beispiel #14
0
 def test_namespace_multi(self):
     """Check that set_namespace accepts a list of namespaces on 'allpages'."""
     self.gen = api.ListGenerator(listaction='allpages', site=self.site)
     supported = self.gen.support_namespace()
     self.assertTrue(supported)
     result = self.gen.set_namespace([0, 1])
     self.assertIsNone(result)
Beispiel #15
0
 def test_namespace_non_multi(self):
     """Check set_namespace on 'alllinks': a list raises, a single value works."""
     self.gen = api.ListGenerator(listaction='alllinks', site=self.site)
     self.assertRaises(TypeError, self.gen.set_namespace, [0, 1])
     result = self.gen.set_namespace(0)
     self.assertIsNone(result)
Beispiel #16
0
 def test_namespace_none(self):
     """Check that set_namespace(None) raises TypeError on 'alllinks'."""
     self.gen = api.ListGenerator(listaction='alllinks', site=self.site)
     self.assertRaises(TypeError, self.gen.set_namespace, None)
Beispiel #17
0
 def test_namespace_param_is_not_settable(self):
     """Check that 'querypage' reports no namespace support."""
     self.gen = api.ListGenerator(listaction='querypage', site=self.site)
     supported = self.gen.support_namespace()
     self.assertFalse(supported)
     # Setting a namespace anyway must return a falsy value.
     result = self.gen.set_namespace([0, 1])
     self.assertFalse(result)
Beispiel #18
0
 def get_autoreviewedusers(self):
     """Return a dict of ``allusers`` entries keyed by each entry's "name".

     The query is issued on the default ``pywikibot.Site()`` with
     ``aurights='autoreview|autopatrol'``.
     """
     query = dict(
         listaction="allusers",
         site=pywikibot.Site(),
         aurights='autoreview|autopatrol')
     return {entry['name']: entry for entry in api.ListGenerator(**query)}