def test_getImages(self):
        # This test download the image list using API and index.php
        # Compare both lists in length and file by file
        # Check the presence of some special files, like odd chars filenames
        # The tested wikis are from different wikifarms and some alone

        print '#' * 73, '\n', 'test_getImages', '\n', '#' * 73
        tests = [
            # Alone wikis
            #['http://wiki.annotation.jp/index.php', 'http://wiki.annotation.jp/api.php', u'かずさアノテーション - ソーシャル・ゲノム・アノテーション.jpg'],
            [
                'http://archiveteam.org/index.php',
                'http://archiveteam.org/api.php',
                u'Archive-is 2013-07-02 17-05-40.png'
            ],
            [
                'http://skilledtests.com/wiki/index.php',
                'http://skilledtests.com/wiki/api.php',
                u'Benham\'s disc (animated).gif'
            ],

            # Editthis wikifarm
            # It has a page view limit

            # Gamepedia wikifarm
            [
                'http://dawngate.gamepedia.com/index.php',
                'http://dawngate.gamepedia.com/api.php', u'Spell Vanquish.png'
            ],

            # Neoseeker wikifarm
            [
                'http://digimon.neoseeker.com/w/index.php',
                'http://digimon.neoseeker.com/w/api.php', u'Ogremon card.png'
            ],

            # Orain wikifarm
            #['http://mc.orain.org/w/index.php', 'http://mc.orain.org/w/api.php', u'Mojang logo.svg'],

            # Referata wikifarm
            [
                'http://wikipapers.referata.com/w/index.php',
                'http://wikipapers.referata.com/w/api.php', u'Avbot logo.png'
            ],

            # ShoutWiki wikifarm
            [
                'http://commandos.shoutwiki.com/w/index.php',
                'http://commandos.shoutwiki.com/w/api.php',
                u'Night of the Wolves loading.png'
            ],

            # Wiki-site wikifarm
            [
                'http://minlingo.wiki-site.com/index.php',
                'http://minlingo.wiki-site.com/api.php', u'一 (書方灋ᅗᅩ).png'
            ],

            # Wikkii wikifarm
            # It seems offline
        ]
        session = requests.Session()
        session.headers = {'User-Agent': getUserAgent()}
        for index, api, filetocheck in tests:
            # Testing with API
            print '\nTesting', api
            config_api = {'api': api, 'delay': 0}
            req = urllib2.Request(url=api,
                                  data=urllib.urlencode({
                                      'action': 'query',
                                      'meta': 'siteinfo',
                                      'siprop': 'statistics',
                                      'format': 'json'
                                  }),
                                  headers={'User-Agent': getUserAgent()})
            f = urllib2.urlopen(req)
            imagecount = int(
                json.loads(f.read())['query']['statistics']['images'])
            f.close()

            print 'Trying to parse', filetocheck, 'with API'
            result_api = getImageNames(config=config_api, session=session)
            self.assertEqual(len(result_api), imagecount)
            self.assertTrue(
                filetocheck in
                [filename for filename, url, uploader in result_api])

            # Testing with index
            print '\nTesting', index
            config_index = {'index': index, 'delay': 0}
            req = urllib2.Request(url=api,
                                  data=urllib.urlencode({
                                      'action': 'query',
                                      'meta': 'siteinfo',
                                      'siprop': 'statistics',
                                      'format': 'json'
                                  }),
                                  headers={'User-Agent': getUserAgent()})
            f = urllib2.urlopen(req)
            imagecount = int(
                json.loads(f.read())['query']['statistics']['images'])
            f.close()

            print 'Trying to parse', filetocheck, 'with index'
            result_index = getImageNames(config=config_index, session=session)
            #print 111, set([filename for filename, url, uploader in result_api]) - set([filename for filename, url, uploader in result_index])
            self.assertEqual(len(result_index), imagecount)
            self.assertTrue(
                filetocheck in
                [filename for filename, url, uploader in result_index])

            # Compare every image in both lists, with/without API
            c = 0
            for filename_api, url_api, uploader_api in result_api:
                self.assertEqual(
                    filename_api, result_index[c][0],
                    u'{0} and {1} are different'.format(
                        filename_api, result_index[c][0]))
                self.assertEqual(
                    url_api, result_index[c][1],
                    u'{0} and {1} are different'.format(
                        url_api, result_index[c][1]))
                self.assertEqual(
                    uploader_api, result_index[c][2],
                    u'{0} and {1} are different'.format(
                        uploader_api, result_index[c][2]))
                c += 1
# Beispiel #2 (Example #2): a second extracted copy of the same test,
# kept below for comparison.
 def test_getImages(self):
     # This test download the image list using API and index.php
     # Compare both lists in length and file by file
     # Check the presence of some special files, like odd chars filenames
     # The tested wikis are from different wikifarms and some alone
     
     print '#'*73, '\n', 'test_getImages', '\n', '#'*73
     tests = [
         # Alone wikis
         #['http://wiki.annotation.jp/index.php', 'http://wiki.annotation.jp/api.php', u'かずさアノテーション - ソーシャル・ゲノム・アノテーション.jpg'],
         ['http://archiveteam.org/index.php', 'http://archiveteam.org/api.php', u'Archive-is 2013-07-02 17-05-40.png'],
         ['http://skilledtests.com/wiki/index.php', 'http://skilledtests.com/wiki/api.php', u'Benham\'s disc (animated).gif'],
         
         # Editthis wikifarm
         # It has a page view limit
         
         # Gamepedia wikifarm
         ['http://dawngate.gamepedia.com/index.php', 'http://dawngate.gamepedia.com/api.php', u'Spell Vanquish.png'],
         
         # Gentoo wikifarm
         ['http://wiki.gentoo.org/index.php', 'http://wiki.gentoo.org/api.php', u'Openclonk screenshot1.png'],
         
         # Neoseeker wikifarm
         ['http://digimon.neoseeker.com/w/index.php', 'http://digimon.neoseeker.com/w/api.php', u'Ogremon card.png'],
         
         # Orain wikifarm
         #['http://mc.orain.org/w/index.php', 'http://mc.orain.org/w/api.php', u'Mojang logo.svg'],
         
         # Referata wikifarm
         ['http://wikipapers.referata.com/w/index.php', 'http://wikipapers.referata.com/w/api.php', u'Avbot logo.png'],
         
         # ShoutWiki wikifarm
         ['http://commandos.shoutwiki.com/w/index.php', 'http://commandos.shoutwiki.com/w/api.php', u'Night of the Wolves loading.png'],
         
         # Wiki-site wikifarm
         ['http://minlingo.wiki-site.com/index.php', 'http://minlingo.wiki-site.com/api.php', u'一 (書方灋ᅗᅩ).png'],
         
         # Wikkii wikifarm
         # It seems offline
     ]
     session = requests.Session()
     session.headers = {'User-Agent': getUserAgent()}
     for index, api, filetocheck in tests:
         # Testing with API
         print '\nTesting', api
         config_api = {'api': api, 'delay': 0}
         req = urllib2.Request(url=api, data=urllib.urlencode({'action': 'query', 'meta': 'siteinfo', 'siprop': 'statistics', 'format': 'json'}), headers={'User-Agent': getUserAgent()})
         f = urllib2.urlopen(req)
         imagecount = int(json.loads(f.read())['query']['statistics']['images'])
         f.close()
         
         print 'Trying to parse', filetocheck, 'with API'
         result_api = getImageNames(config=config_api, session=session)
         self.assertEqual(len(result_api), imagecount)
         self.assertTrue(filetocheck in [filename for filename, url, uploader in result_api])
         
         # Testing with index
         print '\nTesting', index
         config_index = {'index': index, 'delay': 0}
         req = urllib2.Request(url=api, data=urllib.urlencode({'action': 'query', 'meta': 'siteinfo', 'siprop': 'statistics', 'format': 'json'}), headers={'User-Agent': getUserAgent()})
         f = urllib2.urlopen(req)
         imagecount = int(json.loads(f.read())['query']['statistics']['images'])
         f.close()
 
         print 'Trying to parse', filetocheck, 'with index'
         result_index = getImageNames(config=config_index, session=session)
         #print 111, set([filename for filename, url, uploader in result_api]) - set([filename for filename, url, uploader in result_index])
         self.assertEqual(len(result_index), imagecount)
         self.assertTrue(filetocheck in [filename for filename, url, uploader in result_index])
         
         # Compare every image in both lists, with/without API
         c = 0
         for filename_api, url_api, uploader_api in result_api:
             self.assertEqual(filename_api, result_index[c][0], u'{0} and {1} are different'.format(filename_api, result_index[c][0]))
             self.assertEqual(url_api, result_index[c][1], u'{0} and {1} are different'.format(url_api, result_index[c][1]))
             self.assertEqual(uploader_api, result_index[c][2], u'{0} and {1} are different'.format(uploader_api, result_index[c][2]))
             c += 1