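# Context assumed for this excerpt (not shown in the original snippet): the
# test relies on json, requests, urllib and urllib2 being imported at file
# scope, together with the getUserAgent() and getImageNames() helpers from
# the module under test.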
def test_getImages(self):
    # This test downloads the image list using both the API and index.php,
    # compares the two lists in length and file by file, and checks for
    # some special files, such as filenames with odd characters.
    # The tested wikis come from different wikifarms, plus some standalone wikis.
    print '#' * 73, '\n', 'test_getImages', '\n', '#' * 73
    tests = [
        # Standalone wikis
        #['http://wiki.annotation.jp/index.php', 'http://wiki.annotation.jp/api.php', u'かずさアノテーション - ソーシャル・ゲノム・アノテーション.jpg'],
        ['http://archiveteam.org/index.php', 'http://archiveteam.org/api.php', u'Archive-is 2013-07-02 17-05-40.png'],
        ['http://skilledtests.com/wiki/index.php', 'http://skilledtests.com/wiki/api.php', u'Benham\'s disc (animated).gif'],
        # Editthis wikifarm
        # It has a page view limit
        # Gamepedia wikifarm
        ['http://dawngate.gamepedia.com/index.php', 'http://dawngate.gamepedia.com/api.php', u'Spell Vanquish.png'],
        # Gentoo wikifarm
        ['http://wiki.gentoo.org/index.php', 'http://wiki.gentoo.org/api.php', u'Openclonk screenshot1.png'],
        # Neoseeker wikifarm
        ['http://digimon.neoseeker.com/w/index.php', 'http://digimon.neoseeker.com/w/api.php', u'Ogremon card.png'],
        # Orain wikifarm
        #['http://mc.orain.org/w/index.php', 'http://mc.orain.org/w/api.php', u'Mojang logo.svg'],
        # Referata wikifarm
        ['http://wikipapers.referata.com/w/index.php', 'http://wikipapers.referata.com/w/api.php', u'Avbot logo.png'],
        # ShoutWiki wikifarm
        ['http://commandos.shoutwiki.com/w/index.php', 'http://commandos.shoutwiki.com/w/api.php', u'Night of the Wolves loading.png'],
        # Wiki-site wikifarm
        ['http://minlingo.wiki-site.com/index.php', 'http://minlingo.wiki-site.com/api.php', u'一 (書方灋ᅗᅩ).png'],
        # Wikkii wikifarm
        # It seems offline
    ]

    session = requests.Session()
    session.headers = {'User-Agent': getUserAgent()}
    for index, api, filetocheck in tests:
        # Testing with API
        print '\nTesting', api
        config_api = {'api': api, 'delay': 0}
        # Fetch the expected image count from the siteinfo statistics API
        req = urllib2.Request(
            url=api,
            data=urllib.urlencode({
                'action': 'query',
                'meta': 'siteinfo',
                'siprop': 'statistics',
                'format': 'json'}),
            headers={'User-Agent': getUserAgent()})
        f = urllib2.urlopen(req)
        imagecount = int(json.loads(f.read())['query']['statistics']['images'])
        f.close()

        print 'Trying to parse', filetocheck, 'with API'
        result_api = getImageNames(config=config_api, session=session)
        self.assertEqual(len(result_api), imagecount)
        self.assertTrue(filetocheck in [filename for filename, url, uploader in result_api])

        # Testing with index.php
        print '\nTesting', index
        config_index = {'index': index, 'delay': 0}
        # The expected count still comes from the API; index.php offers no
        # equivalent of the siteinfo statistics query
        req = urllib2.Request(
            url=api,
            data=urllib.urlencode({
                'action': 'query',
                'meta': 'siteinfo',
                'siprop': 'statistics',
                'format': 'json'}),
            headers={'User-Agent': getUserAgent()})
        f = urllib2.urlopen(req)
        imagecount = int(json.loads(f.read())['query']['statistics']['images'])
        f.close()

        print 'Trying to parse', filetocheck, 'with index'
        result_index = getImageNames(config=config_index, session=session)
        #print 111, set([filename for filename, url, uploader in result_api]) - set([filename for filename, url, uploader in result_index])
        self.assertEqual(len(result_index), imagecount)
        self.assertTrue(filetocheck in [filename for filename, url, uploader in result_index])

        # Compare every image in both lists, with/without API
        c = 0
        for filename_api, url_api, uploader_api in result_api:
            self.assertEqual(
                filename_api, result_index[c][0],
                u'{0} and {1} are different'.format(filename_api, result_index[c][0]))
            self.assertEqual(
                url_api, result_index[c][1],
                u'{0} and {1} are different'.format(url_api, result_index[c][1]))
            self.assertEqual(
                uploader_api, result_index[c][2],
                u'{0} and {1} are different'.format(uploader_api, result_index[c][2]))
            c += 1
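# A minimal refactoring sketch, not part of the original test: the siteinfo
# statistics request above is issued twice, verbatim, once per pass. It could
# be factored into a module-level helper; the name getImageCount() is
# hypothetical, but the request is exactly the one the test already sends.
def getImageCount(api):
    """Return the image count a wiki reports via action=query&meta=siteinfo."""
    req = urllib2.Request(
        url=api,
        data=urllib.urlencode({
            'action': 'query',
            'meta': 'siteinfo',
            'siprop': 'statistics',
            'format': 'json'}),
        headers={'User-Agent': getUserAgent()})
    f = urllib2.urlopen(req)
    try:
        # The 'statistics' block of a siteinfo response carries the 'images'
        # counter that the getImageNames() results are checked against
        return int(json.loads(f.read())['query']['statistics']['images'])
    finally:
        f.close()

# Both passes in test_getImages() could then reduce to:
#   imagecount = getImageCount(api)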