def UpdateWiki(self):
    """Write the contents of the teams dictionary back into the wiki."""
    wiki = MediaWiki(self.config.get('PlanetKubb', 'API'))
    wiki.login(self.config.get('KubbBot', 'Username'),
               self.config.get('KubbBot', 'Password'))

    # We need an edit token
    c = wiki.call({'action': 'query', 'titles': 'Foo', 'prop': 'info', 'intoken': 'edit'})
    print c
    my_token = c['query']['pages']['-1']['edittoken']
    print "Edit token: %s" % my_token

    print "== Updating wiki with new scores =="
    for team in self.teams:
        print "\"%s\",%f,%f" % (team, self.teams[team].mu, self.teams[team].sigma)
        c = wiki.call({
            'action': 'sfautoedit',
            'form': 'Team',
            'target': team,
            'Team[TrueSkill mu]': "%s" % self.teams[team].mu,
            'Team[TrueSkill sigma]': "%s" % self.teams[team].sigma,
            'token': my_token})
        print c
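# The intoken=edit query pattern above was deprecated in MediaWiki 1.24 in
# favor of the tokens meta query. A minimal sketch of the equivalent fetch,
# assuming the same simplemediawiki `wiki` object as above:
c = wiki.call({'action': 'query', 'meta': 'tokens', 'type': 'csrf'})
my_token = c['query']['tokens']['csrftoken']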
def open_connection(bot_name, env_name, api_url):
    """Open a connection to MediaWiki for a bot."""
    LOGGER.info("Opening MediaWiki connection for %s at %s", bot_name, api_url)
    apiary_wiki = MediaWiki(api_url)
    edit_token = None

    try:
        # Passwords may be defined in the environment or in the config file
        # We prefer the environment variable if it is present
        password = os.environ.get(env_name, None)
        if password is None:
            try:
                password = config.get('Passwords', bot_name)
            except Exception as e:
                LOGGER.warn('No configuration file detected.')

        if password is not None:
            LOGGER.info("Logging in as %s", bot_name)
            apiary_wiki.login(bot_name, password)

            LOGGER.info("Getting edit token for %s", bot_name)
            wiki_return = apiary_wiki.call({
                'action': 'tokens',
                'type': 'edit'
            })
            edit_token = wiki_return['tokens']['edittoken']
            LOGGER.info("%s has been given edit token %s", bot_name, edit_token)
        else:
            LOGGER.warn("No password was provided for %s. Queries allowed but editing will not work.", bot_name)
    except Exception as e:
        raise Exception("Unable to login as %s got '%s'" % (bot_name, e))

    return (apiary_wiki, edit_token)
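# A minimal usage sketch for open_connection(), assuming the module-level
# `config` and LOGGER it relies on are already set up. The bot name,
# environment variable, and password below are illustrative, not from the
# original.
import os

os.environ['BUMBLE_BEE_PASSWORD'] = 'example-password'  # hypothetical
wiki, token = open_connection('Bumble Bee', 'BUMBLE_BEE_PASSWORD',
                              'https://wikiapiary.com/w/api.php')
if token is None:
    print "Connection is read-only; editing calls will fail."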
def setup(config):
    try:
        wikiConn = MediaWiki("http://%s:8888/mediawiki/api.php" % mwserver,
                             user_agent="IA-mwbridge")
        wikiConn.login("david", "bad pass")
        token = wikiConn.call({"action": "query", "meta": "tokens"})["query"]["tokens"]["csrftoken"]
    except Exception as e:
        print "Trouble connecting to mediawiki: %s" % e
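# Sketch of how the csrftoken from setup() would be spent on an edit, assuming
# wikiConn and token are returned or otherwise made available (setup() as
# written keeps them local). The target page and text are illustrative;
# action=edit with a CSRF token is the standard MediaWiki API editing call.
result = wikiConn.call({
    'action': 'edit',
    'title': 'Sandbox',                    # hypothetical target page
    'text': 'Test edit from IA-mwbridge.',
    'summary': 'bridge test',
    'token': token                         # csrftoken obtained in setup()
})
print result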
def finger(request):
    change_list = []
    foundtext = False

    # PUT IN CONNECTION VALUES HERE
    wiki = MediaWiki('set_your_api_url')
    wiki.login('set_username', 'set_password')
    tokendoc = wiki.call({'action': 'tokens'})
    edittoken = tokendoc.get("tokens").get('edittoken')

    searchtext = request.GET['text'].strip()
    searchpage = request.GET['page'].strip()
    if searchtext == '' or searchpage == '':
        context = {
            'message': 'Missing either search text or page to search!',
        }
        return render(request, 'blame/error.html', context)

    # simplemediawiki forces format=json itself, so no format parameter is
    # passed here (the original's 'jsonfm' would have been overridden anyway)
    queryresult = wiki.call({'action': 'query', 'prop': 'revisions',
                             'rvprop': 'ids|user', 'rvdir': 'newer',
                             'rvlimit': '5000', 'titles': searchpage})
    if '-1' in list(queryresult['query']['pages'].keys()):
        context = {
            'message': 'The page you requested was not found! Please check your capitalization, namespace, and spelling!',
        }
        return render(request, 'blame/error.html', context)

    pageid = list(queryresult['query']['pages'].keys())[0]
    revisions = list(queryresult['query']['pages'][pageid]['revisions'])
    for revision in revisions:
        revisiondata = wiki.call({'action': 'query', 'prop': 'revisions',
                                  'revids': revision['revid'], 'rvprop': 'content'})
        revisiontext = revisiondata['query']['pages'][pageid]['revisions'][0]['*']
        if not foundtext and searchtext in revisiontext:
            # PUT IN URL VALUE HERE
            change_list.append({'changetype': 'Added',
                                'revision': revision['revid'],
                                'user': revision['user'],
                                'link': 'set_your_website_url?title=' + searchpage + '&oldid=' + str(revision['revid'])})
            foundtext = True
        elif foundtext and searchtext not in revisiontext:
            # PUT IN URL VALUE HERE
            change_list.append({'changetype': 'Removed',
                                'revision': revision['revid'],
                                'user': revision['user'],
                                'link': 'set_your_website_url?title=' + searchpage + '&oldid=' + str(revision['revid'])})
            foundtext = False

    context = {
        'change_list': change_list,
    }
    return render(request, 'blame/finger.html', context)
class TropicalWikis:
    # Array to append sites to
    sites = []

    # This page lists all wikis hosted on the TropicalWikis farm;
    # we can use it to derive the names of the various wikis
    source_list = 'http://www.tropicalwikis.com/wiki/Special:Farmer/list'

    # Blank reference to store mediawiki object in
    wikiapiary = {}

    # Edit token
    my_token = ""

    # Counter
    create_counter = 0

    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('TropicalBot', 'Username'),
                              config.get('TropicalBot', 'Password'))

        # We need an edit token
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']

    def getList(self):
        soup = BeautifulSoup(requests.get(self.source_list).text)
        i = 1
        for item in soup.findAll("a", {"class": "external text"}):
            site = (item.contents[0], item["href"])
            print i, site
            self.sites.append(site)
            i += 1

    def createSite(self, name, url):
        siteTemplate = """{{Website
|Name=%s
|URL=%s
|Image=Default website image.png
|Farm=TropicalWikis
|Collection method=API
|API URL=%s
|Collect general data=Yes
|Collect extension data=Yes
|Collect skin data=Yes
|Collect statistics=Yes
|Collect semantic statistics=No
|Collect semantic usage=No
|Collect logs=No
|Collect recent changes=No
|Statistics URL=
|Collect statistics stats=Yes
|Check every=240
|Audited=No
|Validated=No
|Curated=No
|Active=No
|Demote=No
|Defunct=No
|Error=No
|Featured website vote=0
}}
"""
        api_url = "%s/w/api.php" % url

        # Make sure a page doesn't exist with this name already
        c = self.wikiapiary.call({'action': 'query', 'titles': name})
        try:
            if c['query']['pages']['-1']:
                print "No duplicate name detected."
        except KeyError:
            # Duplicate detected
            name = "%s (TropicalWikis)" % name

        my_template = siteTemplate % (name, url, api_url)
        print my_template

        c = self.wikiapiary.call({
            'action': 'edit',
            'title': name,
            'text': my_template,
            'token': self.my_token,
            'bot': 'true',
            'summary': 'Creating entry for %s' % name
        })
        print c
        self.create_counter += 1

    def checkSite(self, site):
        print "Checking %s" % site[1]
        # Construct Ask query for WikiApiary
        my_query = "[[Has API URL::%s/w/api.php]]" % site[1]
        # Execute the query against WikiApiary
        c = self.wikiapiary.call({'action': 'ask', 'query': my_query})
        # Return the count of results for the query
        return int(c['query']['meta']['count'])

    def main(self):
        # Get the list of sites from the farm
        self.getList()

        for site in self.sites:
            print "\nProcessing %s" % site[0]
            # Use a guess of the API domain to see if we have it already
            siteCount = self.checkSite(site)
            if siteCount == 0:
                print "%s is not in WikiApiary, adding it." % site[0]
                # Now add it to WikiApiary
                self.createSite(site[0], site[1])
                time.sleep(3)
            elif siteCount == 1:
                print "%s already exists, skipping." % site[0]
            elif siteCount > 1:
                print "ERROR: %s found %d websites, which should never happen." % (site[0], siteCount)
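# A minimal driver sketch for the class above, assuming ../apiary.cfg holds
# the WikiApiary and TropicalBot credentials it reads:
if __name__ == '__main__':
    bot = TropicalWikis()
    bot.main()
    print "Created %d new entries." % bot.create_counter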
class smw_community:
    sites = []

    # Blank references to store mediawiki objects in
    wikiapiary = {}
    smwreferata = {}

    # Edit token
    my_token = ""

    # Counter
    create_counter = 0

    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        # Connect to SMW Community Wiki
        self.smwreferata = MediaWiki('http://smw.referata.com/w/api.php')

        # Connect to WikiApiary
        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('wikkiibot', 'Username'),
                              config.get('wikkiibot', 'Password'))

        # We need an edit token
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']

    def load_from_smwreferata(self):
        # Build query for sites (note: each printout needs its own element;
        # the original was missing a comma after '|?Has data type')
        my_query = ''.join([
            '[[Category:Sites]]',
            '[[Has status::Active]]',
            '|?Has URL',
            '|?Has data type',
            '|limit=1000'
        ])
        print "Query: %s" % my_query

        sites = self.smwreferata.call({'action': 'ask', 'query': my_query})

        # We could just return the raw JSON object from the API, however
        # instead we are going to clean it up into an easier to deal with
        # array of dictionary objects.
        # To keep things sensible, we'll use the same name as the properties
        if len(sites['query']['results']) > 0:
            for pagename, site in sites['query']['results'].items():
                print "Adding %s." % pagename
                self.sites.append({
                    'Name': pagename,
                    'URL': site['printouts']['Has URL'][0],
                    'Tag': ','.join(site['printouts']['Has data type'])
                })

    def add_api_to_sites(self):
        # Loop through the sites and find API urls
        for i in range(0, len(self.sites)):
            print "Investigating %s (%s)..." % (self.sites[i]['Name'], self.sites[i]['URL'])
            try:
                req = requests.get(self.sites[i]['URL'])
                if req.status_code == 200:
                    soup = BeautifulSoup(req.text)
                    api_url = soup.findAll('link', {'rel': 'EditURI'})[0]['href']
                    print "  Found %s" % api_url
                    new_api_url = urlparse.urlunparse(
                        urlparse.urlparse(api_url)[0:3] + ('', '', ''))
                    print "  Resolved %s" % new_api_url
                    self.sites[i]['API URL'] = new_api_url
                else:
                    print "  Returned %s" % req.status_code
            except Exception, e:
                print "Exception: %s" % e
[[Category:WikiTeam Import]]""" logo_page_text = """This image was automatically uploaded by [[User:Audit Bee|Audit Bee]] while importing. [[Category:Import logos]] """ # timeout in seconds timeout = 10 socket.setdefaulttimeout(timeout) wiki = MediaWiki( 'https://wikiapiary.com/w/api.php', cookie_file='cookie-jar', user_agent= 'python-simplemediawiki/1.1.1 (WikiApiary; Bumble Bee; +http://wikiapiary.com/wiki/User:Bumble_Bee)' ) wiki.login('Audit Bee', 'frYqj2AmPTqZDjn4TANE') # We need an edit token c = wiki.call({ 'action': 'query', 'titles': 'Foo', 'prop': 'info', 'intoken': 'edit' }) my_token = c['query']['pages']['-1']['edittoken'] i = 0 success = 0 fail = 0 logo_count = 0
class wmbot:
    # Array to append sites to
    sites = []

    # This file is a list of all the database names used by Wikimedia;
    # we can use it to derive the names of the various wikis
    source_list = 'http://noc.wikimedia.org/conf/all.dblist'

    # Blank reference to store mediawiki object in
    wikiapiary = {}

    # Edit token
    my_token = ""

    # Counter
    create_counter = 0

    # Regex pattern
    regex_pattern = r'^(\w+)(wiki|wikibooks|wikiquote|wiktionary|wikinews|wikisource|wikiversity|wikimedia|wikivoyage)$'

    # Site data
    siteData = {
        'wiki': {
            'domain': 'wikipedia.org',
            'name': 'Wikipedia (%s)',
            'farm': 'Wikipedia',
            'logo': 'Wikipedia-logo.png'
        },
        'wikibooks': {
            'domain': 'wikibooks.org',
            'name': 'Wikibooks (%s)',
            'farm': 'Wikibooks',
            'logo': 'Wikibooks Logo.png'
        },
        'wiktionary': {
            'domain': 'wiktionary.org',
            'name': 'Wiktionary (%s)',
            'farm': 'Wiktionary',
            'logo': '170px-Wiktportal.svg.png'
        },
        'wikiquote': {
            'domain': 'wikiquote.org',
            'name': 'Wikiquote (%s)',
            'farm': 'Wikiquote',
            'logo': 'Wikiquote Logo.png'
        },
        'wikinews': {
            'domain': 'wikinews.org',
            'name': 'Wikinews (%s)',
            'farm': 'Wikinews',
            'logo': '240px-Wikinews-logo.png'
        },
        'wikisource': {
            'domain': 'wikisource.org',
            'name': 'Wikisource (%s)',
            'farm': 'Wikisource',
            'logo': 'Wikisource Logo.png'
        },
        'wikiversity': {
            'domain': 'wikiversity.org',
            'name': 'Wikiversity (%s)',
            'farm': 'Wikiversity',
            'logo': 'Wikiversity Logo.png'
        },
        'wikivoyage': {
            'domain': 'wikivoyage.org',
            'name': 'Wikivoyage (%s)',
            'farm': 'Wikivoyage',
            'logo': 'WikivoyageOldLogoSmall.png'
        },
        'wikimedia': {
            'domain': 'wikimedia.org',
            'name': 'Wikimedia (%s)',
            'farm': 'Wikimedia',
            'logo': 'Wikimediafoundation-logo.png'
        }
    }

    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('wmbot', 'Username'),
                              config.get('wmbot', 'Password'))

        # We need an edit token
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']

    def getList(self):
        self.sites = requests.get(self.source_list).text.split('\n')

    def validateApi(self, api_url):
        # e.g. http://st.wikipedia.org/w/api.php?action=query&meta=siteinfo&siprop=general&format=json
        my_url = api_url + '?action=query&meta=siteinfo&siprop=general&format=json'
        try:
            result = requests.get(my_url).json()
            if 'generator' in result['query']['general']:
                print "Detected %s" % result['query']['general']['generator']
                return True
            else:
                return False
        except:
            print "ERROR: Failed call to API check."
            return False

    def createSite(self, lang, token):
        siteTemplate = """{{Website
|Name=%s
|URL=%s
|API URL=%s
|Image=%s
|Farm=%s
|Collect general data=Yes
|Collect extension data=Yes
|Collect skin data=Yes
|Check every=240
|Collect statistics=Yes
|Audited=No
|Curated=No
|Active=Yes
}}
"""
        my_name = self.siteData[token]['name'] % lang
        my_template = siteTemplate % (
            my_name,
            "http://%s.%s/" % (lang, self.siteData[token]['domain']),
            "http://%s.%s/w/api.php" % (lang, self.siteData[token]['domain']),
            self.siteData[token]['logo'],
            self.siteData[token]['farm'])
        print my_template

        self.wikiapiary.call({
            'action': 'edit',
            'title': my_name,
            'text': my_template,
            'token': self.my_token,
            'bot': 'true'
        })
        self.create_counter += 1

    def checkSite(self, lang, site_domain):
        # Build the API URL using Wikimedia's known convention
        api_url = "http://%s.%s/w/api.php" % (lang, site_domain)
        print "Testing %s" % api_url

        # First see if this is a valid API URL before we query WikiApiary
        isValid = self.validateApi(api_url)
        if isValid:
            # Construct Ask query for WikiApiary
            my_query = "[[Has API URL::%s]]" % api_url
            # Execute the query against WikiApiary
            c = self.wikiapiary.call({'action': 'ask', 'query': my_query})
            # Return the count of results for the query
            return True, int(c['query']['meta']['count'])
        else:
            return False, 0

    def processSite(self, token):
        # Guard against tokens that don't match at all; the original indexed
        # match[0] unconditionally, which raises IndexError on no match
        match = re.findall(self.regex_pattern, token)
        if len(match) == 1 and len(match[0]) == 2:
            return match[0]
        else:
            return (False, False)

    def main(self):
        # Get the list of database names from the Wikimedia configuration
        self.getList()

        # Now loop through the tokens
        for token in self.sites:
            print "\nProcessing %s" % token

            # First turn a token into a lang and site; skip tokens that don't
            # parse before calling .replace() on them
            (lang, site) = self.processSite(token)
            if lang is False or site is False:
                print "%s could not process token." % token
                continue
            lang = lang.replace('_', '-')

            # Use a guess of the API domain to see if we have it already
            (valid, siteCount) = self.checkSite(lang, self.siteData[site]['domain'])
            if valid:
                if siteCount == 0:
                    print "%s appears to be an untracked token." % token
                    # Now add it to WikiApiary
                    self.createSite(lang, site)
                elif siteCount == 1:
                    print "%s already exists." % token
                elif siteCount > 1:
                    print "%s found %d websites, which should never happen." % (token, siteCount)
            else:
                print "%s did not resolve to a valid API URL." % token
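# Quick illustration of how the wmbot regex splits Wikimedia database names
# into (language, project) pairs, using re directly so nothing logs in.
# The sample tokens here are illustrative.
import re

for token in ('enwiki', 'mkwiktionary', 'commonswiki', 'not-a-dbname'):
    match = re.findall(wmbot.regex_pattern, token)
    print token, '->', (match[0] if match else (False, False))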
#!/usr/bin/python
from simplemediawiki import MediaWiki
from tabela import tabela
from people import people

# Page header (Macedonian): "==== Duty roster ==== This is an automatically
# generated duty list with two rotations; if you cannot take your shift some
# week or day, note it in the remarks and write to the mailing list. If you
# are on duty, a calendar titled 'Хаклаб: Дежурства' is added to your Google
# Calendar with events for your duty days. Enable notifications so you are
# alerted on time."
text = '==== Листа на дежурства ====\n\nОва е автоматски генерирана листа на дежурни со две ротации, доколку не сте во можност да бидете дежурни некоја недела или ден запишете во забелешка и пишете на мејлинг листа. Доколку сте дежурен во вашиот google calendar е вметнат нов календар насловен „Хаклаб: Дежурства“ со настан за деновите кога сте дежурни. Поставете ги известувањата за да бидете навреме известени.\n\n'
text += tabela(people)

wiki = MediaWiki('https://wiki.spodeli.org/api.php')
user, password = open('credentials', 'r').read().split()
wiki.login(user, password)
token = wiki.call({'action': 'query', 'meta': 'tokens'})['query']['tokens']['csrftoken']
wiki.call({'action': 'edit', 'title': 'Хаклаб/Дежурства', 'section': '5',
           'text': text, 'token': token})
class wikkii:
    # Array to append sites to
    sites = []

    # This page lists all wikis hosted on the Wikkii farm;
    # we can use it to derive the names of the various wikis
    source_list = 'http://wikkii.com/wiki/Special:Farmer/list'

    # Blank reference to store mediawiki object in
    wikiapiary = {}

    # Edit token
    my_token = ""

    # Counter
    create_counter = 0

    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('wikkiibot', 'Username'),
                              config.get('wikkiibot', 'Password'))

        # We need an edit token
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']

    def getList(self):
        soup = BeautifulSoup(requests.get(self.source_list).text)
        i = 1
        for item in soup.findAll("a", {"class": "extiw"}):
            site = (item.contents[0], item["href"], item["title"])
            print i, site
            self.sites.append(site)
            i += 1

    def validateStats(self, url):
        my_url = "%s/wiki/Special:Statistics?action=raw" % url
        try:
            result = requests.get(my_url, timeout=10).text
            values = result.split(';')
            if len(values) == 9:
                print "Got %d values from stats" % len(values)
                return True
            else:
                return False
        except:
            print "ERROR: Failed call to Statistics URL."
            return False

    def createSite(self, name, url):
        siteTemplate = """{{Website
|Name=%s
|URL=%s
|Image=Default website image.png
|Farm=Wikkii
|Collection method=API, Special:Statistics
|API URL=%s
|Collect general data=No
|Collect extension data=No
|Collect skin data=No
|Collect statistics=No
|Collect semantic statistics=No
|Collect semantic usage=No
|Collect logs=No
|Collect recent changes=No
|Statistics URL=%s
|Collect statistics stats=Yes
|Check every=240
|Audited=No
|Validated=No
|Curated=No
|Active=Yes
|Demote=No
|Defunct=No
|Error=No
|Featured website vote=0
}}
"""
        api_url = "%sw/api.php" % url
        statistics_url = "%swiki/Special:Statistics" % url

        # Make sure a page doesn't exist with this name already
        c = self.wikiapiary.call({'action': 'query', 'titles': name})
        try:
            if c['query']['pages']['-1']:
                print "No duplicate name detected."
        except KeyError:
            # Duplicate detected
            name = "%s (Wikkii)" % name

        my_template = siteTemplate % (name, url, api_url, statistics_url)
        print my_template

        c = self.wikiapiary.call({
            'action': 'edit',
            'title': name,
            'text': my_template,
            'token': self.my_token,
            'bot': 'true',
            'summary': 'Creating entry for %s' % name
        })
        print c
        self.create_counter += 1

    def checkSite(self, site):
        print "Checking %s" % site[1]
        # Construct Ask query for WikiApiary
        my_query = "[[Has statistics URL::%swiki/Special:Statistics]]" % site[1]
        # Execute the query against WikiApiary
        c = self.wikiapiary.call({'action': 'ask', 'query': my_query})
        # Return the count of results for the query
        return int(c['query']['meta']['count'])

    def main(self):
        # Get the list of sites from the farm
        self.getList()

        for site in self.sites:
            # Limit the number of sites we make per run
            if self.create_counter > 1000:
                break

            print "\nProcessing %s" % site[0]
            # Use a guess of the API domain to see if we have it already
            siteCount = self.checkSite(site)
            if siteCount == 0:
                print "%s is not in WikiApiary, validating stats." % site[0]
                if self.validateStats(site[1]):
                    # Now add it to WikiApiary
                    self.createSite(site[0], site[1])
                    time.sleep(3)
                else:
                    print "%s did not resolve to a valid statistics URL." % site[0]
            elif siteCount == 1:
                print "%s already exists, skipping." % site[0]
            elif siteCount > 1:
                print "ERROR: %s found %d websites, which should never happen." % (site[0], siteCount)
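# validateStats() expects the classic Special:Statistics?action=raw payload:
# nine key=value pairs joined by semicolons. A sketch of parsing one into a
# dict; the field values below are invented for illustration.
raw = "total=1500;good=820;views=99371;edits=12405;users=310;activeusers=12;admins=3;images=58;jobs=0"
stats = dict(pair.split('=') for pair in raw.split(';'))
print stats['users'], stats['edits']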
import pprint

# Get wiki location
location = raw_input("Base URL to the wiki API (YOUR_WIKI_ROOT/api.php): ")
if location[0:7].lower() != "http://":
    location = "http://" + location

wiki = MediaWiki(location)
if wiki.normalize_api_url() is None:
    sys.exit("Invalid Wiki URL")

# Get login credentials
ua = build_user_agent("uturn", "0.1", "https://github.com/tomasreimers/wiki-uturn")

while True:
    username = raw_input("Username: ")
    password = raw_input("Password: ")
    if wiki.login(username, password):
        break
    print "Invalid login"

# Get date to revert to
print "When would you like to revert to (IN UTC)?"
year = int(raw_input("Year: "))
month = int(raw_input("Month: "))
day = int(raw_input("Day: "))
hour = int(raw_input("Hour: "))
minute = int(raw_input("Minute: "))
second = int(raw_input("Second: "))

revertTime = datetime_to_timestamp(datetime.datetime(year, month, day, hour, minute, second))

if revertTime > time.time():
    sys.exit("This tool cannot go forward in time")
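# datetime_to_timestamp() is referenced above but not defined in this excerpt.
# A plausible minimal implementation for the naive UTC datetime built above
# (an assumption, not necessarily the original helper):
import calendar

def datetime_to_timestamp(dt):
    # Interpret a naive datetime as UTC and return seconds since the epoch
    return calendar.timegm(dt.timetuple())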
class ApiaryBot:
    args = []
    config = []
    apiary_wiki = []
    apiary_db = []
    stats = {}
    edit_token = ''

    def __init__(self):
        # Get command line options
        self.get_args()
        # Get configuration settings
        self.get_config(self.args.config)
        # Connect to the database
        self.connectdb()
        # Initialize stats
        self.stats['statistics'] = 0
        self.stats['smwinfo'] = 0
        self.stats['smwusage'] = 0
        self.stats['general'] = 0
        self.stats['extensions'] = 0
        self.stats['skins'] = 0
        self.stats['skippedstatistics'] = 0
        self.stats['skippedgeneral'] = 0
        self.stats['whois'] = 0
        self.stats['maxmind'] = 0
        self.stats['interwikimap'] = 0
        self.stats['libraries'] = 0
        self.stats['namespaces'] = 0

    def get_config(self, config_file='../apiary.cfg'):
        try:
            self.config = ConfigParser.ConfigParser()
            self.config.read(config_file)
        except IOError:
            print("Cannot open %s." % config_file)

    def get_args(self):
        parser = argparse.ArgumentParser(
            prog="Bumble Bee",
            description="retrieves usage and statistic information for WikiApiary")
        parser.add_argument("-s", "--segment",
                            help="only work on websites in defined segment")
        parser.add_argument("--site", help="only work on this specific site id")
        parser.add_argument("-f", "--force", action="store_true",
                            help="run regardless of when the last time data was updated")
        parser.add_argument("-d", "--debug", action="store_true",
                            help="do not write any changes to wiki or database")
        parser.add_argument("--config", default="../apiary.cfg",
                            help="use an alternative config file")
        parser.add_argument("-v", "--verbose", action="count", default=0,
                            help="increase output verbosity")
        parser.add_argument("--version", action="version", version="%(prog)s 0.1")
        # All set, now get the arguments
        self.args = parser.parse_args()

    def filter_illegal_chars(self, pre_filter):
        # Utility function to make sure that strings are okay for page titles
        return re.sub(r'[#<>\[\]\|{}]', '', pre_filter).replace('=', '-')

    def sqlutcnow(self):
        now = datetime.datetime.utcnow()
        now = now.replace(tzinfo=pytz.utc)
        now = now.replace(microsecond=0)
        return now.strftime('%Y-%m-%d %H:%M:%S')

    def make_request(self, site, data_url, bot='Bumble Bee'):
        req = urllib2.Request(data_url)
        req.add_header('User-Agent', self.config.get(bot, 'User-Agent'))
        req.add_header('Accept-Encoding', 'gzip')
        opener = urllib2.build_opener()
        try:
            t1 = datetime.datetime.now()
            f = opener.open(req)
            duration = (datetime.datetime.now() - t1).total_seconds()
        except ssl.SSLError as e:
            msg = "SSL Error: " + str(e)
            self.record_error(site=site, log_message=msg, log_type='info',
                              log_severity='normal', log_bot=bot, log_url=data_url)
            return None, None
        # HTTPError must be caught before URLError, since it is a subclass of
        # URLError; the original order made this branch unreachable
        except urllib2.HTTPError as e:
            if e.code > 399 and e.code < 500:
                raise FourHundred(e)
            if e.code > 499 and e.code < 600:
                raise FiveHundred(e)
            self.record_error(site=site, log_message="%s" % e, log_type='error',
                              log_severity='normal', log_bot=bot, log_url=data_url)
            return None, None
        except urllib2.URLError as e:
            self.record_error(site=site, log_message="URLError: %s" % e,
                              log_type='error', log_severity='normal',
                              log_bot=bot, log_url=data_url)
            return None, None
        except Exception as e:
            self.record_error(site=site, log_message=str(e), log_type='info',
                              log_severity='normal', log_bot=bot, log_url=data_url)
            return None, None
        else:
            return f, duration

    def pull_json(self, site, data_url, bot='Bumble Bee'):
        socket.setdefaulttimeout(10)

        (f, duration) = self.make_request(site, data_url, bot)
        if f is None:
            return False, None, None

        # Clean the returned string before we parse it, sometimes there are
        # junky error messages from PHP in here, or simply a newline that
        # shouldn't be present. The regex here is really simple, but it
        # seems to work fine.
        if f.info().get('Content-Encoding') == 'gzip':
            buf = StringIO(f.read())
            gz = gzip.GzipFile(fileobj=buf)
            ret_string = gz.read()
        else:
            ret_string = f.read()
        json_match = re.search(r"({.*})", ret_string, flags=re.MULTILINE)
        if json_match is None or json_match.group(1) is None:
            raise NoJSON(data_url + "||" + ret_string)

        # Found JSON block
        try:
            data = simplejson.loads(json_match.group(1))
        except ValueError:
            raise NoJSON(data_url + "||" + ret_string)

        return True, data, duration

    def runSql(self, sql_command, args=None):
        if self.args.verbose >= 3:
            print("SQL: %s" % sql_command)
        try:
            cur = self.apiary_db.cursor()
            cur.execute('SET NAMES utf8mb4')
            cur.execute("SET CHARACTER SET utf8mb4")
            cur.execute("SET character_set_connection=utf8mb4")
            cur.execute(sql_command, args)
            cur.close()
            self.apiary_db.commit()
            return True, cur.rowcount
        except Exception as e:
            print("Exception generated while running SQL command.")
            print("Command: %s" % sql_command)
            print("Exception: %s" % e)
            return False, 0

    def record_error(self, site=None, log_message='Unknown Error',
                     log_type='info', log_severity='normal',
                     log_bot=None, log_url=None):
        # Normalize the site before using it, so a None site doesn't crash
        # the verbose print below
        if site is None:
            site = {'Has ID': 0}
        if 'Has name' in site:
            site['pagename'] = site['Has name']
        elif 'pagename' not in site:
            site['pagename'] = 'Error'

        if self.args.verbose >= 2:
            print("New log message for %s" % site['pagename'])
        if self.args.verbose >= 1:
            print(log_message)

        if len(log_message) > 65535:
            print("log_message too long: %s" % log_message)
            log_message = log_message[0:65535]

        temp_sql = "INSERT apiary_website_logs "
        temp_sql += "(website_id, log_date, website_name, log_type, "
        temp_sql += "log_severity, log_message, log_bot, log_url) "
        # The format string is not really a normal Python format
        # string. You must always use %s http://stackoverflow.com/a/5785163
        temp_sql += "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
        # Pass log_bot and log_url through unquoted; the driver handles
        # quoting and maps None to NULL
        args = (site['Has ID'], self.sqlutcnow(), site['pagename'], log_type,
                log_severity, log_message, log_bot, log_url)
        self.runSql(temp_sql, args)

    def clear_error(self, sitename):
        # This function clears the error status of a website
        socket.setdefaulttimeout(30)

        if self.args.verbose >= 2:
            print("Clearing error for %s" % sitename)

        c = self.apiary_wiki.call({
            'action': 'sfautoedit',
            'form': 'Website',
            'target': sitename,
            'Website[Error]': 'No',
            'wpSummary': 'clearing error'
        })
        if self.args.verbose >= 3:
            print("result:%s" % c)

    def connectdb(self):
        # Setup our database connection
        # Use the account that can also insert and delete from the database
        self.apiary_db = mdb.connect(
            host=self.config.get('ApiaryDB', 'hostname'),
            db=self.config.get('ApiaryDB', 'database'),
            user=self.config.get('ApiaryDB RW', 'username'),
            passwd=self.config.get('ApiaryDB RW', 'password'),
            charset='utf8')

    def connectwiki(self, bot_name):
        self.apiary_wiki = MediaWiki(self.config.get('WikiApiary', 'API'))
        c = self.apiary_wiki.login(self.config.get(bot_name, 'Username'),
                                   self.config.get(bot_name, 'Password'))
        if self.args.verbose >= 1:
            print("Logged in as %s" % self.config.get(bot_name, 'Username'))
            print(c)

    def get_websites(self, segment, site):
        filter_string = ""
        if site is not None:
            if self.args.verbose >= 1:
                print("Processing site %d." % int(site))
            filter_string = "[[Has ID::%d]]" % int(site)
        elif segment is not None:
            if self.args.verbose >= 1:
                print("Only retrieving segment %d." % int(self.args.segment))
            filter_string = "[[Has bot segment::%d]]" % int(self.args.segment)

        # Build query for sites
        my_query = ''.join([
            '[[Category:Website]]',
            '[[Is defunct::False]]',
            '[[Is active::True]]',
            filter_string,
            '|?Has API URL',
            '|?Has statistics URL',
            '|?Check every',
            '|?Creation date',
            '|?Has ID',
            '|?Collect general data',
            '|?Collect extension data',
            '|?Collect skin data',
            '|?Collect statistics',
            '|?Collect semantic statistics',
            '|?Collect semantic usage',
            '|?Collect logs',
            '|?Collect recent changes',
            '|?Collect statistics stats',
            '|sort=Creation date',
            '|order=asc',
            '|limit=2000'
        ])
        if self.args.verbose >= 3:
            print("Query: %s" % my_query)

        try:
            sites = self.apiary_wiki.call({'action': 'ask', 'query': my_query})
        except Exception as e:
            self.record_error(log_message="Problem querying Wikiapiary: %s" % e,
                              log_type='error', log_severity='important')
        else:
            # We could just return the raw JSON object from the API, however
            # instead we are going to clean it up into an easier to deal with
            # array of dictionary objects.
            # To keep things sensible, we'll use the same name as the properties
            i = 0
            if len(sites['query']['results']) > 0:
                my_sites = []
                for pagename, site in sites['query']['results'].items():
                    i += 1
                    if self.args.verbose >= 3:
                        print("[%d] Adding %s." % (i, pagename))

                    # Initialize the flags but do it carefully in case there is no value in the wiki yet
                    collect_general_data = list_get(site['printouts'], 'Collect general data') == "t"
                    collect_extension_data = list_get(site['printouts'], 'Collect extension data') == "t"
                    collect_skin_data = list_get(site['printouts'], 'Collect skin data') == "t"
                    collect_statistics = list_get(site['printouts'], 'Collect statistics') == "t"
                    collect_semantic_statistics = list_get(site['printouts'], 'Collect semantic statistics') == "t"
                    collect_semantic_usage = list_get(site['printouts'], 'Collect semantic usage') == "t"
                    collect_statistics_stats = list_get(site['printouts'], 'Collect statistics stats') == "t"
                    collect_logs = list_get(site['printouts'], 'Collect logs') == "t"
                    collect_recent_changes = list_get(site['printouts'], 'Collect recent changes') == "t"
                    has_statistics_url = list_get(site['printouts'], 'Has statistics URL', '')
                    has_api_url = list_get(site['printouts'], 'Has API URL', '')

                    if has_statistics_url.find('wikkii.com') > 0:
                        # Temporarily filter out all Farm:Wikkii sites
                        if self.args.verbose >= 2:
                            print("Skipping %s (%s)" % (pagename, site['fullurl']))
                    else:
                        try:
                            my_sites.append({
                                'pagename': pagename,
                                'fullurl': site['fullurl'],
                                'Has API URL': has_api_url,
                                'Has statistics URL': has_statistics_url,
                                'Check every': int(site['printouts']['Check every'][0]),
                                'Creation date': site['printouts']['Creation date'][0],
                                'Has ID': int(site['printouts']['Has ID'][0]),
                                'Collect general data': collect_general_data,
                                'Collect extension data': collect_extension_data,
                                'Collect skin data': collect_skin_data,
                                'Collect statistics': collect_statistics,
                                'Collect semantic statistics': collect_semantic_statistics,
                                'Collect semantic usage': collect_semantic_usage,
                                'Collect statistics stats': collect_statistics_stats,
                                'Collect logs': collect_logs,
                                'Collect recent changes': collect_recent_changes
                            })
                        except Exception as e:
                            print("Failed to add %s" % pagename)
                            print(e)
                            # Log the page URL; the original referenced an
                            # undefined data_url here
                            self.record_error(site=site,
                                              log_message="Failed to add page",
                                              log_type='warn',
                                              log_severity='important',
                                              log_bot='apiary.py',
                                              log_url=site['fullurl'])
                return my_sites
            else:
                raise Exception("No sites were returned to work on.")

    def get_status(self, site):
        """
        get_status will query the website_status table in ApiaryDB. It makes
        the decision if new data should be retrieved for a given website.
        Two booleans are returned, the first to tell if new statistics
        information should be requested and the second to pull general
        information.
        """
        # Get the timestamps for the last statistics and general pulls
        cur = self.apiary_db.cursor()
        temp_sql = "SELECT last_statistics, last_general, check_every_limit FROM website_status WHERE website_id = %d" % site['Has ID']
        cur.execute(temp_sql)
        rows_returned = cur.rowcount

        if rows_returned == 1:
            # Let's see if it's time to pull information again
            data = cur.fetchone()
            cur.close()
            (last_statistics, last_general, check_every_limit) = data[0:3]
            if self.args.verbose >= 3:
                print("last_stats: %s" % last_statistics)
                print("last_general: %s" % last_general)
                print("check_every_limit: %s" % check_every_limit)

            #TODO: make this check the times!
            last_statistics_struct = time.strptime(str(last_statistics), '%Y-%m-%d %H:%M:%S')
            last_general_struct = time.strptime(str(last_general), '%Y-%m-%d %H:%M:%S')

            stats_delta = (time.mktime(time.gmtime()) - time.mktime(last_statistics_struct)) / 60
            general_delta = (time.mktime(time.gmtime()) - time.mktime(last_general_struct)) / 60

            if self.args.verbose >= 2:
                print("Delta from checks: stats %s general %s" % (stats_delta, general_delta))

            (check_stats, check_general) = (False, False)
            # Add randomness to keep checks spread around
            if stats_delta > (site['Check every'] + random.randint(0, 15)) and stats_delta > check_every_limit:
                check_stats = True
            else:
                if self.args.verbose >= 2:
                    print("Skipping stats...")
                self.stats['skippedstatistics'] += 1

            # General checks are always bound to 24 hours, plus a random
            # offset to keep checks evenly distributed
            if general_delta > ((24 + random.randint(0, 24)) * 60):
                check_general = True
            else:
                if self.args.verbose >= 2:
                    print("Skipping general...")
                self.stats['skippedgeneral'] += 1

            return (check_stats, check_general)

        elif rows_returned == 0:
            cur.close()
            # This website doesn't have a status, so we should check everything
            if self.args.verbose >= 3:
                print("website has never been checked before")
            return (True, True)

        else:
            raise Exception("Status check returned multiple rows.")

    def update_status(self, site, checktype):
        # Update the website_status table
        my_now = self.sqlutcnow()

        if checktype == "statistics":
            temp_sql = "UPDATE website_status SET last_statistics = '%s' WHERE website_id = %d" % (my_now, site['Has ID'])
        if checktype == "general":
            temp_sql = "UPDATE website_status SET last_general = '%s' WHERE website_id = %d" % (my_now, site['Has ID'])

        (success, rows_affected) = self.runSql(temp_sql)

        if rows_affected == 0:
            # No rows were updated, this website likely didn't exist before,
            # so we need to insert the first time
            if self.args.verbose >= 2:
                print("No website_status record exists for ID %d, creating one" % site['Has ID'])
            temp_sql = "INSERT website_status (website_id, last_statistics, last_general, check_every_limit) "
            temp_sql += "VALUES (%d, \"%s\", \"%s\", %d) " % (site['Has ID'], my_now, my_now, 240)
            temp_sql += "ON DUPLICATE KEY UPDATE last_statistics=\"%s\", last_general=\"%s\", check_every_limit=%d" % (my_now, my_now, 240)
            self.runSql(temp_sql)

    def botlog(self, bot, message, type='info', duration=0):
        if self.args.verbose >= 1:
            print(message)

        temp_sql = "INSERT apiary_bot_log (log_date, log_type, bot, duration, message) "
        temp_sql += "VALUES (\"%s\", \"%s\", \"%s\", %f, \"%s\")" % (self.sqlutcnow(), type, bot, duration, message)
        self.runSql(temp_sql)