Code Example #1
File: Team-Scores.py  Project: Planet-Kubb/wiki
    def UpdateWiki(self):
        """
        Write the contents of the teams dictionary back into the wiki
        """
        wiki = MediaWiki(self.config.get('PlanetKubb', 'API'))
        wiki.login(self.config.get('KubbBot', 'Username'), self.config.get('KubbBot', 'Password'))

        # We need an edit token
        c = wiki.call({'action': 'query', 'titles': 'Foo', 'prop': 'info', 'intoken': 'edit'})
        print c
        my_token = c['query']['pages']['-1']['edittoken']
        print "Edit token: %s" % my_token

        print "== Updating wiki with new scores =="
        for team in self.teams:
            print "\"%s\",%f,%f" % (team, self.teams[team].mu, self.teams[team].sigma)

            c = wiki.call({
                'action': 'sfautoedit',
                'form': 'Team',
                'target': team,
                'Team[TrueSkill mu]': "%s" % self.teams[team].mu,
                'Team[TrueSkill sigma]': "%s" % self.teams[team].sigma,
                'token': my_token})
            print c
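
Note: the dummy query with intoken=edit used above (and in several examples below) is the pre-MediaWiki-1.24 way of obtaining an edit token. On MediaWiki 1.24 and later the same token is fetched with meta=tokens, as Examples #4 and #11 do. A minimal sketch of that flow, not taken from any project on this page; the API URL and credentials are placeholders:

from simplemediawiki import MediaWiki

wiki = MediaWiki('https://example.org/w/api.php')  # hypothetical API URL
wiki.login('ExampleBot', 'example-password')       # hypothetical credentials
# Since MediaWiki 1.24, the CSRF ("edit") token comes from meta=tokens
result = wiki.call({'action': 'query', 'meta': 'tokens', 'type': 'csrf'})
edit_token = result['query']['tokens']['csrftoken']
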
Code Example #2
def open_connection(bot_name, env_name):
    """Open a connection to MediaWiki for a bot."""

    LOGGER.info("Opening MediaWiki connection for %s at %s", bot_name, API_URL)
    apiary_wiki = MediaWiki(API_URL)

    try:
        # Passwords may be defined in the environment or in the config file
        # We prefer the environment variable if it is present
        password = os.environ.get(env_name, None)
        if password is None:
            try:
                password = config.get('Passwords', bot_name)
            except Exception as e:
                LOGGER.warn('No configuration file detected.')
        LOGGER.info("Logging in as %s using %s", bot_name, password)
        apiary_wiki.login(bot_name, password)

        LOGGER.info("Getting edit token for %s", bot_name)
        wiki_return = apiary_wiki.call({
            'action': 'tokens',
            'type': 'edit'
        })
        edit_token = wiki_return['tokens']['edittoken']
        LOGGER.info("%s has been given edit token %s", bot_name, edit_token)
Code Example #3
def open_connection(bot_name, env_name, api_url):
    """Open a connection to MediaWiki for a bot."""

    LOGGER.info("Opening MediaWiki connection for %s at %s", bot_name, api_url)
    apiary_wiki = MediaWiki(api_url)
    edit_token = None

    try:
        # Passwords may be defined in the environment or in the config file
        # We prefer the environment variable if it is present
        password = os.environ.get(env_name, None)
        if password is None:
            try:
                password = config.get('Passwords', bot_name)
            except Exception as e:
                LOGGER.warn('No configuration file detected.')

        if password is not None:
            LOGGER.info("Logging in as %s using %s", bot_name, password)
            apiary_wiki.login(bot_name, password)

            LOGGER.info("Getting edit token for %s", bot_name)
            wiki_return = apiary_wiki.call({
                'action': 'tokens',
                'type': 'edit'
            })
            edit_token = wiki_return['tokens']['edittoken']
            LOGGER.info("%s has been given edit token %s", bot_name, edit_token)
        else:
            LOGGER.warn("No password was provided for %s. Queries allowed but editing will not work.", bot_name)

    except Exception as e:
        raise Exception("Unable to login as %s got '%s'", bot_name, e)

    return (apiary_wiki, edit_token)
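
A hedged usage sketch for the function above (the bot name, environment variable, and URL are placeholders; real values come from the surrounding project's configuration):

# Hypothetical call: the function returns the wiki handle and, when a password
# was found, an edit token; edit_token is None for read-only sessions.
(apiary_wiki, edit_token) = open_connection('Bumble Bee', 'BUMBLE_BEE_PASSWORD', 'https://wikiapiary.com/w/api.php')
if edit_token is None:
    LOGGER.warn("Running read-only; edits will be skipped.")
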
Code Example #4
File: mwbridge.py  Project: davidfine/mwbridge
def setup(config):
    try:
        wikiConn = MediaWiki("http://%s:8888/mediawiki/api.php" % mwserver, user_agent="IA-mwbridge")
        wikiConn.login("david", "bad pass")
        token = wikiConn.call({"action": "query", "meta": "tokens"})["query"]["tokens"]["csrftoken"]
    except Exception as e:
        print "Trouble connecting to mediawiki" + e
Code Example #5
File: views.py  Project: Linktheoriginal/Wikiblame
def finger(request):
	change_list = []
	foundtext = False

	# PUT IN CONNECTION VALUES HERE
	wiki = MediaWiki('set_your_api_url')
	wiki.login('set_username', 'set_password')

	tokendoc = wiki.call({'action': 'tokens'})
	edittoken = tokendoc.get("tokens").get('edittoken')

	foundtext = False
	searchtext = request.GET['text'].strip()
	searchpage = request.GET['page'].strip()
	
	if searchtext == '' or searchpage == '':
		context = {
			'message': 'Missing either search text or page to search!',
		}
		return render(request, 'blame/error.html', context)
	
	queryresult = wiki.call({'action': 'query', 'prop': 'revisions', 'rvprop': 'ids|user', 'rvdir': 'newer', 'rvlimit': '5000', 'format': 'jsonfm', 'titles': searchpage})
	#print(str(queryresult))

	if ('-1' in list(queryresult['query']['pages'].keys())):
		context = {
			'message': 'The page you requested was not found!  Please check your capitalization, namespace, and spelling!',
		}
		return render(request, 'blame/error.html', context)
	
	revisions = (list(queryresult['query']['pages'][list(queryresult['query']['pages'].keys())[0]]['revisions']))
	
	for revision in revisions:
		revisiondata = wiki.call({'action': 'query', 'prop': 'revisions', 'revids': revision['revid'], 'rvprop': 'content', 'format': 'jsonfm'})
		
		revisiontext = revisiondata['query']['pages'][list(queryresult['query']['pages'].keys())[0]]['revisions'][0]['*']
		if not foundtext and searchtext in revisiontext:
			# PUT IN URL VALUE HERE
			change_list.append({'changetype': 'Added', 'revision': revision['revid'], 'user': revision['user'], 'link': 'set_your_website_url?title=' + searchpage + '&oldid=' + str(revision['revid'])})
			foundtext = True
		elif foundtext and not searchtext in revisiontext:
			# PUT IN URL VALUE HERE
			change_list.append({'changetype': 'Removed', 'revision': revision['revid'], 'user': revision['user'], 'link': 'set_your_website_url?title=' + searchpage + '&oldid=' + str(revision['revid'])})
			foundtext = False

	context = {
		'change_list': change_list,
	}
	return render(request, 'blame/finger.html', context)
	
	
Code Example #6
class TropicalWikis:

    # Array to append sites to
    sites = []
    # This page lists all the wikis hosted on the TropicalWikis farm;
    # we can use it to derive the names and URLs of those wikis
    source_list = 'http://www.tropicalwikis.com/wiki/Special:Farmer/list'
    # Blank reference to store mediawiki object in
    wikiapiary = {}
    # Edit token
    my_token = ""
    # Counter
    create_counter = 0

    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('TropicalBot', 'Username'),
                              config.get('TropicalBot', 'Password'))

        # We need an edit token
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']

    def getList(self):
        soup = BeautifulSoup(requests.get(self.source_list).text)
        i = 1
        for item in soup.findAll("a", {"class": "external text"}):
            site = (item.contents[0], item["href"])
            print i, site
            self.sites.append(site)
            i += 1

    def createSite(self, name, url):
        siteTemplate = """{{Website
|Name=%s
|URL=%s
|Image=Default website image.png
|Farm=TropicalWikis
|Collection method=API
|API URL=%s
|Collect general data=Yes
|Collect extension data=Yes
|Collect skin data=Yes
|Collect statistics=Yes
|Collect semantic statistics=No
|Collect semantic usage=No
|Collect logs=No
|Collect recent changes=No
|Statistics URL=
|Collect statistics stats=Yes
|Check every=240
|Audited=No
|Validated=No
|Curated=No
|Active=No
|Demote=No
|Defunct=No
|Error=No
|Featured website vote=0
}}
"""
        api_url = "%s/w/api.php" % url

        # Make sure a page doesn't exist with this name already
        c = self.wikiapiary.call({'action': 'query', 'titles': name})
        try:
            if c['query']['pages']['-1']:
                print "No duplicate name detected."
        except:
            # Duplicate detected
            name = "%s (TropicalWikis)" % name

        my_template = siteTemplate % (name, url, api_url)
        print my_template

        c = self.wikiapiary.call({
            'action': 'edit',
            'title': name,
            'text': my_template,
            'token': self.my_token,
            'bot': 'true',
            'summary': 'Creating entry for %s' % name
        })
        print c
        self.create_counter += 1

    def checkSite(self, site):
        print "Checking %s" % site[1]

        # Construct Ask query for WikiApiary
        my_query = ''.join(["[[Has API URL::%s/w/api.php]]" % site[1]])
        # Execute the query against WikiApiary
        c = self.wikiapiary.call({'action': 'ask', 'query': my_query})

        # Return the count of results for the query
        return int(c['query']['meta']['count'])

    def main(self):
        # Get the list of sites from the farm list page
        self.getList()

        for site in self.sites:
            print "\nProcessing %s" % site[0]

            # Use a guess of the API domain to see if we have it already
            siteCount = self.checkSite(site)

            if siteCount == 0:
                print "%s is not in WikiApiary, validating stats." % site[0]
                # Now add it to WikiApiary
                self.createSite(site[0], site[1])
                time.sleep(3)
            elif siteCount == 1:
                print "%s already exists, skipping." % site[0]
            elif siteCount > 1:
                print "ERROR: %s found %d websites, which should never happen." % (
                    site[0], siteCount)
Code Example #7
class smw_community:
    sites = []
    # Blank reference to store mediawiki object in
    wikiapiary = {}
    smwreferata = {}
    # Edit token
    my_token = ""
    # Counter
    create_counter = 0

    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        # Connect to SMW Community Wiki
        self.smwreferata = MediaWiki('http://smw.referata.com/w/api.php')

        # Connect to WikiApiary
        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('wikkiibot', 'Username'),
                              config.get('wikkiibot', 'Password'))

        # We need an edit token
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']

    def load_from_smwreferata(self):
        # Build query for sites
        my_query = ''.join([
            '[[Category:Sites]]',
            '[[Has status::Active]]',
            '|?Has URL',
            '|?Has data type',
            '|limit=1000'
        ])
        print "Query: %s" % my_query
        sites = self.smwreferata.call({'action': 'ask', 'query': my_query})

        # We could just return the raw JSON object from the API, however instead we are going to clean it up into an
        # easier to deal with array of dictionary objects.
        # To keep things sensible, we'll use the same name as the properties
        if len(sites['query']['results']) > 0:
            for pagename, site in sites['query']['results'].items():
                print "Adding %s." % pagename

                self.sites.append({
                    'Name': pagename,
                    'URL': site['printouts']['Has URL'][0],
                    'Tag': ','.join(site['printouts']['Has data type'])
                })

    def add_api_to_sites(self):
        # Loop through the sites and find API urls
        for i in range(0, len(self.sites)):
            print "Investigating %s (%s)..." % (self.sites[i]['Name'],
                                                self.sites[i]['URL'])
            try:
                req = requests.get(self.sites[i]['URL'])
                if req.status_code == 200:
                    soup = BeautifulSoup(req.text)
                    api_url = soup.findAll('link',
                                           {'rel': 'EditURI'})[0]['href']
                    print "  Found %s" % api_url
                    new_api_url = urlparse.urlunparse(
                        urlparse.urlparse(api_url)[0:3] + ('', '', ''))
                    print "  Resolved %s" % new_api_url
                    self.sites[i]['API URL'] = new_api_url
                else:
                    print "  Returned %s" % req.status_code
            except Exception as e:
                print "Exception: %s" % e
Code Example #8
File: import-urls.py  Project: WikiApiary/wikibees
[[Category:WikiTeam Import]]"""

logo_page_text = """This image was automatically uploaded by [[User:Audit Bee|Audit Bee]] while importing.
[[Category:Import logos]] """

# timeout in seconds
timeout = 10
socket.setdefaulttimeout(timeout)

wiki = MediaWiki(
    'https://wikiapiary.com/w/api.php',
    cookie_file='cookie-jar',
    user_agent='python-simplemediawiki/1.1.1 (WikiApiary; Bumble Bee; +http://wikiapiary.com/wiki/User:Bumble_Bee)'
)
wiki.login('Audit Bee', 'frYqj2AmPTqZDjn4TANE')

# We need an edit token
c = wiki.call({
    'action': 'query',
    'titles': 'Foo',
    'prop': 'info',
    'intoken': 'edit'
})
my_token = c['query']['pages']['-1']['edittoken']

i = 0
success = 0
fail = 0
logo_count = 0
Code Example #9
class wmbot:

    # Array to append sites to
    sites = []
    # This file is a list of all the database names used by wikimedia
    # we can use this to try and derive the names of various wikis
    source_list = 'http://noc.wikimedia.org/conf/all.dblist'
    # Blank reference to store mediawiki object in
    wikiapiary = {}
    # Edit token
    my_token = ""
    # Counter
    create_counter = 0
    # Regex pattern
    regex_pattern = r'^(\w+)(wiki|wikibooks|wikiquote|wiktionary|wikinews|wikisource|wikiversity|wikimedia|wikivoyage)$'

    # Site data
    siteData = {
        'wiki': {
            'domain': 'wikipedia.org',
            'name': 'Wikipedia (%s)',
            'farm': 'Wikipedia',
            'logo': 'Wikipedia-logo.png'
        },
        'wikibooks': {
            'domain': 'wikibooks.org',
            'name': 'Wikibooks (%s)',
            'farm': 'Wikibooks',
            'logo': 'Wikibooks Logo.png'
        },
        'wiktionary': {
            'domain': 'wiktionary.org',
            'name': 'Wiktionary (%s)',
            'farm': 'Wiktionary',
            'logo': '170px-Wiktportal.svg.png'
        },
        'wikiquote': {
            'domain': 'wikiquote.org',
            'name': 'Wikiquote (%s)',
            'farm': 'Wikiquote',
            'logo': 'Wikiquote Logo.png'
        },
        'wikinews': {
            'domain': 'wikinews.org',
            'name': 'Wikinews (%s)',
            'farm': 'Wikinews',
            'logo': '240px-Wikinews-logo.png'
        },
        'wikisource': {
            'domain': 'wikisource.org',
            'name': 'Wikisource (%s)',
            'farm': 'Wikisource',
            'logo': 'Wikisource Logo.png'
        },
        'wikiversity': {
            'domain': 'wikiversity.org',
            'name': 'Wikiversity (%s)',
            'farm': 'Wikiversity',
            'logo': 'Wikiversity Logo.png'
        },
        'wikivoyage': {
            'domain': 'wikivoyage.org',
            'name': 'Wikivoyage (%s)',
            'farm': 'Wikivoyage',
            'logo': 'WikivoyageOldLogoSmall.png'
        },
        'wikimedia': {
            'domain': 'wikimedia.org',
            'name': 'Wikimedia (%s)',
            'farm': 'Wikimedia',
            'logo': 'Wikimediafoundation-logo.png'
        }
    }

    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('wmbot', 'Username'),
                              config.get('wmbot', 'Password'))

        # We need an edit token
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']

    def getList(self):
        self.sites = requests.get(self.source_list).text.split('\n')

    def validateApi(self, api_url):
        # Call http://st.wikipedia.org/w/api.php?action=query&meta=siteinfo&siprop=general&format=json
        my_url = api_url + '?action=query&meta=siteinfo&siprop=general&format=json'
        try:
            result = requests.get(my_url).json()
            if 'generator' in result['query']['general']:
                print "Detected %s" % result['query']['general']['generator']
                return True
            else:
                return False
        except:
            print "ERROR: Failed call to API check."
            return False

    def createSite(self, lang, token):
        siteTemplate = """{{Website
|Name=%s
|URL=%s
|API URL=%s
|Image=%s
|Farm=%s
|Collect general data=Yes
|Collect extension data=Yes
|Collect skin data=Yes
|Check every=240
|Collect statistics=Yes
|Audited=No
|Curated=No
|Active=Yes
}}
"""
        my_name = self.siteData[token]['name'] % lang
        my_template = siteTemplate % (
            my_name,
            "http://%s.%s/" % (lang, self.siteData[token]['domain']),
            "http://%s.%s/w/api.php" % (lang, self.siteData[token]['domain']),
            self.siteData[token]['logo'],
            self.siteData[token]['farm'])
        print my_template

        self.wikiapiary.call({
            'action': 'edit',
            'title': my_name,
            'text': my_template,
            'token': self.my_token,
            'bot': 'true'
        })
        self.create_counter += 1

    def checkSite(self, lang, site_domain):
        # Build the API URL using Wikimedia's known convention
        api_url = "http://%s.%s/w/api.php" % (lang, site_domain)
        print "Testing %s" % api_url

        # First see if this is a valid API URL before we query WikiApiary
        isValid = self.validateApi(api_url)

        if isValid:
            # Construct Ask query for WikiApiary
            my_query = ''.join(["[[Has API URL::%s]]" % api_url])
            # Execute the query against WikiApiary
            c = self.wikiapiary.call({'action': 'ask', 'query': my_query})

            # Return the count of results for the query
            return True, int(c['query']['meta']['count'])
        else:
            return False, 0

    def processSite(self, token):
        match = re.findall(self.regex_pattern, token)
        if match and len(match[0]) == 2:
            return match[0]
        else:
            return (False, False)

    def main(self):
        # Get the list of database name tokens from the Wikimedia dblist
        self.getList()

        # Now loop through the tokens
        for token in self.sites:
            print "\nProcessing %s" % token
            # First turn a token into a lang and site
            (lang, site) = self.processSite(token)
            # If we successfully got lang and site proceed
            if lang is not False and site is not False:
                lang = lang.replace('_', '-')
                # Use a guess of the API domain to see if we have it already
                (valid, siteCount) = self.checkSite(lang, self.siteData[site]['domain'])

                if valid:
                    if siteCount == 0:
                        print "%s appears to be untracked token." % token
                        # Now add it to WikiApiary
                        self.createSite(lang, site)
                    elif siteCount == 1:
                        print "%s already exists." % token
                    elif siteCount > 1:
                        print "%s found %d websites, which should never happen." % (
                            token, siteCount)
                else:
                    print "%s did not resolve to a valid API URL." % token
            else:
                print "%s could not process token." % token
Code Example #10
File: wmbot.py  Project: iamedwardshen/WikiApiary
class wmbot:

    # Array to append sites to
    sites = []
    # This file is a list of all the database names used by wikimedia
    # we can use this to try and derive the names of various wikis
    source_list = 'http://noc.wikimedia.org/conf/all.dblist'
    # Blank reference to store mediawiki object in
    wikiapiary = {}
    # Edit token
    my_token = ""
    # Counter
    create_counter = 0
    # Regex pattern
    regex_pattern = r'^(\w+)(wiki|wikibooks|wikiquote|wiktionary|wikinews|wikisource|wikiversity|wikimedia|wikivoyage)$'

    # Site data
    siteData = {
        'wiki': {
            'domain': 'wikipedia.org',
            'name': 'Wikipedia (%s)',
            'farm': 'Wikipedia',
            'logo': 'Wikipedia-logo.png'
        },
        'wikibooks': {
            'domain': 'wikibooks.org',
            'name': 'Wikibooks (%s)',
            'farm': 'Wikibooks',
            'logo': 'Wikibooks Logo.png'
        },
        'wiktionary': {
            'domain': 'wiktionary.org',
            'name': 'Wiktionary (%s)',
            'farm': 'Wiktionary',
            'logo': '170px-Wiktportal.svg.png'
        },
        'wikiquote': {
            'domain': 'wikiquote.org',
            'name': 'Wikiquote (%s)',
            'farm': 'Wikiquote',
            'logo': 'Wikiquote Logo.png'
        },
        'wikinews': {
            'domain': 'wikinews.org',
            'name': 'Wikinews (%s)',
            'farm': 'Wikinews',
            'logo': '240px-Wikinews-logo.png'
        },
        'wikisource': {
            'domain': 'wikisource.org',
            'name': 'Wikisource (%s)',
            'farm': 'Wikisource',
            'logo': 'Wikisource Logo.png'
        },
        'wikiversity': {
            'domain': 'wikiversity.org',
            'name': 'Wikiversity (%s)',
            'farm': 'Wikiversity',
            'logo': 'Wikiversity Logo.png'
        },
        'wikivoyage': {
            'domain': 'wikivoyage.org',
            'name': 'Wikivoyage (%s)',
            'farm': 'Wikivoyage',
            'logo': 'WikivoyageOldLogoSmall.png'
        },
        'wikimedia': {
            'domain': 'wikimedia.org',
            'name': 'Wikimedia (%s)',
            'farm': 'Wikimedia',
            'logo': 'Wikimediafoundation-logo.png'
        }
    }

    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('wmbot', 'Username'), config.get('wmbot', 'Password'))

        # We need an edit token
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']

    def getList(self):
        self.sites = requests.get(self.source_list).text.split('\n')

    def validateApi(self, api_url):
        # Call http://st.wikipedia.org/w/api.php?action=query&meta=siteinfo&siprop=general&format=json
        my_url = api_url + '?action=query&meta=siteinfo&siprop=general&format=json'
        try:
            result = requests.get(my_url).json()
            if 'generator' in result['query']['general']:
                print "Detected %s" % result['query']['general']['generator']
                return True
            else:
                return False
        except:
            print "ERROR: Failed call to API check."
            return False

    def createSite(self, lang, token):
        siteTemplate = """{{Website
|Name=%s
|URL=%s
|API URL=%s
|Image=%s
|Farm=%s
|Collect general data=Yes
|Collect extension data=Yes
|Collect skin data=Yes
|Check every=240
|Collect statistics=Yes
|Audited=No
|Curated=No
|Active=Yes
}}
"""
        my_name = self.siteData[token]['name'] % lang
        my_template = siteTemplate % (
            my_name,
            "http://%s.%s/" % (lang, self.siteData[token]['domain']),
            "http://%s.%s/w/api.php" % (lang, self.siteData[token]['domain']),
            self.siteData[token]['logo'],
            self.siteData[token]['farm'])
        print my_template

        self.wikiapiary.call({
            'action': 'edit',
            'title': my_name,
            'text': my_template,
            'token': self.my_token,
            'bot': 'true'
        })
        self.create_counter += 1

    def checkSite(self, lang, site_domain):
        # Build the API URL using Wikimedia's known convention
        api_url = "http://%s.%s/w/api.php" % (lang, site_domain)
        print "Testing %s" % api_url

        # First see if this is a valid API URL before we query WikiApiary
        isValid = self.validateApi(api_url)

        if isValid:
            # Construct Ask query for WikiApiary
            my_query = ''.join([
                "[[Has API URL::%s]]" % api_url
            ])
            # Execute the query against WikiApiary
            c = self.wikiapiary.call({
                'action': 'ask',
                'query': my_query
            })

            # Return the count of results for the query
            return True, int(c['query']['meta']['count'])
        else:
            return False, 0

    def processSite(self, token):
        match = re.findall(self.regex_pattern, token)
        if match and len(match[0]) == 2:
            return match[0]
        else:
            return (False, False)

    def main(self):
        # Get the list of database name tokens from the Wikimedia dblist
        self.getList()

        # Now loop through the tokens
        for token in self.sites:
            print "\nProcessing %s" % token
            # First turn a token into a lang and site
            (lang, site) = self.processSite(token)
            # If we successfully got lang and site proceed
            if lang is not False and site is not False:
                lang = lang.replace('_', '-')
                # Use a guess of the API domain to see if we have it already
                (valid, siteCount) = self.checkSite(lang, self.siteData[site]['domain'])

                if valid:
                    if siteCount == 0:
                        print "%s appears to be untracked token." % token
                        # Now add it to WikiApiary
                        self.createSite(lang, site)
                    elif siteCount == 1:
                        print "%s already exists." % token
                    elif siteCount > 1:
                        print "%s found %d websites, which should never happen." % (token, siteCount)
                else:
                    print "%s did not resolve to a valid API URL." % token
            else:
                print "%s could not process token." % token
Code Example #11
File: wiki.py  Project: markododa/dezhurni
#!/usr/bin/python
from simplemediawiki import MediaWiki
from tabela import tabela
from people import people
import sys
text = '==== Листа на дежурства ====\n\nОва е автоматски генерерирана листа на дежурни со две ротации, доколку не сте во можност да бидете дежурни некоја недела или ден запишете во забелешка и пишете на мејлинг листа. Доколку сте дежурен во вашиот google calendar е вметнат нов календар насловен „Хаклаб: Дежурства“ со настан за деновите кога сте дежурни. Поставете ги известувањата за да бидете навреме известени.\n\n'
text+=tabela(people)
wiki = MediaWiki('https://wiki.spodeli.org/api.php')
user, password = open('credentials', 'r').read().split()
wiki.login(user,password)
token = wiki.call({'action': 'query', 'meta': 'tokens'})['query']['tokens']['csrftoken']
wiki.call({'action': 'edit', 'title': 'Хаклаб/Дежурства', 'section':'5', 'text':text, 'token':token})
Code Example #12
class wikkii:

    # Array to append sites to
    sites = []
    # This page lists all the wikis hosted on the Wikkii farm;
    # we can use it to derive the names and URLs of those wikis
    source_list = 'http://wikkii.com/wiki/Special:Farmer/list'
    # Blank reference to store mediawiki object in
    wikiapiary = {}
    # Edit token
    my_token = ""
    # Counter
    create_counter = 0

    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('wikkiibot', 'Username'), config.get('wikkiibot', 'Password'))

        # We need an edit token
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']

    def getList(self):
        soup = BeautifulSoup(requests.get(self.source_list).text)
        i = 1
        for item in soup.findAll("a", {"class": "extiw"}):
            site = (item.contents[0], item["href"], item["title"])
            print i, site
            self.sites.append(site)
            i += 1

    def validateStats(self, url):
        my_url = "%s/wiki/Special:Statistics?action=raw" % url
        try:
            result = requests.get(my_url, timeout=10).text
            values = result.split(';')
            if len(values) == 9:
                print "Got %d values from stats" % len(values)
                return True
            else:
                return False
        except:
            print "ERROR: Failed call to Statistics URL."
            return False

    def createSite(self, name, url):
        siteTemplate = """{{Website
|Name=%s
|URL=%s
|Image=Default website image.png
|Farm=Wikkii
|Collection method=API, Special:Statistics
|API URL=%s
|Collect general data=No
|Collect extension data=No
|Collect skin data=No
|Collect statistics=No
|Collect semantic statistics=No
|Collect semantic usage=No
|Collect logs=No
|Collect recent changes=No
|Statistics URL=%s
|Collect statistics stats=Yes
|Check every=240
|Audited=No
|Validated=No
|Curated=No
|Active=Yes
|Demote=No
|Defunct=No
|Error=No
|Featured website vote=0
}}
"""
        api_url = "%sw/api.php" % url
        statistics_url = "%swiki/Special:Statistics" % url

        # Make sure a page doesn't exist with this name already
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': name
        })
        try:
            if c['query']['pages']['-1']:
                print "No duplicate name detected."
        except:
            # Duplicate detected
            name = "%s (Wikkii)" % name

        my_template = siteTemplate % (name, url, api_url, statistics_url)
        print my_template

        c = self.wikiapiary.call({
            'action': 'edit',
            'title': name,
            'text': my_template,
            'token': self.my_token,
            'bot': 'true',
            'summary': 'Creating entry for %s' % name
        })
        print c
        self.create_counter += 1

    def checkSite(self, site):
        print "Checking %s" % site[1]

        # Construct Ask query for WikiApiary
        my_query = ''.join([
            "[[Has statistics URL::%swiki/Special:Statistics]]" % site[1]
        ])
        # Execute the query against WikiApiary
        c = self.wikiapiary.call({
            'action': 'ask',
            'query': my_query
        })

        # Return the count of results for the query
        return int(c['query']['meta']['count'])

    def main(self):
        # Get the list of sites from the farm list page
        self.getList()

        for site in self.sites:
            # Limit the number of sites we make per run
            if self.create_counter > 1000:
                break

            print "\nProcessing %s" % site[0]

            # Use a guess of the API domain to see if we have it already
            siteCount = self.checkSite(site)

            if siteCount == 0:
                print "%s is not in WikiApiary, validating stats." % site[0]
                if self.validateStats(site[1]):
                    # Now add it to WikiApiary
                    self.createSite(site[0], site[1])
                    time.sleep(3)
                else:
                    print "%s did not resolve to a valid API URL." % site[0]
            elif siteCount == 1:
                print "%s already exists, skipping." % site[0]
            elif siteCount > 1:
                print "ERROR: %s found %d websites, which should never happen." % (site[0], siteCount)
Code Example #13
import pprint

# Get wiki location
location = raw_input("Base URL to the wiki API (YOUR_WIKI_ROOT/api.php): ")
if (location[0:7].lower() != "http://"):
    location = "http://" + location
wiki = MediaWiki(location)
if wiki.normalize_api_url() is None:
    sys.exit("Invalid Wiki URL")

# Get login credentials (the original loop body is redacted in the source; minimal reconstruction)
ua = build_user_agent("uturn", "0.1", "https://github.com/tomasreimers/wiki-uturn")
while True:
    username = raw_input("Username: ")
    password = raw_input("Password: ")
    if wiki.login(username, password):
        break
    print "Invalid login"

# Get date to revert to
print "When would you like to revert to (IN UTC)?"
year = int(raw_input("Year: "))
month = int(raw_input("Month: "))
day = int(raw_input("Day: "))
hour = int(raw_input("Hour: "))
minute = int(raw_input("Minute: "))
second = int(raw_input("Second: "))
revertTime = datetime_to_timestamp(datetime.datetime(year, month, day, hour, minute, second))
if (revertTime > time.time()):
    sys.exit("This tool cannot go forward in time")
Code Example #14
class ApiaryBot:

    args = []
    config = []
    apiary_wiki = []
    apiary_db = []
    stats = {}
    edit_token = ''

    def __init__(self):
        # Get command line options
        self.get_args()
        # Get configuration settings
        self.get_config(self.args.config)
        # Connect to the database
        self.connectdb()
        # Initialize stats
        self.stats['statistics'] = 0
        self.stats['smwinfo'] = 0
        self.stats['smwusage'] = 0
        self.stats['general'] = 0
        self.stats['extensions'] = 0
        self.stats['skins'] = 0
        self.stats['skippedstatistics'] = 0
        self.stats['skippedgeneral'] = 0
        self.stats['whois'] = 0
        self.stats['maxmind'] = 0
        self.stats['interwikimap'] = 0
        self.stats['libraries'] = 0
        self.stats['namespaces'] = 0

    def get_config(self, config_file='../apiary.cfg'):
        try:
            self.config = ConfigParser.ConfigParser()
            self.config.read(config_file)
        except IOError:
            print("Cannot open %s." % config_file)

    def get_args(self):
        parser = argparse.ArgumentParser(
            prog="Bumble Bee",
            description="retrieves usage and statistic information for WikiApiary")
        parser.add_argument("-s",
                            "--segment",
                            help="only work on websites in defined segment")
        parser.add_argument("--site",
                            help="only work on this specific site id")
        parser.add_argument(
            "-f",
            "--force",
            action="store_true",
            help="run regardless of when the last time data was updated")
        parser.add_argument(
            "-d",
            "--debug",
            action="store_true",
            help="do not write any changes to wiki or database")
        parser.add_argument("--config",
                            default="../apiary.cfg",
                            help="use an alternative config file")
        parser.add_argument("-v",
                            "--verbose",
                            action="count",
                            default=0,
                            help="increase output verbosity")
        parser.add_argument("--version",
                            action="version",
                            version="%(prog)s 0.1")

        # All set, now get the arguments
        self.args = parser.parse_args()

    def filter_illegal_chars(self, pre_filter):
        # Utility function to make sure that strings are okay for page titles
        return re.sub(r'[#<>\[\]\|{}]', '', pre_filter).replace('=', '-')

    def sqlutcnow(self):
        now = datetime.datetime.utcnow()
        now = now.replace(tzinfo=pytz.utc)
        now = now.replace(microsecond=0)
        return now.strftime('%Y-%m-%d %H:%M:%S')

    def make_request(self, site, data_url, bot='Bumble Bee'):
        req = urllib2.Request(data_url)
        req.add_header('User-Agent', self.config.get(bot, 'User-Agent'))
        req.add_header('Accept-Encoding', 'gzip')
        opener = urllib2.build_opener()

        try:
            t1 = datetime.datetime.now()
            f = opener.open(req)
            duration = (datetime.datetime.now() - t1).total_seconds()
        except ssl.SSLError as e:
            msg = "SSL Error: " + str(e)
            self.record_error(site=site,
                              log_message=msg,
                              log_type='info',
                              log_severity='normal',
                              log_bot=bot,
                              log_url=data_url)
            return None, None
        except urllib2.URLError as e:
            self.record_error(site=site,
                              log_message="URLError: %s" % e,
                              log_type='error',
                              log_severity='normal',
                              log_bot=bot,
                              log_url=data_url)
            return None, None
        except urllib2.HTTPError as e:
            if e.code > 399 and e.code < 500:
                raise FourHundred(e)
            if e.code > 499 and e.code < 600:
                raise FiveHundred(e)
            self.record_error(site=site,
                              log_message="%s" % e,
                              log_type='error',
                              log_severity='normal',
                              log_bot=bot,
                              log_url=data_url)
            return None, None
        except Exception as e:
            self.record_error(site=site,
                              log_message=str(e),
                              log_type='info',
                              log_severity='normal',
                              log_bot=bot,
                              log_url=data_url)
            return None, None
        else:
            return f, duration

    def pull_json(self, site, data_url, bot='Bumble Bee'):
        socket.setdefaulttimeout(10)

        (f, duration) = self.make_request(site, data_url, bot)
        if f is None:
            return False, None, None
        else:
            # Clean the returned string before we parse it,
            # sometimes there are junky error messages from PHP in
            # here, or simply a newline that shouldn't be present
            # The regex here is really simple, but it seems to
            # work fine.
            if f.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(f.read())
                gz = gzip.GzipFile(fileobj=buf)
                ret_string = gz.read()
            else:
                ret_string = f.read()
            json_match = re.search(r"({.*})", ret_string, flags=re.MULTILINE)
            if json_match is None or json_match.group(1) is None:
                raise NoJSON(data_url + "||" + ret_string)

            # Found JSON block
            try:
                data = simplejson.loads(json_match.group(1))
            except ValueError as e:
                raise NoJSON(data_url + "||" + ret_string)

            return True, data, duration

    def runSql(self, sql_command, args=None):
        if self.args.verbose >= 3:
            print("SQL: %s" % sql_command)
        try:
            cur = self.apiary_db.cursor()
            cur.execute('SET NAMES utf8mb4')
            cur.execute("SET CHARACTER SET utf8mb4")
            cur.execute("SET character_set_connection=utf8mb4")
            cur.execute(sql_command, args)
            cur.close()
            self.apiary_db.commit()
            return True, cur.rowcount
        except Exception as e:
            print("Exception generated while running SQL command.")
            print("Command: %s" % sql_command)
            print("Exception: %s" % e)
            return False, 0

    def record_error(self,
                     site=None,
                     log_message='Unknown Error',
                     log_type='info',
                     log_severity='normal',
                     log_bot=None,
                     log_url=None):

        if self.args.verbose >= 2:
            print("New log message for %s" % site['pagename'])

        if self.args.verbose >= 1:
            print(log_message)

        if site is None:
            site = {'Has ID': 0}

        if 'Has name' in site:
            site['pagename'] = site['Has name']
        elif 'pagename' not in site:
            site['pagename'] = 'Error'

        if log_bot is None:
            log_bot = "null"
        else:
            log_bot = "'%s'" % log_bot

        if log_url is None:
            log_url = "null"
        else:
            log_url = "'%s'" % log_url

        temp_sql = "INSERT  apiary_website_logs "
        temp_sql += "(website_id, log_date, website_name, log_type, "
        temp_sql += "log_severity, log_message, log_bot, log_url) "

        if len(log_message) > 65535:
            print("log_message too long: %s" % log_message)
            log_message = log_message[0:65535]
        # The format string is not really a normal Python format
        # string.  You must always use %s http://stackoverflow.com/a/5785163
        temp_sql += "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
        args = (site['Has ID'], self.sqlutcnow(), site['pagename'], log_type,
                log_severity, log_message, log_bot, log_url)

        self.runSql(temp_sql, args)

    def clear_error(self, sitename):
        # This function clears the error status of a meeting
        socket.setdefaulttimeout(30)

        if self.args.verbose >= 2:
            print("Clearing error for %s" % sitename)

        c = self.apiary_wiki.call({
            'action': 'sfautoedit',
            'form': 'Website',
            'target': sitename,
            'Website[Error]': 'No',
            'wpSummary': 'clearing error'
        })
        if self.args.verbose >= 3:
            print("result:%s" % c)

    def connectdb(self):
        # Setup our database connection
        # Use the account that can also insert and delete from the database
        self.apiary_db = mdb.connect(
            host=self.config.get('ApiaryDB', 'hostname'),
            db=self.config.get('ApiaryDB', 'database'),
            user=self.config.get('ApiaryDB RW', 'username'),
            passwd=self.config.get('ApiaryDB RW', 'password'),
            charset='utf8')

    def connectwiki(self, bot_name):
        self.apiary_wiki = MediaWiki(self.config.get('WikiApiary', 'API'))
        c = self.apiary_wiki.login(self.config.get(bot_name, 'Username'),
                                   self.config.get(bot_name, 'Password'))
        if self.args.verbose >= 1:
            print("Username: %s Password: %s" % (self.config.get(
                bot_name, 'Username'), self.config.get(bot_name, 'Password')))
            print(c)

    def get_websites(self, segment, site):
        filter_string = ""
        if site is not None:
            if self.args.verbose >= 1:
                print("Processing site %d." % int(site))
            filter_string = "[[Has ID::%d]]" % int(site)
        elif segment is not None:
            if self.args.verbose >= 1:
                print("Only retrieving segment %d." % int(self.args.segment))
            filter_string = "[[Has bot segment::%d]]" % int(self.args.segment)
            #filter_string = "test"

        # Build query for sites
        my_query = ''.join([
            '[[Category:Website]]', '[[Is defunct::False]]',
            '[[Is active::True]]', filter_string, '|?Has API URL',
            '|?Has statistics URL', '|?Check every', '|?Creation date',
            '|?Has ID', '|?Collect general data', '|?Collect extension data',
            '|?Collect skin data', '|?Collect statistics',
            '|?Collect semantic statistics', '|?Collect semantic usage',
            '|?Collect logs', '|?Collect recent changes',
            '|?Collect statistics stats', '|sort=Creation date', '|order=asc',
            '|limit=2000'
        ])
        if self.args.verbose >= 3:
            print("Query: %s" % my_query)
        try:
            sites = self.apiary_wiki.call({'action': 'ask', 'query': my_query})
        except Exception as e:
            self.record_error(log_message="Problem querying Wikiapiary: %s" % e,
                              log_type='error',
                              log_severity='important')
        else:
            # We could just return the raw JSON object from the API, however instead we are going to clean it up into an
            # easier to deal with array of dictionary objects.
            # To keep things sensible, we'll use the same name as the properties
            i = 0
            if len(sites['query']['results']) > 0:
                my_sites = []
                for pagename, site in sites['query']['results'].items():
                    i += 1
                    if self.args.verbose >= 3:
                        print("[%d] Adding %s." % (i, pagename))
                    # Initialize the flags but do it carefully in case there is no value in the wiki yet
                    collect_general_data = list_get(
                        site['printouts'], 'Collect general data') == "t"
                    collect_extension_data = list_get(
                        site['printouts'], 'Collect extension data') == "t"
                    collect_skin_data = list_get(site['printouts'],
                                                 'Collect skin data') == "t"
                    collect_statistics = list_get(site['printouts'],
                                                  'Collect statistics') == "t"
                    collect_semantic_statistics = list_get(
                        site['printouts'],
                        'Collect semantic statistics') == "t"
                    collect_semantic_usage = list_get(
                        site['printouts'], 'Collect semantic usage') == "t"
                    collect_statistics_stats = list_get(
                        site['printouts'], 'Collect statistics stats') == "t"
                    collect_logs = list_get(site['printouts'],
                                            'Collect logs') == "t"
                    collect_recent_changes = list_get(
                        site['printouts'], 'Collect recent changes') == "t"
                    has_statistics_url = list_get(site['printouts'],
                                                  'Has statistics URL', '')
                    has_api_url = list_get(site['printouts'], 'Has API URL',
                                           '')

                    if has_statistics_url.find('wikkii.com') > 0:
                        # Temporary filter out all Farm:Wikkii sites
                        if self.args.verbose >= 2:
                            print("Skipping %s (%s)" %
                                  (pagename, site['fullurl']))
                    else:
                        try:
                            my_sites.append({
                                'pagename': pagename,
                                'fullurl': site['fullurl'],
                                'Has API URL': has_api_url,
                                'Has statistics URL': has_statistics_url,
                                'Check every': int(site['printouts']['Check every'][0]),
                                'Creation date': site['printouts']['Creation date'][0],
                                'Has ID': int(site['printouts']['Has ID'][0]),
                                'Collect general data': collect_general_data,
                                'Collect extension data': collect_extension_data,
                                'Collect skin data': collect_skin_data,
                                'Collect statistics': collect_statistics,
                                'Collect semantic statistics': collect_semantic_statistics,
                                'Collect semantic usage': collect_semantic_usage,
                                'Collect statistics stats': collect_statistics_stats,
                                'Collect logs': collect_logs,
                                'Collect recent changes': collect_recent_changes
                            })
                        except Exception as e:
                            print("Failed to add %s" % pagename)
                            print(e)
                            self.record_error(site=site,
                                              log_message="Failed to add page",
                                              log_type='warn',
                                              log_severity='important',
                                              log_bot='apiary.py',
                                              log_url=site['fullurl'])
                return my_sites
            else:
                raise Exception("No sites were returned to work on.")

    def get_status(self, site):
        """
        get_status will query the website_status table in ApiaryDB. It makes the decision if new
        data should be retrieved for a given website. Two booleans are returned, the first to
        tell if new statistics information should be requested and the second to pull general information.
        """
        # Get the timestamps for the last statistics and general pulls
        cur = self.apiary_db.cursor()
        temp_sql = "SELECT last_statistics, last_general, check_every_limit FROM website_status WHERE website_id = %d" % site[
            'Has ID']
        cur.execute(temp_sql)
        rows_returned = cur.rowcount

        if rows_returned == 1:
            # Let's see if it's time to pull information again
            data = cur.fetchone()
            cur.close()

            (last_statistics, last_general, check_every_limit) = data[0:3]
            if self.args.verbose >= 3:
                print("last_stats: %s" % last_statistics)
                print("last_general: %s" % last_general)
                print("check_every_limit: %s" % check_every_limit)

            #TODO: make this check the times!
            last_statistics_struct = time.strptime(str(last_statistics),
                                                   '%Y-%m-%d %H:%M:%S')
            last_general_struct = time.strptime(str(last_general),
                                                '%Y-%m-%d %H:%M:%S')

            stats_delta = (time.mktime(time.gmtime()) -
                           time.mktime(last_statistics_struct)) / 60
            general_delta = (time.mktime(time.gmtime()) -
                             time.mktime(last_general_struct)) / 60

            if self.args.verbose >= 2:
                print("Delta from checks: stats %s general %s" %
                      (stats_delta, general_delta))

            (check_stats, check_general) = (False, False)
            if stats_delta > (
                    site['Check every'] + random.randint(0, 15)
            ) and stats_delta > check_every_limit:  # Add randomness to keep checks spread around
                check_stats = True
            else:
                if self.args.verbose >= 2:
                    print("Skipping stats...")
                self.stats['skippedstatistics'] += 1

            if general_delta > (
                (24 + random.randint(0, 24)) * 60
            ):  # General checks are always bound to 24 hours, plus a random offset to keep checks evenly distributed
                check_general = True
            else:
                if self.args.verbose >= 2:
                    print("Skipping general...")
                self.stats['skippedgeneral'] += 1

            return (check_stats, check_general)

        elif rows_returned == 0:
            cur.close()
            # This website doesn't have a status, so we should check everything
            if self.args.verbose >= 3:
                print("website has never been checked before")
            return (True, True)

        else:
            raise Exception("Status check returned multiple rows.")

    def update_status(self, site, checktype):
        # Update the website_status table
        my_now = self.sqlutcnow()

        if checktype == "statistics":
            temp_sql = "UPDATE website_status SET last_statistics = '%s' WHERE website_id = %d" % (
                my_now, site['Has ID'])

        if checktype == "general":
            temp_sql = "UPDATE website_status SET last_general = '%s' WHERE website_id = %d" % (
                my_now, site['Has ID'])

        (success, rows_affected) = self.runSql(temp_sql)

        if rows_affected == 0:
            # No rows were updated, this website likely didn't exist before, so we need to insert the first time
            if self.args.verbose >= 2:
                print(
                    "No website_status record exists for ID %d, creating one" %
                    site['Has ID'])
            temp_sql = "INSERT website_status (website_id, last_statistics, last_general, check_every_limit) "
            temp_sql += "VALUES (%d, \"%s\", \"%s\", %d) " % (
                site['Has ID'], my_now, my_now, 240)
            temp_sql += "ON DUPLICATE KEY UPDATE last_statistics=\"%s\", last_general=\"%s\", check_every_limit=%d" % (
                my_now, my_now, 240)
            self.runSql(temp_sql)

    def botlog(self, bot, message, type='info', duration=0):
        if self.args.verbose >= 1:
            print(message)

        temp_sql = "INSERT  apiary_bot_log (log_date, log_type, bot, duration, message) "
        temp_sql += "VALUES (\"%s\", \"%s\", \"%s\", %f, \"%s\")" % (
            self.sqlutcnow(), type, bot, duration, message)

        self.runSql(temp_sql)