Example #1
    def UpdateWiki(self):
        """
        Write the contents of the teams dictionary back into the wiki
        """
        wiki = MediaWiki(self.config.get('PlanetKubb', 'API'))
        wiki.login(self.config.get('KubbBot', 'Username'), self.config.get('KubbBot', 'Password'))

        # We need an edit token
        c = wiki.call({'action': 'query', 'titles': 'Foo', 'prop': 'info', 'intoken': 'edit'})
        print c
        my_token = c['query']['pages']['-1']['edittoken']
        print "Edit token: %s" % my_token

        print "== Updating wiki with new scores =="
        for team in self.teams:
            print "\"%s\",%f,%f" % (team, self.teams[team].mu, self.teams[team].sigma)

            c = wiki.call({
                'action': 'sfautoedit',
                'form': 'Team',
                'target': team,
                'Team[TrueSkill mu]': "%s" % self.teams[team].mu,
                'Team[TrueSkill sigma]': "%s" % self.teams[team].sigma,
                'token': my_token})
            print c
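A minimal sketch (not part of the example above) of requesting the same edit token through the newer meta=tokens query, which some of the later examples in this collection also use; the API URL and credentials here are placeholders.

from simplemediawiki import MediaWiki

wiki = MediaWiki('https://example.org/w/api.php')  # placeholder API URL
wiki.login('ExampleBot', 'example-password')       # placeholder credentials
# MediaWiki 1.24+ returns CSRF (edit) tokens from meta=tokens
resp = wiki.call({'action': 'query', 'meta': 'tokens', 'type': 'csrf'})
edit_token = resp['query']['tokens']['csrftoken']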
Example #2
def open_connection(bot_name, env_name, api_url):
    """Open a connection to MediaWiki for a bot."""

    LOGGER.info("Opening MediaWiki connection for %s at %s", bot_name, api_url)
    apiary_wiki = MediaWiki(api_url)
    edit_token = None

    try:
        # Passwords may be defined in the environment or in the config file
        # We prefer the environment variable if it is present
        password = os.environ.get(env_name, None)
        if password is None:
            try:
                password = config.get('Passwords', bot_name)
            except Exception as e:
                LOGGER.warn('No configuration file detected.')

        if password is not None:
            LOGGER.info("Logging in as %s using %s", bot_name, password)
            apiary_wiki.login(bot_name, password)

            LOGGER.info("Getting edit token for %s", bot_name)
            wiki_return = apiary_wiki.call({
                'action': 'tokens',
                'type': 'edit'
            })
            edit_token = wiki_return['tokens']['edittoken']
            LOGGER.info("%s has been given edit token %s", bot_name, edit_token)
        else:
            LOGGER.warn("No password was provided for %s. Queries allowed but editing will not work.", bot_name)

    except Exception as e:
        raise Exception("Unable to login as %s: got '%s'" % (bot_name, e))

    return (apiary_wiki, edit_token)
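A hedged usage sketch for open_connection() as defined above; the bot name, environment variable and API URL are invented for illustration, and LOGGER is assumed to be the module-level logger the function already uses.

import os

os.environ.setdefault('EXAMPLEBOT_PASSWORD', 'example-password')  # placeholder secret
apiary_wiki, edit_token = open_connection(
    'ExampleBot', 'EXAMPLEBOT_PASSWORD', 'https://example.org/w/api.php')
if edit_token is None:
    LOGGER.warn("No edit token obtained; the connection is read-only.")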
Example #3
def open_connection(bot_name, env_name):
    """Open a connection to MediaWiki for a bot."""

    LOGGER.info("Opening MediaWiki connection for %s at %s", bot_name, API_URL)
    apiary_wiki = MediaWiki(API_URL)

    try:
        # Passwords may be defined in the environment or in the config file
        # We prefer the environment variable if it is present
        password = os.environ.get(env_name, None)
        if password is None:
            try:
                password = config.get('Passwords', bot_name)
            except Exception as e:
                LOGGER.warn('No configuration file detected.')
        LOGGER.info("Logging in as %s using %s", bot_name, password)
        apiary_wiki.login(bot_name, password)

        LOGGER.info("Getting edit token for %s", bot_name)
        wiki_return = apiary_wiki.call({
            'action': 'tokens',
            'type': 'edit'
        })
        edit_token = wiki_return['tokens']['edittoken']
        LOGGER.info("%s has been given edit token %s", bot_name, edit_token)
Example #4
def get_lab_text(lab_slug, language):
    """Gets text description in English or Italian from a single lab from makeinitaly.foundation."""
    if language == "English" or language == "english" or language == "EN" or language == "En":
        language = "en"
    elif language == "Italian" or language == "italian" or language == "IT" or language == "It" or language == "it":
        language = "it"
    else:
        language = "en"
    wiki = MediaWiki(makeinitaly__foundation_api_url)
    wiki_response = wiki.call(
        {'action': 'query',
         'titles': lab_slug + "/" + language,
         'prop': 'revisions',
         'rvprop': 'content'})

    # If we don't know the pageid...
    for i in wiki_response["query"]["pages"]:
        if "revisions" in wiki_response["query"]["pages"][i]:
            content = wiki_response["query"]["pages"][i]["revisions"][0]["*"]
        else:
            content = ""

    # Clean the resulting string/list
    newstr01 = content.replace("}}", "")
    newstr02 = newstr01.replace("{{", "")
    result = newstr02.rstrip("\n|").split("\n|")

    return result[0]
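A short usage sketch for get_lab_text(); the lab slug is a placeholder and the call assumes the corresponding /en and /it subpages exist on makeinitaly.foundation.

english_text = get_lab_text("ExampleLab", "English")  # resolves to the ExampleLab/en page
italian_text = get_lab_text("ExampleLab", "it")        # resolves to the ExampleLab/it page
print(english_text)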
Example #5
def setup(config):
    try:
        wikiConn = MediaWiki("http://%s:8888/mediawiki/api.php" % mwserver, user_agent="IA-mwbridge")
        wikiConn.login("david", "bad pass")
        token = wikiConn.call({"action": "query", "meta": "tokens"})["query"]["tokens"]["csrftoken"]
    except Exception as e:
        print "Trouble connecting to mediawiki: " + str(e)
Example #6
def handle(text, mic, profile):
    baseurl= "http://www.wikihow.com/"
    wiki = MediaWiki('http://www.wikihow.com/api.php')
    #wiki.login("*****@*****.**", "david1234")
    params = {'action':'query','list':'search','srsearch':text,'srprop':'redirecttitle','limit':'1', 'format':'json'}

    response = wiki.call(params)
    #r = json.dumps(response, sort_keys=True, indent=4, separators=(',', ': '))

    flag = 0
    flag_title = "none"
    pos = response['query']['search']
    query = getRequest(text)
    wiki.logout()
    # Getting the article with the best score
    for key in pos:
        val = fuzz.ratio(key['title'], query)
        print(str(val) + "% " + key['title'])
        if val > flag:
            flag = val
            flag_title = key['title']
    if flag != 0:
        answer = flag_title
        mic.say(answer)

        #rWH = renderWH.renderWikihow()
        #url = baseurl + answer
        #print url
        #url_ = rWH.getContent(str(url))
        #rWH.renderContent(url_)
        webbrowser.open(baseurl + flag_title)
    else:
        mic.say("I could not find anything bro!")
Example #7
 def connectwiki(self, bot_name):
     self.apiary_wiki = MediaWiki(self.config.get('WikiApiary', 'API'))
     c = self.apiary_wiki.login(self.config.get(bot_name, 'Username'),
                                self.config.get(bot_name, 'Password'))
     if self.args.verbose >= 1:
         print("Username: %s Password: %s" % (self.config.get(
             bot_name, 'Username'), self.config.get(bot_name, 'Password')))
         print(c)
Example #8
def fetch_article_in_wikitext(articleTitle): #fetches the article data using the simplemediawiki lib.
	import codecs
	wiki = MediaWiki('http://en.wikipedia.org/w/api.php')
	wikiTextPage = wiki.call({'action':'parse', 'page': articleTitle, 'prop': 'wikitext'});
	wikiTextPage = wikiTextPage['parse']['wikitext']['*']
	codecs.open("FetchFunctionPulls/fetchWikiTextPage",'w', encoding='utf-8').write(wikiTextPage)
	htmlArticle = wiki.call({ 'action':'parse','page':articleTitle, 'prop': 'text'});
	htmlArticle = htmlArticle['parse']['text']['*']
	codecs.open("FetchFunctionPulls/fetchHTMLPage",'w', encoding='utf-8').write(htmlArticle)
	#print type(htmlArticle)
	return wikiTextPage, htmlArticle
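A hedged usage sketch for fetch_article_in_wikitext(); the article title is arbitrary and the FetchFunctionPulls/ directory must already exist for the writes above to succeed.

wikitext, html = fetch_article_in_wikitext('Python (programming language)')
print(wikitext[:200])  # first few hundred characters of the raw wikitext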
Example #9
    def __init__(self, url, username, password):
        self.wiki = MediaWiki(url)
        self.username = username
        self.password = password

        self.login = self._make_wiki_login_call({'action': 'login'})
        self.token = self._make_wiki_login_call(
            {'action': 'login', 'lgtoken': self.login['login']['token']})
Example #10
 def fetch(cls, url, use_cache=True):
     m = re.match(r'^http://([a-z\-]+)\.wikipedia\.org', url)
     page_lang = m.group(1).encode('utf8')
     page_title = extract_page_title(url, page_lang)
     wp = MediaWiki('http://%s.wikipedia.org/w/api.php' % page_lang)
     resp = wp.call({'action': 'query', 'prop': 'pageprops|revisions', 'titles': page_title.encode('utf8'), 'rvprop': 'content'})
     page = resp['query']['pages'].values()[0]
     content = page['revisions'][0].values()[0] if 'revisions' in page else None
     if 'pageprops' in page and 'wikibase_item' in page['pageprops']:
         wikidata_id = page['pageprops']['wikibase_item']
     else:
         wikidata_id = None
     return cls(page_title, content or '', page_lang, wikidata_id)
Example #11
def parseWiki(con):
    useragent = build_user_agent('l2wiki', 0.1,
                                 'https://github.com/tm-calculate/l2wiki')
    wiki = MediaWiki('http://l2central.info/c/api.php', user_agent=useragent)
    cmcontinue = parseWikiPart(con, wiki)
    while cmcontinue:
        cmcontinue = parseWikiPart(con, wiki, cmcontinue)
Example #12
    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('TropicalBot', 'Username'),
                              config.get('TropicalBot', 'Password'))

        # We need an edit token on wiki2
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']
Example #13
File: wiki.py Project: rcbau/hacks
    def __init__(self, url, username, password):
        self.wiki = MediaWiki(url)
        self.username = username
        self.password = password

        self.login = self._make_wiki_login_call({'action': 'login'})
        self.token = self._make_wiki_login_call(
            {'action': 'login', 'lgtoken': self.login['login']['token']})
Example #14
 def fetch(cls, url, use_cache=True):
     m = re.match(r'^http://([a-z]{2})\.wikipedia\.org', url)
     page_lang = m.group(1).encode('utf8')
     page_title = extract_page_title(url, page_lang)
     wp = MediaWiki('http://%s.wikipedia.org/w/api.php' % page_lang)
     return cls(
         page_title,
         get_page_content(wp, page_title, page_lang, use_cache) or '',
         page_lang)
Example #15
 def fetch(cls, url, use_cache=True):
     m = re.match(r'^https?://([a-z\-]+)\.wikipedia\.org/wiki/(.*)$', url)
     page_lang = m.group(1).encode('utf8')
     page_title = urllib.unquote(m.group(2).encode('utf8')).decode('utf8')
     wp = MediaWiki('https://%s.wikipedia.org/w/api.php' % page_lang)
     resp = wp.call({
         'action': 'query',
         'prop': 'pageprops|revisions',
         'titles': page_title.encode('utf8'),
         'rvprop': 'content'
     })
     page = resp['query']['pages'].values()[0]
     content = page['revisions'][0].values()[0] if 'revisions' in page else None
     if 'pageprops' in page and 'wikibase_item' in page['pageprops']:
         wikidata_id = page['pageprops']['wikibase_item']
     else:
         wikidata_id = None
     return cls(page_title, content or '', page_lang, wikidata_id)
Example #16
    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        # Connect to SMW Community Wiki
        self.smwreferata = MediaWiki('http://smw.referata.com/w/api.php')

        # Connect to WikiApiary
        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('wikkiibot', 'Username'),
                              config.get('wikkiibot', 'Password'))

        # We need an edit token
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']
Example #17
def handle(text, mic, profile):
    baseurl = "http://www.wikihow.com/"
    wiki = MediaWiki('http://www.wikihow.com/api.php')
    #wiki.login("*****@*****.**", "david1234")
    params = {
        'action': 'query',
        'list': 'search',
        'srsearch': text,
        'srprop': 'redirecttitle',
        'limit': '1',
        'format': 'json'
    }

    response = wiki.call(params)
    #r = json.dumps(response, sort_keys=True, indent=4, separators=(',', ': '))

    flag = 0
    flag_title = "none"
    pos = response['query']['search']
    query = getRequest(text)
    wiki.logout()
    #Getting the article with the best score
    for key in pos:
        val = fuzz.ratio(key['title'], query)
        print(str(val) + "% " + key['title'])
        if val > flag:
            flag = val
            flag_title = key['title']
    if flag != 0:
        answer = flag_title
        mic.say(answer)

        #rWH = renderWH.renderWikihow()
        #url = baseurl + answer
        #print url
        #url_ = rWH.getContent(str(url))
        #rWH.renderContent(url_)
        webbrowser.open(baseurl + flag_title)
    else:
        mic.say("I could not find anything bro!")
Example #18
def get_segment(segment_id):
    '''Get a specific segment from wikiapiary'''

    LOGGER.info("Connecting to %s", APIARY_URL)
    apiary_wiki = MediaWiki(APIARY_URL)

    print ("Retrieving segment", segment_id)
    my_query = ''.join([
        '[[Category:Website]]',
        '[[Is defunct::False]]',
        '[[Is active::True]]',
        "[[Has bot segment::%d]]" % segment_id,
        '|?Has API URL',
        '|?Has statistics URL',
        '|?Check every',
        '|?Creation date',
        '|?Page ID',
        '|?Collect general data',
        '|?Collect extension data',
        '|?Collect skin data',
        '|?Collect statistics',
        '|?Collect semantic statistics',
        '|?Collect logs',
        '|?Collect recent changes',
        '|?Collect statistics stats',
        '|sort=Creation date',
        '|order=asc',
        '|limit=1000'])

    sites = apiary_wiki.call({
        'action': 'ask',
        'query': my_query
        })

    if len(sites['query']['results']) > 0:
        for pagename, site in sites['query']['results'].items():
            print ("Processing", pagename)
    else:
        LOGGER.error("No records returned.")
Example #19
def get_labs(data_format):
    """Gets data from all labs from hackerspaces.org."""

    labs = []

    # Get the first page of data
    wiki = MediaWiki(hackerspaces_org_api_url)
    wiki_response = wiki.call({
        'action': 'query',
        'list': 'categorymembers',
        'cmtitle': 'Category:Hackerspace',
        'cmlimit': '500'
    })
    if "query-continue" in wiki_response:
        nextpage = wiki_response["query-continue"]["categorymembers"]["cmcontinue"]
    else:
        nextpage = None

    urls = []
    for i in wiki_response["query"]["categorymembers"]:
        urls.append(i["title"].replace(" ", "_"))

    # Load all the Labs in the first page
    for i in urls:
        current_lab = get_single_lab(i, data_format)
        labs.append(current_lab)

    # Load all the Labs from the other pages
    while "query-continue" in wiki_response:
        wiki = MediaWiki(hackerspaces_org_api_url)
        wiki_response = wiki.call({
            'action': 'query',
            'list': 'categorymembers',
            'cmtitle': 'Category:Hackerspace',
            'cmlimit': '500',
            "cmcontinue": nextpage
        })

        urls = []
        for i in wiki_response["query"]["categorymembers"]:
            urls.append(i["title"].replace(" ", "_"))

        # Load all the Labs
        for i in urls:
            current_lab = get_single_lab(i, data_format)
            labs.append(current_lab)

        if "query-continue" in wiki_response:
            nextpage = wiki_response["query-continue"]["categorymembers"][
                "cmcontinue"]
        else:
            break

    # Transform the list into a dictionary
    labs_dict = {}
    for j, k in enumerate(labs):
        labs_dict[j] = k

    return labs_dict
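A minimal usage sketch, assuming hackerspaces_org_api_url and get_single_lab() are defined as in the neighbouring examples.

labs = get_labs("dict")
print("Fetched %d labs from hackerspaces.org" % len(labs))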
Example #20
    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('wmbot', 'Username'), config.get('wmbot', 'Password'))

        # We need an edit token on wiki2
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']
Example #21
def finger(request):
	change_list = []
	foundtext = False

	# PUT IN CONNECTION VALUES HERE
	wiki = MediaWiki('set_your_api_url')
	wiki.login('set_username', 'set_password')

	tokendoc = wiki.call({'action': 'tokens'})
	edittoken = tokendoc.get("tokens").get('edittoken')

	foundtext = False
	searchtext = request.GET['text'].strip()
	searchpage = request.GET['page'].strip()
	
	if searchtext == '' or searchpage == '':
		context = {
			'message': 'Missing either search text or page to search!',
		}
		return render(request, 'blame/error.html', context)
	
	queryresult = wiki.call({'action': 'query', 'prop': 'revisions', 'rvprop': 'ids|user', 'rvdir': 'newer', 'rvlimit': '5000', 'format': 'jsonfm', 'titles': searchpage})
	#print(str(queryresult))

	if ('-1' in list(queryresult['query']['pages'].keys())):
		context = {
			'message': 'The page you requested was not found!  Please check your capitalization, namespace, and spelling!',
		}
		return render(request, 'blame/error.html', context)
	
	revisions = (list(queryresult['query']['pages'][list(queryresult['query']['pages'].keys())[0]]['revisions']))
	
	for revision in revisions:
		revisiondata = wiki.call({'action': 'query', 'prop': 'revisions', 'revids': revision['revid'], 'rvprop': 'content', 'format': 'jsonfm'})
		
		revisiontext = revisiondata['query']['pages'][list(queryresult['query']['pages'].keys())[0]]['revisions'][0]['*']
		if not foundtext and searchtext in revisiontext:
			# PUT IN URL VALUE HERE
			change_list.append({'changetype': 'Added', 'revision': revision['revid'], 'user': revision['user'], 'link': 'set_your_website_url?title=' + searchpage + '&oldid=' + str(revision['revid'])})
			foundtext = True
		elif foundtext and not searchtext in revisiontext:
			# PUT IN URL VALUE HERE
			change_list.append({'changetype': 'Removed', 'revision': revision['revid'], 'user': revision['user'], 'link': 'set_your_website_url?title=' + searchpage + '&oldid=' + str(revision['revid'])})
			foundtext = False

	context = {
		'change_list': change_list,
	}
	return render(request, 'blame/finger.html', context)
	
	
Example #22
 def __init__(self):
     self.client = MediaWiki('https://commons.wikimedia.org/w/api.php')
Example #23
class Wiki(models.Model):
    MAIN_NAMESPACE = 0
    IMAGE_NAMESPACE = 6
    NAMESPACE_CHOICES = (
                          (MAIN_NAMESPACE, 'Articles'),
                          (IMAGE_NAMESPACE, 'Images'),
                          )
    NAMESPACE_TYPES = { MAIN_NAMESPACE: 'wikimediaarticle', IMAGE_NAMESPACE:'linksgallery' }
    
    name = models.CharField(max_length=200, help_text=_(u'Nom sous lequel ce wiki est connu'))
    shortname = models.SlugField(max_length=50,help_text=_(u'Identifiant du wiki (charactères ASCII et chiffres seulement)'))
    url = models.URLField(help_text=_(u'Adresse du répertoire où se trouve api.php'))
    language = models.ForeignKey(ResourceLanguage)
    # the namespace we are interested in in this wiki
    namespace = models.IntegerField(choices=NAMESPACE_CHOICES,default=MAIN_NAMESPACE,help_text=_(u'Type de médias présents sur ce wiki'))
    # Resource slug
    slug = models.CharField(max_length=50,editable=False)
    
    def __init__(self,*args,**kwargs):
        super(Wiki,self).__init__(*args,**kwargs)
        self.wiki = MediaWiki(self.url + 'api.php')
    def save(self,*args,**kwargs):
        if not self.slug:
            self.slug = self.shortname + '.' + self.language
        if not self.shortname:
            self.shortname = self.name
        super(Wiki,self).save(*args,**kwargs)
        
    def wiki_links_replacer(self,match):
        if match.group(1) == 'wiki':
            return 'href="' + reverse('catalog:wikimediaarticle-view',kwargs={'slug':self.slug}) + match.group(2) + '"'
        elif match.group(1) == 'w':
            return 'href="' + self.url + match.group(2) + '" target="_blank"'
        else:
            return match.group()
         
    def __unicode__(self):  # Python 3: def __str__(self):
        return self.name
    
    # find out the actual page title after all redirections
    def get_redirect_title(self, title):
        # first see if there is a redirect
        test = self.wiki.call({'action':'query','redirects':'true','titles':title})
        if not 'query' in test:
            raise Http404
        if 'redirects' in test['query']: # a redirect was encountered
            return test['query']['redirects'][-1]['to']
        elif not 'pages' in test['query']: # no page was found
            return ''
        else:
            return title
                
    def slugify(self,title):
        return self.slug + '/' + wiki_slugify(title)
    
    @staticmethod
    def make_from_slug(slug):
        wiki_slug, slug = slug.split('/',1)
        title = wiki_deslugify(slug)
        wiki = get_object_or_404(Wiki,slug=wiki_slug)
        title,snippet = wiki.get_snippet(title)
        return wiki,title,snippet
        
    # Retrieve the whole content of a single page
    def get_page(self, title):
        # first see if the page exists
        test = self.wiki.call({'action':'query','redirects':'true','titles':title})
        if not 'pages' in test['query']: # no page was found
            raise Http404
        else:
            # yes, there is ! return special string to indicate it
            data = self.wiki.call({'action':'parse', 'page': title, 'prop':'text','disablepp':'true'}) # , 'redirects':'true'
            html = data['parse']['text']['*']
            # we need to replace internal links
            html = p.sub(self.wiki_links_replacer,html)
            return html
        
    # retrieve a snippet for a single page
    def get_snippet(self,title):
        try:
            data = self.wiki.call({'action':'query', 'list':'search','srsearch':title,'srprop':'snippet', 'srnamespace':"%d" % self.namespace,'srlimit':'1'})
        except URLError:
            raise Http404    
        data = data['query']['search']
        # if we are searching imaging we need to retrieve the thumbnail URLs
        if not data:
            raise Http404
        else:
            return (data[0]['title'],data[0]['snippet'])

    @staticmethod
    def search_all_wikis(querystring,queryloc,language):
        # search on available wikis in the requested language
        wqs = Wiki.objects.filter(Q(namespace=Wiki.IMAGE_NAMESPACE) | Q(namespace=Wiki.MAIN_NAMESPACE,language__code=language))
        return chain.from_iterable([w.search(querystring) for w in wqs])
    @staticmethod
    def list_all_wikis(language):
        for wiki in Wiki.objects.filter(language__code=language).values('shortname'):
            yield wiki['shortname']
    
    def search(self,querystring):
        try:
            data = self.wiki.call({'action':'query', 'list':'search','srsearch':querystring,'srprop':'snippet', 'srnamespace':"%d" % self.namespace})
        except URLError:
            raise Http404
        data = data['query']['search']
        # if we are searching imaging we need to retrieve the thumbnail URLs
        print data
        if self.namespace is Wiki.IMAGE_NAMESPACE:
            titles = [ d['title'] for d in data ]
            pages = self.wiki.call({'action':'query', 'titles' : string.join(titles,'|'), 'prop':'imageinfo', 'iiprop':'url', 'iiurlwidth':'300'})
            urls = [ page['imageinfo'][0]['thumburl'] for page in pages['query']['pages'].values() ] 
            for idx,d in enumerate(data):
                description = "<img src='" + urls[idx] + "'/>"
                dummy,name = string.split(d['title'],':') 
                yield {'resource_type':Wiki.NAMESPACE_TYPES[Wiki.IMAGE_NAMESPACE], 'resource_source': 'external', 'name': name, 'slug': self.slugify(name), 'description': description }
        else:
            for d in data:
                yield {'resource_type':Wiki.NAMESPACE_TYPES[Wiki.MAIN_NAMESPACE], 'resource_source': 'external', 'resource_name': d['title'], 'resource_url' : reverse('catalog:wikimediaarticle-view',kwargs={'slug':self.slugify(d['title'])}), 'resource_description': d['snippet'], 'resource_tooltip': d['snippet'] + '<br><e>' + _(u'Source') + ': ' + self.shortname + '</e>'}
        
    # retrieve the url of a given image, if it exists on the wiki, otherwise raise 404 error
    def get_image_info(self,name):
        pages = self.wiki.call({'action':'query', 'titles' : 'File:'+name, 'prop':'imageinfo', 'iiprop':'url', 'iiurlwidth':'300'})
        print pages
        if 'pages' in pages['query']:
            inf = pages['query']['pages'].itervalues().next()['imageinfo'][0]
            return inf['url'],inf['thumburl']
        else:
            raise Http404
Example #24
 def __init__(self, *args, **kwargs):
     super(Wiki, self).__init__(*args, **kwargs)
     self.wiki = MediaWiki(self.url + 'api.php')
Example #25
 def __init__(self,*args,**kwargs):
     super(Wiki,self).__init__(*args,**kwargs)
     self.wiki = MediaWiki(self.url + 'api.php')
Example #26
import re
import sqlalchemy
import solr
from simplemediawiki import MediaWiki
from editing import MusicBrainzClient
import pprint
import urllib
import time
from utils import mangle_name, join_names, contains_text_in_script, quote_page_title
import config as cfg

engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

wp = MediaWiki('http://ko.wikipedia.org/w/api.php')
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia_ko')

mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

"""
CREATE TABLE bot_wp_artist_ko (
    gid uuid NOT NULL,
    processed timestamp with time zone DEFAULT now()
);

ALTER TABLE ONLY bot_wp_artist_ko
    ADD CONSTRAINT bot_wp_artist_kokey PRIMARY KEY (gid);
"""

query = """
Example #27
File: wiki.py Project: rcbau/hacks
class Wiki(object):
    def __init__(self, url, username, password):
        self.wiki = MediaWiki(url)
        self.username = username
        self.password = password

        self.login = self._make_wiki_login_call({'action': 'login'})
        self.token = self._make_wiki_login_call(
            {'action': 'login', 'lgtoken': self.login['login']['token']})

    def _make_wiki_login_call(self, packet):
        packet.update({'lgname': self.username,
                       'lgpassword': self.password})
        response = self.wiki.call(packet)
        if DEBUG:
            print response
        return response

    def all_pages(self):
        response = self.wiki.call({'action': 'query',
                                   'list': 'allpages'})
        if DEBUG:
            print response
        assert_present('query', response)

        marker = 'foo'
        marker_name = 'foo'
        while marker:
            if 'query-continue' in response:
                for possible in ['apfrom', 'apcontinue']:
                    if possible in response['query-continue']['allpages']:
                        marker = \
                            response['query-continue']['allpages'][possible]
                        marker_name = possible
                        break
            else:
                marker = None

            for page in response['query']['allpages']:
                yield page['title']
            response = self.wiki.call({'action': 'query',
                                       'list': 'allpages',
                                       marker_name: marker})
            if DEBUG:
                print response

    def get_page(self, title):
        response = self.wiki.call({'action': 'query',
                                   'titles': title,
                                   'prop': 'revisions',
                                   'rvprop': 'content'})
        if DEBUG:
            print response
        assert_present('query', response)

        pages = response['query']['pages']
        page_id = pages.keys()[0]

        if not 'revisions' in pages[page_id]:
            # This is a new page
            return ''

        return pages[page_id]['revisions'][0]['*']

    def check_for_page(self, title):
        response = self.wiki.call({'action': 'query',
                                   'titles': title,
                                   'prop': 'revisions',
                                   'rvprop': 'content'})
        if DEBUG:
            print response
        assert_present('query', response)

        pages = response['query']['pages']
        page_id = pages.keys()[0]

        if not 'revisions' in pages[page_id]:
            return False
        return True

    def post_page(self, title, text, minor=True, bot=True):
        response = self.wiki.call({'action': 'query',
                                   'prop': 'info',
                                   'titles': title,
                                   'intoken': 'edit'})
        if DEBUG:
            print response
        assert_present('query', response)

        pages = response['query']['pages']
        page_id = pages.keys()[0]

        response = self.wiki.call({'action': 'edit',
                                   'minor': minor,
                                   'bot': bot,
                                   'title': title,
                                   'text': json.dumps(text).replace(
                                       '\\n', '\n')[1:-1],
                                   'token': pages[page_id]['edittoken']})
        if DEBUG:
            print response
        if not 'nochange' in response['edit']:
            print 'Modified %s' % title

    def create_account(self, username, password, email, realname):
        response = self.wiki.call({'action': 'createaccount',
                                   'name': username,
                                   'password': password,
                                   'email': email,
                                   'realname': realname})
        if DEBUG:
            print response

        response = self.wiki.call({'action': 'createaccount',
                                   'name': username,
                                   'password': password,
                                   'email': email,
                                   'realname': realname,
                                   'token': response['createaccount']['token']})

        if DEBUG:
            print response

        return 'error' not in response
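A hedged usage sketch for the Wiki helper class above; the URL and credentials are placeholders, and DEBUG, assert_present and json are assumed to be available at module level as the class already expects.

wiki = Wiki('https://example.org/w/api.php', 'ExampleBot', 'example-password')
for title in wiki.all_pages():
    print(title)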
Example #28
import sys
import sqlalchemy
import solr
from simplemediawiki import MediaWiki
from editing import MusicBrainzClient
import pprint
import urllib
import time
from mbbot.wp.wikipage import WikiPage
from mbbot.wp.analysis import determine_country
from utils import mangle_name, join_names, out, colored_out, bcolors, escape_query, quote_page_title, wp_is_canonical_page
import config as cfg

engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

wp_lang = sys.argv[1] if len(sys.argv) > 1 else 'en'

wp = MediaWiki('http://%s.wikipedia.org/w/api.php' % wp_lang)

suffix = '_' + wp_lang if wp_lang != 'en' else ''
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia' + suffix)

mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
"""
CREATE TABLE bot_wp_artist_link (
    gid uuid NOT NULL,
    lang character varying(2),
    processed timestamp with time zone DEFAULT now()
    CONSTRAINT bot_wp_artist_link_pkey PRIMARY KEY (gid, lang)
);

CREATE TABLE bot_wp_artist_link_ignore (
    gid uuid NOT NULL,
Example #29
class wmbot:

    # Array to append sites to
    sites = []
    # This file is a list of all the database names used by wikimedia
    # we can use this to try and derive the names of various wikis
    source_list = 'http://noc.wikimedia.org/conf/all.dblist'
    # Blank reference to store mediawiki object in
    wikiapiary = {}
    # Edit token
    my_token = ""
    # Counter
    create_counter = 0
    # Regex pattern
    regex_pattern = r'^(\w+)(wiki|wikibooks|wikiquote|wiktionary|wikinews|wikisource|wikiversity|wikimedia|wikivoyage)$'

    # Site data
    siteData = {
        'wiki': {
            'domain': 'wikipedia.org',
            'name': 'Wikipedia (%s)',
            'farm': 'Wikipedia',
            'logo': 'Wikipedia-logo.png'
        },
        'wikibooks': {
            'domain': 'wikibooks.org',
            'name': 'Wikibooks (%s)',
            'farm': 'Wikibooks',
            'logo': 'Wikibooks Logo.png'
        },
        'wiktionary': {
            'domain': 'wiktionary.org',
            'name': 'Wiktionary (%s)',
            'farm': 'Wiktionary',
            'logo': '170px-Wiktportal.svg.png'
        },
        'wikiquote': {
            'domain': 'wikiquote.org',
            'name': 'Wikiquote (%s)',
            'farm': 'Wikiquote',
            'logo': 'Wikiquote Logo.png'
        },
        'wikinews': {
            'domain': 'wikinews.org',
            'name': 'Wikinews (%s)',
            'farm': 'Wikinews',
            'logo': '240px-Wikinews-logo.png'
        },
        'wikisource': {
            'domain': 'wikisource.org',
            'name': 'Wikisource (%s)',
            'farm': 'Wikisource',
            'logo': 'Wikisource Logo.png'
        },
        'wikiversity': {
            'domain': 'wikiversity.org',
            'name': 'Wikiversity (%s)',
            'farm': 'Wikiversity',
            'logo': 'Wikiversity Logo.png'
        },
        'wikivoyage': {
            'domain': 'wikivoyage.org',
            'name': 'Wikivoyage (%s)',
            'farm': 'Wikivoyage',
            'logo': 'WikivoyageOldLogoSmall.png'
        },
        'wikimedia': {
            'domain': 'wikimedia.org',
            'name': 'Wikimedia (%s)',
            'farm': 'Wikimedia',
            'logo': 'Wikimediafoundation-logo.png'
        }
    }

    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('wmbot', 'Username'),
                              config.get('wmbot', 'Password'))

        # We need an edit token on wiki2
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']

    def getList(self):
        self.sites = requests.get(self.source_list).text.split('\n')

    def validateApi(self, api_url):
        # Call http://st.wikipedia.org/w/api.php?action=query&meta=siteinfo&siprop=general&format=json
        my_url = api_url + '?action=query&meta=siteinfo&siprop=general&format=json'
        try:
            result = requests.get(my_url).json()
            if 'generator' in result['query']['general']:
                print "Detected %s" % result['query']['general']['generator']
                return True
            else:
                return False
        except:
            print "ERROR: Failed call to API check."
            return False

    def createSite(self, lang, token):
        siteTemplate = """{{Website
|Name=%s
|URL=%s
|API URL=%s
|Image=%s
|Farm=%s
|Collect general data=Yes
|Collect extension data=Yes
|Collect skin data=Yes
|Check every=240
|Collect statistics=Yes
|Audited=No
|Curated=No
|Active=Yes
}}
"""
        my_name = self.siteData[token]['name'] % lang
        my_template = siteTemplate % (
            my_name, "http://%s.%s/" %
            (lang, self.siteData[token]['domain']), "http://%s.%s/w/api.php" %
            (lang, self.siteData[token]['domain']),
            self.siteData[token]['logo'], self.siteData[token]['farm'])
        print my_template

        self.wikiapiary.call({
            'action': 'edit',
            'title': my_name,
            'text': my_template,
            'token': self.my_token,
            'bot': 'true'
        })
        self.create_counter += 1

    def checkSite(self, lang, site_domain):
        # Build the API URL using Wikimedia's known convention
        api_url = "http://%s.%s/w/api.php" % (lang, site_domain)
        print "Testing %s" % api_url

        # First see if this is a valid API URL before we query WikiApiary
        isValid = self.validateApi(api_url)

        if isValid:
            # Construct Ask query for WikiApiary
            my_query = ''.join(["[[Has API URL::%s]]" % api_url])
            # Execute the query against WikiApiary
            c = self.wikiapiary.call({'action': 'ask', 'query': my_query})

            # Return the count of results for the query
            return True, int(c['query']['meta']['count'])
        else:
            return False, 0

    def processSite(self, token):
        match = re.findall(self.regex_pattern, token)
        if len(match[0]) == 2:
            return match[0]
        else:
            return (False, False)

    def main(self):
        # Get the list of tokens from the config file
        self.getList()

        # Now loop through the tokens
        for token in self.sites:
            print "\nProcessing %s" % token
            # First turn a token into a lang and site
            (lang, site) = self.processSite(token)
            lang = lang.replace('_', '-')
            # If we successfully got lang and site proceed
            if lang is not False and site is not False:
                # Use a guess of the API domain to see if
                (valid,
                 siteCount) = self.checkSite(lang,
                                             self.siteData[site]['domain'])

                if valid:
                    if siteCount == 0:
                        print "%s appears to be untracked token." % token
                        # Now add it to WikiApiary
                        self.createSite(lang, site)
                    elif siteCount == 1:
                        print "%s already exists." % token
                    elif siteCount > 1:
                        print "%s found %d websites, which should never happen." % (
                            token, siteCount)
                else:
                    print "%s did not resolve to a valid API URL." % token
            else:
                print "%s could not process token." % token
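A hedged usage sketch for the wmbot class above; it assumes ../apiary.cfg exists with the WikiApiary and wmbot credentials the constructor reads.

if __name__ == '__main__':
    bot = wmbot()
    bot.main()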
Example #30
class Wiki(models.Model):
    MAIN_NAMESPACE = 0
    IMAGE_NAMESPACE = 6
    NAMESPACE_CHOICES = (
        (MAIN_NAMESPACE, 'Articles'),
        (IMAGE_NAMESPACE, 'Images'),
    )
    NAMESPACE_TYPES = {
        MAIN_NAMESPACE: 'wikimediaarticle',
        IMAGE_NAMESPACE: 'linksgallery'
    }

    name = models.CharField(max_length=200,
                            help_text=_(u'Nom sous lequel ce wiki est connu'))
    shortname = models.SlugField(
        max_length=50,
        help_text=_(
            u'Identifiant du wiki (charactères ASCII et chiffres seulement)'))
    url = models.URLField(
        help_text=_(u'Adresse du répertoire où se trouve api.php'))
    language = models.ForeignKey(ResourceLanguage)
    # the namespace we are interested in in this wiki
    namespace = models.IntegerField(
        choices=NAMESPACE_CHOICES,
        default=MAIN_NAMESPACE,
        help_text=_(u'Type de médias présents sur ce wiki'))
    # Resource slug
    slug = models.CharField(max_length=50, editable=False)

    def __init__(self, *args, **kwargs):
        super(Wiki, self).__init__(*args, **kwargs)
        self.wiki = MediaWiki(self.url + 'api.php')

    def save(self, *args, **kwargs):
        if not self.slug:
            self.slug = self.shortname + '.' + self.language
        if not self.shortname:
            self.shortname = self.name
        super(Wiki, self).save(*args, **kwargs)

    def wiki_links_replacer(self, match):
        if match.group(1) == 'wiki':
            return 'href="' + reverse('catalog:wikimediaarticle-view',
                                      kwargs={'slug': self.slug
                                              }) + match.group(2) + '"'
        elif match.group(1) == 'w':
            return 'href="' + self.url + match.group(2) + '" target="_blank"'
        else:
            return match.group()

    def __unicode__(self):  # Python 3: def __str__(self):
        return self.name

    # find out the actual page title after all redirections
    def get_redirect_title(self, title):
        # first see if there is a redirect
        test = self.wiki.call({
            'action': 'query',
            'redirects': 'true',
            'titles': title
        })
        if not 'query' in test:
            raise Http404
        if 'redirects' in test['query']:  # a redirect was encountered
            return test['query']['redirects'][-1]['to']
        elif not 'pages' in test['query']:  # no page was found
            return ''
        else:
            return title

    def slugify(self, title):
        return self.slug + '/' + wiki_slugify(title)

    @staticmethod
    def make_from_slug(slug):
        wiki_slug, slug = slug.split('/', 1)
        title = wiki_deslugify(slug)
        wiki = get_object_or_404(Wiki, slug=wiki_slug)
        title, snippet = wiki.get_snippet(title)
        return wiki, title, snippet

    # Retrieve the whole content of a single page
    def get_page(self, title):
        # first see if the page exists
        test = self.wiki.call({
            'action': 'query',
            'redirects': 'true',
            'titles': title
        })
        if not 'pages' in test['query']:  # no page was found
            raise Http404
        else:
            # yes, there is ! return special string to indicate it
            data = self.wiki.call({
                'action': 'parse',
                'page': title,
                'prop': 'text',
                'disablepp': 'true'
            })  # , 'redirects':'true'
            html = data['parse']['text']['*']
            # we need to replace internal links
            html = p.sub(self.wiki_links_replacer, html)
            return html

    # retrieve a snippet for a single page
    def get_snippet(self, title):
        try:
            data = self.wiki.call({
                'action': 'query',
                'list': 'search',
                'srsearch': title,
                'srprop': 'snippet',
                'srnamespace': "%d" % self.namespace,
                'srlimit': '1'
            })
        except URLError:
            raise Http404
        data = data['query']['search']
        # if we are searching imaging we need to retrieve the thumbnail URLs
        if not data:
            raise Http404
        else:
            return (data[0]['title'], data[0]['snippet'])

    @staticmethod
    def search_all_wikis(querystring, queryloc, language):
        # search on available wikis in the requested language
        wqs = Wiki.objects.filter(
            Q(namespace=Wiki.IMAGE_NAMESPACE)
            | Q(namespace=Wiki.MAIN_NAMESPACE, language__code=language))
        return chain.from_iterable([w.search(querystring) for w in wqs])

    @staticmethod
    def list_all_wikis(language):
        for wiki in Wiki.objects.filter(
                language__code=language).values('shortname'):
            yield wiki['shortname']

    def search(self, querystring):
        try:
            data = self.wiki.call({
                'action': 'query',
                'list': 'search',
                'srsearch': querystring,
                'srprop': 'snippet',
                'srnamespace': "%d" % self.namespace
            })
        except URLError:
            raise Http404
        data = data['query']['search']
        # if we are searching imaging we need to retrieve the thumbnail URLs
        print data
        if self.namespace is Wiki.IMAGE_NAMESPACE:
            titles = [d['title'] for d in data]
            pages = self.wiki.call({
                'action': 'query',
                'titles': string.join(titles, '|'),
                'prop': 'imageinfo',
                'iiprop': 'url',
                'iiurlwidth': '300'
            })
            urls = [
                page['imageinfo'][0]['thumburl']
                for page in pages['query']['pages'].values()
            ]
            for idx, d in enumerate(data):
                description = "<img src='" + urls[idx] + "'/>"
                dummy, name = string.split(d['title'], ':')
                yield {
                    'resource_type':
                    Wiki.NAMESPACE_TYPES[Wiki.IMAGE_NAMESPACE],
                    'resource_source': 'external',
                    'name': name,
                    'slug': self.slugify(name),
                    'description': description
                }
        else:
            for d in data:
                yield {
                    'resource_type':
                    Wiki.NAMESPACE_TYPES[Wiki.MAIN_NAMESPACE],
                    'resource_source':
                    'external',
                    'resource_name':
                    d['title'],
                    'resource_url':
                    reverse('catalog:wikimediaarticle-view',
                            kwargs={'slug': self.slugify(d['title'])}),
                    'resource_description':
                    d['snippet'],
                    'resource_tooltip':
                    d['snippet'] + '<br><e>' + _(u'Source') + ': ' +
                    self.shortname + '</e>'
                }

    # retrieve the url of a given image, if it exists on the wiki, otherwise raise 404 error
    def get_image_info(self, name):
        pages = self.wiki.call({
            'action': 'query',
            'titles': 'File:' + name,
            'prop': 'imageinfo',
            'iiprop': 'url',
            'iiurlwidth': '300'
        })
        print pages
        if 'pages' in pages['query']:
            inf = pages['query']['pages'].itervalues().next()['imageinfo'][0]
            return inf['url'], inf['thumburl']
        else:
            raise Http404
Example #31
def get_single_lab(lab_slug, data_format):
    """Gets data from a single lab from hackerspaces.org."""
    wiki = MediaWiki(hackerspaces_org_api_url)
    wiki_response = wiki.call({
        'action': 'query',
        'titles': lab_slug,
        'prop': 'revisions',
        'rvprop': 'content'
    })

    # If we don't know the pageid...
    for i in wiki_response["query"]["pages"]:
        content = wiki_response["query"]["pages"][i]["revisions"][0]["*"]

    # Transform the data into a Lab object
    current_lab = Lab()

    equipment_list = []

    # Parse the Mediawiki code
    wikicode = mwparserfromhell.parse(content)
    for k in wikicode.filter_templates():
        element_name = unicode(k.name)
        if "Hackerspace" in element_name:
            for j in k.params:
                if unicode(j.name) == "logo":
                    current_lab.logo = unicode(j.value)
                if unicode(j.name) == "country":
                    current_lab.country = unicode(j.value)
                if unicode(j.name) == "state":
                    current_lab.state = unicode(j.value)
                if unicode(j.name) == "city":
                    current_lab.city = unicode(j.value)
                if unicode(j.name) == "founding":
                    current_lab.founding = unicode(j.value)
                if unicode(j.name) == "coordinate":
                    value = unicode(j.value)
                    current_lab.coordinates = value
                    latlong = []
                    if ", " in value:
                        latlong = value.rstrip(", ").split(", ")
                    elif " , " in value:
                        latlong = value.rstrip(" , ").split(" , ")
                    else:
                        latlong = ["", ""]
                    current_lab.lat = latlong[0]
                    current_lab.long = latlong[1]
                if unicode(j.name) == "membercount":
                    current_lab.membercount = unicode(j.value)
                if unicode(j.name) == "fee":
                    current_lab.fee = unicode(j.value)
                if unicode(j.name) == "size":
                    current_lab.size = unicode(j.value)
                if unicode(j.name) == "status":
                    current_lab.status = unicode(j.value)
                if unicode(j.name) == "site":
                    current_lab.site = unicode(j.value)
                if unicode(j.name) == "wiki":
                    current_lab.wiki = unicode(j.value)
                if unicode(j.name) == "irc":
                    current_lab.irc = unicode(j.value)
                if unicode(j.name) == "jabber":
                    current_lab.jabber = unicode(j.value)
                if unicode(j.name) == "phone":
                    current_lab.phone = unicode(j.value)
                if unicode(j.name) == "youtube":
                    current_lab.youtube = unicode(j.value)
                if unicode(j.name) == "eventbrite":
                    current_lab.eventbrite = unicode(j.value)
                if unicode(j.name) == "facebook":
                    current_lab.facebook = unicode(j.value)
                if unicode(j.name) == "ustream":
                    current_lab.ustream = unicode(j.value)
                if unicode(j.name) == "flickr":
                    current_lab.flickr = unicode(j.value)
                if unicode(j.name) == "twitter":
                    current_lab.twitter = unicode(j.value)
                if unicode(j.name) == "googleplus":
                    current_lab.googleplus = unicode(j.value)
                if unicode(j.name) == "email":
                    current_lab.email = unicode(j.value)
                if unicode(j.name) == "maillist":
                    current_lab.maillist = unicode(j.value)
                if unicode(j.name) == "ical":
                    current_lab.ical = unicode(j.value)
                if unicode(j.name) == "forum":
                    current_lab.forum = unicode(j.value)
                if unicode(j.name) == "street-address":
                    current_lab.street_address = unicode(j.value)
                if unicode(j.name) == "postalcode":
                    current_lab.postalcode = unicode(j.value)
                if unicode(j.name) == "region":
                    current_lab.region = unicode(j.value)
                if unicode(j.name) == "post-office-box":
                    current_lab.post_office_box = unicode(j.value)
        elif "Equipment" in element_name:
            for j in k.params:
                equipment_list.append(j.replace("equipment=", ""))

    current_lab.equipment = equipment_list

    # Load the free text
    freetext = ""
    for k in wikicode._nodes:
        try:
            test_value = k.name
        except AttributeError:
            freetext += unicode(k)
    current_lab.text = freetext

    if data_format == "dict":
        return current_lab.__dict__
    elif data_format == "object":
        return current_lab
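A short usage sketch for get_single_lab(); the lab slug is a placeholder, and the dict form is used so missing template fields can be read safely.

lab = get_single_lab("ExampleLab", "dict")
print(lab.get("country", "unknown"))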
Example #32
#! /usr/bin/env python

# Imports
from simplemediawiki import MediaWiki, build_user_agent
import sys
from timestamper import *
import datetime
import time
import pprint

# Get wiki location
location = raw_input("Base URL to the wiki API (YOUR_WIKI_ROOT/api.php): ")
if (location[0:7].lower() != "http://"):
    location = "http://" + location
wiki = MediaWiki(location)
if wiki.normalize_api_url() is None:
    sys.exit("Invalid Wiki URL")

# Get login credetials
ua = build_user_agent("uturn", "0.1", "https://github.com/tomasreimers/wiki-uturn");
while True:
    username = raw_input("Username: "******"Password: "******"Invalid login"

# Get date to revert to
print "When would you like to revert to (IN UTC)?"
year = int(raw_input("Year: "))
Example #33
class ApiaryBot:

    args = []
    config = []
    apiary_wiki = []
    apiary_db = []
    stats = {}
    edit_token = ''

    def __init__(self):
        # Get command line options
        self.get_args()
        # Get configuration settings
        self.get_config(self.args.config)
        # Connect to the database
        self.connectdb()
        # Initialize stats
        self.stats['statistics'] = 0
        self.stats['smwinfo'] = 0
        self.stats['smwusage'] = 0
        self.stats['general'] = 0
        self.stats['extensions'] = 0
        self.stats['skins'] = 0
        self.stats['skippedstatistics'] = 0
        self.stats['skippedgeneral'] = 0
        self.stats['whois'] = 0
        self.stats['maxmind'] = 0
        self.stats['interwikimap'] = 0
        self.stats['libraries'] = 0
        self.stats['namespaces'] = 0

    def get_config(self, config_file='../apiary.cfg'):
        try:
            self.config = ConfigParser.ConfigParser()
            self.config.read(config_file)
        except IOError:
            print("Cannot open %s." % config_file)

    def get_args(self):
        parser = argparse.ArgumentParser(
            prog="Bumble Bee",
            description=
            "retrieves usage and statistic information for WikiApiary")
        parser.add_argument("-s",
                            "--segment",
                            help="only work on websites in defined segment")
        parser.add_argument("--site",
                            help="only work on this specific site id")
        parser.add_argument(
            "-f",
            "--force",
            action="store_true",
            help="run regardless of when the last time data was updated")
        parser.add_argument(
            "-d",
            "--debug",
            action="store_true",
            help="do not write any changes to wiki or database")
        parser.add_argument("--config",
                            default="../apiary.cfg",
                            help="use an alternative config file")
        parser.add_argument("-v",
                            "--verbose",
                            action="count",
                            default=0,
                            help="increase output verbosity")
        parser.add_argument("--version",
                            action="version",
                            version="%(prog)s 0.1")

        # All set, now get the arguments
        self.args = parser.parse_args()

    def filter_illegal_chars(self, pre_filter):
        # Utility function to make sure that strings are okay for page titles
        return re.sub(r'[#<>\[\]\|{}]', '', pre_filter).replace('=', '-')

    def sqlutcnow(self):
        now = datetime.datetime.utcnow()
        now = now.replace(tzinfo=pytz.utc)
        now = now.replace(microsecond=0)
        return now.strftime('%Y-%m-%d %H:%M:%S')

    def make_request(self, site, data_url, bot='Bumble Bee'):
        req = urllib2.Request(data_url)
        req.add_header('User-Agent', self.config.get(bot, 'User-Agent'))
        req.add_header('Accept-Encoding', 'gzip')
        opener = urllib2.build_opener()

        try:
            t1 = datetime.datetime.now()
            f = opener.open(req)
            duration = (datetime.datetime.now() - t1).total_seconds()
        except ssl.SSLError as e:
            msg = "SSL Error: " + str(e)
            self.record_error(site=site,
                              log_message=msg,
                              log_type='info',
                              log_severity='normal',
                              log_bot=bot,
                              log_url=data_url)
            return None, None
        except urllib2.URLError as e:
            self.record_error(site=site,
                              log_message="URLError: %s" % e,
                              log_type='error',
                              log_severity='normal',
                              log_bot=bot,
                              log_url=data_url)
            return None, None
        except urllib2.HTTPError as e:
            if e.code > 399 and e.code < 500:
                raise FourHundred(e)
            if e.code > 499 and e.code < 600:
                raise FiveHundred(e)
            self.record_error(site=site,
                              log_message="%s" % e,
                              log_type='error',
                              log_severity='normal',
                              log_bot=bot,
                              log_url=data_url)
            return None, None
        except Exception as e:
            self.record_error(site=site,
                              log_message=str(e),
                              log_type='info',
                              log_severity='normal',
                              log_bot=bot,
                              log_url=data_url)
            return None, None
        else:
            return f, duration

    def pull_json(self, site, data_url, bot='Bumble Bee'):
        socket.setdefaulttimeout(10)

        (f, duration) = self.make_request(site, data_url, bot)
        if f is None:
            return False, None, None
        else:
            # Clean the returned string before we parse it,
            # sometimes there are junky error messages from PHP in
            # here, or simply a newline that shouldn't be present
            # The regex here is really simple, but it seems to
            # work fine.
            if f.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(f.read())
                gz = gzip.GzipFile(fileobj=buf)
                ret_string = gz.read()
            else:
                ret_string = f.read()
            json_match = re.search(r"({.*})", ret_string, flags=re.MULTILINE)
            if json_match is None or json_match.group(1) is None:
                raise NoJSON(data_url + "||" + ret_string)

            # Found JSON block
            try:
                data = simplejson.loads(json_match.group(1))
            except ValueError as e:
                raise NoJSON(data_url + "||" + ret_string)

            return True, data, duration

    def runSql(self, sql_command, args=None):
        if self.args.verbose >= 3:
            print("SQL: %s" % sql_command)
        try:
            cur = self.apiary_db.cursor()
            cur.execute('SET NAMES utf8mb4')
            cur.execute("SET CHARACTER SET utf8mb4")
            cur.execute("SET character_set_connection=utf8mb4")
            cur.execute(sql_command, args)
            cur.close()
            self.apiary_db.commit()
            return True, cur.rowcount
        except Exception as e:
            print("Exception generated while running SQL command.")
            print("Command: %s" % sql_command)
            print("Exception: %s" % e)
            return False, 0

    def record_error(self,
                     site=None,
                     log_message='Unknown Error',
                     log_type='info',
                     log_severity='normal',
                     log_bot=None,
                     log_url=None):

        if site is None:
            site = {'Has ID': 0}

        if 'Has name' in site:
            site['pagename'] = site['Has name']
        elif 'pagename' not in site:
            site['pagename'] = 'Error'

        if self.args.verbose >= 2:
            print("New log message for %s" % site['pagename'])

        if self.args.verbose >= 1:
            print(log_message)

        # log_bot and log_url are passed straight through as query
        # parameters below; MySQLdb converts None to SQL NULL and handles
        # quoting/escaping, so no manual quoting is needed here.

        temp_sql = "INSERT  apiary_website_logs "
        temp_sql += "(website_id, log_date, website_name, log_type, "
        temp_sql += "log_severity, log_message, log_bot, log_url) "

        if len(log_message) > 65535:
            print("log_message too long: %s" % log_message)
            log_message = log_message[0:65535]
        # The format string is not really a normal Python format
        # string.  You must always use %s http://stackoverflow.com/a/5785163
        temp_sql += "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
        args = (site['Has ID'], self.sqlutcnow(), site['pagename'], log_type,
                log_severity, log_message, log_bot, log_url)

        self.runSql(temp_sql, args)

    def clear_error(self, sitename):
        # This function clears the error status of a meeting
        socket.setdefaulttimeout(30)

        if self.args.verbose >= 2:
            print("Clearing error for %s" % sitename)

        c = self.apiary_wiki.call({
            'action': 'sfautoedit',
            'form': 'Website',
            'target': sitename,
            'Website[Error]': 'No',
            'wpSummary': 'clearing error'
        })
        if self.args.verbose >= 3:
            print("result:%s" % c)

    def connectdb(self):
        # Setup our database connection
        # Use the account that can also insert and delete from the database
        self.apiary_db = mdb.connect(
            host=self.config.get('ApiaryDB', 'hostname'),
            db=self.config.get('ApiaryDB', 'database'),
            user=self.config.get('ApiaryDB RW', 'username'),
            passwd=self.config.get('ApiaryDB RW', 'password'),
            charset='utf8')

    def connectwiki(self, bot_name):
        self.apiary_wiki = MediaWiki(self.config.get('WikiApiary', 'API'))
        c = self.apiary_wiki.login(self.config.get(bot_name, 'Username'),
                                   self.config.get(bot_name, 'Password'))
        if self.args.verbose >= 1:
            print("Username: %s Password: %s" % (self.config.get(
                bot_name, 'Username'), self.config.get(bot_name, 'Password')))
            print(c)

    def get_websites(self, segment, site):
        filter_string = ""
        if site is not None:
            if self.args.verbose >= 1:
                print("Processing site %d." % int(site))
            filter_string = "[[Has ID::%d]]" % int(site)
        elif segment is not None:
            if self.args.verbose >= 1:
                print("Only retrieving segment %d." % int(self.args.segment))
            filter_string = "[[Has bot segment::%d]]" % int(self.args.segment)
            #filter_string = "test"

        # Build query for sites
        my_query = ''.join([
            '[[Category:Website]]', '[[Is defunct::False]]',
            '[[Is active::True]]', filter_string, '|?Has API URL',
            '|?Has statistics URL', '|?Check every', '|?Creation date',
            '|?Has ID', '|?Collect general data', '|?Collect extension data',
            '|?Collect skin data', '|?Collect statistics',
            '|?Collect semantic statistics', '|?Collect semantic usage',
            '|?Collect logs', '|?Collect recent changes',
            '|?Collect statistics stats', '|sort=Creation date', '|order=asc',
            '|limit=2000'
        ])
        if self.args.verbose >= 3:
            print("Query: %s" % my_query)
        try:
            sites = self.apiary_wiki.call({'action': 'ask', 'query': my_query})
        except Exception as e:
            self.record_error(
                log_message="Problem querying WikiApiary: %s" % e,
                log_type='error',
                log_severity='important')
        else:
            # Rather than return the raw JSON object from the API, clean it
            # up into an easier-to-work-with list of dictionaries, using the
            # same names as the wiki properties.
            i = 0
            if len(sites['query']['results']) > 0:
                my_sites = []
                for pagename, site in sites['query']['results'].items():
                    i += 1
                    if self.args.verbose >= 3:
                        print("[%d] Adding %s." % (i, pagename))
                    # Initialize the flags but do it carefully in case there is no value in the wiki yet
                    collect_general_data = list_get(
                        site['printouts'], 'Collect general data') == "t"
                    collect_extension_data = list_get(
                        site['printouts'], 'Collect extension data') == "t"
                    collect_skin_data = list_get(site['printouts'],
                                                 'Collect skin data') == "t"
                    collect_statistics = list_get(site['printouts'],
                                                  'Collect statistics') == "t"
                    collect_semantic_statistics = list_get(
                        site['printouts'],
                        'Collect semantic statistics') == "t"
                    collect_semantic_usage = list_get(
                        site['printouts'], 'Collect semantic usage') == "t"
                    collect_statistics_stats = list_get(
                        site['printouts'], 'Collect statistics stats') == "t"
                    collect_logs = list_get(site['printouts'],
                                            'Collect logs') == "t"
                    collect_recent_changes = list_get(
                        site['printouts'], 'Collect recent changes') == "t"
                    has_statistics_url = list_get(site['printouts'],
                                                  'Has statistics URL', '')
                    has_api_url = list_get(site['printouts'], 'Has API URL',
                                           '')

                    if has_statistics_url.find('wikkii.com') > 0:
                        # Temporary filter out all Farm:Wikkii sites
                        if self.args.verbose >= 2:
                            print("Skipping %s (%s)" %
                                  (pagename, site['fullurl']))
                    else:
                        try:
                            my_sites.append({
                                'pagename': pagename,
                                'fullurl': site['fullurl'],
                                'Has API URL': has_api_url,
                                'Has statistics URL': has_statistics_url,
                                'Check every': int(site['printouts']['Check every'][0]),
                                'Creation date': site['printouts']['Creation date'][0],
                                'Has ID': int(site['printouts']['Has ID'][0]),
                                'Collect general data': collect_general_data,
                                'Collect extension data': collect_extension_data,
                                'Collect skin data': collect_skin_data,
                                'Collect statistics': collect_statistics,
                                'Collect semantic statistics': collect_semantic_statistics,
                                'Collect semantic usage': collect_semantic_usage,
                                'Collect statistics stats': collect_statistics_stats,
                                'Collect logs': collect_logs,
                                'Collect recent changes': collect_recent_changes
                            })
                        except Exception as e:
                            print("Failed to add %s" % pagename)
                            print(e)
                            self.record_error(site=site,
                                              log_message="Failed to add page",
                                              log_type='warn',
                                              log_severity='important',
                                              log_bot='apiary.py',
                                              log_url=site['fullurl'])
                return my_sites
            else:
                raise Exception("No sites were returned to work on.")

    def get_status(self, site):
        """
        get_status will query the website_status table in ApiaryDB. It makes the decision if new
        data should be retrieved for a given website. Two booleans are returned, the first to
        tell if new statistics information should be requested and the second to pull general information.
        """
        # Get the timestamps for the last statistics and general pulls
        cur = self.apiary_db.cursor()
        temp_sql = "SELECT last_statistics, last_general, check_every_limit FROM website_status WHERE website_id = %d" % site[
            'Has ID']
        cur.execute(temp_sql)
        rows_returned = cur.rowcount

        if rows_returned == 1:
            # Let's see if it's time to pull information again
            data = cur.fetchone()
            cur.close()

            (last_statistics, last_general, check_every_limit) = data[0:3]
            if self.args.verbose >= 3:
                print("last_stats: %s" % last_statistics)
                print("last_general: %s" % last_general)
                print("check_every_limit: %s" % check_every_limit)

            #TODO: make this check the times!
            last_statistics_struct = time.strptime(str(last_statistics),
                                                   '%Y-%m-%d %H:%M:%S')
            last_general_struct = time.strptime(str(last_general),
                                                '%Y-%m-%d %H:%M:%S')

            stats_delta = (time.mktime(time.gmtime()) -
                           time.mktime(last_statistics_struct)) / 60
            general_delta = (time.mktime(time.gmtime()) -
                             time.mktime(last_general_struct)) / 60

            if self.args.verbose >= 2:
                print("Delta from checks: stats %s general %s" %
                      (stats_delta, general_delta))

            (check_stats, check_general) = (False, False)
            # Add randomness to keep checks spread around
            if (stats_delta > (site['Check every'] + random.randint(0, 15))
                    and stats_delta > check_every_limit):
                check_stats = True
            else:
                if self.args.verbose >= 2:
                    print("Skipping stats...")
                self.stats['skippedstatistics'] += 1

            # General checks are always bound to 24 hours, plus a random
            # offset to keep checks evenly distributed
            if general_delta > ((24 + random.randint(0, 24)) * 60):
                check_general = True
            else:
                if self.args.verbose >= 2:
                    print("Skipping general...")
                self.stats['skippedgeneral'] += 1

            return (check_stats, check_general)

        elif rows_returned == 0:
            cur.close()
            # This website doesn't have a status, so we should check everything
            if self.args.verbose >= 3:
                print("website has never been checked before")
            return (True, True)

        else:
            raise Exception("Status check returned multiple rows.")

    def update_status(self, site, checktype):
        # Update the website_status table
        my_now = self.sqlutcnow()

        if checktype == "statistics":
            temp_sql = "UPDATE website_status SET last_statistics = '%s' WHERE website_id = %d" % (
                my_now, site['Has ID'])

        if checktype == "general":
            temp_sql = "UPDATE website_status SET last_general = '%s' WHERE website_id = %d" % (
                my_now, site['Has ID'])

        (success, rows_affected) = self.runSql(temp_sql)

        if rows_affected == 0:
            # No rows were updated, this website likely didn't exist before, so we need to insert the first time
            if self.args.verbose >= 2:
                print(
                    "No website_status record exists for ID %d, creating one" %
                    site['Has ID'])
            temp_sql = "INSERT website_status (website_id, last_statistics, last_general, check_every_limit) "
            temp_sql += "VALUES (%d, \"%s\", \"%s\", %d) " % (
                site['Has ID'], my_now, my_now, 240)
            temp_sql += "ON DUPLICATE KEY UPDATE last_statistics=\"%s\", last_general=\"%s\", check_every_limit=%d" % (
                my_now, my_now, 240)
            self.runSql(temp_sql)

    def botlog(self, bot, message, type='info', duration=0):
        if self.args.verbose >= 1:
            print(message)

        temp_sql = "INSERT  apiary_bot_log (log_date, log_type, bot, duration, message) "
        temp_sql += "VALUES (\"%s\", \"%s\", \"%s\", %f, \"%s\")" % (
            self.sqlutcnow(), type, bot, duration, message)

        self.runSql(temp_sql)
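
A minimal sketch of how the helper methods above might be chained in a single bot run; the run_once wrapper and its flow are illustrative assumptions, while the method names and signatures come from the class itself.

def run_once(bot):
    # `bot` is assumed to be an instance of the class above, with arguments
    # parsed and connectdb()/connectwiki() already called.
    sites = bot.get_websites(bot.args.segment, bot.args.site)
    for site in sites:
        (check_stats, check_general) = bot.get_status(site)
        if check_stats:
            # ... collect statistics for this site (not shown) ...
            bot.update_status(site, "statistics")
        if check_general:
            # ... collect general data for this site (not shown) ...
            bot.update_status(site, "general")
    bot.botlog(bot='Example Bot', message='Run complete')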
Ejemplo n.º 34
0
class wmbot:

    # Array to append sites to
    sites = []
    # This file is a list of all the database names used by wikimedia
    # we can use this to try and derive the names of various wikis
    source_list = 'http://noc.wikimedia.org/conf/all.dblist'
    # Blank reference to store mediawiki object in
    wikiapiary = {}
    # Edit token
    my_token = ""
    # Counter
    create_counter = 0
    # Regex pattern
    regex_pattern = r'^(\w+)(wiki|wikibooks|wikiquote|wiktionary|wikinews|wikisource|wikiversity|wikimedia|wikivoyage)$'

    # Site data
    siteData = {
        'wiki': {
            'domain': 'wikipedia.org',
            'name': 'Wikipedia (%s)',
            'farm': 'Wikipedia',
            'logo': 'Wikipedia-logo.png'
        },
        'wikibooks': {
            'domain': 'wikibooks.org',
            'name': 'Wikibooks (%s)',
            'farm': 'Wikibooks',
            'logo': 'Wikibooks Logo.png'
        },
        'wiktionary': {
            'domain': 'wiktionary.org',
            'name': 'Wiktionary (%s)',
            'farm': 'Wiktionary',
            'logo': '170px-Wiktportal.svg.png'
        },
        'wikiquote': {
            'domain': 'wikiquote.org',
            'name': 'Wikiquote (%s)',
            'farm': 'Wikiquote',
            'logo': 'Wikiquote Logo.png'
        },
        'wikinews': {
            'domain': 'wikinews.org',
            'name': 'Wikinews (%s)',
            'farm': 'Wikinews',
            'logo': '240px-Wikinews-logo.png'
        },
        'wikisource': {
            'domain': 'wikisource.org',
            'name': 'Wikisource (%s)',
            'farm': 'Wikisource',
            'logo': 'Wikisource Logo.png'
        },
        'wikiversity': {
            'domain': 'wikiversity.org',
            'name': 'Wikiversity (%s)',
            'farm': 'Wikiversity',
            'logo': 'Wikiversity Logo.png'
        },
        'wikivoyage': {
            'domain': 'wikivoyage.org',
            'name': 'Wikivoyage (%s)',
            'farm': 'Wikivoyage',
            'logo': 'WikivoyageOldLogoSmall.png'
        },
        'wikimedia': {
            'domain': 'wikimedia.org',
            'name': 'Wikimedia (%s)',
            'farm': 'Wikimedia',
            'logo': 'Wikimediafoundation-logo.png'
        }
    }

    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('wmbot', 'Username'), config.get('wmbot', 'Password'))

        # We need an edit token on wiki2
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']

    def getList(self):
        self.sites = requests.get(self.source_list).text.split('\n')

    def validateApi(self, api_url):
        # Call http://st.wikipedia.org/w/api.php?action=query&meta=siteinfo&siprop=general&format=json
        my_url = api_url + '?action=query&meta=siteinfo&siprop=general&format=json'
        try:
            result = requests.get(my_url).json()
            if 'generator' in result['query']['general']:
                print "Detected %s" % result['query']['general']['generator']
                return True
            else:
                return False
        except:
            print "ERROR: Failed call to API check."
            return False

    def createSite(self, lang, token):
        siteTemplate = """{{Website
|Name=%s
|URL=%s
|API URL=%s
|Image=%s
|Farm=%s
|Collect general data=Yes
|Collect extension data=Yes
|Collect skin data=Yes
|Check every=240
|Collect statistics=Yes
|Audited=No
|Curated=No
|Active=Yes
}}
"""
        my_name = self.siteData[token]['name'] % lang
        my_template = siteTemplate % (
            my_name,
            "http://%s.%s/" % (lang, self.siteData[token]['domain']),
            "http://%s.%s/w/api.php" % (lang, self.siteData[token]['domain']),
            self.siteData[token]['logo'],
            self.siteData[token]['farm'])
        print my_template

        self.wikiapiary.call({
            'action': 'edit',
            'title': my_name,
            'text': my_template,
            'token': self.my_token,
            'bot': 'true'
        })
        self.create_counter += 1

    def checkSite(self, lang, site_domain):
        # Build the API URL using Wikimedia's known convention
        api_url = "http://%s.%s/w/api.php" % (lang, site_domain)
        print "Testing %s" % api_url

        # First see if this is a valid API URL before we query WikiApiary
        isValid = self.validateApi(api_url)

        if isValid:
            # Construct Ask query for WikiApiary
            my_query = ''.join([
                "[[Has API URL::%s]]" % api_url
            ])
            # Execute the query against WikiApiary
            c = self.wikiapiary.call({
                'action': 'ask',
                'query': my_query
            })

            # Return the count of results for the query
            return True, int(c['query']['meta']['count'])
        else:
            return False, 0

    def processSite(self, token):
        match = re.findall(self.regex_pattern, token)
        if match and len(match[0]) == 2:
            return match[0]
        else:
            return (False, False)

    def main(self):
        # Get the list of tokens from the config file
        self.getList()

        # Now loop through the tokens
        for token in self.sites:
            print "\nProcessing %s" % token
            # First turn a token into a lang and site
            (lang, site) = self.processSite(token)
            # If we successfully got lang and site proceed
            if lang is not False and site is not False:
                lang = lang.replace('_', '-')
                # Use a guess of the API domain to see if we already have it
                (valid, siteCount) = self.checkSite(lang, self.siteData[site]['domain'])

                if valid:
                    if siteCount == 0:
                        print "%s appears to be untracked token." % token
                        # Now add it to WikiApiary
                        self.createSite(lang, site)
                    elif siteCount == 1:
                        print "%s already exists." % token
                    elif siteCount > 1:
                        print "%s found %d websites, which should never happen." % (token, siteCount)
                else:
                    print "%s did not resolve to a valid API URL." % token
            else:
                print "%s could not process token." % token
Ejemplo n.º 35
0
#!/usr/bin/python
from simplemediawiki import MediaWiki
from tabela import tabela
from people import people
import sys
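
# The page text below is in Macedonian. Rough translation: "Duty roster --
# this is an automatically generated list of duty shifts with two rotations;
# if you cannot take your week or day, note it in the remarks and write to
# the mailing list. If you are on duty, a new calendar titled 'Хаклаб:
# Дежурства' (Hacklab: Duty shifts) with events for your duty days has been
# added to your Google Calendar; turn on notifications to be alerted in time."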
text = '==== Листа на дежурства ====\n\nОва е автоматски генерерирана листа на дежурни со две ротации, доколку не сте во можност да бидете дежурни некоја недела или ден запишете во забелешка и пишете на мејлинг листа. Доколку сте дежурен во вашиот google calendar е вметнат нов календар насловен „Хаклаб: Дежурства“ со настан за деновите кога сте дежурни. Поставете ги известувањата за да бидете навреме известени.\n\n'
text+=tabela(people)
wiki = MediaWiki('https://wiki.spodeli.org/api.php')
user, password = open('credentials', 'r').read().split()
wiki.login(user,password)
token = wiki.call({'action': 'query', 'meta': 'tokens'})['query']['tokens']['csrftoken']
wiki.call({'action': 'edit', 'title': 'Хаклаб/Дежурства', 'section':'5', 'text':text, 'token':token})
Ejemplo n.º 36
0
 def __init__(self):
     self.client = MediaWiki('https://www.wikidata.org/w/api.php')
Ejemplo n.º 37
0
def get_labs(format):
    """Gets data from all labs from makeinitaly.foundation."""

    labs = []

    # Get the first page of data
    wiki = MediaWiki(makeinitaly__foundation_api_url)
    wiki_response = wiki.call({
        'action': 'query',
        'list': 'categorymembers',
        'cmtitle': 'Category:Italian_FabLabs',
        'cmlimit': '500'
    })
    if "query-continue" in wiki_response:
        nextpage = wiki_response["query-continue"]["categorymembers"][
            "cmcontinue"]

    urls = []
    for i in wiki_response["query"]["categorymembers"]:
        urls.append(i["title"].replace(" ", "_"))

    # Load all the Labs in the first page
    for i in urls:
        current_lab = get_single_lab(i, "object")
        labs.append(current_lab)

    # Load all the Labs from the other pages
    while "query-continue" in wiki_response:
        wiki = MediaWiki(makeinitaly__foundation_api_url)
        wiki_response = wiki.call({
            'action': 'query',
            'list': 'categorymembers',
            'cmtitle': 'Category:Italian_FabLabs',
            'cmlimit': '500',
            "cmcontinue": nextpage
        })

        urls = []
        for i in wiki_response["query"]["categorymembers"]:
            urls.append(i["title"].replace(" ", "_"))

        # Load all the Labs
        for i in urls:
            current_lab = get_single_lab(i, "object")
            labs.append(current_lab)

        if "query-continue" in wiki_response:
            nextpage = wiki_response["query-continue"]["categorymembers"][
                "cmcontinue"]
        else:
            break

    # Transform the list into a dictionary
    labs_dict = {}
    for j, k in enumerate(labs):
        labs_dict[j] = k.__dict__

    # Return a dictionary / json
    if format.lower() == "dict" or format.lower() == "json":
        output = labs_dict
    # Return a geojson
    elif format.lower() == "geojson" or format.lower() == "geo":
        labs_list = []
        for l in labs_dict:
            single = labs_dict[l]
            single_lab = Feature(type="Feature",
                                 geometry=Point((single["latitude"],
                                                 single["longitude"])),
                                 properties=single)
            labs_list.append(single_lab)
        output = dumps(FeatureCollection(labs_list))
    # Return a Pandas DataFrame
    elif format.lower() == "pandas" or format.lower() == "dataframe":
        output = {}
        for j in labs_dict:
            output[j] = labs_dict[j]
        # Transform the dict into a Pandas DataFrame
        output = pd.DataFrame.from_dict(output)
        output = output.transpose()
    # Return an object
    elif format.lower() == "object" or format.lower() == "obj":
        output = labs
    # Default: return an object
    else:
        output = labs
    # Return a proper json
    if format.lower() == "json":
        output = json.dumps(labs_dict)
    return output
Ejemplo n.º 38
0
import datetime
import glob
import json
import os
import re
import sys
import textwrap

from simplemediawiki import MediaWiki


with open(os.path.expanduser('~/.mediawiki'), 'r') as f:
    conf = json.loads(f.read())['ircbot']

wiki = MediaWiki(conf['url'])

day_re = re.compile('^--- Day changed (.*) (.*) ([0-9]+) (20[0-9]+)$')
human_re = re.compile('.*<([^>]+)>.*')

days = {}
days_order = []

months = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
          'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12}


def make_wiki_login_call(packet):
    packet.update({'lgname': conf['username'],
                   'lgpassword': conf['password']})
    return wiki.call(packet)
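
The helper above only injects the credentials; with the older two-step action=login flow this snippet appears to target, the first call returns a token that the second call must echo back, as the Wiki class in a later example does. A hedged sketch:

login = make_wiki_login_call({'action': 'login'})
make_wiki_login_call({'action': 'login',
                      'lgtoken': login['login']['token']})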
Ejemplo n.º 39
0
parser.add_argument('--filename',
                    '-f',
                    dest='filename',
                    help='Output filename',
                    default='list_titles.txt')

args = parser.parse_args()

lang_val = args.lang
filename = args.filename

wiki_url = "https://"
wiki_url = wiki_url + lang_val
wiki_url = wiki_url + ".wikipedia.org/w/api.php"

wiki = MediaWiki(wiki_url)

output_file = open(filename, "w")

continue_param = ''
request_obj = {}
request_obj['action'] = 'query'
request_obj['list'] = 'allpages'
request_obj['aplimit'] = 'max'
request_obj['apnamespace'] = '0'

page_list = wiki.call(request_obj)
pages_in_query = page_list['query']['allpages']

for each_page in pages_in_query:
    page_ID = each_page['pageid']
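    # Hedged continuation sketch (not in the original excerpt): write the
    # title to the output file, since the script opens one for writing.
    output_file.write(each_page['title'].encode('utf-8') + '\n')

# A hedged sketch of fetching the remaining batches; it assumes the wiki
# reports continuation under 'query-continue', the same pattern handled by
# the all_pages method in a later example.
while 'query-continue' in page_list:
    cont = page_list['query-continue']['allpages']
    # Depending on the MediaWiki version the key is 'apcontinue' or 'apfrom'
    for key in ('apcontinue', 'apfrom'):
        if key in cont:
            request_obj[key] = cont[key]
            break
    page_list = wiki.call(request_obj)
    for each_page in page_list['query']['allpages']:
        output_file.write(each_page['title'].encode('utf-8') + '\n')

output_file.close()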
Ejemplo n.º 40
0
class wikkii:

    # Array to append sites to
    sites = []
    # This page lists all of the wikis hosted on the Wikkii farm;
    # we can use it to derive the names and URLs of the individual wikis
    source_list = 'http://wikkii.com/wiki/Special:Farmer/list'
    # Blank reference to store mediawiki object in
    wikiapiary = {}
    # Edit token
    my_token = ""
    # Counter
    create_counter = 0

    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('wikkiibot', 'Username'), config.get('wikkiibot', 'Password'))

        # We need an edit token on wiki2
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']

    def getList(self):
        soup = BeautifulSoup(requests.get(self.source_list).text)
        i = 1
        for item in soup.findAll("a", {"class": "extiw"}):
            site = (item.contents[0], item["href"], item["title"])
            print i, site
            self.sites.append(site)
            i += 1

    def validateStats(self, url):
        my_url = "%s/wiki/Special:Statistics?action=raw" % url
        try:
            result = requests.get(my_url, timeout=10).text
            values = result.split(';')
            if len(values) == 9:
                print "Got %d values from stats" % len(values)
                return True
            else:
                return False
        except:
            print "ERROR: Failed call to Statistics URL."
            return False

    def createSite(self, name, url):
        siteTemplate = """{{Website
|Name=%s
|URL=%s
|Image=Default website image.png
|Farm=Wikkii
|Collection method=API, Special:Statistics
|API URL=%s
|Collect general data=No
|Collect extension data=No
|Collect skin data=No
|Collect statistics=No
|Collect semantic statistics=No
|Collect semantic usage=No
|Collect logs=No
|Collect recent changes=No
|Statistics URL=%s
|Collect statistics stats=Yes
|Check every=240
|Audited=No
|Validated=No
|Curated=No
|Active=Yes
|Demote=No
|Defunct=No
|Error=No
|Featured website vote=0
}}
"""
        api_url = "%sw/api.php" % url
        statistics_url = "%swiki/Special:Statistics" % url

        # Make sure a page doesn't exist with this name already
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': name
        })
        try:
            if c['query']['pages']['-1']:
                print "No duplicate name detected."
        except:
            # Duplicate detected
            name = "%s (Wikkii)" % name

        my_template = siteTemplate % (name, url, api_url, statistics_url)
        print my_template

        c = self.wikiapiary.call({
            'action': 'edit',
            'title': name,
            'text': my_template,
            'token': self.my_token,
            'bot': 'true',
            'summary': 'Creating entry for %s' % name
        })
        print c
        self.create_counter += 1

    def checkSite(self, site):
        print "Checking %s" % site[1]

        # Construct Ask query for WikiApiary
        my_query = ''.join([
            "[[Has statistics URL::%swiki/Special:Statistics]]" % site[1]
        ])
        # Execute the query against WikiApiary
        c = self.wikiapiary.call({
            'action': 'ask',
            'query': my_query
        })

        # Return the count of results for the query
        return int(c['query']['meta']['count'])

    def main(self):
        # Get the list of tokens from the config file
        self.getList()

        for site in self.sites:
            # Limit the number of sites we make per run
            if self.create_counter > 1000:
                break

            print "\nProcessing %s" % site[0]

            # Use a guess of the API domain to see if we have it already
            siteCount = self.checkSite(site)

            if siteCount == 0:
                print "%s is not in WikiApiary, validating stats." % site[0]
                if self.validateStats(site[1]):
                    # Now add it to WikiApiary
                    self.createSite(site[0], site[1])
                    time.sleep(3)
                else:
                    print "%s did not resolve to a valid API URL." % site[0]
            elif siteCount == 1:
                print "%s already exists, skipping." % site[0]
            elif siteCount > 1:
                print "ERROR: %s found %d websites, which should never happen." % (site[0], siteCount)
Ejemplo n.º 41
0
import re
import sqlalchemy
import solr
from simplemediawiki import MediaWiki
from editing import MusicBrainzClient
import pprint
import urllib
import time
from utils import mangle_name, join_names, quote_page_title
import config as cfg

engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

wp = MediaWiki('https://en.wikipedia.org/w/api.php')
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia')

mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
"""
CREATE TABLE bot_wp_label (
    gid uuid NOT NULL,
    processed timestamp with time zone DEFAULT now()
);

ALTER TABLE ONLY bot_wp_label
    ADD CONSTRAINT bot_wp_label_pkey PRIMARY KEY (gid);

"""

query = """
Ejemplo n.º 42
0
class Wiki(object):
    def __init__(self, url, username, password):
        self.wiki = MediaWiki(url)
        self.username = username
        self.password = password

        self.login = self._make_wiki_login_call({'action': 'login'})
        self.token = self._make_wiki_login_call({
            'action': 'login',
            'lgtoken': self.login['login']['token']
        })

    def _make_wiki_login_call(self, packet):
        packet.update({'lgname': self.username, 'lgpassword': self.password})
        response = self.wiki.call(packet)
        if DEBUG:
            print response
        return response

    def all_pages(self):
        response = self.wiki.call({'action': 'query', 'list': 'allpages'})
        if DEBUG:
            print response
        assert_present('query', response)

        marker = 'foo'
        marker_name = 'foo'
        while marker:
            if 'query-continue' in response:
                for possible in ['apfrom', 'apcontinue']:
                    if possible in response['query-continue']['allpages']:
                        marker = \
                            response['query-continue']['allpages'][possible]
                        marker_name = possible
                        break
            else:
                marker = None

            for page in response['query']['allpages']:
                yield page['title']
            response = self.wiki.call({
                'action': 'query',
                'list': 'allpages',
                marker_name: marker
            })
            if DEBUG:
                print response

    def get_page(self, title):
        response = self.wiki.call({
            'action': 'query',
            'titles': title,
            'prop': 'revisions',
            'rvprop': 'content'
        })
        if DEBUG:
            print response
        assert_present('query', response)

        pages = response['query']['pages']
        page_id = pages.keys()[0]

        if not 'revisions' in pages[page_id]:
            # This is a new page
            return ''

        return pages[page_id]['revisions'][0]['*']

    def check_for_page(self, title):
        response = self.wiki.call({
            'action': 'query',
            'titles': title,
            'prop': 'revisions',
            'rvprop': 'content'
        })
        if DEBUG:
            print response
        assert_present('query', response)

        pages = response['query']['pages']
        page_id = pages.keys()[0]

        if not 'revisions' in pages[page_id]:
            return False
        return True

    def post_page(self, title, text, minor=True, bot=True):
        response = self.wiki.call({
            'action': 'query',
            'prop': 'info',
            'titles': title,
            'intoken': 'edit'
        })
        if DEBUG:
            print response
        assert_present('query', response)

        pages = response['query']['pages']
        page_id = pages.keys()[0]

        response = self.wiki.call({
            'action': 'edit',
            'minor': minor,
            'bot': bot,
            'title': title,
            'text': json.dumps(text).replace('\\n', '\n')[1:-1],
            'token': pages[page_id]['edittoken']
        })
        if DEBUG:
            print response
        if not 'nochange' in response['edit']:
            print 'Modified %s' % title

    def create_account(self, username, password, email, realname):
        response = self.wiki.call({
            'action': 'createaccount',
            'name': username,
            'password': password,
            'email': email,
            'realname': realname
        })
        if DEBUG:
            print response

        response = self.wiki.call({
            'action': 'createaccount',
            'name': username,
            'password': password,
            'email': email,
            'realname': realname,
            'token': response['createaccount']['token']
        })

        if DEBUG:
            print response

        return 'error' not in response
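
A brief usage sketch for the Wiki wrapper above; the URL, credentials, and page title are placeholders rather than values from the original code.

wiki = Wiki('https://example.org/w/api.php', 'ExampleBot', 'secret')
for title in wiki.all_pages():
    print title
if wiki.check_for_page('Sandbox'):
    text = wiki.get_page('Sandbox')
    wiki.post_page('Sandbox', text + '\nTouched by ExampleBot.', minor=True)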
Ejemplo n.º 43
0
from simplemediawiki import MediaWiki
from couchdb.client import Server

server = Server()
try:
    db = server.create('feedme')
except:
    db = server['feedme']

wiki = MediaWiki('http://en.wikibooks.org/w/api.php')
recipes = wiki.call({'action': 'query', 'list': 'categorymembers', 'cmtitle': 'Category:Recipes', 'cmlimit': 'max'})
for recipe in recipes['query']['categorymembers']:
    recipe_doc = recipe
    doc_id, doc_rev = db.save(recipe_doc)
    print "Added recipe %s (%s)" % (recipe_doc['title'], doc_id)
    #recipe_content = wiki.call({'action': 'parse', 'text': '{{%s}}' % recipe['title']})
    #print recipe_content
Ejemplo n.º 44
0
import re
import sqlalchemy
import solr
from simplemediawiki import MediaWiki
from editing import MusicBrainzClient
import pprint
import urllib
import time
from utils import mangle_name, join_names, quote_page_title
import config as cfg

engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

wp = MediaWiki("https://en.wikipedia.org/w/api.php")
wps = solr.SolrConnection("http://localhost:8983/solr/wikipedia")

mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

"""
CREATE TABLE bot_wp_label (
    gid uuid NOT NULL,
    processed timestamp with time zone DEFAULT now()
);

ALTER TABLE ONLY bot_wp_label
    ADD CONSTRAINT bot_wp_label_pkey PRIMARY KEY (gid);

"""
Ejemplo n.º 45
0
def get_single_lab(lab_slug, data_format):
    """Gets data from a single lab from makeinitaly.foundation."""
    wiki = MediaWiki(makeinitaly__foundation_api_url)
    wiki_response = wiki.call(
        {'action': 'query',
         'titles': lab_slug,
         'prop': 'revisions',
         'rvprop': 'content'})

    # If we don't know the pageid...
    for i in wiki_response["query"]["pages"]:
        content = wiki_response["query"]["pages"][i]["revisions"][0]["*"]

    # Clean the resulting string/list
    newstr01 = content.replace("}}", "")
    newstr02 = newstr01.replace("{{", "")
    result = newstr02.rstrip("\n|").split("\n|")
    # result.remove(u'FabLab')

    # Transform the data into a Lab object
    current_lab = Lab()

    # Add existing data
    for i in result:
        if "coordinates=" in i:
            value = i.replace("coordinates=", "")
            current_lab.coordinates = value
            latlong = []
            if ", " in value:
                latlong = value.rstrip(", ").split(", ")
            elif " , " in value:
                latlong = value.rstrip(" , ").split(" , ")
            else:
                latlong = ["", ""]
            current_lab.lat = latlong[0]
            current_lab.long = latlong[1]
        elif "province=" in i:
            value = i.replace("province=", "")
            current_lab.province = value.upper()
        elif "region=" in i:
            value = i.replace("region=", "")
            current_lab.region = value
        elif "address=" in i:
            value = i.replace("address=", "")
            current_lab.address = value
        elif "city=" in i:
            value = i.replace("city=", "")
            current_lab.city = value
        elif "fablabsio=" in i:
            value = i.replace("fablabsio=", "")
            current_lab.fablabsio = value
        elif "website=" in i:
            value = i.replace("website=", "")
            current_lab.website = value
        elif "facebook=" in i:
            value = i.replace("facebook=", "")
            current_lab.facebook = value
        elif "twitter=" in i:
            value = i.replace("twitter=", "")
            current_lab.twitter = value
        elif "email=" in i:
            value = i.replace("email=", "")
            current_lab.email = value
        elif "manager=" in i:
            value = i.replace("manager=", "")
            current_lab.manager = value
        elif "birthyear=" in i:
            value = i.replace("birthyear=", "")
            current_lab.birthyear = value

    current_lab.text_en = get_lab_text(lab_slug=lab_slug, language="en")
    current_lab.text_it = get_lab_text(lab_slug=lab_slug, language="it")

    if data_format == "dict":
        return current_lab.__dict__
    elif data_format == "object":
        return current_lab
Ejemplo n.º 46
0
class smw_community:
    sites = []
    # Blank reference to store mediawiki object in
    wikiapiary = {}
    smwreferata = {}
    # Edit token
    my_token = ""
    # Counter
    create_counter = 0

    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        # Connect to SMW Community Wiki
        self.smwreferata = MediaWiki('http://smw.referata.com/w/api.php')

        # Connect to WikiApiary
        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('wikkiibot', 'Username'),
                              config.get('wikkiibot', 'Password'))

        # We need an edit token
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']

    def load_from_smwreferata(self):
        # Build query for sites
        my_query = ''.join([
            '[[Category:Sites]]', '[[Has status::Active]]', '|?Has URL',
            '|?Has data type', '|limit=1000'
        ])
        print "Query: %s" % my_query
        sites = self.smwreferata.call({'action': 'ask', 'query': my_query})

        # Rather than return the raw JSON object from the API, clean it up
        # into an easier-to-work-with list of dictionaries, using the same
        # names as the wiki properties.
        if len(sites['query']['results']) > 0:
            for pagename, site in sites['query']['results'].items():
                print "Adding %s." % pagename

                self.sites.append({
                    'Name': pagename,
                    'URL': site['printouts']['Has URL'][0],
                    'Tag': ','.join(site['printouts']['Has data type'])
                })

    def add_api_to_sites(self):
        # Loop through the sites and find API urls
        for i in range(0, len(self.sites)):
            print "Investigating %s (%s)..." % (self.sites[i]['Name'],
                                                self.sites[i]['URL'])
            try:
                req = requests.get(self.sites[i]['URL'])
                if req.status_code == 200:
                    soup = BeautifulSoup(req.text)
                    api_url = soup.findAll('link',
                                           {'rel': 'EditURI'})[0]['href']
                    print "  Found %s" % api_url
                    new_api_url = urlparse.urlunparse(
                        urlparse.urlparse(api_url)[0:3] + ('', '', ''))
                    print "  Resolved %s" % new_api_url
                    self.sites[i]['API URL'] = new_api_url
                else:
                    print "  Returned %s" % req.status_code
            except Exception, e:
                print "Exception: %s" % e
Ejemplo n.º 47
0
|Active=No
|Demote=No
|Defunct=No
}}
[[Category:WikiTeam Import]]"""

logo_page_text = """This image was automatically uploaded by [[User:Audit Bee|Audit Bee]] while importing.
[[Category:Import logos]] """

# timeout in seconds
timeout = 10
socket.setdefaulttimeout(timeout)

wiki = MediaWiki(
    'https://wikiapiary.com/w/api.php',
    cookie_file='cookie-jar',
    user_agent='python-simplemediawiki/1.1.1 (WikiApiary; Bumble Bee; +http://wikiapiary.com/wiki/User:Bumble_Bee)')
wiki.login('Audit Bee', 'frYqj2AmPTqZDjn4TANE')

# We need an edit token
c = wiki.call({
    'action': 'query',
    'titles': 'Foo',
    'prop': 'info',
    'intoken': 'edit'
})
my_token = c['query']['pages']['-1']['edittoken']

i = 0
success = 0
Ejemplo n.º 48
0
class TropicalWikis:

    # Array to append sites to
    sites = []
    # This page lists all of the wikis hosted on TropicalWikis;
    # we can use it to derive the names and URLs of the individual wikis
    source_list = 'http://www.tropicalwikis.com/wiki/Special:Farmer/list'
    # Blank reference to store mediawiki object in
    wikiapiary = {}
    # Edit token
    my_token = ""
    # Counter
    create_counter = 0

    def __init__(self):
        config = ConfigParser.ConfigParser()
        config.read('../apiary.cfg')

        self.wikiapiary = MediaWiki(config.get('WikiApiary', 'api'))
        self.wikiapiary.login(config.get('TropicalBot', 'Username'),
                              config.get('TropicalBot', 'Password'))

        # We need an edit token on wiki2
        c = self.wikiapiary.call({
            'action': 'query',
            'titles': 'Foo',
            'prop': 'info',
            'intoken': 'edit'
        })
        self.my_token = c['query']['pages']['-1']['edittoken']

    def getList(self):
        soup = BeautifulSoup(requests.get(self.source_list).text)
        i = 1
        for item in soup.findAll("a", {"class": "external text"}):
            site = (item.contents[0], item["href"])
            print i, site
            self.sites.append(site)
            i += 1

    def createSite(self, name, url):
        siteTemplate = """{{Website
|Name=%s
|URL=%s
|Image=Default website image.png
|Farm=TropicalWikis
|Collection method=API
|API URL=%s
|Collect general data=Yes
|Collect extension data=Yes
|Collect skin data=Yes
|Collect statistics=Yes
|Collect semantic statistics=No
|Collect semantic usage=No
|Collect logs=No
|Collect recent changes=No
|Statistics URL=
|Collect statistics stats=Yes
|Check every=240
|Audited=No
|Validated=No
|Curated=No
|Active=No
|Demote=No
|Defunct=No
|Error=No
|Featured website vote=0
}}
"""
        api_url = "%s/w/api.php" % url

        # Make sure a page doesn't exist with this name already
        c = self.wikiapiary.call({'action': 'query', 'titles': name})
        try:
            if c['query']['pages']['-1']:
                print "No duplicate name detected."
        except:
            # Duplicate detected
            name = "%s (TropicalWikis)" % name

        my_template = siteTemplate % (name, url, api_url)
        print my_template

        c = self.wikiapiary.call({
            'action': 'edit',
            'title': name,
            'text': my_template,
            'token': self.my_token,
            'bot': 'true',
            'summary': 'Creating entry for %s' % name
        })
        print c
        self.create_counter += 1

    def checkSite(self, site):
        print "Checking %s" % site[1]

        # Construct Ask query for WikiApiary
        my_query = ''.join(["[[Has API URL::%s/w/api.php]]" % site[1]])
        # Execute the query against WikiApiary
        c = self.wikiapiary.call({'action': 'ask', 'query': my_query})

        # Return the count of results for the query
        return int(c['query']['meta']['count'])

    def main(self):
        # Get the list of tokens from the config file
        self.getList()

        for site in self.sites:
            print "\nProcessing %s" % site[0]

            # Use a guess of the API domain to see if we have it already
            siteCount = self.checkSite(site)

            if siteCount == 0:
                print "%s is not in WikiApiary, validating stats." % site[0]
                # Now add it to WikiApiary
                self.createSite(site[0], site[1])
                time.sleep(3)
            elif siteCount == 1:
                print "%s already exists, skipping." % site[0]
            elif siteCount > 1:
                print "ERROR: %s found %d websites, which should never happen." % (
                    site[0], siteCount)
Ejemplo n.º 49
0
class Wikidata(object):
    """Talks to the Wikidata API to retrieve information about entities and claims."""
    PROPERTY_COUNTRY = 'P17'
    PROPERTY_FLAG_IMAGE = 'P41'
    PROPERTY_APPLIES_TO_TERRITORIAL_JURISDICTION = 'P1001'

    def __init__(self):
        self.client = MediaWiki('https://www.wikidata.org/w/api.php')

    def get_entities_from_title(self, title, sites='enwiki'):
        """Return the entities matching the supplied title in a list.

        Arguments:
        title -- Name of the entity
        sites -- Wikidata site which should be searched for the title (default enwiki)

        Returns an empty list when no matching entity was found.
        """
        params = {'action': 'wbgetentities', 'sites': sites, 'titles': title, 'props': ''}

        call = self.client.call(params)
        entities = call['entities'].keys()
        
        result = list()
        
        # The API uses the string key '-1' for titles that were not found
        if entities[0] != '-1':
            for entity in entities:
                result.append(entity)
                
        return result
    
    def get_claims_from_entity(self, entity, property=None):
        """Return the claims of the supplied entity.
        
        Arguments:
        entity -- Entity identifier
        property -- Filter to return only claims which has this property (default None)
        
        Returns a dictionary containing each claim. The value holds a list with the property values.
        Returns None when the entity was not found.
        """
        params = {'action': 'wbgetclaims', 'entity': entity}
        if property is not None:
            params['property'] = property

        call = self.client.call(params)
        
        # If entity was not found or on an empty claims dictionary return
        if u'error' in call or not call['claims']:
            return None
            
        claims = call['claims']
        result = dict()
        
        for property in claims:
            result[property] = list()
            values = claims[property]

            # multiple values are possible (see P31 on Q42)
            for value in values:
                result[property].append(value['mainsnak']['datavalue'])
        
        return result
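
A brief usage sketch for the Wikidata wrapper above; the title is an arbitrary example, not taken from the original code.

wikidata = Wikidata()
entities = wikidata.get_entities_from_title('Douglas Adams')
if entities:
    claims = wikidata.get_claims_from_entity(
        entities[0], property=Wikidata.PROPERTY_COUNTRY)
    print claims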
Ejemplo n.º 50
0
def get_single_lab(lab_slug, open_cage_api_key):
    """Gets data from a single lab from hackerspaces.org."""
    wiki = MediaWiki(hackerspaces_org_api_url)
    wiki_response = wiki.call({
        'action': 'query',
        'titles': lab_slug,
        'prop': 'revisions',
        'rvprop': 'content'
    })

    # If we don't know the pageid...
    for i in wiki_response["query"]["pages"]:
        content = wiki_response["query"]["pages"][i]["revisions"][0]["*"]

    # Transform the data into a Lab object
    current_lab = Hackerspace()

    equipment_list = []

    # Parse the Mediawiki code
    wikicode = mwparserfromhell.parse(content)
    for k in wikicode.filter_templates():
        element_name = unicode(k.name)
        if "Hackerspace" in element_name:
            for j in k.params:
                current_lab.name = lab_slug
                j_value = unicode(j.value)
                j_name = unicode(j.name)
                # Remove new line in content
                if j_value[-1:] == "\n" or j_value[:1] == "\n":
                    j_value = j_value.replace('\n', '')
                if j_name == "logo":
                    current_lab.logo = j_value
                if j_name == "founding":
                    current_lab.founding = j_value
                if j_name == "coordinate":
                    # Clean the coordinates
                    j_value = j_value.replace('"', '')
                    j_value = j_value.replace('N', '')
                    j_value = j_value.replace('S', '')
                    j_value = j_value.replace('W', '')
                    j_value = j_value.replace('E', '')
                    j_value = j_value.replace(u'°', '')
                    j_value = j_value.replace(' ', '')
                    # Get the full address with the coordinates
                    address = get_location(query=j_value,
                                           format="reverse",
                                           api_key=open_cage_api_key)
                    current_lab.city = address["city"]
                    current_lab.county = address["county"]
                    current_lab.state = address["state"]
                    current_lab.postal_code = address["postal_code"]
                    current_lab.address_1 = address["address_1"]
                    current_lab.country = address["country"]
                    current_lab.country_code = address["country_code"]
                    current_lab.continent = address["continent"]
                    current_lab.latitude = address["latitude"]
                    current_lab.longitude = address["longitude"]
                if j_name == "membercount":
                    current_lab.membercount = j_value
                if j_name == "fee":
                    current_lab.fee = j_value
                if j_name == "size":
                    current_lab.size = j_value
                if j_name == "status":
                    current_lab.status = j_value
                if j_name == "site":
                    current_lab.site = j_value
                if j_name == "wiki":
                    current_lab.wiki = j_value
                if j_name == "irc":
                    current_lab.irc = j_value
                if j_name == "jabber":
                    current_lab.jabber = j_value
                if j_name == "phone":
                    current_lab.phone = j_value
                if j_name == "youtube":
                    current_lab.youtube = j_value
                if j_name == "eventbrite":
                    current_lab.eventbrite = j_value
                if j_name == "facebook":
                    current_lab.facebook = j_value
                if j_name == "ustream":
                    current_lab.ustream = j_value
                if j_name == "flickr":
                    current_lab.flickr = j_value
                if j_name == "twitter":
                    current_lab.twitter = j_value
                if j_name == "googleplus":
                    current_lab.googleplus = j_value
                if j_name == "email":
                    current_lab.email = j_value
                if j_name == "maillist":
                    current_lab.maillist = j_value
                if j_name == "ical":
                    current_lab.ical = j_value
                if j_name == "forum":
                    current_lab.forum = j_value
        elif "Equipment" in element_name:
            for j in k.params:
                equipment_list.append(j.replace("equipment=", ""))

            current_lab.equipment = equipment_list

    # Load the free text
    freetext = ""
    for k in wikicode.nodes:
        try:
            test_value = k.name
        except AttributeError:
            freetext += unicode(k)
    current_lab.text = freetext

    return current_lab
Ejemplo n.º 51
0
class WikimediaCommons(object):
    """Talks to the Wikimedia Commons API to retrieve information about an
    image hosted on Wikimedia Commons.
    """
    
    def __init__(self):
        self.client = MediaWiki('https://commons.wikimedia.org/w/api.php')

    def __call_api(self, title, namespace='Image', thumbwidth=None):
        """Call the Commons API.
        
        Arguments:
        title -- Title of the page.
        namespace -- Namespace this title lies in (default 'Image')
        thumbwidth -- Width in pixels required for the thumbnail image URL (default None)
        
        Returns the API response or None when the title was not found.
        """
        title = '{0}:{1}'.format(namespace, title)
        params = {'action': 'query', 'titles': title, 'prop': 'imageinfo', 'iiprop': 'url'}
        if thumbwidth is not None:
            params['iiurlwidth'] = thumbwidth

        result = self.client.call(params)
        pages = result['query']['pages']
        page_id = pages.keys()[0]

        # The API uses the string key '-1' when the title was not found
        if page_id == '-1':
            return None
        else:
            return pages[page_id]

    def get_image_url(self, name):
        """Retrieve the URL to the raw image.
        
        Arguments:
        name -- Image name
        
        Returns the image URL or None when the image was not found.
        """
        result = self.__call_api(name)
        if result is None:
            return None

        image = result['imageinfo'][0]

        return image['url']

    def get_thumb_image_url(self, name, width):
        """Retrieve the URL to the thumbnail image.
        
        Arguments:
        name -- Image name
        width -- Requested width in pixel
        
        Returns the thumbnail image URL or None when the image was not found.
        """
        result = self.__call_api(name, thumbwidth=width)
        if result is None:
            return None

        image = result['imageinfo'][0]

        return image['thumburl']
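
A brief usage sketch for the WikimediaCommons wrapper above; the file name is an arbitrary example of an image hosted on Commons.

commons = WikimediaCommons()
print commons.get_image_url('Wikipedia-logo.png')
print commons.get_thumb_image_url('Wikipedia-logo.png', 200)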