def GetData(params, site=None, useAPI=True, retryCount=5, encodeTitle=True,
            sysop=False, back_response=False):
    """Send a request to the query API and return the decoded JSON reply.

    params        - dict of request parameters; params['action'] is required.
                    The dict is MODIFIED IN PLACE: list values are joined
                    with ListToParam(), non-string values are converted with
                    unicode(), unicode values are passed through ToUtf8(),
                    'format' is forced to u'json', and the keys moved into
                    the separate `data` dict (see below) are deleted.
    site          - site to query; defaults to wikipedia.getSite().
    useAPI        - if false, site.query_address() is used instead of
                    site.api_address() and a 'noprofile' parameter is added.
    retryCount    - number of retries after the first failed attempt.
    encodeTitle   - accepted for compatibility; not used in this function.
    sysop         - forwarded to the site's cookie/request/token methods.
    back_response - if true, return a (response, decoded_json) tuple
                    instead of just the decoded JSON.

    Raises wikipedia.NoSuchSite or wikipedia.ServerError when the reply is
    not JSON and contains the corresponding marker text, and re-raises the
    last ValueError once all retries have been used up.
    """
    if not site:
        site = wikipedia.getSite()

    # `data` collects what must not go into the URL: the upload payload
    # ('file') and long id lists. It is handed to site.getUrl() separately.
    data = {}
    titlecount = 0
    for k, v in params.iteritems():
        if k == u'file':
            data[k] = v
        elif isinstance(v, list):
            if k in (u'titles', u'pageids', u'revids', u'ususers') \
                    and len(v) > 10:
                # Titles param might be long, so keep it out of the URL.
                titlecount = len(v)
                data[k] = unicode(ListToParam(v))
            else:
                params[k] = unicode(ListToParam(v))
        elif not isinstance(v, basestring):
            params[k] = unicode(v)
        elif isinstance(v, unicode):
            params[k] = ToUtf8(v)

    if params.get('format') != u'json':
        params['format'] = u'json'

    if not useAPI:
        params['noprofile'] = ''

    for k in data:
        del params[k]

    if wikipedia.verbose:  # dump params info.
        wikipedia.output(u"==== API action:%s ====" % params[u'action'])
        if data and 'file' not in data:
            wikipedia.output(u"%s: (%d items)" % (data.keys()[0], titlecount))

        for k, v in params.iteritems():
            if k not in ['action', 'format', 'file', 'xml', 'text']:
                if k == 'lgpassword' and wikipedia.verbose == 1:
                    # The password is masked only at verbosity level 1.
                    v = u'XXXXX'
                elif not isinstance(v, unicode):
                    v = v.decode('utf-8')
                wikipedia.output(u"%s: %s" % (k, v))
        wikipedia.output(u'-' * 16)

    # Actions that are sent with site.postForm() instead of a GET URL.
    postAC = [
        'edit', 'login', 'purge', 'rollback', 'delete', 'undelete', 'protect',
        'parse', 'block', 'unblock', 'move', 'emailuser', 'import',
        'userrights', 'upload', 'patrol'
    ]
    if useAPI:
        if params['action'] in postAC:
            path = site.api_address()
        else:
            path = site.api_address() + site.urlEncode(params.items())
    else:
        path = site.query_address() + site.urlEncode(params.items())

    if wikipedia.verbose:
        if titlecount > 1:
            wikipedia.output(u"Requesting %d %s from %s"
                             % (titlecount, data.keys()[0], site))
        else:
            wikipedia.output(u"Requesting API query from %s" % site)

    lastError = None
    retry_idle_time = 1  # minutes

    while retryCount >= 0:
        try:
            # Placeholder so the except clause below can always inspect it.
            jsontext = "Nothing received"
            if params['action'] == 'upload' and ('file' in data):
                import upload
                res, jsontext = upload.post_multipart(
                    site, path, params.items(),
                    (('file', params['filename'].encode(site.encoding()),
                      data['file']),),
                    site.cookies(sysop=sysop))
            elif params['action'] in postAC:
                # NOTE(review): values moved into `data` (long id lists) are
                # not passed to postForm here -- confirm whether POST
                # actions can ever carry such lists.
                res, jsontext = site.postForm(path, params, sysop,
                                              site.cookies(sysop=sysop))
            else:
                if back_response:
                    res, jsontext = site.getUrl(path, retry=True, data=data,
                                                sysop=sysop,
                                                back_response=True)
                else:
                    jsontext = site.getUrl(path, retry=True, sysop=sysop,
                                           data=data)

            # json.loads raises ValueError on a non-JSON reply, which is
            # what drives the retry logic in the except clause.
            jsontext = json.loads(jsontext)

            if "error" in jsontext:
                errorDetails = jsontext["error"]
                # A token refresh consumes one retry; otherwise a token the
                # server keeps rejecting would make this loop spin forever.
                # When no retries are left, the error reply is returned to
                # the caller like any other API error.
                if errorDetails["code"] == 'badtoken' and retryCount > 0:
                    retryCount -= 1
                    wikipedia.output('Received a bad login token error from the server. Attempting to refresh.')
                    params['token'] = site.getToken(sysop=sysop,
                                                    getagain=True)
                    continue

            if back_response:
                return res, jsontext
            else:
                return jsontext

        except ValueError as error:
            if "<title>Wiki does not exist</title>" in jsontext:
                raise wikipedia.NoSuchSite(u'Wiki %s does not exist yet'
                                           % site)

            if 'Wikimedia Error' in jsontext:  # wikimedia server error
                raise wikipedia.ServerError

            retryCount -= 1
            wikipedia.output(u"Error downloading data: %s" % error)
            wikipedia.output(u"Request %s:%s" % (site.lang, path))
            lastError = error
            if retryCount >= 0:
                wikipedia.output(u"Retrying in %i minutes..."
                                 % retry_idle_time)
                time.sleep(retry_idle_time * 60)
                # Next time wait longer, but not longer than half an hour
                retry_idle_time *= 2
                if retry_idle_time > 30:
                    retry_idle_time = 30
            else:
                wikipedia.debugDump('ApiGetDataParse', site,
                                    str(error) + '\n%s\n%s'
                                    % (site.hostname(), path),
                                    jsontext)

    # All retries failed: surface the last parse error instead of silently
    # returning None (which would also break tuple-unpacking callers).
    if lastError is not None:
        raise lastError
def GetData(params, site=None, useAPI=True, retryCount=config.maxretries,
            encodeTitle=True, sysop=False, back_response=False):
    """Send a request to the query API and return the decoded JSON reply.

    params        - dict of request parameters; params['action'] is required
                    for a real request. The dict is MODIFIED IN PLACE: list
                    values are joined with ListToParam(), non-string values
                    are converted with unicode(), unicode values are passed
                    through ToUtf8(), 'format' is forced to u'json',
                    'rawcontinue' is added to 'query' actions that specify
                    neither 'continue' nor 'rawcontinue', and the keys moved
                    into the separate `data` dict are deleted.
    site          - site to query; defaults to pywikibot.getSite().
    useAPI        - if false, site.query_address() is used instead of
                    site.api_address() and a 'noprofile' parameter is added.
    retryCount    - number of retries after the first failed attempt.
    encodeTitle   - accepted for compatibility; not used in this function.
    sysop         - forwarded to the site's cookie/request/token methods.
    back_response - if true, return a (response, decoded_json) tuple
                    instead of just the decoded JSON.

    When pywikibot.simulate is set and the action is listed in
    pywikibot.config.actions_to_block, no request is made and a dummy
    result is returned instead.

    Raises pywikibot.NoSuchSite or pywikibot.ServerError when the reply is
    not JSON and contains the corresponding marker text, and re-raises the
    last ValueError once all retries have been used up.
    """
    if ('action' in params) and pywikibot.simulate and \
            (params['action'] in pywikibot.config.actions_to_block):
        pywikibot.output(
            u'\03{lightyellow}SIMULATION: %s action blocked.\03{default}'
            % params['action'])
        jsontext_dummy = {params['action']: {u'result': u''}}
        if back_response:
            import StringIO
            # Stand-in response object carrying 'code' and 'msg' attributes.
            res_dummy = StringIO.StringIO()
            res_dummy.__dict__.update({u'code': 0, u'msg': u''})
            return res_dummy, jsontext_dummy
        else:
            return jsontext_dummy

    if not site:
        site = pywikibot.getSite()

    # `data` collects what must not go into the URL: the upload payload
    # ('file') and long id lists. It is handed to site.getUrl() separately.
    data = {}
    titlecount = 0
    for k, v in params.iteritems():
        if k == u'file':
            data[k] = v
        elif isinstance(v, list):
            if k in (u'titles', u'pageids', u'revids', u'ususers') \
                    and len(v) > 10:
                # Titles param might be long, so keep it out of the URL.
                titlecount = len(v)
                data[k] = unicode(ListToParam(v))
            else:
                params[k] = unicode(ListToParam(v))
        elif not isinstance(v, basestring):
            params[k] = unicode(v)
        elif isinstance(v, unicode):
            params[k] = ToUtf8(v)

    if params.get('format') != u'json':
        params['format'] = u'json'

    if 'action' in params and params['action'] == 'query' and not (
            'continue' in params or 'rawcontinue' in params):
        params['rawcontinue'] = ''

    if not useAPI:
        params['noprofile'] = ''

    for k in data:
        del params[k]

    if pywikibot.verbose:  # dump params info.
        pywikibot.output(u"==== API action:%s ====" % params[u'action'])
        if data and 'file' not in data:
            pywikibot.output(u"%s: (%d items)" % (data.keys()[0], titlecount))

        for k, v in params.iteritems():
            if k not in ['action', 'format', 'file', 'xml', 'text']:
                if k == 'lgpassword' and pywikibot.verbose == 1:
                    # The password is masked only at verbosity level 1.
                    v = u'XXXXX'
                elif not isinstance(v, unicode):
                    v = v.decode('utf-8')
                pywikibot.output(u"%s: %s" % (k, v))
        pywikibot.output(u'-' * 16)

    # Actions that are sent with site.postForm() instead of a GET URL.
    postAC = [
        'edit', 'login', 'purge', 'rollback', 'delete', 'undelete', 'protect',
        'parse', 'block', 'unblock', 'move', 'emailuser', 'import',
        'userrights', 'upload', 'patrol', 'wbcreateclaim', 'wbeditentity',
        'wbremoveclaims'
    ]
    if site.versionnumber() >= 18:
        postAC.append('watch')

    if useAPI:
        if params['action'] in postAC or \
                params['action'].startswith('wbset'):
            path = site.api_address()
        else:
            path = site.api_address() + site.urlEncode(params.items())
    else:
        path = site.query_address() + site.urlEncode(params.items())

    if pywikibot.verbose:
        if titlecount > 1:
            pywikibot.output(u"Requesting %d %s from %s"
                             % (titlecount, data.keys()[0], site))
        else:
            pywikibot.output(u"Requesting API query from %s" % site)

    lastError = None
    retry_idle_time = 1  # minutes

    while retryCount >= 0:
        try:
            # Placeholder so the except clause below can always inspect it.
            jsontext = "Nothing received"
            if params['action'] == 'upload' and ('file' in data):
                import upload
                res, jsontext = upload.post_multipart(
                    site, path, params.items(),
                    (('file', params['filename'].encode(site.encoding()),
                      data['file']), ),
                    site.cookies(sysop=sysop))
            elif params['action'] in postAC or \
                    params['action'].startswith('wbset'):
                # NOTE(review): values moved into `data` (long id lists) are
                # not passed to postForm here -- confirm whether POST
                # actions can ever carry such lists.
                res, jsontext = site.postForm(path, params, sysop,
                                              site.cookies(sysop=sysop))
            else:
                if back_response:
                    res, jsontext = site.getUrl(path, retry=True, data=data,
                                                sysop=sysop,
                                                back_response=True)
                else:
                    jsontext = site.getUrl(path, retry=True, sysop=sysop,
                                           data=data)

            # json.loads raises ValueError on a non-JSON reply, which is
            # what drives the retry logic in the except clause.
            jsontext = json.loads(jsontext)

            if "error" in jsontext:
                errorDetails = jsontext["error"]
                # A token refresh consumes one retry; otherwise a token the
                # server keeps rejecting would make this loop spin forever.
                # When no retries are left, the error reply is returned to
                # the caller like any other API error.
                if errorDetails["code"] == 'badtoken' and retryCount > 0:
                    retryCount -= 1
                    pywikibot.output('Received a bad login token error from '
                                     'the server. Attempting to refresh.')
                    params['token'] = site.getToken(sysop=sysop,
                                                    getagain=True)
                    continue

            if back_response:
                return res, jsontext
            else:
                return jsontext

        except ValueError as error:
            if "<title>Wiki does not exist</title>" in jsontext:
                raise pywikibot.NoSuchSite(u'Wiki %s does not exist yet'
                                           % site)

            if 'Wikimedia Error' in jsontext:  # wikimedia server error
                raise pywikibot.ServerError

            retryCount -= 1
            pywikibot.output(u"Error downloading data: %s" % error)
            pywikibot.output(u"Request %s:%s" % (site.lang, path))
            lastError = error
            if retryCount >= 0:
                pywikibot.output(u"Retrying in %i minutes..."
                                 % retry_idle_time)
                time.sleep(retry_idle_time * 60)
                # Next time wait longer, but not longer than half an hour
                retry_idle_time *= 2
                if retry_idle_time > 30:
                    retry_idle_time = 30
            else:
                pywikibot.debugDump('ApiGetDataParse', site,
                                    str(error) + '\n%s\n%s'
                                    % (site.hostname(), path),
                                    jsontext)

    # All retries failed: surface the last parse error instead of silently
    # returning None (which would also break tuple-unpacking callers).
    if lastError is not None:
        raise lastError