def getInternetArchiveURL(url, timestamp=None):
    """Return archived URL by Internet Archive.

    Parameters:
        url - url to search an archived version for
        timestamp - requested archive date. The version closest to that
                    moment is returned. Format: YYYYMMDDhhmmss or part
                    thereof.

    See [[:mw:Archived Pages]] and https://archive.org/help/wayback_api.php
    for more details.
    """
    import json
    uri = u'https://archive.org/wayback/available?'
    query = {'url': url}
    if timestamp is not None:
        query['timestamp'] = timestamp
    uri = uri + urllib.urlencode(query)
    jsontext = http.request(uri=uri, site=None)
    if "closest" in jsontext:
        data = json.loads(jsontext)
        return data['archived_snapshots']['closest']['url']
    else:
        return None
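# A minimal usage sketch (hypothetical URL and timestamp; assumes the function
# above is importable and pywikibot's comms.http layer is set up):
snapshot = getInternetArchiveURL('https://www.example.org/', timestamp='20130101')
if snapshot is not None:
    print(snapshot)  # e.g. a web.archive.org snapshot URL closest to the date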
def getWebCitationURL(url, timestamp=None):
    """Return archived URL by Web Citation.

    Parameters:
        url - url to search an archived version for
        timestamp - requested archive date. The version closest to that
                    moment is returned. Format: YYYYMMDDhhmmss or part
                    thereof.

    See http://www.webcitation.org/doc/WebCiteBestPracticesGuide.pdf
    for more details
    """
    import xml.etree.ElementTree as ET
    uri = u'http://www.webcitation.org/query?'
    query = {'returnxml': 'true', 'url': url}
    if timestamp is not None:
        query['date'] = timestamp
    uri = uri + urlencode(query)
    xmltext = http.request(uri=uri, site=None)
    if "success" in xmltext:
        data = ET.fromstring(xmltext)
        return data.find('.//webcite_url').text
    else:
        return None
def getWebCitationURL(url, timestamp=None):
    """Return archived URL by Web Citation.

    Parameters:
        url - url to search an archived version for
        timestamp - requested archive date. The version closest to that
                    moment is returned. Format: YYYYMMDDhhmmss or part
                    thereof.

    See http://www.webcitation.org/doc/WebCiteBestPracticesGuide.pdf
    for more details
    """
    import xml.etree.ElementTree as ET
    uri = u'http://www.webcitation.org/query?'
    query = {'returnxml': 'true', 'url': url}
    if timestamp is not None:
        query['date'] = timestamp
    uri = uri + urllib.urlencode(query)
    xmltext = http.request(uri=uri, site=None)
    if "success" in xmltext:
        data = ET.fromstring(xmltext)
        return data.find('.//webcite_url').text
    else:
        return None
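# Usage sketch for the WebCite lookup above (hypothetical URL; the timestamp
# format follows the docstring, YYYYMMDDhhmmss or any prefix of it):
archived = getWebCitationURL('https://www.example.org/', timestamp='2013')
if archived is not None:
    print(archived)  # URL reported by webcitation.org for the closest snapshot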
def test_https(self):
    """Test http.request using https://www.wikiquote.org/."""
    r = http.request(site=None, uri='https://www.wikiquote.org/')
    self.assertIsInstance(r, unicode)
    self.assertIn('<html lang="mul"', r)
    self.assertOneDeprecationParts(
        'Invoking http.request without argument site', 'http.fetch()')
def github_svn_rev2hash(tag, rev):
    """Convert a Subversion revision to a Git hash using Github.

    @param tag: name of the Subversion repo on Github
    @param rev: Subversion revision identifier
    @return: the git hash
    @rtype: str
    """
    from io import StringIO
    import xml.dom.minidom
    from pywikibot.comms import http

    uri = 'https://github.com/wikimedia/%s/!svn/vcc/default' % tag
    data = http.request(site=None, uri=uri, method='PROPFIND',
                        body="<?xml version='1.0' encoding='utf-8'?>"
                             "<propfind xmlns=\"DAV:\"><allprop/></propfind>",
                        headers={
                            'label': str(rev),
                            'user-agent': 'SVN/1.7.5 {pwb}'
                        })
    dom = xml.dom.minidom.parse(StringIO(data))
    hsh = dom.getElementsByTagName("C:git-commit")[0].firstChild.nodeValue
    return hsh
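# Usage sketch (hypothetical repository name and revision number; needs
# network access and assumes the GitHub SVN bridge answers PROPFIND requests
# as the function above expects):
print(github_svn_rev2hash('pywikibot-core', 4527))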
def getInternetArchiveURL(url, timestamp=None):
    """Return archived URL by Internet Archive.

    Parameters:
        url - url to search an archived version for
        timestamp - requested archive date. The version closest to that
                    moment is returned. Format: YYYYMMDDhhmmss or part
                    thereof.

    See [[:mw:Archived Pages]] and https://archive.org/help/wayback_api.php
    for more details.
    """
    import json
    uri = u'https://archive.org/wayback/available?'
    query = {'url': url}
    if timestamp is not None:
        query['timestamp'] = timestamp
    uri = uri + urlencode(query)
    jsontext = http.request(uri=uri, site=None)
    if "closest" in jsontext:
        data = json.loads(jsontext)
        return data['archived_snapshots']['closest']['url']
    else:
        return None
def test_https_ignore_cert_error(self):
    """Test http.request ignoring invalid vikidia SSL certificate."""
    # As the connection is cached, the above test will cause
    # subsequent requests to go to the existing, broken, connection.
    # So, this uses a different host, which hopefully hasn't been
    # connected previously by other tests.
    r = http.request(site=None,
                     uri='https://en.vikidia.org/wiki/Main_Page',
                     disable_ssl_certificate_validation=True)
    self.assertIsInstance(r, unicode)
    self.assertIn('<title>Vikidia</title>', r)
def postForm(site, address, predata, method="POST"):
    # replaces:
    # data = pywikibot.getSite().postForm(address, predata=predata)
    address = site.family.apipath(site.lang)
    from pywikibot.comms import http
    from urllib import urlencode
    urldata = urlencode(predata)
    data = http.request(site, uri=address, method=method, body=urldata)
    return data
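# Usage sketch (hypothetical API parameters; assumes a configured pywikibot
# environment so that pywikibot.Site() returns a usable site object; note the
# function above overrides the address argument with the site's API path):
import pywikibot
site = pywikibot.Site()
data = postForm(site, site.family.apipath(site.lang),
                {'action': 'query', 'meta': 'siteinfo', 'format': 'json'})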
def getversion_onlinerepo(repo=None):
    """Retrieve current framework revision number from online repository.

    @param repo: (optional) Online repository location
    @type repo: URL or string
    """
    from pywikibot.comms import http

    url = repo or 'https://git.wikimedia.org/feed/pywikibot/core'
    hsh = None
    buf = http.request(site=None, uri=url)
    buf = buf.split('\r\n')
    try:
        hsh = buf[13].split('/')[5][:-1]
    except Exception as e:
        raise ParseError(repr(e) + ' while parsing ' + repr(buf))
    return hsh
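# Usage sketch (uses the default repository feed; the fixed line and field
# indices above are brittle, so ParseError -- raised by the function and
# assumed to be in scope from the same module -- is possible if the feed
# layout changes):
try:
    print('online revision: %s' % getversion_onlinerepo())
except ParseError as e:
    print('could not determine online revision: %s' % e)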
def getInternetArchiveURL(site, url, timestamp=None):
    """Return archived URL by Internet Archive."""
    # See [[:mw:Archived Pages]] and http://archive.org/help/wayback_api.php
    import json
    query = u'http://archive.org/wayback/available?'
    query += u'url='
    query += url
    if timestamp is not None:
        query += u'&timestamp='
        query += timestamp
    if pywikibot.verbose:
        pywikibot.output(u"Requesting query from Internet Archive: %s" % query)
    jsontext = http.request(uri=query, site=site, retry=False,
                            no_hostname=True)
    if "closest" in jsontext:
        data = json.loads(jsontext)
        return data['archived_snapshots']['closest']['url']
    else:
        return None
def getWebCitationURL(site, url, timestamp=None):
    """Return archived URL by Web Citation."""
    # See http://www.webcitation.org/doc/WebCiteBestPracticesGuide.pdf
    from BeautifulSoup import BeautifulStoneSoup
    query = u'http://www.webcitation.org/query?'
    query += u'returnxml=true'
    query += u'&url='
    query += url
    if timestamp is not None:
        query += u'&date='
        query += timestamp
    if pywikibot.verbose:
        pywikibot.output(u"Requesting query from Web Citation: %s" % query)
    xmltext = http.request(uri=query, site=site, retry=False,
                           no_hostname=True)
    if "success" in xmltext:
        data = BeautifulStoneSoup(xmltext)
        return data.find('webcite_url').string
    else:
        return None
def getDataFromHost(self, queryStr):
    """Go and fetch a query from the host's API."""
    url = self.getUrl(queryStr)

    try:
        resp = http.request(None, url)
    except:
        pywikibot.warning(u"Failed to retrieve %s" % url)
        raise

    try:
        data = json.loads(resp)
    except ValueError:
        pywikibot.warning(u"Data received from host but no JSON could be decoded")
        raise pywikibot.ServerError

    return data
def github_svn_rev2hash(tag, rev):
    """Convert a Subversion revision to a Git hash using Github.

    @param tag: name of the Subversion repo on Github
    @param rev: Subversion revision identifier
    @return: the git hash
    @rtype: str
    """
    from io import StringIO
    import xml.dom.minidom
    from pywikibot.comms import http

    uri = 'https://github.com/wikimedia/%s/!svn/vcc/default' % tag
    data = http.request(site=None, uri=uri, method='PROPFIND',
                        body="<?xml version='1.0' encoding='utf-8'?>"
                             "<propfind xmlns=\"DAV:\"><allprop/></propfind>",
                        headers={'label': str(rev),
                                 'user-agent': 'SVN/1.7.5 {pwb}'})
    dom = xml.dom.minidom.parse(StringIO(data))
    hsh = dom.getElementsByTagName("C:git-commit")[0].firstChild.nodeValue
    return hsh
def test_https(self):
    """Test http.request using https://www.wikiquote.org/."""
    r = http.request(site=None, uri='https://www.wikiquote.org/')
    self.assertIsInstance(r, unicode)
    self.assertIn('<html lang="mul"', r)
def submit(self):
    """Submit a query and parse the response.

    @return: a dict containing data retrieved from api.php
    """
    while True:
        paramstring = self.http_params()
        action = self.params.get("action", "")
        simulate = self._simulate(action)
        if simulate:
            return simulate
        if self.throttle:
            self.site.throttle(write=self.write)
        else:
            pywikibot.log(
                "Action '{0}' is submitted not throttled.".format(action))
        uri = self.site.scriptpath() + "/api.php"
        try:
            if self.mime:
                # construct a MIME message containing all API key/values
                container = MIMEMultipart(_subtype='form-data')
                for key in self.params:
                    # key "file" requires special treatment in a multipart
                    # message
                    if key == "file":
                        local_filename = self.params[key]
                        filetype = mimetypes.guess_type(local_filename)[0] \
                            or 'application/octet-stream'
                        file_content = file(local_filename, "rb").read()
                        submsg = Request._generate_MIME_part(
                            key, file_content, filetype.split('/'),
                            {'filename': local_filename})
                    else:
                        submsg = Request._generate_MIME_part(
                            key, self.params[key], None, None)
                    container.attach(submsg)
                for key, value in self.mime_params.items():
                    container.attach(Request._generate_MIME_part(key, *value))
                # strip the headers to get the HTTP message body
                body = container.as_string()
                marker = "\n\n"  # separates headers from body
                eoh = body.find(marker)
                body = body[eoh + len(marker):]
                # retrieve the headers from the MIME object
                headers = dict(list(container.items()))
            else:
                headers = {'Content-Type': 'application/x-www-form-urlencoded'}
                body = paramstring
            rawdata = http.request(
                self.site, uri, method="POST",
                headers=headers, body=body)
            # import traceback
            # traceback.print_stack()
            # print rawdata
        except Server504Error:
            pywikibot.log(u"Caught HTTP 504 error; retrying")
            self.wait()
            continue
        except FatalServerError:
            # This error is not going to be fixed by just waiting
            pywikibot.error(traceback.format_exc())
            raise
        # TODO: what other exceptions can occur here?
        except Exception:
            # for any other error on the http request, wait and retry
            pywikibot.error(traceback.format_exc())
            pywikibot.log(u"%s, %s" % (uri, paramstring))
            self.wait()
            continue
        if not isinstance(rawdata, unicode):
            rawdata = rawdata.decode(self.site.encoding())
        pywikibot.debug(u"API response received:\n" + rawdata, _logger)
        if rawdata.startswith(u"unknown_action"):
            raise APIError(rawdata[:14], rawdata[16:])
        try:
            result = json.loads(rawdata)
        except ValueError:
            # if the result isn't valid JSON, there must be a server
            # problem. Wait a few seconds and try again
            pywikibot.warning(
                "Non-JSON response received from server %s; "
                "the server may be down." % self.site)
            pywikibot.debug(rawdata, _logger)
            # there might also be an overflow, so try a smaller limit
            for param in self.params:
                if param.endswith("limit"):
                    value = self.params[param]
                    try:
                        self.params[param] = str(int(value) // 2)
                        pywikibot.output(u"Set %s = %s"
                                         % (param, self.params[param]))
                    except:
                        pass
            self.wait()
            continue
        if not result:
            result = {}
        if not isinstance(result, dict):
            raise APIError("Unknown",
                           "Unable to process query response of type %s."
                           % type(result),
                           data=result)
        if self['action'] == 'query':
            if 'userinfo' in result.get('query', ()):
                if hasattr(self.site, '_userinfo'):
                    self.site._userinfo.update(result['query']['userinfo'])
                else:
                    self.site._userinfo = result['query']['userinfo']
            status = self.site._loginstatus  # save previous login status
            if (("error" in result
                 and result["error"]["code"].endswith("limit"))
                    or (status >= 0
                        and self.site._userinfo['name']
                        != self.site._username[status])):
                # user is no longer logged in (session expired?)
                # reset userinfo, then make user log in again
                del self.site._userinfo
                self.site._loginstatus = -1
                if status < 0:
                    status = 0  # default to non-sysop login
                self.site.login(status)
                # retry the previous query
                continue
        self._handle_warnings(result)
        if "error" not in result:
            return result
        if "*" in result["error"]:
            # help text returned
            result['error']['help'] = result['error'].pop("*")
        code = result["error"].pop("code", "Unknown")
        info = result["error"].pop("info", None)
        if code == "maxlag":
            lag = lagpattern.search(info)
            if lag:
                pywikibot.log(
                    u"Pausing due to database lag: " + info)
                self.site.throttle.lag(int(lag.group("lag")))
                continue
        if code.startswith(u'internal_api_error_'):
            class_name = code[len(u'internal_api_error_'):]
            if class_name in ['DBConnectionError',  # r 4984 & r 4580
                              'DBQueryError',  # bug 58158
                              'ReadOnlyError'  # bug 59227
                              ]:
                pywikibot.log(u'MediaWiki exception %s; retrying.'
                              % class_name)
                self.wait()
                continue
            pywikibot.log(u"MediaWiki exception %s: query=\n%s"
                          % (class_name, pprint.pformat(self.params)))
            pywikibot.log(u"           response=\n%s" % result)
            raise APIMWException(class_name, info, **result["error"])
        # bugs 46535, 62126, 64494, 66619
        # maybe removed when it 46535 is solved
        if code == "failed-save" and \
           action == 'wbeditentity' and \
           self._is_wikibase_error_retryable(result["error"]):
            self.wait()
            continue
        # raise error
        try:
            pywikibot.log(u"API Error: query=\n%s"
                          % pprint.pformat(self.params))
            pywikibot.log(u"           response=\n%s" % result)
            raise APIError(code, info, **result["error"])
        except TypeError:
            raise RuntimeError(result)
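# Usage sketch (hypothetical request; assumes a configured pywikibot Site and
# the surrounding api.Request class that this submit() method belongs to):
import pywikibot
from pywikibot.data import api
req = api.Request(site=pywikibot.Site(), action='query', meta='siteinfo')
data = req.submit()  # dict parsed from the api.php JSON response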
def test_get(self):
    r = http.request(site=None, uri='http://www.wikipedia.org/')
    self.assertIsInstance(r, str if sys.version_info[0] >= 3 else unicode)
    self.assertIn('<html lang="mul"', r)
def test_http(self):
    """Test http request function."""
    r = http.request(site=None, uri='http://www.wikipedia.org/')
    self.assertIsInstance(r, unicode)
    self.assertIn('<html lang="mul"', r)
def subTemplate(self, content, param):
    """Substitute the template tags in content according to param.

    @param content: Content with tags to substitute.
    @type content: string
    @param param: Param with data how to substitute tags.
    @type param: dict

    Returns a tuple containing the new content with tags substituted
    and a list of those tags.
    """
    substed_tags = []  # DRTRIGON-73
    metadata = {'mw-signature': u'~~~~',
                'mw-timestamp': u'~~~~~'}  # DRTRIGON-132

    # 0.2.) check for 'simple' mode and get additional params
    if param['simple']:
        p = self.site.getExpandedString(param['simple'])
        param.update(pywikibot.extract_templates_and_params(p)[0][1])

    # 0.5.) check cron/date
    if param['cron']:
        # [min] [hour] [day of month] [month] [day of week]
        # (date supported only, thus [min] and [hour] dropped)
        if not (param['cron'][0] == '@'):
            param['cron'] = '* * ' + param['cron']
        entry = crontab.CronTab(param['cron'])
        # find the delay from midnight (does not return 0.0 - but next)
        delay = entry.next(
            datetime.datetime.now().replace(hour=0, minute=0, second=0,
                                            microsecond=0) -
            datetime.timedelta(microseconds=1))

        pywikibot.output(u'CRON delay for execution: %.3f (<= %i)'
                         % (delay, self._bot_config['CRONMaxDelay']))

        if not (delay <= self._bot_config['CRONMaxDelay']):
            return (content, substed_tags, metadata)

    # 1.) getUrl or wiki text
    # (security: check url not to point to a local file on the server,
    #  e.g. 'file://' - same as used in xsalt.py)
    secure = False
    for item in [u'http://', u'https://', u'mail://', u'local://',
                 u'wiki://']:
        secure = secure or (param['url'][:len(item)] == item)
    param['zip'] = ast.literal_eval(param['zip'])
    if not secure:
        return (content, substed_tags, metadata)
    if (param['url'][:7] == u'wiki://'):
        url = param['url'][7:].strip('[]')  # enable wiki-links
        if ast.literal_eval(param['expandtemplates']):
            # DRTRIGON-93 (only with 'wiki://')
            external_buffer = pywikibot.Page(self.site,
                                             url).get(expandtemplates=True)
        else:
            external_buffer = self.load(pywikibot.Page(self.site, url))
    elif (param['url'][:7] == u'mail://'):  # DRTRIGON-101
        url = param['url'].replace(u'{{@}}', u'@')  # e.g. nlwiki
        mbox = SubsterMailbox(
            pywikibot.config.datafilepath(self._bot_config['data_path'],
                                          self._bot_config['mbox_file'], ''))
        external_buffer = mbox.find_data(url)
        mbox.close()
    elif (param['url'][:8] == u'local://'):  # DRTRIGON-131
        if (param['url'][8:] == u'cache/state_bots'):
            # filename hard-coded
            d = shelve.open(pywikibot.config.datafilepath('cache',
                                                          'state_bots'))
            external_buffer = pprint.pformat(
                ast.literal_eval(pprint.pformat(d)))
            d.close()
        else:
            external_buffer = u'n/a'
    else:
        # consider using 'expires', 'last-modified', 'etag' in order to
        # make the updating data requests more efficient! use those stored
        # on page, if the user placed them, else use the conventional mode.
        # http://www.diveintopython.net/http_web_services/etags.html
        f_url, external_buffer = http.request(self.site, param['url'],
                                              no_hostname=True,
                                              back_response=True)
        headers = f_url.headers  # same like 'f_url.info()'
        # if param['zip']:
        if ('text/' not in headers['content-type']):
            pywikibot.output(u'Source is of non-text content-type, '
                             u'using raw data instead.')
            external_buffer = f_url.read()
        del f_url  # free some memory (no need to keep copy)

        for h in ['content-length', 'date', 'last-modified', 'expires']:
            if h in headers:
                metadata['url-%s' % h] = headers[h]

    # some intermediate processing (unzip, xlsx2csv, ...)
    if param['zip']:  # 'application/zip', ...
        fileno = 0 if (param['zip'] == True) else (param['zip'] - 1)
        external_buffer = self.unzip(external_buffer, fileno)
    if param['xlsx']:
        # 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        external_buffer = self.xlsx2csv(external_buffer, param['xlsx'])
    if param['ods']:
        # 'application/vnd.oasis.opendocument.spreadsheet'
        external_buffer = self.ods2csv(external_buffer, param['ods'])

    if not ast.literal_eval(param['beautifulsoup']):  # DRTRIGON-88
        # 2.) regexp
        # for subitem in param['regex']:
        subitem = param['regex']
        regex = re.compile(subitem, re.S | re.I)

        # 3.) subst in content
        external_data = regex.search(external_buffer)

        external_data_dict = {}
        if external_data:  # not None
            external_data = external_data.groups()

            pywikibot.output(u'Groups found by regex: %i'
                             % len(external_data))

            # DRTRIGON-114: Support for named groups in regexes
            if regex.groupindex:
                for item in regex.groupindex:
                    external_data_dict[u'%s-%s' % (param['value'], item)] = \
                        external_data[regex.groupindex[item] - 1]
            elif (len(external_data) == 1):
                external_data_dict = {param['value']: external_data[0]}
            else:
                external_data_dict = {param['value']: str(external_data)}
        pywikibot.debug(str(external_data_dict))

        param['postproc'] = eval(param['postproc'])
        # should be secured as given below, but needs code changes in wiki too
        # param['postproc'] = ast.literal_eval(param['postproc'])
        for value in external_data_dict:
            external_data = external_data_dict[value]

            # 4.) postprocessing
            func = param['postproc'][0]    # needed by exec call of self._code
            DATA = [external_data]         #
            args = param['postproc'][1:]   #
            scope = {}                     # (scope to run in)
            scope.update(locals())         # (add DATA, *args, ...)
            scope.update(globals())        # (add imports and else)
            if func:
                exec(self._code + (self._bot_config['CodeTemplate'] % func),
                     scope, scope)
                external_data = DATA[0]
            pywikibot.debug(external_data)

            # 5.) subst content
            (content, tags) = self.subTag(content, value, external_data,
                                          int(param['count']))
            substed_tags += tags
    else:
        # DRTRIGON-105: Support for multiple BS template configurations
        value = param['value']
        if value:
            value += u'-'

        # DRTRIGON-88: Enable Beautiful Soup power for Subster
        BS_tags = self.get_BS_regex(value).findall(content)

        pywikibot.output(u'BeautifulSoup tags found by regex: %i'
                         % len(BS_tags))

        prev_content = content

        BS = BeautifulSoup.BeautifulSoup(external_buffer)
        for item in BS_tags:
            external_data = eval('BS.%s' % item[1])
            external_data = self._BS_regex_str % {
                'var1': value + 'BS:' + item[1],
                'var2': value,
                'cont': external_data}
            content = content.replace(item[0], external_data, 1)

        if (content != prev_content):
            substed_tags.append(value + 'BS')

    metadata['bot-timestamp'] = pywikibot.Timestamp.now().isoformat(' ')

    return (content, substed_tags, metadata)
def test_get(self):
    r = http.request(site=None, uri='http://www.wikipedia.org/')
    self.assertIsInstance(r, str)
    self.assertTrue('<html lang="mul"' in r)
def submit(self):
    """Submit a query and parse the response.

    @return: The data retrieved from api.php (a dict)
    """
    while True:
        paramstring = self.http_params()
        action = self.params.get("action", "")
        simulate = self._simulate(action)
        if simulate:
            return simulate
        self.site.throttle(write=self.write)
        uri = self.site.scriptpath() + "/api.php"
        ssl = False
        if self.site.family.name in config.available_ssl_project:
            if action == "login" and config.use_SSL_onlogin:
                ssl = True
            elif config.use_SSL_always:
                ssl = True
        try:
            if self.mime:
                # construct a MIME message containing all API key/values
                container = MIMEMultipart(_subtype='form-data')
                for key in self.params:
                    # key "file" requires special treatment in a multipart
                    # message
                    if key == "file":
                        local_filename = self.params[key]
                        filetype = mimetypes.guess_type(local_filename)[0] \
                            or 'application/octet-stream'
                        file_content = file(local_filename, "rb").read()
                        submsg = MIMENonMultipart(*filetype.split("/"))
                        submsg.add_header("Content-disposition", "form-data",
                                          name=key, filename=local_filename)
                        submsg.set_payload(file_content)
                    else:
                        try:
                            self.params[key].encode("ascii")
                            keytype = ("text", "plain")
                        except UnicodeError:
                            keytype = ("application", "octet-stream")
                        submsg = MIMENonMultipart(*keytype)
                        submsg.add_header("Content-disposition", "form-data",
                                          name=key)
                        submsg.set_payload(self.params[key])
                    container.attach(submsg)
                # strip the headers to get the HTTP message body
                body = container.as_string()
                marker = "\n\n"  # separates headers from body
                eoh = body.find(marker)
                body = body[eoh + len(marker):]
                # retrieve the headers from the MIME object
                mimehead = dict(list(container.items()))
                rawdata = http.request(self.site, uri, ssl, method="POST",
                                       headers=mimehead, body=body)
            else:
                rawdata = http.request(
                    self.site, uri, ssl, method="POST",
                    headers={
                        'Content-Type': 'application/x-www-form-urlencoded'
                    },
                    body=paramstring)
            # import traceback
            # traceback.print_stack()
            # print rawdata
        except Server504Error:
            pywikibot.log(u"Caught HTTP 504 error; retrying")
            self.wait()
            continue
        except FatalServerError:
            # This error is not going to be fixed by just waiting
            pywikibot.error(traceback.format_exc())
            raise
        # TODO: what other exceptions can occur here?
        except Exception:
            # for any other error on the http request, wait and retry
            pywikibot.error(traceback.format_exc())
            pywikibot.log(u"%s, %s" % (uri, paramstring))
            self.wait()
            continue
        if not isinstance(rawdata, unicode):
            rawdata = rawdata.decode(self.site.encoding())
        pywikibot.debug(u"API response received:\n" + rawdata, _logger)
        if rawdata.startswith(u"unknown_action"):
            raise APIError(rawdata[:14], rawdata[16:])
        try:
            result = json.loads(rawdata)
        except ValueError:
            # if the result isn't valid JSON, there must be a server
            # problem. Wait a few seconds and try again
            pywikibot.warning(
                "Non-JSON response received from server %s; "
                "the server may be down." % self.site)
            pywikibot.debug(rawdata, _logger)
            # there might also be an overflow, so try a smaller limit
            for param in self.params:
                if param.endswith("limit"):
                    value = self.params[param]
                    try:
                        self.params[param] = str(int(value) // 2)
                        pywikibot.output(u"Set %s = %s"
                                         % (param, self.params[param]))
                    except:
                        pass
            self.wait()
            continue
        if not result:
            result = {}
        if not isinstance(result, dict):
            raise APIError("Unknown",
                           "Unable to process query response of type %s."
                           % type(result),
                           data=result)
        if self['action'] == 'query':
            if 'userinfo' in result.get('query', ()):
                if hasattr(self.site, '_userinfo'):
                    self.site._userinfo.update(result['query']['userinfo'])
                else:
                    self.site._userinfo = result['query']['userinfo']
            status = self.site._loginstatus  # save previous login status
            if (("error" in result
                 and result["error"]["code"].endswith("limit"))
                    or (status >= 0
                        and self.site._userinfo['name']
                        != self.site._username[status])):
                # user is no longer logged in (session expired?)
                # reset userinfo, then make user log in again
                del self.site._userinfo
                self.site._loginstatus = -1
                if status < 0:
                    status = 0  # default to non-sysop login
                self.site.login(status)
                # retry the previous query
                continue
        if "warnings" in result:
            modules = [k for k in result["warnings"] if k != "info"]
            for mod in modules:
                if '*' in result["warnings"][mod]:
                    text = result["warnings"][mod]['*']
                elif 'html' in result["warnings"][mod]:
                    # Bugzilla 49978
                    text = result["warnings"][mod]['html']['*']
                else:
                    # This is just a warning, we shouldn't raise an
                    # exception because of it
                    continue
                pywikibot.warning(u"API warning (%s): %s" % (mod, text))
        if "error" not in result:
            return result
        if "*" in result["error"]:
            # help text returned
            result['error']['help'] = result['error'].pop("*")
        code = result["error"].pop("code", "Unknown")
        info = result["error"].pop("info", None)
        if code == "maxlag":
            lag = lagpattern.search(info)
            if lag:
                pywikibot.log(u"Pausing due to database lag: " + info)
                self.site.throttle.lag(int(lag.group("lag")))
                continue
        if code.startswith(u'internal_api_error_'):
            self.wait()
            continue
        # bugs 46535, 62126, 64494
        # maybe removed when it 46535 is solved
        if code == "failed-save" and action == 'wbeditentity':
            try:
                message = result["error"]["messages"]["0"]["name"]
            except KeyError:
                message = None
            if message == u'edit-already-exists':
                self.wait()
                continue
        # raise error
        try:
            pywikibot.log(u"API Error: query=\n%s"
                          % pprint.pformat(self.params))
            pywikibot.log(u"           response=\n%s" % result)
            raise APIError(code, info, **result["error"])
        except TypeError:
            raise RuntimeError(result)
def submit(self):
    """Submit a query and parse the response.

    @return: The data retrieved from api.php (a dict)
    """
    paramstring = self.http_params()
    while True:
        action = self.params.get("action", "")
        simulate = self._simulate(action)
        if simulate:
            return simulate
        self.site.throttle(write=self.write)
        uri = self.site.scriptpath() + "/api.php"
        ssl = False
        if self.site.family.name in config.available_ssl_project:
            if action == "login" and config.use_SSL_onlogin:
                ssl = True
            elif config.use_SSL_always:
                ssl = True
        try:
            if self.mime:
                # construct a MIME message containing all API key/values
                container = MIMEMultipart(_subtype='form-data')
                for key in self.params:
                    # key "file" requires special treatment in a multipart
                    # message
                    if key == "file":
                        local_filename = self.params[key]
                        filetype = mimetypes.guess_type(local_filename)[0] \
                            or 'application/octet-stream'
                        file_content = file(local_filename, "rb").read()
                        submsg = MIMENonMultipart(*filetype.split("/"))
                        submsg.add_header("Content-disposition", "form-data",
                                          name=key, filename=local_filename)
                        submsg.set_payload(file_content)
                    else:
                        try:
                            self.params[key].encode("ascii")
                            keytype = ("text", "plain")
                        except UnicodeError:
                            keytype = ("application", "octet-stream")
                        submsg = MIMENonMultipart(*keytype)
                        submsg.add_header("Content-disposition", "form-data",
                                          name=key)
                        submsg.set_payload(self.params[key])
                    container.attach(submsg)
                # strip the headers to get the HTTP message body
                body = container.as_string()
                marker = "\n\n"  # separates headers from body
                eoh = body.find(marker)
                body = body[eoh + len(marker):]
                # retrieve the headers from the MIME object
                mimehead = dict(container.items())
                rawdata = http.request(self.site, uri, ssl, method="POST",
                                       headers=mimehead, body=body)
            else:
                rawdata = http.request(
                    self.site, uri, ssl, method="POST",
                    headers={'Content-Type':
                             'application/x-www-form-urlencoded'},
                    body=paramstring)
            ## import traceback
            ## traceback.print_stack()
            ## print rawdata
        except Server504Error:
            pywikibot.log(u"Caught HTTP 504 error; retrying")
            self.wait()
            continue
        # TODO: what other exceptions can occur here?
        except Exception, e:
            # for any other error on the http request, wait and retry
            pywikibot.error(traceback.format_exc())
            pywikibot.log(u"%s, %s" % (uri, paramstring))
            self.wait()
            continue
        if not isinstance(rawdata, unicode):
            rawdata = rawdata.decode(self.site.encoding())
        pywikibot.debug(u"API response received:\n" + rawdata, _logger)
        if rawdata.startswith(u"unknown_action"):
            raise APIError(rawdata[:14], rawdata[16:])
        try:
            result = json.loads(rawdata)
        except ValueError:
            # if the result isn't valid JSON, there must be a server
            # problem. Wait a few seconds and try again
            pywikibot.warning(
                "Non-JSON response received from server %s; "
                "the server may be down." % self.site)
            pywikibot.debug(rawdata, _logger)
            # there might also be an overflow, so try a smaller limit
            for param in self.params:
                if param.endswith("limit"):
                    value = self.params[param]
                    try:
                        self.params[param] = str(int(value) // 2)
                        pywikibot.output(u"Set %s = %s"
                                         % (param, self.params[param]))
                    except:
                        pass
            self.wait()
            continue
        if not result:
            result = {}
        if type(result) is not dict:
            raise APIError("Unknown",
                           "Unable to process query response of type %s."
                           % type(result),
                           {'data': result})
        if self['action'] == 'query':
            if 'userinfo' in result.get('query', ()):
                if hasattr(self.site, '_userinfo'):
                    self.site._userinfo.update(result['query']['userinfo'])
                else:
                    self.site._userinfo = result['query']['userinfo']
            status = self.site._loginstatus  # save previous login status
            if (("error" in result
                 and result["error"]["code"].endswith("limit"))
                    or (status >= 0
                        and self.site._userinfo['name']
                        != self.site._username[status])):
                # user is no longer logged in (session expired?)
                # reset userinfo, then make user log in again
                del self.site._userinfo
                self.site._loginstatus = -1
                if status < 0:
                    status = 0  # default to non-sysop login
                self.site.login(status)
                # retry the previous query
                continue
        if "warnings" in result:
            modules = [k for k in result["warnings"] if k != "info"]
            for mod in modules:
                if '*' in result["warnings"][mod]:
                    text = result["warnings"][mod]['*']
                elif 'html' in result["warnings"][mod]:
                    # Bugzilla 49978
                    text = result["warnings"][mod]['html']['*']
                else:
                    # This is just a warning, we shouldn't raise an
                    # exception because of it
                    continue
                pywikibot.warning(u"API warning (%s): %s" % (mod, text))
        if "error" not in result:
            return result
        if "*" in result["error"]:
            # help text returned
            result['error']['help'] = result['error'].pop("*")
        code = result["error"].pop("code", "Unknown")
        info = result["error"].pop("info", None)
        if code == "maxlag":
            lag = lagpattern.search(info)
            if lag:
                pywikibot.log(
                    u"Pausing due to database lag: " + info)
                self.site.throttle.lag(int(lag.group("lag")))
                continue
        if code in (u'internal_api_error_DBConnectionError', ):
            self.wait()
            continue
        # raise error
        try:
            pywikibot.log(u"API Error: query=\n%s"
                          % pprint.pformat(self.params))
            pywikibot.log(u"           response=\n%s" % result)
            raise APIError(code, info, **result["error"])
        except TypeError:
            raise RuntimeError(result)
def is_translation(page):
    """Return whether the given page is a translated version of a page."""
    url = "%s/index.php?title=%s" % (SITE.scriptpath(),
                                     page.title(asUrl=True))
    return '"wgTranslatePageTranslation":"translation"' in http.request(
        SITE, url)
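# Usage sketch (hypothetical page title; assumes SITE is the module-level
# site object that is_translation above relies on):
page = pywikibot.Page(SITE, 'Template:Main Page/de')
if is_translation(page):
    print('%s is a translation subpage' % page.title())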