def wsgi_request(self, environ, start_response):
    """Serve one WSGI request by publishing it through ``publish_module``.

    The publisher writes its complete response (status line, headers, blank
    line, body) into an in-memory stream.  That text is split back into the
    pieces WSGI expects: ``start_response`` is called with the status and a
    list of ``(name, value)`` header tuples, and the body is returned as a
    single-element list.

    ``environ`` is handed to the publisher unchanged and its ``wsgi.input``
    entry is used as the request's stdin.  When ``self.wsgi_debug`` is true,
    the environ and the resulting status/headers are printed.
    """
    if self.wsgi_debug:
        print('wsgi_request start')
        print(environ)
        print('...')

    captured = StringIO()
    response = Response(stdout=captured, stderr=sys.stderr)
    # Session and authenticated user are taken from the application's
    # request object and passed on to the publisher as extras.
    extra = {
        'SESSION': self.app.REQUEST.SESSION,
        'AUTHENTICATED_USER': self.app.REQUEST.AUTHENTICATED_USER,
    }
    publish_module(
        'Zope2',
        environ=environ,
        response=response,
        extra=extra,
        stdin=environ['wsgi.input'],
    )

    # Everything up to the first blank line is the header block.
    raw = captured.getvalue()
    header_text, body = raw.split(newline * 2, 1)
    headers = []
    for line in header_text.split(newline):
        headers.append(line.split(': ', 1))

    # The first header line carries the status; its value is what WSGI
    # expects as the status string.
    status = headers.pop(0)[1]

    if self.wsgi_debug:
        print('wsgi_request done, status="%s"' % status)
        print(' ' + '\n'.join([': '.join(pair) for pair in headers]))

    headers = [(pair[0], ', '.join(pair[1:])) for pair in headers]

    # Supply a default content type when the publisher did not set one.
    lowered_names = [pair[0].lower() for pair in headers]
    if 'content-type' not in lowered_names:
        headers.append(('Content-Type', 'text/html; charset=utf-8'))

    start_response(status, headers)
    return [body]
def publish(self, path, basic=None, env=None, extra=None,
            request_method='GET', stdin=None, handle_errors=True):
    '''Publishes the object at 'path' returning a response object.

    path           -- URL path to publish; anything after the first '?'
                      becomes QUERY_STRING.
    basic          -- optional 'user:password' string sent as HTTP basic
                      authentication.
    env            -- optional CGI-style environment dict; it is updated
                      in place with the server name/port, request method
                      and path information.
    extra          -- optional dict passed to the publisher as 'extra'.
    request_method -- value for REQUEST_METHOD (default 'GET').
    stdin          -- file-like request body; an empty StringIO if omitted.
    handle_errors  -- when false the publisher runs with debug=True.

    The security manager that was active on entry is restored before
    returning, even when publishing raises.
    '''
    from StringIO import StringIO
    from ZPublisher.Response import Response
    from ZPublisher.Test import publish_module
    from AccessControl.SecurityManagement import getSecurityManager
    from AccessControl.SecurityManagement import setSecurityManager

    # Save current security manager
    sm = getSecurityManager()

    # Commit the sandbox for good measure
    transaction.commit()

    if env is None:
        env = {}
    if extra is None:
        extra = {}

    request = self.app.REQUEST

    env['SERVER_NAME'] = request['SERVER_NAME']
    env['SERVER_PORT'] = request['SERVER_PORT']
    env['REQUEST_METHOD'] = request_method

    # Split on the first '?' only: a query string may itself contain '?'.
    parts = path.split('?', 1)
    env['PATH_INFO'] = parts[0]
    if len(parts) == 2:
        env['QUERY_STRING'] = parts[1]

    if basic:
        # b64encode, unlike encodestring, emits no line breaks, so the
        # header value stays on a single line however long the
        # credentials are.
        env['HTTP_AUTHORIZATION'] = "Basic %s" % base64.b64encode(basic)

    if stdin is None:
        stdin = StringIO()

    outstream = StringIO()
    response = Response(stdout=outstream, stderr=sys.stderr)

    try:
        publish_module('Zope2',
                       response=response,
                       stdin=stdin,
                       environ=env,
                       extra=extra,
                       debug=not handle_errors,
                       )
    finally:
        # Restore security manager, also when the publisher raised
        setSecurityManager(sm)

    return ResponseWrapper(response, outstream, path)
def publish(self, path, basic=None, env=None, extra=None,
            request_method='GET', stdin=None, handle_errors=True):
    '''Publishes the object at 'path' returning a response object.

    path           -- URL path to publish; anything after the first '?'
                      becomes QUERY_STRING.
    basic          -- optional 'user:password' string sent as HTTP basic
                      authentication.
    env            -- optional CGI-style environment dict; it is updated
                      in place with the server name/port, request method
                      and path information.
    extra          -- optional dict passed to the publisher as 'extra'.
    request_method -- value for REQUEST_METHOD (default 'GET').
    stdin          -- file-like request body; an empty StringIO if omitted.
    handle_errors  -- when false the publisher runs with debug=True.
    '''
    from StringIO import StringIO
    from ZPublisher.Response import Response
    from ZPublisher.Test import publish_module

    # Commit the sandbox for good measure
    transaction.commit()

    if env is None:
        env = {}
    if extra is None:
        extra = {}

    request = self.app.REQUEST

    env['SERVER_NAME'] = request['SERVER_NAME']
    env['SERVER_PORT'] = request['SERVER_PORT']
    env['REQUEST_METHOD'] = request_method

    # Split on the first '?' only: a query string may itself contain '?'.
    parts = path.split('?', 1)
    env['PATH_INFO'] = parts[0]
    if len(parts) == 2:
        env['QUERY_STRING'] = parts[1]

    if basic:
        # b64encode, unlike encodestring, emits no line breaks, so the
        # header value stays on a single line however long the
        # credentials are.
        env['HTTP_AUTHORIZATION'] = "Basic %s" % base64.b64encode(basic)

    if stdin is None:
        stdin = StringIO()

    outstream = StringIO()
    response = Response(stdout=outstream, stderr=sys.stderr)

    publish_module('Zope2',
                   response=response,
                   stdin=stdin,
                   environ=env,
                   extra=extra,
                   debug=not handle_errors,
                   )

    return ResponseWrapper(response, outstream, path)
def __call__(self, requestString, handle_errors=True):
    """Publish a raw HTTP request string and return the response object.

    ``requestString`` holds a request line (``METHOD URL PROTOCOL``),
    followed by optional header lines and an optional body.  The request
    line and headers are turned into a CGI-style environment, the
    remaining text becomes the request's stdin, and the request is
    published through ``publish_module``.

    When ``handle_errors`` is false the publisher runs with ``debug=True``.
    After publishing, the application's ZODB connection is synced.
    """
    from ZPublisher.Iterators import IStreamIterator
    from ZPublisher.Response import Response
    from ZPublisher.Test import publish_module

    class TestResponse(Response):
        """Response that joins stream iterators into a plain string body."""

        def setBody(self, body, title='', is_error=0, **kw):
            if IStreamIterator.providedBy(body):
                body = ''.join(body)
            # Hand back whatever the base implementation returns so
            # callers relying on its return value keep working.
            return Response.setBody(self, body, title, is_error, **kw)

    # Discard leading white space to make call layout simpler
    requestString = requestString.lstrip()

    # Split off and parse the command line.  A request consisting of the
    # command line alone (no newline at all) has no headers and no body;
    # slicing with the -1 returned by find() would cut off its last
    # character.
    newline_at = requestString.find('\n')
    if newline_at == -1:
        commandLine = requestString.rstrip()
        requestString = ''
    else:
        commandLine = requestString[:newline_at].rstrip()
        requestString = requestString[newline_at + 1:]
    method, url, protocol = commandLine.split()

    instream = StringIO(requestString)

    env = {"HTTP_HOST": 'localhost',
           "HTTP_REFERER": 'localhost',
           "REQUEST_METHOD": method,
           "SERVER_PROTOCOL": protocol,
           }

    parts = url.split('?', 1)
    env['PATH_INFO'] = parts[0]
    if len(parts) == 2:
        env['QUERY_STRING'] = parts[1]

    # If you followed closely, you notice that one part of the url
    # gets unquoted (PATH_INFO) while the other (QUERY_STRING)
    # doesn't. That complies with what the ZSERVER does.
    env['PATH_INFO'] = urllib.unquote(env['PATH_INFO'])

    headers = [splitHeader(header)
               for header in rfc822.Message(instream).headers]

    # Store request body without headers
    instream = StringIO(instream.read())

    for name, value in headers:
        name = '_'.join(name.upper().split('-'))
        # Content type and length keep their bare CGI names; every other
        # header gets the HTTP_ prefix.
        if name not in ('CONTENT_TYPE', 'CONTENT_LENGTH'):
            name = 'HTTP_' + name
        env[name] = value.rstrip()

    if 'HTTP_AUTHORIZATION' in env:
        env['HTTP_AUTHORIZATION'] = authHeader(env['HTTP_AUTHORIZATION'])

    outstream = StringIO()
    response = TestResponse(stdout=outstream, stderr=sys.stderr)

    publish_module('Zope2',
                   response=response,
                   stdin=instream,
                   environ=env,
                   debug=not handle_errors,
                   )

    self.app._p_jar.sync()

    return response
def http(request_string, handle_errors=True):
    """Execute an HTTP request string via the publisher

    This is used for HTTP doc tests.

    ``request_string`` holds a request line (``METHOD PATH PROTOCOL``),
    followed by optional header lines and an optional body.  When
    ``handle_errors`` is false the publisher runs with ``debug=True``.

    The security manager that was active on entry is restored afterwards,
    even when publishing raises.  Returns a ``DocResponseWrapper`` around
    the response.
    """
    import urllib
    import rfc822
    from cStringIO import StringIO
    from ZPublisher.Response import Response
    from ZPublisher.Test import publish_module
    from AccessControl.SecurityManagement import getSecurityManager
    from AccessControl.SecurityManagement import setSecurityManager

    # Save current Security Manager
    old_sm = getSecurityManager()

    # Commit work done by previous python code.
    transaction.commit()

    # Discard leading white space to make call layout simpler
    request_string = request_string.lstrip()

    # Split off and parse the command line.  A request consisting of the
    # command line alone (no newline at all) has no headers and no body;
    # slicing with the -1 returned by find() would cut off its last
    # character.
    newline_at = request_string.find('\n')
    if newline_at == -1:
        command_line = request_string.rstrip()
        request_string = ''
    else:
        command_line = request_string[:newline_at].rstrip()
        request_string = request_string[newline_at + 1:]
    method, path, protocol = command_line.split()
    # NOTE(review): the whole URL, query string included, is unquoted
    # before it is split, so an encoded '?' (%3F) in the path is treated
    # as the query separator -- confirm this is intended.
    path = urllib.unquote(path)

    instream = StringIO(request_string)

    env = {"HTTP_HOST": 'localhost',
           "HTTP_REFERER": 'localhost',
           "REQUEST_METHOD": method,
           "SERVER_PROTOCOL": protocol,
           }

    # Split on the first '?' only: a query string may itself contain '?'.
    parts = path.split('?', 1)
    env['PATH_INFO'] = parts[0]
    if len(parts) == 2:
        env['QUERY_STRING'] = parts[1]

    header_output = HTTPHeaderOutput(
        protocol, ('x-content-type-warning', 'x-powered-by',
                   'bobo-exception-type', 'bobo-exception-file',
                   'bobo-exception-value', 'bobo-exception-line'))

    headers = [split_header(header)
               for header in rfc822.Message(instream).headers]

    # Store request body without headers
    instream = StringIO(instream.read())

    for name, value in headers:
        name = '_'.join(name.upper().split('-'))
        # Content type and length keep their bare CGI names; every other
        # header gets the HTTP_ prefix.
        if name not in ('CONTENT_TYPE', 'CONTENT_LENGTH'):
            name = 'HTTP_' + name
        env[name] = value.rstrip()

    if 'HTTP_AUTHORIZATION' in env:
        env['HTTP_AUTHORIZATION'] = auth_header(env['HTTP_AUTHORIZATION'])

    outstream = StringIO()
    response = Response(stdout=outstream, stderr=sys.stderr)

    try:
        publish_module('Zope2',
                       response=response,
                       stdin=instream,
                       environ=env,
                       debug=not handle_errors,
                       )
        header_output.setResponseStatus(response.getStatus(),
                                        response.errmsg)
        header_output.setResponseHeaders(response.headers)
        header_output.appendResponseHeaders(response._cookie_list())
        header_output.appendResponseHeaders(
            response.accumulated_headers.splitlines())
    finally:
        # Restore previous security manager, which may have been changed
        # by calling the publish method above
        setSecurityManager(old_sm)

    # Sync connection
    sync()

    return DocResponseWrapper(response, outstream, path, header_output)
def http(request_string, handle_errors=True):
    """Execute an HTTP request string via the publisher

    This is used for HTTP doc tests.

    ``request_string`` holds a request line (``METHOD PATH PROTOCOL``),
    followed by optional header lines and an optional body.  When
    ``handle_errors`` is false the publisher runs with ``debug=True``.

    Returns a ``DocResponseWrapper`` around the response.
    """
    import urllib
    import rfc822
    from cStringIO import StringIO
    from ZPublisher.Response import Response
    from ZPublisher.Test import publish_module

    # Commit work done by previous python code.
    transaction.commit()

    # Discard leading white space to make call layout simpler
    request_string = request_string.lstrip()

    # Split off and parse the command line.  A request consisting of the
    # command line alone (no newline at all) has no headers and no body;
    # slicing with the -1 returned by find() would cut off its last
    # character.
    newline_at = request_string.find('\n')
    if newline_at == -1:
        command_line = request_string.rstrip()
        request_string = ''
    else:
        command_line = request_string[:newline_at].rstrip()
        request_string = request_string[newline_at + 1:]
    method, path, protocol = command_line.split()
    # NOTE(review): the whole URL, query string included, is unquoted
    # before it is split, so an encoded '?' (%3F) in the path is treated
    # as the query separator -- confirm this is intended.
    path = urllib.unquote(path)

    instream = StringIO(request_string)

    env = {"HTTP_HOST": 'localhost',
           "HTTP_REFERER": 'localhost',
           "REQUEST_METHOD": method,
           "SERVER_PROTOCOL": protocol,
           }

    # Split on the first '?' only: a query string may itself contain '?'.
    parts = path.split('?', 1)
    env['PATH_INFO'] = parts[0]
    if len(parts) == 2:
        env['QUERY_STRING'] = parts[1]

    header_output = HTTPHeaderOutput(
        protocol, ('x-content-type-warning', 'x-powered-by',
                   'bobo-exception-type', 'bobo-exception-file',
                   'bobo-exception-value', 'bobo-exception-line'))

    headers = [split_header(header)
               for header in rfc822.Message(instream).headers]

    # Store request body without headers
    instream = StringIO(instream.read())

    for name, value in headers:
        name = '_'.join(name.upper().split('-'))
        # Content type and length keep their bare CGI names; every other
        # header gets the HTTP_ prefix.
        if name not in ('CONTENT_TYPE', 'CONTENT_LENGTH'):
            name = 'HTTP_' + name
        env[name] = value.rstrip()

    if 'HTTP_AUTHORIZATION' in env:
        env['HTTP_AUTHORIZATION'] = auth_header(env['HTTP_AUTHORIZATION'])

    outstream = StringIO()
    response = Response(stdout=outstream, stderr=sys.stderr)

    publish_module('Zope2',
                   response=response,
                   stdin=instream,
                   environ=env,
                   debug=not handle_errors,
                   )
    header_output.setResponseStatus(response.getStatus(), response.errmsg)
    header_output.setResponseHeaders(response.headers)
    header_output.appendResponseHeaders(response._cookie_list())
    header_output.appendResponseHeaders(
        response.accumulated_headers.splitlines())

    sync()

    return DocResponseWrapper(response, outstream, path, header_output)
def run(app, args, rate=5):
    """Run the link-check processing loop; this function never returns.

    app  -- application root; its items are scanned for Plone sites that
            carry a ``portal_linkcheck`` tool, and its connection is
            synced after every iteration.
    args -- not used by this function.
    rate -- number of seconds to sleep between iterations.

    For every site found, a request queue and ``settings.concurrency``
    daemon worker threads are created once.  Each iteration then, per
    site: re-enqueues or expires checked links (every 3600th iteration),
    takes up to ``settings.transaction_size`` queued links, checks
    external ones over HTTP through the workers and internal ones through
    the publisher (only if ``settings.use_publisher``), and finally
    records the results on the tool and commits.
    """
    # Adjust root logging handler levels
    level = getConfiguration().eventlog.getLowestHandlerLevel()
    root = logging.getLogger()
    for handler in root.handlers:
        handler.setLevel(level)

    logger = logging.getLogger("linkcheck.processor")
    logger.setLevel(level)
    logger.info("looking for sites...")

    # NOTE(review): whether the Session constructor accepts ``timeout``
    # depends on the installed requests version -- confirm.
    session = requests.Session(timeout=5)

    counter = 0
    sites = {}

    def worker(q, responses):
        # The queue and result list are passed in explicitly.  Looking
        # them up as closure variables would make the threads of an
        # earlier site pick up the objects created for a later site,
        # because the enclosing names are rebound per site.
        while True:
            url = q.get()
            r = None
            try:
                r = session.get(url)
            except requests.Timeout:
                status_code = 504
            except requests.RequestException as exc:
                logger.warn(exc)
                status_code = 503
            except UnicodeError as exc:
                logger.warn("Unable to decode string: %r (%s)." % (
                    url, exc))
                status_code = 502

            # On failure, fabricate a response carrying the error status
            # so the main loop can treat every result alike.
            if r is None:
                r = requests.Response()
                r.status_code = status_code
                r.url = url

            responses.append(r)
            q.task_done()

    # Enter runloop
    while True:
        errors = set()

        # Discover sites that have not been set up yet.
        for name, item in app.objectItems():
            if name in sites:
                continue

            if IPloneSiteRoot.providedBy(item):
                try:
                    tool = getToolByName(item, 'portal_linkcheck')
                except AttributeError:
                    continue

                logger.info("found site '%s'." % name)

                registry = getUtility(IRegistry, context=item)

                try:
                    settings = registry.forInterface(ISettings)
                except KeyError:
                    logger.warn("settings not available; please reinstall.")
                    continue

                responses = []
                q = Queue()
                for i in range(settings.concurrency):
                    t = threading.Thread(target=worker, args=(q, responses))
                    t.daemon = True
                    t.start()

                logger.info(
                    "%d worker threads started." % settings.concurrency
                )

                sites[name] = (tool, settings, q, responses)

        if not sites and not counter:
            logger.info(
                "no sites found; polling every %d second(s) ..." % rate
            )

        for tool, settings, queue, responses in sites.values():
            # Synchronize database
            tool._p_jar.sync()

            if not tool.is_available():
                logger.warn("Tool not available; please run update step.")
                logger.info("Sleeping for 10 seconds...")
                time.sleep(10)
                break

            if not counter % 3600:
                now = datetime.datetime.now()

                # This timestamp is the threshold for items that need an
                # update.
                needs_update = int(time.mktime(
                    (now - datetime.timedelta(
                        hours=settings.interval)).timetuple()
                ))

                # This timestamp is the threshold for items that are no
                # longer active.
                expired = int(time.mktime(
                    (now - datetime.timedelta(
                        days=settings.expiration)).timetuple()
                ))

                discard = set()
                for url, entry in tool.checked.items():
                    if url in tool.queue:
                        continue

                    # Discard items that are expired
                    if entry[0] and entry[0] < expired:
                        discard.add(url)

                    # Enqueue items with an out of date timestamp.
                    elif entry[0] and entry[0] < needs_update:
                        tool.queue.put(url)

                for url in discard:
                    del tool.checked[url]

            # Fetch set of URLs to check (up to transaction size).
            queued = tool.queue[:settings.transaction_size]
            if not queued:
                continue

            urls = filter(None, map(tool.links.get, queued))

            # This keeps track of status updates, which we'll apply at
            # the end.
            updates = []

            # Distinguish between internal and external requests.
            internal, external = partition(
                lambda url: url.startswith('/'),
                urls
            )

            # Must be HTTP or HTTPS
            external, invalid = partition(
                lambda url: (url.startswith('http://') or
                             url.startswith('https://')),
                external
            )

            for url in external:
                queue.put(url)

            # Wait for responses
            queue.join()

            while responses:
                response = responses.pop()
                status = response.status_code

                # This may be a redirect.
                if response.history:
                    url = response.history[0].url
                    if response.history[0].status_code == 301:
                        status = 301
                else:
                    url = response.url

                updates.append((url, status))

            for url in internal:
                # For now, we simply ignore internal links if we're
                # not publishing.
                if not settings.use_publisher:
                    continue

                stdout = StringIO()
                stderr = StringIO()

                env = {
                    'GATEWAY_INTERFACE': 'CGI/1.1 ',
                    'HTTP_ACCEPT': '*/*',
                    'HTTP_HOST': '127.0.0.1',
                    'HTTP_USER_AGENT': 'Bobo',
                    'REQUEST_METHOD': 'GET',
                    'SCRIPT_NAME': '',
                    'SERVER_HOSTNAME': 'bobo.server.host',
                    'SERVER_NAME': 'bobo.server',
                    'SERVER_PORT': '80',
                    'SERVER_PROTOCOL': 'HTTP/1.0 ',
                }

                env['PATH_INFO'] = "/" + tool.aq_parent.absolute_url() + url

                try:
                    status = publish_module(
                        'Zope2', environ=env, stdout=stdout, stderr=stderr
                    )
                except ConflictError:
                    status = 503
                else:
                    # This is assumed to be a good response.
                    if status == 302:
                        status = 200

                updates.append((url, status))

            # Pull URLs out of queue, actually removing them.
            unchanged = []
            urls = set(urls)
            while urls:
                try:
                    i = tool.queue.pull()
                except IndexError:
                    transaction.abort()
                    continue

                try:
                    url = tool.links[i]
                    urls.remove(url)
                except KeyError:
                    unchanged.append(i)

            # This shouldn't happen too frequently.
            for i in unchanged:
                tool.queue.put(i)
                url = tool.links[i]
                logger.warn("putting back unprocessed url: %s." % url)

            for url in invalid:
                tool.update(url, 0)
                errors.add(url)

            # Apply status updates
            for url, status in updates:
                tool.update(url, status)

            transaction.get().note('updated link validity')
            try:
                transaction.commit()
            except ConflictError:
                transaction.abort()

        for url in errors:
            logger.warn("error checking: %s." % url)

        time.sleep(rate)
        app._p_jar.sync()
        counter += 1