def start_notebook(url, port, user):
    """Log ``user`` in through the hub and exercise their notebook server forever.

    Generator-style coroutine. Each outer iteration loads ``Hello.ipynb``,
    starts a new kernel, runs the notebook 20 times with a short pause after
    each run, saves the notebook and finally kills the kernel. The outer loop
    never terminates on its own.

    Args:
        url: Host name used to build the hub and per-user URLs.
        port: Port used to build the hub and per-user URLs.
        user: User name; it is passed to ``login`` as both credentials.
    """
    hub_url = 'https://%s:%s/hub' % (url, port)
    user_url = 'https://%s:%s/user/%s' % (url, port, user)
    cookies = login(hub_url, user, user)
    api = NBAPI(url=user_url, cookies=cookies)
    path = 'Hello.ipynb'
    for i in itertools.count():
        gen_log.info("loading %s (%s)", user, i)
        nb = api.get_notebook(path)
        gen_log.info("starting %s (%s)", user, i)
        session = Session()
        kernel = yield api.new_kernel(session.session)
        try:
            for j in range(20):
                gen_log.info("running %s (%s:%s)", user, j, i)
                yield run_notebook(nb, kernel, session)
                yield sleep(0.05)
            gen_log.info("saving %s (%s)", user, i)
            api.save_notebook(nb, path)
        finally:
            # Always release the kernel, even when a run or the save fails.
            api.kill_kernel(kernel['id'])
    # The former trailing ``gen_log.info("history: %s", response.history)``
    # was removed: ``response`` is never defined in this function, so the
    # statement was either unreachable or a NameError.
def post(self):
    """Queue one record according to the ``flag`` request argument and flush.

    ``flag`` (lower-cased) selects the queue: ``deep``, ``rti`` or
    ``newpoi``. Any other value is answered with a failure payload and
    logged. The response body is ``'success'`` or
    ``{'failure': MESSAGE.BAD_FLAG}``.
    """
    msg = 'success'
    save_flag = self.get_argument('flag', '').lower()
    cp = self.get_argument('cp', '').strip()
    cpid = self.get_argument('cpid', '').strip()
    if 'deep' == save_flag:
        poiid = self.get_argument('poiid', '').strip()
        deep = self.get_argument('deep', '')
        # A missing or literal "null" poiid is queued with -3 markers.
        if not poiid or 'null' == poiid.lower():
            deep_queue.put((cp, poiid, cpid, deep, -3, -3))
        # NOTE(review): as written this record is queued unconditionally, so a
        # missing poiid yields two records. Confirm whether an ``else`` is intended.
        deep_queue.put((cp, poiid, cpid, deep, 1, 1))
        logger.info("queue.size:%s", len(deep_queue))
        self.do_flush(deep_sql, deep_queue)
    elif 'rti' == save_flag:
        rti = self.get_argument('rti', '')
        # NOTE(review): ``flag_value`` is not assigned anywhere in this method;
        # presumably a module-level name. Verify it exists.
        value = (cp, cpid, rti, flag_value, flag_value)
        rti_queue.put(value)
        self.do_flush(rti_sql, rti_queue)
    elif 'newpoi' == save_flag:
        new_poi = self.get_argument('newpoiid', '')
        value = (cp, cpid, new_poi)
        newpoi_queue.put(value)
        self.do_flush(newpoi_sql, newpoi_queue)
    else:
        # Unknown flag: report failure and log the full request arguments.
        msg = {'failure': MESSAGE.BAD_FLAG}
        logger.info("cp=%s, cpid=%s, msg=%s\nReuqest=%s", cp, cpid, MESSAGE.BAD_FLAG, self.request.arguments)
    self.write(msg)
def configure(path, uid=None):
    """Configure the tornado loggers to write rotating files under ``path``.

    Each of the access, application and general loggers gets a
    ``TimedRotatingFileHandler`` that rolls over at midnight. Pretty console
    logging is then enabled and a start-up message is written.

    Args:
        path: Directory to create (if missing) and write logs to.
        uid: If provided, the system user id that should own the current,
            non-rotated version of each log file. Useful when the process
            later drops down to a less privileged user.

    Raises:
        OSError: If the directory does not exist and cannot be created, or
            if it exists but cannot be written to.
    """
    # First, create the logging directory if it does not already exist.
    # os.mkdir raises OSError when we lack permission to create it.
    if not os.path.isdir(path):
        os.mkdir(path)

    # Refuse to continue without write access, to prevent log-less execution.
    if not os.access(path, os.W_OK | os.X_OK):
        # Report the actual directory (the original formatted the literal
        # string "path" instead of the variable).
        error = "Unable to write to logging directory {0}".format(path)
        raise OSError(error)

    # Point the built-in tornado loggers at the requested directory.
    log_format = "%(created)f|%(message)s"
    tornado_logs = (('access.log', access_log),
                    ('application.log', app_log),
                    ('general.log', gen_log))

    for log_name, logger in tornado_logs:
        log_path = os.path.join(path, log_name)
        handler = TimedRotatingFileHandler(log_path, when="midnight")
        formatter = logging.Formatter(log_format)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

        # Allow application errors to propagate up, so that serious errors
        # can wind up on STDERR or other useful places.
        if logger is not app_log:
            logger.propagate = False

        if uid:
            os.chown(log_path, uid, -1)

    tornado.log.enable_pretty_logging()

    # Finally, write a simple start-up message, both to test that we can
    # write as expected and to get a start time in the logs.
    gen_log.setLevel(logging.INFO)
    gen_log.info("Starting webserver (pid:{0}).".format(os.getpid()))
def callback():
    """Log the current local time once per second until an error occurs.

    The loop is best effort: an ordinary exception ends it and is recorded
    at debug level. ``KeyboardInterrupt`` and ``SystemExit`` are no longer
    swallowed, so the process can still be stopped from here.
    """
    try:
        while True:
            gen_log.info(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            time.sleep(1)
    except Exception:
        # Keep the original "never raise" contract for ordinary errors, but
        # leave a trace instead of failing silently.
        gen_log.debug("time logging loop stopped", exc_info=True)
def get_authenticated_user(self, callback, http_client=None):
    """Gets the OAuth authorized user and access token on callback.

    This method should be called from the handler for your registered
    OAuth Callback URL to complete the registration process. We call
    callback with the authenticated user, which in addition to standard
    attributes like 'name' includes the 'access_key' attribute, which
    contains the OAuth access you can use to make authorized requests
    to this service on behalf of the user.

    ``callback(None)`` is invoked when the request-token cookie is missing,
    malformed, or does not match the ``oauth_token`` argument.

    Args:
        callback: Callable receiving the user, or ``None`` on failure.
        http_client: Optional client used for the access-token request;
            defaults to ``self.get_auth_http_client()``.
    """
    request_key = escape.utf8(self.get_argument("oauth_token"))
    oauth_verifier = self.get_argument("oauth_verifier", None)
    request_cookie = self.get_cookie("_oauth_request_token")
    if not request_cookie:
        gen_log.warning("Missing OAuth request token cookie")
        callback(None)
        return
    self.clear_cookie("_oauth_request_token")
    try:
        cookie_key, cookie_secret = [
            base64.b64decode(escape.utf8(i))
            for i in request_cookie.split("|")]
    except (TypeError, ValueError):
        # The cookie is client-controlled: a wrong number of parts or invalid
        # base64 is an authentication failure, not a server error.
        gen_log.warning("Malformed OAuth request token cookie")
        callback(None)
        return
    if cookie_key != request_key:
        # Log only the two token keys; the raw cookie also carries the token
        # secret and must not be written to the log.
        gen_log.info("OAuth request token mismatch: cookie=%r request=%r",
                     cookie_key, request_key)
        gen_log.warning("Request token does not match cookie")
        callback(None)
        return
    token = dict(key=cookie_key, secret=cookie_secret)
    if oauth_verifier:
        token["verifier"] = oauth_verifier
    if http_client is None:
        http_client = self.get_auth_http_client()
    http_client.fetch(self._oauth_access_token_url(token),
                      self.async_callback(self._on_access_token, callback))
def run_notebook(nb, kernel, session):
    """Run all the code cells of a notebook, one ``execute`` call per cell.

    Generator: yields whatever ``execute`` returns for each code cell, in
    notebook order, logging progress as ``current/total`` before each one.
    """
    code_cells = [entry for entry in nb.cells if entry['cell_type'] == 'code']
    total = len(code_cells)
    for position, code_cell in enumerate(code_cells, 1):
        gen_log.info("Executing cell %i/%i", position, total)
        yield execute(code_cell, kernel, session)
def saveFile(files, key, path):
    """Store the first upload found under ``key`` and return its new name.

    The stored name is ``<timestamp>_<uuid1>.<extension>``, where the
    extension is the text after the last dot of the uploaded file name and
    the timestamp is the current time plus 300 seconds.

    Args:
        files: Mapping of field name to a list of upload dicts, each with
            ``"filename"`` and ``"body"`` entries.
        key: Field name to read from ``files``.
        path: Directory prefix; it is concatenated directly with the file
            name, so it must already end with a path separator.

    Returns:
        The generated file name (without ``path``).
    """
    upload = files[key][0]
    req_name = upload["filename"]
    body = upload["body"]
    timestamp = int(time.time() + 300)
    # NOTE(review): the extension comes from the client-supplied name and is
    # not sanitized; confirm callers only accept trusted names.
    fileName = "%d_%s.%s" % (timestamp, str(uuid.uuid1()), req_name.split(".").pop())
    gen_log.info(fileName)
    # Upload bodies are bytes; writing them through a text-mode file raises
    # TypeError on Python 3, so pick the mode from the body type.
    mode = "wb" if isinstance(body, bytes) else "w"
    with open(path + fileName, mode) as f:
        f.write(body)
    return fileName
def api(self, path, **kwargs):
    """Call ``self._make_request`` and log how long it took.

    Generator-style coroutine. Any exception is logged at error level and
    re-raised; on success the decoded data is returned via ``gen.Return``.
    """
    try:
        import time
        started_at = time.time()
        data = yield self._make_request(path, **kwargs)
        finished_at = time.time()
        elapsed = float(finished_at) - float(started_at)
        gen_log.info("=====Time request wio api, {}".format(elapsed))
    except Exception as e:
        gen_log.error(e)
        raise
    raise gen.Return(data)
def _check_file(modify_times, path): try: modified = os.stat(path).st_mtime except Exception: return if path not in modify_times: modify_times[path] = modified return if modify_times[path] != modified: gen_log.info("%s modified; restarting server", path) _reload()
def add(self, req):
    """Create the style named in ``req.content['name']`` and broadcast it.

    Returns ``True`` when the style was created. When ``self.get_style(req)``
    already finds one, an error reply is sent and ``None`` is returned.
    """
    if not self.get_style(req):
        new_style = create_style(req.content['name'])
        req.client.styles.append(new_style)
        req.content['style'] = new_style
        req.send_to_all(new_style)
        return True
    req.reply_error("The style already exist")
    gen_log.info('Style %s already exists' % req.content['name'])
    return
def _on_headers(self, data):
    """Parse the request line and headers, then dispatch or read the body.

    Resets the per-request notification flags, builds ``self._request``,
    fires the "connect" event handler if one is registered and attaches all
    registered event handlers to the request. When a Content-Length is
    present the body is read before dispatch; otherwise the request callback
    runs immediately. A malformed request is logged and the connection is
    closed.
    """
    # Reset per-request state.
    self._old_request = None
    self._end_notified = False
    self._please_notify_end_of_request = False
    try:
        data = native_str(data.decode('latin1'))
        eol = data.find("\r\n")
        start_line = data[:eol]
        try:
            method, uri, version = start_line.split(" ")
        except ValueError:
            raise _BadRequestException("Malformed HTTP request line")
        if not version.startswith("HTTP/"):
            raise _BadRequestException("Malformed HTTP version in HTTP Request-Line")
        headers = httputil.HTTPHeaders.parse(data[eol:])

        # HTTPRequest wants an IP, not a full socket address
        if getattr(self.stream.socket, 'family', socket.AF_INET) in (
            socket.AF_INET, socket.AF_INET6):
            # Jython 2.5.2 doesn't have the socket.family attribute,
            # so just assume IP in that case.
            remote_ip = self.address[0]
        else:
            # Unix (or other) socket; fake the remote address
            remote_ip = '0.0.0.0'

        self._request = HTTPRequest(
            connection=self, method=method, uri=uri, version=version,
            headers=headers, remote_ip=remote_ip)

        # Notify the "connect" listener first, then attach every registered
        # handler to the new request object.
        if self._events["connect"]:
            self._events["connect"](self._request)
        for name,handler in self._events.items():
            if handler:
                self._request.on(name, handler)

        content_length = headers.get("Content-Length")
        if content_length:
            # NOTE(review): a non-numeric Content-Length raises ValueError,
            # which is not handled by the except clause below.
            content_length = int(content_length)
            if content_length > self.stream.max_buffer_size:
                raise _BadRequestException("Content-Length too long")
            if headers.get("Expect") == "100-continue":
                self.stream.write(b("HTTP/1.1 100 (Continue)\r\n\r\n"))
            # Dispatch happens later, from _on_request_body.
            self.stream.read_bytes(content_length, self._on_request_body)
            return

        self.request_callback(self._request)
    except _BadRequestException, e:
        gen_log.info("Malformed HTTP request from %s: %s",
                     self.address[0], e)
        self.close()
        return
def parse_request(data):
    """Parse a request that was serialized as a Python dict literal.

    ``data`` is evaluated with ``ast.literal_eval`` and must yield a mapping
    with ``'uri'``, ``'body'`` and ``'headers'`` (containing ``'Host'``).

    Returns:
        ``(headers, host, path, get_arguments, post_arguments)``, or ``None``
        when the data is malformed (the problem is logged).
    """
    try:
        req = ast.literal_eval(data)
        path, _, query = req['uri'].partition('?')
        get_arguments = parse_qs_bytes(query, keep_blank_values=True)
        post_arguments = parse_qs_bytes(req['body'], keep_blank_values=True)
        host = req['headers']['Host']
        headers = req['headers']
        return headers, host, path, get_arguments, post_arguments
    except (_BadRequestException, ValueError, SyntaxError, KeyError,
            TypeError, AttributeError) as e:
        # literal_eval and the key lookups raise these for malformed input;
        # the original clause only caught _BadRequestException, which nothing
        # above raises, so bad data escaped as an unhandled error.
        gen_log.info("Malformed HTTP request:%s", e)
        return
def _make_request(self, path, query=None, method="GET", body=None, headers=None):
    """Make a request on ``path`` and return the decoded JSON response.

    Generator-style coroutine; the result is delivered via ``gen.Return``.

    Args:
        path: Endpoint appended to ``BASE_URL``.
        query: Dictionary that becomes the query string. It is not modified.
        method: GET, POST, etc. For GET any ``body`` is discarded.
        body: Message body. JSON-encoded when the request Content-Type
            mentions json, otherwise URL-encoded.
        headers: Request headers, e.g. "Content-Type".

    Raises:
        WioAPIError: On an HTTP error, or when the response Content-Type is
            missing or not json.
    """
    # Copy so that adding the access token never mutates the caller's dict.
    query = dict(query) if query else {}
    if self.access_token:
        query["access_token"] = self.access_token

    query_string = urllib.urlencode(query) if query else ""
    if method == "GET":
        body = None
    else:
        # Content-Type may be absent; the original evaluated ``"json" in None``.
        request_type = headers.get('Content-Type') if headers else None
        if request_type and "json" in request_type:
            body = json.dumps(body) if body else ""
        else:
            body = urllib.urlencode(body) if body else ""

    url = BASE_URL + path
    if query_string:
        url += "?" + query_string

    # NOTE(review): the logged URL includes the access token.
    gen_log.info("URL=====> {}".format(url))
    gen_log.info("method=====> {}".format(method))
    gen_log.info("body=====> {}".format(body))

    client = AsyncHTTPClient()
    request = HTTPRequest(url, method=method, body=body, headers=headers)
    try:
        response = yield client.fetch(request)
    except HTTPError as e:
        raise WioAPIError(e)
    except Exception as e:
        gen_log.error(e)
        raise

    content_type = response.headers.get('Content-Type')
    gen_log.info("#### content_type: {}".format(content_type))
    gen_log.info("#### body: {}".format(response.body))
    # A response without Content-Type is treated like any other non-json
    # response instead of raising TypeError.
    if content_type and 'json' in content_type:
        data = json.loads(response.body.decode())
    else:
        raise WioAPIError('Maintype was not json')
    raise gen.Return(data)
def _on_headers(self, data):
    """Parse the request line and headers, then read the body or dispatch.

    When a Content-Length is present, ``self._get_handler_info()`` decides
    whether the body is received through temporary files
    (``self._receive_content``) or buffered in memory
    (``self._on_request_body``). Without a Content-Length the request
    callback runs immediately. A malformed request is logged and the
    connection is closed.
    """
    try:
        data = data.decode('latin1')
        eol = data.find("\r\n")
        start_line = data[:eol]
        try:
            method, uri, version = start_line.split(" ")
        except ValueError:
            raise tornado.httpserver._BadRequestException("Malformed HTTP request line")
        if not version.startswith("HTTP/"):
            raise tornado.httpserver._BadRequestException(
                "Malformed HTTP version in HTTP Request-Line")
        headers = tornado.httputil.HTTPHeaders.parse(data[eol:])

        # HTTPRequest wants an IP, not a full socket address
        if self.address_family in (socket.AF_INET, socket.AF_INET6):
            remote_ip = self.address[0]
        else:
            # Unix (or other) socket; fake the remote address
            remote_ip = '0.0.0.0'

        self._request = tornado.httpserver.HTTPRequest(
            connection=self, method=method, uri=uri, version=version,
            headers=headers, remote_ip=remote_ip, protocol=self.protocol)

        content_length = headers.get("Content-Length")
        if content_length:
            content_length = int(content_length)
            use_tmp_files = self._get_handler_info()
            if not use_tmp_files and content_length > self.stream.max_buffer_size:
                # Qualified like every other use in this method, so it is
                # caught by the except clause below.
                raise tornado.httpserver._BadRequestException("Content-Length too long")
            if headers.get("Expect") == "100-continue":
                self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
            if use_tmp_files:
                gen_log.debug('using temporary files for uploading')
                # avoid raising
                # IOError("Reached maximum read buffer size")
                # in tornado.iostream.BaseIOStream._read_to_buffer
                self.stream.max_buffer_size = maxint
                self._receive_content(content_length)
            else:
                gen_log.debug('using memory for uploading')
                self.stream.read_bytes(content_length, self._on_request_body)
            return

        self.request_callback(self._request)
    except tornado.httpserver._BadRequestException as e:
        # Log the whole address: for non-IP sockets it is not a (host, port)
        # tuple, so indexing it is not meaningful.
        gen_log.info("Malformed HTTP request from %r: %s",
                     self.address, e)
        self.close()
        return
def get(self,*args,**kwargs):
    """Write one page (20 records, newest ``_id`` first) of send records as JSON.

    Request arguments: ``pageIndex`` (1-based, default 1) and ``type``
    (default ``wechat``), which selects the Mongo filter. A ``type`` with no
    entry in the filter table results in ``find(None)``.
    """
    page_size = 20
    pageIndex = int(self.get_argument('pageIndex', 1))
    company = self.get_argument('type', 'wechat')
    filters_by_company = {
        'wechat': {'wechat': {'$exists': True}},
        '1xinxi': {'sendid': {'$exists': True}},
        'chanzor': {'sendid': {'$exists': False}, 'wechat': {'$exists': False}},
    }
    db = self.application.db
    gen_log.info(company)
    gen_log.info(pageIndex)
    cursor = db.sendrecord.find(filters_by_company.get(company))
    cursor = cursor.sort([("_id", -1)])
    cursor = cursor.skip((pageIndex - 1) * page_size).limit(page_size)
    record_list = yield cursor.to_list(length=None)
    self.set_header('content-type', 'application/json')
    self.write(json_encode({"data": record_list}))
def open_run_save(api, path, legacy=False):
    """Open a notebook, run it, and save.

    Only the original notebook is saved, the output is not recorded.

    Generator-style coroutine: it yields the result of ``api.new_kernel``
    and of ``run_notebook``.

    Args:
        api: Client object providing ``get_notebook``, ``new_kernel``,
            ``kill_kernel``, ``save_notebook`` and a ``url`` attribute.
        path: Notebook path passed to ``get_notebook`` and ``save_notebook``.
        legacy: Forwarded unchanged to ``api.new_kernel``.
    """
    nb = api.get_notebook(path)
    session = Session()
    kernel = yield api.new_kernel(session.session, legacy=legacy)
    try:
        yield run_notebook(nb, kernel, session)
    finally:
        # Release the kernel whether or not the run succeeded.
        api.kill_kernel(kernel['id'])
    # Reached only when the run did not raise.
    gen_log.info("Saving %s/notebooks/%s", api.url, path)
    api.save_notebook(nb, path)
def __init__(self) :
    """Set the default bookkeeping fields of the spider mixin.

    ``self.name`` (the name used when executing the spider) is not assigned
    here.
    """
    gen_log.info ("\n/// GenericSpiderMix / init ")

    # Errors collected by the spider.
    self.error_array = []

    # Item counters: ``item_count`` is incremented each time a new item is
    # created, ``item_count_depth_1`` each time an item is completed in a
    # detailed page.
    self.item_count = 0
    self.item_count_depth_1 = 0

    # Paging: ``LIMIT`` is the number of pages where the spider will stop,
    # ``page_count`` the number of pages already scraped.
    self.LIMIT = 5
    self.page_count = 1

    # Delay in seconds between requests; some websites block clients that
    # send too many requests.
    self.download_delay = 0
def _on_headers(self, data):
    """Parse the request line and headers, then read the body or dispatch.

    Builds ``self._request``. With a Content-Length the body is read first
    and handed to ``self._on_request_body``; otherwise the request callback
    is invoked directly. Malformed requests (bad request line, bad headers,
    non-numeric or oversized Content-Length) are logged and the connection
    is closed.
    """
    try:
        data = native_str(data.decode("latin1"))
        eol = data.find("\r\n")
        start_line = data[:eol]
        try:
            method, uri, version = start_line.split(" ")
        except ValueError:
            raise _BadRequestException("Malformed HTTP request line")
        if not version.startswith("HTTP/"):
            raise _BadRequestException(
                "Malformed HTTP version in HTTP Request-Line")
        try:
            headers = httputil.HTTPHeaders.parse(data[eol:])
        except ValueError:
            # Probably from split() if there was no ':' in the line
            raise _BadRequestException("Malformed HTTP headers")

        # HTTPRequest wants an IP, not a full socket address
        if self.address_family in (socket.AF_INET, socket.AF_INET6):
            remote_ip = self.address[0]
        else:
            # Unix (or other) socket; fake the remote address
            remote_ip = "0.0.0.0"

        self._request = HTTPRequest(
            connection=self,
            method=method,
            uri=uri,
            version=version,
            headers=headers,
            remote_ip=remote_ip,
            protocol=self.protocol,
        )

        content_length = headers.get("Content-Length")
        if content_length:
            try:
                content_length = int(content_length)
            except ValueError:
                # The header is client-controlled; report it as a bad request
                # rather than letting ValueError escape this callback.
                raise _BadRequestException("Malformed Content-Length header")
            if content_length > self.stream.max_buffer_size:
                raise _BadRequestException("Content-Length too long")
            if headers.get("Expect") == "100-continue":
                self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
            self.stream.read_bytes(content_length, self._on_request_body)
            return

        self.request_callback(self._request)
    except _BadRequestException as e:
        # Log the whole address: for non-IP sockets it is not a (host, port)
        # tuple, so indexing it is not meaningful.
        gen_log.info("Malformed HTTP request from %r: %s", self.address, e)
        self.close()
        return
def _on_headers(self, data):
    """Parse the request line and headers, then read the body or dispatch.

    Builds ``self._request``. With a Content-Length the body is read first
    and handed to ``self._on_request_body``; otherwise the request callback
    is invoked directly. A malformed request is logged and the connection
    is closed.
    """
    try:
        data = native_str(data.decode('latin1'))
        eol = data.find("\r\n")
        start_line = data[:eol]
        try:
            method, uri, version = start_line.split(" ")
        except ValueError:
            raise _BadRequestException("Malformed HTTP request line")
        if not version.startswith("HTTP/"):
            raise _BadRequestException("Malformed HTTP version in HTTP Request-Line")
        try:
            headers = httputil.HTTPHeaders.parse(data[eol:])
        except ValueError:
            # Probably from split() if there was no ':' in the line
            raise _BadRequestException("Malformed HTTP headers")

        # HTTPRequest wants an IP, not a full socket address
        if self.address_family in (socket.AF_INET, socket.AF_INET6):
            remote_ip = self.address[0]
        else:
            # Unix (or other) socket; fake the remote address
            remote_ip = '0.0.0.0'

        # Build an HTTPRequest object
        self._request = HTTPRequest(
            connection=self, method=method, uri=uri, version=version,
            headers=headers, remote_ip=remote_ip, protocol=self.protocol)

        # If the headers carry a Content-Length, keep reading the body and
        # then call back our _on_request_body function.
        # A callback is used presumably because of I/O multiplexing, i.e. the
        # non-blocking design.
        content_length = headers.get("Content-Length")
        if content_length:
            content_length = int(content_length)
            if content_length > self.stream.max_buffer_size:
                raise _BadRequestException("Content-Length too long")
            if headers.get("Expect") == "100-continue":
                self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
            self.stream.read_bytes(content_length, self._on_request_body)  # the whole body is buffered in memory
            return

        # If the request has no Content-Length it is simple: start handling
        # it right away.
        self.request_callback(self._request)  # this invokes the application's __call__ magic method
    except _BadRequestException as e:
        gen_log.info("Malformed HTTP request from %r: %s",
                     self.address, e)
        self.close()
        return
def _handle_events(self, fd, events):
    """Dispatch IOLoop events for this stream and update the watched state.

    Handles a pending connect, then read, write and error events in that
    order, returning early whenever the stream turns out to be closed.
    Afterwards the set of events to listen for is recomputed and pushed to
    the IOLoop if it changed. Any exception closes the stream; exceptions
    other than ``UnsatisfiableReadError`` are re-raised.
    """
    if self.closed():
        gen_log.warning("Got events for closed stream %s", fd)
        return
    try:
        if self._connecting:
            self._handle_connect()
        if self.closed():
            return
        if events & self.io_loop.READ:
            # NOTE: We use explicit read instead of implicit.
            # The reason IOStream is not idle is that when an event happened,
            # tornado iostream will still try to read them into buffer.
            # Our approach is that when someone is trying to read the iostream,
            # we will read it.
            if self._should_socket_close() or self.reading():
                self._handle_read()
        if self.closed():
            return
        if events & self.io_loop.WRITE:
            self._handle_write()
        if self.closed():
            return
        if events & self.io_loop.ERROR:
            self.error = self.get_fd_error()
            # Close from a callback rather than inline.
            self.io_loop.add_callback(self.close)
            return
        # Recompute which events we want next: always ERROR, plus READ/WRITE
        # while a read/write is pending.
        state = self.io_loop.ERROR
        if self.reading():
            state |= self.io_loop.READ
        if self.writing():
            state |= self.io_loop.WRITE
        # Nothing pending and nothing buffered: still listen for READ.
        if state == self.io_loop.ERROR and self._read_buffer_size == 0:
            state |= self.io_loop.READ
        if state != self._state:
            assert self._state is not None, \
                "shouldn't happen: _handle_events without self._state"
            self._state = state
            self.io_loop.update_handler(self.fileno(), self._state)
    except UnsatisfiableReadError as e:
        gen_log.info("Unsatisfiable read, closing connection: %s" % e)
        self.close(exc_info=True)
    except Exception:
        gen_log.error("Uncaught exception, closing connection.",
                      exc_info=True)
        self.close(exc_info=True)
        raise
def _read_to_buffer(self):
    """
    Reads from the socket and appends the result to the read buffer.

    Returns the number of bytes read.  Returns 0 if there is nothing
    to read (i.e. the read returns EWOULDBLOCK or equivalent).  On
    error closes the socket and raises an exception. A connection reset
    is the exception to both rules: the socket is closed and ``None`` is
    returned without raising.

    This method will read up to the allowed max_buffer_size, in which case
    if the buffer limit is reached, the read is placed back onto the IOLoop
    for rescheduling. This happens immediately with the hope that the
    underlying program code is reading quickly enough to keep the buffer
    well drained.
    """
    # Refuse to read when one more chunk could reach the buffer limit.
    next_size = self._read_buffer_size + self.read_chunk_size
    if next_size >= self.max_buffer_size:
        gen_log.info("Reached maximum read buffer size of: {}".format(
            self.max_buffer_size))
        # Reschedule and treat this as a EWOULDBLOCK
        self._add_io_state(ioloop.IOLoop.READ)
        return 0
    chunk = None
    try:
        chunk = self.read_from_fd()
    except (socket.error, IOError, OSError) as e:
        # ssl.SSLError is a subclass of socket.error
        if e.args[0] in _ERRNO_CONNRESET:
            # Treat ECONNRESET as a connection close rather than
            # an error to minimize log spam  (the exception will
            # be available on self.error for apps that care).
            self.close(exc_info=True)
            return
        self.close(exc_info=True)
        raise
    if chunk is None:
        return 0
    chunk_length = len(chunk)
    self._read_buffer.append(chunk)
    self._read_buffer_size += chunk_length
    return chunk_length
def ontimer_reload(prev_mtime, filepath, mainfile):
    """Wait for a file to stop changing, check syntax, then reload.

    The file counts as settled when it is non-empty, its ctime equals its
    mtime and the mtime equals ``prev_mtime``. Until then the check is
    re-armed on a 0.3 second timer. Once settled, ``mainfile`` is compiled;
    on success the server is restarted, on failure the error text is stored
    in the module-level ``autoreload_compile_error`` and logged.

    Args:
        prev_mtime: Modification time seen by the previous check.
        filepath: File being watched.
        mainfile: File that must compile before the server is restarted.
    """
    # An application can show the value of autoreload_compile_error on its
    # main page when it is not None.
    global autoreload_compile_error
    statinfo = os.stat(filepath)
    settled = (statinfo.st_size > 0
               and statinfo.st_ctime == statinfo.st_mtime
               and statinfo.st_mtime - prev_mtime == 0)
    if not settled:
        gen_log.info("autoreload: waiting for file upload complete.")
        Timer(0.3, ontimer_reload,
              (statinfo.st_mtime, filepath, mainfile)).start()
        return

    try:
        py_compile.compile(mainfile, doraise=True)
    except py_compile.PyCompileError as e:
        autoreload_compile_error = str(e)
        # The original passed ``filepath`` as a logging argument without a
        # placeholder, which makes the logging module report a formatting
        # error instead of the message.
        logging.info("autoreload: compile error in %s...\n%s",
                     filepath, autoreload_compile_error)
        return

    logging.info("%s modified; restarting server", filepath)
    tornado.autoreload._reload()
def __init__(self, providers_config, counter):
    """Build the proxy application and its provider list.

    Args:
        providers_config: Mapping with a ``'name'`` entry (used as the
            ``app_name`` setting) and a ``'providers'`` entry.
        counter: Storage object handed to the provider list.
    """
    routes = [(r"/(.+)", ProxyHandler)]
    app_settings = {
        "app_name": providers_config['name'],
        "static_path": os.path.join(os.path.dirname(__file__), "static"),
    }

    # Create the provider list, set its key storage, and fill it with items
    # from the settings dict of the config passed to the web server.
    providers = ApiProviderList()
    self.provider_list = providers
    with_storage = providers.set_counter_storage(counter)
    with_storage.add_items_from_dict(providers_config['providers'])

    tornado.web.Application.__init__(self, routes, **app_settings)
    gen_log.info('Server is started!')
def main():
    """Command-line wrapper to re-run a script whenever its source changes.

    Scripts may be specified by filename or module name::

        python -m tornado.autoreload -m tornado.test.runtests
        python -m tornado.autoreload tornado/test/runtests.py

    Running a script with this wrapper is similar to calling
    `tornado.autoreload.wait` at the end of the script, but this wrapper
    can catch import-time problems like syntax errors that would otherwise
    prevent the script from reaching its call to `wait`.
    """
    # Keep the original argv list and work on a copy, so the wrapper's own
    # arguments can be stripped before the target runs.
    original_argv = sys.argv
    sys.argv = sys.argv[:]
    if len(sys.argv) >= 3 and sys.argv[1] == "-m":
        mode = "module"
        module = sys.argv[2]
        del sys.argv[1:3]
    elif len(sys.argv) >= 2:
        mode = "script"
        script = sys.argv[1]
        sys.argv = sys.argv[1:]
    else:
        # No target given: print usage and exit with status 1.
        print >>sys.stderr, _USAGE
        sys.exit(1)

    try:
        if mode == "module":
            import runpy
            runpy.run_module(module, run_name="__main__", alter_sys=True)
        elif mode == "script":
            with open(script) as f:
                global __file__
                __file__ = script
                # Use globals as our "locals" dictionary so that
                # something that tries to import __main__ (e.g. the unittest
                # module) will see the right things.
                exec f.read() in globals(), globals()
    except SystemExit, e:
        # The target called sys.exit(); report its status.
        logging.basicConfig()
        gen_log.info("Script exited with status %s", e.code)
def recv_timeout(conn, timeout=0.5):
    """Read from ``conn`` until it has been quiet for ``timeout`` seconds.

    The socket is switched to non-blocking mode (and left that way). Reading
    stops once data has arrived and no more came for ``timeout`` seconds, or
    after ``2 * timeout`` seconds if nothing arrived at all.

    Args:
        conn: Connected socket-like object.
        timeout: Quiet period in seconds.

    Returns:
        The received chunks joined together, or ``''`` if nothing was read.
    """
    import errno
    import socket

    conn.setblocking(0)
    total_data = []
    begin = time.time()
    while True:
        waited = time.time() - begin
        if total_data and waited > timeout:
            break
        elif waited > timeout * 2:
            break
        try:
            data = conn.recv(8192)
        except socket.error as exc:
            if getattr(exc, "errno", None) in (errno.EAGAIN, errno.EWOULDBLOCK):
                # No data yet on the non-blocking socket: expected, so wait
                # briefly instead of busy-looping and logging a failure.
                time.sleep(0.01)
            else:
                # Still best effort: keep trying until the deadline.
                gen_log.info("recv_timeout failed")
                time.sleep(0.1)
            continue
        if data:
            total_data.append(data)
            begin = time.time()
        else:
            time.sleep(0.1)
    if not total_data:
        return ''
    # Join with an empty value of the chunks' own type so bytes chunks work
    # as well as str chunks.
    return total_data[0][:0].join(total_data)
def flush():
    """Write every pending batch to the database.

    Previously failed batches (kept in ``failure``, keyed by SQL statement)
    are retried first and then forgotten; afterwards each non-empty queue is
    written with its own statement.
    """
    if failure:
        for sql, params in failure.items():
            if not params:
                continue
            batch_update(db, sql, params)
            logger.info("flush failed save:[ %s ] records", len(params))
        failure.clear()

    pending = (
        (deep_queue, deep_sql, "flush deep_queue size: [%s]"),
        (rti_queue, rti_sql, "flush rti_queue size: [%s]"),
        (newpoi_queue, newpoi_sql, "flush newpoi_queue size: [%s]"),
    )
    for queue, sql, message in pending:
        if queue:
            logger.info(message, len(queue))
            batch_update(db, sql, queue)
    logger.info("flush finished")
def _check_file(modify_times, module, path): try: modified = os.stat(path).st_mtime except Exception: return if path not in modify_times: modify_times[path] = modified return if modify_times[path] != modified: gen_log.info("%s modified; restarting server", path) modify_times[path] = modified else: return try: importlib.reload(module) except Exception: gen_log.error("Error importing %s, not reloading" % (path,)) traceback.print_exc() return False return True
def run(self, result=None):
    """Run the test, showing captured log output only if it fails.

    When the root logger has exactly one handler and it is a
    ``StreamHandler``, its stream is swapped for an in-memory buffer while
    the test runs; the buffer is copied to the real stream only if the
    number of failures plus errors changed. Any other logging setup is left
    untouched.
    """
    root_logger = logging.getLogger()
    if not root_logger.handlers:
        logging.basicConfig()
    trapped = root_logger.handlers[0]
    recognized = (len(root_logger.handlers) <= 1 and
                  isinstance(trapped, logging.StreamHandler))
    if not recognized:
        # Logging has been configured in a way we don't recognize,
        # so just leave it alone.
        super(LogTrapTestCase, self).run(result)
        return
    real_stream = trapped.stream
    try:
        trapped.stream = StringIO()
        gen_log.info("RUNNING TEST: " + str(self))
        problems_before = len(result.failures) + len(result.errors)
        super(LogTrapTestCase, self).run(result)
        problems_after = len(result.failures) + len(result.errors)
        if problems_after != problems_before:
            real_stream.write(trapped.stream.getvalue())
    finally:
        trapped.stream = real_stream
def post(self):
    '''
    Create new user with given arguments (email,password,name,...)
    Auto login after user created.

    The password is hashed with bcrypt on the executor before the
    ``create_user`` procedure is called. A returned id of 0 means the email
    already exists and the sign-up page is rendered again with an error.
    :return:
    '''
    raw_password = tornado.escape.utf8(self.get_argument("password"))
    hashed_password = yield executor.submit(
        bcrypt.hashpw, raw_password, bcrypt.gensalt())
    email = self.get_argument("email")
    name = self.get_argument("name")
    avatar = self.get_argument("avatar", None)

    with self.db.cursor() as cursor:
        cursor.callproc('create_user', [email, hashed_password, name, avatar])
        self.db.commit()
        row = cursor.fetchone()
        # Error will return 0. else user_id
        user_id = row[0]

    if user_id == 0:
        # Create user error: Dupe email.
        logger.info("AUTH SIGNUP FAIL reason:[Duplicated email] email:[%s]" % email)
        self.render("auth/sign_up.html", title="Sign up",
                    error="Email already exists: %s" % email)
        return

    logger.info("AUTH SIGNUP OK user_id:[%s] email:[%s]" % (user_id, email))
    self.set_secure_cookie("vonng_com_user", str(user_id))
    redirect_url = self.get_argument("next", "/")
    logger.info("AUTH SIGNIN OK user_id:[%s] redirect:[%s]" % (user_id, redirect_url))
    self.redirect(redirect_url)
def post(self):
    """Sign a user in by email and password.

    Looks the user up with the ``get_user`` procedure, hashes the submitted
    password with the stored hash as salt (on the executor) and compares
    the result with the stored hash. On success the login cookie is set and
    the client is redirected to ``next`` (default ``/``); otherwise the
    sign-in page is rendered with an error.
    """
    import hmac

    email = self.get_argument("email")
    with self.db.cursor(cursor_factory=psycopg2.extras.NamedTupleCursor) as cursor:
        cursor.callproc('get_user', [email])
        user = cursor.fetchone()

    if not user:
        logger.info("AUTH SIGNIN FAIL reason:[No such email] email:[%s]" % email)
        self.render("auth/sign_in.html", title="Sign in",
                    error="No matching login entry for email:%s" % email)
        # Stop here: without this return the code went on to read
        # ``user.user_id`` on a missing row.
        return

    user_id = user.user_id
    password = user.hashed_password
    hashed_password = yield executor.submit(
        bcrypt.hashpw, tornado.escape.utf8(self.get_argument("password")),
        tornado.escape.utf8(password))

    # Normalise both digests to bytes (the stored one may be text) and
    # compare in constant time.
    if hmac.compare_digest(tornado.escape.utf8(hashed_password),
                           tornado.escape.utf8(password)):
        self.set_secure_cookie("vonng_com_user", str(user_id))
        redirect_url = self.get_argument("next", "/")
        logger.info("AUTH SIGNIN OK user_id:[%s] redirect:[%s]" % (user_id, redirect_url))
        self.redirect(redirect_url)
    else:
        logger.info("AUTH SIGNIN FAIL reason:[Incorrect password] user_id:[%s] email:[%s]" % (user_id, email))
        self.render("auth/sign_in.html", title="Sign in", error="Incorrect password")
def read_headers(self, delegate):
    """Read and process the headers of one response.

    Generator-style coroutine. Reads up to the blank line ending the
    headers (optionally bounded by ``self.params.header_timeout``), parses
    the start line and headers, passes them to the delegate and finally
    calls ``delegate.finish()`` without reading a body. For a 1xx response
    the method calls itself again to read the following headers.

    Returns (via ``gen.Return``) ``True`` on success and ``False`` on
    timeout, when the stream was detached, or on malformed input.
    """
    try:
        # _parse_delegate yields two delegate objects; the first receives the
        # skip_body flag, the second the header/finish callbacks.
        _delegate, delegate = self._parse_delegate(delegate)
        header_future = self.stream.read_until_regex(
            b"\r?\n\r?\n",
            max_bytes=self.params.max_header_size)
        if self.params.header_timeout is None:
            header_data = yield header_future
        else:
            try:
                header_data = yield gen.with_timeout(
                    self.stream.io_loop.time() + self.params.header_timeout,
                    header_future,
                    quiet_exceptions=StreamClosedError)
            except gen.TimeoutError:
                self.close()
                raise gen.Return(False)
        start_line, headers = self._parse_headers(header_data)
        start_line = parse_response_start_line(start_line)
        self._response_start_line = start_line

        self._disconnect_on_finish = not self._can_keep_alive(
            start_line, headers)

        with _ExceptionLoggingContext(app_log):
            header_future = delegate.headers_received(start_line, headers)
            if header_future is not None:
                yield header_future
        if self.stream is None:
            # We've been detached.
            raise gen.Return(False)

        # determine body skip
        # TODO: 100 <= code < 200
        if (self._request_start_line is not None and
                self._request_start_line.method == 'HEAD'):
            _delegate.skip_body = True
        code = start_line.code
        if code == 304:
            # 304 responses may include the content-length header
            # but do not actually have a body.
            # http://tools.ietf.org/html/rfc7230#section-3.3
            _delegate.skip_body = True
        if code >= 100 and code < 200:
            # 1xx responses should never indicate the presence of
            # a body.
            if ('Content-Length' in headers or
                    'Transfer-Encoding' in headers):
                raise HTTPInputError(
                    "Response code %d cannot have body" % code)
            # TODO: client delegates will get headers_received twice
            # in the case of a 100-continue.  Document or change?
            yield self.read_headers(delegate)

        # return the response with no body set
        with _ExceptionLoggingContext(app_log):
            delegate.finish()
    except HTTPInputError as e:
        gen_log.info("Malformed HTTP message from %s: %s",
                     self.context, e)
        self.close()

        self._clear_callbacks()

        raise gen.Return(False)
    finally:
        # Drop the local reference to the future.
        header_future = None
    raise gen.Return(True)
gen_log.warning("Script exited with uncaught exception", exc_info=True) # If an exception occurred at import time, the file with the error # never made it into sys.modules and so we won't know to watch it. # Just to make sure we've covered everything, walk the stack trace # from the exception and watch every file. for (filename, lineno, name, line) in traceback.extract_tb(sys.exc_info()[2]): watch(filename) if isinstance(e, SyntaxError): # SyntaxErrors are special: their innermost stack frame is fake # so extract_tb won't see it and we have to get the filename # from the exception object. watch(e.filename) else: logging.basicConfig() gen_log.info("Script exited normally") # restore sys.argv so subsequent executions will include autoreload sys.argv = original_argv if mode == 'module': # runpy did a fake import of the module as __main__, but now it's # no longer in sys.modules. Figure out where it is and watch it. loader = pkgutil.get_loader(module) if loader is not None: watch(loader.get_filename()) wait() if __name__ == "__main__": # See also the other __main__ block at the top of the file, which modifies
# Entry-point template: the ``{{cookiecutter.project_slug}}`` placeholders are
# filled in when the project is generated.
from tornado.options import options
from {{cookiecutter.project_slug}}.run import create_app, run_app, define_options
from tornado.log import gen_log
from {{cookiecutter.project_slug}}.settings import settings

if __name__ == "__main__":
    # Register the options, then parse the command line into ``options``.
    define_options()
    options.parse_command_line()
    option_dict = options.as_dict()
    gen_log.info("option_dict:{}".format(option_dict))
    # Both mappings are expanded into keyword arguments; a key present in
    # both makes this call raise TypeError.
    app = create_app(**option_dict, **settings)
    run_app(app, options)
async def delete(self):
    """
    Delete some file on server

    The file name is taken from the ``filename`` request argument (``None``
    when absent) and deleted from ``Config.file_directory`` through
    ``self.file_helper``. Any exception raised by the helper is reported
    through the error response; otherwise a success response is sent.
    :return: None
    """
    filename = self.get_argument("filename", None)
    try:
        self.file_helper.delete_file(Config.file_directory, filename)
    except Exception as file_err:
        self.__error(error_msg=f"File error: {str(file_err)}")
        return
    self.__success(success_msg=f"File {filename} has been deleted")


def make_app():
    """Create the upload directory if needed and build the web application."""
    Config.file_directory.mkdir(parents=True, exist_ok=True)
    # fmt: off
    return web.Application(handlers=[(r"/api/file", FileHandler)])
    # fmt: on


if __name__ == "__main__":
    # Parse the command line, then serve on port 8888 until the loop stops.
    tornado.options.parse_command_line()
    gen_log.info("Server running")
    app = make_app()
    app.listen(8888)
    ioloop.IOLoop.current().start()
# -*- encoding: utf-8 -*- from tornado.log import enable_pretty_logging, LogFormatter, access_log, app_log, gen_log gen_log.info("---> importing .pipelines") import os import json from pprint import pprint, pformat import pymongo from pymongo import MongoClient from scrapy import signals import logging from tornado.log import app_log, gen_log, access_log # set logger for scrapy log_pipe = logging.getLogger("log_pipeline") log_pipe.setLevel(logging.DEBUG) # Create the Handler for logging data to a file logger_handler = logging.FileHandler('logs/openscraper_pipeline_logging.log') logger_handler.setLevel(logging.WARNING) # Create a Formatter for formatting the log messages logger_formatter = logging.Formatter('%(name)s - %(levelname)s - %(message)s') # Add the Formatter to the Handler logger_handler.setFormatter(logger_formatter)
# ``socket`` and ``time`` are used below but were missing from this import
# block.
import socket
import time

import tornado.options
from tornado.options import define, options
from tornado.log import gen_log
import etcd

from utils import bytes2int, get_ip

define("port", default=30000, help="run on the given port", type=int)
define("weight", default=1, help="weight", type=int)
define("etcd", default="localhost", help="etcd hostname", type=str)

tornado.options.parse_command_line()

# Register this provider in etcd under "<ip>:<port>", with the weight as value.
client = etcd.Client(host=options.etcd, port=2379)
client.write(
    '/dubbomesh/com.alibaba.dubbo.performance.demo.provider.IHelloService/{0}:{1}'
    .format(get_ip(), options.port), options.weight)
gen_log.info('register with {0}:{1} [{2}]'.format(get_ip(), options.port, options.weight))

# Block until something accepts connections on local port 20880.
while True:
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect(('127.0.0.1', 20880))
    except socket.error:
        gen_log.info("connect to 20880 fail, wait a second")
        time.sleep(0.2)
    else:
        gen_log.info("connect to 20880 success")
        break
    finally:
        # Close the probe socket on every attempt; the original leaked one
        # socket per failed connect.
        s.close()
def fork_processes(
    num_processes: Optional[int], max_restarts: Optional[int] = None
) -> int:
    """Fork a set of worker processes and supervise them from the parent.

    When ``num_processes`` is ``None`` or not positive, ``cpu_count()``
    decides how many children are forked; otherwise exactly that many
    are started.  The workers are separate processes, so they share no
    memory.

    As the upstream documentation notes, multiple processes are not
    compatible with the autoreload module (or ``autoreload=True`` /
    ``debug=True`` on `tornado.web.Application`), and no IOLoop may be
    created or referenced before this function is called.

    Each child returns its *task id* (``0`` .. ``num_processes - 1``).
    A child that is killed by a signal or exits with a non-zero status
    is forked again under the same id; once more than ``max_restarts``
    restarts (default 100) have happened a ``RuntimeError`` is raised.
    The parent never returns: after every child has exited normally it
    calls ``sys.exit(0)``.

    Availability: Unix
    """
    if sys.platform == "win32":
        # The exact form of this condition matters to mypy; it understands
        # if but not assert in this context.
        raise Exception("fork not available on windows")
    if max_restarts is None:
        max_restarts = 100

    global _task_id
    assert _task_id is None
    if num_processes is None or num_processes <= 0:
        num_processes = cpu_count()
    gen_log.info("Starting %d processes", num_processes)
    # Maps the pid of each live child to its task id (parent only).
    children = {}

    def start_child(i: int) -> Optional[int]:
        global _task_id
        pid = os.fork()
        if pid != 0:
            # Parent: remember the child and keep supervising.
            children[pid] = i
            return None
        # Child: give it its own random state and record its identity.
        _reseed_random()
        _task_id = i
        return i

    for index in range(num_processes):
        task_id = start_child(index)
        if task_id is not None:
            return task_id

    restart_count = 0
    while children:
        pid, status = os.wait()
        if pid not in children:
            continue
        task_id = children.pop(pid)
        if os.WIFSIGNALED(status):
            gen_log.warning(
                "child %d (pid %d) killed by signal %d, restarting",
                task_id,
                pid,
                os.WTERMSIG(status),
            )
        else:
            exit_code = os.WEXITSTATUS(status)
            if exit_code == 0:
                gen_log.info("child %d (pid %d) exited normally", task_id, pid)
                continue
            gen_log.warning(
                "child %d (pid %d) exited with status %d, restarting",
                task_id,
                pid,
                exit_code,
            )
        restart_count += 1
        if restart_count > max_restarts:
            raise RuntimeError("Too many child restarts, giving up")
        respawned_id = start_child(task_id)
        if respawned_id is not None:
            return respawned_id
    # All child processes exited cleanly, so exit the master process
    # instead of just returning to right after the call to
    # fork_processes (which will probably just start up another IOLoop
    # unless the caller checks the return value).
    sys.exit(0)
# -*- encoding: utf-8 -*- from tornado.log import enable_pretty_logging, LogFormatter, access_log, app_log, gen_log gen_log.info("--> importing .urls") from controller import * from spider_handler import * from api_handler import * ### most routing functions are in controller.py ### for url mapping in Tornado cf : https://stackoverflow.com/questions/17166051/url-regex-mapping-in-tornado-web-server ### cf : https://code.tutsplus.com/tutorials/8-regular-expressions-you-should-know--net-6149 ### cf : https://gist.github.com/c4urself/1028897 ### cf : http://www.lexev.org/en/2014/set-url-for-tornado-handlers/ ### cf : https://makandracards.com/theogfx/11605-python-+-tornado-variable-length-url-parameters ### cf : https://stackoverflow.com/questions/10726486/tornado-url-query-parameters ### cf : https://stackoverflow.com/questions/6038061/regular-expression-to-find-urls-within-a-string ### cf : https://docs.python.org/2/library/re.html#regular-expression-syntax ### "/( ?P<your_arg>.*? )" - arg in url as kwarg ### "/( \w+ )" - string of letter ### "/( [0-9Xx\-]+ )" - string of numbers numbers and dashes ### "/( .* )" - whatever comes ### "( [^/]+ )" - as slug ### if url is like "/api/?q=this&r=that" --> self.request.arguments gets : {'q': ['this'], 'r': ['that']} urls = [ ### index
def _make_request(self, path, query=None, method="GET", body=None, headers=None, **kwargs):
    """
    Makes request on `path` in the graph.

    path -- endpoint to the facebook graph api
    query -- A dictionary that becomes a query string to be appended to the path
    method -- GET, POST, etc
    body -- message body
    headers -- Like "Content-Type"

    Extra keyword arguments are forwarded to ``HTTPRequest``.

    This is a generator that yields the fetch future and delivers the
    decoded JSON payload through ``gen.Return`` (presumably it is wrapped
    by a coroutine decorator outside this span).

    Raises ``WioAPIError`` when the fetch fails with ``HTTPError`` or when
    the response is not declared as JSON; any other fetch exception is
    logged and re-raised unchanged.
    """
    # Work on a copy so that adding the token never mutates the caller's dict.
    query = dict(query) if query else {}
    if self.access_token:
        query["access_token"] = self.access_token
    query_string = urllib.urlencode(query) if query else ""

    if method == "GET":
        body = None
    else:
        # Headers may be absent, or present without a Content-Type entry.
        request_type = (headers.get('Content-Type') or "") if headers else ""
        if "json" in request_type:
            body = json.dumps(body) if body else ""
        else:
            body = urllib.urlencode(body) if body else ""

    url = BASE_URL + path
    if query_string:
        url += "?" + query_string

    # NOTE(review): the logged URL contains the access token when one is set.
    gen_log.info("URL=====> {}".format(url))
    gen_log.info("method=====> {}".format(method))
    gen_log.info("body=====> {}".format(body))

    client = AsyncHTTPClient()
    request = HTTPRequest(url, method=method, body=body, headers=headers, **kwargs)
    try:
        response = yield client.fetch(request)
    except HTTPError as e:
        raise WioAPIError(e)
    except Exception as e:
        gen_log.error(e)
        raise

    content_type = response.headers.get('Content-Type')
    gen_log.info("#### content_type: {}".format(content_type))
    gen_log.info("#### body: {}".format(response.body))
    # A response without a Content-Type header is treated as "not JSON"
    # instead of failing with a TypeError on the membership test.
    if 'json' not in (content_type or ""):
        raise WioAPIError('Maintype was not json')
    data = json.loads(response.body.decode())
    raise gen.Return(data)
def fork_processes(ports=None, max_restarts=100):
    """Fork one worker process per port and supervise them from the parent.

    ports -- non-empty list or tuple of ports; one child is forked for each.
    max_restarts -- total number of restarts tolerated across all children.

    In a child the module-level ``listen_port`` is set to the child's port
    and that port is returned.  The parent never returns: it waits on its
    children, re-forks any child that was killed by a signal or exited with
    a non-zero status, and calls ``sys.exit(0)`` once no children remain or
    on Ctrl+C.

    Raises ``RuntimeError`` when more than ``max_restarts`` restarts have
    been needed.
    """
    global listen_port
    assert listen_port is None
    assert isinstance(ports, (list, tuple)) and len(ports) > 0, '端口列表不正确.'
    gen_log.info("Starting service on ports: %s", ports)
    # Maps the pid of each live child to the port it serves (parent only).
    children = {}

    def start_child(port):
        pid = os.fork()
        if pid == 0:
            # child process
            _reseed_random()
            global listen_port
            listen_port = port
            print('FORKED: %s' % port)
            return port
        else:
            children[pid] = port
            return None

    for port in ports:
        l_port = start_child(port)
        if l_port is not None:
            return l_port

    num_restarts = 0
    while True:
        try:
            pid, status = os.wait()
        except KeyboardInterrupt:
            print('Ctrl+C, Exit.')
            sys.exit(0)
        except OSError as e:
            err = errno_from_exception(e)
            if err == errno.EINTR:
                # Interrupted system call: simply wait again.
                continue
            if err == errno.ECHILD:
                # No children left to wait for.
                break
            raise
        if pid not in children:
            continue
        port = children.pop(pid)
        if os.WIFSIGNALED(status):
            gen_log.warning(
                "child [port:%s] [pid:%d] killed by [signal:%d], restarting",
                port, pid, os.WTERMSIG(status))
        elif os.WEXITSTATUS(status) != 0:
            gen_log.warning(
                "child [port:%s] [pid:%d] exited with status %d, restarting",
                port, pid, os.WEXITSTATUS(status))
        else:
            gen_log.info("child [port:%s] [pid:%d] exited normally", port, pid)
            continue
        num_restarts += 1
        if num_restarts > max_restarts:
            # Format the message here: exception constructors do not apply
            # %-style arguments the way logging calls do.
            raise RuntimeError(
                "child [port:%s] too many restarts, giving up" % (port,))
        l_port = start_child(port)
        if l_port is not None:
            return l_port
    # All child processes exited cleanly, so exit the master process
    # instead of just returning to right after the call to
    # fork_processes (which will probably just start up another IOLoop
    # unless the caller checks the return value).
    print('Children: %s' % sorted(children.items(), key=lambda x: x[1]))
    print('Main Process Exiting...')
    sys.exit(0)
def _load_test_data(db, path):
    """Replace the stored temperature data with the records described in ``path``."""
    gen_log.info('Deleting any existing temperature data from database')
    db.delete_temperature_data()
    gen_log.info('Loading test data to database')
    with open(path) as f:
        for line in f:
            line = line.strip()
            # Blank lines and lines starting with # (with or without a
            # following space) carry no data.
            if not line or line.startswith('#'):
                continue
            data = line.split()
            # Automatically generate a (large) number of entries using a line like:
            #   generate start_timestamp interval_secs count
            # For example, generate data at 5 minute intervals for 12*24*365=105120, a year of data
            #   generate 1435708800 300 105120
            if data[0].lower() == 'generate':
                temp = hardware.Temperature(db, cfg.sensor_params)
                start_ts = int(data[1])
                interval_sec = int(data[2])
                generate_count = int(data[3])
                gen_log.info(
                    'Generating {} records at {} second intervals from timestamp {}'
                    .format(generate_count, interval_sec, start_ts))
                for loop_count in range(generate_count):
                    # On an RPi3 it takes about 30 seconds to generate 1000 records.
                    # Give some output every 1000 records so the user can see progress.
                    if (loop_count + 1) % 1000 == 0:
                        gen_log.info(
                            'Generating temperature record {}'.format(
                                loop_count + 1))
                    db.save_temperature(
                        temp.get_temp(),
                        start_ts + (loop_count * interval_sec))
                continue
            # 1st number data[0] is the timestamp, 2nd number data[1] is the temperature
            db.save_temperature(data[1], data[0])
    gen_log.info('Test data written to database. Starting server.')


def run():
    """Initialise the database, then start the HTTP server and its IOLoop.

    When ``cfg.test_data`` is set, the file it names is loaded into the
    database and no sensor polling is scheduled.  Otherwise the temperature
    sensor is read once immediately and then every ``cfg.temp_interval``
    seconds.  The call blocks in the IOLoop.
    """
    # init the database
    db = database.db(cfg.database_url)
    db.create_all()

    if cfg.test_data:
        _load_test_data(db, cfg.test_data)
    else:
        # init the temperature sensor
        temp = hardware.Temperature(db, cfg.sensor_params)
        temp.save_current()

    # settings for the tornado app
    settings = {'db': db, 'debug': cfg.debug_mode}

    # list of handlers for the server
    handlers = [
        url(r'^/api/temperature/?$', temperature_handler),
        url(r'^/api/temperature/(\d+)/?$', temperature_handler),
        url(r'^/api/temperature/current/?$', current_temp_handler),
        url(r'^/api/temperature/now/?$', current_temp_handler),
        url(r'^/api/temperature/(max|min|ave|stats)/?$', stats_temp_handler),
        url(r'^/api/info/?$', info_handler),
    ]

    # add the static file handler if we want to use it
    if cfg.serve_webapp:
        handlers.append(
            url(r'^/(.*)$', StaticFileHandler, {
                'path': 'webapp',
                'default_filename': 'index.html'
            }))

    # set up the server app
    server = Application(handlers, **settings)
    server.listen(cfg.listen_port)

    if not cfg.test_data:
        # log the temperature at intervals (PeriodicCallback takes milliseconds)
        PeriodicCallback(temp.save_current,
                         int(cfg.temp_interval) * 1000).start()

    # Let the server loop servicing requests
    tornado.ioloop.IOLoop.current().start()
def _read_message(self, delegate):
    """Read one HTTP message from the stream and feed it to ``delegate``.

    This is a generator that yields futures and reports its result through
    ``gen.Return`` (presumably it is wrapped by a coroutine decorator
    outside this span).  The result is ``False`` when the header or body
    read times out, when the connection has been detached (``self.stream``
    is ``None``), or when the message is malformed; otherwise ``True``.
    """
    need_delegate_close = False
    try:
        # Asynchronously read the header block from the stream; the call
        # returns a future that resolves to the raw header bytes.  In HTTP
        # the headers are separated from the body by a blank line
        # (CRLF CRLF), so the regex below makes the read stop exactly at
        # the end of the headers.
        header_future = self.stream.read_until_regex(
            b"\r?\n\r?\n",
            max_bytes=self.params.max_header_size)
        if self.params.header_timeout is None:
            header_data = yield header_future
        else:
            # A header timeout is configured: wrap the future with
            # gen.with_timeout so the read has to complete within the
            # limit; otherwise gen.TimeoutError is raised and the
            # connection is closed.
            try:
                header_data = yield gen.with_timeout(
                    self.stream.io_loop.time() + self.params.header_timeout,
                    header_future,
                    io_loop=self.stream.io_loop,
                    quiet_exceptions=iostream.StreamClosedError)
            except gen.TimeoutError:
                self.close()
                raise gen.Return(False)
        # Split the raw data into a start line and headers.  The start
        # line has a different shape on each side of the connection, so it
        # is parsed according to the role of this connection:
        #   client side: a status-line, e.g.  HTTP/1.1 200 OK
        #   server side: a request-line, e.g. GET / HTTP/1.1
        start_line, headers = self._parse_headers(header_data)
        if self.is_client:
            start_line = httputil.parse_response_start_line(start_line)
            self._response_start_line = start_line
        else:
            start_line = httputil.parse_request_start_line(start_line)
            self._request_start_line = start_line
            self._request_headers = headers

        # Decide from the start line and headers whether the connection
        # has to be closed once this message is finished.
        self._disconnect_on_finish = not self._can_keep_alive(
            start_line, headers)
        need_delegate_close = True
        # Original annotation: exceptions raised inside this context are
        # written to app_log (the context manager is defined elsewhere).
        with _ExceptionLoggingContext(app_log):
            # Hand the parsed start line and headers to the delegate.
            # Original annotation: on the server side this is where the
            # handler is matched from the request path (not visible here).
            header_future = delegate.headers_received(start_line, headers)
            # Original annotation: in the streaming case the delegate
            # returns a future here rather than None.
            if header_future is not None:
                yield header_future
        if self.stream is None:
            # We've been detached.
            need_delegate_close = False
            raise gen.Return(False)
        skip_body = False
        if self.is_client:
            # On the client side the request method and the response
            # status code decide whether a body is read at all.
            if (self._request_start_line is not None and
                    self._request_start_line.method == 'HEAD'):
                skip_body = True
            code = start_line.code
            if code == 304:
                # 304 responses may include the content-length header
                # but do not actually have a body.
                # http://tools.ietf.org/html/rfc7230#section-3.3
                skip_body = True
            if code >= 100 and code < 200:
                # 1xx responses should never indicate the presence of
                # a body.
                if ('Content-Length' in headers or
                        'Transfer-Encoding' in headers):
                    raise httputil.HTTPInputError(
                        "Response code %d cannot have body" % code)
                # TODO: client delegates will get headers_received twice
                # in the case of a 100-continue.  Document or change?
                yield self._read_message(delegate)
        else:
            # On the server side skip_body is never set.  This branch only
            # answers client requests that carry "Expect: 100-continue".
            if (headers.get("Expect") == "100-continue" and
                    not self._write_finished):
                self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
        if not skip_body:
            body_future = self._read_body(
                start_line.code if self.is_client else 0, headers, delegate)
            if body_future is not None:
                if self._body_timeout is None:
                    yield body_future
                else:
                    try:
                        yield gen.with_timeout(
                            self.stream.io_loop.time() + self._body_timeout,
                            body_future, self.stream.io_loop,
                            quiet_exceptions=iostream.StreamClosedError)
                    except gen.TimeoutError:
                        gen_log.info("Timeout reading body from %s",
                                     self.context)
                        self.stream.close()
                        raise gen.Return(False)
        self._read_finished = True
        if not self._write_finished or self.is_client:
            # Original annotation: on the server side this is where the
            # matched handler gets called to write the response back; by
            # now the delegate holds both the headers and the body and can
            # pre-process them (e.g. parse form data) before handing them
            # to the handler.  The delegate internals are not visible here.
            need_delegate_close = False
            with _ExceptionLoggingContext(app_log):
                delegate.finish()
        # If we're waiting for the application to produce an asynchronous
        # response, and we're not detached, register a close callback
        # on the stream (we didn't need one while we were reading)
        if (not self._finish_future.done() and
                self.stream is not None and
                not self.stream.closed()):
            self.stream.set_close_callback(self._on_connection_close)
            yield self._finish_future
        if self.is_client and self._disconnect_on_finish:
            self.close()
        if self.stream is None:
            raise gen.Return(False)
    except httputil.HTTPInputError as e:
        gen_log.info("Malformed HTTP message from %s: %s",
                     self.context, e)
        self.close()
        raise gen.Return(False)
    finally:
        # Tell the delegate about the close only if it saw the headers but
        # was neither finished nor detached.
        if need_delegate_close:
            with _ExceptionLoggingContext(app_log):
                delegate.on_connection_close()
        self._clear_callbacks()
    raise gen.Return(True)
class HTTPConnection(object):
    """Handles a connection to an HTTP client, executing HTTP requests.

    We parse HTTP headers and bodies, and execute the request callback
    until the HTTP connection is closed.
    """
    def __init__(self, stream, address, request_callback, no_keep_alive=False,
                 xheaders=False, protocol=None):
        """Store the connection parameters and start reading the first request.

        ``request_callback`` is called with the request object once a
        request without a body has been parsed.
        """
        self.stream = stream
        self.address = address
        # Save the socket's address family now so we know how to
        # interpret self.address even after the stream is closed
        # and its socket attribute replaced with None.
        self.address_family = stream.socket.family
        self.request_callback = request_callback
        self.no_keep_alive = no_keep_alive
        self.xheaders = xheaders
        self.protocol = protocol
        self._clear_request_state()
        # Save stack context here, outside of any request.  This keeps
        # contexts from one request from leaking into the next.
        self._header_callback = stack_context.wrap(self._on_headers)
        self.stream.set_close_callback(self._on_connection_close)
        self.stream.read_until(b"\r\n\r\n", self._header_callback)

    def _clear_request_state(self):
        """Clears the per-request state.

        This is run in between requests to allow the previous handler
        to be garbage collected (and prevent spurious close callbacks),
        and when the connection is closed (to break up cycles and
        facilitate garbage collection in cpython).
        """
        self._request = None
        self._request_finished = False
        self._write_callback = None
        self._close_callback = None

    def set_close_callback(self, callback):
        """Sets a callback that will be run when the connection is closed.

        Use this instead of accessing
        `HTTPConnection.stream.set_close_callback
        <.BaseIOStream.set_close_callback>` directly (which was the
        recommended approach prior to Tornado 3.0).
        """
        self._close_callback = stack_context.wrap(callback)

    def _on_connection_close(self):
        """Run the registered close callback (at most once) and drop state."""
        if self._close_callback is not None:
            callback = self._close_callback
            self._close_callback = None
            callback()
        # Delete any unfinished callbacks to break up reference cycles.
        self._header_callback = None
        self._clear_request_state()

    def close(self):
        """Close the underlying stream and release per-request state."""
        self.stream.close()
        # Remove this reference to self, which would otherwise cause a
        # cycle and delay garbage collection of this connection.
        self._header_callback = None
        self._clear_request_state()

    def write(self, chunk, callback=None):
        """Writes a chunk of output to the stream."""
        if not self.stream.closed():
            self._write_callback = stack_context.wrap(callback)
            self.stream.write(chunk, self._on_write_complete)

    def finish(self):
        """Finishes the request."""
        self._request_finished = True
        # No more data is coming, so instruct TCP to send any remaining
        # data immediately instead of waiting for a full packet or ack.
        self.stream.set_nodelay(True)
        if not self.stream.writing():
            self._finish_request()

    def _on_write_complete(self):
        """Run the pending write callback, then finish the request if due."""
        if self._write_callback is not None:
            callback = self._write_callback
            self._write_callback = None
            callback()
        # _on_write_complete is enqueued on the IOLoop whenever the
        # IOStream's write buffer becomes empty, but it's possible for
        # another callback that runs on the IOLoop before it to
        # simultaneously write more data and finish the request.  If
        # there is still data in the IOStream, a future
        # _on_write_complete will be responsible for calling
        # _finish_request.
        if self._request_finished and not self.stream.writing():
            self._finish_request()

    def _finish_request(self):
        """Close the connection or start reading the next request.

        The decision follows ``no_keep_alive``, the request's HTTP version
        and its ``Connection`` header.
        """
        if self.no_keep_alive or self._request is None:
            disconnect = True
        else:
            connection_header = self._request.headers.get("Connection")
            if connection_header is not None:
                connection_header = connection_header.lower()
            if self._request.supports_http_1_1():
                disconnect = connection_header == "close"
            elif ("Content-Length" in self._request.headers
                    or self._request.method in ("HEAD", "GET")):
                disconnect = connection_header != "keep-alive"
            else:
                disconnect = True
        self._clear_request_state()
        if disconnect:
            self.close()
            return
        try:
            # Use a try/except instead of checking stream.closed()
            # directly, because in some cases the stream doesn't discover
            # that it's closed until you try to read from it.
            self.stream.read_until(b"\r\n\r\n", self._header_callback)

            # Turn Nagle's algorithm back on, leaving the stream in its
            # default state for the next request.
            self.stream.set_nodelay(False)
        except iostream.StreamClosedError:
            self.close()

    def _on_headers(self, data):
        """Parse the request line and headers, then dispatch the request.

        A request with a non-empty ``Content-Length`` has its body read
        first (the continuation is ``self._on_request_body``, which is not
        part of the visible class); otherwise ``request_callback`` is
        called right away.  Malformed requests are logged and the
        connection is closed.
        """
        try:
            try:
                data = native_str(data.decode('latin1'))
            except LookupError:
                # Was ``except LookupError, err:`` (Python 2-only syntax,
                # with ``err`` unused); this form parses on Python 2 and 3
                # and matches the ``except ... as`` style used below.
                data = native_str(data)
            eol = data.find("\r\n")
            start_line = data[:eol]
            try:
                method, uri, version = start_line.split(" ")
            except ValueError:
                raise _BadRequestException("Malformed HTTP request line")
            if not version.startswith("HTTP/"):
                raise _BadRequestException(
                    "Malformed HTTP version in HTTP Request-Line")
            try:
                headers = httputil.HTTPHeaders.parse(data[eol:])
            except ValueError:
                # Probably from split() if there was no ':' in the line
                raise _BadRequestException("Malformed HTTP headers")

            # HTTPRequest wants an IP, not a full socket address
            if self.address_family in (socket.AF_INET, socket.AF_INET6):
                remote_ip = self.address[0]
            else:
                # Unix (or other) socket; fake the remote address
                remote_ip = '0.0.0.0'

            self._request = HTTPRequest(
                connection=self, method=method, uri=uri, version=version,
                headers=headers, remote_ip=remote_ip, protocol=self.protocol)

            content_length = headers.get("Content-Length")
            if content_length:
                content_length = int(content_length)
                if content_length > self.stream.max_buffer_size:
                    raise _BadRequestException("Content-Length too long")
                if headers.get("Expect") == "100-continue":
                    self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
                self.stream.read_bytes(content_length, self._on_request_body)
                return

            self.request_callback(self._request)
        except _BadRequestException as e:
            gen_log.info("Malformed HTTP request from %r: %s",
                         self.address, e)
            self.close()
            return
def _on_headers(self, data):
    """Parse the request line and headers, then dispatch the request.

    ``data`` is the raw header block.  The first line is split into
    method, URI and version; the remaining lines are parsed into a headers
    object.  A request with a non-empty ``Content-Length`` has its body
    read first and continues in ``self._on_request_body``; any other
    request goes straight to ``self.request_callback``.  Malformed input
    is logged and the connection is closed.
    """
    try:
        # The first line is the request line, e.g. "GET / HTTP/1.1".
        text = native_str(data.decode('latin1'))
        line_end = text.find("\r\n")
        request_line = text[:line_end]
        try:
            # Request method, request URI and HTTP version.
            method, uri, version = request_line.split(" ")
        except ValueError:
            raise _BadRequestException("Malformed HTTP request line")
        if not version.startswith("HTTP/"):
            raise _BadRequestException("Malformed HTTP version in HTTP Request-Line")
        try:
            # Everything after the request line is parsed into the headers
            # mapping (the request line itself is not included).
            headers = httputil.HTTPHeaders.parse(text[line_end:])
        except ValueError:
            # Probably from split() if there was no ':' in the line
            raise _BadRequestException("Malformed HTTP headers")

        # HTTPRequest wants an IP, not a full socket address; for Unix
        # (or other) sockets the remote address is faked.
        is_ip_socket = self.address_family in (socket.AF_INET, socket.AF_INET6)
        remote_ip = self.address[0] if is_ip_socket else '0.0.0.0'

        # Wrap everything in a request object.  It keeps a reference back
        # to this connection (connection=self) and is what the request
        # callback eventually receives.
        self._request = HTTPRequest(
            connection=self,
            method=method,
            uri=uri,
            version=version,
            headers=headers,
            remote_ip=remote_ip,
            protocol=self.protocol)

        raw_length = headers.get("Content-Length")
        if not raw_length:
            # No body announced: hand the request to the callback now.
            # Original author's note: request_callback is whatever was
            # given to this connection when it was constructed -- either a
            # plain function taking the request, or an object such as an
            # Application instance that is callable through __call__.
            # Those definitions are outside this block.
            self.request_callback(self._request)
            return

        # A body was announced: read exactly that many bytes and continue
        # in _on_request_body.
        body_size = int(raw_length)
        if body_size > self.stream.max_buffer_size:
            raise _BadRequestException("Content-Length too long")
        if headers.get("Expect") == "100-continue":
            self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
        self.stream.read_bytes(body_size, self._on_request_body)
    except _BadRequestException as e:
        gen_log.info("Malformed HTTP request from %s: %s",
                     self.address[0], e)
        self.close()
def main(**kwargs):
    """Run tests from the command line with tornado-style options.

    Behaves essentially like `unittest.main` from the standard library,
    with tornado option parsing and log formatting added.  The simplest
    invocation names the test module on the command line::

        python -m tornado.testing tornado.test.stack_context_test

    The standard library unittest documentation describes the ways tests
    can be specified.

    A project with many tests may provide a script such as
    ``tornado/test/runtests.py`` that defines ``all()`` (returning a test
    suite) and then calls `tornado.testing.main()`.  Naming a single test
    on the command line still overrides the ``all()`` suite::

        # Runs all tests
        python -m tornado.test.runtests
        # Runs one test
        python -m tornado.test.runtests tornado.test.stack_context_test

    Extra keyword arguments are forwarded to ``unittest.main()``; for
    instance ``tornado.testing.main(verbosity=2)`` prints details for
    every test.  See
    http://docs.python.org/library/unittest.html#unittest.main for the
    complete argument list.
    """
    from tornado.options import define, options, parse_command_line

    define('exception_on_interrupt', type=bool, default=True,
           help=("If true (default), ctrl-c raises a KeyboardInterrupt "
                 "exception. This prints a stack trace but cannot interrupt "
                 "certain operations. If false, the process is more reliably "
                 "killed, but does not print a stack trace."))

    # support the same options as unittest's command-line interface
    for flag_name in ('verbose', 'quiet', 'failfast', 'catch', 'buffer'):
        define(flag_name, type=bool)

    argv = [sys.argv[0]] + parse_command_line(sys.argv)

    if not options.exception_on_interrupt:
        signal.signal(signal.SIGINT, signal.SIG_DFL)

    # (option name, unittest.main keyword, value).  Order matters: a later
    # entry overrides an earlier one that targets the same keyword.
    option_to_kwarg = (
        ('verbose', 'verbosity', 2),
        ('quiet', 'verbosity', 0),
        ('failfast', 'failfast', True),
        ('catch', 'catchbreak', True),
        ('buffer', 'buffer', True),
    )
    for option_name, kwarg_name, value in option_to_kwarg:
        if getattr(options, option_name) is not None:
            kwargs[kwarg_name] = value

    tests_named = len(argv) > 1
    if __name__ == '__main__' and not tests_named:
        print("No tests specified", file=sys.stderr)
        sys.exit(1)
    try:
        # In order to be able to run tests by their fully-qualified name
        # on the command line without importing all tests here,
        # module must be set to None.  Python 3.2's unittest.main ignores
        # defaultTest if no module is given (it tries to do its own
        # test discovery, which is incompatible with auto2to3), so don't
        # set module if we're not asking for a specific test.
        if tests_named:
            unittest.main(module=None, argv=argv, **kwargs)
        else:
            unittest.main(defaultTest="all", argv=argv, **kwargs)
    except SystemExit as e:
        # unittest.main reports its outcome through the exit code.
        if e.code == 0:
            gen_log.info('PASS')
        else:
            gen_log.error('FAIL')
        raise
def _close_on_error(self, file):
    """Log the descriptor number of ``file`` and then close it."""
    descriptor = file.fileno()
    gen_log.info('closing %d on connection close.', descriptor)
    file.close()
def fork_processes(num_processes, max_restarts=100):
    """Fork worker processes and supervise them from the parent.

    With ``num_processes`` of ``None`` or a non-positive value the number
    of children comes from ``cpu_count()``; any positive value is used as
    given.  Workers are separate processes and share no memory.

    As the upstream documentation notes, multiple processes are not
    compatible with the autoreload module (or the debug=True option to
    `tornado.web.Application`).  No IOLoop may exist before this call: a
    ``RuntimeError`` is raised when one has already been initialized.

    Each child returns its *task id* (``0`` .. ``num_processes - 1``).
    A child that is killed by a signal or exits with a non-zero status is
    forked again under the same id; once more than ``max_restarts``
    restarts have happened a ``RuntimeError`` is raised.  The parent never
    returns: when every child has exited normally it calls
    ``sys.exit(0)``.
    """
    global _task_id
    assert _task_id is None
    if num_processes is None or num_processes <= 0:
        num_processes = cpu_count()
    if ioloop.IOLoop.initialized():
        raise RuntimeError("Cannot run in multiple processes: IOLoop instance "
                           "has already been initialized. You cannot call "
                           "IOLoop.instance() before calling start_processes()")
    gen_log.info("Starting %d processes", num_processes)
    # Maps the pid of each live child to its task id (parent only).
    children = {}

    def start_child(i):
        global _task_id
        child_pid = os.fork()
        if child_pid:
            # Parent: record the child and carry on.
            children[child_pid] = i
            return None
        # Child: fresh random state, remember our own task id.
        _reseed_random()
        _task_id = i
        return i

    for slot in range(num_processes):
        task_id = start_child(slot)
        if task_id is not None:
            return task_id

    restarts = 0
    while children:
        try:
            pid, status = os.wait()
        except OSError as wait_error:
            if wait_error.errno != errno.EINTR:
                raise
            # Interrupted system call: wait again.
            continue
        if pid not in children:
            continue
        task_id = children.pop(pid)
        if os.WIFSIGNALED(status):
            gen_log.warning("child %d (pid %d) killed by signal %d, restarting",
                            task_id, pid, os.WTERMSIG(status))
        else:
            exit_status = os.WEXITSTATUS(status)
            if exit_status == 0:
                gen_log.info("child %d (pid %d) exited normally", task_id, pid)
                continue
            gen_log.warning("child %d (pid %d) exited with status %d, restarting",
                            task_id, pid, exit_status)
        restarts += 1
        if restarts > max_restarts:
            raise RuntimeError("Too many child restarts, giving up")
        respawned = start_child(task_id)
        if respawned is not None:
            return respawned
    # All child processes exited cleanly, so exit the master process
    # instead of just returning to right after the call to
    # fork_processes (which will probably just start up another IOLoop
    # unless the caller checks the return value).
    sys.exit(0)
async def _read_message(self, delegate: httputil.HTTPMessageDelegate) -> bool:
    """Read one HTTP message from ``self.stream`` and feed it to ``delegate``.

    Works in both directions: with ``self.is_client`` set the message is a
    response (status line), otherwise it is a request (request line).

    Returns ``True`` when the message was processed to the end and the
    stream is still attached. Returns ``False`` when reading the headers
    or the body timed out, when the stream was detached, or when the
    input was malformed (``httputil.HTTPInputError``).
    """
    need_delegate_close = False
    try:
        # Read the header block: everything up to the first blank line,
        # capped at params.max_header_size bytes.
        header_future = self.stream.read_until_regex(
            b"\r?\n\r?\n", max_bytes=self.params.max_header_size
        )
        if self.params.header_timeout is None:
            header_data = await header_future
        else:
            try:
                header_data = await gen.with_timeout(
                    self.stream.io_loop.time() + self.params.header_timeout,
                    header_future,
                    quiet_exceptions=iostream.StreamClosedError,
                )
            except gen.TimeoutError:
                # Headers did not arrive in time: drop the connection.
                self.close()
                return False
        # Split the raw header block into the start line and header fields.
        start_line_str, headers = self._parse_headers(header_data)
        if self.is_client:
            resp_start_line = httputil.parse_response_start_line(start_line_str)
            self._response_start_line = resp_start_line
            start_line = (
                resp_start_line
            )  # type: Union[httputil.RequestStartLine, httputil.ResponseStartLine]
            # TODO: this will need to change to support client-side keepalive
            self._disconnect_on_finish = False
        else:
            # Parse the request line (method, path, version).
            req_start_line = httputil.parse_request_start_line(start_line_str)
            self._request_start_line = req_start_line
            self._request_headers = headers
            start_line = req_start_line
            # Decide whether the connection has to be closed once this
            # message is finished (i.e. it cannot be kept alive).
            self._disconnect_on_finish = not self._can_keep_alive(
                req_start_line, headers
            )
        # From here on the delegate has been handed a message, so the
        # ``finally`` clause must tell it if we bail out before finish().
        need_delegate_close = True
        with _ExceptionLoggingContext(app_log):
            header_recv_future = delegate.headers_received(start_line, headers)
            if header_recv_future is not None:
                await header_recv_future
        if self.stream is None:
            # We've been detached.
            need_delegate_close = False
            return False
        skip_body = False
        if self.is_client:
            assert isinstance(start_line, httputil.ResponseStartLine)
            if (
                self._request_start_line is not None
                and self._request_start_line.method == "HEAD"
            ):
                skip_body = True
            code = start_line.code
            if code == 304:
                # 304 responses may include the content-length header
                # but do not actually have a body.
                # http://tools.ietf.org/html/rfc7230#section-3.3
                skip_body = True
            if code >= 100 and code < 200:
                # 1xx responses should never indicate the presence of
                # a body.
                if "Content-Length" in headers or "Transfer-Encoding" in headers:
                    raise httputil.HTTPInputError(
                        "Response code %d cannot have body" % code
                    )
                # TODO: client delegates will get headers_received twice
                # in the case of a 100-continue.  Document or change?
                await self._read_message(delegate)
        else:
            if headers.get("Expect") == "100-continue" and not self._write_finished:
                self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
        # Read the message body.
        if not skip_body:
            body_future = self._read_body(
                resp_start_line.code if self.is_client else 0, headers, delegate
            )
            if body_future is not None:
                if self._body_timeout is None:
                    await body_future
                else:
                    try:
                        await gen.with_timeout(
                            self.stream.io_loop.time() + self._body_timeout,
                            body_future,
                            quiet_exceptions=iostream.StreamClosedError,
                        )
                    except gen.TimeoutError:
                        gen_log.info("Timeout reading body from %s", self.context)
                        self.stream.close()
                        return False
        self._read_finished = True
        if not self._write_finished or self.is_client:
            # finish() is about to be delivered, so the delegate no longer
            # needs an on_connection_close() from the ``finally`` clause.
            need_delegate_close = False
            with _ExceptionLoggingContext(app_log):
                delegate.finish()
        # If we're waiting for the application to produce an asynchronous
        # response, and we're not detached, register a close callback
        # on the stream (we didn't need one while we were reading)
        if (
            not self._finish_future.done()
            and self.stream is not None
            and not self.stream.closed()
        ):
            self.stream.set_close_callback(self._on_connection_close)
            await self._finish_future
        if self.is_client and self._disconnect_on_finish:
            self.close()
        if self.stream is None:
            return False
    except httputil.HTTPInputError as e:
        gen_log.info("Malformed HTTP message from %s: %s", self.context, e)
        if not self.is_client:
            # Server side: tell the peer the request was rejected.
            await self.stream.write(b"HTTP/1.1 400 Bad Request\r\n\r\n")
        self.close()
        return False
    finally:
        if need_delegate_close:
            with _ExceptionLoggingContext(app_log):
                delegate.on_connection_close()
        # NOTE(review): presumably drops the local reference to the future
        # so it is not kept alive by this frame -- confirm.
        header_future = None  # type: ignore
        self._clear_callbacks()
    return True
# -*- encoding: utf-8 -*- from tornado.log import enable_pretty_logging, LogFormatter, access_log, app_log, gen_log gen_log.info("--> importing .spider_handler") from base_handler import * from base_utils import * # from tornado.log import access_log, app_log, gen_log # already imported from base_handler # main decorator to handle parallelism # from handler_threading import * ### OpenScraper generic scraper from scraper import run_generic_spider from config.settings_scrapy import DEFAULT_COUNTDOWN ### + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ### ### RUN SPIDER handlers as background tasks ################################################# ### + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ### # threading for background tasks (spiders mainly) # cf : https://stackoverflow.com/questions/22082165/running-an-async-background-task-in-tornado/25304704 # cf : https://gist.github.com/marksilvis/ea1142680db66e2bb9b2a29e57306d76 # cf : https://stackoverflow.com/questions/22082165/running-an-async-background-task-in-tornado # cf : https://gist.github.com/mivade/421c427db75c8c5fa1d1 # cf : http://www.tornadoweb.org/en/stable/faq.html#my-code-is-asynchronous-but-it-s-not-running-in-parallel-in-two-browser-tabs # cf : http://www.tornadoweb.org/en/stable/guide/queues.html # cf : https://emptysqua.re/blog/refactoring-tornado-coroutines/
def setup_loggers():
    """Install the OpenScraper log format on the tornado loggers.

    Steps, in order:

    1. call tornado's ``enable_pretty_logging()``;
    2. if the root logger has at least one handler, replace the
       formatter of its first handler with the OpenScraper one;
    3. attach one file handler per tornado logger (general, access and
       application), each writing records of level WARNING and above to
       ``logs/openscraper_*.log``;
    4. emit one INFO record on each tornado logger as a smoke test.

    The logger has 5 severity levels:
        D - DEBUG (lowest)
        I - INFO
        W - WARNING
        E - ERROR
        C - CRITICAL (highest)

    For reference, tornado's default format is
    ``%(color)s[%(levelname)1.1s %(asctime)s %(module)s:%(lineno)d]%(end_color)s %(message)s``
    with ``datefmt='%y%m%d %H:%M:%S'``.

    Raises:
        OSError / IOError -- if the ``logs`` directory cannot be created or
        a log file cannot be opened for writing.
    """
    # Imported locally because this function may be the only user of ``os``
    # in the module.
    import os

    # The separator before the message is written out explicitly. A
    # backslash continuation inside the literal would embed the source
    # file's indentation in every log line.
    openscraper_log_format = (
        '%(color)s::: %(levelname)s %(name)s %(asctime)s ::: '
        '%(module)s:%(lineno)d -in- %(funcName)s() :::%(end_color)s '
        '%(message)s'
    )
    tornado_log_formatter = LogFormatter(fmt=openscraper_log_format, color=True)

    enable_pretty_logging()

    # enable_pretty_logging() may not install any handler on the root
    # logger, so only touch the first handler when there is one.
    root_logger = logging.getLogger()
    if root_logger.handlers:
        root_logger.handlers[0].setFormatter(tornado_log_formatter)

    # logging.FileHandler does not create missing parent directories.
    log_dir = 'logs'
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)

    # One WARNING-and-above file handler per tornado logger.
    file_targets = (
        (gen_log, 'logs/openscraper_general.log'),
        (access_log, 'logs/openscraper_access.log'),
        (app_log, 'logs/openscraper_app.log'),
    )
    for target_logger, log_path in file_targets:
        file_handler = logging.FileHandler(log_path)
        file_handler.setLevel(logging.WARNING)
        file_handler.setFormatter(tornado_log_formatter)
        target_logger.addHandler(file_handler)

    # Smoke-test the loggers, framed by blank lines on stdout.
    # ``print("")`` emits a single newline under both Python 2 and 3.
    print("")
    app_log.info('>>> this is app_log ')
    gen_log.info('>>> this is gen_log ')
    access_log.info('>>> this is access-log ')
    print("")
def _read_message(self, delegate):
    """Read one HTTP message from ``self.stream`` and feed it to ``delegate``.

    Generator-style coroutine: it yields futures and delivers its result
    through ``gen.Return`` (presumably wrapped by ``gen.coroutine``
    outside this view -- confirm).

    The result is ``True`` when the message was processed to the end and
    the stream is still attached. It is ``False`` when reading the headers
    or the body timed out, when the stream was detached, or when the
    input was malformed (``httputil.HTTPInputError``).
    """
    need_delegate_close = False
    try:
        # Headers and body are separated by a blank line.
        header_future = self.stream.read_until_regex(
            b"\r?\n\r?\n",
            max_bytes=self.params.max_header_size)
        if self.params.header_timeout is None:
            header_data = yield header_future
        else:
            try:
                header_data = yield gen.with_timeout(
                    self.stream.io_loop.time() + self.params.header_timeout,
                    header_future,
                    io_loop=self.stream.io_loop)
            except gen.TimeoutError:
                self.close()
                raise gen.Return(False)
        # Parse the header block, separating the header fields from the
        # first line (request-line / status-line).
        start_line, headers = self._parse_headers(header_data)
        # As a client we parse the server's response; as a server we parse
        # the client's request. The two kinds of start line carry
        # different fields:
        #   1. response status-line:
        #      HTTP-Version SP Status-Code SP Reason-Phrase CRLF
        #   2. request request-line:
        #      Method SP Request-URI SP HTTP-Version CRLF
        # After parsing, start_line is a namedtuple.
        if self.is_client:
            start_line = httputil.parse_response_start_line(start_line)
            self._response_start_line = start_line
        else:
            start_line = httputil.parse_request_start_line(start_line)
            self._request_start_line = start_line
            self._request_headers = headers

        # A request or response that is not keep-alive requires the
        # connection to be closed once it has been handled.
        self._disconnect_on_finish = not self._can_keep_alive(
            start_line, headers)
        need_delegate_close = True
        with _ExceptionLoggingContext(app_log):
            header_future = delegate.headers_received(start_line, headers)
            if header_future is not None:
                # If headers_received returned a `Future`, wait for it to
                # complete before reading the body.
                yield header_future
        # NOTE(review): the original author wondered whether this detach
        # path is the WebSocket case -- unconfirmed.
        if self.stream is None:
            # We've been detached.
            need_delegate_close = False
            raise gen.Return(False)
        skip_body = False
        if self.is_client:
            # As a client, the response to a HEAD request should carry no
            # message body.
            if (self._request_start_line is not None and
                    self._request_start_line.method == 'HEAD'):
                skip_body = True
            code = start_line.code
            if code == 304:
                # 304 responses may include the content-length header
                # but do not actually have a body.
                # http://tools.ietf.org/html/rfc7230#section-3.3
                skip_body = True
            if code >= 100 and code < 200:
                # 1xx responses should never indicate the presence of
                # a body.
                if ('Content-Length' in headers or
                        'Transfer-Encoding' in headers):
                    raise httputil.HTTPInputError(
                        "Response code %d cannot have body" % code)
                # TODO: client delegates will get headers_received twice
                # in the case of a 100-continue.  Document or change?
                yield self._read_message(delegate)
        else:
            # The 100-continue status was introduced in HTTP/1.1 to make
            # transfers more efficient. A client that needs to POST a
            # large amount of data can send ``Expect: 100-continue`` with
            # the request headers. A server that accepts the request
            # answers ``HTTP/1.1 100 (Continue)`` and the client then
            # sends the request body; otherwise the server answers
            # ``HTTP/1.1 417 Expectation Failed`` and the client gives up
            # sending the remaining data. (The Expect header names a
            # specific server behaviour required by the client and uses
            # an extensible syntax.)
            if (headers.get("Expect") == "100-continue" and
                    not self._write_finished):
                self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
        if not skip_body:
            body_future = self._read_body(
                start_line.code if self.is_client else 0, headers, delegate)
            if body_future is not None:
                if self._body_timeout is None:
                    yield body_future
                else:
                    try:
                        yield gen.with_timeout(
                            self.stream.io_loop.time() + self._body_timeout,
                            body_future, self.stream.io_loop)
                    except gen.TimeoutError:
                        gen_log.info("Timeout reading body from %s",
                                     self.context)
                        self.stream.close()
                        raise gen.Return(False)
        self._read_finished = True
        # In client mode it is appropriate to call
        # HTTPMessageDelegate.finish() as soon as the response has been
        # parsed. In server mode _write_finished records whether the
        # response has been sent completely; while it has not,
        # HTTPMessageDelegate.finish() is called so that the delegate
        # produces the response to the request.
        if not self._write_finished or self.is_client:
            need_delegate_close = False
            with _ExceptionLoggingContext(app_log):
                delegate.finish()
        # If we're waiting for the application to produce an asynchronous
        # response, and we're not detached, register a close callback
        # on the stream (we didn't need one while we were reading)
        #
        # NOTE: _finish_future resolves when all data has been written and
        # flushed to the IOStream.
        #
        # Execution is suspended here until the asynchronous response is
        # complete and all data has been written to the fd. Normally the
        # caller's `finish` call resolves `_finish_future`; see the
        # `finish` and `_finish_request` implementations.
        if (not self._finish_future.done() and
                self.stream is not None and
                not self.stream.closed()):
            self.stream.set_close_callback(self._on_connection_close)
            yield self._finish_future
        # In client mode, disconnect after the response has been handled
        # unless the connection is keep-alive. In server mode the
        # connection may only be dropped once the response is complete;
        # see the _finish_request / finish implementations.
        if self.is_client and self._disconnect_on_finish:
            self.close()
        if self.stream is None:
            raise gen.Return(False)
    except httputil.HTTPInputError as e:
        gen_log.info("Malformed HTTP message from %s: %s",
                     self.context, e)
        self.close()
        raise gen.Return(False)
    finally:
        # If the connection goes away before request handling reached
        # HTTPMessageDelegate.finish(), notify the delegate through
        # HTTPMessageDelegate.on_connection_close() instead.
        if need_delegate_close:
            with _ExceptionLoggingContext(app_log):
                delegate.on_connection_close()
        self._clear_callbacks()
    raise gen.Return(True)
def _read_message(self, delegate):
    """Read one HTTP message from ``self.stream`` and feed it to ``delegate``.

    Generator-style coroutine: it yields futures and delivers its result
    through ``gen.Return`` (presumably wrapped by ``gen.coroutine``
    outside this view -- confirm).

    The result is ``True`` when the message was processed to the end and
    the stream is still attached. It is ``False`` when reading the headers
    or the body timed out, when the stream was detached, or when the
    input was malformed; in that last case a server-side connection
    first writes a ``400 Bad Request`` status line.
    """
    need_delegate_close = False
    try:
        # The header block ends at the first blank line; the read is
        # capped at params.max_header_size bytes.
        header_future = self.stream.read_until_regex(
            b"\r?\n\r?\n",
            max_bytes=self.params.max_header_size)
        if self.params.header_timeout is None:
            header_data = yield header_future
        else:
            try:
                header_data = yield gen.with_timeout(
                    self.stream.io_loop.time() + self.params.header_timeout,
                    header_future,
                    quiet_exceptions=iostream.StreamClosedError)
            except gen.TimeoutError:
                # Headers did not arrive in time: drop the connection.
                self.close()
                raise gen.Return(False)
        start_line, headers = self._parse_headers(header_data)
        # Client mode parses a response status line; server mode parses a
        # request line and remembers it together with the headers.
        if self.is_client:
            start_line = httputil.parse_response_start_line(start_line)
            self._response_start_line = start_line
        else:
            start_line = httputil.parse_request_start_line(start_line)
            self._request_start_line = start_line
            self._request_headers = headers

        self._disconnect_on_finish = not self._can_keep_alive(
            start_line, headers)
        # From here on the delegate has been handed a message, so the
        # ``finally`` clause must tell it if we bail out before finish().
        need_delegate_close = True
        with _ExceptionLoggingContext(app_log):
            header_future = delegate.headers_received(start_line, headers)
            if header_future is not None:
                yield header_future
        if self.stream is None:
            # We've been detached.
            need_delegate_close = False
            raise gen.Return(False)
        skip_body = False
        if self.is_client:
            # A response to our own HEAD request is read without a body.
            if (self._request_start_line is not None and
                    self._request_start_line.method == 'HEAD'):
                skip_body = True
            code = start_line.code
            if code == 304:
                # 304 responses may include the content-length header
                # but do not actually have a body.
                # http://tools.ietf.org/html/rfc7230#section-3.3
                skip_body = True
            if code >= 100 and code < 200:
                # 1xx responses should never indicate the presence of
                # a body.
                if ('Content-Length' in headers or
                        'Transfer-Encoding' in headers):
                    raise httputil.HTTPInputError(
                        "Response code %d cannot have body" % code)
                # TODO: client delegates will get headers_received twice
                # in the case of a 100-continue.  Document or change?
                yield self._read_message(delegate)
        else:
            # Server mode: acknowledge ``Expect: 100-continue`` unless the
            # response has already been written.
            if (headers.get("Expect") == "100-continue" and
                    not self._write_finished):
                self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
        if not skip_body:
            body_future = self._read_body(
                start_line.code if self.is_client else 0, headers, delegate)
            if body_future is not None:
                if self._body_timeout is None:
                    yield body_future
                else:
                    try:
                        yield gen.with_timeout(
                            self.stream.io_loop.time() + self._body_timeout,
                            body_future,
                            quiet_exceptions=iostream.StreamClosedError)
                    except gen.TimeoutError:
                        gen_log.info("Timeout reading body from %s",
                                     self.context)
                        self.stream.close()
                        raise gen.Return(False)
        self._read_finished = True
        if not self._write_finished or self.is_client:
            # finish() is about to be delivered, so the delegate no longer
            # needs an on_connection_close() from the ``finally`` clause.
            need_delegate_close = False
            with _ExceptionLoggingContext(app_log):
                delegate.finish()
        # If we're waiting for the application to produce an asynchronous
        # response, and we're not detached, register a close callback
        # on the stream (we didn't need one while we were reading)
        if (not self._finish_future.done() and
                self.stream is not None and
                not self.stream.closed()):
            self.stream.set_close_callback(self._on_connection_close)
            yield self._finish_future
        if self.is_client and self._disconnect_on_finish:
            self.close()
        if self.stream is None:
            raise gen.Return(False)
    except httputil.HTTPInputError as e:
        gen_log.info("Malformed HTTP message from %s: %s",
                     self.context, e)
        if not self.is_client:
            # Server side: tell the peer the request was rejected.
            yield self.stream.write(b'HTTP/1.1 400 Bad Request\r\n\r\n')
        self.close()
        raise gen.Return(False)
    finally:
        if need_delegate_close:
            with _ExceptionLoggingContext(app_log):
                delegate.on_connection_close()
        # NOTE(review): presumably drops the local reference to the future
        # so it is not kept alive by this frame -- confirm.
        header_future = None
        self._clear_callbacks()
    raise gen.Return(True)
def _read_message(self, delegate):
    """Read one HTTP message from ``self.stream`` and feed it to ``delegate``.

    Generator-style coroutine: it yields futures and delivers its result
    through ``gen.Return`` (presumably wrapped by ``gen.coroutine``
    outside this view -- confirm).

    The result is ``True`` when the message was processed to the end and
    the stream is still attached. It is ``False`` when reading the headers
    or the body timed out, when the stream was detached, or when the
    input was malformed (``httputil.HTTPInputException``).
    """
    need_delegate_close = False
    try:
        # The header block ends at the first blank line; the read is
        # capped at params.max_header_size bytes.
        header_future = self.stream.read_until_regex(
            b"\r?\n\r?\n",
            max_bytes=self.params.max_header_size)
        if self.params.header_timeout is None:
            header_data = yield header_future
        else:
            try:
                header_data = yield gen.with_timeout(
                    self.stream.io_loop.time() + self.params.header_timeout,
                    header_future,
                    io_loop=self.stream.io_loop)
            except gen.TimeoutError:
                # Headers did not arrive in time: drop the connection.
                self.close()
                raise gen.Return(False)
        start_line, headers = self._parse_headers(header_data)
        # Client mode parses a response status line; server mode parses a
        # request line and remembers it together with the headers.
        if self.is_client:
            start_line = httputil.parse_response_start_line(start_line)
            self._response_start_line = start_line
        else:
            start_line = httputil.parse_request_start_line(start_line)
            self._request_start_line = start_line
            self._request_headers = headers

        self._disconnect_on_finish = not self._can_keep_alive(
            start_line, headers)
        # From here on the delegate has been handed a message, so the
        # ``finally`` clause must tell it if we bail out before finish().
        need_delegate_close = True
        header_future = delegate.headers_received(start_line, headers)
        if header_future is not None:
            yield header_future
        if self.stream is None:
            # We've been detached.
            need_delegate_close = False
            raise gen.Return(False)
        skip_body = False
        if self.is_client:
            # A response to our own HEAD request is read without a body.
            if (self._request_start_line is not None and
                    self._request_start_line.method == 'HEAD'):
                skip_body = True
            code = start_line.code
            if code == 304:
                # A 304 response is read without a body as well.
                skip_body = True
            if code >= 100 and code < 200:
                # TODO: client delegates will get headers_received twice
                # in the case of a 100-continue.  Document or change?
                yield self._read_message(delegate)
        else:
            # Server mode: acknowledge ``Expect: 100-continue`` unless the
            # response has already been written.
            if (headers.get("Expect") == "100-continue" and
                    not self._write_finished):
                self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
        if not skip_body:
            body_future = self._read_body(headers, delegate)
            if body_future is not None:
                if self._body_timeout is None:
                    yield body_future
                else:
                    try:
                        yield gen.with_timeout(
                            self.stream.io_loop.time() + self._body_timeout,
                            body_future, self.stream.io_loop)
                    except gen.TimeoutError:
                        gen_log.info("Timeout reading body from %s",
                                     self.context)
                        self.stream.close()
                        raise gen.Return(False)
        self._read_finished = True
        if not self._write_finished or self.is_client:
            # finish() is about to be delivered, so the delegate no longer
            # needs an on_connection_close() from the ``finally`` clause.
            need_delegate_close = False
            delegate.finish()
        # If we're waiting for the application to produce an asynchronous
        # response, and we're not detached, register a close callback
        # on the stream (we didn't need one while we were reading)
        if (not self._finish_future.done() and
                self.stream is not None and
                not self.stream.closed()):
            self.stream.set_close_callback(self._on_connection_close)
            yield self._finish_future
        if self.is_client and self._disconnect_on_finish:
            self.close()
        if self.stream is None:
            raise gen.Return(False)
    except httputil.HTTPInputException as e:
        gen_log.info("Malformed HTTP message from %s: %s",
                     self.context, e)
        self.close()
        raise gen.Return(False)
    finally:
        if need_delegate_close:
            delegate.on_connection_close()
        self._clear_callbacks()
    raise gen.Return(True)
def task(self, *args):
    """Take one temperature reading for a node and schedule the next run.

    Generator-style coroutine (presumably wrapped by ``gen.coroutine``
    outside this view -- confirm).

    Args:
        *args -- ``args[0]`` is the id of the temp document to process.

    Flow:
        1. Load the temp document. If it no longer exists, or its
           ``open`` flag is falsy, remove the task and stop.
        2. Take four readings one second apart and average the last
           three, rounded to one decimal place. On any error, retry every
           five seconds until three read periods have elapsed, then mark
           the task as failed, close the temp and stop.
        3. Store the averaged value. When ``has_sleep`` is ``True``, ask
           the node to sleep for one period.
        4. Re-schedule this method on the IOLoop after ``read_period``
           seconds, or 60 when no period is configured.
    """
    temp_id = args[0]
    gen_log.info("=== Do task id({})====".format(temp_id))

    doc = yield Temp().get_temp(temp_id)
    if doc is None:
        gen_log.info("Temp({}) be deleted!".format(temp_id))
        yield self.remove_from_tasks(temp_id)
        raise gen.Return()

    access_token = doc.get('key')
    period = doc.get('read_period')
    temp_open = doc.get('open')
    has_sleep = doc.get('has_sleep')
    board_type_id = doc.get('board_type_id')

    if not temp_open:
        gen_log.info("Temp({}) be closed!".format(temp_id))
        yield self.remove_from_tasks(temp_id, "normal", "The temp is closed.")
        raise gen.Return()

    # Retry window of three read periods. A missing or zero period uses
    # the same 60-second fallback as the re-scheduling at the end of this
    # method, because ``3 * None`` would raise TypeError.
    end_time = time.time() + 3 * (period or 60)
    while True:
        try:
            temps = []
            wio = Wio(access_token)
            for i in range(4):
                result = yield wio.get_temp(board_type_id)
                temps.append(result)
                yield gen.sleep(1)
            # The first reading is dropped; the remaining three are averaged.
            temp = round(sum(temps[1:]) / (len(temps) - 1), 1)
        except Exception as e:
            # TODO, if not pulgin grove temp, will gen error
            if time.time() > end_time:
                gen_log.error("Temp({}) {}".format(temp_id, e))
                yield self.remove_from_tasks(
                    temp_id, "error", "The node is not wake up on three period.")
                yield self.close_temp(temp_id)
                raise gen.Return()
            yield gen.sleep(5)
            gen_log.info("Temp({}) {}".format(temp_id, e))
            continue
        gen_log.info("{} ==> {}".format(temps, temp))
        self.update_temp(temp_id, temp)
        break

    if has_sleep is True:
        wio = Wio(access_token)
        try:
            yield wio.sleep(period, board_type_id)
            self.update_status(temp_id, "normal", "The node is sleep mode.")
        except Exception as e:
            # Best effort: a failed sleep request must not stop the
            # re-scheduling below.
            gen_log.error(e)
    else:
        self.update_status(temp_id, "normal", "The node is online mode.")

    IOLoop.current().add_timeout(time.time() + (period or 60), self.task, temp_id)
    gen_log.info("task ids: ({})".format(self.tasks))
    gen_log.info("===End task and restart({})====".format(temp_id))
def add_subscriber(self, subscriber):
    """Add ``subscriber`` to ``self._handlers`` and log the addition."""
    registered = self._handlers
    registered.add(subscriber)
    message = 'PikaConnector: subscriber %s added' % repr(subscriber)
    gen_log.info(message)
def run():
    """Parse the options, prepare the process and run the server loop.

    Steps, in order:

    1. parse the command line and, when given, the config file;
    2. when running as root with ``options.user`` set, switch to that
       user's group id and user id;
    3. create the storage and cache directories if they are missing;
    4. install signal handlers (SIGINT/SIGTERM/SIGQUIT stop the process;
       in debug mode SIGUSR2 opens pdb on the current frame);
    5. create the application, initialise the database, the thread pool
       and the HTTP clients (through a proxy when ``<scheme>_proxy`` is
       set in the environment);
    6. start listening and run the IOLoop.

    The function does not return: it ends with ``exit(0)`` when the loop
    stops normally and ``exit(1)`` after logging any exception raised
    during set-up or while the loop runs.
    """
    options.parse_command_line()

    if options.config:
        options.parse_config_file(options.config)

    options.storage = os.path.abspath(options.storage)

    if os.getuid() == 0 and options.user:
        pw = pwd.getpwnam(options.user)
        uid, gid = pw.pw_uid, pw.pw_gid
        log.info("Changind user to %s [%s:%s]", options.user, uid, gid)
        # The group must be switched to the target *group* id, and before
        # setuid(): once the user id is dropped the process may no longer
        # be allowed to change its group.
        # NOTE(review): supplementary groups inherited from root are not
        # reset here (os.initgroups / os.setgroups) -- consider adding.
        os.setgid(gid)
        os.setuid(uid)

    try:
        # isdir() is already False for a missing path, so one check covers
        # both "does not exist" and "is not a directory".
        if not os.path.isdir(options.storage):
            log.info('Creating new package storage directory: "%s"', options.storage)
            os.makedirs(options.storage)

        def on_interrupt(*args):
            log.warning("Receiving interrupt signal. Application will be stopped.")
            exit(errno.EINTR)

        log.debug("Preparing signal handling")
        for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGQUIT):
            signal.signal(sig, on_interrupt)

        def handle_pdb(sig, frame):
            import pdb
            pdb.Pdb().set_trace(frame)

        if options.debug:
            signal.signal(signal.SIGUSR2, handle_pdb)

        log.debug("Creating application instance")
        app = create_app(
            options.debug,
            options.secret,
            options.gzip,
        )

        log.debug("Creating IOLoop instance.")
        io_loop = IOLoop.current()

        io_loop.run_sync(lambda: init_db(options.database))

        if not os.path.isdir(options.cache_dir):
            os.makedirs(options.cache_dir)

        Cache.CACHE_DIR = options.cache_dir

        log.info("Init thread pool with %d threads", options.pool_size)
        handlers.base.BaseHandler.THREAD_POOL = futures.ThreadPoolExecutor(options.pool_size)

        AsyncHTTPClient.configure(None, max_clients=options.max_http_clients)

        # Honour the ``<scheme>_proxy`` environment variable that matches
        # the scheme of the upstream PyPI server.
        proxy_url = URL(os.getenv('{0}_proxy'.format(options.pypi_server.scheme)))
        if proxy_url:
            log.debug("Configuring for proxy: %s", proxy_url)
            AsyncHTTPClient.configure(
                'tornado.curl_httpclient.CurlAsyncHTTPClient',
                defaults={
                    'proxy_host': proxy_url.host,
                    'proxy_port': proxy_url.port,
                    'proxy_username': proxy_url.user,
                    'proxy_password': proxy_url.password,
                }
            )

        PYPIClient.configure(
            options.pypi_server,
            handlers.base.BaseHandler.THREAD_POOL
        )

        if options.pypi_proxy:
            # Refresh the package list once at start-up and then
            # periodically.
            pypi_updater = PeriodicCallback(PYPIClient.packages, HOUR * 1000, io_loop)

            io_loop.add_callback(PYPIClient.packages)
            io_loop.add_callback(pypi_updater.start)

        log.info("Starting server http://%s:%d/", options.address, options.port)
        http_server = HTTPServer(app, xheaders=options.proxy_mode)
        http_server.listen(options.port, address=options.address)

        log.debug('Setting "%s" as storage', options.storage)
        PackageFile.set_storage(options.storage)

        log.debug("Starting main loop")
        io_loop.start()
    except Exception as e:
        log.fatal("Exception on main loop:")
        log.exception(e)
        exit(1)
    else:
        exit(0)
def close_connection(self):
    """Log the shutdown, then close the RabbitMQ connection held in
    ``self._connection``."""
    gen_log.info('Closing connection')
    connection = self._connection
    connection.close()
def main():
    """Command-line wrapper to re-run a script whenever its source changes.

    Scripts may be specified by filename or module name::

        python -m tornado.autoreload -m tornado.test.runtests
        python -m tornado.autoreload tornado/test/runtests.py

    Running a script with this wrapper is similar to calling
    `tornado.autoreload.wait` at the end of the script, but this wrapper
    can catch import-time problems like syntax errors that would otherwise
    prevent the script from reaching its call to `wait`.

    With neither a script nor ``-m module`` on the command line, the usage
    text is printed to stderr and the process exits with status 1.
    """
    # Work on a copy of sys.argv so the wrapped script sees only its own
    # arguments; the original list is restored once the script has run.
    original_argv = sys.argv
    sys.argv = sys.argv[:]
    if len(sys.argv) >= 3 and sys.argv[1] == "-m":
        mode = "module"
        module = sys.argv[2]
        del sys.argv[1:3]
    elif len(sys.argv) >= 2:
        mode = "script"
        script = sys.argv[1]
        sys.argv = sys.argv[1:]
    else:
        print(_USAGE, file=sys.stderr)
        sys.exit(1)

    try:
        if mode == "module":
            import runpy
            runpy.run_module(module, run_name="__main__", alter_sys=True)
        elif mode == "script":
            with open(script) as f:
                # Execute the script in our namespace instead of creating
                # a new one so that something that tries to import __main__
                # (e.g. the unittest module) will see names defined in the
                # script instead of just those defined in this module.
                global __file__
                __file__ = script
                # If __package__ is defined, imports may be incorrectly
                # interpreted as relative to this module.
                global __package__
                del __package__
                exec_in(f.read(), globals(), globals())
    except SystemExit as e:
        # The script called sys.exit(); log it and fall through to wait().
        logging.basicConfig()
        gen_log.info("Script exited with status %s", e.code)
    except Exception as e:
        logging.basicConfig()
        gen_log.warning("Script exited with uncaught exception", exc_info=True)
        # If an exception occurred at import time, the file with the error
        # never made it into sys.modules and so we won't know to watch it.
        # Just to make sure we've covered everything, walk the stack trace
        # from the exception and watch every file.
        for (filename, lineno, name, line) in traceback.extract_tb(sys.exc_info()[2]):
            watch(filename)
        if isinstance(e, SyntaxError):
            # SyntaxErrors are special:  their innermost stack frame is fake
            # so extract_tb won't see it and we have to get the filename
            # from the exception object.
            watch(e.filename)
    else:
        logging.basicConfig()
        gen_log.info("Script exited normally")
    # restore sys.argv so subsequent executions will include autoreload
    sys.argv = original_argv

    if mode == 'module':
        # runpy did a fake import of the module as __main__, but now it's
        # no longer in sys.modules.  Figure out where it is and watch it.
        loader = pkgutil.get_loader(module)
        if loader is not None:
            watch(loader.get_filename())

    # Whatever the outcome, hand control to wait().
    wait()
filename=options.APPLICATION_LOG, when='D') fh_app.setFormatter(fmt) app_log.addHandler(fh_app) fh_gen = logging.handlers.TimedRotatingFileHandler( filename=options.GENERAL_LOG, when='D') fh_gen.setFormatter(fmt) gen_log.addHandler(fh_gen) from urls import url_patterns application = tornado.web.Application( url_patterns, cookie_secret="__TODO:_GENERATE_YOUR_OWN_RANDOM_VALUE_HERE__", xsrf_cookies=False, debug=options.debug, ) if __name__ == "__main__": gen_log.info(',[session_id:],server started. port:{0}'.format( options.port)) # Tornado Service Start try: application.listen(options.port) tornado.ioloop.IOLoop.instance().start() except KeyboardInterrupt: tornado.ioloop.IOLoop.instance().stop() gen_log.info(',[session_id:],server stoped.')
def fork_processes(num_processes, max_restarts=100):
    """Fork worker processes and supervise them from the parent.

    A ``num_processes`` of ``None`` or any value ``<= 0`` means "one
    worker per detected CPU core"; a positive value is used as given.
    Workers are separate processes, so no memory is shared between them.

    Multiple processes cannot be combined with the autoreload module (or
    ``debug=True`` on `tornado.web.Application`), and no IOLoop may be
    created or referenced before this function is called.

    In a child, the call returns that child's *task id* (the index it was
    started with). A child that is killed by a signal or exits with a
    non-zero status is restarted under the same task id, at most
    ``max_restarts`` times in total; exceeding that raises
    ``RuntimeError`` in the parent. The parent never returns normally:
    once every child has exited cleanly it calls ``sys.exit(0)``.
    """
    # The module-level _task_id is only read here; the nested start_child
    # is the one that rebinds it (in the child process).
    assert _task_id is None
    # Without an explicit positive count, start one worker per CPU.
    if num_processes is None or num_processes <= 0:
        num_processes = cpu_count()
    if ioloop.IOLoop.initialized():
        raise RuntimeError(
            "Cannot run in multiple processes: IOLoop instance "
            "has already been initialized. You cannot call "
            "IOLoop.instance() before calling start_processes()")
    gen_log.info("Starting %d processes", num_processes)

    # Maps the pid of each live child to the task id it was started with.
    children = {}

    def start_child(task_index):
        """Fork once; return the task id in the child, None in the parent.

        os.fork() returns in both processes. The parent receives the new
        child's pid and records ``pid -> task id`` in ``children``; the
        child receives 0 and reports the task id it was given, which
        stands in for the child's identity from then on.
        """
        global _task_id
        child_pid = os.fork()
        if child_pid != 0:
            # Parent side.
            children[child_pid] = task_index
            return None
        # Child side.
        _reseed_random()
        _task_id = task_index
        return task_index

    # A non-None result means we are inside a freshly forked child: return
    # its task id straight away and do nothing else. In the parent, keep
    # forking the remaining workers.
    for index in range(num_processes):
        started_as = start_child(index)
        if started_as is not None:
            return started_as

    restart_count = 0
    while children:
        try:
            # os.wait() blocks until any child exits. The child is then
            # removed from ``children`` and its status decides what
            # happens next. A child killed by a signal or exiting with an
            # error status is restarted under the same task id. A child
            # that finished its work normally is left alone, and we go
            # back to waiting for the others.
            exited_pid, exit_status = os.wait()
        except OSError as err:
            if err.errno != errno.EINTR:
                raise
            continue
        if exited_pid not in children:
            continue
        task_id = children.pop(exited_pid)

        if os.WIFSIGNALED(exit_status):
            gen_log.warning(
                "child %d (pid %d) killed by signal %d, restarting",
                task_id, exited_pid, os.WTERMSIG(exit_status))
        else:
            exit_code = os.WEXITSTATUS(exit_status)
            if exit_code == 0:
                gen_log.info("child %d (pid %d) exited normally",
                             task_id, exited_pid)
                continue
            gen_log.warning(
                "child %d (pid %d) exited with status %d, restarting",
                task_id, exited_pid, exit_code)

        restart_count += 1
        if restart_count > max_restarts:
            raise RuntimeError("Too many child restarts, giving up")
        # The replacement is started with the task id of the child that
        # just exited.
        respawned_as = start_child(task_id)
        if respawned_as is not None:
            return respawned_as

    # All child processes exited cleanly, so exit the master process
    # instead of just returning to right after the call to
    # fork_processes (which will probably just start up another IOLoop
    # unless the caller checks the return value).
    sys.exit(0)