Example #1
def start_notebook(url, port, user):
    hub_url = 'https://%s:%s/hub' % (url, port)
    user_url = 'https://%s:%s/user/%s' % (url, port, user)

    cookies = login(hub_url, user, user)
    api = NBAPI(url=user_url, cookies=cookies)

    path = 'Hello.ipynb'
    for i in itertools.count():
        gen_log.info("loading %s (%s)", user, i)
        nb = api.get_notebook(path)

        gen_log.info("starting %s (%s)", user, i)
        session = Session()
        kernel = yield api.new_kernel(session.session)

        try:
            for j in range(20):
                gen_log.info("running %s (%s:%s)", user, j, i)
                yield run_notebook(nb, kernel, session)
                yield sleep(0.05)

            gen_log.info("saving %s (%s)", user, i)
            api.save_notebook(nb, path)

        finally:
            api.kill_kernel(kernel['id'])


    gen_log.info("history: %s", response.history)
Example #2
    def post(self):
        msg = 'success'
        save_flag = self.get_argument('flag', '').lower()
        cp = self.get_argument('cp', '').strip()
        cpid = self.get_argument('cpid', '').strip()

        if 'deep' == save_flag:
            poiid = self.get_argument('poiid', '').strip()
            deep = self.get_argument('deep', '')
            if not poiid or 'null' == poiid.lower():
                deep_queue.put((cp, poiid, cpid, deep, -3, -3))
            deep_queue.put((cp, poiid, cpid, deep, 1, 1))
            logger.info("queue.size:%s", len(deep_queue))
            self.do_flush(deep_sql, deep_queue)

        elif 'rti' == save_flag:
            rti = self.get_argument('rti', '')
            value = (cp, cpid, rti, flag_value, flag_value)
            rti_queue.put(value)
            self.do_flush(rti_sql, rti_queue)

        elif 'newpoi' == save_flag:
            new_poi = self.get_argument('newpoiid', '')
            value = (cp, cpid, new_poi)
            newpoi_queue.put(value)
            self.do_flush(newpoi_sql, newpoi_queue)

        else:
            msg = {'failure': MESSAGE.BAD_FLAG}
            logger.info("cp=%s, cpid=%s, msg=%s\nReuqest=%s",
                        cp, cpid, MESSAGE.BAD_FLAG, self.request.arguments)
        self.write(msg)
Example #3
def configure(path, uid=None):
    """Configures the tornado logging streams with application specific
    customizatons, including configuring the application to log to
    the specified directory.

    Throws:
        OSError -- if the given directory doesn't exist and cannot be
                   created, or if it exists but cannot be written to

    Args:
        path -- a directory to create and write logs to

    Keyword Args:
        uid -- If provided, the current, non-rotated version of each log
               will be owned by the given system user. This is useful if
               we plan on dropping down to a less privileged user when
               the application runs
    """
    # First, create the specified logging directory if it does not already
    # exist.  If we don't have permissions to create the directory,
    # then OSError will be thrown
    if not os.path.isdir(path):
        os.mkdir(path)

    # Next, make sure that the current process has the needed permissions
    # to write to the specified logging directory. If not, throw an
    # exception, to prevent log-less execution
    if not os.access(path, os.W_OK | os.X_OK):
        error = "Unable to write to logging directory {0}".format("path")
        raise OSError(error)

    # Otherwise, if we're sure we can write to the specified logging
    # directory, configure the built in tornado loggers to use that
    # directory instead of the system wide one
    log_format = "%(created)f|%(message)s"

    tornado_logs = (('access.log', access_log), ('application.log', app_log),
                    ('general.log', gen_log))

    for log_name, logger in tornado_logs:
        log_path = os.path.join(path, log_name)
        handler = TimedRotatingFileHandler(log_path, when="midnight")
        formatter = logging.Formatter(log_format)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

        # Allow application errors to propagate up, so that serious errors
        # can wind up on STDERR or other useful places
        if logger is not app_log:
            logger.propagate = False

        if uid:
            os.chown(log_path, uid, -1)

    tornado.log.enable_pretty_logging()

    # Finally, write a simple start up message, both to test that we're
    # able to write as expected, and to get a start time in the logs
    gen_log.setLevel(logging.INFO)
    gen_log.info("Starting webserver (pid:{0}).".format(os.getpid()))
Example #4
def callback():
    try:
        while True:
            gen_log.info(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            time.sleep(1)
    except:
        pass
Example #5
File: auth.py Project: Arcylus/PBI
    def get_authenticated_user(self, callback, http_client=None):
        """Gets the OAuth authorized user and access token on callback.

        This method should be called from the handler for your registered
        OAuth Callback URL to complete the registration process. We call
        callback with the authenticated user, which in addition to standard
        attributes like 'name' includes the 'access_key' attribute, which
        contains the OAuth access you can use to make authorized requests
        to this service on behalf of the user.

        """
        request_key = escape.utf8(self.get_argument("oauth_token"))
        oauth_verifier = self.get_argument("oauth_verifier", None)
        request_cookie = self.get_cookie("_oauth_request_token")
        if not request_cookie:
            gen_log.warning("Missing OAuth request token cookie")
            callback(None)
            return
        self.clear_cookie("_oauth_request_token")
        cookie_key, cookie_secret = [
            base64.b64decode(escape.utf8(i))
            for i in request_cookie.split("|")]
        if cookie_key != request_key:
            gen_log.info((cookie_key, request_key, request_cookie))
            gen_log.warning("Request token does not match cookie")
            callback(None)
            return
        token = dict(key=cookie_key, secret=cookie_secret)
        if oauth_verifier:
            token["verifier"] = oauth_verifier
        if http_client is None:
            http_client = self.get_auth_http_client()
        http_client.fetch(self._oauth_access_token_url(token),
                          self.async_callback(self._on_access_token, callback))
Example #6
def run_notebook(nb, kernel, session):
    """Run all the cells of a notebook"""
    ncells = sum(cell['cell_type'] == 'code' for cell in nb.cells)
    i = 0
    for cell in nb.cells:
        if cell['cell_type'] == 'code':
            i += 1
            gen_log.info("Executing cell %i/%i", i, ncells)
            yield execute(cell, kernel, session)
Example #7
def saveFile(files, key, path):
    fl = files[key][0]
    req_name = fl["filename"]
    body = fl["body"]
    timestamp = int(time.time() + 300)
    fileName = "%d_%s.%s" % (timestamp, str(uuid.uuid1()), req_name.split(".").pop())
    gen_log.info(fileName)
    with open(path + fileName, "w") as f:
        f.write(body)
    return fileName
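
# A hedged usage sketch for saveFile() above, from a hypothetical tornado
# upload handler. The form field name "upload" and the trailing-slash path are
# assumptions (saveFile concatenates path and filename directly):
import tornado.web

class UploadHandler(tornado.web.RequestHandler):
    def post(self):
        # self.request.files maps form field names to lists of uploaded files
        name = saveFile(self.request.files, "upload", "/tmp/uploads/")
        self.write({"filename": name})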
Example #8
    def api(self, path, **kwargs):
        import time  # local import preserved from the original snippet
        try:
            s_time = time.time()
            data = yield self._make_request(path, **kwargs)
            e_time = time.time()
            gen_log.info("=====Time request wio api, {}".format(e_time - s_time))
        except Exception as e:
            gen_log.error(e)
            raise
        raise gen.Return(data)
Example #9
def _check_file(modify_times, path):
    try:
        modified = os.stat(path).st_mtime
    except Exception:
        return
    if path not in modify_times:
        modify_times[path] = modified
        return
    if modify_times[path] != modified:
        gen_log.info("%s modified; restarting server", path)
        _reload()
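
# A stand-alone sketch of how _check_file() might be driven; tornado.autoreload
# runs the equivalent on a PeriodicCallback over every watched file. The file
# names here are assumptions:
modify_times = {}

def check_all():
    for watched_path in ("app.py", "settings.py"):  # assumed watch list
        _check_file(modify_times, watched_path)

check_all()  # call periodically; triggers _reload() once a file's mtime changes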
Example #10
    def add(self, req):
        if self.get_style(req):
            req.reply_error("The style already exists")
            gen_log.info('Style %s already exists', req.content['name'])
            return

        style = create_style(req.content['name'])
        req.client.styles.append(style)
        req.content['style'] = style

        req.send_to_all(style)
        return True
Example #11
    def _on_headers(self, data):
        self._old_request = None
        self._end_notified = False
        self._please_notify_end_of_request = False
        try:
            data = native_str(data.decode('latin1'))
            eol = data.find("\r\n")
            start_line = data[:eol]
            try:
                method, uri, version = start_line.split(" ")
            except ValueError:
                raise _BadRequestException("Malformed HTTP request line")
            if not version.startswith("HTTP/"):
                raise _BadRequestException("Malformed HTTP version in HTTP Request-Line")
            headers = httputil.HTTPHeaders.parse(data[eol:])

            # HTTPRequest wants an IP, not a full socket address
            if getattr(self.stream.socket, 'family', socket.AF_INET) in (
                socket.AF_INET, socket.AF_INET6):
                # Jython 2.5.2 doesn't have the socket.family attribute,
                # so just assume IP in that case.
                remote_ip = self.address[0]
            else:
                # Unix (or other) socket; fake the remote address
                remote_ip = '0.0.0.0'

            self._request = HTTPRequest(
                connection=self, method=method, uri=uri, version=version,
                headers=headers, remote_ip=remote_ip)

            if self._events["connect"]:
                self._events["connect"](self._request)

            for name, handler in self._events.items():
                if handler:
                    self._request.on(name, handler)

            content_length = headers.get("Content-Length")
            if content_length:
                content_length = int(content_length)
                if content_length > self.stream.max_buffer_size:
                    raise _BadRequestException("Content-Length too long")
                if headers.get("Expect") == "100-continue":
                    self.stream.write(b("HTTP/1.1 100 (Continue)\r\n\r\n"))
                self.stream.read_bytes(content_length, self._on_request_body)
                return

            self.request_callback(self._request)
        except _BadRequestException as e:
            gen_log.info("Malformed HTTP request from %s: %s",
                         self.address[0], e)
            self.close()
            return
Example #12
def parse_request(data):
    try:
        req = ast.literal_eval(data)
        path, sep, query = req['uri'].partition('?')
        get_arguments = parse_qs_bytes(query, keep_blank_values=True)
        post_arguments = parse_qs_bytes(req['body'], keep_blank_values=True)
        host = req['headers']['Host']
        headers = req['headers']
        return headers, host, path, get_arguments, post_arguments
    except _BadRequestException as e:
        gen_log.info("Malformed HTTP request:%s", e)
        return
Example #13
    def _make_request(self, path, query=None, method="GET", body=None, headers=None):
        """
        Makes request on `path` in the graph.

        path -- endpoint to the facebook graph api
        query -- A dictionary that becomes a query string to be appended to the path
        method -- GET, POST, etc
        body -- message body
        headers -- Like "Content-Type"
        """
        if not query:
            query = {}

        if self.access_token:
            query["access_token"] = self.access_token

        query_string = urllib.urlencode(query) if query else ""
        if method == "GET":
            body = None
        else:
            if headers and "json" in headers.get('Content-Type'):
                body = json.dumps(body) if body else ""
            else:
                body = urllib.urlencode(body) if body else ""

        url = BASE_URL + path
        if query_string:
            url += "?" + query_string

        # url = "https://wio.temp-io.life/v1/nodes/create?access_token=123"
        gen_log.info("URL=====> {}".format(url))
        gen_log.info("method=====> {}".format(method))
        gen_log.info("body=====> {}".format(body))

        client = AsyncHTTPClient()
        request = HTTPRequest(url, method=method, body=body, headers=headers)
        try:
            response = yield client.fetch(request)
        except HTTPError as e:
            raise WioAPIError(e)
        except Exception as e:
            gen_log.error(e)
            raise

        content_type = response.headers.get('Content-Type', '')
        gen_log.info("#### content_type: {}".format(content_type))
        gen_log.info("#### body: {}".format(response.body))
        if 'json' in content_type:
            data = json.loads(response.body.decode())
        else:
            raise WioAPIError('Maintype was not json')

        raise gen.Return(data)
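
# A brief usage sketch for _make_request() above; callers drive it with yield,
# as Example #8 does. The path, query, and `client` instance are hypothetical:
from tornado import gen

@gen.coroutine
def list_nodes(client):
    # client is assumed to be an instance of the class defining _make_request
    data = yield client._make_request("/nodes/list", query={"limit": 10})
    raise gen.Return(data)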
Example #14
  def _on_headers(self, data):
    try:
      data = data.decode('latin1')
      eol = data.find("\r\n")
      start_line = data[:eol]
      try:
        method, uri, version = start_line.split(" ")
      except ValueError:
        raise tornado.httpserver._BadRequestException("Malformed HTTP request line")
      if not version.startswith("HTTP/"):
        raise tornado.httpserver._BadRequestException("Malformed HTTP version in HTTP Request-Line")
      headers = tornado.httputil.HTTPHeaders.parse(data[eol:])

      # HTTPRequest wants an IP, not a full socket address
      if self.address_family in (socket.AF_INET, socket.AF_INET6):
        remote_ip = self.address[0]
      else:
        # Unix (or other) socket; fake the remote address
        remote_ip = '0.0.0.0'

      self._request = tornado.httpserver.HTTPRequest(
        connection=self, method=method, uri=uri, version=version,
        headers=headers, remote_ip=remote_ip, protocol=self.protocol)

      content_length = headers.get("Content-Length")
      if content_length:
        content_length = int(content_length)
        use_tmp_files = self._get_handler_info()
        if not use_tmp_files and content_length > self.stream.max_buffer_size:
          raise _BadRequestException("Content-Length too long")
        if headers.get("Expect") == "100-continue":
          self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
        if use_tmp_files:
          gen_log.debug('using temporary files for uploading')

          # avoid raising
          # IOError("Reached maximum read buffer size")
          # in tornado.iostream.BaseIOStream._read_to_buffer
          self.stream.max_buffer_size = maxint

          self._receive_content(content_length)
        else:
          gen_log.debug('using memory for uploading')
          self.stream.read_bytes(content_length, self._on_request_body)
        return

      self.request_callback(self._request)
    except tornado.httpserver._BadRequestException as e:
      gen_log.info("Malformed HTTP request from %s: %s",
             self.address[0], e)
      self.close()
      return
Example #15
    def get(self, *args, **kwargs):
        pageIndex = int(self.get_argument('pageIndex', 1))
        company = self.get_argument('type', 'wechat')
        querys = {'wechat': {'wechat': {'$exists': True}},
                  '1xinxi': {'sendid': {'$exists': True}},
                  'chanzor': {'sendid': {'$exists': False}, 'wechat': {'$exists': False}}}

        db = self.application.db
        gen_log.info(company)
        gen_log.info(pageIndex)
        record_list = yield (db.sendrecord.find(querys.get(company))
                             .sort([("_id", -1)])
                             .skip((pageIndex - 1) * 20)
                             .limit(20)
                             .to_list(length=None))
        self.set_header('content-type', 'application/json')
        self.write(json_encode({"data": record_list}))
Example #16
def open_run_save(api, path, legacy=False):
    """open a notebook, run it, and save.
    
    Only the original notebook is saved, the output is not recorded.
    """
    nb = api.get_notebook(path)
    session = Session()
    kernel = yield api.new_kernel(session.session, legacy=legacy)
    try:
        yield run_notebook(nb, kernel, session)
    finally:
        api.kill_kernel(kernel['id'])
    gen_log.info("Saving %s/notebooks/%s", api.url, path)
    api.save_notebook(nb, path)
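
# A usage sketch for open_run_save(): it is a generator-based coroutine, so it
# has to be driven by the IOLoop. The api construction mirrors Example #1 and
# is an assumption, as is the tornado version accepting plain generators as
# yieldables:
from tornado.ioloop import IOLoop

api = NBAPI(url=user_url, cookies=cookies)  # as in Example #1
IOLoop.current().run_sync(lambda: open_run_save(api, "Hello.ipynb"))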
Example #17
    def __init__(self):

        gen_log.info("\n/// GenericSpiderMix / init ")

        # Default fields for mixin class

        # self.name = ""  # The name of the spider to use when executing the spider

        self.error_array = []
        self.item_count = 0  # incremented each time a new item is created
        self.item_count_depth_1 = 0  # incremented each time an item is completed on a detail page
        self.LIMIT = 5  # the number of pages after which the spider will stop
        self.page_count = 1  # the number of pages already scraped
        self.download_delay = 0  # delay in seconds between requests; some websites block too many requests
Example #18
    def _on_headers(self, data):
        try:
            data = native_str(data.decode("latin1"))
            eol = data.find("\r\n")
            start_line = data[:eol]
            try:
                method, uri, version = start_line.split(" ")
            except ValueError:
                raise _BadRequestException("Malformed HTTP request line")
            if not version.startswith("HTTP/"):
                raise _BadRequestException("Malformed HTTP version in HTTP Request-Line")
            try:
                headers = httputil.HTTPHeaders.parse(data[eol:])
            except ValueError:
                # Probably from split() if there was no ':' in the line
                raise _BadRequestException("Malformed HTTP headers")

            # HTTPRequest wants an IP, not a full socket address
            if self.address_family in (socket.AF_INET, socket.AF_INET6):
                remote_ip = self.address[0]
            else:
                # Unix (or other) socket; fake the remote address
                remote_ip = "0.0.0.0"

            self._request = HTTPRequest(
                connection=self,
                method=method,
                uri=uri,
                version=version,
                headers=headers,
                remote_ip=remote_ip,
                protocol=self.protocol,
            )

            content_length = headers.get("Content-Length")
            if content_length:
                content_length = int(content_length)
                if content_length > self.stream.max_buffer_size:
                    raise _BadRequestException("Content-Length too long")
                if headers.get("Expect") == "100-continue":
                    self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
                self.stream.read_bytes(content_length, self._on_request_body)
                return

            self.request_callback(self._request)
        except _BadRequestException as e:
            gen_log.info("Malformed HTTP request from %s: %s", self.address[0], e)
            self.close()
            return
Example #19
    def _on_headers(self, data):
        try:
            data = native_str(data.decode('latin1'))
            eol = data.find("\r\n")
            start_line = data[:eol]
            try:
                method, uri, version = start_line.split(" ")
            except ValueError:
                raise _BadRequestException("Malformed HTTP request line")
            if not version.startswith("HTTP/"):
                raise _BadRequestException("Malformed HTTP version in HTTP Request-Line")
            try:
                headers = httputil.HTTPHeaders.parse(data[eol:])
            except ValueError:
                # Probably from split() if there was no ':' in the line
                raise _BadRequestException("Malformed HTTP headers")

            # HTTPRequest wants an IP, not a full socket address
            if self.address_family in (socket.AF_INET, socket.AF_INET6):
                remote_ip = self.address[0]
            else:
                # Unix (or other) socket; fake the remote address
                remote_ip = '0.0.0.0'


            # Construct an HTTPRequest object
            self._request = HTTPRequest(
                connection=self, method=method, uri=uri, version=version,
                headers=headers, remote_ip=remote_ip, protocol=self.protocol)

            # If the headers carry a Content-Length, keep reading the body and
            # then invoke our request-body callback. The callback style is
            # presumably due to multiplexing, which keeps this non-blocking.
            content_length = headers.get("Content-Length")
            if content_length:
                content_length = int(content_length)
                if content_length > self.stream.max_buffer_size:
                    raise _BadRequestException("Content-Length too long")
                if headers.get("Expect") == "100-continue":
                    self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
                self.stream.read_bytes(content_length, self._on_request_body)  # originally everything was read into memory
                return

            # If the request has no Content-Length, simply start the actual
            # request handling directly
            self.request_callback(self._request)  # this invokes the application's __call__ magic method
        except _BadRequestException as e:
            gen_log.info("Malformed HTTP request from %r: %s",
                         self.address, e)
            self.close()
            return
Example #20
    def _handle_events(self, fd, events):
        if self.closed():
            gen_log.warning("Got events for closed stream %s", fd)
            return
        try:
            if self._connecting:
                self._handle_connect()
            if self.closed():
                return
            if events & self.io_loop.READ:
                # NOTE: We use an explicit read instead of an implicit one.
                # The reason IOStream is never idle is that when an event
                # arrives, tornado's iostream will still try to read it into
                # the buffer. Our approach is to read from the iostream only
                # when someone is actually trying to read it.
                if self._should_socket_close() or self.reading():
                    self._handle_read()

            if self.closed():
                return
            if events & self.io_loop.WRITE:
                self._handle_write()
            if self.closed():
                return
            if events & self.io_loop.ERROR:
                self.error = self.get_fd_error()
                self.io_loop.add_callback(self.close)
                return
            state = self.io_loop.ERROR
            if self.reading():
                state |= self.io_loop.READ
            if self.writing():
                state |= self.io_loop.WRITE
            if state == self.io_loop.ERROR and self._read_buffer_size == 0:
                state |= self.io_loop.READ
            if state != self._state:
                assert self._state is not None, \
                    "shouldn't happen: _handle_events without self._state"
                self._state = state
                self.io_loop.update_handler(self.fileno(), self._state)
        except UnsatisfiableReadError as e:
            gen_log.info("Unsatisfiable read, closing connection: %s" % e)
            self.close(exc_info=True)
        except Exception:
            gen_log.error("Uncaught exception, closing connection.",
                          exc_info=True)
            self.close(exc_info=True)
            raise
Example #21
    def _read_to_buffer(self):
        """
        Reads from the socket and appends the result to the read buffer.

        Returns the number of bytes read. Returns 0 if there is nothing
        to read (i.e. the read returns EWOULDBLOCK or equivalent). On
        error closes the socket and raises an exception.

        This method will read up to the allowed max_buffer_size, in which
        case if the buffer limit is reached, the read is placed back onto
        the IOLoop for rescheduling. This happens immediately with the
        hope that the underlying program code is reading quickly enough
        to keep the buffer well drained.
        """
        next_size = self._read_buffer_size + self.read_chunk_size
        if next_size >= self.max_buffer_size:
            gen_log.info("Reached maximum read buffer size of: {}".format(
                self.max_buffer_size))

            # Reschedule and treat this as an EWOULDBLOCK
            self._add_io_state(ioloop.IOLoop.READ)
            return 0

        chunk = None

        try:
            chunk = self.read_from_fd()
        except (socket.error, IOError, OSError) as e:
            # ssl.SSLError is a subclass of socket.error
            if e.args[0] in _ERRNO_CONNRESET:
                # Treat ECONNRESET as a connection close rather than
                # an error to minimize log spam (the exception will
                # be available on self.error for apps that care).
                self.close(exc_info=True)
                return
            self.close(exc_info=True)
            raise

        if chunk is None:
            return 0

        chunk_length = len(chunk)

        self._read_buffer.append(chunk)
        self._read_buffer_size += chunk_length

        return chunk_length
Example #22
def ontimer_reload(prev_mtime, filepath, mainfile):
    # wait for the file to stop changing, then check its syntax, then reload
    global autoreload_compile_error  # in an application: if autoreload_compile_error is not None, send its value to the main page
    statinfo = os.stat(filepath)

    if statinfo.st_size > 0 and statinfo.st_ctime == statinfo.st_mtime and statinfo.st_mtime - prev_mtime == 0:
        compiled = False
        try:
            py_compile.compile(mainfile, doraise=True)
            compiled = True
        except py_compile.PyCompileError as e:
            autoreload_compile_error = str(e)
            logging.info("autoreload: compile error in %s...\n%s", filepath, autoreload_compile_error)
        if compiled:
            logging.info("%s modified; restarting server", filepath)
            tornado.autoreload._reload()
    else:
        gen_log.info("autoreload: waiting for file upload to complete.")
        Timer(0.3, ontimer_reload, (statinfo.st_mtime, filepath, mainfile)).start()
Example #23
    def __init__(self, providers_config, counter):
        handlers = [
            (r"/(.+)", ProxyHandler),
        ]

        settings = dict(
            app_name=providers_config['name'],
            static_path=os.path.join(os.path.dirname(__file__), "static"),
        )

        # Create the provider list, set its counter storage, and populate it
        # with entries from the providers config dict passed to the web server
        self.provider_list = ApiProviderList()
        self.provider_list.set_counter_storage(counter)\
                          .add_items_from_dict(providers_config['providers'])

        tornado.web.Application.__init__(self, handlers, **settings)
        gen_log.info('Server is started!')
Example #24
def main():
    """Command-line wrapper to re-run a script whenever its source changes.

    Scripts may be specified by filename or module name::

        python -m tornado.autoreload -m tornado.test.runtests
        python -m tornado.autoreload tornado/test/runtests.py

    Running a script with this wrapper is similar to calling
    `tornado.autoreload.wait` at the end of the script, but this wrapper
    can catch import-time problems like syntax errors that would otherwise
    prevent the script from reaching its call to `wait`.
    """
    original_argv = sys.argv
    sys.argv = sys.argv[:]
    if len(sys.argv) >= 3 and sys.argv[1] == "-m":
        mode = "module"
        module = sys.argv[2]
        del sys.argv[1:3]
    elif len(sys.argv) >= 2:
        mode = "script"
        script = sys.argv[1]
        sys.argv = sys.argv[1:]
    else:
        print(_USAGE, file=sys.stderr)
        sys.exit(1)

    try:
        if mode == "module":
            import runpy

            runpy.run_module(module, run_name="__main__", alter_sys=True)
        elif mode == "script":
            with open(script) as f:
                global __file__
                __file__ = script
                # Use globals as our "locals" dictionary so that
                # something that tries to import __main__ (e.g. the unittest
                # module) will see the right things.
                exec(f.read(), globals(), globals())
    except SystemExit as e:
        logging.basicConfig()
        gen_log.info("Script exited with status %s", e.code)
Example #25
def recv_timeout(conn, timeout=0.5):
    conn.setblocking(0)
    total_data = []
    data = ''
    begin = time.time()
    while True:
        if total_data and time.time() - begin > timeout:
            break
        elif time.time() - begin > timeout * 2:
            break
        try:
            data = conn.recv(8192)
            if data:
                total_data.append(data)
                begin = time.time()
            else:
                time.sleep(0.1)
        except socket.error:
            gen_log.info("recv_timeout failed")
    return ''.join(total_data)
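
# A minimal hedged demo for recv_timeout() using a local socket pair (POSIX
# only; Python 2 semantics, matching the ''.join above). Expect a few
# "recv_timeout failed" log lines while the timeout window drains:
import socket

a, b = socket.socketpair()  # stand-in for a real client connection
a.sendall('ping')
print(recv_timeout(b, timeout=0.2))  # -> 'ping'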
Example #26
def flush():
    if failure:
        for sql in failure:
            p = failure.get(sql)
            if p:
                batch_update(db, sql, p)
                logger.info("flush failed save:[ %s ] records", len(p))
        failure.clear()

    if deep_queue:
        logger.info("flush deep_queue size: [%s]", len(deep_queue))
        batch_update(db, deep_sql, deep_queue)

    if rti_queue:
        logger.info("flush rti_queue size: [%s]", len(rti_queue))
        batch_update(db, rti_sql, rti_queue)

    if newpoi_queue:
        logger.info("flush newpoi_queue size: [%s]", len(newpoi_queue))
        batch_update(db, newpoi_sql, newpoi_queue)
    logger.info("flush finished")
Example #27
def _check_file(modify_times, module, path):
    try:
        modified = os.stat(path).st_mtime
    except Exception:
        return
    if path not in modify_times:
        modify_times[path] = modified
        return
    if modify_times[path] != modified:
        gen_log.info("%s modified; restarting server", path)
        modify_times[path] = modified
    else:
        return

    try:
        importlib.reload(module)
    except Exception:
        gen_log.error("Error importing %s, not reloading" % (path,))
        traceback.print_exc()
        return False
    return True
Example #28
    def run(self, result=None):
        logger = logging.getLogger()
        if not logger.handlers:
            logging.basicConfig()
        handler = logger.handlers[0]
        if len(logger.handlers) > 1 or not isinstance(handler, logging.StreamHandler):
            # Logging has been configured in a way we don't recognize,
            # so just leave it alone.
            super(LogTrapTestCase, self).run(result)
            return
        old_stream = handler.stream
        try:
            handler.stream = StringIO()
            gen_log.info("RUNNING TEST: " + str(self))
            old_error_count = len(result.failures) + len(result.errors)
            super(LogTrapTestCase, self).run(result)
            new_error_count = len(result.failures) + len(result.errors)
            if new_error_count != old_error_count:
                old_stream.write(handler.stream.getvalue())
        finally:
            handler.stream = old_stream
Example #29
    def post(self):
        '''
        Create a new user with the given arguments (email, password, name, ...).
        Automatically log in after the user is created.
        :return:
        '''
        hashed_password = yield executor.submit(
            bcrypt.hashpw, tornado.escape.utf8(self.get_argument("password")),
            bcrypt.gensalt())
        email = self.get_argument("email")
        name = self.get_argument("name")
        avatar = self.get_argument("avatar", None)
        with self.db.cursor() as cursor:
            cursor.callproc('create_user', [email, hashed_password, name, avatar])
            self.db.commit()
            user_id = cursor.fetchone()[0]  # the procedure returns 0 on error, otherwise the new user_id

        if user_id == 0:
            # Create user error: Dupe email.
            logger.info("AUTH SIGNUP FAIL reason:[Duplicated email] email:[%s]" % email)
            self.render("auth/sign_up.html", title="Sign up", error="Email already exists: %s" % email)
        else:
            logger.info("AUTH SIGNUP OK user_id:[%s] email:[%s]" % (user_id, email))
            self.set_secure_cookie("vonng_com_user", str(user_id))
            redirect_url = self.get_argument("next", "/")
            logger.info("AUTH SIGNIN OK user_id:[%s] redirect:[%s]" % (user_id, redirect_url))
            self.redirect(redirect_url)
Example #30
    def post(self):
        email = self.get_argument("email")
        with self.db.cursor(cursor_factory=psycopg2.extras.NamedTupleCursor) as cursor:
            cursor.callproc('get_user', [email])
            user = cursor.fetchone()

        if not user:
            logger.info("AUTH SIGNIN FAIL reason:[No such email] email:[%s]" % email)
            self.render("auth/sign_in.html", title="Sign in", error="No matching login entry for email:%s" % email)

        user_id = user.user_id
        password = user.hashed_password

        hashed_password = yield executor.submit(
            bcrypt.hashpw, tornado.escape.utf8(self.get_argument("password")),
            tornado.escape.utf8(password))

        if hashed_password == password:
            self.set_secure_cookie("vonng_com_user", str(user_id))
            redirect_url = self.get_argument("next", "/")
            logger.info("AUTH SIGNIN OK user_id:[%s] redirect:[%s]" % (user_id, redirect_url))
            self.redirect(redirect_url)
        else:
            logger.info("AUTH SIGNIN FAIL reason:[Incorrect password] user_id:[%s] email:[%s]" % (user_id, email))
            self.render("auth/sign_in.html", title="Sign in", error="Incorrect password")
Example #31
    def read_headers(self, delegate):
        try:
            _delegate, delegate = self._parse_delegate(delegate)
            header_future = self.stream.read_until_regex(
                b"\r?\n\r?\n", max_bytes=self.params.max_header_size)
            if self.params.header_timeout is None:
                header_data = yield header_future
            else:
                try:
                    header_data = yield gen.with_timeout(
                        self.stream.io_loop.time() +
                        self.params.header_timeout,
                        header_future,
                        quiet_exceptions=StreamClosedError)
                except gen.TimeoutError:
                    self.close()
                    raise gen.Return(False)
            start_line, headers = self._parse_headers(header_data)

            start_line = parse_response_start_line(start_line)
            self._response_start_line = start_line

            self._disconnect_on_finish = not self._can_keep_alive(
                start_line, headers)

            with _ExceptionLoggingContext(app_log):
                header_future = delegate.headers_received(start_line, headers)
                if header_future is not None:
                    yield header_future
            if self.stream is None:
                # We've been detached.
                raise gen.Return(False)

            # determine whether to skip the body
            # TODO: 100 <= code < 200
            if (self._request_start_line is not None
                    and self._request_start_line.method == 'HEAD'):
                _delegate.skip_body = True
            code = start_line.code
            if code == 304:
                # 304 responses may include the content-length header
                # but do not actually have a body.
                # http://tools.ietf.org/html/rfc7230#section-3.3
                _delegate.skip_body = True
            if code >= 100 and code < 200:
                # 1xx responses should never indicate the presence of
                # a body.
                if ('Content-Length' in headers
                        or 'Transfer-Encoding' in headers):
                    raise HTTPInputError("Response code %d cannot have body" %
                                         code)
                # TODO: client delegates will get headers_received twice
                # in the case of a 100-continue.  Document or change?
                yield self.read_headers(delegate)

            # return the response with no body set
            with _ExceptionLoggingContext(app_log):
                delegate.finish()
        except HTTPInputError as e:
            gen_log.info("Malformed HTTP message from %s: %s", self.context, e)
            self.close()

            self._clear_callbacks()

            raise gen.Return(False)
        finally:
            header_future = None
        raise gen.Return(True)
Example #32
        gen_log.warning("Script exited with uncaught exception", exc_info=True)
        # If an exception occurred at import time, the file with the error
        # never made it into sys.modules and so we won't know to watch it.
        # Just to make sure we've covered everything, walk the stack trace
        # from the exception and watch every file.
        for (filename, lineno, name,
             line) in traceback.extract_tb(sys.exc_info()[2]):
            watch(filename)
        if isinstance(e, SyntaxError):
            # SyntaxErrors are special:  their innermost stack frame is fake
            # so extract_tb won't see it and we have to get the filename
            # from the exception object.
            watch(e.filename)
    else:
        logging.basicConfig()
        gen_log.info("Script exited normally")
    # restore sys.argv so subsequent executions will include autoreload
    sys.argv = original_argv

    if mode == 'module':
        # runpy did a fake import of the module as __main__, but now it's
        # no longer in sys.modules.  Figure out where it is and watch it.
        loader = pkgutil.get_loader(module)
        if loader is not None:
            watch(loader.get_filename())

    wait()


if __name__ == "__main__":
    # See also the other __main__ block at the top of the file, which modifies
Example #33
from tornado.options import options
from {{cookiecutter.project_slug}}.run import create_app, run_app, define_options
from tornado.log import gen_log
from {{cookiecutter.project_slug}}.settings import settings

if __name__ == "__main__":
    define_options()
    options.parse_command_line()
    option_dict = options.as_dict()
    gen_log.info("option_dict:{}".format(option_dict))
    app = create_app(**option_dict, **settings)
    run_app(app, options)
Example #34
    async def delete(self):
        """
        Delete some file on server
        :return: None
        """
        filename = self.get_argument("filename", None)
        try:
            self.file_helper.delete_file(Config.file_directory, filename)
        except Exception as file_err:
            self.__error(error_msg=f"File error: {str(file_err)}")
            return
        self.__success(success_msg=f"File {filename} has been deleted")


def make_app():
    Config.file_directory.mkdir(parents=True, exist_ok=True)
    # fmt: off
    return web.Application(handlers=[(r"/api/file", FileHandler)])
    # fmt: on


if __name__ == "__main__":
    tornado.options.parse_command_line()
    gen_log.info("Server running")

    app = make_app()
    app.listen(8888)

    ioloop.IOLoop.current().start()
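
# A client-side sketch for exercising the FileHandler above, run as a separate
# process against the server on port 8888. The filename is hypothetical:
from tornado.httpclient import HTTPClient

client = HTTPClient()
resp = client.fetch("http://localhost:8888/api/file?filename=test.txt",
                    method="DELETE")
print(resp.body)
client.close()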
Example #35
# -*- encoding: utf-8 -*-


from tornado.log import enable_pretty_logging, LogFormatter, access_log, app_log, gen_log

gen_log.info("---> importing .pipelines")

import os
import json
from pprint import pprint, pformat
import pymongo
from pymongo import MongoClient
from scrapy import signals

import logging

# set logger for scrapy
log_pipe = logging.getLogger("log_pipeline")
log_pipe.setLevel(logging.DEBUG)

# Create the Handler for logging data to a file
logger_handler = logging.FileHandler('logs/openscraper_pipeline_logging.log')
logger_handler.setLevel(logging.WARNING)

# Create a Formatter for formatting the log messages
logger_formatter = logging.Formatter('%(name)s - %(levelname)s - %(message)s')

# Add the Formatter to the Handler
logger_handler.setFormatter(logger_formatter)
Example #36
import socket
import time

import tornado.options
from tornado.options import define, options
from tornado.log import gen_log
import etcd

from utils import bytes2int, get_ip

define("port", default=30000, help="run on the given port", type=int)
define("weight", default=1, help="weight", type=int)
define("etcd", default="localhost", help="etcd hostname", type=str)
tornado.options.parse_command_line()
client = etcd.Client(host=options.etcd, port=2379)
client.write(
    '/dubbomesh/com.alibaba.dubbo.performance.demo.provider.IHelloService/{0}:{1}'
    .format(get_ip(), options.port), options.weight)
gen_log.info('register with {0}:{1} [{2}]'.format(get_ip(), options.port,
                                                  options.weight))

while True:
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect(('127.0.0.1', 20880))
        s.close()
        gen_log.info("connect to 20880 success")
        break
    except socket.error:
        gen_log.info("connect to 20880 fail, wait a second")
        time.sleep(0.2)
Example #37
def fork_processes(
    num_processes: Optional[int], max_restarts: Optional[int] = None
) -> int:
    """Starts multiple worker processes.

    If ``num_processes`` is None or <= 0, we detect the number of cores
    available on this machine and fork that number of child
    processes. If ``num_processes`` is given and > 0, we fork that
    specific number of sub-processes.

    Since we use processes and not threads, there is no shared memory
    between any server code.

    Note that multiple processes are not compatible with the autoreload
    module (or the ``autoreload=True`` option to `tornado.web.Application`
    which defaults to True when ``debug=True``).
    When using multiple processes, no IOLoops can be created or
    referenced until after the call to ``fork_processes``.

    In each child process, ``fork_processes`` returns its *task id*, a
    number between 0 and ``num_processes``.  Processes that exit
    abnormally (due to a signal or non-zero exit status) are restarted
    with the same id (up to ``max_restarts`` times).  In the parent
    process, ``fork_processes`` calls ``sys.exit(0)`` after all child
    processes have exited normally.

    max_restarts defaults to 100.

    Availability: Unix
    """
    if sys.platform == "win32":
        # The exact form of this condition matters to mypy; it understands
        # if but not assert in this context.
        raise Exception("fork not available on windows")
    if max_restarts is None:
        max_restarts = 100

    global _task_id
    assert _task_id is None
    if num_processes is None or num_processes <= 0:
        num_processes = cpu_count()
    gen_log.info("Starting %d processes", num_processes)
    children = {}

    def start_child(i: int) -> Optional[int]:
        pid = os.fork()
        if pid == 0:
            # child process
            _reseed_random()
            global _task_id
            _task_id = i
            return i
        else:
            children[pid] = i
            return None

    for i in range(num_processes):
        id = start_child(i)
        if id is not None:
            return id
    num_restarts = 0
    while children:
        pid, status = os.wait()
        if pid not in children:
            continue
        id = children.pop(pid)
        if os.WIFSIGNALED(status):
            gen_log.warning(
                "child %d (pid %d) killed by signal %d, restarting",
                id,
                pid,
                os.WTERMSIG(status),
            )
        elif os.WEXITSTATUS(status) != 0:
            gen_log.warning(
                "child %d (pid %d) exited with status %d, restarting",
                id,
                pid,
                os.WEXITSTATUS(status),
            )
        else:
            gen_log.info("child %d (pid %d) exited normally", id, pid)
            continue
        num_restarts += 1
        if num_restarts > max_restarts:
            raise RuntimeError("Too many child restarts, giving up")
        new_id = start_child(id)
        if new_id is not None:
            return new_id
    # All child processes exited cleanly, so exit the master process
    # instead of just returning to right after the call to
    # fork_processes (which will probably just start up another IOLoop
    # unless the caller checks the return value).
    sys.exit(0)
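
# For contrast with the fork_processes() source above: the canonical
# multi-process serving pattern from the tornado docs. The empty Application
# is a placeholder:
import tornado.httpserver
import tornado.ioloop
import tornado.netutil
import tornado.process
import tornado.web

app = tornado.web.Application([])             # placeholder application
sockets = tornado.netutil.bind_sockets(8888)  # bind before forking
tornado.process.fork_processes(0)             # 0 => one child per CPU core
server = tornado.httpserver.HTTPServer(app)
server.add_sockets(sockets)
tornado.ioloop.IOLoop.current().start()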
Example #38
# -*- encoding: utf-8 -*-

from tornado.log import enable_pretty_logging, LogFormatter, access_log, app_log, gen_log

gen_log.info("--> importing .urls")

from controller import *
from spider_handler import *
from api_handler import *

### most routing functions are in controller.py
### for url mapping in Tornado cf : https://stackoverflow.com/questions/17166051/url-regex-mapping-in-tornado-web-server
### cf : https://code.tutsplus.com/tutorials/8-regular-expressions-you-should-know--net-6149
### cf : https://gist.github.com/c4urself/1028897
### cf : http://www.lexev.org/en/2014/set-url-for-tornado-handlers/
### cf : https://makandracards.com/theogfx/11605-python-+-tornado-variable-length-url-parameters
### cf : https://stackoverflow.com/questions/10726486/tornado-url-query-parameters
### cf : https://stackoverflow.com/questions/6038061/regular-expression-to-find-urls-within-a-string

### cf : https://docs.python.org/2/library/re.html#regular-expression-syntax
### "/( ?P<your_arg>.*? )" - arg in url as kwarg
### "/( \w+ )" - string of letter
### "/( [0-9Xx\-]+ )" - string of numbers  numbers and dashes
### "/( .* )" - whatever comes

### "( [^/]+ )" - as slug
### if url is like "/api/?q=this&r=that" --> self.request.arguments gets : {'q': ['this'], 'r': ['that']}

urls = [

    ### index
Example #39
    def _make_request(self,
                      path,
                      query=None,
                      method="GET",
                      body=None,
                      headers=None,
                      **kwargs):
        """
        Makes request on `path` in the graph.

        path -- endpoint to the facebook graph api
        query -- A dictionary that becomes a query string to be appended to the path
        method -- GET, POST, etc
        body -- message body
        headers -- Like "Content-Type"
        """
        if not query:
            query = {}

        if self.access_token:
            query["access_token"] = self.access_token

        query_string = urllib.urlencode(query) if query else ""
        if method == "GET":
            body = None
        else:
            if headers and "json" in headers.get('Content-Type'):
                body = json.dumps(body) if body else ""
            else:
                body = urllib.urlencode(body) if body else ""

        url = BASE_URL + path
        if query_string:
            url += "?" + query_string

        # url = "https://wio.temp-io.life/v1/nodes/create?access_token=123"
        gen_log.info("URL=====> {}".format(url))
        gen_log.info("method=====> {}".format(method))
        gen_log.info("body=====> {}".format(body))

        client = AsyncHTTPClient()
        request = HTTPRequest(url,
                              method=method,
                              body=body,
                              headers=headers,
                              **kwargs)
        try:
            response = yield client.fetch(request)
        except HTTPError as e:
            raise WioAPIError(e)
        except Exception as e:
            gen_log.error(e)
            raise

        content_type = response.headers.get('Content-Type', '')
        gen_log.info("#### content_type: {}".format(content_type))
        gen_log.info("#### body: {}".format(response.body))
        if 'json' in content_type:
            data = json.loads(response.body.decode())
        else:
            raise WioAPIError('Maintype was not json')

        raise gen.Return(data)
Example #40
def fork_processes(ports=None, max_restarts=100):
    global listen_port
    assert listen_port is None
    assert isinstance(ports, (list, tuple)) and len(ports) > 0, 'Invalid port list.'
    gen_log.info("Starting service on ports: %s", ports)
    children = {}

    def start_child(port):
        pid = os.fork()
        if pid == 0:
            # child process
            _reseed_random()
            global listen_port
            listen_port = port
            print('FORKED: %s' % port)
            return port
        else:
            children[pid] = port
            return None

    for port in ports:
        l_port = start_child(port)
        if l_port is not None:
            return l_port

    num_restarts = 0
    exit_pending = 0
    while True:
        try:
            pid, status = os.wait()
        except KeyboardInterrupt:
            print('Ctrl+C, Exit.')
            sys.exit(0)
        except OSError as e:
            if errno_from_exception(e) == errno.EINTR:
                continue
            if errno_from_exception(e) == errno.ECHILD:
                break
            raise

        if pid not in children:
            continue

        port = children.pop(pid)

        if exit_pending:
            continue

        if os.WIFSIGNALED(status):
            gen_log.warning(
                "child [port:%s] [pid:%d] killed by [signal:%d], restarting",
                port, pid, os.WTERMSIG(status))
        elif os.WEXITSTATUS(status) != 0:
            gen_log.warning(
                "child [port:%s] [pid:%d] exited with status %d, restarting",
                port, pid, os.WEXITSTATUS(status))
        else:
            gen_log.info("child [port:%s] [pid:%d] exited normally", port, pid)
            continue

        num_restarts += 1
        if num_restarts > max_restarts:
            raise RuntimeError("child [port:%s] too many restarts, giving up",
                               port)
        l_port = start_child(port)
        if l_port is not None:
            return l_port
    # All child processes exited cleanly, so exit the master process
    # instead of just returning to right after the call to
    # fork_processes (which will probably just start up another IOLoop
    # unless the caller checks the return value).

    print('Children: %s' % sorted(children.items(), key=lambda x: x[1]))
    print('Main Process Exiting...')
    sys.exit(0)
Example #41
def run():

    # init the database
    db = database.db(cfg.database_url)
    db.create_all()

    if cfg.test_data:
        gen_log.info('Deleting any existing temperature data from database')
        db.delete_temperature_data()
        gen_log.info('Loading test data to database')
        with open(cfg.test_data) as f:
            for line in f:
                line = line.strip()
                data = line.split()
                # Lines starting with # are comments
                if data[0] == '#':
                    continue
                # Automatically generate a (large) number of entries using a line like:
                #   generate start_timestamp interval_secs count
                # For example, to generate a year of data at 5-minute intervals
                # (12*24*365 = 105120 records):
                #   generate 1435708800 300 105120
                if data[0].lower() == 'generate':
                    temp = hardware.Temperature(db, cfg.sensor_params)
                    start_ts = int(data[1])
                    interval_sec = int(data[2])
                    generate_count = int(data[3])
                    gen_log.info(
                        'Generating {} records at {} second intervals from timestamp {}'
                        .format(generate_count, interval_sec, start_ts))
                    for loop_count in range(generate_count):
                        # On an RPi3 it takes about 30 seconds to generate 1000 records.
                        # Give some output every 1000 records so the user can see progress.
                        if (loop_count + 1) % 1000 == 0:
                            gen_log.info(
                                'Generating temperature record {}'.format(
                                    loop_count + 1))
                        db.save_temperature(
                            temp.get_temp(),
                            start_ts + (loop_count * interval_sec))
                    continue
                # 1st number data[0] is the timestamp, 2nd number data[1] is the temperature
                db.save_temperature(data[1], data[0])
        gen_log.info('Test data written to database. Starting server.')
    else:
        # init the temperature sensor
        temp = hardware.Temperature(db, cfg.sensor_params)
        temp.save_current()

    # settings for the tornado app
    settings = {'db': db, 'debug': cfg.debug_mode}

    # list of handlers for the server
    handlers = [
        url(r'^/api/temperature/?$', temperature_handler),
        url(r'^/api/temperature/(\d+)/?$', temperature_handler),
        url(r'^/api/temperature/current/?$', current_temp_handler),
        url(r'^/api/temperature/now/?$', current_temp_handler),
        url(r'^/api/temperature/(max|min|ave|stats)/?$', stats_temp_handler),
        url(r'^/api/info/?$', info_handler),
    ]

    # add the static file handler if we want to use it
    if cfg.serve_webapp:
        handlers.append(
            url(r'^/(.*)$', StaticFileHandler, {
                'path': 'webapp',
                'default_filename': 'index.html'
            }))

    # set up the server app
    server = Application(handlers, **settings)
    server.listen(cfg.listen_port)

    if not cfg.test_data:
        # log the temperature at intervals
        PeriodicCallback(temp.save_current,
                         int(cfg.temp_interval) * 1000).start()

    # Let the server loop servicing requests
    tornado.ioloop.IOLoop.current().start()
Example #42
    def _read_message(self, delegate):
        need_delegate_close = False
        try:
            # This asynchronously reads the header data from the iostream and
            # returns a future that will be filled in with the header content.
            # In HTTP, headers and body are separated by a CRLF sequence such
            # as \r\n\r\n, so the regex below is passed in to ensure we read
            # exactly to the end of the headers.
            header_future = self.stream.read_until_regex(
                b"\r?\n\r?\n", max_bytes=self.params.max_header_size)
            if self.params.header_timeout is None:
                header_data = yield header_future
            else:
                # If a timeout is set, wrap the future with gen.with_timeout so
                # the iostream must fill in the future's result within the time
                # limit; otherwise a timeout error is raised.
                try:
                    header_data = yield gen.with_timeout(
                        self.stream.io_loop.time() +
                        self.params.header_timeout,
                        header_future,
                        io_loop=self.stream.io_loop,
                        quiet_exceptions=iostream.StreamClosedError)
                except gen.TimeoutError:
                    self.close()
                    raise gen.Return(False)

            # Turn the header data into objects holding the parsed information.
            # Client-side and server-side HTTP start lines differ in form, so
            # parsing depends on which side of the connection this is.
            # On the client side the start line is a status-line, e.g.:
            # HTTP/1.1 200 OK
            # On the server side the start line is a request-line, e.g.:
            # GET / HTTP/1.1
            start_line, headers = self._parse_headers(header_data)
            if self.is_client:
                start_line = httputil.parse_response_start_line(start_line)
                self._response_start_line = start_line
            else:
                start_line = httputil.parse_request_start_line(start_line)
                self._request_start_line = start_line
                self._request_headers = headers

            # Decide the connection keep-alive policy from the headers.
            self._disconnect_on_finish = not self._can_keep_alive(
                start_line, headers)
            need_delegate_close = True

            # Within this context, any exception raised by the code below is
            # logged through app_log.
            with _ExceptionLoggingContext(app_log):
                # Hand the parsed headers to the delegate; on the server side
                # this matches the request to a Handler based on the path in
                # the start line.
                header_future = delegate.headers_received(start_line, headers)

                # In the streaming case, header_future will not be None.
                if header_future is not None:
                    yield header_future
            if self.stream is None:
                # We've been detached.
                need_delegate_close = False
                raise gen.Return(False)

            skip_body = False

            if self.is_client:
                # On the client side, use the headers and the HTTP status
                # code to decide whether a body should be read.
                if (self._request_start_line is not None
                        and self._request_start_line.method == 'HEAD'):
                    skip_body = True
                code = start_line.code
                if code == 304:
                    # 304 responses may include the content-length header
                    # but do not actually have a body.
                    # http://tools.ietf.org/html/rfc7230#section-3.3
                    skip_body = True
                if code >= 100 and code < 200:
                    # 1xx responses should never indicate the presence of
                    # a body.
                    if ('Content-Length' in headers
                            or 'Transfer-Encoding' in headers):
                        raise httputil.HTTPInputError(
                            "Response code %d cannot have body" % code)
                    # TODO: client delegates will get headers_received twice
                    # in the case of a 100-continue.  Document or change?
                    yield self._read_message(delegate)
            else:
                # On the server side there is no case where the body must be skipped.
                if (headers.get("Expect") == "100-continue" and
                        # Handle client requests carrying an 'Expect: 100-continue' header.
                        not self._write_finished):
                    self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")

            if not skip_body:
                body_future = self._read_body(
                    start_line.code if self.is_client else 0, headers,
                    delegate)
                if body_future is not None:
                    if self._body_timeout is None:
                        yield body_future
                    else:
                        try:
                            yield gen.with_timeout(
                                self.stream.io_loop.time() +
                                self._body_timeout,
                                body_future,
                                self.stream.io_loop,
                                quiet_exceptions=iostream.StreamClosedError)
                        except gen.TimeoutError:
                            gen_log.info("Timeout reading body from %s",
                                         self.context)
                            self.stream.close()
                            raise gen.Return(False)
            self._read_finished = True
            if not self._write_finished or self.is_client:
                # On the server side this is where the matched handler is
                # invoked to write the response back to the requester. By now
                # the _RequestDispatcher holds both the header and body data,
                # so it can do a little processing (e.g. parsing form data)
                # before handing them to the Handler.
                need_delegate_close = False
                with _ExceptionLoggingContext(app_log):
                    delegate.finish()

            # If we're waiting for the application to produce an asynchronous
            # response, and we're not detached, register a close callback
            # on the stream (we didn't need one while we were reading)
            if (not self._finish_future.done() and self.stream is not None
                    and not self.stream.closed()):
                self.stream.set_close_callback(self._on_connection_close)
                yield self._finish_future
            if self.is_client and self._disconnect_on_finish:
                self.close()
            if self.stream is None:
                raise gen.Return(False)
        except httputil.HTTPInputError as e:
            gen_log.info("Malformed HTTP message from %s: %s", self.context, e)
            self.close()
            raise gen.Return(False)
        finally:
            if need_delegate_close:
                with _ExceptionLoggingContext(app_log):
                    delegate.on_connection_close()
            self._clear_callbacks()
        raise gen.Return(True)
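The timeout branch annotated above is easy to exercise in isolation. Below is a minimal sketch (assuming Tornado 4.x-era generator coroutines; the slow() coroutine is hypothetical) of wrapping a future with gen.with_timeout:

from tornado import gen
from tornado.ioloop import IOLoop


@gen.coroutine
def slow():
    # hypothetical operation that outlives the deadline
    yield gen.sleep(2)
    raise gen.Return("done")


@gen.coroutine
def main():
    try:
        # resolve the future within 1 second or raise TimeoutError,
        # mirroring the header_timeout branch above
        result = yield gen.with_timeout(IOLoop.current().time() + 1, slow())
        print(result)
    except gen.TimeoutError:
        print("timed out")


IOLoop.current().run_sync(main)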
Example #43
0
class HTTPConnection(object):
    """Handles a connection to an HTTP client, executing HTTP requests.

    We parse HTTP headers and bodies, and execute the request callback
    until the HTTP connection is closed.
    """
    def __init__(self,
                 stream,
                 address,
                 request_callback,
                 no_keep_alive=False,
                 xheaders=False,
                 protocol=None):
        self.stream = stream
        self.address = address
        # Save the socket's address family now so we know how to
        # interpret self.address even after the stream is closed
        # and its socket attribute replaced with None.
        self.address_family = stream.socket.family
        self.request_callback = request_callback
        self.no_keep_alive = no_keep_alive
        self.xheaders = xheaders
        self.protocol = protocol
        self._clear_request_state()
        # Save stack context here, outside of any request.  This keeps
        # contexts from one request from leaking into the next.
        self._header_callback = stack_context.wrap(self._on_headers)
        self.stream.set_close_callback(self._on_connection_close)
        self.stream.read_until(b"\r\n\r\n", self._header_callback)

    def _clear_request_state(self):
        """Clears the per-request state.

        This is run in between requests to allow the previous handler
        to be garbage collected (and prevent spurious close callbacks),
        and when the connection is closed (to break up cycles and
        facilitate garbage collection in cpython).
        """
        self._request = None
        self._request_finished = False
        self._write_callback = None
        self._close_callback = None

    def set_close_callback(self, callback):
        """Sets a callback that will be run when the connection is closed.

        Use this instead of accessing
        `HTTPConnection.stream.set_close_callback
        <.BaseIOStream.set_close_callback>` directly (which was the
        recommended approach prior to Tornado 3.0).
        """
        self._close_callback = stack_context.wrap(callback)

    def _on_connection_close(self):
        if self._close_callback is not None:
            callback = self._close_callback
            self._close_callback = None
            callback()
        # Delete any unfinished callbacks to break up reference cycles.
        self._header_callback = None
        self._clear_request_state()

    def close(self):
        self.stream.close()
        # Remove this reference to self, which would otherwise cause a
        # cycle and delay garbage collection of this connection.
        self._header_callback = None
        self._clear_request_state()

    def write(self, chunk, callback=None):
        """Writes a chunk of output to the stream."""
        if not self.stream.closed():
            self._write_callback = stack_context.wrap(callback)
            self.stream.write(chunk, self._on_write_complete)

    def finish(self):
        """Finishes the request."""
        self._request_finished = True
        # No more data is coming, so instruct TCP to send any remaining
        # data immediately instead of waiting for a full packet or ack.
        self.stream.set_nodelay(True)
        if not self.stream.writing():
            self._finish_request()

    def _on_write_complete(self):
        if self._write_callback is not None:
            callback = self._write_callback
            self._write_callback = None
            callback()
        # _on_write_complete is enqueued on the IOLoop whenever the
        # IOStream's write buffer becomes empty, but it's possible for
        # another callback that runs on the IOLoop before it to
        # simultaneously write more data and finish the request.  If
        # there is still data in the IOStream, a future
        # _on_write_complete will be responsible for calling
        # _finish_request.
        if self._request_finished and not self.stream.writing():
            self._finish_request()

    def _finish_request(self):
        if self.no_keep_alive or self._request is None:
            disconnect = True
        else:
            connection_header = self._request.headers.get("Connection")
            if connection_header is not None:
                connection_header = connection_header.lower()
            if self._request.supports_http_1_1():
                disconnect = connection_header == "close"
            elif ("Content-Length" in self._request.headers
                  or self._request.method in ("HEAD", "GET")):
                disconnect = connection_header != "keep-alive"
            else:
                disconnect = True
        self._clear_request_state()
        if disconnect:
            self.close()
            return
        try:
            # Use a try/except instead of checking stream.closed()
            # directly, because in some cases the stream doesn't discover
            # that it's closed until you try to read from it.
            self.stream.read_until(b"\r\n\r\n", self._header_callback)

            # Turn Nagle's algorithm back on, leaving the stream in its
            # default state for the next request.
            self.stream.set_nodelay(False)
        except iostream.StreamClosedError:
            self.close()

    def _on_headers(self, data):
        try:
            try:
                data = native_str(data.decode('latin1'))
            except LookupError:
                data = native_str(data)
            eol = data.find("\r\n")
            start_line = data[:eol]
            try:
                method, uri, version = start_line.split(" ")
            except ValueError:
                raise _BadRequestException("Malformed HTTP request line")
            if not version.startswith("HTTP/"):
                raise _BadRequestException(
                    "Malformed HTTP version in HTTP Request-Line")
            try:
                headers = httputil.HTTPHeaders.parse(data[eol:])
            except ValueError:
                # Probably from split() if there was no ':' in the line
                raise _BadRequestException("Malformed HTTP headers")

            # HTTPRequest wants an IP, not a full socket address
            if self.address_family in (socket.AF_INET, socket.AF_INET6):
                remote_ip = self.address[0]
            else:
                # Unix (or other) socket; fake the remote address
                remote_ip = '0.0.0.0'

            self._request = HTTPRequest(connection=self,
                                        method=method,
                                        uri=uri,
                                        version=version,
                                        headers=headers,
                                        remote_ip=remote_ip,
                                        protocol=self.protocol)

            content_length = headers.get("Content-Length")
            if content_length:
                content_length = int(content_length)
                if content_length > self.stream.max_buffer_size:
                    raise _BadRequestException("Content-Length too long")
                if headers.get("Expect") == "100-continue":
                    self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
                self.stream.read_bytes(content_length, self._on_request_body)
                return

            self.request_callback(self._request)
        except _BadRequestException as e:
            gen_log.info("Malformed HTTP request from %r: %s", self.address, e)
            self.close()
            return
Example #44
0
    def _on_headers(self, data):
        try:
            # First, locate the start line:
            data = native_str(data.decode('latin1'))
            eol = data.find("\r\n")
            # Grab the request's start line, e.g.: GET / HTTP/1.1
            start_line = data[:eol]
            try:
                # Then split the start line on spaces to obtain the
                # method, the request URI and the HTTP version.
                method, uri, version = start_line.split(" ")
            except ValueError:
                raise _BadRequestException("Malformed HTTP request line")
            if not version.startswith("HTTP/"):
                raise _BadRequestException("Malformed HTTP version in HTTP Request-Line")
            try:
                # Next, HTTPHeaders parses the remaining request headers
                # (everything after the first line) into a dict-like object.
                headers = httputil.HTTPHeaders.parse(data[eol:])
            except ValueError:
                # Probably from split() if there was no ':' in the line
                raise _BadRequestException("Malformed HTTP headers")

            # HTTPRequest wants an IP, not a full socket address
            # Then determine remote_ip (seemingly unused here?)
            if self.address_family in (socket.AF_INET, socket.AF_INET6):
                remote_ip = self.address[0]
            else:
                # Unix (or other) socket; fake the remote address
                remote_ip = '0.0.0.0'

            # Next, create the request object (the same request that a
            # RequestHandler receives), then use Content-Length to check for a
            # request body. If there is none, invoke the HTTP-level callback
            # directly (i.e. the application's __call__ method); if there is,
            # read that many bytes and jump to the _on_request_body callback,
            # which ultimately calls the application object as well.
            # _on_request_body calls parse_body_arguments to parse the body
            # (more on header and body parsing later). At this point the flow
            # connects up with the Application object's interface and role.
            # When handle_stream gets called is covered later.

            #
            # Wrap the request information in an HTTPRequest object.
            # Note: self._request = HTTPRequest;
            # the HTTPRequest wraps the HTTPConnection,
            # and the HTTPConnection wraps the stream and the application.
            # connection=self, where self is this HTTPConnection;
            # headers=headers is the HTTPHeaders object.


            self._request = HTTPRequest(
                connection=self, method=method, uri=uri, version=version,
                headers=headers, remote_ip=remote_ip, protocol=self.protocol)

            # The HTTPRequest object is handed over via
            # self.request_callback(self._request) (the HTTP body ultimately
            # ends up there too).

            # Read Content-Length from the request headers.
            content_length = headers.get("Content-Length")
            if content_length:
                content_length = int(content_length)
                if content_length > self.stream.max_buffer_size:
                    raise _BadRequestException("Content-Length too long")
                if headers.get("Expect") == "100-continue":
                    self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
                self.stream.read_bytes(content_length, self._on_request_body)
                return

            # **************** Invoke the Application object's __call__ method, i.e. the entry point of the routing system *******************
            #
            self.request_callback(self._request)

            # Where does this request_callback come from? It was passed in as
            # an argument when the HTTPConnection was constructed.
            # Going back to HTTPServer.handle_stream():
            # def handle_stream(self, stream, address):
            #     HTTPConnection(stream, address, self.request_callback,
            #        self.no_keep_alive, self.xheaders, self.protocol)

            # It is a member of the HTTPServer class; tracing it further back:
            # def __init__(self, request_callback, no_keep_alive=False, io_loop=None,
            #  xheaders=False, ssl_options=None, protocol=None, **kwargs):
            #   self.request_callback = request_callback

            # Bingo! This is the request handler that was passed in when the
            # HTTPServer was initialized.

            # In helloworld.py we see:
            # application = tornado.web.Application([(r"/", MainHandler), ])
            # http_server = tornado.httpserver.HTTPServer(application)
            #
            # In another example we see:
            # def handle_request(request):
            #    message = "You requested %s\n" % request.uri
            #    request.write("HTTP/1.1 200 OK\r\nContent-Length: %d\r\n\r\n%s" % (
            #                  len(message), message))
            #    request.finish()
            # http_server = tornado.httpserver.HTTPServer(handle_request)

            # So request_callback accepts arguments of many kinds: it may be
            # an Application instance, or just a plain function.
            # If the handler is a plain function, like handle_request above, that
            # is easy to understand: a function processes the HTTPRequest object.

            # If it is an Application object, that looks odd at first. Can we
            # call an object, passing another object as its argument?

            # Python has an interesting feature: if a class implements the
            # __call__ method, its instances become callable.
            # In other words, we can use instances of that class like functions,
            # effectively overloading the call operator.


        except _BadRequestException as e:
            gen_log.info("Malformed HTTP request from %s: %s",
                         self.address[0], e)
            self.close()
            return
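A tiny self-contained illustration of that last point (the Dispatcher class here is hypothetical, not Tornado's): implementing __call__ is exactly what lets an Application instance be passed anywhere a plain handler function is expected.

class Dispatcher(object):
    """Minimal stand-in for tornado.web.Application's callable behavior."""

    def __call__(self, request):
        # instances can now be invoked with parentheses, like a function
        print("dispatching %s" % request)


def handle_request(request):
    print("handling %s" % request)


# both objects satisfy the same "callable taking a request" contract
for callback in (Dispatcher(), handle_request):
    callback("GET /")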
Example #45
0
def main(**kwargs):
    """A simple test runner.

    This test runner is essentially equivalent to `unittest.main` from
    the standard library, but adds support for tornado-style option
    parsing and log formatting.

    The easiest way to run a test is via the command line::

        python -m tornado.testing tornado.test.stack_context_test

    See the standard library unittest module for ways in which tests can
    be specified.

    Projects with many tests may wish to define a test script like
    ``tornado/test/runtests.py``.  This script should define a method
    ``all()`` which returns a test suite and then call
    `tornado.testing.main()`.  Note that even when a test script is
    used, the ``all()`` test suite may be overridden by naming a
    single test on the command line::

        # Runs all tests
        python -m tornado.test.runtests
        # Runs one test
        python -m tornado.test.runtests tornado.test.stack_context_test

    Additional keyword arguments are passed through to ``unittest.main()``.
    For example, use ``tornado.testing.main(verbosity=2)``
    to show many test details as they are run.
    See http://docs.python.org/library/unittest.html#unittest.main
    for full argument list.
    """
    from tornado.options import define, options, parse_command_line

    define('exception_on_interrupt',
           type=bool,
           default=True,
           help=("If true (default), ctrl-c raises a KeyboardInterrupt "
                 "exception.  This prints a stack trace but cannot interrupt "
                 "certain operations.  If false, the process is more reliably "
                 "killed, but does not print a stack trace."))

    # support the same options as unittest's command-line interface
    define('verbose', type=bool)
    define('quiet', type=bool)
    define('failfast', type=bool)
    define('catch', type=bool)
    define('buffer', type=bool)

    argv = [sys.argv[0]] + parse_command_line(sys.argv)

    if not options.exception_on_interrupt:
        signal.signal(signal.SIGINT, signal.SIG_DFL)

    if options.verbose is not None:
        kwargs['verbosity'] = 2
    if options.quiet is not None:
        kwargs['verbosity'] = 0
    if options.failfast is not None:
        kwargs['failfast'] = True
    if options.catch is not None:
        kwargs['catchbreak'] = True
    if options.buffer is not None:
        kwargs['buffer'] = True

    if __name__ == '__main__' and len(argv) == 1:
        print("No tests specified", file=sys.stderr)
        sys.exit(1)
    try:
        # In order to be able to run tests by their fully-qualified name
        # on the command line without importing all tests here,
        # module must be set to None.  Python 3.2's unittest.main ignores
        # defaultTest if no module is given (it tries to do its own
        # test discovery, which is incompatible with auto2to3), so don't
        # set module if we're not asking for a specific test.
        if len(argv) > 1:
            unittest.main(module=None, argv=argv, **kwargs)
        else:
            unittest.main(defaultTest="all", argv=argv, **kwargs)
    except SystemExit as e:
        if e.code == 0:
            gen_log.info('PASS')
        else:
            gen_log.error('FAIL')
        raise
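The runtests.py pattern mentioned in the docstring can be sketched as follows (the module names are placeholders for a project's own test modules):

# runtests.py -- sketch of the test-script pattern described above
import unittest

import tornado.testing

TEST_MODULES = [
    "myproject.test.handlers_test",  # placeholder module names
    "myproject.test.models_test",
]


def all():
    # tornado.testing.main() looks this function up via defaultTest="all"
    return unittest.defaultTestLoader.loadTestsFromNames(TEST_MODULES)


if __name__ == "__main__":
    tornado.testing.main()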
Example #46
0
 def _close_on_error(self, file):
     gen_log.info('closing %d on connection close.', file.fileno())
     file.close()
Example #47
0
def fork_processes(num_processes, max_restarts=100):
    """Starts multiple worker processes.

    If ``num_processes`` is None or <= 0, we detect the number of cores
    available on this machine and fork that number of child
    processes. If ``num_processes`` is given and > 0, we fork that
    specific number of sub-processes.

    Since we use processes and not threads, there is no shared memory
    between any server code.

    Note that multiple processes are not compatible with the autoreload
    module (or the debug=True option to `tornado.web.Application`).
    When using multiple processes, no IOLoops can be created or
    referenced until after the call to ``fork_processes``.

    In each child process, ``fork_processes`` returns its *task id*, a
    number between 0 and ``num_processes``.  Processes that exit
    abnormally (due to a signal or non-zero exit status) are restarted
    with the same id (up to ``max_restarts`` times).  In the parent
    process, ``fork_processes`` returns None if all child processes
    have exited normally, but will otherwise only exit by throwing an
    exception.
    """
    global _task_id
    assert _task_id is None
    if num_processes is None or num_processes <= 0:
        num_processes = cpu_count()
    if ioloop.IOLoop.initialized():
        raise RuntimeError(
            "Cannot run in multiple processes: IOLoop instance "
            "has already been initialized. You cannot call "
            "IOLoop.instance() before calling start_processes()")
    gen_log.info("Starting %d processes", num_processes)
    children = {}

    def start_child(i):
        pid = os.fork()
        if pid == 0:
            # child process
            _reseed_random()
            global _task_id
            _task_id = i
            return i
        else:
            children[pid] = i
            return None

    for i in range(num_processes):
        id = start_child(i)
        if id is not None:
            return id
    num_restarts = 0
    while children:
        try:
            pid, status = os.wait()
        except OSError as e:
            if e.errno == errno.EINTR:
                continue
            raise
        if pid not in children:
            continue
        id = children.pop(pid)
        if os.WIFSIGNALED(status):
            gen_log.warning(
                "child %d (pid %d) killed by signal %d, restarting", id, pid,
                os.WTERMSIG(status))
        elif os.WEXITSTATUS(status) != 0:
            gen_log.warning(
                "child %d (pid %d) exited with status %d, restarting", id, pid,
                os.WEXITSTATUS(status))
        else:
            gen_log.info("child %d (pid %d) exited normally", id, pid)
            continue
        num_restarts += 1
        if num_restarts > max_restarts:
            raise RuntimeError("Too many child restarts, giving up")
        new_id = start_child(id)
        if new_id is not None:
            return new_id
    # All child processes exited cleanly, so exit the master process
    # instead of just returning to right after the call to
    # fork_processes (which will probably just start up another IOLoop
    # unless the caller checks the return value).
    sys.exit(0)
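As a usage sketch, the pattern this docstring implies is to bind the listening sockets in the parent, fork, and then start one IOLoop per child (the port is arbitrary and `app` stands in for an Application defined elsewhere):

import tornado.httpserver
import tornado.ioloop
import tornado.netutil
import tornado.process

sockets = tornado.netutil.bind_sockets(8888)  # bind before forking
tornado.process.fork_processes(0)             # 0 => one child per CPU core
# From here on we are in a child process, each with its own IOLoop.
server = tornado.httpserver.HTTPServer(app)   # `app` defined elsewhere
server.add_sockets(sockets)
tornado.ioloop.IOLoop.current().start()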
Example #48
0
 async def _read_message(self, delegate: httputil.HTTPMessageDelegate) -> bool:
     need_delegate_close = False
     try:
         # Read the request headers.
         header_future = self.stream.read_until_regex(
             b"\r?\n\r?\n", max_bytes=self.params.max_header_size
         )
         if self.params.header_timeout is None:
             header_data = await header_future
         else:
             try:
                 header_data = await gen.with_timeout(
                     self.stream.io_loop.time() + self.params.header_timeout,
                     header_future,
                     quiet_exceptions=iostream.StreamClosedError,
                 )
             except gen.TimeoutError:
                 self.close()
                 return False
         # Parse the start line and the header fields.
         start_line_str, headers = self._parse_headers(header_data)
         if self.is_client:
             resp_start_line = httputil.parse_response_start_line(start_line_str)
             self._response_start_line = resp_start_line
             start_line = (
                 resp_start_line
             )  # type: Union[httputil.RequestStartLine, httputil.ResponseStartLine]
             # TODO: this will need to change to support client-side keepalive
             self._disconnect_on_finish = False
         else:
             # Parse the request line into method, path, version.
             req_start_line = httputil.parse_request_start_line(start_line_str)
             self._request_start_line = req_start_line
             self._request_headers = headers
             start_line = req_start_line
             # Decide whether the connection must be closed when finished.
             self._disconnect_on_finish = not self._can_keep_alive(
                 req_start_line, headers
             )
         need_delegate_close = True
         with _ExceptionLoggingContext(app_log):
             header_recv_future = delegate.headers_received(start_line, headers)
             if header_recv_future is not None:
                 await header_recv_future
         if self.stream is None:
             # We've been detached.
             need_delegate_close = False
             return False
         skip_body = False
         if self.is_client:
             assert isinstance(start_line, httputil.ResponseStartLine)
             if (
                 self._request_start_line is not None
                 and self._request_start_line.method == "HEAD"
             ):
                 skip_body = True
             code = start_line.code
             if code == 304:
                 # 304 responses may include the content-length header
                 # but do not actually have a body.
                 # http://tools.ietf.org/html/rfc7230#section-3.3
                 skip_body = True
             if code >= 100 and code < 200:
                 # 1xx responses should never indicate the presence of
                 # a body.
                 if "Content-Length" in headers or "Transfer-Encoding" in headers:
                     raise httputil.HTTPInputError(
                         "Response code %d cannot have body" % code
                     )
                 # TODO: client delegates will get headers_received twice
                 # in the case of a 100-continue.  Document or change?
                 await self._read_message(delegate)
         else:
             if headers.get("Expect") == "100-continue" and not self._write_finished:
                 self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
         # Read the request body.
         if not skip_body:
             body_future = self._read_body(
                 resp_start_line.code if self.is_client else 0, headers, delegate
             )
             if body_future is not None:
                 if self._body_timeout is None:
                     await body_future
                 else:
                     try:
                         await gen.with_timeout(
                             self.stream.io_loop.time() + self._body_timeout,
                             body_future,
                             quiet_exceptions=iostream.StreamClosedError,
                         )
                     except gen.TimeoutError:
                         gen_log.info("Timeout reading body from %s", self.context)
                         self.stream.close()
                         return False
         self._read_finished = True
         if not self._write_finished or self.is_client:
             need_delegate_close = False
             with _ExceptionLoggingContext(app_log):
                 delegate.finish()
         # If we're waiting for the application to produce an asynchronous
         # response, and we're not detached, register a close callback
         # on the stream (we didn't need one while we were reading)
         if (
             not self._finish_future.done()
             and self.stream is not None
             and not self.stream.closed()
         ):
             self.stream.set_close_callback(self._on_connection_close)
             await self._finish_future
         if self.is_client and self._disconnect_on_finish:
             self.close()
         if self.stream is None:
             return False
     except httputil.HTTPInputError as e:
         gen_log.info("Malformed HTTP message from %s: %s", self.context, e)
         if not self.is_client:
             await self.stream.write(b"HTTP/1.1 400 Bad Request\r\n\r\n")
         self.close()
         return False
     finally:
         if need_delegate_close:
             with _ExceptionLoggingContext(app_log):
                 delegate.on_connection_close()
         header_future = None  # type: ignore
         self._clear_callbacks()
     return True
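The start-line parsing noted in the comments can be exercised directly; httputil.parse_request_start_line and parse_response_start_line each return a namedtuple:

from tornado import httputil

req = httputil.parse_request_start_line("GET /index.html HTTP/1.1")
print(req.method, req.path, req.version)  # GET /index.html HTTP/1.1

resp = httputil.parse_response_start_line("HTTP/1.1 200 OK")
print(resp.code, resp.reason)             # 200 OK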
Example #49
0
# -*- encoding: utf-8 -*-

from tornado.log import enable_pretty_logging, LogFormatter, access_log, app_log, gen_log

gen_log.info("--> importing .spider_handler")

from base_handler import *
from base_utils import *

# from 	tornado.log import access_log, app_log, gen_log # already imported from base_handler

# main decorator to handle parallelism
# from 	handler_threading import *

### OpenScraper generic scraper
from scraper import run_generic_spider

from config.settings_scrapy import DEFAULT_COUNTDOWN

### + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ###
### RUN SPIDER handlers as background tasks #################################################
### + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ###

# threading for background tasks (spiders mainly)
# cf : https://stackoverflow.com/questions/22082165/running-an-async-background-task-in-tornado/25304704
# cf : https://gist.github.com/marksilvis/ea1142680db66e2bb9b2a29e57306d76
# cf : https://stackoverflow.com/questions/22082165/running-an-async-background-task-in-tornado
# cf : https://gist.github.com/mivade/421c427db75c8c5fa1d1
# cf : http://www.tornadoweb.org/en/stable/faq.html#my-code-is-asynchronous-but-it-s-not-running-in-parallel-in-two-browser-tabs
# cf : http://www.tornadoweb.org/en/stable/guide/queues.html
# cf : https://emptysqua.re/blog/refactoring-tornado-coroutines/
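Building on the references above, a minimal sketch of launching a spider-like job as a background task without blocking request handling (the handler, route and job are hypothetical, not OpenScraper's actual code):

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.web import Application, RequestHandler


@gen.coroutine
def background_job(name):
    # stand-in for a long-running spider
    yield gen.sleep(5)
    print("background job %s finished" % name)


class LaunchHandler(RequestHandler):
    def get(self):
        # schedule the job on the IOLoop and return to the client immediately
        IOLoop.current().spawn_callback(background_job, "demo-spider")
        self.write("job scheduled")


if __name__ == "__main__":
    Application([(r"/launch", LaunchHandler)]).listen(8888)
    IOLoop.current().start()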
Example #50
0
def setup_loggers():
    """
	set up tornado loggers with custom format
	
	logger has 5 severity levels : 
		D - DEBUG (lowest)
		I - INFO
		W - WARNING
		E - ERROR
		C - CRITICAL (highest)
	"""

    # config logger output in console
    # logging.basicConfig(	level 	= logging.DEBUG,
    # 						format 	= "%(name)s - %(funcName)s - %(levelname)s : %(message)s" )

    # Create a Formatter for formatting the log messages
    # log_formatter = logging.Formatter('%(name)s -- %(funcName)s - %(levelname)s - %(message)s')
    openscraper_log_format = '%(color)s::: %(levelname)s %(name)s %(asctime)s ::: %(module)s:%(lineno)d -in- %(funcName)s() :::%(end_color)s \
		%(message)s'

    # datefmt='%y%m%d %H:%M:%S'
    # style='%'
    # color=True
    # colors={40: 1, 10: 4, 20: 2, 30: 3}
    """	
	default tornado format for logging : 
		fmt='%(color)s[%(levelname)1.1s %(asctime)s %(module)s:%(lineno)d]%(end_color)s %(message)s'
		datefmt='%y%m%d %H:%M:%S'
		style='%'
		color=True
		colors={40: 1, 10: 4, 20: 2, 30: 3}
		))
	"""
    # log_formatter = logging.Formatter( fmt=openscraper_log_format )
    tornado_log_formatter = LogFormatter(fmt=openscraper_log_format,
                                         color=True)

    enable_pretty_logging()

    ### Logger as self var
    # create the Logger
    # dictConfig(logging_config)
    # self.log 		= logging.getLogger(__name__)
    # self.logger 	= logging.getLogger()
    # self.access_log = logging.getLogger("tornado.access")
    # self.app_log 	= logging.getLogger("tornado.application")
    # self.gen_log 	= logging.getLogger("tornado.general")

    ### Get root logger
    root_logger = logging.getLogger()
    # print root_logger.__dict__

    ### Format root_logger stream
    # parent_logger = app_log.parent
    # print parent_logger.__dict__
    # root_stream_handler = parent_logger.handlers
    # root_stream_handler[0].setFormatter(tornado_log_formatter)
    root_logger.handlers[0].setFormatter(tornado_log_formatter)

    # streamHandler 	= logging.StreamHandler() # stream=sys.stdout
    # streamHandler.setFormatter(tornado_log_formatter)
    # self.gen_log.addHandler(streamHandler)
    # self.app_log.addHandler(streamHandler)
    # self.access_log.addHandler(streamHandler)

    # self.log.setLevel(logging.DEBUG)

    # Create the Handlers for logging data to log files
    gen_log_handler = logging.FileHandler('logs/openscraper_general.log')
    gen_log_handler.setLevel(logging.WARNING)

    access_log_handler = logging.FileHandler('logs/openscraper_access.log')
    access_log_handler.setLevel(logging.WARNING)

    app_log_handler = logging.FileHandler('logs/openscraper_app.log')
    app_log_handler.setLevel(logging.WARNING)

    # Add the Formatter to the Handler
    gen_log_handler.setFormatter(tornado_log_formatter)
    access_log_handler.setFormatter(tornado_log_formatter)
    app_log_handler.setFormatter(tornado_log_formatter)

    # Add the Handler to the Logger
    gen_log.addHandler(gen_log_handler)
    access_log.addHandler(access_log_handler)
    app_log.addHandler(app_log_handler)

    # test loggers
    print
    app_log.info('>>> this is app_log ')
    gen_log.info('>>> this is gen_log ')
    access_log.info('>>> this is access-log ')
    print
Example #51
0
    def _read_message(self, delegate):
        need_delegate_close = False
        try:
            # The headers and the body are separated by a blank line.
            header_future = self.stream.read_until_regex(
                b"\r?\n\r?\n",
                max_bytes=self.params.max_header_size)
            if self.params.header_timeout is None:
                header_data = yield header_future
            else:
                try:
                    header_data = yield gen.with_timeout(
                        self.stream.io_loop.time() + self.params.header_timeout,
                        header_future,
                        io_loop=self.stream.io_loop)
                except gen.TimeoutError:
                    self.close()
                    raise gen.Return(False)
            # Parse the headers, separating the header fields from the
            # start line (request-line/status-line).
            start_line, headers = self._parse_headers(header_data)
            # As a client we parse the server's response; as a server we parse
            # the client's request. The start line (status-line/request-line)
            # fields differ between the two:
            # 1. response's status-line: HTTP-Version SP Status-Code SP Reason-Phrase CRLF
            # 2. request's request-line: Method SP Request-URI SP HTTP-Version CRLF
            # The start_line value is a namedtuple.
            if self.is_client:
                start_line = httputil.parse_response_start_line(start_line)
                self._response_start_line = start_line
            else:
                start_line = httputil.parse_request_start_line(start_line)
                self._request_start_line = start_line
                self._request_headers = headers

            # Non-keep-alive requests and responses require closing the connection once handled.
            self._disconnect_on_finish = not self._can_keep_alive(
                start_line, headers)
            need_delegate_close = True
            with _ExceptionLoggingContext(app_log):
                header_future = delegate.headers_received(start_line, headers)
                if header_future is not None:
                    # If header_future is a `Future`, wait for it to complete before reading the body.
                    yield header_future
            # (the stream may have been detached here, e.g. by a websocket upgrade)
            if self.stream is None:
                # We've been detached.
                need_delegate_close = False
                raise gen.Return(False)
            skip_body = False
            if self.is_client:
                # As a client, if we issued a HEAD request the response should carry no body.
                if (self._request_start_line is not None and
                        self._request_start_line.method == 'HEAD'):
                    skip_body = True
                code = start_line.code
                if code == 304:
                    # 304 responses may include the content-length header
                    # but do not actually have a body.
                    # http://tools.ietf.org/html/rfc7230#section-3.3
                    skip_body = True
                if code >= 100 and code < 200:
                    # 1xx responses should never indicate the presence of
                    # a body.
                    if ('Content-Length' in headers or
                        'Transfer-Encoding' in headers):
                        raise httputil.HTTPInputError(
                            "Response code %d cannot have body" % code)
                    # TODO: client delegates will get headers_received twice
                    # in the case of a 100-continue.  Document or change?
                    yield self._read_message(delegate)
            else:
                # 100-continue is a status code introduced in HTTP/1.1 to
                # improve transfer efficiency. When a client needs to POST a
                # large body to the server, it can send the request with an
                # 'Expect: 100-continue' header. If the server accepts the
                # request it replies ``HTTP/1.1 100 (Continue)`` and the
                # client proceeds to transmit the request body; otherwise the
                # server replies ``HTTP/1.1 417 Expectation Failed`` and the
                # client abandons the remaining data. (Note: the Expect header
                # field, which states special server behavior required by the
                # client, is defined with an extensible syntax to ease future
                # extension.)
                if (headers.get("Expect") == "100-continue" and
                        not self._write_finished):
                    self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
            if not skip_body:
                body_future = self._read_body(
                    start_line.code if self.is_client else 0, headers, delegate)
                if body_future is not None:
                    if self._body_timeout is None:
                        yield body_future
                    else:
                        try:
                            yield gen.with_timeout(
                                self.stream.io_loop.time() + self._body_timeout,
                                body_future, self.stream.io_loop)
                        except gen.TimeoutError:
                            gen_log.info("Timeout reading body from %s",
                                         self.context)
                            self.stream.close()
                            raise gen.Return(False)
            self._read_finished = True
            # In client mode it is appropriate to call
            # HTTPMessageDelegate.finish() as soon as the response has been
            # parsed. In server mode, _write_finished indicates whether the
            # response has been fully sent; before that, calling
            # HTTPMessageDelegate.finish() lets the delegate produce the
            # response to the request.
            if not self._write_finished or self.is_client:
                need_delegate_close = False
                with _ExceptionLoggingContext(app_log):
                    delegate.finish()
            # If we're waiting for the application to produce an asynchronous
            # response, and we're not detached, register a close callback
            # on the stream (we didn't need one while we were reading)
            #
            # NOTE:_finish_future resolves when all data has been written and flushed
            # to the IOStream.
            #
            # Hold the flow here until the asynchronous response completes and
            # all data has been written to the fd; typically the caller's
            # `finish` call resolves `_finish_future` (see the `finish` and
            # `_finish_request` implementations for details).
            if (not self._finish_future.done() and
                    self.stream is not None and
                    not self.stream.closed()):
                self.stream.set_close_callback(self._on_connection_close)
                yield self._finish_future
            # In client mode, disconnect after handling the response unless it
            # is keep-alive. In server mode, only disconnect after the
            # response completes; see _finish_request/finish for details.
            if self.is_client and self._disconnect_on_finish:
                self.close()
            if self.stream is None:
                raise gen.Return(False)
        except httputil.HTTPInputError as e:
            gen_log.info("Malformed HTTP message from %s: %s",
                         self.context, e)
            self.close()
            raise gen.Return(False)
        finally:
            # If the request could not be finished (no call to
            # HTTPMessageDelegate.finish()) before the connection "closed",
            # call HTTPMessageDelegate.on_connection_close() instead.
            if need_delegate_close:
                with _ExceptionLoggingContext(app_log):
                    delegate.on_connection_close()
            self._clear_callbacks()
        raise gen.Return(True)
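The 100-continue handshake described in those comments can be observed with a raw socket against any server that honors it (the host and port here are placeholders):

import socket

# placeholder endpoint; assumes a server that honors Expect: 100-continue
sock = socket.create_connection(("localhost", 8888))
sock.sendall(b"POST /upload HTTP/1.1\r\n"
             b"Host: localhost\r\n"
             b"Content-Length: 5\r\n"
             b"Expect: 100-continue\r\n"
             b"\r\n")
# wait for the interim response before transmitting the body
interim = sock.recv(1024)
if interim.startswith(b"HTTP/1.1 100"):
    sock.sendall(b"hello")  # server agreed; send the 5-byte body
print(sock.recv(4096).decode("latin1"))
sock.close()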
Example #52
0
    def _read_message(self, delegate):
        need_delegate_close = False
        try:
            header_future = self.stream.read_until_regex(
                b"\r?\n\r?\n", max_bytes=self.params.max_header_size)
            if self.params.header_timeout is None:
                header_data = yield header_future
            else:
                try:
                    header_data = yield gen.with_timeout(
                        self.stream.io_loop.time() +
                        self.params.header_timeout,
                        header_future,
                        quiet_exceptions=iostream.StreamClosedError)
                except gen.TimeoutError:
                    self.close()
                    raise gen.Return(False)
            start_line, headers = self._parse_headers(header_data)
            if self.is_client:
                start_line = httputil.parse_response_start_line(start_line)
                self._response_start_line = start_line
            else:
                start_line = httputil.parse_request_start_line(start_line)
                self._request_start_line = start_line
                self._request_headers = headers

            self._disconnect_on_finish = not self._can_keep_alive(
                start_line, headers)
            need_delegate_close = True
            with _ExceptionLoggingContext(app_log):
                header_future = delegate.headers_received(start_line, headers)
                if header_future is not None:
                    yield header_future
            if self.stream is None:
                # We've been detached.
                need_delegate_close = False
                raise gen.Return(False)
            skip_body = False
            if self.is_client:
                if (self._request_start_line is not None
                        and self._request_start_line.method == 'HEAD'):
                    skip_body = True
                code = start_line.code
                if code == 304:
                    # 304 responses may include the content-length header
                    # but do not actually have a body.
                    # http://tools.ietf.org/html/rfc7230#section-3.3
                    skip_body = True
                if code >= 100 and code < 200:
                    # 1xx responses should never indicate the presence of
                    # a body.
                    if ('Content-Length' in headers
                            or 'Transfer-Encoding' in headers):
                        raise httputil.HTTPInputError(
                            "Response code %d cannot have body" % code)
                    # TODO: client delegates will get headers_received twice
                    # in the case of a 100-continue.  Document or change?
                    yield self._read_message(delegate)
            else:
                if (headers.get("Expect") == "100-continue"
                        and not self._write_finished):
                    self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
            if not skip_body:
                body_future = self._read_body(
                    start_line.code if self.is_client else 0, headers,
                    delegate)
                if body_future is not None:
                    if self._body_timeout is None:
                        yield body_future
                    else:
                        try:
                            yield gen.with_timeout(
                                self.stream.io_loop.time() +
                                self._body_timeout,
                                body_future,
                                quiet_exceptions=iostream.StreamClosedError)
                        except gen.TimeoutError:
                            gen_log.info("Timeout reading body from %s",
                                         self.context)
                            self.stream.close()
                            raise gen.Return(False)
            self._read_finished = True
            if not self._write_finished or self.is_client:
                need_delegate_close = False
                with _ExceptionLoggingContext(app_log):
                    delegate.finish()
            # If we're waiting for the application to produce an asynchronous
            # response, and we're not detached, register a close callback
            # on the stream (we didn't need one while we were reading)
            if (not self._finish_future.done() and self.stream is not None
                    and not self.stream.closed()):
                self.stream.set_close_callback(self._on_connection_close)
                yield self._finish_future
            if self.is_client and self._disconnect_on_finish:
                self.close()
            if self.stream is None:
                raise gen.Return(False)
        except httputil.HTTPInputError as e:
            gen_log.info("Malformed HTTP message from %s: %s", self.context, e)
            if not self.is_client:
                yield self.stream.write(b'HTTP/1.1 400 Bad Request\r\n\r\n')
            self.close()
            raise gen.Return(False)
        finally:
            if need_delegate_close:
                with _ExceptionLoggingContext(app_log):
                    delegate.on_connection_close()
            header_future = None
            self._clear_callbacks()
        raise gen.Return(True)
Example #53
0
    def _read_message(self, delegate):
        need_delegate_close = False
        try:
            header_future = self.stream.read_until_regex(
                b"\r?\n\r?\n", max_bytes=self.params.max_header_size)
            if self.params.header_timeout is None:
                header_data = yield header_future
            else:
                try:
                    header_data = yield gen.with_timeout(
                        self.stream.io_loop.time() +
                        self.params.header_timeout,
                        header_future,
                        io_loop=self.stream.io_loop)
                except gen.TimeoutError:
                    self.close()
                    raise gen.Return(False)
            start_line, headers = self._parse_headers(header_data)
            if self.is_client:
                start_line = httputil.parse_response_start_line(start_line)
                self._response_start_line = start_line
            else:
                start_line = httputil.parse_request_start_line(start_line)
                self._request_start_line = start_line
                self._request_headers = headers

            self._disconnect_on_finish = not self._can_keep_alive(
                start_line, headers)
            need_delegate_close = True
            header_future = delegate.headers_received(start_line, headers)
            if header_future is not None:
                yield header_future
            if self.stream is None:
                # We've been detached.
                need_delegate_close = False
                raise gen.Return(False)
            skip_body = False
            if self.is_client:
                if (self._request_start_line is not None
                        and self._request_start_line.method == 'HEAD'):
                    skip_body = True
                code = start_line.code
                if code == 304:
                    skip_body = True
                if code >= 100 and code < 200:
                    # TODO: client delegates will get headers_received twice
                    # in the case of a 100-continue.  Document or change?
                    yield self._read_message(delegate)
            else:
                if (headers.get("Expect") == "100-continue"
                        and not self._write_finished):
                    self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
            if not skip_body:
                body_future = self._read_body(headers, delegate)
                if body_future is not None:
                    if self._body_timeout is None:
                        yield body_future
                    else:
                        try:
                            yield gen.with_timeout(
                                self.stream.io_loop.time() +
                                self._body_timeout, body_future,
                                self.stream.io_loop)
                        except gen.TimeoutError:
                            gen_log.info("Timeout reading body from %s",
                                         self.context)
                            self.stream.close()
                            raise gen.Return(False)
            self._read_finished = True
            if not self._write_finished or self.is_client:
                need_delegate_close = False
                delegate.finish()
            # If we're waiting for the application to produce an asynchronous
            # response, and we're not detached, register a close callback
            # on the stream (we didn't need one while we were reading)
            if (not self._finish_future.done() and self.stream is not None
                    and not self.stream.closed()):
                self.stream.set_close_callback(self._on_connection_close)
                yield self._finish_future
            if self.is_client and self._disconnect_on_finish:
                self.close()
            if self.stream is None:
                raise gen.Return(False)
        except httputil.HTTPInputException as e:
            gen_log.info("Malformed HTTP message from %s: %s", self.context, e)
            self.close()
            raise gen.Return(False)
        finally:
            if need_delegate_close:
                delegate.on_connection_close()
            self._clear_callbacks()
        raise gen.Return(True)
Example #54
0
    def task(self, *args):
        temp_id = args[0]
        gen_log.info("=== Do task id({})====".format(temp_id))

        doc = yield Temp().get_temp(temp_id)
        if doc is None:
            gen_log.info("Temp({}) be deleted!".format(temp_id))
            yield self.remove_from_tasks(temp_id)
            raise gen.Return()

        access_token = doc.get('key')
        period = doc.get('read_period')
        temp_open = doc.get('open')
        has_sleep = doc.get('has_sleep')
        board_type_id = doc.get('board_type_id')

        if not temp_open:
            gen_log.info("Temp({}) be closed!".format(temp_id))
            yield self.remove_from_tasks(temp_id, "normal",
                                         "The temp is closed.")
            raise gen.Return()

        end_time = time.time() + (3 * period)
        while True:
            try:
                temps = []
                wio = Wio(access_token)
                for i in range(4):
                    result = yield wio.get_temp(board_type_id)
                    temps.append(result)
                    yield gen.sleep(1)

                temp = round(sum(temps[1:]) / (len(temps) - 1), 1)
            except Exception as e:
                # TODO: if the Grove temperature sensor is not plugged in, this will raise an error
                if time.time() > end_time:
                    gen_log.error("Temp({}) {}".format(temp_id, e))
                    yield self.remove_from_tasks(
                        temp_id, "error",
                        "The node is not wake up on three period.")
                    yield self.close_temp(temp_id)
                    raise gen.Return()
                yield gen.sleep(5)
                gen_log.info("Temp({}) {}".format(temp_id, e))
                continue

            gen_log.info("{} ==> {}".format(temps, temp))
            self.update_temp(temp_id, temp)
            break

        if has_sleep is True:
            wio = Wio(access_token)
            try:
                yield wio.sleep(period, board_type_id)
                self.update_status(temp_id, "normal",
                                   "The node is sleep mode.")
            except Exception as e:
                gen_log.error(e)
        else:
            self.update_status(temp_id, "normal", "The node is online mode.")

        IOLoop.current().add_timeout(time.time() + (period or 60), self.task,
                                     temp_id)
        gen_log.info("task ids: ({})".format(self.tasks))
        gen_log.info("===End task and restart({})====".format(temp_id))
Example #55
0
 def add_subscriber(self, subscriber):
     self._handlers.add(subscriber)
     # yield result
     gen_log.info('PikaConnector: subscriber %s added' % repr(subscriber))
Example #56
0
def run():
    options.parse_command_line()

    if options.config:
        options.parse_config_file(options.config)

    options.storage = os.path.abspath(options.storage)

    if os.getuid() == 0 and options.user:
        pw = pwd.getpwnam(options.user)
        uid, gid = pw.pw_uid, pw.pw_gid
        log.info("Changind user to %s [%s:%s]", options.user, uid, gid)
        os.setgid(uid)
        os.setuid(uid)

    try:
        if not all(f(options.storage) for f in (os.path.exists, os.path.isdir)):
            log.info('Creating new package storage directory: "%s"', options.storage)
            os.makedirs(options.storage)

        def on_interrupt(*args):
            log.warning("Receiving interrupt signal. Application will be stopped.")
            exit(errno.EINTR)

        log.debug("Preparing signal handling")
        for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGQUIT):
            signal.signal(sig, on_interrupt)

        def handle_pdb(sig, frame):
            import pdb
            pdb.Pdb().set_trace(frame)

        if options.debug:
            signal.signal(signal.SIGUSR2, handle_pdb)

        log.debug("Creating application instance")
        app = create_app(
            options.debug,
            options.secret,
            options.gzip,
        )

        log.debug("Creating IOLoop instance.")
        io_loop = IOLoop.current()

        io_loop.run_sync(lambda: init_db(options.database))

        if not (os.path.exists(options.cache_dir) and os.path.isdir(options.cache_dir)):
            os.makedirs(options.cache_dir)

        Cache.CACHE_DIR = options.cache_dir

        log.info("Init thread pool with %d threads", options.pool_size)
        handlers.base.BaseHandler.THREAD_POOL = futures.ThreadPoolExecutor(options.pool_size)

        AsyncHTTPClient.configure(None, max_clients=options.max_http_clients)

        proxy_url = URL(os.getenv('{0}_proxy'.format(options.pypi_server.scheme)))
        if proxy_url:
            log.debug("Configuring for proxy: %s", proxy_url)
            AsyncHTTPClient.configure(
                    'tornado.curl_httpclient.CurlAsyncHTTPClient',
                    defaults={
                        'proxy_host': proxy_url.host,
                        'proxy_port': proxy_url.port,
                        'proxy_username': proxy_url.user,
                        'proxy_password': proxy_url.password,
                        }
                    )

        PYPIClient.configure(
            options.pypi_server,
            handlers.base.BaseHandler.THREAD_POOL
        )

        if options.pypi_proxy:
            pypi_updater = PeriodicCallback(PYPIClient.packages, HOUR * 1000, io_loop)

            io_loop.add_callback(PYPIClient.packages)
            io_loop.add_callback(pypi_updater.start)

        log.info("Starting server http://%s:%d/", options.address, options.port)
        http_server = HTTPServer(app, xheaders=options.proxy_mode)
        http_server.listen(options.port, address=options.address)

        log.debug('Setting "%s" as storage', options.storage)
        PackageFile.set_storage(options.storage)

        log.debug("Starting main loop")
        io_loop.start()
    except Exception as e:
        log.fatal("Exception on main loop:")
        log.exception(e)
        exit(1)
    else:
        exit(0)
Example #57
0
 def close_connection(self):
     """This method closes the connection to RabbitMQ."""
     gen_log.info('Closing connection')
     self._connection.close()
Example #58
0
def main():
    """Command-line wrapper to re-run a script whenever its source changes.

    Scripts may be specified by filename or module name::

        python -m tornado.autoreload -m tornado.test.runtests
        python -m tornado.autoreload tornado/test/runtests.py

    Running a script with this wrapper is similar to calling
    `tornado.autoreload.wait` at the end of the script, but this wrapper
    can catch import-time problems like syntax errors that would otherwise
    prevent the script from reaching its call to `wait`.
    """
    original_argv = sys.argv
    sys.argv = sys.argv[:]
    if len(sys.argv) >= 3 and sys.argv[1] == "-m":
        mode = "module"
        module = sys.argv[2]
        del sys.argv[1:3]
    elif len(sys.argv) >= 2:
        mode = "script"
        script = sys.argv[1]
        sys.argv = sys.argv[1:]
    else:
        print(_USAGE, file=sys.stderr)
        sys.exit(1)

    try:
        if mode == "module":
            import runpy
            runpy.run_module(module, run_name="__main__", alter_sys=True)
        elif mode == "script":
            with open(script) as f:
                # Execute the script in our namespace instead of creating
                # a new one so that something that tries to import __main__
                # (e.g. the unittest module) will see names defined in the
                # script instead of just those defined in this module.
                global __file__
                __file__ = script
                # If __package__ is defined, imports may be incorrectly
                # interpreted as relative to this module.
                global __package__
                del __package__
                exec_in(f.read(), globals(), globals())
    except SystemExit as e:
        logging.basicConfig()
        gen_log.info("Script exited with status %s", e.code)
    except Exception as e:
        logging.basicConfig()
        gen_log.warning("Script exited with uncaught exception", exc_info=True)
        # If an exception occurred at import time, the file with the error
        # never made it into sys.modules and so we won't know to watch it.
        # Just to make sure we've covered everything, walk the stack trace
        # from the exception and watch every file.
        for (filename, lineno, name,
             line) in traceback.extract_tb(sys.exc_info()[2]):
            watch(filename)
        if isinstance(e, SyntaxError):
            # SyntaxErrors are special:  their innermost stack frame is fake
            # so extract_tb won't see it and we have to get the filename
            # from the exception object.
            watch(e.filename)
    else:
        logging.basicConfig()
        gen_log.info("Script exited normally")
    # restore sys.argv so subsequent executions will include autoreload
    sys.argv = original_argv

    if mode == 'module':
        # runpy did a fake import of the module as __main__, but now it's
        # no longer in sys.modules.  Figure out where it is and watch it.
        loader = pkgutil.get_loader(module)
        if loader is not None:
            watch(loader.get_filename())

    wait()
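
The wrapper above is the command-line entry point; tornado.autoreload can also be used programmatically from inside a running application. A minimal sketch, assuming the IOLoop is about to be started (the watched config file is hypothetical):

import tornado.autoreload
import tornado.ioloop

tornado.autoreload.watch('settings.conf')  # hypothetical extra file to watch
tornado.autoreload.start()  # restart this process when a watched file changes
tornado.ioloop.IOLoop.current().start()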
Example #59
0
fh_app = logging.handlers.TimedRotatingFileHandler(
    filename=options.APPLICATION_LOG, when='D')
fh_app.setFormatter(fmt)
app_log.addHandler(fh_app)

fh_gen = logging.handlers.TimedRotatingFileHandler(
    filename=options.GENERAL_LOG, when='D')
fh_gen.setFormatter(fmt)
gen_log.addHandler(fh_gen)

from urls import url_patterns

application = tornado.web.Application(
    url_patterns,
    cookie_secret="__TODO:_GENERATE_YOUR_OWN_RANDOM_VALUE_HERE__",
    xsrf_cookies=False,
    debug=options.debug,
)

if __name__ == "__main__":

    gen_log.info(',[session_id:],server started. port:{0}'.format(
        options.port))

    # Tornado Service Start
    try:
        application.listen(options.port)
        tornado.ioloop.IOLoop.instance().start()
    except KeyboardInterrupt:
        tornado.ioloop.IOLoop.instance().stop()
        gen_log.info(',[session_id:],server stopped.')
Example #60
0
def fork_processes(num_processes, max_restarts=100):
    """Starts multiple worker processes.

    If ``num_processes`` is None or <= 0, we detect the number of cores
    available on this machine and fork that number of child
    processes. If ``num_processes`` is given and > 0, we fork that
    specific number of sub-processes.

    Since we use processes and not threads, there is no shared memory
    between any server code.

    Note that multiple processes are not compatible with the autoreload
    module (or the debug=True option to `tornado.web.Application`).
    When using multiple processes, no IOLoops can be created or
    referenced until after the call to ``fork_processes``.

    In each child process, ``fork_processes`` returns its *task id*, a
    number between 0 and ``num_processes``.  Processes that exit
    abnormally (due to a signal or non-zero exit status) are restarted
    with the same id (up to ``max_restarts`` times).  In the parent
    process, ``fork_processes`` returns None if all child processes
    have exited normally, but will otherwise only exit by throwing an
    exception.
    """

    global _task_id
    assert _task_id is None
    if num_processes is None or num_processes <= 0:
        num_processes = cpu_count()
    if ioloop.IOLoop.initialized():
        raise RuntimeError(
            "Cannot run in multiple processes: IOLoop instance "
            "has already been initialized. You cannot call "
            "IOLoop.instance() before calling start_processes()")
    gen_log.info("Starting %d processes", num_processes)
    children = {}

    # This part is simple: when no process count is passed in, we default to
    # the number of CPU cores as the number of processes to spawn.

    # This is an inner function whose job is to spawn one child process.
    # fork() is an interesting call: it returns in two places at once. Why?
    # Think of fork() as building a new road (the child process) alongside
    # the existing one (the parent process). If the new road is built
    # successfully, then from the original road (the parent) you can see the
    # new road next to you, so fork() returns the name of that new road
    # (the child's pid). On the new road itself (the child), all you can see
    # is that you were built successfully, so fork() returns your own status
    # code there, which is 0.

    # So `if pid == 0` means the CPU is now executing in the child process,
    # i.e. we are working on the newly built road (return the task id);
    # the else branch means we are back on the original road (the parent),
    # where we record the newly spawned child: in `children[pid] = i`, `pid`
    # is the pid of the new child process, and `i` is the task id returned
    # inside the child (effectively a stand-in id for that child).

    def start_child(i):
        pid = os.fork()
        if pid == 0:
            # child process
            _reseed_random()
            global _task_id
            _task_id = i
            return i
        else:
            children[pid] = i
            return None

    # `if id is not None` means we are in the context of the child process we
    # just spawned: do nothing more, just return the child's task id and stop
    # there. If we are still in the parent's context, keep spawning the
    # remaining children.
    for i in range(num_processes):
        id = start_child(i)
        if id is not None:
            return id
    num_restarts = 0
    while children:
        try:
            # `pid, status = os.wait()` waits for any child process to exit,
            # at which point we remove it from our `children` table and
            # inspect `status` to find out why it exited.

            # If the child died from a kill signal or an uncaught exception,
            # we restart a new child, reusing, of course, the task id of the
            # child that just exited. If the child exited because it finished
            # its work normally, we leave it alone and wait for the others.
            pid, status = os.wait()
        except OSError as e:
            if e.errno == errno.EINTR:
                continue
            raise
        if pid not in children:
            continue
        id = children.pop(pid)
        if os.WIFSIGNALED(status):
            gen_log.warning(
                "child %d (pid %d) killed by signal %d, restarting", id, pid,
                os.WTERMSIG(status))
        elif os.WEXITSTATUS(status) != 0:
            gen_log.warning(
                "child %d (pid %d) exited with status %d, restarting", id, pid,
                os.WEXITSTATUS(status))
        else:
            gen_log.info("child %d (pid %d) exited normally", id, pid)
            continue
        num_restarts += 1
        if num_restarts > max_restarts:
            raise RuntimeError("Too many child restarts, giving up")
        # Note that restarting a child goes back through start_child(),
        # reusing the exited child's task id.
        new_id = start_child(id)
        if new_id is not None:
            return new_id
    # All child processes exited cleanly, so exit the master process
    # instead of just returning to right after the call to
    # fork_processes (which will probably just start up another IOLoop
    # unless the caller checks the return value).
    sys.exit(0)
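
A typical use of fork_processes is to bind the listening sockets once in the parent and then start one HTTP server per child. A minimal sketch, assuming `app` is an existing tornado.web.Application:

import tornado.httpserver
import tornado.ioloop
import tornado.netutil
import tornado.process

sockets = tornado.netutil.bind_sockets(8888)  # bind before forking
tornado.process.fork_processes(0)  # 0 / None -> one child per CPU core
server = tornado.httpserver.HTTPServer(app)  # `app` assumed defined elsewhere
server.add_sockets(sockets)
tornado.ioloop.IOLoop.current().start()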