Beispiel #1
0
def tcount_exception(counter_name):
    """Send the details reported by ``get_exception_info`` to the counter.

    The details are packed into a record for ``counter_name`` with
    ``generate_json_record`` and POSTed to ``URL`` as JSON. The response is
    handed to a callback that ignores it.

    :param counter_name: counter name forwarded to ``generate_json_record``
    """
    exception_details = get_exception_info()
    exc_name, exc_value, lineno, lines = exception_details
    record = generate_json_record(
        counter_name, exc_name, exc_value, lineno, lines)

    # Select the curl-based implementation before a client is created.
    AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")

    def _discard_response(resp):
        # The outcome of the POST is deliberately not inspected.
        return None

    http_client = AsyncHTTPClient()
    http_client.fetch(
        URL, _discard_response, method="POST", body=json.dumps(record))
Beispiel #2
0
def main():
    """Run one hard-coded business search and print every result.

    Builds the query parameters and the OAuth1 credentials, issues the
    request through ``Businesses.all`` and runs the IOLoop until the
    response callback stops it.
    """
    term, location, search_limit = 'dinner', 'Chattanooga, TN', 3
    url_params = {
        'term': term.replace(' ', '+'),
        'location': location.replace(' ', '+'),
        'limit': search_limit,
    }

    def on_response(response, error):
        # Stop the loop first so main() returns even when the request failed.
        ioloop.IOLoop.instance().stop()

        if error:
            raise error

        for repo in response:
            # Call form of print: valid on Python 3 and, with a single
            # argument, prints the same text on Python 2.
            print(repo)

    auth = OAuth1(client_key=CLIENT_KEY,
                  client_secret=CLIENT_SECRET,
                  resource_owner_key=RESOURCE_OWNER_KEY,
                  resource_owner_secret=RESOURCE_OWNER_SECRET,
    )

    # Keep the default implementation, but send a custom User-Agent.
    AsyncHTTPClient.configure(None, defaults=dict(user_agent="ipa-agent"))
    buss = Businesses(AsyncHTTPClient())
    buss.all(on_response, auth=auth, url_params=url_params)

    ioloop.IOLoop.instance().start()
Beispiel #3
0
def getAClient(max_clients=200):
    """Create an ``AsyncHTTPClient`` and register ``raise_error=False``.

    :param max_clients: passed to the ``AsyncHTTPClient`` constructor
    :return: the client instance created here
    """
    # Original author's note: page 20 * size 20,maybe great
    http_client = AsyncHTTPClient(max_clients=max_clients)
    # NOTE(review): configure() is called after the instance already exists,
    # so presumably only clients created later see raise_error -- confirm
    # this is the intended effect.
    configure_kwargs = {"raise_error": False}
    http_client.configure(None, **configure_kwargs)
    return http_client
Beispiel #4
0
def http_request(url, server, port=80, timeout=20.0):
    """Fetch ``url`` through a proxy and report the rate-limit headers.

    A response is only used when its first ``Server`` header equals
    ``'tfe'``; otherwise the initial values are returned unchanged.

    :param url: URL to fetch
    :param server: proxy host the request is sent through
    :param port: proxy port
    :param timeout: used as both connect and request timeout
    :return: ``(code, data, rhits, rtime)``; ``code`` starts at 999 and the
        other three start as ``None``. ``rhits`` / ``rtime`` are the integer
        values of ``X-RateLimit-Remaining`` / ``X-RateLimit-Reset``.
    """
    def check_twitter_response(response):
        # HTTPError.response may be None, so guard before reading headers.
        if response is None:
            return False
        servers = response.headers.get_list('Server')
        return len(servers) > 0 and servers[0] == 'tfe'

    def get_rate_limits(headers):
        def first_int(name):
            values = headers.get_list(name)
            return int(values[0]) if values else None

        rtime = first_int('X-RateLimit-Reset')
        rhits = first_int('X-RateLimit-Remaining')
        return rhits, rtime

    AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
    http_client = HTTPClient()
    code, data, rtime, rhits = 999, None, None, None
    try:
        response = http_client.fetch(url, proxy_host=server, proxy_port=port,
                                     connect_timeout=timeout,
                                     request_timeout=timeout)
        response.rethrow()

        if check_twitter_response(response):
            code, data = response.code, response.body
            rhits, rtime = get_rate_limits(response.headers)

    except HTTPError as e:
        if check_twitter_response(e.response):
            code, data = e.code, None
            rhits, rtime = get_rate_limits(e.response.headers)

    finally:
        # A new blocking client is created on every call; close it so the
        # resources it holds are released instead of leaking per request.
        http_client.close()

    return code, data, rhits, rtime
Beispiel #5
0
 def __init__(self,url,action,target,callback=None,retry=0,ssl=False):
     """
     Build the event source client.

     :param url: string, the url to connect to
     :param action: string of the listening action to connect to
     :param target: string with the listening token
     :param callback: function with one parameter (Event) that gets called for each received event
     :param retry: timeout between two reconnections (0 means no reconnection)
     :param ssl: when true the request URL uses ``https://`` instead of ``http://``
     """
     log.debug("EventSourceClient(%s,%s,%s,%s,%s)" % (url,action,target,callback,retry))

     # Pick the URL template by scheme, then fill in host, action and token.
     url_template = "https://%s/%s/%s" if ssl else "http://%s/%s/%s"
     self._url = url_template % (url,action,target)

     AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
     self.http_client = AsyncHTTPClient()

     # request_timeout=0 is passed for the streaming request; incoming
     # chunks are handed to handle_stream.
     request_options = dict(
         url=self._url,
         method='GET',
         headers={"content-type":"text/event-stream"},
         request_timeout=0,
         streaming_callback=self.handle_stream,
     )
     self.http_request = HTTPRequest(**request_options)

     # Without a callback, received events are only logged.
     self.cb = callback if callback is not None else (
         lambda e: log.info( "received %s" % (e,) ))
     self.retry_timeout = int(retry)
Beispiel #6
0
    def __init__(self, url, callback = None, retry = 0, keep_alive = False, ssl = False, validate_cert = False, user = None, password = None, async_http_client = DEFAULT_ASYNC_HTTP_CLIENT):
        """
        Build the event source client.

        :param url: string, the url to connect to
        :param callback: function with one parameter (Event) that gets called for each received event
        :param retry: timeout between two reconnections (0 means no reconnection)
        :param keep_alive: stored as ``self.keep_alive``
        :param ssl: accepted but not used by this constructor
        :param validate_cert: stored in the request options as ``validate_cert``
        :param user: stored in the request options as ``auth_username``
        :param password: stored in the request options as ``auth_password``
        :param async_http_client: implementation handed to ``AsyncHTTPClient.configure``;
            ``None`` skips the configure call
        """
        log.debug("EventSourceClient(%s,%s,%s)" % (url, callback, retry))

        # Initial state of the client.
        self.data_partial = None
        self.last_event_id = None
        self.retry_timeout = int(retry)
        self.keep_alive = keep_alive
        self.current_event = None

        # Request description kept for later use.
        self._url = url
        self._headers = {"Accept": "text/event-stream"}
        self._options = dict(
            validate_cert=validate_cert,
            auth_username=user,
            auth_password=password,
        )

        if async_http_client is not None:
            AsyncHTTPClient.configure(async_http_client)
        self.http_client = AsyncHTTPClient()

        # Without a callback, received events are only logged.
        self.cb = callback if callback is not None else (
            lambda e: log.info( "received %s" % (e,) ))
Beispiel #7
0
 def __init__(self, host = 'localhost', max_clients=100, port = 4711, logger = None):
     """
     Create the curl-backed HTTP client and store the connection settings.

     :param host: stored as ``self.host``
     :param max_clients: passed to the ``AsyncHTTPClient`` constructor
     :param port: stored as ``self.port``
     :param logger: stored as ``self.logger``
     """
     curl_impl = "tornado.curl_httpclient.CurlAsyncHTTPClient"
     AsyncHTTPClient.configure(curl_impl)
     self.http_client = AsyncHTTPClient(max_clients=max_clients)
     self.host, self.port, self.logger = host, port, logger
     # body starts out as an empty string.
     self.body = ''
Beispiel #8
0
    def __init__(self):
        """Create the curl-backed HTTP client and set credentials and Weibo URLs."""
        curl_impl = "tornado.curl_httpclient.CurlAsyncHTTPClient"
        AsyncHTTPClient.configure(curl_impl)
        self.client = AsyncHTTPClient()

        # Developer tokens
        self.client_id = WEI_CLIENT_ID
        self.client_secret = WEI_CLIENT_SECRET
        self.admin_token = WEI_ADMIN_TOKEN

        # OAuth-related URLs
        self.oauth2_url = (
            'https://api.weibo.com/oauth2/authorize?'
            'client_id=%s&'
            'response_type=code&'
            'redirect_uri=twiwei.com/oauth2/weibo/access_token/'
        ) % self.client_id
        self.access_token_url = 'https://api.weibo.com/oauth2/access_token'

        # Timeline-related URLs
        self.user_home_url = 'https://api.weibo.com/2/statuses/home_timeline.json'
        self.public_url = 'https://api.weibo.com/2/statuses/public_timeline.json'

        # User-action-related URLs
        self.like_weibo_url = 'https://api.weibo.com/2/favorites/create.json'
        self.unlike_weibo_url = 'https://api.weibo.com/2/favorites/destroy.json'
        self.get_replies_url = 'https://api.weibo.com/2/comments/show.json'
        self.repost_message_url = 'https://api.weibo.com/2/statuses/repost.json'
        self.reply_message_url = 'https://api.weibo.com/2/comments/create.json'

        # User-info-related URLs
        self.user_info_url = 'https://api.weibo.com/2/users/show.json'
        self.liked_url = 'https://api.weibo.com/2/favorites/ids.json'
Beispiel #9
0
    def __init__(
            self, concurrency=10, auto_start=False, cache=False,
            expiration_in_seconds=30, request_timeout_in_seconds=10,
            connect_timeout_in_seconds=5, ignore_pycurl=False,
            limiter=None, allow_connection_reuse=True):
        """Store the settings and, when requested, start immediately.

        :param concurrency: stored as ``self.concurrency``
        :param auto_start: when true, ``self.start()`` is called at the end
        :param cache: stored as ``self.cache``
        :param expiration_in_seconds: passed to the ``Cache`` constructor
        :param request_timeout_in_seconds: stored on the instance
        :param connect_timeout_in_seconds: stored on the instance
        :param ignore_pycurl: when true, the curl client is not configured
            even if pycurl is available
        :param limiter: stored as ``self.limiter``
        :param allow_connection_reuse: only honoured when the curl client
            is configured; otherwise the attribute is forced to ``True``
        """
        self.concurrency = concurrency
        self.auto_start = auto_start
        self.last_timeout = None

        self.cache = cache
        self.response_cache = Cache(expiration_in_seconds=expiration_in_seconds)
        self.request_timeout_in_seconds = request_timeout_in_seconds
        self.connect_timeout_in_seconds = connect_timeout_in_seconds

        self.ignore_pycurl = ignore_pycurl

        self.running_urls = 0
        self.url_queue = []

        # Assigned before the optional auto-start below so that start() never
        # runs on an instance that is missing this attribute.
        self.limiter = limiter

        if PYCURL_AVAILABLE and not self.ignore_pycurl:
            logging.debug('pycurl is available, thus Octopus will be using it instead of tornado\'s simple http client.')
            AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
            self.allow_connection_reuse = allow_connection_reuse
        else:
            self.allow_connection_reuse = True

        if auto_start:
            logging.debug('Auto starting...')
            self.start()
def main():
    """Fetch each product page with the curl client and print scraped fields.

    The body contains ``yield``, so calling this function returns a
    generator.
    NOTE(review): presumably meant to run under a Tornado coroutine
    decorator that is not visible here -- confirm.
    """
    def scrapy(body):
        # Parse the page, then read name, price and discount out of the
        # "itemInfo" element inside "product-intro".
        soup = BeautifulSoup(body)
        print(soup)
        intro = soup.find("div",id="product-intro")
        item = intro.find("div",id="itemInfo")
        name = item.find("div",id="name").find("h1").text

        price_text = item.find("div",id="summary-price").find("strong",class_="p-price").text
        price = re_price.search(price_text).group()

        discount_text = item.find("div",id="summary-price").find("span",class_="p-discount").text
        discount = re_price.search(discount_text).group()

        print("insert a shop(%s,%s,%s)"%(name,price,discount))

    def prepare_curl_opts(obj):
        # Hook passed as prepare_curl_callback; currently does nothing.
        pass

    AsyncHTTPClient.configure(CurlAsyncHTTPClient)
    target_urls = ["http://item.jd.com/11917788.html"]
    for url in target_urls:
        client = AsyncHTTPClient()
        response = yield client.fetch(url,prepare_curl_callback=prepare_curl_opts)
        scrapy(response.body)
        print(list(response.headers.get_all()))
    def __init__(self, url, callback = None, retry = 0, keep_alive = False, ssl = False, validate_cert = False, user = None, password = None):
        """
        Build the event source client.

        :param url: string, the url to connect to
        :param callback: function with one parameter (Event) that gets called for each received event
        :param retry: timeout between two reconnections (0 means no reconnection)
        :param keep_alive: stored as ``self.keep_alive``
        :param ssl: accepted but not used by this constructor
        :param validate_cert: passed to the ``HTTPRequest`` as ``validate_cert``
        :param user: passed to the ``HTTPRequest`` as ``auth_username``
        :param password: passed to the ``HTTPRequest`` as ``auth_password``
        """
        log.debug("EventSourceClient(%s,%s,%s)" % (url, callback, retry))

        # Initial state of the client.
        self.data_partial = None
        self.last_event_id = None
        self.retry_timeout = int(retry)
        self.keep_alive = keep_alive
        self._url = url
        self._headers = {"Accept": "text/event-stream"}
        self._user, self._password = user, password

        AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
        self.http_client = AsyncHTTPClient()

        # request_timeout=0 is passed for the streaming request; incoming
        # chunks are handed to handle_stream.
        request_options = dict(
            url=self._url,
            method="GET",
            headers={"content-type":"text/event-stream"},
            request_timeout=0,
            validate_cert=validate_cert,
            streaming_callback=self.handle_stream,
            auth_username=user,
            auth_password=password,
        )
        self.http_request = HTTPRequest(**request_options)

        # Without a callback, received events are only logged.
        self.cb = callback if callback is not None else (
            lambda e: log.info( "received %s" % (e,) ))
    def test_max_clients(self):
        # The max_clients argument is tricky because it was originally
        # allowed to be passed positionally; newer arguments are keyword-only.
        AsyncHTTPClient.configure(SimpleAsyncHTTPClient)
        with closing(AsyncHTTPClient(
                self.io_loop, force_instance=True)) as client:
            self.assertEqual(client.max_clients, 10)
        with closing(AsyncHTTPClient(
                self.io_loop, 11, force_instance=True)) as client:
            self.assertEqual(client.max_clients, 11)
        with closing(AsyncHTTPClient(
                self.io_loop, max_clients=11, force_instance=True)) as client:
            self.assertEqual(client.max_clients, 11)

        # Now configure max_clients statically and try overriding it
        # with each way max_clients can be passed
        AsyncHTTPClient.configure(SimpleAsyncHTTPClient, max_clients=12)
        with closing(AsyncHTTPClient(
                self.io_loop, force_instance=True)) as client:
            self.assertEqual(client.max_clients, 12)
        # Positional override: mirrors the positional case in the first half
        # so that both ways of passing max_clients are exercised against the
        # statically configured value.
        with closing(AsyncHTTPClient(
                self.io_loop, 13, force_instance=True)) as client:
            self.assertEqual(client.max_clients, 13)
        # Keyword override.
        with closing(AsyncHTTPClient(
                self.io_loop, max_clients=14, force_instance=True)) as client:
            self.assertEqual(client.max_clients, 14)
Beispiel #13
0
    def tsend_log(self,
            message,
            severity,
            filename=None,
            url=None,
            status_code=None,
            headers=None,
            parameters=None,
            stacktrace=False):
        """Build a log record and POST it as JSON using the curl client.

        All arguments are forwarded unchanged to ``self._build_message``;
        the record it returns is serialised with ``json.dumps`` and sent to
        the URL returned by ``self._build_url()``. The response is handed to
        a callback that ignores it.
        """
        from tornado.httpclient import AsyncHTTPClient

        record = self._build_message(
            message, severity, filename, url,
            status_code, headers, parameters, stacktrace)

        # want to use the better client here.
        AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")

        def _discard_response(resp):
            return None

        http_client = AsyncHTTPClient()
        # NOTE(review): `headers` is both recorded in the message above and
        # sent as the headers of this POST -- confirm that is intended.
        http_client.fetch(
            self._build_url(),
            _discard_response,
            method="POST",
            body=json.dumps(record),
            headers=headers)
Beispiel #14
0
def client(io_loop, request):
    """Return mocked AsyncHTTPClient"""
    # Remember the implementation in effect so the finalizer can restore it.
    previous_impl = AsyncHTTPClient.configured_class()

    def restore_previous_impl():
        AsyncHTTPClient.configure(previous_impl)

    AsyncHTTPClient.configure(MockAsyncHTTPClient)
    request.addfinalizer(restore_previous_impl)

    mocked = AsyncHTTPClient()
    assert isinstance(mocked, MockAsyncHTTPClient)
    return mocked
Beispiel #15
0
    def get_http_client(self):
        """Overrides `AsyncHTTPTestCase.get_http_client` to separate unit test HTTPClient
        from application HTTPClient.

        This allows mocking HTTP requests made by application in unit tests.
        """
        curl_impl = 'tornado.curl_httpclient.CurlAsyncHTTPClient'
        AsyncHTTPClient.configure(curl_impl)
        # force_instance=True asks for a new client object on every call.
        dedicated_client = AsyncHTTPClient(force_instance=True)
        return dedicated_client
Beispiel #16
0
 def _enable_curl_httpclient(cls):
     """
     Tornado proxies are currently only supported with curl_httpclient
     http://www.tornadoweb.org/en/stable/httpclient.html#request-objects
     """
     # The class-level flag makes every call after the first a no-op.
     if cls._curl_httpclient_enabled:
         return
     AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
     cls._curl_httpclient_enabled = True
Beispiel #17
0
 def run(self):
     """Configure the curl client, serve the app on port 4000 and run the IOLoop."""
     print("TornadoThread Start....")
     AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")

     app_kwargs = dict(template_path=template_path, static_path=static_path,
                       **settings)
     application = tornado.web.Application(tornado_handlers, **app_kwargs)
     application.listen(4000, address="0.0.0.0")

     # start() is called on the current IOLoop; run() returns after it does.
     loop = tornado.ioloop.IOLoop.current()
     loop.start()
Beispiel #18
0
def main():
    """A simple test runner.

    This test runner is essentially equivalent to `unittest.main` from
    the standard library, but adds support for tornado-style option
    parsing and log formatting.

    The easiest way to run a test is via the command line::

        python -m tornado.testing tornado.test.stack_context_test

    See the standard library unittest module for ways in which tests can
    be specified.

    Projects with many tests may wish to define a test script like
    tornado/test/runtests.py.  This script should define a method all()
    which returns a test suite and then call tornado.testing.main().
    Note that even when a test script is used, the all() test suite may
    be overridden by naming a single test on the command line::

        # Runs all tests
        tornado/test/runtests.py
        # Runs one test
        tornado/test/runtests.py tornado.test.stack_context_test

    """
    from tornado.options import define, options, parse_command_line

    define('autoreload', type=bool, default=False,
           help="DEPRECATED: use tornado.autoreload.main instead")
    define('httpclient', type=str, default=None)
    argv = [sys.argv[0]] + parse_command_line(sys.argv)

    if options.httpclient:
        from tornado.httpclient import AsyncHTTPClient
        AsyncHTTPClient.configure(options.httpclient)

    if __name__ == '__main__' and len(argv) == 1:
        # sys.stderr.write instead of the Python-2-only `print >>` statement;
        # the emitted text (message plus newline) is unchanged.
        sys.stderr.write("No tests specified\n")
        sys.exit(1)
    try:
        # In order to be able to run tests by their fully-qualified name
        # on the command line without importing all tests here,
        # module must be set to None.  Python 3.2's unittest.main ignores
        # defaultTest if no module is given (it tries to do its own
        # test discovery, which is incompatible with auto2to3), so don't
        # set module if we're not asking for a specific test.
        if len(argv) > 1:
            unittest.main(module=None, argv=argv)
        else:
            unittest.main(defaultTest="all", argv=argv)
    except SystemExit as e:
        # `except ... as` is accepted by Python 2.6+ and Python 3 alike.
        if e.code == 0:
            logging.info('PASS')
        else:
            logging.error('FAIL')
        # Re-raise so the process exits, unless autoreload was requested.
        if not options.autoreload:
            raise
Beispiel #19
0
def setup():
    """Set up options and logger. Configure the asynchronous HTTP client."""
    # (option name, keyword arguments for define) pairs, registered in order.
    option_specs = (
        ('apiurl', dict(
            type=str,
            help='The Juju WebSocket server address. This is usually the address '
                 'of the bootstrap/state node as returned by "juju status".')),
        # Optional parameters.
        ('apiversion', dict(
            type=str, default=DEFAULT_API_VERSION,
            help='the Juju API version/implementation. Currently the possible '
                 'values are "go" (default) or "python".')),
        ('testsroot', dict(
            type=str,
            help='The filesystem path of the Juju GUI tests directory. '
                 'If not provided, tests are not served.')),
        ('sslpath', dict(
            type=str, default=DEFAULT_SSL_PATH,
            help='The path where the SSL certificates are stored.')),
        ('insecure', dict(
            type=bool, default=False,
            help='Set to True to serve the GUI over an insecure HTTP connection. '
                 'Do not set unless you understand and accept the risks.')),
        ('sandbox', dict(
            type=bool, default=False,
            help='Set to True if the GUI is running in sandbox mode, i.e. using '
                 'an in-memory backend. When this is set to True, the GUI server '
                 'does not listen to incoming WebSocket connections, and '
                 'therefore the --apiurl and --apiversion options are ignored.')),
        ('charmworldurl', dict(
            type=str,
            help='The URL to use for Charmworld.')),
        ('port', dict(
            type=int,
            help='User defined port to run the server on. If no port is defined '
                 'the server will be started on 80 and 443 as per the default '
                 'port options from the charm.')),
        ('jujuguidebug', dict(
            type=bool, default=False,
            help='Set to True to run the gui without minifiying or combining '
                 'source files.')),
        ('user', dict(type=str, help='The juju environment user.')),
        ('password', dict(type=str, help='The juju environment password.')),
        ('uuid', dict(type=str, help='The juju environment uuid.')),
        ('jujuversion', dict(type=str, help='The jujud version.')),
        ('jemlocation', dict(
            type=str,
            help="The url for a Juju Environment Manager.")),
        ('interactivelogin', dict(
            type=bool, default=False,
            help='Enables interactive login to identity manager, if applicable.')),
    )
    for option_name, option_kwargs in option_specs:
        define(option_name, **option_kwargs)

    # In Tornado, parsing the options also sets up the default logger.
    parse_command_line()
    _validate_choices('apiversion', ('go', 'python'))
    _validate_range('port', 1, 65535)
    _add_debug(logging.getLogger())

    # Configure the asynchronous HTTP client used by proxy handlers.
    curl_impl = 'tornado.curl_httpclient.CurlAsyncHTTPClient'
    AsyncHTTPClient.configure(curl_impl, max_clients=20)
def main():
    """A simple test runner with autoreload support.

    The easiest way to run a test is via the command line::

        python -m tornado.testing --autoreload tornado.test.stack_context_test

    See the standard library unittest module for ways in which tests can
    be specified.

    Projects with many tests may wish to define a test script like
    tornado/test/runtests.py.  This script should define a method all()
    which returns a test suite and then call tornado.testing.main().
    Note that even when a test script is used, the all() test suite may
    be overridden by naming a single test on the command line::

        # Runs all tests
        tornado/test/runtests.py --autoreload
        # Runs one test
        tornado/test/runtests.py --autoreload tornado.test.stack_context_test

    If --autoreload is specified, the process will continue running
    after the tests finish, and when any source file changes the tests
    will be rerun.  Without --autoreload, the process will exit
    once the tests finish (with an exit status of 0 for success and
    non-zero for failures).
    """
    from tornado.options import define, options, parse_command_line

    define('autoreload', type=bool, default=False)
    define('httpclient', type=str, default=None)
    argv = [sys.argv[0]] + parse_command_line(sys.argv)

    if options.httpclient:
        from tornado.httpclient import AsyncHTTPClient
        AsyncHTTPClient.configure(options.httpclient)

    if __name__ == '__main__' and len(argv) == 1:
        # sys.stderr.write instead of the Python-2-only `print >>` statement;
        # the emitted text (message plus newline) is unchanged.
        sys.stderr.write("No tests specified\n")
        sys.exit(1)
    try:
        # In order to be able to run tests by their fully-qualified name
        # on the command line without importing all tests here,
        # module must be set to None.  Python 3.2's unittest.main ignores
        # defaultTest if no module is given (it tries to do its own
        # test discovery, which is incompatible with auto2to3), so don't
        # set module if we're not asking for a specific test.
        if len(argv) > 1:
            unittest.main(module=None, argv=argv)
        else:
            unittest.main(defaultTest="all", argv=argv)
    except SystemExit as e:
        # `except ... as` is accepted by Python 2.6+ and Python 3 alike.
        if e.code == 0:
            logging.info('PASS')
        else:
            logging.error('FAIL')
        # Swallow the exit only when autoreload was requested.
        if not options.autoreload:
            raise
Beispiel #21
0
 def __init__(self, url, key, sender, **kwargs):
     """
     Configure the curl client and store the given settings.

     :param url: stored as ``self.url``
     :param key: stored as ``self.key``
     :param sender: stored as ``self.sender``
     :param kwargs: extra keyword arguments, stored as ``self.kwargs``
     """
     curl_impl = 'tornado.curl_httpclient.CurlAsyncHTTPClient'
     AsyncHTTPClient.configure(curl_impl)
     self.url, self.key, self.sender = url, key, sender
     self.kwargs = kwargs
     self.http_client = AsyncHTTPClient()
Beispiel #22
0
def configure_app(self, config=None, log_level='INFO', debug=False, main_loop=None):
    """Initialise the application state and return its routes and options.

    Stores the config and main loop, instantiates the cache and storage
    classes named in the config, resets the counters, starts the two
    periodic tasks and configures the curl HTTP client.

    :param config: configuration object; its attributes are read here
    :param log_level: accepted but not used in this function
    :param debug: when true, ``options['debug']`` is set and
        ``config.NUMBER_OF_FORKS`` is forced to 1
    :param main_loop: stored and handed to the tasks
    :return: ``(handlers, options)``
    """
    self.config = config
    self.main_loop = main_loop

    handlers = [
        url(r'/api/(?P<project_id>\d+)/store/', RouterHandler, name="router"),
        # Deprecated
        url(r'/api/store/', OldRouterHandler, name="router_post"),
        #/Deprecated
        url(r'/count', CountHandler, name="count"),
        url(r'/healthcheck(?:/|\.html)?', HealthCheckHandler, name="healthcheck"),
    ]

    db_message = "Connecting to db on {0}:{1} on database {2} with user {3}".format(
        self.config.MYSQL_HOST,
        self.config.MYSQL_PORT,
        self.config.MYSQL_DB,
        self.config.MYSQL_USER)
    logging.info(db_message)

    # Cache and storage implementations are looked up by name from the config.
    self.cache = get_class(self.config.CACHE_IMPLEMENTATION_CLASS)(self)
    self.storage = get_class(self.config.STORAGE)(self)

    self.project_keys = {}

    self.processed_items = 0
    self.ignored_items = 0

    self.last_requests = []
    self.average_request_time = None
    self.percentile_request_time = None

    # Each task is updated once right away and then started.
    for task_class in (ProjectsUpdateTask, SendToSentryTask):
        task = task_class(self, self.main_loop)
        task.update()
        task.start()

    options = {}
    if debug:
        options['debug'] = True
        config.NUMBER_OF_FORKS = 1

    AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")

    return handlers, options
    def __init__(self, dispatcher_client, bindaddr='127.0.0.1', port=8080, certfile=None, keyfile=None):
        """Store the server settings and configure the curl HTTP client.

        :param dispatcher_client: stored as ``self._client``
        :param bindaddr: stored as ``self._bindaddr``
        :param port: stored as ``self._port``
        :param certfile: stored as ``self._certfile``
        :param keyfile: stored as ``self._keyfile``
        """
        self._client = dispatcher_client
        self._bindaddr, self._port = bindaddr, port
        self._certfile, self._keyfile = certfile, keyfile

        # Neither the IOLoop nor the server is set at construction time.
        self._ioloop = self._server = None

        curl_impl = "tornado.curl_httpclient.CurlAsyncHTTPClient"
        AsyncHTTPClient.configure(curl_impl)
Beispiel #24
0
    def __init__(self, endpoint, token=None):
        """Store the endpoint and token and derive the base URL.

        :param endpoint: used as the base URL when it starts with ``"http"``,
            otherwise ``"https://"`` is prepended
        :param token: stored as ``self.token``
        """
        self.endpoint = endpoint
        self.token = token

        has_scheme = endpoint.startswith("http")
        self._base_url = self.endpoint if has_scheme else "https://%s" % self.endpoint

        # NOTE(review): configure() is class-wide, so validate_cert=False
        # presumably disables certificate validation for every client created
        # after this point, not just this object's requests -- confirm.
        request_defaults = dict(validate_cert=False)
        AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient", defaults=request_defaults)
Beispiel #25
0
def main():
    """Parse the command line and run a ``WorkPool`` over the given input."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("input", help="input: list of urls")
    arg_parser.add_argument(
        "--pool-size",
        type=int,
        default=10,
        help="max count of urls that can be resolved simultaneously")
    cli_args = arg_parser.parse_args()

    # Select the curl implementation before the client is created.
    AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
    loop = ioloop.IOLoop.instance()
    http_client = AsyncHTTPClient()

    work_pool = WorkPool(cli_args.input, http_client,io_loop=loop,
                         pool_size=cli_args.pool_size)
    loop.run_sync(work_pool.run)
Beispiel #26
0
def config():
    """Load ``config.yaml`` into Tornado options and configure the HTTP client.

    Every top-level key of the file is logged and registered with ``define``
    using its value as the default. ``max_clients`` for the curl client is
    the sum of ``options.max_fetch_clients`` and ``options.max_send_clients``.
    """
    with open('config.yaml', 'r') as f:
        # safe_load builds plain Python data only. A bare yaml.load(f) can
        # construct arbitrary objects and is rejected by recent PyYAML
        # releases, which require an explicit Loader argument.
        # An empty file parses to None, hence the `or {}`.
        settings = yaml.safe_load(f) or {}

    for k, v in settings.items():
        logging.info("%s:%s", k, v)
        define(k, default=v)

    # curl_httpclient is faster, it is said

    max_clients = options.max_fetch_clients + options.max_send_clients
    AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient", max_clients=max_clients)
Beispiel #27
0
    def __init__(self, *args, **kwargs):
        """Initialise the base class, then create the HTTP client and comms.

        All arguments are forwarded unchanged to the parent constructor.
        """
        super(PartyuApp, self).__init__(*args, **kwargs)

        self.db = None

        # Keep the current implementation; only set the client limit.
        client_limit = options.http_max_clients
        AsyncHTTPClient.configure(None, max_clients=client_limit)
        self.client = AsyncHTTPClient()

        # Both comm objects are given the same client.
        self.comms = {
            "fsq": FoursquareComm(self.client),
            "fb": FacebookComm(self.client),
        }
    def async_purge(self, files=None, max_concurrency=None, isVerbose=False):
        """Asynchronously purge all the files from the Fastly CDN cache.

        :param files: collection of files to purge; ``None`` is treated as
            an empty collection
        :param max_concurrency: upper bound on the number of workers and the
            ``max_clients`` value configured on the HTTP client; ``None``
            leaves the client configuration untouched and uses one worker
            per file
        :param isVerbose: stored as ``self.isVerbose``

        Yields the list of objects returned by ``self.purge_worker()``.
        """
        self.files = files if files is not None else []
        self.isVerbose = isVerbose

        # Configure BEFORE instantiating: keyword arguments given to
        # configure() are only passed to instances created afterwards.
        if max_concurrency is not None:
            AsyncHTTPClient.configure(None, max_clients=max_concurrency)
        self.http_client = AsyncHTTPClient()

        # Never start more workers than there are files.
        worker_count = len(self.files)
        if max_concurrency is not None:
            worker_count = min(worker_count, max_concurrency)

        workers = [self.purge_worker() for _ in range(worker_count)]
        yield workers
Beispiel #29
0
 def __init__(self, policy):
     """
     Set up empty crawl bookkeeping and the HTTP client.

     :type policy: ScrapingPolicy
     """
     self.policy = policy
     self.requests_in_flight = 0
     self.crawled, self.backlog = set(), set()
     self.redirects = {}  # maps URLs to their destination
     self.errors = {}  # maps URLs to their (error) HTTP status codes

     # Every request made through AsyncHTTPClient defaults to this User-Agent.
     browser_user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
     AsyncHTTPClient.configure(None, defaults=dict(user_agent=browser_user_agent))
     self.http_client = AsyncHTTPClient(max_clients=policy.max_concurrent_requests)
Beispiel #30
0
def run(url_to_handlers=default_handlers):
  """Parse the command line, then serve the handlers until the IOLoop stops.

  :param url_to_handlers: handler list passed to ``tornado.web.Application``
  """
  define("port", default=9999, help="Runs on the given port", type=int)
  define("secret", default='', help="Shared secret for /killexecutor", type=str)
  parse_command_line()

  log = logging.getLogger(__file__)
  log.info("Starting Heron Shell")
  # NOTE(review): this writes the shared secret to the log -- confirm that
  # is acceptable for the deployment.
  log.info("Shared secret for /killexecutor: %s", options.secret)

  # Default request timeout applied to clients created after this call.
  request_defaults = dict(request_timeout=120.0)
  AsyncHTTPClient.configure(None, defaults=request_defaults)

  application = tornado.web.Application(url_to_handlers)
  application.listen(options.port)
  loop = tornado.ioloop.IOLoop.instance()
  loop.start()
Beispiel #31
0
    def __init__(self, config):
        """Create the tracker, register all routes and initialise the app.

        :param config: passed to the ``Tracker`` constructor
        """
        AsyncHTTPClient.configure(None, defaults=dict(request_timeout=120.0))
        self.tracker = Tracker(config)
        self.tracker.synch_topologies()

        def tracked(pattern, handler):
            # Route whose handler is given this application's tracker.
            return (pattern, handler, {"tracker": self.tracker})

        tornadoHandlers = [
            (r"/", handlers.MainHandler),
            tracked(r"/clusters", handlers.ClustersHandler),
            tracked(r"/topologies", handlers.TopologiesHandler),
            tracked(r"/topologies/states", handlers.StatesHandler),
            tracked(r"/topologies/info", handlers.TopologyHandler),
            tracked(r"/topologies/logicalplan", handlers.LogicalPlanHandler),
            tracked(r"/topologies/containerfiledata",
                    handlers.ContainerFileDataHandler),
            tracked(r"/topologies/containerfiledownload",
                    handlers.ContainerFileDownloadHandler),
            tracked(r"/topologies/containerfilestats",
                    handlers.ContainerFileStatsHandler),
            tracked(r"/topologies/physicalplan", handlers.PhysicalPlanHandler),
            # Deprecated. See https://github.com/twitter/heron/issues/1754
            tracked(r"/topologies/executionstate",
                    handlers.ExecutionStateHandler),
            tracked(r"/topologies/schedulerlocation",
                    handlers.SchedulerLocationHandler),
            tracked(r"/topologies/metadata", handlers.MetaDataHandler),
            tracked(r"/topologies/runtimestate", handlers.RuntimeStateHandler),
            tracked(r"/topologies/metrics", handlers.MetricsHandler),
            tracked(r"/topologies/metricstimeline",
                    handlers.MetricsTimelineHandler),
            tracked(r"/topologies/metricsquery", handlers.MetricsQueryHandler),
            tracked(r"/topologies/exceptions", handlers.ExceptionHandler),
            tracked(r"/topologies/exceptionsummary",
                    handlers.ExceptionSummaryHandler),
            tracked(r"/machines", handlers.MachinesHandler),
            tracked(r"/topologies/pid", handlers.PidHandler),
            tracked(r"/topologies/jstack", handlers.JstackHandler),
            tracked(r"/topologies/jmap", handlers.JmapHandler),
            tracked(r"/topologies/histo", handlers.MemoryHistogramHandler),
            # Catch-all pattern, listed last.
            (r"(.*)", handlers.DefaultHandler),
        ]

        settings = {
            "debug": True,
            "serve_traceback": True,
            "static_path": os.path.dirname(__file__),
        }
        tornado.web.Application.__init__(self, tornadoHandlers, **settings)
        Log.info("Tracker has started")
Beispiel #32
0
 def configure_httpclient():
     """Configure the client named by ``options.httpclient``, or the HTTP/2 one when unset."""
     implementation = options.httpclient
     if implementation is None:
         # No explicit choice was made: fall back to the forced-HTTP/2 client.
         implementation = 'tornado_http2.client.ForceHTTP2Client'
     AsyncHTTPClient.configure(implementation)
Beispiel #33
0
import json
import logging
from collections import defaultdict

from tornado.gen import coroutine
from tornado.gen import Return
from tornado.httpclient import AsyncHTTPClient

from api import conf as consts
from api.extension.client import NoQueueTimeoutHTTPClient

# Module-level logger named after this module.
LOG = logging.getLogger(__name__)
# Make every AsyncHTTPClient() created afterwards an instance of the
# project's NoQueueTimeoutHTTPClient implementation.
AsyncHTTPClient.configure(NoQueueTimeoutHTTPClient)


class Result(object):

    def __init__(self):
        """Create an empty result holder with its own HTTP client."""
        # Results are grouped into lists and filled in lazily on first access.
        self._result = defaultdict(list)
        self._client = AsyncHTTPClient()
        # Template with four positional slots; the last three are the
        # period, version and page query parameters.
        self._url = '{}/results?period={}&version={}&page={}'

    @property
    @coroutine
    def result(self):
        """Coroutine property resolving to the cached results.

        When nothing is cached yet, ``update_results`` is awaited first.
        """
        if self._result:
            raise Return(self._result)
        yield self.update_results()
        raise Return(self._result)

    @coroutine
    def update_results(self):
Beispiel #34
0
                 reason) = httputil.parse_response_start_line(header_line)
                header_line = "X-Http-Reason: %s" % reason
            except httputil.HTTPInputError:
                return
        if not header_line:
            return
        headers.parse_line(header_line)

    def _curl_debug(self, debug_type: int, debug_msg: str) -> None:
        """Forward a curl debug message to ``curl_log`` at debug level.

        Type 0 logs the stripped message, types 1 and 2 log each line of the
        message behind a ``<`` or ``>`` marker, and type 4 logs the ``repr``
        of the message behind ``>``.  Any other type is ignored.
        (Presumably these are pycurl's debug info types -- confirm against
        the caller that installs this callback.)
        """
        markers = ("I", "<", ">", "<", ">")
        if debug_type == 0:
            curl_log.debug("%s", native_str(debug_msg).strip())
            return
        if debug_type in (1, 2):
            marker = markers[debug_type]
            for text_line in native_str(debug_msg).splitlines():
                curl_log.debug("%s %s", marker, text_line)
            return
        if debug_type == 4:
            # Logged with %r and without native_str conversion.
            curl_log.debug("%s %r", markers[debug_type], debug_msg)


class CurlError(HTTPError):
    """``HTTPError`` with status code 599 that also keeps an ``errno`` value."""

    def __init__(self, errno: int, message: str) -> None:
        super().__init__(599, message)
        self.errno = errno


# Script entry point: make CurlAsyncHTTPClient the implementation behind
# AsyncHTTPClient before handing control to main().
if __name__ == "__main__":
    AsyncHTTPClient.configure(CurlAsyncHTTPClient)
    main()
Beispiel #35
0
def main():
    """Configure warnings and options, then run the test suite.

    Installs the warning filters, defines the command-line options (several
    of which reconfigure Tornado classes through their callbacks), wraps
    ``sys.stderr`` in a byte counter and hands control to
    ``tornado.testing.main``.  Afterwards the process exits with status 1 if
    anything was logged at info level or above, or if any bytes were written
    to the wrapped stderr.
    """
    # Be strict about most warnings (This is set in our test running
    # scripts to catch import-time warnings, but set it again here to
    # be sure). This also turns on warnings that are ignored by
    # default, including DeprecationWarnings and python 3.2's
    # ResourceWarnings.
    warnings.filterwarnings("error")
    # setuptools sometimes gives ImportWarnings about things that are on
    # sys.path even if they're not being used.
    warnings.filterwarnings("ignore", category=ImportWarning)
    # Tornado generally shouldn't use anything deprecated, but some of
    # our dependencies do (last match wins).
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    warnings.filterwarnings("error",
                            category=DeprecationWarning,
                            module=r"tornado\..*")
    warnings.filterwarnings("ignore", category=PendingDeprecationWarning)
    warnings.filterwarnings("error",
                            category=PendingDeprecationWarning,
                            module=r"tornado\..*")
    # The unittest module is aggressive about deprecating redundant methods,
    # leaving some without non-deprecated spellings that work on both
    # 2.7 and 3.2
    warnings.filterwarnings("ignore",
                            category=DeprecationWarning,
                            message="Please use assert.* instead")
    warnings.filterwarnings(
        "ignore",
        category=PendingDeprecationWarning,
        message="Please use assert.* instead",
    )
    # Twisted 15.0.0 triggers some warnings on py3 with -bb.
    warnings.filterwarnings("ignore",
                            category=BytesWarning,
                            module=r"twisted\..*")
    if (3, ) < sys.version_info < (3, 6):
        # Prior to 3.6, async ResourceWarnings were rather noisy
        # and even
        # `python3.4 -W error -c 'import asyncio; asyncio.get_event_loop()'`
        # would generate a warning.
        warnings.filterwarnings("ignore",
                                category=ResourceWarning,
                                module=r"asyncio\..*")
    # This deprecation warning is introduced in Python 3.8 and is
    # triggered by pycurl. Unfortunately, because it is raised in the C
    # layer it can't be filtered by module and we must match the
    # message text instead (Tornado's C module uses PY_SSIZE_T_CLEAN
    # so it's not at risk of running into this issue).
    warnings.filterwarnings(
        "ignore",
        category=DeprecationWarning,
        message="PY_SSIZE_T_CLEAN will be required",
    )

    # Only CRITICAL records from the "tornado.access" logger get through.
    logging.getLogger("tornado.access").setLevel(logging.CRITICAL)

    # Each option's callback applies the chosen value when the option is set.
    define(
        "httpclient",
        type=str,
        default=None,
        callback=lambda s: AsyncHTTPClient.configure(
            s, defaults=dict(allow_ipv6=False)),
    )
    define("httpserver", type=str, default=None, callback=HTTPServer.configure)
    define("resolver", type=str, default=None, callback=Resolver.configure)
    define(
        "debug_gc",
        type=str,
        multiple=True,
        help="A comma-separated list of gc module debug constants, "
        "e.g. DEBUG_STATS or DEBUG_COLLECTABLE,DEBUG_OBJECTS",
        callback=lambda values: gc.set_debug(
            reduce(operator.or_, (getattr(gc, v) for v in values))),
    )

    def set_locale(x):
        # Apply the requested locale to every locale category.
        locale.setlocale(locale.LC_ALL, x)

    define("locale", type=str, default=None, callback=set_locale)

    # Attach the counting filter to the first root handler once the command
    # line has been parsed.
    log_counter = LogCounter()
    add_parse_callback(
        lambda: logging.getLogger().handlers[0].addFilter(log_counter))

    # Certain errors (especially "unclosed resource" errors raised in
    # destructors) go directly to stderr instead of logging. Count
    # anything written by anything but the test runner as an error.
    orig_stderr = sys.stderr
    counting_stderr = CountingStderr(orig_stderr)
    sys.stderr = counting_stderr  # type: ignore

    import tornado.testing

    kwargs = {}

    # HACK:  unittest.main will make its own changes to the warning
    # configuration, which may conflict with the settings above
    # or command-line flags like -bb.  Passing warnings=False
    # suppresses this behavior, although this looks like an implementation
    # detail.  http://bugs.python.org/issue15626
    kwargs["warnings"] = False

    # The runner is built around the original stderr, not the counting one.
    kwargs["testRunner"] = test_runner_factory(orig_stderr)
    try:
        tornado.testing.main(**kwargs)
    finally:
        # The tests should run clean; consider it a failure if they
        # logged anything at info level or above.
        if (log_counter.info_count > 0 or log_counter.warning_count > 0
                or log_counter.error_count > 0
                or counting_stderr.byte_count > 0):
            logging.error(
                "logged %d infos, %d warnings, %d errors, and %d bytes to stderr",
                log_counter.info_count,
                log_counter.warning_count,
                log_counter.error_count,
                counting_stderr.byte_count,
            )
            sys.exit(1)
Beispiel #36
0
    def __init__(self, base_url):
        """Assemble the settings and URL routes and initialise the application.

        Args:
            base_url: URL prefix the UI is served under.  When it is the
                empty string the static files use the ``/static/`` prefix;
                otherwise the prefix is ``base_url`` joined with ``static/``.
        """
        # pylint: disable=fixme
        # TODO: hacky solution
        # sys.path[0] should be the path to the extracted files for heron-ui, as it is added
        # when bootstrapping the pex file
        static_prefix = (os.path.join(base_url, 'static/')
                         if base_url != "" else '/static/')

        # Requests made through AsyncHTTPClient default to a 120.0 timeout.
        AsyncHTTPClient.configure(None, defaults={"request_timeout": 120.0})
        Log.info("Using base url: %s", base_url)

        settings = {
            "template_path": os.path.join(
                sys.path[0], "heron/tools/ui/resources/templates"),
            "static_path": os.path.join(
                sys.path[0], "heron/tools/ui/resources/static"),
            "static_url_prefix": static_prefix,
            "gzip": True,
            "debug": True,
            "default_handler_class": handlers.NotFoundHandler,
        }
        Log.info(os.path.join(base_url, 'static/'))

        # Change these to query string parameters, since
        # current format can lead to pattern matching issues.
        # The groups below are concatenated in this exact order because the
        # position of a route in the final list is part of the routing table.
        page_routes = [
            (r"/", handlers.MainHandler),
            url(r"/topologies", handlers.ListTopologiesHandler,
                {"baseUrl": base_url}, name='topologies'),
            url(r"/topologies/filestats/([^\/]+)/([^\/]+)/([^\/]+)/([^\/]+)",
                handlers.ContainerFileStatsHandler, {"baseUrl": base_url}),
            url(r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)/config",
                handlers.TopologyConfigHandler, {"baseUrl": base_url}),
            url(r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)/([^\/]+)/([^\/]+)/exceptions",
                handlers.TopologyExceptionsPageHandler, {"baseUrl": base_url}),
            url(r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)",
                handlers.TopologyPlanHandler, {"baseUrl": base_url}),
        ]

        # topology metric apis
        metric_and_file_routes = [
            (r"/topologies/metrics", handlers.api.MetricsHandler),
            (r"/topologies/metrics/timeline", handlers.api.MetricsTimelineHandler),
            url(r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)/([^\/]+)/file",
                handlers.ContainerFileHandler, {"baseUrl": base_url}),
            url(r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)/([^\/]+)/filedata",
                handlers.ContainerFileDataHandler, {"baseUrl": base_url}),
            url(r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)/([^\/]+)/filedownload",
                handlers.ContainerFileDownloadHandler, {"baseUrl": base_url}),
        ]

        # Topology list and plan handlers
        plan_routes = [
            (r"/topologies/list.json", handlers.api.ListTopologiesJsonHandler),
            (r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)/logicalplan.json",
             handlers.api.TopologyLogicalPlanJsonHandler),
            (r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)/physicalplan.json",
             handlers.api.TopologyPhysicalPlanJsonHandler),
            (r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)/executionstate.json",
             handlers.api.TopologyExecutionStateJsonHandler),
            (r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)/schedulerlocation.json",
             handlers.api.TopologySchedulerLocationJsonHandler),
        ]

        # Counter Handlers
        counter_routes = [
            (r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)/([^\/]+)/exceptions.json",
             handlers.api.TopologyExceptionsJsonHandler),
            (r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)/([^\/]+)/exceptionsummary.json",
             handlers.api.TopologyExceptionSummaryHandler),
        ]

        # Heron shell Handlers
        shell_routes = [
            (r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)/([^\/]+)/pid",
             handlers.api.PidHandler),
            (r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)/([^\/]+)/jstack",
             handlers.api.JstackHandler),
            (r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)/([^\/]+)/jmap",
             handlers.api.JmapHandler),
            (r"/topologies/([^\/]+)/([^\/]+)/([^\/]+)/([^\/]+)/histo",
             handlers.api.MemoryHistogramHandler),
        ]

        ## Static files
        static_routes = [
            (r"/static/(.*)", tornado.web.StaticFileHandler,
             {"path": settings['static_path']}),
        ]

        callbacks = (page_routes + metric_and_file_routes + plan_routes +
                     counter_routes + shell_routes + static_routes)

        tornado.web.Application.__init__(self, callbacks, **settings)
from tornado import ioloop
from tornado.httpclient import AsyncHTTPClient
from tornado import gen

from functools import partial
import string
import random

# Use the libcurl-based implementation for every AsyncHTTPClient created
# by this module, with max_clients raised to 100.
AsyncHTTPClient.configure(
    "tornado.curl_httpclient.CurlAsyncHTTPClient", max_clients=100)


def generate_urls(base_url, num_urls):
    """Yield ``num_urls`` URLs made of ``base_url`` plus a random suffix.

    Each suffix is ten distinct lowercase ASCII letters drawn with
    ``random.sample``.
    """
    alphabet = string.ascii_lowercase
    for _ in range(num_urls):
        suffix_letters = random.sample(alphabet, 10)
        yield base_url + "".join(suffix_letters)


@gen.coroutine
def run_experiment(base_url, num_iter=500):
    """Fetch ``num_iter`` generated URLs at once and total the body sizes.

    All fetches are started before any of them is waited on; the coroutine
    resolves to the sum of ``len(response.body)`` over every response.
    """
    client = AsyncHTTPClient()
    pending = []
    for target in generate_urls(base_url, num_iter):
        pending.append(client.fetch(target))
    replies = yield pending
    total_bytes = 0
    for reply in replies:
        total_bytes += len(reply.body)
    raise gen.Return(value=total_bytes)

if __name__ == "__main__":
    import time
    delay = 100
    num_iter = 500
    _ioloop = ioloop.IOLoop.instance()
    run_func = partial(
Beispiel #38
0
 def _default_client(self):
     """Return an ``AsyncHTTPClient`` that uses this object's TLS settings.

     An SSL context is built from ``self.keyfile``, ``self.certfile`` and
     ``self.client_ca`` and registered as the default ``ssl_options`` for
     ``AsyncHTTPClient`` before the client is created.
     """
     client_defaults = {
         "ssl_options": make_ssl_context(self.keyfile,
                                         self.certfile,
                                         cafile=self.client_ca),
     }
     AsyncHTTPClient.configure(None, defaults=client_defaults)
     return AsyncHTTPClient()
Beispiel #39
0
def main():
    """Run the idle-server culler as a command-line program.

    Defines and parses the command-line options, switches to the curl-based
    HTTP client when it can be imported, then runs ``cull_idle`` once
    immediately and again every ``cull_every`` seconds until the loop is
    interrupted from the keyboard.

    Raises:
        KeyError: if ``JUPYTERHUB_API_TOKEN`` is missing from the environment.
    """
    define(
        'url',
        help="The JupyterHub API URL",
        default=os.environ.get('JUPYTERHUB_API_URL'),
    )
    define('timeout', help="The idle timeout (in seconds)", default=600)
    define(
        'cull_every',
        help="The interval (in seconds) for checking for idle servers to cull",
        default=0,
    )
    define(
        'max_age',
        help="The maximum age (in seconds) of servers that should be culled even if they are active",
        default=0,
    )
    define(
        'cull_users',
        help="""Cull users in addition to servers.
                This is for use in temporary-user cases such as tmpnb.""",
        default=False,
    )
    define(
        'concurrency',
        help="""Limit the number of concurrent requests made to the Hub.

                Deleting a lot of users at the same time can slow down the Hub,
                so limit the number of API requests we have outstanding at any given time.
                """,
        default=10,
    )
    define(
        'hooks_dir',
        help="Path to the directory for the krb tickets scripts (check_ticket.sh and delete_ticket.sh)",
        default="/srv/jupyterhub/culler",
    )
    define(
        'disable_hooks',
        help="The user's home is a temporary scratch directory and we should not check krb tickets",
        default=False,
    )

    parse_command_line()
    if not options.cull_every:
        # No interval given: check twice per idle-timeout period.
        options.cull_every = options.timeout // 2
    hub_token = os.environ['JUPYTERHUB_API_TOKEN']

    try:
        AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
    except ImportError as err:
        # Best effort only: keep going with whatever client is configured.
        app_log.warning(
            "Could not load pycurl: %s\n"
            "pycurl is recommended if you have a large number of users.",
            err,
        )

    io_loop = IOLoop.current()
    cull_settings = dict(
        url=options.url,
        api_token=hub_token,
        inactive_limit=options.timeout,
        cull_users=options.cull_users,
        disable_hooks=options.disable_hooks,
        max_age=options.max_age,
        concurrency=options.concurrency,
    )
    run_cull = partial(cull_idle, **cull_settings)
    # schedule first cull immediately
    # because PeriodicCallback doesn't start until the end of the first interval
    io_loop.add_callback(run_cull)
    # schedule periodic cull; the interval is converted from seconds to ms
    interval_ms = 1e3 * options.cull_every
    ticker = PeriodicCallback(run_cull, interval_ms)
    ticker.start()
    try:
        io_loop.start()
    except KeyboardInterrupt:
        pass
Beispiel #40
0
def main():
    """Configure warnings and options, then run the test suite.

    Installs the warning filters, defines the command-line options (several
    of which reconfigure Tornado classes through their callbacks or a parse
    callback) and hands control to ``tornado.testing.main``.  Afterwards the
    process exits with status 1 if any warnings or errors were logged.
    """
    # The -W command-line option does not work in a virtualenv with
    # python 3 (as of virtualenv 1.7), so configure warnings
    # programmatically instead.
    import warnings
    # Be strict about most warnings.  This also turns on warnings that are
    # ignored by default, including DeprecationWarnings and
    # python 3.2's ResourceWarnings.
    warnings.filterwarnings("error")
    # setuptools sometimes gives ImportWarnings about things that are on
    # sys.path even if they're not being used.
    warnings.filterwarnings("ignore", category=ImportWarning)
    # Tornado generally shouldn't use anything deprecated, but some of
    # our dependencies do (last match wins).
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    warnings.filterwarnings("error",
                            category=DeprecationWarning,
                            module=r"tornado\..*")
    warnings.filterwarnings("ignore", category=PendingDeprecationWarning)
    warnings.filterwarnings("error",
                            category=PendingDeprecationWarning,
                            module=r"tornado\..*")
    # The unittest module is aggressive about deprecating redundant methods,
    # leaving some without non-deprecated spellings that work on both
    # 2.7 and 3.2
    warnings.filterwarnings("ignore",
                            category=DeprecationWarning,
                            message="Please use assert.* instead")
    # unittest2 0.6 on py26 reports these as PendingDeprecationWarnings
    # instead of DeprecationWarnings.
    warnings.filterwarnings("ignore",
                            category=PendingDeprecationWarning,
                            message="Please use assert.* instead")
    # Twisted 15.0.0 triggers some warnings on py3 with -bb.
    warnings.filterwarnings("ignore",
                            category=BytesWarning,
                            module=r"twisted\..*")
    # The __aiter__ protocol changed in python 3.5.2.
    # Silence the warning until we can drop 3.5.[01].
    warnings.filterwarnings("ignore",
                            category=PendingDeprecationWarning,
                            message=".*legacy __aiter__ protocol")
    # 3.5.2's PendingDeprecationWarning became a DeprecationWarning in 3.6.
    warnings.filterwarnings("ignore",
                            category=DeprecationWarning,
                            message=".*legacy __aiter__ protocol")

    # Only CRITICAL records from the "tornado.access" logger get through.
    logging.getLogger("tornado.access").setLevel(logging.CRITICAL)

    # Each option's callback applies the chosen value when the option is set.
    define('httpclient',
           type=str,
           default=None,
           callback=lambda s: AsyncHTTPClient.configure(
               s, defaults=dict(allow_ipv6=False)))
    define('httpserver', type=str, default=None, callback=HTTPServer.configure)
    define('ioloop', type=str, default=None)
    define('ioloop_time_monotonic', default=False)
    define('resolver', type=str, default=None, callback=Resolver.configure)
    define('debug_gc',
           type=str,
           multiple=True,
           help="A comma-separated list of gc module debug constants, "
           "e.g. DEBUG_STATS or DEBUG_COLLECTABLE,DEBUG_OBJECTS",
           callback=lambda values: gc.set_debug(
               reduce(operator.or_, (getattr(gc, v) for v in values))))
    define('locale',
           type=str,
           default=None,
           callback=lambda x: locale.setlocale(locale.LC_ALL, x))

    def configure_ioloop():
        # Runs after option parsing because it combines two options
        # (``ioloop`` and ``ioloop_time_monotonic``) into one configure call.
        kwargs = {}
        if options.ioloop_time_monotonic:
            from tornado.platform.auto import monotonic_time
            if monotonic_time is None:
                raise RuntimeError("monotonic clock not found")
            kwargs['time_func'] = monotonic_time
        if options.ioloop or kwargs:
            IOLoop.configure(options.ioloop, **kwargs)

    add_parse_callback(configure_ioloop)

    # Attach the counting filter to the first root handler once the command
    # line has been parsed.
    log_counter = LogCounter()
    add_parse_callback(
        lambda: logging.getLogger().handlers[0].addFilter(log_counter))

    import tornado.testing
    kwargs = {}
    if sys.version_info >= (3, 2):
        # HACK:  unittest.main will make its own changes to the warning
        # configuration, which may conflict with the settings above
        # or command-line flags like -bb.  Passing warnings=False
        # suppresses this behavior, although this looks like an implementation
        # detail.  http://bugs.python.org/issue15626
        kwargs['warnings'] = False
    kwargs['testRunner'] = TornadoTextTestRunner
    try:
        tornado.testing.main(**kwargs)
    finally:
        # The tests should run clean; consider it a failure if they logged
        # any warnings or errors. We'd like to ban info logs too, but
        # we can't count them cleanly due to interactions with LogTrapTestCase.
        if log_counter.warning_count > 0 or log_counter.error_count > 0:
            logging.error("logged %d warnings and %d errors",
                          log_counter.warning_count, log_counter.error_count)
            sys.exit(1)
Beispiel #41
0
            buffer = BytesIO(data)  # TODO: don't require one big string?
        response = HTTPResponse(
            original_request,
            self.code,
            reason=getattr(self, "reason", None),
            headers=self.headers,
            request_time=self.io_loop.time() - self.start_time,
            start_time=self.start_wall_time,
            buffer=buffer,
            effective_url=self.request.url,
        )
        self._run_callback(response)
        self._on_end_request()

    def _on_end_request(self) -> None:
        """Close this connection's stream (``self.stream``)."""
        self.stream.close()

    def data_received(self, chunk: bytes) -> None:
        """Handle one chunk of response body.

        The chunk is dropped when a redirect is about to be followed.
        Otherwise it goes to the request's ``streaming_callback`` when one is
        set, or is appended to ``self.chunks``.
        """
        if self._should_follow_redirect():
            # We're going to follow a redirect so just discard the body.
            return
        on_chunk = self.request.streaming_callback
        if on_chunk is None:
            self.chunks.append(chunk)
        else:
            on_chunk(chunk)


# Script entry point: make SimpleAsyncHTTPClient the implementation behind
# AsyncHTTPClient before handing control to main().
if __name__ == "__main__":
    AsyncHTTPClient.configure(SimpleAsyncHTTPClient)
    main()
Beispiel #42
0
def main():
    """Parse the command line, build the web application and serve it.

    The function defines the command-line options, prepares the notebook
    conversion config, the worker pools, the search indexer, the cache, the
    Jinja environment and the HTTP client, then creates the Tornado
    application and HTTP server and blocks in the IOLoop.

    Environment variables read here: ``DEBUG``, ``MEMCACHIER_SERVERS``,
    ``MEMCACHE_SERVERS``, ``NBCACHE_PORT``, ``NBINDEX_PORT``,
    ``MEMCACHIER_USERNAME`` and ``MEMCACHIER_PASSWORD``.
    """
    # command-line options
    define("debug", default=False, help="run in debug mode", type=bool)
    define("no_cache", default=False, help="Do not cache results", type=bool)
    define(
        "localfiles",
        default="",
        help=
        "Allow to serve local files under /localfile/* this can be a security risk",
        type=str)
    define("port", default=5000, help="run on the given port", type=int)
    define("cache_expiry_min",
           default=10 * 60,
           help="minimum cache expiry (seconds)",
           type=int)
    define("cache_expiry_max",
           default=2 * 60 * 60,
           help="maximum cache expiry (seconds)",
           type=int)
    define("mc_threads",
           default=1,
           help="number of threads to use for Async Memcache",
           type=int)
    define("threads",
           default=1,
           help="number of threads to use for rendering",
           type=int)
    define("processes",
           default=0,
           help="use processes instead of threads for rendering",
           type=int)
    define("frontpage",
           default=FRONTPAGE_JSON,
           help="path to json file containing frontpage content",
           type=str)
    define("sslcert", help="path to ssl .crt file", type=str)
    define("sslkey", help="path to ssl .key file", type=str)
    define("default_format",
           default="html",
           help="format to use for legacy / URLs",
           type=str)
    define("proxy_host", default="", help="The proxy URL.", type=str)
    # NOTE(review): the default is an empty string although the option is
    # declared as int; it is only used when proxy_host is set -- confirm.
    define("proxy_port", default="", help="The proxy port.", type=int)
    define("providers",
           default=default_providers,
           help="Full dotted package(s) that provide `default_handlers`",
           type=str,
           multiple=True,
           group="provider")
    define("provider_rewrites",
           default=default_rewrites,
           help="Full dotted package(s) that provide `uri_rewrites`",
           type=str,
           multiple=True,
           group="provider")
    define("mathjax_url",
           default="https://cdn.mathjax.org/mathjax/latest/",
           help="URL base for mathjax package",
           type=str)
    tornado.options.parse_command_line()

    # NBConvert config
    config = Config()
    config.NbconvertApp.fileext = 'html'
    config.CSSHTMLHeaderTransformer.enabled = False
    # don't strip the files prefix - we use it for redirects
    # config.Exporter.filters = {'strip_files_prefix': lambda s: s}

    # DEBUG env implies both autoreload and log-level
    if os.environ.get("DEBUG"):
        options.debug = True
        logging.getLogger().setLevel(logging.DEBUG)

    # setup memcache
    mc_pool = ThreadPoolExecutor(options.mc_threads)

    # setup formats
    formats = configure_formats(options, config, log.app_log)

    # Rendering runs in a process pool when --processes is non-zero,
    # otherwise in a thread pool.
    if options.processes:
        pool = ProcessPoolExecutor(options.processes)
    else:
        pool = ThreadPoolExecutor(options.threads)

    memcache_urls = os.environ.get('MEMCACHIER_SERVERS',
                                   os.environ.get('MEMCACHE_SERVERS'))

    # Handle linked Docker containers
    if (os.environ.get('NBCACHE_PORT')):
        tcp_memcache = os.environ.get('NBCACHE_PORT')
        # Keep only what follows the "tcp://" scheme prefix.
        memcache_urls = tcp_memcache.split('tcp://')[1]

    if (os.environ.get('NBINDEX_PORT')):
        log.app_log.info("Indexing notebooks")
        tcp_index = os.environ.get('NBINDEX_PORT')
        index_url = tcp_index.split('tcp://')[1]
        index_host, index_port = index_url.split(":")
        indexer = ElasticSearch(index_host, index_port)
    else:
        log.app_log.info("Not indexing notebooks")
        indexer = NoSearch()

    # Cache selection: explicit opt-out, then memcache when both the client
    # library and server URLs are available, then an in-memory fallback.
    if options.no_cache:
        log.app_log.info("Not using cache")
        cache = MockCache()
    elif pylibmc and memcache_urls:
        kwargs = dict(pool=mc_pool)
        username = os.environ.get('MEMCACHIER_USERNAME', '')
        password = os.environ.get('MEMCACHIER_PASSWORD', '')
        if username and password:
            kwargs['binary'] = True
            kwargs['username'] = username
            kwargs['password'] = password
            log.app_log.info("Using SASL memcache")
        else:
            log.app_log.info("Using plain memecache")

        cache = AsyncMultipartMemcache(memcache_urls.split(','), **kwargs)
    else:
        log.app_log.info("Using in-memory cache")
        cache = DummyAsyncCache()

    # setup tornado handlers and settings

    template_path = pjoin(here, 'templates')
    static_path = pjoin(here, 'static')
    env = Environment(loader=FileSystemLoader(template_path), autoescape=True)
    env.filters['markdown'] = markdown.markdown
    try:
        git_data = git_info(here)
    except Exception as e:
        # Git metadata is optional, so a failure is logged and ignored.
        # Uses log.app_log like every other log call in this function; the
        # bare name app_log is not otherwise used here and may not be in scope.
        log.app_log.error("Failed to get git info: %s", e)
        git_data = {}
    else:
        git_data['msg'] = escape(git_data['msg'])

    if options.no_cache:
        # force jinja to recompile template every time
        # NOTE(review): this sets a template global named cache_size, not
        # the Environment's cache size -- confirm it has the intended effect.
        env.globals.update(cache_size=0)
    env.globals.update(
        nrhead=nrhead,
        nrfoot=nrfoot,
        git_data=git_data,
        ipython_info=ipython_info(),
        len=len,
    )
    AsyncHTTPClient.configure(HTTPClientClass)
    client = AsyncHTTPClient()

    # load frontpage sections
    with io.open(options.frontpage, 'r') as f:
        frontpage_sections = json.load(f)

    # cache frontpage links for the maximum allowed time
    max_cache_uris = {''}
    for section in frontpage_sections:
        for link in section['links']:
            max_cache_uris.add('/' + link['target'])

    fetch_kwargs = dict(connect_timeout=10, )
    if options.proxy_host:
        fetch_kwargs.update(
            dict(proxy_host=options.proxy_host, proxy_port=options.proxy_port))

        log.app_log.info("Using web proxy {proxy_host}:{proxy_port}."
                         "".format(**fetch_kwargs))

    settings = dict(
        log_function=log_request,
        jinja2_env=env,
        static_path=static_path,
        client=client,
        formats=formats,
        default_format=options.default_format,
        providers=options.providers,
        provider_rewrites=options.provider_rewrites,
        config=config,
        index=indexer,
        cache=cache,
        cache_expiry_min=options.cache_expiry_min,
        cache_expiry_max=options.cache_expiry_max,
        max_cache_uris=max_cache_uris,
        frontpage_sections=frontpage_sections,
        pool=pool,
        gzip=True,
        render_timeout=20,
        localfile_path=os.path.abspath(options.localfiles),
        fetch_kwargs=fetch_kwargs,
        mathjax_url=options.mathjax_url,
    )

    # handle handlers
    handlers = init_handlers(formats, options.providers)

    if options.localfiles:
        log.app_log.warning(
            "Serving local notebooks in %s, this can be a security risk",
            options.localfiles)
        # use absolute or relative paths:
        local_handlers = [(r'/localfile/(.*)', LocalFileHandler)]
        # Local-file routes are placed ahead of the regular handlers.
        handlers = (local_handlers + format_handlers(formats, local_handlers) +
                    handlers)

    # load ssl options
    ssl_options = None
    if options.sslcert:
        ssl_options = {
            'certfile': options.sslcert,
            'keyfile': options.sslkey,
        }

    # create and start the app
    app = web.Application(handlers, debug=options.debug, **settings)
    http_server = httpserver.HTTPServer(app,
                                        xheaders=True,
                                        ssl_options=ssl_options)
    log.app_log.info("Listening on port %i", options.port)
    http_server.listen(options.port)
    ioloop.IOLoop.instance().start()
Beispiel #43
0
# Initialise logging with a single stream handler; existing loggers are
# disabled unless the display_url option is set.
logging.init_log(StreamHandler(logging.INFO, log_format,
                               logging.DATE_FMT_SIMPLE),
                 disable_existing_loggers=not options.display_url)
logger = logging.getLogger(__name__)

# Timeouts for outgoing requests (presumably seconds, as in Tornado's
# HTTPRequest -- confirm at the point of use).
ASYNC_HTTP_CONNECT_TIMEOUT = 60
ASYNC_HTTP_REQUEST_TIMEOUT = 120

# max_clients value handed to the curl-based client below.
ASYNC_HTTP_CLIENT_MAX_CLIENTS = 100

# platform.system() reports "Windows" with a capital W, so the comparison is
# made case-insensitively; a plain comparison against 'windows' is always
# unequal and would never skip this section.
if platform.system().lower() != 'windows':
    try:
        # curl_httpclient is faster than simple_httpclient
        AsyncHTTPClient.configure(
            'tornado.curl_httpclient.CurlAsyncHTTPClient',
            max_clients=ASYNC_HTTP_CLIENT_MAX_CLIENTS)
    except ImportError:
        # pycurl is not installed: fall back to the pure-Python client.
        # The concrete class is SimpleAsyncHTTPClient; the name
        # tornado.simple_httpclient.AsyncHTTPClient is only the abstract
        # base class re-exported by that module.
        AsyncHTTPClient.configure(
            'tornado.simple_httpclient.SimpleAsyncHTTPClient')


class ProxyHandler(RequestHandler):
    @gen.coroutine
    def get(self):
        """Handle a GET request by delegating to ``self._do_fetch('GET')``."""
        yield self._do_fetch('GET')

    @gen.coroutine
    def post(self):
        """Handle a POST request by delegating to ``self._do_fetch('POST')``."""
        yield self._do_fetch('POST')

    @gen.coroutine
Beispiel #44
0
def make_app():
    """Assemble and return the ``web.Application`` for the notebook viewer.

    Configuration is read from the global ``options`` object and from these
    environment variables: ``DEBUG``, ``MEMCACHIER_SERVERS``,
    ``MEMCACHE_SERVERS``, ``MEMCACHIER_USERNAME``, ``MEMCACHIER_PASSWORD``,
    ``NBCACHE_PORT``, ``NBINDEX_PORT``, ``JUPYTERHUB_SERVICE_PREFIX``,
    ``JUPYTERHUB_API_TOKEN``, ``JUPYTERHUB_API_URL``, ``JUPYTERHUB_BASE_URL``
    and ``GOOGLE_ANALYTICS_ID``.

    Side effects: may set ``options.debug`` and the root logger level,
    configures the global ``AsyncHTTPClient`` implementation, and creates the
    executor pools used for rendering and memcache access.

    Returns:
        The ``web.Application`` built from the handlers and settings.
    """
    # NBConvert config
    config = Config()
    config.NbconvertApp.fileext = 'html'
    config.CSSHTMLHeaderTransformer.enabled = False
    # don't strip the files prefix - we use it for redirects
    # config.Exporter.filters = {'strip_files_prefix': lambda s: s}

    # DEBUG env implies both autoreload and log-level
    if os.environ.get("DEBUG"):
        options.debug = True
        logging.getLogger().setLevel(logging.DEBUG)

    # setup memcache
    mc_pool = ThreadPoolExecutor(options.mc_threads)

    # setup formats
    formats = configure_formats(options, config, log.app_log)

    # rendering pool: processes when requested, threads otherwise
    if options.processes:
        pool = ProcessPoolExecutor(options.processes)
    else:
        pool = ThreadPoolExecutor(options.threads)

    memcache_urls = os.environ.get(
        'MEMCACHIER_SERVERS',
        os.environ.get('MEMCACHE_SERVERS'),
    )

    # Handle linked Docker containers: the value looks like tcp://host:port
    tcp_memcache = os.environ.get('NBCACHE_PORT')
    if tcp_memcache:
        memcache_urls = tcp_memcache.split('tcp://')[1]

    tcp_index = os.environ.get('NBINDEX_PORT')
    if tcp_index:
        log.app_log.info("Indexing notebooks")
        index_url = tcp_index.split('tcp://')[1]
        index_host, index_port = index_url.split(":")
        indexer = ElasticSearch(index_host, index_port)
    else:
        log.app_log.info("Not indexing notebooks")
        indexer = NoSearch()

    # pick the cache backend
    if options.no_cache:
        log.app_log.info("Not using cache")
        cache = MockCache()
    elif pylibmc and memcache_urls:
        kwargs = dict(pool=mc_pool)
        username = os.environ.get('MEMCACHIER_USERNAME', '')
        password = os.environ.get('MEMCACHIER_PASSWORD', '')
        if username and password:
            kwargs['binary'] = True
            kwargs['username'] = username
            kwargs['password'] = password
            log.app_log.info("Using SASL memcache")
        else:
            log.app_log.info("Using plain memecache")

        cache = AsyncMultipartMemcache(memcache_urls.split(','), **kwargs)
    else:
        log.app_log.info("Using in-memory cache")
        cache = DummyAsyncCache()

    # setup tornado handlers and settings

    template_paths = pjoin(here, 'templates')

    if options.template_path is not None:
        log.app_log.info("Using custom template path {}".format(
            options.template_path)
        )
        # the custom path is searched first, the bundled templates second
        template_paths = [options.template_path, template_paths]

    static_path = pjoin(here, 'static')

    env_kwargs = {}
    if options.no_cache:
        # Force jinja to recompile templates every time. cache_size is an
        # Environment constructor option; putting it into env.globals (as the
        # code used to) only defines a template variable and leaves the
        # template cache enabled.
        env_kwargs['cache_size'] = 0
    env = Environment(
        loader=FileSystemLoader(template_paths),
        autoescape=True,
        **env_kwargs
    )
    env.filters['markdown'] = markdown.markdown

    try:
        git_data = git_info(here)
    except Exception as e:
        # best effort: the app still starts without git metadata
        log.app_log.error("Failed to get git info: %s", e)
        git_data = {}
    else:
        git_data['msg'] = escape(git_data['msg'])

    env.globals.update(
        nrhead=nrhead, nrfoot=nrfoot, git_data=git_data,
        jupyter_info=jupyter_info(), len=len,
    )

    AsyncHTTPClient.configure(HTTPClientClass)
    client = AsyncHTTPClient()
    client.cache = cache

    # load frontpage sections (JSON is UTF-8 by specification, so do not
    # depend on the locale's default encoding)
    with io.open(options.frontpage, 'r', encoding='utf-8') as f:
        frontpage_setup = json.load(f)
    # check if the json has a 'sections' field, otherwise assume it is
    # just a list of sections, and provide the defaults for the other
    # fields
    if 'sections' not in frontpage_setup:
        frontpage_setup = {'title': 'nbviewer',
                           'subtitle':
                           'A simple way to share Jupyter Notebooks',
                           'show_input': True,
                           'sections': frontpage_setup}

    # cache frontpage links for the maximum allowed time
    max_cache_uris = {''}
    for section in frontpage_setup['sections']:
        for link in section['links']:
            max_cache_uris.add('/' + link['target'])

    # keyword arguments handed to handlers through settings['fetch_kwargs']
    fetch_kwargs = dict(connect_timeout=10,)
    if options.proxy_host:
        fetch_kwargs.update(dict(proxy_host=options.proxy_host,
                                 proxy_port=options.proxy_port))

        log.app_log.info("Using web proxy {proxy_host}:{proxy_port}."
                         "".format(**fetch_kwargs))

    if options.no_check_certificate:
        fetch_kwargs.update(dict(validate_cert=False))

        log.app_log.info("Not validating SSL certificates")

    # prefer the jhub defined service prefix over the CLI
    base_url = os.getenv('JUPYTERHUB_SERVICE_PREFIX', options.base_url)

    rate_limiter = RateLimiter(
        limit=options.rate_limit,
        interval=options.rate_limit_interval,
        cache=cache,
    )

    settings = dict(
        log_function=log_request,
        jinja2_env=env,
        static_path=static_path,
        static_url_prefix=url_path_join(base_url, '/static/'),
        client=client,
        formats=formats,
        default_format=options.default_format,
        providers=options.providers,
        provider_rewrites=options.provider_rewrites,
        config=config,
        index=indexer,
        cache=cache,
        cache_expiry_min=options.cache_expiry_min,
        cache_expiry_max=options.cache_expiry_max,
        max_cache_uris=max_cache_uris,
        frontpage_setup=frontpage_setup,
        pool=pool,
        gzip=True,
        render_timeout=options.render_timeout,
        localfile_path=os.path.abspath(options.localfiles),
        localfile_follow_symlinks=options.localfile_follow_symlinks,
        localfile_any_user=options.localfile_any_user,
        fetch_kwargs=fetch_kwargs,
        mathjax_url=options.mathjax_url,
        rate_limiter=rate_limiter,
        statsd_host=options.statsd_host,
        statsd_port=options.statsd_port,
        statsd_prefix=options.statsd_prefix,
        base_url=base_url,
        google_analytics_id=os.getenv('GOOGLE_ANALYTICS_ID'),
        hub_api_token=os.getenv('JUPYTERHUB_API_TOKEN'),
        hub_api_url=os.getenv('JUPYTERHUB_API_URL'),
        hub_base_url=os.getenv('JUPYTERHUB_BASE_URL'),
        ipywidgets_base_url=options.ipywidgets_base_url,
        jupyter_widgets_html_manager_version=options.jupyter_widgets_html_manager_version,
        jupyter_js_widgets_version=options.jupyter_js_widgets_version,
        content_security_policy=options.content_security_policy,
        binder_base_url=options.binder_base_url,
    )

    if options.localfiles:
        log.app_log.warning("Serving local notebooks in %s, this can be a security risk", options.localfiles)

    # handle handlers
    handlers = init_handlers(formats, options.providers, base_url, options.localfiles)

    # create the app
    return web.Application(handlers, debug=options.debug, **settings)
Beispiel #45
0
    def __init__(self,
                 handlers,
                 root_dir=None,
                 static_path=None,
                 template_path=None,
                 create_http_client=True,
                 **settings):
        """Set up the application and its optional integrations.

        Args:
            handlers: Handler specification forwarded to the parent
                constructor.
            root_dir: Optional base directory. When given, ``static`` and
                ``templates`` sub-directories of it are used for whichever of
                ``static_path`` / ``template_path`` was not passed.
            static_path: Explicit static files directory; takes precedence
                over the ``root_dir`` default.
            template_path: Explicit templates directory; takes precedence
                over the ``root_dir`` default.
            create_http_client: When true, an ``AsyncHTTPClient`` limited to
                ``options.max_http_clients`` is stored on
                ``self.http_client``; otherwise that attribute is ``None``.
            **settings: Extra settings forwarded to the parent constructor.

        Raises:
            ImportError: If ``options.use_reactjs`` is set but jinja2 is not
                importable, or ``options.use_curl_http_client`` is set but
                pycurl is missing.
            ModuleNotFoundError: Re-raised from ``get_mongo_pool`` when
                ``options.use_mongo`` is set.
        """
        self.log_msg_tmpl = '%s %s'

        # TODO add validate paths
        # Explicit paths always win. Previously they were silently replaced
        # by None whenever root_dir was not supplied.
        app_static_dir = static_path
        app_template_dir = template_path
        if root_dir is not None:
            if app_static_dir is None:
                app_static_dir = os.path.join(root_dir, "static")
            if app_template_dir is None:
                app_template_dir = os.path.join(root_dir, "templates")

        super().__init__(handlers,
                         static_path=app_static_dir,
                         template_path=app_template_dir,
                         **settings)
        self.server_name = options.srv_name
        self.logger = tornado.log.gen_log
        self.redis_connection_pool = None
        tornado.ioloop.IOLoop.configure(
            'tornado.platform.asyncio.AsyncIOMainLoop')

        if options.use_mail_logging:
            # no credentials are passed when both user and password are empty
            if options.log_mail_user == '' and options.log_mail_psw == '':
                credentials_list = None
            else:
                credentials_list = [
                    options.log_mail_user, options.log_mail_psw
                ]

            self.set_mail_logging(options.log_mail_host, options.log_mail_from,
                                  options.log_mail_to, options.log_mail_subj,
                                  credentials_list)

        # The react attributes are always defined, even on the error path.
        self.react_env = self.react_assets = None
        if options.use_reactjs:
            if not jinja2_import:
                raise ImportError('Required package jinja2 is missing')
            self.react_env = Environment(
                loader=FileSystemLoader('templates'))
            self.react_assets = self.load_react_assets()

        if options.use_curl_http_client:
            self.log_debug(options.use_curl_http_client,
                           grep_label='use_curl_http_client')
            if pycurl is None:
                raise ImportError('Required package for pycurl '
                                  'CurlAsyncHTTPClient  is missing')
            self.log_debug('configure curl')
            AsyncHTTPClient.configure(
                "tornado.curl_httpclient.CurlAsyncHTTPClient")

        if create_http_client:
            self.http_client = AsyncHTTPClient(
                max_clients=options.max_http_clients)
        else:
            self.http_client = None

        # mongo_pool is defined before the pool is built, so the attribute
        # exists even when get_mongo_pool raises.
        self.mongo_pool = None
        if options.use_mongo:
            try:
                self.mongo_pool = get_mongo_pool(
                    options.mongo_db_name, options.mongo_user,
                    options.mongo_psw, options.mongo_auth_db_name,
                    options.mongo_server, options.mongo_port,
                    options.mongo_min_pool_size, options.mongo_max_pool_size)
            except ModuleNotFoundError:
                self.mongo_pool = None
                raise
        self.event_writer = TorskelEventLogController()
Beispiel #46
0
def configure_tornado():
    """Select the curl-based Tornado HTTP client, capped at 16 clients."""
    from tornado.httpclient import AsyncHTTPClient

    curl_impl = 'tornado.curl_httpclient.CurlAsyncHTTPClient'
    AsyncHTTPClient.configure(curl_impl, max_clients=16)
if __name__ == '__main__':
    define('db',
           type=str,
           help='DB connection DSN',
           default=environ.get(
               'DB',
               "dbname=boterator user=boterator host=localhost port=5432"))
    define('burlesque',
           type=str,
           help='Burlesque address',
           default=environ.get('BURLESQUE', 'http://127.0.0.1:4401'))
    define('debug', type=bool, default=False)

    parse_command_line()

    AsyncHTTPClient.configure(None, max_clients=1024)

    ioloop = IOLoop.instance()

    db = Pool(dsn=options.db,
              size=1,
              max_size=10,
              auto_shrink=True,
              ioloop=IOLoop.current())
    ioloop.run_sync(db.connect)

    if options.debug:
        autoreload.start()

    sh = SlaveHolder(db, Burlesque(options.burlesque))
    try:
Beispiel #48
0
    def initialize(self, notebook_store, security_token):
        """Store the handler's collaborators and raise the HTTP body limit.

        Both arguments are kept on the instance under the same names. The
        global ``AsyncHTTPClient`` configuration is then updated so response
        bodies of up to 4,000,000,000 bytes are accepted.
        """
        self.notebook_store, self.security_token = (
            notebook_store, security_token)

        body_limit = 4 * 1000 ** 3  # 4,000,000,000 bytes
        AsyncHTTPClient.configure(None, max_body_size=body_limit)
Beispiel #49
0
import os
import glob
from tornado.httpclient import AsyncHTTPClient
from kubernetes import client

from z2jh import get_config, get_secret

# Configure JupyterHub to use the curl backend for making HTTP requests,
# rather than the pure-Python implementation. The default one becomes
# too slow when a large number of requests must be made to the proxy API
# at the rate required.
AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")

# NOTE(review): `c` is not defined in this file; presumably it is the config
# object injected by JupyterHub's config loader -- confirm.
c.JupyterHub.spawner_class = 'kubespawner.KubeSpawner'

# Connect to a proxy running in a different pod. Host and port come from the
# PROXY_API_SERVICE_* environment variables; a missing variable raises
# KeyError and a non-numeric port raises ValueError.
c.ConfigurableHTTPProxy.api_url = 'http://{}:{}'.format(
    os.environ['PROXY_API_SERVICE_HOST'],
    int(os.environ['PROXY_API_SERVICE_PORT']))
# The proxy runs elsewhere, so the hub must not start one itself.
c.ConfigurableHTTPProxy.should_start = False

# Do not shut down user pods when hub is restarted
c.JupyterHub.cleanup_servers = False

# Check that the proxy has its routes set up appropriately.
# The setting name is misleading: it also drives this periodic check.
# (Value is presumably in seconds -- confirm against JupyterHub docs.)
c.JupyterHub.last_activity_interval = 60

# Max number of servers that can be spawning at any one time
c.JupyterHub.concurrent_spawn_limit = get_config('hub.concurrent-spawn-limit')