def __init__(self, client, path, identifier=None):
    """ Creates an AsyncKazooLock.

    Args:
      client: A KazooClient.
      path: The lock path to use.
      identifier: The name to use for this lock contender. This can be
        useful for querying to see who the current lock contenders are.
    """
    self.client = client
    self.tornado_kazoo = TornadoKazoo(client)
    self.path = path

    # Some data is written to the node. This can be queried via
    # contenders() to see who is contending for the lock.
    self.data = str(identifier or "").encode('utf-8')
    self.node = None

    self.wake_event = AsyncEvent()

    # Props to Netflix Curator for this trick. It is possible for our
    # create request to succeed on the server, but for a failure to
    # prevent us from getting back the full path name. We prefix our
    # lock name with a uuid and can check for its presence on retry.
    self.prefix = uuid.uuid4().hex + self._NODE_NAME
    self.create_path = self.path + "/" + self.prefix

    self.create_tried = False
    self.is_acquired = False
    self.assured_path = False
    self.cancelled = False
    self._retry = AsyncKazooRetry(max_tries=-1)
    self._lock = AsyncLock()
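# Usage sketch for the lock initialized above. `acquire` and `release` are
# assumed to be coroutines on the same class (they are not shown in this
# snippet), and the path and identifier are illustrative.
from tornado import gen

@gen.coroutine
def do_exclusive_work(kazoo_client):
    lock = AsyncKazooLock(kazoo_client, '/locks/my-resource',
                          identifier='worker-1')
    yield lock.acquire()
    try:
        # Critical section: only one contender across the cluster runs this.
        pass
    finally:
        yield lock.release()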
def __init__(self, name, endpoints, io_loop=None):
    # If it's not the main thread and a current IOLoop doesn't exist here,
    # IOLoop.instance becomes self._io_loop.
    self.io_loop = io_loop or IOLoop.current()
    # List of available endpoints the service resolves to.
    # Looks like [["host1", port1], ["host2", port2]].
    self.endpoints = endpoints
    self.name = name
    self.id = generate_service_id(self)
    self.log = servicelog

    self.sessions = dict()
    self.counter = itertools.count(1)
    self.api = {}

    self._lock = Lock()

    # Wrap into a separate class.
    self.pipe = None
    self.address = None
    # on_close can be scheduled at any time, even after we've already
    # reconnected. To prevent closing the wrong connection, each new pipe
    # has its epoch, used as an id for on_close.
    self.pipe_epoch = 0
    self.buffer = msgpack_unpacker()
def __init__(self, app_id, username, password, max_requests_per_second=3,
             default_timeout=20, api_version='5.28'):
    """Create a new application object.

    Args:
      app_id: id of the VK application.
      username: user's phone number or email.
      password: user's password.
      max_requests_per_second: maximum number of requests the application
        can send per second. Depends on the number of users. Default is 3.
      default_timeout: default timeout for API requests.
      api_version: version of the VK API to use.
    """
    self.last_requests = deque([datetime.min] * max_requests_per_second)
    self.max_requests_per_second = max_requests_per_second
    self.app_id = app_id
    self.username = username
    self.password = password
    self.api_version = api_version
    self.default_timeout = default_timeout
    self.client_session = Session(AsyncHTTPClient)
    self.queue_lock = Lock()
    IOLoop.current().run_sync(self.get_access_token)
def __init__(self, endpoints=LOCATOR_DEFAULT_ENDPOINTS, io_loop=None):
    self.io_loop = io_loop or IOLoop.current()
    self.endpoints = endpoints
    self._lock = Lock()
    self.counter = itertools.count(1)
    self.pipe = None
    self.target = Defaults.app
    self.verbosity = DEBUG_LEVEL
    self.queue = queues.Queue(10000)

    # The level could be reset from update_verbosity in the future.
    if not fallback_logger.handlers:
        sh = logging.StreamHandler()
        sh.setFormatter(logging.Formatter(
            fmt="[%(asctime)s.%(msecs)d] %(levelname)s fallback %(message)s",
            datefmt="%z %d/%b/%Y:%H:%M:%S"))
        sh.setLevel(logging.DEBUG)
        fallback_logger.addHandler(sh)

    self._send()
    try:
        uuid = Defaults.uuid
        self._defaultattrs = [("uuid", uuid)]
    except GetOptError:
        self._defaultattrs = []
class EventSettingHandler(BasicAuthHandlerMixin):
    def initialize(self, wechat_event_setting, **kwargs):
        super(EventSettingHandler, self).initialize(**kwargs)
        self.wechat_event_message_setting = wechat_event_setting
        self.setting_lock = Lock()

    def _output_setting(self, setting):
        response = {"setting": setting}
        self.write(response)

    @coroutine
    def get(self):
        setting = self.wechat_event_message_setting.get_all()
        self._output_setting(setting)

    @coroutine
    def post(self):
        json_str = self.request.body
        try:
            setting = json.loads(json_str)
        except (TypeError, ValueError):
            # Narrowed from a bare except: json.loads raises these on bad input.
            self._output_error_response(400, "bad request")
            return
        for key in setting:
            if key not in WechatEventSetting.all_settings:
                self._output_error_response(
                    400, 'setting has no key "{}"'.format(key))
                return
        with (yield self.setting_lock.acquire()):
            self.wechat_event_message_setting.set_all(setting)
            self.wechat_event_message_setting.save()
        self.write({"errcode": 0, "errmsg": ""})
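# The handler above guards its read-modify-write with tornado.locks.Lock. A
# minimal, self-contained sketch of that pattern; `shared_state` is
# illustrative.
from tornado import gen
from tornado.locks import Lock

shared_lock = Lock()
shared_state = {}

@gen.coroutine
def update_shared_state(key, value):
    # acquire() resolves to a context manager, so the lock is released
    # automatically when the `with` block exits, even on error.
    with (yield shared_lock.acquire()):
        shared_state[key] = value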
def __init__(self, port, http_client, verbose):
    """ Creates a new DatastoreServer.

    Args:
      port: An integer specifying the port to use.
      http_client: An AsyncHTTPClient.
      verbose: A boolean that sets the logging level to debug.
    """
    super(DatastoreServer, self).__init__(ServiceTypes.DATASTORE, port)
    self.monit_name = 'datastore_server-{}'.format(port)
    self._http_client = http_client
    self._stdout = None
    self._verbose = verbose

    # Serializes start, stop, and monitor operations.
    self._management_lock = AsyncLock()
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self._lock = Lock()
    self._session_locks = {}
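# Sketch of how a lock map like the one above is typically used: the outer
# lock serializes creation of per-session locks, and each session lock then
# serializes work for that session. Method names are illustrative.
from tornado import gen
from tornado.locks import Lock

class SessionStore(object):
    def __init__(self):
        self._lock = Lock()
        self._session_locks = {}

    @gen.coroutine
    def _session_lock(self, session_id):
        with (yield self._lock.acquire()):
            if session_id not in self._session_locks:
                self._session_locks[session_id] = Lock()
            raise gen.Return(self._session_locks[session_id])

    @gen.coroutine
    def update_session(self, session_id, mutate):
        lock = yield self._session_lock(session_id)
        with (yield lock.acquire()):
            mutate(session_id)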
class BaseService(object):
    def __init__(self, name, endpoints, io_loop=None):
        # If it's not the main thread and a current IOLoop doesn't exist here,
        # IOLoop.instance becomes self._io_loop.
        self.io_loop = io_loop or IOLoop.current()
        # List of available endpoints the service resolves to.
        # Looks like [["host1", port1], ["host2", port2]].
        self.endpoints = endpoints
        self.name = name
        self.id = generate_service_id(self)
        self.log = servicelog

        self.sessions = {}
        self.counter = itertools.count(1)
        self.api = {}

        self._lock = Lock()

        # Wrap into a separate class.
        self.pipe = None
        self.address = None
        # on_close can be scheduled at any time, even after we've already
        # reconnected. To prevent closing the wrong connection, each new pipe
        # has its epoch, used as an id for on_close.
        self.pipe_epoch = 0
        self.buffer = msgpack_unpacker()

        self._header_table = {
            'tx': CocaineHeaders(),
            'rx': CocaineHeaders(),
        }

    @coroutine
    def connect(self, traceid=None):
        if self._connected:
            return

        log = get_trace_adapter(self.log, traceid)
        log.debug("acquiring the connection lock")
        with (yield self._lock.acquire()):
            if self._connected:
                return

            start_time = time.time()

            if self.pipe:
                log.info("`%s` pipe has been closed by StreamClosed exception",
                         self.name)
                self.disconnect()

            conn_statuses = []
            for host, port in self.endpoints:
                try:
                    log.info("trying %s:%d to establish connection %s",
                             host, port, self.name)
                    self.pipe_epoch += 1
                    self.pipe = yield TCPClient(io_loop=self.io_loop).connect(
                        host, port)
                    self.pipe.set_nodelay(True)
                    set_keep_alive(self.pipe.socket)
                    self.pipe.read_until_close(
                        callback=functools.partial(
                            weak_wrapper, weakref.ref(self), "on_close",
                            self.pipe_epoch),
                        streaming_callback=functools.partial(
                            weak_wrapper, weakref.ref(self), "on_read"))
                except Exception as err:
                    log.error("connection error %s", err)
                    conn_statuses.append((host, port, err))
                else:
                    self.address = (host, port)
                    self._header_table = {
                        'tx': CocaineHeaders(),
                        'rx': CocaineHeaders(),
                    }
                    connection_time = (time.time() - start_time) * 1000
                    log.info(
                        "`%s` connection has been established successfully %.3fms",
                        self.name, connection_time)
                    return

            raise ServiceConnectionError(
                "unable to establish connection: " +
                ", ".join(("%s:%d %s" % (host, port, err)
                           for (host, port, err) in conn_statuses)))

    def disconnect(self):
        self.log.debug("`%s` disconnect has been called", self.name)
        if self.pipe is None:
            return

        self.pipe.close()
        self.pipe = None

        # Detach rx from sessions and send errors to all open sessions.
        sessions = self.sessions
        while sessions:
            _, rx = sessions.popitem()
            rx.error(DisconnectionError(self.name))
        self.log.info("`%s` has been disconnected", self.name)

    def on_close(self, pipe_epoch, *args):
        self.log.info("`%s` pipe has been closed with args: %s",
                      self.name, args)
        if self.pipe_epoch == pipe_epoch:
            self.log.info("the epoch matches. Call disconnect")
            self.disconnect()

    def on_read(self, read_bytes):
        self.log.debug("read %.300s", read_bytes)
        self.buffer.feed(read_bytes)
        for msg in self.buffer:
            self.log.debug("unpacked: %.300s", msg)
            try:
                session, message_type, payload = msg[:3]  # skip extra fields
                self.log.debug("%s, %d, %.300s", session, message_type, payload)
                headers = msg[3] if len(msg) > 3 else None
            except Exception as err:
                self.log.error("malformed message: `%s` %s", err, msg)
                continue

            rx = self.sessions.get(session)
            if rx is None:
                self.log.warning("unknown session number: `%d`", session)
                continue

            rx.push(message_type, payload, headers)
            if rx.closed():
                del self.sessions[session]

    @coroutine
    def _invoke(self, method_name, *args, **kwargs):
        # Pop the Trace object, because it's not a real header.
        trace = kwargs.pop("trace", None)
        if trace is not None:
            update_dict_with_trace(kwargs, trace)

        trace_id = kwargs.get('trace_id')
        trace_logger = get_trace_adapter(self.log, trace_id)
        trace_logger.debug("BaseService method `%s` call: %.300s %.300s",
                           method_name, args, kwargs)

        yield self.connect(trace_id)
        if self.pipe is None:
            raise ServiceConnectionError('connection has suddenly disappeared')

        trace_logger.debug("%s", self.api)
        for method_id, (method, tx_tree, rx_tree) in six.iteritems(self.api):
            if method == method_name:
                trace_logger.debug("method `%s` has been found in API map",
                                   method_name)
                session = next(self.counter)  # py3 counter has no .next() method
                # Manage headers using the header table.
                headers = manage_headers(kwargs, self._header_table['tx'])
                packed_data = msgpack_packb([session, method_id, args, headers])
                trace_logger.info(
                    'send message to `%s`: channel id: %s, type: %s, length: %s bytes',
                    self.name, session, method_name, len(packed_data))
                trace_logger.debug('send message: %.300s',
                                   [session, method_id, args, kwargs])
                self.pipe.write(packed_data)

                trace_logger.debug("RX TREE %s", rx_tree)
                trace_logger.debug("TX TREE %s", tx_tree)

                rx = Rx(rx_tree=rx_tree,
                        session_id=session,
                        header_table=self._header_table['rx'],
                        io_loop=self.io_loop,
                        service_name=self.name,
                        trace_id=trace_id)
                tx = Tx(tx_tree=tx_tree,
                        pipe=self.pipe,
                        session_id=session,
                        header_table=self._header_table['tx'],
                        service_name=self.name,
                        trace_id=trace_id)
                self.sessions[session] = rx
                channel = Channel(rx=rx, tx=tx)
                raise Return(channel)

        raise AttributeError(method_name)

    @property
    def _connected(self):
        return self.pipe is not None and not self.pipe.closed()

    def __getattr__(self, name):
        def on_getattr(*args, **kwargs):
            return self._invoke(six.b(name), *args, **kwargs)
        return on_getattr

    def __del__(self):
        # We have to close the owned connection;
        # otherwise it would be an fd leak.
        self.disconnect()

    def __str__(self):
        return "name: %s id: %s" % (self.name, self.id)

    def __repr__(self):
        return "<%s %s %s at %s>" % (type(self).__name__, self.name, self.id,
                                     hex(id(self)))
class Pool(object):
    def __init__(self, host, user, password, database, port=3306,
                 connect_timeout=5, init_command="SET names utf8",
                 max_active=10, idle_timeout=600):
        self.host = host
        self.user = user
        self.password = password
        self.database = database
        self.port = port
        self.connect_timeout = connect_timeout
        self.init_command = init_command
        # When zero, there is no limit on the number of connections in the pool.
        self.max_active = max_active
        self.idle_timeout = idle_timeout
        self.active = 0
        self.idle_queue = deque()
        self.closed = False
        self.killer_lock = Lock()
        self.killer = None
        self.killer_idle_at = None

    def active_count(self):
        return self.active

    @coroutine
    def get_killer_connection(self):
        with (yield self.killer_lock.acquire()):
            if self.killer is not None:
                self.killer_idle_at = time.time()
                raise Return(self.killer)
            try:
                self.killer = yield self._create_connection()
                self.killer_idle_at = time.time()
            except Exception as e:
                self.killer = None
                self.killer_idle_at = None
                logging.error(u'create killer connection exception: %s', e)
            raise Return(self.killer)

    @coroutine
    def get_connection(self):
        if self.killer is not None:
            if self.killer_idle_at + 1800 < time.time():
                # Avoid kill() and close() at the same time.
                with (yield self.killer_lock.acquire()):
                    logging.info(u"idle timeout, recycle killer connection: %s",
                                 self.killer)
                    self._safe_close(self.killer)
                    self.killer = None
                    self.killer_idle_at = None

        if self.idle_timeout > 0:
            while len(self.idle_queue) > 0:
                c = self.idle_queue[0]
                if c.idle_at + self.idle_timeout > time.time():
                    break
                logging.info(u"idle timeout, recycle stale connection: %s", c)
                c = self.idle_queue.popleft()
                yield self._safe_close(c)  # close the long-idle connection

        if self.closed:
            raise PoolClosedError(u"connection pool closed.")

        if len(self.idle_queue) > 0:
            self.active += 1
            c = self.idle_queue.pop()
            c.idle_at = time.time()
            raise Return(c)

        if self.max_active == 0 or self.active < self.max_active:
            self.active += 1
            logging.info(u"create new db connection[%s:%s %s]. now active: %d"
                         % (self.host, self.port, self.database, self.active))
            try:
                c = yield self._create_connection()
            except Exception:
                # e.g. OperationalError: (1040, u'Too many connections')
                self.active -= 1
                raise
            raise Return(c)
        else:
            raise PoolExhaustedError(u"connection pool exhausted. active: %d"
                                     % self.active)

    @coroutine
    def _create_connection(self):
        c = Connection(self.host, self.user, self.password, self.database,
                       self.port, connect_timeout=self.connect_timeout,
                       init_command=self.init_command, charset='utf8')
        yield c.connect()
        yield c.autocommit(True)
        if self.init_command:
            yield c.execute(self.init_command)
        setattr(c, 'idle_at', time.time())
        raise Return(c)

    @coroutine
    def _safe_close(self, conn):
        try:
            yield conn.close()
        except Exception as e:
            logging.warning(u'_safe_close() exception: %s', e)

    @coroutine
    def release_connection(self, conn, discard):
        if conn is None:
            # A connection was never successfully acquired.
            return
        self.active -= 1
        if self.closed:
            # pool.close() was called while this connection was in use.
            yield self._safe_close(conn)
            return
        if discard:
            yield self._safe_close(conn)
        else:
            conn.idle_at = time.time()
            self.idle_queue.append(conn)

    @coroutine
    def close(self):
        logging.info(u"pool closing.")
        self.closed = True
        while len(self.idle_queue) > 0:
            c = self.idle_queue.popleft()
            yield self._safe_close(c)
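# Hypothetical driver for the pool above. `conn.execute` mirrors the
# Connection API the pool itself uses for init_command; everything else
# follows the pool's own get/release contract (`discard=True` drops a broken
# connection instead of returning it to the idle queue).
from tornado import gen

@gen.coroutine
def run_query(pool, sql):
    conn = yield pool.get_connection()
    discard = False
    try:
        result = yield conn.execute(sql)
    except Exception:
        discard = True
        raise
    finally:
        yield pool.release_connection(conn, discard)
    raise gen.Return(result)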
def setUp(self):
    self.wrapper = fixtures_app.RubyPcsdWrapper(ruby_pcsd.SINATRA_REMOTE)
    self.https_server_manage = mock.MagicMock(
        spec_set=http_server.HttpsServerManage)
    self.lock = Lock()
    super().setUp()
def _write_lock_default(self):
    return Lock()
class InstanceManager(object):
    """ Fulfills AppServer instance assignments from the scheduler. """

    # The seconds to wait between performing health checks.
    HEALTH_CHECK_INTERVAL = 60

    def __init__(self, zk_client, monit_operator, routing_client,
                 projects_manager, deployment_config, source_manager,
                 syslog_server, thread_pool, private_ip):
        """ Creates a new InstanceManager.

        Args:
          zk_client: A kazoo.client.KazooClient object.
          monit_operator: An appscale.common.monit_interface.MonitOperator
            object.
          routing_client: An instance_manager.routing_client.RoutingClient
            object.
          projects_manager: A ProjectsManager object.
          deployment_config: A common.deployment_config.DeploymentConfig
            object.
          source_manager: An instance_manager.source_manager.SourceManager
            object.
          syslog_server: A string specifying the location of the syslog
            process that generates the combined app logs.
          thread_pool: A ThreadPoolExecutor.
          private_ip: A string specifying the current machine's private IP
            address.
        """
        self._monit_operator = monit_operator
        self._routing_client = routing_client
        self._private_ip = private_ip
        self._syslog_server = syslog_server
        self._projects_manager = projects_manager
        self._deployment_config = deployment_config
        self._source_manager = source_manager
        self._thread_pool = thread_pool
        self._zk_client = zk_client

        # Ensures only one process tries to make changes at a time.
        self._work_lock = AsyncLock()

        self._health_checker = PeriodicCallback(
            self._ensure_health, self.HEALTH_CHECK_INTERVAL * 1000)

        # Instances that this machine should run.
        # For example, {guestbook_default_v1: [20000, -1]}
        self._assignments = None
        self._api_servers = {}
        self._running_instances = set()
        self._login_server = None

    def start(self):
        """ Begins processes needed to fulfill instance assignments. """
        # Update the list of running instances in case the InstanceManager
        # was restarted.
        self._recover_state()

        # Subscribe to changes in controller state, which includes
        # assignments and the 'login' property.
        self._zk_client.DataWatch(CONTROLLER_STATE_NODE,
                                  self._controller_state_watch)

        # Subscribe to changes in project configuration, including relevant
        # versions.
        self._projects_manager.subscriptions.append(
            self._handle_configuration_update)

        # Start the regular health check.
        self._health_checker.start()

    @gen.coroutine
    def _start_instance(self, version, port):
        """ Starts a Google App Engine application on this machine. It will
        start it up and then proceed to fetch the main page.

        Args:
          version: A Version object.
          port: An integer specifying a port to use.
        """
        version_details = version.version_details
        runtime = version_details['runtime']
        env_vars = version_details.get('envVariables', {})
        runtime_params = self._deployment_config.get_config(
            'runtime_parameters')
        max_memory = runtime_params.get('default_max_appserver_memory',
                                        DEFAULT_MAX_APPSERVER_MEMORY)
        if 'instanceClass' in version_details:
            max_memory = INSTANCE_CLASSES.get(
                version_details['instanceClass'], max_memory)
        source_archive = version_details['deployment']['zip']['sourceUrl']

        api_server_port = yield self._ensure_api_server(version.project_id)
        yield self._source_manager.ensure_source(version.revision_key,
                                                 source_archive, runtime)

        logger.info('Starting {}:{}'.format(version, port))

        pidfile = PIDFILE_TEMPLATE.format(revision=version.revision_key,
                                          port=port)

        if runtime == GO:
            env_vars['GOPATH'] = os.path.join(UNPACK_ROOT,
                                              version.revision_key, 'gopath')
            env_vars['GOROOT'] = os.path.join(GO_SDK, 'goroot')

        watch = ''.join([MONIT_INSTANCE_PREFIX, version.revision_key])
        if runtime in (PYTHON27, GO, PHP):
            start_cmd = create_python27_start_cmd(version.project_id,
                                                  self._login_server, port,
                                                  pidfile,
                                                  version.revision_key,
                                                  api_server_port)
            env_vars.update(
                create_python_app_env(self._login_server, version.project_id))
        elif runtime == JAVA:
            # Account for MaxPermSize (~170MB), the parent process (~50MB),
            # and thread stacks (~20MB).
            max_heap = max_memory - 250
            if max_heap <= 0:
                raise BadConfigurationException(
                    'Memory for Java applications must be greater than 250MB')

            start_cmd = create_java_start_cmd(version.project_id, port,
                                              self._login_server, max_heap,
                                              pidfile, version.revision_key,
                                              api_server_port)
            env_vars.update(create_java_app_env(self._deployment_config))
        else:
            raise BadConfigurationException(
                'Unknown runtime {} for {}'.format(runtime,
                                                   version.project_id))

        logger.info("Start command: " + str(start_cmd))
        logger.info("Environment variables: " + str(env_vars))

        base_version = version.revision_key.rsplit(VERSION_PATH_SEPARATOR,
                                                   1)[0]
        log_tag = "app_{}".format(hashlib.sha1(base_version).hexdigest()[:28])

        monit_app_configuration.create_config_file(
            watch,
            start_cmd,
            pidfile,
            port,
            env_vars,
            max_memory,
            self._syslog_server,
            check_port=True,
            kill_exceeded_memory=True,
            log_tag=log_tag,
        )

        full_watch = '{}-{}'.format(watch, port)

        yield self._monit_operator.reload(self._thread_pool)

        # The reload command does not block, and we don't have a good way to
        # check if Monit is ready with its new configuration yet. If the
        # daemon begins reloading while it is handling the 'start', it can
        # end up in a state where it never starts the process. As a temporary
        # workaround, this small period allows it to finish reloading. This
        # can be removed if instances are started inside a cgroup.
        yield gen.sleep(0.5)
        yield self._monit_operator.send_command_retry_process(full_watch,
                                                              'start')

        # Make sure the version registration node exists.
        self._zk_client.ensure_path(
            '/'.join([VERSION_REGISTRATION_NODE, version.version_key]))

        instance = Instance(version.revision_key, port)
        yield self._add_routing(instance)

        if version.project_id == DASHBOARD_PROJECT_ID:
            log_size = DASHBOARD_LOG_SIZE
        else:
            log_size = APP_LOG_SIZE

        if not setup_logrotate(version.project_id, log_size):
            logger.error(
                "Error while setting up log rotation for application: "
                "{}".format(version.project_id))

    @gen.coroutine
    def populate_api_servers(self):
        """ Find running API servers. """
        def api_server_info(entry):
            prefix, port = entry.rsplit('-', 1)
            project_id = prefix[len(API_SERVER_PREFIX):]
            return project_id, int(port)

        monit_entries = yield self._monit_operator.get_entries()
        server_entries = [api_server_info(entry) for entry in monit_entries
                          if entry.startswith(API_SERVER_PREFIX)]

        for project_id, port in server_entries:
            self._api_servers[project_id] = port

    def _recover_state(self):
        """ Establishes current state from Monit entries. """
        logger.info('Getting current state')
        monit_entries = self._monit_operator.get_entries_sync()
        instance_entries = {
            entry: state for entry, state in monit_entries.items()
            if entry.startswith(MONIT_INSTANCE_PREFIX)}

        # Remove all unmonitored entries.
        removed = []
        for entry, state in instance_entries.items():
            if state == MonitStates.UNMONITORED:
                self._monit_operator.remove_configuration(entry)
                removed.append(entry)

        for entry in removed:
            del instance_entries[entry]

        if removed:
            self._monit_operator.reload_sync()

        instance_details = []
        for entry, state in instance_entries.items():
            revision, port = entry[len(MONIT_INSTANCE_PREFIX):].rsplit('-', 1)
            instance_details.append(
                {'revision': revision, 'port': int(port), 'state': state})

        clean_up_instances(instance_details)

        # Ensure version nodes exist.
        running_versions = {'_'.join(instance['revision'].split('_')[:3])
                            for instance in instance_details}
        self._zk_client.ensure_path(VERSION_REGISTRATION_NODE)
        for version_key in running_versions:
            self._zk_client.ensure_path(
                '/'.join([VERSION_REGISTRATION_NODE, version_key]))

        # Account for monitored instances.
        running_instances = {
            Instance(instance['revision'], instance['port'])
            for instance in instance_details}
        self._routing_client.declare_instance_nodes(running_instances)
        self._running_instances = running_instances

    @gen.coroutine
    def _ensure_api_server(self, project_id):
        """ Make sure there is a running API server for a project.

        Args:
          project_id: A string specifying the project ID.
        Returns:
          An integer specifying the API server port.
        """
        if project_id in self._api_servers:
            raise gen.Return(self._api_servers[project_id])

        server_port = MAX_API_SERVER_PORT
        for port in self._api_servers.values():
            if port <= server_port:
                server_port = port - 1

        zk_locations = appscale_info.get_zk_node_ips()
        start_cmd = ' '.join([API_SERVER_LOCATION,
                              '--port', str(server_port),
                              '--project-id', project_id,
                              '--zookeeper-locations',
                              ' '.join(zk_locations)])

        watch = ''.join([API_SERVER_PREFIX, project_id])
        full_watch = '-'.join([watch, str(server_port)])
        pidfile = os.path.join(VAR_DIR, '{}.pid'.format(full_watch))
        monit_app_configuration.create_config_file(
            watch,
            start_cmd,
            pidfile,
            server_port,
            max_memory=DEFAULT_MAX_APPSERVER_MEMORY,
            check_port=True)

        yield self._monit_operator.reload(self._thread_pool)
        yield self._monit_operator.send_command_retry_process(full_watch,
                                                              'start')

        self._api_servers[project_id] = server_port
        raise gen.Return(server_port)

    @gen.coroutine
    def _unmonitor_and_terminate(self, watch):
        """ Unmonitors an instance and terminates it.

        Args:
          watch: A string specifying the Monit entry.
        """
        try:
            monit_retry = retry(max_retries=5,
                                retry_on_exception=DEFAULT_RETRIES)
            send_w_retries = monit_retry(
                self._monit_operator.send_command_sync)
            send_w_retries(watch, 'unmonitor')
        except ProcessNotFound:
            # If Monit does not know about a process, assume it is already
            # stopped.
            return

        # Now that the AppServer is stopped, remove its monit config file so
        # that monit doesn't pick it up and restart it.
        self._monit_operator.remove_configuration(watch)

        stop_instance(watch, MAX_INSTANCE_RESPONSE_TIME)

    def _instance_healthy(self, port):
        """ Determines the health of an instance with an HTTP request.

        Args:
          port: An integer specifying the port the instance is listening on.
        Returns:
          A boolean indicating whether or not the instance is healthy.
        """
        url = "http://" + self._private_ip + ":" + str(port) + FETCH_PATH
        try:
            opener = urllib2.build_opener(NoRedirection)
            response = opener.open(url, timeout=HEALTH_CHECK_TIMEOUT)
            if response.code == httplib.SERVICE_UNAVAILABLE:
                return False
        except IOError:
            return False

        return True

    @gen.coroutine
    def _wait_for_app(self, port):
        """ Waits for the application hosted on this machine, on the given
        port, to respond to HTTP requests.

        Args:
          port: Port where the app is hosted on the local machine.
        Returns:
          True on success, False otherwise.
        """
        deadline = time.time() + START_APP_TIMEOUT
        while time.time() < deadline:
            if self._instance_healthy(port):
                raise gen.Return(True)

            logger.debug('Instance at port {} is not ready yet'.format(port))
            yield gen.sleep(BACKOFF_TIME)

        raise gen.Return(False)

    @gen.coroutine
    def _add_routing(self, instance):
        """ Tells the AppController to begin routing traffic to an AppServer.

        Args:
          instance: An Instance.
        """
        logger.info('Waiting for {}'.format(instance))
        start_successful = yield self._wait_for_app(instance.port)
        if not start_successful:
            monit_watch = ''.join([MONIT_INSTANCE_PREFIX,
                                   instance.revision_key, '-',
                                   str(instance.port)])
            yield self._unmonitor_and_terminate(monit_watch)
            yield self._monit_operator.reload(self._thread_pool)
            logger.warning('{} did not come up in time'.format(instance))
            return

        self._routing_client.register_instance(instance)
        self._running_instances.add(instance)

    @gen.coroutine
    def _stop_api_server(self, project_id):
        """ Make sure there is not a running API server for a project.

        Args:
          project_id: A string specifying the project ID.
        """
        if project_id not in self._api_servers:
            return

        port = self._api_servers[project_id]
        watch = '{}{}-{}'.format(API_SERVER_PREFIX, project_id, port)
        yield self._unmonitor_and_terminate(watch)
        del self._api_servers[project_id]

    @gen.coroutine
    def _clean_old_sources(self):
        """ Removes source code for obsolete revisions. """
        monit_entries = yield self._monit_operator.get_entries()
        active_revisions = {
            entry[len(MONIT_INSTANCE_PREFIX):].rsplit('-', 1)[0]
            for entry in monit_entries
            if entry.startswith(MONIT_INSTANCE_PREFIX)}

        for project_id, project_manager in self._projects_manager.items():
            for service_id, service_manager in project_manager.items():
                for version_id, version_manager in service_manager.items():
                    revision_id = version_manager.version_details['revision']
                    revision_key = VERSION_PATH_SEPARATOR.join(
                        [project_id, service_id, version_id,
                         str(revision_id)])
                    active_revisions.add(revision_key)

        self._source_manager.clean_old_revisions(
            active_revisions=active_revisions)

    @gen.coroutine
    def _stop_app_instance(self, instance):
        """ Stops a Google App Engine application process instance on the
        current machine.

        Args:
          instance: An Instance object.
        """
        logger.info('Stopping {}'.format(instance))

        monit_watch = ''.join([MONIT_INSTANCE_PREFIX, instance.revision_key,
                               '-', str(instance.port)])

        self._routing_client.unregister_instance(instance)
        try:
            self._running_instances.remove(instance)
        except KeyError:
            logger.info(
                'unregister_instance: non-existent instance {}'.format(
                    instance))

        yield self._unmonitor_and_terminate(monit_watch)

        project_instances = [instance_ for instance_
                             in self._running_instances
                             if instance_.project_id == instance.project_id]
        if not project_instances:
            yield self._stop_api_server(instance.project_id)
            remove_logrotate(instance.project_id)

        yield self._monit_operator.reload(self._thread_pool)
        yield self._clean_old_sources()

    def _get_lowest_port(self):
        """ Determines the lowest usable port for a new instance.

        Returns:
          An integer specifying a free port.
        """
        existing_ports = {instance.port
                          for instance in self._running_instances}
        port = STARTING_INSTANCE_PORT
        while True:
            if port in existing_ports:
                port += 1
                continue

            return port

    @gen.coroutine
    def _restart_unrouted_instances(self):
        """ Restarts instances that the router considers offline. """
        with (yield self._work_lock.acquire()):
            failed_instances = yield self._routing_client.get_failed_instances()
            for version_key, port in failed_instances:
                try:
                    instance = next(
                        instance for instance in self._running_instances
                        if instance.version_key == version_key
                        and instance.port == port)
                except StopIteration:
                    # If the manager has no record of that instance, remove
                    # routing.
                    self._routing_client.unregister_instance(
                        Instance(version_key, port))
                    continue

                try:
                    version = self._projects_manager.version_from_key(
                        instance.version_key)
                except KeyError:
                    # If the version no longer exists, avoid doing any work.
                    # The scheduler should remove any assignments for it.
                    continue

                logger.warning(
                    'Restarting failed instance: {}'.format(instance))
                yield self._stop_app_instance(instance)
                yield self._start_instance(version, instance.port)

    @gen.coroutine
    def _restart_unavailable_instances(self):
        """ Restarts instances that fail health check requests. """
        with (yield self._work_lock.acquire()):
            for instance in self._running_instances:
                # TODO: Add a threshold to avoid restarting on a transient
                # error.
                if not self._instance_healthy(instance.port):
                    try:
                        version = self._projects_manager.version_from_key(
                            instance.version_key)
                    except KeyError:
                        # If the version no longer exists, avoid doing any
                        # work. The scheduler should remove any assignments
                        # for it.
                        continue

                    logger.warning(
                        'Restarting failed instance: {}'.format(instance))
                    yield self._stop_app_instance(instance)
                    yield self._start_instance(version, instance.port)

    @gen.coroutine
    def _ensure_health(self):
        """ Checks to make sure all required instances are running and
        healthy. """
        yield self._restart_unrouted_instances()
        yield self._restart_unavailable_instances()

        # Just as an infrequent sanity check, fulfill assignments and enforce
        # instance details.
        yield self._fulfill_assignments()
        yield self._enforce_instance_details()

    @gen.coroutine
    def _fulfill_assignments(self):
        """ Starts and stops instances in order to fulfill assignments. """
        # If the manager has not been able to retrieve a valid set of
        # assignments, don't do any work.
        if self._assignments is None:
            return

        with (yield self._work_lock.acquire()):
            # Stop versions that aren't assigned.
            to_stop = [instance for instance in self._running_instances
                       if instance.version_key not in self._assignments]
            for version_key in {instance.version_key
                                for instance in to_stop}:
                logger.info('{} is no longer assigned'.format(version_key))

            for instance in to_stop:
                yield self._stop_app_instance(instance)

            for version_key, assigned_ports in self._assignments.items():
                try:
                    version = self._projects_manager.version_from_key(
                        version_key)
                except KeyError:
                    # If the version no longer exists, avoid doing any work.
                    # The scheduler should remove any assignments for it.
                    continue

                # The number of required instances that don't have an
                # assigned port.
                new_assignment_count = sum(port == -1
                                           for port in assigned_ports)

                # Stop instances that aren't assigned. If the assignment list
                # includes any -1s, match them to running instances that
                # aren't in the assigned ports list.
                candidates = [instance
                              for instance in self._running_instances
                              if instance.version_key == version_key
                              and instance.port not in assigned_ports]
                unmatched_instances = candidates[new_assignment_count:]
                for running_instance in unmatched_instances:
                    logger.info(
                        '{} is no longer assigned'.format(running_instance))
                    yield self._stop_app_instance(running_instance)

                # Start defined ports that aren't running.
                running_ports = [instance.port
                                 for instance in self._running_instances
                                 if instance.version_key == version_key]
                for port in assigned_ports:
                    if port != -1 and port not in running_ports:
                        yield self._start_instance(version, port)

                # Start new assignments that don't have a match.
                candidates = [instance
                              for instance in self._running_instances
                              if instance.version_key == version_key
                              and instance.port not in assigned_ports]
                to_start = max(new_assignment_count - len(candidates), 0)
                for _ in range(to_start):
                    yield self._start_instance(version,
                                               self._get_lowest_port())

    @gen.coroutine
    def _enforce_instance_details(self):
        """ Ensures all running instances are configured correctly. """
        with (yield self._work_lock.acquire()):
            # Restart instances with an outdated revision or login server.
            for instance in self._running_instances:
                try:
                    version = self._projects_manager.version_from_key(
                        instance.version_key)
                except KeyError:
                    # If the version no longer exists, avoid doing any work.
                    # The scheduler should remove any assignments for it.
                    continue

                login_server_changed = (
                    self._login_server is not None
                    and self._login_server != get_login_server(instance))
                if (instance.revision_key != version.revision_key
                        or login_server_changed):
                    logger.info(
                        'Configuration changed for {}'.format(instance))
                    yield self._stop_app_instance(instance)
                    yield self._start_instance(version, instance.port)

    def _assignments_from_state(self, controller_state):
        """ Extracts the current machine's assignments from controller state.

        Args:
          controller_state: A dictionary containing controller state.
        """
        def version_assignments(data):
            return [int(server.split(':')[1])
                    for server in data['appservers']
                    if server.split(':')[0] == self._private_ip]

        return {
            version_key: version_assignments(data)
            for version_key, data
            in controller_state['@app_info_map'].items()
            if version_assignments(data)}

    @gen.coroutine
    def _update_controller_state(self, encoded_controller_state):
        """ Handles updates to controller state.

        Args:
          encoded_controller_state: A JSON-encoded string containing
            controller state.
        """
        try:
            controller_state = json.loads(encoded_controller_state)
        except (TypeError, ValueError):
            # If the controller state isn't usable, don't do any work.
            logger.warning('Invalid controller state: {}'.format(
                encoded_controller_state))
            return

        new_assignments = self._assignments_from_state(controller_state)
        login_server = controller_state['@options']['login']

        if new_assignments != self._assignments:
            logger.info('New assignments: {}'.format(new_assignments))
            self._assignments = new_assignments
            yield self._fulfill_assignments()

        if login_server != self._login_server:
            logger.info('New login server: {}'.format(login_server))
            self._login_server = login_server
            yield self._enforce_instance_details()

    def _controller_state_watch(self, encoded_controller_state, _):
        """ Handles updates to controller state.

        Args:
          encoded_controller_state: A JSON-encoded string containing
            controller state.
        """
        persistent_update_controller_state = retry_data_watch_coroutine(
            CONTROLLER_STATE_NODE, self._update_controller_state)
        IOLoop.instance().add_callback(persistent_update_controller_state,
                                       encoded_controller_state)

    @gen.coroutine
    def _handle_configuration_update(self, event):
        """ Handles updates to a project's configuration details.

        Args:
          event: An appscale.admin.instance_manager.projects_manager.Event
            object.
        """
        relevant_versions = {instance.version_key
                             for instance in self._running_instances}
        if self._assignments is not None:
            relevant_versions |= set(self._assignments.keys())

        for version_key in relevant_versions:
            if event.affects_version(version_key):
                logger.info(
                    'New revision for version: {}'.format(version_key))
                yield self._enforce_instance_details()
                break
class Stream(object):
    def __init__(self, conn, stream_id, delegate, context=None):
        self.conn = conn
        self.stream_id = stream_id
        self.set_delegate(delegate)
        self.context = context
        self.finish_future = Future()
        self.write_lock = Lock()
        from tornado.util import ObjectDict  # TODO: remove
        self.stream = ObjectDict(io_loop=IOLoop.current(),
                                 close=conn.stream.close)
        self._incoming_content_remaining = None
        self._outgoing_content_remaining = None
        self._delegate_started = False
        self.window = Window(
            conn.window, stream_id,
            conn.setting(constants.Setting.INITIAL_WINDOW_SIZE))
        self._header_frames = []
        self._phase = constants.HTTPPhase.HEADERS

    def set_delegate(self, delegate):
        self.orig_delegate = self.delegate = delegate
        if self.conn.params.decompress:
            self.delegate = _GzipMessageDelegate(delegate,
                                                 self.conn.params.chunk_size)

    def handle_frame(self, frame):
        if frame.type == constants.FrameType.PRIORITY:
            self._handle_priority_frame(frame)
            return
        elif frame.type == constants.FrameType.RST_STREAM:
            self._handle_rst_stream_frame(frame)
            return
        elif frame.type == constants.FrameType.WINDOW_UPDATE:
            self._handle_window_update_frame(frame)
            return
        elif frame.type in (constants.FrameType.SETTINGS,
                            constants.FrameType.GOAWAY,
                            constants.FrameType.PUSH_PROMISE):
            # Fixed: the original passed frame.type as a stray argument
            # instead of formatting it into the message.
            raise Exception("invalid frame type %s for stream" % frame.type)

        if self.finish_future.done():
            raise StreamError(self.stream_id,
                              constants.ErrorCode.STREAM_CLOSED)

        if frame.type == constants.FrameType.HEADERS:
            self._handle_headers_frame(frame)
        elif frame.type == constants.FrameType.CONTINUATION:
            self._handle_continuation_frame(frame)
        elif frame.type == constants.FrameType.DATA:
            self._handle_data_frame(frame)
        # Unknown frame types are silently discarded, unless they break
        # the rule that nothing can come between HEADERS and CONTINUATION.

    def needs_continuation(self):
        return bool(self._header_frames)

    def _handle_headers_frame(self, frame):
        if self._phase == constants.HTTPPhase.BODY:
            self._phase = constants.HTTPPhase.TRAILERS
        frame = frame.without_padding()
        self._header_frames.append(frame)
        self._check_header_length()
        if frame.flags & constants.FrameFlag.END_HEADERS:
            self._parse_headers()

    def _handle_continuation_frame(self, frame):
        if not self._header_frames:
            raise ConnectionError(constants.ErrorCode.PROTOCOL_ERROR,
                                  "CONTINUATION without HEADERS")
        self._header_frames.append(frame)
        self._check_header_length()
        if frame.flags & constants.FrameFlag.END_HEADERS:
            self._parse_headers()

    def _check_header_length(self):
        if (sum(len(f.data) for f in self._header_frames) >
                self.conn.params.max_header_size):
            if self.conn.is_client:
                # TODO: Need tests for client side of headers-too-large.
                # What's the best way to send an error?
                self.delegate.on_connection_close()
            else:
                # write_headers needs a start line so it can tell
                # whether this is a HEAD or not. If we're rejecting
                # the headers we can't know so just make something up.
                # Note that this means the error response body MUST be
                # zero bytes so it doesn't matter whether the client
                # sent a HEAD or a GET.
                self._request_start_line = RequestStartLine('GET', '/',
                                                            'HTTP/2.0')
                start_line = ResponseStartLine('HTTP/2.0', 431,
                                               'Headers too large')
                self.write_headers(start_line, HTTPHeaders())
                self.finish()
            return

    def _parse_headers(self):
        frame = self._header_frames[0]
        data = b''.join(f.data for f in self._header_frames)
        self._header_frames = []
        if frame.flags & constants.FrameFlag.PRIORITY:
            # TODO: support PRIORITY and PADDING.
            # This is just enough to cover an error case tested in h2spec.
            stream_dep, weight = struct.unpack('>ib', data[:5])
            data = data[5:]
            # Strip off the "exclusive" bit.
            stream_dep = stream_dep & 0x7fffffff
            if stream_dep == frame.stream_id:
                raise ConnectionError(constants.ErrorCode.PROTOCOL_ERROR,
                                      "stream cannot depend on itself")
        pseudo_headers = {}
        headers = HTTPHeaders()
        try:
            # Pseudo-headers must come before any regular headers,
            # and only in the first HEADERS phase.
            has_regular_header = bool(
                self._phase == constants.HTTPPhase.TRAILERS)
            for k, v, idx in self.conn.hpack_decoder.decode(bytearray(data)):
                if k != k.lower():
                    # RFC section 8.1.2
                    raise StreamError(self.stream_id,
                                      constants.ErrorCode.PROTOCOL_ERROR)
                if k.startswith(b':'):
                    if self.conn.is_client:
                        valid_pseudo_headers = (b':status',)
                    else:
                        valid_pseudo_headers = (b':method', b':scheme',
                                                b':authority', b':path')
                    if (has_regular_header or
                            k not in valid_pseudo_headers or
                            native_str(k) in pseudo_headers):
                        raise StreamError(self.stream_id,
                                          constants.ErrorCode.PROTOCOL_ERROR)
                    pseudo_headers[native_str(k)] = native_str(v)
                    if k == b":authority":
                        headers.add("Host", native_str(v))
                else:
                    headers.add(native_str(k), native_str(v))
                    has_regular_header = True
        except HpackError:
            raise ConnectionError(constants.ErrorCode.COMPRESSION_ERROR)
        if self._phase == constants.HTTPPhase.HEADERS:
            self._start_request(pseudo_headers, headers)
        elif self._phase == constants.HTTPPhase.TRAILERS:
            # TODO: support trailers
            pass
        if (not self._maybe_end_stream(frame.flags) and
                self._phase == constants.HTTPPhase.TRAILERS):
            # The frame that finishes the trailers must also finish
            # the stream.
            raise StreamError(self.stream_id,
                              constants.ErrorCode.PROTOCOL_ERROR)

    def _start_request(self, pseudo_headers, headers):
        if "connection" in headers:
            raise ConnectionError(constants.ErrorCode.PROTOCOL_ERROR,
                                  "connection header should not be present")
        if "te" in headers and headers["te"] != "trailers":
            raise StreamError(self.stream_id,
                              constants.ErrorCode.PROTOCOL_ERROR)
        if self.conn.is_client:
            status = int(pseudo_headers[':status'])
            start_line = ResponseStartLine('HTTP/2.0', status,
                                           responses.get(status, ''))
        else:
            for k in (':method', ':scheme', ':path'):
                if k not in pseudo_headers:
                    raise StreamError(self.stream_id,
                                      constants.ErrorCode.PROTOCOL_ERROR)
            start_line = RequestStartLine(pseudo_headers[':method'],
                                          pseudo_headers[':path'],
                                          'HTTP/2.0')
            self._request_start_line = start_line

        if (self.conn.is_client and
                (self._request_start_line.method == 'HEAD' or
                 start_line.code == 304)):
            self._incoming_content_remaining = 0
        elif "content-length" in headers:
            self._incoming_content_remaining = int(headers["content-length"])

        if not self.conn.is_client or status >= 200:
            self._phase = constants.HTTPPhase.BODY

        self._delegate_started = True
        self.delegate.headers_received(start_line, headers)

    def _handle_data_frame(self, frame):
        if self._header_frames:
            raise ConnectionError(constants.ErrorCode.PROTOCOL_ERROR,
                                  "DATA without END_HEADERS")
        if self._phase == constants.HTTPPhase.TRAILERS:
            raise ConnectionError(constants.ErrorCode.PROTOCOL_ERROR,
                                  "DATA after trailers")
        self._phase = constants.HTTPPhase.BODY
        frame = frame.without_padding()
        if self._incoming_content_remaining is not None:
            self._incoming_content_remaining -= len(frame.data)
            if self._incoming_content_remaining < 0:
                raise StreamError(self.stream_id,
                                  constants.ErrorCode.PROTOCOL_ERROR)
        if frame.data and self._delegate_started:
            future = self.delegate.data_received(frame.data)
            if future is None:
                self._send_window_update(len(frame.data))
            else:
                IOLoop.current().add_future(
                    future,
                    lambda f: self._send_window_update(len(frame.data)))
        self._maybe_end_stream(frame.flags)

    def _send_window_update(self, amount):
        encoded = struct.pack('>I', amount)
        for stream_id in (0, self.stream_id):
            self.conn._write_frame(
                Frame(constants.FrameType.WINDOW_UPDATE, 0, stream_id,
                      encoded))

    def _maybe_end_stream(self, flags):
        if flags & constants.FrameFlag.END_STREAM:
            if (self._incoming_content_remaining is not None and
                    self._incoming_content_remaining != 0):
                raise StreamError(self.stream_id,
                                  constants.ErrorCode.PROTOCOL_ERROR)
            if self._delegate_started:
                self._delegate_started = False
                self.delegate.finish()
            self.finish_future.set_result(None)
            return True
        return False

    def _handle_priority_frame(self, frame):
        # TODO: implement priority
        if len(frame.data) != 5:
            raise StreamError(self.stream_id,
                              constants.ErrorCode.FRAME_SIZE_ERROR)

    def _handle_rst_stream_frame(self, frame):
        if len(frame.data) != 4:
            raise ConnectionError(constants.ErrorCode.FRAME_SIZE_ERROR)
        # TODO: expose error code?
        if self._delegate_started:
            self.delegate.on_connection_close()

    def _handle_window_update_frame(self, frame):
        self.window.apply_window_update(frame)

    def set_close_callback(self, callback):
        # TODO: this shouldn't be necessary
        pass

    def reset(self):
        self.conn._write_frame(Frame(constants.FrameType.RST_STREAM, 0,
                                     self.stream_id, b'\x00\x00\x00\x00'))

    @_reset_on_error
    def write_headers(self, start_line, headers, chunk=None, callback=None):
        if (not self.conn.is_client and
                (self._request_start_line.method == 'HEAD' or
                 start_line.code == 304)):
            self._outgoing_content_remaining = 0
        elif 'Content-Length' in headers:
            self._outgoing_content_remaining = int(headers['Content-Length'])
        header_list = []
        if self.conn.is_client:
            self._request_start_line = start_line
            header_list.append((b':method', utf8(start_line.method),
                                constants.HeaderIndexMode.YES))
            header_list.append((b':scheme', b'https',
                                constants.HeaderIndexMode.YES))
            header_list.append((b':path', utf8(start_line.path),
                                constants.HeaderIndexMode.NO))
        else:
            header_list.append((b':status', utf8(str(start_line.code)),
                                constants.HeaderIndexMode.YES))
        for k, v in headers.get_all():
            k = utf8(k.lower())
            if k == b"connection":
                # Remove the implicit "connection: close", which is not
                # allowed in http2.
                # TODO: move the responsibility for this from httpclient
                # to http1connection?
                continue
            header_list.append((k, utf8(v), constants.HeaderIndexMode.YES))
        data = bytes(self.conn.hpack_encoder.encode(header_list))
        frame = Frame(constants.FrameType.HEADERS,
                      constants.FrameFlag.END_HEADERS, self.stream_id, data)
        self.conn._write_frame(frame)

        return self.write(chunk, callback)

    @_reset_on_error
    def write(self, chunk, callback=None):
        if chunk:
            if self._outgoing_content_remaining is not None:
                self._outgoing_content_remaining -= len(chunk)
                if self._outgoing_content_remaining < 0:
                    raise HTTPOutputError(
                        "Tried to write more data than Content-Length")
        return self._write_chunk(chunk, callback)

    @gen.coroutine
    def _write_chunk(self, chunk, callback=None):
        try:
            if chunk:
                yield self.write_lock.acquire()
                while chunk:
                    bytes_to_write = min(
                        len(chunk),
                        self.conn.setting(constants.Setting.MAX_FRAME_SIZE))
                    allowance = yield self.window.consume(bytes_to_write)

                    yield self.conn._write_frame(
                        Frame(constants.FrameType.DATA, 0, self.stream_id,
                              chunk[:allowance]))
                    chunk = chunk[allowance:]
                self.write_lock.release()
            if callback is not None:
                callback()
        except Exception:
            self.reset()
            raise

    @_reset_on_error
    def finish(self):
        if (self._outgoing_content_remaining is not None and
                self._outgoing_content_remaining != 0):
            raise HTTPOutputError(
                "Tried to write %d bytes less than Content-Length" %
                self._outgoing_content_remaining)
        return self._write_end_stream()

    @gen.coroutine
    def _write_end_stream(self):
        # Callers are not required to wait for write() before calling finish,
        # so we must manually lock.
        yield self.write_lock.acquire()
        try:
            self.conn._write_frame(Frame(constants.FrameType.DATA,
                                         constants.FrameFlag.END_STREAM,
                                         self.stream_id, b''))
        except Exception:
            self.reset()
            raise
        finally:
            self.write_lock.release()

    def read_response(self, delegate):
        assert delegate is self.orig_delegate, 'cannot change delegate'
        return self.finish_future
def __init__(self): """Create new instance of FBaseProcessor that will process requests.""" self._processor_function_map = {} self._annotations_map = {} self._write_lock = Lock()
class CacheBase(Configurable):
    """Singleton implemented via Configurable.

    1. All operations should be asynchronous.

    >>> def view(self):
    ...     value = yield cache.get(key)
    """

    @classmethod
    def cached_instances(cls):
        attr_name = '_cached_instances_dict_' + cls.__name__
        if not hasattr(cls, attr_name):
            setattr(cls, attr_name, weakref.WeakKeyDictionary())
        return getattr(cls, attr_name)

    def __new__(cls, io_loop=None, force_instance=False, **kwargs):
        io_loop = io_loop or IOLoop.current()
        if force_instance:
            instance_cache = None
        else:
            instance_cache = cls.cached_instances()
        if instance_cache is not None and io_loop in instance_cache:
            return instance_cache[io_loop]
        instance = super(CacheBase, cls).__new__(cls, io_loop=io_loop,
                                                 **kwargs)
        # Make sure the instance knows which cache to remove itself from.
        # It can't simply call _async_clients() because we may be in
        # __new__(AsyncHTTPClient) but instance.__class__ may be
        # SimpleAsyncHTTPClient.
        instance._instance_cache = instance_cache
        if instance_cache is not None:
            instance_cache[instance.io_loop] = instance
        return instance

    def _make_key(self, key, version=None):
        if version is None:
            version = self.version
        new_key = self.key_func(key, self.key_prefix, version)
        return new_key

    def get_backend_timeout(self, timeout=DEFAULT_TIMEOUT):
        """Returns the timeout value usable by this backend based upon the
        provided timeout."""
        if timeout == DEFAULT_TIMEOUT:
            timeout = self.default_timeout
        elif timeout == 0:
            # ticket 21147 - avoid time.time() related precision issues
            timeout = -1
        return None if timeout is None else self.io_loop.time() + timeout

    def get(self, key, default=None, version=None):
        raise NotImplementedError(
            'subclasses of BaseCache must provide a get() method')

    def get_sync(self, key, default=None, version=None):
        raise NotImplementedError(
            'subclasses of BaseCache must provide a get_sync() method')

    def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
        raise NotImplementedError(
            'subclasses of BaseCache must provide a set() method')

    def set_sync(self, key, default=None, version=None):
        raise NotImplementedError(
            'subclasses of BaseCache must provide a set_sync() method')

    def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
        raise NotImplementedError(
            'subclasses of BaseCache must provide an add() method')

    def initialize(self, io_loop, defaults=None):
        self.io_loop = io_loop
        self.key_func = get_key_func(getattr(options, 'key_func', None))
        self.default_timeout = getattr(options, 'cache_time', 300)
        self.version = getattr(options, 'version', 1)
        self.key_prefix = getattr(options, 'key_prefix', 'cache')
        self.defaults = dict()
        self._lock = Lock()
        if defaults is not None:
            self.defaults.update(defaults)

    def lock(self, timeout=500):
        return self._lock.acquire()

    def release(self):
        return self._lock.release()

    @classmethod
    def configurable_base(cls):
        return CacheBase
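# Because lock() returns the acquire() future, a subclass coroutine can guard
# a read-modify-write like this (a sketch: get/set must be implemented by the
# subclass, and the resolved value releases the lock when the `with` block
# exits):
from tornado import gen

@gen.coroutine
def increment(cache, key):
    with (yield cache.lock()):
        value = yield cache.get(key, default=0)
        yield cache.set(key, value + 1)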
def __init__(self, server):
    self.server = server
    self.lock = Lock()
    self.metadata = {}
    self.script_response = None
class TornadoClientPool(object):
    def __init__(self, endpoints, max_concurrency=1, **kwargs):
        self._endpoints = endpoints
        self._candidates = dict.fromkeys(self._endpoints, max_concurrency)
        self._kwargs = kwargs
        self._clients = queues.Queue()
        self._create_lock = Lock()
        self._close_lock = Lock()
        self._clients_count = 0
        self._closed = False
        self._closing = False
        self._started = False
        self._logger = logging.getLogger(self.__class__.__name__)

    @gen.coroutine
    def _create_client(self):
        with (yield self._create_lock.acquire()):
            self.__ensure_not_closed()
            self.__ensure_not_closing()
            while len(self._candidates) > 0:
                if self._clients.qsize() > 0:
                    return
                # random.sample requires a sequence, so materialize the keys.
                endpoint = random.sample(list(self._candidates.keys()), 1)[0]
                if self._candidates[endpoint] <= 0:
                    self._candidates.pop(endpoint)
                    continue
                address, port = split_endpoint(endpoint)
                client = TornadoClient(address, port, **self._kwargs)
                self._candidates[endpoint] -= 1
                if self._candidates[endpoint] <= 0:
                    self._candidates.pop(endpoint)
                yield self._clients.put(client)
                self._clients_count += 1
                self._logger.debug('Create a new client {}'.format(endpoint))
                return
            raise ResourceLimit('No more available endpoint')

    def _exhausted(self):
        return self._clients_count <= 0 and len(self._candidates) <= 0

    def __ensure_not_closing(self):
        if self._closing:
            raise Unavailable('Closing')

    def __ensure_not_closed(self):
        if self._closed:
            raise Unavailable('Closed')

    def __ensure_not_exhausted(self):
        # Renamed from __ensure_not_exhuasted (typo) along with its call site.
        if self._exhausted():
            raise ResourceLimit('No more available client')

    @gen.coroutine
    def execute(self, qdb_proto):
        while True:
            self.__ensure_not_closed()
            self.__ensure_not_closing()
            self.__ensure_not_exhausted()
            if not self._started:
                try:
                    yield self._create_client()
                except Exception:
                    continue
                finally:
                    self._started = True
            try:
                client = yield self._clients.get(timeout=timedelta(seconds=1))
            except TimeoutError:
                try:
                    yield self._create_client()
                except ResourceLimit:
                    pass
                continue
            try:
                r = yield client.execute(qdb_proto)
                yield self._clients.put(client)
            except (RetryLimitExceeded, StreamClosedError, TimeoutError,
                    socket.gaierror) as e:
                self._logger.warning(
                    'Not available, {} {}'.format(client.endpoint, e))
                self._release_client(client)
                continue
            except Exception as e:
                self._logger.error(
                    'Unexpected error, {} {}'.format(client.endpoint, e))
                self._release_client(client)
                continue
            finally:
                self._clients.task_done()
            raise gen.Return(r)

    def _release_client(self, client):
        try:
            if client:
                client.close()
        finally:
            self._clients_count -= 1

    @gen.coroutine
    def close(self):
        with (yield self._close_lock.acquire()):
            if self._closed:
                return
            self._closing = True
            with (yield self._create_lock.acquire()):
                while self._clients_count > 0:
                    try:
                        client = yield self._clients.get(
                            timeout=timedelta(seconds=0.1))
                    except TimeoutError:
                        continue
                    try:
                        self._release_client(client)
                    finally:
                        self._clients.task_done()
                self._closed = True
                self._closing = False
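# Hypothetical driver for the client pool above; `qdb_proto` stands in for
# whatever protocol object TornadoClient.execute expects, and the endpoint is
# illustrative.
from tornado import gen

@gen.coroutine
def run_once(qdb_proto):
    pool = TornadoClientPool(['127.0.0.1:2379'], max_concurrency=2)
    try:
        result = yield pool.execute(qdb_proto)
        raise gen.Return(result)
    finally:
        yield pool.close()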
class TTornadoStreamTransport(TTransportBase):
    """A framed, buffered transport over a Tornado stream."""

    DEFAULT_CONNECT_TIMEOUT = timedelta(seconds=1)
    DEFAULT_READ_TIMEOUT = timedelta(seconds=1)

    def __init__(self, host, port, stream=None, io_loop=None,
                 ssl_options=None, read_timeout=DEFAULT_READ_TIMEOUT):
        self.host = host
        self.port = port
        self.io_loop = io_loop
        self.read_timeout = read_timeout
        self.is_queuing_reads = False
        self.read_queue = []
        self.__wbuf = BytesIO()
        self._read_lock = Lock()
        self.ssl_options = ssl_options

        # Servers provide a ready-to-go stream.
        self.stream = stream
        if self.stream is not None:
            self._set_close_callback()

    if tornado_version >= '5.0':
        def with_timeout(self, timeout, future):
            return gen.with_timeout(timeout, future)
    else:
        def with_timeout(self, timeout, future):
            return gen.with_timeout(timeout, future, self.io_loop)

    @gen.coroutine
    def open(self, timeout=DEFAULT_CONNECT_TIMEOUT):
        logger.debug('socket connecting')
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
        if self.ssl_options is None:
            self.stream = iostream.IOStream(sock)
        else:
            self.stream = iostream.SSLIOStream(
                sock, ssl_options=self.ssl_options)

        try:
            yield self.with_timeout(
                timeout, self.stream.connect((self.host, self.port)))
        except (socket.error, OSError, IOError):
            message = 'could not connect to {}:{}'.format(self.host,
                                                          self.port)
            raise TTransportException(
                type=TTransportException.NOT_OPEN,
                message=message)

        self._set_close_callback()
        raise gen.Return(self)

    def _set_close_callback(self):
        self.stream.set_close_callback(self.close)

    def close(self):
        # Don't raise if we intend to close.
        self.stream.set_close_callback(None)
        self.stream.close()

    def read(self, _):
        # The generated code for Tornado shouldn't do individual reads --
        # only frames at a time.
        assert False, "you're doing it wrong"

    @contextmanager
    def io_exception_context(self):
        try:
            yield
        except (socket.error, OSError, IOError) as e:
            raise TTransportException(
                type=TTransportException.END_OF_FILE,
                message=str(e))
        except iostream.StreamBufferFullError as e:
            raise TTransportException(
                type=TTransportException.UNKNOWN,
                message=str(e))
        except gen.TimeoutError as e:
            raise TTransportException(
                type=TTransportException.TIMED_OUT,
                message=str(e))

    @gen.coroutine
    def read_frame(self):
        # IOStream processes reads one at a time.
        with (yield self._read_lock.acquire()):
            with self.io_exception_context():
                frame_header = yield self._read_bytes(4)
                if len(frame_header) == 0:
                    raise iostream.StreamClosedError(
                        'Read zero bytes from stream')
                frame_length, = struct.unpack('!i', frame_header)
                logger.debug('received frame header, frame length = %d',
                             frame_length)
                frame = yield self._read_bytes(frame_length)
                logger.debug('received frame payload: %r', frame)
                raise gen.Return(frame)

    def _read_bytes(self, n):
        return self.with_timeout(self.read_timeout,
                                 self.stream.read_bytes(n))

    def write(self, buf):
        self.__wbuf.write(buf)

    def flush(self):
        frame = self.__wbuf.getvalue()
        # Reset wbuf before write/flush to preserve state on underlying
        # failure.
        frame_length = struct.pack('!i', len(frame))
        self.__wbuf = BytesIO()
        with self.io_exception_context():
            return self.stream.write(frame_length + frame)
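# Minimal sketch of driving the transport above: open, read one framed
# message, then close. Assumes a frame-oriented server is listening at
# host:port.
from tornado import gen

@gen.coroutine
def fetch_one_frame(host, port):
    transport = TTornadoStreamTransport(host, port)
    yield transport.open()
    try:
        frame = yield transport.read_frame()
    finally:
        transport.close()
    raise gen.Return(frame)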
def __init__(self, *args, **kwargs): self.msg_queue = Queue(maxsize=100) self.msg_lock = Lock() self.msg_lock_2 = Lock() self.msg_queue_2 = list() self.msg_count = 0 self.msg_ping_time = 0.5 self.client_id = None self._redis_client = None self.heartbeat = None self.last_ping = 0 self.last_pong = 0 self.client_address = None self.allow_origin = options.allowed_hosts self.r = get_redis_connection() self.sid = None self.AUTH = False super(WebSocketHandler, self).__init__(*args, **kwargs) self.client_id = self.get_argument("id") try: if self.r.llen("user:{0}:pipe".format(self.client_id)) > 0: message = self.r.lindex("user:{0}:pipe".format(self.client_id), 0) msg = json.loads(message.encode("utf-8")) if 'seq' in msg: count = int(msg['seq']) - 1 else: LOG.error('user {0} msg seq does not exist'.format(self.client_id)) message = self.r.lpop("user:{0}:pipe".format(self.client_id)) LOG.error('user {0} msg_not_seq is {1}'.format(self.client_id, message)) else: count = self.r.get("user:{0}:msg_send_cnt".format(self.client_id)) if count is None: self.r.set("user:{0}:msg_send_cnt".format(self.client_id), 0) count = 0 self.add_header("Msg_Count", count) LOG.info("user {0} start msg_count is {1}".format(self.client_id, count)) except WebSocketClosedError as e: LOG.error("user {0} msg_count error {1}".format(self.client_id, e.message)) LOG.info("user {0} _init_".format(self.client_id)) if self.client_id is None or self.get_argument("token") is None: LOG.error("user {0} input parameter missing!!!".format(self.client_id)) self.close(code=PARAMETER_MISSING, reason="input parameter missing!") # NOTE: the channel-name literal here was scrubbed in the source; the "user:{0}:channel" pattern is assumed from the other "user:{0}:*" redis keys self.sid = "user:{0}:channel".format(self.client_id) # LOG.info('trying to do authentication') # self.do_authentication(self.get_argument("token").strip('\"')) # remote authentication headers = {'content-type': 'application/json'} token = self.get_argument("token").strip('\"') # LOG.info(token) payload = {"token": token} j = json.dumps(payload) response = requests.post(DJANGO_URL, data=j, headers=headers) LOG.info("user {0} authentication status {1}: {2}".format(self.client_id, response.status_code, response.content)) if response.status_code == 200: self.AUTH = True else: LOG.error('user {0} authentication failed!'.format(self.client_id))
def __init__(self, provided_max_sequence): super(TornadoPublishSequenceManager, self).__init__(provided_max_sequence) self._lock = Lock() self._ioloop = ioloop
from concurrent.futures import ThreadPoolExecutor from tornado.locks import Lock from bokeh.layouts import gridplot, WidgetBox from bokeh.models import Panel, Button, Spacer import gc from bokeh.models.widgets import Div, DatePicker, Select from datetime import datetime, timedelta, date import holoviews as hv from tornado.gen import coroutine import numpy as np from static.css.KPI_interface import KPI_card_css # mylogger and KPI are project-local helpers imported elsewhere in the original module lock = Lock() executor = ThreadPoolExecutor() logger = mylogger(__file__) hv.extension('bokeh', logo=False) renderer = hv.renderer('bokeh') @coroutine def KPI_developer_adoption_tab(page_width, DAYS_TO_LOAD=90): class Thistab(KPI): def __init__(self, table, cols=[]): KPI.__init__(self, table, name='developer', cols=cols) self.table = table self.df = None
class IPCMessageSubscriber(IPCClient): ''' Salt IPC message subscriber Create an IPC client to receive messages from an IPC publisher An example of a very simple IPCMessageSubscriber connecting to an IPCMessagePublisher. This example assumes an already running IPCMessagePublisher. IMPORTANT: The below example also assumes the IOLoop is NOT running. # Import Tornado libs import tornado.ioloop # Import Salt libs import salt.config import salt.transport.ipc # Create a new IO Loop. # We know that this new IO Loop is not currently running. io_loop = tornado.ioloop.IOLoop() ipc_publisher_socket_path = '/var/run/ipc_publisher.ipc' ipc_subscriber = salt.transport.ipc.IPCMessageSubscriber(ipc_publisher_socket_path, io_loop=io_loop) # Connect to the server # Use the associated IO Loop that isn't running. io_loop.run_sync(ipc_subscriber.connect) # Wait for some data package = ipc_subscriber.read_sync() ''' def __init__(self, socket_path, io_loop=None): super(IPCMessageSubscriber, self).__init__(socket_path, io_loop=io_loop) self._read_stream_future = None self._saved_data = [] self._read_in_progress = Lock() @tornado.gen.coroutine def _read(self, timeout, callback=None): try: yield self._read_in_progress.acquire(timeout=0.00000001) except tornado.gen.TimeoutError: raise tornado.gen.Return(None) exc_to_raise = None ret = None try: while True: if self._read_stream_future is None: self._read_stream_future = self.stream.read_bytes( 4096, partial=True) if timeout is None: wire_bytes = yield self._read_stream_future else: wire_bytes = yield FutureWithTimeout( self.io_loop, self._read_stream_future, timeout) self._read_stream_future = None # Remove the timeout once we get some data or an exception # occurs. We will assume that the rest of the data is already # there or is coming soon if an exception doesn't occur. timeout = None self.unpacker.feed(wire_bytes) first_sync_msg = True for framed_msg in self.unpacker: if callback: self.io_loop.spawn_callback(callback, framed_msg['body']) elif first_sync_msg: ret = framed_msg['body'] first_sync_msg = False else: self._saved_data.append(framed_msg['body']) if not first_sync_msg: # We read at least one piece of data and we're on a sync run break except TornadoTimeoutError: # In the timeout case, just return None. # Keep 'self._read_stream_future' alive. ret = None except StreamClosedError as exc: log.trace('Subscriber disconnected from IPC %s', self.socket_path) self._read_stream_future = None exc_to_raise = exc except Exception as exc: log.error( 'Exception occurred in Subscriber while handling stream: %s', exc) self._read_stream_future = None exc_to_raise = exc self._read_in_progress.release() if exc_to_raise is not None: raise exc_to_raise # pylint: disable=E0702 raise tornado.gen.Return(ret) def read_sync(self, timeout=None): ''' Read a message from an IPC socket The socket must already be connected. The associated IO Loop must NOT be running. :param int timeout: Timeout when receiving message :return: message data if successful. None if timed out. Will raise an exception for all other error conditions. ''' if self._saved_data: return self._saved_data.pop(0) return self.io_loop.run_sync(lambda: self._read(timeout)) @tornado.gen.coroutine def read_async(self, callback): ''' Asynchronously read messages and invoke a callback when they are ready. 
:param callback: A callback with the received data ''' while not self.connected(): try: yield self.connect(timeout=5) except StreamClosedError: log.trace('Subscriber closed stream on IPC %s before connect', self.socket_path) yield tornado.gen.sleep(1) except Exception as exc: log.error('Exception occurred while Subscriber connecting: %s', exc) yield tornado.gen.sleep(1) yield self._read(None, callback) def close(self): ''' Routines to handle any cleanup before the instance shuts down. Sockets and filehandles should be closed explicitly, to prevent leaks. ''' if self._closing: return super(IPCMessageSubscriber, self).close() # This will prevent this message from showing up: # '[ERROR ] Future exception was never retrieved: # StreamClosedError' if self._read_stream_future is not None and self._read_stream_future.done(): exc = self._read_stream_future.exception() if exc and not isinstance(exc, StreamClosedError): log.error("Read future returned exception %r", exc) def __del__(self): # check the module namespace by name: during interpreter teardown the # globals may already be cleared, and the class object itself is never # a key of globals() if 'IPCMessageSubscriber' in globals(): self.close()
class Logger(object): _name = "logging" _current = threading.local() def __new__(cls, *args, **kwargs): if not getattr(cls._current, "instance", None): cls._current.instance = object.__new__(cls, *args, **kwargs) return cls._current.instance @thread_once def __init__(self, endpoints=LOCATOR_DEFAULT_ENDPOINTS, io_loop=None): self.io_loop = io_loop or IOLoop.current() self.endpoints = endpoints self._lock = Lock() self.counter = itertools.count(1) self.pipe = None self.target = Defaults.app self.verbosity = DEBUG_LEVEL self.queue = queues.Queue(10000) # level could be reset from update_verbosity in the future if not fallback_logger.handlers: sh = logging.StreamHandler() sh.setFormatter( logging.Formatter( fmt="[%(asctime)s.%(msecs)d] %(levelname)s fallback %(message)s", datefmt="%z %d/%b/%Y:%H:%M:%S")) sh.setLevel(logging.DEBUG) fallback_logger.addHandler(sh) self._send() try: uuid = Defaults.uuid self._defaultattrs = [("uuid", uuid)] except GetOptError: self._defaultattrs = [] def prepare_message_args(self, level, message, *args, **kwargs): if args: try: message %= args except Exception: message = "unformatted: %s %s" % (message, args) level = ERROR_LEVEL if "extra" not in kwargs: if self._defaultattrs: msg = [level, self.target, message, self._defaultattrs] else: msg = [level, self.target, message] else: attrs = [(str(k), (v if isinstance(v, ATTRS_TYPES) else str(v))) for k, v in six.iteritems(kwargs["extra"])] msg = [level, self.target, message, attrs + self._defaultattrs] return msg def emit(self, level, message, *args, **kwargs): msg = self.prepare_message_args(level, message, *args, **kwargs) # if the queue is full, log new messages to the fallback logger # so that the most recent errors are printed at least to stderr try: self.queue.put_nowait(msg) except queues.QueueFull: self._log_to_fallback(msg) @coroutine def _send(self): """ Send a message lazily formatted with args. External log attributes can be passed via the named attribute `extra`, as in logging from the standard library. Note: * Attrs must be a dict, otherwise the whole message is skipped. * The key field in an attr is converted to string. * The value is sent as-is if it is an instance of (str, unicode, int, float, long, bool), otherwise it is converted to string. 
""" buff = BytesIO() while True: msgs = list() try: msg = yield self.queue.get() # we need to connect first, as we issue verbosity request just after connection # and channels should strictly go in ascending order if not self._connected: yield self.connect() try: while True: msgs.append(msg) counter = next(self.counter) msgpack_pack([counter, EMIT, msg], buff) msg = self.queue.get_nowait() except queues.QueueEmpty: pass try: yield self.pipe.write(buff.getvalue()) except Exception: pass # clean the buffer or we will end up without memory buff.truncate(0) except Exception: for message in msgs: self._log_to_fallback(message) def _log_to_fallback(self, message): level, target, text, attrs = message if level >= ERROR_LEVEL: actual_level = logging.ERROR elif level >= WARNING_LEVEL: actual_level = logging.WARNING elif level >= INFO_LEVEL: actual_level = logging.INFO else: actual_level = logging.DEBUG fallback_logger.log(actual_level, "%s %s %s", target, text, json.dumps(attrs)) def debug(self, message, *args, **kwargs): if self.enable_for(DEBUG_LEVEL): self.emit(DEBUG_LEVEL, message, *args, **kwargs) def warn(self, message, *args, **kwargs): self.warning(message, *args, **kwargs) def warning(self, message, *args, **kwargs): if self.enable_for(WARNING_LEVEL): self.emit(WARNING_LEVEL, message, *args, **kwargs) def info(self, message, *args, **kwargs): if self.enable_for(INFO_LEVEL): self.emit(INFO_LEVEL, message, *args, **kwargs) def error(self, message, *args, **kwargs): if self.enable_for(ERROR_LEVEL): self.emit(ERROR_LEVEL, message, *args, **kwargs) def enable_for(self, level): return self.verbosity <= level @coroutine def update_verbosity(self): counter = next(self.counter) verbosity_request = msgpack_packb([counter, VERBOSITY, []]) self.pipe.write(verbosity_request) buff = msgpack_unpacker() while True: data = yield self.pipe.read_bytes(1024, partial=True) buff.feed(data) for msg in buff: _, code, payload = msg[:3] if code == VALUE_CODE: self.verbosity = payload[0] else: self.verbosity = DEBUG_LEVEL return @coroutine def connect(self): with (yield self._lock.acquire()): if self._connected: return for host, port in (yield resolve_logging(self.endpoints, self._name, self.io_loop)): try: self.pipe = yield TCPClient(io_loop=self.io_loop).connect( host, port) self.pipe.set_nodelay(True) yield self.update_verbosity() return except IOError: pass @property def _connected(self): return self.pipe is not None and not self.pipe.closed() def disconnect(self): if self.pipe is None: return self.pipe.close() self.pipe = None def __del__(self): # we have to close owned connection # otherwise it would be a fd-leak self.disconnect()
class PSQLConnPool(object, metaclass=Singleton): """ A PostgreSQL connection pool. This wraps the `momoko.Pool` to initialize the connection when the first SQL statement is run. """ def __init__(self, pool): """ Initializes PSQLConnPool Parameters ---------- pool : momoko.Pool A connection pool to run SQL queries. """ self._pool = pool self._connected = False self._lock = Lock() @gen.coroutine def _connect(self): """ Open connections in this pool. This is safe to call after connections are opened. """ if self._connected: return # Initialize the connection pool under the lock: another coroutine may # reach this point at the same time, but `momoko.Pool.connect()` must be # called only once (multiple calls create too many connections). with (yield self._lock.acquire()): if not self._connected: yield self._pool.connect() self._connected = True @gen.coroutine def execute(self, operation, parameters=()): """ Prepare and execute a database operation (query or command). Parameters ---------- operation : str An SQL query or a command. parameters : tuple or list or dict A list, tuple or dict with query parameters. See `Passing parameters to SQL queries <http://initd.org/psycopg/docs/usage.html#query-parameters>`_ for more information. Returns ------- future : tornado.concurrent.Future future that resolves to `cursor` object containing result. """ yield self._connect() return (yield self._pool.execute(operation, parameters))
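_connect() above is the check/acquire/re-check idiom: the fast path skips the lock once initialized, and the re-check under the lock guarantees the connect call runs exactly once even when several first queries race. The same pattern in isolation, with a generic init coroutine standing in for the momoko call:

from tornado import gen
from tornado.locks import Lock

class OneTimeInit(object):
    def __init__(self):
        self._ready = False
        self._lock = Lock()

    @gen.coroutine
    def ensure(self, init_coro):
        if self._ready:  # fast path: already initialized, no locking
            return
        with (yield self._lock.acquire()):
            if not self._ready:  # re-check: another coroutine may have won
                yield init_coro()
                self._ready = True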
def __init__(self, config: ConfigHelper) -> None: super().__init__(config) self.request_mutex = Lock() self.addr: List[str] = config.get("address").split('/') self.port = config.getint("port", 9999)
class FBaseProcessor(FProcessor): """ FBaseProcessor is a base implementation of FProcessor. FProcessors should extend this and map FProcessorFunctions. This should only be used by generated code. """ def __init__(self): """ Create new instance of FBaseProcessor that will process requests. """ self._processor_function_map = {} self._annotations_map = {} self._write_lock = Lock() def add_to_processor_map(self, key, proc): """ Register the given FProcessorFunction. Args: key: processor function name proc: FProcessorFunction """ self._processor_function_map[key] = proc def add_to_annotations_map(self, method_name, annotation): """ Register the given annotation dictionary Args: method_name: method name annotation: annotation dictionary """ self._annotations_map[method_name] = annotation def get_annotations_map(self): """ Return a deepcopy of the annotations map. """ return copy.deepcopy(self._annotations_map) def get_write_lock(self): """ Return the write lock. """ return self._write_lock @gen.coroutine def process(self, iprot, oprot): """ Process an input protocol and output protocol Args: iprot: input FProtocol oprot: output FProtocol Raises: TException: if processing fails. """ context = iprot.read_request_headers() name, _, _ = iprot.readMessageBegin() processor_function = self._processor_function_map.get(name) # If the function was in our dict, call process on it. if processor_function: try: yield processor_function.process(context, iprot, oprot) except Exception: # Don't raise an exception because the server should still send # a response to the client. logger.exception( "frugal: exception occurred while processing request with " "correlation id %s", context.correlation_id) return logger.warning( "frugal: client invoked unknown method %s on request " "with correlation id %s", name, context.correlation_id) iprot.skip(TType.STRUCT) iprot.readMessageEnd() ex = TApplicationException( type=TApplicationExceptionType.UNKNOWN_METHOD, message="Unknown function: {0}".format(name)) with (yield self._write_lock.acquire()): oprot.write_response_headers(context) oprot.writeMessageBegin(name, TMessageType.EXCEPTION, 0) ex.write(oprot) oprot.writeMessageEnd() oprot.trans.flush() def add_middleware(self, middleware): """ Add the given middleware to the FProcessor. This should only be called before the server is started. Args: middleware: ServiceMiddleware """ if middleware and not isinstance(middleware, list): middleware = [middleware] for proc in self._processor_function_map.values(): proc.add_middleware(middleware)
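A sketch of wiring a handler into the processor above; PingFunction is an illustrative stand-in for the FProcessorFunction subclasses that generated code provides, matching the process(context, iprot, oprot) call made in process():

from tornado import gen

class PingFunction(object):
    """Illustrative stand-in for a generated FProcessorFunction."""

    @gen.coroutine
    def process(self, context, iprot, oprot):
        pass  # read arguments from iprot, write a reply to oprot

    def add_middleware(self, middleware):
        pass

processor = FBaseProcessor()
processor.add_to_processor_map('ping', PingFunction())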
class AsyncKazooLock(object): """ A lock based on kazoo.recipe.Lock and modified to work as a coroutine. """ # Node name, after the contender UUID, before the sequence # number. Involved in read/write locks. _NODE_NAME = "__lock__" # Node names which exclude this contender when present at a lower # sequence number. Involved in read/write locks. _EXCLUDE_NAMES = ["__lock__"] def __init__(self, client, path, identifier=None): """ Creates an AsyncKazooLock. Args: client: A KazooClient. path: The lock path to use. identifier: The name to use for this lock contender. This can be useful for querying to see who the current lock contenders are. """ self.client = client self.tornado_kazoo = TornadoKazoo(client) self.path = path # some data is written to the node. this can be queried via # contenders() to see who is contending for the lock self.data = str(identifier or "").encode('utf-8') self.node = None self.wake_event = AsyncEvent() # props to Netflix Curator for this trick. It is possible for our # create request to succeed on the server, but for a failure to # prevent us from getting back the full path name. We prefix our # lock name with a uuid and can check for its presence on retry. self.prefix = uuid.uuid4().hex + self._NODE_NAME self.create_path = self.path + "/" + self.prefix self.create_tried = False self.is_acquired = False self.assured_path = False self.cancelled = False self._retry = AsyncKazooRetry(max_tries=-1) self._lock = AsyncLock() @gen.coroutine def _ensure_path(self): yield self.tornado_kazoo.ensure_path(self.path) self.assured_path = True def cancel(self): """ Cancels a pending lock acquire. """ self.cancelled = True self.wake_event.set() @gen.coroutine def acquire(self, timeout=None, ephemeral=True): """ Acquires the lock. By default, it blocks and waits forever. Args: timeout: A float specifying how long to wait to acquire the lock. ephemeral: A boolean indicating that the lock should use an ephemeral node. Raises: LockTimeout if the lock wasn't acquired within `timeout` seconds. """ retry = self._retry.copy() retry.deadline = timeout # Ensure we are locked so that we avoid multiple coroutines in # this acquisition routine at the same time... timeout_interval = None if timeout is not None: timeout_interval = datetime.timedelta(seconds=timeout) try: with (yield self._lock.acquire(timeout=timeout_interval)): already_acquired = self.is_acquired gotten = False try: gotten = yield retry(self._inner_acquire, timeout=timeout, ephemeral=ephemeral) except RetryFailedError: pass except KazooException: # if we did ultimately fail, attempt to clean up exc_info = sys.exc_info() if not already_acquired: yield self._best_effort_cleanup() self.cancelled = False six.reraise(exc_info[0], exc_info[1], exc_info[2]) if gotten: self.is_acquired = gotten if not gotten and not already_acquired: yield self._best_effort_cleanup() raise gen.Return(gotten) except gen.TimeoutError: raise LockTimeout("Failed to acquire lock on %s after " "%s seconds" % (self.path, timeout)) def _watch_session(self, state): self.wake_event.set() return True def _watch_session_listener(self, state): IOLoop.current().add_callback(self._watch_session, state) @gen.coroutine def _inner_acquire(self, timeout, ephemeral=True): # wait until it's our chance to get it.. 
if self.is_acquired: raise ForceRetryError() # make sure our election parent node exists if not self.assured_path: yield self._ensure_path() node = None if self.create_tried: node = yield self._find_node() else: self.create_tried = True if not node: node = yield self.tornado_kazoo.create( self.create_path, self.data, ephemeral=ephemeral, sequence=True) # strip off path to node node = node[len(self.path) + 1:] self.node = node while True: self.wake_event.clear() # bail out with an exception if cancellation has been requested if self.cancelled: raise CancelledError() children = yield self._get_sorted_children() try: our_index = children.index(node) except ValueError: # pragma: nocover # somehow we aren't in the children -- probably we are # recovering from a session failure and our ephemeral # node was removed raise ForceRetryError() predecessor = self.predecessor(children, our_index) if not predecessor: raise gen.Return(True) # otherwise we are in the mix. watch predecessor and bide our time predecessor = self.path + "/" + predecessor self.client.add_listener(self._watch_session_listener) try: yield self.tornado_kazoo.get(predecessor, self._watch_predecessor) except NoNodeError: pass # predecessor has already been deleted else: try: yield self.wake_event.wait(timeout) except gen.TimeoutError: raise LockTimeout("Failed to acquire lock on %s after " "%s seconds" % (self.path, timeout)) finally: self.client.remove_listener(self._watch_session_listener) def predecessor(self, children, index): for c in reversed(children[:index]): if any(n in c for n in self._EXCLUDE_NAMES): return c return None def _watch_predecessor(self, event): self.wake_event.set() @gen.coroutine def _get_sorted_children(self): children = yield self.tornado_kazoo.get_children(self.path) # Node names are prefixed by a type: strip the prefix first, which may # be one of multiple values in case of a read-write lock, and return # only the sequence number (as a string since it is padded and will # sort correctly anyway). # # In some cases, the lock path may contain nodes with other prefixes # (eg. in case of a lease), just sort them last ('~' sorts after all # ASCII digits). def _seq(c): for name in ["__lock__", "__rlock__"]: idx = c.find(name) if idx != -1: return c[idx + len(name):] # Sort unknown node names eg. "lease_holder" last. return '~' children.sort(key=_seq) raise gen.Return(children) @gen.coroutine def _find_node(self): children = yield self.tornado_kazoo.get_children(self.path) for child in children: if child.startswith(self.prefix): raise gen.Return(child) raise gen.Return(None) @gen.coroutine def _delete_node(self, node): yield self.tornado_kazoo.delete(self.path + "/" + node) @gen.coroutine def _best_effort_cleanup(self): try: node = self.node if not node: node = yield self._find_node() if node: yield self._delete_node(node) except KazooException: # pragma: nocover pass @gen.coroutine def release(self): """Release the lock immediately.""" retry = self._retry.copy() release_response = yield retry(self._inner_release) raise gen.Return(release_response) @gen.coroutine def _inner_release(self): if not self.is_acquired: raise gen.Return(False) try: yield self._delete_node(self.node) except NoNodeError: # pragma: nocover pass self.is_acquired = False self.node = None raise gen.Return(True) @gen.coroutine def contenders(self): """ Returns an ordered list of the current contenders for the lock. 
""" # make sure our election parent node exists if not self.assured_path: yield self._ensure_path() children = yield self._get_sorted_children() contenders = [] for child in children: try: data = yield self.tornado_kazoo.get(self.path + "/" + child)[0] contenders.append(data.decode('utf-8')) except NoNodeError: # pragma: nocover pass raise gen.Return(contenders)
def main(): signal.signal(signal.SIGTERM, handle_signal) signal.signal(signal.SIGINT, handle_signal) Path(settings.pcsd_log_location).touch(mode=0o600, exist_ok=True) log.setup(settings.pcsd_log_location) env = prepare_env(os.environ, log.pcsd) if env.has_errors: raise SystemExit(1) if env.PCSD_DEBUG: log.enable_debug() sync_config_lock = Lock() ruby_pcsd_wrapper = ruby_pcsd.Wrapper( settings.pcsd_ruby_socket, debug=env.PCSD_DEBUG, ) make_app = configure_app( session.Storage(env.PCSD_SESSION_LIFETIME), ruby_pcsd_wrapper, sync_config_lock, env.PCSD_STATIC_FILES_DIR, disable_gui=env.PCSD_DISABLE_GUI, debug=env.PCSD_DEV, ) pcsd_ssl = ssl.PcsdSSL( server_name=socket.gethostname(), cert_location=settings.pcsd_cert_location, key_location=settings.pcsd_key_location, ssl_options=env.PCSD_SSL_OPTIONS, ssl_ciphers=env.PCSD_SSL_CIPHERS, ) try: SignalInfo.server_manage = HttpsServerManage( make_app, port=env.PCSD_PORT, bind_addresses=env.PCSD_BIND_ADDR, ssl=pcsd_ssl, ).start() except socket.gaierror as e: log.pcsd.error( "Unable to bind to specific address(es), exiting: %s ", e ) raise SystemExit(1) from e except OSError as e: log.pcsd.error("Unable to start pcsd daemon, exiting: %s ", e) raise SystemExit(1) from e except ssl.SSLCertKeyException as e: for error in e.args: log.pcsd.error(error) log.pcsd.error("Invalid SSL certificate and/or key, exiting") raise SystemExit(1) from e ioloop = IOLoop.current() ioloop.add_callback(sign_ioloop_started) if is_systemd() and env.NOTIFY_SOCKET: ioloop.add_callback(systemd.notify, env.NOTIFY_SOCKET) ioloop.add_callback(config_sync(sync_config_lock, ruby_pcsd_wrapper)) ioloop.start()
class BaseService(object): def __init__(self, name, endpoints, io_loop=None): # If it's not the main thread # and a current IOLoop doesn't exist here, # IOLoop.instance becomes self._io_loop self.io_loop = io_loop or IOLoop.current() # List of available endpoints to which the service resolves. # Looks like [["host1", port1], ["host2", port2]] self.endpoints = endpoints self.name = name self.id = generate_service_id(self) self.log = servicelog self.sessions = dict() self.counter = itertools.count(1) self.api = {} self._lock = Lock() # wrap into separate class self.pipe = None self.address = None # on_close can be scheduled at any time, # even after we've already reconnected. So to prevent # closing the wrong connection, each new pipe has its epoch, # used as an id for on_close self.pipe_epoch = 0 self.buffer = msgpack_unpacker() @coroutine def connect(self, traceid=None): if self._connected: return log = TraceAdapter(self.log, {"traceid": traceid}) if traceid else self.log log.info("acquiring the connection lock") with (yield self._lock.acquire()): if self._connected: return start_time = time.time() for host, port in self.endpoints: try: log.info("trying %s:%d to establish connection %s", host, port, self.name) self.pipe_epoch += 1 pipe_epoch = self.pipe_epoch self.pipe = yield TCPClient(io_loop=self.io_loop).connect(host, port) self.pipe.set_nodelay(True) self.pipe.read_until_close(callback=functools.partial(weak_wrapper, weakref.ref(self), "on_close", pipe_epoch), streaming_callback=functools.partial(weak_wrapper, weakref.ref(self), "on_read")) except Exception as err: log.error("connection error %s", err) else: self.address = (host, port) connection_time = (time.time() - start_time) * 1000 log.info("connection has been established successfully %.3fms" % connection_time) return raise Exception("unable to establish connection") def disconnect(self): self.log.debug("disconnect has been called %s", self.name) if self.pipe is None: return self.pipe.close() self.pipe = None # detach rx from sessions # and send errors to all of the open sessions sessions = self.sessions while sessions: _, rx = sessions.popitem() rx.error(DisconnectionError(self.name)) def on_close(self, pipe_epoch, *args): self.log.debug("pipe has been closed %s %s", args, self.name) if self.pipe_epoch == pipe_epoch: self.log.debug("the epoch matches. 
Call disconnect") self.disconnect() def on_read(self, read_bytes): self.log.debug("read %.300s", read_bytes) self.buffer.feed(read_bytes) for msg in self.buffer: self.log.debug("unpacked: %.300s", msg) try: session, message_type, payload = msg[:3] # skip extra fields self.log.debug("%s, %d, %.300s", session, message_type, payload) except Exception as err: self.log.error("malformed message: `%s` %s", err, str(msg)) continue rx = self.sessions.get(session) if rx is None: self.log.warning("unknown session number: `%d`", session) continue rx.push(message_type, payload) if rx.closed(): del self.sessions[session] @coroutine def _invoke(self, method_name, *args, **kwargs): self.log.debug("_invoke has been called %.300s %.300s", str(args), str(kwargs)) trace = kwargs.get("trace") if trace: yield self.connect(hex(trace.traceid)[2:]) else: yield self.connect() self.log.debug("%s", self.api) for method_id, (method, tx_tree, rx_tree) in self.api.items(): # py3 has no iteritems if method == method_name: self.log.debug("method `%s` has been found in API map", method_name) counter = next(self.counter) # py3 counter has no .next() method self.log.debug('sending message: %.300s', [counter, method_id, args]) if trace is None: self.pipe.write(msgpack_packb([counter, method_id, args])) else: self.pipe.write(msgpack_packb([counter, method_id, args, pack_trace(trace)])) self.log.debug("RX TREE %s", rx_tree) self.log.debug("TX TREE %s", tx_tree) rx = Rx(rx_tree, io_loop=self.io_loop, servicename=self.name) tx = Tx(tx_tree, self.pipe, counter) self.sessions[counter] = rx channel = Channel(rx=rx, tx=tx) raise Return(channel) raise AttributeError(method_name) @property def _connected(self): return self.pipe is not None and not self.pipe.closed() def __getattr__(self, name): def on_getattr(*args, **kwargs): return self._invoke(name, *args, **kwargs) return on_getattr def __del__(self): # we have to close owned connection # otherwise it would be a fd-leak self.disconnect() def __str__(self): return "name: %s id: %s" % (self.name, self.id) def __repr__(self): return "<%s %s %s at %s>" % ( type(self).__name__, self.name, self.id, hex(id(self)))
class Application(object): """Wrapper around a single VK application.""" LOGIN_URL = 'https://m.vk.com' AUTHORIZE_URL = 'https://oauth.vk.com/authorize' CAPTCHA_URI = 'https://m.vk.com/captcha.php' API_URL = 'https://api.vk.com/method/' SCOPE = 'offline' DEFAULT_HEADERS = HTTPHeaders({ 'accept': 'application/json', 'accept-charset': 'utf-8', 'content-type': 'application/x-www-form-urlencoded', }) def __init__(self, app_id, username, password, max_requests_per_second=3, default_timeout=20, api_version='5.28'): """Create a new application object. Args: app_id: id of VK application. username: user's phone number or email. password: user's password. max_requests_per_second: maximum number of requests that the application can send per second. Depends on the number of users. Default value is 3. default_timeout: default timeout for http requests. api_version: version of VK API used. """ self.last_requests = deque([datetime.min] * max_requests_per_second) self.max_requests_per_second = max_requests_per_second self.app_id = app_id self.username = username self.password = password self.api_version = api_version self.default_timeout = default_timeout self.client_session = Session(AsyncHTTPClient) self.queue_lock = Lock() IOLoop.current().run_sync(self.get_access_token) @gen.coroutine def _post(self, url, params, handle_redirects=True): """Make an HTTP POST request, handling redirects and timeouts. Args: url: url to make request. params: request parameters. handle_redirects: process redirects manually (use to handle cookies properly). """ request = HTTPRequest( url, method='POST', headers=self.DEFAULT_HEADERS.copy(), follow_redirects=not handle_redirects, request_timeout=self.default_timeout, body=urlencode(params), ) # Handle timeouts. while True: try: # If no redirect. response = yield self.client_session.fetch(request) return response except HTTPError as e: # If a timeout happened, just retry. if e.code == 599: continue # Else it is a redirect. response = e.response break # If access token has been acquired. if response.code == 405: return response # Handle redirect. new_url = response.headers['Location'] if new_url == '/': new_url = self.LOGIN_URL # Save cookies. extract_cookies_to_jar(self.client_session.cookies, request, response) response = yield self._post(new_url, params, handle_redirects) return response def __getattr__(self, method_name): return APIMethod(self, method_name) @gen.coroutine def __call__(self, method_name, **method_kwargs): response = yield self.method_request(method_name, **method_kwargs) # There may be 2 dicts in one JSON. # For example: {'error': ...}{'response': ...}. errors = [] error_codes = [] for data in json_iter_parse(response.body.decode('utf-8')): if 'error' in data: error_data = data['error'] error_codes.append(error_data['error_code']) errors.append(error_data) if 'response' in data: for error in errors: logger.warning(str(error)) return data['response'] # Handle "Too many requests" error. if TO_MANY_REQUESTS in error_codes: return (yield self(method_name, **method_kwargs)) raise VkAPIMethodError(errors[0]) @gen.coroutine def method_request(self, method_name, **method_kwargs): """Make a call to the VK API. Args: method_name: name of VK API method. **method_kwargs: arguments to the VK API method. """ # Wait if too many requests were made. 
with (yield self.queue_lock.acquire()): first_request = self.last_requests.pop() now = datetime.now() delay = max(0, 1.1 - (now - first_request).total_seconds()) yield gen.sleep(delay) params = { 'timestamp': int(time.time()), 'access_token': self.access_token, 'v': self.api_version, } method_kwargs = stringify_values(method_kwargs) params.update(method_kwargs) url = self.API_URL + method_name result = yield self._post(url, params) self.last_requests.appendleft(datetime.now()) return result @gen.coroutine def get_access_token(self): """Get access token using app id and user login and password.""" # Log in and get cookies. yield self.login() # Authorize via OAuth2. auth_response_url_query = yield self.oauth2_authorization() if 'access_token' in auth_response_url_query: self.access_token = auth_response_url_query['access_token'] else: raise VkAuthError('OAuth2 authorization error') @gen.coroutine def login(self, login_form_action=None): """Log in and set cookies. Args: login_form_action: url of login action. Default is retrieved from m.vk.com/login. """ if login_form_action is None: while True: try: response = yield self.client_session.fetch(self.LOGIN_URL) break except HTTPError: pass login_form_action = get_form_action(response.body.decode('utf-8')) if not login_form_action: raise VkAuthError('VK changed login flow') login_form_data = { 'email': self.username, 'pass': self.password, } response = yield self._post( login_form_action, login_form_data, ) # Check for session id. if ('remixsid' in self.client_session.cookies or 'remixsid6' in self.client_session.cookies): return response raise VkAuthError('Authorization error (incorrect password)') @gen.coroutine def oauth2_authorization(self): """OAuth2 procedure for getting an access token.""" auth_data = { 'client_id': self.app_id, 'display': 'mobile', 'response_type': 'token', 'scope': self.SCOPE, 'v': self.api_version, } response = yield self._post( self.AUTHORIZE_URL, auth_data, ) response_url_query = get_url_query(response.effective_url) if 'access_token' in response_url_query: return response_url_query # Permission grant is needed response_body = response.body.decode('utf-8') form_action = get_form_action(response_body) if form_action: response = yield self._post(form_action, {}) response_url_query = get_url_query(response.effective_url) return response_url_query try: response_json = json.loads(response_body) except ValueError: # not JSON in response error_message = 'OAuth2 grant access error' else: error_message = 'VK error: [{}] {}'.format( response_json['error'], response_json['error_description'] ) raise VkAuthError(error_message)
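method_request() above throttles by keeping one timestamp slot per allowed request in a deque guarded by queue_lock: the oldest slot is popped, the coroutine sleeps out the remainder of the one-second window, and a fresh timestamp is pushed. The same sliding-window limiter in isolation (the 1.1-second window above includes a safety margin; 1.0 is used here):

from collections import deque
from datetime import datetime

from tornado import gen
from tornado.locks import Lock

class RateLimiter(object):
    def __init__(self, max_per_second):
        # one timestamp slot per allowed request in the window
        self.times = deque([datetime.min] * max_per_second)
        self.lock = Lock()

    @gen.coroutine
    def throttle(self):
        with (yield self.lock.acquire()):
            oldest = self.times.pop()
            wait = 1.0 - (datetime.now() - oldest).total_seconds()
            if wait > 0:
                yield gen.sleep(wait)
            self.times.appendleft(datetime.now())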
class DatastoreServer(Server): """ Keeps track of the status and location of a datastore server. """ # The datastore backend. DATASTORE_TYPE = 'cassandra' # The cgroup slice used to start datastore server processes. SLICE = 'appscale-datastore' # The number of seconds to wait for the server to start. START_TIMEOUT = 30 # The number of seconds to wait for a status check. STATUS_TIMEOUT = 10 # The number of seconds to wait for the server to stop. STOP_TIMEOUT = 5 def __init__(self, port, http_client, verbose): """ Creates a new DatastoreServer. Args: port: An integer specifying the port to use. http_client: An AsyncHTTPClient. verbose: A boolean that sets logging level to debug. """ super(DatastoreServer, self).__init__(ServiceTypes.DATASTORE, port) self.monit_name = 'datastore_server-{}'.format(port) self._http_client = http_client self._stdout = None self._verbose = verbose # Serializes start, stop, and monitor operations. self._management_lock = AsyncLock() @gen.coroutine def ensure_running(self): """ Checks to make sure the server is still running. """ with (yield self._management_lock.acquire()): yield self._wait_for_service(timeout=self.STATUS_TIMEOUT) @staticmethod def from_pid(pid, http_client): """ Creates a new DatastoreServer from an existing process. Args: pid: An integer specifying a process ID. http_client: An AsyncHTTPClient. """ process = psutil.Process(pid) args = process.cmdline() port = int(args[args.index('--port') + 1]) verbose = '--verbose' in args server = DatastoreServer(port, http_client, verbose) server.process = process server.state = ServerStates.RUNNING return server @gen.coroutine def restart(self): yield self.stop() yield self.start() @gen.coroutine def start(self): """ Starts a new datastore server. """ with (yield self._management_lock.acquire()): if self.state == ServerStates.RUNNING: return self.state = ServerStates.STARTING start_cmd = ['appscale-datastore', '--type', self.DATASTORE_TYPE, '--port', str(self.port)] if self._verbose: start_cmd.append('--verbose') log_file = os.path.join(LOG_DIR, 'datastore_server-{}.log'.format(self.port)) self._stdout = open(log_file, 'a') # With systemd-run, it's possible to start the process within the slice. # To keep things simple and maintain backwards compatibility with # pre-systemd distros, move the process after starting it. self.process = psutil.Popen(start_cmd, stdout=self._stdout, stderr=subprocess.STDOUT) tasks_location = os.path.join(slice_path(self.SLICE), 'tasks') with open(tasks_location, 'w') as tasks_file: tasks_file.write(str(self.process.pid)) yield self._wait_for_service(timeout=self.START_TIMEOUT) self.state = ServerStates.RUNNING @gen.coroutine def stop(self): """ Stops an existing datastore server. """ with (yield self._management_lock.acquire()): if self.state == ServerStates.STOPPED: return self.state = ServerStates.STOPPING try: yield self._cleanup() finally: self.state = ServerStates.STOPPED @gen.coroutine def _cleanup(self): """ Cleans up process and file descriptor. 
""" if self.process is not None: try: self.process.terminate() except NoSuchProcess: logger.info('Can\'t terminate process {pid} as it no longer exists' .format(pid=self.process.pid)) return initial_stop_time = time.time() while True: if time.time() > initial_stop_time + self.STOP_TIMEOUT: self.process.kill() break try: self.process.wait(timeout=0) break except psutil.TimeoutExpired: yield gen.sleep(1) if self._stdout is not None: self._stdout.close() @gen.coroutine def _wait_for_service(self, timeout): """ Query server until it responds. Args: timeout: A integer specifying the number of seconds to wait. Raises: StartTimeout if start time exceeds given timeout. """ server_url = 'http://{}:{}'.format(options.private_ip, self.port) start_time = time.time() try: while True: if not self.process.is_running(): raise ProcessStopped('{} is no longer running'.format(self)) if time.time() > start_time + timeout: raise StartTimeout('{} took too long to start'.format(self)) try: response = yield self._http_client.fetch(server_url) if response.code == 200: break except socket.error as error: if error.errno != errno.ECONNREFUSED: raise yield gen.sleep(1) except Exception as error: self._cleanup() self.failure_time = time.time() self.failure = error self.state = ServerStates.FAILED raise error
class InstanceManager(object): """ Fulfills AppServer instance assignments from the scheduler. """ # The seconds to wait between performing health checks. HEALTH_CHECK_INTERVAL = 60 def __init__(self, zk_client, monit_operator, routing_client, projects_manager, deployment_config, source_manager, syslog_server, thread_pool, private_ip): """ Creates a new InstanceManager. Args: zk_client: A kazoo.client.KazooClient object. monit_operator: An appscale.common.monit_interface.MonitOperator object. routing_client: An instance_manager.routing_client.RoutingClient object. projects_manager: A ProjectsManager object. deployment_config: A common.deployment_config.DeploymentConfig object. source_manager: An instance_manager.source_manager.SourceManager object. syslog_server: A string specifying the location of the syslog process that generates the combined app logs. thread_pool: A ThreadPoolExecutor. private_ip: A string specifying the current machine's private IP address. """ self._monit_operator = monit_operator self._routing_client = routing_client self._private_ip = private_ip self._syslog_server = syslog_server self._projects_manager = projects_manager self._deployment_config = deployment_config self._source_manager = source_manager self._thread_pool = thread_pool self._zk_client = zk_client # Ensures only one process tries to make changes at a time. self._work_lock = AsyncLock() self._health_checker = PeriodicCallback( self._ensure_health, self.HEALTH_CHECK_INTERVAL * 1000) # Instances that this machine should run. # For example, {guestbook_default_v1: [20000, -1]} self._assignments = None self._api_servers = {} self._running_instances = set() self._login_server = None def start(self): """ Begins processes needed to fulfill instance assignments. """ # Update list of running instances in case the InstanceManager was # restarted. self._recover_state() # Subscribe to changes in controller state, which includes assignments and # the 'login' property. self._zk_client.DataWatch(CONTROLLER_STATE_NODE, self._controller_state_watch) # Subscribe to changes in project configuration, including relevant # versions. self._projects_manager.subscriptions.append( self._handle_configuration_update) # Start the regular health check. self._health_checker.start() @gen.coroutine def _start_instance(self, version, port): """ Starts a Google App Engine application on this machine. It will start it up and then proceed to fetch the main page. Args: version: A Version object. port: An integer specifying a port to use. 
""" version_details = version.version_details runtime = version_details['runtime'] env_vars = version_details.get('envVariables', {}) runtime_params = self._deployment_config.get_config('runtime_parameters') max_memory = runtime_params.get('default_max_appserver_memory', DEFAULT_MAX_APPSERVER_MEMORY) if 'instanceClass' in version_details: max_memory = INSTANCE_CLASSES.get(version_details['instanceClass'], max_memory) source_archive = version_details['deployment']['zip']['sourceUrl'] api_server_port = yield self._ensure_api_server(version.project_id) yield self._source_manager.ensure_source( version.revision_key, source_archive, runtime) logger.info('Starting {}:{}'.format(version, port)) pidfile = PIDFILE_TEMPLATE.format(revision=version.revision_key, port=port) if runtime == GO: env_vars['GOPATH'] = os.path.join(UNPACK_ROOT, version.revision_key, 'gopath') env_vars['GOROOT'] = os.path.join(GO_SDK, 'goroot') watch = ''.join([MONIT_INSTANCE_PREFIX, version.revision_key]) if runtime in (PYTHON27, GO, PHP): start_cmd = create_python27_start_cmd( version.project_id, self._login_server, port, pidfile, version.revision_key, api_server_port) env_vars.update(create_python_app_env(self._login_server, version.project_id)) elif runtime == JAVA: # Account for MaxPermSize (~170MB), the parent process (~50MB), and thread # stacks (~20MB). max_heap = max_memory - 250 if max_heap <= 0: raise BadConfigurationException( 'Memory for Java applications must be greater than 250MB') start_cmd = create_java_start_cmd( version.project_id, port, self._login_server, max_heap, pidfile, version.revision_key, api_server_port ) env_vars.update(create_java_app_env(self._deployment_config)) else: raise BadConfigurationException( 'Unknown runtime {} for {}'.format(runtime, version.project_id)) logger.info("Start command: " + str(start_cmd)) logger.info("Environment variables: " + str(env_vars)) base_version = version.revision_key.rsplit(VERSION_PATH_SEPARATOR, 1)[0] log_tag = "app_{}".format(hashlib.sha1(base_version).hexdigest()[:28]) monit_app_configuration.create_config_file( watch, start_cmd, pidfile, port, env_vars, max_memory, self._syslog_server, check_port=True, kill_exceeded_memory=True, log_tag=log_tag, ) full_watch = '{}-{}'.format(watch, port) yield self._monit_operator.reload(self._thread_pool) # The reload command does not block, and we don't have a good way to check # if Monit is ready with its new configuration yet. If the daemon begins # reloading while it is handling the 'start', it can end up in a state # where it never starts the process. As a temporary workaround, this # small period allows it to finish reloading. This can be removed if # instances are started inside a cgroup. yield gen.sleep(1) yield self._monit_operator.send_command_retry_process(full_watch, 'start') # Make sure the version registration node exists. self._zk_client.ensure_path( '/'.join([VERSION_REGISTRATION_NODE, version.version_key])) instance = Instance(version.revision_key, port) yield self._add_routing(instance) if version.project_id == DASHBOARD_PROJECT_ID: log_size = DASHBOARD_LOG_SIZE else: log_size = APP_LOG_SIZE if not setup_logrotate(version.project_id, log_size): logger.error("Error while setting up log rotation for application: {}". format(version.project_id)) @gen.coroutine def populate_api_servers(self): """ Find running API servers. 
""" def api_server_info(entry): prefix, port = entry.rsplit('-', 1) project_id = prefix[len(API_SERVER_PREFIX):] return project_id, int(port) monit_entries = yield self._monit_operator.get_entries() server_entries = [api_server_info(entry) for entry in monit_entries if entry.startswith(API_SERVER_PREFIX)] for project_id, port in server_entries: self._api_servers[project_id] = port def _recover_state(self): """ Establishes current state from Monit entries. """ logger.info('Getting current state') monit_entries = self._monit_operator.get_entries_sync() instance_entries = {entry: state for entry, state in monit_entries.items() if entry.startswith(MONIT_INSTANCE_PREFIX)} # Remove all unmonitored entries. removed = [] for entry, state in instance_entries.items(): if state == MonitStates.UNMONITORED: self._monit_operator.remove_configuration(entry) removed.append(entry) for entry in removed: del instance_entries[entry] if removed: self._monit_operator.reload_sync() instance_details = [] for entry, state in instance_entries.items(): revision, port = entry[len(MONIT_INSTANCE_PREFIX):].rsplit('-', 1) instance_details.append( {'revision': revision, 'port': int(port), 'state': state}) clean_up_instances(instance_details) # Ensure version nodes exist. running_versions = {'_'.join(instance['revision'].split('_')[:3]) for instance in instance_details} self._zk_client.ensure_path(VERSION_REGISTRATION_NODE) for version_key in running_versions: self._zk_client.ensure_path( '/'.join([VERSION_REGISTRATION_NODE, version_key])) # Account for monitored instances. running_instances = { Instance(instance['revision'], instance['port']) for instance in instance_details} self._routing_client.declare_instance_nodes(running_instances) self._running_instances = running_instances @gen.coroutine def _ensure_api_server(self, project_id): """ Make sure there is a running API server for a project. Args: project_id: A string specifying the project ID. Returns: An integer specifying the API server port. """ if project_id in self._api_servers: raise gen.Return(self._api_servers[project_id]) server_port = MAX_API_SERVER_PORT for port in self._api_servers.values(): if port <= server_port: server_port = port - 1 zk_locations = appscale_info.get_zk_node_ips() start_cmd = ' '.join([API_SERVER_LOCATION, '--port', str(server_port), '--project-id', project_id, '--zookeeper-locations', ' '.join(zk_locations)]) watch = ''.join([API_SERVER_PREFIX, project_id]) full_watch = '-'.join([watch, str(server_port)]) pidfile = os.path.join(VAR_DIR, '{}.pid'.format(full_watch)) monit_app_configuration.create_config_file( watch, start_cmd, pidfile, server_port, max_memory=DEFAULT_MAX_APPSERVER_MEMORY, check_port=True) yield self._monit_operator.reload(self._thread_pool) yield self._monit_operator.send_command_retry_process(full_watch, 'start') self._api_servers[project_id] = server_port raise gen.Return(server_port) @gen.coroutine def _unmonitor_and_terminate(self, watch): """ Unmonitors an instance and terminates it. Args: watch: A string specifying the Monit entry. """ try: monit_retry = retry(max_retries=5, retry_on_exception=DEFAULT_RETRIES) send_w_retries = monit_retry(self._monit_operator.send_command_sync) send_w_retries(watch, 'unmonitor') except ProcessNotFound: # If Monit does not know about a process, assume it is already stopped. return # Now that the AppServer is stopped, remove its monit config file so that # monit doesn't pick it up and restart it. 
self._monit_operator.remove_configuration(watch) stop_instance(watch, MAX_INSTANCE_RESPONSE_TIME) def _instance_healthy(self, port): """ Determines the health of an instance with an HTTP request. Args: port: An integer specifying the port the instance is listening on. Returns: A boolean indicating whether or not the instance is healthy. """ url = "http://" + self._private_ip + ":" + str(port) + FETCH_PATH try: opener = urllib2.build_opener(NoRedirection) response = opener.open(url, timeout=HEALTH_CHECK_TIMEOUT) if response.code == httplib.SERVICE_UNAVAILABLE: return False except IOError: return False return True @gen.coroutine def _wait_for_app(self, port): """ Waits for the application hosted on this machine, on the given port, to respond to HTTP requests. Args: port: Port where app is hosted on the local machine Returns: True on success, False otherwise """ deadline = time.time() + START_APP_TIMEOUT while time.time() < deadline: if self._instance_healthy(port): raise gen.Return(True) logger.debug('Instance at port {} is not ready yet'.format(port)) yield gen.sleep(BACKOFF_TIME) raise gen.Return(False) @gen.coroutine def _add_routing(self, instance): """ Tells the AppController to begin routing traffic to an AppServer. Args: instance: An Instance. """ logger.info('Waiting for {}'.format(instance)) start_successful = yield self._wait_for_app(instance.port) if not start_successful: monit_watch = ''.join( [MONIT_INSTANCE_PREFIX, instance.revision_key, '-', str(instance.port)]) yield self._unmonitor_and_terminate(monit_watch) yield self._monit_operator.reload(self._thread_pool) logger.warning('{} did not come up in time'.format(instance)) return self._routing_client.register_instance(instance) self._running_instances.add(instance) @gen.coroutine def _stop_api_server(self, project_id): """ Make sure there is not a running API server for a project. Args: project_id: A string specifying the project ID. """ if project_id not in self._api_servers: return port = self._api_servers[project_id] watch = '{}{}-{}'.format(API_SERVER_PREFIX, project_id, port) yield self._unmonitor_and_terminate(watch) del self._api_servers[project_id] @gen.coroutine def _clean_old_sources(self): """ Removes source code for obsolete revisions. """ monit_entries = yield self._monit_operator.get_entries() active_revisions = { entry[len(MONIT_INSTANCE_PREFIX):].rsplit('-', 1)[0] for entry in monit_entries if entry.startswith(MONIT_INSTANCE_PREFIX)} for project_id, project_manager in self._projects_manager.items(): for service_id, service_manager in project_manager.items(): for version_id, version_manager in service_manager.items(): revision_id = version_manager.version_details['revision'] revision_key = VERSION_PATH_SEPARATOR.join( [project_id, service_id, version_id, str(revision_id)]) active_revisions.add(revision_key) self._source_manager.clean_old_revisions(active_revisions=active_revisions) @gen.coroutine def _stop_app_instance(self, instance): """ Stops a Google App Engine application process instance on current machine. Args: instance: An Instance object. 
""" logger.info('Stopping {}'.format(instance)) monit_watch = ''.join( [MONIT_INSTANCE_PREFIX, instance.revision_key, '-', str(instance.port)]) self._routing_client.unregister_instance(instance) try: self._running_instances.remove(instance) except KeyError: logger.info( 'unregister_instance: non-existent instance {}'.format(instance)) yield self._unmonitor_and_terminate(monit_watch) project_instances = [instance_ for instance_ in self._running_instances if instance_.project_id == instance.project_id] if not project_instances: yield self._stop_api_server(instance.project_id) remove_logrotate(instance.project_id) yield self._monit_operator.reload(self._thread_pool) yield self._clean_old_sources() def _get_lowest_port(self): """ Determines the lowest usuable port for a new instance. Returns: An integer specifying a free port. """ existing_ports = {instance.port for instance in self._running_instances} port = STARTING_INSTANCE_PORT while True: if port in existing_ports: port += 1 continue return port @gen.coroutine def _restart_unrouted_instances(self): """ Restarts instances that the router considers offline. """ with (yield self._work_lock.acquire()): failed_instances = yield self._routing_client.get_failed_instances() for version_key, port in failed_instances: try: instance = next(instance for instance in self._running_instances if instance.version_key == version_key and instance.port == port) except StopIteration: # If the manager has no recored of that instance, remove routing. self._routing_client.unregister_instance(Instance(version_key, port)) continue try: version = self._projects_manager.version_from_key( instance.version_key) except KeyError: # If the version no longer exists, avoid doing any work. The # scheduler should remove any assignments for it. continue logger.warning('Restarting failed instance: {}'.format(instance)) yield self._stop_app_instance(instance) yield self._start_instance(version, instance.port) @gen.coroutine def _restart_unavailable_instances(self): """ Restarts instances that fail health check requests. """ with (yield self._work_lock.acquire()): for instance in self._running_instances: # TODO: Add a threshold to avoid restarting on a transient error. if not self._instance_healthy(instance.port): try: version = self._projects_manager.version_from_key( instance.version_key) except KeyError: # If the version no longer exists, avoid doing any work. The # scheduler should remove any assignments for it. continue logger.warning('Restarting failed instance: {}'.format(instance)) yield self._stop_app_instance(instance) yield self._start_instance(version, instance.port) @gen.coroutine def _ensure_health(self): """ Checks to make sure all required instances are running and healthy. """ yield self._restart_unrouted_instances() yield self._restart_unavailable_instances() # Just as an infrequent sanity check, fulfill assignments and enforce # instance details. yield self._fulfill_assignments() yield self._enforce_instance_details() @gen.coroutine def _fulfill_assignments(self): """ Starts and stops instances in order to fulfill assignments. """ # If the manager has not been able to retrieve a valid set of assignments, # don't do any work. if self._assignments is None: return with (yield self._work_lock.acquire()): # Stop versions that aren't assigned. 
to_stop = [instance for instance in self._running_instances if instance.version_key not in self._assignments] for version_key in {instance.version_key for instance in to_stop}: logger.info('{} is no longer assigned'.format(version_key)) for instance in to_stop: yield self._stop_app_instance(instance) for version_key, assigned_ports in self._assignments.items(): try: version = self._projects_manager.version_from_key(version_key) except KeyError: # If the version no longer exists, avoid doing any work. The # scheduler should remove any assignments for it. continue # The number of required instances that don't have an assigned port. new_assignment_count = sum(port == -1 for port in assigned_ports) # Stop instances that aren't assigned. If the assignment list includes # any -1s, match them to running instances that aren't in the assigned # ports list. candidates = [instance for instance in self._running_instances if instance.version_key == version_key and instance.port not in assigned_ports] unmatched_instances = candidates[new_assignment_count:] for running_instance in unmatched_instances: logger.info('{} is no longer assigned'.format(running_instance)) yield self._stop_app_instance(running_instance) # Start defined ports that aren't running. running_ports = [instance.port for instance in self._running_instances if instance.version_key == version_key] for port in assigned_ports: if port != -1 and port not in running_ports: yield self._start_instance(version, port) # Start new assignments that don't have a match. candidates = [instance for instance in self._running_instances if instance.version_key == version_key and instance.port not in assigned_ports] to_start = max(new_assignment_count - len(candidates), 0) for _ in range(to_start): yield self._start_instance(version, self._get_lowest_port()) @gen.coroutine def _enforce_instance_details(self): """ Ensures all running instances are configured correctly. """ with (yield self._work_lock.acquire()): # Restart instances with an outdated revision or login server. for instance in self._running_instances: try: version = self._projects_manager.version_from_key(instance.version_key) except KeyError: # If the version no longer exists, avoid doing any work. The # scheduler should remove any assignments for it. continue login_server_changed = ( self._login_server is not None and self._login_server != get_login_server(instance)) if (instance.revision_key != version.revision_key or login_server_changed): logger.info('Configuration changed for {}'.format(instance)) yield self._stop_app_instance(instance) yield self._start_instance(version, instance.port) def _assignments_from_state(self, controller_state): """ Extracts the current machine's assignments from controller state. Args: controller_state: A dictionary containing controller state. """ def version_assignments(data): return [int(server.split(':')[1]) for server in data['appservers'] if server.split(':')[0] == self._private_ip] return { version_key: version_assignments(data) for version_key, data in controller_state['@app_info_map'].items() if version_assignments(data)} @gen.coroutine def _update_controller_state(self, encoded_controller_state): """ Handles updates to controller state. Args: encoded_controller_state: A JSON-encoded string containing controller state. """ try: controller_state = json.loads(encoded_controller_state) except (TypeError, ValueError): # If the controller state isn't usable, don't do any work. 
logger.warning( 'Invalid controller state: {}'.format(encoded_controller_state)) return new_assignments = self._assignments_from_state(controller_state) login_server = controller_state['@options']['login'] if new_assignments != self._assignments: logger.info('New assignments: {}'.format(new_assignments)) self._assignments = new_assignments yield self._fulfill_assignments() if login_server != self._login_server: logger.info('New login server: {}'.format(login_server)) self._login_server = login_server yield self._enforce_instance_details() def _controller_state_watch(self, encoded_controller_state, _): """ Handles updates to controller state. Args: encoded_controller_state: A JSON-encoded string containing controller state. """ persistent_update_controller_state = retry_data_watch_coroutine( CONTROLLER_STATE_NODE, self._update_controller_state) IOLoop.instance().add_callback( persistent_update_controller_state, encoded_controller_state) @gen.coroutine def _handle_configuration_update(self, event): """ Handles updates to a project's configuration details. Args: event: An appscale.admin.instance_manager.projects_manager.Event object. """ relevant_versions = {instance.version_key for instance in self._running_instances} if self._assignments is not None: relevant_versions |= set(self._assignments.keys()) for version_key in relevant_versions: if event.affects_version(version_key): logger.info('New revision for version: {}'.format(version_key)) yield self._enforce_instance_details() break
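# --- Illustrative sketch (not part of the manager above) ---
# Shows how _assignments_from_state reduces controller state to this
# machine's port assignments. The sample state and private IP below are
# hypothetical; only the fields the method actually reads are mirrored
# ('@app_info_map' -> 'appservers'), and -1 marks an assignment that has
# not been matched to a port yet.
def _demo_assignments_from_state():
    private_ip = '10.0.0.5'  # stand-in for self._private_ip

    controller_state = {
        '@app_info_map': {
            'guestbook_default_v1': {
                'appservers': ['10.0.0.5:20000', '10.0.0.5:-1',
                               '10.0.0.6:20000']},
            'queue_default_v1': {'appservers': ['10.0.0.6:20001']},
        }
    }

    def version_assignments(data):
        return [int(server.split(':')[1]) for server in data['appservers']
                if server.split(':')[0] == private_ip]

    assignments = {
        version_key: version_assignments(data)
        for version_key, data in controller_state['@app_info_map'].items()
        if version_assignments(data)}

    # Versions with no servers on this machine are dropped entirely.
    assert assignments == {'guestbook_default_v1': [20000, -1]}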
class Scheduler():
    """ (public) Scheduler class. """
    __slots__ = ['unit', 'current_time', 'callback_process', 'data_in_queue',
                 'data_in_heap', 'tasks_queue', 'lock']

    def __init__(self, unit_in_seconds, callback_process):
        """ Initialize a scheduler.

        :param unit_in_seconds: number of seconds to wait for each step.
        :param callback_process: callback to call on every task.
            Signature: task_callback(task.data) -> bool
            If the callback returns True, the task is considered done and is
            removed from the scheduler. Otherwise, the task is rescheduled
            for another delay.
        """
        assert isinstance(unit_in_seconds, int) and unit_in_seconds > 0
        assert callable(callback_process)
        self.unit = unit_in_seconds
        self.current_time = 0
        self.callback_process = callback_process
        self.data_in_heap = PriorityDict()  # data => Deadline
        self.data_in_queue = {}  # type: dict{object, _Task}  # data => associated Task in queue
        self.tasks_queue = Queue()
        # Lock to modify this object safely inside one Tornado thread:
        # http://www.tornadoweb.org/en/stable/locks.html
        self.lock = Lock()

    def _enqueue(self, task):
        """ Put a task in the queue of tasks to process now. """
        self.data_in_queue[task.data] = task
        self.tasks_queue.put_nowait(task)

    @gen.coroutine
    def has_data(self, data):
        """ Return True if given data is associated to any task. """
        with (yield self.lock.acquire()):
            return data in self.data_in_heap or data in self.data_in_queue

    @gen.coroutine
    def get_info(self, data):
        """ Return info about scheduling for given data, or None if data is
            not found. """
        with (yield self.lock.acquire()):
            deadline = None  # type: _Deadline
            if data in self.data_in_heap:
                deadline = self.data_in_heap[data]
            if data in self.data_in_queue:
                deadline = self.data_in_queue[data].deadline
            if deadline:
                return SchedulerEvent(time_unit=self.unit,
                                      time_added=deadline.start_time,
                                      delay=deadline.delay,
                                      current_time=self.current_time)
        return None

    @gen.coroutine
    def add_data(self, data, nb_units_to_wait):
        """ Add data with a non-null deadline. For null deadlines, use
            no_wait().

        :param data: data to add
        :param nb_units_to_wait: time to wait (in number of units)
        """
        if not isinstance(nb_units_to_wait, int) or nb_units_to_wait <= 0:
            raise exceptions.NaturalIntegerNotNullException()
        with (yield self.lock.acquire()):
            if data in self.data_in_heap or data in self.data_in_queue:
                raise exceptions.AlreadyScheduledException()
            # Add task to scheduler.
            self.data_in_heap[data] = _Deadline(self.current_time,
                                                nb_units_to_wait)

    @gen.coroutine
    def no_wait(self, data, nb_units_to_wait, processing_validator):
        """ Add data to be processed as soon as possible.

        :param data: data to add
        :param nb_units_to_wait: time to wait (in number of units) for data
            tasks after the first task is executed. If null (0), data is
            processed once (the first time) and then dropped.
        :param processing_validator: validator used to check if data can
            still be processed for the first time. See documentation of
            class _ImmediateTask for more details.
        """
        if not isinstance(nb_units_to_wait, int) or nb_units_to_wait < 0:
            raise exceptions.NaturalIntegerException()
        with (yield self.lock.acquire()):
            if data in self.data_in_heap:
                # Move data from heap to queue with new delay.
                del self.data_in_heap[data]
                self._enqueue(_ImmediateTask(data, nb_units_to_wait,
                                             processing_validator))
            elif data in self.data_in_queue:
                # Change delay for future scheduling.
                self.data_in_queue[data].update_delay(nb_units_to_wait)
            else:
                # Add data to queue.
                self._enqueue(_ImmediateTask(data, nb_units_to_wait,
                                             processing_validator))

    @gen.coroutine
    def remove_data(self, data):
        """ Remove data (and all associated tasks) from the scheduler. """
        with (yield self.lock.acquire()):
            if data in self.data_in_heap:
                del self.data_in_heap[data]
            elif data in self.data_in_queue:
                # Remove task from data_in_queue and invalidate it in queue.
                self.data_in_queue.pop(data).valid = False

    @gen.coroutine
    def _step(self):
        """ Compute a step (check and enqueue tasks to run now) in the
            scheduler. """
        with (yield self.lock.acquire()):
            self.current_time += 1
            while self.data_in_heap:
                deadline, data = self.data_in_heap.smallest()
                if deadline.deadline > self.current_time:
                    break
                del self.data_in_heap[data]
                self._enqueue(_Task(data, deadline))

    @gen.coroutine
    def schedule(self):
        """ Main scheduler method (callback to register in the ioloop).
            Waits `unit` seconds, then runs a step, forever. """
        while True:
            yield gen.sleep(self.unit)
            yield self._step()

    @gen.coroutine
    def process_tasks(self):
        """ Main task processing method (callback to register in the
            ioloop). Consumes and processes tasks in the queue and
            reschedules processed tasks when relevant.

            A task is processed if its associated data was not removed from
            the scheduler. A task is rescheduled if the processing callback
            returns False (True meaning the task is definitively done) AND
            the task deadline is not null.
        """
        while True:
            task = yield self.tasks_queue.get()  # type: _Task
            try:
                if task.valid and (not isinstance(task, _ImmediateTask)
                                   or task.can_still_process()):
                    if gen.is_coroutine_function(self.callback_process):
                        remove_data = yield self.callback_process(task.data)
                    else:
                        remove_data = self.callback_process(task.data)
                    remove_data = remove_data or not task.deadline.delay
                    with (yield self.lock.acquire()):
                        del self.data_in_queue[task.data]
                        if not remove_data:
                            self.data_in_heap[task.data] = _Deadline(
                                self.current_time, task.deadline.delay)
            finally:
                self.tasks_queue.task_done()
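# --- Usage sketch for the Scheduler above (assumes this module's helpers,
# e.g. PriorityDict and _Task, are importable alongside it; the demo names
# themselves are illustrative). schedule() and process_tasks() are
# long-running coroutines meant to be spawned on the IOLoop, and the
# callback decides whether a task is done or should be rescheduled.
from tornado.ioloop import IOLoop

def run_scheduler_demo():
    def on_task(data):
        print('processing', data)
        return True  # True: task is done; False: reschedule after its delay

    scheduler = Scheduler(unit_in_seconds=1, callback_process=on_task)
    io_loop = IOLoop.current()
    io_loop.spawn_callback(scheduler.schedule)       # ticks once per unit
    io_loop.spawn_callback(scheduler.process_tasks)  # consumes due tasks
    io_loop.spawn_callback(scheduler.add_data, 'token', 3)  # due in ~3 units
    io_loop.start()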
class BaseService(object):
    def __init__(self, name, endpoints, io_loop=None):
        if io_loop:
            warnings.warn('io_loop argument is deprecated.', DeprecationWarning)
        # If it's not the main thread
        # and a current IOloop doesn't exist here,
        # IOLoop.instance becomes self._io_loop
        self.io_loop = io_loop or IOLoop.current()
        # List of endpoints the service can be resolved to.
        # Looks like [["host1", port1], ["host2", port2]]
        self.endpoints = endpoints
        self.name = name
        self.id = generate_service_id(self)
        self.log = servicelog

        self.sessions = {}
        self.counter = itertools.count(1)
        self.api = {}

        self._lock = Lock()

        # wrap into separate class
        self.pipe = None
        self.address = None
        # on_close can be scheduled at any time,
        # even after we've already reconnected. So to prevent
        # closing the wrong connection, each new pipe has its epoch,
        # used as an id for on_close
        self.pipe_epoch = 0
        self.buffer = msgpack_unpacker()

        self._header_table = {
            'tx': CocaineHeaders(),
            'rx': CocaineHeaders(),
        }

    @coroutine
    def connect(self, traceid=None):
        if self._connected:
            return

        log = get_trace_adapter(self.log, traceid)
        log.debug("acquiring the connection lock")
        with (yield self._lock.acquire()):
            if self._connected:
                return

            start_time = time.time()

            if self.pipe:
                log.info("`%s` pipe has been closed by StreamClosed exception",
                         self.name)
                self.disconnect()

            conn_statuses = []
            for host, port in self.endpoints:
                try:
                    log.info("trying %s:%d to establish connection %s",
                             host, port, self.name)
                    self.pipe_epoch += 1
                    self.pipe = yield TCPClient(io_loop=self.io_loop).connect(host, port)
                    self.pipe.set_nodelay(True)
                    set_keep_alive(self.pipe.socket)
                    self.pipe.read_until_close(
                        callback=functools.partial(weak_wrapper, weakref.ref(self),
                                                   "on_close", self.pipe_epoch),
                        streaming_callback=functools.partial(weak_wrapper,
                                                             weakref.ref(self),
                                                             "on_read"))
                except Exception as err:
                    log.error("connection error %s", err)
                    conn_statuses.append((host, port, err))
                else:
                    self.address = (host, port)
                    self._header_table = {
                        'tx': CocaineHeaders(),
                        'rx': CocaineHeaders(),
                    }
                    connection_time = (time.time() - start_time) * 1000
                    log.info("`%s` connection has been established successfully %.3fms",
                             self.name, connection_time)
                    return

            raise ServiceConnectionError(
                "unable to establish connection: " +
                ", ".join(("%s:%d %s" % (host, port, err)
                           for (host, port, err) in conn_statuses)))

    def disconnect(self):
        self.log.debug("`%s` disconnect has been called", self.name)
        if self.pipe is None:
            return

        self.pipe.close()
        self.pipe = None

        # detach rx from sessions
        # and send errors to all of the open sessions
        sessions = self.sessions
        while sessions:
            _, rx = sessions.popitem()
            rx.error(DisconnectionError(self.name))
        self.log.info("`%s` has been disconnected", self.name)

    def on_close(self, pipe_epoch, *args):
        self.log.info("`%s` pipe has been closed with args: %s",
                      self.name, args)
        if self.pipe_epoch == pipe_epoch:
            self.log.info("the epoch matches. Call disconnect")
            self.disconnect()

    def on_read(self, read_bytes):
        self.log.debug("read %.300s", read_bytes)
        self.buffer.feed(read_bytes)
        for msg in self.buffer:
            self.log.debug("unpacked: %.300s", msg)
            try:
                session, message_type, payload = msg[:3]  # skip extra fields
                self.log.debug("%s, %d, %.300s", session, message_type, payload)
                headers = msg[3] if len(msg) > 3 else None
            except Exception as err:
                self.log.error("malformed message: `%s` %s", err, msg)
                continue

            rx = self.sessions.get(session)
            if rx is None:
                self.log.warning("unknown session number: `%d`", session)
                continue

            rx.push(message_type, payload, headers)
            if rx.closed():
                del self.sessions[session]

    @coroutine
    def _invoke(self, method_name, *args, **kwargs):
        # Pop the Trace object, because it's not a real header.
        trace = kwargs.pop("trace", None)
        if trace is not None:
            update_dict_with_trace(kwargs, trace)

        trace_id = kwargs.get('trace_id')
        trace_logger = get_trace_adapter(self.log, trace_id)
        trace_logger.debug("BaseService method `%s` call: %.300s %.300s",
                           method_name, args, kwargs)

        yield self.connect(trace_id)
        if self.pipe is None:
            raise ServiceConnectionError('connection has suddenly disappeared')

        trace_logger.debug("%s", self.api)
        for method_id, (method, tx_tree, rx_tree) in six.iteritems(self.api):
            if method == method_name:
                trace_logger.debug("method `%s` has been found in API map",
                                   method_name)
                session = next(self.counter)  # py3 counter has no .next() method
                # Manage headers using the header table.
                headers = manage_headers(kwargs, self._header_table['tx'])
                packed_data = msgpack_packb([session, method_id, args, headers])
                trace_logger.info(
                    'send message to `%s`: channel id: %s, type: %s, length: %s bytes',
                    self.name, session, method_name, len(packed_data))
                trace_logger.debug('send message: %.300s',
                                   [session, method_id, args, kwargs])

                self.pipe.write(packed_data)

                trace_logger.debug("RX TREE %s", rx_tree)
                trace_logger.debug("TX TREE %s", tx_tree)

                rx = Rx(rx_tree=rx_tree,
                        session_id=session,
                        header_table=self._header_table['rx'],
                        io_loop=self.io_loop,
                        service_name=self.name,
                        trace_id=trace_id)
                tx = Tx(tx_tree=tx_tree,
                        pipe=self.pipe,
                        session_id=session,
                        header_table=self._header_table['tx'],
                        service_name=self.name,
                        trace_id=trace_id)
                self.sessions[session] = rx
                channel = Channel(rx=rx, tx=tx)
                raise Return(channel)

        raise AttributeError(method_name)

    @property
    def _connected(self):
        return self.pipe is not None and not self.pipe.closed()

    def __getattr__(self, name):
        def on_getattr(*args, **kwargs):
            return self._invoke(six.b(name), *args, **kwargs)
        return on_getattr

    def __del__(self):
        # we have to close the owned connection,
        # otherwise it would be an fd leak
        self.disconnect()

    def __str__(self):
        return "name: %s id: %s" % (self.name, self.id)

    def __repr__(self):
        return "<%s %s %s at %s>" % (
            type(self).__name__, self.name, self.id, hex(id(self)))
async def open(self, *args, **kwargs):
    self.lock = Lock()  # one lock per client
    logger.info('Client connected: %s', self)
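# --- Hypothetical companion to open() above: with one lock per client,
# concurrent coroutines writing to the same connection are serialized
# without blocking other clients. on_message/write_message follow the
# tornado.websocket.WebSocketHandler API; the echo behaviour is assumed.
async def on_message(self, message):
    async with self.lock:  # tornado.locks.Lock supports async with
        await self.write_message(message)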
class Logger(object):
    _name = "logging"
    _current = threading.local()

    def __new__(cls, *args, **kwargs):
        if not getattr(cls._current, "instance", None):
            cls._current.instance = object.__new__(cls, *args, **kwargs)
        return cls._current.instance

    @thread_once
    def __init__(self, endpoints=LOCATOR_DEFAULT_ENDPOINTS, io_loop=None):
        self.io_loop = io_loop or IOLoop.current()
        self.endpoints = endpoints
        self._lock = Lock()

        self.counter = itertools.count(1)

        self.pipe = None
        self.target = Defaults.app
        self.verbosity = DEBUG_LEVEL
        self.queue = queues.Queue(10000)

        # level could be reset from update_verbosity in the future
        if not fallback_logger.handlers:
            sh = logging.StreamHandler()
            sh.setFormatter(logging.Formatter(
                fmt="[%(asctime)s.%(msecs)d] %(levelname)s fallback %(message)s",
                datefmt="%z %d/%b/%Y:%H:%M:%S"))
            sh.setLevel(logging.DEBUG)
            fallback_logger.addHandler(sh)

        self._send()
        try:
            uuid = Defaults.uuid
            self._defaultattrs = [("uuid", uuid)]
        except GetOptError:
            self._defaultattrs = []

    def prepare_message_args(self, level, message, *args, **kwargs):
        if args:
            try:
                message %= args
            except Exception:
                message = "unformatted: %s %s" % (message, args)
                level = ERROR_LEVEL

        if "extra" not in kwargs:
            if self._defaultattrs:
                msg = [level, self.target, message, self._defaultattrs]
            else:
                msg = [level, self.target, message]
        else:
            attrs = [(str(k), (v if isinstance(v, ATTRS_TYPES) else str(v)))
                     for k, v in six.iteritems(kwargs["extra"])]
            msg = [level, self.target, message, attrs + self._defaultattrs]
        return msg

    def emit(self, level, message, *args, **kwargs):
        msg = self.prepare_message_args(level, message, *args, **kwargs)
        # if the queue is full, log new messages to the fallback logger
        # so that the most recent errors are printed at least to stderr
        try:
            self.queue.put_nowait(msg)
        except queues.QueueFull:
            self._log_to_fallback(msg)

    @coroutine
    def _send(self):
        """ Send a message lazily formatted with args.

        External log attributes can be passed via the named attribute
        `extra`, as in logging from the standard library.

        Note:
            * Attrs must be a dict, otherwise the whole message is skipped.
            * The key field in an attr is converted to a string.
            * The value is sent as-is if it is an instance of
              (str, unicode, int, float, long, bool);
              otherwise it is converted to a string.
        """
        buff = BytesIO()
        while True:
            msgs = list()
            try:
                msg = yield self.queue.get()
                # we need to connect first, as we issue a verbosity request
                # just after connection and channels should strictly go in
                # ascending order
                if not self._connected:
                    yield self.connect()

                try:
                    while True:
                        msgs.append(msg)
                        counter = next(self.counter)
                        msgpack_pack([counter, EMIT, msg], buff)
                        msg = self.queue.get_nowait()
                except queues.QueueEmpty:
                    pass

                try:
                    yield self.pipe.write(buff.getvalue())
                except Exception:
                    pass

                # reset the buffer, or it will grow without bound. seek(0)
                # is required as well, otherwise the next write pads the
                # start of the stream with null bytes.
                buff.seek(0)
                buff.truncate()
            except Exception:
                for message in msgs:
                    self._log_to_fallback(message)

    def _log_to_fallback(self, message):
        level, target, text, attrs = message
        if level >= ERROR_LEVEL:
            actual_level = logging.ERROR
        elif level >= WARNING_LEVEL:
            actual_level = logging.WARNING
        elif level >= INFO_LEVEL:
            actual_level = logging.INFO
        else:
            actual_level = logging.DEBUG
        fallback_logger.log(actual_level, "%s %s %s",
                            target, text, json.dumps(attrs))

    def debug(self, message, *args, **kwargs):
        if self.enable_for(DEBUG_LEVEL):
            self.emit(DEBUG_LEVEL, message, *args, **kwargs)

    def warn(self, message, *args, **kwargs):
        self.warning(message, *args, **kwargs)

    def warning(self, message, *args, **kwargs):
        if self.enable_for(WARNING_LEVEL):
            self.emit(WARNING_LEVEL, message, *args, **kwargs)

    def info(self, message, *args, **kwargs):
        if self.enable_for(INFO_LEVEL):
            self.emit(INFO_LEVEL, message, *args, **kwargs)

    def error(self, message, *args, **kwargs):
        if self.enable_for(ERROR_LEVEL):
            self.emit(ERROR_LEVEL, message, *args, **kwargs)

    def enable_for(self, level):
        return self.verbosity <= level

    @coroutine
    def update_verbosity(self):
        counter = next(self.counter)
        verbosity_request = msgpack_packb([counter, VERBOSITY, []])
        self.pipe.write(verbosity_request)
        buff = msgpack_unpacker()
        while True:
            data = yield self.pipe.read_bytes(1024, partial=True)
            buff.feed(data)
            for msg in buff:
                _, code, payload = msg[:3]
                if code == VALUE_CODE:
                    self.verbosity = payload[0]
                else:
                    self.verbosity = DEBUG_LEVEL
                return

    @coroutine
    def connect(self):
        with (yield self._lock.acquire()):
            if self._connected:
                return

            for host, port in (yield resolve_logging(self.endpoints,
                                                     self._name,
                                                     self.io_loop)):
                try:
                    self.pipe = yield TCPClient(io_loop=self.io_loop).connect(host, port)
                    self.pipe.set_nodelay(True)
                    yield self.update_verbosity()
                    return
                except IOError:
                    pass

    @property
    def _connected(self):
        return self.pipe is not None and not self.pipe.closed()

    def disconnect(self):
        if self.pipe is None:
            return
        self.pipe.close()
        self.pipe = None

    def __del__(self):
        # we have to close the owned connection,
        # otherwise it would be an fd leak
        self.disconnect()
class ServerManager(object):
  """ Keeps track of the status and location of a specific server. """

  KNOWN_SERVICES = [datastore_service]

  def __init__(self, service, port, assignment_options=None, start_cmd=None):
    """ Creates a new server. It accepts either an assignment_options
    argument (used to build start_cmd) or the start_cmd of an existing
    process.

    Args:
      service: An instance of Service.
      port: An integer specifying the port to use.
      assignment_options: A dict representing assignment options from
        zookeeper.
      start_cmd: A list of command line arguments used for starting server.
    """
    self.service = service
    self.failure = None
    self.failure_time = None
    # This is for compatibility with Hermes, which expects a monit name.
    self.monit_name = self.service.monit_name(port)
    self.port = port
    self.process = None
    self.state = ServerStates.NEW
    self.type = self.service.type
    if assignment_options is None and start_cmd is None:
      raise TypeError('assignment_options or start_cmd should be specified')
    self._assignment_options = assignment_options
    self._start_cmd = start_cmd
    self._stdout = None

    # Serializes start, stop, and monitor operations.
    self._management_lock = AsyncLock()

  @gen.coroutine
  def ensure_running(self):
    """ Checks to make sure the server is still running. """
    with (yield self._management_lock.acquire()):
      yield self._wait_for_service(timeout=self.service.status_timeout)

  @staticmethod
  def from_pid(pid, service):
    """ Creates a new ServerManager from an existing process.

    Args:
      pid: An integer specifying a process ID.
      service: An instance of Service.
    """
    process = psutil.Process(pid)
    args = process.cmdline()
    try:
      port = service.port_from_start_cmd(args)
    except ValueError:
      raise ValueError('Process #{} ({}) is not recognized'.format(pid, args))

    server = ServerManager(service, port, start_cmd=args)
    server.process = process
    server.state = ServerStates.RUNNING
    return server

  @gen.coroutine
  def restart(self):
    yield self.stop()
    yield self.start()

  @gen.coroutine
  def start(self):
    """ Starts a new server process. """
    with (yield self._management_lock.acquire()):
      if self.state == ServerStates.RUNNING:
        return

      self.state = ServerStates.STARTING
      if not self._start_cmd:
        self._start_cmd = self.service.get_start_cmd(
          self.port, self._assignment_options)

      log_filename = self.service.log_filename(self.port)
      log_file = os.path.join(LOG_DIR, log_filename)
      self._stdout = open(log_file, 'a')

      # With systemd-run, it's possible to start the process within the
      # slice. To keep things simple and maintain backwards compatibility
      # with pre-systemd distros, move the process after starting it.
      self.process = psutil.Popen(self._start_cmd, stdout=self._stdout,
                                  stderr=subprocess.STDOUT)

      tasks_location = os.path.join(slice_path(self.service.slice), 'tasks')
      with open(tasks_location, 'w') as tasks_file:
        tasks_file.write(str(self.process.pid))

      yield self._wait_for_service(timeout=self.service.start_timeout)
      self.state = ServerStates.RUNNING

  @gen.coroutine
  def stop(self):
    """ Stops an existing server process. """
    with (yield self._management_lock.acquire()):
      if self.state == ServerStates.STOPPED:
        return

      self.state = ServerStates.STOPPING
      try:
        yield self._cleanup()
      finally:
        self.state = ServerStates.STOPPED

  @gen.coroutine
  def _cleanup(self):
    """ Cleans up the process and file descriptor. """
    if self.process is not None:
      try:
        self.process.terminate()
      except NoSuchProcess:
        logger.info('Can\'t terminate process {pid} as it no longer exists'
                    .format(pid=self.process.pid))
        return

      initial_stop_time = time.time()
      while True:
        if time.time() > initial_stop_time + self.service.stop_timeout:
          self.process.kill()
          break
        try:
          self.process.wait(timeout=0)
          break
        except psutil.TimeoutExpired:
          yield gen.sleep(1)

    if self._stdout is not None:
      self._stdout.close()

  @gen.coroutine
  def _wait_for_service(self, timeout):
    """ Queries the server until it responds.

    Args:
      timeout: An integer specifying the number of seconds to wait.
    Raises:
      StartTimeout if the start time exceeds the given timeout.
    """
    server_url = 'http://{}:{}'.format(options.private_ip, self.port)
    start_time = time.time()
    try:
      while True:
        if not self.process.is_running():
          raise ProcessStopped('{} is no longer running'.format(self))

        if time.time() > start_time + timeout:
          raise StartTimeout('{} took too long to start'.format(self))

        health_result = self.service.health_probe(server_url)
        if isinstance(health_result, gen.Future):
          health_result = yield health_result
        if health_result:
          break

        yield gen.sleep(1)
    except Exception as error:
      yield self._cleanup()
      self.failure_time = time.time()
      self.failure = error
      self.state = ServerStates.FAILED
      raise

  def __repr__(self):
    """ Represents the server details.

    Returns:
      A string representing the server.
    """
    return '<Server: {}:{}, {}>'.format(self.type, self.port, self.state)
class Stream(object):
    def __init__(self, conn, stream_id, delegate, context=None):
        self.conn = conn
        self.stream_id = stream_id
        self.set_delegate(delegate)
        self.context = context
        self.finish_future = Future()
        self.write_lock = Lock()
        from tornado.util import ObjectDict  # TODO: remove
        self.stream = ObjectDict(io_loop=IOLoop.current(),
                                 close=conn.stream.close)
        self._incoming_content_remaining = None
        self._outgoing_content_remaining = None
        self._delegate_started = False
        self.window = Window(conn.window, stream_id,
                             conn.setting(constants.Setting.INITIAL_WINDOW_SIZE))
        self._header_frames = []
        self._phase = constants.HTTPPhase.HEADERS

    def set_delegate(self, delegate):
        self.orig_delegate = self.delegate = delegate
        if self.conn.params.decompress:
            self.delegate = _GzipMessageDelegate(delegate,
                                                 self.conn.params.chunk_size)

    def handle_frame(self, frame):
        if frame.type == constants.FrameType.PRIORITY:
            self._handle_priority_frame(frame)
            return
        elif frame.type == constants.FrameType.RST_STREAM:
            self._handle_rst_stream_frame(frame)
            return
        elif frame.type == constants.FrameType.WINDOW_UPDATE:
            self._handle_window_update_frame(frame)
            return
        elif frame.type in (constants.FrameType.SETTINGS,
                            constants.FrameType.GOAWAY,
                            constants.FrameType.PUSH_PROMISE):
            raise Exception("invalid frame type %s for stream" % frame.type)

        if self.finish_future.done():
            raise StreamError(self.stream_id,
                              constants.ErrorCode.STREAM_CLOSED)

        if frame.type == constants.FrameType.HEADERS:
            self._handle_headers_frame(frame)
        elif frame.type == constants.FrameType.CONTINUATION:
            self._handle_continuation_frame(frame)
        elif frame.type == constants.FrameType.DATA:
            self._handle_data_frame(frame)
        # Unknown frame types are silently discarded, unless they break
        # the rule that nothing can come between HEADERS and CONTINUATION.

    def needs_continuation(self):
        return bool(self._header_frames)

    def _handle_headers_frame(self, frame):
        if self._phase == constants.HTTPPhase.BODY:
            self._phase = constants.HTTPPhase.TRAILERS
        frame = frame.without_padding()
        self._header_frames.append(frame)
        self._check_header_length()
        if frame.flags & constants.FrameFlag.END_HEADERS:
            self._parse_headers()

    def _handle_continuation_frame(self, frame):
        if not self._header_frames:
            raise ConnectionError(constants.ErrorCode.PROTOCOL_ERROR,
                                  "CONTINUATION without HEADERS")
        self._header_frames.append(frame)
        self._check_header_length()
        if frame.flags & constants.FrameFlag.END_HEADERS:
            self._parse_headers()

    def _check_header_length(self):
        if (sum(len(f.data) for f in self._header_frames) >
                self.conn.params.max_header_size):
            if self.conn.is_client:
                # TODO: Need tests for client side of headers-too-large.
                # What's the best way to send an error?
                self.delegate.on_connection_close()
            else:
                # write_headers needs a start line so it can tell
                # whether this is a HEAD or not. If we're rejecting
                # the headers we can't know so just make something up.
                # Note that this means the error response body MUST be
                # zero bytes so it doesn't matter whether the client
                # sent a HEAD or a GET.
                self._request_start_line = RequestStartLine(
                    'GET', '/', 'HTTP/2.0')
                start_line = ResponseStartLine(
                    'HTTP/2.0', 431, 'Headers too large')
                self.write_headers(start_line, HTTPHeaders())
                self.finish()
            return

    def _parse_headers(self):
        frame = self._header_frames[0]
        data = b''.join(f.data for f in self._header_frames)
        self._header_frames = []

        if frame.flags & constants.FrameFlag.PRIORITY:
            # TODO: support PRIORITY and PADDING.
            # This is just enough to cover an error case tested in h2spec.
stream_dep, weight = struct.unpack('>ib', data[:5]) data = data[5:] # strip off the "exclusive" bit stream_dep = stream_dep & 0x7fffffff if stream_dep == frame.stream_id: raise ConnectionError(constants.ErrorCode.PROTOCOL_ERROR, "stream cannot depend on itself") pseudo_headers = {} headers = HTTPHeaders() try: # Pseudo-headers must come before any regular headers, # and only in the first HEADERS phase. has_regular_header = bool(self._phase == constants.HTTPPhase.TRAILERS) for k, v, idx in self.conn.hpack_decoder.decode(bytearray(data)): if k != k.lower(): # RFC section 8.1.2 raise StreamError(self.stream_id, constants.ErrorCode.PROTOCOL_ERROR) if k.startswith(b':'): if self.conn.is_client: valid_pseudo_headers = (b':status',) else: valid_pseudo_headers = (b':method', b':scheme', b':authority', b':path') if (has_regular_header or k not in valid_pseudo_headers or native_str(k) in pseudo_headers): raise StreamError(self.stream_id, constants.ErrorCode.PROTOCOL_ERROR) pseudo_headers[native_str(k)] = native_str(v) if k == b":authority": headers.add("Host", native_str(v)) else: headers.add(native_str(k), native_str(v)) has_regular_header = True except HpackError: raise ConnectionError(constants.ErrorCode.COMPRESSION_ERROR) if self._phase == constants.HTTPPhase.HEADERS: self._start_request(pseudo_headers, headers) elif self._phase == constants.HTTPPhase.TRAILERS: # TODO: support trailers pass if (not self._maybe_end_stream(frame.flags) and self._phase == constants.HTTPPhase.TRAILERS): # The frame that finishes the trailers must also finish # the stream. raise StreamError(self.stream_id, constants.ErrorCode.PROTOCOL_ERROR) def _start_request(self, pseudo_headers, headers): if "connection" in headers: raise ConnectionError(constants.ErrorCode.PROTOCOL_ERROR, "connection header should not be present") if "te" in headers and headers["te"] != "trailers": raise StreamError(self.stream_id, constants.ErrorCode.PROTOCOL_ERROR) if self.conn.is_client: status = int(pseudo_headers[':status']) start_line = ResponseStartLine('HTTP/2.0', status, responses.get(status, '')) else: for k in (':method', ':scheme', ':path'): if k not in pseudo_headers: raise StreamError(self.stream_id, constants.ErrorCode.PROTOCOL_ERROR) start_line = RequestStartLine(pseudo_headers[':method'], pseudo_headers[':path'], 'HTTP/2.0') self._request_start_line = start_line if (self.conn.is_client and (self._request_start_line.method == 'HEAD' or start_line.code == 304)): self._incoming_content_remaining = 0 elif "content-length" in headers: self._incoming_content_remaining = int(headers["content-length"]) if not self.conn.is_client or status >= 200: self._phase = constants.HTTPPhase.BODY self._delegate_started = True self.delegate.headers_received(start_line, headers) def _handle_data_frame(self, frame): if self._header_frames: raise ConnectionError(constants.ErrorCode.PROTOCOL_ERROR, "DATA without END_HEADERS") if self._phase == constants.HTTPPhase.TRAILERS: raise ConnectionError(constants.ErrorCode.PROTOCOL_ERROR, "DATA after trailers") self._phase = constants.HTTPPhase.BODY frame = frame.without_padding() if self._incoming_content_remaining is not None: self._incoming_content_remaining -= len(frame.data) if self._incoming_content_remaining < 0: raise StreamError(self.stream_id, constants.ErrorCode.PROTOCOL_ERROR) if frame.data and self._delegate_started: future = self.delegate.data_received(frame.data) if future is None: self._send_window_update(len(frame.data)) else: IOLoop.current().add_future( future, lambda f: 
self._send_window_update(len(frame.data))) self._maybe_end_stream(frame.flags) def _send_window_update(self, amount): encoded = struct.pack('>I', amount) for stream_id in (0, self.stream_id): self.conn._write_frame(Frame( constants.FrameType.WINDOW_UPDATE, 0, stream_id, encoded)) def _maybe_end_stream(self, flags): if flags & constants.FrameFlag.END_STREAM: if (self._incoming_content_remaining is not None and self._incoming_content_remaining != 0): raise StreamError(self.stream_id, constants.ErrorCode.PROTOCOL_ERROR) if self._delegate_started: self._delegate_started = False self.delegate.finish() self.finish_future.set_result(None) return True return False def _handle_priority_frame(self, frame): # TODO: implement priority if len(frame.data) != 5: raise StreamError(self.stream_id, constants.ErrorCode.FRAME_SIZE_ERROR) def _handle_rst_stream_frame(self, frame): if len(frame.data) != 4: raise ConnectionError(constants.ErrorCode.FRAME_SIZE_ERROR) # TODO: expose error code? if self._delegate_started: self.delegate.on_connection_close() def _handle_window_update_frame(self, frame): self.window.apply_window_update(frame) def set_close_callback(self, callback): # TODO: this shouldn't be necessary pass def reset(self): self.conn._write_frame(Frame(constants.FrameType.RST_STREAM, 0, self.stream_id, b'\x00\x00\x00\x00')) @_reset_on_error def write_headers(self, start_line, headers, chunk=None, callback=None): if (not self.conn.is_client and (self._request_start_line.method == 'HEAD' or start_line.code == 304)): self._outgoing_content_remaining = 0 elif 'Content-Length' in headers: self._outgoing_content_remaining = int(headers['Content-Length']) header_list = [] if self.conn.is_client: self._request_start_line = start_line header_list.append((b':method', utf8(start_line.method), constants.HeaderIndexMode.YES)) header_list.append((b':scheme', b'https', constants.HeaderIndexMode.YES)) header_list.append((b':path', utf8(start_line.path), constants.HeaderIndexMode.NO)) else: header_list.append((b':status', utf8(str(start_line.code)), constants.HeaderIndexMode.YES)) for k, v in headers.get_all(): k = utf8(k.lower()) if k == b"connection": # Remove the implicit "connection: close", which is not # allowed in http2. # TODO: move the responsibility for this from httpclient # to http1connection? 
continue header_list.append((k, utf8(v), constants.HeaderIndexMode.YES)) data = bytes(self.conn.hpack_encoder.encode(header_list)) frame = Frame(constants.FrameType.HEADERS, constants.FrameFlag.END_HEADERS, self.stream_id, data) self.conn._write_frame(frame) return self.write(chunk, callback) @_reset_on_error def write(self, chunk, callback=None): if chunk: if self._outgoing_content_remaining is not None: self._outgoing_content_remaining -= len(chunk) if self._outgoing_content_remaining < 0: raise HTTPOutputError( "Tried to write more data than Content-Length") return self._write_chunk(chunk, callback) @gen.coroutine def _write_chunk(self, chunk, callback=None): try: if chunk: yield self.write_lock.acquire() while chunk: allowance = yield self.window.consume(len(chunk)) yield self.conn._write_frame( Frame(constants.FrameType.DATA, 0, self.stream_id, chunk[:allowance])) chunk = chunk[allowance:] self.write_lock.release() if callback is not None: callback() except Exception: self.reset() raise @_reset_on_error def finish(self): if (self._outgoing_content_remaining is not None and self._outgoing_content_remaining != 0): raise HTTPOutputError( "Tried to write %d bytes less than Content-Length" % self._outgoing_content_remaining) return self._write_end_stream() @gen.coroutine def _write_end_stream(self): # Callers are not required to wait for write() before calling finish, # so we must manually lock. yield self.write_lock.acquire() try: self.conn._write_frame(Frame(constants.FrameType.DATA, constants.FrameFlag.END_STREAM, self.stream_id, b'')) except Exception: self.reset() raise finally: self.write_lock.release() def read_response(self, delegate): assert delegate is self.orig_delegate, 'cannot change delegate' return self.finish_future
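# --- Self-contained sketch of the WINDOW_UPDATE payload encoding used by
# _send_window_update above: per RFC 7540 section 6.9 the payload is a
# 4-byte big-endian window-size increment, and the stream sends one update
# for the connection (stream 0) and one for itself.
import struct

def encode_window_update(amount):
    payload = struct.pack('>I', amount)  # 31-bit increment, high bit reserved
    assert len(payload) == 4
    return payload

assert encode_window_update(65535) == b'\x00\x00\xff\xff'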