def publish_to_subscribers(sender, action_type, username, user_type,
                           action_data, is_subscriable, severity):
    """Publishes a webhook event to the universal channel and, when the
    event is subscribable, to the subscribers of each affected application.

    :param sender: The signal sender (unused here, kept for the signal API).
    :param action_type: The numeric action type, resolved via ``action_types``.
    :param username: The name of the acting user or application.
    :param user_type: The user kind; metrics are emitted only for
        ``APPLICATION_USER``.
    :param action_data: The event payload; may carry ``application_names``
        (list) or a single ``application_name``.
    :param is_subscriable: Whether subscribers should also be notified.
    :param severity: The severity passed through to the notifier.
    """
    event_name = action_types[action_type]
    logger.info('webhook event(%s, %s) published.', event_name, username)
    # Only application users are counted in monitoring.
    is_app_user = user_type == APPLICATION_USER
    if is_app_user:
        monitor_client.increment('webhook.publish_universal', tags={
            'username': username,
            'appid': username,
            'action_name': event_name,
        })
    notifier.publish_universal(
        action_type, username, user_type, action_data, severity)
    if not is_subscriable:
        return
    # Fall back to the single application name when no list is present.
    target_names = action_data.get('application_names')
    if not target_names:
        target_names = [action_data['application_name']]
    if is_app_user:
        for target_name in target_names:
            monitor_client.increment('webhook.publish_subscribable', tags={
                'username': username,
                'appid': username,
                'action_name': event_name,
                'application_name': target_name,
            })
    notifier.publish(target_names, username, user_type, action_type,
                     action_data, severity=severity)
def _get_ezone(request_domain, application_name, cluster_name, request_addr):
    """Resolves the ezone used for route hijacking of a request.

    Unknown request domains are logged and counted, then served with the
    locally-configured ezone. Otherwise the ezone is extracted from the
    cluster name when the local-ezone switch is on.

    :param request_domain: The domain the request arrived on.
    :param application_name: The calling application's name (for metrics).
    :param cluster_name: The calling cluster name, possibly empty.
    :param request_addr: The remote address (for logging only).
    :returns: The resolved ezone name.
    """
    local_ezone = settings.EZONE or 'default'
    ezone = settings.ROUTE_DOMAIN_EZONE_MAP.get(request_domain, '')
    if ezone not in settings.ROUTE_EZONE_DEFAULT_HIJACK_MODE:
        # Domain is not mapped to a hijack-enabled ezone: record it and
        # fall back to the local ezone.
        logger.info(
            'unknown domain: %s %s %s %r', request_addr, request_domain,
            application_name, cluster_name)
        monitor_client.increment('route_hijack.unknown_domain', tags={
            'domain': request_domain,
            'from_application_name': application_name,
            'appid': application_name,
        })
        if not cluster_name:
            logger.info('unknown domain and cluster: %s %s %s',
                        request_addr, request_domain, application_name)
            monitor_client.increment('route_hijack.unknown_cluster', tags={
                'domain': request_domain,
                'from_application_name': application_name,
                'appid': application_name,
            })
        return local_ezone
    if not switch.is_switched_on(
            SWITCH_ENABLE_ROUTE_HIJACK_WITH_LOCAL_EZONE, False):
        return local_ezone
    # Prefer the ezone encoded in the cluster name; otherwise use the
    # overall route ezone.
    extracted = try_to_extract_ezone(cluster_name, default='')
    return extracted if extracted else settings.ROUTE_OVERALL_EZONE
def _load_fallback_token(token):
    """Attempts to decode *token* with each configured fallback secret key.

    The first key that yields a truthy payload wins; its use is logged and
    counted as an ``old_token`` event.

    :param token: The raw token to decode.
    :returns: The decoded payload of the first matching key, or the last
        (falsy) decode attempt's result when nothing matched.
    """
    payload = None
    for fallback_key in settings.FALLBACK_SECRET_KEYS:
        payload = _load_token(fallback_key, token, False)
        if payload:
            # A fallback key matched: this is an old token still in use.
            monitor_client.increment('old_token', tags=dict(res=payload))
            logger.info('fallback token %s is detected', payload)
            break
    return payload
def set_barrier(self):
    """Sets a barrier to prevent future registering of this container.

    There is a configuration item, ``CONTAINER_BARRIER_LIFESPAN``, which
    decides the max lifespan of a barrier in seconds.
    """
    barrier_path = self._make_barrier_path()
    self.huskar_client.client.ensure_path(barrier_path)
    monitor_client.increment('container.barrier')
def deregister_from(self, application_name, cluster_name):
    """Deregisters a specific application and cluster on this container.

    :param application_name: The name of registering application.
    :param cluster_name: The name of registering cluster.
    """
    location_path = self._make_location_path(
        _pack(application_name, cluster_name))
    # Recursive delete clears any children under the location node too.
    self.huskar_client.client.delete(location_path, recursive=True)
    monitor_client.increment('container.deregister')
def register_to(self, application_name, cluster_name):
    """Registers this container to a specific application and cluster.

    :param application_name: The name of registering application.
    :param cluster_name: The name of registering cluster.
    """
    location_path = self._make_location_path(
        _pack(application_name, cluster_name))
    self.huskar_client.client.ensure_path(location_path)
    monitor_client.increment('container.register')
def remote_context(url):
    """Wraps a remote webhook delivery with logging and timing metrics.

    Network errors (``Timeout``, ``ConnectionError``, ``HTTPError``) are
    swallowed after being counted and logged — delivery is best-effort.
    The elapsed time is always recorded, success or failure.

    :param url: The remote endpoint; only its MD5 digest is logged to
        avoid leaking the full URL.
    """
    start_at = time.time()
    url_digest = hashlib.md5(url).hexdigest()
    try:
        yield
    except (Timeout, ConnectionError, HTTPError) as e:
        monitor_client.increment('webhook.delivery_errors')
        # FIX: use logger.warning — logger.warn is a deprecated alias and
        # the rest of this module uses warning().
        logger.warning('Remote Request Failed: %s, %s', url_digest, str(e))
    else:
        logger.info('Remote Request Ok: %s', url_digest)
    finally:
        monitor_client.timing('webhook.delivery', time.time() - start_at)
def trace_access(auth, endpoint, action, key):
    """Counts API access of an application user unless it is whitelisted.

    :param auth: The authenticated principal; only applications are traced.
    :param endpoint: The request endpoint being accessed.
    :param action: The action name, used to look up the whitelist.
    :param key: The metric key suffix.
    """
    if not auth.is_application:
        return
    whitelisted_endpoints = settings.ALLOW_ALL_VIA_API_ENDPOINTS.get(
        action, [])
    if endpoint in whitelisted_endpoints:
        return
    monitor_client.increment('access_via_api.{}'.format(key), tags={
        'appid': auth.username,
        'endpoint': endpoint,
        'action': action,
    })
def collect_route_mode():
    """Validates the ``X-SOA-Mode`` header and records the route mode.

    Stores the effective mode on ``g.route_mode`` ('unknown' when absent)
    and aborts with 400 on an unrecognized mode. Requests from application
    users that lack an ``X-Frontend-Name`` header are counted.
    """
    frontend_name = request.headers.get('X-Frontend-Name')
    mode = request.headers.get('X-SOA-Mode')
    if mode and mode not in ROUTE_MODES:
        abort(400, u'X-SOA-Mode must be one of %s' % u'/'.join(ROUTE_MODES))
    g.route_mode = mode or 'unknown'
    if not frontend_name and g.auth.username and g.auth.is_application:
        monitor_client.increment('route_mode.qps', tags=dict(
            mode=g.route_mode,
            from_user=g.auth.username,
            appid=g.auth.username))
def collect_application_name():
    """Derives ``g.application_name`` from the authenticated application.

    Records the calling cluster, and in ``route`` mode requires the
    ``X-Cluster-Name`` header (400 otherwise). Non-route or non-application
    requests get ``g.application_name = None``.
    """
    g.cluster_name = request.headers.get('X-Cluster-Name')
    auth = g.auth
    if auth.username and auth.is_application:
        monitor_client.increment('route_mode.cluster', tags=dict(
            from_cluster=g.cluster_name or 'unknown',
            from_user=auth.username,
            appid=auth.username))
    if auth and auth.is_application and g.route_mode == 'route':
        g.application_name = auth.username
        if not g.cluster_name:
            abort(400, u'X-Cluster-Name is required while X-SOA-Mode is route')
    else:
        g.application_name = None
def destroy(self):
    """Destroys this container at all.

    :raises NotEmptyError: The container is still used by some clusters.
                           You should lookup and deregister again.
    """
    container_path = self._make_container_path()
    try:
        self.huskar_client.client.delete(container_path)
    except NoNodeError:
        # Already gone — nothing to do and nothing to count.
        return
    except KazooNotEmptyError:
        # Translate the Kazoo error into this package's exception type.
        raise NotEmptyError()
    else:
        monitor_client.increment('container.destroy')
def trace_all_application_events(action_type, action_data):
    """Counts an audit event once per affected application.

    :param action_type: The numeric action type, resolved via
        ``action_types``.
    :param action_data: The event payload; anything other than a dict is
        ignored. May carry ``application_names`` (list) or a single
        ``application_name``.
    """
    if not isinstance(action_data, dict):
        return
    action_name = action_types[action_type]
    # Fall back to the single application name when no list is present.
    affected = (action_data.get('application_names') or
                [action_data.get('application_name')])
    for name in affected:
        if name:
            monitor_client.increment('audit.application_event', tags={
                'action_name': action_name,
                'application_name': name,
            })
def track_user_qps(response):
    """After-request hook that counts QPS per user and per endpoint.

    :param response: The response object, returned unchanged.
    :returns: The same response.
    """
    if not request.endpoint:
        return response
    if g.get('auth'):
        name = g.auth.username
        kind = 'app' if g.auth.is_application else 'user'
    else:
        name = kind = 'anonymous'
    tags = dict(kind=kind, name=name)
    if kind == 'app':
        # Applications also get an appid tag for dashboard grouping.
        tags.update(appid=name)
    monitor_client.increment('qps.all', tags=tags)
    monitor_client.increment('qps.url', tags=dict(
        endpoint=request.endpoint,
        method=request.method,
        **tags))
    return response
def _detect_bad_route(self, body):
    """Detects and reports "bad routes": service clusters whose route
    resolution points at a cluster with an empty body.

    Legacy applications and blacklisted source/destination clusters are
    skipped. Findings are only counted and logged — nothing is modified.

    :param body: The tree body keyed by subdomain type; only the
        ``SERVICE_SUBDOMAIN`` section is inspected.
    """
    # Feature-gated: do nothing unless the detection switch is on.
    if not switch.is_switched_on(SWITCH_DETECT_BAD_ROUTE):
        return
    if self.from_application_name in settings.LEGACY_APPLICATION_LIST:
        return
    from_cluster_blacklist = settings.ROUTE_FROM_CLUSTER_BLACKLIST.get(
        self.from_application_name, [])
    if self.from_cluster_name in from_cluster_blacklist:
        return
    type_name = SERVICE_SUBDOMAIN
    type_body = body[type_name]
    # Flatten (application, cluster, cluster_body) triples lazily.
    # NOTE: iteritems() — this module targets Python 2.
    flat_cluster_names = (
        (application_name, cluster_name, cluster_body)
        for application_name, application_body in type_body.iteritems()
        for cluster_name, cluster_body in application_body.iteritems())
    for application_name, cluster_name, cluster_body in flat_cluster_names:
        if application_name in settings.LEGACY_APPLICATION_LIST:
            continue
        if cluster_name in settings.ROUTE_DEST_CLUSTER_BLACKLIST.get(
                application_name, []):
            continue
        cluster_map = self.cluster_maps[application_name, type_name]
        resolved_name = cluster_map.cluster_names.get(cluster_name)
        # A "bad route" is a cluster that resolves to some name yet has
        # an empty body; skip non-empty bodies and unresolved clusters.
        if cluster_body or not resolved_name:
            continue
        monitor_client.increment(
            'tree_watcher.bad_route', 1, tags=dict(
                from_application_name=self.from_application_name,
                from_cluster_name=self.from_cluster_name,
                dest_application_name=application_name,
                appid=application_name,
                dest_cluster_name=cluster_name,
                dest_resolved_cluster_name=resolved_name,
            ))
        logger.info('Bad route detected: %s %s %s %s -> %s (%r)',
                    self.from_application_name, self.from_cluster_name,
                    application_name, cluster_name, resolved_name,
                    dict(cluster_map.cluster_names))
def raise_for_unbound(self, application_name, cluster_name, key):
    """Deal with container has barrier

    :param application_name: The name of the registering application.
    :param cluster_name: The name of the registering cluster.
    :param key: The registering key (logged for diagnostics).
    :raise: ContainerUnboundError: raise when container has barrier
    """
    if not self.has_barrier():
        return
    monitor_client.increment('container.barrier_deny', tags={
        'application_name': application_name,
        'appid': application_name,
        'cluster_name': cluster_name,
    })
    logger.info(
        'could not register service because of container barrier, '
        'application={}, cluster={}, key={}'.format(
            application_name, cluster_name, key
        )
    )
    raise ContainerUnboundError()
def block_until_initialized(self, timeout):
    """Blocks until the tree cache is initialized or *timeout* elapses.

    On timeout the holder is closed, the timeout and the number of
    still-outstanding cache operations are counted, and
    :class:`TreeTimeoutError` is raised.

    :param timeout: Seconds to wait for initialization.
    :raises TreeTimeoutError: When initialization did not finish in time.
    """
    if self.initialized.wait(timeout):
        return
    # Snapshot the pending-operation count before tearing the cache down.
    # NOTE(review): reaches into TreeCache's private _outstanding_ops.
    pending_ops = self.cache._outstanding_ops
    self.close()
    monitor_client.increment(
        'tree_holder.tree_timeout', 1, tags={
            'type_name': self.type_name,
            'application_name': self.application_name,
            'appid': self.application_name,
        })
    monitor_client.increment(
        'tree_holder.tree_timeout.outstanding', pending_ops, tags={
            'type_name': self.type_name,
            'application_name': self.application_name,
            'appid': self.application_name,
        })
    raise TreeTimeoutError(self.application_name, self.type_name)
def trace_remote_http_call(url):
    """Wraps a remote HTTP call with error metrics and a latency timer.

    5xx ``HTTPError`` responses and transport errors (``Timeout``,
    ``ConnectionError``) are counted and reported to Sentry, then
    re-raised. The timer is recorded in milliseconds, success or failure.

    :param url: The target URL; its hostname becomes the ``domain`` tag.
    """
    start_at = int(time.time() * 1000)
    domain = urlparse.urlparse(url).hostname
    try:
        yield
    except HTTPError as e:
        response = e.response
        status_code = response.status_code
        if status_code >= 500:
            # Only server-side failures are worth alerting on here.
            monitor_client.increment('remote_http_call.error', tags={
                'type': 'internal_error',
                'domain': domain,
                'status_code': str(status_code),
            })
            capture_message(
                'Remote HTTP API Internal Server Error',
                level=logging.WARNING, extra={
                    'url': url,
                    'status_code': status_code,
                    'body': repr(response.content),
                })
        raise
    except (Timeout, ConnectionError) as e:
        error_type = 'timeout' if isinstance(e, Timeout) \
            else 'connection_error'
        monitor_client.increment('remote_http_call.error', tags={
            'type': error_type,
            'domain': domain,
            'status_code': 'unknown',
        })
        capture_exception(level=logging.WARNING, extra={'url': url})
        raise
    finally:
        monitor_client.timing(
            'remote_http_call.timer',
            int(time.time() * 1000) - start_at,
            tags={'domain': domain})
def dispatch_signal(self, event):
    """Routes a Kazoo tree event to the right handler and metric.

    Before initialization only ``INITIALIZED`` matters; afterwards
    connective events are logged and node events fan out through the
    ``tree_changed`` signal.

    :param event: The :class:`TreeEvent` delivered by the tree cache.
    """
    if not self.initialized.is_set():
        if event.event_type == TreeEvent.INITIALIZED:
            monitor_client.increment('tree_holder.events.initialized', 1)
            self.initialized.set()
            # If the tree holder is closing, the throttle semaphore should
            # be maintaining in the close method instead here.
            if not self._closed:
                self.throttle_semaphore.release()
        return
    # It is possible to receive following events also. But we don't need
    # them for now.
    # - TreeEvent.CONNECTION_SUSPENDED
    # - TreeEvent.CONNECTION_RECONNECTED
    # - TreeEvent.CONNECTION_LOST
    connective_name = self.CONNECTIVE_EVENTS.get(event.event_type)
    if connective_name is not None:
        logger.info('Connective event %s happened on %s',
                    connective_name, self.path)
        monitor_client.increment('tree_holder.events.connective', 1)
        return
    node_events = (TreeEvent.NODE_ADDED, TreeEvent.NODE_UPDATED,
                   TreeEvent.NODE_REMOVED)
    if event.event_type in node_events:
        self.tree_changed.send(self, event=event)
        monitor_client.increment('tree_holder.events.node', 1)
        return
def _clean(self):
    """Releases tree holders that have been idle past the old-offset.

    Reads stale ``application:type`` keys from Redis (scores older than
    ``now - _old_offset``), releases each matching holder from the hub,
    and finally prunes the stale Redis entries themselves. Redis read
    failures are logged and abort the pass (best-effort cleanup).
    """
    stale_before = time.time() - self._old_offset
    try:
        stale_keys = redis_client.zrangebyscore(REDIS_KEY, 0, stale_before)
    except Exception as e:
        logger.warning('get tree holder cleaner data failed: %s', e)
        return
    for stale_key in stale_keys:
        # Keys are stored as "application_name:type_name".
        application_name, type_name = stale_key.split(':')
        holder = self._tree_hub.release_tree_holder(
            application_name, type_name)
        if holder is None:
            continue
        logger.info('release unused tree holder: %s %s',
                    application_name, type_name)
        monitor_client.increment('tree_holder.release_unused', tags={
            'application_name': application_name,
            'appid': application_name,
            'type_name': type_name,
        })
    self._clean_old_redis_data()
def __iter__(self):
    """The tree watcher is iterable for subscribing events.

    Yields ``(event_type, body)`` pairs: the initial ``'all'`` snapshot
    when ``with_initial`` is set, queued events as they arrive, and a
    ``'ping'`` heartbeat each idle second. Stops after ``life_span``
    seconds when that is configured.
    """
    def count_event(event_type):
        # One metric per delivered event, tagged with its type.
        monitor_client.increment('tree_watcher.event', 1, tags={
            'from': str(self._metrics_tag_from),
            'appid': str(self._metrics_tag_from),
            'event_type': event_type,
        })

    monitor_client.increment('tree_watcher.session', 1, tags={
        'from': str(self._metrics_tag_from),
        'appid': str(self._metrics_tag_from),
    })
    started_at = time.time()
    if self.with_initial:
        snapshot = self._load_entire_body()
        yield ('all', snapshot)
        count_event('all')
    while True:
        # Drain everything currently queued before the heartbeat.
        while not self.queue.empty():
            event_type, body = self.queue.get()
            yield (event_type, body)
            count_event(event_type)
        yield ('ping', {})
        if self.life_span and time.time() > started_at + self.life_span:
            break
        sleep(1)
def enter_minimal_mode(self, reason=None):
    """Switches into minimal mode once, remembering the reason.

    Subsequent calls while already in minimal mode are no-ops, so the
    metric counts distinct entries only.

    :param reason: Optional description of why minimal mode was entered.
    """
    if not self._minimal_mode:
        self._minimal_mode = True
        self._minimal_mode_reason = reason
        monitor_client.increment('minimal_mode.qps', 1)