def _on_span_generation_known(self, span: opentracing.Span) -> None:
    """Annotate ``span`` with rebalance details and really finish it.

    Does nothing when no consumer is set, or when ``span`` lacks the
    ``operation_name``/``set_tag`` attributes (i.e. it is not a real span).
    Otherwise the span's trace id is replaced by a murmur2 hash of
    ``reb-<app id>-<generation>``, a placeholder operation name is
    substituted, Kafka tags are attached and ``span._real_finish()`` is
    called.
    """
    consumer = self._consumer
    if not consumer:
        return

    coordinator = consumer._coordinator
    coordinator_id = coordinator.coordinator_id
    app_id = self.app.conf.id
    generation = coordinator.generation
    member_id = coordinator.member_id

    try:
        current_name = span.operation_name
        add_tag = span.set_tag
    except AttributeError:  # pragma: no cover
        return  # not a real span

    # Every node computing this for the same app and generation ends up
    # with the same trace id.
    span.context.trace_id = murmur2(f'reb-{app_id}-{generation}'.encode())

    if current_name.endswith('.REPLACE_WITH_MEMBER_ID'):
        span.set_operation_name(f'rebalancing node {member_id}')

    kafka_tags = (
        ('kafka_generation', generation),
        ('kafka_member_id', member_id),
        ('kafka_coordinator_id', coordinator_id),
    )
    for tag_name, tag_value in kafka_tags:
        add_tag(tag_name, tag_value)

    self.app._span_add_default_tags(span)
    span._real_finish()
def _on_span_cancelled_early(self, span: opentracing.Span) -> None:
    """Mark ``span`` as cancelled in its operation name and really finish it.

    Spans without an ``operation_name`` attribute are left untouched.
    """
    try:
        original_name = span.operation_name
    except AttributeError:
        # Nothing to rename or finish on an object without an operation name.
        return
    span.set_operation_name(f'{original_name} (CANCELLED)')
    span._real_finish()
def generate_headers_for_span_context_injection(
    span: Span, http_method: str, http_url: str
) -> dict:
    """Tag ``span`` as an outgoing HTTP client call and build trace headers.

    Args:
        span: the span describing the outgoing request.
        http_method: value recorded under the HTTP method tag.
        http_url: value recorded under the HTTP URL tag.

    Returns:
        A new dict that the module-level ``tracer`` has filled using the
        HTTP headers carrier format.
    """
    span.set_tag(tags.HTTP_METHOD, http_method)
    span.set_tag(tags.HTTP_URL, http_url)
    span.set_tag(tags.SPAN_KIND, tags.SPAN_KIND_RPC_CLIENT)

    headers: dict = {}
    # The OpenTracing API defines inject() in terms of a SpanContext, not a
    # Span; tracers that do not special-case a Span argument would fail.
    tracer.inject(span.context, Format.HTTP_HEADERS, headers)
    return headers
def _transform_span_lazy(self, span: opentracing.Span) -> None:
    """Swap ``span.finish`` for a hook that calls ``self._span_finish(span)``.

    The span's original ``finish`` stays reachable as ``span._real_finish``.
    """
    # XXX slow
    owner = self
    # MyPy completely disallows the statements below, claiming it is an
    # illegal dynamic baseclass, so they are hidden from the type checker.
    if not typing.TYPE_CHECKING:
        span_type = span.__class__

        class LazySpan(span_type):

            # Takes no ``self``: the plain function is stored on the span
            # instance below, so it is called without a bound argument.
            def finish() -> None:
                owner._span_finish(span)

        original_finish = span.finish
        span._real_finish = original_finish
        span.finish = LazySpan.finish
def call_with_trace(
    span: opentracing.Span,
    fun: Callable,
    callback: Optional[Tuple[Callable, Tuple[Any, ...]]],
    *args: Any,
    **kwargs: Any,
) -> Any:
    """Call function and trace it from parent span.

    The span is entered before ``fun(*args, **kwargs)`` is called. If the
    call raises, the span is exited with the exception info and the
    exception propagates (the callback is not run). If the call returns a
    coroutine, a wrapping coroutine is returned that exits the span and runs
    the callback once the awaited coroutine completes or raises. Otherwise
    the span is exited, the callback is run and the result is returned.

    ``callback`` is an optional ``(callable, args)`` pair.
    """
    cb: Optional[Callable]
    cb_args: Tuple
    cb, cb_args = callback if callback else (None, ())

    def _run_callback() -> None:
        if cb:
            cb(*cb_args)

    span.__enter__()
    try:
        ret = fun(*args, **kwargs)
    except BaseException:
        span.__exit__(*sys.exc_info())
        raise

    if not asyncio.iscoroutine(ret):
        # for non async def method, we just exit the span.
        span.__exit__(None, None, None)
        _run_callback()
        return ret

    # if async def method, we attach our span to when it completes.
    async def corowrapped() -> Any:
        try:
            await_ret = await ret
        except BaseException:
            span.__exit__(*sys.exc_info())
            _run_callback()
            raise
        span.__exit__(None, None, None)
        _run_callback()
        return await_ret

    return corowrapped()
async def _dispatch_request(
    self,
    log: NotificationLoggerAdapter,
    span: Span,
    device: Device,
    shaved_payload: Dict[str, Any],
    prio: int,
) -> List[str]:
    """
    Actually attempts to dispatch the notification once.

    Returns an empty list on success, or a list holding the device's pushkey
    when the (status code, description) pair is listed in
    ``self.TOKEN_ERRORS``.

    Raises TemporaryNotificationDispatchException on an aioapns connection
    error or a 5xx status, and NotificationDispatchException for any other
    unsuccessful response.
    """
    # this is no good: APNs expects ID to be in their format
    # so we can't just derive a
    # notif_id = context.request_id + f"-{n.devices.index(device)}"
    apns_id = str(uuid4())

    log.info(f"Sending as APNs-ID {apns_id}")
    span.set_tag("apns_id", apns_id)

    # The pushkey is base64; the request carries the token as hex.
    token_hex = base64.b64decode(device.pushkey).hex()

    request = NotificationRequest(
        device_token=token_hex,
        message=shaved_payload,
        priority=prio,
        notification_id=apns_id,
    )

    try:
        with ACTIVE_REQUESTS_GAUGE.track_inprogress(), SEND_TIME_HISTOGRAM.time():
            response = await self._send_notification(request)
    except aioapns.ConnectionError:
        raise TemporaryNotificationDispatchException("aioapns Connection Failure")

    code = int(response.status)
    span.set_tag(tags.HTTP_STATUS_CODE, code)
    RESPONSE_STATUS_CODES_COUNTER.labels(pushkin=self.name, code=code).inc()

    if response.is_successful:
        return []

    # .description corresponds to the 'reason' response field
    reason = response.description
    span.set_tag("apns_reason", reason)

    if (code, reason) in self.TOKEN_ERRORS:
        log.info(
            "APNs token %s for pushkin %s was rejected: %d %s",
            token_hex,
            self.name,
            code,
            reason,
        )
        return [device.pushkey]

    failure = f"{response.status} {reason}"
    if 500 <= code < 600:
        raise TemporaryNotificationDispatchException(failure)
    raise NotificationDispatchException(failure)
async def _request_dispatch(
    self,
    n: Notification,
    log: NotificationLoggerAdapter,
    body: dict,
    headers: Dict[AnyStr, List[AnyStr]],
    pushkeys: List[str],
    span: Span,
) -> Tuple[List[str], List[str]]:
    """
    Perform one HTTP request to GCM and sort the pushkeys by outcome.

    Args:
        n: the notification being sent (only used for log/span device counts).
        log: the logger to use.
        body: request body handed to ``self._perform_http_request``.
        headers: request headers handed to ``self._perform_http_request``.
        pushkeys: the pushkeys covered by this request, in request order.
        span: the tracing span to tag and log to.

    Returns:
        ``(failed, new_pushkeys)``: pushkeys that have permanently failed and
        pushkeys that have temporarily failed, respectively. On a 404 every
        pushkey is returned as failed.

    Raises:
        TemporaryNotificationDispatchException: on a 5xx response; carries
            the Retry-After delay when one could be parsed.
        NotificationDispatchException: on 400, 401, an unknown status code,
            or a 2xx response whose body is not JSON or has no ``results``.
    """
    poke_start_time = time.time()

    response, response_text = await self._perform_http_request(body, headers)

    RESPONSE_STATUS_CODES_COUNTER.labels(pushkin=self.name, code=response.code).inc()

    log.debug("GCM request took %f seconds", time.time() - poke_start_time)

    span.set_tag(tags.HTTP_STATUS_CODE, response.code)

    if 500 <= response.code < 600:
        log.debug("%d from server, waiting to try again", response.code)

        retry_after = None

        for header_value in response.headers.getRawHeaders(
            b"retry-after", default=[]
        ):
            try:
                retry_after = int(header_value)
            except ValueError:
                # Retry-After may also be an HTTP-date; an unparseable value
                # must not mask the temporary failure raised below.
                log.debug("Ignoring unparseable Retry-After: %r", header_value)
                continue
            span.log_kv({"event": "gcm_retry_after", "retry_after": retry_after})

        raise TemporaryNotificationDispatchException(
            "GCM server error, hopefully temporary.", custom_retry_delay=retry_after
        )
    elif response.code == 400:
        log.error(
            "%d from server, we have sent something invalid! Error: %r",
            response.code,
            response_text,
        )
        # permanent failure: give up
        raise NotificationDispatchException("Invalid request")
    elif response.code == 401:
        log.error(
            "401 from server! Our API key is invalid? Error: %r", response_text
        )
        # permanent failure: give up
        raise NotificationDispatchException("Not authorised to push")
    elif response.code == 404:
        # assume they're all failed
        log.info("Reg IDs %r get 404 response; assuming unregistered", pushkeys)
        return pushkeys, []
    elif 200 <= response.code < 300:
        try:
            resp_object = json_decoder.decode(response_text)
        except ValueError:
            raise NotificationDispatchException("Invalid JSON response from GCM.")

        if not isinstance(resp_object, dict) or "results" not in resp_object:
            log.error(
                "%d from server but response contained no 'results' key: %r",
                response.code,
                response_text,
            )
            # Without per-pushkey results there is nothing to classify; fail
            # explicitly instead of hitting a KeyError further down.
            raise NotificationDispatchException(
                "Invalid response from GCM: no 'results' key."
            )

        results = resp_object["results"]

        if len(results) < len(pushkeys):
            log.error(
                "Sent %d notifications but only got %d responses!",
                len(n.devices),
                len(results),
            )
            span.log_kv(
                {
                    logs.EVENT: "gcm_response_mismatch",
                    "num_devices": len(n.devices),
                    "num_results": len(results),
                }
            )

        # determine which pushkeys to retry or forget about
        failed = []
        new_pushkeys = []
        # zip() pairs results with pushkeys positionally and ignores any
        # surplus results that have no matching pushkey.
        for pushkey, result in zip(pushkeys, results):
            if "error" not in result:
                continue

            error = result["error"]
            log.warning("Error for pushkey %s: %s", pushkey, error)
            span.set_tag("gcm_error", error)

            if error in BAD_PUSHKEY_FAILURE_CODES:
                log.info(
                    "Reg ID %r has permanently failed with code %r: "
                    "rejecting upstream",
                    pushkey,
                    error,
                )
                failed.append(pushkey)
            elif error in BAD_MESSAGE_FAILURE_CODES:
                # The message itself is bad: neither reject nor retry.
                log.info(
                    "Message for reg ID %r has permanently failed with code %r",
                    pushkey,
                    error,
                )
            else:
                log.info(
                    "Reg ID %r has temporarily failed with code %r",
                    pushkey,
                    error,
                )
                new_pushkeys.append(pushkey)
        return failed, new_pushkeys
    else:
        raise NotificationDispatchException(
            f"Unknown GCM response code {response.code}"
        )
def inner_close(fut):
    # type: (Future) -> None
    """Finish the enclosing ``span`` once ``fut`` is done.

    If the future holds an exception, it is reported on the span first.
    """
    error = fut.exception()
    if error is not None:
        from opentracing import Span

        # No traceback object is at hand here, so a placeholder is passed.
        Span._on_error(span, type(error), error, '<unavailable>')
    span.finish()
def get_report_summary(objectives: Iterator[Objective], unit: str,
                       start: datetime, end: datetime,
                       current_span: opentracing.Span) -> List[dict]:
    """Summarise indicator values per objective, bucket and indicator.

    Objectives without targets are skipped (and logged on ``current_span``).
    For every other objective a child span is started, and for each target
    the indicator values with ``start <= timestamp < end`` are queried
    (``start`` having first been passed through ``truncate``) and grouped by
    ``truncate_values``.

    Args:
        objectives: the objectives to summarise.
        unit: accepted for interface compatibility; not used here.
        start: lower timestamp bound (inclusive).
        end: upper timestamp bound (exclusive).
        current_span: parent span for logging and child spans.

    Returns:
        One dict per summarised objective with its ``title``,
        ``description``, ``id``, ``targets`` and ``days``. ``days`` maps an
        ISO-formatted bucket date to a mapping of indicator name to
        aggregates (``avg``, ``breaches``, ``count``, ``max``, ``min``,
        ``sum``, ``aggregation``).
    """
    summary = []
    start = truncate(start)

    for objective in objectives:
        if not objective.targets:
            current_span.log_kv({
                'objective_skipped': True,
                'objective': objective.id
            })
            continue

        days = collections.defaultdict(dict)

        current_span.log_kv({
            'objective_target_count': len(objective.targets),
            'objective_id': objective.id
        })

        # Instrument objective summary!
        objective_summary_span = opentracing.tracer.start_span(
            operation_name='report_objective_summary', child_of=current_span)
        objective_summary_span.set_tag('objective_id', objective.id)

        with objective_summary_span:
            for target in objective.targets:
                objective_summary_span.log_kv({
                    'target_id': target.id,
                    'indicator_id': target.indicator_id
                })

                ivs = (IndicatorValue.query.filter(
                    IndicatorValue.indicator_id == target.indicator_id,
                    IndicatorValue.timestamp >= start,
                    IndicatorValue.timestamp < end).order_by(
                        IndicatorValue.timestamp))

                target_values_truncated = truncate_values(
                    ivs, parent_span=objective_summary_span)

                # Only a missing bound is open-ended. Testing ``is None``
                # keeps an explicit bound of 0, which ``x or inf`` would
                # silently discard.
                lower_bound = (float('-inf') if target.target_from is None
                               else target.target_from)
                upper_bound = (float('inf') if target.target_to is None
                               else target.target_to)

                for truncated_date, target_values in (
                        target_values_truncated.items()):
                    target_count = len(target_values)
                    if not target_count:
                        # Nothing to aggregate; avg/max/min are undefined.
                        continue

                    target_sum = sum(target_values)
                    breaches = sum(
                        1 for value in target_values
                        if not lower_bound <= value <= upper_bound)

                    days[truncated_date.isoformat()][target.indicator.name] = {
                        'aggregation': target.indicator.aggregation,
                        'avg': target_sum / target_count,
                        'breaches': breaches,
                        'count': target_count,
                        'max': max(target_values),
                        'min': min(target_values),
                        'sum': target_sum,
                    }

        summary.append({
            'title': objective.title,
            'description': objective.description,
            'id': objective.id,
            'targets': [{
                'from': t.target_from,
                'to': t.target_to,
                'sli_name': t.indicator.name,
                'unit': t.indicator.unit,
                'aggregation': t.indicator.aggregation
            } for t in objective.targets],
            'days': days
        })

    return summary
def _on_span_cancelled_early(self, span: opentracing.Span) -> None:
    """Mark ``span`` as cancelled in its operation name and really finish it.

    Span objects that expose no ``operation_name`` attribute (i.e. not a
    real span) are ignored instead of raising ``AttributeError``.
    """
    try:
        op_name = span.operation_name
    except AttributeError:
        # not a real span: nothing to rename or finish
        return
    span.set_operation_name(f'{op_name} (CANCELLED)')
    span._real_finish()
def test_span():
    """Exercise the basic Span API: children, logging, context manager, tags."""
    ctx = TraceContextSource().new_root_trace_context()
    span = Span(trace_context=ctx)
    assert span.trace_context == ctx
    child = span.start_child(operation_name='test')
    assert span == child
    child.info('cache hit', 'arg1', 'arg2')
    child.error('cache miss', 'arg1', 'arg2')

    # Leaving the context manager via an exception must finish the span and
    # report the error exactly once.
    with mock.patch.object(span, 'finish') as finish:
        with mock.patch.object(span, 'error') as error:
            try:
                with span:
                    raise ValueError()
            except ValueError:
                pass
            assert finish.call_count == 1
            assert error.call_count == 1

    # A clean exit must finish the span without reporting an error.
    with mock.patch.object(span, 'finish') as finish:
        with mock.patch.object(span, 'error') as error:
            with span:
                pass
            assert finish.call_count == 1
            assert error.call_count == 0

    span.add_tag('x', 'y').add_tag('z', 1)
    span.add_tag(tags.PEER_SERVICE, 'test-service')
    # 127.0.0.1 as an integer. The parentheses matter: ``+`` binds tighter
    # than ``<<``, so ``127 << 24 + 1`` would be ``127 << 25``.
    span.add_tag(tags.PEER_HOST_IPV4, (127 << 24) + 1)
    span.add_tag(tags.PEER_HOST_IPV6, '::')
    span.add_tag(tags.PEER_HOSTNAME, 'uber.com')
    span.add_tag(tags.PEER_PORT, 123)
    span.add_tags({tags.PEER_PORT: 123, tags.PEER_HOSTNAME: 'uber.com'})
    span.finish()
async def _handle_dispatch(
    self,
    root_span: Span,
    request: Request,
    log: NotificationLoggerAdapter,
    notif: Notification,
    context: NotificationContext,
) -> None:
    """
    Actually handle the dispatch of notifications to devices, sequentially
    for simplicity.

    The response body is a JSON object listing the rejected pushkeys. A
    NotificationDispatchException yields a 502 and any other exception a
    500. In all cases the request is finished (unless disconnected), the
    metrics are updated and the root span is finished.

    root_span: the OpenTracing span
    request: the Twisted Web Request
    log: the logger to use
    notif: the notification to dispatch
    context: the context of the notification
    """
    try:
        rejected = []

        for device in notif.devices:
            NOTIFS_RECEIVED_DEVICE_PUSH_COUNTER.inc()

            appid = device.app_id
            candidates = self.find_pushkins(appid)
            match_count = len(candidates)

            # Exactly one pushkin must match; otherwise reject the pushkey.
            if match_count != 1:
                complaint = (
                    "Got notification for unknown app ID %s"
                    if match_count == 0
                    else "Got notification for an ambiguous app ID %s"
                )
                log.warning(complaint, appid)
                rejected.append(device.pushkey)
                continue

            pushkin = candidates[0]
            log.debug(
                "Sending push to pushkin %s for app ID %s", pushkin.name, appid
            )

            NOTIFS_BY_PUSHKIN.labels(pushkin.name).inc()

            result = await pushkin.dispatch_notification(notif, device, context)
            if not isinstance(result, list):
                raise TypeError("Pushkin should return list.")

            rejected.extend(result)

        response_body = json.dumps({"rejected": rejected}).encode()
        request.write(response_body)

        if rejected:
            log.info(
                "Successfully delivered notifications with %d rejected pushkeys",
                len(rejected),
            )
    except NotificationDispatchException:
        request.setResponseCode(502)
        log.warning("Failed to dispatch notification.", exc_info=True)
    except Exception:
        request.setResponseCode(500)
        log.error("Exception whilst dispatching notification.", exc_info=True)
    finally:
        if not request._disconnected:
            request.finish()

        status = request.code
        PUSHGATEWAY_HTTP_RESPONSES_COUNTER.labels(code=status).inc()
        root_span.set_tag(tags.HTTP_STATUS_CODE, status)

        elapsed = time.perf_counter() - context.start_time
        if elapsed > 0:
            # can be negative as perf_counter() may not be monotonic
            NOTIFY_HANDLE_HISTOGRAM.labels(code=status).observe(elapsed)

        if status < 200 or status >= 300:
            root_span.set_tag(tags.ERROR, True)
        root_span.finish()