Ejemplo n.º 1
0
def get_data_from_request(event: dict,
                          capture_body: bool = False,
                          capture_headers: bool = True) -> dict:
    """
    Build the request context dict from an API Gateway event.

    :param event: the raw event dict handed to the handler
    :param capture_body: when true the (decoded/parsed) body is stored,
        otherwise a present body is replaced by the "[REDACTED]" marker
    :param capture_headers: when true the event's "headers" entry is copied
    :return: dict with optional "headers", and, when an HTTP method can be
        found in the event, "method", optional "body" and "url"
    """
    data = {}
    if capture_headers and "headers" in event:
        data["headers"] = event["headers"]

    # The HTTP method is looked up in two places of "requestContext".
    http_method = nested_key(event, "requestContext", "httpMethod")
    if not http_method:
        http_method = nested_key(event, "requestContext", "http", "method")
    if not http_method:
        # Not API Gateway
        return data

    data["method"] = http_method
    if http_method in constants.HTTP_WITH_BODY and "body" in event:
        payload = event["body"]
        if capture_body and event.get("isBase64Encoded"):
            payload = base64.b64decode(payload)
        elif capture_body:
            # Best effort: keep the raw body if it cannot be parsed as JSON.
            try:
                payload = json.loads(payload)
            except Exception:
                pass

        if payload is not None:
            data["body"] = payload if capture_body else "[REDACTED]"

    data["url"] = get_url_dict(event)
    return data
Ejemplo n.º 2
0
 def autofill_resource_context(self):
     """
     Fill in context["destination"]["service"]["resource"] when it is missing.

     Nothing happens when the context is empty, when a resource is already
     set, or when the span is neither a leaf nor carries one of the
     "destination", "db", "message" or "http" context keys.
     """
     if not self.context:
         return
     resource = nested_key(self.context, "destination", "service", "resource")
     if resource:
         return
     if not (self.leaf or any(k in self.context for k in ("destination", "db", "message", "http"))):
         return

     type_info = self.subtype or self.type
     db_instance = nested_key(self.context, "db", "instance")
     queue = nested_key(self.context, "message", "queue", "name")
     url = nested_key(self.context, "http", "url")

     # Precedence: db instance, then queue name, then http url, then the bare type.
     if db_instance:
         resource = f"{type_info}/{db_instance}"
     elif queue:
         resource = f"{type_info}/{queue}"
     elif url:
         resource = url_to_destination_resource(url)
     else:
         resource = type_info

     service = self.context.setdefault("destination", {}).setdefault("service", {})
     service["resource"] = resource
     # set fields that are deprecated, but still required by APM Server API
     service.setdefault("name", "")
     service.setdefault("type", "")
Ejemplo n.º 3
0
    def __enter__(self):
        """
        Transaction setup.

        Detects the trigger from the shape of ``self.event`` and stores it in
        ``self.source`` ("api", "s3", "sns", "sqs" or "other"), begins a
        transaction of the matching type, sets its name (without overriding
        an existing one), registers the request context for API events, and
        finally delegates to ``set_metadata_and_context``.
        """
        global COLD_START

        # dict.get() only uses its default when the key is missing; an event
        # with an explicit null "headers" value would otherwise hand None to
        # from_headers().
        trace_parent = TraceParent.from_headers(self.event.get("headers") or {})

        # Only the first invocation seen by this module reports a cold start.
        cold_start = COLD_START
        COLD_START = False

        self.source = "other"
        transaction_type = "request"
        transaction_name = os.environ.get("AWS_LAMBDA_FUNCTION_NAME",
                                          self.name)

        # The HTTP method is looked up in two places of "requestContext".
        self.httpmethod = nested_key(
            self.event, "requestContext", "httpMethod") or nested_key(
                self.event, "requestContext", "http", "method")
        if self.httpmethod:  # API Gateway
            self.source = "api"
            if os.environ.get("AWS_LAMBDA_FUNCTION_NAME"):
                transaction_name = "{} {}".format(
                    self.httpmethod, os.environ["AWS_LAMBDA_FUNCTION_NAME"])
            else:
                transaction_name = self.name
        elif "Records" in self.event and len(self.event["Records"]) == 1:
            # Only single-record events are classified; anything else stays "other".
            record = self.event["Records"][0]
            if record.get("eventSource") == "aws:s3":  # S3
                self.source = "s3"
                transaction_name = "{} {}".format(
                    record["eventName"], record["s3"]["bucket"]["name"])
            elif record.get("EventSource") == "aws:sns":  # SNS (capitalised key)
                self.source = "sns"
                transaction_type = "messaging"
                # Sixth colon-separated ARN field is used as the topic name.
                transaction_name = "RECEIVE {}".format(
                    record["Sns"]["TopicArn"].split(":")[5])
            elif record.get("eventSource") == "aws:sqs":  # SQS
                self.source = "sqs"
                transaction_type = "messaging"
                # Sixth colon-separated ARN field is used as the queue name.
                transaction_name = "RECEIVE {}".format(
                    record["eventSourceARN"].split(":")[5])

        self.transaction = self.client.begin_transaction(
            transaction_type, trace_parent=trace_parent)
        elasticapm.set_transaction_name(transaction_name, override=False)
        if self.source == "api":
            # Passed as a callable rather than a prebuilt dict.
            elasticapm.set_context(
                lambda: get_data_from_request(
                    self.event,
                    capture_body=self.client.config.capture_body in
                    ("transactions", "all"),
                    capture_headers=self.client.config.capture_headers,
                ),
                "request",
            )
        self.set_metadata_and_context(cold_start)
Ejemplo n.º 4
0
 def is_same_kind(self, other_span: SpanType) -> bool:
     """
     Decide whether this span and ``other_span`` are of the same kind.

     For compression purposes two spans are of the same kind when type,
     subtype and destination.service.resource all match; a span without a
     resource never matches.
     :param other_span: another span object
     :return: bool
     """
     own_resource = nested_key(self.context, "destination", "service", "resource")
     if not (self.type == other_span.type and self.subtype == other_span.subtype):
         return False
     if not own_resource:
         return False
     other_resource = nested_key(other_span.context, "destination", "service", "resource")
     return bool(own_resource == other_resource)
Ejemplo n.º 5
0
def get_url_dict(event: dict) -> dict:
    """
    Reconstruct the request URL from an API Gateway event.

    The query string comes from "rawQueryString" when that key is present,
    otherwise it is rebuilt from "queryStringParameters". In both cases a
    non-empty query starts with "?" and multiple parameters are joined
    with "&".

    :param event: the raw event dict handed to the handler
    :return: dict with "full", "protocol", "hostname" and "pathname", plus
        "port" and "search" when those values are available
    """
    # dict.get() only uses its default when the key is missing; an explicit
    # null "headers" value must be treated like an empty mapping too.
    headers = event.get("headers") or {}
    protocol = headers.get("X-Forwarded-Proto",
                           headers.get("x-forwarded-proto", "https"))
    host = headers.get("Host", headers.get("host", ""))
    stage = "/" + (nested_key(event, "requestContext", "stage") or "")
    # Without "path", "rawPath" is used with everything up to the stage removed.
    path = event.get("path", event.get("rawPath", "").split(stage)[-1])
    port = headers.get("X-Forwarded-Port", headers.get("x-forwarded-port"))

    query = ""
    if "rawQueryString" in event:
        query = event["rawQueryString"] or ""
        # The query is appended straight after the path, so it needs its
        # own "?" delimiter.
        if query and not query.startswith("?"):
            query = "?" + query
    elif event.get("queryStringParameters"):
        query = "?" + "&".join(
            "{}={}".format(k, v)
            for k, v in event["queryStringParameters"].items())
    url = protocol + "://" + host + stage + path + query

    url_dict = {
        "full": encoding.keyword_field(url),
        "protocol": protocol,
        "hostname": encoding.keyword_field(host),
        "pathname": encoding.keyword_field(stage + path),
    }

    if port:
        url_dict["port"] = port
    if query:
        url_dict["search"] = encoding.keyword_field(query)
    return url_dict
Ejemplo n.º 6
0
 def to_dict(self) -> dict:
     """
     Serialize this span into a plain dict.

     A composite span using the "same_kind" compression strategy that has a
     destination service resource is named "Calls to <resource>"; every
     other span keeps its own name. Labels are stored under the "tags" key
     of the context, and the resource context is auto-filled before the
     context is attached.
     """
     collapsed_same_kind = (
         self.composite
         and self.composite["compression_strategy"] == "same_kind"
         and nested_key(self.context, "destination", "service", "resource")
     )
     if collapsed_same_kind:
         span_name = "Calls to " + self.context["destination"]["service"]["resource"]
     else:
         span_name = self.name

     # use either the explicitly set parent_span_id, or the id of the parent, or finally the transaction id
     if self.parent_span_id:
         parent_id = self.parent_span_id
     elif self.parent:
         parent_id = self.parent.id
     else:
         parent_id = self.transaction.id

     payload = {
         "id": self.id,
         "transaction_id": self.transaction.id,
         "trace_id": self.transaction.trace_parent.trace_id,
         "parent_id": parent_id,
         "name": encoding.keyword_field(span_name),
         "type": encoding.keyword_field(self.type),
         "subtype": encoding.keyword_field(self.subtype),
         "action": encoding.keyword_field(self.action),
         "timestamp": int(self.timestamp * 1000000),  # microseconds
         "duration": self.duration * 1000,  # milliseconds
         "outcome": self.outcome,
     }

     sample_rate = self.transaction.sample_rate
     if sample_rate is not None:
         payload["sample_rate"] = float(sample_rate)
     if self.sync is not None:
         payload["sync"] = self.sync

     if self.labels:
         if self.context is None:
             self.context = {}
         self.context["tags"] = self.labels
     if self.context:
         self.autofill_resource_context()
         payload["context"] = self.context

     if self.frames:
         payload["stacktrace"] = self.frames
     if self.composite:
         composite = self.composite
         payload["composite"] = {
             "compression_strategy": composite["compression_strategy"],
             "sum": composite["sum"] * 1000,
             "count": composite["count"],
         }
     return payload
Ejemplo n.º 7
0
 def to_dict(self) -> dict:
     """
     Serialize this span into a plain dict.

     A composite span using the "same_kind" compression strategy that has a
     destination service resource is named "Calls to <resource>"; every
     other span keeps its own name. Labels are stored under the "tags" key
     of the context. When the context lacks a destination service resource
     and the span is a leaf (or has "destination", "db", "message" or "http"
     context), the resource is derived here before the context is attached.
     """
     use_resource_name = (
         self.composite
         and self.composite["compression_strategy"] == "same_kind"
         and nested_key(self.context, "destination", "service", "resource")
     )
     if use_resource_name:
         span_name = "Calls to " + self.context["destination"]["service"]["resource"]
     else:
         span_name = self.name

     # use either the explicitly set parent_span_id, or the id of the parent, or finally the transaction id
     if self.parent_span_id:
         parent_id = self.parent_span_id
     elif self.parent:
         parent_id = self.parent.id
     else:
         parent_id = self.transaction.id

     payload = {
         "id": self.id,
         "transaction_id": self.transaction.id,
         "trace_id": self.transaction.trace_parent.trace_id,
         "parent_id": parent_id,
         "name": encoding.keyword_field(span_name),
         "type": encoding.keyword_field(self.type),
         "subtype": encoding.keyword_field(self.subtype),
         "action": encoding.keyword_field(self.action),
         "timestamp": int(self.timestamp * 1000000),  # microseconds
         "duration": self.duration * 1000,  # milliseconds
         "outcome": self.outcome,
     }

     sample_rate = self.transaction.sample_rate
     if sample_rate is not None:
         payload["sample_rate"] = float(sample_rate)
     if self.sync is not None:
         payload["sync"] = self.sync

     if self.labels:
         if self.context is None:
             self.context = {}
         self.context["tags"] = self.labels

     if self.context:
         resource = nested_key(self.context, "destination", "service", "resource")
         must_infer = not resource and (
             self.leaf or any(k in self.context for k in ("destination", "db", "message", "http"))
         )
         if must_infer:
             type_info = self.subtype or self.type
             db_instance = nested_key(self.context, "db", "instance")
             queue = nested_key(self.context, "message", "queue", "name")
             url = nested_key(self.context, "http", "url")
             # Precedence: db instance, then queue name, then http url, then the bare type.
             if db_instance:
                 resource = f"{type_info}/{db_instance}"
             elif queue:
                 resource = f"{type_info}/{queue}"
             elif url:
                 resource = url_to_destination_resource(url)
             else:
                 resource = type_info
             service = self.context.setdefault("destination", {}).setdefault("service", {})
             service["resource"] = resource
             # set fields that are deprecated, but still required by APM Server API
             service.setdefault("name", "")
             service.setdefault("type", "")
         payload["context"] = self.context

     if self.frames:
         payload["stacktrace"] = self.frames
     if self.composite:
         composite = self.composite
         payload["composite"] = {
             "compression_strategy": composite["compression_strategy"],
             "sum": composite["sum"] * 1000,
             "count": composite["count"],
         }
     return payload
Ejemplo n.º 8
0
    def set_metadata_and_context(self, coldstart: bool) -> None:
        """
        Process the metadata and context fields for this request.

        Builds the "cloud", "service", "faas" and (for SQS/SNS) "message"
        contexts from ``self.event`` according to ``self.source``, sets them
        on the current transaction, and adds service/cloud metadata taken
        from the environment and the invocation context to the transport.

        :param coldstart: value stored as ``faas["coldstart"]``
        """
        metadata = {}
        cloud_context = {"origin": {"provider": "aws"}}
        service_context = {}
        message_context = {}

        faas = {}
        faas["coldstart"] = coldstart
        faas["trigger"] = {"type": "other"}
        faas["execution"] = self.context.aws_request_id

        if self.source == "api":
            request_context = self.event["requestContext"]
            faas["trigger"]["type"] = "http"
            faas["trigger"]["request_id"] = request_context["requestId"]
            # Prefer "resourcePath"; otherwise use the http path with
            # everything up to the stage removed.
            path = (request_context.get("resourcePath")
                    or request_context["http"]["path"].split(
                        request_context["stage"])[-1])
            service_context["origin"] = {
                "name":
                "{} {}/{}".format(
                    self.httpmethod,
                    request_context["stage"],
                    path,
                )
            }
            service_context["origin"]["id"] = request_context["apiId"]
            service_context["origin"]["version"] = self.event.get(
                "version", "1.0")
            cloud_context["origin"] = {}
            cloud_context["origin"]["service"] = {"name": "api gateway"}
            cloud_context["origin"]["account"] = {
                "id": request_context["accountId"]
            }
            cloud_context["origin"]["provider"] = "aws"
        elif self.source == "sqs":
            record = self.event["Records"][0]
            queue_arn = record["eventSourceARN"]
            faas["trigger"]["type"] = "pubsub"
            faas["trigger"]["request_id"] = record["messageId"]
            service_context["origin"] = {}
            # ARN fields are colon separated: index 4 is used as the account
            # id and index 5 as the queue name.
            service_context["origin"]["name"] = queue_arn.split(":")[5]
            service_context["origin"]["id"] = queue_arn
            cloud_context["origin"] = {}
            cloud_context["origin"]["service"] = {"name": "sqs"}
            cloud_context["origin"]["region"] = record["awsRegion"]
            cloud_context["origin"]["account"] = {
                "id": queue_arn.split(":")[4]
            }
            cloud_context["origin"]["provider"] = "aws"
            message_context["queue"] = service_context["origin"]["name"]
            if "SentTimestamp" in record["attributes"]:
                # SentTimestamp is treated as epoch milliseconds.
                message_context["age"] = {
                    "ms":
                    int((time.time() * 1000) -
                        int(record["attributes"]["SentTimestamp"]))
                }
            if self.client.config.capture_body in ("transactions",
                                                   "all") and "body" in record:
                message_context["body"] = record["body"]
            if self.client.config.capture_headers and record.get(
                    "messageAttributes"):
                message_context["headers"] = record["messageAttributes"]
        elif self.source == "sns":
            record = self.event["Records"][0]
            sns = record["Sns"]
            topic_arn = sns["TopicArn"]
            faas["trigger"]["type"] = "pubsub"
            faas["trigger"]["request_id"] = topic_arn
            service_context["origin"] = {}
            # ARN fields are colon separated: index 3 is used as the region,
            # index 4 as the account id and index 5 as the topic name.
            service_context["origin"]["name"] = topic_arn.split(":")[5]
            service_context["origin"]["id"] = topic_arn
            service_context["origin"]["version"] = record["EventVersion"]
            # NOTE(review): the other branches put the originating service
            # name under cloud_context["origin"]["service"] and the account
            # under ["account"]["id"]; kept as-is here - confirm the schema.
            service_context["origin"]["service"] = {"name": "sns"}
            cloud_context["origin"] = {}
            cloud_context["origin"]["region"] = topic_arn.split(":")[3]
            cloud_context["origin"]["account_id"] = topic_arn.split(":")[4]
            cloud_context["origin"]["provider"] = "aws"
            message_context["queue"] = service_context["origin"]["name"]
            if "Timestamp" in sns:
                # The timestamp format ends in a literal "Z" (UTC), so it has
                # to be compared against the current UTC time, not against
                # naive local time.
                published = datetime.datetime.strptime(
                    sns["Timestamp"], r"%Y-%m-%dT%H:%M:%S.%fZ")
                now_utc = datetime.datetime.now(
                    datetime.timezone.utc).replace(tzinfo=None)
                message_context["age"] = {
                    "ms": int((now_utc - published).total_seconds() * 1000)
                }
            if self.client.config.capture_body in (
                    "transactions", "all") and "Message" in sns:
                message_context["body"] = sns["Message"]
            if self.client.config.capture_headers and sns.get(
                    "MessageAttributes"):
                message_context["headers"] = sns["MessageAttributes"]
        elif self.source == "s3":
            record = self.event["Records"][0]
            faas["trigger"]["type"] = "datasource"
            faas["trigger"]["request_id"] = record["responseElements"][
                "x-amz-request-id"]
            service_context["origin"] = {}
            service_context["origin"]["name"] = record["s3"]["bucket"]["name"]
            service_context["origin"]["id"] = record["s3"]["bucket"]["arn"]
            service_context["origin"]["version"] = record["eventVersion"]
            cloud_context["origin"] = {}
            cloud_context["origin"]["service"] = {"name": "s3"}
            cloud_context["origin"]["region"] = record["awsRegion"]
            cloud_context["origin"]["provider"] = "aws"

        metadata["service"] = {}
        metadata["service"]["name"] = os.environ.get(
            "AWS_LAMBDA_FUNCTION_NAME")
        metadata["service"]["framework"] = {"name": "AWS Lambda"}
        metadata["service"]["runtime"] = {
            "name": os.environ.get("AWS_EXECUTION_ENV"),
            "version": platform.python_version(),
        }
        arn = self.context.invoked_function_arn
        # Keep only the first seven colon-separated fields of the ARN.
        if len(arn.split(":")) > 7:
            arn = ":".join(arn.split(":")[:7])
        metadata["service"]["id"] = arn
        metadata["service"]["version"] = os.environ.get(
            "AWS_LAMBDA_FUNCTION_VERSION")
        metadata["service"]["node"] = {
            "configured_name": os.environ.get("AWS_LAMBDA_LOG_STREAM_NAME")
        }
        # This is the one piece of metadata that requires deep merging. We add it manually
        # here to avoid having to deep merge in _transport.add_metadata()
        if self.client._transport._metadata:
            node_name = nested_key(self.client._transport._metadata, "service",
                                   "node", "name")
            if node_name:
                metadata["service"]["node"]["name"] = node_name

        metadata["cloud"] = {}
        metadata["cloud"]["provider"] = "aws"
        metadata["cloud"]["region"] = os.environ.get("AWS_REGION")
        metadata["cloud"]["service"] = {"name": "lambda"}
        metadata["cloud"]["account"] = {"id": arn.split(":")[4]}

        elasticapm.set_context(cloud_context, "cloud")
        elasticapm.set_context(service_context, "service")
        # faas doesn't actually belong in context, but we handle this in to_dict
        elasticapm.set_context(faas, "faas")
        if message_context:
            # Store the message context itself under the "message" key.
            elasticapm.set_context(message_context, "message")
        self.client._transport.add_metadata(metadata)
Ejemplo n.º 9
0
def test_nested_key(data, key, expected):
    """
    Check that ``nested_key`` resolves a dotted ``key`` inside ``data``.

    An expected value of None is compared by identity, anything else by
    equality.
    """
    path = key.split(".")
    actual = nested_key(data, *path)
    if expected is not None:
        assert actual == expected
    else:
        assert actual is expected