Example 1
    def test_list_entries_no_paging(self):
        from google.cloud.logging_v2.client import Client
        from google.cloud.logging_v2.entries import TextEntry
        from google.cloud.logging_v2.logger import Logger

        NOW, TIMESTAMP = self._make_timestamp()
        IID = "IID"
        TEXT = "TEXT"
        SENT = {"resourceNames": [self.PROJECT_PATH]}
        TOKEN = "TOKEN"
        RETURNED = {
            "entries": [{
                "textPayload": TEXT,
                "insertId": IID,
                "resource": {"type": "global"},
                "timestamp": TIMESTAMP,
                "logName": f"projects/{self.PROJECT}/logs/{self.LOGGER_NAME}",
            }],
            "nextPageToken": TOKEN,
        }
        client = Client(project=self.PROJECT,
                        credentials=_make_credentials(),
                        _use_grpc=False)
        client._connection = _Connection(RETURNED)
        api = self._make_one(client)

        iterator = api.list_entries([self.PROJECT_PATH])
        page = next(iterator.pages)
        entries = list(page)
        token = iterator.next_page_token

        # First check the token.
        self.assertEqual(token, TOKEN)
        # Then check the entries returned.
        self.assertEqual(len(entries), 1)
        entry = entries[0]
        self.assertIsInstance(entry, TextEntry)
        self.assertEqual(entry.payload, TEXT)
        self.assertIsInstance(entry.logger, Logger)
        self.assertEqual(entry.logger.name, self.LOGGER_NAME)
        self.assertEqual(entry.insert_id, IID)
        self.assertEqual(entry.timestamp, NOW)
        self.assertIsNone(entry.labels)
        self.assertIsNone(entry.severity)
        self.assertIsNone(entry.http_request)

        called_with = client._connection._called_with
        expected_path = "/%s" % (self.LIST_ENTRIES_PATH, )
        self.assertEqual(called_with, {
            "method": "POST",
            "path": expected_path,
            "data": SENT
        })
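
For reference, a minimal standalone sketch of the same call against a live client rather than the stubbed connection; the project id is a placeholder, Application Default Credentials are assumed, and with the default gRPC transport the exact iterator type may differ from the REST path exercised in the test:

from google.cloud.logging_v2.client import Client

client = Client(project="my-project")  # placeholder project id
iterator = client.list_entries(resource_names=["projects/my-project"])
page = next(iterator.pages)            # one page of entry objects
for entry in page:
    print(entry.insert_id, entry.payload)
print(iterator.next_page_token)        # token for the next page, or None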
Example 2
    def test_list_entries_explicit(self):
        from google.cloud.logging_v2 import DESCENDING
        from google.cloud.logging_v2.client import Client

        PROJECT1 = "PROJECT1"
        PROJECT2 = "PROJECT2"
        INPUT_FILTER = "resource.type:global"
        TOKEN = "TOKEN"
        PAGE_SIZE = 42
        client = Client(project=self.PROJECT,
                        credentials=_make_credentials(),
                        _use_grpc=False)
        client._connection = _Connection({})
        logger = self._make_one(self.LOGGER_NAME, client=client)
        iterator = logger.list_entries(
            resource_names=[f"projects/{PROJECT1}", f"projects/{PROJECT2}"],
            filter_=INPUT_FILTER,
            order_by=DESCENDING,
            page_size=PAGE_SIZE,
            page_token=TOKEN,
        )
        entries = list(iterator)
        token = iterator.next_page_token

        self.assertEqual(len(entries), 0)
        self.assertIsNone(token)
        # self.assertEqual(client._listed, LISTED)
        # check call payload
        call_payload_no_filter = deepcopy(client._connection._called_with)
        call_payload_no_filter["data"]["filter"] = "removed"
        self.assertEqual(
            call_payload_no_filter,
            {
                "method": "POST",
                "path": "/entries:list",
                "data": {
                    "filter":
                    "removed",
                    "orderBy":
                    DESCENDING,
                    "pageSize":
                    PAGE_SIZE,
                    "pageToken":
                    TOKEN,
                    "resourceNames":
                    [f"projects/{PROJECT1}", f"projects/{PROJECT2}"],
                },
            },
        )
        # verify that default filter is 24 hours
        LOG_FILTER = "logName=projects/%s/logs/%s" % (
            self.PROJECT,
            self.LOGGER_NAME,
        )
        combined_filter = (INPUT_FILTER + " AND " + LOG_FILTER + " AND " +
                           "timestamp>=" + self.TIME_FORMAT)
        timestamp = datetime.strptime(
            client._connection._called_with["data"]["filter"], combined_filter)
        yesterday = datetime.now(timezone.utc) - timedelta(days=1)
        self.assertLess(yesterday - timestamp, timedelta(minutes=1))
Example 3
    def test_list_entries_explicit_timestamp(self):
        from google.cloud.logging_v2 import DESCENDING
        from google.cloud.logging_v2.client import Client

        PROJECT1 = "PROJECT1"
        PROJECT2 = "PROJECT2"
        INPUT_FILTER = 'resource.type:global AND timestamp="2020-10-13T21"'
        TOKEN = "TOKEN"
        PAGE_SIZE = 42
        client = Client(project=self.PROJECT,
                        credentials=_make_credentials(),
                        _use_grpc=False)
        client._connection = _Connection({})
        logger = self._make_one(self.LOGGER_NAME, client=client)
        iterator = logger.list_entries(
            resource_names=[f"projects/{PROJECT1}", f"projects/{PROJECT2}"],
            filter_=INPUT_FILTER,
            order_by=DESCENDING,
            page_size=PAGE_SIZE,
            page_token=TOKEN,
        )
        entries = list(iterator)
        token = iterator.next_page_token

        self.assertEqual(len(entries), 0)
        self.assertIsNone(token)
        # self.assertEqual(client._listed, LISTED)
        # check call payload
        LOG_FILTER = "logName=projects/%s/logs/%s" % (
            self.PROJECT,
            self.LOGGER_NAME,
        )
        combined_filter = INPUT_FILTER + " AND " + LOG_FILTER
        self.assertEqual(
            client._connection._called_with,
            {
                "method": "POST",
                "path": "/entries:list",
                "data": {
                    "filter":
                    combined_filter,
                    "orderBy":
                    DESCENDING,
                    "pageSize":
                    PAGE_SIZE,
                    "pageToken":
                    TOKEN,
                    "resourceNames":
                    [f"projects/{PROJECT1}", f"projects/{PROJECT2}"],
                },
            },
        )
Example 4
from typing import Dict

from google.cloud.logging import Client, Resource


# `Logger` (the base class) and its `get_trace_id()` helper are defined
# elsewhere in the application; only the Cloud Logging wrapper is shown here.
class StackDriverLogger(Logger):
    def __init__(self, project_id, service_name, region):
        self.client = Client(project=project_id)
        self.project_id = project_id
        self.service_name = service_name
        self.region = region

    def __get_resource(self):
        return Resource(
            type="cloud_run_revision",
            labels={
                "project_id": self.project_id,
                "service_name": self.service_name,
                "location": self.region,
            })

    def __log(self, severity: str, message: str, extra: Dict = None, exc_info=None):
        trace = self.get_trace_id()

        if extra or exc_info:
            struct = {"message": message}

            if extra:
                struct["extra"] = extra

            if exc_info:
                struct["exception"] = exc_info
                struct["serviceContext"] = {
                    "service": self.service_name
                }
                struct["@type"] = "type.googleapis.com/google.devtools.clouderrorreporting.v1beta1.ReportedErrorEvent"

            self.client.logger(self.service_name).log_struct(struct, severity=severity, resource=self.__get_resource(), trace=trace)
        else:
            self.client.logger(self.service_name).log_text(message, severity=severity, resource=self.__get_resource(), trace=trace)

    def debug(self, message: str, extra: Dict = None):
        self.__log("DEBUG", message, extra=extra)

    def info(self, message: str, extra: Dict = None):
        self.__log("INFO", message, extra)

    def warn(self, message: str, extra: Dict = None):
        self.__log("WARNING", message, extra)

    def error(self, message: str, extra: Dict = None, exc_info=None):
        self.__log("ERROR", message, extra=extra, exc_info=exc_info)
Example 5
    def test_list_entries_defaults(self):
        from google.cloud.logging_v2.client import Client

        TOKEN = "TOKEN"

        client = Client(project=self.PROJECT,
                        credentials=_make_credentials(),
                        _use_grpc=False)
        returned = {"nextPageToken": TOKEN}
        client._connection = _Connection(returned)

        logger = self._make_one(self.LOGGER_NAME, client=client)

        iterator = logger.list_entries()
        page = next(iterator.pages)
        entries = list(page)
        token = iterator.next_page_token

        self.assertEqual(len(entries), 0)
        self.assertEqual(token, TOKEN)
        LOG_FILTER = "logName=projects/%s/logs/%s" % (self.PROJECT,
                                                      self.LOGGER_NAME)

        # check call payload
        call_payload_no_filter = deepcopy(client._connection._called_with)
        call_payload_no_filter["data"]["filter"] = "removed"
        self.assertEqual(
            call_payload_no_filter,
            {
                "path": "/entries:list",
                "method": "POST",
                "data": {
                    "filter": "removed",
                    "resourceNames": [f"projects/{self.PROJECT}"],
                },
            },
        )
        # verify that default filter is 24 hours
        timestamp = datetime.strptime(
            client._connection._called_with["data"]["filter"],
            LOG_FILTER + " AND timestamp>=" + self.TIME_FORMAT,
        )
        yesterday = datetime.now(timezone.utc) - timedelta(days=1)
        self.assertLess(yesterday - timestamp, timedelta(minutes=1))
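
The strptime call in these tests may look unusual: the entire expected filter doubles as the format string, with the timestamp as the only variable part. A small standalone sketch of that trick, assuming TIME_FORMAT has the shape shown below:

from datetime import datetime

TIME_FORMAT = '"%Y-%m-%dT%H:%M:%S.%f%z"'  # assumed shape of the test's TIME_FORMAT
sent_filter = 'logName=projects/p/logs/l AND timestamp>="2024-01-01T00:00:00.000000+00:00"'
# Everything outside the %-codes is treated as literal text, so strptime both
# validates the fixed portion of the filter and extracts the timestamp.
lower_bound = datetime.strptime(
    sent_filter, "logName=projects/p/logs/l AND timestamp>=" + TIME_FORMAT)
print(lower_bound)  # 2024-01-01 00:00:00+00:00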
Example 6
    def __init__(self, config: BigQueryUsageConfig, ctx: PipelineContext):
        super().__init__(ctx)
        self.config = config
        self.report = BigQueryUsageSourceReport()

        client_options = self.config.extra_client_options.copy()
        if self.config.project_id is not None:
            client_options["project"] = self.config.project_id

        # See https://github.com/googleapis/google-cloud-python/issues/2674 for
        # why we disable gRPC here.
        self.client = GCPLoggingClient(**client_options, _use_grpc=False)
Example 7
from logging import DEBUG, INFO, Formatter, Logger, StreamHandler, getLogger

from google.cloud.logging import Client
from google.cloud.logging.handlers import CloudLoggingHandler


def get_logger(logger_name='stocker') -> Logger:
    formatter = Formatter("%(asctime)s %(levelname)s: %(message)s")

    # Console handler: INFO and above.
    s_handler = StreamHandler()
    s_handler.setFormatter(formatter)
    s_handler.setLevel(INFO)

    # Cloud Logging handler: DEBUG and above.
    # client = Client(project=ProjectConfig.ID,
    #                 credentials=ProjectConfig.CREDENTIAL)
    client = Client()
    g_handler = CloudLoggingHandler(client)
    g_handler.setFormatter(formatter)
    g_handler.setLevel(DEBUG)

    logger = getLogger(logger_name)
    logger.setLevel(DEBUG)
    logger.addHandler(s_handler)
    logger.addHandler(g_handler)
    logger.propagate = False

    return logger
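
A short usage sketch, assuming Application Default Credentials are available for the Cloud Logging client:

log = get_logger("stocker")
log.info("starting price fetch")  # reaches both the console and Cloud Logging
log.debug("verbose detail")       # shipped to Cloud Logging only, since the
                                  # StreamHandler is set to INFO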
Example 8
class BigQueryUsageSource(Source):
    config: BigQueryUsageConfig
    report: BigQueryUsageSourceReport

    client: GCPLoggingClient

    def __init__(self, config: BigQueryUsageConfig, ctx: PipelineContext):
        super().__init__(ctx)
        self.config = config
        self.report = BigQueryUsageSourceReport()

        client_options = self.config.extra_client_options.copy()
        if self.config.project_id is not None:
            client_options["project"] = self.config.project_id

        # See https://github.com/googleapis/google-cloud-python/issues/2674 for
        # why we disable gRPC here.
        self.client = GCPLoggingClient(**client_options, _use_grpc=False)

    @classmethod
    def create(cls, config_dict: dict,
               ctx: PipelineContext) -> "BigQueryUsageSource":
        config = BigQueryUsageConfig.parse_obj(config_dict)
        return cls(config, ctx)

    def get_workunits(self) -> Iterable[UsageStatsWorkUnit]:
        bigquery_log_entries = self._get_bigquery_log_entries()
        parsed_events = self._parse_bigquery_log_entries(bigquery_log_entries)
        hydrated_read_events = self._join_events_by_job_id(parsed_events)
        aggregated_info = self._aggregate_enriched_read_events(
            hydrated_read_events)

        for time_bucket in aggregated_info.values():
            for aggregate in time_bucket.values():
                wu = self._make_usage_stat(aggregate)
                self.report.report_workunit(wu)
                yield wu

    def _get_bigquery_log_entries(self) -> Iterable[AuditLogEntry]:
        filter = BQ_FILTER_RULE_TEMPLATE.format(
            start_time=self.config.start_time.strftime(BQ_DATETIME_FORMAT),
            end_time=self.config.end_time.strftime(BQ_DATETIME_FORMAT),
        )

        entry: AuditLogEntry
        for i, entry in enumerate(
                self.client.list_entries(filter_=filter,
                                         page_size=GCP_LOGGING_PAGE_SIZE)):
            if i == 0:
                logger.debug("starting log load from BigQuery")
            yield entry
        logger.debug("finished loading log entries from BigQuery")

    def _parse_bigquery_log_entries(
        self, entries: Iterable[AuditLogEntry]
    ) -> Iterable[Union[ReadEvent, QueryEvent]]:
        for entry in entries:
            event: Union[None, ReadEvent, QueryEvent] = None
            if ReadEvent.can_parse_entry(entry):
                event = ReadEvent.from_entry(entry)
            elif QueryEvent.can_parse_entry(entry):
                event = QueryEvent.from_entry(entry)
            else:
                self.report.report_failure(
                    f"{entry.log_name}-{entry.insert_id}",
                    f"unable to parse log entry: {entry!r}",
                )
            if event:
                yield event

    def _join_events_by_job_id(
            self, events: Iterable[Union[ReadEvent,
                                         QueryEvent]]) -> Iterable[ReadEvent]:
        # If caching eviction is enabled, we only store the most recently used query events,
        # which are used when resolving job information within the read events.
        query_jobs: MutableMapping[str, QueryEvent]
        if self.config.query_log_delay:
            query_jobs = cachetools.LRUCache(maxsize=5 *
                                             self.config.query_log_delay)
        else:
            query_jobs = {}

        def event_processor(
            events: Iterable[Union[ReadEvent,
                                   QueryEvent]]) -> Iterable[ReadEvent]:
            for event in events:
                if isinstance(event, QueryEvent):
                    query_jobs[event.jobName] = event
                else:
                    yield event

        # TRICKY: To account for the possibility that the query event arrives after
        # the read event in the audit logs, we wait for at least `query_log_delay`
        # additional events to be processed before attempting to resolve BigQuery
        # job information from the logs. If `query_log_delay` is None, it gets treated
        # as an unlimited delay, which prioritizes correctness at the expense of memory usage.
        original_read_events = event_processor(events)
        delayed_read_events = delayed_iter(original_read_events,
                                           self.config.query_log_delay)

        for event in delayed_read_events:
            if event.jobName:
                if event.jobName in query_jobs:
                    # Join the query log event into the table read log event.
                    event.query = query_jobs[event.jobName].query

                    # TODO also join into the query itself for column references
                else:
                    self.report.report_warning(
                        "<general>",
                        "failed to match table read event with job; try increasing `query_log_delay`",
                    )

            yield event

    def _aggregate_enriched_read_events(
        self, events: Iterable[ReadEvent]
    ) -> Dict[datetime, Dict[BigQueryTableRef, AggregatedDataset]]:
        # TODO: handle partitioned tables

        # TODO: perhaps we need to continuously prune this, rather than
        # storing it all in one big object.
        datasets: Dict[datetime,
                       Dict[BigQueryTableRef,
                            AggregatedDataset]] = collections.defaultdict(dict)

        for event in events:
            floored_ts = get_time_bucket(event.timestamp,
                                         self.config.bucket_duration)
            resource = event.resource.remove_extras()

            if resource.is_anonymous():
                self.report.report_dropped(str(resource))
                continue

            agg_bucket = datasets[floored_ts].setdefault(
                resource,
                AggregatedDataset(bucket_start_time=floored_ts,
                                  resource=resource),
            )
            agg_bucket.add_read_entry(event.actor_email, event.query,
                                      event.fieldsRead)

        return datasets

    def _make_usage_stat(self, agg: AggregatedDataset) -> UsageStatsWorkUnit:
        return agg.make_usage_workunit(
            self.config.bucket_duration,
            lambda resource: _table_ref_to_urn(resource, self.config.env),
            self.config.top_n_queries,
        )

    def get_report(self) -> SourceReport:
        return self.report
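
A hedged sketch of wiring the source up end to end; the config key shown and the PipelineContext constructor argument are assumptions based on how they are used above and may differ across DataHub versions:

ctx = PipelineContext(run_id="bigquery-usage-demo")
source = BigQueryUsageSource.create({"project_id": "my-project"}, ctx)
for wu in source.get_workunits():
    print(wu.id)
print(source.get_report())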
Example 9
    def __init__(self, project_id, service_name, region):
        self.client = Client(project=project_id)
        self.project_id = project_id
        self.service_name = service_name
        self.region = region
Example 10
    def test_list_entries_w_paging(self):
        from google.cloud.logging_v2 import DESCENDING
        from google.cloud.logging_v2.client import Client
        from google.cloud.logging_v2.logger import Logger
        from google.cloud.logging_v2.entries import ProtobufEntry
        from google.cloud.logging_v2.entries import StructEntry

        PROJECT1 = "PROJECT1"
        PROJECT1_PATH = f"projects/{PROJECT1}"
        PROJECT2 = "PROJECT2"
        PROJECT2_PATH = f"projects/{PROJECT2}"
        NOW, TIMESTAMP = self._make_timestamp()
        IID1 = "IID1"
        IID2 = "IID2"
        PAYLOAD = {"message": "MESSAGE", "weather": "partly cloudy"}
        PROTO_PAYLOAD = PAYLOAD.copy()
        PROTO_PAYLOAD["@type"] = "type.googleapis.com/testing.example"
        TOKEN = "TOKEN"
        PAGE_SIZE = 42
        SENT = {
            "resourceNames": [PROJECT1_PATH, PROJECT2_PATH],
            "filter": self.FILTER,
            "orderBy": DESCENDING,
            "pageSize": PAGE_SIZE,
            "pageToken": TOKEN,
        }
        RETURNED = {
            "entries": [
                {
                    "jsonPayload": PAYLOAD,
                    "insertId": IID1,
                    "resource": {"type": "global"},
                    "timestamp": TIMESTAMP,
                    "logName": f"projects/{self.PROJECT}/logs/{self.LOGGER_NAME}",
                },
                {
                    "protoPayload": PROTO_PAYLOAD,
                    "insertId": IID2,
                    "resource": {"type": "global"},
                    "timestamp": TIMESTAMP,
                    "logName": f"projects/{self.PROJECT}/logs/{self.LOGGER_NAME}",
                },
            ]
        }
        client = Client(project=self.PROJECT,
                        credentials=_make_credentials(),
                        _use_grpc=False)
        client._connection = _Connection(RETURNED)
        api = self._make_one(client)

        iterator = api.list_entries(
            resource_names=[PROJECT1_PATH, PROJECT2_PATH],
            filter_=self.FILTER,
            order_by=DESCENDING,
            page_size=PAGE_SIZE,
            page_token=TOKEN,
        )
        entries = list(iterator)
        token = iterator.next_page_token

        # First check the token.
        self.assertIsNone(token)
        # Then check the entries returned.
        self.assertEqual(len(entries), 2)
        entry1 = entries[0]
        self.assertIsInstance(entry1, StructEntry)
        self.assertEqual(entry1.payload, PAYLOAD)
        self.assertIsInstance(entry1.logger, Logger)
        self.assertEqual(entry1.logger.name, self.LOGGER_NAME)
        self.assertEqual(entry1.insert_id, IID1)
        self.assertEqual(entry1.timestamp, NOW)
        self.assertIsNone(entry1.labels)
        self.assertIsNone(entry1.severity)
        self.assertIsNone(entry1.http_request)

        entry2 = entries[1]
        self.assertIsInstance(entry2, ProtobufEntry)
        self.assertEqual(entry2.payload, PROTO_PAYLOAD)
        self.assertIsInstance(entry2.logger, Logger)
        self.assertEqual(entry2.logger.name, self.LOGGER_NAME)
        self.assertEqual(entry2.insert_id, IID2)
        self.assertEqual(entry2.timestamp, NOW)
        self.assertIsNone(entry2.labels)
        self.assertIsNone(entry2.severity)
        self.assertIsNone(entry2.http_request)

        called_with = client._connection._called_with
        expected_path = "/%s" % (self.LIST_ENTRIES_PATH, )
        self.assertEqual(called_with, {
            "method": "POST",
            "path": expected_path,
            "data": SENT
        })