def test_list_entries_no_paging(self):
    from google.cloud.logging_v2.client import Client
    from google.cloud.logging_v2.entries import TextEntry
    from google.cloud.logging_v2.logger import Logger

    NOW, TIMESTAMP = self._make_timestamp()
    IID = "IID"
    TEXT = "TEXT"
    SENT = {"resourceNames": [self.PROJECT_PATH]}
    TOKEN = "TOKEN"
    RETURNED = {
        "entries": [
            {
                "textPayload": TEXT,
                "insertId": IID,
                "resource": {"type": "global"},
                "timestamp": TIMESTAMP,
                "logName": f"projects/{self.PROJECT}/logs/{self.LOGGER_NAME}",
            }
        ],
        "nextPageToken": TOKEN,
    }
    client = Client(
        project=self.PROJECT, credentials=_make_credentials(), _use_grpc=False
    )
    client._connection = _Connection(RETURNED)
    api = self._make_one(client)

    iterator = api.list_entries([self.PROJECT_PATH])
    page = next(iterator.pages)
    entries = list(page)
    token = iterator.next_page_token

    # First check the token.
    self.assertEqual(token, TOKEN)
    # Then check the entries returned.
    self.assertEqual(len(entries), 1)
    entry = entries[0]
    self.assertIsInstance(entry, TextEntry)
    self.assertEqual(entry.payload, TEXT)
    self.assertIsInstance(entry.logger, Logger)
    self.assertEqual(entry.logger.name, self.LOGGER_NAME)
    self.assertEqual(entry.insert_id, IID)
    self.assertEqual(entry.timestamp, NOW)
    self.assertIsNone(entry.labels)
    self.assertIsNone(entry.severity)
    self.assertIsNone(entry.http_request)

    called_with = client._connection._called_with
    expected_path = "/%s" % (self.LIST_ENTRIES_PATH,)
    self.assertEqual(
        called_with, {"method": "POST", "path": expected_path, "data": SENT}
    )
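# The tests in this section rely on a few module-level helpers that are not
# shown here (`_Connection`, `_make_credentials`, `_make_timestamp`, etc.).
# Below is a minimal sketch of what the connection stub and the credentials
# factory might look like; treat it as an illustrative assumption, not the
# test suite's actual code.
from unittest import mock


def _make_credentials():
    import google.auth.credentials

    # A mock that satisfies the Credentials interface without real auth.
    return mock.Mock(spec=google.auth.credentials.Credentials)


class _Connection(object):
    # Records the last api_request() call and replays canned responses.
    _called_with = None

    def __init__(self, *responses):
        self._responses = responses

    def api_request(self, **kw):
        self._called_with = kw
        response, self._responses = self._responses[0], self._responses[1:]
        return response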
def test_list_entries_explicit(self):
    from google.cloud.logging_v2 import DESCENDING
    from google.cloud.logging_v2.client import Client

    PROJECT1 = "PROJECT1"
    PROJECT2 = "PROJECT2"
    INPUT_FILTER = "resource.type:global"
    TOKEN = "TOKEN"
    PAGE_SIZE = 42
    client = Client(
        project=self.PROJECT, credentials=_make_credentials(), _use_grpc=False
    )
    client._connection = _Connection({})
    logger = self._make_one(self.LOGGER_NAME, client=client)

    iterator = logger.list_entries(
        resource_names=[f"projects/{PROJECT1}", f"projects/{PROJECT2}"],
        filter_=INPUT_FILTER,
        order_by=DESCENDING,
        page_size=PAGE_SIZE,
        page_token=TOKEN,
    )
    entries = list(iterator)
    token = iterator.next_page_token

    self.assertEqual(len(entries), 0)
    self.assertIsNone(token)
    # self.assertEqual(client._listed, LISTED)
    # check call payload
    call_payload_no_filter = deepcopy(client._connection._called_with)
    call_payload_no_filter["data"]["filter"] = "removed"
    self.assertEqual(
        call_payload_no_filter,
        {
            "method": "POST",
            "path": "/entries:list",
            "data": {
                "filter": "removed",
                "orderBy": DESCENDING,
                "pageSize": PAGE_SIZE,
                "pageToken": TOKEN,
                "resourceNames": [f"projects/{PROJECT1}", f"projects/{PROJECT2}"],
            },
        },
    )
    # verify that default filter is 24 hours
    LOG_FILTER = "logName=projects/%s/logs/%s" % (
        self.PROJECT,
        self.LOGGER_NAME,
    )
    combined_filter = (
        INPUT_FILTER
        + " AND "
        + LOG_FILTER
        + " AND "
        + "timestamp>="
        + self.TIME_FORMAT
    )
    # TIME_FORMAT holds strptime directives, so the combined filter doubles as
    # the parse pattern for the timestamp embedded in the sent filter string.
    timestamp = datetime.strptime(
        client._connection._called_with["data"]["filter"], combined_filter
    )
    yesterday = datetime.now(timezone.utc) - timedelta(days=1)
    self.assertLess(yesterday - timestamp, timedelta(minutes=1))
def test_list_entries_explicit_timestamp(self):
    from google.cloud.logging_v2 import DESCENDING
    from google.cloud.logging_v2.client import Client

    PROJECT1 = "PROJECT1"
    PROJECT2 = "PROJECT2"
    INPUT_FILTER = 'resource.type:global AND timestamp="2020-10-13T21"'
    TOKEN = "TOKEN"
    PAGE_SIZE = 42
    client = Client(
        project=self.PROJECT, credentials=_make_credentials(), _use_grpc=False
    )
    client._connection = _Connection({})
    logger = self._make_one(self.LOGGER_NAME, client=client)

    iterator = logger.list_entries(
        resource_names=[f"projects/{PROJECT1}", f"projects/{PROJECT2}"],
        filter_=INPUT_FILTER,
        order_by=DESCENDING,
        page_size=PAGE_SIZE,
        page_token=TOKEN,
    )
    entries = list(iterator)
    token = iterator.next_page_token

    self.assertEqual(len(entries), 0)
    self.assertIsNone(token)
    # self.assertEqual(client._listed, LISTED)
    # check call payload
    LOG_FILTER = "logName=projects/%s/logs/%s" % (
        self.PROJECT,
        self.LOGGER_NAME,
    )
    combined_filter = INPUT_FILTER + " AND " + LOG_FILTER
    self.assertEqual(
        client._connection._called_with,
        {
            "method": "POST",
            "path": "/entries:list",
            "data": {
                "filter": combined_filter,
                "orderBy": DESCENDING,
                "pageSize": PAGE_SIZE,
                "pageToken": TOKEN,
                "resourceNames": [f"projects/{PROJECT1}", f"projects/{PROJECT2}"],
            },
        },
    )
# Imports assumed for this snippet; the `Logger` base class and the
# `get_trace_id()` helper are defined elsewhere in the application.
from typing import Dict

from google.cloud.logging import Client, Resource


class StackDriverLogger(Logger):
    def __init__(self, project_id, service_name, region):
        self.client = Client(project=project_id)
        self.project_id = project_id
        self.service_name = service_name
        self.region = region

    def __get_resource(self):
        # Attach entries to the Cloud Run revision that emitted them.
        return Resource(
            type="cloud_run_revision",
            labels={
                "project_id": self.project_id,
                "service_name": self.service_name,
                "location": self.region,
            },
        )

    def __log(self, severity: str, message: str, extra: Dict = None, exc_info=None):
        trace = self.get_trace_id()
        if extra or exc_info:
            struct = {"message": message}
            if extra:
                struct["extra"] = extra
            if exc_info:
                struct["exception"] = exc_info
                struct["serviceContext"] = {"service": self.service_name}
                # Mark the entry so Cloud Error Reporting picks it up.
                struct["@type"] = (
                    "type.googleapis.com/google.devtools.clouderrorreporting.v1beta1.ReportedErrorEvent"
                )
            self.client.logger(self.service_name).log_struct(
                struct,
                severity=severity,
                resource=self.__get_resource(),
                trace=trace,
            )
        else:
            self.client.logger(self.service_name).log_text(
                message,
                severity=severity,
                resource=self.__get_resource(),
                trace=trace,
            )

    def debug(self, message: str, extra: Dict = None):
        self.__log("DEBUG", message, extra=extra)

    def info(self, message: str, extra: Dict = None):
        self.__log("INFO", message, extra)

    def warn(self, message: str, extra: Dict = None):
        self.__log("WARNING", message, extra)

    def error(self, message: str, extra: Dict = None, exc_info=None):
        self.__log("ERROR", message, extra=extra, exc_info=exc_info)
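# A hedged usage sketch for the wrapper above, assuming the surrounding
# application supplies the `Logger` base class and a working `get_trace_id()`.
# The project/service/region values are placeholders, not values from the
# original code.
def _example_stackdriver_usage():
    log = StackDriverLogger("my-project", "my-service", "us-central1")
    log.info("service started", extra={"version": "1.2.3"})
    try:
        1 / 0
    except ZeroDivisionError:
        import traceback

        # exc_info is stored verbatim in the structured payload, so a
        # formatted traceback string is a reasonable value to pass here.
        log.error("division failed", exc_info=traceback.format_exc())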
def test_list_entries_defaults(self):
    from google.cloud.logging_v2.client import Client

    TOKEN = "TOKEN"
    client = Client(
        project=self.PROJECT, credentials=_make_credentials(), _use_grpc=False
    )
    returned = {"nextPageToken": TOKEN}
    client._connection = _Connection(returned)
    logger = self._make_one(self.LOGGER_NAME, client=client)

    iterator = logger.list_entries()
    page = next(iterator.pages)
    entries = list(page)
    token = iterator.next_page_token

    self.assertEqual(len(entries), 0)
    self.assertEqual(token, TOKEN)
    LOG_FILTER = "logName=projects/%s/logs/%s" % (
        self.PROJECT,
        self.LOGGER_NAME,
    )
    # check call payload
    call_payload_no_filter = deepcopy(client._connection._called_with)
    call_payload_no_filter["data"]["filter"] = "removed"
    self.assertEqual(
        call_payload_no_filter,
        {
            "path": "/entries:list",
            "method": "POST",
            "data": {
                "filter": "removed",
                "resourceNames": [f"projects/{self.PROJECT}"],
            },
        },
    )
    # verify that default filter is 24 hours
    timestamp = datetime.strptime(
        client._connection._called_with["data"]["filter"],
        LOG_FILTER + " AND timestamp>=" + self.TIME_FORMAT,
    )
    yesterday = datetime.now(timezone.utc) - timedelta(days=1)
    self.assertLess(yesterday - timestamp, timedelta(minutes=1))
from logging import DEBUG, INFO, Formatter, Logger, StreamHandler, getLogger

from google.cloud.logging import Client
from google.cloud.logging.handlers import CloudLoggingHandler


def get_logger(logger_name='stocker') -> Logger:
    formatter = Formatter("%(asctime)s %(levelname)s: %(message)s")

    # Console handler: INFO and above.
    s_handler = StreamHandler()
    s_handler.setFormatter(formatter)
    s_handler.setLevel(INFO)

    # client = Client(project=ProjectConfig.ID,
    #                 credentials=ProjectConfig.CREDENTIAL)
    client = Client()
    # Cloud Logging handler: DEBUG and above.
    g_handler = CloudLoggingHandler(client)
    g_handler.setFormatter(formatter)
    g_handler.setLevel(DEBUG)

    logger = getLogger(logger_name)
    logger.setLevel(DEBUG)
    logger.addHandler(s_handler)
    logger.addHandler(g_handler)
    logger.propagate = False
    return logger
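# A minimal usage sketch for get_logger() above; the messages are illustrative
# only, and Client() requires Application Default Credentials to be configured.
# With both handlers attached, INFO records reach the console and Cloud
# Logging, while DEBUG records reach only Cloud Logging.
def _example_get_logger_usage():
    logger = get_logger()
    logger.debug("shipped to Cloud Logging only")
    logger.info("printed to the console and shipped to Cloud Logging")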
class BigQueryUsageSource(Source):
    config: BigQueryUsageConfig
    report: BigQueryUsageSourceReport
    client: GCPLoggingClient

    def __init__(self, config: BigQueryUsageConfig, ctx: PipelineContext):
        super().__init__(ctx)
        self.config = config
        self.report = BigQueryUsageSourceReport()

        client_options = self.config.extra_client_options.copy()
        if self.config.project_id is not None:
            client_options["project"] = self.config.project_id

        # See https://github.com/googleapis/google-cloud-python/issues/2674 for
        # why we disable gRPC here.
        self.client = GCPLoggingClient(**client_options, _use_grpc=False)

    @classmethod
    def create(cls, config_dict: dict, ctx: PipelineContext) -> "BigQueryUsageSource":
        config = BigQueryUsageConfig.parse_obj(config_dict)
        return cls(config, ctx)

    def get_workunits(self) -> Iterable[UsageStatsWorkUnit]:
        bigquery_log_entries = self._get_bigquery_log_entries()
        parsed_events = self._parse_bigquery_log_entries(bigquery_log_entries)
        hydrated_read_events = self._join_events_by_job_id(parsed_events)
        aggregated_info = self._aggregate_enriched_read_events(hydrated_read_events)

        for time_bucket in aggregated_info.values():
            for aggregate in time_bucket.values():
                wu = self._make_usage_stat(aggregate)
                self.report.report_workunit(wu)
                yield wu

    def _get_bigquery_log_entries(self) -> Iterable[AuditLogEntry]:
        filter = BQ_FILTER_RULE_TEMPLATE.format(
            start_time=self.config.start_time.strftime(BQ_DATETIME_FORMAT),
            end_time=self.config.end_time.strftime(BQ_DATETIME_FORMAT),
        )

        entry: AuditLogEntry
        for i, entry in enumerate(
            self.client.list_entries(filter_=filter, page_size=GCP_LOGGING_PAGE_SIZE)
        ):
            if i == 0:
                logger.debug("starting log load from BigQuery")
            yield entry
        logger.debug("finished loading log entries from BigQuery")

    def _parse_bigquery_log_entries(
        self, entries: Iterable[AuditLogEntry]
    ) -> Iterable[Union[ReadEvent, QueryEvent]]:
        for entry in entries:
            event: Union[None, ReadEvent, QueryEvent] = None
            if ReadEvent.can_parse_entry(entry):
                event = ReadEvent.from_entry(entry)
            elif QueryEvent.can_parse_entry(entry):
                event = QueryEvent.from_entry(entry)
            else:
                self.report.report_failure(
                    f"{entry.log_name}-{entry.insert_id}",
                    f"unable to parse log entry: {entry!r}",
                )
            if event:
                yield event

    def _join_events_by_job_id(
        self, events: Iterable[Union[ReadEvent, QueryEvent]]
    ) -> Iterable[ReadEvent]:
        # If caching eviction is enabled, we only store the most recently used query events,
        # which are used when resolving job information within the read events.
        query_jobs: MutableMapping[str, QueryEvent]
        if self.config.query_log_delay:
            query_jobs = cachetools.LRUCache(maxsize=5 * self.config.query_log_delay)
        else:
            query_jobs = {}

        def event_processor(
            events: Iterable[Union[ReadEvent, QueryEvent]]
        ) -> Iterable[ReadEvent]:
            for event in events:
                if isinstance(event, QueryEvent):
                    query_jobs[event.jobName] = event
                else:
                    yield event

        # TRICKY: To account for the possibility that the query event arrives after
        # the read event in the audit logs, we wait for at least `query_log_delay`
        # additional events to be processed before attempting to resolve BigQuery
        # job information from the logs. If `query_log_delay` is None, it gets treated
        # as an unlimited delay, which prioritizes correctness at the expense of memory usage.
        original_read_events = event_processor(events)
        delayed_read_events = delayed_iter(
            original_read_events, self.config.query_log_delay
        )

        for event in delayed_read_events:
            if event.jobName:
                if event.jobName in query_jobs:
                    # Join the query log event into the table read log event.
                    event.query = query_jobs[event.jobName].query
                    # TODO also join into the query itself for column references
                else:
                    self.report.report_warning(
                        "<general>",
                        "failed to match table read event with job; try increasing `query_log_delay`",
                    )
            yield event

    def _aggregate_enriched_read_events(
        self, events: Iterable[ReadEvent]
    ) -> Dict[datetime, Dict[BigQueryTableRef, AggregatedDataset]]:
        # TODO: handle partitioned tables
        # TODO: perhaps we need to continuously prune this, rather than
        # storing it all in one big object.
        datasets: Dict[
            datetime, Dict[BigQueryTableRef, AggregatedDataset]
        ] = collections.defaultdict(dict)

        for event in events:
            floored_ts = get_time_bucket(event.timestamp, self.config.bucket_duration)
            resource = event.resource.remove_extras()

            if resource.is_anonymous():
                self.report.report_dropped(str(resource))
                continue

            agg_bucket = datasets[floored_ts].setdefault(
                resource,
                AggregatedDataset(bucket_start_time=floored_ts, resource=resource),
            )
            agg_bucket.add_read_entry(event.actor_email, event.query, event.fieldsRead)

        return datasets

    def _make_usage_stat(self, agg: AggregatedDataset) -> UsageStatsWorkUnit:
        return agg.make_usage_workunit(
            self.config.bucket_duration,
            lambda resource: _table_ref_to_urn(resource, self.config.env),
            self.config.top_n_queries,
        )

    def get_report(self) -> SourceReport:
        return self.report
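# `delayed_iter` is what gives the join above its look-ahead behaviour. The
# sketch below illustrates the idea under stated assumptions; it is not the
# library's actual implementation. Each element is held back until `delay`
# further elements have been pulled from the source, and a delay of None
# means the whole source is consumed before anything is yielded.
from collections import deque
from typing import Iterable, Iterator, Optional, TypeVar

T = TypeVar("T")


def _delayed_iter_sketch(iterable: Iterable[T], delay: Optional[int]) -> Iterator[T]:
    buffer: "deque[T]" = deque()
    for item in iterable:
        buffer.append(item)
        # Only start emitting once `delay` newer items have been seen.
        if delay is not None and len(buffer) > delay:
            yield buffer.popleft()
    # Flush whatever is still buffered once the source is exhausted.
    while buffer:
        yield buffer.popleft()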
def test_list_entries_w_paging(self):
    from google.cloud.logging_v2 import DESCENDING
    from google.cloud.logging_v2.client import Client
    from google.cloud.logging_v2.logger import Logger
    from google.cloud.logging_v2.entries import ProtobufEntry
    from google.cloud.logging_v2.entries import StructEntry

    PROJECT1 = "PROJECT1"
    PROJECT1_PATH = f"projects/{PROJECT1}"
    PROJECT2 = "PROJECT2"
    PROJECT2_PATH = f"projects/{PROJECT2}"
    NOW, TIMESTAMP = self._make_timestamp()
    IID1 = "IID1"
    IID2 = "IID2"
    PAYLOAD = {"message": "MESSAGE", "weather": "partly cloudy"}
    PROTO_PAYLOAD = PAYLOAD.copy()
    PROTO_PAYLOAD["@type"] = "type.googleapis.com/testing.example"
    TOKEN = "TOKEN"
    PAGE_SIZE = 42
    SENT = {
        "resourceNames": [PROJECT1_PATH, PROJECT2_PATH],
        "filter": self.FILTER,
        "orderBy": DESCENDING,
        "pageSize": PAGE_SIZE,
        "pageToken": TOKEN,
    }
    RETURNED = {
        "entries": [
            {
                "jsonPayload": PAYLOAD,
                "insertId": IID1,
                "resource": {"type": "global"},
                "timestamp": TIMESTAMP,
                "logName": "projects/%s/logs/%s" % (self.PROJECT, self.LOGGER_NAME),
            },
            {
                "protoPayload": PROTO_PAYLOAD,
                "insertId": IID2,
                "resource": {"type": "global"},
                "timestamp": TIMESTAMP,
                "logName": "projects/%s/logs/%s" % (self.PROJECT, self.LOGGER_NAME),
            },
        ]
    }
    client = Client(
        project=self.PROJECT, credentials=_make_credentials(), _use_grpc=False
    )
    client._connection = _Connection(RETURNED)
    api = self._make_one(client)

    iterator = api.list_entries(
        resource_names=[PROJECT1_PATH, PROJECT2_PATH],
        filter_=self.FILTER,
        order_by=DESCENDING,
        page_size=PAGE_SIZE,
        page_token=TOKEN,
    )
    entries = list(iterator)
    token = iterator.next_page_token

    # First check the token.
    self.assertIsNone(token)
    # Then check the entries returned.
    self.assertEqual(len(entries), 2)

    entry1 = entries[0]
    self.assertIsInstance(entry1, StructEntry)
    self.assertEqual(entry1.payload, PAYLOAD)
    self.assertIsInstance(entry1.logger, Logger)
    self.assertEqual(entry1.logger.name, self.LOGGER_NAME)
    self.assertEqual(entry1.insert_id, IID1)
    self.assertEqual(entry1.timestamp, NOW)
    self.assertIsNone(entry1.labels)
    self.assertIsNone(entry1.severity)
    self.assertIsNone(entry1.http_request)

    entry2 = entries[1]
    self.assertIsInstance(entry2, ProtobufEntry)
    self.assertEqual(entry2.payload, PROTO_PAYLOAD)
    self.assertIsInstance(entry2.logger, Logger)
    self.assertEqual(entry2.logger.name, self.LOGGER_NAME)
    self.assertEqual(entry2.insert_id, IID2)
    self.assertEqual(entry2.timestamp, NOW)
    self.assertIsNone(entry2.labels)
    self.assertIsNone(entry2.severity)
    self.assertIsNone(entry2.http_request)

    called_with = client._connection._called_with
    expected_path = "/%s" % (self.LIST_ENTRIES_PATH,)
    self.assertEqual(
        called_with, {"method": "POST", "path": expected_path, "data": SENT}
    )