def check_estimated_size_bytes(self, entity_bytes, timestamp, namespace=None):
    """A helper method to test get_estimated_size_bytes.

    Stubs ``self._mock_datastore.run_query`` so that the latest-timestamp
    request is answered with a stats response carrying ``timestamp`` and the
    kind-stats request is answered with a stats response carrying
    ``entity_bytes``. It then asserts that
    ``ReadFromDatastore.get_estimated_size_bytes`` returns ``entity_bytes``
    and that exactly those two requests were issued, in that order.

    Args:
      entity_bytes: value stored under the 'entity_bytes' property of the
        fake kind-stats response; also the expected estimate.
      timestamp: statistics timestamp, passed to
        ``datastore_helper.from_timestamp`` and
        ``datastore_helper.micros_from_timestamp``.
      namespace: namespace forwarded to the request/query helpers
        (defaults to None).

    Raises:
      ValueError: from the fake ``run_query`` if the code under test issues
        a request other than the two expected ones.
    """
    timestamp_req = helper.make_request(
        self._PROJECT, namespace,
        helper.make_latest_timestamp_query(namespace))
    timestamp_resp = self.make_stats_response(
        {'timestamp': datastore_helper.from_timestamp(timestamp)})
    kind_stat_req = helper.make_request(
        self._PROJECT, namespace,
        helper.make_kind_stats_query(
            namespace, self._query.kind[0].name,
            datastore_helper.micros_from_timestamp(timestamp)))
    kind_stat_resp = self.make_stats_response(
        {'entity_bytes': entity_bytes})

    def fake_run_query(req):
        """Answer the two expected requests; reject anything else."""
        if req == timestamp_req:
            return timestamp_resp
        if req == kind_stat_req:
            return kind_stat_resp
        # Report the expected kind-stats request in the error itself instead
        # of printing it: no stray stdout output, and no Python 2-only
        # print statement.
        raise ValueError(
            "Unknown req: %s (expected kind stats req: %s)"
            % (req, kind_stat_req))

    self._mock_datastore.run_query.side_effect = fake_run_query
    self.assertEqual(
        entity_bytes,
        ReadFromDatastore.get_estimated_size_bytes(
            self._PROJECT, namespace, self._query, self._mock_datastore))
    # Both the set of requests and their order are checked here.
    self.assertEqual(
        self._mock_datastore.run_query.call_args_list,
        [call(timestamp_req), call(kind_stat_req)])
def get_estimated_size_bytes(project, namespace, query, datastore):
    """Estimate the size in bytes of the data the given query would return.

    Cloud Datastore offers no good way to estimate how large a query result
    will be, so the size of the entire kind, read from the __Stat_Kind__
    system table, is used as an approximation. Exactly one kind is assumed
    to be specified in the query; only ``query.kind[0]`` is consulted.

    See https://cloud.google.com/datastore/docs/concepts/stats.

    Args:
      project: project forwarded to ``helper.make_request``.
      namespace: namespace forwarded to the request/query helpers.
      query: query whose first kind's name selects the statistics entry.
      datastore: client object whose ``run_query`` executes the requests.

    Returns:
      The value of the 'entity_bytes' property of the first kind-statistics
      entity returned.

    Raises:
      RuntimeError: if the kind-statistics query returns no entities.
    """
    kind_name = query.kind[0].name
    stats_timestamp = ReadFromDatastore.query_latest_statistics_timestamp(
        project, namespace, datastore)
    logging.info('Latest stats timestamp for kind %s is %s',
                 kind_name, stats_timestamp)

    stats_request = helper.make_request(
        project, namespace,
        helper.make_kind_stats_query(namespace, kind_name, stats_timestamp))
    results = datastore.run_query(stats_request).batch.entity_results
    if len(results) == 0:
        raise RuntimeError(
            "Datastore statistics for kind %s unavailable" % kind_name)

    stats_entity = results[0].entity
    return datastore_helper.get_value(stats_entity.properties['entity_bytes'])
def query_latest_statistics_timestamp(project, namespace, datastore):
    """Fetch the timestamp of the latest Cloud Datastore statistics update.

    The Cloud Datastore system tables holding statistics are updated
    periodically. This method fetches the timestamp (in microseconds) of the
    latest statistics update using the `__Stat_Total__` table.

    Args:
      project: project forwarded to ``helper.make_request``.
      namespace: namespace forwarded to the request/query helpers.
      datastore: client object whose ``run_query`` executes the request.

    Returns:
      The 'timestamp' property of the first returned entity, converted with
      ``datastore_helper.micros_from_timestamp``.

    Raises:
      RuntimeError: if the query returns no entities.
    """
    request = helper.make_request(
        project, namespace, helper.make_latest_timestamp_query(namespace))
    results = datastore.run_query(request).batch.entity_results
    if len(results) == 0:
        raise RuntimeError("Datastore total statistics unavailable.")

    timestamp_property = results[0].entity.properties['timestamp']
    return datastore_helper.micros_from_timestamp(
        timestamp_property.timestamp_value)