import pickle
import time

# (inputs, utils, defaults, Cache, CacheKeys, KeepaliveState and
# start_backing_job_if_necessary are defined elsewhere in this module.)


def get_cached_result(params: inputs.Inputs, context: LambdaContext, cache: Cache):
    '''Backing job is already running, so just query the cached data and return the result.'''

    def wait_for_backing_job_to_exit_batch_phase(
            keepalive_state: KeepaliveState, cache: Cache,
            cache_keys: CacheKeys, wait_until_ms: int):
        print("wait_for_backing_job_to_exit_batch_phase: started",
              cache_keys.keepalive)
        while not keepalive_state or not keepalive_state.in_streaming_phase:
            # Wait for the backing job to be running and advance to the
            # streaming phase.
            if utils.millitime() > wait_until_ms:
                raise Exception(
                    "wait_for_backing_job_to_exit_batch_phase: timed out")
            print(
                "get_cached_result: waiting for batch phase to end. keepalive_state=",
                keepalive_state)
            time.sleep(1)
            try:
                keepalive_state: KeepaliveState = pickle.loads(
                    cache.get(cache_keys.keepalive))
            except Exception as e:
                print(
                    "wait_for_backing_job_to_exit_batch_phase: failed to read keepalive from cache",
                    cache_keys.keepalive, e)
        print("wait_for_backing_job_to_exit_batch_phase: backing job is ready",
              keepalive_state)
        return keepalive_state

    print("get_cached_result: started")

    # Update the 'lastaccess' timestamp in memcache to indicate that the
    # corresponding backing job's data was recently queried.
    cache_keys: CacheKeys = CacheKeys(params.cache_key_prefix())
    now_ms = params.invoke_time_ms
    try:
        cache.set(cache_keys.lastaccess, now_ms)
    except Exception as e:
        print(
            "get_cached_result: failed to set lastaccess cache key {}={}, {}".
            format(cache_keys.lastaccess, now_ms, e))

    # Start the backing job if one is not running, or if its keepalive
    # timestamp is stale.
    keepalive_state: KeepaliveState = start_backing_job_if_necessary(
        params, context, cache)

    # Now that the backing job is surely running, wait for it to become
    # 'ready', i.e. to go from the batch phase to the streaming phase.
    keepalive_state = wait_for_backing_job_to_exit_batch_phase(
        keepalive_state, cache, cache_keys, now_ms + defaults.API_TIMEOUT_MS)

    # Compute which cache keys need to be fetched.
    if not params.is_streaming():
        tstart = params.absolute_ms(params.start_time_ms)
        tend = params.absolute_ms(params.end_time_ms)
    else:
        tend = now_ms
        tstart = tend - params.duration_ms()
    timestamps = sorted(
        ts for ts in keepalive_state.data_timestamps if tstart <= ts <= tend)
    data_keys = [cache_keys.data_prefix + str(ts) for ts in timestamps]
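    # Example with hypothetical values: given keepalive_state.data_timestamps
    # of [1000, 2000, 3000, 4000] and a window of tstart=2000, tend=3500, the
    # selected timestamps are [2000, 3000] and the fetched keys are
    # ["<data_prefix>2000", "<data_prefix>3000"].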
    # Retrieve metadata and data from the cache; retry missing keys if
    # necessary. Treat a metadata cache miss as an empty dict so the
    # filtering below does not fail.
    metadata = cache.get(cache_keys.metadata) or {}
    data = {}
    if timestamps:
        print(
            "get_cached_result: fetching {} timestamps {} - {} @ {}ms".format(
                len(timestamps), time.ctime(timestamps[0] / 1000),
                time.ctime(timestamps[-1] / 1000),
                keepalive_state.resolution_ms))
        data = cache.multiget(data_keys)
        missing_keys = set(data_keys) - set(data.keys())
        if missing_keys:
            print("get_cached_result: retrying fetch of {}/{} keys: {}".format(
                len(missing_keys), len(data_keys), sorted(missing_keys)))
            data.update(cache.multiget(list(missing_keys)))

    # Fill in the results struct.
    result = {
        "start_time_ms": tstart,
        "end_time_ms": tend,
        "earliest_result_ms": 0,
        "latest_result_ms": 0,
        "resolution_ms": keepalive_state.resolution_ms,
        "metadata": metadata,
        "data": {},
        "missing_timestamps_ms": []
    }

    # First, fill in the retrieved data, tracking which timestamps are still
    # missing after the retry.
    tsids = set()
    missing_timestamps = []
    for timestamp in timestamps:
        k = cache_keys.data_prefix + str(timestamp)
        if k in data:
            for tsid, value in data[k].items():
                if not result["earliest_result_ms"]:
                    result["earliest_result_ms"] = timestamp
                if timestamp > result["latest_result_ms"]:
                    result["latest_result_ms"] = timestamp
                tsids.add(tsid)
                result["data"].setdefault(tsid, []).append([timestamp, value])
        else:
            missing_timestamps.append(timestamp)

    # Second, keep metadata only for the relevant MTS that actually have data.
    remove_metadata_ids = set(metadata.keys()).difference(tsids)
    for tsid in remove_metadata_ids:
        metadata.pop(tsid)

    result["missing_timestamps_ms"] = missing_timestamps
    return result
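
# Usage sketch (hypothetical): how a Lambda handler might call
# get_cached_result. The construction of `params` and `cache` below stands in
# for this module's real wiring, which is outside this section.
#
#     def handler(event, context):
#         params = inputs.Inputs(event)      # hypothetical constructor
#         cache = Cache()                    # hypothetical constructor
#         result = get_cached_result(params, context, cache)
#         # result["data"] maps each tsid to [timestamp_ms, value] pairs;
#         # result["missing_timestamps_ms"] lists timestamps never fetched.
#         return result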