예제 #1
0
 def __init__(self, params: inputs.Inputs, id: str, resolution_ms: int):
     """Initialize keepalive bookkeeping for one backing job.

     params: job inputs; only cache_key_prefix() is read here.
     id: identifier for this job instance.
     resolution_ms: data resolution of the job's results.
     """
     # Identity and configuration supplied by the caller.
     self.id = id
     self.resolution_ms = resolution_ms
     self.cache_keys = CacheKeys(params.cache_key_prefix())
     # Mutable state: no keepalive seen yet, no data published,
     # and the job starts out in its batch (non-streaming) phase.
     self.last_keepalive_ms = 0
     self.data_timestamps = set()
     self.in_streaming_phase = False
예제 #2
0
 def start_backing_job_as_process(params: inputs.Inputs, tstart, tend):
     # Start new backing job that runs as a python process: re-invokes this
     # same script (__file__) with the query parameters, detached via
     # nohup/'&', with all output redirected to /tmp/<cache_key_prefix>.log.
     # NOTE(review): params.program and params.api_token are interpolated
     # into a shell command unescaped -- shell-injection risk if these
     # inputs are untrusted; consider subprocess with an argument list.
     print("start_backing_job_as_process: started")
     cmd: str = "nohup python3 {script} --program=\"{program}\" --token={token} \
                 --start_time_ms={tstart} --end_time_ms={tend} --resolution_hint_ms={res} --endpoint={endpoint}".format(
         script=__file__,
         program=params.program,
         tstart=tstart,
         tend=tend,
         res=params.resolution_hint_ms,
         token=params.api_token,
         endpoint=params.api_endpoint)
     # Append the daemon flag and redirect output; trailing '&' backgrounds
     # the child so this call returns immediately.
     cmd += " --daemon > /tmp/{}.log 2>&1 &".format(
         params.cache_key_prefix())
     print("start_backing_job_as_process:", cmd)
     # NOTE(review): os.system's return value is ignored -- a failed launch
     # goes unnoticed here.
     os.system(cmd)
예제 #3
0
def get_cached_result(params: inputs.Inputs, context: LambdaContext,
                      cache: Cache):
    ''' Serve a query from cache, starting and awaiting the backing job as
    needed: ensure a backing job is running, wait for it to leave its batch
    phase, then assemble the cached data points in [tstart, tend] into a
    result dict (metadata, per-tsid data arrays, missing timestamps). '''
    def wait_for_backing_job_to_exit_batch_phase(
            keepalive_state: KeepaliveState, cache: Cache,
            cache_keys: CacheKeys, wait_until_ms: int):
        # Poll the keepalive key once per second until the backing job
        # reports in_streaming_phase, raising once wait_until_ms passes.
        print("wait_for_backing_job_to_exit_batch_phase: started",
              cache_keys.keepalive)
        while not keepalive_state or not keepalive_state.in_streaming_phase:
            # wait for backing job to be running and advance to streaming state
            if utils.millitime() > wait_until_ms:
                raise Exception(
                    "wait_for_backing_job_to_exit_batch_phase: timed out")
            print(
                "get_cached_result: waiting for batch phase to end. keepalive_state=",
                keepalive_state)
            time.sleep(1)
            try:
                # Re-read the latest state published by the backing job.
                # NOTE(review): unpickles a blob from the cache -- assumes
                # cache contents are trusted.
                keepalive_state: KeepaliveState = pickle.loads(
                    cache.get(cache_keys.keepalive))
            except Exception as e:
                # Best-effort read: the key may not exist yet; keep polling
                # until the timeout above fires.
                print(
                    "wait_for_backing_job_to_exit_batch_phase: failed to read keepalive from cache",
                    cache_keys.keepalive, e)
        print("wait_for_backing_job_to_exit_batch_phase: backing job is ready",
              keepalive_state)
        return keepalive_state

    print("get_cached_result: started")

    # Update 'lastaccess' timestamp in memcache to indicate the corresponding backing job's data was recently queried
    cache_keys: CacheKeys = CacheKeys(params.cache_key_prefix())
    now_ms = params.invoke_time_ms
    try:
        cache.set(cache_keys.lastaccess, now_ms)
    except Exception as e:
        # Non-fatal: a missed lastaccess update does not affect this query.
        print(
            "get_cached_result: failed to set lastaccess cache key {}={}, {}".
            format(cache_keys.lastaccess, now_ms, e))

    # start the backing job if one is not running, or if the backing job's keepalive timestamp is stale
    keepalive_state: KeepaliveState = start_backing_job_if_necessary(
        params, context, cache)

    # now that backing job is surely running, wait for it to become 'ready' - i.e. go from batch to streaming phase
    keepalive_state = wait_for_backing_job_to_exit_batch_phase(
        keepalive_state, cache, cache_keys, now_ms + defaults.API_TIMEOUT_MS)

    # compute which cache keys need to be fetched
    if not params.is_streaming():
        # Fixed range: resolve possibly-relative bounds to absolute ms.
        tstart = params.absolute_ms(params.start_time_ms)
        tend = params.absolute_ms(params.end_time_ms)
    else:
        # Streaming: a window of duration_ms ending at the invocation time.
        tend = now_ms
        tstart = tend - params.duration_ms()

    # Only timestamps the backing job has actually published, within range.
    timestamps = sorted([
        ts for ts in keepalive_state.data_timestamps
        if ts >= tstart and ts <= tend
    ])
    data_keys = [cache_keys.data_prefix + str(ts) for ts in timestamps]

    # retrieve metadata and data from cache. retry if necessary
    # NOTE(review): assumes the metadata key is present; a cache miss would
    # leave metadata as None and break metadata.keys() below -- confirm.
    metadata = cache.get(cache_keys.metadata)
    if len(timestamps):
        print(
            "get_cached_result: fetching {} timestamps {} - {} @ {}ms".format(
                len(timestamps), time.ctime(timestamps[0] / 1000),
                time.ctime(timestamps[-1] / 1000),
                keepalive_state.resolution_ms))
    data = cache.multiget(data_keys)
    missing_keys = set(data_keys) - set(data.keys())
    if (len(missing_keys)):
        # Single retry pass for keys the first multiget did not return.
        print("get_cached_result: retrying fetch of {}/{} keys: {}".format(
            len(missing_keys), len(data_keys), sorted(missing_keys)))
        data.update(cache.multiget(list(missing_keys)))

    # Fill in results in results struct
    result = {
        "start_time_ms": tstart,
        "end_time_ms": tend,
        "earliest_result_ms": 0,  # 0 until the first datapoint is seen
        "latest_result_ms": 0,
        "resolution_ms": keepalive_state.resolution_ms,
        "metadata": metadata,
        "data": {},
        "missing_timestamps_ms": []
    }

    # First fill in retrieved data
    tsids = set()
    missing_timestamps = []
    for timestamp in timestamps:
        k = cache_keys.data_prefix + str(timestamp)
        if k in data.keys():
            for tsid, value in data[k].items():
                # timestamps iterate in ascending order, so the first
                # timestamp with data is the earliest.
                if not result["earliest_result_ms"]:
                    result["earliest_result_ms"] = timestamp
                if timestamp > result["latest_result_ms"]:
                    result["latest_result_ms"] = timestamp
                tsids.add(tsid)
                result["data"].setdefault(tsid, [])
                result["data"][tsid].append([timestamp, value])
        else:
            # Still absent after the retry: report it to the caller.
            missing_timestamps.append(timestamp)

    # Second, fill in metadata of only the relevant mts that have data
    remove_metadata_ids = set(metadata.keys()).difference(tsids)
    for tsid in remove_metadata_ids:
        metadata.pop(tsid)

    result["missing_timestamps_ms"] = missing_timestamps
    return result
예제 #4
0
def start_backing_job_if_necessary(params: inputs.Inputs,
                                   context: LambdaContext, cache: Cache):
    ''' If no backing job is running for a given signalflow program and duration, start one.
    Returns keepalive_state from cache if an active backing job is found (to prevent a duplicate cache read by callers). '''
    def start_backing_job_as_lambda(params: inputs.Inputs, tstart, tend,
                                    context: LambdaContext):
        # Start new backing job that runs as a lambda function: re-invokes
        # this same function asynchronously (InvocationType='Event') with
        # daemon=True so the new invocation acts as the backing job.
        print("start_backing_job_as_lambda: started")
        import boto3
        lambda_client = boto3.client('lambda')
        lambda_client.invoke(FunctionName=context.invoked_function_arn,
                             InvocationType='Event',
                             Payload=json.dumps({
                                 "program": params.program,
                                 "start_time_ms": tstart,
                                 "end_time_ms": tend,
                                 "resolution_hint_ms":
                                 params.resolution_hint_ms,
                                 "api_token": params.api_token,
                                 "api_endpoint": params.api_endpoint,
                                 "daemon": True
                             }))

    def start_backing_job_as_process(params: inputs.Inputs, tstart, tend):
        # Start new backing job that runs as a python process: re-runs this
        # script detached via nohup/'&', logging to /tmp.
        # NOTE(review): params.program and api_token are interpolated into a
        # shell command unescaped -- shell-injection risk for untrusted input.
        print("start_backing_job_as_process: started")
        cmd: str = "nohup python3 {script} --program=\"{program}\" --token={token} \
                    --start_time_ms={tstart} --end_time_ms={tend} --resolution_hint_ms={res} --endpoint={endpoint}".format(
            script=__file__,
            program=params.program,
            tstart=tstart,
            tend=tend,
            res=params.resolution_hint_ms,
            token=params.api_token,
            endpoint=params.api_endpoint)
        cmd += " --daemon > /tmp/{}.log 2>&1 &".format(
            params.cache_key_prefix())
        print("start_backing_job_as_process:", cmd)
        # NOTE(review): os.system return value is ignored -- launch failures
        # go unnoticed here.
        os.system(cmd)

    # begin code for start_backing_job_if_necessary()
    try:
        cache_keys = CacheKeys(params.cache_key_prefix())
        print("start_backing_job_if_necessary: started", cache_keys)
        now_ms = utils.millitime()
        # NOTE(review): unpickles the keepalive blob from the cache --
        # assumes cache contents are trusted.
        cached_state: KeepaliveState = pickle.loads(
            cache.get(cache_keys.keepalive))
        keepalive_age_ms = now_ms - cached_state.last_keepalive_ms
        # A keepalive counts as live for EXPIRY_MULTIPLE intervals.
        expiry_ms = defaults.KEEPALIVE_EXPIRY_MULTIPLE * defaults.KEEPALIVE_INTERVAL_SEC * 1000

        if keepalive_age_ms < expiry_ms:
            print(
                "start_backing_job_if_necessary: found active backing job already running. keepalive_age_ms =",
                keepalive_age_ms)
            return cached_state

        print(
            "start_backing_job_if_necessary: found expired keepalive_age_ms =",
            keepalive_age_ms)
        # Clear the stale keepalive so concurrent callers also see the job
        # as dead rather than re-reading the expired state.
        cache.set(cache_keys.keepalive, None)
    except Exception as e:
        # Missing key or unpickle failure: treat as "no job running" and
        # fall through to start a new one.
        print("start_backing_job_if_necessary: no keeplive found in cache", e)

    tstart = params.start_time_ms
    tend = params.end_time_ms
    if not params.is_streaming():
        # Fixed range: resolve possibly-relative bounds to absolute ms.
        tstart = params.absolute_ms(tstart)
        tend = params.absolute_ms(tend)

    if context.invoked_function_arn:
        # This backing job was invoked as a lambda. So invoke a new lambda
        start_backing_job_as_lambda(params, tstart, tend, context)
    else:
        start_backing_job_as_process(params, tstart, tend)

    # Freshly started job: no keepalive state exists yet, so return None and
    # let callers poll the cache for it.
    return None