def get_lambda_code(func_name, retries=1, cache_time=None, env=None):
    if MOCK_OBJ:
        return ''
    env = aws_stack.get_environment(env)
    if cache_time is None and env.region != REGION_LOCAL:
        cache_time = AWS_LAMBDA_CODE_CACHE_TIMEOUT
    out = cmd_lambda('get-function --function-name %s' % func_name, env, cache_time)
    out = json.loads(out)
    loc = out['Code']['Location']
    hash = md5(loc)
    # print("Location %s %s" % (hash, func_name))
    folder = TMP_DOWNLOAD_FILE_PATTERN.replace('*', '%s') % hash
    filename = 'archive.zip'
    archive = '%s/%s' % (folder, filename)
    try:
        run('mkdir -p %s' % folder)
        if not os.path.isfile(archive):
            # print("Downloading %s" % archive)
            run("wget -O %s '%s'" % (archive, loc))
        if len(os.listdir(folder)) <= 1:
            # print("Unzipping %s/%s" % (folder, filename))
            run("cd %s && unzip -o %s" % (folder, filename))
    except Exception as e:
        print("WARN: %s" % e)
        sh.rm('-f', archive)
        if retries > 0:
            return get_lambda_code(func_name, retries=retries - 1, cache_time=1, env=env)
        else:
            print("WARNING: Unable to retrieve lambda code: %s" % e)
def wait_for_stream_created(table_name):
    stream_name = get_kinesis_stream_name(table_name)
    stream = KinesisStream(id=stream_name, num_shards=1)
    kinesis = aws_stack.create_external_boto_client("kinesis", env=get_environment(None))
    stream.connect(kinesis)
    stream.wait_for()
def get_stream_info(stream_name, log_file=None, shards=None, env=None, endpoint_url=None,
        ddb_lease_table_suffix=None, env_vars={}):
    if not ddb_lease_table_suffix:
        ddb_lease_table_suffix = DEFAULT_DDB_LEASE_TABLE_SUFFIX
    # construct stream info
    env = aws_stack.get_environment(env)
    props_file = os.path.join('/tmp/', 'kclipy.%s.properties' % short_uid())
    app_name = '%s%s' % (stream_name, ddb_lease_table_suffix)
    stream_info = {
        'name': stream_name,
        'region': DEFAULT_REGION,
        'shards': shards,
        'properties_file': props_file,
        'log_file': log_file,
        'app_name': app_name,
        'env_vars': env_vars
    }
    # set local connection
    if env.region == REGION_LOCAL:
        from localstack.constants import LOCALHOST, DEFAULT_PORT_KINESIS
        stream_info['conn_kwargs'] = {
            'host': LOCALHOST,
            'port': DEFAULT_PORT_KINESIS,
            'is_secure': False
        }
    if endpoint_url:
        if 'conn_kwargs' not in stream_info:
            stream_info['conn_kwargs'] = {}
        url = urlparse(endpoint_url)
        stream_info['conn_kwargs']['host'] = url.hostname
        stream_info['conn_kwargs']['port'] = url.port
        stream_info['conn_kwargs']['is_secure'] = url.scheme == 'https'
    return stream_info
def wait_for_stream_created(table_name):
    stream_name = get_kinesis_stream_name(table_name)
    stream = KinesisStream(id=stream_name, num_shards=1)
    kinesis = aws_stack.connect_to_service('kinesis', env=get_environment(None))
    stream.connect(kinesis)
    stream.wait_for()
def get_stream_info(stream_name, log_file=None, shards=None, env=None, endpoint_url=None,
        ddb_lease_table_suffix=None, env_vars={}):
    if not ddb_lease_table_suffix:
        ddb_lease_table_suffix = DEFAULT_DDB_LEASE_TABLE_SUFFIX
    # construct stream info
    env = aws_stack.get_environment(env)
    props_file = os.path.join(tempfile.gettempdir(), 'kclipy.%s.properties' % short_uid())
    app_name = '%s%s' % (stream_name, ddb_lease_table_suffix)
    stream_info = {
        'name': stream_name,
        'region': DEFAULT_REGION,
        'shards': shards,
        'properties_file': props_file,
        'log_file': log_file,
        'app_name': app_name,
        'env_vars': env_vars
    }
    # set local connection
    if env.region == REGION_LOCAL:
        stream_info['conn_kwargs'] = {
            'host': HOSTNAME,
            'port': DEFAULT_PORT_KINESIS,
            'is_secure': bool(USE_SSL)
        }
    if endpoint_url:
        if 'conn_kwargs' not in stream_info:
            stream_info['conn_kwargs'] = {}
        url = urlparse(endpoint_url)
        stream_info['conn_kwargs']['host'] = url.hostname
        stream_info['conn_kwargs']['port'] = url.port
        stream_info['conn_kwargs']['is_secure'] = url.scheme == 'https'
    return stream_info
def start_kcl_client_process(stream_name, listener_script, log_file=None, env=None, configs={},
        endpoint_url=None, ddb_lease_table_suffix=None, env_vars={}, region_name=None,
        kcl_log_level=DEFAULT_KCL_LOG_LEVEL, log_subscribers=[]):
    env = aws_stack.get_environment(env)
    # make sure to convert stream ARN to stream name
    stream_name = aws_stack.kinesis_stream_name(stream_name)
    # decide which credentials provider to use
    credentialsProvider = None
    if (('AWS_ASSUME_ROLE_ARN' in os.environ or 'AWS_ASSUME_ROLE_ARN' in env_vars) and
            ('AWS_ASSUME_ROLE_SESSION_NAME' in os.environ or 'AWS_ASSUME_ROLE_SESSION_NAME' in env_vars)):
        # use special credentials provider that can assume IAM roles and handle temporary STS auth tokens
        credentialsProvider = 'cloud.localstack.DefaultSTSAssumeRoleSessionCredentialsProvider'
        # pass through env variables to child process
        for var_name in ['AWS_ASSUME_ROLE_ARN', 'AWS_ASSUME_ROLE_SESSION_NAME',
                'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'AWS_SESSION_TOKEN']:
            if var_name in os.environ and var_name not in env_vars:
                env_vars[var_name] = os.environ[var_name]
    if aws_stack.is_local_env(env):
        # need to disable CBOR protocol, enforce use of plain JSON,
        # see https://github.com/mhart/kinesalite/issues/31
        env_vars['AWS_CBOR_DISABLE'] = 'true'
    if kcl_log_level or (len(log_subscribers) > 0):
        if not log_file:
            log_file = LOG_FILE_PATTERN.replace('*', short_uid())
            TMP_FILES.append(log_file)
        run('touch %s' % log_file)
        # start log output reader thread which will read the KCL log
        # file and print each line to stdout of this process...
        reader_thread = OutputReaderThread({'file': log_file, 'level': kcl_log_level,
            'log_prefix': 'KCL', 'log_subscribers': log_subscribers})
        reader_thread.start()
    # construct stream info
    stream_info = get_stream_info(stream_name, log_file, env=env, endpoint_url=endpoint_url,
        ddb_lease_table_suffix=ddb_lease_table_suffix, env_vars=env_vars)
    props_file = stream_info['properties_file']
    # set kcl config options
    kwargs = {
        'metricsLevel': 'NONE',
        'initialPositionInStream': 'LATEST'
    }
    # set parameters for local connection
    if aws_stack.is_local_env(env):
        kwargs['kinesisEndpoint'] = '%s:%s' % (HOSTNAME, config.PORT_KINESIS)
        kwargs['dynamodbEndpoint'] = '%s:%s' % (HOSTNAME, config.PORT_DYNAMODB)
        kwargs['kinesisProtocol'] = get_service_protocol()
        kwargs['dynamodbProtocol'] = get_service_protocol()
        kwargs['disableCertChecking'] = 'true'
    kwargs.update(configs)
    # create config file
    kclipy_helper.create_config_file(config_file=props_file, executableName=listener_script,
        streamName=stream_name, applicationName=stream_info['app_name'],
        credentialsProvider=credentialsProvider, region_name=region_name, **kwargs)
    TMP_FILES.append(props_file)
    # start stream consumer
    stream = KinesisStream(id=stream_name, params=stream_info)
    thread_consumer = KinesisProcessorThread.start_consumer(stream)
    TMP_THREADS.append(thread_consumer)
    return thread_consumer
def start_kcl_client_process(stream_name, listener_script, log_file=None, env=None, configs={},
        endpoint_url=None, ddb_lease_table_suffix=None, env_vars={},
        kcl_log_level=DEFAULT_KCL_LOG_LEVEL, log_subscribers=[]):
    env = aws_stack.get_environment(env)
    # decide which credentials provider to use
    credentialsProvider = None
    if (('AWS_ASSUME_ROLE_ARN' in os.environ or 'AWS_ASSUME_ROLE_ARN' in env_vars) and
            ('AWS_ASSUME_ROLE_SESSION_NAME' in os.environ or 'AWS_ASSUME_ROLE_SESSION_NAME' in env_vars)):
        # use special credentials provider that can assume IAM roles and handle temporary STS auth tokens
        credentialsProvider = 'com.atlassian.DefaultSTSAssumeRoleSessionCredentialsProvider'
        # pass through env variables to child process
        for var_name in ['AWS_ASSUME_ROLE_ARN', 'AWS_ASSUME_ROLE_SESSION_NAME',
                'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'AWS_SESSION_TOKEN']:
            if var_name in os.environ and var_name not in env_vars:
                env_vars[var_name] = os.environ[var_name]
    if env.region == REGION_LOCAL:
        # need to disable CBOR protocol, enforce use of plain JSON,
        # see https://github.com/mhart/kinesalite/issues/31
        env_vars['AWS_CBOR_DISABLE'] = 'true'
    if kcl_log_level or (len(log_subscribers) > 0):
        if not log_file:
            log_file = LOG_FILE_PATTERN.replace('*', short_uid())
            TMP_FILES.append(log_file)
        run('touch %s' % log_file)
        # start log output reader thread which will read the KCL log
        # file and print each line to stdout of this process...
        reader_thread = OutputReaderThread({'file': log_file, 'level': kcl_log_level,
            'log_prefix': 'KCL', 'log_subscribers': log_subscribers})
        reader_thread.start()
    # construct stream info
    stream_info = get_stream_info(stream_name, log_file, env=env, endpoint_url=endpoint_url,
        ddb_lease_table_suffix=ddb_lease_table_suffix, env_vars=env_vars)
    props_file = stream_info['properties_file']
    # set kcl config options
    kwargs = {
        'metricsLevel': 'NONE',
        'initialPositionInStream': 'LATEST'
    }
    # set parameters for local connection
    if env.region == REGION_LOCAL:
        from localstack.constants import LOCALHOST, DEFAULT_PORT_KINESIS, DEFAULT_PORT_DYNAMODB
        kwargs['kinesisEndpoint'] = '%s:%s' % (LOCALHOST, DEFAULT_PORT_KINESIS)
        kwargs['dynamodbEndpoint'] = '%s:%s' % (LOCALHOST, DEFAULT_PORT_DYNAMODB)
        kwargs['kinesisProtocol'] = 'http'
        kwargs['dynamodbProtocol'] = 'http'
        kwargs['disableCertChecking'] = 'true'
    kwargs.update(configs)
    # create config file
    kclipy_helper.create_config_file(config_file=props_file, executableName=listener_script,
        streamName=stream_name, applicationName=stream_info['app_name'],
        credentialsProvider=credentialsProvider, **kwargs)
    TMP_FILES.append(props_file)
    # start stream consumer
    stream = KinesisStream(id=stream_name, params=stream_info)
    thread_consumer = KinesisProcessorThread.start_consumer(stream)
    TMP_THREADS.append(thread_consumer)
    return thread_consumer
def get_kinesis_events(stream_name, shard_id, max_results=10, env=None):
    env = aws_stack.get_environment(env)
    records = aws_stack.kinesis_get_latest_records(stream_name, shard_id, count=max_results, env=env)
    for r in records:
        r['ApproximateArrivalTimestamp'] = mktime(r['ApproximateArrivalTimestamp'])
    result = {
        'events': records
    }
    return result
def get_kinesis_events(stream_name, shard_id, max_results=10, env=None):
    timestamp = now() - KINESIS_RECENT_EVENTS_TIME_DIFF_SECS
    env = aws_stack.get_environment(env)
    records = aws_stack.kinesis_get_latest_records(stream_name, shard_id, count=max_results, env=env)
    for r in records:
        r['ApproximateArrivalTimestamp'] = mktime(r['ApproximateArrivalTimestamp'])
    result = {
        'events': records
    }
    return result
def get_lambda_code(func_name, retries=1, cache_time=None, env=None, region=None):
    if MOCK_OBJ:
        return ""
    env = aws_stack.get_environment(env)
    if cache_time is None and not aws_stack.is_local_env(env):
        cache_time = AWS_LAMBDA_CODE_CACHE_TIMEOUT
    lambda_client = _connect("lambda", env=env, region=region)
    out = lambda_client.get_function(FunctionName=func_name)
    loc = out["Code"]["Location"]
    hash = md5(loc)
    folder = TMP_DOWNLOAD_FILE_PATTERN.replace("*", hash)
    filename = "archive.zip"
    archive = "%s/%s" % (folder, filename)
    try:
        mkdir(folder)
        if not os.path.isfile(archive):
            download(loc, archive, verify_ssl=False)
        if len(os.listdir(folder)) <= 1:
            zip_path = os.path.join(folder, filename)
            unzip(zip_path, folder)
    except Exception as e:
        print("WARN: %s" % e)
        rm_rf(archive)
        if retries > 0:
            return get_lambda_code(func_name, retries=retries - 1, cache_time=1, env=env)
        else:
            print("WARNING: Unable to retrieve lambda code: %s" % e)
    # traverse subdirectories and get script sources
    result = {}
    for root, subdirs, files in os.walk(folder):
        for file in files:
            prefix = root.split(folder)[-1]
            key = "%s/%s" % (prefix, file)
            if re.match(r".+\.py$", key) or re.match(r".+\.js$", key):
                codefile = "%s/%s" % (root, file)
                result[key] = load_file(codefile)
    # cleanup cache
    clean_cache(
        file_pattern=TMP_DOWNLOAD_FILE_PATTERN,
        last_clean_time=last_cache_cleanup_time,
        max_age=TMP_DOWNLOAD_CACHE_MAX_AGE,
    )
    # TODO: delete only if cache_time is over
    rm_rf(folder)
    return result
def __init__(self, params):
    props_file = params['properties_file']
    env_vars = params['env_vars']
    cmd = kclipy_helper.get_kcl_app_command('java', MULTI_LANG_DAEMON_CLASS, props_file)
    if not params['log_file']:
        params['log_file'] = '%s.log' % props_file
        TMP_FILES.append(params['log_file'])
    env = aws_stack.get_environment()
    quiet = aws_stack.is_local_env(env)
    ShellCommandThread.__init__(self, cmd, outfile=params['log_file'], env_vars=env_vars, quiet=quiet)
def aws_cmd(service, env):
    # TODO: use boto3 instead of running aws-cli commands here!
    cmd = '. .venv/bin/activate; aws'
    endpoint_url = None
    env = aws_stack.get_environment(env)
    if env.region == REGION_LOCAL:
        endpoint_url = aws_stack.get_local_service_url(service)
    if endpoint_url:
        cmd = '%s --endpoint-url="%s"' % (cmd, endpoint_url)
    cmd = '%s %s' % (cmd, service)
    return cmd
def __init__(self, params):
    props_file = params["properties_file"]
    env_vars = params["env_vars"]
    cmd = kclipy_helper.get_kcl_app_command("java", MULTI_LANG_DAEMON_CLASS, props_file)
    if not params["log_file"]:
        params["log_file"] = "%s.log" % props_file
        TMP_FILES.append(params["log_file"])
    env = aws_stack.get_environment()
    quiet = aws_stack.is_local_env(env)
    ShellCommandThread.__init__(
        self, cmd, outfile=params["log_file"], env_vars=env_vars, quiet=quiet
    )
def __init__(self, params):
    props_file = params['properties_file']
    env_vars = params['env_vars']
    cmd = kclipy_helper.get_kcl_app_command('java', MULTI_LANG_DAEMON_CLASS, props_file)
    if not params['log_file']:
        params['log_file'] = '%s.log' % props_file
        TMP_FILES.append(params['log_file'])
    # print(cmd)
    env = aws_stack.get_environment()
    quiet = env.region == REGION_LOCAL
    ShellCommandThread.__init__(self, cmd, outfile=params['log_file'], env_vars=env_vars, quiet=quiet)
def get_kinesis_events(stream_name, shard_id, max_results=10, env=None):
    records = []
    try:
        env = aws_stack.get_environment(env)
        records = aws_stack.kinesis_get_latest_records(
            stream_name, shard_id, count=max_results, env=env
        )
        for r in records:
            r["ApproximateArrivalTimestamp"] = mktime(r["ApproximateArrivalTimestamp"])
    except Exception:
        pass
    result = {"events": records}
    return result
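For illustration, a sketch of the result shape for a single fetched record; the field values are hypothetical, and the arrival timestamp has already been converted to epoch seconds by mktime:

# Hypothetical result of get_kinesis_events('my-stream', 'shardId-000000000000'):
# {
#     'events': [
#         {
#             'Data': '{"foo": "bar"}',
#             'PartitionKey': 'pk-1',
#             'SequenceNumber': '49590338271490256608559692538361571095921575989136588898',
#             'ApproximateArrivalTimestamp': 1514764800.0
#         }
#     ]
# }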
def get_lambda_code(func_name, retries=1, cache_time=None, env=None):
    if MOCK_OBJ:
        return ''
    env = aws_stack.get_environment(env)
    if cache_time is None and env.region != REGION_LOCAL:
        cache_time = AWS_LAMBDA_CODE_CACHE_TIMEOUT
    out = cmd_lambda('get-function --function-name %s' % func_name, env, cache_time)
    out = json.loads(out)
    loc = out['Code']['Location']
    hash = md5(loc)
    # print("Location %s %s" % (hash, func_name))
    folder = TMP_DOWNLOAD_FILE_PATTERN.replace('*', '%s') % hash
    filename = 'archive.zip'
    archive = '%s/%s' % (folder, filename)
    try:
        run('mkdir -p %s' % folder)
        if not os.path.isfile(archive):
            # print("Downloading %s" % archive)
            run("wget -O %s '%s'" % (archive, loc))
        if len(os.listdir(folder)) <= 1:
            # print("Unzipping %s/%s" % (folder, filename))
            run("cd %s && unzip -o %s" % (folder, filename))
    except Exception as e:
        print("WARN: %s" % e)
        sh.rm('-f', archive)
        if retries > 0:
            return get_lambda_code(func_name, retries=retries - 1, cache_time=1, env=env)
        else:
            print("WARNING: Unable to retrieve lambda code: %s" % e)
    # traverse subdirectories and get script sources
    result = {}
    for root, subdirs, files in os.walk(folder):
        for file in files:
            prefix = root.split(folder)[-1]
            key = '%s/%s' % (prefix, file)
            if re.match(r'.+\.py$', key) or re.match(r'.+\.js$', key):
                codefile = '%s/%s' % (root, file)
                result[key] = load_file(codefile)
    # cleanup cache
    clean_cache(file_pattern=TMP_DOWNLOAD_FILE_PATTERN,
        last_clean_time=last_cache_cleanup_time,
        max_age=TMP_DOWNLOAD_CACHE_MAX_AGE)
    return result
def get_lambda_code(func_name, retries=1, cache_time=None, env=None):
    if MOCK_OBJ:
        return ''
    env = aws_stack.get_environment(env)
    if cache_time is None and not aws_stack.is_local_env(env):
        cache_time = AWS_LAMBDA_CODE_CACHE_TIMEOUT
    out = cmd_lambda('get-function --function-name %s' % func_name, env, cache_time)
    out = json.loads(out)
    loc = out['Code']['Location']
    hash = md5(loc)
    folder = TMP_DOWNLOAD_FILE_PATTERN.replace('*', hash)
    filename = 'archive.zip'
    archive = '%s/%s' % (folder, filename)
    try:
        mkdir(folder)
        if not os.path.isfile(archive):
            download(loc, archive, verify_ssl=False)
        if len(os.listdir(folder)) <= 1:
            zip_path = os.path.join(folder, filename)
            unzip(zip_path, folder)
    except Exception as e:
        print('WARN: %s' % e)
        rm_rf(archive)
        if retries > 0:
            return get_lambda_code(func_name, retries=retries - 1, cache_time=1, env=env)
        else:
            print('WARNING: Unable to retrieve lambda code: %s' % e)
    # traverse subdirectories and get script sources
    result = {}
    for root, subdirs, files in os.walk(folder):
        for file in files:
            prefix = root.split(folder)[-1]
            key = '%s/%s' % (prefix, file)
            if re.match(r'.+\.py$', key) or re.match(r'.+\.js$', key):
                codefile = '%s/%s' % (root, file)
                result[key] = load_file(codefile)
    # cleanup cache
    clean_cache(file_pattern=TMP_DOWNLOAD_FILE_PATTERN,
        last_clean_time=last_cache_cleanup_time,
        max_age=TMP_DOWNLOAD_CACHE_MAX_AGE)
    # TODO: delete only if cache_time is over
    rm_rf(folder)
    return result
def listen_to_kinesis(stream_name, listener_func=None, processor_script=None,
        events_file=None, endpoint_url=None, log_file=None, configs={}, env=None,
        ddb_lease_table_suffix=None, env_vars={}, kcl_log_level=DEFAULT_KCL_LOG_LEVEL,
        log_subscribers=[], wait_until_started=False):
    """High-level function that allows subscribing to a Kinesis stream and
    receiving events in a listener function. A KCL client process is
    automatically started in the background."""
    env = aws_stack.get_environment(env)
    if not events_file:
        events_file = EVENTS_FILE_PATTERN.replace('*', short_uid())
        TMP_FILES.append(events_file)
    if not processor_script:
        processor_script = generate_processor_script(events_file, log_file=log_file)
    run('rm -f %s' % events_file)
    # start event reader thread (this process)
    ready_mutex = threading.Semaphore(0)
    thread = EventFileReaderThread(events_file, listener_func, ready_mutex=ready_mutex)
    thread.start()
    # Wait until the event reader thread is ready (to avoid 'Connection refused' error on the UNIX socket)
    ready_mutex.acquire()
    # start KCL client (background process)
    if processor_script[-4:] == '.pyc':
        processor_script = processor_script[0:-1]
    # add log listener that notifies when KCL is started
    if wait_until_started:
        listener = KclStartedLogListener()
        log_subscribers.append(listener)
    process = start_kcl_client_process(stream_name, processor_script,
        endpoint_url=endpoint_url, log_file=log_file, configs=configs, env=env,
        ddb_lease_table_suffix=ddb_lease_table_suffix, env_vars=env_vars,
        kcl_log_level=kcl_log_level, log_subscribers=log_subscribers)
    if wait_until_started:
        # Wait at most 90 seconds for initialization. Note that creating the DDB table can take quite a bit
        try:
            listener.sync_init.get(block=True, timeout=90)
        except Exception as e:
            raise Exception('Timeout when waiting for KCL initialization.')
        # wait at most 30 seconds for shard lease notification
        try:
            listener.sync_take_shard.get(block=True, timeout=30)
        except Exception as e:
            # this merely means that there is no shard available to take. Do nothing.
            pass
    return process
def aws_cmd(service, env):
    # TODO: use boto3 instead of running aws-cli commands here!
    cmd = '{ test `which aws` || . .venv/bin/activate; }; aws'
    endpoint_url = None
    env = aws_stack.get_environment(env)
    if env.region == REGION_LOCAL:
        endpoint_url = aws_stack.get_local_service_url(service)
    if endpoint_url:
        cmd = '%s --endpoint-url="%s"' % (cmd, endpoint_url)
        if not is_port_open(endpoint_url):
            raise socket.error()
    cmd = '%s %s' % (cmd, service)
    return cmd
def listen_to_kinesis(stream_name, listener_func=None, processor_script=None,
        events_file=None, endpoint_url=None, log_file=None, configs={}, env=None,
        ddb_lease_table_suffix=None, env_vars={}, kcl_log_level=DEFAULT_KCL_LOG_LEVEL,
        log_subscribers=[], wait_until_started=False):
    """High-level function that allows subscribing to a Kinesis stream and
    receiving events in a listener function. A KCL client process is
    automatically started in the background."""
    env = aws_stack.get_environment(env)
    if not events_file:
        events_file = EVENTS_FILE_PATTERN.replace('*', short_uid())
        TMP_FILES.append(events_file)
    if not processor_script:
        processor_script = generate_processor_script(events_file, log_file=log_file)
    run('rm -f %s' % events_file)
    # start event reader thread (this process)
    ready_mutex = threading.Semaphore(0)
    thread = EventFileReaderThread(events_file, listener_func, ready_mutex=ready_mutex)
    thread.start()
    # Wait until the event reader thread is ready (to avoid 'Connection refused' error on the UNIX socket)
    ready_mutex.acquire()
    # start KCL client (background process)
    if processor_script[-4:] == '.pyc':
        processor_script = processor_script[0:-1]
    # add log listener that notifies when KCL is started
    if wait_until_started:
        listener = KclStartedLogListener()
        log_subscribers.append(listener)
    process = start_kcl_client_process(stream_name, processor_script,
        endpoint_url=endpoint_url, log_file=log_file, configs=configs, env=env,
        ddb_lease_table_suffix=ddb_lease_table_suffix, env_vars=env_vars,
        kcl_log_level=kcl_log_level, log_subscribers=log_subscribers)
    if wait_until_started:
        # Wait at most 90 seconds for initialization. Note that creating the DDB table can take quite a bit
        try:
            listener.sync_init.get(block=True, timeout=90)
        except Exception:
            raise Exception('Timeout when waiting for KCL initialization.')
        # wait at most 30 seconds for shard lease notification
        try:
            listener.sync_take_shard.get(block=True, timeout=30)
        except Exception:
            # this merely means that there is no shard available to take. Do nothing.
            pass
    return process
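A minimal usage sketch for the high-level subscription API above; the stream name and the listener function are hypothetical, and a running local Kinesis endpoint plus the Java KCL daemon dependencies are assumed to be available:

# Hypothetical usage of listen_to_kinesis (assumes a local Kinesis endpoint
# and the Java KCL dependencies are available):
def process_records(records):
    # listener_func is invoked with each batch of records read from the events file
    for record in records:
        print('received: %s' % record)

consumer = listen_to_kinesis('my-test-stream', listener_func=process_records,
    wait_until_started=True)
# ... put records into the stream; each batch is passed to process_records ...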
def aws_cmd(service, env):
    # TODO: use boto3 instead of running aws-cli commands here!
    cmd = '{ test `which aws` || . .venv/bin/activate; }; aws'
    endpoint_url = None
    env = aws_stack.get_environment(env)
    if env.region == REGION_LOCAL:
        endpoint_url = aws_stack.get_local_service_url(service)
    if endpoint_url:
        if endpoint_url.startswith('https://'):
            cmd += ' --no-verify-ssl'
        cmd = '%s --endpoint-url="%s"' % (cmd, endpoint_url)
        if not is_port_open(endpoint_url):
            raise socket.error()
    cmd = '%s %s' % (cmd, service)
    return cmd
def aws_cmd(service, env):
    # TODO: use boto3 instead of running aws-cli commands here!
    cmd = '{ test `which aws` || . .venv/bin/activate; }; aws'
    endpoint_url = None
    env = aws_stack.get_environment(env)
    if aws_stack.is_local_env(env):
        endpoint_url = aws_stack.get_local_service_url(service)
    if endpoint_url:
        if endpoint_url.startswith('https://'):
            cmd += ' --no-verify-ssl'
        cmd = '%s --endpoint-url="%s"' % (cmd, endpoint_url)
        if not is_port_open(endpoint_url):
            raise socket.error()
    cmd = '%s %s' % (cmd, service)
    return cmd
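For illustration, the command string this variant builds when a local HTTPS endpoint is configured; the endpoint URL shown is representative, not fixed:

# Representative value returned by aws_cmd('kinesis', env) for a local HTTPS endpoint:
#   { test `which aws` || . .venv/bin/activate; }; aws --no-verify-ssl \
#       --endpoint-url="https://localhost:4566" kinesis
# i.e. activate the virtualenv only if the aws CLI is not already on the PATH,
# skip certificate checks for the self-signed local endpoint, and scope the
# command to the given service.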
def get_lambda_code(func_name, retries=1, cache_time=None, env=None):
    if MOCK_OBJ:
        return ''
    env = aws_stack.get_environment(env)
    if cache_time is None and env.region != REGION_LOCAL:
        cache_time = AWS_LAMBDA_CODE_CACHE_TIMEOUT
    out = cmd_lambda('get-function --function-name %s' % func_name, env, cache_time)
    out = json.loads(out)
    loc = out['Code']['Location']
    hash = md5(loc)
    folder = TMP_DOWNLOAD_FILE_PATTERN.replace('*', hash)
    filename = 'archive.zip'
    archive = '%s/%s' % (folder, filename)
    try:
        mkdir(folder)
        if not os.path.isfile(archive):
            download(loc, archive, verify_ssl=False)
        if len(os.listdir(folder)) <= 1:
            zip_path = os.path.join(folder, filename)
            unzip(zip_path, folder)
    except Exception as e:
        print('WARN: %s' % e)
        rm_rf(archive)
        if retries > 0:
            return get_lambda_code(func_name, retries=retries - 1, cache_time=1, env=env)
        else:
            print('WARNING: Unable to retrieve lambda code: %s' % e)
    # traverse subdirectories and get script sources
    result = {}
    for root, subdirs, files in os.walk(folder):
        for file in files:
            prefix = root.split(folder)[-1]
            key = '%s/%s' % (prefix, file)
            if re.match(r'.+\.py$', key) or re.match(r'.+\.js$', key):
                codefile = '%s/%s' % (root, file)
                result[key] = load_file(codefile)
    # cleanup cache
    clean_cache(file_pattern=TMP_DOWNLOAD_FILE_PATTERN,
        last_clean_time=last_cache_cleanup_time,
        max_age=TMP_DOWNLOAD_CACHE_MAX_AGE)
    # TODO: delete only if cache_time is over
    rm_rf(folder)
    return result
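The directory walk above yields a mapping from archive-relative file keys to their source strings; a representative (hypothetical) return value:

# Hypothetical return value of get_lambda_code('my-function'):
# {
#     '/handler.py': 'def handler(event, context):\n    return {"status": "ok"}\n',
#     '/lib/helper.js': 'exports.helper = function (event) { /* ... */ };\n'
# }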
def get_stream_info(
    stream_name,
    log_file=None,
    shards=None,
    env=None,
    endpoint_url=None,
    ddb_lease_table_suffix=None,
    env_vars=None,
):
    if env_vars is None:
        env_vars = {}
    if not ddb_lease_table_suffix:
        ddb_lease_table_suffix = DEFAULT_DDB_LEASE_TABLE_SUFFIX
    # construct stream info
    env = aws_stack.get_environment(env)
    props_file = os.path.join(tempfile.gettempdir(), "kclipy.%s.properties" % short_uid())
    # make sure to convert stream ARN to stream name
    stream_name = aws_stack.kinesis_stream_name(stream_name)
    app_name = "%s%s" % (stream_name, ddb_lease_table_suffix)
    stream_info = {
        "name": stream_name,
        "region": aws_stack.get_region(),
        "shards": shards,
        "properties_file": props_file,
        "log_file": log_file,
        "app_name": app_name,
        "env_vars": env_vars,
    }
    # set local connection
    if aws_stack.is_local_env(env):
        stream_info["conn_kwargs"] = {
            "host": LOCALHOST,
            "port": config.service_port("kinesis"),
            "is_secure": bool(config.USE_SSL),
        }
    if endpoint_url:
        if "conn_kwargs" not in stream_info:
            stream_info["conn_kwargs"] = {}
        url = urlparse(endpoint_url)
        stream_info["conn_kwargs"]["host"] = url.hostname
        stream_info["conn_kwargs"]["port"] = url.port
        stream_info["conn_kwargs"]["is_secure"] = url.scheme == "https"
    return stream_info
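For reference, a sketch of the dictionary this returns in a local environment; the region, port, uid, and lease-table suffix values depend on the active configuration and are illustrative only:

# Illustrative return value of get_stream_info('my-test-stream') in a local environment:
# {
#     'name': 'my-test-stream',
#     'region': 'us-east-1',
#     'shards': None,
#     'properties_file': '/tmp/kclipy.abc123de.properties',
#     'log_file': None,
#     'app_name': 'my-test-stream-kclapp',  # stream name + default lease table suffix
#     'env_vars': {},
#     'conn_kwargs': {'host': 'localhost', 'port': 4566, 'is_secure': False}
# }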
def start_kcl_client_process(
    stream_name,
    listener_script,
    log_file=None,
    env=None,
    configs=None,
    endpoint_url=None,
    ddb_lease_table_suffix=None,
    env_vars=None,
    region_name=None,
    kcl_log_level=DEFAULT_KCL_LOG_LEVEL,
    log_subscribers=None,
):
    if configs is None:
        configs = {}
    if env_vars is None:
        env_vars = {}
    if log_subscribers is None:
        log_subscribers = []
    env = aws_stack.get_environment(env)
    # make sure to convert stream ARN to stream name
    stream_name = aws_stack.kinesis_stream_name(stream_name)
    # decide which credentials provider to use
    credentialsProvider = None
    if ("AWS_ASSUME_ROLE_ARN" in os.environ or "AWS_ASSUME_ROLE_ARN" in env_vars) and (
        "AWS_ASSUME_ROLE_SESSION_NAME" in os.environ or "AWS_ASSUME_ROLE_SESSION_NAME" in env_vars
    ):
        # use special credentials provider that can assume IAM roles and handle temporary STS auth tokens
        credentialsProvider = "cloud.localstack.DefaultSTSAssumeRoleSessionCredentialsProvider"
        # pass through env variables to child process
        for var_name in [
            "AWS_ASSUME_ROLE_ARN",
            "AWS_ASSUME_ROLE_SESSION_NAME",
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
            "AWS_SESSION_TOKEN",
        ]:
            if var_name in os.environ and var_name not in env_vars:
                env_vars[var_name] = os.environ[var_name]
    if aws_stack.is_local_env(env):
        # need to disable CBOR protocol, enforce use of plain JSON,
        # see https://github.com/mhart/kinesalite/issues/31
        env_vars["AWS_CBOR_DISABLE"] = "true"
    if kcl_log_level or (len(log_subscribers) > 0):
        if not log_file:
            log_file = LOG_FILE_PATTERN.replace("*", short_uid())
            TMP_FILES.append(log_file)
        run("touch %s" % log_file)
        # start log output reader thread which will read the KCL log
        # file and print each line to stdout of this process...
        reader_thread = OutputReaderThread(
            {
                "file": log_file,
                "level": kcl_log_level,
                "log_prefix": "KCL",
                "log_subscribers": log_subscribers,
            }
        )
        reader_thread.start()
    # construct stream info
    stream_info = get_stream_info(
        stream_name,
        log_file,
        env=env,
        endpoint_url=endpoint_url,
        ddb_lease_table_suffix=ddb_lease_table_suffix,
        env_vars=env_vars,
    )
    props_file = stream_info["properties_file"]
    # set kcl config options
    kwargs = {"metricsLevel": "NONE", "initialPositionInStream": "LATEST"}
    # set parameters for local connection
    if aws_stack.is_local_env(env):
        kwargs["kinesisEndpoint"] = f"{LOCALHOST}:{config.service_port('kinesis')}"
        kwargs["dynamodbEndpoint"] = f"{LOCALHOST}:{config.service_port('dynamodb')}"
        kwargs["kinesisProtocol"] = config.get_protocol()
        kwargs["dynamodbProtocol"] = config.get_protocol()
        kwargs["disableCertChecking"] = "true"
    kwargs.update(configs)
    # create config file
    kclipy_helper.create_config_file(
        config_file=props_file,
        executableName=listener_script,
        streamName=stream_name,
        applicationName=stream_info["app_name"],
        credentialsProvider=credentialsProvider,
        region_name=region_name,
        **kwargs,
    )
    TMP_FILES.append(props_file)
    # start stream consumer
    stream = KinesisStream(id=stream_name, params=stream_info)
    thread_consumer = KinesisProcessorThread.start_consumer(stream)
    TMP_THREADS.append(thread_consumer)
    return thread_consumer
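To make the config-file step concrete, the kwargs assembled for a local environment end up roughly as follows; the ports and protocol come from the running configuration and are illustrative:

# Roughly the kwargs handed to kclipy_helper.create_config_file for a local env
# (ports and protocol are illustrative and depend on the active config):
# {
#     'metricsLevel': 'NONE',
#     'initialPositionInStream': 'LATEST',
#     'kinesisEndpoint': 'localhost:4566',
#     'dynamodbEndpoint': 'localhost:4566',
#     'kinesisProtocol': 'http',
#     'dynamodbProtocol': 'http',
#     'disableCertChecking': 'true'
# }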