Example #1
def get_lambda_code(func_name, retries=1, cache_time=None, env=None):
    if MOCK_OBJ:
        return ''
    env = aws_stack.get_environment(env)
    if cache_time is None and env.region != REGION_LOCAL:
        cache_time = AWS_LAMBDA_CODE_CACHE_TIMEOUT
    out = cmd_lambda('get-function --function-name %s' % func_name, env,
                     cache_time)
    out = json.loads(out)
    loc = out['Code']['Location']
    hash = md5(loc)
    # print("Location %s %s" % (hash, func_name))
    folder = TMP_DOWNLOAD_FILE_PATTERN.replace('*', '%s') % hash
    filename = 'archive.zip'
    archive = '%s/%s' % (folder, filename)
    try:
        run('mkdir -p %s' % folder)
        if not os.path.isfile(archive):
            # print("Downloading %s" % archive)
            run("wget -O %s '%s'" % (archive, loc))
        if len(os.listdir(folder)) <= 1:
            # print("Unzipping %s/%s" % (folder, filename))
            run("cd %s && unzip -o %s" % (folder, filename))
    except Exception as e:
        print("WARN: %s" % e)
        sh.rm('-f', archive)
        if retries > 0:
            return get_lambda_code(func_name,
                                   retries=retries - 1,
                                   cache_time=1,
                                   env=env)
        else:
            print("WARNING: Unable to retrieve lambda code: %s" % e)
Example #2
def wait_for_stream_created(table_name):
    stream_name = get_kinesis_stream_name(table_name)
    stream = KinesisStream(id=stream_name, num_shards=1)
    kinesis = aws_stack.create_external_boto_client(
        "kinesis", env=get_environment(None))
    stream.connect(kinesis)
    stream.wait_for()
Example #3
def get_stream_info(stream_name, log_file=None, shards=None, env=None, endpoint_url=None,
        ddb_lease_table_suffix=None, env_vars={}):
    if not ddb_lease_table_suffix:
        ddb_lease_table_suffix = DEFAULT_DDB_LEASE_TABLE_SUFFIX
    # construct stream info
    env = aws_stack.get_environment(env)
    props_file = os.path.join('/tmp/', 'kclipy.%s.properties' % short_uid())
    app_name = '%s%s' % (stream_name, ddb_lease_table_suffix)
    stream_info = {
        'name': stream_name,
        'region': DEFAULT_REGION,
        'shards': shards,
        'properties_file': props_file,
        'log_file': log_file,
        'app_name': app_name,
        'env_vars': env_vars
    }
    # set local connection
    if env.region == REGION_LOCAL:
        from localstack.constants import LOCALHOST, DEFAULT_PORT_KINESIS
        stream_info['conn_kwargs'] = {
            'host': LOCALHOST,
            'port': DEFAULT_PORT_KINESIS,
            'is_secure': False
        }
    if endpoint_url:
        if 'conn_kwargs' not in stream_info:
            stream_info['conn_kwargs'] = {}
        url = urlparse(endpoint_url)
        stream_info['conn_kwargs']['host'] = url.hostname
        stream_info['conn_kwargs']['port'] = url.port
        stream_info['conn_kwargs']['is_secure'] = url.scheme == 'https'
    return stream_info
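For illustration, assuming the resolved environment is local (env.region == REGION_LOCAL) and no endpoint_url is given, the call below yields connection kwargs pointing at the local Kinesis endpoint; the stream name is a placeholder.

info = get_stream_info('test-stream')
# info['conn_kwargs'] == {'host': LOCALHOST, 'port': DEFAULT_PORT_KINESIS, 'is_secure': False}
# info['app_name'] == 'test-stream' + DEFAULT_DDB_LEASE_TABLE_SUFFIX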
Example #4
def wait_for_stream_created(table_name):
    stream_name = get_kinesis_stream_name(table_name)
    stream = KinesisStream(id=stream_name, num_shards=1)
    kinesis = aws_stack.connect_to_service('kinesis',
                                           env=get_environment(None))
    stream.connect(kinesis)
    stream.wait_for()
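A usage note: this helper is typically called right after enabling a stream on a DynamoDB table, to block until the backing Kinesis stream is up; the table name below is a placeholder.

wait_for_stream_created('my-table')  # returns once stream.wait_for() succeeds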
Example #5
def get_stream_info(stream_name, log_file=None, shards=None, env=None, endpoint_url=None,
        ddb_lease_table_suffix=None, env_vars={}):
    if not ddb_lease_table_suffix:
        ddb_lease_table_suffix = DEFAULT_DDB_LEASE_TABLE_SUFFIX
    # construct stream info
    env = aws_stack.get_environment(env)
    props_file = os.path.join(tempfile.gettempdir(), 'kclipy.%s.properties' % short_uid())
    app_name = '%s%s' % (stream_name, ddb_lease_table_suffix)
    stream_info = {
        'name': stream_name,
        'region': DEFAULT_REGION,
        'shards': shards,
        'properties_file': props_file,
        'log_file': log_file,
        'app_name': app_name,
        'env_vars': env_vars
    }
    # set local connection
    if env.region == REGION_LOCAL:
        stream_info['conn_kwargs'] = {
            'host': HOSTNAME,
            'port': DEFAULT_PORT_KINESIS,
            'is_secure': bool(USE_SSL)
        }
    if endpoint_url:
        if 'conn_kwargs' not in stream_info:
            stream_info['conn_kwargs'] = {}
        url = urlparse(endpoint_url)
        stream_info['conn_kwargs']['host'] = url.hostname
        stream_info['conn_kwargs']['port'] = url.port
        stream_info['conn_kwargs']['is_secure'] = url.scheme == 'https'
    return stream_info
Example #6
def start_kcl_client_process(stream_name, listener_script, log_file=None, env=None, configs={},
        endpoint_url=None, ddb_lease_table_suffix=None, env_vars={}, region_name=None,
        kcl_log_level=DEFAULT_KCL_LOG_LEVEL, log_subscribers=[]):
    env = aws_stack.get_environment(env)
    # make sure to convert stream ARN to stream name
    stream_name = aws_stack.kinesis_stream_name(stream_name)
    # decide which credentials provider to use
    credentialsProvider = None
    if (('AWS_ASSUME_ROLE_ARN' in os.environ or 'AWS_ASSUME_ROLE_ARN' in env_vars) and
            ('AWS_ASSUME_ROLE_SESSION_NAME' in os.environ or 'AWS_ASSUME_ROLE_SESSION_NAME' in env_vars)):
        # use special credentials provider that can assume IAM roles and handle temporary STS auth tokens
        credentialsProvider = 'cloud.localstack.DefaultSTSAssumeRoleSessionCredentialsProvider'
        # pass through env variables to child process
        for var_name in ['AWS_ASSUME_ROLE_ARN', 'AWS_ASSUME_ROLE_SESSION_NAME',
                'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'AWS_SESSION_TOKEN']:
            if var_name in os.environ and var_name not in env_vars:
                env_vars[var_name] = os.environ[var_name]
    if aws_stack.is_local_env(env):
        # need to disable CBOR protocol, enforce use of plain JSON,
        # see https://github.com/mhart/kinesalite/issues/31
        env_vars['AWS_CBOR_DISABLE'] = 'true'
    if kcl_log_level or (len(log_subscribers) > 0):
        if not log_file:
            log_file = LOG_FILE_PATTERN.replace('*', short_uid())
            TMP_FILES.append(log_file)
        run('touch %s' % log_file)
        # start log output reader thread which will read the KCL log
        # file and print each line to stdout of this process...
        reader_thread = OutputReaderThread({'file': log_file, 'level': kcl_log_level,
            'log_prefix': 'KCL', 'log_subscribers': log_subscribers})
        reader_thread.start()

    # construct stream info
    stream_info = get_stream_info(stream_name, log_file, env=env, endpoint_url=endpoint_url,
        ddb_lease_table_suffix=ddb_lease_table_suffix, env_vars=env_vars)
    props_file = stream_info['properties_file']
    # set kcl config options
    kwargs = {
        'metricsLevel': 'NONE',
        'initialPositionInStream': 'LATEST'
    }
    # set parameters for local connection
    if aws_stack.is_local_env(env):
        kwargs['kinesisEndpoint'] = '%s:%s' % (HOSTNAME, config.PORT_KINESIS)
        kwargs['dynamodbEndpoint'] = '%s:%s' % (HOSTNAME, config.PORT_DYNAMODB)
        kwargs['kinesisProtocol'] = get_service_protocol()
        kwargs['dynamodbProtocol'] = get_service_protocol()
        kwargs['disableCertChecking'] = 'true'
    kwargs.update(configs)
    # create config file
    kclipy_helper.create_config_file(config_file=props_file, executableName=listener_script,
        streamName=stream_name, applicationName=stream_info['app_name'],
        credentialsProvider=credentialsProvider, region_name=region_name, **kwargs)
    TMP_FILES.append(props_file)
    # start stream consumer
    stream = KinesisStream(id=stream_name, params=stream_info)
    thread_consumer = KinesisProcessorThread.start_consumer(stream)
    TMP_THREADS.append(thread_consumer)
    return thread_consumer
Example #7
def start_kcl_client_process(stream_name, listener_script, log_file=None, env=None, configs={},
        endpoint_url=None, ddb_lease_table_suffix=None, env_vars={},
        kcl_log_level=DEFAULT_KCL_LOG_LEVEL, log_subscribers=[]):
    env = aws_stack.get_environment(env)
    # decide which credentials provider to use
    credentialsProvider = None
    if (('AWS_ASSUME_ROLE_ARN' in os.environ or 'AWS_ASSUME_ROLE_ARN' in env_vars) and
            ('AWS_ASSUME_ROLE_SESSION_NAME' in os.environ or 'AWS_ASSUME_ROLE_SESSION_NAME' in env_vars)):
        # use special credentials provider that can assume IAM roles and handle temporary STS auth tokens
        credentialsProvider = 'com.atlassian.DefaultSTSAssumeRoleSessionCredentialsProvider'
        # pass through env variables to child process
        for var_name in ['AWS_ASSUME_ROLE_ARN', 'AWS_ASSUME_ROLE_SESSION_NAME',
                'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'AWS_SESSION_TOKEN']:
            if var_name in os.environ and var_name not in env_vars:
                env_vars[var_name] = os.environ[var_name]
    if env.region == REGION_LOCAL:
        # need to disable CBOR protocol, enforce use of plain JSON,
        # see https://github.com/mhart/kinesalite/issues/31
        env_vars['AWS_CBOR_DISABLE'] = 'true'
    if kcl_log_level or (len(log_subscribers) > 0):
        if not log_file:
            log_file = LOG_FILE_PATTERN.replace('*', short_uid())
            TMP_FILES.append(log_file)
        run('touch %s' % log_file)
        # start log output reader thread which will read the KCL log
        # file and print each line to stdout of this process...
        reader_thread = OutputReaderThread({'file': log_file, 'level': kcl_log_level,
            'log_prefix': 'KCL', 'log_subscribers': log_subscribers})
        reader_thread.start()

    # construct stream info
    stream_info = get_stream_info(stream_name, log_file, env=env, endpoint_url=endpoint_url,
        ddb_lease_table_suffix=ddb_lease_table_suffix, env_vars=env_vars)
    props_file = stream_info['properties_file']
    # set kcl config options
    kwargs = {
        'metricsLevel': 'NONE',
        'initialPositionInStream': 'LATEST'
    }
    # set parameters for local connection
    if env.region == REGION_LOCAL:
        from localstack.constants import LOCALHOST, DEFAULT_PORT_KINESIS, DEFAULT_PORT_DYNAMODB
        kwargs['kinesisEndpoint'] = '%s:%s' % (LOCALHOST, DEFAULT_PORT_KINESIS)
        kwargs['dynamodbEndpoint'] = '%s:%s' % (LOCALHOST, DEFAULT_PORT_DYNAMODB)
        kwargs['kinesisProtocol'] = 'http'
        kwargs['dynamodbProtocol'] = 'http'
        kwargs['disableCertChecking'] = 'true'
    kwargs.update(configs)
    # create config file
    kclipy_helper.create_config_file(config_file=props_file, executableName=listener_script,
        streamName=stream_name, applicationName=stream_info['app_name'],
        credentialsProvider=credentialsProvider, **kwargs)
    TMP_FILES.append(props_file)
    # start stream consumer
    stream = KinesisStream(id=stream_name, params=stream_info)
    thread_consumer = KinesisProcessorThread.start_consumer(stream)
    TMP_THREADS.append(thread_consumer)
    return thread_consumer
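A minimal sketch of driving the function above, assuming a KCL listener script exists on disk; the stream name and script path are placeholders.

consumer = start_kcl_client_process('my-stream', '/path/to/listener.py',
                                    kcl_log_level='INFO')
# 'consumer' is the KinesisProcessorThread returned above; it keeps running
# until the process is torn down (it is also tracked in TMP_THREADS).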
Example #8
def get_kinesis_events(stream_name, shard_id, max_results=10, env=None):
    env = aws_stack.get_environment(env)
    records = aws_stack.kinesis_get_latest_records(stream_name, shard_id, count=max_results, env=env)
    for r in records:
        r['ApproximateArrivalTimestamp'] = mktime(r['ApproximateArrivalTimestamp'])
    result = {
        'events': records
    }
    return result
Example #10
def get_kinesis_events(stream_name, shard_id, max_results=10, env=None):
    timestamp = now() - KINESIS_RECENT_EVENTS_TIME_DIFF_SECS
    env = aws_stack.get_environment(env)
    records = aws_stack.kinesis_get_latest_records(stream_name, shard_id, count=max_results, env=env)
    for r in records:
        r['ApproximateArrivalTimestamp'] = mktime(r['ApproximateArrivalTimestamp'])
    result = {
        'events': records
    }
    return result
Example #11
def get_lambda_code(func_name,
                    retries=1,
                    cache_time=None,
                    env=None,
                    region=None):
    if MOCK_OBJ:
        return ""
    env = aws_stack.get_environment(env)
    if cache_time is None and not aws_stack.is_local_env(env):
        cache_time = AWS_LAMBDA_CODE_CACHE_TIMEOUT
    lambda_client = _connect("lambda", env=env, region=region)
    out = lambda_client.get_function(FunctionName=func_name)
    loc = out["Code"]["Location"]
    hash = md5(loc)
    folder = TMP_DOWNLOAD_FILE_PATTERN.replace("*", hash)
    filename = "archive.zip"
    archive = "%s/%s" % (folder, filename)
    try:
        mkdir(folder)
        if not os.path.isfile(archive):
            download(loc, archive, verify_ssl=False)
        if len(os.listdir(folder)) <= 1:
            zip_path = os.path.join(folder, filename)
            unzip(zip_path, folder)
    except Exception as e:
        print("WARN: %s" % e)
        rm_rf(archive)
        if retries > 0:
            return get_lambda_code(func_name,
                                   retries=retries - 1,
                                   cache_time=1,
                                   env=env)
        else:
            print("WARNING: Unable to retrieve lambda code: %s" % e)

    # traverse subdirectories and get script sources
    result = {}
    for root, subdirs, files in os.walk(folder):
        for file in files:
            prefix = root.split(folder)[-1]
            key = "%s/%s" % (prefix, file)
            if re.match(r".+\.py$", key) or re.match(r".+\.js$", key):
                codefile = "%s/%s" % (root, file)
                result[key] = load_file(codefile)

    # cleanup cache
    clean_cache(
        file_pattern=TMP_DOWNLOAD_FILE_PATTERN,
        last_clean_time=last_cache_cleanup_time,
        max_age=TMP_DOWNLOAD_CACHE_MAX_AGE,
    )
    # TODO: delete only if cache_time is over
    rm_rf(folder)

    return result
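For illustration, the dict returned by this variant maps archive-relative keys to file contents, restricted to .py and .js files; e.g., assuming the downloaded archive contains a top-level handler.py:

sources = get_lambda_code('my-function')
# sources == {'/handler.py': '<contents of handler.py>', ...}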
Example #12
def __init__(self, params):
    props_file = params['properties_file']
    env_vars = params['env_vars']
    cmd = kclipy_helper.get_kcl_app_command('java',
        MULTI_LANG_DAEMON_CLASS, props_file)
    if not params['log_file']:
        params['log_file'] = '%s.log' % props_file
        TMP_FILES.append(params['log_file'])
    env = aws_stack.get_environment()
    quiet = aws_stack.is_local_env(env)
    ShellCommandThread.__init__(self, cmd, outfile=params['log_file'], env_vars=env_vars, quiet=quiet)
Example #13
def aws_cmd(service, env):
    # TODO: use boto3 instead of running aws-cli commands here!

    cmd = '. .venv/bin/activate; aws'
    endpoint_url = None
    env = aws_stack.get_environment(env)
    if env.region == REGION_LOCAL:
        endpoint_url = aws_stack.get_local_service_url(service)
    if endpoint_url:
        cmd = '%s --endpoint-url="%s"' % (cmd, endpoint_url)
    cmd = '%s %s' % (cmd, service)
    return cmd
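A sketch of the intended call pattern, assuming the same shell run() helper used by the other examples; the service and subcommand are placeholders.

cmd = aws_cmd('kinesis', env)
output = run('%s list-streams' % cmd)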
Example #14
def __init__(self, params):
    props_file = params["properties_file"]
    env_vars = params["env_vars"]
    cmd = kclipy_helper.get_kcl_app_command("java", MULTI_LANG_DAEMON_CLASS, props_file)
    if not params["log_file"]:
        params["log_file"] = "%s.log" % props_file
        TMP_FILES.append(params["log_file"])
    env = aws_stack.get_environment()
    quiet = aws_stack.is_local_env(env)
    ShellCommandThread.__init__(
        self, cmd, outfile=params["log_file"], env_vars=env_vars, quiet=quiet
    )
Example #15
def __init__(self, params):
    props_file = params['properties_file']
    env_vars = params['env_vars']
    cmd = kclipy_helper.get_kcl_app_command('java',
        MULTI_LANG_DAEMON_CLASS, props_file)
    if not params['log_file']:
        params['log_file'] = '%s.log' % props_file
        TMP_FILES.append(params['log_file'])
    # print(cmd)
    env = aws_stack.get_environment()
    quiet = env.region == REGION_LOCAL
    ShellCommandThread.__init__(self, cmd, outfile=params['log_file'], env_vars=env_vars, quiet=quiet)
Example #16
def get_kinesis_events(stream_name, shard_id, max_results=10, env=None):
    records = []
    try:
        env = aws_stack.get_environment(env)
        records = aws_stack.kinesis_get_latest_records(
            stream_name, shard_id, count=max_results, env=env
        )
        for r in records:
            r["ApproximateArrivalTimestamp"] = mktime(r["ApproximateArrivalTimestamp"])
    except Exception:
        pass
    result = {"events": records}
    return result
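A short usage sketch; the stream and shard identifiers are placeholders. Since this variant swallows exceptions, callers always get a dict with an 'events' list, possibly empty.

events = get_kinesis_events('my-stream', 'shardId-000000000000', max_results=5)
for event in events['events']:
    print(event['ApproximateArrivalTimestamp'])  # converted to a numeric timestamp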
Example #17
def get_lambda_code(func_name, retries=1, cache_time=None, env=None):
    if MOCK_OBJ:
        return ''
    env = aws_stack.get_environment(env)
    if cache_time is None and env.region != REGION_LOCAL:
        cache_time = AWS_LAMBDA_CODE_CACHE_TIMEOUT
    out = cmd_lambda('get-function --function-name %s' % func_name, env,
                     cache_time)
    out = json.loads(out)
    loc = out['Code']['Location']
    hash = md5(loc)
    # print("Location %s %s" % (hash, func_name))
    folder = TMP_DOWNLOAD_FILE_PATTERN.replace('*', '%s') % hash
    filename = 'archive.zip'
    archive = '%s/%s' % (folder, filename)
    try:
        run('mkdir -p %s' % folder)
        if not os.path.isfile(archive):
            # print("Downloading %s" % archive)
            run("wget -O %s '%s'" % (archive, loc))
        if len(os.listdir(folder)) <= 1:
            # print("Unzipping %s/%s" % (folder, filename))
            run("cd %s && unzip -o %s" % (folder, filename))
    except Exception as e:
        print("WARN: %s" % e)
        sh.rm('-f', archive)
        if retries > 0:
            return get_lambda_code(func_name,
                                   retries=retries - 1,
                                   cache_time=1,
                                   env=env)
        else:
            print("WARNING: Unable to retrieve lambda code: %s" % e)

    # traverse subdirectories and get script sources
    result = {}
    for root, subdirs, files in os.walk(folder):
        for file in files:
            prefix = root.split(folder)[-1]
            key = '%s/%s' % (prefix, file)
            if re.match(r'.+\.py$', key) or re.match(r'.+\.js$', key):
                codefile = '%s/%s' % (root, file)
                result[key] = load_file(codefile)

    # cleanup cache
    clean_cache(file_pattern=TMP_DOWNLOAD_FILE_PATTERN,
                last_clean_time=last_cache_cleanup_time,
                max_age=TMP_DOWNLOAD_CACHE_MAX_AGE)

    return result
Example #18
def get_lambda_code(func_name, retries=1, cache_time=None, env=None):
    if MOCK_OBJ:
        return ''
    env = aws_stack.get_environment(env)
    if cache_time is None and not aws_stack.is_local_env(env):
        cache_time = AWS_LAMBDA_CODE_CACHE_TIMEOUT
    out = cmd_lambda('get-function --function-name %s' % func_name, env,
                     cache_time)
    out = json.loads(out)
    loc = out['Code']['Location']
    hash = md5(loc)
    folder = TMP_DOWNLOAD_FILE_PATTERN.replace('*', hash)
    filename = 'archive.zip'
    archive = '%s/%s' % (folder, filename)
    try:
        mkdir(folder)
        if not os.path.isfile(archive):
            download(loc, archive, verify_ssl=False)
        if len(os.listdir(folder)) <= 1:
            zip_path = os.path.join(folder, filename)
            unzip(zip_path, folder)
    except Exception as e:
        print('WARN: %s' % e)
        rm_rf(archive)
        if retries > 0:
            return get_lambda_code(func_name,
                                   retries=retries - 1,
                                   cache_time=1,
                                   env=env)
        else:
            print('WARNING: Unable to retrieve lambda code: %s' % e)

    # traverse subdirectories and get script sources
    result = {}
    for root, subdirs, files in os.walk(folder):
        for file in files:
            prefix = root.split(folder)[-1]
            key = '%s/%s' % (prefix, file)
            if re.match(r'.+\.py$', key) or re.match(r'.+\.js$', key):
                codefile = '%s/%s' % (root, file)
                result[key] = load_file(codefile)

    # cleanup cache
    clean_cache(file_pattern=TMP_DOWNLOAD_FILE_PATTERN,
                last_clean_time=last_cache_cleanup_time,
                max_age=TMP_DOWNLOAD_CACHE_MAX_AGE)
    # TODO: delete only if cache_time is over
    rm_rf(folder)

    return result
Example #19
def listen_to_kinesis(stream_name, listener_func=None, processor_script=None,
        events_file=None, endpoint_url=None, log_file=None, configs={}, env=None,
        ddb_lease_table_suffix=None, env_vars={}, kcl_log_level=DEFAULT_KCL_LOG_LEVEL,
        log_subscribers=[], wait_until_started=False):
    """
    High-level function that allows you to subscribe to a Kinesis stream
    and receive events in a listener function. A KCL client process is
    automatically started in the background.
    """
    env = aws_stack.get_environment(env)
    if not events_file:
        events_file = EVENTS_FILE_PATTERN.replace('*', short_uid())
        TMP_FILES.append(events_file)
    if not processor_script:
        processor_script = generate_processor_script(events_file, log_file=log_file)

    run('rm -f %s' % events_file)
    # start event reader thread (this process)
    ready_mutex = threading.Semaphore(0)
    thread = EventFileReaderThread(events_file, listener_func, ready_mutex=ready_mutex)
    thread.start()
    # Wait until the event reader thread is ready (to avoid 'Connection refused' error on the UNIX socket)
    ready_mutex.acquire()
    # start KCL client (background process)
    if processor_script[-4:] == '.pyc':
        processor_script = processor_script[0:-1]
    # add log listener that notifies when KCL is started
    if wait_until_started:
        listener = KclStartedLogListener()
        log_subscribers.append(listener)

    process = start_kcl_client_process(stream_name, processor_script,
        endpoint_url=endpoint_url, log_file=log_file, configs=configs, env=env,
        ddb_lease_table_suffix=ddb_lease_table_suffix, env_vars=env_vars, kcl_log_level=kcl_log_level,
        log_subscribers=log_subscribers)

    if wait_until_started:
        # Wait at most 90 seconds for initialization. Note that creating the DDB table can take quite a bit of time
        try:
            listener.sync_init.get(block=True, timeout=90)
        except Exception as e:
            raise Exception('Timeout when waiting for KCL initialization.')
        # wait at most 30 seconds for shard lease notification
        try:
            listener.sync_take_shard.get(block=True, timeout=30)
        except Exception as e:
            # this merely means that there is no shard available to take. Do nothing.
            pass

    return process
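A minimal subscriber sketch for the function above, assuming the stream already exists and that the listener function receives each batch of records as a list (the stream name and listener body are placeholders).

def print_records(records):
    for record in records:
        print(record)

process = listen_to_kinesis('my-stream', listener_func=print_records,
                            wait_until_started=True)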
Example #20
def aws_cmd(service, env):
    # TODO: use boto3 instead of running aws-cli commands here!

    cmd = '{ test `which aws` || . .venv/bin/activate; }; aws'
    endpoint_url = None
    env = aws_stack.get_environment(env)
    if env.region == REGION_LOCAL:
        endpoint_url = aws_stack.get_local_service_url(service)
    if endpoint_url:
        cmd = '%s --endpoint-url="%s"' % (cmd, endpoint_url)
        if not is_port_open(endpoint_url):
            raise socket.error()
    cmd = '%s %s' % (cmd, service)
    return cmd
Example #21
def listen_to_kinesis(stream_name, listener_func=None, processor_script=None,
        events_file=None, endpoint_url=None, log_file=None, configs={}, env=None,
        ddb_lease_table_suffix=None, env_vars={}, kcl_log_level=DEFAULT_KCL_LOG_LEVEL,
        log_subscribers=[], wait_until_started=False):
    """
    High-level function that allows you to subscribe to a Kinesis stream
    and receive events in a listener function. A KCL client process is
    automatically started in the background.
    """
    env = aws_stack.get_environment(env)
    if not events_file:
        events_file = EVENTS_FILE_PATTERN.replace('*', short_uid())
        TMP_FILES.append(events_file)
    if not processor_script:
        processor_script = generate_processor_script(events_file, log_file=log_file)

    run('rm -f %s' % events_file)
    # start event reader thread (this process)
    ready_mutex = threading.Semaphore(0)
    thread = EventFileReaderThread(events_file, listener_func, ready_mutex=ready_mutex)
    thread.start()
    # Wait until the event reader thread is ready (to avoid 'Connection refused' error on the UNIX socket)
    ready_mutex.acquire()
    # start KCL client (background process)
    if processor_script[-4:] == '.pyc':
        processor_script = processor_script[0:-1]
    # add log listener that notifies when KCL is started
    if wait_until_started:
        listener = KclStartedLogListener()
        log_subscribers.append(listener)

    process = start_kcl_client_process(stream_name, processor_script,
        endpoint_url=endpoint_url, log_file=log_file, configs=configs, env=env,
        ddb_lease_table_suffix=ddb_lease_table_suffix, env_vars=env_vars, kcl_log_level=kcl_log_level,
        log_subscribers=log_subscribers)

    if wait_until_started:
        # Wait at most 90 seconds for initialization. Note that creating the DDB table can take quite a bit of time
        try:
            listener.sync_init.get(block=True, timeout=90)
        except Exception:
            raise Exception('Timeout when waiting for KCL initialization.')
        # wait at most 30 seconds for shard lease notification
        try:
            listener.sync_take_shard.get(block=True, timeout=30)
        except Exception:
            # this merely means that there is no shard available to take. Do nothing.
            pass

    return process
Example #22
def aws_cmd(service, env):
    # TODO: use boto3 instead of running aws-cli commands here!

    cmd = '{ test `which aws` || . .venv/bin/activate; }; aws'
    endpoint_url = None
    env = aws_stack.get_environment(env)
    if env.region == REGION_LOCAL:
        endpoint_url = aws_stack.get_local_service_url(service)
    if endpoint_url:
        if endpoint_url.startswith('https://'):
            cmd += ' --no-verify-ssl'
        cmd = '%s --endpoint-url="%s"' % (cmd, endpoint_url)
        if not is_port_open(endpoint_url):
            raise socket.error()
    cmd = '%s %s' % (cmd, service)
    return cmd
Example #23
def aws_cmd(service, env):
    # TODO: use boto3 instead of running aws-cli commands here!

    cmd = '{ test `which aws` || . .venv/bin/activate; }; aws'
    endpoint_url = None
    env = aws_stack.get_environment(env)
    if aws_stack.is_local_env(env):
        endpoint_url = aws_stack.get_local_service_url(service)
    if endpoint_url:
        if endpoint_url.startswith('https://'):
            cmd += ' --no-verify-ssl'
        cmd = '%s --endpoint-url="%s"' % (cmd, endpoint_url)
        if not is_port_open(endpoint_url):
            raise socket.error()
    cmd = '%s %s' % (cmd, service)
    return cmd
Example #24
def get_lambda_code(func_name, retries=1, cache_time=None, env=None):
    if MOCK_OBJ:
        return ''
    env = aws_stack.get_environment(env)
    if cache_time is None and env.region != REGION_LOCAL:
        cache_time = AWS_LAMBDA_CODE_CACHE_TIMEOUT
    out = cmd_lambda('get-function --function-name %s' % func_name, env, cache_time)
    out = json.loads(out)
    loc = out['Code']['Location']
    hash = md5(loc)
    folder = TMP_DOWNLOAD_FILE_PATTERN.replace('*', hash)
    filename = 'archive.zip'
    archive = '%s/%s' % (folder, filename)
    try:
        mkdir(folder)
        if not os.path.isfile(archive):
            download(loc, archive, verify_ssl=False)
        if len(os.listdir(folder)) <= 1:
            zip_path = os.path.join(folder, filename)
            unzip(zip_path, folder)
    except Exception as e:
        print('WARN: %s' % e)
        rm_rf(archive)
        if retries > 0:
            return get_lambda_code(func_name, retries=retries - 1, cache_time=1, env=env)
        else:
            print('WARNING: Unable to retrieve lambda code: %s' % e)

    # traverse subdirectories and get script sources
    result = {}
    for root, subdirs, files in os.walk(folder):
        for file in files:
            prefix = root.split(folder)[-1]
            key = '%s/%s' % (prefix, file)
            if re.match(r'.+\.py$', key) or re.match(r'.+\.js$', key):
                codefile = '%s/%s' % (root, file)
                result[key] = load_file(codefile)

    # cleanup cache
    clean_cache(file_pattern=TMP_DOWNLOAD_FILE_PATTERN,
        last_clean_time=last_cache_cleanup_time,
        max_age=TMP_DOWNLOAD_CACHE_MAX_AGE)
    # TODO: delete only if cache_time is over
    rm_rf(folder)

    return result
Example #25
def get_stream_info(
    stream_name,
    log_file=None,
    shards=None,
    env=None,
    endpoint_url=None,
    ddb_lease_table_suffix=None,
    env_vars=None,
):
    if env_vars is None:
        env_vars = {}
    if not ddb_lease_table_suffix:
        ddb_lease_table_suffix = DEFAULT_DDB_LEASE_TABLE_SUFFIX
    # construct stream info
    env = aws_stack.get_environment(env)
    props_file = os.path.join(tempfile.gettempdir(),
                              "kclipy.%s.properties" % short_uid())
    # make sure to convert stream ARN to stream name
    stream_name = aws_stack.kinesis_stream_name(stream_name)
    app_name = "%s%s" % (stream_name, ddb_lease_table_suffix)
    stream_info = {
        "name": stream_name,
        "region": aws_stack.get_region(),
        "shards": shards,
        "properties_file": props_file,
        "log_file": log_file,
        "app_name": app_name,
        "env_vars": env_vars,
    }
    # set local connection
    if aws_stack.is_local_env(env):
        stream_info["conn_kwargs"] = {
            "host": LOCALHOST,
            "port": config.service_port("kinesis"),
            "is_secure": bool(config.USE_SSL),
        }
    if endpoint_url:
        if "conn_kwargs" not in stream_info:
            stream_info["conn_kwargs"] = {}
        url = urlparse(endpoint_url)
        stream_info["conn_kwargs"]["host"] = url.hostname
        stream_info["conn_kwargs"]["port"] = url.port
        stream_info["conn_kwargs"]["is_secure"] = url.scheme == "https"
    return stream_info
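Note how an explicit endpoint_url overrides the local connection defaults in this variant; a sketch with a placeholder URL:

info = get_stream_info('my-stream', endpoint_url='https://kinesis.example.com:4566')
# info['conn_kwargs'] == {'host': 'kinesis.example.com', 'port': 4566, 'is_secure': True}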
Example #26
def start_kcl_client_process(
    stream_name,
    listener_script,
    log_file=None,
    env=None,
    configs=None,
    endpoint_url=None,
    ddb_lease_table_suffix=None,
    env_vars=None,
    region_name=None,
    kcl_log_level=DEFAULT_KCL_LOG_LEVEL,
    log_subscribers=None,
):
    if configs is None:
        configs = {}
    if env_vars is None:
        env_vars = {}
    if log_subscribers is None:
        log_subscribers = []
    env = aws_stack.get_environment(env)
    # make sure to convert stream ARN to stream name
    stream_name = aws_stack.kinesis_stream_name(stream_name)
    # decide which credentials provider to use
    credentialsProvider = None
    if ("AWS_ASSUME_ROLE_ARN" in os.environ or "AWS_ASSUME_ROLE_ARN"
            in env_vars) and ("AWS_ASSUME_ROLE_SESSION_NAME" in os.environ
                              or "AWS_ASSUME_ROLE_SESSION_NAME" in env_vars):
        # use special credentials provider that can assume IAM roles and handle temporary STS auth tokens
        credentialsProvider = "cloud.localstack.DefaultSTSAssumeRoleSessionCredentialsProvider"
        # pass through env variables to child process
        for var_name in [
                "AWS_ASSUME_ROLE_ARN",
                "AWS_ASSUME_ROLE_SESSION_NAME",
                "AWS_ACCESS_KEY_ID",
                "AWS_SECRET_ACCESS_KEY",
                "AWS_SESSION_TOKEN",
        ]:
            if var_name in os.environ and var_name not in env_vars:
                env_vars[var_name] = os.environ[var_name]
    if aws_stack.is_local_env(env):
        # need to disable CBOR protocol, enforce use of plain JSON,
        # see https://github.com/mhart/kinesalite/issues/31
        env_vars["AWS_CBOR_DISABLE"] = "true"
    if kcl_log_level or (len(log_subscribers) > 0):
        if not log_file:
            log_file = LOG_FILE_PATTERN.replace("*", short_uid())
            TMP_FILES.append(log_file)
        run("touch %s" % log_file)
        # start log output reader thread which will read the KCL log
        # file and print each line to stdout of this process...
        reader_thread = OutputReaderThread({
            "file": log_file,
            "level": kcl_log_level,
            "log_prefix": "KCL",
            "log_subscribers": log_subscribers,
        })
        reader_thread.start()

    # construct stream info
    stream_info = get_stream_info(
        stream_name,
        log_file,
        env=env,
        endpoint_url=endpoint_url,
        ddb_lease_table_suffix=ddb_lease_table_suffix,
        env_vars=env_vars,
    )
    props_file = stream_info["properties_file"]
    # set kcl config options
    kwargs = {"metricsLevel": "NONE", "initialPositionInStream": "LATEST"}
    # set parameters for local connection
    if aws_stack.is_local_env(env):
        kwargs["kinesisEndpoint"] = f"{LOCALHOST}:{config.service_port('kinesis')}"
        kwargs["dynamodbEndpoint"] = f"{LOCALHOST}:{config.service_port('dynamodb')}"
        kwargs["kinesisProtocol"] = config.get_protocol()
        kwargs["dynamodbProtocol"] = config.get_protocol()
        kwargs["disableCertChecking"] = "true"
    kwargs.update(configs)
    # create config file
    kclipy_helper.create_config_file(
        config_file=props_file,
        executableName=listener_script,
        streamName=stream_name,
        applicationName=stream_info["app_name"],
        credentialsProvider=credentialsProvider,
        region_name=region_name,
        **kwargs,
    )
    TMP_FILES.append(props_file)
    # start stream consumer
    stream = KinesisStream(id=stream_name, params=stream_info)
    thread_consumer = KinesisProcessorThread.start_consumer(stream)
    TMP_THREADS.append(thread_consumer)
    return thread_consumer
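Finally, a sketch of the assume-role branch above: when both variables are present (in os.environ or env_vars), the STS credentials provider is selected and the relevant variables are passed through to the KCL child process. The role ARN, session name, stream name, and script path are placeholders.

os.environ['AWS_ASSUME_ROLE_ARN'] = 'arn:aws:iam::123456789012:role/my-kcl-role'
os.environ['AWS_ASSUME_ROLE_SESSION_NAME'] = 'my-kcl-session'
consumer = start_kcl_client_process('my-stream', '/path/to/listener.py')
# the generated properties file now selects
# cloud.localstack.DefaultSTSAssumeRoleSessionCredentialsProvider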