Beispiel #1
0
def create_zip_file(file_path,
                    zip_file=None,
                    get_content=False,
                    content_root=None,
                    mode="w"):
    """
    Creates a zipfile to the designated file_path.

    By default, a new zip file is created but the mode parameter can be used to append to an existing zip file
    """
    base_dir = file_path
    if not os.path.isdir(file_path):
        base_dir = tempfile.mkdtemp(prefix=ARCHIVE_DIR_PREFIX)
        shutil.copy(file_path, base_dir)
        TMP_FILES.append(base_dir)
    tmp_dir = tempfile.mkdtemp(prefix=ARCHIVE_DIR_PREFIX)
    full_zip_file = zip_file
    if not full_zip_file:
        zip_file_name = "archive.zip"
        full_zip_file = os.path.join(tmp_dir, zip_file_name)
    # special case where target folder is empty -> create empty zip file
    if is_empty_dir(base_dir):
        # see https://stackoverflow.com/questions/25195495/how-to-create-an-empty-zip-file#25195628
        content = (
            b"PK\x05\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        )
        if get_content:
            return content
        save_file(full_zip_file, content)
        return full_zip_file

    # create zip file
    if is_debian():
        # todo: extend CLI with the new parameters
        create_zip_file_cli(source_path=file_path,
                            base_dir=base_dir,
                            zip_file=full_zip_file)
    else:
        create_zip_file_python(
            source_path=file_path,
            base_dir=base_dir,
            zip_file=full_zip_file,
            content_root=content_root,
            mode=mode,
        )
    if not get_content:
        TMP_FILES.append(tmp_dir)
        return full_zip_file
    with open(full_zip_file, "rb") as file_obj:
        zip_file_content = file_obj.read()
    rm_dir(tmp_dir)
    return zip_file_content
 def __init__(self, params):
     props_file = params["properties_file"]
     env_vars = params["env_vars"]
     cmd = kclipy_helper.get_kcl_app_command("java",
                                             MULTI_LANG_DAEMON_CLASS,
                                             props_file)
     if not params["log_file"]:
         params["log_file"] = "%s.log" % props_file
         TMP_FILES.append(params["log_file"])
     env = aws_stack.get_environment()
     quiet = aws_stack.is_local_env(env)
     ShellCommandThread.__init__(self,
                                 cmd,
                                 outfile=params["log_file"],
                                 env_vars=env_vars,
                                 quiet=quiet)
def listen_to_kinesis(
    stream_name,
    listener_func=None,
    processor_script=None,
    events_file=None,
    endpoint_url=None,
    log_file=None,
    configs=None,
    env=None,
    ddb_lease_table_suffix=None,
    env_vars=None,
    kcl_log_level=DEFAULT_KCL_LOG_LEVEL,
    log_subscribers=None,
    wait_until_started=False,
    fh_d_stream=None,
    region_name=None,
):
    """
    High-level function that allows to subscribe to a Kinesis stream
    and receive events in a listener function. A KCL client process is
    automatically started in the background.
    """
    if configs is None:
        configs = {}
    if env_vars is None:
        env_vars = {}
    if log_subscribers is None:
        log_subscribers = []
    env = aws_stack.get_environment(env)
    if not events_file:
        events_file = EVENTS_FILE_PATTERN.replace("*", short_uid())
        TMP_FILES.append(events_file)
    if not processor_script:
        processor_script = generate_processor_script(events_file,
                                                     log_file=log_file)

    rm_rf(events_file)
    # start event reader thread (this process)
    ready_mutex = threading.Semaphore(0)
    thread = EventFileReaderThread(events_file,
                                   listener_func,
                                   ready_mutex=ready_mutex,
                                   fh_d_stream=fh_d_stream)
    thread.start()
    # Wait until the event reader thread is ready (to avoid 'Connection refused' error on the UNIX socket)
    ready_mutex.acquire()
    # start KCL client (background process)
    if processor_script[-4:] == ".pyc":
        processor_script = processor_script[0:-1]
    # add log listener that notifies when KCL is started
    if wait_until_started:
        listener = KclStartedLogListener()
        log_subscribers.append(listener)

    process = start_kcl_client_process(
        stream_name,
        processor_script,
        endpoint_url=endpoint_url,
        log_file=log_file,
        configs=configs,
        env=env,
        ddb_lease_table_suffix=ddb_lease_table_suffix,
        env_vars=env_vars,
        kcl_log_level=kcl_log_level,
        log_subscribers=log_subscribers,
        region_name=region_name,
    )

    if wait_until_started:
        # Wait at most 90 seconds for initialization. Note that creating the DDB table can take quite a bit
        try:
            listener.sync_init.get(block=True, timeout=90)
        except Exception:
            raise Exception("Timeout when waiting for KCL initialization.")
        # wait at most 30 seconds for shard lease notification
        try:
            listener.sync_take_shard.get(block=True, timeout=30)
        except Exception:
            # this merely means that there is no shard available to take. Do nothing.
            pass

    return process
def generate_processor_script(events_file, log_file=None):
    script_file = os.path.join(tempfile.gettempdir(),
                               "kclipy.%s.processor.py" % short_uid())
    if log_file:
        log_file = "'%s'" % log_file
    else:
        log_file = "None"
    content = """#!/usr/bin/env python
import os, sys, glob, json, socket, time, logging, subprocess, tempfile
logging.basicConfig(level=logging.INFO)
for path in glob.glob('%s/lib/python*/site-packages'):
    sys.path.insert(0, path)
sys.path.insert(0, '%s')
from localstack.config import DEFAULT_ENCODING
from localstack.utils.kinesis import kinesis_connector
from localstack.utils.time import timestamp
events_file = '%s'
log_file = %s
error_log = os.path.join(tempfile.gettempdir(), 'kclipy.error.log')
if __name__ == '__main__':
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)

    num_tries = 3
    sleep_time = 2
    error = None
    for i in range(0, num_tries):
        try:
            sock.connect(events_file)
            error = None
            break
        except Exception as e:
            error = e
            if i < num_tries:
                msg = '%%s: Unable to connect to UNIX socket. Retrying.' %% timestamp()
                subprocess.check_output('echo "%%s" >> %%s' %% (msg, error_log), shell=True)
                time.sleep(sleep_time)
    if error:
        print("WARN: Unable to connect to UNIX socket after retrying: %%s" %% error)
        raise error

    def receive_msg(records, checkpointer, shard_id):
        try:
            # records is a list of amazon_kclpy.messages.Record objects -> convert to JSON
            records_dicts = [j._json_dict for j in records]
            message_to_send = {'shard_id': shard_id, 'records': records_dicts}
            string_to_send = '%%s\\n' %% json.dumps(message_to_send)
            bytes_to_send = string_to_send.encode(DEFAULT_ENCODING)
            sock.send(bytes_to_send)
        except Exception as e:
            msg = "WARN: Unable to forward event: %%s" %% e
            print(msg)
            subprocess.check_output('echo "%%s" >> %%s' %% (msg, error_log), shell=True)
    kinesis_connector.KinesisProcessor.run_processor(log_file=log_file, processor_func=receive_msg)
    """ % (
        LOCALSTACK_VENV_FOLDER,
        LOCALSTACK_ROOT_FOLDER,
        events_file,
        log_file,
    )
    save_file(script_file, content)
    chmod_r(script_file, 0o755)
    TMP_FILES.append(script_file)
    return script_file
def start_kcl_client_process(
    stream_name,
    listener_script,
    log_file=None,
    env=None,
    configs=None,
    endpoint_url=None,
    ddb_lease_table_suffix=None,
    env_vars=None,
    region_name=None,
    kcl_log_level=DEFAULT_KCL_LOG_LEVEL,
    log_subscribers=None,
):
    if configs is None:
        configs = {}
    if env_vars is None:
        env_vars = {}
    if log_subscribers is None:
        log_subscribers = []
    env = aws_stack.get_environment(env)
    # make sure to convert stream ARN to stream name
    stream_name = aws_stack.kinesis_stream_name(stream_name)
    # decide which credentials provider to use
    credentialsProvider = None
    if ("AWS_ASSUME_ROLE_ARN" in os.environ or "AWS_ASSUME_ROLE_ARN"
            in env_vars) and ("AWS_ASSUME_ROLE_SESSION_NAME" in os.environ
                              or "AWS_ASSUME_ROLE_SESSION_NAME" in env_vars):
        # use special credentials provider that can assume IAM roles and handle temporary STS auth tokens
        credentialsProvider = "cloud.localstack.DefaultSTSAssumeRoleSessionCredentialsProvider"
        # pass through env variables to child process
        for var_name in [
                "AWS_ASSUME_ROLE_ARN",
                "AWS_ASSUME_ROLE_SESSION_NAME",
                "AWS_ACCESS_KEY_ID",
                "AWS_SECRET_ACCESS_KEY",
                "AWS_SESSION_TOKEN",
        ]:
            if var_name in os.environ and var_name not in env_vars:
                env_vars[var_name] = os.environ[var_name]
    if aws_stack.is_local_env(env):
        # need to disable CBOR protocol, enforce use of plain JSON,
        # see https://github.com/mhart/kinesalite/issues/31
        env_vars["AWS_CBOR_DISABLE"] = "true"
    if kcl_log_level or (len(log_subscribers) > 0):
        if not log_file:
            log_file = LOG_FILE_PATTERN.replace("*", short_uid())
            TMP_FILES.append(log_file)
        run("touch %s" % log_file)
        # start log output reader thread which will read the KCL log
        # file and print each line to stdout of this process...
        reader_thread = OutputReaderThread({
            "file": log_file,
            "level": kcl_log_level,
            "log_prefix": "KCL",
            "log_subscribers": log_subscribers,
        })
        reader_thread.start()

    # construct stream info
    stream_info = get_stream_info(
        stream_name,
        log_file,
        env=env,
        endpoint_url=endpoint_url,
        ddb_lease_table_suffix=ddb_lease_table_suffix,
        env_vars=env_vars,
    )
    props_file = stream_info["properties_file"]
    # set kcl config options
    kwargs = {"metricsLevel": "NONE", "initialPositionInStream": "LATEST"}
    # set parameters for local connection
    if aws_stack.is_local_env(env):
        kwargs[
            "kinesisEndpoint"] = f"{LOCALHOST}:{config.service_port('kinesis')}"
        kwargs[
            "dynamodbEndpoint"] = f"{LOCALHOST}:{config.service_port('dynamodb')}"
        kwargs["kinesisProtocol"] = config.get_protocol()
        kwargs["dynamodbProtocol"] = config.get_protocol()
        kwargs["disableCertChecking"] = "true"
    kwargs.update(configs)
    # create config file
    kclipy_helper.create_config_file(
        config_file=props_file,
        executableName=listener_script,
        streamName=stream_name,
        applicationName=stream_info["app_name"],
        credentialsProvider=credentialsProvider,
        region_name=region_name,
        **kwargs,
    )
    TMP_FILES.append(props_file)
    # start stream consumer
    stream = KinesisStream(id=stream_name, params=stream_info)
    thread_consumer = KinesisProcessorThread.start_consumer(stream)
    TMP_THREADS.append(thread_consumer)
    return thread_consumer
 def mountable_tmp_file():
     f = os.path.join(config.dirs.tmp, short_uid())
     TMP_FILES.append(f)
     return f