def create_zip_file(file_path, zip_file=None, get_content=False, content_root=None, mode="w"):
    """
    Creates a zipfile to the designated file_path.

    By default, a new zip file is created but the mode parameter can be used to append to an existing zip file

    :param file_path: directory to archive, or a single file. A single file is first copied
        into a fresh temporary directory, which then serves as the archive base directory.
    :param zip_file: path of the zip file to write; defaults to "archive.zip" inside a new
        temporary directory
    :param get_content: if True, return the zip file's bytes instead of its path
    :param content_root: forwarded to create_zip_file_python only (the CLI variant ignores it)
    :param mode: forwarded to create_zip_file_python only (the CLI variant ignores it)
    :return: path of the zip file, or its binary content if get_content is set
    """
    base_dir = file_path
    if not os.path.isdir(file_path):
        base_dir = tempfile.mkdtemp(prefix=ARCHIVE_DIR_PREFIX)
        shutil.copy(file_path, base_dir)
        TMP_FILES.append(base_dir)

    tmp_dir = tempfile.mkdtemp(prefix=ARCHIVE_DIR_PREFIX)
    full_zip_file = zip_file or os.path.join(tmp_dir, "archive.zip")

    # special case where target folder is empty -> create empty zip file
    if is_empty_dir(base_dir):
        # see https://stackoverflow.com/questions/25195495/how-to-create-an-empty-zip-file#25195628
        # (4-byte end-of-central-directory signature followed by 18 zero bytes)
        content = b"PK\x05\x06" + b"\x00" * 18
        if get_content:
            # nothing was written to tmp_dir, so remove it right away instead of leaking it
            rm_dir(tmp_dir)
            return content
        save_file(full_zip_file, content)
        # same bookkeeping as the regular path below, so tmp_dir gets cleaned up later
        TMP_FILES.append(tmp_dir)
        return full_zip_file

    # create zip file
    if is_debian():
        # todo: extend CLI with the new parameters
        create_zip_file_cli(source_path=file_path, base_dir=base_dir, zip_file=full_zip_file)
    else:
        create_zip_file_python(
            source_path=file_path,
            base_dir=base_dir,
            zip_file=full_zip_file,
            content_root=content_root,
            mode=mode,
        )

    if not get_content:
        TMP_FILES.append(tmp_dir)
        return full_zip_file

    with open(full_zip_file, "rb") as file_obj:
        zip_file_content = file_obj.read()
    rm_dir(tmp_dir)
    return zip_file_content
def __init__(self, params):
    """
    Prepare the shell command thread for the KCL app command built from ``params``.

    Reads the "properties_file", "env_vars" and "log_file" entries of ``params``. If no log
    file is configured, "<properties_file>.log" is written back into ``params`` and
    registered in TMP_FILES.
    """
    properties_path = params["properties_file"]
    child_env_vars = params["env_vars"]
    command = kclipy_helper.get_kcl_app_command("java", MULTI_LANG_DAEMON_CLASS, properties_path)

    if not params["log_file"]:
        default_log_file = "%s.log" % properties_path
        params["log_file"] = default_log_file
        TMP_FILES.append(default_log_file)

    # the "quiet" flag of the command thread follows whether the environment is local
    is_local = aws_stack.is_local_env(aws_stack.get_environment())
    ShellCommandThread.__init__(
        self,
        command,
        outfile=params["log_file"],
        env_vars=child_env_vars,
        quiet=is_local,
    )
def listen_to_kinesis(
    stream_name,
    listener_func=None,
    processor_script=None,
    events_file=None,
    endpoint_url=None,
    log_file=None,
    configs=None,
    env=None,
    ddb_lease_table_suffix=None,
    env_vars=None,
    kcl_log_level=DEFAULT_KCL_LOG_LEVEL,
    log_subscribers=None,
    wait_until_started=False,
    fh_d_stream=None,
    region_name=None,
):
    """
    High-level function that allows to subscribe to a Kinesis stream
    and receive events in a listener function. A KCL client process is
    automatically started in the background.

    :param events_file: path handed to the event reader thread and to the generated processor
        script; a fresh one (registered in TMP_FILES) is derived from EVENTS_FILE_PATTERN if unset
    :param processor_script: script to run as KCL processor; generated if not given
    :param log_subscribers: optional log listeners. The list is copied, so the caller's list is
        never modified.
    :param wait_until_started: if True, block until the KCL log listener reports initialization
        (at most 90 seconds), then wait up to 30 more seconds for a shard lease notification
    :return: the value returned by start_kcl_client_process
    :raises Exception: if wait_until_started is set and initialization is not reported in time
    """
    if configs is None:
        configs = {}
    if env_vars is None:
        env_vars = {}
    # Work on a copy: our own listener is appended below and must not pile up in a list
    # that the caller may reuse for subsequent calls.
    log_subscribers = list(log_subscribers or [])

    env = aws_stack.get_environment(env)
    if not events_file:
        events_file = EVENTS_FILE_PATTERN.replace("*", short_uid())
        TMP_FILES.append(events_file)
    if not processor_script:
        processor_script = generate_processor_script(events_file, log_file=log_file)

    # clear the events path before the reader thread starts (presumably a stale socket file)
    rm_rf(events_file)

    # start event reader thread (this process)
    ready_mutex = threading.Semaphore(0)
    thread = EventFileReaderThread(
        events_file, listener_func, ready_mutex=ready_mutex, fh_d_stream=fh_d_stream
    )
    thread.start()
    # Wait until the event reader thread is ready (to avoid 'Connection refused' error on the UNIX socket)
    ready_mutex.acquire()

    # start KCL client (background process)
    if processor_script.endswith(".pyc"):
        # point at the ".py" source instead of the compiled file
        processor_script = processor_script[:-1]

    # add log listener that notifies when KCL is started
    listener = None
    if wait_until_started:
        listener = KclStartedLogListener()
        log_subscribers.append(listener)

    process = start_kcl_client_process(
        stream_name,
        processor_script,
        endpoint_url=endpoint_url,
        log_file=log_file,
        configs=configs,
        env=env,
        ddb_lease_table_suffix=ddb_lease_table_suffix,
        env_vars=env_vars,
        kcl_log_level=kcl_log_level,
        log_subscribers=log_subscribers,
        region_name=region_name,
    )

    if listener is not None:
        # Wait at most 90 seconds for initialization. Note that creating the DDB table can take quite a bit
        try:
            listener.sync_init.get(block=True, timeout=90)
        except Exception as err:
            # keep the original cause attached for easier debugging
            raise Exception("Timeout when waiting for KCL initialization.") from err
        # wait at most 30 seconds for shard lease notification
        try:
            listener.sync_take_shard.get(block=True, timeout=30)
        except Exception:
            # this merely means that there is no shard available to take. Do nothing.
            pass

    return process
def generate_processor_script(events_file, log_file=None):
    """
    Write an executable Python script that forwards Kinesis records to a UNIX socket.

    The generated script connects to the UNIX socket at ``events_file`` (up to 3 attempts,
    2 seconds apart), then runs ``KinesisProcessor.run_processor`` with a callback that sends
    each batch of records as one JSON line over that socket. Errors are appended to
    "kclipy.error.log" in the system temp directory.

    :param events_file: path of the UNIX socket the generated script connects to
    :param log_file: optional log file path passed to ``run_processor``; None if unset
    :return: path of the generated script (also registered in TMP_FILES)
    """
    script_file = os.path.join(tempfile.gettempdir(), "kclipy.%s.processor.py" % short_uid())

    # Embed every path as a repr() literal: unlike pasting it between hand-written quotes,
    # this stays valid Python when the path contains quotes or backslashes.
    log_file_literal = repr(str(log_file)) if log_file else "None"

    # NOTE: the template is %-formatted, so literal percent signs are doubled.
    content = """#!/usr/bin/env python
import os, sys, glob, json, socket, time, logging, tempfile
logging.basicConfig(level=logging.INFO)
for path in glob.glob(%s + '/lib/python*/site-packages'):
    sys.path.insert(0, path)
sys.path.insert(0, %s)
from localstack.config import DEFAULT_ENCODING
from localstack.utils.kinesis import kinesis_connector
from localstack.utils.time import timestamp
events_file = %s
log_file = %s
error_log = os.path.join(tempfile.gettempdir(), 'kclipy.error.log')

def append_error_log(msg):
    # plain file append instead of shelling out to echo: quotes in msg cannot break it
    with open(error_log, 'a') as f:
        f.write('%%s\\n' %% msg)

if __name__ == '__main__':
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)

    num_tries = 3
    sleep_time = 2
    error = None
    for i in range(0, num_tries):
        try:
            sock.connect(events_file)
            error = None
            break
        except Exception as e:
            error = e
            # only announce a retry (and sleep) if another attempt actually follows
            if i < num_tries - 1:
                msg = '%%s: Unable to connect to UNIX socket. Retrying.' %% timestamp()
                append_error_log(msg)
                time.sleep(sleep_time)
    if error:
        print("WARN: Unable to connect to UNIX socket after retrying: %%s" %% error)
        raise error

    def receive_msg(records, checkpointer, shard_id):
        try:
            # records is a list of amazon_kclpy.messages.Record objects -> convert to JSON
            records_dicts = [j._json_dict for j in records]
            message_to_send = {'shard_id': shard_id, 'records': records_dicts}
            string_to_send = '%%s\\n' %% json.dumps(message_to_send)
            bytes_to_send = string_to_send.encode(DEFAULT_ENCODING)
            # sendall: a plain send() may transmit only part of the buffer
            sock.sendall(bytes_to_send)
        except Exception as e:
            msg = "WARN: Unable to forward event: %%s" %% e
            print(msg)
            append_error_log(msg)
    kinesis_connector.KinesisProcessor.run_processor(log_file=log_file, processor_func=receive_msg)
""" % (
        repr(str(LOCALSTACK_VENV_FOLDER)),
        repr(str(LOCALSTACK_ROOT_FOLDER)),
        repr(str(events_file)),
        log_file_literal,
    )

    save_file(script_file, content)
    chmod_r(script_file, 0o755)
    TMP_FILES.append(script_file)
    return script_file
def start_kcl_client_process(
    stream_name,
    listener_script,
    log_file=None,
    env=None,
    configs=None,
    endpoint_url=None,
    ddb_lease_table_suffix=None,
    env_vars=None,
    region_name=None,
    kcl_log_level=DEFAULT_KCL_LOG_LEVEL,
    log_subscribers=None,
):
    """
    Write the KCL properties file for a stream and start the consumer thread.

    :param stream_name: stream name or ARN (normalized via aws_stack.kinesis_stream_name)
    :param listener_script: value written as "executableName" into the KCL config file
    :param log_file: KCL log file; if unset and a log reader is needed, a fresh one is derived
        from LOG_FILE_PATTERN and registered in TMP_FILES
    :param configs: extra KCL config options; they override the defaults set here
    :param env_vars: environment variables for the child process. The dict is copied, so the
        caller's dict is never modified.
    :param kcl_log_level: if set (or if log_subscribers is non-empty), an OutputReaderThread
        is started on the log file
    :param log_subscribers: listeners handed to the OutputReaderThread
    :return: the consumer thread from KinesisProcessorThread.start_consumer (also appended to
        TMP_THREADS)
    """
    if configs is None:
        configs = {}
    # Copy: entries are added below and must not leak into a dict the caller may reuse.
    env_vars = dict(env_vars or {})
    if log_subscribers is None:
        log_subscribers = []

    env = aws_stack.get_environment(env)
    is_local = aws_stack.is_local_env(env)
    # make sure to convert stream ARN to stream name
    stream_name = aws_stack.kinesis_stream_name(stream_name)

    # decide which credentials provider to use
    credentialsProvider = None
    has_role_arn = "AWS_ASSUME_ROLE_ARN" in os.environ or "AWS_ASSUME_ROLE_ARN" in env_vars
    has_session_name = (
        "AWS_ASSUME_ROLE_SESSION_NAME" in os.environ or "AWS_ASSUME_ROLE_SESSION_NAME" in env_vars
    )
    if has_role_arn and has_session_name:
        # use special credentials provider that can assume IAM roles and handle temporary STS auth tokens
        credentialsProvider = "cloud.localstack.DefaultSTSAssumeRoleSessionCredentialsProvider"
        # pass through env variables to child process (explicit env_vars entries win)
        for var_name in (
            "AWS_ASSUME_ROLE_ARN",
            "AWS_ASSUME_ROLE_SESSION_NAME",
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
            "AWS_SESSION_TOKEN",
        ):
            if var_name in os.environ and var_name not in env_vars:
                env_vars[var_name] = os.environ[var_name]

    if is_local:
        # need to disable CBOR protocol, enforce use of plain JSON,
        # see https://github.com/mhart/kinesalite/issues/31
        env_vars["AWS_CBOR_DISABLE"] = "true"

    if kcl_log_level or log_subscribers:
        if not log_file:
            log_file = LOG_FILE_PATTERN.replace("*", short_uid())
            TMP_FILES.append(log_file)
        # Make sure the log file exists before the reader starts. Opening in append mode
        # avoids a shell command, so paths with spaces or metacharacters work as well.
        with open(log_file, "a"):
            pass
        # start log output reader thread which will read the KCL log
        # file and print each line to stdout of this process...
        reader_thread = OutputReaderThread(
            {
                "file": log_file,
                "level": kcl_log_level,
                "log_prefix": "KCL",
                "log_subscribers": log_subscribers,
            }
        )
        reader_thread.start()

    # construct stream info
    stream_info = get_stream_info(
        stream_name,
        log_file,
        env=env,
        endpoint_url=endpoint_url,
        ddb_lease_table_suffix=ddb_lease_table_suffix,
        env_vars=env_vars,
    )
    props_file = stream_info["properties_file"]

    # set kcl config options
    kwargs = {"metricsLevel": "NONE", "initialPositionInStream": "LATEST"}
    # set parameters for local connection
    if is_local:
        kwargs["kinesisEndpoint"] = f"{LOCALHOST}:{config.service_port('kinesis')}"
        kwargs["dynamodbEndpoint"] = f"{LOCALHOST}:{config.service_port('dynamodb')}"
        kwargs["kinesisProtocol"] = config.get_protocol()
        kwargs["dynamodbProtocol"] = config.get_protocol()
        kwargs["disableCertChecking"] = "true"
    # caller-supplied options take precedence over the defaults above
    kwargs.update(configs)

    # create config file
    kclipy_helper.create_config_file(
        config_file=props_file,
        executableName=listener_script,
        streamName=stream_name,
        applicationName=stream_info["app_name"],
        credentialsProvider=credentialsProvider,
        region_name=region_name,
        **kwargs,
    )
    TMP_FILES.append(props_file)

    # start stream consumer
    stream = KinesisStream(id=stream_name, params=stream_info)
    thread_consumer = KinesisProcessorThread.start_consumer(stream)
    TMP_THREADS.append(thread_consumer)
    return thread_consumer
def mountable_tmp_file():
    """
    Return a new path below ``config.dirs.tmp`` and register it in TMP_FILES.

    Only the path is computed; no file is created here.
    """
    parent_dir = config.dirs.tmp
    # short_uid() presumably yields a unique name per call - verify against its definition
    tmp_path = os.path.join(parent_dir, short_uid())
    TMP_FILES.append(tmp_path)
    return tmp_path