def _build_plugin_image(
    plugin_name: str,
    source: str,
    alias: str,
    source_image=None,
    push=False,
    pre=False,
    ignore_cache=False,
) -> str:
    """Build the docker image for one plugin and return the image name.

    `source` may be a pip source or a custom Dockerfile path; `alias`
    overrides the image tag. Prerelease builds get a `--pre` tag suffix.
    Optionally pushes the finished image to the registry.
    """
    source = source or plugin_name
    alias = alias or plugin_name
    image_name = f"{BASE_DOCKER_REPO}:{alias}"
    cmd_parts = ["docker build"]
    if ignore_cache:
        cmd_parts.append("--no-cache")
    if source_image:
        cmd_parts.append(f"--build-arg source_image={source_image}")
    if pre:
        cmd_parts.append("--build-arg prerelease=true")
        image_name += "--pre"
    if "Dockerfile" in source:
        # A custom Dockerfile was provided directly.
        dockerfile = source
    else:
        # Generic singer Dockerfile installs the plugin from PLUGIN_SOURCE.
        dockerfile = "singer-plugin.Dockerfile"
        cmd_parts.append(f"--build-arg PLUGIN_SOURCE={source}")
    cmd_parts.append(f"--build-arg PLUGIN_NAME={plugin_name}")
    cmd_parts.append(f"--build-arg PLUGIN_ALIAS={alias}")
    cmd_parts.append(f"-t {image_name}")
    cmd_parts.append(f"-f {dockerfile}")
    cmd_parts.append(".")
    runnow.run(" ".join(cmd_parts))
    if push:
        _push(image_name)
    return image_name
def login(raise_error=False):
    """Log in to the docker registry using DOCKER_USERNAME / DOCKER_PASSWORD.

    The registry defaults to 'index.docker.io' unless DOCKER_REGISTRY is set.

    Arguments:
        raise_error -- If True, raise RuntimeError on any failure instead of
            logging a warning.

    Returns:
        True on successful login, False otherwise.
    """
    usr = os.environ.get("DOCKER_USERNAME", "")
    pwd = os.environ.get("DOCKER_PASSWORD", "")
    registry = os.environ.get("DOCKER_REGISTRY", "") or "index.docker.io"
    if not (usr and pwd):
        # BUGFIX: message previously read "...registry.Missing" (missing space).
        error_msg = ("Could not login to docker registry. "
                     "Missing env variable DOCKER_USERNAME or DOCKER_PASSWORD")
        if raise_error:
            raise RuntimeError(error_msg)
        logging.warning(error_msg)
        return False
    logging.info(
        f"Logging into docker registry '{registry}' as user '{usr}'...")
    try:
        # hide=True keeps the password out of captured command output.
        runnow.run(
            f"docker login {registry} --username {usr} --password {pwd}",
            hide=True)
        if registry == "index.docker.io":
            # Also log in without an explicit registry for the default hub.
            runnow.run(f"docker login --username {usr} --password {pwd}",
                       hide=True)
    except Exception as ex:
        if raise_error:
            raise RuntimeError(f"Docker login failed. {ex}") from ex
        logging.warning(f"Docker login failed. {ex}")
        # BUGFIX: previously fell through to `return True` after a failure.
        return False
    return True
def apply(infra_dir: str = "./infra/", save_output: bool = False, prompt: bool = False):
    """Run `terraform apply` in the given infrastructure directory.

    Auto-approves unless `prompt` is True; optionally saves terraform
    outputs afterwards.
    """
    abs_infra_dir = os.path.realpath(infra_dir)
    approve_flag = "" if prompt else "-auto-approve"
    runnow.run(f"terraform apply {approve_flag}", working_dir=abs_infra_dir)
    if save_output:
        update_var_outputs(infra_dir=abs_infra_dir)
def ecs_login(region):
    """Log the docker client in to ECR for the given AWS region.

    Fetches the login command via `aws ecr get-login`, then executes it.
    Raises RuntimeError if either step fails.
    """
    logging.info("Logging into ECS...")
    try:
        _, login_cmd = runnow.run(
            f"aws ecr get-login --region {region} --no-include-email",
            echo=False)
        # hide=True keeps the embedded credentials out of the logs.
        runnow.run(login_cmd, hide=True)
    except Exception as ex:
        raise RuntimeError(f"ECS login failed. {ex}")
def build(dockerfile_path, tag_as, addl_args=None):
    """ Build an image. 'tag_as' can be a string or list of strings """
    folder_path = os.path.dirname(dockerfile_path)
    addl_args = addl_args or ""
    tag_list = _to_list(tag_as)
    if not tag_list:
        cmd = f"docker build {addl_args} {folder_path} -f {dockerfile_path}"
    else:
        tag_flags = " ".join(f"-t {t}" for t in tag_list)
        cmd = f"docker build {addl_args} {tag_flags} {folder_path} -f {dockerfile_path}"
    runnow.run(cmd)
def change_upstream_source(
    dir_to_update=".",
    git_repo="https://github.com/slalom-ggp/dataops-infra",
    branch="master",
    relative_path="../../dataops-infra",
    to_relative=False,
    to_git=False,
    dry_run=False,
):
    """Change Terraform source

    Rewrites `source = "..."` lines in each .tf file directly under
    `dir_to_update`, switching module sources between git-URL form and local
    relative-path form. Exactly one of `to_git` / `to_relative` must be set.
    With `dry_run=True` the rewritten text is printed instead of written, and
    `terraform fmt` is skipped.
    """
    # `and` binds tighter than `or`: error when both flags or neither are set.
    if to_relative and to_git or not (to_relative or to_git):
        raise ValueError(
            "Must specify `--to_git` or `--to_relative`, but not both.")
    for tf_file in uio.list_local_files(dir_to_update, recursive=False):
        if tf_file.endswith(".tf"):
            # print(tf_file)
            new_lines = []
            for line in uio.get_text_file_contents(tf_file).splitlines():
                new_line = line
                if line.lstrip().startswith("source "):
                    # The module path is the first quoted value on the line.
                    current_path = line.lstrip().split('"')[1]
                    # Only rewrite module paths under catalog/ or components/.
                    start_pos = max([
                        current_path.find("catalog/"),
                        current_path.find("components/")
                    ])
                    if start_pos > 0:
                        # Strip any trailing '?ref=<branch>' qualifier.
                        module_path = current_path[start_pos:].split(
                            "?ref=")[0]
                        if to_relative:
                            local_patten = "{relative_path}/{path}"
                            new_path = local_patten.format(
                                relative_path=relative_path, path=module_path)
                        elif to_git:
                            git_pattern = "git::{git_repo}//{path}?ref={branch}"
                            new_path = git_pattern.format(git_repo=git_repo,
                                                          path=module_path,
                                                          branch=branch)
                        if current_path == new_path:
                            print(f"{current_path} \n\t\t\t-> (unchanged)")
                        else:
                            print(f"{current_path} \n\t\t\t-> {new_path}")
                        new_line = f' source = "{new_path}"'
                new_lines.append(new_line)
            new_file_text = "\n".join(new_lines)
            if dry_run:
                print(f"\n\n------------\n-- {tf_file}\n------------")
                print(new_file_text)
            else:
                uio.create_text_file(tf_file, new_file_text)
    if not dry_run:
        # Re-indent whatever we rewrote; terraform fmt fixes the leading spaces.
        runnow.run("terraform fmt -recursive", dir_to_update)
def start_jupyter(nb_directory="/home/jovyan/work", nb_token="qwerty123"):
    """Launch a Jupyter Lab server as a background daemon.

    Arguments:
        nb_directory -- Notebook root directory inside the server.
        nb_token -- Access token for the notebook server.
    """
    jupyter_run_command = (
        f"jupyter lab"
        f" --NotebookApp.notebook_dir='{nb_directory}'"
        f" --NotebookApp.token='{nb_token}'"
        f" --allow-root"
    )
    log_file = "jupyter_log.txt"
    runnow.run(jupyter_run_command, daemon=True, log_file_path=log_file)
    time.sleep(5)  # Give the server a moment to write its startup log.
    logging.info("\nJUPYTER_LOG:".join(
        uio.get_text_file_contents(log_file).splitlines()))
    # BUGFIX: previously logged the hardcoded token 'qwerty123' even when the
    # caller supplied a different nb_token.
    logging.info(
        f"Jupyter notebooks server started at: https://localhost:8888/?token={nb_token}"
    )
def pull(image_name, skip_if_exists=False, silent=False):
    """Pull a docker image, optionally skipping when it is cached locally.

    Returns the image name on success. When `silent` is True a failed pull
    returns False instead of re-raising.
    """
    if skip_if_exists and exists_locally(image_name):
        logging.info(
            f"Skipping image pull. Already exists locally: {image_name}")
        return image_name
    try:
        runnow.run(f"docker pull {image_name}", raise_error=True)
    except Exception as ex:
        logging.info(f"Failed to pull image: {image_name}\n{ex}")
        if not silent:
            raise ex
        return False
    if not exists_locally(image_name):
        logging.warning(
            "Pull was successful in API but could not be confirmed")
    return image_name
def get_tf_metadata(
    tf_dir: str,
    recursive: bool = False,
):
    """
    Return a dictionary of Terraform module paths to JSON metadata about each
    module, a wrapper around the `terraform-docs` tool.

    Parameters:
    ----------
    tf_dir: Directory of terraform scripts to scan.
    recursive : Optional (default=True). 'True' to run on all subdirectories,
    recursively.

    Returns:
    -------
    dict
    """
    import json

    result = {}
    # Skip VCS metadata, terraform caches, and sample/test folders.
    excluded = (".git", ".terraform", "samples", "tests")
    if not any(token in tf_dir for token in excluded):
        has_tf_files = any(
            f.endswith(".tf")
            for f in uio.list_local_files(tf_dir, recursive=False))
        if has_tf_files:
            _, json_text = runnow.run(f"terraform-docs json {tf_dir}",
                                      echo=False)
            result[tf_dir] = json.loads(json_text)
        if recursive:
            for folder in uio.list_local_files(tf_dir, recursive=False):
                folder = folder.replace("\\", "/")
                if os.path.isdir(folder):
                    result.update(get_tf_metadata(folder, recursive=recursive))
    return result
def test_raised_fail():
    """Verify that a failing command raises with the expected error message."""
    # Try with raised error
    try:
        exit_code, result = run("return 1")
    except Exception as ex:
        expected, actual = "Command failed (exit code 1)", str(ex)
        # BUGFIX: the assertion message was wrapped in a 1-tuple (trailing
        # comma), which displays as a tuple; pass a plain string instead.
        assert actual.startswith(expected), (
            f"Wrong error message. Expected={expected}.\nActual={actual}"
        )
    else:
        assert False, "Should have raised error."
def test_success():
    """Verify a successful command returns exit code 0 and its stdout text."""
    # NOTE: the original try/except simply re-raised, which is a no-op;
    # letting the exception propagate fails the test identically.
    exit_code, output = run("echo Hey")
    # BUGFIX: message typo "Should have returns" -> "Should have returned".
    assert (
        exit_code == 0
    ), f"Should have returned exit code 0. Actual return={exit_code}"
    assert output == "Hey", "Should have printed 'Hey'"
def install(program_name, install_cmd):
    """Install a program, elevating to admin when needed.

    A missing `install_cmd` (or a bare program name without arguments) is
    expanded via `_default_install_cmd`. Returns True if the installer
    exited with code 0.
    """
    if not install_cmd:
        install_cmd = _default_install_cmd(program_name=program_name)
    elif " " not in install_cmd:
        # A bare program name was passed as the command; expand it too.
        install_cmd = _default_install_cmd(program_name=install_cmd)
    if is_admin():
        return_code, output = runnow.run(install_cmd)
    else:
        return_code = run_as_admin(cmd=install_cmd.split(" "), prompt=True)
    return return_code == 0
def _discover(
    tap_name: str,
    taps_dir: str,
    *,
    config_file: str,
    catalog_dir: str,
    dockerized: bool,
    tap_exe: str,
) -> None:
    """Run the tap in '--discover' mode and write the resulting catalog file.

    When `dockerized` is True the tap image is pulled and run in a container
    (with AWS credentials from the tap config forwarded as env vars and the
    command hidden from logs); otherwise the locally-installed tap executable
    is invoked directly with its stdout redirected to the catalog file.

    Raises:
        RuntimeError: if the dockerized tap's output is not valid JSON.
    """
    catalog_file = config.get_raw_catalog_file(
        taps_dir, catalog_dir, tap_name, allow_custom=False
    )
    uio.create_folder(catalog_dir)
    img = f"{docker.BASE_DOCKER_REPO}:{tap_exe}"
    hide_cmd = False
    if dockerized:
        cdw = os.getcwd().replace("\\", "/")
        tap_config = json.loads(uio.get_text_file_contents(config_file))
        tap_docker_args = ""
        # TODO: Replace with logic to parse from AWS_SHARED_CREDENTIALS_FILE env var:
        for k in ["aws_access_key_id", "aws_secret_access_key", "aws_session_token"]:
            if k in tap_config:
                key = f"TAP_{tap_name}_{k}".replace("-", "_").upper()
                os.environ[key] = tap_config[k]
                tap_docker_args += f' -e {k.upper()}="{tap_config[k]}"'
                # The command now embeds credentials; keep it out of the logs.
                hide_cmd = True
        _, _ = runnow.run(f"docker pull {img}")
        _, output_text = runnow.run(
            f"docker run --rm -i "
            f"-v {cdw}:/home/local {tap_docker_args} "
            f"{img} --config {config.dockerize_cli_args(config_file)} --discover",
            echo=False,
            capture_stderr=False,
            hide=hide_cmd,
        )
        if not _is_valid_json(output_text):
            raise RuntimeError(f"Could not parse json file from output:\n{output_text}")
        uio.create_text_file(catalog_file, output_text)
    else:
        # Local execution: the shell redirect writes the catalog directly.
        runnow.run(
            f"{tap_exe} --config {config_file} --discover > {catalog_file}",
            hide=hide_cmd,
        )
def _init_spark(dockerized=False, with_jupyter=False, daemon=False):
    """Return an initialized spark object"""
    # NOTE(review): initializes via module-level globals rather than a return
    # value; callers read `spark`/`sc` after this runs.
    global spark, sc, thrift
    if dockerized:
        # Side effect: ensures the spark container is up before connecting.
        container = _init_spark_container(with_jupyter=with_jupyter)
        # context = SparkContext(conf=conf)
        os.environ["PYSPARK_PYTHON"] = sys.executable
        with logged_block("connecting to spark container"):
            spark = SparkSession.builder.master(CONTAINER_ENDPOINT).getOrCreate()
            spark.sparkContext.setLogLevel(SPARK_LOG_LEVEL)
            sc = spark.sparkContext
    elif daemon:
        # Launch a background spark server process and wait for readiness.
        cmd = f"{sys.executable} -m slalom.dataops.sparkutils start_server"
        wait_test = lambda line: _SERVING_SPARK_REQUESTS in line
        wait_max = 120  # Max wait in seconds
        if with_jupyter:
            cmd = f"{cmd} --with_jupyter"
        runnow.run(cmd, daemon=True, wait_test=wait_test, wait_max=wait_max)
    else:
        _init_local_spark()
def install(plugin_name: str, source: str = None, alias: str = None):
    """
    Install the requested plugin to the local machine.

    Creates a dedicated virtualenv for the plugin, pip-installs it, and
    symlinks the executable into the shared install directory.

    Arguments:
        plugin_name {str} -- The name of the plugin to install, including
            the tap- or target- prefix.

    Keyword Arguments:
        source {str} -- Optional. Overrides the pip installation source.
        alias {str} -- Optional. Overrides the name (alias) of the plugin.

    Raises:
        RuntimeError: if the install path already exists and the user
            declines to replace it.
    """
    source = source or plugin_name
    alias = alias or plugin_name
    venv_dir = os.path.join(config.VENV_ROOT, alias)
    install_path = os.path.join(config.INSTALL_ROOT, alias)
    if uio.file_exists(install_path):
        response = input(f"The file '{install_path}' already exists. "
                         f"Are you sure you want to replace this file? [y/n]")
        if response.lower() not in ["y", "yes"]:
            raise RuntimeError(f"File already exists '{install_path}'.")
        uio.delete_file(install_path)
    runnow.run(f"python3 -m venv {venv_dir}")
    pip3_path = os.path.join(venv_dir, "bin", "pip3")
    runnow.run(f"{pip3_path} install {source}")
    runnow.run(f"ln -s {venv_dir}/bin/{plugin_name} {install_path}")
def _ecs_wait_for(
    wait_for,
    task_arn,
    cluster,
    region,
    timeout=1200,
    heartbeat_interval=None,
    raise_error=True,
):
    """Wait for an ECS task to reach the given state ('running'/'stopped').

    Polls `aws ecs wait tasks-<wait_for>`, retrying while the CLI exits with
    its 255 timeout code and the deadline has not passed, then describes the
    task to confirm it actually started.

    Returns:
        The task ARN as reported by the describe-tasks call.

    Raises:
        RuntimeError: if the wait command fails/times out, the describe call
            fails, or the task reported failures.
    """
    wait_cmd = f"aws ecs wait tasks-{wait_for} --cluster {cluster} --tasks {task_arn}"
    desc_cmd = f"aws ecs describe-tasks --cluster {cluster} --tasks {task_arn}"
    with logged_block(
        f"waiting for ECS job to reach '{wait_for}' status",
        heartbeat_interval=heartbeat_interval,
    ):
        timeout_time = time.time() + (timeout or MAX_ECS_WAIT)
        return_code, output_text = runnow.run(wait_cmd, raise_error=False)
        while return_code == 255 and time.time() < timeout_time:
            logging.info("aws cli timeout expired. Retrying...")
            # BUGFIX: the retry previously ran with raise_error=True, which
            # raised on the next 255 exit instead of continuing to retry.
            return_code, output_text = runnow.run(wait_cmd, raise_error=False)
        if return_code != 0:
            raise RuntimeError(
                f"ECS wait command failed or timed out (return={return_code}).\n"
                f"{output_text}")
        return_code, output_text = runnow.run(desc_cmd, raise_error=False)
        if return_code != 0:
            raise RuntimeError(f"ECS task describe failed.\n{output_text}")
        jsonobj = json.loads(output_text)
        if len(jsonobj.get("tasks", [])) == 0 or len(jsonobj.get("failures", [])) > 0:
            # BUGFIX: this RuntimeError was constructed but never raised.
            raise RuntimeError(
                f"Could not start task ({jsonobj.get('failures', '')})")
        task_arn = jsonobj["tasks"][0]["taskArn"]
    logging.info(
        f"ECS task status: {get_ecs_task_detail_url(region, task_arn, cluster)}"
    )
    logging.info(f"ECS task logs: {get_ecs_log_url(region, task_arn)}")
    return task_arn
def ecs_retag(image_name, existing_tag, tag_as):
    """Re-tag an existing ECR image in place (no pull/push of layers).

    Fetches the manifest for `existing_tag` and re-puts it under each tag in
    `tag_as` (a string or list of strings). Tags may be given with or without
    the registry host and repository prefix; a mismatched repository raises.
    An 'ImageAlreadyExistsException' from the CLI is treated as a no-op.
    """
    tag_as = _to_list(tag_as)
    # Strip the registry host; the CLI expects the bare repository name.
    if "amazonaws.com/" in image_name:
        image_name = image_name.split("amazonaws.com/")[1]
    get_manifest_cmd = (
        f"aws ecr batch-get-image"
        f" --repository-name {image_name} --image-ids imageTag={existing_tag}"
        f" --query 'images[].imageManifest' --output text")
    _, manifest = runnow.run(get_manifest_cmd, echo=False)
    for new_tag in tag_as:
        if "amazonaws.com/" in new_tag:
            new_tag = new_tag.split("amazonaws.com/")[1]
        if ":" in new_tag:
            # 'repo:tag' form: the repo part must match the source image.
            if image_name != new_tag.split(":")[0]:
                raise RuntimeError(
                    f"Image names do not match: '{image_name}', '{new_tag.split(':')[0]}'"
                )
            new_tag = new_tag.split(":")[1]
        # List form with shell=False so the JSON manifest survives quoting.
        put_image_cmd = [
            "aws",
            "ecr",
            "put-image",
            "--repository-name",
            image_name,
            "--image-tag",
            new_tag,
            "--image-manifest",
            manifest,
        ]
        return_code, output_text = runnow.run(put_image_cmd,
                                              shell=False,
                                              echo=False,
                                              hide=True,
                                              raise_error=False)
        if return_code != 0 and "ImageAlreadyExistsException" in output_text:
            logging.info("Image already exists. No tagging changes were made.")
        elif return_code != 0:
            raise RuntimeError(
                f"Could not retag the specified image.\n{output_text}")
def _build_composite_image(
    tap_alias: str,
    target_alias: str,
    *,
    push: bool = False,
    pre: bool = False,
    ignore_cache: bool = False,
    has_custom_tap: bool,
    has_custom_target: bool,
) -> str:
    """Build a combined tap-to-target docker image and return its name.

    Aliases may be passed with or without their 'tap-'/'target-' prefixes.
    Prerelease builds get a '--pre' tag suffix; optionally pushes the image.

    Raises:
        NotImplementedError: if both a custom tap and a custom target are
            requested (only one custom side is supported).
    """
    if has_custom_tap and has_custom_target:
        # BUGFIX: these were plain strings; the aliases were never interpolated.
        raise NotImplementedError(
            f"Cannot combine a custom tap ('tap-{tap_alias}') "
            f"with a custom target '{target_alias}'.")
    if tap_alias.startswith("tap-"):
        tap_alias = tap_alias.replace("tap-", "", 1)
    if target_alias.startswith("target-"):
        target_alias = target_alias.replace("target-", "", 1)
    image_name = f"{BASE_DOCKER_REPO}:{tap_alias}-to-{target_alias}"
    build_cmd = "docker build"
    if has_custom_tap:
        dockerfile = "tap-to-target-w-custom-tap.Dockerfile"
    else:
        dockerfile = "tap-to-target.Dockerfile"
    if ignore_cache:
        build_cmd += " --no-cache"
    if pre:
        build_cmd += " --build-arg source_image_suffix=--pre"
        image_name += "--pre"
    build_cmd += (f" --build-arg tap_alias={tap_alias}"
                  f" --build-arg target_alias={target_alias}"
                  f" -t {image_name}"
                  f" -f {dockerfile}"
                  f" .")
    runnow.run(build_cmd)
    if push:
        _push(image_name)
    return image_name
def check_install(program_name: str,
                  install_if_missing: bool = None,
                  install_cmd: str = None) -> bool:
    """Return True if the named program is installed.

    Checks the cached installed-programs list first, then falls back to
    probing with `<name> --version` and `which <name>`. If
    `install_if_missing` is set, installs the program when not found.
    """
    installed = program_name.lower() in get_installed_programs().keys()
    if not installed:
        probe_cmds = (f"{program_name} --version", f"which {program_name}")
        for test_cmd in probe_cmds:
            return_code, output = runnow.run(test_cmd, raise_error=False)
            if return_code == 0 and len(output) > 1:
                installed = True
                break
    if install_if_missing and not installed:
        install(program_name, install_cmd)
        installed = True
    return installed
def get_installed_programs():
    """Return a cached {name: version} map of installed programs.

    On Windows the list comes from `choco list --local`; on Linux/Mac the
    list is currently empty. The result is memoized in CACHED_INSTALL_LIST.
    """
    global CACHED_INSTALL_LIST
    if CACHED_INSTALL_LIST:
        return CACHED_INSTALL_LIST
    if uio.is_windows():
        return_code, output = runnow.run("choco list --local",
                                         raise_error=False)
        if return_code != 0:
            CACHED_INSTALL_LIST = {}
        else:
            # Each valid line is "<name> <version>".
            split_lines = (line.split(" ") for line in output.split("\n"))
            CACHED_INSTALL_LIST = {
                parts[0].lower(): parts[1]
                for parts in split_lines if len(parts) == 2
            }
    elif uio.is_linux():
        CACHED_INSTALL_LIST = {}
    elif uio.is_mac():
        CACHED_INSTALL_LIST = {}
    return CACHED_INSTALL_LIST
def parse_aws_creds():
    """Return a 3-part tuple: (AccessKeyId, SecretAccessKey, Token=None)

    Credential sources are tried in priority order: environment variables,
    the ECS container-role endpoint, AWS_SHARED_CREDENTIALS_FILE, then
    ~/.aws/credentials and ~/.aws/config. Returns (None, None, None) when no
    source is available.
    """
    if "AWS_ACCESS_KEY_ID" in _os.environ and "AWS_SECRET_ACCESS_KEY" in _os.environ:
        _LOGGER.info("Parsing AWS credentials from env vars...")
        return (
            _os.environ["AWS_ACCESS_KEY_ID"],
            _os.environ["AWS_SECRET_ACCESS_KEY"],
            _os.environ.get("AWS_SESSION_TOKEN", None),
        )
    if "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI" in _os.environ:
        return_code, output = _jobs.run(
            "curl --silent 169.254.170.2$AWS_CONTAINER_CREDENTIALS_RELATIVE_URI",
            raise_error=False,
            echo=False,
        )
        if return_code == 0:
            _LOGGER.info("Parsing AWS credentials from ECS role...")
            # If successful, object will have the following keys:
            # AccessKeyId, SecretAccessKey, Token, Expiration, RoleArn
            creds_dict = _json.loads(output)
            return (
                creds_dict["AccessKeyId"],
                creds_dict["SecretAccessKey"],
                creds_dict["Token"],
            )
    # BUGFIX: os.path.realpath() does not expand '~', so the home-directory
    # fallbacks could never match; use expanduser(). The checks are now an
    # elif chain so the explicit env var keeps precedence and the credentials
    # file is preferred over the config file (previously later checks
    # clobbered earlier ones).
    creds_file = None
    if "AWS_SHARED_CREDENTIALS_FILE" in _os.environ and file_exists(
            _os.environ["AWS_SHARED_CREDENTIALS_FILE"]):
        _LOGGER.info(
            "Parsing AWS credentials from AWS_SHARED_CREDENTIALS_FILE env var..."
        )
        creds_file = _os.environ["AWS_SHARED_CREDENTIALS_FILE"]
    elif file_exists(_os.path.expanduser("~/.aws/credentials")):
        _LOGGER.info("Parsing AWS credentials from '~/.aws/credentials'...")
        creds_file = _os.path.expanduser("~/.aws/credentials")
    elif file_exists(_os.path.expanduser("~/.aws/config")):
        _LOGGER.info("Parsing AWS credentials from '~/.aws/config'...")
        creds_file = _os.path.expanduser("~/.aws/config")
    if creds_file:
        return parse_aws_creds_from_file(creds_file)
    return None, None, None
def init(infra_dir: str = "./infra/"):
    """Run `terraform init` inside the given infrastructure directory."""
    abs_infra_dir = os.path.realpath(infra_dir)
    runnow.run("terraform init", working_dir=abs_infra_dir)
def download_git_repo(repo_url, git_ref, target_dir):
    """Clone the repo into target_dir and check out the requested ref.

    For non-master refs, fetches first so the ref is available locally.
    """
    _jobs.run(f"git clone https://{repo_url} .", cwd=target_dir)
    if git_ref == "master":
        return
    _jobs.run("git fetch", cwd=target_dir)
    _jobs.run(f"git checkout {git_ref}", cwd=target_dir)
def _push(image_name) -> None:
    """Push the given image to its docker registry."""
    cmd = f"docker push {image_name}"
    runnow.run(cmd)
def update_module_docs(
    tf_dir: str,
    *,
    recursive: bool = True,
    readme: str = "README.md",
    footer: bool = True,
    header: bool = True,
    special_case_words: List[str] = None,
    extra_docs_names: List[str] = ["USAGE.md", "NOTES.md"],
    git_repo: str = "https://github.com/slalom-ggp/dataops-infra",
):
    """
    Replace all README.md files with auto-generated documentation, a wrapper
    around the `terraform-docs` tool.

    Parameters:
    ----------
    tf_dir: Directory of terraform scripts to document.
    recursive : Optional (default=True). 'True' to run on all subdirectories,
    recursively.
    readme : Optional (default="README.md"). The filename to create when
    generating docs.
    footnote: Optional (default=True). 'True' to include the standard footnote.
    special_case_words: Optional. A list of words to override special casing rules.
    extra_docs_names: (Optional.) A list of filenames which, if found, will be
    appended to each module's README.md file.
    git_repo: Optional. The git repo path to use in rendering 'source' paths.

    Returns:
    -------
    None
    """
    markdown_text = ""
    # Skip VCS metadata and terraform cache directories.
    if ".git" not in tf_dir and ".terraform" not in tf_dir:
        tf_files = [
            x for x in uio.list_local_files(tf_dir, recursive=False)
            if x.endswith(".tf")
        ]
        extra_docs = [
            x for x in uio.list_local_files(tf_dir, recursive=False)
            if extra_docs_names and os.path.basename(x) in extra_docs_names
        ]
        if tf_files:
            # Derive a readable module title from the folder (and its parent).
            module_title = _proper(os.path.basename(tf_dir),
                                   special_case_words=special_case_words)
            parent_dir_name = os.path.basename(Path(tf_dir).parent)
            if parent_dir_name != ".":
                module_title = _proper(
                    f"{parent_dir_name} {module_title}",
                    special_case_words=special_case_words,
                )
            module_path = tf_dir.replace(".", "").replace("//",
                                                          "/").replace("\\", "/")
            _, markdown_output = runnow.run(
                f"terraform-docs md --no-providers --sort-by-required {tf_dir}",
                echo=False,
            )
            if header:
                markdown_text += DOCS_HEADER.format(module_title=module_title,
                                                    module_path=module_path)
            markdown_text += markdown_output
            # Append any extra docs (e.g. USAGE.md, NOTES.md) verbatim.
            for extra_file in extra_docs:
                markdown_text += uio.get_text_file_contents(extra_file) + "\n"
            if footer:
                markdown_text += DOCS_FOOTER.format(src="\n".join([
                    "* [{file}]({repo}/tree/master/{dir}/{file})".format(
                        repo=git_repo,
                        dir=module_path,
                        file=os.path.basename(tf_file),
                    ) for tf_file in tf_files
                ]))
            uio.create_text_file(f"{tf_dir}/{readme}", markdown_text)
        if recursive:
            for folder in uio.list_local_files(tf_dir, recursive=False):
                if os.path.isdir(folder):
                    update_module_docs(folder, recursive=recursive, readme=readme)
def ecs_submit(
    task_name: str,
    cluster: str,
    region: str,
    container_name: str = None,
    # NOTE(review): cmd_override is interpolated as a single string into the
    # overrides JSON; the original 'dict' annotation looks wrong — confirm
    # with callers before changing it.
    cmd_override: dict = None,
    env_overrides: dict = None,
    use_fargate: bool = False,  # was annotated 'str'; used strictly as a flag
    wait_for_start=True,
    wait_for_stop=False,
    max_wait=None,
    yyyymmdd=None,
):
    """Submit an ECS task run and optionally wait for it to start/stop.

    `env_overrides` may be a dict or a "K1=V1,K2=V2" string. A `yyyymmdd`
    value (or "today") is injected as the YYYYMMDD env var.

    Returns:
        The launched task's ARN.

    Raises:
        ValueError: if overrides are given without `container_name`.
        RuntimeError: if the task could not be started.
    """
    cmd = (f"aws ecs run-task"
           f" --task-definition {task_name}"
           f" --cluster {cluster}"
           f" --region {region}")
    if use_fargate:
        cmd += " --launch-type FARGATE"
    else:
        cmd += " --launch-type EC2"
    # Accept env overrides as a comma-delimited "K=V" string as well as a dict.
    if env_overrides and isinstance(env_overrides, str):
        env_overrides = {
            x.split("=")[0]: x.split("=")[1]
            for x in env_overrides.split(",")
        }
    if yyyymmdd and yyyymmdd != "0":
        if str(yyyymmdd).lower() == "today":
            yyyymmdd = datetime.today().strftime("%Y%m%d")
        env_overrides = env_overrides or {}
        env_overrides["YYYYMMDD"] = yyyymmdd
    if env_overrides or cmd_override:
        if not container_name:
            raise ValueError("container_name is required if "
                             "cmd_override or env_overrides are specified")
        env_override_str = ""
        cmd_override_str = ""
        if env_overrides:
            env_override_str = (',"environment":[' + ",".join([
                "{" + f'"name":"{k}","value":"{v}"' + "}"
                for k, v in env_overrides.items()
            ]) + "]")
        if cmd_override:
            # BUGFIX: previously rendered with single quotes
            # (", 'command': ['...']"), which is not valid JSON and is
            # rejected by the AWS CLI's --overrides parser.
            cmd_override_str = f',"command":["{cmd_override}"]'
        overrides = (' --overrides \'{"containerOverrides":'
                     f'[{{"name":"{container_name}"'
                     f"{cmd_override_str}{env_override_str}"
                     "}]}'")
        cmd += overrides
    return_code, output_text = runnow.run(cmd, raise_error=False, echo=False)
    if return_code != 0:
        raise RuntimeError(f"Could not start task: {output_text}")
    jsonobj = json.loads(output_text)
    if len(jsonobj.get("tasks", [])) == 0 or len(jsonobj.get("failures", [])) > 0:
        raise RuntimeError(
            f"Could not start task ({jsonobj.get('failures', '')})\n{output_text}"
        )
    task_arn = jsonobj["tasks"][0]["taskArn"]
    logging.info(
        f"ECS task status: {get_ecs_task_detail_url(region, task_arn, cluster)}"
    )
    logging.info(f"ECS task logs: {get_ecs_log_url(region, task_arn)}")
    if wait_for_start:
        ecs_wait_for_start(task_arn=task_arn, cluster=cluster, region=region)
    if wait_for_stop:
        ecs_wait_for_stop(task_arn=task_arn, cluster=cluster, region=region)
    if not wait_for_start and not wait_for_stop:
        logging.debug(f"ECS submit result: {output_text}")
    return task_arn
def tag(image_name: str, tag_as):
    """Tag an image. 'tag_as' can be a string or list of strings."""
    for new_tag in _to_list(tag_as):
        runnow.run(f"docker tag {image_name} {new_tag}")
def _update_var_output(output_var):
    """Capture one `terraform output` value into ./outputs/<var>.

    Always returns True.
    """
    _, output_value = runnow.run(f"terraform output {output_var}", echo=False)
    uio.create_text_file(os.path.join("outputs", output_var),
                         contents=output_value)
    return True
def push(image_name):
    """Push an image to the docker registry via the docker CLI."""
    runnow.run(f"docker push {image_name}")
def _sync_one_table(
    tap_name: str,
    table_name: str,
    taps_dir: str,
    config_file: str,
    target_name: str,
    target_config_file: str,
    table_catalog_file: str,
    table_state_file: str,
    log_dir: str,
    dockerized: bool,
    tap_exe: str,
    target_exe: str,
) -> None:
    """Sync a single table by piping the tap's output into the target.

    Builds and runs a `tap | target > state` shell pipeline, either with
    locally-installed executables or with dockerized images (forwarding AWS
    credentials found in the configs as env vars). If a new state file is
    produced it is uploaded to `table_state_file`.
    """
    if not tap_exe:
        tap_exe = f"tap-{tap_name}"
    pipeline_version_num = config.get_pipeline_version_number()
    # Resolve placeholders (tap/table/version) in the state file path.
    table_state_file = config.replace_placeholders(
        {"table_state_file": table_state_file},
        tap_name,
        table_name,
        pipeline_version_num,
    )["table_state_file"]
    tap_args = f"--config {config_file} --catalog {table_catalog_file} "
    if uio.file_exists(table_state_file):
        # Existing state: aggregate it locally and resume from it.
        local_state_file_in = os.path.join(
            config.get_tap_output_dir(tap_name, taps_dir),
            f"{tap_name}-{table_name}-state.json",
        )
        if not uio.get_text_file_contents(table_state_file):
            logging.warning(
                f"Ignoring blank state file from '{table_state_file}'.")
        else:
            states.make_aggregate_state_file(table_state_file, local_state_file_in)
            tap_args += f" --state {local_state_file_in}"
        local_state_file_out = (
            f"{'.'.join(local_state_file_in.split('.')[:-1])}-new.json")
    else:
        local_state_file_out = os.path.join(
            config.get_tap_output_dir(tap_name, taps_dir),
            f"{tap_name}-{table_name}-state-new.json",
        )
    # Target config scoped to just this table.
    tmp_target_config = config.get_single_table_target_config_file(
        target_name,
        target_config_file,
        tap_name=tap_name,
        table_name=table_name,
        pipeline_version_num=pipeline_version_num,
    )
    target_args = f"--config {tmp_target_config} "
    hide_cmd = False
    if dockerized:
        cdw = os.getcwd().replace("\\", "/")
        tap_image_name = docker._get_docker_tap_image(tap_exe)
        target_image_name = docker._get_docker_tap_image(target_exe=target_exe)
        _, _ = runnow.run(f"docker pull {tap_image_name}")
        _, _ = runnow.run(f"docker pull {target_image_name}")
        tap_config = json.loads(uio.get_text_file_contents(config_file))
        target_config = json.loads(
            uio.get_text_file_contents(target_config_file))
        tap_docker_args = ""
        target_docker_args = ""
        # TODO: Replace with logic to parse from AWS_SHARED_CREDENTIALS_FILE env var:
        for k in [
                "aws_access_key_id", "aws_secret_access_key", "aws_session_token"
        ]:
            if k in tap_config:
                key = f"TAP_{tap_name}_{k}".replace("-", "_").upper()
                os.environ[key] = tap_config[k]
                tap_docker_args += f' -e {k.upper()}="{tap_config[k]}"'
                # Command now embeds credentials; keep it out of the logs.
                hide_cmd = True
            if k in target_config:
                key = f"TARGET_{target_name}_{k}".replace("-", "_").upper()
                os.environ[key] = target_config[k]
                target_docker_args += f' -e {k.upper()}="{target_config[k]}"'
                hide_cmd = True
        sync_cmd = (
            f"docker run --rm -i -v {cdw}:/home/local {tap_docker_args} {tap_image_name} "
            f"{config.dockerize_cli_args(tap_args)} "
            "| "
            f"docker run --rm -i -v {cdw}:/home/local {target_docker_args} {target_image_name} "
            f"{config.dockerize_cli_args(target_args)} "
            ">> "
            f"{local_state_file_out}")
    else:
        sync_cmd = (f"{tap_exe} "
                    f"{tap_args} "
                    "| "
                    f"{target_exe} "
                    f"{target_args} "
                    "> "
                    f"{local_state_file_out}")
    runnow.run(sync_cmd, hide=hide_cmd)
    if not uio.file_exists(local_state_file_out):
        logging.warning(
            f"State file does not exist at path '{local_state_file_out}'. Skipping upload. "
            f"This can be caused by having no data, or no new data, in the source table."
        )
    else:
        uio.upload_file(local_state_file_out, table_state_file)