def visualize_graph(kernel: CWLKernel, tool_id: str):
    """Visualize a Workflow"""
    import logging

    # Resolve the workflow file registered under the (whitespace-trimmed) tool id.
    workflow_path = kernel.workflow_repository.get_tools_path_by_id(tool_id.strip())

    # Have cwltool emit the workflow as RDF into an in-memory buffer.
    rdf_buffer = StringIO()
    cwltool_main(
        ['--print-rdf', os.path.abspath(workflow_path)],
        stdout=rdf_buffer,
        logger_handler=logging.StreamHandler(),
    )

    # Render the RDF to DOT, then to SVG via graphviz.
    viewer = CWLViewer(rdf_buffer.getvalue())
    (dot_graph,) = pydot.graph_from_dot_data(viewer.dot())
    ET.register_namespace('', 'http://www.w3.org/2000/svg')
    svg_root = ET.fromstring(dot_graph.create('dot', 'svg').decode())

    # Wrap the SVG markup so it scales down to the notebook cell width.
    svg_markup = ET.tostring(svg_root, method="html").decode()
    image_container = f'<div style="max-width: 100%;">{svg_markup}</div>'

    # Publish the rendered image on the kernel's IOPub channel.
    kernel.send_response(
        kernel.iopub_socket,
        'display_data',
        {
            'data': {
                "text/html": image_container,
                "text/plain": image_container
            },
            'metadata': {},
        },
    )
def cwl_load(workflow_file):
    """Validate and return cwl workflow specification.

    :param workflow_file: A specification file compliant with `cwl` workflow
        specification.
    :returns: A dictionary which represents the valid `cwl` workflow.
    :raises RuntimeError: If ``cwltool`` exits with a non-zero status.
    :raises json.JSONDecodeError: If ``cwltool`` output is not valid JSON.
    """
    packed_stdout = StringIO()
    packed_stderr = StringIO()
    # Check cwltool's exit status explicitly: a failed run leaves stdout empty,
    # and json.loads("") would otherwise raise an opaque JSONDecodeError.
    exit_code = cwltool_main(argsl=["--pack", "--quiet", workflow_file],
                             stdout=packed_stdout, stderr=packed_stderr)
    if exit_code != 0:
        raise RuntimeError(
            "cwltool failed with exit code {}: {}".format(exit_code, packed_stderr.getvalue()))
    return json.loads(packed_stdout.getvalue())
def get_command_and_env(args):
    """Build the shell command and environment for an AWS Batch job.

    Generates shellcode for storage setup, script execution, or CWL workflow
    execution based on ``args``, appending any required variables to
    ``args.environment``.

    :param args: Parsed CLI arguments (restart, storage, efs_storage, execute,
        cwl, command, environment, ...). Mutated in place.
    :returns: Tuple ``(command, environment)`` — the full argv list and the
        (possibly extended) environment entry list.
    :raises AegeaException: If cwltool fails while preprocessing the workflow.
    """
    # Import the system environment, then enable bash strict mode.
    shellcode = ["set -a",
                 "if [ -f /etc/environment ]; then source /etc/environment; fi",
                 "if [ -f /etc/default/locale ]; then source /etc/default/locale; fi",
                 "set +a",
                 "if [ -f /etc/profile ]; then source /etc/profile; fi",
                 "set -euo pipefail"]
    if args.restart:
        # Expose the retry policy to the job via its environment.
        restart_policy = dict(tries=args.restart, prior_failures=[])
        args.environment.append(dict(name="AEGEA_RESTART_POLICY", value=json.dumps(restart_policy)))
    if args.storage:
        # EBS volume management needs privileged access to /dev on the host.
        args.privileged = True
        args.volumes.append(["/dev", "/dev"])
        for mountpoint, size_gb in args.storage:
            shellcode += (ebs_vol_mgr_shellcode % (size_gb, mountpoint)).splitlines()
    elif args.efs_storage:
        args.privileged = True
        commands = (efs_vol_shellcode % (args.efs_storage, args.efs_storage)).splitlines()
        shellcode += commands
    if args.execute:
        # Ship the user script inline as base64; unpack and run it on the node.
        payload = base64.b64encode(args.execute.read()).decode()
        args.environment.append(dict(name="BATCH_SCRIPT_B64", value=payload))
        shellcode += ['BATCH_SCRIPT=$(mktemp --tmpdir "$AWS_BATCH_CE_NAME.$AWS_BATCH_JQ_NAME.$AWS_BATCH_JOB_ID.XXXXX")',
                      "echo $BATCH_SCRIPT_B64 | base64 -d > $BATCH_SCRIPT",
                      "chmod +x $BATCH_SCRIPT",
                      "$BATCH_SCRIPT"]
    elif args.cwl:
        # Provision the job state table and staging bucket for CWL results.
        ensure_dynamodb_table("aegea-batch-jobs", hash_key_name="job_id")
        bucket = ensure_s3_bucket("aegea-batch-jobs-{}".format(ARN.get_account_id()))
        args.environment.append(dict(name="AEGEA_BATCH_S3_BASE_URL", value="s3://" + bucket.name))
        from cwltool.main import main as cwltool_main
        with io.BytesIO() as preprocessed_cwl:
            if cwltool_main(["--print-pre", args.cwl], stdout=preprocessed_cwl) != 0:
                raise AegeaException("Error while running cwltool")
            # FIX: use safe_load — the plain yaml.load can instantiate arbitrary
            # Python objects from the document and its no-Loader form is
            # deprecated; safe_load is sufficient for the preprocessed spec.
            cwl_spec = yaml.safe_load(preprocessed_cwl.getvalue())
            # Pass both the workflow definition and the job input as base64.
            payload = base64.b64encode(preprocessed_cwl.getvalue()).decode()
            args.environment.append(dict(name="AEGEA_BATCH_CWL_DEF_B64", value=payload))
            payload = base64.b64encode(args.cwl_input.read()).decode()
            args.environment.append(dict(name="AEGEA_BATCH_CWL_JOB_B64", value=payload))
        for requirement in cwl_spec.get("requirements", []):
            if requirement["class"] == "DockerRequirement":
                # FIXME: dockerFile support: ensure_ecr_image(...)
                # container_props["image"] = requirement["dockerPull"]
                pass
        shellcode += [
            # 'sed -i -e "s|http://archive.ubuntu.com|http://us-east-1.ec2.archive.ubuntu.com|g" /etc/apt/sources.list',
            # "apt-get update -qq",
            # "apt-get install -qqy --no-install-suggests --no-install-recommends --force-yes python-pip python-requests python-yaml python-lockfile python-pyparsing awscli",  # noqa
            # "pip install ruamel.yaml==0.13.4 cwltool==1.0.20161227200419 dynamoq tractorbeam",
            "cwltool --no-container --preserve-entire-environment <(echo $AEGEA_BATCH_CWL_DEF_B64 | base64 -d) <(echo $AEGEA_BATCH_CWL_JOB_B64 | base64 -d | tractor pull) | tractor push $AEGEA_BATCH_S3_BASE_URL/$AWS_BATCH_JOB_ID | dynamoq update aegea-batch-jobs $AWS_BATCH_JOB_ID"  # noqa
        ]
    args.command = bash_cmd_preamble + shellcode + (args.command or [])
    return args.command, args.environment
def get_command_and_env(args):
    """Build the shell command and environment for an AWS Batch job.

    Generates shellcode for instance/EBS/EFS storage setup, S3-staged script
    execution, or CWL workflow execution based on ``args``, appending any
    required variables to ``args.environment``.

    :param args: Parsed CLI arguments. Mutated in place (privileged, volumes,
        environment, command).
    :returns: Tuple ``(command, environment)`` — the full argv list and the
        (possibly extended) environment entry list.
    :raises AegeaException: If an EFS filesystem name cannot be resolved, or if
        cwltool fails while preprocessing the workflow.
    """
    # shellcode = ['for var in ${{!AWS_BATCH_@}}; do echo "{}.env.$var=${{!var}}"; done'.format(__name__)]
    shellcode = env_mgr_shellcode.strip().format(region=ARN.get_region()).splitlines()
    if args.mount_instance_storage or args.storage:
        # Block-device management needs privileged access to /dev on the host.
        args.privileged = True
        args.volumes.append(["/dev", "/dev"])
    if args.mount_instance_storage:
        shellcode += instance_storage_mgr_shellcode.strip().format(
            region=ARN.get_region(),
            mountpoint=args.mount_instance_storage,
            mkfs=get_mkfs_command(fs_type="ext4")).splitlines()
    if args.storage:
        for mountpoint, size_gb in args.storage:
            # Default to throughput-optimized HDD unless overridden.
            volume_type = "st1"
            if args.volume_type:
                volume_type = args.volume_type
            shellcode += ebs_vol_mgr_shellcode.strip().format(
                region=ARN.get_region(),
                aegea_version=__version__,
                size_gb=size_gb,
                volume_type=volume_type,
                mountpoint=mountpoint).splitlines()
    elif args.efs_storage:
        args.privileged = True
        # Accept "mountpoint=fs-id" or a bare mountpoint (filesystem named
        # after this module).
        if "=" in args.efs_storage:
            mountpoint, efs_id = args.efs_storage.split("=")
        else:
            mountpoint, efs_id = args.efs_storage, __name__
        if not efs_id.startswith("fs-"):
            # Resolve a filesystem name to its EFS filesystem ID.
            for filesystem in clients.efs.describe_file_systems()["FileSystems"]:
                if filesystem["Name"] == efs_id:
                    efs_id = filesystem["FileSystemId"]
                    break
            else:
                raise AegeaException('Could not resolve "{}" to a valid EFS filesystem ID'.format(efs_id))
        mount_targets = clients.efs.describe_mount_targets(FileSystemId=efs_id)["MountTargets"]
        args.environment.append(dict(name="AEGEA_EFS_DESC", value=json.dumps(mount_targets)))
        # NOTE(review): raw args.efs_storage (possibly still "mountpoint=fs-id")
        # is passed as efs_mountpoint instead of the parsed mountpoint above —
        # confirm efs_vol_shellcode expects the raw value.
        commands = efs_vol_shellcode.format(efs_mountpoint=args.efs_storage,
                                            efs_id=efs_id).splitlines()
        shellcode += commands
    if args.execute:
        # Stage the user script in S3 (keyed by content hash) and fetch it on
        # the node via a presigned URL valid for one week.
        bucket = ensure_s3_bucket(
            args.staging_s3_bucket or "aegea-batch-jobs-" + ARN.get_account_id())
        key_name = hashlib.sha256(args.execute.read()).hexdigest()
        args.execute.seek(0)
        bucket.upload_fileobj(args.execute, key_name)
        payload_url = clients.s3.generate_presigned_url(
            ClientMethod='get_object',
            Params=dict(Bucket=bucket.name, Key=key_name),
            ExpiresIn=3600 * 24 * 7)
        tmpdir_fmt = "${AWS_BATCH_CE_NAME:-$AWS_EXECUTION_ENV}.${AWS_BATCH_JQ_NAME:-}.${AWS_BATCH_JOB_ID:-}.XXXXX"
        shellcode += ['BATCH_SCRIPT=$(mktemp --tmpdir "{tmpdir_fmt}")'.format(tmpdir_fmt=tmpdir_fmt),
                      "apt-get update -qq",
                      "apt-get install -qqy --no-install-suggests --no-install-recommends curl ca-certificates gnupg",
                      "curl '{payload_url}' > $BATCH_SCRIPT".format(payload_url=payload_url),
                      "chmod +x $BATCH_SCRIPT",
                      "$BATCH_SCRIPT"]
    elif args.cwl:
        # Provision the job state table and staging bucket for CWL results.
        ensure_dynamodb_table("aegea-batch-jobs", hash_key_name="job_id")
        bucket = ensure_s3_bucket(
            args.staging_s3_bucket or "aegea-batch-jobs-" + ARN.get_account_id())
        args.environment.append(
            dict(name="AEGEA_BATCH_S3_BASE_URL", value="s3://" + bucket.name))
        from cwltool.main import main as cwltool_main
        with io.BytesIO() as preprocessed_cwl:
            if cwltool_main(["--print-pre", args.cwl], stdout=preprocessed_cwl) != 0:
                raise AegeaException("Error while running cwltool")
            # FIX: use safe_load — the plain yaml.load can instantiate arbitrary
            # Python objects from the document and its no-Loader form is
            # deprecated; safe_load is sufficient for the preprocessed spec.
            cwl_spec = yaml.safe_load(preprocessed_cwl.getvalue())
            # Pass both the workflow definition and the job input as base64.
            payload = base64.b64encode(preprocessed_cwl.getvalue()).decode()
            args.environment.append(
                dict(name="AEGEA_BATCH_CWL_DEF_B64", value=payload))
            payload = base64.b64encode(args.cwl_input.read()).decode()
            args.environment.append(
                dict(name="AEGEA_BATCH_CWL_JOB_B64", value=payload))
        for requirement in cwl_spec.get("requirements", []):
            if requirement["class"] == "DockerRequirement":
                # FIXME: dockerFile support: ensure_ecr_image(...)
                # container_props["image"] = requirement["dockerPull"]
                pass
        shellcode += [
            # 'sed -i -e "s|http://archive.ubuntu.com|http://us-east-1.ec2.archive.ubuntu.com|g" /etc/apt/sources.list',
            # "apt-get update -qq",
            # "apt-get install -qqy --no-install-suggests --no-install-recommends --force-yes python-pip python-requests python-yaml python-lockfile python-pyparsing awscli",  # noqa
            # "pip install ruamel.yaml==0.13.4 cwltool==1.0.20161227200419 dynamoq tractorbeam",
            "cwltool --no-container --preserve-entire-environment <(echo $AEGEA_BATCH_CWL_DEF_B64 | base64 -d) <(echo $AEGEA_BATCH_CWL_JOB_B64 | base64 -d | tractor pull) | tractor push $AEGEA_BATCH_S3_BASE_URL/$AWS_BATCH_JOB_ID | dynamoq update aegea-batch-jobs $AWS_BATCH_JOB_ID"  # noqa
        ]
    args.command = bash_cmd_preamble + shellcode + (args.command or [])
    return args.command, args.environment
def _cwl2rdf(self) -> str:
    """Return the RDF representation of the wrapped CWL file.

    Runs cwltool with ``--print-rdf`` on ``self._filename``, capturing its
    stdout in memory, and routes cwltool's log output to an INFO-level
    console handler.
    """
    log_handler = logging.StreamHandler()
    log_handler.setLevel(logging.INFO)
    rdf_buffer = StringIO()
    cwltool_main(
        ['--print-rdf', str(self._filename)],
        stdout=rdf_buffer,
        logger_handler=log_handler,
    )
    return rdf_buffer.getvalue()
def get_command_and_env(args):
    """Build the shell command and environment for an AWS Batch job.

    Generates shellcode for EBS/EFS storage setup, inline script execution, or
    CWL workflow execution based on ``args``, appending any required variables
    to ``args.environment``.

    :param args: Parsed CLI arguments. Mutated in place (privileged, volumes,
        environment, command).
    :returns: Tuple ``(command, environment)`` — the full argv list and the
        (possibly extended) environment entry list.
    :raises AegeaException: If an EFS filesystem name cannot be resolved, or if
        cwltool fails while preprocessing the workflow.
    """
    # shellcode = ['for var in ${{!AWS_BATCH_@}}; do echo "{}.env.$var=${{!var}}"; done'.format(__name__)]
    # Import the system environment, then enable bash strict mode.
    shellcode = ["set -a",
                 "if [ -f /etc/environment ]; then source /etc/environment; fi",
                 "if [ -f /etc/default/locale ]; then source /etc/default/locale; fi",
                 "set +a",
                 "if [ -f /etc/profile ]; then source /etc/profile; fi",
                 "set -euo pipefail"]
    if args.storage:
        # EBS volume management needs privileged access to /dev on the host.
        args.privileged = True
        args.volumes.append(["/dev", "/dev"])
        for mountpoint, size_gb in args.storage:
            shellcode += (ebs_vol_mgr_shellcode % (size_gb, mountpoint)).splitlines()
    elif args.efs_storage:
        args.privileged = True
        # Accept "mountpoint=fs-id" or a bare mountpoint (filesystem named
        # after this module).
        if "=" in args.efs_storage:
            mountpoint, efs_id = args.efs_storage.split("=")
        else:
            mountpoint, efs_id = args.efs_storage, __name__
        if not efs_id.startswith("fs-"):
            # Resolve a filesystem name to its EFS filesystem ID.
            for filesystem in clients.efs.describe_file_systems()["FileSystems"]:
                if filesystem["Name"] == efs_id:
                    efs_id = filesystem["FileSystemId"]
                    break
            else:
                raise AegeaException('Could not resolve "{}" to a valid EFS filesystem ID'.format(efs_id))
        mount_targets = clients.efs.describe_mount_targets(FileSystemId=efs_id)["MountTargets"]
        args.environment.append(dict(name="AEGEA_EFS_DESC", value=json.dumps(mount_targets)))
        # NOTE(review): raw args.efs_storage (possibly still "mountpoint=fs-id")
        # is passed as efs_mountpoint instead of the parsed mountpoint above —
        # confirm efs_vol_shellcode expects the raw value.
        commands = efs_vol_shellcode.format(efs_mountpoint=args.efs_storage, efs_id=efs_id).splitlines()
        shellcode += commands
    if args.execute:
        # Ship the user script inline as base64; unpack and run it on the node.
        payload = base64.b64encode(args.execute.read()).decode()
        args.environment.append(dict(name="BATCH_SCRIPT_B64", value=payload))
        shellcode += ['BATCH_SCRIPT=$(mktemp --tmpdir "$AWS_BATCH_CE_NAME.$AWS_BATCH_JQ_NAME.$AWS_BATCH_JOB_ID.XXXXX")',
                      "echo $BATCH_SCRIPT_B64 | base64 -d > $BATCH_SCRIPT",
                      "chmod +x $BATCH_SCRIPT",
                      "$BATCH_SCRIPT"]
    elif args.cwl:
        # Provision the job state table and staging bucket for CWL results.
        ensure_dynamodb_table("aegea-batch-jobs", hash_key_name="job_id")
        bucket = ensure_s3_bucket("aegea-batch-jobs-{}".format(ARN.get_account_id()))
        args.environment.append(dict(name="AEGEA_BATCH_S3_BASE_URL", value="s3://" + bucket.name))
        from cwltool.main import main as cwltool_main
        with io.BytesIO() as preprocessed_cwl:
            if cwltool_main(["--print-pre", args.cwl], stdout=preprocessed_cwl) != 0:
                raise AegeaException("Error while running cwltool")
            # FIX: use safe_load — the plain yaml.load can instantiate arbitrary
            # Python objects from the document and its no-Loader form is
            # deprecated; safe_load is sufficient for the preprocessed spec.
            cwl_spec = yaml.safe_load(preprocessed_cwl.getvalue())
            # Pass both the workflow definition and the job input as base64.
            payload = base64.b64encode(preprocessed_cwl.getvalue()).decode()
            args.environment.append(dict(name="AEGEA_BATCH_CWL_DEF_B64", value=payload))
            payload = base64.b64encode(args.cwl_input.read()).decode()
            args.environment.append(dict(name="AEGEA_BATCH_CWL_JOB_B64", value=payload))
        for requirement in cwl_spec.get("requirements", []):
            if requirement["class"] == "DockerRequirement":
                # FIXME: dockerFile support: ensure_ecr_image(...)
                # container_props["image"] = requirement["dockerPull"]
                pass
        shellcode += [
            # 'sed -i -e "s|http://archive.ubuntu.com|http://us-east-1.ec2.archive.ubuntu.com|g" /etc/apt/sources.list',
            # "apt-get update -qq",
            # "apt-get install -qqy --no-install-suggests --no-install-recommends --force-yes python-pip python-requests python-yaml python-lockfile python-pyparsing awscli",  # noqa
            # "pip install ruamel.yaml==0.13.4 cwltool==1.0.20161227200419 dynamoq tractorbeam",
            "cwltool --no-container --preserve-entire-environment <(echo $AEGEA_BATCH_CWL_DEF_B64 | base64 -d) <(echo $AEGEA_BATCH_CWL_JOB_B64 | base64 -d | tractor pull) | tractor push $AEGEA_BATCH_S3_BASE_URL/$AWS_BATCH_JOB_ID | dynamoq update aegea-batch-jobs $AWS_BATCH_JOB_ID"  # noqa
        ]
    args.command = bash_cmd_preamble + shellcode + (args.command or [])
    return args.command, args.environment