import os

from core import utils  # helper module providing update_config, create_json
                        # and launch_instance (import path assumed)


def handler(event, context):
    '''
    config:
    instance_type: EC2 instance type
    ebs_size: EBS storage size in GB
    ebs_type: EBS storage type (available values: gp2, io1, st1, sc1, standard; default: io1)
    ebs_iops: EBS storage IOPS
    key_name: name of keypair used for launching instances
    json_bucket: S3 bucket to which the json file is copied (used with copy_to_s3)
    password: password for ssh connection for user ec2-user
    EBS_optimized: Use this flag if the instance type is EBS-optimized (default: EBS-optimized)
    shutdown_min: Number of minutes before shutdown after the jobs are finished (default: now)
    copy_to_s3: Upload or copy the json file to S3 bucket json_bucket
    launch_instance: Launch instance based on the json file
    log_bucket: bucket for collecting logs (started, postrun, success, error, log)

    args:
    cwl_main_filename: main cwl file name
    cwl_child_filenames: names of the other cwl files used by the main cwl file, delimited by comma
    app_name: name of the app
    app_version: version of the app
    cwl_directory_url: the url and subdirectories for the main cwl file
    cwl_version: the version of cwl (either 'draft3' or 'v1')
    input_reference_files_directory: bucket name and subdirectory for input reference files
    output_S3_bucket: bucket name and subdirectory for output files and logs
    input_files: input files in json format (parametername: {'bucket_name': bucketname, 'object_key': filename})
    secondary_files: secondary files in json format (parametername: {'bucket_name': bucketname, 'object_key': filename})
    input_parameters: input parameters in json format (parametername: value)
    output_target: output target files in json format (parametername: target filename in the output bucket)
    secondary_output_target: secondary output target files in json format (similar to output_target)
    '''
    # read default variables in config
    CONFIG_FIELD = "config"
    CONFIG_KEYS = ["EBS_optimized", "shutdown_min", "copy_to_s3", "instance_type",
                   "ebs_size", "launch_instance", "key_name", "ebs_type", "ebs_iops",
                   "json_bucket", "password", "log_bucket"]
    ARGS_FIELD = "args"
    ARGS_KEYS = ["cwl_main_filename", "cwl_child_filenames", "app_name", "app_version",
                 "input_files", "output_S3_bucket", "cwl_directory_url", "input_parameters",
                 "secondary_files", "output_target", "secondary_output_target"]

    cfg = event.get(CONFIG_FIELD)
    for k in CONFIG_KEYS:
        assert k in cfg, "%s not in config field" % k
    args = event.get(ARGS_FIELD)
    for k in ARGS_KEYS:
        assert k in args, "%s not in args field" % k

    # args: parameters needed by the instance to run a workflow
    # cfg: parameters needed to launch an instance
    cfg['job_tag'] = args.get('app_name')
    cfg['userdata_dir'] = '/tmp/userdata'
    # local directory in which the json file will be first created
    cfg['json_dir'] = '/tmp/json'

    # AMI and script directory according to cwl version
    if args['cwl_version'] == 'v1':
        cfg['ami_id'] = os.environ.get('AMI_ID_CWL_V1')
        cfg['script_url'] = 'https://raw.githubusercontent.com/' + \
            os.environ.get('TIBANNA_REPO_NAME') + '/' + \
            os.environ.get('TIBANNA_REPO_BRANCH') + '/awsf_cwl_v1/'
    else:
        cfg['ami_id'] = os.environ.get('AMI_ID_CWL_DRAFT3')
        cfg['script_url'] = 'https://raw.githubusercontent.com/' + \
            os.environ.get('TIBANNA_REPO_NAME') + '/' + \
            os.environ.get('TIBANNA_REPO_BRANCH') + '/awsf_cwl_draft3/'

    utils.update_config(cfg, args['app_name'],
                        args['input_files'], args['input_parameters'])

    # create json and copy to s3
    jobid = utils.create_json(event, '')

    # launch instance and execute workflow
    if cfg.get('launch_instance'):
        launch_instance_log = utils.launch_instance(cfg, jobid)
        event.update({'jobid': jobid})
        event.update(launch_instance_log)

    return event
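
# A minimal sketch of an input event accepted by the handler above, assembled
# from its docstring and key lists. All bucket names, file names, the keypair,
# password and app name are hypothetical placeholders, not values from the source.
example_event = {
    "config": {
        "instance_type": "t2.micro",
        "ebs_size": 20,
        "ebs_type": "io1",
        "ebs_iops": 500,
        "EBS_optimized": False,
        "shutdown_min": 30,
        "copy_to_s3": True,
        "launch_instance": True,
        "key_name": "my-keypair",         # hypothetical
        "json_bucket": "my-json-bucket",  # hypothetical
        "password": "my-password",        # hypothetical
        "log_bucket": "my-log-bucket"     # hypothetical
    },
    "args": {
        "cwl_main_filename": "main.cwl",
        "cwl_child_filenames": "",
        "cwl_version": "v1",
        "cwl_directory_url": "https://raw.githubusercontent.com/example/repo/master/cwl/",
        "app_name": "my-app",
        "app_version": "0.1",
        "input_files": {"input_file": {"bucket_name": "my-input-bucket",
                                       "object_key": "input.txt"}},
        "secondary_files": {},
        "input_parameters": {},
        "output_S3_bucket": "my-output-bucket",
        "output_target": {},
        "secondary_output_target": {}
    }
}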
def handler(event, context):
    '''
    config:
    instance_type: EC2 instance type
    ebs_size: EBS storage size in GB
    ebs_type: EBS storage type (available values: gp2, io1, st1, sc1, standard; default: io1)
    ebs_iops: EBS storage IOPS
    s3_access_arn: IAM instance profile for S3 access
    ami_id: ID of AMI used for the instance - it should have docker daemon and
            cwl-runner (either toil or cwltools) installed
    script_url: url of the directory containing the run scripts for the instance
    key_name: name of keypair used for launching instances
    json_bucket: S3 bucket to which the json file is copied (used with copy_to_s3)
    password: password for ssh connection for user ec2-user
    EBS_optimized: Use this flag if the instance type is EBS-optimized (default: EBS-optimized)
    shutdown_min: Number of minutes before shutdown after the jobs are finished (default: now)
    copy_to_s3: Upload or copy the json file to S3 bucket json_bucket
    launch_instance: Launch instance based on the json file
    log_bucket: bucket for collecting logs (started, postrun, success, error, log)

    args:
    cwl_main_filename: main cwl file name
    cwl_child_filenames: names of the other cwl files used by the main cwl file, delimited by comma
    app_name: name of the app
    app_version: version of the app
    cwl_directory_url: the url and subdirectories for the main cwl file
    input_reference_files_directory: bucket name and subdirectory for input reference files
    output_S3_bucket: bucket name and subdirectory for output files and logs
    input_files: input files in json format (parametername: {'bucket_name': bucketname, 'object_key': filename})
    secondary_files: secondary files in json format (parametername: {'bucket_name': bucketname, 'object_key': filename})
    input_parameters: input parameters in json format (parametername: value)
    output_target: output target files in json format (parametername: target filename in the output bucket)
    secondary_output_target: secondary output target files in json format (similar to output_target)
    '''
    # read default variables in config
    CONFIG_FIELD = "config"
    CONFIG_KEYS = ["s3_access_arn", "EBS_optimized", "shutdown_min", "copy_to_s3",
                   "ami_id", "instance_type", "ebs_size", "launch_instance",
                   "script_url", "key_name", "ebs_type", "ebs_iops", "json_bucket",
                   "password", "log_bucket"]
    ARGS_FIELD = "args"
    ARGS_KEYS = ["cwl_main_filename", "cwl_child_filenames", "app_name", "app_version",
                 "input_files", "output_S3_bucket", "cwl_directory_url", "input_parameters",
                 "secondary_files", "output_target", "secondary_output_target"]

    cfg = event.get(CONFIG_FIELD)
    for k in CONFIG_KEYS:
        assert k in cfg, "%s not in config field" % k
    args = event.get(ARGS_FIELD)
    for k in ARGS_KEYS:
        assert k in args, "%s not in args field" % k

    # args: parameters needed by the instance to run a workflow
    # cfg: parameters needed to launch an instance
    cfg['job_tag'] = args.get('app_name')
    cfg['outbucket'] = args.get('output_S3_bucket')
    cfg['userdata_dir'] = '/tmp/userdata'
    # local directory in which the json file will be first created
    cfg['json_dir'] = '/tmp/json'

    # create json and copy to s3
    jobid = utils.create_json(event, '')

    # launch instance and execute workflow
    if cfg.get('launch_instance'):
        launch_instance_log = utils.launch_instance(cfg, jobid)
        event.update({'jobid': jobid})
        event.update(launch_instance_log)

    return event
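
# Unlike the preceding variant, this one takes the AMI, run-script location
# and S3-access instance profile directly in config instead of deriving them
# from cwl_version and environment variables. A sketch of the extra config
# keys it requires (all values are hypothetical placeholders):
extra_config_keys = {
    "s3_access_arn": "arn:aws:iam::123456789012:instance-profile/S3_access",      # hypothetical
    "ami_id": "ami-0123456789abcdef0",                                            # hypothetical
    "script_url": "https://raw.githubusercontent.com/example/repo/master/awsf/",  # hypothetical
}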
def handler(event, context):
    '''
    config:
    cwl_url: the url and subdirectories for the main cwl file
    reference_S3_bucket: bucket name and subdirectory for input reference files
    output_S3_bucket: bucket name and subdirectory for output files and logs
    default_instance_type: EC2 instance type
    default_ebs_size: EBS storage size in GB
    default_ebs_type: EBS storage type (available values: gp2, io1, st1, sc1, standard; default: io1)
    ebs_iops: EBS storage IOPS
    json_dir: Local directory in which the output json file will be written
    s3_access_arn: IAM instance profile for S3 access
    keyname: name of keypair used for launching instances
    worker_ami_id: ID of AMI used for the instance - it should have docker daemon
                   and cwl-runner (either toil or cwltools) installed
    userdata_dir: local directory to store userdata (used internally within lambda)

    args:
    cwl: main cwl file name
    cwl_children: names of the other cwl files used by the main cwl file, delimited by comma
    app_name: name of the app
    app_version: version of the app
    input_files: input files in json format (parametername: filename)
    input_reference_files: input reference files in json format (parametername: filename)
    input_parameters: input parameters in json format (parametername: value)
    input_files_directory: bucket name and subdirectory for input files
    not_EBS_optimized: Use this flag if the instance type is not EBS-optimized (default: EBS-optimized)
    shutdown_min: Number of minutes before shutdown after the jobs are finished (default: now)
    copy_to_s3: Upload or copy the json file to S3 bucket json_bucket
    launch_instance: Launch instance based on the json file
    '''
    # read default variables in config
    CONFIG_FIELD = "config"
    CONFIG_KEYS = ["reference_S3_bucket", "output_S3_bucket", "s3_access_arn",
                   "keyname", "worker_ami_id", "default_instance_type",
                   "default_ebs_size", "default_ebs_type", "ebs_iops",
                   "userdata_dir", "json_dir", "cwl_url"]
    ARGS_FIELD = "args"
    ARGS_KEYS = ["cwl", "cwl_children", "app_name", "app_version", "input_files",
                 "input_reference_files", "input_parameters", "input_files_directory",
                 "not_EBS_optimized", "shutdown_min", "copy_to_s3", "launch_instance"]

    cfg = event.get(CONFIG_FIELD)
    for k in CONFIG_KEYS:
        assert k in cfg, "%s not in config field" % k
    args = event.get(ARGS_FIELD)
    for k in ARGS_KEYS:
        assert k in args, "%s not in args field" % k

    # parameters that will go into the pre-run json file
    final_args = {
        'cwl_directory': cfg.get('cwl_url'),
        'cwl': args.get('cwl'),
        'cwl_children': args.get('cwl_children'),
        'app_name': args.get('app_name'),
        'app_version': args.get('app_version'),
        'input_files': args.get('input_files'),
        'input_reference_files': args.get('input_reference_files'),
        'input_parameters': args.get('input_parameters'),
        'input_files_directory': args.get('input_files_directory'),
        'input_reference_files_directory': cfg.get('reference_S3_bucket'),
        'output_bucket_directory': cfg.get('output_S3_bucket'),
        'instance_type': cfg.get('default_instance_type'),
        'storage_size': cfg.get('default_ebs_size'),
        'storage_type': cfg.get('default_ebs_type'),
        'storage_iops': cfg.get('ebs_iops')
    }

    # parameters needed to launch an instance
    par = {
        's3_access_arn': cfg.get('s3_access_arn'),
        'worker_ami_id': cfg.get('worker_ami_id'),
        'keyname': cfg.get('keyname'),
        'userdata_dir': cfg.get('userdata_dir'),
        'instance_type': cfg.get('default_instance_type'),  # redundant with final_args
        'storage_size': cfg.get('default_ebs_size'),        # redundant with final_args
        'storage_type': cfg.get('default_ebs_type'),        # redundant with final_args
        'storage_iops': cfg.get('ebs_iops'),                # redundant with final_args
        'EBS_optimized': True,
        'job_tag': final_args.get('app_name'),
        'outbucket': cfg.get('output_S3_bucket')  # redundant with output_bucket_directory in final_args
    }

    shutdown_min = args.get('shutdown_min')
    copy_to_s3 = args.get('copy_to_s3')
    # local directory in which the json file will be first created
    json_dir = cfg.get('json_dir')

    # create json and copy to s3
    jobid = utils.create_json(final_args, json_dir, '', copy_to_s3)

    # launch instance and execute workflow
    if args.get('launch_instance'):
        utils.launch_instance(par, jobid, shutdown_min)
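
# A minimal sketch of the 'create json and copy to s3' step above, assuming
# utils.create_json generates a job id and dumps final_args to a local json
# file before upload. The job-id scheme and file layout here are assumptions,
# not the library's actual behavior.
import json
import uuid

def create_json_sketch(final_args, json_dir):
    jobid = uuid.uuid4().hex[:12]          # assumed job-id scheme
    os.makedirs(json_dir, exist_ok=True)   # json_dir comes from cfg['json_dir']
    with open(os.path.join(json_dir, jobid + '.json'), 'w') as f:
        json.dump(final_args, f, indent=2)
    return jobid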
from core import ec2_utils  # helper module for this later variant (import path assumed)
from core.utils import check_dependency  # (import path assumed)


def handler(event, context):
    '''
    config:
    # required
    instance_type: EC2 instance type
    ebs_size: EBS storage size in GB
    ebs_type: EBS storage type (available values: gp2, io1, st1, sc1, standard; default: io1)
    ebs_iops: EBS storage IOPS
    password: password for ssh connection for user ec2-user
    EBS_optimized: Use this flag if the instance type is EBS-optimized (default: EBS-optimized)
    shutdown_min: Number of minutes before shutdown after the jobs are finished (default: now)
    log_bucket: bucket for collecting logs (started, postrun, success, error, log)
    # optional
    public_postrun_json (optional): whether postrun json should be made public (default false)
    cloudwatch_dashboard (optional): create a cloudwatch dashboard named awsem-<jobid>

    args:
    # required (i.e. field must exist):
    input_files: input files in json format (parametername: {'bucket_name': bucketname, 'object_key': filename})
    output_S3_bucket: bucket name and subdirectory for output files and logs
    output_target: output target files in json format (parametername: target filename in the output bucket)
    # optional
    app_name: name of the app, used by Benchmark
    app_version: version of the app
    secondary_files: secondary files in json format (parametername: {'bucket_name': bucketname, 'object_key': filename})
    input_parameters: input parameters in json format (parametername: value)
    secondary_output_target: secondary output files in json format (similar to secondary_files)
    # required for cwl
    cwl_main_filename: main cwl file name
    cwl_directory_url: the url and subdirectories for the main cwl file
    cwl_version: the version of cwl (either 'draft3' or 'v1')
    cwl_child_filenames (optional): names of the other cwl files used by the main cwl file, delimited by comma
    language (optional for cwl): 'cwl_v1' or 'cwl_draft3'
    # required for wdl
    language: 'wdl'
    wdl_main_filename: main wdl file name
    wdl_directory_url: the url of the wdl file
    wdl_child_filenames (optional): names of the other wdl files used by the main wdl file, delimited by comma
    # optional
    dependency: {'exec_arn': [exec_arns]}
    spot_duration: 60  # block minutes 60-360 if requesting spot instance
    '''
    # read default variables in config
    CONFIG_FIELD = "config"
    CONFIG_KEYS = ["log_bucket"]
    ARGS_FIELD = "args"
    ARGS_KEYS = ["input_files", "output_S3_bucket", "output_target"]
    ARGS_KEYS_CWL = ["cwl_main_filename", "cwl_directory_url"]
    ARGS_KEYS_WDL = ["wdl_main_filename", "wdl_directory_url", "language"]

    # args: parameters needed by the instance to run a workflow
    # cfg: parameters needed to launch an instance
    cfg = event.get(CONFIG_FIELD)
    for k in CONFIG_KEYS:
        assert k in cfg, "%s not in config field" % k
    args = event.get(ARGS_FIELD)
    for k in ARGS_KEYS:
        assert k in args, "%s not in args field" % k
    if 'language' in args and args['language'] == 'wdl':
        for k in ARGS_KEYS_WDL:
            assert k in args, "%s not in args field" % k
    else:
        for k in ARGS_KEYS_CWL:
            assert k in args, "%s not in args field" % k
    if 'dependency' in args:
        check_dependency(**args['dependency'])

    # update input json to add various other info automatically
    ec2_utils.auto_update_input_json(args, cfg)

    # create json and copy to s3
    jobid = ec2_utils.create_json(event)

    # profile
    if os.environ.get('TIBANNA_PROFILE_ACCESS_KEY', None) and \
            os.environ.get('TIBANNA_PROFILE_SECRET_KEY', None):
        profile = {'access_key': os.environ.get('TIBANNA_PROFILE_ACCESS_KEY'),
                   'secret_key': os.environ.get('TIBANNA_PROFILE_SECRET_KEY')}
    else:
        profile = None

    # launch instance and execute workflow
    launch_instance_log = ec2_utils.launch_instance(cfg, jobid, profile=profile)

    # set up cloudwatch dashboard
    if 'cloudwatch_dashboard' in cfg and cfg['cloudwatch_dashboard']:
        instance_id = launch_instance_log['instance_id']
        ec2_utils.create_cloudwatch_dashboard(instance_id, 'awsem-' + jobid)

    if 'jobid' not in event:
        event.update({'jobid': jobid})
    event.update(launch_instance_log)
    return event
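
# A minimal sketch of an input event for the WDL path of the handler above,
# assembled from its docstring and key lists. Bucket names, file names and
# output targets are hypothetical placeholders, not values from the source.
example_wdl_event = {
    "config": {
        "log_bucket": "my-log-bucket",  # hypothetical
        "instance_type": "t2.medium",
        "ebs_size": 30,
        "shutdown_min": 30,
        "cloudwatch_dashboard": True    # optional: creates dashboard awsem-<jobid>
    },
    "args": {
        "language": "wdl",
        "wdl_main_filename": "main.wdl",
        "wdl_directory_url": "https://raw.githubusercontent.com/example/repo/master/wdl/",
        "input_files": {"workflow.input_file": {"bucket_name": "my-input-bucket",
                                                "object_key": "input.txt"}},
        "output_S3_bucket": "my-output-bucket",
        "output_target": {"workflow.out": "results/output.txt"}
        # optional: "dependency": {"exec_arn": ["arn:aws:states:..."]},
        # optional: "spot_duration": 60
    }
}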