Example #1
def handler(event, context):
    '''
    config:
    instance_type: EC2 instance type
    ebs_size: EBS storage size in GB
    ebs_type: EBS storage type (available values: gp2, io1, st1, sc1, standard; default: io1)
    ebs_iops: EBS storage IOPS
    password: password for ssh connection for user ec2-user
    EBS_optimized: Use this flag if the instance type is EBS-optimized (default: EBS-optimized)
    shutdown_min: Number of minutes before shutdown after the jobs are finished (default: now)
    copy_to_s3: Upload or copy the json file to S3 bucket json_bucket
    launch_instance: Launch instance based on the json file
    log_bucket: bucket for collecting logs (started, postrun, success, error, log)

    args:
    cwl_main_filename: main cwl file name
    cwl_child_filenames: names of the other cwl files used by the main cwl file, delimited by comma
    app_name: name of the app
    app_version: version of the app
    cwl_directory_url: the url and subdirectories for the main cwl file
    cwl_version: the version of cwl (either 'draft3' or 'v1')
    input_reference_files_directory: bucket name and subdirectory for input reference files
    output_S3_bucket: bucket name and subdirectory for output files and logs
    input_files: input files in json format (parametername: {'bucket_name':bucketname, 'object_key':filename})
    secondary_files: secondary files in json format (parametername: {'bucket_name':bucketname, 'object_key':filename})
    input_parameters: input parameters in json format (parametername:value)
    '''

    # read default variables in config
    CONFIG_FIELD = "config"
    CONFIG_KEYS = [
        "EBS_optimized", "shutdown_min", "copy_to_s3", "instance_type",
        "ebs_size", "launch_instance", "key_name", "ebs_type", "ebs_iops",
        "json_bucket", "password", "log_bucket"
    ]
    ARGS_FIELD = "args"
    ARGS_KEYS = [
        "cwl_main_filename", "cwl_child_filenames", "app_name", "app_version",
        "input_files", "output_S3_bucket", "cwl_directory_url",
        "input_parameters", "secondary_files", "output_target",
        "secondary_output_target"
    ]

    cfg = event.get(CONFIG_FIELD)
    for k in CONFIG_KEYS:
        assert k in cfg, "%s not in config field" % k

    args = event.get(ARGS_FIELD)
    for k in ARGS_KEYS:
        assert k in args, "%s not in args field" % k

    # args: parameters needed by the instance to run a workflow
    # cfg: parameters needed to launch an instance
    cfg['job_tag'] = args.get('app_name')
    cfg['userdata_dir'] = '/tmp/userdata'

    # local directory in which the json file will be first created.
    cfg['json_dir'] = '/tmp/json'

    # AMI and script directory according to cwl version
    if args['cwl_version'] == 'v1':
        cfg['ami_id'] = os.environ.get('AMI_ID_CWL_V1')
        cfg['script_url'] = 'https://raw.githubusercontent.com/' + \
            os.environ.get('TIBANNA_REPO_NAME') + '/' + \
            os.environ.get('TIBANNA_REPO_BRANCH') + '/awsf_cwl_v1/'
    else:
        cfg['ami_id'] = os.environ.get('AMI_ID_CWL_DRAFT3')
        cfg['script_url'] = 'https://raw.githubusercontent.com/' + \
            os.environ.get('TIBANNA_REPO_NAME') + '/' + \
            os.environ.get('TIBANNA_REPO_BRANCH') + '/awsf_cwl_draft3/'
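    # For illustration (hypothetical values, not from the source): with
    # TIBANNA_REPO_NAME='4dn-dcic/tibanna' and TIBANNA_REPO_BRANCH='master',
    # the cwl v1 branch above yields
    # script_url = 'https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf_cwl_v1/'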

    utils.update_config(cfg, args['app_name'], args['input_files'],
                        args['input_parameters'])

    # create json and copy to s3
    jobid = utils.create_json(event, '')

    # launch instance and execute workflow
    launch_instance_log = {}
    if cfg.get('launch_instance'):
        launch_instance_log = utils.launch_instance(cfg, jobid)

    event.update({'jobid': jobid})
    event.update(launch_instance_log)
    return event
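
# A minimal sketch of the kind of event this handler expects, assembled from the
# docstring and the CONFIG_KEYS/ARGS_KEYS lists above. All bucket, key pair, and
# file names are hypothetical placeholders, not values from the source.
sample_event = {
    'config': {
        'instance_type': 't3.medium',
        'ebs_size': 20,
        'ebs_type': 'io1',
        'ebs_iops': 500,
        'EBS_optimized': True,
        'shutdown_min': 'now',
        'copy_to_s3': True,
        'launch_instance': True,
        'key_name': 'my-keypair',           # hypothetical
        'json_bucket': 'my-json-bucket',    # hypothetical
        'password': 'my-ssh-password',      # hypothetical
        'log_bucket': 'my-log-bucket'       # hypothetical
    },
    'args': {
        'cwl_main_filename': 'main.cwl',
        'cwl_child_filenames': '',
        'app_name': 'my-app',
        'app_version': '1.0',
        'cwl_directory_url': 'https://example.com/cwl/',   # hypothetical
        'cwl_version': 'v1',
        'input_files': {'input_file': {'bucket_name': 'my-input-bucket',
                                       'object_key': 'input.txt'}},
        'secondary_files': {},
        'input_parameters': {},
        'output_S3_bucket': 'my-output-bucket',             # hypothetical
        'output_target': {},
        'secondary_output_target': {}
    }
}
# handler(sample_event, None)  # would validate keys, write the run json and launch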
Example #2
def handler(event, context):
    '''
    config:
    instance_type: EC2 instance type
    ebs_size: EBS storage size in GB
    ebs_type: EBS storage type (available values: gp2, io1, st1, sc1, standard; default: io1)
    ebs_iops: EBS storage IOPS
    s3_access_arn: IAM instance profile for S3 access
    ami_id: ID of AMI used for the instance - it should have docker daemon and
            cwl-runner (either toil or cwltools) installed
    password: password for ssh connection for user ec2-user
    EBS_optimized: Use this flag if the instance type is EBS-optimized (default: EBS-optimized)
    shutdown_min: Number of minutes before shutdown after the jobs are finished (default: now)
    copy_to_s3: Upload or copy the json file to S3 bucket json_bucket
    launch_instance: Launch instance based on the json file
    log_bucket: bucket for collecting logs (started, postrun, success, error, log)

    args:
    cwl_main_filename: main cwl file name
    cwl_child_filenames: names of the other cwl files used by the main cwl file, delimited by comma
    app_name: name of the app
    app_version: version of the app
    cwl_directory_url: the url and subdirectories for the main cwl file
    input_reference_files_directory: bucket name and subdirectory for input reference files
    output_S3_bucket: bucket name and subdirectory for output files and logs
    input_files: input files in json format (parametername: {'bucket_name':bucketname, 'object_key':filename})
    secondary_files: secondary files in json format (parametername: {'bucket_name':bucketname, 'object_key':filename})
    input_parameters: input parameters in json format (parametername:value)
    '''

    # read default variables in config
    CONFIG_FIELD = "config"
    CONFIG_KEYS = ["s3_access_arn", "EBS_optimized", "shutdown_min", "copy_to_s3",
                   "ami_id", "instance_type", "ebs_size", "launch_instance",
                   "script_url", "key_name",
                   "ebs_type", "ebs_iops", "json_bucket", "password", "log_bucket"]
    ARGS_FIELD = "args"
    ARGS_KEYS = ["cwl_main_filename", "cwl_child_filenames", "app_name", "app_version",
                 "input_files", "output_S3_bucket", "cwl_directory_url",
                 "input_parameters", "secondary_files", "output_target", "secondary_output_target"]

    cfg = event.get(CONFIG_FIELD)
    for k in CONFIG_KEYS:
        assert k in cfg, "%s not in config field" % k

    args = event.get(ARGS_FIELD)
    for k in ARGS_KEYS:
        assert k in args, "%s not in args field" % k

    # args: parameters needed by the instance to run a workflow
    # cfg: parameters needed to launch an instance
    cfg['job_tag'] = args.get('app_name')
    cfg['outbucket'] = args.get('output_S3_bucket')
    cfg['userdata_dir'] = '/tmp/userdata'

    # local directory in which the json file will be first created.
    cfg['json_dir'] = '/tmp/json'

    # create json and copy to s3
    jobid = utils.create_json(event, '')

    # launch instance and execute workflow
    launch_instance_log = {}
    if cfg.get('launch_instance'):
        launch_instance_log = utils.launch_instance(cfg, jobid)

    event.update({'jobid': jobid})
    event.update(launch_instance_log)
    return event
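
# A minimal sketch (not part of the source) of how the repeated key checks in
# these handlers could be factored into a helper; field and key names follow
# the CONFIG_KEYS/ARGS_KEYS lists above.
def _require_keys(d, keys, field_name):
    """Raise an informative error if any expected key is missing from d."""
    missing = [k for k in keys if k not in (d or {})]
    if missing:
        raise ValueError("missing keys in %s: %s" % (field_name, ', '.join(missing)))

# hypothetical usage inside handler, replacing the assert loops:
#   _require_keys(event.get('config'), CONFIG_KEYS, 'config')
#   _require_keys(event.get('args'), ARGS_KEYS, 'args')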
Example #3
def handler(event, context):
    '''
    config:
    cwl_url: the url and subdirectories for the main cwl file
    reference_S3_bucket: bucket name and subdirectory for input reference files
    output_S3_bucket: bucket name and subdirectory for output files and logs
    default_instance_type: EC2 instance type
    default_ebs_size: EBS storage size in GB
    default_ebs_type: EBS storage type (available values: gp2, io1, st1, sc1, standard; default: io1)
    ebs_iops: EBS storage IOPS
    json_dir: Local directory in which the output json file will be written
    s3_access_arn: IAM instance profile for S3 access
    keyname: name of keypair used for launching instances
    worker_ami_id: ID of AMI used for the instance - it should have docker daemon and
                   cwl-runner (either toil or cwltools) installed
    userdata_dir: local directory to store userdata (used internally within lambda)

    args:
    cwl: main cwl file name
    cwl_children: names of the other cwl files used by the main cwl file, delimited by comma
    app_name: name of the app
    app_version: version of the app
    input_files: input files in json format (parametername:filename)
    input_reference_files: input reference files in json format (parametername:filename)
    input_parameters: input parameters in json format (parametername:value)
    input_files_directory: bucket name and subdirectory for input files
    not_EBS_optimized: Use this flag if the instance type is not EBS-optimized (default: EBS-optimized)
    shutdown_min: Number of minutes before shutdown after the jobs are finished (default: now)
    copy_to_s3: Upload or copy the json file to S3 bucket json_bucket
    launch_instance: Launch instance based on the json file
    '''

    # read default variables in config
    CONFIG_FIELD = "config"
    CONFIG_KEYS = ["reference_S3_bucket", "output_S3_bucket", "s3_access_arn",
                   "keyname", "worker_ami_id", "default_instance_type", "default_ebs_size",
                   "default_ebs_type", "ebs_iops", "userdata_dir", "json_dir", "cwl_url"]
    ARGS_FIELD = "args"
    ARGS_KEYS = ["cwl", "cwl_children", "app_name", "app_version", "input_files",
                 "input_reference_files", "input_parameters", "input_files_directory",
                 "not_EBS_optimized", "shutdown_min", "copy_to_s3", "launch_instance"]

    cfg = event.get(CONFIG_FIELD)
    for k in CONFIG_KEYS:
        assert k in cfg, "%s not in config field" % k

    args = event.get(ARGS_FIELD)
    for k in ARGS_KEYS:
        assert k in args, "%s not in args field" % k

    # parameters that will go into the pre-run json file
    final_args = {
        'cwl_directory': cfg.get('cwl_url'),
        'cwl': args.get('cwl'),
        'cwl_children': args.get('cwl_children'),
        'app_name': args.get('app_name'),
        'app_version': args.get('app_version'),
        'input_files': args.get('input_files'),
        'input_reference_files': args.get('input_reference_files'),
        'input_parameters': args.get('input_parameters'),
        'input_files_directory': args.get('input_files_directory'),
        'input_reference_files_directory': cfg.get('reference_S3_bucket'),
        'output_bucket_directory': cfg.get('output_S3_bucket'),
        'instance_type': cfg.get('default_instance_type'),
        'storage_size': cfg.get('default_ebs_size'),
        'storage_type': cfg.get('default_ebs_type'),
        'storage_iops': cfg.get('ebs_iops')
    }

    # parameters needed to launch an instance
    par = {
        's3_access_arn': cfg.get('s3_access_arn'),
        'worker_ami_id': cfg.get('worker_ami_id'),
        'keyname': cfg.get('keyname'),
        'userdata_dir': cfg.get('userdata_dir'),
        'instance_type': cfg.get('default_instance_type'),  # redundant with final_args
        'storage_size': cfg.get('default_ebs_size'),  # redundant with final_args
        'storage_type': cfg.get('default_ebs_type'),  # redundant with final_args
        'storage_iops': cfg.get('ebs_iops'),  # redundant with final_args
        'EBS_optimized': True,
        'job_tag': final_args.get('app_name'),
        'outbucket': cfg.get('output_S3_bucket')  # redundant with output_bucket_directory in final_args
    }

    shutdown_min = args.get('shutdown_min')
    copy_to_s3 = args.get('copy_to_s3')

    # local directory in which the json file will be first created.
    json_dir = cfg.get('json_dir')

    # create json and copy to s3
    jobid = utils.create_json(final_args, json_dir, '', copy_to_s3)

    # launch instance and execute workflow
    if args.get('launch_instance'):
        utils.launch_instance(par, jobid, shutdown_min)
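
# A minimal sketch of an event matching this older config/args schema, derived
# from the docstring above. All bucket, ARN, AMI, and file names are hypothetical.
sample_event_v0 = {
    'config': {
        'cwl_url': 'https://example.com/cwl/',             # hypothetical
        'reference_S3_bucket': 'my-reference-bucket',      # hypothetical
        'output_S3_bucket': 'my-output-bucket',            # hypothetical
        'default_instance_type': 't3.medium',
        'default_ebs_size': 20,
        'default_ebs_type': 'io1',
        'ebs_iops': 500,
        'json_dir': '/tmp/json',
        's3_access_arn': 'arn:aws:iam::123456789012:instance-profile/S3_access',  # hypothetical
        'keyname': 'my-keypair',                           # hypothetical
        'worker_ami_id': 'ami-0123456789abcdef0',          # hypothetical
        'userdata_dir': '/tmp/userdata'
    },
    'args': {
        'cwl': 'main.cwl',
        'cwl_children': '',
        'app_name': 'my-app',
        'app_version': '1.0',
        'input_files': {'input_file': 'input.txt'},
        'input_reference_files': {},
        'input_parameters': {},
        'input_files_directory': 'my-input-bucket',        # hypothetical
        'not_EBS_optimized': False,
        'shutdown_min': 'now',
        'copy_to_s3': True,
        'launch_instance': True
    }
}
# handler(sample_event_v0, None)  # would build final_args/par, write the json and launch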
Example #4
def handler(event, context):
    '''
    config:
    # required
      instance_type: EC2 instance type
      ebs_size: EBS storage size in GB
      ebs_type: EBS storage type (available values: gp2, io1, st1, sc1, standard; default: io1)
      ebs_iops: EBS storage IOPS
      password: password for ssh connection for user ec2-user
      EBS_optimized: Use this flag if the instance type is EBS-optimized (default: EBS-optimized)
      shutdown_min: Number of minutes before shutdown after the jobs are finished (default: now)
      log_bucket: bucket for collecting logs (started, postrun, success, error, log)
    # optional
      public_postrun_json (optional): whether the postrun json should be made public (default: false)
      cloudwatch_dashboard (optional): create a cloudwatch dashboard named awsem-<jobid>

    args:
    # required (i.e. field must exist):
      input_files: input files in json format (parametername: {'bucket_name':bucketname, 'object_key':filename})
      output_S3_bucket: bucket name and subdirectory for output files and logs
    # optional
      app_name: name of the app, used by Benchmark
      app_version: version of the app
      secondary_files: secondary files in json format (parametername: {'bucket_name':bucketname, 'object_key':filename})
      input_parameters: input parameters in json format (parametername:value)
      secondary_output_target: secondary output files in json format (similar to secondary_files)
    # required for cwl
      cwl_main_filename: main cwl file name
      cwl_directory_url: the url and subdirectories for the main cwl file
      cwl_version: the version of cwl (either 'draft3' or 'v1')
      cwl_child_filenames (optional): names of the other cwl files used by main cwl file, delimited by comma
      language (optional for cwl): 'cwl_v1' or 'cwl_draft3'
    # required for wdl
      language: 'wdl'
      wdl_main_filename: main wdl file name
      wdl_directory_url: the url of the wdl file
      wdl_child_filenames (optional): names of the other wdl files used by main wdl file, delimited by comma
    # optional
      dependency: {'exec_arn': [exec_arns]}
      spot_duration: 60  # block minutes 60-360 if requesting spot instance
    '''

    # read default variables in config
    CONFIG_FIELD = "config"
    CONFIG_KEYS = ["log_bucket"]
    ARGS_FIELD = "args"
    ARGS_KEYS = ["input_files", "output_S3_bucket", "output_target"]
    ARGS_KEYS_CWL = ["cwl_main_filename", "cwl_directory_url"]
    ARGS_KEYS_WDL = ["wdl_main_filename", "wdl_directory_url", "language"]

    # args: parameters needed by the instance to run a workflow
    # cfg: parameters needed to launch an instance
    cfg = event.get(CONFIG_FIELD)
    for k in CONFIG_KEYS:
        assert k in cfg, "%s not in config field" % k

    args = event.get(ARGS_FIELD)
    for k in ARGS_KEYS:
        assert k in args, "%s not in args field" % k
    if args.get('language') == 'wdl':
        for k in ARGS_KEYS_WDL:
            assert k in args, "%s not in args field" % k
    else:
        for k in ARGS_KEYS_CWL:
            assert k in args, "%s not in args field" % k

    if 'dependency' in args:
        check_dependency(**args['dependency'])

    # update input json to add various other info automatically
    ec2_utils.auto_update_input_json(args, cfg)

    # create json and copy to s3
    jobid = ec2_utils.create_json(event)

    # optional profile credentials read from the TIBANNA_PROFILE_* environment variables
    # and passed on to launch_instance
    if os.environ.get('TIBANNA_PROFILE_ACCESS_KEY', None) and \
            os.environ.get('TIBANNA_PROFILE_SECRET_KEY', None):
        profile = {
            'access_key': os.environ.get('TIBANNA_PROFILE_ACCESS_KEY'),
            'secret_key': os.environ.get('TIBANNA_PROFILE_SECRET_KEY')
        }
    else:
        profile = None

    # launch instance and execute workflow
    launch_instance_log = ec2_utils.launch_instance(cfg,
                                                    jobid,
                                                    profile=profile)

    # setup cloudwatch dashboard
    if cfg.get('cloudwatch_dashboard'):
        instance_id = launch_instance_log['instance_id']
        ec2_utils.create_cloudwatch_dashboard(instance_id, 'awsem-' + jobid)

    if 'jobid' not in event:
        event.update({'jobid': jobid})
    event.update(launch_instance_log)
    return event
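
# A minimal sketch of an event that exercises the wdl branch of this handler
# (language == 'wdl' switches the check from ARGS_KEYS_CWL to ARGS_KEYS_WDL).
# All bucket and file names are hypothetical placeholders.
sample_wdl_event = {
    'config': {
        'log_bucket': 'my-log-bucket',      # hypothetical
        'instance_type': 't3.medium',
        'ebs_size': 20,
        'cloudwatch_dashboard': True        # optional: creates an awsem-<jobid> dashboard
    },
    'args': {
        'language': 'wdl',
        'wdl_main_filename': 'main.wdl',
        'wdl_directory_url': 'https://example.com/wdl/',   # hypothetical
        'wdl_child_filenames': '',
        'input_files': {'input_file': {'bucket_name': 'my-input-bucket',
                                       'object_key': 'input.txt'}},
        'input_parameters': {},
        'output_S3_bucket': 'my-output-bucket',             # hypothetical
        'output_target': {}
    }
}
# handler(sample_wdl_event, None)  # validates keys, builds the run json and launches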
Example #5
def handler(event, context):
    '''
    config:
    cwl_url: the url and subdirectories for the main cwl file
    reference_S3_bucket: bucket name and subdirectory for input reference files
    output_S3_bucket: bucket name and subdirectory for output files and logs
    default_instance_type: EC2 instance type
    default_ebs_size: EBS storage size in GB
    default_ebs_type: EBS storage type (available values: gp2, io1, st1, sc1, standard; default: io1)
    ebs_iops: EBS storage IOPS
    json_dir: Local directory in which the output json file will be written
    s3_access_arn: IAM instance profile for S3 access
    keyname: name of keypair used for launching instances
    worker_ami_id: ID of AMI used for the instance - it should have docker daemon and
                   cwl-runner (either toil or cwltools) installed
    userdata_dir: local directory to store userdata (used internally within lambda)

    args:
    cwl: main cwl file name
    cwl_children: names of the other cwl files used by the main cwl file, delimited by comma
    app_name: name of the app
    app_version: version of the app
    input_files: input files in json format (parametername:filename)
    input_reference_files: input reference files in json format (parametername:filename)
    input_parameters: input parameters in json format (parametername:value)
    input_files_directory: bucket name and subdirectory for input files
    not_EBS_optimized: Use this flag if the instance type is not EBS-optimized (default: EBS-optimized)
    shutdown_min: Number of minutes before shutdown after the jobs are finished (default: now)
    copy_to_s3: Upload or copy the json file to S3 bucket json_bucket
    launch_instance: Launch instance based on the json file
    '''

    # read default variables in config
    CONFIG_FIELD = "config"
    CONFIG_KEYS = [
        "reference_S3_bucket", "output_S3_bucket", "s3_access_arn", "keyname",
        "worker_ami_id", "default_instance_type", "default_ebs_size",
        "default_ebs_type", "ebs_iops", "userdata_dir", "json_dir", "cwl_url"
    ]
    ARGS_FIELD = "args"
    ARGS_KEYS = [
        "cwl", "cwl_children", "app_name", "app_version", "input_files",
        "input_reference_files", "input_parameters", "input_files_directory",
        "not_EBS_optimized", "shutdown_min", "copy_to_s3", "launch_instance"
    ]

    cfg = event.get(CONFIG_FIELD)
    for k in CONFIG_KEYS:
        assert k in cfg, "%s not in config field" % k

    args = event.get(ARGS_FIELD)
    for k in ARGS_KEYS:
        assert k in args, "%s not in args field" % k

    # parameters that will go into the pre-run json file
    final_args = {
        'cwl_directory': cfg.get('cwl_url'),
        'cwl': args.get('cwl'),
        'cwl_children': args.get('cwl_children'),
        'app_name': args.get('app_name'),
        'app_version': args.get('app_version'),
        'input_files': args.get('input_files'),
        'input_reference_files': args.get('input_reference_files'),
        'input_parameters': args.get('input_parameters'),
        'input_files_directory': args.get('input_files_directory'),
        'input_reference_files_directory': cfg.get('reference_S3_bucket'),
        'output_bucket_directory': cfg.get('output_S3_bucket'),
        'instance_type': cfg.get('default_instance_type'),
        'storage_size': cfg.get('default_ebs_size'),
        'storage_type': cfg.get('default_ebs_type'),
        'storage_iops': cfg.get('ebs_iops')
    }

    # parameters needed to launch an instance
    par = {
        's3_access_arn': cfg.get('s3_access_arn'),
        'worker_ami_id': cfg.get('worker_ami_id'),
        'keyname': cfg.get('keyname'),
        'userdata_dir': cfg.get('userdata_dir'),
        'instance_type': cfg.get('default_instance_type'),  # redundant with final_args
        'storage_size': cfg.get('default_ebs_size'),  # redundant with final_args
        'storage_type': cfg.get('default_ebs_type'),  # redundant with final_args
        'storage_iops': cfg.get('ebs_iops'),  # redundant with final_args
        'EBS_optimized': True,
        'job_tag': final_args.get('app_name'),
        'outbucket': cfg.get('output_S3_bucket')  # redundant with output_bucket_directory in final_args
    }

    shutdown_min = args.get('shutdown_min')
    copy_to_s3 = args.get('copy_to_s3')

    # local directory in which the json file will be first created.
    json_dir = cfg.get('json_dir')

    # create json and copy to s3
    jobid = utils.create_json(final_args, json_dir, '', copy_to_s3)

    # launch instance and execute workflow
    if args.get('launch_instance'):
        utils.launch_instance(par, jobid, shutdown_min)
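
# A hypothetical sketch (not the actual utils.launch_instance implementation) of
# how the `par` dict assembled above could map onto a boto3 run_instances call.
# The device name, tag layout, and userdata handling are assumptions.
import boto3

def launch_instance_sketch(par, userdata=''):
    ec2 = boto3.client('ec2')
    res = ec2.run_instances(
        ImageId=par['worker_ami_id'],
        InstanceType=par['instance_type'],
        KeyName=par['keyname'],
        IamInstanceProfile={'Arn': par['s3_access_arn']},
        EbsOptimized=par['EBS_optimized'],
        BlockDeviceMappings=[{
            'DeviceName': '/dev/xvdb',  # assumed data-volume device name
            'Ebs': {
                'VolumeSize': par['storage_size'],
                'VolumeType': par['storage_type'],
                'Iops': par['storage_iops']
            }
        }],
        TagSpecifications=[{
            'ResourceType': 'instance',
            'Tags': [{'Key': 'Name', 'Value': par['job_tag']}]
        }],
        UserData=userdata,
        MinCount=1,
        MaxCount=1
    )
    return res['Instances'][0]['InstanceId']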