def test_get_input_size_in_bytes_with_secondary_files():
    randomstr = 'test-' + create_jobid()
    randomstr_1 = 'test-' + create_jobid()
    randomstr_2 = 'test-' + create_jobid()
    s3 = boto3.client('s3')
    s3.put_object(Body='haha'.encode('utf-8'),
                  Bucket='tibanna-output', Key=randomstr)
    s3.put_object(Body='fooooooo'.encode('utf-8'),
                  Bucket='tibanna-output', Key=randomstr_1)
    s3.put_object(Body='pippo'.encode('utf-8'),
                  Bucket='tibanna-output', Key=randomstr_2)
    input_dict = {
        'args': {
            'input_files': {
                'input_file': {
                    'bucket_name': 'tibanna-output',
                    'object_key': randomstr
                }
            },
            'secondary_files': {
                'input_file': {
                    'bucket_name': 'tibanna-output',
                    'object_key': [randomstr_1, randomstr_2]
                }
            },
            'output_S3_bucket': 'somebucket',
            'app_name': 'md5',
            'cwl_main_filename': 'md5.cwl',
            'cwl_directory_url': 'someurl'
        },
        'config': {'log_bucket': 'tibanna-output'}
    }
    execution = Execution(input_dict)
    execution.input_size_in_bytes = execution.get_input_size_in_bytes()
    # 4 + 8 + 5 = 17 bytes total, i.e. 17 / 2**30 GB
    assert execution.total_input_size_in_gb == 1.5832483768463135E-8
    # cleanup afterwards
    s3.delete_objects(Bucket='tibanna-output',
                      Delete={'Objects': [{'Key': randomstr},
                                          {'Key': randomstr_1},
                                          {'Key': randomstr_2}]})


def test_launch_args():
    """test creating launch arguments - also test spot_instance"""
    jobid = create_jobid()
    log_bucket = 'tibanna-output'
    input_dict = {
        'args': {
            'output_S3_bucket': 'somebucket',
            'cwl_main_filename': 'md5.cwl',
            'cwl_directory_url': 'someurl'
        },
        'config': {
            'log_bucket': log_bucket,
            'mem': 1,
            'cpu': 1,
            'spot_instance': True
        },
        'jobid': jobid
    }
    execution = Execution(input_dict)
    # userdata is required before launch_args is created
    execution.userdata = execution.create_userdata()
    launch_args = execution.launch_args
    print(launch_args)
    assert launch_args
    assert 't3.micro' in str(launch_args)
    assert 'InstanceMarketOptions' in str(launch_args)


def test_upload_run_json():
    jobid = create_jobid()
    log_bucket = 'tibanna-output'
    input_dict = {
        'args': {
            'output_S3_bucket': 'somebucket',
            'cwl_main_filename': 'md5.cwl',
            'cwl_directory_url': 'someurl'
        },
        'config': {'log_bucket': log_bucket, 'mem': 1, 'cpu': 1},
        'jobid': jobid
    }
    somejson = {'haha': 'lala'}
    execution = Execution(input_dict)
    execution.upload_run_json(somejson)
    s3 = boto3.client('s3')
    res = s3.get_object(Bucket=log_bucket, Key=jobid + '.run.json')
    assert res
    # clean up afterwards
    s3.delete_objects(Bucket=log_bucket,
                      Delete={'Objects': [{'Key': jobid + '.run.json'}]})


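# NOTE: the ec2_exception_coordinator tests below call a small helper `fun`
# that is defined elsewhere in this test module. As a minimal sketch (an
# assumption, not the module's actual definition), such a helper only needs
# to raise the kind of capacity/limit error the coordinator is meant to
# catch, e.g. via botocore's ClientError:
#
#     from botocore.exceptions import ClientError
#
#     def fun():
#         # 'InstanceLimitExceeded' is an illustrative error code here
#         raise ClientError(
#             error_response={'Error': {'Code': 'InstanceLimitExceeded',
#                                       'Message': 'InstanceLimitExceeded'}},
#             operation_name='RunInstances')

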
def test_ec2_exception_coordinator4():
    """ec2 exceptions with 'other_instance_types'"""
    jobid = create_jobid()
    log_bucket = 'tibanna-output'
    input_dict = {
        'args': {
            'output_S3_bucket': 'somebucket',
            'cwl_main_filename': 'md5.cwl',
            'cwl_directory_url': 'someurl'
        },
        'config': {
            'log_bucket': log_bucket,
            'mem': 1,
            'cpu': 1,
            'spot_instance': True,
            'behavior_on_capacity_limit': 'other_instance_types'
        },
        'jobid': jobid
    }
    execution = Execution(input_dict, dryrun=True)
    assert execution.cfg.instance_type == 't3.micro'
    execution.userdata = execution.create_userdata()
    res = execution.ec2_exception_coordinator(fun)()
    assert res == 'continue'
    assert execution.cfg.instance_type == 't2.micro'
    res = execution.ec2_exception_coordinator(fun)()
    assert res == 'continue'
    assert execution.cfg.instance_type == 't3.small'
    res = execution.ec2_exception_coordinator(fun)()
    assert res == 'continue'
    assert execution.cfg.instance_type == 't2.small'


def test_create_run_json_dict():
    randomstr = 'test-' + create_jobid()
    s3 = boto3.client('s3')
    s3.put_object(Body='haha'.encode('utf-8'),
                  Bucket='tibanna-output', Key=randomstr)
    input_dict = {
        'args': {
            'input_files': {
                'input_file': {
                    'bucket_name': 'tibanna-output',
                    'object_key': randomstr
                }
            },
            'output_S3_bucket': 'somebucket',
            'app_name': 'md5',
            'cwl_main_filename': 'md5.cwl',
            'cwl_directory_url': 'someurl'
        },
        'config': {'log_bucket': 'tibanna-output'}
    }
    execution = Execution(input_dict)
    runjson = execution.create_run_json_dict()
    assert runjson
    # cleanup afterwards
    s3.delete_objects(Bucket='tibanna-output',
                      Delete={'Objects': [{'Key': randomstr}]})


def test_update_config_ebs_size2():
    """ebs_size is given in the 'x' (multiplier) format and the estimated
    total ebs_size is larger than the 10GB minimum"""
    randomstr = 'test-' + create_jobid()
    s3 = boto3.client('s3')
    s3.put_object(Body='haha'.encode('utf-8'),
                  Bucket='tibanna-output', Key=randomstr)
    input_dict = {
        'args': {
            'input_files': {
                'input_file': {
                    'bucket_name': 'tibanna-output',
                    'object_key': randomstr
                }
            },
            'output_S3_bucket': 'somebucket',
            'app_name': 'md5',
            'cwl_main_filename': 'md5.cwl',
            'cwl_directory_url': 'someurl'
        },
        'config': {
            'log_bucket': 'tibanna-output',
            'ebs_size': '5000000000x'
        }
    }
    execution = Execution(input_dict)
    execution.input_size_in_bytes = execution.get_input_size_in_bytes()
    execution.update_config_ebs_size()
    # 4 bytes x 5000000000 = 2E10 bytes ~ 18.6GB, rounded up to 19
    assert execution.cfg.ebs_size == 19
    # cleanup afterwards
    s3.delete_objects(Bucket='tibanna-output',
                      Delete={'Objects': [{'Key': randomstr}]})


def test_ec2_exception_coordinator6():
    """ec2 exceptions with 'retry_without_spot'"""
    jobid = create_jobid()
    log_bucket = 'tibanna-output'
    input_dict = {
        'args': {
            'output_S3_bucket': 'somebucket',
            'cwl_main_filename': 'md5.cwl',
            'cwl_directory_url': 'someurl'
        },
        'config': {
            'log_bucket': log_bucket,
            'instance_type': 't2.micro',
            'spot_instance': True,
            'behavior_on_capacity_limit': 'retry_without_spot'
        },
        'jobid': jobid
    }
    execution = Execution(input_dict, dryrun=True)
    execution.userdata = execution.create_userdata()
    res = execution.ec2_exception_coordinator(fun)()
    assert res == 'continue'
    assert execution.cfg.spot_instance is False  # changed to non-spot
    assert execution.cfg.behavior_on_capacity_limit == 'fail'  # reset to 'fail'
    with pytest.raises(EC2InstanceLimitException) as exec_info:
        res = execution.ec2_exception_coordinator(fun)()  # this time, it fails
    assert exec_info


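# NOTE: test_upload_workflow_to_s3 below depends on the
# run_task_awsem_event_cwl_upload fixture from conftest.py. A hypothetical
# sketch of the shape that fixture would need for this test to pass (the
# local CWL directory path is an assumption; the main/child filenames follow
# from the assertions below):
#
#     @pytest.fixture
#     def run_task_awsem_event_cwl_upload():
#         return {
#             'args': {
#                 'output_S3_bucket': 'somebucket',
#                 'cwl_main_filename': 'main.cwl',
#                 'cwl_child_filenames': ['child1.cwl', 'child2.cwl'],
#                 'cwl_directory_local': 'tests/files/cwl_upload'  # hypothetical
#             },
#             'config': {'log_bucket': 'tibanna-output'}
#         }

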
def test_upload_workflow_to_s3(run_task_awsem_event_cwl_upload):
    jobid = create_jobid()
    run_task_awsem_event_cwl_upload['jobid'] = jobid
    log_bucket = run_task_awsem_event_cwl_upload['config']['log_bucket']
    unicorn_input = UnicornInput(run_task_awsem_event_cwl_upload)
    upload_workflow_to_s3(unicorn_input)
    s3 = boto3.client('s3')
    res1 = s3.get_object(Bucket=log_bucket, Key=jobid + '.workflow/main.cwl')
    res2 = s3.get_object(Bucket=log_bucket, Key=jobid + '.workflow/child1.cwl')
    res3 = s3.get_object(Bucket=log_bucket, Key=jobid + '.workflow/child2.cwl')
    assert res1
    assert res2
    assert res3
    assert (unicorn_input.args.cwl_directory_url ==
            's3://tibanna-output/' + jobid + '.workflow/')
    # clean up afterwards
    s3.delete_objects(Bucket=log_bucket,
                      Delete={'Objects': [{'Key': jobid + '.workflow/main.cwl'},
                                          {'Key': jobid + '.workflow/child1.cwl'},
                                          {'Key': jobid + '.workflow/child2.cwl'}]})


def test_launch_and_get_instance_id():
    """test dryrun of ec2 launch"""
    jobid = create_jobid()
    log_bucket = 'tibanna-output'
    input_dict = {
        'args': {
            'output_S3_bucket': 'somebucket',
            'cwl_main_filename': 'md5.cwl',
            'cwl_directory_url': 'someurl'
        },
        'config': {
            'log_bucket': log_bucket,
            'mem': 1,
            'cpu': 1,
            'spot_instance': True
        },
        'jobid': jobid
    }
    execution = Execution(input_dict, dryrun=True)
    # userdata is required before launch_args is created
    execution.userdata = execution.create_userdata()
    with pytest.raises(Exception) as ex:
        execution.launch_and_get_instance_id()
    assert 'Request would have succeeded, but DryRun flag is set' in str(ex.value)


def test_create_userdata_w_profile():
    randomstr = 'test-' + create_jobid()
    s3 = boto3.client('s3')
    s3.put_object(Body='haha'.encode('utf-8'),
                  Bucket='tibanna-output', Key=randomstr)
    input_dict = {
        'args': {
            'input_files': {
                'input_file': {
                    'bucket_name': 'tibanna-output',
                    'object_key': randomstr
                }
            },
            'output_S3_bucket': 'somebucket',
            'app_name': 'md5',
            'cwl_main_filename': 'md5.cwl',
            'cwl_directory_url': 'someurl'
        },
        'config': {'log_bucket': 'tibanna-output'},
        'jobid': 'myjobid'
    }
    execution = Execution(input_dict)
    profile = {'access_key': 'haha', 'secret_key': 'lala'}
    userdata = execution.create_userdata(profile=profile)
    print(userdata)
    assert userdata
    assert '-a haha -s lala' in userdata
    # cleanup afterwards
    s3.delete_objects(Bucket='tibanna-output',
                      Delete={'Objects': [{'Key': randomstr}]})


def test_get_file_size():
    randomstr = 'test-' + create_jobid()
    s3 = boto3.client('s3')
    s3.put_object(Body='haha'.encode('utf-8'),
                  Bucket='tibanna-output', Key=randomstr)
    size = get_file_size(randomstr, 'tibanna-output')
    assert size == 4
    # cleanup afterwards
    s3.delete_objects(Bucket='tibanna-output',
                      Delete={'Objects': [{'Key': randomstr}]})


def test_upload():
    randomstr = 'test-' + create_jobid()
    os.mkdir(randomstr)
    filepath = os.path.join(os.path.abspath(randomstr), randomstr)
    with open(filepath, 'w') as f:
        f.write('haha')
    upload(filepath, 'tibanna-output', 'uploadtest')
    s3 = boto3.client('s3')
    res = s3.get_object(Bucket='tibanna-output', Key='uploadtest/' + randomstr)
    assert res
    # cleanup afterwards
    shutil.rmtree(randomstr)
    s3.delete_objects(Bucket='tibanna-output',
                      Delete={'Objects': [{'Key': 'uploadtest/' + randomstr}]})


def test_get_exec_arn_from_job_id():
    jobid = 'test-' + create_jobid()
    exec_name = 'test_execution_name'
    sfn = 'tibanna_unicorn_test'
    exec_arn = EXECUTION_ARN(exec_name, sfn)
    logbucket = 'somebucket'
    # add a job to dynamoDB (dd) before test
    Job.add_to_dd(jobid, exec_name, sfn, logbucket)
    # get exec_arn using get_exec_arn_from_job_id
    res = Job.get_exec_arn_from_job_id(jobid)
    # clean up first
    dd_utils.delete_items(DYNAMODB_TABLE, 'Job Id', [{'Job Id': jobid}])
    # check
    assert res == exec_arn


def test_add_to_dd_and_info():
    jobid = 'test-' + create_jobid()
    execution_name = 'test_execution_name'
    sfn = 'tibanna_unicorn_test'
    logbucket = 'somebucket'
    # add a job to dynamoDB (dd) before test
    Job.add_to_dd(jobid, execution_name, sfn, logbucket)
    # get info from dd
    info = Job.info(job_id=jobid)
    print(info)
    # clean up first
    dd_utils.delete_items(DYNAMODB_TABLE, 'Job Id', [{'Job Id': jobid}])
    # check
    assert info['Step Function'] == sfn
    assert info['Execution Name'] == execution_name
    assert info['Job Id'] == jobid
    assert info['Log Bucket'] == logbucket
    assert 'Time Stamp' in info


def test_ec2_exception_coordinator2():
    """ec2 limit exceptions with 'fail' (the default behavior_on_capacity_limit)"""
    jobid = create_jobid()
    log_bucket = 'tibanna-output'
    input_dict = {
        'args': {
            'output_S3_bucket': 'somebucket',
            'cwl_main_filename': 'md5.cwl',
            'cwl_directory_url': 'someurl'
        },
        'config': {
            'log_bucket': log_bucket,
            'instance_type': 'c5.4xlarge',
            'spot_instance': True
        },
        'jobid': jobid
    }
    execution = Execution(input_dict, dryrun=True)
    execution.userdata = execution.create_userdata()
    with pytest.raises(EC2InstanceLimitException) as exec_info:
        execution.ec2_exception_coordinator(fun)()
    assert exec_info


def test_ec2_exception_coordinator7():
    """ec2 exceptions with 'retry_without_spot' without spot instance"""
    jobid = create_jobid()
    log_bucket = 'tibanna-output'
    input_dict = {
        'args': {
            'output_S3_bucket': 'somebucket',
            'cwl_main_filename': 'md5.cwl',
            'cwl_directory_url': 'someurl'
        },
        'config': {
            'log_bucket': log_bucket,
            'instance_type': 't2.micro',
            'behavior_on_capacity_limit': 'retry_without_spot'
        },
        'jobid': jobid
    }
    execution = Execution(input_dict, dryrun=True)
    assert execution.cfg.spot_instance is False
    execution.userdata = execution.create_userdata()
    with pytest.raises(Exception) as exec_info:
        execution.ec2_exception_coordinator(fun)()
    assert ("'retry_without_spot' works only with 'spot_instance'"
            in str(exec_info.value))


def test_execution_benchmark():
    randomstr = 'test-' + create_jobid()
    s3 = boto3.client('s3')
    s3.put_object(Body='haha'.encode('utf-8'),
                  Bucket='tibanna-output', Key=randomstr)
    input_dict = {
        'args': {
            'input_files': {
                'input_file': {
                    'bucket_name': 'tibanna-output',
                    'object_key': randomstr
                }
            },
            'output_S3_bucket': 'somebucket',
            'app_name': 'md5',
            'cwl_main_filename': 'md5.cwl',
            'cwl_directory_url': 'someurl'
        },
        'config': {'log_bucket': 'tibanna-output'}
    }
    execution = Execution(input_dict)
    unicorn_dict = execution.input_dict
    print(unicorn_dict)
    assert 'args' in unicorn_dict
    assert 'config' in unicorn_dict
    assert 'instance_type' in unicorn_dict['config']
    assert unicorn_dict['config']['instance_type'] == 't3.micro'
    assert unicorn_dict['config']['ebs_size'] == 10
    # cleanup afterwards
    s3.delete_objects(Bucket='tibanna-output',
                      Delete={'Objects': [{'Key': randomstr}]})


def test_ec2_exception_coordinator5():
    """ec2 exceptions with 'other_instance_types' when only one instance type
    option is available"""
    jobid = create_jobid()
    log_bucket = 'tibanna-output'
    input_dict = {
        'args': {
            'output_S3_bucket': 'somebucket',
            'cwl_main_filename': 'md5.cwl',
            'cwl_directory_url': 'someurl'
        },
        'config': {
            'log_bucket': log_bucket,
            'instance_type': 't2.micro',
            'spot_instance': True,
            'behavior_on_capacity_limit': 'other_instance_types'
        },
        'jobid': jobid
    }
    execution = Execution(input_dict, dryrun=True)
    assert execution.cfg.instance_type == 't2.micro'
    execution.userdata = execution.create_userdata()
    with pytest.raises(EC2InstanceLimitException) as exec_info:
        execution.ec2_exception_coordinator(fun)()
    assert 'No more instance type available' in str(exec_info.value)