def test_two_cloud_providers(self):
    """Reject a config whose cloud provider section gains an AWS region.

    correct-cfg-file.ini must load cleanly on its own; once an AWS region is
    added to its cloud provider section, construction has to fail with a
    UserReportError mentioning more than one cloud provider.
    """
    self.cfg.read(f"{TEST_DATA_DIR}/correct-cfg-file.ini")
    # Baseline: the unmodified file is accepted
    ElasticBlastConfig(self.cfg, task=ElbCommand.SUBMIT)

    cloud_section = self.cfg[CFG_CLOUD_PROVIDER]
    cloud_section[CFG_CP_AWS_REGION] = 'us-east-1'
    with self.assertRaises(UserReportError) as err:
        ElasticBlastConfig(self.cfg, task=ElbCommand.SUBMIT)

    reported = str(err.exception)
    assert 'more than one cloud provider' in reported
def test_no_cloud_provider(self):
    """Reject a config whose cloud provider section has been emptied.

    correct-cfg-file.ini must load cleanly first; after its cloud provider
    section is replaced with an empty mapping, construction has to fail with
    a UserReportError stating that the cloud provider configuration is
    missing.
    """
    self.cfg.read(f"{TEST_DATA_DIR}/correct-cfg-file.ini")
    # Baseline: the unmodified file is accepted
    ElasticBlastConfig(self.cfg, task=ElbCommand.SUBMIT)

    self.cfg[CFG_CLOUD_PROVIDER] = {}
    with self.assertRaises(UserReportError) as err:
        ElasticBlastConfig(self.cfg, task=ElbCommand.SUBMIT)

    reported = str(err.exception)
    assert 'Cloud provider configuration is missing' in reported
def test_ElasticBlastConfig_init_errors():
    """Test that __init__ method arguments are checked

    Each case is a (positional args, keyword args, expected message
    fragments) triple; every call must raise AttributeError whose text
    contains all of the listed fragments.
    """
    task_missing = ('task parameter must be specified',)
    bad_positional = ('one positional parameter', 'ConfigParser object')

    cases = [
        ((), {}, task_missing),
        ((5,), {}, bad_positional),
        ((configparser.ConfigParser(), 5), {}, bad_positional),
        ((configparser.ConfigParser(),), {'results': 's3://results'},
         task_missing),
        ((), {'aws_region': 'some-region', 'results': 's3://results'},
         task_missing),
    ]

    for positional, keywords, fragments in cases:
        with pytest.raises(AttributeError) as err:
            ElasticBlastConfig(*positional, **keywords)
        for fragment in fragments:
            assert fragment in str(err.value)
def test_invalid_configuration_blank(self):
    """A blank ConfigParser is rejected both before and after _set_sections.
    """
    blank = configparser.ConfigParser()

    # Completely empty parser
    with self.assertRaises(UserReportError):
        ElasticBlastConfig(blank, task=ElbCommand.SUBMIT)

    # Same parser after _set_sections has been applied to it; still invalid
    _set_sections(blank)
    with self.assertRaises(UserReportError):
        ElasticBlastConfig(blank, task=ElbCommand.SUBMIT)
def test_instance_too_small_gcp():
    """Test that using too small an instance triggers an error

    Loads instance-too-small-gcp.ini and expects a UserReportError with
    return code INPUT_ERROR and a message about insufficient memory, raised
    either while building the config or while validating it.
    """
    ini_path = os.path.join(TEST_DATA_DIR, 'instance-too-small-gcp.ini')
    args = argparse.Namespace(cfg=ini_path)

    with pytest.raises(UserReportError) as err:
        cfg = ElasticBlastConfig(configure(args), task=ElbCommand.SUBMIT)
        cfg.validate()

    failure = err.value
    assert failure.returncode == INPUT_ERROR
    # Shown when pytest runs with -s, to help inspect the exact wording
    print(failure.message)
    assert 'does not have enough memory' in failure.message
def test_validate_too_many_cpus():
    """Test that requesting too many CPUs is reported

    Builds a minimal AWS config, then sets the machine type to m5.large with
    16 CPUs requested and expects validate() to raise UserReportError with a
    message about the number of CPUs exceeding a limit.
    """
    settings = {
        'aws_region': 'test-region',
        'program': 'blastn',
        'db': 'test-db',
        'queries': 'test-query.fa',
        'results': 's3://results',
        'task': ElbCommand.SUBMIT,
    }
    cfg = ElasticBlastConfig(**settings)
    cfg.cluster.machine_type = 'm5.large'
    cfg.cluster.num_cpus = 16

    with pytest.raises(UserReportError) as err:
        cfg.validate(ElbCommand.SUBMIT)

    message = str(err.value)
    assert re.search(r'number of CPUs [\w "]* exceeds', message)
def setUp(self):
    """ Initialize 2 configurations: one for GCP another for AWS

    The results are stored as self.cfg_gcp and self.cfg_aws. The AWS config
    is built while elb.elb_config.aws_get_machine_properties is patched to
    return a fixed InstanceProperties(32, 120).
    """
    gcp_parser = configparser.ConfigParser()
    aws_parser = configparser.ConfigParser()
    for parser in (gcp_parser, aws_parser):
        _set_sections(parser)

    gcp_parser.read(f"{TEST_DATA_DIR}/correct-cfg-file.ini")
    aws_parser.read(f"{TEST_DATA_DIR}/elb-aws-blastn-pdbnt.ini")

    self.cfg_gcp = ElasticBlastConfig(gcp_parser, task=ElbCommand.SUBMIT)

    # Canned machine properties replace the real lookup for the AWS config
    fake_properties = MagicMock(return_value=InstanceProperties(32, 120))
    with patch('elb.elb_config.aws_get_machine_properties',
               new=fake_properties):
        self.cfg_aws = ElasticBlastConfig(aws_parser,
                                          task=ElbCommand.SUBMIT)
def test_invalid_configuration_missing_required_params(self):
    """Fill in required parameters step by step until the config is accepted.

    Starting from missing-required-parameters.ini, every intermediate state
    must be rejected with UserReportError; only the final state, with a GCP
    project and a database source set, must construct without error.
    """

    def expect_rejected():
        """Assert that the current self.cfg is refused."""
        with self.assertRaises(UserReportError):
            ElasticBlastConfig(self.cfg, task=ElbCommand.SUBMIT)

    self.cfg.read(f"{TEST_DATA_DIR}/missing-required-parameters.ini")
    expect_rejected()

    # Results set to a value without a bucket scheme
    self.cfg[CFG_BLAST][CFG_BLAST_RESULTS] = "my-bucket"
    expect_rejected()

    # Results set to a gs:// bucket
    self.cfg[CFG_BLAST][CFG_BLAST_RESULTS] = "gs://my-bucket"
    expect_rejected()

    # Database set
    self.cfg[CFG_BLAST][CFG_BLAST_DB] = "nr"
    expect_rejected()

    # With the project and database source filled in the config is accepted
    self.cfg[CFG_CLOUD_PROVIDER][CFG_CP_GCP_PROJECT] = "dummy"
    self.cfg[CFG_BLAST][CFG_BLAST_DB_SRC] = "GCP"
    ElasticBlastConfig(self.cfg, task=ElbCommand.SUBMIT)
def test_default_labels():
    """Check the labels generated for a minimal GCP configuration.

    The labels string must contain no uppercase letters and, once split on
    ',' and '=', must carry the expected project, cluster name, owner,
    database, program, billing code and results values.
    """
    cfg = ElasticBlastConfig(
        gcp_project='test-gcp-project',
        gcp_region='test-gcp-region',
        gcp_zone='test-gcp-zone',
        program='blastn',
        db='My:Fancy*DB65',
        queries='test-queries.fa',
        results='gs://some-bucket-with-interesting-name',
        cluster_name='some-cluster-name',
        task=ElbCommand.SUBMIT)
    labels = cfg.cluster.labels

    # "Label keys must start with a lowercase letter."
    # From https://cloud.google.com/compute/docs/labeling-resources#label_format
    assert re.search(r'[A-Z]', labels) is None

    # Parse generated labels and verify some parts
    label_dict = dict(item.split('=') for item in labels.split(','))

    assert label_dict['project'] == 'elastic-blast'
    assert label_dict['cluster-name'] == 'some-cluster-name'
    for required_key in ('client-hostname', 'created'):
        assert required_key in label_dict

    timestamp_re = r'[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}-[0-9]{2}-[0-9]{2}'
    assert re.match(timestamp_re, label_dict['created'])

    assert label_dict['owner'] == label_dict['creator']
    assert label_dict['db'] == 'my-fancy-db65'
    assert label_dict['program'] == 'blastn'
    assert label_dict['billingcode'] == 'elastic-blast'
    assert label_dict['results'] == 'gs---some-bucket-with-interesting-name'
    print('labels', labels)
def test_provide_vpc_dry_run():
    """Create and delete ElasticBlastAws in dry-run mode for two regions."""
    cfg = ElasticBlastConfig(aws_region='us-east-2',
                             program='blastn',
                             db='some-db',
                             results='s3://elasticblast-test',
                             queries='queries',
                             task=ElbCommand.SUBMIT)

    cluster = cfg.cluster
    cluster.dry_run = True
    cluster.pd_size = '1G'
    cluster.name = 'example'
    cluster.disk_type = 'gp2'
    cluster.iops = 2000
    cluster.machine_type = 't2.nano'
    cluster.num_nodes = 1

    # us-east-2 has default vpc, should provide it
    cfg.aws.region = 'us-east-2'
    cfg.aws.security_group = 'sg-test'
    with_default_vpc = aws.ElasticBlastAws(cfg)
    with_default_vpc.delete()

    # us-east-1 doesn't, should create new one
    cfg.aws.region = 'us-east-1'
    cfg.aws.security_group = 'sg-test'
    without_default_vpc = aws.ElasticBlastAws(cfg)
    without_default_vpc.delete()
def test_blastdb_not_found(gke_mock, mocker):
    """Test that UserReportError is raised when database is not found"""

    def mocked_check_cluster(cfg):
        """Mocked check cluster that simulates non-existent cluster status"""
        return ''

    def mock_safe_exec(cmd):
        """Answer the two gsutil manifest queries with canned output."""
        command = ' '.join(cmd) if isinstance(cmd, list) else cmd
        if command == 'gsutil cat gs://blast-db/latest-dir':
            return MockedCompletedProcess(stdout='2020-20-20')
        if command == 'gsutil cat gs://blast-db/2020-20-20/blastdb-manifest.json':
            return MockedCompletedProcess(
                stdout='{"nt":{"size":93.36}, "nr":{"size":227.4}}')
        return MockedCompletedProcess()

    mocker.patch('elb.commands.submit.gcp_check_cluster',
                 side_effect=mocked_check_cluster)
    mocker.patch('elb.util.safe_exec', side_effect=mock_safe_exec)

    print(INI_NO_BLASTDB)
    args = Namespace(cfg=INI_NO_BLASTDB)

    # test that UserReportError is raised
    with pytest.raises(UserReportError) as err:
        submit(args,
               ElasticBlastConfig(configure(args), task=ElbCommand.SUBMIT),
               [])

    # test error code and message
    failure = err.value
    assert failure.returncode == constants.BLASTDB_ERROR
    assert 'BLAST database' in failure.message
    assert 'not found' in failure.message
def test_convert_labels_to_aws_tags():
    """Check that cluster labels convert into a list of AWS tags.

    The converted tags must be a list; flattened into a dict they must
    include the Project, billingcode, Name, Owner and results keys, with the
    results value equal to the configured results bucket.
    """
    # NOTE(review): this config is replaced immediately below; the call is
    # kept in case construction itself is part of what is exercised - confirm
    cfg = create_config_for_db('nt')
    cfg = ElasticBlastConfig(
        aws_region='test-aws-region',
        program='blastn',
        db='nt',
        queries='test-queries.fa',
        results='s3://some.bucket.with_s0me-interesting-name-end',
        cluster_name='some-cluster-name',
        task=ElbCommand.SUBMIT)

    tags = convert_labels_to_aws_tags(cfg.cluster.labels)
    assert isinstance(tags, list)

    # Each tag is a two-value mapping; unpack its values as (key, value)
    t = {name: value for name, value in (tag.values() for tag in tags)}

    for expected_key in ('Project', 'billingcode', 'Name', 'Owner', 'results'):
        assert expected_key in t
    assert t['results'] == 's3://some.bucket.with_s0me-interesting-name-end'
def test_check_memory_requirements(mocker):
    """Check memory requirements pass by default and fail with margin 2.0.

    elb.util.safe_exec is patched to return canned gsutil output; the check
    must succeed for the default config and raise RuntimeError once
    db_mem_margin is raised to 2.0.
    """

    def mock_safe_exec(cmd):
        """Return canned output for the manifest queries, a default otherwise."""
        command = ' '.join(cmd) if isinstance(cmd, list) else cmd
        if command == 'gsutil cat gs://blast-db/latest-dir':
            return MockedCompletedProcess(stdout='2020-20-20')
        if command == 'gsutil cat gs://blast-db/2020-20-20/blastdb-manifest.json':
            return MockedCompletedProcess(
                stdout='{"nt":{"size":93.36}, "nr":{"size":227.4}}')
        return MockedCompletedProcess(stdout='nt\t\t100\t')

    cfg = ElasticBlastConfig(gcp_project='test-gcp-project',
                             gcp_region='test-gcp-region',
                             gcp_zone='test-gcp-zone',
                             program='blastn',
                             db='nt',
                             queries='test-queries',
                             results='gs://results',
                             task=ElbCommand.SUBMIT)
    mocker.patch('elb.util.safe_exec', side_effect=mock_safe_exec)

    # Default margin: no error expected
    check_memory_requirements(cfg)

    # Larger margin: the check must fail
    cfg.blast.db_mem_margin = 2.0
    with pytest.raises(RuntimeError):
        check_memory_requirements(cfg)
def test_validate_gcp_config():
    """Test validation of GCP id strings in config"""
    parser = configparser.ConfigParser()
    parser.read(f"{TEST_DATA_DIR}/correct-cfg-file.ini")
    ElasticBlastConfig(parser, task=ElbCommand.SUBMIT)

    def rejected_messages():
        """Build the config, expect UserReportError, return its message lines."""
        with pytest.raises(UserReportError) as err:
            ElasticBlastConfig(parser, task=ElbCommand.SUBMIT)
        lines = str(err.value).split('\n')
        assert len(lines) >= 3
        return lines

    # test correct parameter values
    parser[CFG_CLOUD_PROVIDER] = {
        CFG_CP_GCP_PROJECT: 'correct-gcp-project',
        CFG_CP_GCP_REGION: 'correct-region-123',
        CFG_CP_GCP_ZONE: 'correct-zone-456',
    }
    ElasticBlastConfig(parser, task=ElbCommand.SUBMIT)

    # test missing parameter values
    parser[CFG_CLOUD_PROVIDER] = {CFG_CP_GCP_NETWORK: 'test-network'}
    messages = rejected_messages()
    assert any(s.startswith('Missing gcp-project') for s in messages)
    assert any(s.startswith('Missing gcp-region') for s in messages)
    assert any(s.startswith('Missing gcp-zone') for s in messages)

    # test incorrect parameter values
    parser[CFG_CLOUD_PROVIDER] = {
        CFG_CP_GCP_PROJECT: 'UPPERCASE-project',
        CFG_CP_GCP_REGION: 'region with space',
        CFG_CP_GCP_ZONE: 'zone-with#',
    }
    messages = rejected_messages()
    assert any(
        s.startswith('Parameter "gcp-project" has an invalid value')
        for s in messages)
    assert any(
        s.startswith('Parameter "gcp-region" has an invalid value')
        for s in messages)
    assert any(
        s.startswith('Parameter "gcp-zone" has an invalid value')
        for s in messages)
def test_multiple_query_files():
    """Test getting config with multiple query files

    The whitespace-separated entries of cfg.blast.queries_arg must match the
    two expected file names, in any order.
    """
    ini_path = os.path.join(TEST_DATA_DIR, 'multiple-query-files.ini')
    args = argparse.Namespace(cfg=ini_path)
    cfg = ElasticBlastConfig(configure(args), task=ElbCommand.SUBMIT)

    expected_query_files = ['query-file-1', 'query-file-2']
    actual_query_files = cfg.blast.queries_arg.split()
    assert sorted(actual_query_files) == sorted(expected_query_files)
def test_cluster_name_from_environment(env_config):
    """Test cluster name from environment overrides everything else

    After loading gcp-defaults.ini, the cluster results and name must equal
    the ELB_RESULTS and ELB_CLUSTER_NAME entries of the env_config fixture.
    """
    ini_path = os.path.join(TEST_DATA_DIR, 'gcp-defaults.ini')
    args = argparse.Namespace(cfg=ini_path)
    cfg = ElasticBlastConfig(configure(args), task=ElbCommand.SUBMIT)

    cluster = cfg.cluster
    assert cluster.results == env_config['ELB_RESULTS']
    assert cluster.name == env_config['ELB_CLUSTER_NAME']
def test_default_outfmt(self):
    """ Test that the default BLAST options are exactly -outfmt ELB_DFLT_OUTFMT

    Loads minimal-cfg-file.ini, stores the parsed result on self.cfg and
    compares the stripped blast options string against the default.
    """
    ini_path = os.path.join(TEST_DATA_DIR, 'minimal-cfg-file.ini')
    self.cfg = configure(argparse.Namespace(cfg=ini_path))

    cfg = ElasticBlastConfig(self.cfg, task=ElbCommand.SUBMIT)
    actual_options = cfg.blast.options.strip()
    self.assertEqual(actual_options, f'-outfmt {ELB_DFLT_OUTFMT}')
def cfg(mocked_get_machine_properties):
    """Create an ElasticBlastConfig object

    Yields a minimal AWS blastn configuration for the SUBMIT task. The
    mocked_get_machine_properties argument is requested but not read here.
    """
    settings = {
        'aws_region': 'test-region',
        'program': 'blastn',
        'db': 'test-db',
        'queries': 'test-queries.fa',
        'results': 's3://test-results',
        'task': ElbCommand.SUBMIT,
    }
    yield ElasticBlastConfig(**settings)
def test_aws_defaults():
    """Test that default config parameters are set correctly for AWS

    Beyond the shared checks in check_common_defaults, the cloud must be
    CSP.AWS and the persistent disk size the AWS default.
    """
    ini_path = os.path.join(TEST_DATA_DIR, 'aws-defaults.ini')
    parsed = configure(argparse.Namespace(cfg=ini_path))
    cfg = ElasticBlastConfig(parsed, task=ElbCommand.SUBMIT)

    check_common_defaults(cfg)

    assert cfg.cloud_provider.cloud == CSP.AWS
    assert cfg.cluster.pd_size == constants.ELB_DFLT_AWS_PD_SIZE
def test_generated_cluster_name(env_config_no_cluster):
    """Test cluster name generated from results, and value from config file
    is ignored

    The expected name is 'elasticblast-<lowercased user>-<digest>', where
    digest is the first 9 hex characters of the MD5 of TEST_RESULTS_BUCKET.
    """
    ini_path = os.path.join(TEST_DATA_DIR, 'gcp-defaults.ini')
    args = argparse.Namespace(cfg=ini_path)
    cfg = ElasticBlastConfig(configure(args), task=ElbCommand.SUBMIT)

    assert cfg.cluster.results == TEST_RESULTS_BUCKET

    user = getpass.getuser()
    bucket_md5 = hashlib.md5(TEST_RESULTS_BUCKET.encode()).hexdigest()
    digest = bucket_md5[:9]
    expected_name = f'elasticblast-{user.lower()}-{digest}'
    assert cfg.cluster.name == expected_name
def test_mem_limit_too_high():
    """Test that setting memory limit that exceeds cloud instance memory
    triggers an error

    Building the config from mem-limit-too-high.ini must raise
    UserReportError with return code INPUT_ERROR and a message that starts
    with 'Memory limit' and later contains 'exceeds'.
    """
    ini_path = os.path.join(TEST_DATA_DIR, 'mem-limit-too-high.ini')
    args = argparse.Namespace(cfg=ini_path)

    with pytest.raises(UserReportError) as err:
        ElasticBlastConfig(configure(args), task=ElbCommand.SUBMIT)

    failure = err.value
    assert failure.returncode == INPUT_ERROR
    assert re.match(r'Memory limit.*exceeds', failure.message) is not None
def create_config_for_db(dbname):
    """Create minimal config for a database name

    Returns an ElasticBlastConfig for a GCP blastn SUBMIT task whose db is
    dbname; all other settings are fixed test values.
    """
    settings = {
        'gcp_project': 'test-gcp-project',
        'gcp_region': 'test-gcp-region',
        'gcp_zone': 'test-gcp-zone',
        'program': 'blastn',
        'db': dbname,
        'queries': 'test-queries.fa',
        'results': 'gs://test-bucket',
        'task': ElbCommand.SUBMIT,
    }
    return ElasticBlastConfig(**settings)
def test_invalid_gcp_cluster_name(self):
    """Reject malformed cluster names and accept a well-formed one.

    Starts from correct-cfg-file.ini, which must load cleanly, then tries
    three invalid names (each must raise UserReportError) followed by a
    valid one.
    """
    self.cfg.read(f"{TEST_DATA_DIR}/correct-cfg-file.ini")
    # Baseline: the unmodified file is accepted
    ElasticBlastConfig(self.cfg, task=ElbCommand.SUBMIT)

    invalid_names = (
        'invalid-CLUSTER_NAME',
        'invalid-cluster-name-',
        'invalid-cluster-name-because-it-is-long-it-should-be-less-than-40-characters',
    )
    for bad_name in invalid_names:
        self.cfg[CFG_CLUSTER][CFG_CLUSTER_NAME] = bad_name
        with self.assertRaises(UserReportError):
            ElasticBlastConfig(self.cfg, task=ElbCommand.SUBMIT)

    self.cfg[CFG_CLUSTER][CFG_CLUSTER_NAME] = 'valid-name'
    ElasticBlastConfig(self.cfg, task=ElbCommand.SUBMIT)
def test_get_gke_credentials_no_cluster_real():
    """Test that util.SafeExecError is raised when getting credentials of a
    non-existent cluster"""
    here = os.path.dirname(__file__)
    ini_path = os.path.join(os.path.join(here, 'data'), 'test-cfg-file.ini')
    args = Namespace(cfg=ini_path)

    cfg = ElasticBlastConfig(config.configure(args), task=ElbCommand.SUBMIT)
    cfg.cluster.name = 'some-strange-cluster-name'

    # Precondition: no cluster with this name is listed
    existing_clusters = gcp.get_gke_clusters(cfg)
    assert cfg.cluster.name not in existing_clusters

    with pytest.raises(SafeExecError):
        gcp.get_gke_credentials(cfg)
def blastdb_not_found_fixture():
    """Cleanup cluster if it was created

    Yields a Namespace pointing at INI_NO_BLASTDB; after the consumer
    finishes, re-reads that file into a config and calls
    gcp.delete_cluster_with_cleanup on it.
    """
    # setup
    args = Namespace(cfg=INI_NO_BLASTDB)
    yield args

    # teardown
    parser = configparser.ConfigParser()
    parser.read(args.cfg)
    elb_cfg = ElasticBlastConfig(parser, task=ElbCommand.SUBMIT)
    gcp.delete_cluster_with_cleanup(elb_cfg)
def test_validate_queries_config():
    """Test validation of the queries parameter

    Against an otherwise valid AWS configuration, S3, GS and local query
    paths must be accepted, while bucket names containing illegal characters
    must raise UserReportError mentioning 'Incorrect queries'.
    """
    parser = configparser.ConfigParser()
    _set_sections(parser)

    # set up test config
    parser[CFG_CLOUD_PROVIDER] = {
        CFG_CP_AWS_REGION: 'us-east-1',
        CFG_CP_AWS_SUBNET: 'subnet-2345145',
        CFG_CP_AWS_KEY_PAIR: 'foo',
        CFG_CP_AWS_SECURITY_GROUP: 'sg-2345145',
    }

    # pacify submit config checks
    parser[CFG_BLAST][CFG_BLAST_RESULTS] = 's3://bucket'
    parser[CFG_BLAST][CFG_BLAST_DB] = 'nt'
    parser[CFG_BLAST][CFG_BLAST_PROGRAM] = 'blastn'
    parser[CFG_CLUSTER][CFG_CLUSTER_MACHINE_TYPE] = ELB_DFLT_AWS_MACHINE_TYPE

    # test correct queries: S3 bucket, GS bucket, local file
    # NOTE(review): the GS path contains the text "[email protected]", which looks
    # like an e-mail obfuscation artifact rather than the intended file name;
    # kept verbatim - confirm against the upstream test
    accepted_queries = (
        's3://bucket-123/@#$*/queris!.fa',
        'gs://bucket-123/@^*?/[email protected]',
        'queries',
    )
    for good_query in accepted_queries:
        parser[CFG_BLAST][CFG_BLAST_QUERY] = good_query
        ElasticBlastConfig(parser, task=ElbCommand.SUBMIT)

    # test illegal characters in bucket name
    rejected_queries = (
        's3://bucket!-123/@#$*/queris!.fa',
        'gs://bucket@-123/@#$*/queris!.fa',
    )
    for bad_query in rejected_queries:
        parser[CFG_BLAST][CFG_BLAST_QUERY] = bad_query
        with pytest.raises(UserReportError) as err:
            ElasticBlastConfig(parser, task=ElbCommand.SUBMIT)
        assert 'Incorrect queries' in err.value.message
def test_optional_blast_parameters(self):
    """ Test that optional BLAST parameters properly read from config file

    Loads optional-cfg-file.ini, stores the parsed result on self.cfg and
    checks that both '-outfmt 11' and '-task blastp-fast' appear in the
    blast options as whole, space-delimited tokens.
    """
    args = argparse.Namespace(
        cfg=os.path.join(TEST_DATA_DIR, 'optional-cfg-file.ini'))
    self.cfg = configure(args)
    cfg = ElasticBlastConfig(self.cfg, task=ElbCommand.SUBMIT)

    # str.find is not enough here, need to make sure options are properly
    # merged with whitespace around them.
    options = cfg.blast.options.strip()

    # assertIsNotNone replaces assertTrue(... != None): identity is the
    # correct test for None, and a failure reports the value instead of
    # "False is not true"
    self.assertIsNotNone(re.search(r'(^| )-outfmt 11($| )', options))
    self.assertIsNotNone(re.search(r'(^| )-task blastp-fast($| )', options))
def test_label_persistent_disk(safe_exec_mock):
    """Exercises label_persistent_disk with mock safe_exec and prints out
    arguments to safe_exec

    Run pytest -s -v tests/kubernetes to verify correct order of calls"""
    from argparse import Namespace

    ini_path = os.path.join(TEST_DATA_DIR, 'initialize_persistent_disk.ini')
    parsed = configure(Namespace(cfg=ini_path))
    cfg = ElasticBlastConfig(parsed, task=ElbCommand.SUBMIT)

    # Replace labels with well-known fake for the purpose of testing command
    # match, see above in safe_exec_mock
    cfg.cluster.labels = FAKE_LABELS
    kubernetes.label_persistent_disk(cfg)
def test_gcp_defaults():
    """Test that default config parameters are set correctly for GCP

    Beyond the shared checks in check_common_defaults, the cloud must be
    CSP.GCP, and the persistent disk size and both timeouts must equal their
    GCP/default constants.
    """
    ini_path = os.path.join(TEST_DATA_DIR, 'gcp-defaults.ini')
    parsed = configure(argparse.Namespace(cfg=ini_path))
    cfg = ElasticBlastConfig(parsed, task=ElbCommand.SUBMIT)

    check_common_defaults(cfg)

    assert cfg.cloud_provider.cloud == CSP.GCP
    assert cfg.cluster.pd_size == constants.ELB_DFLT_GCP_PD_SIZE

    timeouts = cfg.timeouts
    assert timeouts.blast_k8s == constants.ELB_DFLT_BLAST_K8S_TIMEOUT
    assert timeouts.init_pv == constants.ELB_DFLT_INIT_PV_TIMEOUT
def main():
    """Local main entry point which sets up arguments, undo stack,
    and processes exceptions

    Returns:
        The value returned by the selected sub-command handler (args.func)
        on success; constants.DEPENDENCY_ERROR when a SafeExecError is
        caught; the exception's returncode when a UserReportError is caught;
        constants.INTERRUPT_ERROR on KeyboardInterrupt.

    Exits the process with constants.UNKNOWN_ERROR (via sys.exit) when
    clean_up reports any messages, regardless of the outcome above.
    """
    # Bound before the try block: the finally clause reads it
    # unconditionally, so it must exist even if the first statement in the
    # try body raises (signal.signal raises ValueError outside the main
    # thread), otherwise an UnboundLocalError would mask the real error.
    clean_up_stack = []
    try:
        signal.signal(signal.SIGINT, signal.default_int_handler)
        # Check parameters for Unicode letters and reject if codes higher
        # than 255 occur
        reject_cli_args_with_unicode(sys.argv[1:])
        parser = create_arg_parser()
        args = parser.parse_args()
        if not args.subcommand:
            # report missing command line task
            raise UserReportError(returncode=constants.INPUT_ERROR,
                                  message=NO_TASK_MSG)
        config_logging(args)
        cfg = configure(args)
        logging.info(f"ElasticBLAST {args.subcommand} {VERSION}")
        task = ElbCommand(args.subcommand.lower())
        cfg = ElasticBlastConfig(cfg, task=task)
        logging.debug(pprint.pformat(cfg.asdict()))
        check_prerequisites(cfg)
        #TODO: use cfg only when args.wait, args.sync, and args.run_label are replicated in cfg
        return args.func(args, cfg, clean_up_stack)
    except (SafeExecError, UserReportError) as e:
        logging.error(e.message)
        # SafeExecError return code is the exit code from command line
        # application ran via subprocess
        if isinstance(e, SafeExecError):
            return constants.DEPENDENCY_ERROR
        return e.returncode
    except KeyboardInterrupt:
        return constants.INTERRUPT_ERROR
    #TODO: process filehelper.TarReadError here
    finally:
        # Run the accumulated clean-up actions; any reported message is
        # logged and overrides the return value with an UNKNOWN_ERROR exit
        messages = clean_up(clean_up_stack)
        if messages:
            for msg in messages:
                logging.error(msg)
            sys.exit(constants.UNKNOWN_ERROR)