def test_train(mocker):
    # assert that train gets run
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_train.yml'
    config = Config()
    m_dp, m_tc, m_t, m_e, m_c, m_l = mock_scripts(mocker)
    train(config, config_file=TEST_CONFIG_FILE)
    m_tc.assert_not_called()
    m_t.assert_called()

    # assert that train cloud gets run
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_train_cloud.yml'
    config = Config()
    m_dp, m_tc, m_t, m_e, m_c, m_l = mock_scripts(mocker)
    train(config, config_file=TEST_CONFIG_FILE)
    m_tc.assert_called()
    m_t.assert_not_called()
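
# The tests in this module rely on a mock_scripts helper defined elsewhere in
# the file. For orientation, here is a minimal sketch of what it is assumed to
# do: patch the component entry points via pytest-mock and return the mocks in
# (dataprep, train_cloud, train, evaluate, cloud, logger) order. The patch
# targets below are illustrative assumptions, not the module's actual import
# paths, and the sixth mock (m_l) is never asserted on in these tests.
def _mock_scripts_sketch(mocker):
    m_dp = mocker.patch('imageatm.client.client.run_dataprep')        # hypothetical target
    m_tc = mocker.patch('imageatm.client.client.run_training_cloud')  # hypothetical target
    m_t = mocker.patch('imageatm.client.client.run_training')         # hypothetical target
    m_e = mocker.patch('imageatm.client.client.run_evaluation')       # hypothetical target
    m_c = mocker.patch('imageatm.client.client.run_cloud')            # hypothetical target
    m_l = mocker.patch('imageatm.client.client.get_logger')           # hypothetical target
    return m_dp, m_tc, m_t, m_e, m_c, m_l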
def test_evaluate(mocker):
    # assert that evaluate gets run
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_evaluate.yml'
    config = Config()
    m_dp, m_tc, m_t, m_e, m_c, m_l = mock_scripts(mocker)
    evaluate(config, config_file=TEST_CONFIG_FILE)
    m_e.assert_called()

    # assert that evaluate still gets run even though run=False in the config:
    # if the user calls the evaluate command, we want it to run
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_evaluate.yml'
    config = Config()
    m_dp, m_tc, m_t, m_e, m_c, m_l = mock_scripts(mocker)
    evaluate(config, config_file=TEST_CONFIG_FILE)
    m_e.assert_called()
def test_update_component_configs():
    config = Config()
    config.image_dir = 'test_image'
    config.job_dir = 'test_job'

    result = update_component_configs(config)

    assert result.data_prep['image_dir'] == 'test_image'
    assert result.train['image_dir'] == 'test_image'
    assert result.evaluate['image_dir'] == 'test_image'
    assert result.data_prep['job_dir'] == 'test_job'
    assert result.train['job_dir'] == 'test_job'
    assert result.evaluate['job_dir'] == 'test_job'
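
# For orientation: a minimal sketch of the behavior test_update_component_configs
# pins down, assuming update_component_configs simply copies the top-level dirs
# into each component dict. This is illustrative only; the real implementation
# lives in the imageatm client module and may handle more components.
def _update_component_configs_sketch(config):
    for component in (config.data_prep, config.train, config.evaluate):
        if config.image_dir:
            component['image_dir'] = config.image_dir
        if config.job_dir:
            component['job_dir'] = config.job_dir
    return config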
    def test_pipeline(self):
        config = Config()
        pipeline(config, config_file=TEST_CONFIG_PIPE)

        assert config.dataprep['run'] == True
        assert config.dataprep['job_dir'] == str(TEST_JOB_DIR)
        assert config.dataprep['samples_file'] == str(TEST_SAMPLES)
        assert config.dataprep['image_dir'] == TEST_IMAGE_DIR_RES
        assert config.dataprep['resize'] == True
        assert config.train['run'] == True
        assert config.train['cloud'] == False
        assert config.evaluate['run'] == True
        assert config.cloud['run'] == False
        assert config.cloud['provider'] == 'aws'
        assert config.cloud['tf_dir'] == 'cloud/aws'
        assert config.cloud['region'] == 'eu-west-1'
        assert config.cloud['vpc_id'] == 'abc'
        assert config.cloud['instance_type'] == 't2.micro'
        assert config.cloud['bucket'] == 's3://test_bucket'
        assert config.cloud['destroy'] == True
        assert config.cloud['cloud_tag'] == 'test_user'

        assert list(TEST_JOB_DIR.glob('*/confusion_matrix.pdf'))
        assert list(TEST_JOB_DIR.glob('*/test_set_distribution.pdf'))
        assert list(Path(TEST_JOB_DIR / 'models').glob('*.hdf5'))
    def test_dataprep(self):
        config = Config()

        assert not TEST_IMAGE_DIR_RES.exists()
        assert not Path(TEST_JOB_DIR / 'class_mapping.json').exists()
        assert not Path(TEST_JOB_DIR / 'test_samples.json').exists()
        assert not Path(TEST_JOB_DIR / 'train_samples.json').exists()
        assert not Path(TEST_JOB_DIR / 'val_samples.json').exists()

        dataprep(config, config_file=TEST_CONFIG_DATAPREP)

        assert config.dataprep['run'] == True
        assert config.dataprep['job_dir'] == str(TEST_JOB_DIR)
        assert config.dataprep['samples_file'] == str(TEST_SAMPLES)
        assert config.dataprep['image_dir'] == str(TEST_IMAGE_DIR)
        assert config.dataprep['resize'] == True
        assert config.train['run'] == False
        assert config.evaluate['run'] == False
        assert config.cloud['run'] == False

        assert TEST_IMAGE_DIR_RES.exists()
        assert Path(TEST_JOB_DIR / 'class_mapping.json').exists()
        assert Path(TEST_JOB_DIR / 'test_samples.json').exists()
        assert Path(TEST_JOB_DIR / 'train_samples.json').exists()
        assert Path(TEST_JOB_DIR / 'val_samples.json').exists()
    def test_evaluate(self, mocker):
        BEST_MODEL_FILE = list(Path(TEST_JOB_DIR / 'models').glob('*.hdf5'))[-1]
        BEST_MODEL = 'evaluation_' + BEST_MODEL_FILE.stem
        NB_FILEPATH = TEST_JOB_DIR / BEST_MODEL / 'evaluation_report.ipynb'

        def fake_execute_notebook(*args, **kwargs):
            shutil.copy(TEST_NB_TEMPLATE, NB_FILEPATH)

        # stub out notebook execution and PDF export so no real report is rendered
        mocker.patch('papermill.execute_notebook', side_effect=fake_execute_notebook)
        mocker.patch(
            'imageatm.components.evaluation.Evaluation._determine_best_modelfile',
            return_value=BEST_MODEL_FILE,
        )
        mocker.patch(
            'nbconvert.PDFExporter.from_notebook_node',
            return_value=('ANY_DATA'.encode(), None),
        )

        config = Config()
        evaluate(config, config_file=TEST_CONFIG_EVAL)

        assert config.dataprep['run'] == False
        assert config.train['run'] == False
        assert config.cloud['run'] == False
        assert config.evaluate['run'] == True
        assert config.evaluate['job_dir'] == str(TEST_JOB_DIR)
        assert config.evaluate['image_dir'] == str(TEST_IMAGE_DIR_RES)
def test_cloud(mocker):
    # assert that cloud gets run
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_cloud.yml'
    config = Config()
    m_dp, m_tc, m_t, m_e, m_c, m_l = mock_scripts(mocker)
    cloud(config, config_file=TEST_CONFIG_FILE)
    m_c.assert_called()
    def test_pipeline(self, mocker):
        BEST_MODEL_FILE = list(Path(TEST_JOB_DIR / 'models').glob('*.hdf5'))[-1]
        BEST_MODEL = 'evaluation_' + BEST_MODEL_FILE.stem
        NB_FILEPATH = TEST_JOB_DIR / BEST_MODEL / 'evaluation_report.ipynb'

        def fake_execute_notebook(*args, **kwargs):
            shutil.copy(TEST_NB_TEMPLATE, NB_FILEPATH)

        # stub out notebook execution and PDF export so no real report is rendered
        mocker.patch('papermill.execute_notebook', side_effect=fake_execute_notebook)
        mocker.patch(
            'imageatm.components.evaluation.Evaluation._determine_best_modelfile',
            return_value=BEST_MODEL_FILE,
        )
        mocker.patch(
            'nbconvert.PDFExporter.from_notebook_node',
            return_value=('ANY_DATA'.encode(), None),
        )

        config = Config()
        pipeline(config, config_file=TEST_CONFIG_PIPE)

        assert config.dataprep['run'] == True
        assert config.dataprep['job_dir'] == str(TEST_JOB_DIR)
        assert config.dataprep['samples_file'] == str(TEST_SAMPLES)
        assert config.dataprep['image_dir'] == TEST_IMAGE_DIR_RES
        assert config.dataprep['resize'] == True
        assert config.train['run'] == True
        assert config.train['cloud'] == False
        assert config.evaluate['run'] == True
        assert config.evaluate['report']['create'] == True
        assert config.evaluate['report']['kernel_name'] == 'any_kernel'
        assert config.evaluate['report']['export_html'] == True
        assert config.evaluate['report']['export_pdf'] == True
        assert config.cloud['run'] == False
        assert config.cloud['provider'] == 'aws'
        assert config.cloud['tf_dir'] == 'cloud/aws'
        assert config.cloud['region'] == 'eu-west-1'
        assert config.cloud['vpc_id'] == 'abc'
        assert config.cloud['instance_type'] == 't2.micro'
        assert config.cloud['bucket'] == 's3://test_bucket'
        assert config.cloud['destroy'] == True
        assert config.cloud['cloud_tag'] == 'test_user'

        assert list(Path(TEST_JOB_DIR / 'models').glob('*.hdf5'))
    def test_evaluate(self):
        config = Config()

        assert not list(TEST_JOB_DIR.glob('*/confusion_matrix.pdf'))
        assert not list(TEST_JOB_DIR.glob('*/test_set_distribution.pdf'))

        evaluate(config, config_file=TEST_CONFIG_EVAL)

        assert config.dataprep['run'] == False
        assert config.train['run'] == False
        assert config.cloud['run'] == False
        assert config.evaluate['run'] == True
        assert config.evaluate['job_dir'] == str(TEST_JOB_DIR)
        assert config.evaluate['image_dir'] == str(TEST_IMAGE_DIR_RES)

        assert list(TEST_JOB_DIR.glob('*/confusion_matrix.pdf'))
        assert list(TEST_JOB_DIR.glob('*/test_set_distribution.pdf'))
    def test_train(self):
        config = Config()

        assert not list(Path(TEST_JOB_DIR / 'models').glob('*.hdf5'))

        train(config, config_file=TEST_CONFIG_TRAIN)

        assert config.train['run'] == True
        assert config.train['cloud'] == False
        assert config.train['job_dir'] == str(TEST_JOB_DIR)
        assert config.train['image_dir'] == str(TEST_IMAGE_DIR_RES)
        assert config.dataprep['run'] == False
        assert config.evaluate['run'] == False
        assert config.cloud['run'] == False

        assert list(Path(TEST_JOB_DIR / 'models').glob('*.hdf5'))
def test_pipeline(mocker):
    # assert that only dataprep gets run
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_dataprep.yml'
    config = Config()
    m_dp, m_tc, m_t, m_e, m_c, m_l = mock_scripts(mocker)
    pipeline(config, config_file=TEST_CONFIG_FILE)
    m_dp.assert_called()
    m_tc.assert_not_called()
    m_t.assert_not_called()
    m_e.assert_not_called()
    m_c.assert_not_called()

    # assert that only train gets run
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_train.yml'
    m_dp, m_tc, m_t, m_e, m_c, m_l = mock_scripts(mocker)
    pipeline(config, config_file=TEST_CONFIG_FILE)
    m_dp.assert_not_called()
    m_tc.assert_not_called()
    m_t.assert_called()
    m_e.assert_not_called()
    m_c.assert_not_called()

    # assert that only train cloud gets run
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_train_cloud.yml'
    m_dp, m_tc, m_t, m_e, m_c, m_l = mock_scripts(mocker)
    pipeline(config, config_file=TEST_CONFIG_FILE)
    m_dp.assert_not_called()
    m_tc.assert_called()
    m_t.assert_not_called()
    m_e.assert_not_called()
    m_c.assert_not_called()

    # assert that only evaluate gets run
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_evaluate.yml'
    m_dp, m_tc, m_t, m_e, m_c, m_l = mock_scripts(mocker)
    pipeline(config, config_file=TEST_CONFIG_FILE)
    m_dp.assert_not_called()
    m_tc.assert_not_called()
    m_t.assert_not_called()
    m_e.assert_called()
    m_c.assert_not_called()

    # assert that only cloud gets run
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_cloud.yml'
    m_dp, m_tc, m_t, m_e, m_c, m_l = mock_scripts(mocker)
    pipeline(config, config_file=TEST_CONFIG_FILE)
    m_dp.assert_not_called()
    m_tc.assert_not_called()
    m_t.assert_not_called()
    m_e.assert_not_called()
    m_c.assert_called()

    # assert that all components get run
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_all.yml'
    m_dp, m_tc, m_t, m_e, m_c, m_l = mock_scripts(mocker)
    pipeline(config, config_file=TEST_CONFIG_FILE)
    m_dp.assert_called()
    m_tc.assert_not_called()
    m_t.assert_called()
    m_e.assert_called()
    m_c.assert_called()
def test_config_set_image_dir(mocker):
    # empty config
    config = Config()
    expected_image_dir = None
    result = config_set_image_dir(config)
    assert result.image_dir == expected_image_dir

    # pre-set image_dir in data_prep
    config = Config()
    config.data_prep['image_dir'] = 'test'
    expected_image_dir = None
    result = config_set_image_dir(config)
    assert result.image_dir == expected_image_dir

    # pre-set image_dir in data_prep and run=True
    config = Config()
    config.data_prep['image_dir'] = 'test'
    config.data_prep['run'] = True
    expected_image_dir = 'test'
    result = config_set_image_dir(config)
    assert result.image_dir == expected_image_dir

    # check that data_prep overwrites train
    config = Config()
    config.data_prep['image_dir'] = 'test'
    config.data_prep['run'] = True
    config.train['image_dir'] = 'test_train'
    config.train['run'] = True
    expected_image_dir = 'test'
    result = config_set_image_dir(config)
    assert result.image_dir == expected_image_dir
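
# A minimal sketch of the precedence rules test_config_set_image_dir pins down,
# assuming config_set_image_dir picks image_dir only from components with
# run=True, with data_prep taking precedence over train. Illustrative only;
# other components (e.g. evaluate) are not covered by the test and are omitted.
def _config_set_image_dir_sketch(config):
    for component in (config.data_prep, config.train):  # data_prep wins
        if component.get('run') and 'image_dir' in component:
            config.image_dir = component['image_dir']
            break
    return config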
def test_update_config():
    # check that defaults are being set
    config = Config()
    result = update_config(config)
    assert result.train == {'cloud': False}
    assert result.data_prep == {'resize': False}
    assert result.cloud == {}
    assert result.evaluate == {}

    # check that defaults, image_dir, and job_dir are being set
    config = Config()
    config.image_dir = 'test_image'
    config.job_dir = 'test_job'
    result = update_config(config)
    assert result.train == {'cloud': False, 'image_dir': 'test_image', 'job_dir': 'test_job'}
    assert result.data_prep == {'resize': False, 'image_dir': 'test_image', 'job_dir': 'test_job'}
    assert result.cloud == {'job_dir': 'test_job'}
    assert result.evaluate == {'image_dir': 'test_image', 'job_dir': 'test_job'}

    # check that config file gets populated correctly
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_train.yml'
    config = Config()
    result = update_config(config, config_file=TEST_CONFIG_FILE)
    assert result.train == {
        'run': True,
        'cloud': False,
        'image_dir': 'test_train/images',
        'job_dir': 'test_train/job_dir',
    }
    assert result.data_prep == {
        'run': False,
        'resize': True,
        'image_dir': 'test_data_prep/images',
        'job_dir': 'test_data_prep/job_dir',
        'samples_file': 'test_data_prep/samples.json',
    }
    assert result.cloud == {
        'run': False,
        'provider': 'aws',  # supported providers ['aws']
        'tf_dir': 'cloud/aws',
        'region': 'eu-west-1',  # supported regions ['eu-west-1', 'eu-central-1']
        'vpc_id': 'abc',
        'instance_type': 't2.micro',  # supported instances ['p2.xlarge']
        'bucket': 's3://test_bucket',  # s3 bucket needs to exist, will not be created/destroyed by terraform
        'destroy': True,
        'cloud_tag': 'test_user',
    }
    assert result.evaluate == {
        'run': False,
        'image_dir': 'test_evaluate/images',
        'job_dir': 'test_evaluate/job_dir',
    }

    # check that config file gets populated correctly and image and job dir are updated
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_train.yml'
    config = Config()
    config.image_dir = 'test_image'
    config.job_dir = 'test_job'
    result = update_config(config, config_file=TEST_CONFIG_FILE)
    assert result.train == {
        'run': True,
        'cloud': False,
        'image_dir': 'test_image',
        'job_dir': 'test_job',
    }
    assert result.data_prep == {
        'run': False,
        'resize': True,
        'image_dir': 'test_image',
        'job_dir': 'test_job',
        'samples_file': 'test_data_prep/samples.json',
    }
    assert result.cloud == {
        'run': False,
        'provider': 'aws',
        'tf_dir': 'cloud/aws',
        'region': 'eu-west-1',
        'vpc_id': 'abc',
        'instance_type': 't2.micro',
        'bucket': 's3://test_bucket',
        'destroy': True,
        'job_dir': 'test_job',
        'cloud_tag': 'test_user',
    }
    assert result.evaluate == {'run': False, 'image_dir': 'test_image', 'job_dir': 'test_job'}

    # test that options overwrite config file
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_train.yml'
    config = Config()
    result = update_config(
        config,
        config_file=TEST_CONFIG_FILE,
        image_dir='test_image',
        job_dir='test_job',
        region='eu-central-1',
    )
    assert result.train == {
        'run': True,
        'cloud': False,
        'image_dir': 'test_image',
        'job_dir': 'test_job',
    }
    assert result.data_prep == {
        'run': False,
        'resize': True,
        'image_dir': 'test_image',
        'job_dir': 'test_job',
        'samples_file': 'test_data_prep/samples.json',
    }
    assert result.cloud == {
        'run': False,
        'provider': 'aws',
        'tf_dir': 'cloud/aws',
        'region': 'eu-central-1',
        'vpc_id': 'abc',
        'instance_type': 't2.micro',
        'bucket': 's3://test_bucket',
        'destroy': True,
        'job_dir': 'test_job',
        'cloud_tag': 'test_user',
    }
    assert result.evaluate == {'run': False, 'image_dir': 'test_image', 'job_dir': 'test_job'}
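
# For reference, the asserts in test_update_config imply that
# test_configs/config_train.yml looks roughly like the following. This is
# reconstructed from the expected dicts, not copied from the file; keys that
# update_config injects as defaults (such as train.cloud) may not appear in
# the file itself.
#
# train:
#   run: true
#   image_dir: test_train/images
#   job_dir: test_train/job_dir
# data_prep:
#   run: false
#   resize: true
#   image_dir: test_data_prep/images
#   job_dir: test_data_prep/job_dir
#   samples_file: test_data_prep/samples.json
# evaluate:
#   run: false
#   image_dir: test_evaluate/images
#   job_dir: test_evaluate/job_dir
# cloud:
#   run: false
#   provider: aws
#   tf_dir: cloud/aws
#   region: eu-west-1
#   vpc_id: abc
#   instance_type: t2.micro
#   bucket: s3://test_bucket
#   destroy: true
#   cloud_tag: test_user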