Example #1
0
def test_train(mocker):
    """Check which training mock the ``train`` command triggers.

    Two scenarios run back to back, each with a fresh ``Config`` and a fresh
    set of mocks from ``mock_scripts``:

    * ``config_train.yml``: ``m_t`` (train) is called, ``m_tc`` (train cloud)
      is not.
    * ``config_train_cloud.yml``: ``m_tc`` is called, ``m_t`` is not.
    """
    config_dir = p.resolve().parent / 'test_configs'

    # Scenario 1: plain training gets run.
    local_config_file = config_dir / 'config_train.yml'
    local_config = Config()
    # Only the train / train-cloud mocks are inspected; the rest are ignored.
    _, train_cloud_mock, train_mock, _, _, _ = mock_scripts(mocker)

    train(local_config, config_file=local_config_file)

    train_cloud_mock.assert_not_called()
    train_mock.assert_called()

    # Scenario 2: cloud training gets run.
    cloud_config_file = config_dir / 'config_train_cloud.yml'
    cloud_config = Config()
    _, train_cloud_mock, train_mock, _, _, _ = mock_scripts(mocker)

    train(cloud_config, config_file=cloud_config_file)

    train_cloud_mock.assert_called()
    train_mock.assert_not_called()
Example #2
0
def test_evaluate(mocker):
    """Check that the ``evaluate`` command triggers the evaluate mock.

    The command is invoked twice, each time with a fresh ``Config`` and fresh
    mocks from ``mock_scripts``; both invocations load
    ``config_evaluate.yml`` and both expect ``m_e`` to have been called.
    """
    config_dir = p.resolve().parent / 'test_configs'

    # Scenario 1: evaluate gets run.
    first_config_file = config_dir / 'config_evaluate.yml'
    first_config = Config()
    _, _, _, evaluate_mock, _, _ = mock_scripts(mocker)

    evaluate(first_config, config_file=first_config_file)

    evaluate_mock.assert_called()

    # Scenario 2: evaluate must run when the user calls the evaluate command,
    # even if run=False in the config.
    # NOTE(review): this scenario loads the same config_evaluate.yml as the
    # first one, so it may not actually cover a config with run=False --
    # confirm whether a different config file was intended here.
    second_config_file = config_dir / 'config_evaluate.yml'
    second_config = Config()
    _, _, _, evaluate_mock, _, _ = mock_scripts(mocker)

    evaluate(second_config, config_file=second_config_file)

    evaluate_mock.assert_called()
Example #3
0
def test_update_component_configs():
    """Check that top-level ``image_dir`` / ``job_dir`` reach the components.

    After ``update_component_configs`` the ``data_prep``, ``train`` and
    ``evaluate`` sections of the returned config must each carry the
    ``image_dir`` and ``job_dir`` that were set on the config itself.
    """
    config = Config()
    config.image_dir = 'test_image'
    config.job_dir = 'test_job'

    updated = update_component_configs(config)

    expected_values = (('image_dir', 'test_image'), ('job_dir', 'test_job'))
    for key, expected in expected_values:
        # Same component order as the hand-written asserts this replaces.
        for section in (updated.data_prep, updated.train, updated.evaluate):
            assert section[key] == expected
Example #4
0
    def test_pipeline(self):
        """Run ``pipeline`` from ``TEST_CONFIG_PIPE`` and check config and outputs.

        Verifies the dataprep, train, evaluate and cloud sections of the config
        after the run, and that evaluation PDFs and at least one ``*.hdf5``
        model file exist below ``TEST_JOB_DIR``.
        """
        config = Config()

        pipeline(config, config_file=TEST_CONFIG_PIPE)

        # Boolean flags are checked by identity so that a merely truthy/falsy
        # value (e.g. 1 or 0) does not slip through.
        assert config.dataprep['run'] is True
        assert config.dataprep['job_dir'] == str(TEST_JOB_DIR)
        assert config.dataprep['samples_file'] == str(TEST_SAMPLES)
        # NOTE(review): compared against the constant itself rather than
        # str(...), unlike job_dir/samples_file -- presumably the pipeline
        # stores the resized image dir as a path object; confirm.
        assert config.dataprep['image_dir'] == TEST_IMAGE_DIR_RES
        assert config.dataprep['resize'] is True

        assert config.train['run'] is True
        assert config.train['cloud'] is False

        assert config.evaluate['run'] is True

        assert config.cloud['run'] is False
        expected_cloud_settings = (
            ('provider', 'aws'),
            ('tf_dir', 'cloud/aws'),
            ('region', 'eu-west-1'),
            ('vpc_id', 'abc'),
            ('instance_type', 't2.micro'),
            ('bucket', 's3://test_bucket'),
        )
        for key, expected in expected_cloud_settings:
            assert config.cloud[key] == expected
        assert config.cloud['destroy'] is True
        assert config.cloud['cloud_tag'] == 'test_user'

        # Artifacts produced by the run: evaluation plots and trained models.
        assert list(TEST_JOB_DIR.glob('*/confusion_matrix.pdf'))
        assert list(TEST_JOB_DIR.glob('*/test_set_distribution.pdf'))
        assert list((TEST_JOB_DIR / 'models').glob('*.hdf5'))
Example #5
0
    def test_dataprep(self):
        """Run ``dataprep`` from ``TEST_CONFIG_DATAPREP`` and check config and outputs.

        Before the run neither ``TEST_IMAGE_DIR_RES`` nor the JSON files below
        ``TEST_JOB_DIR`` may exist; afterwards all of them must exist. Only the
        dataprep section is expected to have ``run`` set to ``True``.
        """
        config = Config()

        expected_outputs = [TEST_IMAGE_DIR_RES] + [
            Path(TEST_JOB_DIR / file_name)
            for file_name in (
                'class_mapping.json',
                'test_samples.json',
                'train_samples.json',
                'val_samples.json',
            )
        ]

        # Precondition: nothing has been generated yet.
        for output_path in expected_outputs:
            assert not output_path.exists()

        dataprep(config, config_file=TEST_CONFIG_DATAPREP)

        # Boolean flags are checked by identity so that a merely truthy/falsy
        # value (e.g. 1 or 0) does not slip through.
        assert config.dataprep['run'] is True
        assert config.dataprep['job_dir'] == str(TEST_JOB_DIR)
        assert config.dataprep['samples_file'] == str(TEST_SAMPLES)
        assert config.dataprep['image_dir'] == str(TEST_IMAGE_DIR)
        assert config.dataprep['resize'] is True

        assert config.train['run'] is False
        assert config.evaluate['run'] is False
        assert config.cloud['run'] is False

        # Postcondition: every expected output has been created.
        for output_path in expected_outputs:
            assert output_path.exists()
Example #6
0
    def test_evaluate(self, mocker):
        """Run ``evaluate`` from ``TEST_CONFIG_EVAL`` with report generation patched.

        ``papermill.execute_notebook`` is replaced by a stand-in that copies
        ``TEST_NB_TEMPLATE`` to the report notebook path,
        ``Evaluation._determine_best_modelfile`` is patched to return a model
        file found in the job dir, and the PDF exporter returns dummy bytes.
        Afterwards only the evaluate section may have ``run`` set to ``True``.
        """
        # glob() yields files in arbitrary order; sorting makes the chosen
        # model file the same on every run and platform.
        model_files = sorted(Path(TEST_JOB_DIR / 'models').glob('*.hdf5'))
        # Fail with a readable message instead of a bare IndexError.
        assert model_files, 'expected at least one *.hdf5 file in the models dir'
        best_model_file = model_files[-1]
        evaluation_dir_name = 'evaluation_' + best_model_file.stem
        notebook_path = TEST_JOB_DIR / evaluation_dir_name / 'evaluation_report.ipynb'

        def fake_execute_notebook(*args, **kwargs):
            # Copy the template notebook instead of executing anything.
            shutil.copy(TEST_NB_TEMPLATE, notebook_path)

        mocker.patch('papermill.execute_notebook',
                     side_effect=fake_execute_notebook)
        mocker.patch(
            'imageatm.components.evaluation.Evaluation._determine_best_modelfile',
            return_value=best_model_file)
        mocker.patch('nbconvert.PDFExporter.from_notebook_node',
                     return_value=(b'ANY_DATA', None))

        config = Config()

        evaluate(config, config_file=TEST_CONFIG_EVAL)

        # Boolean flags are checked by identity so that a merely truthy/falsy
        # value (e.g. 1 or 0) does not slip through.
        assert config.dataprep['run'] is False
        assert config.train['run'] is False
        assert config.cloud['run'] is False

        assert config.evaluate['run'] is True
        assert config.evaluate['job_dir'] == str(TEST_JOB_DIR)
        assert config.evaluate['image_dir'] == str(TEST_IMAGE_DIR_RES)
Example #7
0
def test_cloud(mocker):
    """Check that the ``cloud`` command triggers the cloud mock.

    Loads ``config_cloud.yml`` into a fresh ``Config`` and expects ``m_c``
    (the fifth mock returned by ``mock_scripts``) to have been called.
    """
    # assert that cloud gets run
    config_file = p.resolve().parent / 'test_configs' / 'config_cloud.yml'
    cloud_config = Config()

    # Only the cloud mock is inspected; the others are ignored.
    _, _, _, _, cloud_mock, _ = mock_scripts(mocker)

    cloud(cloud_config, config_file=config_file)

    cloud_mock.assert_called()
Example #8
0
    def test_pipeline(self, mocker):
        """Run ``pipeline`` from ``TEST_CONFIG_PIPE`` with report generation patched.

        ``papermill.execute_notebook`` is replaced by a stand-in that copies
        ``TEST_NB_TEMPLATE`` to the report notebook path,
        ``Evaluation._determine_best_modelfile`` is patched to return a model
        file found in the job dir, and the PDF exporter returns dummy bytes.
        Afterwards the dataprep, train, evaluate (including its ``report``
        settings) and cloud sections of the config are verified, and at least
        one ``*.hdf5`` model file must exist.
        """
        # glob() yields files in arbitrary order; sorting makes the chosen
        # model file the same on every run and platform.
        model_files = sorted(Path(TEST_JOB_DIR / 'models').glob('*.hdf5'))
        # Fail with a readable message instead of a bare IndexError.
        assert model_files, 'expected at least one *.hdf5 file in the models dir'
        best_model_file = model_files[-1]
        evaluation_dir_name = 'evaluation_' + best_model_file.stem
        notebook_path = TEST_JOB_DIR / evaluation_dir_name / 'evaluation_report.ipynb'

        def fake_execute_notebook(*args, **kwargs):
            # Copy the template notebook instead of executing anything.
            shutil.copy(TEST_NB_TEMPLATE, notebook_path)

        mocker.patch('papermill.execute_notebook',
                     side_effect=fake_execute_notebook)
        mocker.patch(
            'imageatm.components.evaluation.Evaluation._determine_best_modelfile',
            return_value=best_model_file)
        mocker.patch('nbconvert.PDFExporter.from_notebook_node',
                     return_value=(b'ANY_DATA', None))

        config = Config()

        pipeline(config, config_file=TEST_CONFIG_PIPE)

        # Boolean flags are checked by identity so that a merely truthy/falsy
        # value (e.g. 1 or 0) does not slip through.
        assert config.dataprep['run'] is True
        assert config.dataprep['job_dir'] == str(TEST_JOB_DIR)
        assert config.dataprep['samples_file'] == str(TEST_SAMPLES)
        # NOTE(review): compared against the constant itself rather than
        # str(...), unlike job_dir/samples_file -- presumably the pipeline
        # stores the resized image dir as a path object; confirm.
        assert config.dataprep['image_dir'] == TEST_IMAGE_DIR_RES
        assert config.dataprep['resize'] is True

        assert config.train['run'] is True
        assert config.train['cloud'] is False

        assert config.evaluate['run'] is True
        report_settings = config.evaluate['report']
        assert report_settings['create'] is True
        assert report_settings['kernel_name'] == 'any_kernel'
        assert report_settings['export_html'] is True
        assert report_settings['export_pdf'] is True

        assert config.cloud['run'] is False
        expected_cloud_settings = (
            ('provider', 'aws'),
            ('tf_dir', 'cloud/aws'),
            ('region', 'eu-west-1'),
            ('vpc_id', 'abc'),
            ('instance_type', 't2.micro'),
            ('bucket', 's3://test_bucket'),
        )
        for key, expected in expected_cloud_settings:
            assert config.cloud[key] == expected
        assert config.cloud['destroy'] is True
        assert config.cloud['cloud_tag'] == 'test_user'

        assert list(Path(TEST_JOB_DIR / 'models').glob('*.hdf5'))
Example #9
0
    def test_evaluate(self):
        """Run ``evaluate`` from ``TEST_CONFIG_EVAL`` and check config and outputs.

        The evaluation PDFs must not exist below ``TEST_JOB_DIR`` before the
        run and must exist afterwards. Only the evaluate section is expected
        to have ``run`` set to ``True``.
        """
        config = Config()

        report_patterns = (
            '*/confusion_matrix.pdf',
            '*/test_set_distribution.pdf',
        )

        # Precondition: no evaluation output has been produced yet.
        for pattern in report_patterns:
            assert not list(TEST_JOB_DIR.glob(pattern))

        evaluate(config, config_file=TEST_CONFIG_EVAL)

        # Boolean flags are checked by identity so that a merely truthy/falsy
        # value (e.g. 1 or 0) does not slip through.
        assert config.dataprep['run'] is False
        assert config.train['run'] is False
        assert config.cloud['run'] is False

        assert config.evaluate['run'] is True
        assert config.evaluate['job_dir'] == str(TEST_JOB_DIR)
        assert config.evaluate['image_dir'] == str(TEST_IMAGE_DIR_RES)

        # Postcondition: both evaluation PDFs now exist.
        for pattern in report_patterns:
            assert list(TEST_JOB_DIR.glob(pattern))
Example #10
0
    def test_train(self):
        """Run ``train`` from ``TEST_CONFIG_TRAIN`` and check config and outputs.

        No ``*.hdf5`` file may exist in the ``models`` directory of
        ``TEST_JOB_DIR`` before the run; at least one must exist afterwards.
        Only the train section is expected to have ``run`` set to ``True``.
        """
        config = Config()

        models_dir = Path(TEST_JOB_DIR / 'models')

        # Precondition: no model has been trained yet.
        assert not list(models_dir.glob('*.hdf5'))

        train(config, config_file=TEST_CONFIG_TRAIN)

        # Boolean flags are checked by identity so that a merely truthy/falsy
        # value (e.g. 1 or 0) does not slip through.
        assert config.train['run'] is True
        assert config.train['cloud'] is False
        assert config.train['job_dir'] == str(TEST_JOB_DIR)
        assert config.train['image_dir'] == str(TEST_IMAGE_DIR_RES)

        assert config.dataprep['run'] is False
        assert config.evaluate['run'] is False
        assert config.cloud['run'] is False

        # Postcondition: training wrote at least one model file.
        assert list(models_dir.glob('*.hdf5'))
Example #11
0
def test_pipeline(mocker):
    """Check which mocks ``pipeline`` triggers for each test config file.

    Every scenario re-creates the mocks via ``mock_scripts`` and runs
    ``pipeline`` with one config file from ``test_configs``; the table below
    lists, per file, whether ``m_dp``, ``m_tc``, ``m_t``, ``m_e`` and ``m_c``
    (in that order) are expected to have been called.
    """
    config_dir = p.resolve().parent / 'test_configs'

    # (config file, expected "was called" for m_dp, m_tc, m_t, m_e, m_c)
    scenarios = (
        # only dataprep gets run
        ('config_dataprep.yml', (True, False, False, False, False)),
        # only train gets run
        ('config_train.yml', (False, False, True, False, False)),
        # only train cloud gets run
        ('config_train_cloud.yml', (False, True, False, False, False)),
        # only evaluate gets run
        ('config_evaluate.yml', (False, False, False, True, False)),
        # only cloud gets run
        ('config_cloud.yml', (False, False, False, False, True)),
        # all components get run (train runs locally, not in the cloud)
        ('config_all.yml', (True, False, True, True, True)),
    )

    # NOTE(review): a single Config instance is shared by all scenarios, so
    # state from one run may carry over into the next -- confirm that this is
    # intended rather than creating a fresh Config per scenario.
    config = Config()

    for file_name, expected_calls in scenarios:
        (data_prep_mock, train_cloud_mock, train_mock, evaluate_mock,
         cloud_mock, _) = mock_scripts(mocker)

        pipeline(config, config_file=config_dir / file_name)

        checked_mocks = (data_prep_mock, train_cloud_mock, train_mock,
                         evaluate_mock, cloud_mock)
        for script_mock, should_be_called in zip(checked_mocks,
                                                 expected_calls):
            if should_be_called:
                script_mock.assert_called()
            else:
                script_mock.assert_not_called()
Example #12
0
def test_config_set_image_dir(mocker):
    """Check which ``image_dir`` ``config_set_image_dir`` puts on the config.

    Each scenario starts from a fresh ``Config``, applies the listed
    ``data_prep`` and ``train`` settings (in that order) and compares the
    resulting ``image_dir`` with the expected value.
    """
    # (data_prep settings, train settings, expected image_dir)
    scenarios = (
        # empty config
        ((), (), None),
        # pre-set image_dir in data_prep
        ((('image_dir', 'test'),), (), None),
        # pre-set image_dir in data_prep and run=True
        ((('image_dir', 'test'), ('run', True)), (), 'test'),
        # check that data_prep overwrites train
        (
            (('image_dir', 'test'), ('run', True)),
            (('image_dir', 'test_train'), ('run', True)),
            'test',
        ),
    )

    for data_prep_settings, train_settings, expected_image_dir in scenarios:
        config = Config()
        for key, value in data_prep_settings:
            config.data_prep[key] = value
        for key, value in train_settings:
            config.train[key] = value

        result = config_set_image_dir(config)

        assert result.image_dir == expected_image_dir
Example #13
0
def test_update_config():
    """Check how ``update_config`` combines defaults, config file and options.

    Scenarios, each starting from a fresh ``Config``:

    1. Nothing set: only the defaults appear in the component sections.
    2. ``image_dir`` / ``job_dir`` set on the config: they show up in the
       component sections next to the defaults.
    3. ``config_train.yml`` loaded: sections mirror the file contents.
    4. ``config_train.yml`` loaded with ``image_dir`` / ``job_dir`` set on the
       config: those values replace the ones from the file.
    5. ``config_train.yml`` loaded with keyword options: the options replace
       the values from the file.
    """
    # check that defaults are being set
    config = Config()

    result = update_config(config)

    assert result.train == {'cloud': False}
    assert result.data_prep == {'resize': False}
    assert result.cloud == {}
    assert result.evaluate == {}

    # check that defaults, image_dir, and job_dir are being set
    config = Config()
    config.image_dir = 'test_image'
    config.job_dir = 'test_job'

    result = update_config(config)

    assert result.train == {
        'cloud': False,
        'image_dir': 'test_image',
        'job_dir': 'test_job'
    }
    assert result.data_prep == {
        'resize': False,
        'image_dir': 'test_image',
        'job_dir': 'test_job'
    }
    assert result.cloud == {'job_dir': 'test_job'}
    assert result.evaluate == {
        'image_dir': 'test_image',
        'job_dir': 'test_job'
    }

    # check that config file gets populated correctly
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_train.yml'

    config = Config()

    result = update_config(config, config_file=TEST_CONFIG_FILE)

    assert result.train == {
        'run': True,
        'cloud': False,
        'image_dir': 'test_train/images',
        'job_dir': 'test_train/job_dir',
    }
    assert result.data_prep == {
        'run': False,
        'resize': True,
        'image_dir': 'test_data_prep/images',
        'job_dir': 'test_data_prep/job_dir',
        'samples_file': 'test_data_prep/samples.json',
    }
    assert result.cloud == {
        'run': False,
        'provider': 'aws',  # supported providers ['aws']
        'tf_dir': 'cloud/aws',
        'region':
        'eu-west-1',  # supported regions ['eu-west-1', 'eu-central-1']
        'vpc_id': 'abc',
        'instance_type': 't2.micro',  # supported instances ['p2.xlarge']
        'bucket':
        's3://test_bucket',  # s3 bucket needs to exist, will not be created/destroyed by terraform
        'destroy': True,
        'cloud_tag': 'test_user',
    }
    assert result.evaluate == {
        'run': False,
        'image_dir': 'test_evaluate/images',
        'job_dir': 'test_evaluate/job_dir',
    }

    # check that config file gets populated correctly and image and job dir are updated
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_train.yml'

    config = Config()
    config.image_dir = 'test_image'
    config.job_dir = 'test_job'

    result = update_config(config, config_file=TEST_CONFIG_FILE)

    assert result.train == {
        'run': True,
        'cloud': False,
        'image_dir': 'test_image',
        'job_dir': 'test_job',
    }
    assert result.data_prep == {
        'run': False,
        'resize': True,
        'image_dir': 'test_image',
        'job_dir': 'test_job',
        'samples_file': 'test_data_prep/samples.json',
    }
    assert result.cloud == {
        'run': False,
        'provider': 'aws',
        'tf_dir': 'cloud/aws',
        'region': 'eu-west-1',
        'vpc_id': 'abc',
        'instance_type': 't2.micro',
        'bucket': 's3://test_bucket',
        'destroy': True,
        'job_dir': 'test_job',
        'cloud_tag': 'test_user',
    }
    assert result.evaluate == {
        'run': False,
        'image_dir': 'test_image',
        'job_dir': 'test_job'
    }

    # test that options overwrite config file
    TEST_CONFIG_FILE = p.resolve().parent / 'test_configs' / 'config_train.yml'

    config = Config()

    result = update_config(
        config,
        config_file=TEST_CONFIG_FILE,
        image_dir='test_image',
        job_dir='test_job',
        region='eu-central-1',
    )

    assert result.train == {
        'run': True,
        'cloud': False,
        'image_dir': 'test_image',
        'job_dir': 'test_job',
    }

    assert result.data_prep == {
        'run': False,
        'resize': True,
        'image_dir': 'test_image',
        'job_dir': 'test_job',
        'samples_file': 'test_data_prep/samples.json',
    }

    # 'bucket' is listed once; a repeated key in a dict literal silently
    # overrides the earlier entry.
    assert result.cloud == {
        'run': False,
        'provider': 'aws',
        'tf_dir': 'cloud/aws',
        'region': 'eu-central-1',
        'vpc_id': 'abc',
        'instance_type': 't2.micro',
        'bucket': 's3://test_bucket',
        'destroy': True,
        'job_dir': 'test_job',
        'cloud_tag': 'test_user',
    }

    assert result.evaluate == {
        'run': False,
        'image_dir': 'test_image',
        'job_dir': 'test_job'
    }