def test_33_create_entity_and_gdriveh_storage_with_wizard(self):
    """Drive the create wizard to configure a gdriveh storage and verify config and spec files."""
    entity_type = DATASETS
    git_repo = os.path.join(self.tmp_dir, GIT_PATH)
    self.assertIn(output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir, check_output(MLGIT_INIT))
    self.assertIn(output_messages['INFO_ADD_REMOTE'] % (git_repo, entity_type),
                  check_output(MLGIT_REMOTE_ADD % (entity_type, git_repo)))
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ENTITY_INIT % entity_type))

    bucket = 'test-wizard'
    storage = StorageType.GDRIVEH.value
    # Answers fed to the interactive wizard, one per prompt.
    wizard_answers = '\n'.join(['category', 'strict', 'X', GDRIVEH, bucket, ''])
    CliRunner().invoke(entity.datasets, ['create', entity_type + '-ex', '--wizard'], input=wizard_answers)

    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        config = yaml_processor.load(config_stream)
    self.assertTrue(bucket in config[STORAGE_CONFIG_KEY][GDRIVEH])

    spec_path = os.path.join(self.tmp_dir, entity_type, entity_type + '-ex', entity_type + '-ex.spec')
    with open(spec_path, 'r') as spec_stream:
        spec_file = yaml_processor.load(spec_stream)
    self.assertEqual(spec_file[get_spec_key(entity_type)]['manifest'][STORAGE_SPEC_KEY],
                     storage + '://' + bucket)
def check_created_folders(self, entity_type, store_type=StoreType.S3H.value, version=1, bucket_name='fake_store'):
    """Assert that the ``entity_type`` workspace was created with a data dir, spec file and README."""
    workspace = os.path.join(self.tmp_dir, entity_type, entity_type + '-ex')
    data_dir = os.path.join(workspace, 'data')
    spec_path = os.path.join(workspace, entity_type + '-ex.spec')
    readme_path = os.path.join(workspace, 'README.md')
    with open(spec_path, 'r') as spec_stream:
        spec_content = yaml_processor.load(spec_stream)
    self.assertEqual(spec_content[entity_type]['manifest']['store'], store_type + '://' + bucket_name)
    self.assertEqual(spec_content[entity_type]['name'], entity_type + '-ex')
    self.assertEqual(spec_content[entity_type]['version'], version)
    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        config_content = yaml_processor.load(config_stream)
    self.assertIn(entity_type, config_content)
    self.assertTrue(os.path.exists(data_dir))
    self.assertTrue(os.path.exists(spec_path))
    self.assertTrue(os.path.exists(readme_path))
def test_04_create_import_with_subdir(self):
    """Create a dataset importing from a directory that contains nested subdirectories."""
    self.assertIn(messages[0], check_output(MLGIT_INIT))
    sub_dir = os.path.join('subdir', 'subdir2')
    os.makedirs(os.path.join(self.tmp_dir, IMPORT_PATH, sub_dir))
    create_command = ('ml-git dataset create dataset-ex --category=imgs --store-type=s3h --bucket-name=minio '
                      '--version-number=1 --import="%s"' % os.path.join(self.tmp_dir, IMPORT_PATH))
    self.assertIn(messages[38], check_output(create_command))
    workspace = os.path.join(self.tmp_dir, 'dataset', 'dataset-ex')
    folder_data = os.path.join(workspace, 'data')
    spec = os.path.join(workspace, 'dataset-ex.spec')
    readme = os.path.join(workspace, 'README.md')
    with open(spec, 'r') as spec_stream:
        spec_file = yaml_processor.load(spec_stream)
    self.assertEqual(spec_file['dataset']['manifest']['store'], 's3h://minio')
    self.assertEqual(spec_file['dataset']['name'], 'dataset-ex')
    self.assertEqual(spec_file['dataset']['version'], 1)
    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        config = yaml_processor.load(config_stream)
    self.assertIn('dataset', config)
    # The imported subdirectory tree must be reproduced inside the data folder.
    for expected_path in (folder_data, spec, readme, os.path.join(folder_data, sub_dir)):
        self.assertTrue(os.path.exists(expected_path))
def test_21_add_storage(self):
    """api.storage_add should register the bucket profile under the s3h storage config."""
    api.init('repository')
    config_path = os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml')
    with open(config_path, 'r') as before:
        config = yaml_processor.load(before)
    self.assertNotIn(S3H, config[STORAGE_CONFIG_KEY])
    api.storage_add(bucket_name=BUCKET_NAME, credentials=PROFILE)
    with open(config_path, 'r') as after:
        config = yaml_processor.load(after)
    self.assertEqual(PROFILE, config[STORAGE_CONFIG_KEY][S3H][BUCKET_NAME]['aws-credentials']['profile'])
def test_22_add_storage_azure_type(self):
    """api.storage_add with AZUREBLOBH must register the container under the azure storage config."""
    container = 'container_azure'
    api.init('repository')
    config_path = os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml')
    with open(config_path, 'r') as before:
        config = yaml_processor.load(before)
    self.assertNotIn(AZUREBLOBH, config[STORAGE_CONFIG_KEY])
    api.storage_add(bucket_name=container, bucket_type=AZUREBLOBH)
    with open(config_path, 'r') as after:
        config = yaml_processor.load(after)
    self.assertIn(container, config[STORAGE_CONFIG_KEY][AZUREBLOBH])
def test_23_add_storage_gdrive_type(self):
    """api.storage_add with GDRIVEH must record the credentials path for the drive bucket."""
    drive_name = 'my-drive'
    credentials_path = 'path-to-credentials'
    api.init('repository')
    config_path = os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml')
    with open(config_path, 'r') as before:
        config = yaml_processor.load(before)
    self.assertNotIn(GDRIVEH, config[STORAGE_CONFIG_KEY])
    api.storage_add(bucket_name=drive_name, bucket_type=GDRIVEH, credentials=credentials_path)
    with open(config_path, 'r') as after:
        config = yaml_processor.load(after)
    self.assertEqual(credentials_path, config[STORAGE_CONFIG_KEY][GDRIVEH][drive_name]['credentials-path'])
def test_30_create_entity_and_s3h_storage_with_wizard(self):
    """Drive the create wizard to configure an s3h storage and verify config, spec and workspace."""
    entity_type = DATASETS
    git_repo = os.path.join(self.tmp_dir, GIT_PATH)
    self.assertIn(output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir, check_output(MLGIT_INIT))
    self.assertIn(output_messages['INFO_ADD_REMOTE'] % (git_repo, entity_type),
                  check_output(MLGIT_REMOTE_ADD % (entity_type, git_repo)))
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ENTITY_INIT % entity_type))

    bucket = 'test-wizard'
    endpoint_url = 'www.url.com'
    region = 'us-east-1'
    storage = StorageType.S3H.value
    # Answers fed to the interactive wizard, one per prompt.
    wizard_answers = '\n'.join(['category', 'strict', 'X', storage, bucket, PROFILE, endpoint_url, region, ''])
    CliRunner().invoke(entity.datasets, ['create', entity_type + '-ex', '--wizard'], input=wizard_answers)

    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        config = yaml_processor.load(config_stream)
    s3h_config = config[STORAGE_CONFIG_KEY][S3H]
    self.assertTrue(bucket in s3h_config)
    self.assertEqual(PROFILE, s3h_config[bucket]['aws-credentials']['profile'])
    self.assertEqual(endpoint_url, s3h_config[bucket]['endpoint-url'])
    self.assertEqual(region, s3h_config[bucket]['region'])

    workspace = os.path.join(self.tmp_dir, entity_type, entity_type + '-ex')
    folder_data = os.path.join(workspace, 'data')
    spec_path = os.path.join(workspace, entity_type + '-ex.spec')
    readme = os.path.join(workspace, 'README.md')
    with open(spec_path, 'r') as spec_stream:
        spec_file = yaml_processor.load(spec_stream)
    self.assertEqual(spec_file[get_spec_key(entity_type)]['manifest'][STORAGE_SPEC_KEY],
                     storage + '://' + bucket)
    self.assertTrue(os.path.exists(folder_data))
    self.assertTrue(os.path.exists(spec_path))
    self.assertTrue(os.path.exists(readme))
def test_22_add_store_azure_type(self):
    """api.store_add with azureblobh must register the container under the azure store config."""
    bucket_type = 'azureblobh'
    container = 'container_azure'
    api.init('repository')
    config_path = os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml')
    with open(config_path, 'r') as before:
        config = yaml_processor.load(before)
    self.assertNotIn(bucket_type, config['store'])
    api.store_add(bucket_name=container, bucket_type=bucket_type)
    with open(config_path, 'r') as after:
        config = yaml_processor.load(after)
    self.assertIn(container, config['store'][bucket_type])
def _check_index(self, index, files_in, files_not_in):
    """Load an INDEX.yaml file and assert presence/absence of the given entries."""
    with open(index, 'r') as index_stream:
        indexed_entries = yaml_processor.load(index_stream)
    for expected in files_in:
        self.assertIn(expected, indexed_entries)
    for unexpected in files_not_in:
        self.assertNotIn(unexpected, indexed_entries)
def test_10_add_command_with_metric_file(self):
    """Adding with --metrics-file should persist the CSV metrics into the model spec."""
    repo_type = MODELS
    entity_name = '{}-ex'.format(repo_type)
    self.set_up_add(repo_type)
    create_spec(self, repo_type, self.tmp_dir)
    workspace = os.path.join(self.tmp_dir, repo_type, entity_name)
    os.makedirs(os.path.join(workspace, 'data'))
    create_file(workspace, 'file1', '0')
    csv_file = os.path.join(self.tmp_dir, 'metrics.csv')
    self.create_csv_file(csv_file, {'Accuracy': 1, 'Recall': 2})
    metrics_options = '--metrics-file="{}"'.format(csv_file)
    self.assertIn(output_messages['INFO_ADDING_PATH'] % repo_type,
                  check_output(MLGIT_ADD % (repo_type, entity_name, metrics_options)))
    index = os.path.join(ML_GIT_DIR, repo_type, 'index', 'metadata', entity_name, 'INDEX.yaml')
    self._check_index(index, ['data/file1'], [])
    with open(os.path.join(workspace, entity_name + '.spec')) as spec_stream:
        spec_file = yaml_processor.load(spec_stream)
    metrics = spec_file[get_spec_key(repo_type)].get('metrics', {})
    # Both metrics from the CSV must have been copied into the spec.
    self.assertFalse(metrics == {})
    self.assertTrue(metrics['Accuracy'] == 1)
    self.assertTrue(metrics['Recall'] == 2)
def test_09_add_command_with_metric_for_wrong_entity(self):
    """--metric options must result in empty metrics when adding a dataset entity."""
    repo_type = DATASETS
    self.set_up_add()
    create_spec(self, repo_type, self.tmp_dir)
    workspace = os.path.join(self.tmp_dir, repo_type, DATASET_NAME)
    os.makedirs(os.path.join(workspace, 'data'))
    create_file(workspace, 'file1', '0')
    metrics_options = '--metric Accuracy 1 --metric Recall 2'
    self.assertIn(output_messages['INFO_ADDING_PATH'] % repo_type,
                  check_output(MLGIT_ADD % (repo_type, DATASET_NAME, metrics_options)))
    index = os.path.join(ML_GIT_DIR, repo_type, 'index', 'metadata', DATASET_NAME, 'INDEX.yaml')
    self._check_index(index, ['data/file1'], [])
    with open(os.path.join(workspace, DATASET_NAME + '.spec')) as spec_stream:
        spec_file = yaml_processor.load(spec_stream)
    metrics = spec_file[get_spec_key(repo_type)].get('metrics', {})
    self.assertTrue(metrics == {})
def _remote_del(self, entity_type):
    """Remove the configured git remote for ``entity_type`` and check the output message."""
    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        config = yaml_processor.load(config_stream)
    git_url = config[entity_type]['git']
    self.assertIn(output_messages['INFO_REMOVE_REMOTE'] % (git_url, entity_type),
                  check_output(MLGIT_REMOTE_DEL % entity_type))
def _del_store(self):
    """Delete the default bucket from the store config and assert s3h ends up empty."""
    self.assertIn(messages[76] % BUCKET_NAME, check_output(MLGIT_STORE_DEL % BUCKET_NAME))
    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        config = yaml_processor.load(config_stream)
    self.assertEqual(config['store']['s3h'], {})
def test_02_disable_wizard(self):
    """Disabling the wizard must persist the 'disabled' mode in the global config file.

    Bug fix: the original used ``assertTrue(config[WIZARD_KEY], WizardMode.DISABLED.value)``
    where the second argument is only the failure *message*, so the assertion passed for
    any truthy config value. ``assertEqual`` actually verifies the persisted mode.
    """
    mode = WizardMode.DISABLED.value
    self.assertIn(output_messages['INFO_WIZARD_MODE_CHANGED'].format(mode),
                  check_output(MLGIT_CONFIG_WIZARD % mode))
    with open(os.path.join(GLOBAL_CONFIG_PATH, GLOBAL_ML_GIT_CONFIG), 'r') as config_file:
        config = yaml_processor.load(config_file)
    self.assertEqual(mode, config[WIZARD_KEY])
def _verify_mutability(self, entity_type, mutability_type, spec_with_categories):
    """Assert the spec file records ``mutability_type`` and return the loaded spec."""
    with open(spec_with_categories) as spec_stream:
        ws_spec = yaml_processor.load(spec_stream)
    self.assertEqual(ws_spec[entity_type]['mutability'], mutability_type)
    return ws_spec
def test_05_checkout_bare_in_older_tag(self):
    """After a bare checkout of an older tag, checking out the newest tag restores all files."""
    entity_type = 'dataset'
    self._create_entity_with_mutability(entity_type, 'strict')
    data_path = os.path.join(self.tmp_dir, entity_type, 'computer-vision', 'images', entity_type + '-ex')
    self._clear_path()
    # Bare checkout of the first version, then add a new file on top of it.
    self._checkout_entity(entity_type, tag='computer-vision__images__' + entity_type + '-ex__1')
    os.mkdir(os.path.join(data_path, 'data'))
    create_file(data_path, 'file3', '1')
    spec_path = os.path.join(self.tmp_dir, 'dataset', 'computer-vision', 'images', 'dataset-ex', 'dataset-ex.spec')
    with open(spec_path, 'r') as spec_stream:
        spec = yaml_processor.load(spec_stream)
    spec['dataset']['version'] = 2
    with open(spec_path, 'w') as spec_stream:
        yaml_processor.dump(spec, spec_stream)
    self._push_files(entity_type)
    self._clear_path()
    # NOTE(review): tag version 3 is checked out here even though the spec was set to 2 —
    # presumably the push bumps the version again; confirm against _push_files' behavior.
    self._checkout_entity(entity_type, tag='computer-vision__images__' + entity_type + '-ex__3', bare=False)
    file_path = os.path.join(self.tmp_dir, entity_type, 'computer-vision', 'images', entity_type + '-ex', 'data')
    self.assertTrue(os.path.exists(os.path.join(file_path, 'file1')))
    self.assertTrue(os.path.exists(os.path.join(file_path, 'file3')))
def test_13_commit_files(self):
    """Commit a dataset and then a labels entity related to it, checking refs and the spec tag."""
    self.set_up_test()
    self.set_up_add_test()
    api.add(DATASETS, DATASET_NAME, bumpversion=True, fsck=False, file_path=['file'])
    api.commit(DATASETS, DATASET_NAME)
    dataset_head = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS, 'refs', DATASET_NAME, 'HEAD')
    self.assertTrue(os.path.exists(dataset_head))
    init_repository(LABELS, self)
    self.create_file_in_ws(LABELS, 'file', '0')
    api.add(LABELS, 'labels-ex', bumpversion=True, fsck=False, file_path=['file'])
    api.commit(LABELS, 'labels-ex', related_dataset=DATASET_NAME)
    labels_metadata = os.path.join(self.tmp_dir, ML_GIT_DIR, LABELS, 'metadata')
    with open(os.path.join(labels_metadata, 'labels-ex', 'labels-ex.spec')) as spec_stream:
        spec = yaml_processor.load(spec_stream)
    labels_head = os.path.join(self.tmp_dir, ML_GIT_DIR, LABELS, 'refs', 'labels-ex', 'HEAD')
    self.assertTrue(os.path.exists(labels_head))
    # The labels spec must reference the tag of the dataset it was committed against.
    self.assertEqual('computer-vision__images__datasets-ex__11', spec[LABELS_SPEC_KEY][DATASET_SPEC_KEY]['tag'])
def _add_remote(self, entity_type):
    """Add a git remote through the API and assert it was written to config.yaml."""
    api.init('repository')
    remote_url = os.path.join(self.tmp_dir, GIT_PATH)
    api.remote_add(entity_type, remote_url)
    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        config = yaml_processor.load(config_stream)
    self.assertEqual(remote_url, config[entity_type]['git'])
def _check_spec_version(self, repo_type, expected_version):
    """Assert the entity spec file stores ``expected_version``.

    Fix: ``assertEquals`` is a long-deprecated alias (removed in Python 3.12);
    use ``assertEqual`` instead.
    """
    entity_name = '{}-ex'.format(repo_type)
    # NOTE(review): the workspace path is rooted at DATASETS even though the entity
    # name comes from repo_type — confirm this helper is only ever used for datasets.
    workspace = os.path.join(self.tmp_dir, DATASETS, entity_name)
    with open(os.path.join(workspace, entity_name + '.spec')) as spec:
        spec_file = yaml_processor.load(spec)
        spec_key = get_spec_key(repo_type)
        version = spec_file[spec_key].get('version', 0)
        self.assertEqual(version, expected_version)
def _add_remote(self, entity_type):
    """Initialize a project, disable the wizard and add a git remote, validating config.yaml."""
    self.assertIn(output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)
    remote_url = os.path.join(self.tmp_dir, GIT_PATH)
    self.assertIn(output_messages['INFO_ADD_REMOTE'] % (remote_url, entity_type),
                  check_output(MLGIT_REMOTE_ADD % (entity_type, remote_url)))
    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        config = yaml_processor.load(config_stream)
    self.assertEqual(remote_url, config[entity_type]['git'])
def _del_storage(self):
    """Remove the default bucket and assert the s3h storage section becomes empty."""
    self.assertIn(output_messages['INFO_REMOVED_STORAGE'] % (STORAGE_TYPE, BUCKET_NAME),
                  check_output(MLGIT_STORAGE_DEL % BUCKET_NAME))
    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        config = yaml_processor.load(config_stream)
    self.assertEqual(config[STORAGE_CONFIG_KEY][S3H], {})
def _add_store(self):
    """Add the default s3h store via CLI and check the profile lands in config.yaml."""
    self.assertIn(messages[0], check_output(MLGIT_INIT))
    self.check_store()
    self.assertIn(messages[7] % (STORE_TYPE, BUCKET_NAME, PROFILE),
                  check_output(MLGIT_STORE_ADD % (BUCKET_NAME, PROFILE)))
    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        config = yaml_processor.load(config_stream)
    self.assertEqual(PROFILE, config['store']['s3h'][BUCKET_NAME]['aws-credentials']['profile'])
def test_03_add_store_subfolder(self):
    """Adding a store must work when invoked from a subfolder of the project root."""
    self.assertIn(messages[0], check_output(MLGIT_INIT))
    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        config = yaml_processor.load(config_stream)
    self.assertNotIn('s3h', config['store'])
    # Run the command from inside .ml-git to exercise project-root discovery.
    os.chdir(os.path.join(self.tmp_dir, ML_GIT_DIR))
    self.assertIn(messages[7] % (STORE_TYPE, BUCKET_NAME, PROFILE),
                  check_output(MLGIT_STORE_ADD % (BUCKET_NAME, PROFILE)))
def test_03_add_storage_subfolder(self):
    """Adding a storage must work when invoked from a subfolder of the project root."""
    self.assertIn(output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)
    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        config = yaml_processor.load(config_stream)
    self.assertNotIn(S3H, config[STORAGE_CONFIG_KEY])
    # Run the command from inside .ml-git to exercise project-root discovery.
    os.chdir(os.path.join(self.tmp_dir, ML_GIT_DIR))
    self.assertIn(output_messages['INFO_ADD_STORAGE'] % (STORAGE_TYPE, BUCKET_NAME, PROFILE),
                  check_output(MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE)))
def _add_storage(self):
    """Add the default s3h storage via CLI and check the profile lands in config.yaml."""
    self.assertIn(output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)
    self.check_storage()
    self.assertIn(output_messages['INFO_ADD_STORAGE'] % (STORAGE_TYPE, BUCKET_NAME, PROFILE),
                  check_output(MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE)))
    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        config = yaml_processor.load(config_stream)
    self.assertEqual(PROFILE, config[STORAGE_CONFIG_KEY][S3H][BUCKET_NAME]['aws-credentials']['profile'])
def test_10_add_storage_without_credentials(self):
    """Adding a storage without credentials keeps profile as None and sets the default region."""
    self.assertIn(output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)
    self.check_storage()
    self.assertIn(output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] % (STORAGE_TYPE, BUCKET_NAME),
                  check_output(MLGIT_STORAGE_ADD_WITHOUT_CREDENTIALS % BUCKET_NAME))
    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        config = yaml_processor.load(config_stream)
    bucket_config = config[STORAGE_CONFIG_KEY][S3H][BUCKET_NAME]
    self.assertEqual(None, bucket_config['aws-credentials']['profile'])
    self.assertEqual('us-east-1', bucket_config['region'])
def add_store_type(self, bucket, profile, store_type):
    """Add a store of ``store_type`` via CLI and return the resulting config mapping."""
    self.assertIn(messages[0], check_output(MLGIT_INIT))
    result = check_output(MLGIT_STORE_ADD_WITH_TYPE % (bucket, profile, store_type))
    # Non-s3h store types are added without a profile, so a different message is printed.
    if store_type == STORE_TYPE:
        expected = messages[7] % (store_type, bucket, profile)
    else:
        expected = messages[87] % (store_type, bucket)
    self.assertIn(expected, result)
    with open(os.path.join(ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        return yaml_processor.load(config_stream)
def _clean_up_local_config(self):
    """Blank out the git remotes for all entity types in the local config file."""
    config_path = os.path.join(self.tmp_dir, '.ml-git/config.yaml')
    with open(config_path) as config_stream:
        config = yaml_processor.load(config_stream)
    for entity_section in ('datasets', 'labels', 'models'):
        config[entity_section]['git'] = ''
    with open(config_path, 'w') as config_stream:
        yaml_processor.dump(config, config_stream)
def add_storage_type(self, bucket, profile, storage_type):
    """Add a storage of ``storage_type`` via CLI and return the resulting config mapping."""
    self.assertIn(output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)
    result = check_output(MLGIT_STORAGE_ADD_WITH_TYPE % (bucket, profile, storage_type))
    # Non-s3h storage types are added without a profile, so a different message is printed.
    if storage_type == STORAGE_TYPE:
        expected = output_messages['INFO_ADD_STORAGE'] % (storage_type, bucket, profile)
    else:
        expected = output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] % (storage_type, bucket)
    self.assertIn(expected, result)
    with open(os.path.join(ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        return yaml_processor.load(config_stream)
def _add_remote(self, entity_type):
    """Initialize a project and add a git remote, checking it lands in config.yaml."""
    self.assertIn(messages[0], check_output(MLGIT_INIT))
    remote_url = os.path.join(self.tmp_dir, GIT_PATH)
    self.assertIn(messages[2] % (remote_url, entity_type),
                  check_output(MLGIT_REMOTE_ADD % (entity_type, remote_url)))
    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_stream:
        config = yaml_processor.load(config_stream)
    self.assertEqual(remote_url, config[entity_type]['git'])