def test_13_add_storage_with_empty_parameter(self):
    """Adding a storage with an empty ``--type`` value reports an invalid-value error."""
    init_message = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(init_message, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)
    self.check_storage()

    # The bucket name is followed by a ``--type=`` option that carries no value.
    expected_error = output_messages['ERROR_INVALID_VALUE_FOR'] % (
        '--type', output_messages['ERROR_EMPTY_VALUE'])
    storage_arguments = '{} --type='.format(BUCKET_NAME)
    self.assertIn(
        expected_error,
        check_output(MLGIT_STORAGE_ADD_WITHOUT_CREDENTIALS % storage_arguments))
def test_04_create_with_wrong_import_url(self):
    """Creating a dataset with a malformed ``--import-url`` reports an invalid-URL error."""
    init_message = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(init_message, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)

    expected_error = output_messages['ERROR_INVALID_URL'] % 'import_url'
    create_options = (
        ' --categories=img --version=1 --import-url="import_url" '
        + '--credentials-path=' + CREDENTIALS_PATH
        + ' --mutability=' + STRICT
    )
    create_command = MLGIT_CREATE % (DATASETS, DATASET_NAME) + create_options
    self.assertIn(expected_error, check_output(create_command))
def test_14_add_storage_with_invalid_type(self):
    """Adding a storage with an unknown ``--type`` value reports an invalid-value error."""
    invalid_type = 'not_a_type'

    init_message = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(init_message, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)
    self.check_storage()

    type_error_detail = output_messages['ERROR_STORAGE_TYPE_INPUT_INVALID'].format(invalid_type)
    expected_error = output_messages['ERROR_INVALID_VALUE_FOR'] % ('--type', type_error_detail)
    storage_arguments = '{}{}'.format(BUCKET_NAME, ' --type=' + invalid_type)
    self.assertIn(
        expected_error,
        check_output(MLGIT_STORAGE_ADD_WITHOUT_CREDENTIALS % storage_arguments))
def test_12_add_sftph_storage_with_invalid_port(self):
    """Adding an ``sftph`` storage with a non-numeric ``--port`` reports an invalid-value error."""
    invalid_port = 'port'

    init_message = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(init_message, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)
    self.check_storage()

    expected_error = output_messages['ERROR_INVALID_VALUE_FOR'] % ('--port', invalid_port)
    storage_arguments = '{}{}'.format(
        BUCKET_NAME, ' --region=any --type=sftph --port=' + invalid_port)
    self.assertIn(
        expected_error,
        check_output(MLGIT_STORAGE_ADD_WITHOUT_CREDENTIALS % storage_arguments))
def _add_remote(self, entity_type):
    """Initialise a project, add a git remote for ``entity_type`` and verify the stored config.

    :param entity_type: entity key under which the remote is registered and
        later looked up in ``config.yaml``.
    """
    remote_path = os.path.join(self.tmp_dir, GIT_PATH)
    config_path = os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml')

    init_message = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(init_message, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)

    remote_message = output_messages['INFO_ADD_REMOTE'] % (remote_path, entity_type)
    self.assertIn(remote_message, check_output(MLGIT_REMOTE_ADD % (entity_type, remote_path)))

    # The remote must have been persisted in the project configuration file.
    with open(config_path, 'r') as config_file:
        stored_config = yaml_processor.load(config_file)
        self.assertEqual(remote_path, stored_config[entity_type]['git'])
def test_03_add_storage_subfolder(self):
    """Adding a storage works when the command is run from inside the ml-git directory."""
    init_message = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(init_message, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)

    ml_git_dir = os.path.join(self.tmp_dir, ML_GIT_DIR)

    # Before the command runs, no S3H storage may be configured.
    with open(os.path.join(ml_git_dir, 'config.yaml'), 'r') as config_file:
        stored_config = yaml_processor.load(config_file)
        self.assertNotIn(S3H, stored_config[STORAGE_CONFIG_KEY])

    # Run the storage command from a sub-folder of the project.
    os.chdir(ml_git_dir)
    storage_message = output_messages['INFO_ADD_STORAGE'] % (STORAGE_TYPE, BUCKET_NAME, PROFILE)
    self.assertIn(storage_message, check_output(MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE)))
def _add_storage(self):
    """Initialise a project, add the default storage and verify the stored profile."""
    init_message = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(init_message, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)
    self.check_storage()

    storage_message = output_messages['INFO_ADD_STORAGE'] % (STORAGE_TYPE, BUCKET_NAME, PROFILE)
    self.assertIn(storage_message, check_output(MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE)))

    # The credentials profile must have been written to the project config.
    config_path = os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml')
    with open(config_path, 'r') as config_file:
        stored_config = yaml_processor.load(config_file)
        bucket_settings = stored_config[STORAGE_CONFIG_KEY][S3H][BUCKET_NAME]
        self.assertEqual(PROFILE, bucket_settings['aws-credentials']['profile'])
def test_10_add_storage_without_credentials(self):
    """Adding a storage without credentials stores a null profile and the ``us-east-1`` region."""
    self.assertIn(
        output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir,
        check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)
    self.check_storage()

    self.assertIn(
        output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] % (STORAGE_TYPE, BUCKET_NAME),
        check_output(MLGIT_STORAGE_ADD_WITHOUT_CREDENTIALS % BUCKET_NAME))

    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as c:
        config = yaml_processor.load(c)

    bucket_config = config[STORAGE_CONFIG_KEY][S3H][BUCKET_NAME]
    # assertIsNone is the dedicated check for None and gives a clearer
    # failure message than assertEqual(None, ...).
    self.assertIsNone(bucket_config['aws-credentials']['profile'])
    self.assertEqual('us-east-1', bucket_config['region'])
def test_18_create_without_categories_option(self):
    """Creating an entity without ``--categories`` reports the missing option."""
    entity_type = DATASETS
    entity_name = entity_type + '-ex'

    init_message = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(init_message, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)

    create_command = MLGIT_CREATE % (entity_type, entity_name) + ' --version=1'
    self.assertIn('Missing option "--categories"', check_output(create_command))
def test_16_create_without_mutability_option(self):
    """Creating an entity without ``--mutability`` reports the missing option."""
    entity_type = DATASETS
    entity_name = entity_type + '-ex'

    init_message = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(init_message, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)

    expected_error = output_messages['ERROR_MISSING_OPTION'].format('mutability')
    create_command = MLGIT_CREATE % (entity_type, entity_name) + ' --categories=img --version=1'
    self.assertIn(expected_error, check_output(create_command))
def add_storage_type(self, bucket, profile, storage_type):
    """Initialise a project, add a storage of ``storage_type`` and return the loaded config.

    :param bucket: bucket name passed to the storage-add command.
    :param profile: credentials profile passed to the storage-add command.
    :param storage_type: storage type passed to the storage-add command; it
        also selects which confirmation message is expected.
    :return: the project configuration loaded from ``config.yaml``.
    """
    self.assertIn(
        output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir,
        check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)

    result = check_output(MLGIT_STORAGE_ADD_WITH_TYPE % (bucket, profile, storage_type))
    if storage_type == STORAGE_TYPE:
        self.assertIn(
            output_messages['INFO_ADD_STORAGE'] % (storage_type, bucket, profile), result)
    else:
        # For any other storage type the message without a profile is expected.
        self.assertIn(
            output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] % (storage_type, bucket), result)

    # Anchor the config path to the project directory instead of relying on
    # the process' current working directory.
    with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as c:
        config = yaml_processor.load(c)
    return config
def test_04_update_with_git_error(self):
    """Updating metadata after only init and storage-add reports that it could not be updated."""
    init_message = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(init_message, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)

    storage_message = output_messages['INFO_ADD_STORAGE'] % (STORAGE_TYPE, BUCKET_NAME, PROFILE)
    self.assertIn(storage_message, check_output(MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE)))

    update_error = output_messages['ERROR_METADATA_COULD_NOT_UPDATED'].format('')
    self.assertIn(update_error, check_output(MLGIT_UPDATE % DATASETS))
def set_up_global(self, entity_type=DATASETS):
    """Create a global configuration with a remote and a storage, then drop the local project.

    :param entity_type: entity whose global git remote is registered.
    """
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)

    remote_command = MLGIT_REMOTE_ADD_GLOBAL % (entity_type, 'local_git_server.git')
    self.assertNotIn(ERROR_MESSAGE, check_output(remote_command))

    storage_command = MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE + ' --global')
    self.assertNotIn(ERROR_MESSAGE, check_output(storage_command))

    # Rewrite the 'mlgit' bucket's endpoint URL in the global configuration.
    global_config_path = os.path.join(self.tmp_dir, GLOBAL_ML_GIT_CONFIG)
    with open(global_config_path, 'r') as config_file:
        global_config = yaml_processor.load(config_file)
        s3h_storages = global_config[STORAGE_CONFIG_KEY][StorageType.S3H.value]
        s3h_storages['mlgit']['endpoint-url'] = MINIO_ENDPOINT_URL
    with open(global_config_path, 'w') as config_file:
        yaml_processor.dump(global_config, config_file)

    # Remove the local project directory created by the init command above.
    clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
def test_03_create_gdrive(self):
    """Create two datasets importing from gdrive links and check the imported paths exist."""
    init_message = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(init_message, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)

    # First dataset: import from the 'test-folder' link.
    folder_command = (
        MLGIT_CREATE % (DATASETS, DATASET_NAME)
        + ' --categories=imgs --bucket-name=test'
        + ' --import-url=%s --credentials-path=%s ' % (
            self.gdrive_links['test-folder'], CREDENTIALS_PATH)
        + ' --mutability=%s' % STRICT
    )
    self.assertIn(output_messages['INFO_DATASETS_CREATED'], check_output(folder_command))
    imported_folder_entry = os.path.join(DATASETS, DATASET_NAME, 'data', 'test-folder', 'A')
    self.assertTrue(os.path.exists(imported_folder_entry))

    # Second dataset: import from the 'B' link.
    file_command = (
        MLGIT_CREATE % (DATASETS, 'datasets-ex2')
        + ' --categories=imgs --bucket-name=test'
        + ' --import-url=%s --credentials-path=%s' % (
            self.gdrive_links['B'], CREDENTIALS_PATH)
        + ' --mutability=%s' % STRICT
    )
    self.assertIn(output_messages['INFO_DATASETS_CREATED'], check_output(file_command))
    imported_file_entry = os.path.join(DATASETS, 'datasets-ex2', 'data', 'B')
    self.assertTrue(os.path.exists(imported_file_entry))
def test_20_model_related(self):
    """Push a model linked to a dataset and labels, then check it out with ``-d -l``.

    After the checkout, the models, datasets and labels directories must all
    exist in the project directory.
    """
    git_server = os.path.join(self.tmp_dir, GIT_PATH)
    ml_git_root = os.path.join(self.tmp_dir, '.ml-git')
    expected_push_result = '2.00/2.00'

    def init_entity(entity):
        # Initialise the metadata repository of one entity type.
        self.assertIn(
            output_messages['INFO_METADATA_INIT'] % (
                git_server, os.path.join(ml_git_root, entity, 'metadata')),
            check_output(MLGIT_ENTITY_INIT % entity))

    def prepare_entity(entity):
        # Register remote and storage, init metadata, and create a workspace
        # holding a spec plus one 1 KiB file. Returns the workspace path.
        self.assertIn(
            output_messages['INFO_ADD_REMOTE'] % (git_server, entity),
            check_output(MLGIT_REMOTE_ADD % (entity, git_server)))
        self.assertIn(
            output_messages['INFO_ADD_STORAGE'] % (STORAGE_TYPE, BUCKET_NAME, PROFILE),
            check_output(MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE)))
        init_entity(entity)
        edit_config_yaml(ml_git_root)
        workspace = os.path.join(entity, entity + '-ex')
        os.makedirs(workspace)
        create_spec(self, entity, self.tmp_dir, 1)
        with open(os.path.join(self.tmp_dir, workspace, 'file1'), 'wb') as data_file:
            data_file.write(b'0' * 1024)
        return workspace

    def publish(entity, name, commit_command):
        # Add, commit (with the given command line) and push one entity.
        self.assertIn(
            output_messages['INFO_ADDING_PATH'] % entity,
            check_output(MLGIT_ADD % (entity, name, '--bumpversion')))
        self.assertIn(
            output_messages['INFO_COMMIT_REPO'] % (
                os.path.join(ml_git_root, entity, 'metadata'), name),
            check_output(commit_command))
        self.assertIn(expected_push_result, check_output(MLGIT_PUSH % (entity, name)))

    self.assertIn(
        output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir,
        check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)

    workspace_model = prepare_entity(MODELS)

    workspace_dataset = prepare_entity(DATASETS)
    publish(DATASETS, DATASET_NAME, MLGIT_COMMIT % (DATASETS, DATASET_NAME, ''))

    workspace_labels = prepare_entity(LABELS)
    publish(LABELS, LABELS + '-ex', MLGIT_COMMIT % (LABELS, LABELS + '-ex', ''))

    # The model is committed last so it can reference the dataset and labels.
    publish(
        MODELS, MODELS + '-ex',
        MLGIT_COMMIT % (MODELS, MODELS + '-ex', '--dataset=datasets-ex') + ' --labels=labels-ex')

    for workspace in (workspace_model, workspace_dataset, workspace_labels):
        set_write_read(os.path.join(self.tmp_dir, workspace, 'file1'))
    if not sys.platform.startswith('linux'):
        recursive_write_read(ml_git_root)

    # Wipe the workspaces first, then the per-entity ml-git data.
    for entity in (MODELS, DATASETS, LABELS):
        clear(os.path.join(self.tmp_dir, entity))
    for entity in (MODELS, DATASETS, LABELS):
        clear(os.path.join(ml_git_root, entity))

    init_entity(MODELS)
    self.assertNotIn(
        ERROR_MESSAGE,
        check_output(
            MLGIT_CHECKOUT % (MODELS, 'computer-vision__images__models-ex__1') + ' -d -l'))

    for entity in (MODELS, DATASETS, LABELS):
        self.assertTrue(os.path.exists(os.path.join(self.tmp_dir, entity)))
def test_01_change_metadata(self):
    """Change the README and spec, commit/push version 17, then check it out from a clean project."""

    def current_status():
        return check_output(MLGIT_STATUS % (DATASETS, DATASET_NAME))

    def add_dataset():
        self.assertIn(
            output_messages['INFO_ADDING_PATH'] % DATASETS,
            check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, '')))

    init_repository(DATASETS, self)
    self.assertRegex(
        current_status(),
        DATASET_NO_COMMITS_INFO_REGEX
        + r'Untracked files:\s+'
        + DATASET_ADD_INFO_REGEX
        + r'datasets-ex.spec')
    add_dataset()

    # A new README shows up as untracked next to the already added spec.
    readme = os.path.join(DATASETS, DATASET_NAME, 'README.md')
    with open(readme, 'w') as readme_file:
        readme_file.write('NEW')
    self.assertRegex(
        current_status(),
        DATASET_NO_COMMITS_INFO_REGEX
        + r'Changes to be committed:\s+New file: datasets-ex.spec\s+Untracked files:\s+'
        + DATASET_ADD_INFO_REGEX
        + r'README.md')
    add_dataset()

    status = current_status()
    for staged_entry in ('New file: datasets-ex.spec', 'New file: README.md'):
        self.assertIn(staged_entry, status)

    # Modify both files after they were added.
    with open(readme, 'w') as readme_file:
        readme_file.write('NEW2')
    spec = {
        DATASET_SPEC_KEY: {
            'categories': ['computer-vision', 'images'],
            'manifest': {
                'files': 'MANIFEST.yaml',
                STORAGE_SPEC_KEY: '%s://mlgit' % S3H
            },
            'mutability': STRICT,
            'name': 'datasets-ex',
            'version': 17
        }
    }
    with open(os.path.join(DATASETS, DATASET_NAME, 'datasets-ex.spec'), 'w') as spec_out:
        yaml_processor.dump(spec, spec_out)

    status = current_status()
    # NOTE(review): the earlier assertions match 'New file:' with a capital N,
    # so these lower-case negative checks may never be able to fail -- confirm
    # the intended casing against the actual status output.
    self.assertNotIn('new file: README.md', status)
    self.assertIn('README.md', status)
    self.assertNotIn('new file: datasets-ex.spec', status)
    self.assertIn('datasets-ex.spec', status)

    workspace_path = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
    create_file(workspace_path, 'file', '0', '')
    add_dataset()

    metadata_path = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS, 'metadata')
    self.assertIn(
        output_messages['INFO_COMMIT_REPO'] % (metadata_path, DATASET_NAME),
        check_output(MLGIT_COMMIT % (DATASETS, DATASET_NAME, ' --version=17')))
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_PUSH % (DATASETS, DATASET_NAME)))
    self.assertRegex(current_status(), DATASET_NO_COMMITS_INFO_REGEX)

    # Start over from an empty project and fetch the pushed version.
    clear(ML_GIT_DIR)
    clear(DATASETS)
    self.assertIn(
        output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir,
        check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)
    self.assertIn(
        output_messages['INFO_ADD_REMOTE'] % (GIT_PATH, DATASETS),
        check_output(MLGIT_REMOTE_ADD % (DATASETS, GIT_PATH)))
    self.assertIn(
        output_messages['INFO_ADD_STORAGE'] % (STORAGE_TYPE, BUCKET_NAME, PROFILE),
        check_output(MLGIT_STORAGE_ADD_WITH_TYPE % (BUCKET_NAME, PROFILE, STORAGE_TYPE)))
    self.assertIn(
        output_messages['INFO_METADATA_INIT'] % (GIT_PATH, metadata_path),
        check_output(MLGIT_ENTITY_INIT % DATASETS))
    self.assertNotIn(
        ERROR_MESSAGE,
        check_output(
            MLGIT_CHECKOUT % (DATASETS, 'computer-vision__images__datasets-ex__17 --bare')))

    # The checked-out workspace must carry the modified spec and README.
    spec_file = os.path.join(workspace_path, 'datasets-ex.spec')
    readme = os.path.join(workspace_path, 'README.md')
    with open(spec_file, 'r') as spec_in:
        spec = yaml_processor.load(spec_in)
        self.assertEqual(spec[DATASET_SPEC_KEY]['version'], 17)
    with open(readme, 'r') as readme_in:
        self.assertEqual(readme_in.read(), 'NEW2')