def test_13_commit_files(self):
    """Commit a dataset, then a labels entity related to it, and verify both.

    After each commit the entity's HEAD ref file must exist, and the committed
    labels spec must carry the expected dataset tag.
    """

    def head_ref(entity_type, entity_name):
        # Location of the HEAD ref file for an entity under ML_GIT_DIR.
        return os.path.join(
            self.tmp_dir, ML_GIT_DIR, entity_type, 'refs', entity_name, 'HEAD')

    self.set_up_test()
    self.set_up_add_test()

    # Dataset side: stage 'file' with a version bump and commit.
    api.add(DATASETS, DATASET_NAME, bumpversion=True, fsck=False,
            file_path=['file'])
    api.commit(DATASETS, DATASET_NAME)
    self.assertTrue(os.path.exists(head_ref(DATASETS, DATASET_NAME)))

    # Labels side: new repository, one file, commit linked to the dataset.
    init_repository(LABELS, self)
    self.create_file_in_ws(LABELS, 'file', '0')
    api.add(LABELS, 'labels-ex', bumpversion=True, fsck=False,
            file_path=['file'])
    api.commit(LABELS, 'labels-ex', related_dataset=DATASET_NAME)

    spec_path = os.path.join(
        self.tmp_dir, ML_GIT_DIR, LABELS, 'metadata',
        'labels-ex', 'labels-ex.spec')
    with open(spec_path) as spec_file:
        labels_spec = yaml_processor.load(spec_file)

    self.assertTrue(os.path.exists(head_ref(LABELS, 'labels-ex')))
    self.assertEqual(
        'computer-vision__images__datasets-ex__11',
        labels_spec[LABELS_SPEC_KEY][DATASET_SPEC_KEY]['tag'])
def test_42_graph_order_by_version_number(self):
    """Check the exported graph chains the dataset versions 1 -> 2 -> 10.

    Three commits are made: one without a version bump, one with
    ``bumpversion=True`` and one through the command line with
    ``--version=10``. The exported graph file must contain both edges.
    """
    init_repository(DATASETS, self)

    # First commit: no bump requested.
    self.create_file_in_ws(DATASETS, 'file', '1')
    api.add(DATASETS, DATASET_NAME, file_path=['file'])
    api.commit(DATASETS, DATASET_NAME)

    # Second commit: version bumped on add.
    self.create_file_in_ws(DATASETS, 'file2', '1')
    api.add(DATASETS, DATASET_NAME, bumpversion=True, file_path=['file2'])
    api.commit(DATASETS, DATASET_NAME)

    # Third commit: explicit version passed on the command line.
    self.create_file_in_ws(DATASETS, 'file3', '1')
    api.add(DATASETS, DATASET_NAME, file_path=['file3'])
    commit_output = check_output(
        MLGIT_COMMIT % (DATASETS, DATASET_NAME, '--version=10'))
    self.assertNotIn(ERROR_MESSAGE, commit_output)

    manager = api.init_local_entity_manager()
    relationships = manager.get_project_entities_relationships(
        export_type=FileType.DOT.value)
    exported_path = manager.export_graph(relationships)
    self.assertTrue(os.path.exists(exported_path))

    with open(exported_path, 'r') as exported:
        dot_text = exported.read()

    # Each template is an edge between two consecutive dataset versions.
    edge_templates = (
        '\\"{} (1)\\" -> \\"{} (2)\\"',
        '\\"{} (2)\\" -> \\"{} (10)\\"',
    )
    for edge_template in edge_templates:
        self.assertIn(
            edge_template.format(DATASET_NAME, DATASET_NAME), dot_text)
def _push_model_with_metrics(self, entity_name):
    """Create a model entity, add it with metrics, then commit and push it.

    :param entity_name: name of the model entity created under ``MODELS``.
    """
    init_repository(MODELS, self)
    model_workspace = os.path.join(self.tmp_dir, MODELS, entity_name)

    api.create(
        MODELS,
        entity_name,
        categories=['computer-vision', 'images'],
        mutability=STRICT,
        bucket_name='mlgit',
    )

    data_dir = os.path.join(model_workspace, 'data')
    os.makedirs(data_dir, exist_ok=True)
    self.create_file(model_workspace, 'file1', '0')

    # Metrics are attached at add time, before the commit/push.
    metrics = {'accuracy': 10.0, 'precision': 10.0}
    api.add(MODELS, entity_name, metric=metrics)
    api.commit(MODELS, entity_name)
    api.push(MODELS, entity_name)
def test_11_add_files_with_bumpversion(self):
    """Add a new file with ``bumpversion=True`` and check the version moves.

    The entity version check is run with 1 before the bump and with 2 after
    the bumped add.
    """
    self.set_up_add_test()
    self.check_entity_version(1)

    # Baseline add/commit without a version bump.
    api.add(DATASETS, DATASET_NAME, fsck=False, file_path=[])
    api.commit(DATASETS, DATASET_NAME)

    # New workspace file, added with the bump requested.
    added_file = 'new-file-test'
    self.create_file_in_ws(DATASETS, added_file, '0')
    api.add(DATASETS, DATASET_NAME, bumpversion=True, fsck=False,
            file_path=[])

    self.check_add(files=[added_file])
    self.check_entity_version(2)
def set_up_test(self, entity):
    """Prepare an ``<entity>-ex`` workspace with four files, add and commit it.

    :param entity: entity type; also used to build the ``<entity>-ex`` name.
    """
    init_repository(entity, self)

    entity_name = entity + '-ex'
    workspace = os.path.join(self.tmp_dir, entity, entity_name)
    os.makedirs(workspace, exist_ok=True)
    # NOTE(review): 20 is presumably the spec version - confirm against
    # create_spec's signature.
    create_spec(self, entity, self.tmp_dir, 20, STRICT)
    os.makedirs(os.path.join(workspace, 'data'), exist_ok=True)

    workspace_files = (
        ('file1', '0'),
        ('file2', '1'),
        ('file3', 'a'),
        ('file4', 'b'),
    )
    for file_name, content in workspace_files:
        self.create_file(workspace, file_name, content)

    api.add(entity, entity_name, bumpversion=True, fsck=False,
            file_path=['file'])
    api.commit(entity, entity_name)
def set_up_test(self):
    """Push a ``dataset-ex`` dataset, then wipe the local state.

    A spec and four files are written to the workspace, the entity is added
    (with a version bump), committed and pushed. After checking that the
    metadata path exists, ML_GIT_DIR and the workspace are cleared and the
    repository is initialised again.
    """
    entity_type = 'dataset'
    entity_name = 'dataset-ex'

    init_repository(entity_type, self)
    workspace = os.path.join(self.tmp_dir, entity_type, entity_name)
    os.makedirs(workspace, exist_ok=True)

    manifest = {'files': 'MANIFEST.yaml', 'store': 's3h://mlgit'}
    spec = {
        entity_type: {
            'categories': ['computer-vision', 'images'],
            'manifest': manifest,
            'mutability': Mutability.STRICT.value,
            'name': entity_name,
            'version': 9,
        }
    }
    spec_path = os.path.join(workspace, 'dataset-ex.spec')
    with open(spec_path, 'w') as spec_file:
        yaml_processor.dump(spec, spec_file)

    os.makedirs(os.path.join(workspace, 'data'), exist_ok=True)
    workspace_files = (
        ('file1', '0'),
        ('file2', '1'),
        ('file3', 'a'),
        ('file4', 'b'),
    )
    for file_name, content in workspace_files:
        self.create_file(workspace, file_name, content)

    api.add(entity_type, entity_name, bumpversion=True)
    api.commit(entity_type, entity_name)
    api.push(entity_type, entity_name)

    metadata_path = os.path.join(self.tmp_dir, self.metadata)
    self.assertTrue(os.path.exists(metadata_path))

    # Drop the local ml-git data and workspace, then start from a fresh
    # repository.
    clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
    clear(workspace)
    init_repository(entity_type, self)
def set_up_test(self):
    """Push the ``DATASET_NAME`` dataset, then wipe the local state.

    A spec and four files are written to the workspace, the entity is added,
    committed and pushed. After checking that the metadata path exists,
    ML_GIT_DIR and the workspace are cleared and the repository is
    initialised again.
    """
    init_repository(DATASETS, self)
    workspace = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
    os.makedirs(workspace, exist_ok=True)

    spec = {
        DATASET_SPEC_KEY: {
            'categories': ['computer-vision', 'images'],
            'manifest': {
                'files': 'MANIFEST.yaml',
                STORAGE_SPEC_KEY: '%s://mlgit' % S3H,
            },
            'mutability': STRICT,
            'name': DATASET_NAME,
            'version': 10,
        }
    }
    # The spec file name is derived from DATASET_NAME rather than hard-coded,
    # so it always matches the workspace directory and the spec's own 'name'
    # field built from that same constant.
    spec_path = os.path.join(workspace, '{}.spec'.format(DATASET_NAME))
    with open(spec_path, 'w') as spec_file:
        yaml_processor.dump(spec, spec_file)

    os.makedirs(os.path.join(workspace, 'data'), exist_ok=True)
    workspace_files = (
        ('file1', '0'),
        ('file2', '1'),
        ('file3', 'a'),
        ('file4', 'b'),
    )
    for file_name, content in workspace_files:
        self.create_file(workspace, file_name, content)

    api.add(DATASETS, DATASET_NAME)
    api.commit(DATASETS, DATASET_NAME)
    api.push(DATASETS, DATASET_NAME)

    self.assertTrue(
        os.path.exists(os.path.join(self.tmp_dir, self.metadata)))

    # Drop the local ml-git data and workspace, then start from a fresh
    # repository.
    clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
    clear(workspace)
    init_repository(DATASETS, self)
def test_13_commit_files(self):
    """Commit ``dataset-ex``, then ``labels-ex`` related to it, and verify both.

    After each commit the entity's HEAD ref file must exist, and the committed
    labels spec must carry the expected dataset tag.
    """

    def head_ref(entity_type, entity_name):
        # Location of the HEAD ref file for an entity under ML_GIT_DIR.
        return os.path.join(
            self.tmp_dir, ML_GIT_DIR, entity_type, 'refs', entity_name, 'HEAD')

    self.set_up_test()
    self.set_up_add_test()

    # Dataset side: stage 'file' with a version bump and commit.
    api.add('dataset', 'dataset-ex', bumpversion=True, fsck=False,
            file_path=['file'])
    api.commit('dataset', 'dataset-ex')
    self.assertTrue(os.path.exists(head_ref('dataset', 'dataset-ex')))

    # Labels side: new repository, one file, commit linked to the dataset.
    init_repository('labels', self)
    self.create_file_in_ws('labels', 'file', '0')
    api.add('labels', 'labels-ex', bumpversion=True, fsck=False,
            file_path=['file'])
    api.commit('labels', 'labels-ex', related_dataset='dataset-ex')

    # Here the committed spec is read from a category-based metadata path.
    spec_path = os.path.join(
        self.tmp_dir, ML_GIT_DIR, 'labels', 'metadata',
        'computer-vision', 'images', 'labels-ex', 'labels-ex.spec')
    with open(spec_path) as spec_file:
        labels_spec = yaml_processor.load(spec_file)

    self.assertTrue(os.path.exists(head_ref('labels', 'labels-ex')))
    self.assertEqual(
        'computer-vision__images__dataset-ex__2',
        labels_spec['labels']['dataset']['tag'])
def test_34_local_get_entity_versions(self):
    """Commit three bumped dataset versions and list them through the manager.

    The local entity manager must report three versions, each with a version
    number in 1..3 and a tag starting with the dataset's tag prefix.
    """

    def commit_new_file(file_name, content):
        # Create one workspace file, add it with a version bump, commit it.
        self.create_file_in_ws(DATASETS, file_name, content)
        api.add(DATASETS, DATASET_NAME, bumpversion=True, fsck=False,
                file_path=[file_name])
        api.commit(DATASETS, DATASET_NAME)

    init_repository(DATASETS, self)

    commit_new_file('file', '1')
    head_path = os.path.join(
        self.tmp_dir, ML_GIT_DIR, DATASETS, 'refs', DATASET_NAME, 'HEAD')
    self.assertTrue(os.path.exists(head_path))

    commit_new_file('file2', '2')
    commit_new_file('file3', '3')

    tag_prefix = 'computer-vision__images__{}__'.format(DATASET_NAME)
    manager = api.init_local_entity_manager()
    versions = manager.get_entity_versions(DATASET_NAME, DATASETS)

    self.assertEqual(len(versions), 3)
    allowed_versions = range(1, 4)
    for entry in versions:
        self.assertIn(entry.version, allowed_versions)
        self.assertTrue(entry.tag.startswith(tag_prefix))
def init_entities_with_relationships(self, only_dataset=False):
    """Commit a dataset and, unless disabled, related labels and model entities.

    :param only_dataset: when True, only the dataset is committed and the
        labels/model setup is skipped.
    """

    def head_ref(entity_type, entity_name):
        # Location of the HEAD ref file for an entity under ML_GIT_DIR.
        return os.path.join(
            self.tmp_dir, ML_GIT_DIR, entity_type, 'refs', entity_name, 'HEAD')

    def committed_spec(entity_type, entity_name):
        # Load '<name>/<name>.spec' from the entity type's metadata directory.
        spec_path = os.path.join(
            self.tmp_dir, ML_GIT_DIR, entity_type, 'metadata',
            entity_name, '{}.spec'.format(entity_name))
        with open(spec_path) as spec_file:
            return yaml_processor.load(spec_file)

    # Dataset: one file, bumped add, commit.
    init_repository(DATASETS, self)
    self.create_file_in_ws(DATASETS, 'file', '1')
    api.add(DATASETS, DATASET_NAME, bumpversion=True, fsck=False,
            file_path=['file'])
    api.commit(DATASETS, DATASET_NAME)
    self.assertTrue(os.path.exists(head_ref(DATASETS, DATASET_NAME)))

    tag_template = 'computer-vision__images__{}__1'

    # NOTE(review): the source's indentation is lost; this assumes the whole
    # labels/model setup belongs to the `if not only_dataset` branch (the
    # model commit needs the label name defined there) - confirm.
    if only_dataset:
        return

    # Labels: committed with a relationship to the dataset.
    label_name = 'labels-ex'
    init_repository(LABELS, self)
    self.create_file_in_ws(LABELS, 'file', '0')
    api.add(LABELS, label_name, bumpversion=True, fsck=False,
            file_path=['file'])
    api.commit(LABELS, label_name, related_dataset=DATASET_NAME)

    labels_spec = committed_spec(LABELS, label_name)
    self.assertTrue(os.path.exists(head_ref(LABELS, label_name)))
    self.assertEqual(
        tag_template.format(DATASET_NAME),
        labels_spec[LABELS_SPEC_KEY][DATASET_SPEC_KEY]['tag'])

    # Model: a plain first commit, then a second one carrying both
    # relationships.
    model_name = 'models-ex'
    init_repository(MODELS, self)
    self.create_file_in_ws(MODELS, 'file', '0')
    api.add(MODELS, model_name, bumpversion=True, fsck=False,
            file_path=['file'])
    api.commit(MODELS, model_name)

    self.create_file_in_ws(MODELS, 'file2', '2')
    api.add(MODELS, model_name, bumpversion=True, fsck=False,
            file_path=['file2'])
    api.commit(MODELS, model_name, related_dataset=DATASET_NAME,
               related_labels=label_name)

    model_spec = committed_spec(MODELS, model_name)
    self.assertTrue(os.path.exists(head_ref(MODELS, model_name)))
    self.assertEqual(
        tag_template.format(DATASET_NAME),
        model_spec[MODEL_SPEC_KEY][DATASET_SPEC_KEY]['tag'])
    self.assertEqual(
        tag_template.format(label_name),
        model_spec[MODEL_SPEC_KEY][LABELS_SPEC_KEY]['tag'])
def test_33_local_get_entities(self):
    """Commit a dataset, labels and a model, then list them via the manager.

    The labels are related to the dataset and the model to both; the related
    tags are checked in the committed specs. The local entity manager must
    then return exactly three entities whose names are the ones created here.
    """

    def head_ref(entity_type, entity_name):
        # Location of the HEAD ref file for an entity under ML_GIT_DIR.
        return os.path.join(
            self.tmp_dir, ML_GIT_DIR, entity_type, 'refs', entity_name, 'HEAD')

    def committed_spec(entity_type, entity_name):
        # Load '<name>/<name>.spec' from the entity type's metadata directory.
        spec_path = os.path.join(
            self.tmp_dir, ML_GIT_DIR, entity_type, 'metadata',
            entity_name, '{}.spec'.format(entity_name))
        with open(spec_path) as spec_file:
            return yaml_processor.load(spec_file)

    # Dataset: one file, bumped add, commit.
    init_repository(DATASETS, self)
    self.create_file_in_ws(DATASETS, 'file', '1')
    api.add(DATASETS, DATASET_NAME, bumpversion=True, fsck=False,
            file_path=['file'])
    api.commit(DATASETS, DATASET_NAME)
    self.assertTrue(os.path.exists(head_ref(DATASETS, DATASET_NAME)))

    tag_template = 'computer-vision__images__{}__1'

    # Labels: committed with a relationship to the dataset.
    label_name = 'labels-ex'
    init_repository(LABELS, self)
    self.create_file_in_ws(LABELS, 'file', '0')
    api.add(LABELS, label_name, bumpversion=True, fsck=False,
            file_path=['file'])
    api.commit(LABELS, label_name, related_dataset=DATASET_NAME)

    labels_spec = committed_spec(LABELS, label_name)
    self.assertTrue(os.path.exists(head_ref(LABELS, label_name)))
    self.assertEqual(
        tag_template.format(DATASET_NAME),
        labels_spec[LABELS_SPEC_KEY][DATASET_SPEC_KEY]['tag'])

    # Model: committed with relationships to both dataset and labels.
    model_name = 'models-ex'
    init_repository(MODELS, self)
    self.create_file_in_ws(MODELS, 'file', '0')
    api.add(MODELS, model_name, bumpversion=True, fsck=False,
            file_path=['file'])
    api.commit(MODELS, model_name, related_dataset=DATASET_NAME,
               related_labels=label_name)

    model_spec = committed_spec(MODELS, model_name)
    self.assertTrue(os.path.exists(head_ref(MODELS, model_name)))
    self.assertEqual(
        tag_template.format(DATASET_NAME),
        model_spec[MODEL_SPEC_KEY][DATASET_SPEC_KEY]['tag'])
    self.assertEqual(
        tag_template.format(label_name),
        model_spec[MODEL_SPEC_KEY][LABELS_SPEC_KEY]['tag'])

    manager = api.init_local_entity_manager()
    found_entities = manager.get_entities()
    self.assertEqual(len(found_entities), 3)

    expected_names = [DATASET_NAME, label_name, model_name]
    for found in found_entities:
        self.assertIn(found.name, expected_names)