Esempio n. 1
0
    def test_13_commit_files(self):
        """Commit a dataset, then commit a labels entity related to it.

        After each commit the entity's ``refs/<name>/HEAD`` file must exist
        under ``ML_GIT_DIR``. The labels spec read back from the labels
        metadata directory must carry the expected dataset tag.
        """
        self.set_up_test()
        self.set_up_add_test()

        # Dataset: stage, commit and confirm the HEAD ref file is present.
        api.add(DATASETS, DATASET_NAME, bumpversion=True, fsck=False,
                file_path=['file'])
        api.commit(DATASETS, DATASET_NAME)
        dataset_head = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS,
                                    'refs', DATASET_NAME, 'HEAD')
        self.assertTrue(os.path.exists(dataset_head))

        # Labels: same flow, but the commit references the dataset.
        init_repository(LABELS, self)
        self.create_file_in_ws(LABELS, 'file', '0')
        api.add(LABELS, 'labels-ex', bumpversion=True, fsck=False,
                file_path=['file'])
        api.commit(LABELS, 'labels-ex', related_dataset=DATASET_NAME)

        labels_root = os.path.join(self.tmp_dir, ML_GIT_DIR, LABELS)
        spec_path = os.path.join(labels_root, 'metadata', 'labels-ex',
                                 'labels-ex.spec')
        with open(spec_path) as spec_file:
            spec = yaml_processor.load(spec_file)

        labels_head = os.path.join(labels_root, 'refs', 'labels-ex', 'HEAD')
        self.assertTrue(os.path.exists(labels_head))

        linked_tag = spec[LABELS_SPEC_KEY][DATASET_SPEC_KEY]['tag']
        self.assertEqual('computer-vision__images__datasets-ex__11',
                         linked_tag)
Esempio n. 2
0
 def test_42_graph_order_by_version_number(self):
     """Check the exported DOT graph links dataset versions 1 -> 2 -> 10.

     Three commits are made: a plain one, one with ``bumpversion=True`` and
     one through the command line with ``--version=10``. The exported graph
     file must contain an edge for each consecutive pair of versions.
     """
     init_repository(DATASETS, self)

     # First commit, no explicit version bump.
     self.create_file_in_ws(DATASETS, 'file', '1')
     api.add(DATASETS, DATASET_NAME, file_path=['file'])
     api.commit(DATASETS, DATASET_NAME)

     # Second commit, bumping the version on add.
     self.create_file_in_ws(DATASETS, 'file2', '1')
     api.add(DATASETS, DATASET_NAME, bumpversion=True, file_path=['file2'])
     api.commit(DATASETS, DATASET_NAME)

     # Third commit goes through the CLI so the version can be forced to 10.
     self.create_file_in_ws(DATASETS, 'file3', '1')
     api.add(DATASETS, DATASET_NAME, file_path=['file3'])
     commit_command = MLGIT_COMMIT % (DATASETS, DATASET_NAME,
                                      '--version=10')
     self.assertNotIn(ERROR_MESSAGE, check_output(commit_command))

     manager = api.init_local_entity_manager()
     relationships = manager.get_project_entities_relationships(
         export_type=FileType.DOT.value)
     graph_path = manager.export_graph(relationships)
     self.assertTrue(os.path.exists(graph_path))

     with open(graph_path, 'r') as graph_file:
         dot_source = graph_file.read()

     first_edge = '\\"{} (1)\\" -> \\"{} (2)\\"'.format(DATASET_NAME,
                                                        DATASET_NAME)
     second_edge = '\\"{} (2)\\" -> \\"{} (10)\\"'.format(DATASET_NAME,
                                                          DATASET_NAME)
     self.assertIn(first_edge, dot_source)
     self.assertIn(second_edge, dot_source)
Esempio n. 3
0
    def _push_model_with_metrics(self, entity_name):
        """Create a model entity, add it with metrics, then commit and push.

        :param entity_name: name of the model entity; also used as the
            workspace directory name under ``<tmp_dir>/<MODELS>``.
        """
        init_repository(MODELS, self)
        workspace = os.path.join(self.tmp_dir, MODELS, entity_name)
        api.create(MODELS,
                   entity_name,
                   categories=['computer-vision', 'images'],
                   mutability=STRICT,
                   bucket_name='mlgit')

        data_dir = os.path.join(workspace, 'data')
        os.makedirs(data_dir, exist_ok=True)
        self.create_file(workspace, 'file1', '0')

        metrics = {'accuracy': 10.0, 'precision': 10.0}
        api.add(MODELS, entity_name, metric=metrics)
        api.commit(MODELS, entity_name)
        api.push(MODELS, entity_name)
Esempio n. 4
0
 def test_11_add_files_with_bumpversion(self):
     """Check that adding with ``bumpversion=True`` moves the version 1 -> 2.

     The entity is expected at version 1 after set-up. Following an initial
     add/commit, a new file is created and added with ``bumpversion=True``;
     the file must be reported by ``check_add`` and the version must be 2.
     """
     self.set_up_add_test()
     self.check_entity_version(1)

     # Baseline add/commit without touching the version.
     api.add(DATASETS, DATASET_NAME, fsck=False, file_path=[])
     api.commit(DATASETS, DATASET_NAME)

     # Introduce a new file and add it while bumping the version.
     new_file = 'new-file-test'
     self.create_file_in_ws(DATASETS, new_file, '0')
     api.add(DATASETS, DATASET_NAME, bumpversion=True, fsck=False,
             file_path=[])

     self.check_add(files=[new_file])
     self.check_entity_version(2)
Esempio n. 5
0
    def set_up_test(self, entity):
        """Prepare a repository for ``entity`` with one committed entry.

        Creates the ``<entity>-ex`` workspace with a spec, a ``data``
        directory and four small files, then adds and commits it.

        :param entity: entity type string; the entity name is derived from
            it by appending ``'-ex'``.
        """
        init_repository(entity, self)
        entity_name = entity + '-ex'
        workspace = os.path.join(self.tmp_dir, entity, entity_name)
        os.makedirs(workspace, exist_ok=True)
        # NOTE(review): 20 is presumably the spec version; confirm against
        # create_spec's signature.
        create_spec(self, entity, self.tmp_dir, 20, STRICT)
        os.makedirs(os.path.join(workspace, 'data'), exist_ok=True)

        fixture_files = (
            ('file1', '0'),
            ('file2', '1'),
            ('file3', 'a'),
            ('file4', 'b'),
        )
        for file_name, content in fixture_files:
            self.create_file(workspace, file_name, content)

        api.add(entity, entity_name, bumpversion=True, fsck=False,
                file_path=['file'])
        api.commit(entity, entity_name)
Esempio n. 6
0
    def set_up_test(self):
        """Push a ``dataset-ex`` entity, then reset the local state.

        Writes a spec (version 9) and four files into the workspace, runs
        add (with ``bumpversion=True``), commit and push, and asserts that
        ``self.metadata`` exists under ``tmp_dir``. Finally ``clear`` is
        called on the ``ML_GIT_DIR`` directory and on the workspace, and
        the repository is initialised again.
        """
        init_repository('dataset', self)

        workspace = os.path.join(self.tmp_dir, 'dataset', 'dataset-ex')
        os.makedirs(workspace, exist_ok=True)

        manifest = {
            'files': 'MANIFEST.yaml',
            'store': 's3h://mlgit',
        }
        spec = {
            'dataset': {
                'categories': ['computer-vision', 'images'],
                'manifest': manifest,
                'mutability': Mutability.STRICT.value,
                'name': 'dataset-ex',
                'version': 9,
            }
        }
        spec_path = os.path.join(workspace, 'dataset-ex.spec')
        with open(spec_path, 'w') as spec_file:
            yaml_processor.dump(spec, spec_file)

        os.makedirs(os.path.join(workspace, 'data'), exist_ok=True)

        fixture_files = (
            ('file1', '0'),
            ('file2', '1'),
            ('file3', 'a'),
            ('file4', 'b'),
        )
        for file_name, content in fixture_files:
            self.create_file(workspace, file_name, content)

        api.add('dataset', 'dataset-ex', bumpversion=True)
        api.commit('dataset', 'dataset-ex')
        api.push('dataset', 'dataset-ex')

        metadata_path = os.path.join(self.tmp_dir, self.metadata)
        self.assertTrue(os.path.exists(metadata_path))

        # Reset local state and start again from a freshly initialised
        # repository.
        clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
        clear(workspace)
        init_repository('dataset', self)
Esempio n. 7
0
    def set_up_test(self):
        """Push the ``DATASET_NAME`` dataset, then reset the local state.

        Writes a spec (version 10) and four files into the workspace, runs
        add, commit and push, and asserts that ``self.metadata`` exists
        under ``tmp_dir``. Finally ``clear`` is called on the
        ``ML_GIT_DIR`` directory and on the workspace, and the repository
        is initialised again.
        """
        init_repository(DATASETS, self)

        workspace = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
        os.makedirs(workspace, exist_ok=True)

        manifest = {
            'files': 'MANIFEST.yaml',
            STORAGE_SPEC_KEY: '%s://mlgit' % S3H,
        }
        spec = {
            DATASET_SPEC_KEY: {
                'categories': ['computer-vision', 'images'],
                'manifest': manifest,
                'mutability': STRICT,
                'name': DATASET_NAME,
                'version': 10,
            }
        }
        # NOTE(review): the file name is hard-coded; it presumably matches
        # DATASET_NAME + '.spec' -- confirm before changing either.
        spec_path = os.path.join(workspace, 'datasets-ex.spec')
        with open(spec_path, 'w') as spec_file:
            yaml_processor.dump(spec, spec_file)

        os.makedirs(os.path.join(workspace, 'data'), exist_ok=True)

        fixture_files = (
            ('file1', '0'),
            ('file2', '1'),
            ('file3', 'a'),
            ('file4', 'b'),
        )
        for file_name, content in fixture_files:
            self.create_file(workspace, file_name, content)

        api.add(DATASETS, DATASET_NAME)
        api.commit(DATASETS, DATASET_NAME)
        api.push(DATASETS, DATASET_NAME)

        metadata_path = os.path.join(self.tmp_dir, self.metadata)
        self.assertTrue(os.path.exists(metadata_path))

        # Reset local state and start again from a freshly initialised
        # repository.
        clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
        clear(workspace)
        init_repository(DATASETS, self)
Esempio n. 8
0
    def test_13_commit_files(self):
        """Commit ``dataset-ex``, then commit ``labels-ex`` related to it.

        After each commit the entity's ``refs/<name>/HEAD`` file must exist
        under ``ML_GIT_DIR``. The labels spec read back from
        ``metadata/computer-vision/images/labels-ex`` must carry the
        expected dataset tag.
        """
        self.set_up_test()
        self.set_up_add_test()

        # Dataset: stage, commit and confirm the HEAD ref file is present.
        api.add('dataset', 'dataset-ex', bumpversion=True, fsck=False,
                file_path=['file'])
        api.commit('dataset', 'dataset-ex')
        dataset_head = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset',
                                    'refs', 'dataset-ex', 'HEAD')
        self.assertTrue(os.path.exists(dataset_head))

        # Labels: same flow, but the commit references the dataset.
        init_repository('labels', self)
        self.create_file_in_ws('labels', 'file', '0')
        api.add('labels', 'labels-ex', bumpversion=True, fsck=False,
                file_path=['file'])
        api.commit('labels', 'labels-ex', related_dataset='dataset-ex')

        labels_root = os.path.join(self.tmp_dir, ML_GIT_DIR, 'labels')
        spec_path = os.path.join(labels_root, 'metadata', 'computer-vision',
                                 'images', 'labels-ex', 'labels-ex.spec')
        with open(spec_path) as spec_file:
            spec = yaml_processor.load(spec_file)

        labels_head = os.path.join(labels_root, 'refs', 'labels-ex', 'HEAD')
        self.assertTrue(os.path.exists(labels_head))

        linked_tag = spec['labels']['dataset']['tag']
        self.assertEqual('computer-vision__images__dataset-ex__2', linked_tag)
Esempio n. 9
0
    def test_34_local_get_entity_versions(self):
        """Check the local entity manager lists every committed version.

        Three add/commit rounds are run, each with ``bumpversion=True``.
        The manager must then return three versions for the dataset, each
        with a version number in 1..3 and a tag starting with the
        ``computer-vision__images__<DATASET_NAME>__`` prefix.
        """
        init_repository(DATASETS, self)

        # First round; also confirms the HEAD ref file gets written.
        self.create_file_in_ws(DATASETS, 'file', '1')
        api.add(DATASETS, DATASET_NAME, bumpversion=True, fsck=False,
                file_path=['file'])
        api.commit(DATASETS, DATASET_NAME)
        head_path = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS, 'refs',
                                 DATASET_NAME, 'HEAD')
        self.assertTrue(os.path.exists(head_path))

        # Two further rounds, each adding one new file.
        for file_name, content in (('file2', '2'), ('file3', '3')):
            self.create_file_in_ws(DATASETS, file_name, content)
            api.add(DATASETS, DATASET_NAME, bumpversion=True, fsck=False,
                    file_path=[file_name])
            api.commit(DATASETS, DATASET_NAME)

        tag_prefix = 'computer-vision__images__{}__'.format(DATASET_NAME)

        manager = api.init_local_entity_manager()
        versions = manager.get_entity_versions(DATASET_NAME, DATASETS)
        self.assertEqual(len(versions), 3)

        valid_numbers = range(1, 4)
        for entry in versions:
            self.assertIn(entry.version, valid_numbers)
            self.assertTrue(entry.tag.startswith(tag_prefix))
Esempio n. 10
0
    def init_entities_with_relationships(self, only_dataset=False):
        """Commit a dataset and, optionally, related labels and model.

        The dataset is always committed and its HEAD ref file checked.
        Unless ``only_dataset`` is true, a ``labels-ex`` entity is committed
        with the dataset as related entity, and a ``models-ex`` entity is
        committed twice, the second time related to both the dataset and
        the labels. The tags stored in the committed specs are asserted.

        :param only_dataset: when true, stop after the dataset commit.
        """
        init_repository(DATASETS, self)
        self.create_file_in_ws(DATASETS, 'file', '1')
        api.add(DATASETS, DATASET_NAME, bumpversion=True, fsck=False,
                file_path=['file'])
        api.commit(DATASETS, DATASET_NAME)
        dataset_head = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS,
                                    'refs', DATASET_NAME, 'HEAD')
        self.assertTrue(os.path.exists(dataset_head))

        if only_dataset:
            return

        tag_template = 'computer-vision__images__{}__1'

        # Labels entity, related to the dataset.
        label_name = 'labels-ex'
        init_repository(LABELS, self)
        self.create_file_in_ws(LABELS, 'file', '0')
        api.add(LABELS, label_name, bumpversion=True, fsck=False,
                file_path=['file'])
        api.commit(LABELS, label_name, related_dataset=DATASET_NAME)

        labels_root = os.path.join(self.tmp_dir, ML_GIT_DIR, LABELS)
        labels_spec_path = os.path.join(labels_root, 'metadata', label_name,
                                        '{}.spec'.format(label_name))
        with open(labels_spec_path) as spec_file:
            labels_spec = yaml_processor.load(spec_file)
        labels_head = os.path.join(labels_root, 'refs', label_name, 'HEAD')

        self.assertTrue(os.path.exists(labels_head))
        self.assertEqual(tag_template.format(DATASET_NAME),
                         labels_spec[LABELS_SPEC_KEY][DATASET_SPEC_KEY]['tag'])

        # Model entity: a plain first commit, then a second one that links
        # both the dataset and the labels.
        model_name = 'models-ex'
        init_repository(MODELS, self)
        self.create_file_in_ws(MODELS, 'file', '0')
        api.add(MODELS, model_name, bumpversion=True, fsck=False,
                file_path=['file'])
        api.commit(MODELS, model_name)

        self.create_file_in_ws(MODELS, 'file2', '2')
        api.add(MODELS, model_name, bumpversion=True, fsck=False,
                file_path=['file2'])
        api.commit(MODELS, model_name, related_dataset=DATASET_NAME,
                   related_labels=label_name)

        models_root = os.path.join(self.tmp_dir, ML_GIT_DIR, MODELS)
        model_spec_path = os.path.join(models_root, 'metadata', model_name,
                                       '{}.spec'.format(model_name))
        with open(model_spec_path) as spec_file:
            model_spec = yaml_processor.load(spec_file)
        model_head = os.path.join(models_root, 'refs', model_name, 'HEAD')

        self.assertTrue(os.path.exists(model_head))
        self.assertEqual(tag_template.format(DATASET_NAME),
                         model_spec[MODEL_SPEC_KEY][DATASET_SPEC_KEY]['tag'])
        self.assertEqual(tag_template.format(label_name),
                         model_spec[MODEL_SPEC_KEY][LABELS_SPEC_KEY]['tag'])
Esempio n. 11
0
    def test_33_local_get_entities(self):
        """Check the local entity manager lists all three committed entities.

        A dataset, a labels entity related to it and a model related to
        both are committed. Their specs and HEAD ref files are verified on
        the way, then ``get_entities`` must return exactly three entities
        whose names are among the ones created here.
        """
        # Dataset.
        init_repository(DATASETS, self)
        self.create_file_in_ws(DATASETS, 'file', '1')
        api.add(DATASETS, DATASET_NAME, bumpversion=True, fsck=False,
                file_path=['file'])
        api.commit(DATASETS, DATASET_NAME)
        dataset_head = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS,
                                    'refs', DATASET_NAME, 'HEAD')
        self.assertTrue(os.path.exists(dataset_head))

        tag_template = 'computer-vision__images__{}__1'

        # Labels entity, related to the dataset.
        label_name = 'labels-ex'
        init_repository(LABELS, self)
        self.create_file_in_ws(LABELS, 'file', '0')
        api.add(LABELS, label_name, bumpversion=True, fsck=False,
                file_path=['file'])
        api.commit(LABELS, label_name, related_dataset=DATASET_NAME)

        labels_root = os.path.join(self.tmp_dir, ML_GIT_DIR, LABELS)
        labels_spec_path = os.path.join(labels_root, 'metadata', label_name,
                                        '{}.spec'.format(label_name))
        with open(labels_spec_path) as spec_file:
            labels_spec = yaml_processor.load(spec_file)
        labels_head = os.path.join(labels_root, 'refs', label_name, 'HEAD')

        self.assertTrue(os.path.exists(labels_head))
        self.assertEqual(tag_template.format(DATASET_NAME),
                         labels_spec[LABELS_SPEC_KEY][DATASET_SPEC_KEY]['tag'])

        # Model entity, related to both the dataset and the labels.
        model_name = 'models-ex'
        init_repository(MODELS, self)
        self.create_file_in_ws(MODELS, 'file', '0')
        api.add(MODELS, model_name, bumpversion=True, fsck=False,
                file_path=['file'])
        api.commit(MODELS, model_name, related_dataset=DATASET_NAME,
                   related_labels=label_name)

        models_root = os.path.join(self.tmp_dir, ML_GIT_DIR, MODELS)
        model_spec_path = os.path.join(models_root, 'metadata', model_name,
                                       '{}.spec'.format(model_name))
        with open(model_spec_path) as spec_file:
            model_spec = yaml_processor.load(spec_file)
        model_head = os.path.join(models_root, 'refs', model_name, 'HEAD')

        self.assertTrue(os.path.exists(model_head))
        self.assertEqual(tag_template.format(DATASET_NAME),
                         model_spec[MODEL_SPEC_KEY][DATASET_SPEC_KEY]['tag'])
        self.assertEqual(tag_template.format(label_name),
                         model_spec[MODEL_SPEC_KEY][LABELS_SPEC_KEY]['tag'])

        # The manager must report the three entities created above.
        manager = api.init_local_entity_manager()
        found_entities = manager.get_entities()
        self.assertEqual(len(found_entities), 3)
        expected_names = [DATASET_NAME, label_name, model_name]
        for found in found_entities:
            self.assertIn(found.name, expected_names)