def test_09_add_command_with_metric_for_wrong_entity(self):
        """Check that ``--metric`` options are not stored for a dataset.

        The add command is run on a dataset entity with two ``--metric``
        options. The file must still be indexed, and the entity spec must
        end up without any metrics.
        """
        repo_type = DATASETS
        self.set_up_add()

        create_spec(self, repo_type, self.tmp_dir)
        workspace = os.path.join(self.tmp_dir, repo_type, DATASET_NAME)

        os.makedirs(os.path.join(workspace, 'data'))

        create_file(workspace, 'file1', '0')

        metrics_options = '--metric Accuracy 1 --metric Recall 2'

        self.assertIn(
            output_messages['INFO_ADDING_PATH'] % repo_type,
            check_output(MLGIT_ADD %
                         (repo_type, DATASET_NAME, metrics_options)))
        index = os.path.join(ML_GIT_DIR, repo_type, 'index', 'metadata',
                             DATASET_NAME, 'INDEX.yaml')
        self._check_index(index, ['data/file1'], [])

        # Only the load needs the open handle; close the file right after.
        with open(os.path.join(workspace, DATASET_NAME + '.spec')) as spec:
            spec_file = yaml_processor.load(spec)

        spec_key = get_spec_key(repo_type)
        metrics = spec_file[spec_key].get('metrics', {})
        # assertEqual prints the unexpected metrics on failure, whereas
        # assertTrue(metrics == {}) would only report "False is not true".
        self.assertEqual(metrics, {})
Exemple #2
0
    def set_up_push(self):
        """Prepare a committed ``dataset-ex`` entity for the push tests.

        Creates the workspace and spec, initialises the project, registers
        the git remote and the store, initialises the dataset entity, runs
        the ``add_file`` helper, commits, and finally checks that the HEAD
        ref of the entity exists.
        """
        os.makedirs(self.workspace)
        spec_options = {
            'version': 1,
            'mutability': 'strict',
            'store_type': self.store_type,
        }
        create_spec(self, self.repo_type, self.tmp_dir, **spec_options)

        # Project initialisation.
        init_message = messages[0]
        self.assertIn(init_message, check_output(MLGIT_INIT))

        # Git remote registration.
        remote_message = messages[2] % (GIT_PATH, self.repo_type)
        remote_command = MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)
        self.assertIn(remote_message, check_output(remote_command))

        # Store registration.
        store_message = messages[87] % (self.store_type, self.bucket)
        store_command = 'ml-git repository store add %s --type=%s' % (
            self.bucket, self.store_type)
        self.assertIn(store_message, check_output(store_command))

        # Entity initialisation must not report an error.
        entity_init_command = MLGIT_ENTITY_INIT % 'dataset'
        self.assertNotIn(ERROR_MESSAGE, check_output(entity_init_command))

        add_file(self, self.repo_type, '', 'new')

        metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset',
                                    'metadata')
        entity_path = os.path.join('computer-vision', 'images', 'dataset-ex')
        commit_message = messages[17] % (metadata_dir, entity_path)
        commit_command = MLGIT_COMMIT % (self.repo_type, 'dataset-ex', '')
        self.assertIn(commit_message, check_output(commit_command))

        head_ref = os.path.join(ML_GIT_DIR, 'dataset', 'refs', 'dataset-ex',
                                'HEAD')
        self.assertTrue(os.path.exists(head_ref))
Exemple #3
0
 def test_04_list_tags_without_similar_tags(self):
     """Check that listing tags of one entity omits a similarly named one.

     A second entity, ``dataset-ex2``, is committed and pushed. Its name
     must not appear in the tag list of ``dataset-ex``, but must appear in
     its own tag list.
     """
     entity_type = 'dataset'
     self._list_tag_entity(entity_type)

     similar_entity = 'dataset-ex2'
     os.makedirs(os.path.join(entity_type, similar_entity), exist_ok=True)
     create_spec(self, entity_type, self.tmp_dir,
                 artifact_name=similar_entity)
     add_file(self, entity_type, '--bumpversion', 'new',
              artifact_name=similar_entity)

     metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type,
                                 'metadata')
     entity_path = os.path.join('computer-vision', 'images', similar_entity)
     commit_message = messages[17] % (metadata_dir, entity_path)
     commit_command = MLGIT_COMMIT % (entity_type, similar_entity, '')
     self.assertIn(commit_message, check_output(commit_command))

     check_output(MLGIT_PUSH % (entity_type, similar_entity))

     original_entity = entity_type + '-ex'
     tags_of_original = check_output(
         MLGIT_TAG_LIST % (entity_type, original_entity))
     self.assertNotIn(similar_entity, tags_of_original)

     tags_of_similar = check_output(
         MLGIT_TAG_LIST % (entity_type, similar_entity))
     self.assertIn(similar_entity, tags_of_similar)
    def test_07_add_command_with_multiple_files(self):
        """Check the index after adding a single file, a folder, and all.

        Three successive add commands are issued (one file path, the
        ``data`` folder, then no path); the index content is verified
        after each of them.
        """
        self.set_up_add()

        create_spec(self, DATASETS, self.tmp_dir)
        workspace = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
        os.makedirs(os.path.join(workspace, 'data'))

        for file_name, content in (('file1', '0'), ('file2', '1'),
                                   ('file3', '1')):
            create_file(workspace, file_name, content)

        adding_message = output_messages['INFO_ADDING_PATH'] % DATASETS
        index = os.path.join(ML_GIT_DIR, DATASETS, 'index', 'metadata',
                             DATASET_NAME, 'INDEX.yaml')

        # Step 1: add only data/file1.
        single_file = os.path.join('data', 'file1')
        add_single = MLGIT_ADD % (DATASETS, DATASET_NAME, single_file)
        self.assertIn(adding_message, check_output(add_single))
        self._check_index(index, ['data/file1'], ['data/file2', 'data/file3'])

        # Step 2: add the whole data folder.
        add_folder = MLGIT_ADD % (DATASETS, DATASET_NAME, 'data')
        self.assertIn(adding_message, check_output(add_folder))
        self._check_index(index, ['data/file1', 'data/file2', 'data/file3'],
                          [])

        # Step 3: create one more file and add without a path argument.
        create_file(workspace, 'file4', '0')
        add_everything = MLGIT_ADD % (DATASETS, DATASET_NAME, '')
        self.assertIn(adding_message, check_output(add_everything))
        expected_files = ['data/file1', 'data/file2', 'data/file3',
                          'data/file4']
        self._check_index(index, expected_files, [])
    def test_10_add_command_with_metric_file(self):
        """Check that metrics from a ``--metrics-file`` CSV reach the spec.

        A CSV file holding ``Accuracy`` and ``Recall`` is passed to the add
        command for a model entity; both values must then be present in the
        ``metrics`` section of the entity spec.
        """
        repo_type = MODELS
        entity_name = '{}-ex'.format(repo_type)
        self.set_up_add(repo_type)

        create_spec(self, repo_type, self.tmp_dir)
        workspace = os.path.join(self.tmp_dir, repo_type, entity_name)

        os.makedirs(os.path.join(workspace, 'data'))

        create_file(workspace, 'file1', '0')

        csv_file = os.path.join(self.tmp_dir, 'metrics.csv')

        self.create_csv_file(csv_file, {'Accuracy': 1, 'Recall': 2})

        metrics_options = '--metrics-file="{}"'.format(csv_file)

        self.assertIn(
            output_messages['INFO_ADDING_PATH'] % repo_type,
            check_output(MLGIT_ADD %
                         (repo_type, entity_name, metrics_options)))
        index = os.path.join(ML_GIT_DIR, repo_type, 'index', 'metadata',
                             entity_name, 'INDEX.yaml')
        self._check_index(index, ['data/file1'], [])

        # Only the load needs the open handle; close the file right after.
        with open(os.path.join(workspace, entity_name + '.spec')) as spec:
            spec_file = yaml_processor.load(spec)

        spec_key = get_spec_key(repo_type)
        metrics = spec_file[spec_key].get('metrics', {})
        # The dedicated comparison assertions show both operands on failure,
        # unlike assertTrue/assertFalse wrapped around an ``==`` expression.
        self.assertNotEqual(metrics, {})
        self.assertEqual(metrics['Accuracy'], 1)
        self.assertEqual(metrics['Recall'], 2)
Exemple #6
0
    def test_05_add_command_without_file_added(self):
        """Check the add command output when it is run twice with no new data.

        The first add must not print an error; the second one (with
        ``--bumpversion``) must print ``messages[27]``.
        """
        self.set_up_add()
        create_spec(self, 'dataset', self.tmp_dir)

        plain_add = MLGIT_ADD % ('dataset', 'dataset-ex', '')
        self.assertNotIn(ERROR_MESSAGE, check_output(plain_add))

        expected_message = messages[27]
        bump_add = MLGIT_ADD % ('dataset', 'dataset-ex', '--bumpversion')
        self.assertIn(expected_message, check_output(bump_add))
    def test_04_commit_command_with_version(self):
        """Check the ``--version`` option of the commit command.

        After a first regular commit, a second file is added and the commit
        is attempted with two invalid versions (``-10`` and ``test``), both
        of which must be rejected, and finally with ``--version=2``, which
        must succeed.
        """
        init_repository(DATASETS, self)
        create_spec(self, DATASETS, self.tmp_dir)
        workspace = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
        os.makedirs(os.path.join(workspace, 'data'))

        adding_message = output_messages['INFO_ADDING_PATH'] % DATASETS
        metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS,
                                    'metadata')
        committed_message = output_messages['INFO_COMMIT_REPO'] % (
            metadata_dir, DATASET_NAME)

        # First version: add and commit without any option.
        create_file(workspace, 'file1', '0')
        self.assertIn(adding_message,
                      check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, "")))
        self.assertIn(committed_message,
                      check_output(MLGIT_COMMIT % (DATASETS, DATASET_NAME, '')))

        # Stage a second file for the versioned commits below.
        create_file(workspace, 'file2', '1')
        self.assertIn(adding_message,
                      check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, "")))

        # Invalid version values must be rejected.
        invalid_versions = (
            ('-10', ' --version=-10'),
            ('test', '--version=test'),
        )
        for bad_value, option in invalid_versions:
            rejection = output_messages['ERROR_INVALID_VALUE_FOR'] % (
                '--version', bad_value)
            commit_command = MLGIT_COMMIT % (DATASETS, DATASET_NAME, option)
            self.assertIn(rejection, check_output(commit_command))

        # A valid explicit version is accepted.
        versioned_commit = MLGIT_COMMIT % (DATASETS, DATASET_NAME,
                                           '--version=2')
        self.assertIn(committed_message, check_output(versioned_commit))
 def set_up_test(self, repo_type=MODELS):
     """Create two committed versions of ``<repo_type>-ex`` with metrics.

     The first version is added with metrics set to 10 and committed
     without options; the second one is added (after creating ``file1``)
     with metrics set to 20 and committed with ``--version=2``.
     ``self._git_commit_time()`` is called after each commit.

     :param repo_type: entity type to initialise (defaults to ``MODELS``).
     """
     self.TAG_TIMES = []
     entity_name = '{}-ex'.format(repo_type)
     init_repository(repo_type, self)
     create_spec(self, repo_type, self.tmp_dir)

     # First version.
     first_metrics = '--metric Accuracy 10 --metric Recall 10'
     first_add = MLGIT_ADD % (repo_type, entity_name, first_metrics)
     self.assertNotIn(ERROR_MESSAGE, check_output(first_add))
     first_commit = MLGIT_COMMIT % (repo_type, entity_name, '')
     self.assertNotIn(ERROR_MESSAGE, check_output(first_commit))
     self._git_commit_time()

     # Second version, with a new file in the workspace.
     second_metrics = '--metric Accuracy 20 --metric Recall 20'
     workspace = os.path.join(self.tmp_dir, repo_type, entity_name)
     os.makedirs(os.path.join(workspace, 'data'))
     create_file(workspace, 'file1', '0')
     second_add = MLGIT_ADD % (repo_type, entity_name, second_metrics)
     self.assertNotIn(ERROR_MESSAGE, check_output(second_add))
     second_commit = MLGIT_COMMIT % (repo_type, entity_name, ' --version=2')
     self.assertNotIn(ERROR_MESSAGE, check_output(second_commit))
     self._git_commit_time()
Exemple #9
0
    def set_up_push(self):
        """Prepare a committed dataset entity for the push tests.

        Creates the workspace and spec, initialises the project, registers
        the git remote and the storage, initialises the datasets entity,
        runs the ``add_file`` helper, commits, and finally checks that the
        HEAD ref of the entity exists.
        """
        os.makedirs(self.workspace)
        spec_options = {
            'version': 1,
            'mutability': STRICT,
            'storage_type': self.storage_type,
        }
        create_spec(self, self.repo_type, self.tmp_dir, **spec_options)

        # Project initialisation.
        init_message = (output_messages['INFO_INITIALIZED_PROJECT_IN'] %
                        self.tmp_dir)
        self.assertIn(init_message, check_output(MLGIT_INIT))

        # Git remote registration.
        remote_message = output_messages['INFO_ADD_REMOTE'] % (
            GIT_PATH, self.repo_type)
        remote_command = MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)
        self.assertIn(remote_message, check_output(remote_command))

        # Storage registration.
        storage_message = output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] % (
            self.storage_type, self.bucket)
        storage_command = 'ml-git repository storage add %s --type=%s' % (
            self.bucket, self.storage_type)
        self.assertIn(storage_message, check_output(storage_command))

        # Entity initialisation must not report an error.
        entity_init_command = MLGIT_ENTITY_INIT % DATASETS
        self.assertNotIn(ERROR_MESSAGE, check_output(entity_init_command))

        add_file(self, self.repo_type, '', 'new')

        metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS,
                                    'metadata')
        commit_message = output_messages['INFO_COMMIT_REPO'] % (
            metadata_dir, DATASET_NAME)
        commit_command = MLGIT_COMMIT % (self.repo_type, DATASET_NAME, '')
        self.assertIn(commit_message, check_output(commit_command))

        head_ref = os.path.join(ML_GIT_DIR, DATASETS, 'refs', DATASET_NAME,
                                'HEAD')
        self.assertTrue(os.path.exists(head_ref))
Exemple #10
0
    def _create_entity_with_mutability(self, entity_type, mutability_type):
        """Add, commit and push ``<entity_type>-ex``, then clear local state.

        :param entity_type: type of the entity to create.
        :param mutability_type: mutability value handed to ``create_spec``.
        """
        entity_name = entity_type + '-ex'

        init_repository(entity_type, self)
        workspace = os.path.join(self.tmp_dir, entity_type, entity_name)
        create_spec(self, entity_type, self.tmp_dir, 1, mutability_type)
        os.makedirs(os.path.join(workspace, 'data'))
        create_file(workspace, 'file1', '0')

        add_command = MLGIT_ADD % (entity_type, entity_name, '')
        self.assertNotIn(ERROR_MESSAGE, check_output(add_command))

        metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type,
                                    'metadata')
        entity_path = os.path.join('computer-vision', 'images', entity_name)
        commit_message = messages[17] % (metadata_dir, entity_path)
        commit_command = MLGIT_COMMIT % (entity_type, entity_name, '')
        self.assertIn(commit_message, check_output(commit_command))

        push_command = MLGIT_PUSH % (entity_type, entity_name)
        self.assertNotIn(ERROR_MESSAGE, check_output(push_command))

        # Clear the local repository data, the workspace and the entity dir.
        for path in (os.path.join(self.tmp_dir, ML_GIT_DIR),
                     workspace,
                     os.path.join(self.tmp_dir, entity_type)):
            clear(path)
Exemple #11
0
 def _create_entity_with_mutability(self, entity_type, mutability_type):
     """Create ``<entity_type>-ex`` with one file, push it and clear paths.

     :param entity_type: type of the entity to create.
     :param mutability_type: mutability value handed to ``create_spec``.
     """
     init_repository(entity_type, self)
     entity_name = entity_type + '-ex'
     workspace = os.path.join(self.tmp_dir, entity_type, entity_name)
     create_spec(self, entity_type, self.tmp_dir, 1, mutability_type)
     data_dir = os.path.join(workspace, 'data')
     os.makedirs(data_dir)
     create_file(workspace, 'file1', '0')
     self._push_files(entity_type, '')
     self._clear_path()
Exemple #12
0
 def test_06_commit_with_large_version_number(self):
     """Check that ``--version=9999999999`` is rejected for every entity type."""
     init_repository(DATASETS, self)
     create_spec(self, DATASETS, self.tmp_dir)

     rejection = output_messages['ERROR_INVALID_VALUE_FOR'] % (
         '--version', '9999999999')
     targets = (
         (DATASETS, DATASET_NAME),
         (MODELS, MODELS + '-ex'),
         (LABELS, LABELS + '-ex'),
     )
     for entity_type, entity_name in targets:
         commit_command = MLGIT_COMMIT % (entity_type, entity_name,
                                          ' --version=9999999999')
         self.assertIn(rejection, check_output(commit_command))
Exemple #13
0
    def test_03_checkout(self):
        """Push ``dataset-ex``, wipe local data and check it out again.

        After the checkout of tag ``computer-vision__images__dataset-ex__1``
        the files ``newfile0`` .. ``newfile4`` must exist in the workspace.
        The Azure connection string environment variable is verified before
        the push and before the checkout.
        """
        os.makedirs(self.workspace)
        spec_options = {
            'version': 1,
            'mutability': 'strict',
            'store_type': self.store_type,
        }
        create_spec(self, self.repo_type, self.tmp_dir, **spec_options)

        # Project, remote, store and entity set-up.
        init_message = messages[0]
        self.assertIn(init_message, check_output(MLGIT_INIT))

        remote_message = messages[2] % (GIT_PATH, self.repo_type)
        remote_command = MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)
        self.assertIn(remote_message, check_output(remote_command))

        store_message = messages[87] % (self.store_type, self.bucket)
        store_command = 'ml-git repository store add %s --type=%s' % (
            self.bucket, self.store_type)
        self.assertIn(store_message, check_output(store_command))

        self.assertNotIn(ERROR_MESSAGE,
                         check_output(MLGIT_ENTITY_INIT % 'dataset'))

        # Add and commit.
        add_file(self, self.repo_type, '', 'new')
        metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset',
                                    'metadata')
        entity_path = os.path.join('computer-vision', 'images', 'dataset-ex')
        commit_message = messages[17] % (metadata_dir, entity_path)
        commit_command = MLGIT_COMMIT % (self.repo_type, 'dataset-ex', '')
        self.assertIn(commit_message, check_output(commit_command))

        head_ref = os.path.join(ML_GIT_DIR, 'dataset', 'refs', 'dataset-ex',
                                'HEAD')
        self.assertTrue(os.path.exists(head_ref))

        # Push.
        self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'),
                         self.dev_store_account_)
        push_command = MLGIT_PUSH % (self.repo_type, 'dataset-ex')
        self.assertNotIn(ERROR_MESSAGE, check_output(push_command))

        # Wipe the local data and re-initialise the entity.
        clear(self.workspace)
        clear(os.path.join(ML_GIT_DIR, 'dataset'))
        reinit_command = MLGIT_ENTITY_INIT % self.repo_type
        self.assertNotIn(ERROR_MESSAGE, check_output(reinit_command))

        # Checkout.
        self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'),
                         self.dev_store_account_)
        checkout_command = MLGIT_CHECKOUT % (
            self.repo_type, 'computer-vision__images__dataset-ex__1')
        self.assertNotIn(ERROR_MESSAGE, check_output(checkout_command))

        ws_path = os.path.join(self.tmp_dir, 'dataset', 'computer-vision',
                               'images', 'dataset-ex')
        for file_name in ('newfile0', 'newfile1', 'newfile2', 'newfile3',
                          'newfile4'):
            self.assertTrue(os.path.isfile(os.path.join(ws_path, file_name)))
Exemple #14
0
 def setUp_test(self):
     """Add and commit ``dataset-ex`` using ``self.COMMIT_MESSAGE``."""
     init_repository('dataset', self)
     create_spec(self, 'dataset', self.tmp_dir)

     adding_message = messages[13] % 'dataset'
     add_command = MLGIT_ADD % ('dataset', 'dataset-ex', '')
     self.assertIn(adding_message, check_output(add_command))

     metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset',
                                 'metadata')
     entity_path = os.path.join('computer-vision', 'images', 'dataset-ex')
     commit_message = messages[17] % (metadata_dir, entity_path)
     commit_option = '-m ' + self.COMMIT_MESSAGE
     commit_command = MLGIT_COMMIT % ('dataset', 'dataset-ex', commit_option)
     self.assertIn(commit_message, check_output(commit_command))
Exemple #15
0
    def test_05_commit_command_with_deprecated_version_number(self):
        """Check that ``--version-number`` is reported as an unknown option."""
        init_repository(DATASETS, self)
        create_spec(self, DATASETS, self.tmp_dir)

        workspace = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
        data_dir = os.path.join(workspace, 'data')
        os.makedirs(data_dir)
        create_file(workspace, 'file1', '0')

        adding_message = output_messages['INFO_ADDING_PATH'] % DATASETS
        add_command = MLGIT_ADD % (DATASETS, DATASET_NAME, "")
        self.assertIn(adding_message, check_output(add_command))

        commit_command = MLGIT_COMMIT % (DATASETS, DATASET_NAME,
                                         '--version-number=2')
        result = check_output(commit_command)

        unknown_option = (output_messages['ERROR_NO_SUCH_OPTION'] %
                          '--version-number')
        self.assertIn(unknown_option, result)
Exemple #16
0
    def test_03_checkout(self):
        """Push the dataset, wipe local data and check it out again.

        After the checkout of ``DATASET_TAG`` the files ``newfile0`` ..
        ``newfile4`` must exist in the workspace. The Azure connection
        string environment variable is verified before the push and before
        the checkout.
        """
        os.makedirs(self.workspace)
        spec_options = {
            'version': 1,
            'mutability': STRICT,
            'storage_type': self.storage_type,
        }
        create_spec(self, self.repo_type, self.tmp_dir, **spec_options)

        # Project, remote, storage and entity set-up.
        init_message = (output_messages['INFO_INITIALIZED_PROJECT_IN'] %
                        self.tmp_dir)
        self.assertIn(init_message, check_output(MLGIT_INIT))

        remote_message = output_messages['INFO_ADD_REMOTE'] % (
            GIT_PATH, self.repo_type)
        remote_command = MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)
        self.assertIn(remote_message, check_output(remote_command))

        storage_message = output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] % (
            self.storage_type, self.bucket)
        storage_command = 'ml-git repository storage add %s --type=%s' % (
            self.bucket, self.storage_type)
        self.assertIn(storage_message, check_output(storage_command))

        self.assertNotIn(ERROR_MESSAGE,
                         check_output(MLGIT_ENTITY_INIT % DATASETS))

        # Add and commit.
        add_file(self, self.repo_type, '', 'new')
        metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS,
                                    'metadata')
        commit_message = output_messages['INFO_COMMIT_REPO'] % (
            metadata_dir, DATASET_NAME)
        commit_command = MLGIT_COMMIT % (self.repo_type, DATASET_NAME, '')
        self.assertIn(commit_message, check_output(commit_command))

        head_ref = os.path.join(ML_GIT_DIR, DATASETS, 'refs', DATASET_NAME,
                                'HEAD')
        self.assertTrue(os.path.exists(head_ref))

        # Push.
        self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'),
                         self.dev_store_account_)
        push_command = MLGIT_PUSH % (self.repo_type, DATASET_NAME)
        self.assertNotIn(ERROR_MESSAGE, check_output(push_command))

        # Wipe the local data and re-initialise the entity.
        clear(self.workspace)
        clear(os.path.join(ML_GIT_DIR, DATASETS))
        reinit_command = MLGIT_ENTITY_INIT % self.repo_type
        self.assertNotIn(ERROR_MESSAGE, check_output(reinit_command))

        # Checkout.
        self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'),
                         self.dev_store_account_)
        checkout_command = MLGIT_CHECKOUT % (self.repo_type, DATASET_TAG)
        self.assertNotIn(ERROR_MESSAGE, check_output(checkout_command))

        ws_path = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
        for file_name in ('newfile0', 'newfile1', 'newfile2', 'newfile3',
                          'newfile4'):
            self.assertTrue(os.path.isfile(os.path.join(ws_path, file_name)))
Exemple #17
0
    def test_05_add_command_without_file_added(self):
        """Check the add command output when it is run twice with no new data.

        The first add must not print an error; the second one (with
        ``--bumpversion``) must print the ``INFO_NO_NEW_DATA_TO_ADD``
        message.
        """
        self.set_up_add()
        create_spec(self, DATASETS, self.tmp_dir)

        plain_add = MLGIT_ADD % (DATASETS, DATASET_NAME, '')
        self.assertNotIn(ERROR_MESSAGE, check_output(plain_add))

        nothing_new = output_messages['INFO_NO_NEW_DATA_TO_ADD']
        bump_add = MLGIT_ADD % (DATASETS, DATASET_NAME, '--bumpversion')
        self.assertIn(nothing_new, check_output(bump_add))
Exemple #18
0
 def test_16_add_command_with_metric_file_empty(self):
     """Check that ``--metrics-file=`` with no value yields ``ERROR_EMPTY_VALUE``."""
     repo_type = MODELS
     self.set_up_add(repo_type)
     create_spec(self, repo_type, self.tmp_dir)

     entity_name = '{}-ex'.format(repo_type)
     workspace = os.path.join(self.tmp_dir, repo_type, entity_name)
     data_dir = os.path.join(workspace, 'data')
     os.makedirs(data_dir)
     create_file(workspace, 'file1', '0')

     expected_error = output_messages['ERROR_EMPTY_VALUE']
     add_command = MLGIT_ADD % (repo_type, entity_name, '--metrics-file=')
     self.assertIn(expected_error, check_output(add_command))
Exemple #19
0
 def set_up_test(self, repo_type=DATASETS, with_metrics=False):
     """Add and commit ``<repo_type>-ex`` using ``self.COMMIT_MESSAGE``.

     :param repo_type: entity type to initialise (defaults to ``DATASETS``).
     :param with_metrics: when true, the add command is given two
         ``--metric`` options (Accuracy 1, Recall 2).
     """
     entity = '{}-ex'.format(repo_type)
     init_repository(repo_type, self)
     create_spec(self, repo_type, self.tmp_dir)

     metrics_options = (
         '--metric Accuracy 1 --metric Recall 2' if with_metrics else '')

     adding_message = output_messages['INFO_ADDING_PATH'] % repo_type
     add_command = MLGIT_ADD % (repo_type, entity, metrics_options)
     self.assertIn(adding_message, check_output(add_command))

     metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, repo_type,
                                 'metadata')
     commit_message = output_messages['INFO_COMMIT_REPO'] % (
         metadata_dir, entity)
     commit_option = '-m ' + self.COMMIT_MESSAGE
     commit_command = MLGIT_COMMIT % (repo_type, entity, commit_option)
     self.assertIn(commit_message, check_output(commit_command))
Exemple #20
0
 def test_17_add_command_with_empty_metric_file(self):
     """Check that an empty metrics CSV yields ``ERROR_INVALID_METRICS_FILE``."""
     repo_type = MODELS
     self.set_up_add(repo_type)
     create_spec(self, repo_type, self.tmp_dir)

     entity_name = '{}-ex'.format(repo_type)
     workspace = os.path.join(self.tmp_dir, repo_type, entity_name)
     data_dir = os.path.join(workspace, 'data')
     os.makedirs(data_dir)
     create_file(workspace, 'file1', '0')

     # Create the metrics file with no content at all.
     csv_file = os.path.join(self.tmp_dir, 'metrics.csv')
     with open(csv_file, 'wt') as empty_csv:
         empty_csv.write('')

     metrics_options = '--metrics-file="{}"'.format(csv_file)
     expected_error = output_messages['ERROR_INVALID_METRICS_FILE']
     add_command = MLGIT_ADD % (repo_type, entity_name, metrics_options)
     self.assertIn(expected_error, check_output(add_command))
Exemple #21
0
 def test_06_commit_with_large_version_number(self):
     """Check that ``--version=9999999999`` is rejected for every entity type."""
     init_repository('dataset', self)
     create_spec(self, 'dataset', self.tmp_dir)

     rejection = messages[96] % '9999999999'
     for entity_type in ('dataset', 'model', 'labels'):
         commit_command = MLGIT_COMMIT % (entity_type, entity_type + '-ex',
                                          ' --version=9999999999')
         self.assertIn(rejection, check_output(commit_command))
Exemple #22
0
    def set_up_test(self, entity):
        """Create ``<entity>-ex`` with four files and commit it via the API.

        :param entity: entity type to initialise.
        """
        entity_name = entity + '-ex'

        init_repository(entity, self)
        workspace = os.path.join(self.tmp_dir, entity, entity_name)
        os.makedirs(workspace, exist_ok=True)
        create_spec(self, entity, self.tmp_dir, 20, STRICT)
        os.makedirs(os.path.join(workspace, 'data'), exist_ok=True)

        file_specs = (
            ('file1', '0'),
            ('file2', '1'),
            ('file3', 'a'),
            ('file4', 'b'),
        )
        for file_name, content in file_specs:
            self.create_file(workspace, file_name, content)

        api.add(entity, entity_name, bumpversion=True, fsck=False,
                file_path=['file'])
        api.commit(entity, entity_name)
Exemple #23
0
    def set_up_unlock(self, entity_type, mutability_type):
        """Create ``<entity_type>-ex`` with a 2048-byte file and add it.

        :param entity_type: type of the entity to create.
        :param mutability_type: mutability value handed to ``create_spec``.
        """
        entity_name = entity_type + '-ex'

        init_repository(entity_type, self)
        workspace = os.path.join(entity_type, entity_name)
        create_spec(self, entity_type, self.tmp_dir, 1,
                    mutability=mutability_type)
        os.makedirs(os.path.join(workspace, 'data'))

        target_path = os.path.join(workspace, self.file)
        with open(target_path, 'w') as handle:
            handle.write('0' * 2048)

        add_command = MLGIT_ADD % (entity_type, entity_name, '--bumpversion')
        self.assertNotIn(ERROR_MESSAGE, check_output(add_command))
Exemple #24
0
    def test_05_commit_command_with_deprecated_version_number(self):
        """Check that ``--version-number`` still commits but prints a notice.

        The commit output must contain both ``messages[106]`` (formatted
        with the old and new option names) and the commit message
        ``messages[17]``.
        """
        init_repository('dataset', self)
        create_spec(self, 'dataset', self.tmp_dir)

        workspace = os.path.join(self.tmp_dir, 'dataset', 'dataset-ex')
        data_dir = os.path.join(workspace, 'data')
        os.makedirs(data_dir)
        create_file(workspace, 'file1', '0')

        adding_message = messages[13] % 'dataset'
        add_command = MLGIT_ADD % ('dataset', 'dataset-ex', "")
        self.assertIn(adding_message, check_output(add_command))

        commit_command = MLGIT_COMMIT % ('dataset', 'dataset-ex',
                                         '--version-number=2')
        result = check_output(commit_command)

        deprecation_notice = messages[106] % ('--version-number', '--version')
        self.assertIn(deprecation_notice, result)

        metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset',
                                    'metadata')
        entity_path = os.path.join('computer-vision', 'images', 'dataset-ex')
        self.assertIn(messages[17] % (metadata_dir, entity_path), result)
Exemple #25
0
    def test_07_add_command_with_multiple_files(self):
        """Check the index after adding a single file, a folder, and all.

        Three successive add commands are issued (one file path, the
        ``data`` folder, then no path); the index content is verified
        after each of them.
        """
        self.set_up_add()

        create_spec(self, 'dataset', self.tmp_dir)
        workspace = os.path.join(self.tmp_dir, 'dataset', 'dataset-ex')
        os.makedirs(os.path.join(workspace, 'data'))

        for file_name, content in (('file1', '0'), ('file2', '1'),
                                   ('file3', '1')):
            create_file(workspace, file_name, content)

        adding_message = messages[13] % 'dataset'
        index = os.path.join(ML_GIT_DIR, 'dataset', 'index', 'metadata',
                             'dataset-ex', 'INDEX.yaml')

        # Step 1: add only data/file1.
        single_file = os.path.join('data', 'file1')
        add_single = MLGIT_ADD % ('dataset', 'dataset-ex', single_file)
        self.assertIn(adding_message, check_output(add_single))
        self._check_index(index, ['data/file1'], ['data/file2', 'data/file3'])

        # Step 2: add the whole data folder.
        add_folder = MLGIT_ADD % ('dataset', 'dataset-ex', 'data')
        self.assertIn(adding_message, check_output(add_folder))
        self._check_index(index, ['data/file1', 'data/file2', 'data/file3'],
                          [])

        # Step 3: create one more file and add without a path argument.
        create_file(workspace, 'file4', '0')
        add_everything = MLGIT_ADD % ('dataset', 'dataset-ex', '')
        self.assertIn(adding_message, check_output(add_everything))
        expected_files = ['data/file1', 'data/file2', 'data/file3',
                          'data/file4']
        self._check_index(index, expected_files, [])
Exemple #26
0
    def set_up_push(self, create_know_file=False):
        """Prepare a committed entity for the push tests.

        Creates the workspace and spec, initialises the project, registers
        the git remote and a storage configured with username, endpoint
        URL, port and private key, initialises the datasets entity, runs
        the ``add_file`` helper, commits, and checks that the HEAD ref of
        the entity exists.

        :param create_know_file: when true, a file named ``file`` holding
            10011 ``'0'`` characters is written into the entity folder
            before ``add_file`` is called.
        """
        os.makedirs(self.workspace)
        spec_options = {
            'version': 1,
            'mutability': STRICT,
            'storage_type': self.storage_type,
        }
        create_spec(self, self.repo_type, self.tmp_dir, **spec_options)

        # Project initialisation.
        init_message = (output_messages['INFO_INITIALIZED_PROJECT_IN'] %
                        self.tmp_dir)
        self.assertIn(init_message, check_output(MLGIT_INIT))

        # Git remote registration.
        remote_message = output_messages['INFO_ADD_REMOTE'] % (
            GIT_PATH, self.repo_type)
        remote_command = MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)
        self.assertIn(remote_message, check_output(remote_command))

        # Storage registration, with the connection options inlined after
        # the bucket name.
        storage_message = output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] % (
            self.storage_type, self.bucket)
        storage_arguments = (
            'mlgit --username=mlgit_user '
            '--endpoint-url=127.0.0.1 --port=9922 --private-key='
        ) + FAKE_SSH_KEY_PATH
        storage_command = 'ml-git repository storage add %s --type=%s' % (
            storage_arguments, self.storage_type)
        self.assertIn(storage_message, check_output(storage_command))

        # Entity initialisation must not report an error.
        entity_init_command = MLGIT_ENTITY_INIT % DATASETS
        self.assertNotIn(ERROR_MESSAGE, check_output(entity_init_command))

        if create_know_file:
            known_file = os.path.join(self.repo_type, DATASET_NAME, 'file')
            with open(known_file, 'wt') as handle:
                handle.write('0' * 10011)
        add_file(self, self.repo_type, '', 'new')

        metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, self.repo_type,
                                    'metadata')
        commit_message = output_messages['INFO_COMMIT_REPO'] % (
            metadata_dir, DATASET_NAME)
        commit_command = MLGIT_COMMIT % (self.repo_type, DATASET_NAME, '')
        self.assertIn(commit_message, check_output(commit_command))

        head_ref = os.path.join(ML_GIT_DIR, self.repo_type, 'refs',
                                DATASET_NAME, 'HEAD')
        self.assertTrue(os.path.exists(head_ref))
Exemple #27
0
 def test_04_list_tags_without_similar_tags(self):
     """Check that listing tags of one entity omits a similarly named one.

     A second entity, ``datasets-ex2``, is committed and pushed. Its name
     must not appear in the tag list of ``<DATASETS>-ex``, but must appear
     in its own tag list.
     """
     entity_type = DATASETS
     self._list_tag_entity(entity_type)

     similar_entity = 'datasets-ex2'
     os.makedirs(os.path.join(entity_type, similar_entity), exist_ok=True)
     create_spec(self, entity_type, self.tmp_dir,
                 artifact_name=similar_entity)
     add_file(self, entity_type, '--bumpversion', 'new',
              artifact_name=similar_entity)

     metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type,
                                 'metadata')
     commit_message = output_messages['INFO_COMMIT_REPO'] % (
         metadata_dir, similar_entity)
     commit_command = MLGIT_COMMIT % (entity_type, similar_entity, '')
     self.assertIn(commit_message, check_output(commit_command))

     check_output(MLGIT_PUSH % (entity_type, similar_entity))

     original_entity = entity_type + '-ex'
     tags_of_original = check_output(
         MLGIT_TAG_LIST % (entity_type, original_entity))
     self.assertNotIn(similar_entity, tags_of_original)

     tags_of_similar = check_output(
         MLGIT_TAG_LIST % (entity_type, similar_entity))
     self.assertIn(similar_entity, tags_of_similar)
Exemple #28
0
    def test_04_commit_command_with_version(self):
        """Check the ``--version`` option of the commit command.

        After a first regular commit, a second file is added and the commit
        is attempted with two invalid versions (``-10`` and ``test``), both
        of which must be rejected, and finally with ``--version=2``, which
        must succeed.
        """
        init_repository('dataset', self)
        create_spec(self, 'dataset', self.tmp_dir)
        workspace = os.path.join(self.tmp_dir, 'dataset', 'dataset-ex')
        os.makedirs(os.path.join(workspace, 'data'))

        adding_message = messages[13] % 'dataset'
        metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset',
                                    'metadata')
        entity_path = os.path.join('computer-vision', 'images', 'dataset-ex')

        # First version: add and commit without any option.
        create_file(workspace, 'file1', '0')
        self.assertIn(adding_message,
                      check_output(MLGIT_ADD % ('dataset', 'dataset-ex', "")))
        self.assertIn(messages[17] % (metadata_dir, entity_path),
                      check_output(MLGIT_COMMIT % ('dataset', 'dataset-ex', '')))

        # Stage a second file for the versioned commits below.
        create_file(workspace, 'file2', '1')
        self.assertIn(adding_message,
                      check_output(MLGIT_ADD % ('dataset', 'dataset-ex', "")))

        # Invalid version values must be rejected.
        invalid_versions = (
            ('-10', ' --version=-10'),
            ('test', '--version=test'),
        )
        for bad_value, option in invalid_versions:
            rejection = messages[96] % bad_value
            commit_command = MLGIT_COMMIT % ('dataset', 'dataset-ex', option)
            self.assertIn(rejection, check_output(commit_command))

        # A valid explicit version is accepted.
        versioned_commit = MLGIT_COMMIT % ('dataset', 'dataset-ex',
                                           '--version=2')
        self.assertIn(messages[17] % (metadata_dir, entity_path),
                      check_output(versioned_commit))
    def test_20_model_related(self):
        """Publish a dataset, labels and a model committed with references to
        both, wipe the local copies, then check that checking out the model
        tag with ' -d -l' leaves the three entity folders on disk again.
        """
        remote_url = os.path.join(self.tmp_dir, GIT_PATH)
        mlgit_dir = os.path.join(self.tmp_dir, '.ml-git')
        push_summary = '2.00/2.00'
        every_entity = (MODELS, DATASETS, LABELS)

        def metadata_dir(entity):
            # Metadata folder reported by the init and commit messages.
            return os.path.join(self.tmp_dir, '.ml-git', entity, 'metadata')

        def init_metadata(entity):
            # Run the entity init command and check the reported paths.
            expected = output_messages['INFO_METADATA_INIT'] % (
                remote_url, metadata_dir(entity))
            self.assertIn(expected, check_output(MLGIT_ENTITY_INIT % entity))

        def bootstrap(entity):
            # Register remote and storage, init the metadata, then create the
            # '<entity>-ex' workspace holding a spec and a 1024-byte 'file1'.
            # Returns the workspace path (relative, as passed to makedirs).
            expected = output_messages['INFO_ADD_REMOTE'] % (remote_url, entity)
            self.assertIn(
                expected,
                check_output(MLGIT_REMOTE_ADD % (entity, remote_url)))
            expected = output_messages['INFO_ADD_STORAGE'] % (
                STORAGE_TYPE, BUCKET_NAME, PROFILE)
            self.assertIn(
                expected,
                check_output(MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE)))
            init_metadata(entity)
            edit_config_yaml(mlgit_dir)
            workspace = os.path.join(entity, entity + '-ex')
            os.makedirs(workspace)
            create_spec(self, entity, self.tmp_dir, 1)
            payload_path = os.path.join(self.tmp_dir, workspace, 'file1')
            with open(payload_path, 'wb') as payload:
                payload.write(b'0' * 1024)
            return workspace

        def publish(entity, name, commit_option='', commit_suffix=''):
            # add (with --bumpversion) -> commit -> push, asserting on each
            # command's output. `commit_suffix` is appended after the commit
            # command has been formatted.
            expected = output_messages['INFO_ADDING_PATH'] % entity
            self.assertIn(
                expected,
                check_output(MLGIT_ADD % (entity, name, '--bumpversion')))
            expected = output_messages['INFO_COMMIT_REPO'] % (
                metadata_dir(entity), name)
            self.assertIn(
                expected,
                check_output(MLGIT_COMMIT % (entity, name, commit_option) +
                             commit_suffix))
            self.assertIn(push_summary,
                          check_output(MLGIT_PUSH % (entity, name)))

        self.assertIn(
            output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir,
            check_output(MLGIT_INIT))

        # The model workspace is prepared first but published last, once the
        # dataset and labels it refers to have been pushed.
        workspace_model = bootstrap(MODELS)

        workspace_dataset = bootstrap(DATASETS)
        publish(DATASETS, DATASET_NAME)

        workspace_labels = bootstrap(LABELS)
        publish(LABELS, LABELS + '-ex')

        publish(MODELS, MODELS + '-ex',
                commit_option='--dataset=datasets-ex',
                commit_suffix=' --labels=labels-ex')

        # Make the workspace files writable, then remove every local copy.
        for workspace in (workspace_model, workspace_dataset,
                          workspace_labels):
            set_write_read(os.path.join(self.tmp_dir, workspace, 'file1'))
        if not sys.platform.startswith('linux'):
            recursive_write_read(mlgit_dir)
        for entity in every_entity:
            clear(os.path.join(self.tmp_dir, entity))
        for entity in every_entity:
            clear(os.path.join(mlgit_dir, entity))

        # Only the model metadata is re-initialised before the checkout.
        init_metadata(MODELS)
        checkout_command = MLGIT_CHECKOUT % (
            MODELS, 'computer-vision__images__models-ex__2') + ' -d -l'
        self.assertNotIn(ERROR_MESSAGE, check_output(checkout_command))

        for entity in every_entity:
            self.assertTrue(
                os.path.exists(os.path.join(self.tmp_dir, entity)))
Exemple #30
0
    def test_20_model_related(self):
        """Push a dataset, labels and a model committed with references to
        both, remove the local copies, then check that checking out the model
        tag with ' -d -l' completes without an error message and leaves the
        three entity folders on disk.
        """
        model = 'model'
        dataset = 'dataset'
        labels = 'labels'
        version = 1
        git_server = os.path.join(self.tmp_dir, GIT_PATH)

        self.assertIn(messages[0], check_output(MLGIT_INIT))

        # Model: remote, store, metadata, then a workspace with a 1024-byte file.
        self.assertIn(messages[2] % (git_server, model), check_output(MLGIT_REMOTE_ADD % (model, git_server)))
        self.assertIn(messages[7] % (STORE_TYPE, BUCKET_NAME, PROFILE),
                      check_output(MLGIT_STORE_ADD % (BUCKET_NAME, PROFILE)))
        self.assertIn(messages[8] % (git_server, os.path.join(self.tmp_dir, '.ml-git', model, 'metadata')),
                      check_output(MLGIT_ENTITY_INIT % model))
        edit_config_yaml(os.path.join(self.tmp_dir, '.ml-git'))
        workspace_model = os.path.join(model, model + '-ex')
        os.makedirs(workspace_model)
        create_spec(self, model, self.tmp_dir, version)
        with open(os.path.join(self.tmp_dir, workspace_model, 'file1'), 'wb') as z:
            z.write(b'0' * 1024)

        # Dataset: same setup, followed by add / commit / push.
        self.assertIn(messages[2] % (git_server, dataset), check_output(MLGIT_REMOTE_ADD % (dataset, git_server)))
        self.assertIn(messages[7] % (STORE_TYPE, BUCKET_NAME, PROFILE),
                      check_output(MLGIT_STORE_ADD % (BUCKET_NAME, PROFILE)))
        self.assertIn(messages[8] % (git_server, os.path.join(self.tmp_dir, '.ml-git', dataset, 'metadata')),
                      check_output(MLGIT_ENTITY_INIT % dataset))
        edit_config_yaml(os.path.join(self.tmp_dir, '.ml-git'))
        workspace_dataset = os.path.join(dataset, dataset + '-ex')
        os.makedirs(workspace_dataset)
        create_spec(self, dataset, self.tmp_dir, version)
        with open(os.path.join(self.tmp_dir, workspace_dataset, 'file1'), 'wb') as z:
            z.write(b'0' * 1024)

        self.assertIn(messages[13] % dataset, check_output(MLGIT_ADD % (dataset, 'dataset-ex', '--bumpversion')))
        self.assertIn(messages[17] % (os.path.join(self.tmp_dir, '.ml-git', dataset, 'metadata'),
                                      os.path.join('computer-vision', 'images', 'dataset-ex')),
                      check_output(MLGIT_COMMIT % (dataset, 'dataset-ex', '')))
        self.assertIn(messages[47], check_output(MLGIT_PUSH % (dataset, 'dataset-ex')))

        # Labels: same setup, followed by add / commit / push.
        self.assertIn(messages[2] % (git_server, labels), check_output(MLGIT_REMOTE_ADD % (labels, git_server)))
        self.assertIn(messages[7] % (STORE_TYPE, BUCKET_NAME, PROFILE),
                      check_output(MLGIT_STORE_ADD % (BUCKET_NAME, PROFILE)))
        self.assertIn(messages[8] % (git_server, os.path.join(self.tmp_dir, '.ml-git', labels, 'metadata')),
                      check_output(MLGIT_ENTITY_INIT % labels))
        edit_config_yaml(os.path.join(self.tmp_dir, '.ml-git'))
        workspace_labels = os.path.join(labels, labels + '-ex')
        os.makedirs(workspace_labels)
        create_spec(self, labels, self.tmp_dir, version)
        with open(os.path.join(self.tmp_dir, workspace_labels, 'file1'), 'wb') as z:
            z.write(b'0' * 1024)

        self.assertIn(messages[15], check_output(MLGIT_ADD % (labels, 'labels-ex', '--bumpversion')))
        self.assertIn(messages[17] % (os.path.join(self.tmp_dir, '.ml-git', labels, 'metadata'),
                                      os.path.join('computer-vision', 'images', 'labels-ex')),
                      check_output(MLGIT_COMMIT % (labels, 'labels-ex', '')))
        self.assertIn(messages[47], check_output(MLGIT_PUSH % (labels, 'labels-ex')))

        # Model is published last so its commit can reference the dataset and
        # labels pushed above.
        self.assertIn(messages[14], check_output(MLGIT_ADD % (model, 'model-ex', '--bumpversion')))
        self.assertIn(messages[17] % (os.path.join(self.tmp_dir, '.ml-git', model, 'metadata'),
                                      os.path.join('computer-vision', 'images', 'model-ex')),
                      check_output(MLGIT_COMMIT % (model, 'model-ex', '--dataset=dataset-ex') + ' --labels=labels-ex'))
        self.assertIn(messages[47], check_output(MLGIT_PUSH % (model, 'model-ex')))

        # Make the workspace files writable, then remove every local copy.
        set_write_read(os.path.join(self.tmp_dir, workspace_model, 'file1'))
        set_write_read(os.path.join(self.tmp_dir, workspace_dataset, 'file1'))
        set_write_read(os.path.join(self.tmp_dir, workspace_labels, 'file1'))
        if not sys.platform.startswith('linux'):
            recursive_write_read(os.path.join(self.tmp_dir, '.ml-git'))
        clear(os.path.join(self.tmp_dir, model))
        clear(os.path.join(self.tmp_dir, dataset))
        clear(os.path.join(self.tmp_dir, labels))
        clear(os.path.join(self.tmp_dir, '.ml-git', model))
        clear(os.path.join(self.tmp_dir, '.ml-git', dataset))
        clear(os.path.join(self.tmp_dir, '.ml-git', labels))

        # Only the model metadata is re-initialised before the checkout.
        self.assertIn(messages[8] % (git_server, os.path.join(self.tmp_dir, '.ml-git', model, 'metadata')),
                      check_output(MLGIT_ENTITY_INIT % model))
        # An empty-string needle matches any output, so assert on the absence
        # of the error marker instead to make this check able to fail.
        self.assertNotIn(ERROR_MESSAGE,
                         check_output(MLGIT_CHECKOUT % (model, 'computer-vision__images__model-ex__2')
                                      + ' -d -l'))
        self.assertTrue(os.path.exists(os.path.join(self.tmp_dir, model)))
        self.assertTrue(os.path.exists(os.path.join(self.tmp_dir, dataset)))
        self.assertTrue(os.path.exists(os.path.join(self.tmp_dir, labels)))