Esempio n. 1
0
    def test_28_checkout_entity_with_ignore_file(self):
        """Push a dataset that carries an ignore file and check it out again.

        After the checkout, the ignore file and ``data/file1`` are expected
        in the workspace while ``data/image.png`` is expected to be absent.
        """
        entity = DATASETS
        init_repository(entity, self)
        workspace = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
        data_dir = os.path.join(workspace, 'data')
        os.mkdir(data_dir)
        for file_name in ('image.png', 'file1'):
            create_file(workspace, file_name, '0')
        create_ignore_file(workspace)

        # Publish the entity: add (bumping the version), commit, push.
        publish_commands = (
            MLGIT_ADD % (DATASETS, DATASET_NAME, '--bumpversion'),
            MLGIT_COMMIT % (DATASETS, DATASET_NAME, ''),
            MLGIT_PUSH % (DATASETS, DATASET_NAME),
        )
        for command in publish_commands:
            self.assertNotIn(ERROR_MESSAGE, check_output(command))

        # Remove the local entity metadata and workspace before restoring.
        clear(os.path.join(self.tmp_dir, ML_GIT_DIR, entity))
        clear(workspace)

        ignore_file = os.path.join(workspace, MLGIT_IGNORE_FILE_NAME)
        restore_commands = (
            MLGIT_ENTITY_INIT % entity,
            MLGIT_CHECKOUT % (entity, DATASET_NAME),
        )
        for command in restore_commands:
            self.assertNotIn(ERROR_MESSAGE, check_output(command))

        self.assertTrue(os.path.exists(ignore_file))
        self.assertTrue(os.path.exists(os.path.join(data_dir, 'file1')))
        self.assertFalse(os.path.exists(os.path.join(data_dir, 'image.png')))
Esempio n. 2
0
    def _push_entity(self, entity_type):
        """Commit and push ``<entity_type>-ex`` and verify the pushed state.

        Checks the commit message, the presence of the HEAD ref file, that
        the push reports no error, that the expected object exists under
        ``MINIO_BUCKET_PATH`` and that ``git describe --tags`` (run inside
        the metadata directory) reports version 2 of the entity.

        The working directory is only changed for the duration of the final
        checks and is restored afterwards, even when an assertion fails, so
        the change does not leak into whatever runs next.

        :param entity_type: entity type passed to the ml-git commands.
        """
        entity_name = entity_type + '-ex'
        bucket_object = os.path.join(
            MINIO_BUCKET_PATH,
            'zdj7WWjGAAJ8gdky5FKcVLfd63aiRUGb8fkc8We2bvsp9WW12')

        clear(bucket_object)
        init_repository(entity_type, self)
        add_file(self, entity_type, '--bumpversion', 'new', file_content='0')
        metadata_path = os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type,
                                     'metadata')
        expected_commit_message = messages[17] % (
            metadata_path,
            os.path.join('computer-vision', 'images', entity_name))
        self.assertIn(
            expected_commit_message,
            check_output(MLGIT_COMMIT % (entity_type, entity_name, '')))

        head_file = os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type, 'refs',
                                 entity_name, 'HEAD')
        self.assertTrue(os.path.exists(head_file))

        self.assertNotIn(
            ERROR_MESSAGE,
            check_output(MLGIT_PUSH % (entity_type, entity_name)))

        # ``git describe`` has to run inside the metadata repository.
        previous_cwd = os.getcwd()
        os.chdir(metadata_path)
        try:
            self.assertTrue(os.path.exists(bucket_object))
            self.assertIn('computer-vision__images__' + entity_name + '__2',
                          check_output('git describe --tags'))
        finally:
            os.chdir(previous_cwd)
Esempio n. 3
0
 def _clear_workspace(self, entity):
     """Clear the entity's ml-git directory and workspace, then re-init it.

     The entity init command output must contain the INFO_METADATA_INIT
     message for the git path and the entity's metadata directory.
     """
     entity_git_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, entity)
     clear(entity_git_dir)
     clear(os.path.join(self.tmp_dir, entity))

     expected_message = output_messages['INFO_METADATA_INIT'] % (
         os.path.join(self.tmp_dir, GIT_PATH),
         os.path.join(entity_git_dir, 'metadata'),
     )
     self.assertIn(expected_message,
                   check_output(MLGIT_ENTITY_INIT % entity))
Esempio n. 4
0
    def set_up_add_test(self, entity=DATASETS):
        """Reset the repository for ``entity`` and create two workspace files.

        :param entity: entity type to initialise (defaults to ``DATASETS``).
        """
        stale_paths = (
            os.path.join(self.tmp_dir, ML_GIT_DIR),
            os.path.join(self.tmp_dir, entity),
        )
        for stale_path in stale_paths:
            clear(stale_path)
        init_repository(entity, self)

        for file_name, content in (('file', '0'), ('file2', '1')):
            self.create_file_in_ws(entity, file_name, content)
Esempio n. 5
0
 def test_07_gc_basic_flow(self):
     """Run the repository gc, then verify a full add/push/checkout cycle.

     After the gc result is checked, a 1 KiB file is added as version 3,
     pushed, the local state is cleared, and the file must be present
     again once version 3 is checked out.
     """
     entity = 'dataset'
     entity_name = entity + '-ex'
     self.set_up_gc(entity)
     original_size, number_of_files = self._get_metadata_info()
     gc_output = check_output(MLGIT_REPOSITORY_GC)
     self._check_result(gc_output,
                        entity,
                        original_size,
                        number_of_files,
                        expected_removed_files=3,
                        expected_reclaimed_space='2.1 kB')

     new_file = os.path.join(self.tmp_dir, 'dataset', 'computer-vision',
                             'images', 'dataset-ex', 'file-after-gc')
     with open(new_file, 'wb') as handle:
         handle.write(b'1' * 1024)

     # Publish the new file as version 3.
     publish_commands = (
         MLGIT_ADD % (entity, entity_name, ''),
         MLGIT_COMMIT % (entity, entity_name, '--version=3'),
         MLGIT_PUSH % (entity, entity_name),
     )
     for command in publish_commands:
         self.assertNotIn(ERROR_MESSAGE, check_output(command))

     # Drop local state so the checkout has to restore the file.
     clear(os.path.join(self.tmp_dir, ML_GIT_DIR, entity))
     clear(os.path.join(self.tmp_dir, entity))

     restore_commands = (
         MLGIT_ENTITY_INIT % entity,
         MLGIT_CHECKOUT % (entity, entity_name + ' --version=3'),
     )
     for command in restore_commands:
         self.assertNotIn(ERROR_MESSAGE, check_output(command))
     self.assertTrue(os.path.exists(new_file))
Esempio n. 6
0
 def set_up_checkout(self, entity):
     """Add, commit and push two files, then reset local state.

     Two files are created under ``dataset-ex/data`` in the workspace,
     added, committed and pushed. Afterwards the entity's ml-git directory
     and workspace are cleared and the entity metadata is initialised
     again.

     :param entity: entity type used for the commit, push and init steps.
     """
     # NOTE(review): the status set-up, the data directory and the add
     # command use the literal 'dataset' while the remaining steps use
     # ``entity`` -- presumably callers only pass 'dataset'; confirm.
     entity_name = entity + '-ex'
     entity_git_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, entity)
     metadata_path = os.path.join(entity_git_dir, 'metadata')
     workspace = os.path.join(self.tmp_dir, entity)

     self.set_up_status('dataset')
     data_path = os.path.join(workspace, 'dataset-ex', 'data')
     os.makedirs(data_path, exist_ok=True)
     for file_name in ('file', 'file2'):
         create_file(data_path, file_name, '0', '')

     expected_add_message = messages[13] % 'dataset'
     self.assertIn(expected_add_message,
                   check_output(MLGIT_ADD % ('dataset', 'dataset-ex', '')))

     expected_commit_message = messages[17] % (
         metadata_path,
         os.path.join('computer-vision', 'images', entity_name))
     self.assertIn(expected_commit_message,
                   check_output(MLGIT_COMMIT % (entity, entity_name, '')))

     head_file = os.path.join(entity_git_dir, 'refs', entity_name, 'HEAD')
     self.assertTrue(os.path.exists(head_file))
     self.assertNotIn(ERROR_MESSAGE,
                      check_output(MLGIT_PUSH % (entity, entity_name)))

     clear(entity_git_dir)
     clear(workspace)
     expected_init_message = messages[8] % (
         os.path.join(self.tmp_dir, GIT_PATH), metadata_path)
     self.assertIn(expected_init_message,
                   check_output(MLGIT_ENTITY_INIT % entity))
Esempio n. 7
0
    def test_08_push_after_remote_del(self):
        """Push must fail with ERROR_REMOTE_NOT_FOUND once the remote is gone.

        The expected object must not appear under ``MINIO_BUCKET_PATH``
        after the failed push.
        """
        bucket_object = os.path.join(
            MINIO_BUCKET_PATH,
            'zdj7WWjGAAJ8gdky5FKcVLfd63aiRUGb8fkc8We2bvsp9WW12')
        clear(bucket_object)

        entity_type = DATASETS
        entity_name = entity_type + '-ex'
        init_repository(entity_type, self)
        add_file(self, entity_type, '--bumpversion', 'new', file_content='0')

        entity_git_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type)
        metadata_path = os.path.join(entity_git_dir, 'metadata')
        expected_commit_message = output_messages['INFO_COMMIT_REPO'] % (
            metadata_path, entity_name)
        self.assertIn(
            expected_commit_message,
            check_output(MLGIT_COMMIT % (entity_type, entity_name, '')))

        head_file = os.path.join(entity_git_dir, 'refs', entity_name, 'HEAD')
        self.assertTrue(os.path.exists(head_file))

        self._remote_del(entity_type)

        self.assertIn(
            output_messages['ERROR_REMOTE_NOT_FOUND'],
            check_output(MLGIT_PUSH % (entity_type, entity_name)))
        self.assertFalse(os.path.exists(bucket_object))
Esempio n. 8
0
 def set_up_checkout(self, entity):
     """Add, commit and push two files, then reset local state.

     Two files are created under ``<DATASET_NAME>/data`` in the workspace,
     added, committed and pushed. Afterwards the entity's ml-git directory
     and workspace are cleared and the entity metadata is initialised
     again.

     :param entity: entity type used for the commit, push and init steps.
     """
     # NOTE(review): the status set-up, the data directory and the add
     # command use DATASETS / DATASET_NAME while the remaining steps use
     # ``entity`` -- presumably callers only pass DATASETS; confirm.
     entity_name = entity + '-ex'
     entity_git_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, entity)
     metadata_path = os.path.join(entity_git_dir, 'metadata')
     workspace = os.path.join(self.tmp_dir, entity)

     self.set_up_status(DATASETS)
     data_path = os.path.join(workspace, DATASET_NAME, 'data')
     os.makedirs(data_path, exist_ok=True)
     for file_name in ('file', 'file2'):
         create_file(data_path, file_name, '0', '')

     expected_add_message = output_messages['INFO_ADDING_PATH'] % DATASETS
     self.assertIn(expected_add_message,
                   check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, '')))

     expected_commit_message = output_messages['INFO_COMMIT_REPO'] % (
         metadata_path, entity_name)
     self.assertIn(expected_commit_message,
                   check_output(MLGIT_COMMIT % (entity, entity_name, '')))

     head_file = os.path.join(entity_git_dir, 'refs', entity_name, 'HEAD')
     self.assertTrue(os.path.exists(head_file))
     self.assertNotIn(ERROR_MESSAGE,
                      check_output(MLGIT_PUSH % (entity, entity_name)))

     clear(entity_git_dir)
     clear(workspace)
     expected_init_message = output_messages['INFO_METADATA_INIT'] % (
         os.path.join(self.tmp_dir, GIT_PATH), metadata_path)
     self.assertIn(expected_init_message,
                   check_output(MLGIT_ENTITY_INIT % entity))
Esempio n. 9
0
 def test_05_gc_deleted_entity(self):
     """Run the gc after the LABELS workspace directory has been cleared.

     The gc output must mention LABELS in the INFO_STARTING_GC message and
     the DATASETS result must match the expected removal figures.
     """
     for entity_type in (DATASETS, LABELS):
         self.set_up_gc(entity_type)
     original_size, number_of_files = self._get_metadata_info()

     clear(os.path.join(self.tmp_dir, LABELS))
     gc_output = check_output(MLGIT_REPOSITORY_GC)

     self.assertIn(output_messages['INFO_STARTING_GC'] % LABELS, gc_output)
     self._check_result(gc_output,
                        DATASETS,
                        original_size,
                        number_of_files,
                        expected_removed_files=21,
                        expected_reclaimed_space='33.7 kB')
Esempio n. 10
0
    def test_03_checkout(self):
        """Push a dataset to the configured store and check it out again.

        The AZURE_STORAGE_CONNECTION_STRING environment variable is
        compared with ``self.dev_store_account_`` before the push and before
        the checkout. After the checkout ``newfile0`` .. ``newfile4`` must
        exist in the workspace.
        """
        os.makedirs(self.workspace)
        create_spec(self,
                    self.repo_type,
                    self.tmp_dir,
                    version=1,
                    mutability='strict',
                    store_type=self.store_type)

        # Project, remote and store configuration.
        self.assertIn(messages[0], check_output(MLGIT_INIT))
        expected_remote_message = messages[2] % (GIT_PATH, self.repo_type)
        self.assertIn(
            expected_remote_message,
            check_output(MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)))
        expected_store_message = messages[87] % (self.store_type, self.bucket)
        store_add_command = 'ml-git repository store add %s --type=%s' % (
            self.bucket, self.store_type)
        self.assertIn(expected_store_message, check_output(store_add_command))
        self.assertNotIn(ERROR_MESSAGE,
                         check_output(MLGIT_ENTITY_INIT % 'dataset'))

        add_file(self, self.repo_type, '', 'new')
        metadata_path = os.path.join(ML_GIT_DIR, 'dataset', 'metadata')
        expected_commit_message = messages[17] % (
            os.path.join(self.tmp_dir, metadata_path),
            os.path.join('computer-vision', 'images', 'dataset-ex'))
        self.assertIn(
            expected_commit_message,
            check_output(MLGIT_COMMIT % (self.repo_type, 'dataset-ex', '')))

        # This path is relative, so it is resolved against the current
        # working directory.
        head_file = os.path.join(ML_GIT_DIR, 'dataset', 'refs', 'dataset-ex',
                                 'HEAD')
        self.assertTrue(os.path.exists(head_file))
        self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'),
                         self.dev_store_account_)
        self.assertNotIn(
            ERROR_MESSAGE,
            check_output(MLGIT_PUSH % (self.repo_type, 'dataset-ex')))

        # Drop local state, then restore it from the pushed tag.
        clear(self.workspace)
        clear(os.path.join(ML_GIT_DIR, 'dataset'))
        self.assertNotIn(ERROR_MESSAGE,
                         check_output(MLGIT_ENTITY_INIT % self.repo_type))
        self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'),
                         self.dev_store_account_)
        checkout_command = MLGIT_CHECKOUT % (
            self.repo_type, 'computer-vision__images__dataset-ex__1')
        self.assertNotIn(ERROR_MESSAGE, check_output(checkout_command))

        ws_path = os.path.join(self.tmp_dir, 'dataset', 'computer-vision',
                               'images', 'dataset-ex')
        for index in range(5):
            restored_file = os.path.join(ws_path, 'newfile%d' % index)
            self.assertTrue(os.path.isfile(restored_file))
Esempio n. 11
0
 def test_03_checkout_with_two_entities_wit_same_name(self):
     """Checkout by name must fail when two tags share the entity name.

     The error output is expected to list both the ``images`` and the
     ``video`` tag of ``dataset-ex``.
     """
     entity = 'dataset'
     self._create_entity(entity, 'images')
     clear(os.path.join(self.tmp_dir, '.ml-git'))
     self._create_entity(entity, 'video')

     expected_init_message = output_messages['INFO_METADATA_INIT'] % (
         os.path.join(self.tmp_dir, GIT_PATH),
         os.path.join(self.tmp_dir, ML_GIT_DIR, entity, 'metadata'),
     )
     self.assertIn(expected_init_message,
                   check_output(MLGIT_ENTITY_INIT % entity))

     conflicting_tags = (
         'computer-vision__images__dataset-ex__2',
         'computer-vision__video__dataset-ex__2',
     )
     # Each tag is listed on its own line, prefixed with a tab.
     expected_error = (
         output_messages['ERROR_MULTIPLES_ENTITIES_WITH_SAME_NAME'] +
         '\n'.join('\t' + tag for tag in conflicting_tags))
     self.assertIn(expected_error,
                   check_output(MLGIT_CHECKOUT % ('dataset', 'dataset-ex')))
Esempio n. 12
0
 def set_up_checkout(self, entity):
     """Commit and push ``<entity>-ex``, then clear all local state.

     :param entity: entity type to initialise, commit and push.
     """
     configure_global(self, DATASETS)
     init_repository(entity, self)
     add_file(self, entity, '', 'new')

     entity_name = entity + '-ex'
     workspace = os.path.join(self.tmp_dir, entity)
     publish_commands = (
         MLGIT_COMMIT % (entity, entity_name, ''),
         MLGIT_PUSH % (entity, entity_name),
     )
     for command in publish_commands:
         self.assertNotIn(ERROR_MESSAGE, check_output(command))

     for stale_path in (os.path.join(self.tmp_dir, ML_GIT_DIR), workspace):
         clear(stale_path)
Esempio n. 13
0
 def test_06_status_after_delete_file(self):
     """Short status must report two deleted files under ``data/``."""
     self.set_up_checkout(DATASETS)
     self.assertNotIn(
         ERROR_MESSAGE,
         check_output(MLGIT_CHECKOUT % (DATASETS, DATASET_TAG)))

     data_path = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME, 'data')
     for file_name in ('file', 'file2'):
         clear(os.path.join(data_path, file_name))

     status_output = check_output(
         MLGIT_STATUS_SHORT % (DATASETS, DATASET_NAME))
     expected_pattern = (
         DATASET_NO_COMMITS_INFO_REGEX +
         r'Changes to be committed:\s+Deleted: data/\t->\t2 FILES')
     self.assertRegex(status_output, expected_pattern)
Esempio n. 14
0
    def test_03_checkout(self):
        """Push a dataset to the configured storage and check it out again.

        The AZURE_STORAGE_CONNECTION_STRING environment variable is
        compared with ``self.dev_store_account_`` before the push and before
        the checkout. After the checkout ``newfile0`` .. ``newfile4`` must
        exist in the workspace.
        """
        os.makedirs(self.workspace)
        create_spec(self,
                    self.repo_type,
                    self.tmp_dir,
                    version=1,
                    mutability=STRICT,
                    storage_type=self.storage_type)

        # Project, remote and storage configuration.
        expected_init_message = (
            output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir)
        self.assertIn(expected_init_message, check_output(MLGIT_INIT))
        expected_remote_message = output_messages['INFO_ADD_REMOTE'] % (
            GIT_PATH, self.repo_type)
        self.assertIn(
            expected_remote_message,
            check_output(MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)))
        expected_storage_message = (
            output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] %
            (self.storage_type, self.bucket))
        storage_add_command = 'ml-git repository storage add %s --type=%s' % (
            self.bucket, self.storage_type)
        self.assertIn(expected_storage_message,
                      check_output(storage_add_command))
        self.assertNotIn(ERROR_MESSAGE,
                         check_output(MLGIT_ENTITY_INIT % DATASETS))

        add_file(self, self.repo_type, '', 'new')
        metadata_path = os.path.join(ML_GIT_DIR, DATASETS, 'metadata')
        expected_commit_message = output_messages['INFO_COMMIT_REPO'] % (
            os.path.join(self.tmp_dir, metadata_path), DATASET_NAME)
        self.assertIn(
            expected_commit_message,
            check_output(MLGIT_COMMIT % (self.repo_type, DATASET_NAME, '')))

        # This path is relative, so it is resolved against the current
        # working directory.
        head_file = os.path.join(ML_GIT_DIR, DATASETS, 'refs', DATASET_NAME,
                                 'HEAD')
        self.assertTrue(os.path.exists(head_file))
        self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'),
                         self.dev_store_account_)
        self.assertNotIn(
            ERROR_MESSAGE,
            check_output(MLGIT_PUSH % (self.repo_type, DATASET_NAME)))

        # Drop local state, then restore it from the pushed tag.
        clear(self.workspace)
        clear(os.path.join(ML_GIT_DIR, DATASETS))
        self.assertNotIn(ERROR_MESSAGE,
                         check_output(MLGIT_ENTITY_INIT % self.repo_type))
        self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'),
                         self.dev_store_account_)
        self.assertNotIn(
            ERROR_MESSAGE,
            check_output(MLGIT_CHECKOUT % (self.repo_type, DATASET_TAG)))

        ws_path = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
        for index in range(5):
            restored_file = os.path.join(ws_path, 'newfile%d' % index)
            self.assertTrue(os.path.isfile(restored_file))
Esempio n. 15
0
    def test_26_adding_data_based_in_older_tag(self):
        """Create version 3 on top of a checkout of tag 1 and verify it.

        ``newfile5`` is added after checking out tag 1, then tag 1 is
        checked out again and ``newfile6`` is added as version 3. After a
        fresh checkout of tag 3, ``newfile6`` must exist, ``newfile5`` must
        not, and the workspace must hold the expected number of files.
        """
        entity = 'dataset'
        entity_name = entity + '-ex'
        tag_1 = 'computer-vision__images__dataset-ex__1'
        tag_3 = 'computer-vision__images__dataset-ex__3'
        self.set_up_checkout(entity)

        self.assertNotIn(ERROR_MESSAGE,
                         check_output(MLGIT_CHECKOUT % (entity, tag_1)))
        workspace = os.path.join(self.tmp_dir, entity, 'computer-vision',
                                 'images', entity_name)
        create_file(workspace, 'newfile5', '0', file_path='')
        populate_entity_with_new_data(self, entity)

        # Go back to tag 1 and build version 3 from there.
        self.assertNotIn(ERROR_MESSAGE,
                         check_output(MLGIT_CHECKOUT % ('dataset', tag_1)))
        expected_files_in_tag_1 = 6
        self.check_amount_of_files(entity,
                                   expected_files_in_tag_1,
                                   sampling=False)
        create_file(workspace, 'newfile6', '0', file_path='')
        populate_entity_with_new_data(self,
                                      entity,
                                      bumpversion='',
                                      version='--version=3')

        # Drop local state and restore tag 3 from scratch.
        clear(os.path.join(self.tmp_dir, ML_GIT_DIR, entity))
        clear(workspace)
        expected_init_message = messages[8] % (
            os.path.join(self.tmp_dir, GIT_PATH),
            os.path.join(self.tmp_dir, ML_GIT_DIR, entity, 'metadata'))
        self.assertIn(expected_init_message,
                      check_output(MLGIT_ENTITY_INIT % entity))
        self.assertNotIn(ERROR_MESSAGE,
                         check_output(MLGIT_CHECKOUT % (entity, tag_3)))

        entity_dir = os.path.join(self.tmp_dir, entity, 'computer-vision',
                                  'images', entity_name)
        path_of_tag_2_file = os.path.join(entity_dir, 'newfile5')
        path_of_tag_3_file = os.path.join(entity_dir, 'newfile6')
        self.assertFalse(os.path.exists(path_of_tag_2_file))
        self.assertTrue(os.path.exists(path_of_tag_3_file))
        expected_files_in_tag_3 = 7
        self.check_amount_of_files(entity,
                                   expected_files_in_tag_3,
                                   sampling=False)
Esempio n. 16
0
    def _push_entity(self, entity_type):
        """Commit and push ``<entity_type>-ex``, then check the metadata.

        :param entity_type: entity type passed to the ml-git commands.
        """
        entity_name = entity_type + '-ex'
        clear(os.path.join(
            MINIO_BUCKET_PATH,
            'zdj7WWjGAAJ8gdky5FKcVLfd63aiRUGb8fkc8We2bvsp9WW12'))
        init_repository(entity_type, self)
        add_file(self, entity_type, '--bumpversion', 'new', file_content='0')

        entity_git_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type)
        metadata_path = os.path.join(entity_git_dir, 'metadata')
        expected_commit_message = output_messages['INFO_COMMIT_REPO'] % (
            metadata_path, entity_name)
        self.assertIn(
            expected_commit_message,
            check_output(MLGIT_COMMIT % (entity_type, entity_name, '')))

        head_file = os.path.join(entity_git_dir, 'refs', entity_name, 'HEAD')
        self.assertTrue(os.path.exists(head_file))

        self.assertNotIn(
            ERROR_MESSAGE,
            check_output(MLGIT_PUSH % (entity_type, entity_name)))
        self.check_metadata_after_push(entity_type)
Esempio n. 17
0
 def set_up_fetch(self, entity='dataset'):
     """Commit and push ``<entity>-ex``, then reset the local entity state.

     After the push, the entity's ml-git directory and workspace are
     cleared and the entity metadata is initialised again.

     :param entity: entity type to operate on (defaults to ``'dataset'``).
     """
     entity_name = entity + '-ex'
     init_repository(entity, self)
     add_file(self, entity, '', 'new')

     entity_git_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, entity)
     metadata_path = os.path.join(entity_git_dir, 'metadata')
     workspace = os.path.join(self.tmp_dir, entity)

     expected_commit_message = messages[17] % (
         metadata_path,
         os.path.join('computer-vision', 'images', entity_name))
     self.assertIn(expected_commit_message,
                   check_output(MLGIT_COMMIT % (entity, entity_name, '')))

     head_file = os.path.join(entity_git_dir, 'refs', entity_name, 'HEAD')
     self.assertTrue(os.path.exists(head_file))
     self.assertNotIn(ERROR_MESSAGE,
                      check_output(MLGIT_PUSH % (entity, entity_name)))

     clear(entity_git_dir)
     clear(workspace)
     expected_init_message = messages[8] % (
         os.path.join(self.tmp_dir, GIT_PATH), metadata_path)
     self.assertIn(expected_init_message,
                   check_output(MLGIT_ENTITY_INIT % entity))
Esempio n. 18
0
 def _clear_path(self, entity_type=DATASETS):
     """Clear the ml-git directory and the workspace of ``entity_type``.

     :param entity_type: entity type whose paths are cleared
         (defaults to ``DATASETS``).
     """
     ml_git_dir = os.path.join(self.tmp_dir, ML_GIT_DIR)
     entity_dir = os.path.join(self.tmp_dir, entity_type)
     # NOTE(review): the cache directory lives below the ml-git directory
     # cleared just before it, and the '-ex' workspace below the entity
     # directory cleared just after it -- presumably redundant; confirm
     # against ``clear`` before dropping either call.
     paths_in_removal_order = (
         ml_git_dir,
         os.path.join(ml_git_dir, entity_type, 'cache'),
         os.path.join(entity_dir, entity_type + '-ex'),
         entity_dir,
     )
     for path in paths_in_removal_order:
         clear(path)
Esempio n. 19
0
    def test_05_push_with_fail(self):
        """Push with ``--clearonfail`` must report an error.

        One object is cleared from the local hashfs before the push, and
        the bucket is checked to hold no files beforehand.
        """
        clear(SFTP_BUCKET_PATH)
        os.mkdir(SFTP_BUCKET_PATH)
        self.set_up_push(create_know_file=True)

        hashfs_dir = os.path.join(self.tmp_dir, '.ml-git', self.repo_type,
                                  'objects', 'hashfs')
        object_path = os.path.join(
            hashfs_dir, 'i9', '96',
            'zdj7Wi996ViPiddvDGvzjBBACZzw6YfPujBCaPHunVoyiTUCj')
        clear(object_path)

        number_of_files_in_bucket = 0
        self.check_amount_of_files(number_of_files_in_bucket)

        push_command = MLGIT_PUSH % (self.repo_type,
                                     DATASET_NAME + ' --clearonfail')
        self.assertIn(ERROR_MESSAGE, check_output(push_command))
Esempio n. 20
0
 def set_up_checkout(self, entity):
     """Commit and push ``<entity>-ex``, then reset the local entity state.

     After the push, the entity's ml-git directory and workspace are
     cleared and the entity metadata is initialised again.

     :param entity: entity type to operate on.
     """
     entity_name = entity + '-ex'
     init_repository(entity, self)
     add_file(self, entity, '', 'new')

     entity_git_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, entity)
     metadata_path = os.path.join(entity_git_dir, 'metadata')
     workspace = os.path.join(self.tmp_dir, entity)

     expected_commit_message = output_messages['INFO_COMMIT_REPO'] % (
         metadata_path, entity_name)
     self.assertIn(expected_commit_message,
                   check_output(MLGIT_COMMIT % (entity, entity_name, '')))

     head_path = os.path.join(entity_git_dir, 'refs', entity_name, 'HEAD')
     self.assertTrue(os.path.exists(head_path))
     self.assertNotIn(ERROR_MESSAGE,
                      check_output(MLGIT_PUSH % (entity, entity_name)))

     clear(entity_git_dir)
     clear(workspace)
     expected_init_message = output_messages['INFO_METADATA_INIT'] % (
         os.path.join(self.tmp_dir, GIT_PATH), metadata_path)
     self.assertIn(expected_init_message,
                   check_output(MLGIT_ENTITY_INIT % entity))
Esempio n. 21
0
 def test_06_status_after_delete_file(self):
     """Short status must report two deleted files under ``data/``."""
     self.set_up_checkout('dataset')
     checkout_command = MLGIT_CHECKOUT % (
         'dataset', 'computer-vision__images__dataset-ex__1')
     self.assertNotIn(ERROR_MESSAGE, check_output(checkout_command))

     data_path = os.path.join(self.tmp_dir, 'dataset', 'computer-vision',
                              'images', 'dataset-ex', 'data')
     for file_name in ('file', 'file2'):
         clear(os.path.join(data_path, file_name))

     status_output = check_output(
         MLGIT_STATUS_SHORT % ('dataset', 'dataset-ex'))
     expected_pattern = (
         r'Changes to be committed:\s+Deleted: (\s|.)*data/\t->\t2 FILES(\s|.)*'
     )
     self.assertRegex(status_output, expected_pattern)
Esempio n. 22
0
    def test_03_checkout(self):
        """Push the dataset, clear local state and check it out again.

        After the checkout the workspace directory must exist and contain
        ``newfile0`` and ``newfile1``.
        """
        self.set_up_push()
        self.assertNotIn(
            ERROR_MESSAGE,
            check_output(MLGIT_PUSH % (self.repo_type, DATASET_NAME)))

        for stale_path in (self.workspace,
                           os.path.join(ML_GIT_DIR, self.repo_type)):
            clear(stale_path)

        restore_commands = (
            MLGIT_ENTITY_INIT % self.repo_type,
            MLGIT_CHECKOUT % (self.repo_type, DATASET_NAME),
        )
        for command in restore_commands:
            self.assertNotIn(ERROR_MESSAGE, check_output(command))

        ws_path = os.path.join(self.tmp_dir, self.repo_type, DATASET_NAME)
        self.assertTrue(os.path.exists(ws_path))
        for file_name in ('newfile0', 'newfile1'):
            self.assertTrue(os.path.isfile(os.path.join(ws_path, file_name)))
Esempio n. 23
0
    def test_01_push_and_checkout(self):
        """Push ``dataset-ex`` to a gdriveh store and check the tag out again.

        After the push, ``git describe --tags`` run inside the metadata
        directory must report version 2. The local state is then cleared,
        the repository re-initialised and the tag checked out; the objects,
        refs and cache directories, the spec file and ``newfile0`` must
        exist afterwards.

        The working directory is switched back to ``self.tmp_dir`` even when
        the tag assertion fails, so a failure does not leave the process
        inside the metadata directory.
        """
        cpath = 'credentials-json'
        init_repository('dataset', self, store_type='gdriveh', profile=cpath)
        add_file(self, 'dataset', '--bumpversion', 'new')
        metadata_path = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset',
                                     'metadata')
        expected_commit_message = messages[17] % (
            metadata_path,
            os.path.join('computer-vision', 'images', 'dataset-ex'))
        self.assertIn(expected_commit_message,
                      check_output('ml-git dataset commit dataset-ex'))

        head_file = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset', 'refs',
                                 'dataset-ex', 'HEAD')
        self.assertTrue(os.path.exists(head_file))
        self.assertNotIn(ERROR_MESSAGE,
                         check_output('ml-git dataset push dataset-ex'))

        tag = 'computer-vision__images__dataset-ex__2'
        # ``git describe`` has to run inside the metadata repository.
        os.chdir(metadata_path)
        try:
            self.assertIn(tag, check_output('git describe --tags'))
        finally:
            os.chdir(self.tmp_dir)

        workspace = os.path.join(self.tmp_dir, 'dataset')
        clear(workspace)
        clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
        init_repository('dataset', self, store_type='gdriveh', profile=cpath)

        self.assertNotIn(ERROR_MESSAGE,
                         check_output('ml-git dataset checkout %s' % tag))

        entity_git_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset')
        entity_workspace = os.path.join(self.tmp_dir, 'dataset',
                                        'computer-vision', 'images',
                                        'dataset-ex')
        expected_paths = (
            os.path.join(entity_git_dir, 'objects'),
            os.path.join(entity_git_dir, 'refs'),
            os.path.join(entity_git_dir, 'cache'),
            os.path.join(entity_workspace, 'newfile0'),
            os.path.join(entity_workspace, 'dataset-ex.spec'),
        )
        for expected_path in expected_paths:
            self.assertTrue(os.path.exists(expected_path))
Esempio n. 24
0
    def test_04_push_with_wrong_repository(self):
        """Push must fail and mention the git path once that path is cleared."""
        entity = 'dataset'
        entity_name = 'dataset-ex'
        init_repository(entity, self)
        add_file(self, entity, '--bumpversion', 'new')

        entity_git_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset')
        metadata_path = os.path.join(entity_git_dir, 'metadata')
        expected_commit_message = messages[17] % (
            metadata_path,
            os.path.join('computer-vision', 'images', 'dataset-ex'))
        self.assertIn(expected_commit_message,
                      check_output(MLGIT_COMMIT % (entity, entity_name, '')))

        head_file = os.path.join(entity_git_dir, 'refs', 'dataset-ex', 'HEAD')
        self.assertTrue(os.path.exists(head_file))

        # Remove the git repository the push would target.
        git_path = os.path.join(self.tmp_dir, GIT_PATH)
        clear(git_path)

        push_output = check_output(MLGIT_PUSH % (entity, entity_name))
        for expected_fragment in (ERROR_MESSAGE, git_path):
            self.assertIn(expected_fragment, push_output)
Esempio n. 25
0
    def test_04_push_with_wrong_repository(self):
        """Push must fail and mention ``GIT_PATH`` once that path is cleared."""
        init_repository(DATASETS, self)
        add_file(self, DATASETS, '--bumpversion', 'new')

        entity_git_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS)
        metadata_path = os.path.join(entity_git_dir, 'metadata')
        expected_commit_message = output_messages['INFO_COMMIT_REPO'] % (
            metadata_path, DATASET_NAME)
        self.assertIn(
            expected_commit_message,
            check_output(MLGIT_COMMIT % (DATASETS, DATASET_NAME, '')))

        head_file = os.path.join(entity_git_dir, 'refs', DATASET_NAME, 'HEAD')
        self.assertTrue(os.path.exists(head_file))

        # Remove the git repository the push would target.
        git_path = os.path.join(self.tmp_dir, GIT_PATH)
        clear(git_path)

        push_output = check_output(MLGIT_PUSH % (DATASETS, DATASET_NAME))
        for expected_fragment in (ERROR_MESSAGE, GIT_PATH):
            self.assertIn(expected_fragment, push_output)
Esempio n. 26
0
    def test_01_push_and_checkout(self):
        """Push ``datasets-ex`` to GDRIVEH storage and check the tag out again.

        After the push, ``git describe --tags`` run inside the metadata
        directory must report version 2. The local state is then cleared,
        the repository re-initialised and the tag checked out; the objects,
        refs and cache directories, the spec file and ``newfile0`` must
        exist afterwards.

        The working directory is switched back to ``self.tmp_dir`` even when
        the tag assertion fails, so a failure does not leave the process
        inside the metadata directory.
        """
        cpath = 'credentials-json'
        init_repository(DATASETS, self, storage_type=GDRIVEH, profile=cpath)
        add_file(self, DATASETS, '--bumpversion', 'new')
        metadata_path = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS,
                                     'metadata')
        expected_commit_message = output_messages['INFO_COMMIT_REPO'] % (
            metadata_path, DATASET_NAME)
        self.assertIn(expected_commit_message,
                      check_output('ml-git datasets commit datasets-ex'))

        head_file = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS, 'refs',
                                 DATASET_NAME, 'HEAD')
        self.assertTrue(os.path.exists(head_file))
        self.assertNotIn(ERROR_MESSAGE,
                         check_output('ml-git datasets push datasets-ex'))

        tag = 'computer-vision__images__datasets-ex__2'
        # ``git describe`` has to run inside the metadata repository.
        os.chdir(metadata_path)
        try:
            self.assertIn(tag, check_output('git describe --tags'))
        finally:
            os.chdir(self.tmp_dir)

        workspace = os.path.join(self.tmp_dir, DATASETS)
        clear(workspace)
        clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
        init_repository(DATASETS, self, storage_type=GDRIVEH, profile=cpath)

        self.assertNotIn(ERROR_MESSAGE,
                         check_output('ml-git datasets checkout %s' % tag))

        entity_git_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS)
        entity_workspace = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
        expected_paths = (
            os.path.join(entity_git_dir, 'objects'),
            os.path.join(entity_git_dir, 'refs'),
            os.path.join(entity_git_dir, 'cache'),
            os.path.join(entity_workspace, 'newfile0'),
            os.path.join(entity_workspace, 'datasets-ex.spec'),
        )
        for expected_path in expected_paths:
            self.assertTrue(os.path.exists(expected_path))
Esempio n. 27
0
    def test_04_push_with_wrong_bucket(self):
        """Push to a storage whose bucket is ``wrong_bucket`` must fail.

        The push output must contain ERROR_BUCKET_DOES_NOT_EXIST for the
        bucket, and the file count check must pass with 0 both before and
        after the push.
        """
        clear(SFTP_BUCKET_PATH)
        os.mkdir(SFTP_BUCKET_PATH)
        wrong_bucket = 'wrong_bucket'

        expected_init_message = (
            output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir)
        self.assertIn(expected_init_message, check_output(MLGIT_INIT))

        expected_remote_message = output_messages['INFO_ADD_REMOTE'] % (
            GIT_PATH, self.repo_type)
        self.assertIn(
            expected_remote_message,
            check_output(MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)))

        expected_storage_message = (
            output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] %
            (self.storage_type, wrong_bucket))
        # The connection options ride along in the ``--type`` placeholder.
        storage_options = (
            self.storage_type + ' --username=mlgit_user '
            '--port=9922 --endpoint-url=127.0.0.1 --private-key=' +
            FAKE_SSH_KEY_PATH)
        storage_add_command = 'ml-git repository storage add %s --type=%s' % (
            wrong_bucket, storage_options)
        self.assertIn(expected_storage_message,
                      check_output(storage_add_command))
        self.assertNotIn(ERROR_MESSAGE,
                         check_output(MLGIT_ENTITY_INIT % self.repo_type))

        create_arguments = (
            DATASET_NAME +
            ' --storage-type=sftph --mutability=strict --category=test '
            '--bucket-name=wrong_bucket')
        self.assertNotIn(
            ERROR_MESSAGE,
            check_output(MLGIT_CREATE % (DATASETS, create_arguments)))
        add_file(self, self.repo_type, '', 'new')

        local_commands = (
            MLGIT_ADD % (self.repo_type, DATASET_NAME, ''),
            MLGIT_COMMIT % (self.repo_type, DATASET_NAME, ''),
        )
        for command in local_commands:
            self.assertNotIn(ERROR_MESSAGE, check_output(command))

        number_of_files_in_bucket = 0
        self.check_amount_of_files(number_of_files_in_bucket)
        expected_push_error = (
            output_messages['ERROR_BUCKET_DOES_NOT_EXIST'] % wrong_bucket)
        self.assertIn(
            expected_push_error,
            check_output(MLGIT_PUSH % (self.repo_type, DATASET_NAME)))
        self.check_amount_of_files(number_of_files_in_bucket)
Esempio n. 28
0
 def set_up_global(self, entity_type=DATASETS):
     """Set up a global remote and storage, then clear the local ml-git dir.

     The global config file under ``self.tmp_dir`` is rewritten so that
     the S3H ``mlgit`` storage entry points to ``MINIO_ENDPOINT_URL``.

     :param entity_type: entity type the global remote is added for
         (defaults to ``DATASETS``).
     """
     self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_INIT))
     disable_wizard_in_config(self.tmp_dir)

     remote_add_command = MLGIT_REMOTE_ADD_GLOBAL % (entity_type,
                                                     'local_git_server.git')
     self.assertNotIn(ERROR_MESSAGE, check_output(remote_add_command))
     storage_add_command = MLGIT_STORAGE_ADD % (BUCKET_NAME,
                                                PROFILE + ' --global')
     self.assertNotIn(ERROR_MESSAGE, check_output(storage_add_command))

     # Load the global config, patch the endpoint and write it back.
     global_config_path = os.path.join(self.tmp_dir, GLOBAL_ML_GIT_CONFIG)
     with open(global_config_path, 'r') as config_file:
         config = yaml_processor.load(config_file)
     s3h_storages = config[STORAGE_CONFIG_KEY][StorageType.S3H.value]
     s3h_storages['mlgit']['endpoint-url'] = MINIO_ENDPOINT_URL
     with open(global_config_path, 'w') as config_file:
         yaml_processor.dump(config, config_file)

     clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
Esempio n. 29
0
    def set_up_test(self):
        """Publish ``dataset-ex`` through the API and reset the repository.

        A spec file (version 9) and four files are created in the workspace,
        then added with a version bump, committed and pushed through ``api``.
        Afterwards the ml-git directory and the workspace are cleared and
        the repository is initialised again.
        """
        init_repository('dataset', self)

        workspace = os.path.join(self.tmp_dir, 'dataset', 'dataset-ex')
        os.makedirs(workspace, exist_ok=True)

        manifest = {'files': 'MANIFEST.yaml', 'store': 's3h://mlgit'}
        spec = {
            'dataset': {
                'categories': ['computer-vision', 'images'],
                'manifest': manifest,
                'mutability': Mutability.STRICT.value,
                'name': 'dataset-ex',
                'version': 9,
            },
        }
        spec_path = os.path.join(workspace, 'dataset-ex.spec')
        with open(spec_path, 'w') as spec_file:
            yaml_processor.dump(spec, spec_file)

        os.makedirs(os.path.join(workspace, 'data'), exist_ok=True)
        file_contents = (
            ('file1', '0'),
            ('file2', '1'),
            ('file3', 'a'),
            ('file4', 'b'),
        )
        for file_name, content in file_contents:
            self.create_file(workspace, file_name, content)

        api.add('dataset', 'dataset-ex', bumpversion=True)
        api.commit('dataset', 'dataset-ex')
        api.push('dataset', 'dataset-ex')

        metadata_path = os.path.join(self.tmp_dir, self.metadata)
        self.assertTrue(os.path.exists(metadata_path))

        clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
        clear(workspace)
        init_repository('dataset', self)
Esempio n. 30
0
    def _create_entity_with_mutability(self, entity_type, mutability_type):
        """Publish ``<entity_type>-ex`` with a mutability, then clear local state.

        :param entity_type: entity type to initialise and publish.
        :param mutability_type: mutability value passed on to ``create_spec``.
        """
        entity_name = entity_type + '-ex'
        init_repository(entity_type, self)
        entity_dir = os.path.join(self.tmp_dir, entity_type)
        workspace = os.path.join(entity_dir, entity_name)
        create_spec(self, entity_type, self.tmp_dir, 1, mutability_type)
        os.makedirs(os.path.join(workspace, 'data'))

        create_file(workspace, 'file1', '0')
        self.assertNotIn(
            ERROR_MESSAGE,
            check_output(MLGIT_ADD % (entity_type, entity_name, '')))

        expected_commit_message = messages[17] % (
            os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type, 'metadata'),
            os.path.join('computer-vision', 'images', entity_name))
        self.assertIn(
            expected_commit_message,
            check_output(MLGIT_COMMIT % (entity_type, entity_name, '')))

        self.assertNotIn(
            ERROR_MESSAGE,
            check_output(MLGIT_PUSH % (entity_type, entity_name)))

        # Same removal order as before: ml-git dir, workspace, entity dir.
        stale_paths = (
            os.path.join(self.tmp_dir, ML_GIT_DIR),
            workspace,
            entity_dir,
        )
        for stale_path in stale_paths:
            clear(stale_path)