Ejemplo n.º 1
0
    def test_01_push_and_checkout(self):
        """Commit, push and re-checkout ``dataset-ex`` using a 'gdriveh' store.

        Steps: initialise the repository, add files, commit, push, verify
        that ``git describe --tags`` inside the metadata directory reports
        the expected tag, call ``clear`` on the workspace and the ml-git
        directory, re-initialise, check the tag out and assert that the
        expected directories and files exist.
        """
        cpath = 'credentials-json'
        init_repository('dataset', self, store_type='gdriveh', profile=cpath)
        add_file(self, 'dataset', '--bumpversion', 'new')
        metadata_path = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset',
                                     'metadata')
        self.assertIn(
            messages[17] %
            (metadata_path,
             os.path.join('computer-vision', 'images', 'dataset-ex')),
            check_output('ml-git dataset commit dataset-ex'))

        head_path = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset', 'refs',
                                 'dataset-ex', 'HEAD')

        self.assertTrue(os.path.exists(head_path))
        self.assertNotIn(ERROR_MESSAGE,
                         check_output('ml-git dataset push dataset-ex'))

        tag = 'computer-vision__images__dataset-ex__2'
        # `git describe` is run from inside the metadata directory.  The
        # working directory is restored in `finally` so that a failing
        # assertion does not leave the process inside `metadata_path`.
        os.chdir(metadata_path)
        try:
            self.assertIn(tag, check_output('git describe --tags'))
        finally:
            os.chdir(self.tmp_dir)

        workspace = os.path.join(self.tmp_dir, 'dataset')
        clear(workspace)
        clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
        init_repository('dataset', self, store_type='gdriveh', profile=cpath)

        self.assertNotIn(ERROR_MESSAGE,
                         check_output('ml-git dataset checkout %s' % tag))

        objects = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset', 'objects')
        refs = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset', 'refs')
        cache = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset', 'cache')
        spec_file = os.path.join(self.tmp_dir, 'dataset', 'computer-vision',
                                 'images', 'dataset-ex', 'dataset-ex.spec')
        data_file = os.path.join(self.tmp_dir, 'dataset', 'computer-vision',
                                 'images', 'dataset-ex', 'newfile0')

        # Same assertion order as before: objects, refs, cache, file, spec.
        self.assertTrue(os.path.exists(objects))
        self.assertTrue(os.path.exists(refs))
        self.assertTrue(os.path.exists(cache))
        self.assertTrue(os.path.exists(data_file))
        self.assertTrue(os.path.exists(spec_file))
Ejemplo n.º 2
0
 def add_storage_type(self, bucket, profile, storage_type):
     """Initialise the project, add a storage and return the loaded config.

     Asserts that the init output contains the 'INFO_INITIALIZED_PROJECT_IN'
     message for ``self.tmp_dir``, runs the storage-add command, and asserts
     on the message matching ``storage_type`` (with profile when it equals
     ``STORAGE_TYPE``, without profile otherwise).

     Returns:
         The result of ``yaml_processor.load`` on ``ML_GIT_DIR/config.yaml``.
     """
     expected_init = (output_messages['INFO_INITIALIZED_PROJECT_IN'] %
                      self.tmp_dir)
     self.assertIn(expected_init, check_output(MLGIT_INIT))

     add_command = MLGIT_STORAGE_ADD_WITH_TYPE % (bucket, profile,
                                                  storage_type)
     add_output = check_output(add_command)

     if storage_type != STORAGE_TYPE:
         expected_add = (output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] %
                         (storage_type, bucket))
     else:
         expected_add = (output_messages['INFO_ADD_STORAGE'] %
                         (storage_type, bucket, profile))
     self.assertIn(expected_add, add_output)

     config_file = os.path.join(ML_GIT_DIR, 'config.yaml')
     with open(config_file, 'r') as stream:
         return yaml_processor.load(stream)
Ejemplo n.º 3
0
 def _checkout_entity(self, entity_type, tag=DATASET_TAG, bare=True):
     """Initialise ``entity_type``, update it and check out ``tag``.

     With ``bare=True`` the checkout is run with `` --bare`` appended to the
     tag and the bare-mode info message is expected in the output.
     Otherwise the output must not contain ``ERROR_MESSAGE`` and the file
     ``data/file1`` must exist in the workspace.
     """
     init_repository(entity_type, self)

     metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type,
                                 'metadata')
     expected_pull = output_messages['INFO_MLGIT_PULL'] % metadata_dir
     self.assertIn(expected_pull, check_output(MLGIT_UPDATE % entity_type))

     if not bare:
         checkout_output = check_output(MLGIT_CHECKOUT % (entity_type, tag))
         self.assertNotIn(ERROR_MESSAGE, checkout_output)
         # NOTE(review): the checked path is always built from DATASETS and
         # DATASET_NAME, whatever `entity_type` is - confirm this is intended.
         checked_out_file = os.path.join(self.tmp_dir, DATASETS,
                                         DATASET_NAME, 'data', 'file1')
         self.assertTrue(os.path.exists(checked_out_file))
         return

     expected_bare = output_messages['INFO_CHECKOUT_BARE_MODE']
     bare_command = MLGIT_CHECKOUT % (entity_type, tag + ' --bare')
     self.assertIn(expected_bare, check_output(bare_command))
Ejemplo n.º 4
0
 def test_03_initialize_dataset_from_subfolder(self):
     """Run the dataset entity init from inside the ml-git directory.

     After ``set_up_init`` the working directory is changed to
     ``tmp_dir/ML_GIT_DIR`` and the init output is expected to contain
     ``messages[8]`` formatted with the git path and the metadata path.
     """
     git_dir = os.path.join(self.tmp_dir, GIT_PATH)
     self.set_up_init('dataset', git_dir)

     mlgit_dir = os.path.join(self.tmp_dir, ML_GIT_DIR)
     os.chdir(mlgit_dir)

     metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset',
                                 'metadata')
     expected = messages[8] % (os.path.join(self.tmp_dir, GIT_PATH),
                               metadata_dir)
     init_output = check_output(MLGIT_ENTITY_INIT % 'dataset')
     self.assertIn(expected, init_output)
Ejemplo n.º 5
0
    def test_03_checkout(self):
        """Push ``dataset-ex`` to the configured store and check it out again.

        The spec is created with version 1 and 'strict' mutability.  After
        init / remote add / store add / entity init, files are added,
        committed and pushed.  The workspace and the dataset part of the
        ml-git directory are then passed to ``clear``, the entity is
        re-initialised, the tag is checked out, and ``newfile0`` ...
        ``newfile4`` are expected to exist as regular files.
        """
        entity_name = 'dataset-ex'

        os.makedirs(self.workspace)
        create_spec(self, self.repo_type, self.tmp_dir,
                    version=1, mutability='strict',
                    store_type=self.store_type)

        self.assertIn(messages[0], check_output(MLGIT_INIT))

        expected_remote = messages[2] % (GIT_PATH, self.repo_type)
        remote_output = check_output(MLGIT_REMOTE_ADD %
                                     (self.repo_type, GIT_PATH))
        self.assertIn(expected_remote, remote_output)

        expected_store = messages[87] % (self.store_type, self.bucket)
        store_output = check_output(
            'ml-git repository store add %s --type=%s' %
            (self.bucket, self.store_type))
        self.assertIn(expected_store, store_output)

        self.assertNotIn(ERROR_MESSAGE,
                         check_output(MLGIT_ENTITY_INIT % 'dataset'))

        add_file(self, self.repo_type, '', 'new')

        relative_metadata = os.path.join(ML_GIT_DIR, 'dataset', 'metadata')
        expected_commit = messages[17] % (
            os.path.join(self.tmp_dir, relative_metadata),
            os.path.join('computer-vision', 'images', entity_name))
        commit_output = check_output(MLGIT_COMMIT %
                                     (self.repo_type, entity_name, ''))
        self.assertIn(expected_commit, commit_output)

        head_file = os.path.join(ML_GIT_DIR, 'dataset', 'refs', entity_name,
                                 'HEAD')
        self.assertTrue(os.path.exists(head_file))

        # The connection string in the environment must match the test's
        # account before pushing.
        self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'),
                         self.dev_store_account_)
        push_output = check_output(MLGIT_PUSH % (self.repo_type, entity_name))
        self.assertNotIn(ERROR_MESSAGE, push_output)

        clear(self.workspace)
        clear(os.path.join(ML_GIT_DIR, 'dataset'))
        self.assertNotIn(ERROR_MESSAGE,
                         check_output(MLGIT_ENTITY_INIT % self.repo_type))

        # Same environment check before checking the tag out.
        self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'),
                         self.dev_store_account_)
        checkout_output = check_output(
            MLGIT_CHECKOUT %
            (self.repo_type, 'computer-vision__images__dataset-ex__1'))
        self.assertNotIn(ERROR_MESSAGE, checkout_output)

        ws_path = os.path.join(self.tmp_dir, 'dataset', 'computer-vision',
                               'images', entity_name)
        for index in range(5):
            restored = os.path.join(ws_path, 'newfile%d' % index)
            self.assertTrue(os.path.isfile(restored))
Ejemplo n.º 6
0
    def test_26_adding_data_based_in_older_tag(self):
        """Create version 3 on top of an older tag and verify its contents.

        ``newfile5`` is added after checking out ``DATASET_TAG``; the same
        tag is then checked out again (6 files expected) and ``newfile6`` is
        added with ``--version=3``.  After passing the entity metadata and
        the workspace to ``clear`` and re-initialising, the ``__3`` tag is
        checked out: ``newfile5`` must be absent, ``newfile6`` present, and
        7 files are expected.
        """
        entity = DATASETS
        self.set_up_checkout(entity)

        first_checkout = check_output(MLGIT_CHECKOUT % (entity, DATASET_TAG))
        self.assertNotIn(ERROR_MESSAGE, first_checkout)

        workspace = os.path.join(self.tmp_dir, entity, entity + '-ex')
        create_file(workspace, 'newfile5', '0', file_path='')
        populate_entity_with_new_data(self, entity)

        # Go back to the older tag before adding the next file.
        second_checkout = check_output(MLGIT_CHECKOUT %
                                       (DATASETS, DATASET_TAG))
        self.assertNotIn(ERROR_MESSAGE, second_checkout)
        expected_files_in_tag_1 = 6
        self.check_amount_of_files(entity, expected_files_in_tag_1,
                                   sampling=False)

        create_file(workspace, 'newfile6', '0', file_path='')
        populate_entity_with_new_data(self, entity, bumpversion='',
                                      version='--version=3')

        clear(os.path.join(self.tmp_dir, ML_GIT_DIR, entity))
        clear(workspace)

        expected_init = output_messages['INFO_METADATA_INIT'] % (
            os.path.join(self.tmp_dir, GIT_PATH),
            os.path.join(self.tmp_dir, ML_GIT_DIR, entity, 'metadata'))
        self.assertIn(expected_init, check_output(MLGIT_ENTITY_INIT % entity))

        third_checkout = check_output(
            MLGIT_CHECKOUT %
            (entity, 'computer-vision__images__datasets-ex__3'))
        self.assertNotIn(ERROR_MESSAGE, third_checkout)

        path_of_tag_2_file = os.path.join(self.tmp_dir, entity,
                                          entity + '-ex', 'newfile5')
        path_of_tag_3_file = os.path.join(self.tmp_dir, entity,
                                          entity + '-ex', 'newfile6')
        self.assertFalse(os.path.exists(path_of_tag_2_file))
        self.assertTrue(os.path.exists(path_of_tag_3_file))

        expected_files_in_tag_3 = 7
        self.check_amount_of_files(entity, expected_files_in_tag_3,
                                   sampling=False)
Ejemplo n.º 7
0
    def test_02_fetch_with_group_sample(self):
        """Fetch with a group sample and expect no error plus a hashfs dir."""
        self.set_up_fetch()

        fetch_command = MLGIT_FETCH % (
            'dataset', 'computer-vision__images__dataset-ex__1')
        fetch_command += ' --sample-type=group --sampling=1:3 --seed=4'
        self.assertNotIn(ERROR_MESSAGE, check_output(fetch_command))

        hashfs_dir = os.path.join(ML_GIT_DIR, 'dataset', 'objects', 'hashfs')
        self.assertTrue(os.path.exists(hashfs_dir))
Ejemplo n.º 8
0
 def test_07_update_some_entities(self):
     """Update with only datasets and models set up.

     The update output is validated for DATASETS and MODELS and must not
     contain the pull message for the LABELS metadata path.
     """
     for entity in (DATASETS, MODELS):
         self._setup_update_entity(entity)

     update_output = check_output(MLGIT_REPOSITORY_UPDATE)
     self._check_update_output(update_output, DATASETS, MODELS)

     labels_metadata = os.path.join(self.tmp_dir, ML_GIT_DIR, LABELS,
                                    'metadata')
     unexpected = output_messages['INFO_MLGIT_PULL'] % labels_metadata
     self.assertNotIn(unexpected, update_output)
Ejemplo n.º 9
0
 def test_01_status_after_put_on_new_file_in_dataset(self):
     """Status must list the spec and the new file as untracked."""
     self.set_up_status(DATASETS)

     entity_dir = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
     create_file(entity_dir, 'file', '0', '')

     status_output = check_output(MLGIT_STATUS % (DATASETS, DATASET_NAME))
     # Pattern pieces are concatenated in the same order as the expected
     # sections of the status output.
     expected_pattern = ''.join([
         DATASET_NO_COMMITS_INFO_REGEX,
         r'Untracked files:\s+',
         DATASET_ADD_INFO_REGEX,
         r'datasets-ex\.spec\s+',
         r'file',
     ])
     self.assertRegex(status_output, expected_pattern)
Ejemplo n.º 10
0
    def test_05_fetch_with_range_sample(self):
        """Fetch with a range sample (2:4:1) and expect no error message."""
        self.set_up_fetch()

        tag = 'computer-vision__images__dataset-ex__1'
        sample_options = ' --sample-type=range --sampling=2:4:1'
        fetch_output = check_output(MLGIT_FETCH % ('dataset', tag) +
                                    sample_options)
        self.assertNotIn(ERROR_MESSAGE, fetch_output)
Ejemplo n.º 11
0
    def test_14_random_sample_with_frequency_greater_or_equal_list_size(self):
        """Fetch with random sampling 2:10 and expect ``messages[31]``."""
        self.set_up_fetch()

        expected = messages[31]
        fetch_command = MLGIT_FETCH % (
            'dataset', 'computer-vision__images__dataset-ex__1')
        fetch_command += ' --sample-type=random --sampling=2:10 --seed=3'
        self.assertIn(expected, check_output(fetch_command))
Ejemplo n.º 12
0
    def test_11_checkout_with_random_sample(self):
        """Run a random-sample (2:3, seed 3) command and expect no error.

        NOTE(review): the test name mentions checkout but the command is
        built from MLGIT_FETCH - confirm which one is intended.
        """
        self.set_up_fetch()

        tag = 'computer-vision__images__dataset-ex__1'
        command = (MLGIT_FETCH % ('dataset', tag) +
                   ' --sample-type=random --sampling=2:3 --seed=3')
        command_output = check_output(command)
        self.assertNotIn(ERROR_MESSAGE, command_output)
Ejemplo n.º 13
0
    def test_03_unlock_flexible_mode(self):
        """Unlock ``data/file1`` with FLEXIBLE mutability.

        The file starts with a link count of 2; after unlocking, the
        permissions-changed message is expected and the file must be
        writable.
        """
        self.set_up_unlock(DATASETS, FLEXIBLE)
        relative_file = 'data/file1'

        link_count = os.stat(self.file_path).st_nlink
        self.assertEqual(2, link_count)

        expected = (output_messages['INFO_PERMISSIONS_CHANGED_FOR'] %
                    relative_file)
        unlock_output = check_output(
            MLGIT_UNLOCK % (DATASETS, DATASET_NAME, relative_file))
        self.assertIn(expected, unlock_output)

        self.assertTrue(os.access(self.file_path, os.W_OK))
Ejemplo n.º 14
0
    def test_02_unlock_wrong_file(self):
        """Unlocking ``data/file10`` reports file-not-found.

        The link count of ``self.file_path`` must be 2 both before and after
        the command.
        """
        self.set_up_unlock(DATASETS, FLEXIBLE)
        missing_file = 'data/file10'

        links_before = os.stat(self.file_path).st_nlink
        self.assertEqual(2, links_before)

        expected = output_messages['ERROR_FILE_NOT_FOUND'] % missing_file
        unlock_output = check_output(
            MLGIT_UNLOCK % (DATASETS, DATASET_NAME, missing_file))
        self.assertIn(expected, unlock_output)

        links_after = os.stat(self.file_path).st_nlink
        self.assertEqual(2, links_after)
Ejemplo n.º 15
0
    def test_01_unlock_in_strict_mode(self):
        """Unlocking with STRICT mutability yields the cannot-be-strict info.

        The link count of ``self.file_path`` must be 2 both before and after
        the command.
        """
        self.set_up_unlock(DATASETS, STRICT)

        links_before = os.stat(self.file_path).st_nlink
        self.assertEqual(2, links_before)

        expected = output_messages['INFO_MUTABILITY_CANNOT_BE_STRICT']
        unlock_command = MLGIT_UNLOCK % (DATASETS, DATASET_NAME, 'data/file1')
        self.assertIn(expected, check_output(unlock_command))

        links_after = os.stat(self.file_path).st_nlink
        self.assertEqual(2, links_after)
Ejemplo n.º 16
0
    def test_07_range_sample_with_start_parameter_less_than_zero(self):
        """Fetch with range sampling -3:2:1 and expect ``messages[42]``."""
        self.set_up_fetch()

        expected = messages[42]
        tag = 'computer-vision__images__dataset-ex__1'
        fetch_command = (MLGIT_FETCH % ('dataset', tag) +
                         ' --sample-type=range --sampling=-3:2:1')
        self.assertIn(expected, check_output(fetch_command))
Ejemplo n.º 17
0
    def test_09_range_sample_with_start_parameter_equal_to_stop(self):
        """Fetch with range sampling 2:2:1 and expect ``messages[23]``."""
        self.set_up_fetch()

        expected = messages[23]
        fetch_command = MLGIT_FETCH % (
            'dataset', 'computer-vision__images__dataset-ex__1')
        fetch_command += ' --sample-type=range --sampling=2:2:1'
        fetch_output = check_output(fetch_command)
        self.assertIn(expected, fetch_output)
Ejemplo n.º 18
0
    def _remote_del(self, entity_type):
        """Delete the remote of ``entity_type`` and check the reported URL.

        The git URL is read from ``config.yaml`` in the ml-git directory
        before the delete command runs; the output must contain the
        'INFO_REMOVE_REMOTE' message formatted with that URL and the entity
        type.
        """
        config_file = os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml')
        with open(config_file, 'r') as stream:
            loaded = yaml_processor.load(stream)
            git_url = loaded[entity_type]['git']

        expected = output_messages['INFO_REMOVE_REMOTE'] % (git_url,
                                                            entity_type)
        delete_output = check_output(MLGIT_REMOTE_DEL % entity_type)
        self.assertIn(expected, delete_output)
Ejemplo n.º 19
0
    def test_12_random_sample_with_frequency_less_or_equal_zero(self):
        """Fetch with random sampling 2:-2 and expect ``messages[40]``."""
        self.set_up_fetch()

        expected = messages[40]
        tag = 'computer-vision__images__dataset-ex__1'
        sample_options = ' --sample-type=random --sampling=2:-2 --seed=3'
        fetch_output = check_output(MLGIT_FETCH % ('dataset', tag) +
                                    sample_options)
        self.assertIn(expected, fetch_output)
Ejemplo n.º 20
0
    def test_04_push_with_wrong_repository(self):
        """Push after the git path was passed to ``clear`` and expect an error.

        The push output must contain both ``ERROR_MESSAGE`` and the git path.
        """
        init_repository('dataset', self)
        add_file(self, 'dataset', '--bumpversion', 'new')

        dataset_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset')
        metadata_path = os.path.join(dataset_dir, 'metadata')
        expected_commit = messages[17] % (
            metadata_path,
            os.path.join('computer-vision', 'images', 'dataset-ex'))
        commit_output = check_output(MLGIT_COMMIT %
                                     ('dataset', 'dataset-ex', ''))
        self.assertIn(expected_commit, commit_output)

        head_file = os.path.join(dataset_dir, 'refs', 'dataset-ex', 'HEAD')
        self.assertTrue(os.path.exists(head_file))

        git_path = os.path.join(self.tmp_dir, GIT_PATH)
        clear(git_path)

        push_output = check_output(MLGIT_PUSH % ('dataset', 'dataset-ex'))
        for fragment in (ERROR_MESSAGE, git_path):
            self.assertIn(fragment, push_output)
Ejemplo n.º 21
0
    def test_04_group_sample_with_seed_parameter_negative(self):
        """Fetch with group sampling and seed -4 and expect ``messages[41]``."""
        self.set_up_fetch()

        expected = messages[41]
        base_command = MLGIT_FETCH % (
            'dataset', 'computer-vision__images__dataset-ex__1')
        fetch_output = check_output(
            base_command + ' --sample-type=group --sampling=1:2 --seed=-4')
        self.assertIn(expected, fetch_output)
Ejemplo n.º 22
0
    def test_02_clone_folder_non_empty(self):
        """Clone into a folder that already holds a file.

        The clone output is expected to contain ``messages[45]`` formatted
        with the absolute path of the clone folder.
        """
        os.mkdir(CLONE_FOLDER)
        existing_file = os.path.join(CLONE_FOLDER, "file")
        with open(existing_file, "wt") as handle:
            handle.write("0" * 2048)

        expected = messages[45] % (os.path.join(self.tmp_dir, CLONE_FOLDER))
        clone_command = MLGIT_CLONE % (self.GIT_CLONE,
                                       "--folder=" + CLONE_FOLDER)
        self.assertIn(expected, check_output(clone_command))
Ejemplo n.º 23
0
 def test_01_models_metrics(self):
     """The metrics output lists tags 1 and 2 with info tables 0 and 1."""
     repo_type = MODELS
     self.set_up_test(repo_type)

     entity_name = '{}-ex'.format(repo_type)
     metrics_output = check_output(MLGIT_MODELS_METRICS % (entity_name, ''))

     # (tag number, tag_version passed to _create_info_table), checked in
     # the same order as before.
     for tag_number, table_version in ((1, 0), (2, 1)):
         self.assertIn(self.TAG % tag_number, metrics_output)
         self.assertIn(self._create_info_table(tag_version=table_version),
                       metrics_output)
Ejemplo n.º 24
0
 def test_05_commit_tag_that_already_exists(self):
     """Committing again without a version bump reports an existing tag.

     After ``_commit_entity``, ``newfile5`` is written and added; the commit
     output must contain the 'INFO_TAG_ALREADY_EXISTS' message for the
     ``__2`` tag, and the HEAD ref file must exist.
     """
     entity_type = DATASETS
     entity_name = entity_type + '-ex'
     self._commit_entity(entity_type)

     new_file = os.path.join(self.tmp_dir, entity_type, entity_name,
                             'newfile5')
     with open(new_file, 'wt') as handle:
         handle.write('0' * 100)

     expected_add = output_messages['INFO_ADDING_PATH'] % DATASETS
     add_output = check_output(MLGIT_ADD % (entity_type, entity_name, ''))
     self.assertIn(expected_add, add_output)

     expected_commit = (output_messages['INFO_TAG_ALREADY_EXISTS'] %
                        'computer-vision__images__datasets-ex__2')
     commit_output = check_output(MLGIT_COMMIT %
                                  (entity_type, entity_name, ''))
     self.assertIn(expected_commit, commit_output)

     head_path = os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type, 'refs',
                              entity_name, 'HEAD')
     self.assertTrue(os.path.exists(head_path))
Ejemplo n.º 25
0
 def test_06_hard_entity_with_changed_dir(self):
     """Hard-reset to HEAD~1 after the entity was moved to another directory.

     Two commits are made: one with ``file1`` in the original location and
     one with ``file2`` after ``move_entity_to_dir``.  Before the reset the
     original workspace must be gone and ``file2`` present under the new
     directory; after the reset the opposite must hold.
     """
     entity_type = DATASETS
     artifact_name = DATASET_NAME
     init_repository(entity_type, self)

     entity_path = os.path.join(entity_type, artifact_name)
     add_command = MLGIT_ADD % (entity_type, artifact_name, '--bumpversion')
     commit_command = MLGIT_COMMIT % (entity_type, artifact_name, '')

     # First commit: entity in its original directory.
     create_file(entity_path, 'file1', '0', '')
     self.assertNotIn(ERROR_MESSAGE, check_output(add_command))
     self.assertNotIn(ERROR_MESSAGE, check_output(commit_command))

     # Second commit: the entity is moved after file2 has been created.
     create_file(entity_path, 'file2', '0', '')
     _, workspace, workspace_with_dir = move_entity_to_dir(
         self.tmp_dir, artifact_name, entity_type)
     self.assertNotIn(ERROR_MESSAGE, check_output(add_command))
     self.assertNotIn(ERROR_MESSAGE, check_output(commit_command))

     new_file_path = os.path.join(workspace_with_dir, artifact_name, 'file2')
     self.assertFalse(os.path.exists(workspace))
     self.assertTrue(os.path.exists(new_file_path))

     expected_reset = (output_messages['INFO_INITIALIZING_RESET'] %
                       ('--hard', 'HEAD~1'))
     reset_command = (MLGIT_RESET % (entity_type, artifact_name) +
                      ' --hard --reference=head~1')
     self.assertIn(expected_reset, check_output(reset_command))

     self.assertFalse(os.path.exists(new_file_path))
     self.assertTrue(os.path.exists(workspace))
Ejemplo n.º 26
0
 def test_06_commit_with_large_version_number(self):
     """``--version=9999999999`` is rejected for datasets, models and labels."""
     init_repository(DATASETS, self)
     create_spec(self, DATASETS, self.tmp_dir)

     version_option = ' --version=9999999999'
     targets = (
         (DATASETS, DATASET_NAME),
         (MODELS, MODELS + '-ex'),
         (LABELS, LABELS + '-ex'),
     )
     for entity_type, entity_name in targets:
         expected = (output_messages['ERROR_INVALID_VALUE_FOR'] %
                     ('--version', '9999999999'))
         commit_output = check_output(
             MLGIT_COMMIT % (entity_type, entity_name, version_option))
         self.assertIn(expected, commit_output)
Ejemplo n.º 27
0
    def test_01_fetch_metadata_specific_tag(self):
        """Fetch a specific tag and expect no error plus a hashfs directory."""
        self.set_up_fetch()

        tag = 'computer-vision__images__dataset-ex__1'
        fetch_output = check_output(MLGIT_FETCH % ('dataset', tag))
        self.assertNotIn(ERROR_MESSAGE, fetch_output)

        hashfs_dir = os.path.join(ML_GIT_DIR, 'dataset', 'objects', 'hashfs')
        self.assertTrue(os.path.exists(hashfs_dir))
Ejemplo n.º 28
0
 def test_07_update_some_entities(self):
     """Update with only 'dataset' and 'model' set up.

     The update output is validated for both entities and must not contain
     ``messages[37]`` formatted with the 'labels' metadata path.
     """
     configured = ('dataset', 'model')
     for entity in configured:
         self._setup_update_entity(entity)

     update_output = check_output(MLGIT_REPOSITORY_UPDATE)
     self._check_update_output(update_output, *configured)

     labels_metadata = os.path.join(self.tmp_dir, ML_GIT_DIR, 'labels',
                                    'metadata')
     self.assertNotIn(messages[37] % labels_metadata, update_output)
Ejemplo n.º 29
0
 def test_01_status_after_put_on_new_file_in_dataset(self):
     """Short status must list ``data/file`` under the untracked files."""
     self.set_up_status(DATASETS)

     data_path = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME, 'data')
     os.makedirs(data_path, exist_ok=True)
     create_file(data_path, 'file', '0', '')

     status_output = check_output(MLGIT_STATUS_SHORT %
                                  (DATASETS, DATASET_NAME))
     # The pattern accepts either path separator between `data` and `file`.
     expected_pattern = (
         r'Changes to be committed:\s+Untracked files:(\s|.)*data(\\|/)file(\s|.)*'
     )
     self.assertRegex(status_output, expected_pattern)
Ejemplo n.º 30
0
 def test_04_export_metrics_without_export_path(self):
     """Metrics with an export type only reports the missing-path error."""
     repo_type = MODELS
     entity_name = '{}-ex'.format(repo_type)
     self.set_up_test(repo_type)

     expected = output_messages['ERROR_MISSING_EXPORT_PATH']
     export_option = ' --export-type={}'.format(FileType.CSV.value)
     metrics_output = check_output(MLGIT_MODELS_METRICS %
                                   (entity_name, export_option))
     self.assertIn(expected, metrics_output)