Example #1
0
 def test_13_add_storage_with_empty_parameter(self):
     """Check that adding a storage with an empty ``--type`` reports an invalid-value error."""
     expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
     self.assertIn(expected_init, check_output(MLGIT_INIT))
     disable_wizard_in_config(self.tmp_dir)
     self.check_storage()

     expected_error = output_messages['ERROR_INVALID_VALUE_FOR'] % (
         '--type', output_messages['ERROR_EMPTY_VALUE'])
     # The bucket name is followed by a bare "--type=" so the option carries no value.
     storage_args = '{}{}'.format(BUCKET_NAME, ' --type=')
     command_output = check_output(MLGIT_STORAGE_ADD_WITHOUT_CREDENTIALS % storage_args)
     self.assertIn(expected_error, command_output)
Example #2
0
 def test_04_create_with_wrong_import_url(self):
     """Check that ``create`` reports an invalid URL when ``--import-url`` is the literal "import_url"."""
     expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
     self.assertIn(expected_init, check_output(MLGIT_INIT))
     disable_wizard_in_config(self.tmp_dir)

     expected_error = output_messages['ERROR_INVALID_URL'] % 'import_url'
     # Assemble the command piece by piece; the literal fragments are kept verbatim.
     create_command = MLGIT_CREATE % (DATASETS, DATASET_NAME)
     create_command = create_command + ' --categories=img --version=1 --import-url="import_url" '
     create_command = create_command + '--credentials-path=' + CREDENTIALS_PATH
     create_command = create_command + ' --mutability=' + STRICT
     self.assertIn(expected_error, check_output(create_command))
Example #3
0
 def test_14_add_storage_with_invalid_type(self):
     """Check that an unknown ``--type`` value is reported as invalid when adding a storage."""
     bogus_type = 'not_a_type'
     expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
     self.assertIn(expected_init, check_output(MLGIT_INIT))
     disable_wizard_in_config(self.tmp_dir)
     self.check_storage()

     # The expected text nests the storage-type message inside the generic invalid-value message.
     type_error = output_messages['ERROR_STORAGE_TYPE_INPUT_INVALID'].format(bogus_type)
     expected_error = output_messages['ERROR_INVALID_VALUE_FOR'] % ('--type', type_error)
     storage_args = '{}{}'.format(BUCKET_NAME, ' --type=' + bogus_type)
     self.assertIn(expected_error,
                   check_output(MLGIT_STORAGE_ADD_WITHOUT_CREDENTIALS % storage_args))
Example #4
0
 def test_12_add_sftph_storage_with_invalid_port(self):
     """Check that a non-numeric ``--port`` is reported as invalid when adding an sftph storage."""
     bad_port = 'port'
     expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
     self.assertIn(expected_init, check_output(MLGIT_INIT))
     disable_wizard_in_config(self.tmp_dir)
     self.check_storage()

     expected_error = output_messages['ERROR_INVALID_VALUE_FOR'] % ('--port', bad_port)
     extra_options = ' --region=any --type=sftph --port=' + bad_port
     storage_args = '{}{}'.format(BUCKET_NAME, extra_options)
     command_output = check_output(MLGIT_STORAGE_ADD_WITHOUT_CREDENTIALS % storage_args)
     self.assertIn(expected_error, command_output)
Example #5
0
 def _add_remote(self, entity_type):
     """Initialise the project, add a git remote for ``entity_type`` and verify it was saved.

     The remote is the ``GIT_PATH`` location under ``self.tmp_dir``; after the
     command runs, the same value must appear under ``config[entity_type]['git']``
     in the project's ``config.yaml``.
     """
     expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
     self.assertIn(expected_init, check_output(MLGIT_INIT))
     disable_wizard_in_config(self.tmp_dir)

     remote_path = os.path.join(self.tmp_dir, GIT_PATH)
     expected_remote = output_messages['INFO_ADD_REMOTE'] % (remote_path, entity_type)
     self.assertIn(expected_remote, check_output(MLGIT_REMOTE_ADD % (entity_type, remote_path)))

     config_path = os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml')
     with open(config_path, 'r') as config_file:
         config = yaml_processor.load(config_file)
     self.assertEqual(remote_path, config[entity_type]['git'])
Example #6
0
    def test_03_add_storage_subfolder(self):
        """Check that a storage can be added when the command runs from inside the ml-git directory."""
        expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
        self.assertIn(expected_init, check_output(MLGIT_INIT))
        disable_wizard_in_config(self.tmp_dir)

        ml_git_dir = os.path.join(self.tmp_dir, ML_GIT_DIR)
        with open(os.path.join(ml_git_dir, 'config.yaml'), 'r') as config_file:
            config = yaml_processor.load(config_file)
        # No S3H storage may be configured before the add command runs.
        self.assertNotIn(S3H, config[STORAGE_CONFIG_KEY])

        # Run the command from the ml-git directory rather than the project root.
        os.chdir(ml_git_dir)
        expected_added = output_messages['INFO_ADD_STORAGE'] % (STORAGE_TYPE, BUCKET_NAME, PROFILE)
        self.assertIn(expected_added, check_output(MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE)))
Example #7
0
    def _add_storage(self):
        """Initialise the project, add the default storage and verify the saved AWS profile.

        After the add command succeeds, ``config.yaml`` must hold ``PROFILE`` as the
        ``aws-credentials`` profile of ``BUCKET_NAME`` under the S3H storage section.
        """
        expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
        self.assertIn(expected_init, check_output(MLGIT_INIT))
        disable_wizard_in_config(self.tmp_dir)
        self.check_storage()

        expected_added = output_messages['INFO_ADD_STORAGE'] % (STORAGE_TYPE, BUCKET_NAME, PROFILE)
        self.assertIn(expected_added, check_output(MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE)))

        config_path = os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml')
        with open(config_path, 'r') as config_file:
            config = yaml_processor.load(config_file)
        bucket_config = config[STORAGE_CONFIG_KEY][S3H][BUCKET_NAME]
        self.assertEqual(PROFILE, bucket_config['aws-credentials']['profile'])
Example #8
0
 def test_10_add_storage_without_credentials(self):
     """Check that a storage added without credentials is saved with no AWS profile.

     After the ``storage add`` command reports success, the bucket entry in the
     project's ``config.yaml`` must have a ``None`` ``aws-credentials`` profile
     and the ``us-east-1`` region.
     """
     expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
     self.assertIn(expected_init, check_output(MLGIT_INIT))
     disable_wizard_in_config(self.tmp_dir)
     self.check_storage()

     expected_added = output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] % (STORAGE_TYPE, BUCKET_NAME)
     self.assertIn(expected_added,
                   check_output(MLGIT_STORAGE_ADD_WITHOUT_CREDENTIALS % BUCKET_NAME))

     # Only the load needs the open file; the assertions run after it is closed.
     with open(os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml'), 'r') as config_file:
         config = yaml_processor.load(config_file)

     bucket_config = config[STORAGE_CONFIG_KEY][S3H][BUCKET_NAME]
     # assertIsNone states the intent directly and gives a clearer failure
     # message than assertEqual(None, ...).
     self.assertIsNone(bucket_config['aws-credentials']['profile'])
     self.assertEqual('us-east-1', bucket_config['region'])
Example #9
0
 def test_18_create_without_categories_option(self):
     """Check that ``create`` without ``--categories`` reports the missing option."""
     entity_type = DATASETS
     expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
     self.assertIn(expected_init, check_output(MLGIT_INIT))
     disable_wizard_in_config(self.tmp_dir)

     # Only --version is supplied, so --categories is absent from the command line.
     create_command = MLGIT_CREATE % (entity_type, entity_type + '-ex') + ' --version=1'
     self.assertIn('Missing option "--categories"', check_output(create_command))
Example #10
0
 def test_16_create_without_mutability_option(self):
     """Check that ``create`` without ``--mutability`` reports the missing option."""
     entity_type = DATASETS
     expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
     self.assertIn(expected_init, check_output(MLGIT_INIT))
     disable_wizard_in_config(self.tmp_dir)

     expected_error = output_messages['ERROR_MISSING_OPTION'].format('mutability')
     # Categories and version are supplied; the mutability option is left out.
     create_command = MLGIT_CREATE % (entity_type, entity_type + '-ex') + ' --categories=img --version=1'
     self.assertIn(expected_error, check_output(create_command))
Example #11
0
 def add_storage_type(self, bucket, profile, storage_type):
     """Initialise the project, add a storage of ``storage_type`` and return the loaded config.

     Args:
         bucket: Bucket name passed to the storage-add command.
         profile: Credentials profile passed to the storage-add command.
         storage_type: Storage type passed to the command. When it equals
             ``STORAGE_TYPE`` the success message is expected to mention the
             profile; for any other type the profile-less message is expected.

     Returns:
         The content of the project's ``config.yaml`` as loaded by
         ``yaml_processor``.
     """
     expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
     self.assertIn(expected_init, check_output(MLGIT_INIT))
     disable_wizard_in_config(self.tmp_dir)

     result = check_output(MLGIT_STORAGE_ADD_WITH_TYPE % (bucket, profile, storage_type))
     if storage_type == STORAGE_TYPE:
         expected_added = output_messages['INFO_ADD_STORAGE'] % (storage_type, bucket, profile)
     else:
         expected_added = output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] % (storage_type, bucket)
     self.assertIn(expected_added, result)

     # Anchor the config file at the project root (self.tmp_dir), like the other
     # helpers in this file, instead of depending on the current working directory.
     config_path = os.path.join(self.tmp_dir, ML_GIT_DIR, 'config.yaml')
     with open(config_path, 'r') as config_file:
         config = yaml_processor.load(config_file)
     return config
Example #12
0
 def test_04_update_with_git_error(self):
     """Check that ``update`` reports a metadata-update error when no remote was configured.

     Only the project and a storage are set up before ``update`` is run for
     the datasets entity.
     """
     expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
     self.assertIn(expected_init, check_output(MLGIT_INIT))
     disable_wizard_in_config(self.tmp_dir)

     expected_added = output_messages['INFO_ADD_STORAGE'] % (STORAGE_TYPE, BUCKET_NAME, PROFILE)
     self.assertIn(expected_added, check_output(MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE)))

     expected_error = output_messages['ERROR_METADATA_COULD_NOT_UPDATED'].format('')
     self.assertIn(expected_error, check_output(MLGIT_UPDATE % DATASETS))
Example #13
0
 def set_up_global(self, entity_type=DATASETS):
     """Prepare a global ml-git configuration for ``entity_type``.

     Initialises the project, adds a global remote and a global storage,
     rewrites the global config file so the S3H ``mlgit`` bucket uses
     ``MINIO_ENDPOINT_URL``, and finally calls ``clear`` on the project's
     ml-git directory.

     Args:
         entity_type: Entity the global remote is registered for.
     """
     self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_INIT))
     disable_wizard_in_config(self.tmp_dir)

     add_remote_command = MLGIT_REMOTE_ADD_GLOBAL % (entity_type, 'local_git_server.git')
     self.assertNotIn(ERROR_MESSAGE, check_output(add_remote_command))
     add_storage_command = MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE + ' --global')
     self.assertNotIn(ERROR_MESSAGE, check_output(add_storage_command))

     # Read, patch and write back the global configuration file.
     global_config_path = os.path.join(self.tmp_dir, GLOBAL_ML_GIT_CONFIG)
     with open(global_config_path, 'r') as source:
         config = yaml_processor.load(source)
     s3h_storages = config[STORAGE_CONFIG_KEY][StorageType.S3H.value]
     s3h_storages['mlgit']['endpoint-url'] = MINIO_ENDPOINT_URL
     with open(global_config_path, 'w') as target:
         yaml_processor.dump(config, target)

     clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
Example #14
0
    def test_03_create_gdrive(self):
        """Check that ``create`` imports data from the links stored in ``self.gdrive_links``.

        Two datasets are created: one from the ``'test-folder'`` link and one
        from the ``'B'`` link. After each command the imported path must exist
        under the entity's ``data`` directory.
        """
        expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
        self.assertIn(expected_init, check_output(MLGIT_INIT))
        disable_wizard_in_config(self.tmp_dir)

        common_options = ' --categories=imgs --bucket-name=test'
        mutability_option = ' --mutability=%s' % STRICT
        created_message = output_messages['INFO_DATASETS_CREATED']

        # First entity: import from the 'test-folder' link.
        folder_import = ' --import-url=%s --credentials-path=%s ' % (
            self.gdrive_links['test-folder'], CREDENTIALS_PATH)
        folder_command = (MLGIT_CREATE % (DATASETS, DATASET_NAME)
                          + common_options + folder_import + mutability_option)
        self.assertIn(created_message, check_output(folder_command))

        imported_a = os.path.join(DATASETS, DATASET_NAME, 'data', 'test-folder', 'A')
        self.assertTrue(os.path.exists(imported_a))

        # Second entity: import from the 'B' link.
        single_import = ' --import-url=%s --credentials-path=%s' % (
            self.gdrive_links['B'], CREDENTIALS_PATH)
        single_command = (MLGIT_CREATE % (DATASETS, 'datasets-ex2')
                          + common_options + single_import + mutability_option)
        self.assertIn(created_message, check_output(single_command))

        imported_b = os.path.join(DATASETS, 'datasets-ex2', 'data', 'B')
        self.assertTrue(os.path.exists(imported_b))
Example #15
0
    def test_20_model_related(self):
        """Check that checking out a model with ``-d -l`` also brings back its dataset and labels.

        The test sets up models, datasets and labels entities against the same
        git server and storage, pushes one version of each (the model commit
        references ``datasets-ex`` and ``labels-ex``), wipes the workspaces and
        the per-entity ml-git data, then checks out the model tag and asserts
        that the models, datasets and labels directories exist again.
        """
        git_server = os.path.join(self.tmp_dir, GIT_PATH)

        # --- Project initialisation and models entity setup ---
        self.assertIn(
            output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir,
            check_output(MLGIT_INIT))
        disable_wizard_in_config(self.tmp_dir)
        self.assertIn(
            output_messages['INFO_ADD_REMOTE'] % (git_server, MODELS),
            check_output(MLGIT_REMOTE_ADD % (MODELS, git_server)))
        self.assertIn(
            output_messages['INFO_ADD_STORAGE'] %
            (STORAGE_TYPE, BUCKET_NAME, PROFILE),
            check_output(MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE)))
        self.assertIn(
            output_messages['INFO_METADATA_INIT'] %
            (git_server,
             os.path.join(self.tmp_dir, '.ml-git', MODELS, 'metadata')),
            check_output(MLGIT_ENTITY_INIT % MODELS))
        edit_config_yaml(os.path.join(self.tmp_dir, '.ml-git'))
        # Create the model workspace with a spec and a single 1 KiB data file.
        workspace_model = os.path.join(MODELS, MODELS + '-ex')
        os.makedirs(workspace_model)
        version = 1
        create_spec(self, MODELS, self.tmp_dir, version)
        with open(os.path.join(self.tmp_dir, workspace_model, 'file1'),
                  'wb') as z:
            z.write(b'0' * 1024)

        # --- Datasets entity setup (same remote and storage) ---
        self.assertIn(
            output_messages['INFO_ADD_REMOTE'] % (git_server, DATASETS),
            check_output(MLGIT_REMOTE_ADD % (DATASETS, git_server)))
        self.assertIn(
            output_messages['INFO_ADD_STORAGE'] %
            (STORAGE_TYPE, BUCKET_NAME, PROFILE),
            check_output(MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE)))
        self.assertIn(
            output_messages['INFO_METADATA_INIT'] %
            (git_server,
             os.path.join(self.tmp_dir, '.ml-git', DATASETS, 'metadata')),
            check_output(MLGIT_ENTITY_INIT % DATASETS))
        edit_config_yaml(os.path.join(self.tmp_dir, '.ml-git'))
        workspace_dataset = os.path.join(DATASETS, DATASETS + '-ex')
        os.makedirs(workspace_dataset)
        version = 1
        create_spec(self, DATASETS, self.tmp_dir, version)
        with open(os.path.join(self.tmp_dir, workspace_dataset, 'file1'),
                  'wb') as z:
            z.write(b'0' * 1024)

        # Text expected in the output of every push below.
        # NOTE(review): presumably a "pushed/total" progress figure — confirm.
        expected_push_result = '2.00/2.00'

        # Add, commit and push the dataset.
        self.assertIn(
            output_messages['INFO_ADDING_PATH'] % DATASETS,
            check_output(MLGIT_ADD %
                         (DATASETS, DATASET_NAME, '--bumpversion')))
        self.assertIn(
            output_messages['INFO_COMMIT_REPO'] % (os.path.join(
                self.tmp_dir, '.ml-git', DATASETS, 'metadata'), DATASET_NAME),
            check_output(MLGIT_COMMIT % (DATASETS, DATASET_NAME, '')))
        self.assertIn(expected_push_result,
                      check_output(MLGIT_PUSH % (DATASETS, DATASET_NAME)))

        # --- Labels entity setup (same remote and storage) ---
        self.assertIn(
            output_messages['INFO_ADD_REMOTE'] % (git_server, LABELS),
            check_output(MLGIT_REMOTE_ADD % (LABELS, git_server)))
        self.assertIn(
            output_messages['INFO_ADD_STORAGE'] %
            (STORAGE_TYPE, BUCKET_NAME, PROFILE),
            check_output(MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE)))
        self.assertIn(
            output_messages['INFO_METADATA_INIT'] %
            (git_server,
             os.path.join(self.tmp_dir, '.ml-git', LABELS, 'metadata')),
            check_output(MLGIT_ENTITY_INIT % LABELS))
        edit_config_yaml(os.path.join(self.tmp_dir, '.ml-git'))
        workspace_labels = os.path.join(LABELS, LABELS + '-ex')
        os.makedirs(workspace_labels)
        version = 1
        create_spec(self, LABELS, self.tmp_dir, version)
        with open(os.path.join(self.tmp_dir, workspace_labels, 'file1'),
                  'wb') as z:
            z.write(b'0' * 1024)

        # Add, commit and push the labels.
        self.assertIn(
            output_messages['INFO_ADDING_PATH'] % LABELS,
            check_output(MLGIT_ADD %
                         (LABELS, LABELS + '-ex', '--bumpversion')))
        self.assertIn(
            output_messages['INFO_COMMIT_REPO'] % (os.path.join(
                self.tmp_dir, '.ml-git', LABELS, 'metadata'), LABELS + '-ex'),
            check_output(MLGIT_COMMIT % (LABELS, LABELS + '-ex', '')))
        self.assertIn(expected_push_result,
                      check_output(MLGIT_PUSH % (LABELS, LABELS + '-ex')))

        # Add, commit and push the model; the commit links it to the dataset
        # and labels entities created above.
        self.assertIn(
            output_messages['INFO_ADDING_PATH'] % MODELS,
            check_output(MLGIT_ADD %
                         (MODELS, MODELS + '-ex', '--bumpversion')))
        self.assertIn(
            output_messages['INFO_COMMIT_REPO'] % (os.path.join(
                self.tmp_dir, '.ml-git', MODELS, 'metadata'), MODELS + '-ex'),
            check_output(MLGIT_COMMIT %
                         (MODELS, MODELS + '-ex', '--dataset=datasets-ex') +
                         ' --labels=labels-ex'))
        self.assertIn(expected_push_result,
                      check_output(MLGIT_PUSH % (MODELS, MODELS + '-ex')))
        # NOTE(review): presumably restores write permission so the files can
        # be removed below — confirm against the helper's implementation.
        set_write_read(os.path.join(self.tmp_dir, workspace_model, 'file1'))
        set_write_read(os.path.join(self.tmp_dir, workspace_dataset, 'file1'))
        set_write_read(os.path.join(self.tmp_dir, workspace_labels, 'file1'))
        if not sys.platform.startswith('linux'):
            recursive_write_read(os.path.join(self.tmp_dir, '.ml-git'))
        # Wipe every workspace and the per-entity ml-git data before checkout.
        clear(os.path.join(self.tmp_dir, MODELS))
        clear(os.path.join(self.tmp_dir, DATASETS))
        clear(os.path.join(self.tmp_dir, LABELS))
        clear(os.path.join(self.tmp_dir, '.ml-git', MODELS))
        clear(os.path.join(self.tmp_dir, '.ml-git', DATASETS))
        clear(os.path.join(self.tmp_dir, '.ml-git', LABELS))
        # Only the models metadata is re-initialised before the checkout.
        self.assertIn(
            output_messages['INFO_METADATA_INIT'] %
            (git_server,
             os.path.join(self.tmp_dir, '.ml-git', MODELS, 'metadata')),
            check_output(MLGIT_ENTITY_INIT % MODELS))
        self.assertNotIn(
            ERROR_MESSAGE,
            check_output(MLGIT_CHECKOUT %
                         (MODELS, 'computer-vision__images__models-ex__1') +
                         ' -d -l'))
        # All three entity directories must exist after the model checkout.
        self.assertTrue(os.path.exists(os.path.join(self.tmp_dir, MODELS)))
        self.assertTrue(os.path.exists(os.path.join(self.tmp_dir, DATASETS)))
        self.assertTrue(os.path.exists(os.path.join(self.tmp_dir, LABELS)))
    def test_01_change_metadata(self):
        """Check that edits to the spec and README survive add, commit, push and checkout.

        The test stages the spec and a README, then rewrites both (README to
        ``'NEW2'``, spec version to 17), commits and pushes version 17, wipes
        the project, re-initialises it and checks out the version-17 tag. The
        checked-out spec and README must hold the rewritten content.
        """
        init_repository(DATASETS, self)
        # Initially only the spec file is reported as untracked.
        self.assertRegex(
            check_output(MLGIT_STATUS % (DATASETS, DATASET_NAME)),
            DATASET_NO_COMMITS_INFO_REGEX + r'Untracked files:\s+' +
            DATASET_ADD_INFO_REGEX + r'datasets-ex.spec')

        self.assertIn(output_messages['INFO_ADDING_PATH'] % DATASETS,
                      check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, '')))

        readme = os.path.join(DATASETS, DATASET_NAME, 'README.md')

        with open(readme, 'w') as file:
            file.write('NEW')

        # The spec is now staged and the freshly written README is untracked.
        self.assertRegex(
            check_output(MLGIT_STATUS % (DATASETS, DATASET_NAME)),
            DATASET_NO_COMMITS_INFO_REGEX + r'Changes to be committed:\s+'
            r'New file: datasets-ex.spec\s+'
            r'Untracked files:\s+' + DATASET_ADD_INFO_REGEX + r'README.md')

        self.assertIn(output_messages['INFO_ADDING_PATH'] % DATASETS,
                      check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, '')))

        status = check_output(MLGIT_STATUS % (DATASETS, DATASET_NAME))

        # After the second add both files are listed as new.
        self.assertIn('New file: datasets-ex.spec', status)
        self.assertIn('New file: README.md', status)

        # Modify both already-staged files.
        with open(readme, 'w') as file:
            file.write('NEW2')

        # 'version' is overwritten with 17 just before the spec is dumped below.
        spec = {
            DATASET_SPEC_KEY: {
                'categories': ['computer-vision', 'images'],
                'manifest': {
                    'files': 'MANIFEST.yaml',
                    STORAGE_SPEC_KEY: '%s://mlgit' % S3H
                },
                'mutability': STRICT,
                'name': 'datasets-ex',
                'version': 16
            }
        }

        with open(os.path.join(DATASETS, DATASET_NAME, 'datasets-ex.spec'),
                  'w') as y:
            spec[DATASET_SPEC_KEY]['version'] = 17
            yaml_processor.dump(spec, y)

        status = check_output(MLGIT_STATUS % (DATASETS, DATASET_NAME))

        # NOTE(review): the assertions above match 'New file:' with a capital
        # N, while these patterns use a lowercase 'new file:'. If the status
        # output is always capitalised these assertNotIn checks can never
        # fail — confirm the intended casing.
        self.assertNotIn('new file: README.md', status)
        self.assertIn('README.md', status)
        self.assertNotIn('new file: datasets-ex.spec', status)
        self.assertIn('datasets-ex.spec', status)

        data_path = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
        create_file(data_path, 'file', '0', '')

        self.assertIn(output_messages['INFO_ADDING_PATH'] % DATASETS,
                      check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, '')))

        # Commit with the same version number that was written to the spec.
        self.assertIn(
            output_messages['INFO_COMMIT_REPO'] % (os.path.join(
                self.tmp_dir, ML_GIT_DIR, DATASETS, 'metadata'), DATASET_NAME),
            check_output(MLGIT_COMMIT %
                         (DATASETS, DATASET_NAME, ' --version=17')))

        self.assertNotIn(ERROR_MESSAGE,
                         check_output(MLGIT_PUSH % (DATASETS, DATASET_NAME)))

        self.assertRegex(check_output(MLGIT_STATUS % (DATASETS, DATASET_NAME)),
                         DATASET_NO_COMMITS_INFO_REGEX)

        # Wipe the local project and workspace (paths are relative to the
        # current working directory), then rebuild the project from scratch.
        clear(ML_GIT_DIR)
        clear(DATASETS)

        self.assertIn(
            output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir,
            check_output(MLGIT_INIT))
        disable_wizard_in_config(self.tmp_dir)
        self.assertIn(
            output_messages['INFO_ADD_REMOTE'] % (GIT_PATH, DATASETS),
            check_output(MLGIT_REMOTE_ADD % (DATASETS, GIT_PATH)))
        self.assertIn(
            output_messages['INFO_ADD_STORAGE'] %
            (STORAGE_TYPE, BUCKET_NAME, PROFILE),
            check_output(MLGIT_STORAGE_ADD_WITH_TYPE %
                         (BUCKET_NAME, PROFILE, STORAGE_TYPE)))
        self.assertIn(
            output_messages['INFO_METADATA_INIT'] %
            (GIT_PATH,
             os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS, 'metadata')),
            check_output(MLGIT_ENTITY_INIT % DATASETS))

        # Check out the pushed version-17 tag with the --bare option.
        self.assertNotIn(
            ERROR_MESSAGE,
            check_output(
                MLGIT_CHECKOUT %
                (DATASETS, 'computer-vision__images__datasets-ex__17 --bare')))

        spec_file = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME,
                                 'datasets-ex.spec')
        readme = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME,
                              'README.md')

        # The checked-out metadata must match what was written before the commit.
        with open(spec_file, 'r') as f:
            spec = yaml_processor.load(f)
            self.assertEqual(spec[DATASET_SPEC_KEY]['version'], 17)

        with open(readme, 'r') as f:
            self.assertEqual(f.read(), 'NEW2')